diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-01 13:20:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2009-04-01 13:20:44 -0400 |
commit | c226fd659fa7b6a7b038df5ae6856a68514bacde (patch) | |
tree | a695689d4d8cdd927400747b94250521eb863686 | |
parent | c09bca786ff941ed17c5f381c4eca5b106808c51 (diff) | |
parent | d57e62b89796f751c9422801cbcd407a9f8dcdc4 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable:
Btrfs: try to free metadata pages when we free btree blocks
Btrfs: add extra flushing for renames and truncates
Btrfs: make sure btrfs_update_delayed_ref doesn't increase ref_mod
Btrfs: optimize fsyncs on old files
Btrfs: tree logging unlink/rename fixes
Btrfs: Make sure i_nlink doesn't hit zero too soon during log replay
Btrfs: limit balancing work while flushing delayed refs
Btrfs: readahead checksums during btrfs_finish_ordered_io
Btrfs: leave btree locks spinning more often
Btrfs: Only let very young transactions grow during commit
Btrfs: Check for a blocking lock before taking the spin
Btrfs: reduce stack in cow_file_range
Btrfs: reduce stalls during transaction commit
Btrfs: process the delayed reference queue in clusters
Btrfs: try to cleanup delayed refs while freeing extents
Btrfs: reduce stack usage in some crucial tree balancing functions
Btrfs: do extent allocation and reference count updates in the background
Btrfs: don't preallocate metadata blocks during btrfs_search_slot
-rw-r--r-- | fs/btrfs/Makefile | 2 | ||||
-rw-r--r-- | fs/btrfs/btrfs_inode.h | 31 | ||||
-rw-r--r-- | fs/btrfs/ctree.c | 588 | ||||
-rw-r--r-- | fs/btrfs/ctree.h | 69 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.c | 669 | ||||
-rw-r--r-- | fs/btrfs/delayed-ref.h | 193 | ||||
-rw-r--r-- | fs/btrfs/dir-item.c | 3 | ||||
-rw-r--r-- | fs/btrfs/disk-io.c | 81 | ||||
-rw-r--r-- | fs/btrfs/disk-io.h | 1 | ||||
-rw-r--r-- | fs/btrfs/extent-tree.c | 1674 | ||||
-rw-r--r-- | fs/btrfs/extent_io.c | 51 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 3 | ||||
-rw-r--r-- | fs/btrfs/file-item.c | 7 | ||||
-rw-r--r-- | fs/btrfs/file.c | 50 | ||||
-rw-r--r-- | fs/btrfs/inode-item.c | 3 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 194 | ||||
-rw-r--r-- | fs/btrfs/locking.c | 21 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 118 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 4 | ||||
-rw-r--r-- | fs/btrfs/transaction.c | 151 | ||||
-rw-r--r-- | fs/btrfs/transaction.h | 8 | ||||
-rw-r--r-- | fs/btrfs/tree-defrag.c | 2 | ||||
-rw-r--r-- | fs/btrfs/tree-log.c | 444 | ||||
-rw-r--r-- | fs/btrfs/tree-log.h | 17 |
24 files changed, 2762 insertions, 1622 deletions
diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index d2cf5a54a4b8..9adf5e4f7e96 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile | |||
@@ -8,7 +8,7 @@ btrfs-y := super.o ctree.o extent-tree.o print-tree.o root-tree.o dir-item.o \ | |||
8 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ | 8 | extent_map.o sysfs.o struct-funcs.o xattr.o ordered-data.o \ |
9 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ | 9 | extent_io.o volumes.o async-thread.o ioctl.o locking.o orphan.o \ |
10 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ | 10 | ref-cache.o export.o tree-log.o acl.o free-space-cache.o zlib.o \ |
11 | compression.o | 11 | compression.o delayed-ref.o |
12 | else | 12 | else |
13 | 13 | ||
14 | # Normal Makefile | 14 | # Normal Makefile |
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 72677ce2b74f..b30986f00b9d 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -66,6 +66,12 @@ struct btrfs_inode { | |||
66 | */ | 66 | */ |
67 | struct list_head delalloc_inodes; | 67 | struct list_head delalloc_inodes; |
68 | 68 | ||
69 | /* | ||
70 | * list for tracking inodes that must be sent to disk before a | ||
71 | * rename or truncate commit | ||
72 | */ | ||
73 | struct list_head ordered_operations; | ||
74 | |||
69 | /* the space_info for where this inode's data allocations are done */ | 75 | /* the space_info for where this inode's data allocations are done */ |
70 | struct btrfs_space_info *space_info; | 76 | struct btrfs_space_info *space_info; |
71 | 77 | ||
@@ -86,12 +92,6 @@ struct btrfs_inode { | |||
86 | */ | 92 | */ |
87 | u64 logged_trans; | 93 | u64 logged_trans; |
88 | 94 | ||
89 | /* | ||
90 | * trans that last made a change that should be fully fsync'd. This | ||
91 | * gets reset to zero each time the inode is logged | ||
92 | */ | ||
93 | u64 log_dirty_trans; | ||
94 | |||
95 | /* total number of bytes pending delalloc, used by stat to calc the | 95 | /* total number of bytes pending delalloc, used by stat to calc the |
96 | * real block usage of the file | 96 | * real block usage of the file |
97 | */ | 97 | */ |
@@ -121,6 +121,25 @@ struct btrfs_inode { | |||
121 | /* the start of block group preferred for allocations. */ | 121 | /* the start of block group preferred for allocations. */ |
122 | u64 block_group; | 122 | u64 block_group; |
123 | 123 | ||
124 | /* the fsync log has some corner cases that mean we have to check | ||
125 | * directories to see if any unlinks have been done before | ||
126 | * the directory was logged. See tree-log.c for all the | ||
127 | * details | ||
128 | */ | ||
129 | u64 last_unlink_trans; | ||
130 | |||
131 | /* | ||
132 | * ordered_data_close is set by truncate when a file that used | ||
133 | * to have good data has been truncated to zero. When it is set | ||
134 | * the btrfs file release call will add this inode to the | ||
135 | * ordered operations list so that we make sure to flush out any | ||
136 | * new data the application may have written before commit. | ||
137 | * | ||
138 | * yes, it's silly to have a single bitflag, but we might grow more | ||
139 | * of these. | ||
140 | */ | ||
141 | unsigned ordered_data_close:1; | ||
142 | |||
124 | struct inode vfs_inode; | 143 | struct inode vfs_inode; |
125 | }; | 144 | }; |
126 | 145 | ||
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 37f31b5529aa..dbb724124633 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -254,18 +254,13 @@ int btrfs_copy_root(struct btrfs_trans_handle *trans, | |||
254 | * empty_size -- a hint that you plan on doing more cow. This is the size in | 254 | * empty_size -- a hint that you plan on doing more cow. This is the size in |
255 | * bytes the allocator should try to find free next to the block it returns. | 255 | * bytes the allocator should try to find free next to the block it returns. |
256 | * This is just a hint and may be ignored by the allocator. | 256 | * This is just a hint and may be ignored by the allocator. |
257 | * | ||
258 | * prealloc_dest -- if you have already reserved a destination for the cow, | ||
259 | * this uses that block instead of allocating a new one. | ||
260 | * btrfs_alloc_reserved_extent is used to finish the allocation. | ||
261 | */ | 257 | */ |
262 | static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | 258 | static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, |
263 | struct btrfs_root *root, | 259 | struct btrfs_root *root, |
264 | struct extent_buffer *buf, | 260 | struct extent_buffer *buf, |
265 | struct extent_buffer *parent, int parent_slot, | 261 | struct extent_buffer *parent, int parent_slot, |
266 | struct extent_buffer **cow_ret, | 262 | struct extent_buffer **cow_ret, |
267 | u64 search_start, u64 empty_size, | 263 | u64 search_start, u64 empty_size) |
268 | u64 prealloc_dest) | ||
269 | { | 264 | { |
270 | u64 parent_start; | 265 | u64 parent_start; |
271 | struct extent_buffer *cow; | 266 | struct extent_buffer *cow; |
@@ -291,26 +286,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
291 | level = btrfs_header_level(buf); | 286 | level = btrfs_header_level(buf); |
292 | nritems = btrfs_header_nritems(buf); | 287 | nritems = btrfs_header_nritems(buf); |
293 | 288 | ||
294 | if (prealloc_dest) { | 289 | cow = btrfs_alloc_free_block(trans, root, buf->len, |
295 | struct btrfs_key ins; | 290 | parent_start, root->root_key.objectid, |
296 | 291 | trans->transid, level, | |
297 | ins.objectid = prealloc_dest; | 292 | search_start, empty_size); |
298 | ins.offset = buf->len; | ||
299 | ins.type = BTRFS_EXTENT_ITEM_KEY; | ||
300 | |||
301 | ret = btrfs_alloc_reserved_extent(trans, root, parent_start, | ||
302 | root->root_key.objectid, | ||
303 | trans->transid, level, &ins); | ||
304 | BUG_ON(ret); | ||
305 | cow = btrfs_init_new_buffer(trans, root, prealloc_dest, | ||
306 | buf->len, level); | ||
307 | } else { | ||
308 | cow = btrfs_alloc_free_block(trans, root, buf->len, | ||
309 | parent_start, | ||
310 | root->root_key.objectid, | ||
311 | trans->transid, level, | ||
312 | search_start, empty_size); | ||
313 | } | ||
314 | if (IS_ERR(cow)) | 293 | if (IS_ERR(cow)) |
315 | return PTR_ERR(cow); | 294 | return PTR_ERR(cow); |
316 | 295 | ||
@@ -413,7 +392,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
413 | noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | 392 | noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, |
414 | struct btrfs_root *root, struct extent_buffer *buf, | 393 | struct btrfs_root *root, struct extent_buffer *buf, |
415 | struct extent_buffer *parent, int parent_slot, | 394 | struct extent_buffer *parent, int parent_slot, |
416 | struct extent_buffer **cow_ret, u64 prealloc_dest) | 395 | struct extent_buffer **cow_ret) |
417 | { | 396 | { |
418 | u64 search_start; | 397 | u64 search_start; |
419 | int ret; | 398 | int ret; |
@@ -436,7 +415,6 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
436 | btrfs_header_owner(buf) == root->root_key.objectid && | 415 | btrfs_header_owner(buf) == root->root_key.objectid && |
437 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 416 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
438 | *cow_ret = buf; | 417 | *cow_ret = buf; |
439 | WARN_ON(prealloc_dest); | ||
440 | return 0; | 418 | return 0; |
441 | } | 419 | } |
442 | 420 | ||
@@ -447,8 +425,7 @@ noinline int btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
447 | btrfs_set_lock_blocking(buf); | 425 | btrfs_set_lock_blocking(buf); |
448 | 426 | ||
449 | ret = __btrfs_cow_block(trans, root, buf, parent, | 427 | ret = __btrfs_cow_block(trans, root, buf, parent, |
450 | parent_slot, cow_ret, search_start, 0, | 428 | parent_slot, cow_ret, search_start, 0); |
451 | prealloc_dest); | ||
452 | return ret; | 429 | return ret; |
453 | } | 430 | } |
454 | 431 | ||
@@ -617,7 +594,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, | |||
617 | err = __btrfs_cow_block(trans, root, cur, parent, i, | 594 | err = __btrfs_cow_block(trans, root, cur, parent, i, |
618 | &cur, search_start, | 595 | &cur, search_start, |
619 | min(16 * blocksize, | 596 | min(16 * blocksize, |
620 | (end_slot - i) * blocksize), 0); | 597 | (end_slot - i) * blocksize)); |
621 | if (err) { | 598 | if (err) { |
622 | btrfs_tree_unlock(cur); | 599 | btrfs_tree_unlock(cur); |
623 | free_extent_buffer(cur); | 600 | free_extent_buffer(cur); |
@@ -937,7 +914,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
937 | BUG_ON(!child); | 914 | BUG_ON(!child); |
938 | btrfs_tree_lock(child); | 915 | btrfs_tree_lock(child); |
939 | btrfs_set_lock_blocking(child); | 916 | btrfs_set_lock_blocking(child); |
940 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child, 0); | 917 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); |
941 | BUG_ON(ret); | 918 | BUG_ON(ret); |
942 | 919 | ||
943 | spin_lock(&root->node_lock); | 920 | spin_lock(&root->node_lock); |
@@ -945,6 +922,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
945 | spin_unlock(&root->node_lock); | 922 | spin_unlock(&root->node_lock); |
946 | 923 | ||
947 | ret = btrfs_update_extent_ref(trans, root, child->start, | 924 | ret = btrfs_update_extent_ref(trans, root, child->start, |
925 | child->len, | ||
948 | mid->start, child->start, | 926 | mid->start, child->start, |
949 | root->root_key.objectid, | 927 | root->root_key.objectid, |
950 | trans->transid, level - 1); | 928 | trans->transid, level - 1); |
@@ -971,6 +949,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
971 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 949 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
972 | return 0; | 950 | return 0; |
973 | 951 | ||
952 | if (trans->transaction->delayed_refs.flushing && | ||
953 | btrfs_header_nritems(mid) > 2) | ||
954 | return 0; | ||
955 | |||
974 | if (btrfs_header_nritems(mid) < 2) | 956 | if (btrfs_header_nritems(mid) < 2) |
975 | err_on_enospc = 1; | 957 | err_on_enospc = 1; |
976 | 958 | ||
@@ -979,7 +961,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
979 | btrfs_tree_lock(left); | 961 | btrfs_tree_lock(left); |
980 | btrfs_set_lock_blocking(left); | 962 | btrfs_set_lock_blocking(left); |
981 | wret = btrfs_cow_block(trans, root, left, | 963 | wret = btrfs_cow_block(trans, root, left, |
982 | parent, pslot - 1, &left, 0); | 964 | parent, pslot - 1, &left); |
983 | if (wret) { | 965 | if (wret) { |
984 | ret = wret; | 966 | ret = wret; |
985 | goto enospc; | 967 | goto enospc; |
@@ -990,7 +972,7 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
990 | btrfs_tree_lock(right); | 972 | btrfs_tree_lock(right); |
991 | btrfs_set_lock_blocking(right); | 973 | btrfs_set_lock_blocking(right); |
992 | wret = btrfs_cow_block(trans, root, right, | 974 | wret = btrfs_cow_block(trans, root, right, |
993 | parent, pslot + 1, &right, 0); | 975 | parent, pslot + 1, &right); |
994 | if (wret) { | 976 | if (wret) { |
995 | ret = wret; | 977 | ret = wret; |
996 | goto enospc; | 978 | goto enospc; |
@@ -1171,7 +1153,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1171 | wret = 1; | 1153 | wret = 1; |
1172 | } else { | 1154 | } else { |
1173 | ret = btrfs_cow_block(trans, root, left, parent, | 1155 | ret = btrfs_cow_block(trans, root, left, parent, |
1174 | pslot - 1, &left, 0); | 1156 | pslot - 1, &left); |
1175 | if (ret) | 1157 | if (ret) |
1176 | wret = 1; | 1158 | wret = 1; |
1177 | else { | 1159 | else { |
@@ -1222,7 +1204,7 @@ static noinline int push_nodes_for_insert(struct btrfs_trans_handle *trans, | |||
1222 | } else { | 1204 | } else { |
1223 | ret = btrfs_cow_block(trans, root, right, | 1205 | ret = btrfs_cow_block(trans, root, right, |
1224 | parent, pslot + 1, | 1206 | parent, pslot + 1, |
1225 | &right, 0); | 1207 | &right); |
1226 | if (ret) | 1208 | if (ret) |
1227 | wret = 1; | 1209 | wret = 1; |
1228 | else { | 1210 | else { |
@@ -1492,7 +1474,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1492 | u8 lowest_level = 0; | 1474 | u8 lowest_level = 0; |
1493 | u64 blocknr; | 1475 | u64 blocknr; |
1494 | u64 gen; | 1476 | u64 gen; |
1495 | struct btrfs_key prealloc_block; | ||
1496 | 1477 | ||
1497 | lowest_level = p->lowest_level; | 1478 | lowest_level = p->lowest_level; |
1498 | WARN_ON(lowest_level && ins_len > 0); | 1479 | WARN_ON(lowest_level && ins_len > 0); |
@@ -1501,8 +1482,6 @@ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
1501 | if (ins_len < 0) | 1482 | if (ins_len < 0) |
1502 | lowest_unlock = 2; | 1483 | lowest_unlock = 2; |
1503 | 1484 | ||
1504 | prealloc_block.objectid = 0; | ||
1505 | |||
1506 | again: | 1485 | again: |
1507 | if (p->skip_locking) | 1486 | if (p->skip_locking) |
1508 | b = btrfs_root_node(root); | 1487 | b = btrfs_root_node(root); |
@@ -1529,44 +1508,11 @@ again: | |||
1529 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { | 1508 | !btrfs_header_flag(b, BTRFS_HEADER_FLAG_WRITTEN)) { |
1530 | goto cow_done; | 1509 | goto cow_done; |
1531 | } | 1510 | } |
1532 | |||
1533 | /* ok, we have to cow, is our old prealloc the right | ||
1534 | * size? | ||
1535 | */ | ||
1536 | if (prealloc_block.objectid && | ||
1537 | prealloc_block.offset != b->len) { | ||
1538 | btrfs_release_path(root, p); | ||
1539 | btrfs_free_reserved_extent(root, | ||
1540 | prealloc_block.objectid, | ||
1541 | prealloc_block.offset); | ||
1542 | prealloc_block.objectid = 0; | ||
1543 | goto again; | ||
1544 | } | ||
1545 | |||
1546 | /* | ||
1547 | * for higher level blocks, try not to allocate blocks | ||
1548 | * with the block and the parent locks held. | ||
1549 | */ | ||
1550 | if (level > 0 && !prealloc_block.objectid) { | ||
1551 | u32 size = b->len; | ||
1552 | u64 hint = b->start; | ||
1553 | |||
1554 | btrfs_release_path(root, p); | ||
1555 | ret = btrfs_reserve_extent(trans, root, | ||
1556 | size, size, 0, | ||
1557 | hint, (u64)-1, | ||
1558 | &prealloc_block, 0); | ||
1559 | BUG_ON(ret); | ||
1560 | goto again; | ||
1561 | } | ||
1562 | |||
1563 | btrfs_set_path_blocking(p); | 1511 | btrfs_set_path_blocking(p); |
1564 | 1512 | ||
1565 | wret = btrfs_cow_block(trans, root, b, | 1513 | wret = btrfs_cow_block(trans, root, b, |
1566 | p->nodes[level + 1], | 1514 | p->nodes[level + 1], |
1567 | p->slots[level + 1], | 1515 | p->slots[level + 1], &b); |
1568 | &b, prealloc_block.objectid); | ||
1569 | prealloc_block.objectid = 0; | ||
1570 | if (wret) { | 1516 | if (wret) { |
1571 | free_extent_buffer(b); | 1517 | free_extent_buffer(b); |
1572 | ret = wret; | 1518 | ret = wret; |
@@ -1742,12 +1688,8 @@ done: | |||
1742 | * we don't really know what they plan on doing with the path | 1688 | * we don't really know what they plan on doing with the path |
1743 | * from here on, so for now just mark it as blocking | 1689 | * from here on, so for now just mark it as blocking |
1744 | */ | 1690 | */ |
1745 | btrfs_set_path_blocking(p); | 1691 | if (!p->leave_spinning) |
1746 | if (prealloc_block.objectid) { | 1692 | btrfs_set_path_blocking(p); |
1747 | btrfs_free_reserved_extent(root, | ||
1748 | prealloc_block.objectid, | ||
1749 | prealloc_block.offset); | ||
1750 | } | ||
1751 | return ret; | 1693 | return ret; |
1752 | } | 1694 | } |
1753 | 1695 | ||
@@ -1768,7 +1710,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1768 | int ret; | 1710 | int ret; |
1769 | 1711 | ||
1770 | eb = btrfs_lock_root_node(root); | 1712 | eb = btrfs_lock_root_node(root); |
1771 | ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb, 0); | 1713 | ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb); |
1772 | BUG_ON(ret); | 1714 | BUG_ON(ret); |
1773 | 1715 | ||
1774 | btrfs_set_lock_blocking(eb); | 1716 | btrfs_set_lock_blocking(eb); |
@@ -1826,7 +1768,7 @@ int btrfs_merge_path(struct btrfs_trans_handle *trans, | |||
1826 | } | 1768 | } |
1827 | 1769 | ||
1828 | ret = btrfs_cow_block(trans, root, eb, parent, slot, | 1770 | ret = btrfs_cow_block(trans, root, eb, parent, slot, |
1829 | &eb, 0); | 1771 | &eb); |
1830 | BUG_ON(ret); | 1772 | BUG_ON(ret); |
1831 | 1773 | ||
1832 | if (root->root_key.objectid == | 1774 | if (root->root_key.objectid == |
@@ -2139,7 +2081,7 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2139 | spin_unlock(&root->node_lock); | 2081 | spin_unlock(&root->node_lock); |
2140 | 2082 | ||
2141 | ret = btrfs_update_extent_ref(trans, root, lower->start, | 2083 | ret = btrfs_update_extent_ref(trans, root, lower->start, |
2142 | lower->start, c->start, | 2084 | lower->len, lower->start, c->start, |
2143 | root->root_key.objectid, | 2085 | root->root_key.objectid, |
2144 | trans->transid, level - 1); | 2086 | trans->transid, level - 1); |
2145 | BUG_ON(ret); | 2087 | BUG_ON(ret); |
@@ -2221,7 +2163,7 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2221 | ret = insert_new_root(trans, root, path, level + 1); | 2163 | ret = insert_new_root(trans, root, path, level + 1); |
2222 | if (ret) | 2164 | if (ret) |
2223 | return ret; | 2165 | return ret; |
2224 | } else { | 2166 | } else if (!trans->transaction->delayed_refs.flushing) { |
2225 | ret = push_nodes_for_insert(trans, root, path, level); | 2167 | ret = push_nodes_for_insert(trans, root, path, level); |
2226 | c = path->nodes[level]; | 2168 | c = path->nodes[level]; |
2227 | if (!ret && btrfs_header_nritems(c) < | 2169 | if (!ret && btrfs_header_nritems(c) < |
@@ -2329,66 +2271,27 @@ noinline int btrfs_leaf_free_space(struct btrfs_root *root, | |||
2329 | return ret; | 2271 | return ret; |
2330 | } | 2272 | } |
2331 | 2273 | ||
2332 | /* | 2274 | static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, |
2333 | * push some data in the path leaf to the right, trying to free up at | 2275 | struct btrfs_root *root, |
2334 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | 2276 | struct btrfs_path *path, |
2335 | * | 2277 | int data_size, int empty, |
2336 | * returns 1 if the push failed because the other node didn't have enough | 2278 | struct extent_buffer *right, |
2337 | * room, 0 if everything worked out and < 0 if there were major errors. | 2279 | int free_space, u32 left_nritems) |
2338 | */ | ||
2339 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | ||
2340 | *root, struct btrfs_path *path, int data_size, | ||
2341 | int empty) | ||
2342 | { | 2280 | { |
2343 | struct extent_buffer *left = path->nodes[0]; | 2281 | struct extent_buffer *left = path->nodes[0]; |
2344 | struct extent_buffer *right; | 2282 | struct extent_buffer *upper = path->nodes[1]; |
2345 | struct extent_buffer *upper; | ||
2346 | struct btrfs_disk_key disk_key; | 2283 | struct btrfs_disk_key disk_key; |
2347 | int slot; | 2284 | int slot; |
2348 | u32 i; | 2285 | u32 i; |
2349 | int free_space; | ||
2350 | int push_space = 0; | 2286 | int push_space = 0; |
2351 | int push_items = 0; | 2287 | int push_items = 0; |
2352 | struct btrfs_item *item; | 2288 | struct btrfs_item *item; |
2353 | u32 left_nritems; | ||
2354 | u32 nr; | 2289 | u32 nr; |
2355 | u32 right_nritems; | 2290 | u32 right_nritems; |
2356 | u32 data_end; | 2291 | u32 data_end; |
2357 | u32 this_item_size; | 2292 | u32 this_item_size; |
2358 | int ret; | 2293 | int ret; |
2359 | 2294 | ||
2360 | slot = path->slots[1]; | ||
2361 | if (!path->nodes[1]) | ||
2362 | return 1; | ||
2363 | |||
2364 | upper = path->nodes[1]; | ||
2365 | if (slot >= btrfs_header_nritems(upper) - 1) | ||
2366 | return 1; | ||
2367 | |||
2368 | btrfs_assert_tree_locked(path->nodes[1]); | ||
2369 | |||
2370 | right = read_node_slot(root, upper, slot + 1); | ||
2371 | btrfs_tree_lock(right); | ||
2372 | btrfs_set_lock_blocking(right); | ||
2373 | |||
2374 | free_space = btrfs_leaf_free_space(root, right); | ||
2375 | if (free_space < data_size) | ||
2376 | goto out_unlock; | ||
2377 | |||
2378 | /* cow and double check */ | ||
2379 | ret = btrfs_cow_block(trans, root, right, upper, | ||
2380 | slot + 1, &right, 0); | ||
2381 | if (ret) | ||
2382 | goto out_unlock; | ||
2383 | |||
2384 | free_space = btrfs_leaf_free_space(root, right); | ||
2385 | if (free_space < data_size) | ||
2386 | goto out_unlock; | ||
2387 | |||
2388 | left_nritems = btrfs_header_nritems(left); | ||
2389 | if (left_nritems == 0) | ||
2390 | goto out_unlock; | ||
2391 | |||
2392 | if (empty) | 2295 | if (empty) |
2393 | nr = 0; | 2296 | nr = 0; |
2394 | else | 2297 | else |
@@ -2397,6 +2300,7 @@ static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2397 | if (path->slots[0] >= left_nritems) | 2300 | if (path->slots[0] >= left_nritems) |
2398 | push_space += data_size; | 2301 | push_space += data_size; |
2399 | 2302 | ||
2303 | slot = path->slots[1]; | ||
2400 | i = left_nritems - 1; | 2304 | i = left_nritems - 1; |
2401 | while (i >= nr) { | 2305 | while (i >= nr) { |
2402 | item = btrfs_item_nr(left, i); | 2306 | item = btrfs_item_nr(left, i); |
@@ -2528,24 +2432,82 @@ out_unlock: | |||
2528 | } | 2432 | } |
2529 | 2433 | ||
2530 | /* | 2434 | /* |
2435 | * push some data in the path leaf to the right, trying to free up at | ||
2436 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | ||
2437 | * | ||
2438 | * returns 1 if the push failed because the other node didn't have enough | ||
2439 | * room, 0 if everything worked out and < 0 if there were major errors. | ||
2440 | */ | ||
2441 | static int push_leaf_right(struct btrfs_trans_handle *trans, struct btrfs_root | ||
2442 | *root, struct btrfs_path *path, int data_size, | ||
2443 | int empty) | ||
2444 | { | ||
2445 | struct extent_buffer *left = path->nodes[0]; | ||
2446 | struct extent_buffer *right; | ||
2447 | struct extent_buffer *upper; | ||
2448 | int slot; | ||
2449 | int free_space; | ||
2450 | u32 left_nritems; | ||
2451 | int ret; | ||
2452 | |||
2453 | if (!path->nodes[1]) | ||
2454 | return 1; | ||
2455 | |||
2456 | slot = path->slots[1]; | ||
2457 | upper = path->nodes[1]; | ||
2458 | if (slot >= btrfs_header_nritems(upper) - 1) | ||
2459 | return 1; | ||
2460 | |||
2461 | btrfs_assert_tree_locked(path->nodes[1]); | ||
2462 | |||
2463 | right = read_node_slot(root, upper, slot + 1); | ||
2464 | btrfs_tree_lock(right); | ||
2465 | btrfs_set_lock_blocking(right); | ||
2466 | |||
2467 | free_space = btrfs_leaf_free_space(root, right); | ||
2468 | if (free_space < data_size) | ||
2469 | goto out_unlock; | ||
2470 | |||
2471 | /* cow and double check */ | ||
2472 | ret = btrfs_cow_block(trans, root, right, upper, | ||
2473 | slot + 1, &right); | ||
2474 | if (ret) | ||
2475 | goto out_unlock; | ||
2476 | |||
2477 | free_space = btrfs_leaf_free_space(root, right); | ||
2478 | if (free_space < data_size) | ||
2479 | goto out_unlock; | ||
2480 | |||
2481 | left_nritems = btrfs_header_nritems(left); | ||
2482 | if (left_nritems == 0) | ||
2483 | goto out_unlock; | ||
2484 | |||
2485 | return __push_leaf_right(trans, root, path, data_size, empty, | ||
2486 | right, free_space, left_nritems); | ||
2487 | out_unlock: | ||
2488 | btrfs_tree_unlock(right); | ||
2489 | free_extent_buffer(right); | ||
2490 | return 1; | ||
2491 | } | ||
2492 | |||
2493 | /* | ||
2531 | * push some data in the path leaf to the left, trying to free up at | 2494 | * push some data in the path leaf to the left, trying to free up at |
2532 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | 2495 | * least data_size bytes. returns zero if the push worked, nonzero otherwise |
2533 | */ | 2496 | */ |
2534 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | 2497 | static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, |
2535 | *root, struct btrfs_path *path, int data_size, | 2498 | struct btrfs_root *root, |
2536 | int empty) | 2499 | struct btrfs_path *path, int data_size, |
2500 | int empty, struct extent_buffer *left, | ||
2501 | int free_space, int right_nritems) | ||
2537 | { | 2502 | { |
2538 | struct btrfs_disk_key disk_key; | 2503 | struct btrfs_disk_key disk_key; |
2539 | struct extent_buffer *right = path->nodes[0]; | 2504 | struct extent_buffer *right = path->nodes[0]; |
2540 | struct extent_buffer *left; | ||
2541 | int slot; | 2505 | int slot; |
2542 | int i; | 2506 | int i; |
2543 | int free_space; | ||
2544 | int push_space = 0; | 2507 | int push_space = 0; |
2545 | int push_items = 0; | 2508 | int push_items = 0; |
2546 | struct btrfs_item *item; | 2509 | struct btrfs_item *item; |
2547 | u32 old_left_nritems; | 2510 | u32 old_left_nritems; |
2548 | u32 right_nritems; | ||
2549 | u32 nr; | 2511 | u32 nr; |
2550 | int ret = 0; | 2512 | int ret = 0; |
2551 | int wret; | 2513 | int wret; |
@@ -2553,41 +2515,6 @@ static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2553 | u32 old_left_item_size; | 2515 | u32 old_left_item_size; |
2554 | 2516 | ||
2555 | slot = path->slots[1]; | 2517 | slot = path->slots[1]; |
2556 | if (slot == 0) | ||
2557 | return 1; | ||
2558 | if (!path->nodes[1]) | ||
2559 | return 1; | ||
2560 | |||
2561 | right_nritems = btrfs_header_nritems(right); | ||
2562 | if (right_nritems == 0) | ||
2563 | return 1; | ||
2564 | |||
2565 | btrfs_assert_tree_locked(path->nodes[1]); | ||
2566 | |||
2567 | left = read_node_slot(root, path->nodes[1], slot - 1); | ||
2568 | btrfs_tree_lock(left); | ||
2569 | btrfs_set_lock_blocking(left); | ||
2570 | |||
2571 | free_space = btrfs_leaf_free_space(root, left); | ||
2572 | if (free_space < data_size) { | ||
2573 | ret = 1; | ||
2574 | goto out; | ||
2575 | } | ||
2576 | |||
2577 | /* cow and double check */ | ||
2578 | ret = btrfs_cow_block(trans, root, left, | ||
2579 | path->nodes[1], slot - 1, &left, 0); | ||
2580 | if (ret) { | ||
2581 | /* we hit -ENOSPC, but it isn't fatal here */ | ||
2582 | ret = 1; | ||
2583 | goto out; | ||
2584 | } | ||
2585 | |||
2586 | free_space = btrfs_leaf_free_space(root, left); | ||
2587 | if (free_space < data_size) { | ||
2588 | ret = 1; | ||
2589 | goto out; | ||
2590 | } | ||
2591 | 2518 | ||
2592 | if (empty) | 2519 | if (empty) |
2593 | nr = right_nritems; | 2520 | nr = right_nritems; |
@@ -2755,6 +2682,154 @@ out: | |||
2755 | } | 2682 | } |
2756 | 2683 | ||
2757 | /* | 2684 | /* |
2685 | * push some data in the path leaf to the left, trying to free up at | ||
2686 | * least data_size bytes. returns zero if the push worked, nonzero otherwise | ||
2687 | */ | ||
2688 | static int push_leaf_left(struct btrfs_trans_handle *trans, struct btrfs_root | ||
2689 | *root, struct btrfs_path *path, int data_size, | ||
2690 | int empty) | ||
2691 | { | ||
2692 | struct extent_buffer *right = path->nodes[0]; | ||
2693 | struct extent_buffer *left; | ||
2694 | int slot; | ||
2695 | int free_space; | ||
2696 | u32 right_nritems; | ||
2697 | int ret = 0; | ||
2698 | |||
2699 | slot = path->slots[1]; | ||
2700 | if (slot == 0) | ||
2701 | return 1; | ||
2702 | if (!path->nodes[1]) | ||
2703 | return 1; | ||
2704 | |||
2705 | right_nritems = btrfs_header_nritems(right); | ||
2706 | if (right_nritems == 0) | ||
2707 | return 1; | ||
2708 | |||
2709 | btrfs_assert_tree_locked(path->nodes[1]); | ||
2710 | |||
2711 | left = read_node_slot(root, path->nodes[1], slot - 1); | ||
2712 | btrfs_tree_lock(left); | ||
2713 | btrfs_set_lock_blocking(left); | ||
2714 | |||
2715 | free_space = btrfs_leaf_free_space(root, left); | ||
2716 | if (free_space < data_size) { | ||
2717 | ret = 1; | ||
2718 | goto out; | ||
2719 | } | ||
2720 | |||
2721 | /* cow and double check */ | ||
2722 | ret = btrfs_cow_block(trans, root, left, | ||
2723 | path->nodes[1], slot - 1, &left); | ||
2724 | if (ret) { | ||
2725 | /* we hit -ENOSPC, but it isn't fatal here */ | ||
2726 | ret = 1; | ||
2727 | goto out; | ||
2728 | } | ||
2729 | |||
2730 | free_space = btrfs_leaf_free_space(root, left); | ||
2731 | if (free_space < data_size) { | ||
2732 | ret = 1; | ||
2733 | goto out; | ||
2734 | } | ||
2735 | |||
2736 | return __push_leaf_left(trans, root, path, data_size, | ||
2737 | empty, left, free_space, right_nritems); | ||
2738 | out: | ||
2739 | btrfs_tree_unlock(left); | ||
2740 | free_extent_buffer(left); | ||
2741 | return ret; | ||
2742 | } | ||
2743 | |||
2744 | /* | ||
2745 | * copy the items from slot mid onward out of leaf l into the new leaf | ||
2746 | * right, insert right into the parent and fix up the path if the slot moved. | ||
2747 | * | ||
2748 | * returns 0 if all went well and < 0 on failure. | ||
2749 | */ | ||
2750 | static noinline int copy_for_split(struct btrfs_trans_handle *trans, | ||
2751 | struct btrfs_root *root, | ||
2752 | struct btrfs_path *path, | ||
2753 | struct extent_buffer *l, | ||
2754 | struct extent_buffer *right, | ||
2755 | int slot, int mid, int nritems) | ||
2756 | { | ||
2757 | int data_copy_size; | ||
2758 | int rt_data_off; | ||
2759 | int i; | ||
2760 | int ret = 0; | ||
2761 | int wret; | ||
2762 | struct btrfs_disk_key disk_key; | ||
2763 | |||
2764 | nritems = nritems - mid; | ||
2765 | btrfs_set_header_nritems(right, nritems); | ||
2766 | data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); | ||
2767 | |||
2768 | copy_extent_buffer(right, l, btrfs_item_nr_offset(0), | ||
2769 | btrfs_item_nr_offset(mid), | ||
2770 | nritems * sizeof(struct btrfs_item)); | ||
2771 | |||
2772 | copy_extent_buffer(right, l, | ||
2773 | btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - | ||
2774 | data_copy_size, btrfs_leaf_data(l) + | ||
2775 | leaf_data_end(root, l), data_copy_size); | ||
2776 | |||
2777 | rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - | ||
2778 | btrfs_item_end_nr(l, mid); | ||
2779 | |||
2780 | for (i = 0; i < nritems; i++) { | ||
2781 | struct btrfs_item *item = btrfs_item_nr(right, i); | ||
2782 | u32 ioff; | ||
2783 | |||
2784 | if (!right->map_token) { | ||
2785 | map_extent_buffer(right, (unsigned long)item, | ||
2786 | sizeof(struct btrfs_item), | ||
2787 | &right->map_token, &right->kaddr, | ||
2788 | &right->map_start, &right->map_len, | ||
2789 | KM_USER1); | ||
2790 | } | ||
2791 | |||
2792 | ioff = btrfs_item_offset(right, item); | ||
2793 | btrfs_set_item_offset(right, item, ioff + rt_data_off); | ||
2794 | } | ||
2795 | |||
2796 | if (right->map_token) { | ||
2797 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
2798 | right->map_token = NULL; | ||
2799 | } | ||
2800 | |||
2801 | btrfs_set_header_nritems(l, mid); | ||
2802 | ret = 0; | ||
2803 | btrfs_item_key(right, &disk_key, 0); | ||
2804 | wret = insert_ptr(trans, root, path, &disk_key, right->start, | ||
2805 | path->slots[1] + 1, 1); | ||
2806 | if (wret) | ||
2807 | ret = wret; | ||
2808 | |||
2809 | btrfs_mark_buffer_dirty(right); | ||
2810 | btrfs_mark_buffer_dirty(l); | ||
2811 | BUG_ON(path->slots[0] != slot); | ||
2812 | |||
2813 | ret = btrfs_update_ref(trans, root, l, right, 0, nritems); | ||
2814 | BUG_ON(ret); | ||
2815 | |||
2816 | if (mid <= slot) { | ||
2817 | btrfs_tree_unlock(path->nodes[0]); | ||
2818 | free_extent_buffer(path->nodes[0]); | ||
2819 | path->nodes[0] = right; | ||
2820 | path->slots[0] -= mid; | ||
2821 | path->slots[1] += 1; | ||
2822 | } else { | ||
2823 | btrfs_tree_unlock(right); | ||
2824 | free_extent_buffer(right); | ||
2825 | } | ||
2826 | |||
2827 | BUG_ON(path->slots[0] < 0); | ||
2828 | |||
2829 | return ret; | ||
2830 | } | ||
2831 | |||
2832 | /* | ||
2758 | * split the path's leaf in two, making sure there is at least data_size | 2833 | * split the path's leaf in two, making sure there is at least data_size |
2759 | * available for the resulting leaf level of the path. | 2834 | * available for the resulting leaf level of the path. |
2760 | * | 2835 | * |
@@ -2771,17 +2846,14 @@ static noinline int split_leaf(struct btrfs_trans_handle *trans, | |||
2771 | int mid; | 2846 | int mid; |
2772 | int slot; | 2847 | int slot; |
2773 | struct extent_buffer *right; | 2848 | struct extent_buffer *right; |
2774 | int data_copy_size; | ||
2775 | int rt_data_off; | ||
2776 | int i; | ||
2777 | int ret = 0; | 2849 | int ret = 0; |
2778 | int wret; | 2850 | int wret; |
2779 | int double_split; | 2851 | int double_split; |
2780 | int num_doubles = 0; | 2852 | int num_doubles = 0; |
2781 | struct btrfs_disk_key disk_key; | ||
2782 | 2853 | ||
2783 | /* first try to make some room by pushing left and right */ | 2854 | /* first try to make some room by pushing left and right */ |
2784 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY) { | 2855 | if (data_size && ins_key->type != BTRFS_DIR_ITEM_KEY && |
2856 | !trans->transaction->delayed_refs.flushing) { | ||
2785 | wret = push_leaf_right(trans, root, path, data_size, 0); | 2857 | wret = push_leaf_right(trans, root, path, data_size, 0); |
2786 | if (wret < 0) | 2858 | if (wret < 0) |
2787 | return wret; | 2859 | return wret; |
@@ -2830,11 +2902,14 @@ again: | |||
2830 | write_extent_buffer(right, root->fs_info->chunk_tree_uuid, | 2902 | write_extent_buffer(right, root->fs_info->chunk_tree_uuid, |
2831 | (unsigned long)btrfs_header_chunk_tree_uuid(right), | 2903 | (unsigned long)btrfs_header_chunk_tree_uuid(right), |
2832 | BTRFS_UUID_SIZE); | 2904 | BTRFS_UUID_SIZE); |
2905 | |||
2833 | if (mid <= slot) { | 2906 | if (mid <= slot) { |
2834 | if (nritems == 1 || | 2907 | if (nritems == 1 || |
2835 | leaf_space_used(l, mid, nritems - mid) + data_size > | 2908 | leaf_space_used(l, mid, nritems - mid) + data_size > |
2836 | BTRFS_LEAF_DATA_SIZE(root)) { | 2909 | BTRFS_LEAF_DATA_SIZE(root)) { |
2837 | if (slot >= nritems) { | 2910 | if (slot >= nritems) { |
2911 | struct btrfs_disk_key disk_key; | ||
2912 | |||
2838 | btrfs_cpu_key_to_disk(&disk_key, ins_key); | 2913 | btrfs_cpu_key_to_disk(&disk_key, ins_key); |
2839 | btrfs_set_header_nritems(right, 0); | 2914 | btrfs_set_header_nritems(right, 0); |
2840 | wret = insert_ptr(trans, root, path, | 2915 | wret = insert_ptr(trans, root, path, |
@@ -2862,6 +2937,8 @@ again: | |||
2862 | if (leaf_space_used(l, 0, mid) + data_size > | 2937 | if (leaf_space_used(l, 0, mid) + data_size > |
2863 | BTRFS_LEAF_DATA_SIZE(root)) { | 2938 | BTRFS_LEAF_DATA_SIZE(root)) { |
2864 | if (!extend && data_size && slot == 0) { | 2939 | if (!extend && data_size && slot == 0) { |
2940 | struct btrfs_disk_key disk_key; | ||
2941 | |||
2865 | btrfs_cpu_key_to_disk(&disk_key, ins_key); | 2942 | btrfs_cpu_key_to_disk(&disk_key, ins_key); |
2866 | btrfs_set_header_nritems(right, 0); | 2943 | btrfs_set_header_nritems(right, 0); |
2867 | wret = insert_ptr(trans, root, path, | 2944 | wret = insert_ptr(trans, root, path, |
@@ -2894,76 +2971,16 @@ again: | |||
2894 | } | 2971 | } |
2895 | } | 2972 | } |
2896 | } | 2973 | } |
2897 | nritems = nritems - mid; | ||
2898 | btrfs_set_header_nritems(right, nritems); | ||
2899 | data_copy_size = btrfs_item_end_nr(l, mid) - leaf_data_end(root, l); | ||
2900 | |||
2901 | copy_extent_buffer(right, l, btrfs_item_nr_offset(0), | ||
2902 | btrfs_item_nr_offset(mid), | ||
2903 | nritems * sizeof(struct btrfs_item)); | ||
2904 | |||
2905 | copy_extent_buffer(right, l, | ||
2906 | btrfs_leaf_data(right) + BTRFS_LEAF_DATA_SIZE(root) - | ||
2907 | data_copy_size, btrfs_leaf_data(l) + | ||
2908 | leaf_data_end(root, l), data_copy_size); | ||
2909 | |||
2910 | rt_data_off = BTRFS_LEAF_DATA_SIZE(root) - | ||
2911 | btrfs_item_end_nr(l, mid); | ||
2912 | |||
2913 | for (i = 0; i < nritems; i++) { | ||
2914 | struct btrfs_item *item = btrfs_item_nr(right, i); | ||
2915 | u32 ioff; | ||
2916 | |||
2917 | if (!right->map_token) { | ||
2918 | map_extent_buffer(right, (unsigned long)item, | ||
2919 | sizeof(struct btrfs_item), | ||
2920 | &right->map_token, &right->kaddr, | ||
2921 | &right->map_start, &right->map_len, | ||
2922 | KM_USER1); | ||
2923 | } | ||
2924 | |||
2925 | ioff = btrfs_item_offset(right, item); | ||
2926 | btrfs_set_item_offset(right, item, ioff + rt_data_off); | ||
2927 | } | ||
2928 | |||
2929 | if (right->map_token) { | ||
2930 | unmap_extent_buffer(right, right->map_token, KM_USER1); | ||
2931 | right->map_token = NULL; | ||
2932 | } | ||
2933 | |||
2934 | btrfs_set_header_nritems(l, mid); | ||
2935 | ret = 0; | ||
2936 | btrfs_item_key(right, &disk_key, 0); | ||
2937 | wret = insert_ptr(trans, root, path, &disk_key, right->start, | ||
2938 | path->slots[1] + 1, 1); | ||
2939 | if (wret) | ||
2940 | ret = wret; | ||
2941 | |||
2942 | btrfs_mark_buffer_dirty(right); | ||
2943 | btrfs_mark_buffer_dirty(l); | ||
2944 | BUG_ON(path->slots[0] != slot); | ||
2945 | 2974 | ||
2946 | ret = btrfs_update_ref(trans, root, l, right, 0, nritems); | 2975 | ret = copy_for_split(trans, root, path, l, right, slot, mid, nritems); |
2947 | BUG_ON(ret); | 2976 | BUG_ON(ret); |
2948 | 2977 | ||
2949 | if (mid <= slot) { | ||
2950 | btrfs_tree_unlock(path->nodes[0]); | ||
2951 | free_extent_buffer(path->nodes[0]); | ||
2952 | path->nodes[0] = right; | ||
2953 | path->slots[0] -= mid; | ||
2954 | path->slots[1] += 1; | ||
2955 | } else { | ||
2956 | btrfs_tree_unlock(right); | ||
2957 | free_extent_buffer(right); | ||
2958 | } | ||
2959 | |||
2960 | BUG_ON(path->slots[0] < 0); | ||
2961 | |||
2962 | if (double_split) { | 2978 | if (double_split) { |
2963 | BUG_ON(num_doubles != 0); | 2979 | BUG_ON(num_doubles != 0); |
2964 | num_doubles++; | 2980 | num_doubles++; |
2965 | goto again; | 2981 | goto again; |
2966 | } | 2982 | } |
2983 | |||
2967 | return ret; | 2984 | return ret; |
2968 | } | 2985 | } |
2969 | 2986 | ||
@@ -3021,26 +3038,27 @@ int btrfs_split_item(struct btrfs_trans_handle *trans, | |||
3021 | return -EAGAIN; | 3038 | return -EAGAIN; |
3022 | } | 3039 | } |
3023 | 3040 | ||
3041 | btrfs_set_path_blocking(path); | ||
3024 | ret = split_leaf(trans, root, &orig_key, path, | 3042 | ret = split_leaf(trans, root, &orig_key, path, |
3025 | sizeof(struct btrfs_item), 1); | 3043 | sizeof(struct btrfs_item), 1); |
3026 | path->keep_locks = 0; | 3044 | path->keep_locks = 0; |
3027 | BUG_ON(ret); | 3045 | BUG_ON(ret); |
3028 | 3046 | ||
3047 | btrfs_unlock_up_safe(path, 1); | ||
3048 | leaf = path->nodes[0]; | ||
3049 | BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); | ||
3050 | |||
3051 | split: | ||
3029 | /* | 3052 | /* |
3030 | * make sure any changes to the path from split_leaf leave it | 3053 | * make sure any changes to the path from split_leaf leave it |
3031 | * in a blocking state | 3054 | * in a blocking state |
3032 | */ | 3055 | */ |
3033 | btrfs_set_path_blocking(path); | 3056 | btrfs_set_path_blocking(path); |
3034 | 3057 | ||
3035 | leaf = path->nodes[0]; | ||
3036 | BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item)); | ||
3037 | |||
3038 | split: | ||
3039 | item = btrfs_item_nr(leaf, path->slots[0]); | 3058 | item = btrfs_item_nr(leaf, path->slots[0]); |
3040 | orig_offset = btrfs_item_offset(leaf, item); | 3059 | orig_offset = btrfs_item_offset(leaf, item); |
3041 | item_size = btrfs_item_size(leaf, item); | 3060 | item_size = btrfs_item_size(leaf, item); |
3042 | 3061 | ||
3043 | |||
3044 | buf = kmalloc(item_size, GFP_NOFS); | 3062 | buf = kmalloc(item_size, GFP_NOFS); |
3045 | read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, | 3063 | read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf, |
3046 | path->slots[0]), item_size); | 3064 | path->slots[0]), item_size); |
@@ -3445,39 +3463,27 @@ out: | |||
3445 | } | 3463 | } |
3446 | 3464 | ||
3447 | /* | 3465 | /* |
3448 | * Given a key and some data, insert items into the tree. | 3466 | * this is a helper for btrfs_insert_empty_items, the main goal here is |
3449 | * This does all the path init required, making room in the tree if needed. | 3467 | * to save stack depth by doing the bulk of the work in a function |
3468 | * that doesn't call btrfs_search_slot | ||
3450 | */ | 3469 | */ |
3451 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | 3470 | static noinline_for_stack int |
3452 | struct btrfs_root *root, | 3471 | setup_items_for_insert(struct btrfs_trans_handle *trans, |
3453 | struct btrfs_path *path, | 3472 | struct btrfs_root *root, struct btrfs_path *path, |
3454 | struct btrfs_key *cpu_key, u32 *data_size, | 3473 | struct btrfs_key *cpu_key, u32 *data_size, |
3455 | int nr) | 3474 | u32 total_data, u32 total_size, int nr) |
3456 | { | 3475 | { |
3457 | struct extent_buffer *leaf; | ||
3458 | struct btrfs_item *item; | 3476 | struct btrfs_item *item; |
3459 | int ret = 0; | ||
3460 | int slot; | ||
3461 | int slot_orig; | ||
3462 | int i; | 3477 | int i; |
3463 | u32 nritems; | 3478 | u32 nritems; |
3464 | u32 total_size = 0; | ||
3465 | u32 total_data = 0; | ||
3466 | unsigned int data_end; | 3479 | unsigned int data_end; |
3467 | struct btrfs_disk_key disk_key; | 3480 | struct btrfs_disk_key disk_key; |
3481 | int ret; | ||
3482 | struct extent_buffer *leaf; | ||
3483 | int slot; | ||
3468 | 3484 | ||
3469 | for (i = 0; i < nr; i++) | ||
3470 | total_data += data_size[i]; | ||
3471 | |||
3472 | total_size = total_data + (nr * sizeof(struct btrfs_item)); | ||
3473 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
3474 | if (ret == 0) | ||
3475 | return -EEXIST; | ||
3476 | if (ret < 0) | ||
3477 | goto out; | ||
3478 | |||
3479 | slot_orig = path->slots[0]; | ||
3480 | leaf = path->nodes[0]; | 3485 | leaf = path->nodes[0]; |
3486 | slot = path->slots[0]; | ||
3481 | 3487 | ||
3482 | nritems = btrfs_header_nritems(leaf); | 3488 | nritems = btrfs_header_nritems(leaf); |
3483 | data_end = leaf_data_end(root, leaf); | 3489 | data_end = leaf_data_end(root, leaf); |
@@ -3489,9 +3495,6 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3489 | BUG(); | 3495 | BUG(); |
3490 | } | 3496 | } |
3491 | 3497 | ||
3492 | slot = path->slots[0]; | ||
3493 | BUG_ON(slot < 0); | ||
3494 | |||
3495 | if (slot != nritems) { | 3498 | if (slot != nritems) { |
3496 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); | 3499 | unsigned int old_data = btrfs_item_end_nr(leaf, slot); |
3497 | 3500 | ||
@@ -3547,21 +3550,60 @@ int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | |||
3547 | data_end -= data_size[i]; | 3550 | data_end -= data_size[i]; |
3548 | btrfs_set_item_size(leaf, item, data_size[i]); | 3551 | btrfs_set_item_size(leaf, item, data_size[i]); |
3549 | } | 3552 | } |
3553 | |||
3550 | btrfs_set_header_nritems(leaf, nritems + nr); | 3554 | btrfs_set_header_nritems(leaf, nritems + nr); |
3551 | btrfs_mark_buffer_dirty(leaf); | ||
3552 | 3555 | ||
3553 | ret = 0; | 3556 | ret = 0; |
3554 | if (slot == 0) { | 3557 | if (slot == 0) { |
3558 | struct btrfs_disk_key disk_key; | ||
3555 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); | 3559 | btrfs_cpu_key_to_disk(&disk_key, cpu_key); |
3556 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); | 3560 | ret = fixup_low_keys(trans, root, path, &disk_key, 1); |
3557 | } | 3561 | } |
3562 | btrfs_unlock_up_safe(path, 1); | ||
3563 | btrfs_mark_buffer_dirty(leaf); | ||
3558 | 3564 | ||
3559 | if (btrfs_leaf_free_space(root, leaf) < 0) { | 3565 | if (btrfs_leaf_free_space(root, leaf) < 0) { |
3560 | btrfs_print_leaf(root, leaf); | 3566 | btrfs_print_leaf(root, leaf); |
3561 | BUG(); | 3567 | BUG(); |
3562 | } | 3568 | } |
3569 | return ret; | ||
3570 | } | ||
3571 | |||
3572 | /* | ||
3573 | * Given a key and some data, insert items into the tree. | ||
3574 | * This does all the path init required, making room in the tree if needed. | ||
3575 | */ | ||
3576 | int btrfs_insert_empty_items(struct btrfs_trans_handle *trans, | ||
3577 | struct btrfs_root *root, | ||
3578 | struct btrfs_path *path, | ||
3579 | struct btrfs_key *cpu_key, u32 *data_size, | ||
3580 | int nr) | ||
3581 | { | ||
3582 | struct extent_buffer *leaf; | ||
3583 | int ret = 0; | ||
3584 | int slot; | ||
3585 | int i; | ||
3586 | u32 total_size = 0; | ||
3587 | u32 total_data = 0; | ||
3588 | |||
3589 | for (i = 0; i < nr; i++) | ||
3590 | total_data += data_size[i]; | ||
3591 | |||
3592 | total_size = total_data + (nr * sizeof(struct btrfs_item)); | ||
3593 | ret = btrfs_search_slot(trans, root, cpu_key, path, total_size, 1); | ||
3594 | if (ret == 0) | ||
3595 | return -EEXIST; | ||
3596 | if (ret < 0) | ||
3597 | goto out; | ||
3598 | |||
3599 | leaf = path->nodes[0]; | ||
3600 | slot = path->slots[0]; | ||
3601 | BUG_ON(slot < 0); | ||
3602 | |||
3603 | ret = setup_items_for_insert(trans, root, path, cpu_key, data_size, | ||
3604 | total_data, total_size, nr); | ||
3605 | |||
3563 | out: | 3606 | out: |
3564 | btrfs_unlock_up_safe(path, 1); | ||
3565 | return ret; | 3607 | return ret; |
3566 | } | 3608 | } |
3567 | 3609 | ||
@@ -3749,7 +3791,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3749 | } | 3791 | } |
3750 | 3792 | ||
3751 | /* delete the leaf if it is mostly empty */ | 3793 | /* delete the leaf if it is mostly empty */ |
3752 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 4) { | 3794 | if (used < BTRFS_LEAF_DATA_SIZE(root) / 4 && |
3795 | !trans->transaction->delayed_refs.flushing) { | ||
3753 | /* push_leaf_left fixes the path. | 3796 | /* push_leaf_left fixes the path. |
3754 | * make sure the path still points to our leaf | 3797 | * make sure the path still points to our leaf |
3755 | * for possible call to del_ptr below | 3798 | * for possible call to del_ptr below |
@@ -3757,6 +3800,7 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3757 | slot = path->slots[1]; | 3800 | slot = path->slots[1]; |
3758 | extent_buffer_get(leaf); | 3801 | extent_buffer_get(leaf); |
3759 | 3802 | ||
3803 | btrfs_set_path_blocking(path); | ||
3760 | wret = push_leaf_left(trans, root, path, 1, 1); | 3804 | wret = push_leaf_left(trans, root, path, 1, 1); |
3761 | if (wret < 0 && wret != -ENOSPC) | 3805 | if (wret < 0 && wret != -ENOSPC) |
3762 | ret = wret; | 3806 | ret = wret; |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 7dd1b6d0bf32..9417713542a2 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -45,6 +45,13 @@ struct btrfs_ordered_sum; | |||
45 | 45 | ||
46 | #define BTRFS_MAX_LEVEL 8 | 46 | #define BTRFS_MAX_LEVEL 8 |
47 | 47 | ||
48 | /* | ||
49 | * files bigger than this get some pre-flushing when they are added | ||
50 | * to the ordered operations list. That way we limit the total | ||
51 | * work done by the commit | ||
52 | */ | ||
53 | #define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024) | ||
54 | |||
48 | /* holds pointers to all of the tree roots */ | 55 | /* holds pointers to all of the tree roots */ |
49 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL | 56 | #define BTRFS_ROOT_TREE_OBJECTID 1ULL |
50 | 57 | ||
@@ -401,15 +408,16 @@ struct btrfs_path { | |||
401 | int locks[BTRFS_MAX_LEVEL]; | 408 | int locks[BTRFS_MAX_LEVEL]; |
402 | int reada; | 409 | int reada; |
403 | /* keep some upper locks as we walk down */ | 410 | /* keep some upper locks as we walk down */ |
404 | int keep_locks; | ||
405 | int skip_locking; | ||
406 | int lowest_level; | 411 | int lowest_level; |
407 | 412 | ||
408 | /* | 413 | /* |
409 | * set by btrfs_split_item, tells search_slot to keep all locks | 414 | * set by btrfs_split_item, tells search_slot to keep all locks |
410 | * and to force calls to keep space in the nodes | 415 | * and to force calls to keep space in the nodes |
411 | */ | 416 | */ |
412 | int search_for_split; | 417 | unsigned int search_for_split:1; |
418 | unsigned int keep_locks:1; | ||
419 | unsigned int skip_locking:1; | ||
420 | unsigned int leave_spinning:1; | ||
413 | }; | 421 | }; |
414 | 422 | ||
415 | /* | 423 | /* |
@@ -688,15 +696,18 @@ struct btrfs_fs_info { | |||
688 | struct rb_root block_group_cache_tree; | 696 | struct rb_root block_group_cache_tree; |
689 | 697 | ||
690 | struct extent_io_tree pinned_extents; | 698 | struct extent_io_tree pinned_extents; |
691 | struct extent_io_tree pending_del; | ||
692 | struct extent_io_tree extent_ins; | ||
693 | 699 | ||
694 | /* logical->physical extent mapping */ | 700 | /* logical->physical extent mapping */ |
695 | struct btrfs_mapping_tree mapping_tree; | 701 | struct btrfs_mapping_tree mapping_tree; |
696 | 702 | ||
697 | u64 generation; | 703 | u64 generation; |
698 | u64 last_trans_committed; | 704 | u64 last_trans_committed; |
699 | u64 last_trans_new_blockgroup; | 705 | |
706 | /* | ||
707 | * this is updated to the current trans every time a full commit | ||
708 | * is required instead of the faster short fsync log commits | ||
709 | */ | ||
710 | u64 last_trans_log_full_commit; | ||
700 | u64 open_ioctl_trans; | 711 | u64 open_ioctl_trans; |
701 | unsigned long mount_opt; | 712 | unsigned long mount_opt; |
702 | u64 max_extent; | 713 | u64 max_extent; |
@@ -717,12 +728,21 @@ struct btrfs_fs_info { | |||
717 | struct mutex tree_log_mutex; | 728 | struct mutex tree_log_mutex; |
718 | struct mutex transaction_kthread_mutex; | 729 | struct mutex transaction_kthread_mutex; |
719 | struct mutex cleaner_mutex; | 730 | struct mutex cleaner_mutex; |
720 | struct mutex extent_ins_mutex; | ||
721 | struct mutex pinned_mutex; | 731 | struct mutex pinned_mutex; |
722 | struct mutex chunk_mutex; | 732 | struct mutex chunk_mutex; |
723 | struct mutex drop_mutex; | 733 | struct mutex drop_mutex; |
724 | struct mutex volume_mutex; | 734 | struct mutex volume_mutex; |
725 | struct mutex tree_reloc_mutex; | 735 | struct mutex tree_reloc_mutex; |
736 | |||
737 | /* | ||
738 | * this protects the ordered operations list only while we are | ||
739 | * processing all of the entries on it. This way we make | ||
740 | * sure the commit code doesn't find the list temporarily empty | ||
741 | * because another function happens to be doing non-waiting preflush | ||
742 | * before jumping into the main commit. | ||
743 | */ | ||
744 | struct mutex ordered_operations_mutex; | ||
745 | |||
726 | struct list_head trans_list; | 746 | struct list_head trans_list; |
727 | struct list_head hashers; | 747 | struct list_head hashers; |
728 | struct list_head dead_roots; | 748 | struct list_head dead_roots; |
@@ -737,10 +757,29 @@ struct btrfs_fs_info { | |||
737 | * ordered extents | 757 | * ordered extents |
738 | */ | 758 | */ |
739 | spinlock_t ordered_extent_lock; | 759 | spinlock_t ordered_extent_lock; |
760 | |||
761 | /* | ||
762 | * all of the data=ordered extents pending writeback | ||
763 | * these can span multiple transactions and basically include | ||
764 | * every dirty data page that isn't from nodatacow | ||
765 | */ | ||
740 | struct list_head ordered_extents; | 766 | struct list_head ordered_extents; |
767 | |||
768 | /* | ||
769 | * all of the inodes that have delalloc bytes. It is possible for | ||
770 | * this list to be empty even when there is still dirty data=ordered | ||
771 | * extents waiting to finish IO. | ||
772 | */ | ||
741 | struct list_head delalloc_inodes; | 773 | struct list_head delalloc_inodes; |
742 | 774 | ||
743 | /* | 775 | /* |
776 | * special rename and truncate targets that must be on disk before | ||
777 | * we're allowed to commit. This is basically the ext3 style | ||
778 | * data=ordered list. | ||
779 | */ | ||
780 | struct list_head ordered_operations; | ||
781 | |||
782 | /* | ||
744 | * there is a pool of worker threads for checksumming during writes | 783 | * there is a pool of worker threads for checksumming during writes |
745 | * and a pool for checksumming after reads. This is because readers | 784 | * and a pool for checksumming after reads. This is because readers |
746 | * can run with FS locks held, and the writers may be waiting for | 785 | * can run with FS locks held, and the writers may be waiting for |
@@ -781,6 +820,11 @@ struct btrfs_fs_info { | |||
781 | atomic_t throttle_gen; | 820 | atomic_t throttle_gen; |
782 | 821 | ||
783 | u64 total_pinned; | 822 | u64 total_pinned; |
823 | |||
824 | /* protected by the delalloc lock, used to keep from writing | ||
825 | * metadata until there is a nice batch | ||
826 | */ | ||
827 | u64 dirty_metadata_bytes; | ||
784 | struct list_head dirty_cowonly_roots; | 828 | struct list_head dirty_cowonly_roots; |
785 | 829 | ||
786 | struct btrfs_fs_devices *fs_devices; | 830 | struct btrfs_fs_devices *fs_devices; |
@@ -1704,18 +1748,15 @@ static inline struct dentry *fdentry(struct file *file) | |||
1704 | } | 1748 | } |
1705 | 1749 | ||
1706 | /* extent-tree.c */ | 1750 | /* extent-tree.c */ |
1751 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | ||
1752 | struct btrfs_root *root, unsigned long count); | ||
1707 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1753 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1708 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
1709 | struct btrfs_root *root, u64 bytenr, | ||
1710 | u64 num_bytes, u32 *refs); | ||
1711 | int btrfs_update_pinned_extents(struct btrfs_root *root, | 1754 | int btrfs_update_pinned_extents(struct btrfs_root *root, |
1712 | u64 bytenr, u64 num, int pin); | 1755 | u64 bytenr, u64 num, int pin); |
1713 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 1756 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
1714 | struct btrfs_root *root, struct extent_buffer *leaf); | 1757 | struct btrfs_root *root, struct extent_buffer *leaf); |
1715 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | 1758 | int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, |
1716 | struct btrfs_root *root, u64 objectid, u64 bytenr); | 1759 | struct btrfs_root *root, u64 objectid, u64 bytenr); |
1717 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | ||
1718 | struct btrfs_root *root); | ||
1719 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); | 1760 | int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); |
1720 | struct btrfs_block_group_cache *btrfs_lookup_block_group( | 1761 | struct btrfs_block_group_cache *btrfs_lookup_block_group( |
1721 | struct btrfs_fs_info *info, | 1762 | struct btrfs_fs_info *info, |
@@ -1777,7 +1818,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1777 | u64 root_objectid, u64 ref_generation, | 1818 | u64 root_objectid, u64 ref_generation, |
1778 | u64 owner_objectid); | 1819 | u64 owner_objectid); |
1779 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 1820 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1780 | struct btrfs_root *root, u64 bytenr, | 1821 | struct btrfs_root *root, u64 bytenr, u64 num_bytes, |
1781 | u64 orig_parent, u64 parent, | 1822 | u64 orig_parent, u64 parent, |
1782 | u64 root_objectid, u64 ref_generation, | 1823 | u64 root_objectid, u64 ref_generation, |
1783 | u64 owner_objectid); | 1824 | u64 owner_objectid); |
@@ -1838,7 +1879,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key, | |||
1838 | int btrfs_cow_block(struct btrfs_trans_handle *trans, | 1879 | int btrfs_cow_block(struct btrfs_trans_handle *trans, |
1839 | struct btrfs_root *root, struct extent_buffer *buf, | 1880 | struct btrfs_root *root, struct extent_buffer *buf, |
1840 | struct extent_buffer *parent, int parent_slot, | 1881 | struct extent_buffer *parent, int parent_slot, |
1841 | struct extent_buffer **cow_ret, u64 prealloc_dest); | 1882 | struct extent_buffer **cow_ret); |
1842 | int btrfs_copy_root(struct btrfs_trans_handle *trans, | 1883 | int btrfs_copy_root(struct btrfs_trans_handle *trans, |
1843 | struct btrfs_root *root, | 1884 | struct btrfs_root *root, |
1844 | struct extent_buffer *buf, | 1885 | struct extent_buffer *buf, |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c new file mode 100644 index 000000000000..cbf7dc8ae3ec --- /dev/null +++ b/fs/btrfs/delayed-ref.c | |||
@@ -0,0 +1,669 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 021110-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/sort.h> | ||
21 | #include <linux/ftrace.h> | ||
22 | #include "ctree.h" | ||
23 | #include "delayed-ref.h" | ||
24 | #include "transaction.h" | ||
25 | |||
26 | /* | ||
27 | * delayed back reference update tracking. For subvolume trees | ||
28 | * we queue up extent allocations and backref maintenance for | ||
29 | * delayed processing. This avoids deep call chains where we | ||
30 | * add extents in the middle of btrfs_search_slot, and it allows | ||
31 | * us to buffer up frequently modified backrefs in an rb tree instead | ||
32 | * of hammering updates on the extent allocation tree. | ||
33 | * | ||
34 | * Right now this code is only used for reference counted trees, but | ||
35 | * the long term goal is to get rid of the similar code for delayed | ||
36 | * extent tree modifications. | ||
37 | */ | ||
38 | |||
39 | /* | ||
40 | * entries in the rb tree are ordered by the byte number of the extent | ||
41 | * and by the byte number of the parent block. | ||
42 | */ | ||
43 | static int comp_entry(struct btrfs_delayed_ref_node *ref, | ||
44 | u64 bytenr, u64 parent) | ||
45 | { | ||
46 | if (bytenr < ref->bytenr) | ||
47 | return -1; | ||
48 | if (bytenr > ref->bytenr) | ||
49 | return 1; | ||
50 | if (parent < ref->parent) | ||
51 | return -1; | ||
52 | if (parent > ref->parent) | ||
53 | return 1; | ||
54 | return 0; | ||
55 | } | ||
56 | |||
57 | /* | ||
58 | * insert a new ref into the rbtree. This returns any existing refs | ||
59 | * for the same (bytenr,parent) tuple, or NULL if the new node was properly | ||
60 | * inserted. | ||
61 | */ | ||
62 | static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, | ||
63 | u64 bytenr, u64 parent, | ||
64 | struct rb_node *node) | ||
65 | { | ||
66 | struct rb_node **p = &root->rb_node; | ||
67 | struct rb_node *parent_node = NULL; | ||
68 | struct btrfs_delayed_ref_node *entry; | ||
69 | int cmp; | ||
70 | |||
71 | while (*p) { | ||
72 | parent_node = *p; | ||
73 | entry = rb_entry(parent_node, struct btrfs_delayed_ref_node, | ||
74 | rb_node); | ||
75 | |||
76 | cmp = comp_entry(entry, bytenr, parent); | ||
77 | if (cmp < 0) | ||
78 | p = &(*p)->rb_left; | ||
79 | else if (cmp > 0) | ||
80 | p = &(*p)->rb_right; | ||
81 | else | ||
82 | return entry; | ||
83 | } | ||
84 | |||
85 | entry = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
86 | rb_link_node(node, parent_node, p); | ||
87 | rb_insert_color(node, root); | ||
88 | return NULL; | ||
89 | } | ||
90 | |||
91 | /* | ||
92 | * find an entry based on (bytenr,parent). This returns the delayed | ||
93 | * ref if it was able to find one, or NULL if nothing was in that spot | ||
94 | */ | ||
95 | static struct btrfs_delayed_ref_node *tree_search(struct rb_root *root, | ||
96 | u64 bytenr, u64 parent, | ||
97 | struct btrfs_delayed_ref_node **last) | ||
98 | { | ||
99 | struct rb_node *n = root->rb_node; | ||
100 | struct btrfs_delayed_ref_node *entry; | ||
101 | int cmp; | ||
102 | |||
103 | while (n) { | ||
104 | entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); | ||
105 | WARN_ON(!entry->in_tree); | ||
106 | if (last) | ||
107 | *last = entry; | ||
108 | |||
109 | cmp = comp_entry(entry, bytenr, parent); | ||
110 | if (cmp < 0) | ||
111 | n = n->rb_left; | ||
112 | else if (cmp > 0) | ||
113 | n = n->rb_right; | ||
114 | else | ||
115 | return entry; | ||
116 | } | ||
117 | return NULL; | ||
118 | } | ||
119 | |||
120 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | ||
121 | struct btrfs_delayed_ref_head *head) | ||
122 | { | ||
123 | struct btrfs_delayed_ref_root *delayed_refs; | ||
124 | |||
125 | delayed_refs = &trans->transaction->delayed_refs; | ||
126 | assert_spin_locked(&delayed_refs->lock); | ||
127 | if (mutex_trylock(&head->mutex)) | ||
128 | return 0; | ||
129 | |||
130 | atomic_inc(&head->node.refs); | ||
131 | spin_unlock(&delayed_refs->lock); | ||
132 | |||
133 | mutex_lock(&head->mutex); | ||
134 | spin_lock(&delayed_refs->lock); | ||
135 | if (!head->node.in_tree) { | ||
136 | mutex_unlock(&head->mutex); | ||
137 | btrfs_put_delayed_ref(&head->node); | ||
138 | return -EAGAIN; | ||
139 | } | ||
140 | btrfs_put_delayed_ref(&head->node); | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | ||
145 | struct list_head *cluster, u64 start) | ||
146 | { | ||
147 | int count = 0; | ||
148 | struct btrfs_delayed_ref_root *delayed_refs; | ||
149 | struct rb_node *node; | ||
150 | struct btrfs_delayed_ref_node *ref; | ||
151 | struct btrfs_delayed_ref_head *head; | ||
152 | |||
153 | delayed_refs = &trans->transaction->delayed_refs; | ||
154 | if (start == 0) { | ||
155 | node = rb_first(&delayed_refs->root); | ||
156 | } else { | ||
157 | ref = NULL; | ||
158 | tree_search(&delayed_refs->root, start, (u64)-1, &ref); | ||
159 | if (ref) { | ||
160 | struct btrfs_delayed_ref_node *tmp; | ||
161 | |||
162 | node = rb_prev(&ref->rb_node); | ||
163 | while (node) { | ||
164 | tmp = rb_entry(node, | ||
165 | struct btrfs_delayed_ref_node, | ||
166 | rb_node); | ||
167 | if (tmp->bytenr < start) | ||
168 | break; | ||
169 | ref = tmp; | ||
170 | node = rb_prev(&ref->rb_node); | ||
171 | } | ||
172 | node = &ref->rb_node; | ||
173 | } else | ||
174 | node = rb_first(&delayed_refs->root); | ||
175 | } | ||
176 | again: | ||
177 | while (node && count < 32) { | ||
178 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); | ||
179 | if (btrfs_delayed_ref_is_head(ref)) { | ||
180 | head = btrfs_delayed_node_to_head(ref); | ||
181 | if (list_empty(&head->cluster)) { | ||
182 | list_add_tail(&head->cluster, cluster); | ||
183 | delayed_refs->run_delayed_start = | ||
184 | head->node.bytenr; | ||
185 | count++; | ||
186 | |||
187 | WARN_ON(delayed_refs->num_heads_ready == 0); | ||
188 | delayed_refs->num_heads_ready--; | ||
189 | } else if (count) { | ||
190 | /* the goal of the clustering is to find extents | ||
191 | * that are likely to end up in the same extent | ||
192 | * leaf on disk. So, we don't want them spread | ||
193 | * all over the tree. Stop now if we've hit | ||
194 | * a head that was already in use | ||
195 | */ | ||
196 | break; | ||
197 | } | ||
198 | } | ||
199 | node = rb_next(node); | ||
200 | } | ||
201 | if (count) { | ||
202 | return 0; | ||
203 | } else if (start) { | ||
204 | /* | ||
205 | * we've gone to the end of the rbtree without finding any | ||
206 | * clusters. start from the beginning and try again | ||
207 | */ | ||
208 | start = 0; | ||
209 | node = rb_first(&delayed_refs->root); | ||
210 | goto again; | ||
211 | } | ||
212 | return 1; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * This checks to see if there are any delayed refs in the | ||
217 | * btree for a given bytenr. It returns one if it finds any | ||
218 | * and zero otherwise. | ||
219 | * | ||
220 | * If it only finds a head node, it returns 0. | ||
221 | * | ||
222 | * The idea is to use this when deciding if you can safely delete an | ||
223 | * extent from the extent allocation tree. There may be a pending | ||
224 | * ref in the rbtree that adds or removes references, so as long as this | ||
225 | * returns one you need to leave the BTRFS_EXTENT_ITEM in the extent | ||
226 | * allocation tree. | ||
227 | */ | ||
228 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr) | ||
229 | { | ||
230 | struct btrfs_delayed_ref_node *ref; | ||
231 | struct btrfs_delayed_ref_root *delayed_refs; | ||
232 | struct rb_node *prev_node; | ||
233 | int ret = 0; | ||
234 | |||
235 | delayed_refs = &trans->transaction->delayed_refs; | ||
236 | spin_lock(&delayed_refs->lock); | ||
237 | |||
238 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
239 | if (ref) { | ||
240 | prev_node = rb_prev(&ref->rb_node); | ||
241 | if (!prev_node) | ||
242 | goto out; | ||
243 | ref = rb_entry(prev_node, struct btrfs_delayed_ref_node, | ||
244 | rb_node); | ||
245 | if (ref->bytenr == bytenr) | ||
246 | ret = 1; | ||
247 | } | ||
248 | out: | ||
249 | spin_unlock(&delayed_refs->lock); | ||
250 | return ret; | ||
251 | } | ||
252 | |||
253 | /* | ||
254 | * helper function to lookup reference count | ||
255 | * | ||
256 | * the head node for delayed ref is used to store the sum of all the | ||
257 | * reference count modifications queued up in the rbtree. This way you | ||
258 | * can check to see what the reference count would be if all of the | ||
259 | * delayed refs are processed. | ||
260 | */ | ||
261 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
262 | struct btrfs_root *root, u64 bytenr, | ||
263 | u64 num_bytes, u32 *refs) | ||
264 | { | ||
265 | struct btrfs_delayed_ref_node *ref; | ||
266 | struct btrfs_delayed_ref_head *head; | ||
267 | struct btrfs_delayed_ref_root *delayed_refs; | ||
268 | struct btrfs_path *path; | ||
269 | struct extent_buffer *leaf; | ||
270 | struct btrfs_extent_item *ei; | ||
271 | struct btrfs_key key; | ||
272 | u32 num_refs; | ||
273 | int ret; | ||
274 | |||
275 | path = btrfs_alloc_path(); | ||
276 | if (!path) | ||
277 | return -ENOMEM; | ||
278 | |||
279 | key.objectid = bytenr; | ||
280 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
281 | key.offset = num_bytes; | ||
282 | delayed_refs = &trans->transaction->delayed_refs; | ||
283 | again: | ||
284 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
285 | &key, path, 0, 0); | ||
286 | if (ret < 0) | ||
287 | goto out; | ||
288 | |||
289 | if (ret == 0) { | ||
290 | leaf = path->nodes[0]; | ||
291 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
292 | struct btrfs_extent_item); | ||
293 | num_refs = btrfs_extent_refs(leaf, ei); | ||
294 | } else { | ||
295 | num_refs = 0; | ||
296 | ret = 0; | ||
297 | } | ||
298 | |||
299 | spin_lock(&delayed_refs->lock); | ||
300 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
301 | if (ref) { | ||
302 | head = btrfs_delayed_node_to_head(ref); | ||
303 | if (mutex_trylock(&head->mutex)) { | ||
304 | num_refs += ref->ref_mod; | ||
305 | mutex_unlock(&head->mutex); | ||
306 | *refs = num_refs; | ||
307 | goto out; | ||
308 | } | ||
309 | |||
310 | atomic_inc(&ref->refs); | ||
311 | spin_unlock(&delayed_refs->lock); | ||
312 | |||
313 | btrfs_release_path(root->fs_info->extent_root, path); | ||
314 | |||
315 | mutex_lock(&head->mutex); | ||
316 | mutex_unlock(&head->mutex); | ||
317 | btrfs_put_delayed_ref(ref); | ||
318 | goto again; | ||
319 | } else { | ||
320 | *refs = num_refs; | ||
321 | } | ||
322 | out: | ||
323 | spin_unlock(&delayed_refs->lock); | ||
324 | btrfs_free_path(path); | ||
325 | return ret; | ||
326 | } | ||
327 | |||
328 | /* | ||
329 | * helper function to update an extent delayed ref in the | ||
330 | * rbtree. existing and update must both have the same | ||
331 | * bytenr and parent | ||
332 | * | ||
333 | * This may free existing if the update cancels out whatever | ||
334 | * operation it was doing. | ||
335 | */ | ||
336 | static noinline void | ||
337 | update_existing_ref(struct btrfs_trans_handle *trans, | ||
338 | struct btrfs_delayed_ref_root *delayed_refs, | ||
339 | struct btrfs_delayed_ref_node *existing, | ||
340 | struct btrfs_delayed_ref_node *update) | ||
341 | { | ||
342 | struct btrfs_delayed_ref *existing_ref; | ||
343 | struct btrfs_delayed_ref *ref; | ||
344 | |||
345 | existing_ref = btrfs_delayed_node_to_ref(existing); | ||
346 | ref = btrfs_delayed_node_to_ref(update); | ||
347 | |||
348 | if (ref->pin) | ||
349 | existing_ref->pin = 1; | ||
350 | |||
351 | if (ref->action != existing_ref->action) { | ||
352 | /* | ||
353 | * this is effectively undoing either an add or a | ||
354 | * drop. We decrement the ref_mod, and if it goes | ||
355 | * down to zero we just delete the entry without | ||
356 | * ever changing the extent allocation tree. | ||
357 | */ | ||
358 | existing->ref_mod--; | ||
359 | if (existing->ref_mod == 0) { | ||
360 | rb_erase(&existing->rb_node, | ||
361 | &delayed_refs->root); | ||
362 | existing->in_tree = 0; | ||
363 | btrfs_put_delayed_ref(existing); | ||
364 | delayed_refs->num_entries--; | ||
365 | if (trans->delayed_ref_updates) | ||
366 | trans->delayed_ref_updates--; | ||
367 | } | ||
368 | } else { | ||
369 | if (existing_ref->action == BTRFS_ADD_DELAYED_REF) { | ||
370 | /* if we're adding refs, make sure all the | ||
371 | * details match up. The extent could | ||
372 | * have been totally freed and reallocated | ||
373 | * by a different owner before the delayed | ||
374 | * ref entries were removed. | ||
375 | */ | ||
376 | existing_ref->owner_objectid = ref->owner_objectid; | ||
377 | existing_ref->generation = ref->generation; | ||
378 | existing_ref->root = ref->root; | ||
379 | existing->num_bytes = update->num_bytes; | ||
380 | } | ||
381 | /* | ||
382 | * the action on the existing ref matches | ||
383 | * the action on the ref we're trying to add. | ||
384 | * Bump the ref_mod by one so the backref that | ||
385 | * is eventually added/removed has the correct | ||
386 | * reference count | ||
387 | */ | ||
388 | existing->ref_mod += update->ref_mod; | ||
389 | } | ||
390 | } | ||
391 | |||
392 | /* | ||
393 | * helper function to update the accounting in the head ref | ||
394 | * existing and update must have the same bytenr | ||
395 | */ | ||
396 | static noinline void | ||
397 | update_existing_head_ref(struct btrfs_delayed_ref_node *existing, | ||
398 | struct btrfs_delayed_ref_node *update) | ||
399 | { | ||
400 | struct btrfs_delayed_ref_head *existing_ref; | ||
401 | struct btrfs_delayed_ref_head *ref; | ||
402 | |||
403 | existing_ref = btrfs_delayed_node_to_head(existing); | ||
404 | ref = btrfs_delayed_node_to_head(update); | ||
405 | |||
406 | if (ref->must_insert_reserved) { | ||
407 | /* if the extent was freed and then | ||
408 | * reallocated before the delayed ref | ||
409 | * entries were processed, we can end up | ||
410 | * with an existing head ref without | ||
411 | * the must_insert_reserved flag set. | ||
412 | * Set it again here | ||
413 | */ | ||
414 | existing_ref->must_insert_reserved = ref->must_insert_reserved; | ||
415 | |||
416 | /* | ||
417 | * update the num_bytes so we make sure the accounting | ||
418 | * is done correctly | ||
419 | */ | ||
420 | existing->num_bytes = update->num_bytes; | ||
421 | |||
422 | } | ||
423 | |||
424 | /* | ||
425 | * update the reference mod on the head to reflect this new operation | ||
426 | */ | ||
427 | existing->ref_mod += update->ref_mod; | ||
428 | } | ||
429 | |||
430 | /* | ||
431 | * helper function to actually insert a delayed ref into the rbtree. | ||
432 | * this does all the dirty work in terms of maintaining the correct | ||
433 | * overall modification count in the head node and properly dealing | ||
434 | * with updating existing nodes as new modifications are queued. | ||
435 | */ | ||
436 | static noinline int __btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
437 | struct btrfs_delayed_ref_node *ref, | ||
438 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
439 | u64 ref_generation, u64 owner_objectid, int action, | ||
440 | int pin) | ||
441 | { | ||
442 | struct btrfs_delayed_ref_node *existing; | ||
443 | struct btrfs_delayed_ref *full_ref; | ||
444 | struct btrfs_delayed_ref_head *head_ref = NULL; | ||
445 | struct btrfs_delayed_ref_root *delayed_refs; | ||
446 | int count_mod = 1; | ||
447 | int must_insert_reserved = 0; | ||
448 | |||
449 | /* | ||
450 | * the head node stores the sum of all the mods, so dropping a ref | ||
451 | * should drop the sum in the head node by one. | ||
452 | */ | ||
453 | if (parent == (u64)-1) { | ||
454 | if (action == BTRFS_DROP_DELAYED_REF) | ||
455 | count_mod = -1; | ||
456 | else if (action == BTRFS_UPDATE_DELAYED_HEAD) | ||
457 | count_mod = 0; | ||
458 | } | ||
459 | |||
460 | /* | ||
461 | * BTRFS_ADD_DELAYED_EXTENT means that we need to update | ||
462 | * the reserved accounting when the extent is finally added, or | ||
463 | * if a later modification deletes the delayed ref without ever | ||
464 | * inserting the extent into the extent allocation tree. | ||
465 | * ref->must_insert_reserved is the flag used to record | ||
466 | * that accounting mods are required. | ||
467 | * | ||
468 | * Once we record must_insert_reserved, switch the action to | ||
469 | * BTRFS_ADD_DELAYED_REF because other special casing is not required. | ||
470 | */ | ||
471 | if (action == BTRFS_ADD_DELAYED_EXTENT) { | ||
472 | must_insert_reserved = 1; | ||
473 | action = BTRFS_ADD_DELAYED_REF; | ||
474 | } else { | ||
475 | must_insert_reserved = 0; | ||
476 | } | ||
477 | |||
478 | |||
479 | delayed_refs = &trans->transaction->delayed_refs; | ||
480 | |||
481 | /* first set the basic ref node struct up */ | ||
482 | atomic_set(&ref->refs, 1); | ||
483 | ref->bytenr = bytenr; | ||
484 | ref->parent = parent; | ||
485 | ref->ref_mod = count_mod; | ||
486 | ref->in_tree = 1; | ||
487 | ref->num_bytes = num_bytes; | ||
488 | |||
489 | if (btrfs_delayed_ref_is_head(ref)) { | ||
490 | head_ref = btrfs_delayed_node_to_head(ref); | ||
491 | head_ref->must_insert_reserved = must_insert_reserved; | ||
492 | INIT_LIST_HEAD(&head_ref->cluster); | ||
493 | mutex_init(&head_ref->mutex); | ||
494 | } else { | ||
495 | full_ref = btrfs_delayed_node_to_ref(ref); | ||
496 | full_ref->root = ref_root; | ||
497 | full_ref->generation = ref_generation; | ||
498 | full_ref->owner_objectid = owner_objectid; | ||
499 | full_ref->pin = pin; | ||
500 | full_ref->action = action; | ||
501 | } | ||
502 | |||
503 | existing = tree_insert(&delayed_refs->root, bytenr, | ||
504 | parent, &ref->rb_node); | ||
505 | |||
506 | if (existing) { | ||
507 | if (btrfs_delayed_ref_is_head(ref)) | ||
508 | update_existing_head_ref(existing, ref); | ||
509 | else | ||
510 | update_existing_ref(trans, delayed_refs, existing, ref); | ||
511 | |||
512 | /* | ||
513 | * we've updated the existing ref, free the newly | ||
514 | * allocated ref | ||
515 | */ | ||
516 | kfree(ref); | ||
517 | } else { | ||
518 | if (btrfs_delayed_ref_is_head(ref)) { | ||
519 | delayed_refs->num_heads++; | ||
520 | delayed_refs->num_heads_ready++; | ||
521 | } | ||
522 | delayed_refs->num_entries++; | ||
523 | trans->delayed_ref_updates++; | ||
524 | } | ||
525 | return 0; | ||
526 | } | ||
527 | |||
528 | /* | ||
529 | * add a delayed ref to the tree. This does all of the accounting required | ||
530 | * to make sure the delayed ref is eventually processed before this | ||
531 | * transaction commits. | ||
532 | */ | ||
533 | int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
534 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
535 | u64 ref_generation, u64 owner_objectid, int action, | ||
536 | int pin) | ||
537 | { | ||
538 | struct btrfs_delayed_ref *ref; | ||
539 | struct btrfs_delayed_ref_head *head_ref; | ||
540 | struct btrfs_delayed_ref_root *delayed_refs; | ||
541 | int ret; | ||
542 | |||
543 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
544 | if (!ref) | ||
545 | return -ENOMEM; | ||
546 | |||
547 | /* | ||
548 | * the parent = 0 case comes from cases where we don't actually | ||
549 | * know the parent yet. It will get updated later via an add/drop | ||
550 | * pair. | ||
551 | */ | ||
552 | if (parent == 0) | ||
553 | parent = bytenr; | ||
554 | |||
555 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
556 | if (!head_ref) { | ||
557 | kfree(ref); | ||
558 | return -ENOMEM; | ||
559 | } | ||
560 | delayed_refs = &trans->transaction->delayed_refs; | ||
561 | spin_lock(&delayed_refs->lock); | ||
562 | |||
563 | /* | ||
564 | * insert both the head node and the new ref without dropping | ||
565 | * the spin lock | ||
566 | */ | ||
567 | ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, | ||
568 | (u64)-1, 0, 0, 0, action, pin); | ||
569 | BUG_ON(ret); | ||
570 | |||
571 | ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, | ||
572 | parent, ref_root, ref_generation, | ||
573 | owner_objectid, action, pin); | ||
574 | BUG_ON(ret); | ||
575 | spin_unlock(&delayed_refs->lock); | ||
576 | return 0; | ||
577 | } | ||
578 | |||
579 | /* | ||
580 | * this does a simple search for the head node for a given extent. | ||
581 | * It must be called with the delayed ref spinlock held, and it returns | ||
582 | * the head node if any were found, or NULL if not. | ||
583 | */ | ||
584 | struct btrfs_delayed_ref_head * | ||
585 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr) | ||
586 | { | ||
587 | struct btrfs_delayed_ref_node *ref; | ||
588 | struct btrfs_delayed_ref_root *delayed_refs; | ||
589 | |||
590 | delayed_refs = &trans->transaction->delayed_refs; | ||
591 | ref = tree_search(&delayed_refs->root, bytenr, (u64)-1, NULL); | ||
592 | if (ref) | ||
593 | return btrfs_delayed_node_to_head(ref); | ||
594 | return NULL; | ||
595 | } | ||
596 | |||
597 | /* | ||
598 | * add a delayed ref to the tree. This does all of the accounting required | ||
599 | * to make sure the delayed ref is eventually processed before this | ||
600 | * transaction commits. | ||
601 | * | ||
602 | * The main point of this call is to add and remove a backreference in a single | ||
603 | * shot, taking the lock only once, and only searching for the head node once. | ||
604 | * | ||
605 | * It is the same as doing a ref add and delete in two separate calls. | ||
606 | */ | ||
607 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | ||
608 | u64 bytenr, u64 num_bytes, u64 orig_parent, | ||
609 | u64 parent, u64 orig_ref_root, u64 ref_root, | ||
610 | u64 orig_ref_generation, u64 ref_generation, | ||
611 | u64 owner_objectid, int pin) | ||
612 | { | ||
613 | struct btrfs_delayed_ref *ref; | ||
614 | struct btrfs_delayed_ref *old_ref; | ||
615 | struct btrfs_delayed_ref_head *head_ref; | ||
616 | struct btrfs_delayed_ref_root *delayed_refs; | ||
617 | int ret; | ||
618 | |||
619 | ref = kmalloc(sizeof(*ref), GFP_NOFS); | ||
620 | if (!ref) | ||
621 | return -ENOMEM; | ||
622 | |||
623 | old_ref = kmalloc(sizeof(*old_ref), GFP_NOFS); | ||
624 | if (!old_ref) { | ||
625 | kfree(ref); | ||
626 | return -ENOMEM; | ||
627 | } | ||
628 | |||
629 | /* | ||
630 | * the parent = 0 case comes from cases where we don't actually | ||
631 | * know the parent yet. It will get updated later via an add/drop | ||
632 | * pair. | ||
633 | */ | ||
634 | if (parent == 0) | ||
635 | parent = bytenr; | ||
636 | if (orig_parent == 0) | ||
637 | orig_parent = bytenr; | ||
638 | |||
639 | head_ref = kmalloc(sizeof(*head_ref), GFP_NOFS); | ||
640 | if (!head_ref) { | ||
641 | kfree(ref); | ||
642 | kfree(old_ref); | ||
643 | return -ENOMEM; | ||
644 | } | ||
645 | delayed_refs = &trans->transaction->delayed_refs; | ||
646 | spin_lock(&delayed_refs->lock); | ||
647 | |||
648 | /* | ||
649 | * insert both the head node and the new ref without dropping | ||
650 | * the spin lock | ||
651 | */ | ||
652 | ret = __btrfs_add_delayed_ref(trans, &head_ref->node, bytenr, num_bytes, | ||
653 | (u64)-1, 0, 0, 0, | ||
654 | BTRFS_UPDATE_DELAYED_HEAD, 0); | ||
655 | BUG_ON(ret); | ||
656 | |||
657 | ret = __btrfs_add_delayed_ref(trans, &ref->node, bytenr, num_bytes, | ||
658 | parent, ref_root, ref_generation, | ||
659 | owner_objectid, BTRFS_ADD_DELAYED_REF, 0); | ||
660 | BUG_ON(ret); | ||
661 | |||
662 | ret = __btrfs_add_delayed_ref(trans, &old_ref->node, bytenr, num_bytes, | ||
663 | orig_parent, orig_ref_root, | ||
664 | orig_ref_generation, owner_objectid, | ||
665 | BTRFS_DROP_DELAYED_REF, pin); | ||
666 | BUG_ON(ret); | ||
667 | spin_unlock(&delayed_refs->lock); | ||
668 | return 0; | ||
669 | } | ||
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h new file mode 100644 index 000000000000..3bec2ff0b15c --- /dev/null +++ b/fs/btrfs/delayed-ref.h | |||
@@ -0,0 +1,193 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2008 Oracle. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | #ifndef __DELAYED_REF__ | ||
19 | #define __DELAYED_REF__ | ||
20 | |||
21 | /* these are the possible values of struct btrfs_delayed_ref->action */ | ||
22 | #define BTRFS_ADD_DELAYED_REF 1 /* add one backref to the tree */ | ||
23 | #define BTRFS_DROP_DELAYED_REF 2 /* delete one backref from the tree */ | ||
24 | #define BTRFS_ADD_DELAYED_EXTENT 3 /* record a full extent allocation */ | ||
25 | #define BTRFS_UPDATE_DELAYED_HEAD 4 /* not changing ref count on head ref */ | ||
26 | |||
27 | struct btrfs_delayed_ref_node { | ||
28 | struct rb_node rb_node; | ||
29 | |||
30 | /* the starting bytenr of the extent */ | ||
31 | u64 bytenr; | ||
32 | |||
33 | /* the parent our backref will point to */ | ||
34 | u64 parent; | ||
35 | |||
36 | /* the size of the extent */ | ||
37 | u64 num_bytes; | ||
38 | |||
39 | /* ref count on this data structure */ | ||
40 | atomic_t refs; | ||
41 | |||
42 | /* | ||
43 | * how many refs is this entry adding or deleting. For | ||
44 | * head refs, this may be a negative number because it is keeping | ||
45 | * track of the total mods done to the reference count. | ||
46 | * For individual refs, this will always be a positive number | ||
47 | * | ||
48 | * It may be more than one, since it is possible for a single | ||
49 | * parent to have more than one ref on an extent | ||
50 | */ | ||
51 | int ref_mod; | ||
52 | |||
53 | /* is this node still in the rbtree? */ | ||
54 | unsigned int in_tree:1; | ||
55 | }; | ||
56 | |||
57 | /* | ||
58 | * the head refs are used to hold a lock on a given extent, which allows us | ||
59 | * to make sure that only one process is running the delayed refs | ||
60 | * at a time for a single extent. They also store the sum of all the | ||
61 | * reference count modifications we've queued up. | ||
62 | */ | ||
63 | struct btrfs_delayed_ref_head { | ||
64 | struct btrfs_delayed_ref_node node; | ||
65 | |||
66 | /* | ||
67 | * the mutex is held while running the refs, and it is also | ||
68 | * held when checking the sum of reference modifications. | ||
69 | */ | ||
70 | struct mutex mutex; | ||
71 | |||
72 | struct list_head cluster; | ||
73 | |||
74 | /* | ||
75 | * when a new extent is allocated, it is just reserved in memory. | ||
76 | * The actual extent isn't inserted into the extent allocation tree | ||
77 | * until the delayed ref is processed. must_insert_reserved is | ||
78 | * used to flag a delayed ref so the accounting can be updated | ||
79 | * when a full insert is done. | ||
80 | * | ||
81 | * It is possible the extent will be freed before it is ever | ||
82 | * inserted into the extent allocation tree. In this case | ||
83 | * we need to update the in ram accounting to properly reflect | ||
84 | * the free has happened. | ||
85 | */ | ||
86 | unsigned int must_insert_reserved:1; | ||
87 | }; | ||
88 | |||
89 | struct btrfs_delayed_ref { | ||
90 | struct btrfs_delayed_ref_node node; | ||
91 | |||
92 | /* the root objectid our ref will point to */ | ||
93 | u64 root; | ||
94 | |||
95 | /* the generation for the backref */ | ||
96 | u64 generation; | ||
97 | |||
98 | /* owner_objectid of the backref */ | ||
99 | u64 owner_objectid; | ||
100 | |||
101 | /* operation done by this entry in the rbtree */ | ||
102 | u8 action; | ||
103 | |||
104 | /* if pin == 1, when the extent is freed it will be pinned until | ||
105 | * transaction commit | ||
106 | */ | ||
107 | unsigned int pin:1; | ||
108 | }; | ||
109 | |||
110 | struct btrfs_delayed_ref_root { | ||
111 | struct rb_root root; | ||
112 | |||
113 | /* this spin lock protects the rbtree and the entries inside */ | ||
114 | spinlock_t lock; | ||
115 | |||
116 | /* how many delayed ref updates we've queued, used by the | ||
117 | * throttling code | ||
118 | */ | ||
119 | unsigned long num_entries; | ||
120 | |||
121 | /* total number of head nodes in tree */ | ||
122 | unsigned long num_heads; | ||
123 | |||
124 | /* total number of head nodes ready for processing */ | ||
125 | unsigned long num_heads_ready; | ||
126 | |||
127 | /* | ||
128 | * set when the tree is flushing before a transaction commit, | ||
129 | * used by the throttling code to decide if new updates need | ||
130 | * to be run right away | ||
131 | */ | ||
132 | int flushing; | ||
133 | |||
134 | u64 run_delayed_start; | ||
135 | }; | ||
136 | |||
137 | static inline void btrfs_put_delayed_ref(struct btrfs_delayed_ref_node *ref) | ||
138 | { | ||
139 | WARN_ON(atomic_read(&ref->refs) == 0); | ||
140 | if (atomic_dec_and_test(&ref->refs)) { | ||
141 | WARN_ON(ref->in_tree); | ||
142 | kfree(ref); | ||
143 | } | ||
144 | } | ||
145 | |||
146 | int btrfs_add_delayed_ref(struct btrfs_trans_handle *trans, | ||
147 | u64 bytenr, u64 num_bytes, u64 parent, u64 ref_root, | ||
148 | u64 ref_generation, u64 owner_objectid, int action, | ||
149 | int pin); | ||
150 | |||
151 | struct btrfs_delayed_ref_head * | ||
152 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | ||
153 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | ||
154 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | ||
155 | struct btrfs_root *root, u64 bytenr, | ||
156 | u64 num_bytes, u32 *refs); | ||
157 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | ||
158 | u64 bytenr, u64 num_bytes, u64 orig_parent, | ||
159 | u64 parent, u64 orig_ref_root, u64 ref_root, | ||
160 | u64 orig_ref_generation, u64 ref_generation, | ||
161 | u64 owner_objectid, int pin); | ||
162 | int btrfs_delayed_ref_lock(struct btrfs_trans_handle *trans, | ||
163 | struct btrfs_delayed_ref_head *head); | ||
164 | int btrfs_find_ref_cluster(struct btrfs_trans_handle *trans, | ||
165 | struct list_head *cluster, u64 search_start); | ||
166 | /* | ||
167 | * a node might live in a head or a regular ref, this lets you | ||
168 | * test for the proper type to use. | ||
169 | */ | ||
170 | static int btrfs_delayed_ref_is_head(struct btrfs_delayed_ref_node *node) | ||
171 | { | ||
172 | return node->parent == (u64)-1; | ||
173 | } | ||
174 | |||
175 | /* | ||
176 | * helper functions to cast a node into its container | ||
177 | */ | ||
178 | static inline struct btrfs_delayed_ref * | ||
179 | btrfs_delayed_node_to_ref(struct btrfs_delayed_ref_node *node) | ||
180 | { | ||
181 | WARN_ON(btrfs_delayed_ref_is_head(node)); | ||
182 | return container_of(node, struct btrfs_delayed_ref, node); | ||
183 | |||
184 | } | ||
185 | |||
186 | static inline struct btrfs_delayed_ref_head * | ||
187 | btrfs_delayed_node_to_head(struct btrfs_delayed_ref_node *node) | ||
188 | { | ||
189 | WARN_ON(!btrfs_delayed_ref_is_head(node)); | ||
190 | return container_of(node, struct btrfs_delayed_ref_head, node); | ||
191 | |||
192 | } | ||
193 | #endif | ||
diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c index 926a0b287a7d..1d70236ba00c 100644 --- a/fs/btrfs/dir-item.c +++ b/fs/btrfs/dir-item.c | |||
@@ -145,7 +145,10 @@ int btrfs_insert_dir_item(struct btrfs_trans_handle *trans, struct btrfs_root | |||
145 | key.objectid = dir; | 145 | key.objectid = dir; |
146 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); | 146 | btrfs_set_key_type(&key, BTRFS_DIR_ITEM_KEY); |
147 | key.offset = btrfs_name_hash(name, name_len); | 147 | key.offset = btrfs_name_hash(name, name_len); |
148 | |||
148 | path = btrfs_alloc_path(); | 149 | path = btrfs_alloc_path(); |
150 | path->leave_spinning = 1; | ||
151 | |||
149 | data_size = sizeof(*dir_item) + name_len; | 152 | data_size = sizeof(*dir_item) + name_len; |
150 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, | 153 | dir_item = insert_with_overflow(trans, root, path, &key, data_size, |
151 | name, name_len); | 154 | name, name_len); |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 6ec80c0fc869..92d73929d381 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -668,14 +668,31 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
668 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | 668 | static int btree_writepage(struct page *page, struct writeback_control *wbc) |
669 | { | 669 | { |
670 | struct extent_io_tree *tree; | 670 | struct extent_io_tree *tree; |
671 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
672 | struct extent_buffer *eb; | ||
673 | int was_dirty; | ||
674 | |||
671 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 675 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
676 | if (!(current->flags & PF_MEMALLOC)) { | ||
677 | return extent_write_full_page(tree, page, | ||
678 | btree_get_extent, wbc); | ||
679 | } | ||
672 | 680 | ||
673 | if (current->flags & PF_MEMALLOC) { | 681 | redirty_page_for_writepage(wbc, page); |
674 | redirty_page_for_writepage(wbc, page); | 682 | eb = btrfs_find_tree_block(root, page_offset(page), |
675 | unlock_page(page); | 683 | PAGE_CACHE_SIZE); |
676 | return 0; | 684 | WARN_ON(!eb); |
685 | |||
686 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
687 | if (!was_dirty) { | ||
688 | spin_lock(&root->fs_info->delalloc_lock); | ||
689 | root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; | ||
690 | spin_unlock(&root->fs_info->delalloc_lock); | ||
677 | } | 691 | } |
678 | return extent_write_full_page(tree, page, btree_get_extent, wbc); | 692 | free_extent_buffer(eb); |
693 | |||
694 | unlock_page(page); | ||
695 | return 0; | ||
679 | } | 696 | } |
680 | 697 | ||
681 | static int btree_writepages(struct address_space *mapping, | 698 | static int btree_writepages(struct address_space *mapping, |
@@ -684,15 +701,15 @@ static int btree_writepages(struct address_space *mapping, | |||
684 | struct extent_io_tree *tree; | 701 | struct extent_io_tree *tree; |
685 | tree = &BTRFS_I(mapping->host)->io_tree; | 702 | tree = &BTRFS_I(mapping->host)->io_tree; |
686 | if (wbc->sync_mode == WB_SYNC_NONE) { | 703 | if (wbc->sync_mode == WB_SYNC_NONE) { |
704 | struct btrfs_root *root = BTRFS_I(mapping->host)->root; | ||
687 | u64 num_dirty; | 705 | u64 num_dirty; |
688 | u64 start = 0; | ||
689 | unsigned long thresh = 32 * 1024 * 1024; | 706 | unsigned long thresh = 32 * 1024 * 1024; |
690 | 707 | ||
691 | if (wbc->for_kupdate) | 708 | if (wbc->for_kupdate) |
692 | return 0; | 709 | return 0; |
693 | 710 | ||
694 | num_dirty = count_range_bits(tree, &start, (u64)-1, | 711 | /* this is a bit racy, but that's ok */ |
695 | thresh, EXTENT_DIRTY); | 712 | num_dirty = root->fs_info->dirty_metadata_bytes; |
696 | if (num_dirty < thresh) | 713 | if (num_dirty < thresh) |
697 | return 0; | 714 | return 0; |
698 | } | 715 | } |
@@ -859,9 +876,17 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
859 | root->fs_info->running_transaction->transid) { | 876 | root->fs_info->running_transaction->transid) { |
860 | btrfs_assert_tree_locked(buf); | 877 | btrfs_assert_tree_locked(buf); |
861 | 878 | ||
862 | /* ugh, clear_extent_buffer_dirty can be expensive */ | 879 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)) { |
863 | btrfs_set_lock_blocking(buf); | 880 | spin_lock(&root->fs_info->delalloc_lock); |
881 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | ||
882 | root->fs_info->dirty_metadata_bytes -= buf->len; | ||
883 | else | ||
884 | WARN_ON(1); | ||
885 | spin_unlock(&root->fs_info->delalloc_lock); | ||
886 | } | ||
864 | 887 | ||
888 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | ||
889 | btrfs_set_lock_blocking(buf); | ||
865 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 890 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
866 | buf); | 891 | buf); |
867 | } | 892 | } |
@@ -1471,12 +1496,6 @@ static int transaction_kthread(void *arg) | |||
1471 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1496 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1472 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1497 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1473 | 1498 | ||
1474 | if (root->fs_info->total_ref_cache_size > 20 * 1024 * 1024) { | ||
1475 | printk(KERN_INFO "btrfs: total reference cache " | ||
1476 | "size %llu\n", | ||
1477 | root->fs_info->total_ref_cache_size); | ||
1478 | } | ||
1479 | |||
1480 | mutex_lock(&root->fs_info->trans_mutex); | 1499 | mutex_lock(&root->fs_info->trans_mutex); |
1481 | cur = root->fs_info->running_transaction; | 1500 | cur = root->fs_info->running_transaction; |
1482 | if (!cur) { | 1501 | if (!cur) { |
@@ -1493,6 +1512,7 @@ static int transaction_kthread(void *arg) | |||
1493 | mutex_unlock(&root->fs_info->trans_mutex); | 1512 | mutex_unlock(&root->fs_info->trans_mutex); |
1494 | trans = btrfs_start_transaction(root, 1); | 1513 | trans = btrfs_start_transaction(root, 1); |
1495 | ret = btrfs_commit_transaction(trans, root); | 1514 | ret = btrfs_commit_transaction(trans, root); |
1515 | |||
1496 | sleep: | 1516 | sleep: |
1497 | wake_up_process(root->fs_info->cleaner_kthread); | 1517 | wake_up_process(root->fs_info->cleaner_kthread); |
1498 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1518 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1552,6 +1572,7 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1552 | INIT_LIST_HEAD(&fs_info->dead_roots); | 1572 | INIT_LIST_HEAD(&fs_info->dead_roots); |
1553 | INIT_LIST_HEAD(&fs_info->hashers); | 1573 | INIT_LIST_HEAD(&fs_info->hashers); |
1554 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); | 1574 | INIT_LIST_HEAD(&fs_info->delalloc_inodes); |
1575 | INIT_LIST_HEAD(&fs_info->ordered_operations); | ||
1555 | spin_lock_init(&fs_info->delalloc_lock); | 1576 | spin_lock_init(&fs_info->delalloc_lock); |
1556 | spin_lock_init(&fs_info->new_trans_lock); | 1577 | spin_lock_init(&fs_info->new_trans_lock); |
1557 | spin_lock_init(&fs_info->ref_cache_lock); | 1578 | spin_lock_init(&fs_info->ref_cache_lock); |
@@ -1611,10 +1632,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1611 | 1632 | ||
1612 | extent_io_tree_init(&fs_info->pinned_extents, | 1633 | extent_io_tree_init(&fs_info->pinned_extents, |
1613 | fs_info->btree_inode->i_mapping, GFP_NOFS); | 1634 | fs_info->btree_inode->i_mapping, GFP_NOFS); |
1614 | extent_io_tree_init(&fs_info->pending_del, | ||
1615 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1616 | extent_io_tree_init(&fs_info->extent_ins, | ||
1617 | fs_info->btree_inode->i_mapping, GFP_NOFS); | ||
1618 | fs_info->do_barriers = 1; | 1635 | fs_info->do_barriers = 1; |
1619 | 1636 | ||
1620 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); | 1637 | INIT_LIST_HEAD(&fs_info->dead_reloc_roots); |
@@ -1627,9 +1644,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1627 | insert_inode_hash(fs_info->btree_inode); | 1644 | insert_inode_hash(fs_info->btree_inode); |
1628 | 1645 | ||
1629 | mutex_init(&fs_info->trans_mutex); | 1646 | mutex_init(&fs_info->trans_mutex); |
1647 | mutex_init(&fs_info->ordered_operations_mutex); | ||
1630 | mutex_init(&fs_info->tree_log_mutex); | 1648 | mutex_init(&fs_info->tree_log_mutex); |
1631 | mutex_init(&fs_info->drop_mutex); | 1649 | mutex_init(&fs_info->drop_mutex); |
1632 | mutex_init(&fs_info->extent_ins_mutex); | ||
1633 | mutex_init(&fs_info->pinned_mutex); | 1650 | mutex_init(&fs_info->pinned_mutex); |
1634 | mutex_init(&fs_info->chunk_mutex); | 1651 | mutex_init(&fs_info->chunk_mutex); |
1635 | mutex_init(&fs_info->transaction_kthread_mutex); | 1652 | mutex_init(&fs_info->transaction_kthread_mutex); |
@@ -2358,8 +2375,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2358 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | 2375 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; |
2359 | u64 transid = btrfs_header_generation(buf); | 2376 | u64 transid = btrfs_header_generation(buf); |
2360 | struct inode *btree_inode = root->fs_info->btree_inode; | 2377 | struct inode *btree_inode = root->fs_info->btree_inode; |
2361 | 2378 | int was_dirty; | |
2362 | btrfs_set_lock_blocking(buf); | ||
2363 | 2379 | ||
2364 | btrfs_assert_tree_locked(buf); | 2380 | btrfs_assert_tree_locked(buf); |
2365 | if (transid != root->fs_info->generation) { | 2381 | if (transid != root->fs_info->generation) { |
@@ -2370,7 +2386,13 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
2370 | (unsigned long long)root->fs_info->generation); | 2386 | (unsigned long long)root->fs_info->generation); |
2371 | WARN_ON(1); | 2387 | WARN_ON(1); |
2372 | } | 2388 | } |
2373 | set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, buf); | 2389 | was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, |
2390 | buf); | ||
2391 | if (!was_dirty) { | ||
2392 | spin_lock(&root->fs_info->delalloc_lock); | ||
2393 | root->fs_info->dirty_metadata_bytes += buf->len; | ||
2394 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2395 | } | ||
2374 | } | 2396 | } |
2375 | 2397 | ||
2376 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | 2398 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) |
@@ -2410,6 +2432,7 @@ int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | |||
2410 | int btree_lock_page_hook(struct page *page) | 2432 | int btree_lock_page_hook(struct page *page) |
2411 | { | 2433 | { |
2412 | struct inode *inode = page->mapping->host; | 2434 | struct inode *inode = page->mapping->host; |
2435 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
2413 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 2436 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
2414 | struct extent_buffer *eb; | 2437 | struct extent_buffer *eb; |
2415 | unsigned long len; | 2438 | unsigned long len; |
@@ -2425,6 +2448,16 @@ int btree_lock_page_hook(struct page *page) | |||
2425 | 2448 | ||
2426 | btrfs_tree_lock(eb); | 2449 | btrfs_tree_lock(eb); |
2427 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); | 2450 | btrfs_set_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN); |
2451 | |||
2452 | if (test_and_clear_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
2453 | spin_lock(&root->fs_info->delalloc_lock); | ||
2454 | if (root->fs_info->dirty_metadata_bytes >= eb->len) | ||
2455 | root->fs_info->dirty_metadata_bytes -= eb->len; | ||
2456 | else | ||
2457 | WARN_ON(1); | ||
2458 | spin_unlock(&root->fs_info->delalloc_lock); | ||
2459 | } | ||
2460 | |||
2428 | btrfs_tree_unlock(eb); | 2461 | btrfs_tree_unlock(eb); |
2429 | free_extent_buffer(eb); | 2462 | free_extent_buffer(eb); |
2430 | out: | 2463 | out: |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index 95029db227be..c958ecbc1916 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -72,6 +72,7 @@ int btrfs_insert_dev_radix(struct btrfs_root *root, | |||
72 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); | 72 | void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); |
73 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); | 73 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); |
74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); | 74 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf); |
75 | void btrfs_mark_buffer_dirty_nonblocking(struct extent_buffer *buf); | ||
75 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); | 76 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); |
76 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); | 77 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf); |
77 | int wait_on_tree_block_writeback(struct btrfs_root *root, | 78 | int wait_on_tree_block_writeback(struct btrfs_root *root, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index fefe83ad2059..f5e7cae63d80 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -49,17 +49,23 @@ struct pending_extent_op { | |||
49 | int del; | 49 | int del; |
50 | }; | 50 | }; |
51 | 51 | ||
52 | static int finish_current_insert(struct btrfs_trans_handle *trans, | 52 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, |
53 | struct btrfs_root *extent_root, int all); | 53 | struct btrfs_root *root, u64 parent, |
54 | static int del_pending_extents(struct btrfs_trans_handle *trans, | 54 | u64 root_objectid, u64 ref_generation, |
55 | struct btrfs_root *extent_root, int all); | 55 | u64 owner, struct btrfs_key *ins, |
56 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 56 | int ref_mod); |
57 | struct btrfs_root *root, | 57 | static int update_reserved_extents(struct btrfs_root *root, |
58 | u64 bytenr, u64 num_bytes, int is_data); | 58 | u64 bytenr, u64 num, int reserve); |
59 | static int update_block_group(struct btrfs_trans_handle *trans, | 59 | static int update_block_group(struct btrfs_trans_handle *trans, |
60 | struct btrfs_root *root, | 60 | struct btrfs_root *root, |
61 | u64 bytenr, u64 num_bytes, int alloc, | 61 | u64 bytenr, u64 num_bytes, int alloc, |
62 | int mark_free); | 62 | int mark_free); |
63 | static noinline int __btrfs_free_extent(struct btrfs_trans_handle *trans, | ||
64 | struct btrfs_root *root, | ||
65 | u64 bytenr, u64 num_bytes, u64 parent, | ||
66 | u64 root_objectid, u64 ref_generation, | ||
67 | u64 owner_objectid, int pin, | ||
68 | int ref_to_drop); | ||
63 | 69 | ||
64 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 70 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
65 | struct btrfs_root *extent_root, u64 alloc_bytes, | 71 | struct btrfs_root *extent_root, u64 alloc_bytes, |
@@ -554,262 +560,13 @@ out: | |||
554 | return ret; | 560 | return ret; |
555 | } | 561 | } |
556 | 562 | ||
557 | /* | ||
558 | * updates all the backrefs that are pending on update_list for the | ||
559 | * extent_root | ||
560 | */ | ||
561 | static noinline int update_backrefs(struct btrfs_trans_handle *trans, | ||
562 | struct btrfs_root *extent_root, | ||
563 | struct btrfs_path *path, | ||
564 | struct list_head *update_list) | ||
565 | { | ||
566 | struct btrfs_key key; | ||
567 | struct btrfs_extent_ref *ref; | ||
568 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
569 | struct pending_extent_op *op; | ||
570 | struct extent_buffer *leaf; | ||
571 | int ret = 0; | ||
572 | struct list_head *cur = update_list->next; | ||
573 | u64 ref_objectid; | ||
574 | u64 ref_root = extent_root->root_key.objectid; | ||
575 | |||
576 | op = list_entry(cur, struct pending_extent_op, list); | ||
577 | |||
578 | search: | ||
579 | key.objectid = op->bytenr; | ||
580 | key.type = BTRFS_EXTENT_REF_KEY; | ||
581 | key.offset = op->orig_parent; | ||
582 | |||
583 | ret = btrfs_search_slot(trans, extent_root, &key, path, 0, 1); | ||
584 | BUG_ON(ret); | ||
585 | |||
586 | leaf = path->nodes[0]; | ||
587 | |||
588 | loop: | ||
589 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | ||
590 | |||
591 | ref_objectid = btrfs_ref_objectid(leaf, ref); | ||
592 | |||
593 | if (btrfs_ref_root(leaf, ref) != ref_root || | ||
594 | btrfs_ref_generation(leaf, ref) != op->orig_generation || | ||
595 | (ref_objectid != op->level && | ||
596 | ref_objectid != BTRFS_MULTIPLE_OBJECTIDS)) { | ||
597 | printk(KERN_ERR "btrfs couldn't find %llu, parent %llu, " | ||
598 | "root %llu, owner %u\n", | ||
599 | (unsigned long long)op->bytenr, | ||
600 | (unsigned long long)op->orig_parent, | ||
601 | (unsigned long long)ref_root, op->level); | ||
602 | btrfs_print_leaf(extent_root, leaf); | ||
603 | BUG(); | ||
604 | } | ||
605 | |||
606 | key.objectid = op->bytenr; | ||
607 | key.offset = op->parent; | ||
608 | key.type = BTRFS_EXTENT_REF_KEY; | ||
609 | ret = btrfs_set_item_key_safe(trans, extent_root, path, &key); | ||
610 | BUG_ON(ret); | ||
611 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | ||
612 | btrfs_set_ref_generation(leaf, ref, op->generation); | ||
613 | |||
614 | cur = cur->next; | ||
615 | |||
616 | list_del_init(&op->list); | ||
617 | unlock_extent(&info->extent_ins, op->bytenr, | ||
618 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
619 | kfree(op); | ||
620 | |||
621 | if (cur == update_list) { | ||
622 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
623 | btrfs_release_path(extent_root, path); | ||
624 | goto out; | ||
625 | } | ||
626 | |||
627 | op = list_entry(cur, struct pending_extent_op, list); | ||
628 | |||
629 | path->slots[0]++; | ||
630 | while (path->slots[0] < btrfs_header_nritems(leaf)) { | ||
631 | btrfs_item_key_to_cpu(leaf, &key, path->slots[0]); | ||
632 | if (key.objectid == op->bytenr && | ||
633 | key.type == BTRFS_EXTENT_REF_KEY) | ||
634 | goto loop; | ||
635 | path->slots[0]++; | ||
636 | } | ||
637 | |||
638 | btrfs_mark_buffer_dirty(path->nodes[0]); | ||
639 | btrfs_release_path(extent_root, path); | ||
640 | goto search; | ||
641 | |||
642 | out: | ||
643 | return 0; | ||
644 | } | ||
645 | |||
646 | static noinline int insert_extents(struct btrfs_trans_handle *trans, | ||
647 | struct btrfs_root *extent_root, | ||
648 | struct btrfs_path *path, | ||
649 | struct list_head *insert_list, int nr) | ||
650 | { | ||
651 | struct btrfs_key *keys; | ||
652 | u32 *data_size; | ||
653 | struct pending_extent_op *op; | ||
654 | struct extent_buffer *leaf; | ||
655 | struct list_head *cur = insert_list->next; | ||
656 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
657 | u64 ref_root = extent_root->root_key.objectid; | ||
658 | int i = 0, last = 0, ret; | ||
659 | int total = nr * 2; | ||
660 | |||
661 | if (!nr) | ||
662 | return 0; | ||
663 | |||
664 | keys = kzalloc(total * sizeof(struct btrfs_key), GFP_NOFS); | ||
665 | if (!keys) | ||
666 | return -ENOMEM; | ||
667 | |||
668 | data_size = kzalloc(total * sizeof(u32), GFP_NOFS); | ||
669 | if (!data_size) { | ||
670 | kfree(keys); | ||
671 | return -ENOMEM; | ||
672 | } | ||
673 | |||
674 | list_for_each_entry(op, insert_list, list) { | ||
675 | keys[i].objectid = op->bytenr; | ||
676 | keys[i].offset = op->num_bytes; | ||
677 | keys[i].type = BTRFS_EXTENT_ITEM_KEY; | ||
678 | data_size[i] = sizeof(struct btrfs_extent_item); | ||
679 | i++; | ||
680 | |||
681 | keys[i].objectid = op->bytenr; | ||
682 | keys[i].offset = op->parent; | ||
683 | keys[i].type = BTRFS_EXTENT_REF_KEY; | ||
684 | data_size[i] = sizeof(struct btrfs_extent_ref); | ||
685 | i++; | ||
686 | } | ||
687 | |||
688 | op = list_entry(cur, struct pending_extent_op, list); | ||
689 | i = 0; | ||
690 | while (i < total) { | ||
691 | int c; | ||
692 | ret = btrfs_insert_some_items(trans, extent_root, path, | ||
693 | keys+i, data_size+i, total-i); | ||
694 | BUG_ON(ret < 0); | ||
695 | |||
696 | if (last && ret > 1) | ||
697 | BUG(); | ||
698 | |||
699 | leaf = path->nodes[0]; | ||
700 | for (c = 0; c < ret; c++) { | ||
701 | int ref_first = keys[i].type == BTRFS_EXTENT_REF_KEY; | ||
702 | |||
703 | /* | ||
704 | * if the first item we inserted was a backref, then | ||
705 | * the EXTENT_ITEM will be the odd c's, else it will | ||
706 | * be the even c's | ||
707 | */ | ||
708 | if ((ref_first && (c % 2)) || | ||
709 | (!ref_first && !(c % 2))) { | ||
710 | struct btrfs_extent_item *itm; | ||
711 | |||
712 | itm = btrfs_item_ptr(leaf, path->slots[0] + c, | ||
713 | struct btrfs_extent_item); | ||
714 | btrfs_set_extent_refs(path->nodes[0], itm, 1); | ||
715 | op->del++; | ||
716 | } else { | ||
717 | struct btrfs_extent_ref *ref; | ||
718 | |||
719 | ref = btrfs_item_ptr(leaf, path->slots[0] + c, | ||
720 | struct btrfs_extent_ref); | ||
721 | btrfs_set_ref_root(leaf, ref, ref_root); | ||
722 | btrfs_set_ref_generation(leaf, ref, | ||
723 | op->generation); | ||
724 | btrfs_set_ref_objectid(leaf, ref, op->level); | ||
725 | btrfs_set_ref_num_refs(leaf, ref, 1); | ||
726 | op->del++; | ||
727 | } | ||
728 | |||
729 | /* | ||
730 | * using del to see when its ok to free up the | ||
731 | * pending_extent_op. In the case where we insert the | ||
732 | * last item on the list in order to help do batching | ||
733 | * we need to not free the extent op until we actually | ||
734 | * insert the extent_item | ||
735 | */ | ||
736 | if (op->del == 2) { | ||
737 | unlock_extent(&info->extent_ins, op->bytenr, | ||
738 | op->bytenr + op->num_bytes - 1, | ||
739 | GFP_NOFS); | ||
740 | cur = cur->next; | ||
741 | list_del_init(&op->list); | ||
742 | kfree(op); | ||
743 | if (cur != insert_list) | ||
744 | op = list_entry(cur, | ||
745 | struct pending_extent_op, | ||
746 | list); | ||
747 | } | ||
748 | } | ||
749 | btrfs_mark_buffer_dirty(leaf); | ||
750 | btrfs_release_path(extent_root, path); | ||
751 | |||
752 | /* | ||
753 | * Ok backref's and items usually go right next to eachother, | ||
754 | * but if we could only insert 1 item that means that we | ||
755 | * inserted on the end of a leaf, and we have no idea what may | ||
756 | * be on the next leaf so we just play it safe. In order to | ||
757 | * try and help this case we insert the last thing on our | ||
758 | * insert list so hopefully it will end up being the last | ||
759 | * thing on the leaf and everything else will be before it, | ||
760 | * which will let us insert a whole bunch of items at the same | ||
761 | * time. | ||
762 | */ | ||
763 | if (ret == 1 && !last && (i + ret < total)) { | ||
764 | /* | ||
765 | * last: where we will pick up the next time around | ||
766 | * i: our current key to insert, will be total - 1 | ||
767 | * cur: the current op we are screwing with | ||
768 | * op: duh | ||
769 | */ | ||
770 | last = i + ret; | ||
771 | i = total - 1; | ||
772 | cur = insert_list->prev; | ||
773 | op = list_entry(cur, struct pending_extent_op, list); | ||
774 | } else if (last) { | ||
775 | /* | ||
776 | * ok we successfully inserted the last item on the | ||
777 | * list, lets reset everything | ||
778 | * | ||
779 | * i: our current key to insert, so where we left off | ||
780 | * last time | ||
781 | * last: done with this | ||
782 | * cur: the op we are messing with | ||
783 | * op: duh | ||
784 | * total: since we inserted the last key, we need to | ||
785 | * decrement total so we dont overflow | ||
786 | */ | ||
787 | i = last; | ||
788 | last = 0; | ||
789 | total--; | ||
790 | if (i < total) { | ||
791 | cur = insert_list->next; | ||
792 | op = list_entry(cur, struct pending_extent_op, | ||
793 | list); | ||
794 | } | ||
795 | } else { | ||
796 | i += ret; | ||
797 | } | ||
798 | |||
799 | cond_resched(); | ||
800 | } | ||
801 | ret = 0; | ||
802 | kfree(keys); | ||
803 | kfree(data_size); | ||
804 | return ret; | ||
805 | } | ||
806 | |||
807 | static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | 563 | static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, |
808 | struct btrfs_root *root, | 564 | struct btrfs_root *root, |
809 | struct btrfs_path *path, | 565 | struct btrfs_path *path, |
810 | u64 bytenr, u64 parent, | 566 | u64 bytenr, u64 parent, |
811 | u64 ref_root, u64 ref_generation, | 567 | u64 ref_root, u64 ref_generation, |
812 | u64 owner_objectid) | 568 | u64 owner_objectid, |
569 | int refs_to_add) | ||
813 | { | 570 | { |
814 | struct btrfs_key key; | 571 | struct btrfs_key key; |
815 | struct extent_buffer *leaf; | 572 | struct extent_buffer *leaf; |
@@ -829,9 +586,10 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
829 | btrfs_set_ref_root(leaf, ref, ref_root); | 586 | btrfs_set_ref_root(leaf, ref, ref_root); |
830 | btrfs_set_ref_generation(leaf, ref, ref_generation); | 587 | btrfs_set_ref_generation(leaf, ref, ref_generation); |
831 | btrfs_set_ref_objectid(leaf, ref, owner_objectid); | 588 | btrfs_set_ref_objectid(leaf, ref, owner_objectid); |
832 | btrfs_set_ref_num_refs(leaf, ref, 1); | 589 | btrfs_set_ref_num_refs(leaf, ref, refs_to_add); |
833 | } else if (ret == -EEXIST) { | 590 | } else if (ret == -EEXIST) { |
834 | u64 existing_owner; | 591 | u64 existing_owner; |
592 | |||
835 | BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); | 593 | BUG_ON(owner_objectid < BTRFS_FIRST_FREE_OBJECTID); |
836 | leaf = path->nodes[0]; | 594 | leaf = path->nodes[0]; |
837 | ref = btrfs_item_ptr(leaf, path->slots[0], | 595 | ref = btrfs_item_ptr(leaf, path->slots[0], |
@@ -845,7 +603,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
845 | 603 | ||
846 | num_refs = btrfs_ref_num_refs(leaf, ref); | 604 | num_refs = btrfs_ref_num_refs(leaf, ref); |
847 | BUG_ON(num_refs == 0); | 605 | BUG_ON(num_refs == 0); |
848 | btrfs_set_ref_num_refs(leaf, ref, num_refs + 1); | 606 | btrfs_set_ref_num_refs(leaf, ref, num_refs + refs_to_add); |
849 | 607 | ||
850 | existing_owner = btrfs_ref_objectid(leaf, ref); | 608 | existing_owner = btrfs_ref_objectid(leaf, ref); |
851 | if (existing_owner != owner_objectid && | 609 | if (existing_owner != owner_objectid && |
@@ -857,6 +615,7 @@ static noinline int insert_extent_backref(struct btrfs_trans_handle *trans, | |||
857 | } else { | 615 | } else { |
858 | goto out; | 616 | goto out; |
859 | } | 617 | } |
618 | btrfs_unlock_up_safe(path, 1); | ||
860 | btrfs_mark_buffer_dirty(path->nodes[0]); | 619 | btrfs_mark_buffer_dirty(path->nodes[0]); |
861 | out: | 620 | out: |
862 | btrfs_release_path(root, path); | 621 | btrfs_release_path(root, path); |
@@ -865,7 +624,8 @@ out: | |||
865 | 624 | ||
866 | static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, | 625 | static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, |
867 | struct btrfs_root *root, | 626 | struct btrfs_root *root, |
868 | struct btrfs_path *path) | 627 | struct btrfs_path *path, |
628 | int refs_to_drop) | ||
869 | { | 629 | { |
870 | struct extent_buffer *leaf; | 630 | struct extent_buffer *leaf; |
871 | struct btrfs_extent_ref *ref; | 631 | struct btrfs_extent_ref *ref; |
@@ -875,8 +635,8 @@ static noinline int remove_extent_backref(struct btrfs_trans_handle *trans, | |||
875 | leaf = path->nodes[0]; | 635 | leaf = path->nodes[0]; |
876 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); | 636 | ref = btrfs_item_ptr(leaf, path->slots[0], struct btrfs_extent_ref); |
877 | num_refs = btrfs_ref_num_refs(leaf, ref); | 637 | num_refs = btrfs_ref_num_refs(leaf, ref); |
878 | BUG_ON(num_refs == 0); | 638 | BUG_ON(num_refs < refs_to_drop); |
879 | num_refs -= 1; | 639 | num_refs -= refs_to_drop; |
880 | if (num_refs == 0) { | 640 | if (num_refs == 0) { |
881 | ret = btrfs_del_item(trans, root, path); | 641 | ret = btrfs_del_item(trans, root, path); |
882 | } else { | 642 | } else { |
@@ -927,332 +687,28 @@ static int btrfs_discard_extent(struct btrfs_root *root, u64 bytenr, | |||
927 | #endif | 687 | #endif |
928 | } | 688 | } |
929 | 689 | ||
930 | static noinline int free_extents(struct btrfs_trans_handle *trans, | ||
931 | struct btrfs_root *extent_root, | ||
932 | struct list_head *del_list) | ||
933 | { | ||
934 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
935 | struct btrfs_path *path; | ||
936 | struct btrfs_key key, found_key; | ||
937 | struct extent_buffer *leaf; | ||
938 | struct list_head *cur; | ||
939 | struct pending_extent_op *op; | ||
940 | struct btrfs_extent_item *ei; | ||
941 | int ret, num_to_del, extent_slot = 0, found_extent = 0; | ||
942 | u32 refs; | ||
943 | u64 bytes_freed = 0; | ||
944 | |||
945 | path = btrfs_alloc_path(); | ||
946 | if (!path) | ||
947 | return -ENOMEM; | ||
948 | path->reada = 1; | ||
949 | |||
950 | search: | ||
951 | /* search for the backref for the current ref we want to delete */ | ||
952 | cur = del_list->next; | ||
953 | op = list_entry(cur, struct pending_extent_op, list); | ||
954 | ret = lookup_extent_backref(trans, extent_root, path, op->bytenr, | ||
955 | op->orig_parent, | ||
956 | extent_root->root_key.objectid, | ||
957 | op->orig_generation, op->level, 1); | ||
958 | if (ret) { | ||
959 | printk(KERN_ERR "btrfs unable to find backref byte nr %llu " | ||
960 | "root %llu gen %llu owner %u\n", | ||
961 | (unsigned long long)op->bytenr, | ||
962 | (unsigned long long)extent_root->root_key.objectid, | ||
963 | (unsigned long long)op->orig_generation, op->level); | ||
964 | btrfs_print_leaf(extent_root, path->nodes[0]); | ||
965 | WARN_ON(1); | ||
966 | goto out; | ||
967 | } | ||
968 | |||
969 | extent_slot = path->slots[0]; | ||
970 | num_to_del = 1; | ||
971 | found_extent = 0; | ||
972 | |||
973 | /* | ||
974 | * if we aren't the first item on the leaf we can move back one and see | ||
975 | * if our ref is right next to our extent item | ||
976 | */ | ||
977 | if (likely(extent_slot)) { | ||
978 | extent_slot--; | ||
979 | btrfs_item_key_to_cpu(path->nodes[0], &found_key, | ||
980 | extent_slot); | ||
981 | if (found_key.objectid == op->bytenr && | ||
982 | found_key.type == BTRFS_EXTENT_ITEM_KEY && | ||
983 | found_key.offset == op->num_bytes) { | ||
984 | num_to_del++; | ||
985 | found_extent = 1; | ||
986 | } | ||
987 | } | ||
988 | |||
989 | /* | ||
990 | * if we didn't find the extent we need to delete the backref and then | ||
991 | * search for the extent item key so we can update its ref count | ||
992 | */ | ||
993 | if (!found_extent) { | ||
994 | key.objectid = op->bytenr; | ||
995 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
996 | key.offset = op->num_bytes; | ||
997 | |||
998 | ret = remove_extent_backref(trans, extent_root, path); | ||
999 | BUG_ON(ret); | ||
1000 | btrfs_release_path(extent_root, path); | ||
1001 | ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1); | ||
1002 | BUG_ON(ret); | ||
1003 | extent_slot = path->slots[0]; | ||
1004 | } | ||
1005 | |||
1006 | /* this is where we update the ref count for the extent */ | ||
1007 | leaf = path->nodes[0]; | ||
1008 | ei = btrfs_item_ptr(leaf, extent_slot, struct btrfs_extent_item); | ||
1009 | refs = btrfs_extent_refs(leaf, ei); | ||
1010 | BUG_ON(refs == 0); | ||
1011 | refs--; | ||
1012 | btrfs_set_extent_refs(leaf, ei, refs); | ||
1013 | |||
1014 | btrfs_mark_buffer_dirty(leaf); | ||
1015 | |||
1016 | /* | ||
1017 | * This extent needs deleting. The reason cur_slot is extent_slot + | ||
1018 | * num_to_del is because extent_slot points to the slot where the extent | ||
1019 | * is, and if the backref was not right next to the extent we will be | ||
1020 | * deleting at least 1 item, and will want to start searching at the | ||
1021 | * slot directly next to extent_slot. However if we did find the | ||
1022 | * backref next to the extent item them we will be deleting at least 2 | ||
1023 | * items and will want to start searching directly after the ref slot | ||
1024 | */ | ||
1025 | if (!refs) { | ||
1026 | struct list_head *pos, *n, *end; | ||
1027 | int cur_slot = extent_slot+num_to_del; | ||
1028 | u64 super_used; | ||
1029 | u64 root_used; | ||
1030 | |||
1031 | path->slots[0] = extent_slot; | ||
1032 | bytes_freed = op->num_bytes; | ||
1033 | |||
1034 | mutex_lock(&info->pinned_mutex); | ||
1035 | ret = pin_down_bytes(trans, extent_root, op->bytenr, | ||
1036 | op->num_bytes, op->level >= | ||
1037 | BTRFS_FIRST_FREE_OBJECTID); | ||
1038 | mutex_unlock(&info->pinned_mutex); | ||
1039 | BUG_ON(ret < 0); | ||
1040 | op->del = ret; | ||
1041 | |||
1042 | /* | ||
1043 | * we need to see if we can delete multiple things at once, so | ||
1044 | * start looping through the list of extents we are wanting to | ||
1045 | * delete and see if their extent/backref's are right next to | ||
1046 | * eachother and the extents only have 1 ref | ||
1047 | */ | ||
1048 | for (pos = cur->next; pos != del_list; pos = pos->next) { | ||
1049 | struct pending_extent_op *tmp; | ||
1050 | |||
1051 | tmp = list_entry(pos, struct pending_extent_op, list); | ||
1052 | |||
1053 | /* we only want to delete extent+ref at this stage */ | ||
1054 | if (cur_slot >= btrfs_header_nritems(leaf) - 1) | ||
1055 | break; | ||
1056 | |||
1057 | btrfs_item_key_to_cpu(leaf, &found_key, cur_slot); | ||
1058 | if (found_key.objectid != tmp->bytenr || | ||
1059 | found_key.type != BTRFS_EXTENT_ITEM_KEY || | ||
1060 | found_key.offset != tmp->num_bytes) | ||
1061 | break; | ||
1062 | |||
1063 | /* check to make sure this extent only has one ref */ | ||
1064 | ei = btrfs_item_ptr(leaf, cur_slot, | ||
1065 | struct btrfs_extent_item); | ||
1066 | if (btrfs_extent_refs(leaf, ei) != 1) | ||
1067 | break; | ||
1068 | |||
1069 | btrfs_item_key_to_cpu(leaf, &found_key, cur_slot+1); | ||
1070 | if (found_key.objectid != tmp->bytenr || | ||
1071 | found_key.type != BTRFS_EXTENT_REF_KEY || | ||
1072 | found_key.offset != tmp->orig_parent) | ||
1073 | break; | ||
1074 | |||
1075 | /* | ||
1076 | * the ref is right next to the extent, we can set the | ||
1077 | * ref count to 0 since we will delete them both now | ||
1078 | */ | ||
1079 | btrfs_set_extent_refs(leaf, ei, 0); | ||
1080 | |||
1081 | /* pin down the bytes for this extent */ | ||
1082 | mutex_lock(&info->pinned_mutex); | ||
1083 | ret = pin_down_bytes(trans, extent_root, tmp->bytenr, | ||
1084 | tmp->num_bytes, tmp->level >= | ||
1085 | BTRFS_FIRST_FREE_OBJECTID); | ||
1086 | mutex_unlock(&info->pinned_mutex); | ||
1087 | BUG_ON(ret < 0); | ||
1088 | |||
1089 | /* | ||
1090 | * use the del field to tell if we need to go ahead and | ||
1091 | * free up the extent when we delete the item or not. | ||
1092 | */ | ||
1093 | tmp->del = ret; | ||
1094 | bytes_freed += tmp->num_bytes; | ||
1095 | |||
1096 | num_to_del += 2; | ||
1097 | cur_slot += 2; | ||
1098 | } | ||
1099 | end = pos; | ||
1100 | |||
1101 | /* update the free space counters */ | ||
1102 | spin_lock(&info->delalloc_lock); | ||
1103 | super_used = btrfs_super_bytes_used(&info->super_copy); | ||
1104 | btrfs_set_super_bytes_used(&info->super_copy, | ||
1105 | super_used - bytes_freed); | ||
1106 | |||
1107 | root_used = btrfs_root_used(&extent_root->root_item); | ||
1108 | btrfs_set_root_used(&extent_root->root_item, | ||
1109 | root_used - bytes_freed); | ||
1110 | spin_unlock(&info->delalloc_lock); | ||
1111 | |||
1112 | /* delete the items */ | ||
1113 | ret = btrfs_del_items(trans, extent_root, path, | ||
1114 | path->slots[0], num_to_del); | ||
1115 | BUG_ON(ret); | ||
1116 | |||
1117 | /* | ||
1118 | * loop through the extents we deleted and do the cleanup work | ||
1119 | * on them | ||
1120 | */ | ||
1121 | for (pos = cur, n = pos->next; pos != end; | ||
1122 | pos = n, n = pos->next) { | ||
1123 | struct pending_extent_op *tmp; | ||
1124 | tmp = list_entry(pos, struct pending_extent_op, list); | ||
1125 | |||
1126 | /* | ||
1127 | * remember tmp->del tells us wether or not we pinned | ||
1128 | * down the extent | ||
1129 | */ | ||
1130 | ret = update_block_group(trans, extent_root, | ||
1131 | tmp->bytenr, tmp->num_bytes, 0, | ||
1132 | tmp->del); | ||
1133 | BUG_ON(ret); | ||
1134 | |||
1135 | list_del_init(&tmp->list); | ||
1136 | unlock_extent(&info->extent_ins, tmp->bytenr, | ||
1137 | tmp->bytenr + tmp->num_bytes - 1, | ||
1138 | GFP_NOFS); | ||
1139 | kfree(tmp); | ||
1140 | } | ||
1141 | } else if (refs && found_extent) { | ||
1142 | /* | ||
1143 | * the ref and extent were right next to eachother, but the | ||
1144 | * extent still has a ref, so just free the backref and keep | ||
1145 | * going | ||
1146 | */ | ||
1147 | ret = remove_extent_backref(trans, extent_root, path); | ||
1148 | BUG_ON(ret); | ||
1149 | |||
1150 | list_del_init(&op->list); | ||
1151 | unlock_extent(&info->extent_ins, op->bytenr, | ||
1152 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
1153 | kfree(op); | ||
1154 | } else { | ||
1155 | /* | ||
1156 | * the extent has multiple refs and the backref we were looking | ||
1157 | * for was not right next to it, so just unlock and go next, | ||
1158 | * we're good to go | ||
1159 | */ | ||
1160 | list_del_init(&op->list); | ||
1161 | unlock_extent(&info->extent_ins, op->bytenr, | ||
1162 | op->bytenr + op->num_bytes - 1, GFP_NOFS); | ||
1163 | kfree(op); | ||
1164 | } | ||
1165 | |||
1166 | btrfs_release_path(extent_root, path); | ||
1167 | if (!list_empty(del_list)) | ||
1168 | goto search; | ||
1169 | |||
1170 | out: | ||
1171 | btrfs_free_path(path); | ||
1172 | return ret; | ||
1173 | } | ||
1174 | |||
1175 | static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 690 | static int __btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1176 | struct btrfs_root *root, u64 bytenr, | 691 | struct btrfs_root *root, u64 bytenr, |
692 | u64 num_bytes, | ||
1177 | u64 orig_parent, u64 parent, | 693 | u64 orig_parent, u64 parent, |
1178 | u64 orig_root, u64 ref_root, | 694 | u64 orig_root, u64 ref_root, |
1179 | u64 orig_generation, u64 ref_generation, | 695 | u64 orig_generation, u64 ref_generation, |
1180 | u64 owner_objectid) | 696 | u64 owner_objectid) |
1181 | { | 697 | { |
1182 | int ret; | 698 | int ret; |
1183 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 699 | int pin = owner_objectid < BTRFS_FIRST_FREE_OBJECTID; |
1184 | struct btrfs_path *path; | ||
1185 | |||
1186 | if (root == root->fs_info->extent_root) { | ||
1187 | struct pending_extent_op *extent_op; | ||
1188 | u64 num_bytes; | ||
1189 | |||
1190 | BUG_ON(owner_objectid >= BTRFS_MAX_LEVEL); | ||
1191 | num_bytes = btrfs_level_size(root, (int)owner_objectid); | ||
1192 | mutex_lock(&root->fs_info->extent_ins_mutex); | ||
1193 | if (test_range_bit(&root->fs_info->extent_ins, bytenr, | ||
1194 | bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { | ||
1195 | u64 priv; | ||
1196 | ret = get_state_private(&root->fs_info->extent_ins, | ||
1197 | bytenr, &priv); | ||
1198 | BUG_ON(ret); | ||
1199 | extent_op = (struct pending_extent_op *) | ||
1200 | (unsigned long)priv; | ||
1201 | BUG_ON(extent_op->parent != orig_parent); | ||
1202 | BUG_ON(extent_op->generation != orig_generation); | ||
1203 | 700 | ||
1204 | extent_op->parent = parent; | 701 | ret = btrfs_update_delayed_ref(trans, bytenr, num_bytes, |
1205 | extent_op->generation = ref_generation; | 702 | orig_parent, parent, orig_root, |
1206 | } else { | 703 | ref_root, orig_generation, |
1207 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 704 | ref_generation, owner_objectid, pin); |
1208 | BUG_ON(!extent_op); | ||
1209 | |||
1210 | extent_op->type = PENDING_BACKREF_UPDATE; | ||
1211 | extent_op->bytenr = bytenr; | ||
1212 | extent_op->num_bytes = num_bytes; | ||
1213 | extent_op->parent = parent; | ||
1214 | extent_op->orig_parent = orig_parent; | ||
1215 | extent_op->generation = ref_generation; | ||
1216 | extent_op->orig_generation = orig_generation; | ||
1217 | extent_op->level = (int)owner_objectid; | ||
1218 | INIT_LIST_HEAD(&extent_op->list); | ||
1219 | extent_op->del = 0; | ||
1220 | |||
1221 | set_extent_bits(&root->fs_info->extent_ins, | ||
1222 | bytenr, bytenr + num_bytes - 1, | ||
1223 | EXTENT_WRITEBACK, GFP_NOFS); | ||
1224 | set_state_private(&root->fs_info->extent_ins, | ||
1225 | bytenr, (unsigned long)extent_op); | ||
1226 | } | ||
1227 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
1228 | return 0; | ||
1229 | } | ||
1230 | |||
1231 | path = btrfs_alloc_path(); | ||
1232 | if (!path) | ||
1233 | return -ENOMEM; | ||
1234 | ret = lookup_extent_backref(trans, extent_root, path, | ||
1235 | bytenr, orig_parent, orig_root, | ||
1236 | orig_generation, owner_objectid, 1); | ||
1237 | if (ret) | ||
1238 | goto out; | ||
1239 | ret = remove_extent_backref(trans, extent_root, path); | ||
1240 | if (ret) | ||
1241 | goto out; | ||
1242 | ret = insert_extent_backref(trans, extent_root, path, bytenr, | ||
1243 | parent, ref_root, ref_generation, | ||
1244 | owner_objectid); | ||
1245 | BUG_ON(ret); | 705 | BUG_ON(ret); |
1246 | finish_current_insert(trans, extent_root, 0); | ||
1247 | del_pending_extents(trans, extent_root, 0); | ||
1248 | out: | ||
1249 | btrfs_free_path(path); | ||
1250 | return ret; | 706 | return ret; |
1251 | } | 707 | } |
1252 | 708 | ||
1253 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | 709 | int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, |
1254 | struct btrfs_root *root, u64 bytenr, | 710 | struct btrfs_root *root, u64 bytenr, |
1255 | u64 orig_parent, u64 parent, | 711 | u64 num_bytes, u64 orig_parent, u64 parent, |
1256 | u64 ref_root, u64 ref_generation, | 712 | u64 ref_root, u64 ref_generation, |
1257 | u64 owner_objectid) | 713 | u64 owner_objectid) |
1258 | { | 714 | { |
@@ -1260,20 +716,36 @@ int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, | |||
1260 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | 716 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && |
1261 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 717 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) |
1262 | return 0; | 718 | return 0; |
1263 | ret = __btrfs_update_extent_ref(trans, root, bytenr, orig_parent, | 719 | |
1264 | parent, ref_root, ref_root, | 720 | ret = __btrfs_update_extent_ref(trans, root, bytenr, num_bytes, |
1265 | ref_generation, ref_generation, | 721 | orig_parent, parent, ref_root, |
1266 | owner_objectid); | 722 | ref_root, ref_generation, |
723 | ref_generation, owner_objectid); | ||
1267 | return ret; | 724 | return ret; |
1268 | } | 725 | } |
1269 | |||
1270 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | 726 | static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, |
1271 | struct btrfs_root *root, u64 bytenr, | 727 | struct btrfs_root *root, u64 bytenr, |
728 | u64 num_bytes, | ||
1272 | u64 orig_parent, u64 parent, | 729 | u64 orig_parent, u64 parent, |
1273 | u64 orig_root, u64 ref_root, | 730 | u64 orig_root, u64 ref_root, |
1274 | u64 orig_generation, u64 ref_generation, | 731 | u64 orig_generation, u64 ref_generation, |
1275 | u64 owner_objectid) | 732 | u64 owner_objectid) |
1276 | { | 733 | { |
734 | int ret; | ||
735 | |||
736 | ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, ref_root, | ||
737 | ref_generation, owner_objectid, | ||
738 | BTRFS_ADD_DELAYED_REF, 0); | ||
739 | BUG_ON(ret); | ||
740 | return ret; | ||
741 | } | ||
742 | |||
743 | static noinline_for_stack int add_extent_ref(struct btrfs_trans_handle *trans, | ||
744 | struct btrfs_root *root, u64 bytenr, | ||
745 | u64 num_bytes, u64 parent, u64 ref_root, | ||
746 | u64 ref_generation, u64 owner_objectid, | ||
747 | int refs_to_add) | ||
748 | { | ||
1277 | struct btrfs_path *path; | 749 | struct btrfs_path *path; |
1278 | int ret; | 750 | int ret; |
1279 | struct btrfs_key key; | 751 | struct btrfs_key key; |
@@ -1286,17 +758,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1286 | return -ENOMEM; | 758 | return -ENOMEM; |
1287 | 759 | ||
1288 | path->reada = 1; | 760 | path->reada = 1; |
761 | path->leave_spinning = 1; | ||
1289 | key.objectid = bytenr; | 762 | key.objectid = bytenr; |
1290 | key.type = BTRFS_EXTENT_ITEM_KEY; | 763 | key.type = BTRFS_EXTENT_ITEM_KEY; |
1291 | key.offset = (u64)-1; | 764 | key.offset = num_bytes; |
1292 | 765 | ||
1293 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, | 766 | /* first find the extent item and update its reference count */ |
1294 | 0, 1); | 767 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, |
1295 | if (ret < 0) | 768 | path, 0, 1); |
769 | if (ret < 0) { | ||
770 | btrfs_set_path_blocking(path); | ||
1296 | return ret; | 771 | return ret; |
1297 | BUG_ON(ret == 0 || path->slots[0] == 0); | 772 | } |
1298 | 773 | ||
1299 | path->slots[0]--; | 774 | if (ret > 0) { |
775 | WARN_ON(1); | ||
776 | btrfs_free_path(path); | ||
777 | return -EIO; | ||
778 | } | ||
1300 | l = path->nodes[0]; | 779 | l = path->nodes[0]; |
1301 | 780 | ||
1302 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); | 781 | btrfs_item_key_to_cpu(l, &key, path->slots[0]); |
@@ -1310,21 +789,24 @@ static int __btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1310 | BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); | 789 | BUG_ON(key.type != BTRFS_EXTENT_ITEM_KEY); |
1311 | 790 | ||
1312 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | 791 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); |
792 | |||
1313 | refs = btrfs_extent_refs(l, item); | 793 | refs = btrfs_extent_refs(l, item); |
1314 | btrfs_set_extent_refs(l, item, refs + 1); | 794 | btrfs_set_extent_refs(l, item, refs + refs_to_add); |
795 | btrfs_unlock_up_safe(path, 1); | ||
796 | |||
1315 | btrfs_mark_buffer_dirty(path->nodes[0]); | 797 | btrfs_mark_buffer_dirty(path->nodes[0]); |
1316 | 798 | ||
1317 | btrfs_release_path(root->fs_info->extent_root, path); | 799 | btrfs_release_path(root->fs_info->extent_root, path); |
1318 | 800 | ||
1319 | path->reada = 1; | 801 | path->reada = 1; |
802 | path->leave_spinning = 1; | ||
803 | |||
804 | /* now insert the actual backref */ | ||
1320 | ret = insert_extent_backref(trans, root->fs_info->extent_root, | 805 | ret = insert_extent_backref(trans, root->fs_info->extent_root, |
1321 | path, bytenr, parent, | 806 | path, bytenr, parent, |
1322 | ref_root, ref_generation, | 807 | ref_root, ref_generation, |
1323 | owner_objectid); | 808 | owner_objectid, refs_to_add); |
1324 | BUG_ON(ret); | 809 | BUG_ON(ret); |
1325 | finish_current_insert(trans, root->fs_info->extent_root, 0); | ||
1326 | del_pending_extents(trans, root->fs_info->extent_root, 0); | ||
1327 | |||
1328 | btrfs_free_path(path); | 810 | btrfs_free_path(path); |
1329 | return 0; | 811 | return 0; |
1330 | } | 812 | } |
@@ -1339,68 +821,278 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans, | |||
1339 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && | 821 | if (ref_root == BTRFS_TREE_LOG_OBJECTID && |
1340 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) | 822 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) |
1341 | return 0; | 823 | return 0; |
1342 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, 0, parent, | 824 | |
825 | ret = __btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0, parent, | ||
1343 | 0, ref_root, 0, ref_generation, | 826 | 0, ref_root, 0, ref_generation, |
1344 | owner_objectid); | 827 | owner_objectid); |
1345 | return ret; | 828 | return ret; |
1346 | } | 829 | } |
1347 | 830 | ||
1348 | int btrfs_extent_post_op(struct btrfs_trans_handle *trans, | 831 | static int drop_delayed_ref(struct btrfs_trans_handle *trans, |
1349 | struct btrfs_root *root) | 832 | struct btrfs_root *root, |
833 | struct btrfs_delayed_ref_node *node) | ||
834 | { | ||
835 | int ret = 0; | ||
836 | struct btrfs_delayed_ref *ref = btrfs_delayed_node_to_ref(node); | ||
837 | |||
838 | BUG_ON(node->ref_mod == 0); | ||
839 | ret = __btrfs_free_extent(trans, root, node->bytenr, node->num_bytes, | ||
840 | node->parent, ref->root, ref->generation, | ||
841 | ref->owner_objectid, ref->pin, node->ref_mod); | ||
842 | |||
843 | return ret; | ||
844 | } | ||
845 | |||
846 | /* helper function to actually process a single delayed ref entry */ | ||
847 | static noinline int run_one_delayed_ref(struct btrfs_trans_handle *trans, | ||
848 | struct btrfs_root *root, | ||
849 | struct btrfs_delayed_ref_node *node, | ||
850 | int insert_reserved) | ||
1350 | { | 851 | { |
1351 | u64 start; | ||
1352 | u64 end; | ||
1353 | int ret; | 852 | int ret; |
853 | struct btrfs_delayed_ref *ref; | ||
854 | |||
855 | if (node->parent == (u64)-1) { | ||
856 | struct btrfs_delayed_ref_head *head; | ||
857 | /* | ||
858 | * we've hit the end of the chain and we were supposed | ||
859 | * to insert this extent into the tree. But, it got | ||
860 | * deleted before we ever needed to insert it, so all | ||
861 | * we have to do is clean up the accounting | ||
862 | */ | ||
863 | if (insert_reserved) { | ||
864 | update_reserved_extents(root, node->bytenr, | ||
865 | node->num_bytes, 0); | ||
866 | } | ||
867 | head = btrfs_delayed_node_to_head(node); | ||
868 | mutex_unlock(&head->mutex); | ||
869 | return 0; | ||
870 | } | ||
1354 | 871 | ||
1355 | while(1) { | 872 | ref = btrfs_delayed_node_to_ref(node); |
1356 | finish_current_insert(trans, root->fs_info->extent_root, 1); | 873 | if (ref->action == BTRFS_ADD_DELAYED_REF) { |
1357 | del_pending_extents(trans, root->fs_info->extent_root, 1); | 874 | if (insert_reserved) { |
875 | struct btrfs_key ins; | ||
1358 | 876 | ||
1359 | /* is there more work to do? */ | 877 | ins.objectid = node->bytenr; |
1360 | ret = find_first_extent_bit(&root->fs_info->pending_del, | 878 | ins.offset = node->num_bytes; |
1361 | 0, &start, &end, EXTENT_WRITEBACK); | 879 | ins.type = BTRFS_EXTENT_ITEM_KEY; |
1362 | if (!ret) | 880 | |
1363 | continue; | 881 | /* record the full extent allocation */ |
1364 | ret = find_first_extent_bit(&root->fs_info->extent_ins, | 882 | ret = __btrfs_alloc_reserved_extent(trans, root, |
1365 | 0, &start, &end, EXTENT_WRITEBACK); | 883 | node->parent, ref->root, |
1366 | if (!ret) | 884 | ref->generation, ref->owner_objectid, |
1367 | continue; | 885 | &ins, node->ref_mod); |
1368 | break; | 886 | update_reserved_extents(root, node->bytenr, |
887 | node->num_bytes, 0); | ||
888 | } else { | ||
889 | /* just add one backref */ | ||
890 | ret = add_extent_ref(trans, root, node->bytenr, | ||
891 | node->num_bytes, | ||
892 | node->parent, ref->root, ref->generation, | ||
893 | ref->owner_objectid, node->ref_mod); | ||
894 | } | ||
895 | BUG_ON(ret); | ||
896 | } else if (ref->action == BTRFS_DROP_DELAYED_REF) { | ||
897 | WARN_ON(insert_reserved); | ||
898 | ret = drop_delayed_ref(trans, root, node); | ||
1369 | } | 899 | } |
1370 | return 0; | 900 | return 0; |
1371 | } | 901 | } |
1372 | 902 | ||
1373 | int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans, | 903 | static noinline struct btrfs_delayed_ref_node * |
1374 | struct btrfs_root *root, u64 bytenr, | 904 | select_delayed_ref(struct btrfs_delayed_ref_head *head) |
1375 | u64 num_bytes, u32 *refs) | ||
1376 | { | 905 | { |
1377 | struct btrfs_path *path; | 906 | struct rb_node *node; |
907 | struct btrfs_delayed_ref_node *ref; | ||
908 | int action = BTRFS_ADD_DELAYED_REF; | ||
909 | again: | ||
910 | /* | ||
911 | * select delayed ref of type BTRFS_ADD_DELAYED_REF first. | ||
912 | * this prevents ref count from going down to zero when | ||
913 | * there still are pending delayed ref. | ||
914 | */ | ||
915 | node = rb_prev(&head->node.rb_node); | ||
916 | while (1) { | ||
917 | if (!node) | ||
918 | break; | ||
919 | ref = rb_entry(node, struct btrfs_delayed_ref_node, | ||
920 | rb_node); | ||
921 | if (ref->bytenr != head->node.bytenr) | ||
922 | break; | ||
923 | if (btrfs_delayed_node_to_ref(ref)->action == action) | ||
924 | return ref; | ||
925 | node = rb_prev(node); | ||
926 | } | ||
927 | if (action == BTRFS_ADD_DELAYED_REF) { | ||
928 | action = BTRFS_DROP_DELAYED_REF; | ||
929 | goto again; | ||
930 | } | ||
931 | return NULL; | ||
932 | } | ||
933 | |||
934 | static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, | ||
935 | struct btrfs_root *root, | ||
936 | struct list_head *cluster) | ||
937 | { | ||
938 | struct btrfs_delayed_ref_root *delayed_refs; | ||
939 | struct btrfs_delayed_ref_node *ref; | ||
940 | struct btrfs_delayed_ref_head *locked_ref = NULL; | ||
1378 | int ret; | 941 | int ret; |
1379 | struct btrfs_key key; | 942 | int count = 0; |
1380 | struct extent_buffer *l; | 943 | int must_insert_reserved = 0; |
1381 | struct btrfs_extent_item *item; | ||
1382 | 944 | ||
1383 | WARN_ON(num_bytes < root->sectorsize); | 945 | delayed_refs = &trans->transaction->delayed_refs; |
1384 | path = btrfs_alloc_path(); | 946 | while (1) { |
1385 | path->reada = 1; | 947 | if (!locked_ref) { |
1386 | key.objectid = bytenr; | 948 | /* pick a new head ref from the cluster list */ |
1387 | key.offset = num_bytes; | 949 | if (list_empty(cluster)) |
1388 | btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY); | 950 | break; |
1389 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path, | 951 | |
1390 | 0, 0); | 952 | locked_ref = list_entry(cluster->next, |
1391 | if (ret < 0) | 953 | struct btrfs_delayed_ref_head, cluster); |
1392 | goto out; | 954 | |
1393 | if (ret != 0) { | 955 | /* grab the lock that says we are going to process |
1394 | btrfs_print_leaf(root, path->nodes[0]); | 956 | * all the refs for this head */ |
1395 | printk(KERN_INFO "btrfs failed to find block number %llu\n", | 957 | ret = btrfs_delayed_ref_lock(trans, locked_ref); |
1396 | (unsigned long long)bytenr); | 958 | |
1397 | BUG(); | 959 | /* |
960 | * we may have dropped the spin lock to get the head | ||
961 | * mutex lock, and that might have given someone else | ||
962 | * time to free the head. If that's true, it has been | ||
963 | * removed from our list and we can move on. | ||
964 | */ | ||
965 | if (ret == -EAGAIN) { | ||
966 | locked_ref = NULL; | ||
967 | count++; | ||
968 | continue; | ||
969 | } | ||
970 | } | ||
971 | |||
972 | /* | ||
973 | * record the must insert reserved flag before we | ||
974 | * drop the spin lock. | ||
975 | */ | ||
976 | must_insert_reserved = locked_ref->must_insert_reserved; | ||
977 | locked_ref->must_insert_reserved = 0; | ||
978 | |||
979 | /* | ||
980 | * locked_ref is the head node, so we have to go one | ||
981 | * node back for any delayed ref updates | ||
982 | */ | ||
983 | ref = select_delayed_ref(locked_ref); | ||
984 | if (!ref) { | ||
985 | /* All delayed refs have been processed, Go ahead | ||
986 | * and send the head node to run_one_delayed_ref, | ||
987 | * so that any accounting fixes can happen | ||
988 | */ | ||
989 | ref = &locked_ref->node; | ||
990 | list_del_init(&locked_ref->cluster); | ||
991 | locked_ref = NULL; | ||
992 | } | ||
993 | |||
994 | ref->in_tree = 0; | ||
995 | rb_erase(&ref->rb_node, &delayed_refs->root); | ||
996 | delayed_refs->num_entries--; | ||
997 | spin_unlock(&delayed_refs->lock); | ||
998 | |||
999 | ret = run_one_delayed_ref(trans, root, ref, | ||
1000 | must_insert_reserved); | ||
1001 | BUG_ON(ret); | ||
1002 | btrfs_put_delayed_ref(ref); | ||
1003 | |||
1004 | count++; | ||
1005 | cond_resched(); | ||
1006 | spin_lock(&delayed_refs->lock); | ||
1007 | } | ||
1008 | return count; | ||
1009 | } | ||
1010 | |||
1011 | /* | ||
1012 | * this starts processing the delayed reference count updates and | ||
1013 | * extent insertions we have queued up so far. count can be | ||
1014 | * 0, which means to process everything in the tree at the start | ||
1015 | * of the run (but not newly added entries), or it can be some target | ||
1016 | * number you'd like to process. | ||
1017 | */ | ||
1018 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | ||
1019 | struct btrfs_root *root, unsigned long count) | ||
1020 | { | ||
1021 | struct rb_node *node; | ||
1022 | struct btrfs_delayed_ref_root *delayed_refs; | ||
1023 | struct btrfs_delayed_ref_node *ref; | ||
1024 | struct list_head cluster; | ||
1025 | int ret; | ||
1026 | int run_all = count == (unsigned long)-1; | ||
1027 | int run_most = 0; | ||
1028 | |||
1029 | if (root == root->fs_info->extent_root) | ||
1030 | root = root->fs_info->tree_root; | ||
1031 | |||
1032 | delayed_refs = &trans->transaction->delayed_refs; | ||
1033 | INIT_LIST_HEAD(&cluster); | ||
1034 | again: | ||
1035 | spin_lock(&delayed_refs->lock); | ||
1036 | if (count == 0) { | ||
1037 | count = delayed_refs->num_entries * 2; | ||
1038 | run_most = 1; | ||
1039 | } | ||
1040 | while (1) { | ||
1041 | if (!(run_all || run_most) && | ||
1042 | delayed_refs->num_heads_ready < 64) | ||
1043 | break; | ||
1044 | |||
1045 | /* | ||
1046 | * go find something we can process in the rbtree. We start at | ||
1047 | * the beginning of the tree, and then build a cluster | ||
1048 | * of refs to process starting at the first one we are able to | ||
1049 | * lock | ||
1050 | */ | ||
1051 | ret = btrfs_find_ref_cluster(trans, &cluster, | ||
1052 | delayed_refs->run_delayed_start); | ||
1053 | if (ret) | ||
1054 | break; | ||
1055 | |||
1056 | ret = run_clustered_refs(trans, root, &cluster); | ||
1057 | BUG_ON(ret < 0); | ||
1058 | |||
1059 | count -= min_t(unsigned long, ret, count); | ||
1060 | |||
1061 | if (count == 0) | ||
1062 | break; | ||
1063 | } | ||
1064 | |||
1065 | if (run_all) { | ||
1066 | node = rb_first(&delayed_refs->root); | ||
1067 | if (!node) | ||
1068 | goto out; | ||
1069 | count = (unsigned long)-1; | ||
1070 | |||
1071 | while (node) { | ||
1072 | ref = rb_entry(node, struct btrfs_delayed_ref_node, | ||
1073 | rb_node); | ||
1074 | if (btrfs_delayed_ref_is_head(ref)) { | ||
1075 | struct btrfs_delayed_ref_head *head; | ||
1076 | |||
1077 | head = btrfs_delayed_node_to_head(ref); | ||
1078 | atomic_inc(&ref->refs); | ||
1079 | |||
1080 | spin_unlock(&delayed_refs->lock); | ||
1081 | mutex_lock(&head->mutex); | ||
1082 | mutex_unlock(&head->mutex); | ||
1083 | |||
1084 | btrfs_put_delayed_ref(ref); | ||
1085 | cond_resched(); | ||
1086 | goto again; | ||
1087 | } | ||
1088 | node = rb_next(node); | ||
1089 | } | ||
1090 | spin_unlock(&delayed_refs->lock); | ||
1091 | schedule_timeout(1); | ||
1092 | goto again; | ||
1398 | } | 1093 | } |
1399 | l = path->nodes[0]; | ||
1400 | item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item); | ||
1401 | *refs = btrfs_extent_refs(l, item); | ||
1402 | out: | 1094 | out: |
1403 | btrfs_free_path(path); | 1095 | spin_unlock(&delayed_refs->lock); |
1404 | return 0; | 1096 | return 0; |
1405 | } | 1097 | } |
1406 | 1098 | ||
@@ -1624,7 +1316,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1624 | int refi = 0; | 1316 | int refi = 0; |
1625 | int slot; | 1317 | int slot; |
1626 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, | 1318 | int (*process_func)(struct btrfs_trans_handle *, struct btrfs_root *, |
1627 | u64, u64, u64, u64, u64, u64, u64, u64); | 1319 | u64, u64, u64, u64, u64, u64, u64, u64, u64); |
1628 | 1320 | ||
1629 | ref_root = btrfs_header_owner(buf); | 1321 | ref_root = btrfs_header_owner(buf); |
1630 | ref_generation = btrfs_header_generation(buf); | 1322 | ref_generation = btrfs_header_generation(buf); |
@@ -1696,12 +1388,19 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1696 | 1388 | ||
1697 | if (level == 0) { | 1389 | if (level == 0) { |
1698 | btrfs_item_key_to_cpu(buf, &key, slot); | 1390 | btrfs_item_key_to_cpu(buf, &key, slot); |
1391 | fi = btrfs_item_ptr(buf, slot, | ||
1392 | struct btrfs_file_extent_item); | ||
1393 | |||
1394 | bytenr = btrfs_file_extent_disk_bytenr(buf, fi); | ||
1395 | if (bytenr == 0) | ||
1396 | continue; | ||
1699 | 1397 | ||
1700 | ret = process_func(trans, root, bytenr, | 1398 | ret = process_func(trans, root, bytenr, |
1701 | orig_buf->start, buf->start, | 1399 | btrfs_file_extent_disk_num_bytes(buf, fi), |
1702 | orig_root, ref_root, | 1400 | orig_buf->start, buf->start, |
1703 | orig_generation, ref_generation, | 1401 | orig_root, ref_root, |
1704 | key.objectid); | 1402 | orig_generation, ref_generation, |
1403 | key.objectid); | ||
1705 | 1404 | ||
1706 | if (ret) { | 1405 | if (ret) { |
1707 | faili = slot; | 1406 | faili = slot; |
@@ -1709,7 +1408,7 @@ noinline int btrfs_inc_ref(struct btrfs_trans_handle *trans, | |||
1709 | goto fail; | 1408 | goto fail; |
1710 | } | 1409 | } |
1711 | } else { | 1410 | } else { |
1712 | ret = process_func(trans, root, bytenr, | 1411 | ret = process_func(trans, root, bytenr, buf->len, |
1713 | orig_buf->start, buf->start, | 1412 | orig_buf->start, buf->start, |
1714 | orig_root, ref_root, | 1413 | orig_root, ref_root, |
1715 | orig_generation, ref_generation, | 1414 | orig_generation, ref_generation, |
@@ -1786,17 +1485,17 @@ int btrfs_update_ref(struct btrfs_trans_handle *trans, | |||
1786 | if (bytenr == 0) | 1485 | if (bytenr == 0) |
1787 | continue; | 1486 | continue; |
1788 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | 1487 | ret = __btrfs_update_extent_ref(trans, root, bytenr, |
1789 | orig_buf->start, buf->start, | 1488 | btrfs_file_extent_disk_num_bytes(buf, fi), |
1790 | orig_root, ref_root, | 1489 | orig_buf->start, buf->start, |
1791 | orig_generation, ref_generation, | 1490 | orig_root, ref_root, orig_generation, |
1792 | key.objectid); | 1491 | ref_generation, key.objectid); |
1793 | if (ret) | 1492 | if (ret) |
1794 | goto fail; | 1493 | goto fail; |
1795 | } else { | 1494 | } else { |
1796 | bytenr = btrfs_node_blockptr(buf, slot); | 1495 | bytenr = btrfs_node_blockptr(buf, slot); |
1797 | ret = __btrfs_update_extent_ref(trans, root, bytenr, | 1496 | ret = __btrfs_update_extent_ref(trans, root, bytenr, |
1798 | orig_buf->start, buf->start, | 1497 | buf->len, orig_buf->start, |
1799 | orig_root, ref_root, | 1498 | buf->start, orig_root, ref_root, |
1800 | orig_generation, ref_generation, | 1499 | orig_generation, ref_generation, |
1801 | level - 1); | 1500 | level - 1); |
1802 | if (ret) | 1501 | if (ret) |
@@ -1815,7 +1514,6 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, | |||
1815 | struct btrfs_block_group_cache *cache) | 1514 | struct btrfs_block_group_cache *cache) |
1816 | { | 1515 | { |
1817 | int ret; | 1516 | int ret; |
1818 | int pending_ret; | ||
1819 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 1517 | struct btrfs_root *extent_root = root->fs_info->extent_root; |
1820 | unsigned long bi; | 1518 | unsigned long bi; |
1821 | struct extent_buffer *leaf; | 1519 | struct extent_buffer *leaf; |
@@ -1831,12 +1529,8 @@ static int write_one_cache_group(struct btrfs_trans_handle *trans, | |||
1831 | btrfs_mark_buffer_dirty(leaf); | 1529 | btrfs_mark_buffer_dirty(leaf); |
1832 | btrfs_release_path(extent_root, path); | 1530 | btrfs_release_path(extent_root, path); |
1833 | fail: | 1531 | fail: |
1834 | finish_current_insert(trans, extent_root, 0); | ||
1835 | pending_ret = del_pending_extents(trans, extent_root, 0); | ||
1836 | if (ret) | 1532 | if (ret) |
1837 | return ret; | 1533 | return ret; |
1838 | if (pending_ret) | ||
1839 | return pending_ret; | ||
1840 | return 0; | 1534 | return 0; |
1841 | 1535 | ||
1842 | } | 1536 | } |
@@ -2361,6 +2055,8 @@ int btrfs_update_pinned_extents(struct btrfs_root *root, | |||
2361 | clear_extent_dirty(&fs_info->pinned_extents, | 2055 | clear_extent_dirty(&fs_info->pinned_extents, |
2362 | bytenr, bytenr + num - 1, GFP_NOFS); | 2056 | bytenr, bytenr + num - 1, GFP_NOFS); |
2363 | } | 2057 | } |
2058 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2059 | |||
2364 | while (num > 0) { | 2060 | while (num > 0) { |
2365 | cache = btrfs_lookup_block_group(fs_info, bytenr); | 2061 | cache = btrfs_lookup_block_group(fs_info, bytenr); |
2366 | BUG_ON(!cache); | 2062 | BUG_ON(!cache); |
@@ -2452,8 +2148,8 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2452 | u64 end; | 2148 | u64 end; |
2453 | int ret; | 2149 | int ret; |
2454 | 2150 | ||
2455 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2456 | while (1) { | 2151 | while (1) { |
2152 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2457 | ret = find_first_extent_bit(unpin, 0, &start, &end, | 2153 | ret = find_first_extent_bit(unpin, 0, &start, &end, |
2458 | EXTENT_DIRTY); | 2154 | EXTENT_DIRTY); |
2459 | if (ret) | 2155 | if (ret) |
@@ -2461,209 +2157,21 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
2461 | 2157 | ||
2462 | ret = btrfs_discard_extent(root, start, end + 1 - start); | 2158 | ret = btrfs_discard_extent(root, start, end + 1 - start); |
2463 | 2159 | ||
2160 | /* unlocks the pinned mutex */ | ||
2464 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); | 2161 | btrfs_update_pinned_extents(root, start, end + 1 - start, 0); |
2465 | clear_extent_dirty(unpin, start, end, GFP_NOFS); | 2162 | clear_extent_dirty(unpin, start, end, GFP_NOFS); |
2466 | 2163 | ||
2467 | if (need_resched()) { | 2164 | cond_resched(); |
2468 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
2469 | cond_resched(); | ||
2470 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2471 | } | ||
2472 | } | 2165 | } |
2473 | mutex_unlock(&root->fs_info->pinned_mutex); | 2166 | mutex_unlock(&root->fs_info->pinned_mutex); |
2474 | return ret; | 2167 | return ret; |
2475 | } | 2168 | } |
2476 | 2169 | ||
2477 | static int finish_current_insert(struct btrfs_trans_handle *trans, | ||
2478 | struct btrfs_root *extent_root, int all) | ||
2479 | { | ||
2480 | u64 start; | ||
2481 | u64 end; | ||
2482 | u64 priv; | ||
2483 | u64 search = 0; | ||
2484 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2485 | struct btrfs_path *path; | ||
2486 | struct pending_extent_op *extent_op, *tmp; | ||
2487 | struct list_head insert_list, update_list; | ||
2488 | int ret; | ||
2489 | int num_inserts = 0, max_inserts, restart = 0; | ||
2490 | |||
2491 | path = btrfs_alloc_path(); | ||
2492 | INIT_LIST_HEAD(&insert_list); | ||
2493 | INIT_LIST_HEAD(&update_list); | ||
2494 | |||
2495 | max_inserts = extent_root->leafsize / | ||
2496 | (2 * sizeof(struct btrfs_key) + 2 * sizeof(struct btrfs_item) + | ||
2497 | sizeof(struct btrfs_extent_ref) + | ||
2498 | sizeof(struct btrfs_extent_item)); | ||
2499 | again: | ||
2500 | mutex_lock(&info->extent_ins_mutex); | ||
2501 | while (1) { | ||
2502 | ret = find_first_extent_bit(&info->extent_ins, search, &start, | ||
2503 | &end, EXTENT_WRITEBACK); | ||
2504 | if (ret) { | ||
2505 | if (restart && !num_inserts && | ||
2506 | list_empty(&update_list)) { | ||
2507 | restart = 0; | ||
2508 | search = 0; | ||
2509 | continue; | ||
2510 | } | ||
2511 | break; | ||
2512 | } | ||
2513 | |||
2514 | ret = try_lock_extent(&info->extent_ins, start, end, GFP_NOFS); | ||
2515 | if (!ret) { | ||
2516 | if (all) | ||
2517 | restart = 1; | ||
2518 | search = end + 1; | ||
2519 | if (need_resched()) { | ||
2520 | mutex_unlock(&info->extent_ins_mutex); | ||
2521 | cond_resched(); | ||
2522 | mutex_lock(&info->extent_ins_mutex); | ||
2523 | } | ||
2524 | continue; | ||
2525 | } | ||
2526 | |||
2527 | ret = get_state_private(&info->extent_ins, start, &priv); | ||
2528 | BUG_ON(ret); | ||
2529 | extent_op = (struct pending_extent_op *)(unsigned long) priv; | ||
2530 | |||
2531 | if (extent_op->type == PENDING_EXTENT_INSERT) { | ||
2532 | num_inserts++; | ||
2533 | list_add_tail(&extent_op->list, &insert_list); | ||
2534 | search = end + 1; | ||
2535 | if (num_inserts == max_inserts) { | ||
2536 | restart = 1; | ||
2537 | break; | ||
2538 | } | ||
2539 | } else if (extent_op->type == PENDING_BACKREF_UPDATE) { | ||
2540 | list_add_tail(&extent_op->list, &update_list); | ||
2541 | search = end + 1; | ||
2542 | } else { | ||
2543 | BUG(); | ||
2544 | } | ||
2545 | } | ||
2546 | |||
2547 | /* | ||
2548 | * process the update list, clear the writeback bit for it, and if | ||
2549 | * somebody marked this thing for deletion then just unlock it and be | ||
2550 | * done, the free_extents will handle it | ||
2551 | */ | ||
2552 | list_for_each_entry_safe(extent_op, tmp, &update_list, list) { | ||
2553 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, | ||
2554 | extent_op->bytenr + extent_op->num_bytes - 1, | ||
2555 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2556 | if (extent_op->del) { | ||
2557 | list_del_init(&extent_op->list); | ||
2558 | unlock_extent(&info->extent_ins, extent_op->bytenr, | ||
2559 | extent_op->bytenr + extent_op->num_bytes | ||
2560 | - 1, GFP_NOFS); | ||
2561 | kfree(extent_op); | ||
2562 | } | ||
2563 | } | ||
2564 | mutex_unlock(&info->extent_ins_mutex); | ||
2565 | |||
2566 | /* | ||
2567 | * still have things left on the update list, go ahead an update | ||
2568 | * everything | ||
2569 | */ | ||
2570 | if (!list_empty(&update_list)) { | ||
2571 | ret = update_backrefs(trans, extent_root, path, &update_list); | ||
2572 | BUG_ON(ret); | ||
2573 | |||
2574 | /* we may have COW'ed new blocks, so lets start over */ | ||
2575 | if (all) | ||
2576 | restart = 1; | ||
2577 | } | ||
2578 | |||
2579 | /* | ||
2580 | * if no inserts need to be done, but we skipped some extents and we | ||
2581 | * need to make sure everything is cleaned then reset everything and | ||
2582 | * go back to the beginning | ||
2583 | */ | ||
2584 | if (!num_inserts && restart) { | ||
2585 | search = 0; | ||
2586 | restart = 0; | ||
2587 | INIT_LIST_HEAD(&update_list); | ||
2588 | INIT_LIST_HEAD(&insert_list); | ||
2589 | goto again; | ||
2590 | } else if (!num_inserts) { | ||
2591 | goto out; | ||
2592 | } | ||
2593 | |||
2594 | /* | ||
2595 | * process the insert extents list. Again if we are deleting this | ||
2596 | * extent, then just unlock it, pin down the bytes if need be, and be | ||
2597 | * done with it. Saves us from having to actually insert the extent | ||
2598 | * into the tree and then subsequently come along and delete it | ||
2599 | */ | ||
2600 | mutex_lock(&info->extent_ins_mutex); | ||
2601 | list_for_each_entry_safe(extent_op, tmp, &insert_list, list) { | ||
2602 | clear_extent_bits(&info->extent_ins, extent_op->bytenr, | ||
2603 | extent_op->bytenr + extent_op->num_bytes - 1, | ||
2604 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2605 | if (extent_op->del) { | ||
2606 | u64 used; | ||
2607 | list_del_init(&extent_op->list); | ||
2608 | unlock_extent(&info->extent_ins, extent_op->bytenr, | ||
2609 | extent_op->bytenr + extent_op->num_bytes | ||
2610 | - 1, GFP_NOFS); | ||
2611 | |||
2612 | mutex_lock(&extent_root->fs_info->pinned_mutex); | ||
2613 | ret = pin_down_bytes(trans, extent_root, | ||
2614 | extent_op->bytenr, | ||
2615 | extent_op->num_bytes, 0); | ||
2616 | mutex_unlock(&extent_root->fs_info->pinned_mutex); | ||
2617 | |||
2618 | spin_lock(&info->delalloc_lock); | ||
2619 | used = btrfs_super_bytes_used(&info->super_copy); | ||
2620 | btrfs_set_super_bytes_used(&info->super_copy, | ||
2621 | used - extent_op->num_bytes); | ||
2622 | used = btrfs_root_used(&extent_root->root_item); | ||
2623 | btrfs_set_root_used(&extent_root->root_item, | ||
2624 | used - extent_op->num_bytes); | ||
2625 | spin_unlock(&info->delalloc_lock); | ||
2626 | |||
2627 | ret = update_block_group(trans, extent_root, | ||
2628 | extent_op->bytenr, | ||
2629 | extent_op->num_bytes, | ||
2630 | 0, ret > 0); | ||
2631 | BUG_ON(ret); | ||
2632 | kfree(extent_op); | ||
2633 | num_inserts--; | ||
2634 | } | ||
2635 | } | ||
2636 | mutex_unlock(&info->extent_ins_mutex); | ||
2637 | |||
2638 | ret = insert_extents(trans, extent_root, path, &insert_list, | ||
2639 | num_inserts); | ||
2640 | BUG_ON(ret); | ||
2641 | |||
2642 | /* | ||
2643 | * if restart is set for whatever reason we need to go back and start | ||
2644 | * searching through the pending list again. | ||
2645 | * | ||
2646 | * We just inserted some extents, which could have resulted in new | ||
2647 | * blocks being allocated, which would result in new blocks needing | ||
2648 | * updates, so if all is set we _must_ restart to get the updated | ||
2649 | * blocks. | ||
2650 | */ | ||
2651 | if (restart || all) { | ||
2652 | INIT_LIST_HEAD(&insert_list); | ||
2653 | INIT_LIST_HEAD(&update_list); | ||
2654 | search = 0; | ||
2655 | restart = 0; | ||
2656 | num_inserts = 0; | ||
2657 | goto again; | ||
2658 | } | ||
2659 | out: | ||
2660 | btrfs_free_path(path); | ||
2661 | return 0; | ||
2662 | } | ||
2663 | |||
2664 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 2170 | static int pin_down_bytes(struct btrfs_trans_handle *trans, |
2665 | struct btrfs_root *root, | 2171 | struct btrfs_root *root, |
2666 | u64 bytenr, u64 num_bytes, int is_data) | 2172 | struct btrfs_path *path, |
2173 | u64 bytenr, u64 num_bytes, int is_data, | ||
2174 | struct extent_buffer **must_clean) | ||
2667 | { | 2175 | { |
2668 | int err = 0; | 2176 | int err = 0; |
2669 | struct extent_buffer *buf; | 2177 | struct extent_buffer *buf; |
@@ -2686,17 +2194,19 @@ static int pin_down_bytes(struct btrfs_trans_handle *trans, | |||
2686 | u64 header_transid = btrfs_header_generation(buf); | 2194 | u64 header_transid = btrfs_header_generation(buf); |
2687 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | 2195 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && |
2688 | header_owner != BTRFS_TREE_RELOC_OBJECTID && | 2196 | header_owner != BTRFS_TREE_RELOC_OBJECTID && |
2197 | header_owner != BTRFS_DATA_RELOC_TREE_OBJECTID && | ||
2689 | header_transid == trans->transid && | 2198 | header_transid == trans->transid && |
2690 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | 2199 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { |
2691 | clean_tree_block(NULL, root, buf); | 2200 | *must_clean = buf; |
2692 | btrfs_tree_unlock(buf); | ||
2693 | free_extent_buffer(buf); | ||
2694 | return 1; | 2201 | return 1; |
2695 | } | 2202 | } |
2696 | btrfs_tree_unlock(buf); | 2203 | btrfs_tree_unlock(buf); |
2697 | } | 2204 | } |
2698 | free_extent_buffer(buf); | 2205 | free_extent_buffer(buf); |
2699 | pinit: | 2206 | pinit: |
2207 | btrfs_set_path_blocking(path); | ||
2208 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2209 | /* unlocks the pinned mutex */ | ||
2700 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | 2210 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); |
2701 | 2211 | ||
2702 | BUG_ON(err < 0); | 2212 | BUG_ON(err < 0); |
@@ -2710,7 +2220,8 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2710 | struct btrfs_root *root, | 2220 | struct btrfs_root *root, |
2711 | u64 bytenr, u64 num_bytes, u64 parent, | 2221 | u64 bytenr, u64 num_bytes, u64 parent, |
2712 | u64 root_objectid, u64 ref_generation, | 2222 | u64 root_objectid, u64 ref_generation, |
2713 | u64 owner_objectid, int pin, int mark_free) | 2223 | u64 owner_objectid, int pin, int mark_free, |
2224 | int refs_to_drop) | ||
2714 | { | 2225 | { |
2715 | struct btrfs_path *path; | 2226 | struct btrfs_path *path; |
2716 | struct btrfs_key key; | 2227 | struct btrfs_key key; |
@@ -2732,6 +2243,7 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2732 | return -ENOMEM; | 2243 | return -ENOMEM; |
2733 | 2244 | ||
2734 | path->reada = 1; | 2245 | path->reada = 1; |
2246 | path->leave_spinning = 1; | ||
2735 | ret = lookup_extent_backref(trans, extent_root, path, | 2247 | ret = lookup_extent_backref(trans, extent_root, path, |
2736 | bytenr, parent, root_objectid, | 2248 | bytenr, parent, root_objectid, |
2737 | ref_generation, owner_objectid, 1); | 2249 | ref_generation, owner_objectid, 1); |
@@ -2753,9 +2265,11 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2753 | break; | 2265 | break; |
2754 | } | 2266 | } |
2755 | if (!found_extent) { | 2267 | if (!found_extent) { |
2756 | ret = remove_extent_backref(trans, extent_root, path); | 2268 | ret = remove_extent_backref(trans, extent_root, path, |
2269 | refs_to_drop); | ||
2757 | BUG_ON(ret); | 2270 | BUG_ON(ret); |
2758 | btrfs_release_path(extent_root, path); | 2271 | btrfs_release_path(extent_root, path); |
2272 | path->leave_spinning = 1; | ||
2759 | ret = btrfs_search_slot(trans, extent_root, | 2273 | ret = btrfs_search_slot(trans, extent_root, |
2760 | &key, path, -1, 1); | 2274 | &key, path, -1, 1); |
2761 | if (ret) { | 2275 | if (ret) { |
@@ -2771,8 +2285,9 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2771 | btrfs_print_leaf(extent_root, path->nodes[0]); | 2285 | btrfs_print_leaf(extent_root, path->nodes[0]); |
2772 | WARN_ON(1); | 2286 | WARN_ON(1); |
2773 | printk(KERN_ERR "btrfs unable to find ref byte nr %llu " | 2287 | printk(KERN_ERR "btrfs unable to find ref byte nr %llu " |
2774 | "root %llu gen %llu owner %llu\n", | 2288 | "parent %llu root %llu gen %llu owner %llu\n", |
2775 | (unsigned long long)bytenr, | 2289 | (unsigned long long)bytenr, |
2290 | (unsigned long long)parent, | ||
2776 | (unsigned long long)root_objectid, | 2291 | (unsigned long long)root_objectid, |
2777 | (unsigned long long)ref_generation, | 2292 | (unsigned long long)ref_generation, |
2778 | (unsigned long long)owner_objectid); | 2293 | (unsigned long long)owner_objectid); |
@@ -2782,17 +2297,23 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2782 | ei = btrfs_item_ptr(leaf, extent_slot, | 2297 | ei = btrfs_item_ptr(leaf, extent_slot, |
2783 | struct btrfs_extent_item); | 2298 | struct btrfs_extent_item); |
2784 | refs = btrfs_extent_refs(leaf, ei); | 2299 | refs = btrfs_extent_refs(leaf, ei); |
2785 | BUG_ON(refs == 0); | ||
2786 | refs -= 1; | ||
2787 | btrfs_set_extent_refs(leaf, ei, refs); | ||
2788 | 2300 | ||
2301 | /* | ||
2302 | * we're not allowed to delete the extent item if there | ||
2303 | * are other delayed ref updates pending | ||
2304 | */ | ||
2305 | |||
2306 | BUG_ON(refs < refs_to_drop); | ||
2307 | refs -= refs_to_drop; | ||
2308 | btrfs_set_extent_refs(leaf, ei, refs); | ||
2789 | btrfs_mark_buffer_dirty(leaf); | 2309 | btrfs_mark_buffer_dirty(leaf); |
2790 | 2310 | ||
2791 | if (refs == 0 && found_extent && path->slots[0] == extent_slot + 1) { | 2311 | if (refs == 0 && found_extent && |
2312 | path->slots[0] == extent_slot + 1) { | ||
2792 | struct btrfs_extent_ref *ref; | 2313 | struct btrfs_extent_ref *ref; |
2793 | ref = btrfs_item_ptr(leaf, path->slots[0], | 2314 | ref = btrfs_item_ptr(leaf, path->slots[0], |
2794 | struct btrfs_extent_ref); | 2315 | struct btrfs_extent_ref); |
2795 | BUG_ON(btrfs_ref_num_refs(leaf, ref) != 1); | 2316 | BUG_ON(btrfs_ref_num_refs(leaf, ref) != refs_to_drop); |
2796 | /* if the back ref and the extent are next to each other | 2317 | /* if the back ref and the extent are next to each other |
2797 | * they get deleted below in one shot | 2318 | * they get deleted below in one shot |
2798 | */ | 2319 | */ |
@@ -2800,11 +2321,13 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2800 | num_to_del = 2; | 2321 | num_to_del = 2; |
2801 | } else if (found_extent) { | 2322 | } else if (found_extent) { |
2802 | /* otherwise delete the extent back ref */ | 2323 | /* otherwise delete the extent back ref */ |
2803 | ret = remove_extent_backref(trans, extent_root, path); | 2324 | ret = remove_extent_backref(trans, extent_root, path, |
2325 | refs_to_drop); | ||
2804 | BUG_ON(ret); | 2326 | BUG_ON(ret); |
2805 | /* if refs are 0, we need to setup the path for deletion */ | 2327 | /* if refs are 0, we need to setup the path for deletion */ |
2806 | if (refs == 0) { | 2328 | if (refs == 0) { |
2807 | btrfs_release_path(extent_root, path); | 2329 | btrfs_release_path(extent_root, path); |
2330 | path->leave_spinning = 1; | ||
2808 | ret = btrfs_search_slot(trans, extent_root, &key, path, | 2331 | ret = btrfs_search_slot(trans, extent_root, &key, path, |
2809 | -1, 1); | 2332 | -1, 1); |
2810 | BUG_ON(ret); | 2333 | BUG_ON(ret); |
@@ -2814,16 +2337,18 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2814 | if (refs == 0) { | 2337 | if (refs == 0) { |
2815 | u64 super_used; | 2338 | u64 super_used; |
2816 | u64 root_used; | 2339 | u64 root_used; |
2340 | struct extent_buffer *must_clean = NULL; | ||
2817 | 2341 | ||
2818 | if (pin) { | 2342 | if (pin) { |
2819 | mutex_lock(&root->fs_info->pinned_mutex); | 2343 | ret = pin_down_bytes(trans, root, path, |
2820 | ret = pin_down_bytes(trans, root, bytenr, num_bytes, | 2344 | bytenr, num_bytes, |
2821 | owner_objectid >= BTRFS_FIRST_FREE_OBJECTID); | 2345 | owner_objectid >= BTRFS_FIRST_FREE_OBJECTID, |
2822 | mutex_unlock(&root->fs_info->pinned_mutex); | 2346 | &must_clean); |
2823 | if (ret > 0) | 2347 | if (ret > 0) |
2824 | mark_free = 1; | 2348 | mark_free = 1; |
2825 | BUG_ON(ret < 0); | 2349 | BUG_ON(ret < 0); |
2826 | } | 2350 | } |
2351 | |||
2827 | /* block accounting for super block */ | 2352 | /* block accounting for super block */ |
2828 | spin_lock(&info->delalloc_lock); | 2353 | spin_lock(&info->delalloc_lock); |
2829 | super_used = btrfs_super_bytes_used(&info->super_copy); | 2354 | super_used = btrfs_super_bytes_used(&info->super_copy); |
@@ -2835,14 +2360,34 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2835 | btrfs_set_root_used(&root->root_item, | 2360 | btrfs_set_root_used(&root->root_item, |
2836 | root_used - num_bytes); | 2361 | root_used - num_bytes); |
2837 | spin_unlock(&info->delalloc_lock); | 2362 | spin_unlock(&info->delalloc_lock); |
2363 | |||
2364 | /* | ||
2365 | * it is going to be very rare for someone to be waiting | ||
2366 | * on the block we're freeing. del_items might need to | ||
2367 | * schedule, so rather than get fancy, just force it | ||
2368 | * to blocking here | ||
2369 | */ | ||
2370 | if (must_clean) | ||
2371 | btrfs_set_lock_blocking(must_clean); | ||
2372 | |||
2838 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 2373 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
2839 | num_to_del); | 2374 | num_to_del); |
2840 | BUG_ON(ret); | 2375 | BUG_ON(ret); |
2841 | btrfs_release_path(extent_root, path); | 2376 | btrfs_release_path(extent_root, path); |
2842 | 2377 | ||
2378 | if (must_clean) { | ||
2379 | clean_tree_block(NULL, root, must_clean); | ||
2380 | btrfs_tree_unlock(must_clean); | ||
2381 | free_extent_buffer(must_clean); | ||
2382 | } | ||
2383 | |||
2843 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { | 2384 | if (owner_objectid >= BTRFS_FIRST_FREE_OBJECTID) { |
2844 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 2385 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
2845 | BUG_ON(ret); | 2386 | BUG_ON(ret); |
2387 | } else { | ||
2388 | invalidate_mapping_pages(info->btree_inode->i_mapping, | ||
2389 | bytenr >> PAGE_CACHE_SHIFT, | ||
2390 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | ||
2846 | } | 2391 | } |
2847 | 2392 | ||
2848 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 2393 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, |
@@ -2850,218 +2395,103 @@ static int __free_extent(struct btrfs_trans_handle *trans, | |||
2850 | BUG_ON(ret); | 2395 | BUG_ON(ret); |
2851 | } | 2396 | } |
2852 | btrfs_free_path(path); | 2397 | btrfs_free_path(path); |
2853 | finish_current_insert(trans, extent_root, 0); | ||
2854 | return ret; | 2398 | return ret; |
2855 | } | 2399 | } |
2856 | 2400 | ||
2857 | /* | 2401 | /* |
2858 | * find all the blocks marked as pending in the radix tree and remove | 2402 | * remove an extent from the root, returns 0 on success |
2859 | * them from the extent map | ||
2860 | */ | 2403 | */ |
2861 | static int del_pending_extents(struct btrfs_trans_handle *trans, | 2404 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
2862 | struct btrfs_root *extent_root, int all) | 2405 | struct btrfs_root *root, |
2406 | u64 bytenr, u64 num_bytes, u64 parent, | ||
2407 | u64 root_objectid, u64 ref_generation, | ||
2408 | u64 owner_objectid, int pin, | ||
2409 | int refs_to_drop) | ||
2863 | { | 2410 | { |
2864 | int ret; | 2411 | WARN_ON(num_bytes < root->sectorsize); |
2865 | int err = 0; | ||
2866 | u64 start; | ||
2867 | u64 end; | ||
2868 | u64 priv; | ||
2869 | u64 search = 0; | ||
2870 | int nr = 0, skipped = 0; | ||
2871 | struct extent_io_tree *pending_del; | ||
2872 | struct extent_io_tree *extent_ins; | ||
2873 | struct pending_extent_op *extent_op; | ||
2874 | struct btrfs_fs_info *info = extent_root->fs_info; | ||
2875 | struct list_head delete_list; | ||
2876 | |||
2877 | INIT_LIST_HEAD(&delete_list); | ||
2878 | extent_ins = &extent_root->fs_info->extent_ins; | ||
2879 | pending_del = &extent_root->fs_info->pending_del; | ||
2880 | |||
2881 | again: | ||
2882 | mutex_lock(&info->extent_ins_mutex); | ||
2883 | while (1) { | ||
2884 | ret = find_first_extent_bit(pending_del, search, &start, &end, | ||
2885 | EXTENT_WRITEBACK); | ||
2886 | if (ret) { | ||
2887 | if (all && skipped && !nr) { | ||
2888 | search = 0; | ||
2889 | skipped = 0; | ||
2890 | continue; | ||
2891 | } | ||
2892 | mutex_unlock(&info->extent_ins_mutex); | ||
2893 | break; | ||
2894 | } | ||
2895 | |||
2896 | ret = try_lock_extent(extent_ins, start, end, GFP_NOFS); | ||
2897 | if (!ret) { | ||
2898 | search = end+1; | ||
2899 | skipped = 1; | ||
2900 | |||
2901 | if (need_resched()) { | ||
2902 | mutex_unlock(&info->extent_ins_mutex); | ||
2903 | cond_resched(); | ||
2904 | mutex_lock(&info->extent_ins_mutex); | ||
2905 | } | ||
2906 | |||
2907 | continue; | ||
2908 | } | ||
2909 | BUG_ON(ret < 0); | ||
2910 | |||
2911 | ret = get_state_private(pending_del, start, &priv); | ||
2912 | BUG_ON(ret); | ||
2913 | extent_op = (struct pending_extent_op *)(unsigned long)priv; | ||
2914 | |||
2915 | clear_extent_bits(pending_del, start, end, EXTENT_WRITEBACK, | ||
2916 | GFP_NOFS); | ||
2917 | if (!test_range_bit(extent_ins, start, end, | ||
2918 | EXTENT_WRITEBACK, 0)) { | ||
2919 | list_add_tail(&extent_op->list, &delete_list); | ||
2920 | nr++; | ||
2921 | } else { | ||
2922 | kfree(extent_op); | ||
2923 | |||
2924 | ret = get_state_private(&info->extent_ins, start, | ||
2925 | &priv); | ||
2926 | BUG_ON(ret); | ||
2927 | extent_op = (struct pending_extent_op *) | ||
2928 | (unsigned long)priv; | ||
2929 | |||
2930 | clear_extent_bits(&info->extent_ins, start, end, | ||
2931 | EXTENT_WRITEBACK, GFP_NOFS); | ||
2932 | |||
2933 | if (extent_op->type == PENDING_BACKREF_UPDATE) { | ||
2934 | list_add_tail(&extent_op->list, &delete_list); | ||
2935 | search = end + 1; | ||
2936 | nr++; | ||
2937 | continue; | ||
2938 | } | ||
2939 | |||
2940 | mutex_lock(&extent_root->fs_info->pinned_mutex); | ||
2941 | ret = pin_down_bytes(trans, extent_root, start, | ||
2942 | end + 1 - start, 0); | ||
2943 | mutex_unlock(&extent_root->fs_info->pinned_mutex); | ||
2944 | |||
2945 | ret = update_block_group(trans, extent_root, start, | ||
2946 | end + 1 - start, 0, ret > 0); | ||
2947 | |||
2948 | unlock_extent(extent_ins, start, end, GFP_NOFS); | ||
2949 | BUG_ON(ret); | ||
2950 | kfree(extent_op); | ||
2951 | } | ||
2952 | if (ret) | ||
2953 | err = ret; | ||
2954 | |||
2955 | search = end + 1; | ||
2956 | |||
2957 | if (need_resched()) { | ||
2958 | mutex_unlock(&info->extent_ins_mutex); | ||
2959 | cond_resched(); | ||
2960 | mutex_lock(&info->extent_ins_mutex); | ||
2961 | } | ||
2962 | } | ||
2963 | 2412 | ||
2964 | if (nr) { | 2413 | /* |
2965 | ret = free_extents(trans, extent_root, &delete_list); | 2414 | * if metadata always pin |
2966 | BUG_ON(ret); | 2415 | * if data pin when any transaction has committed this |
2967 | } | 2416 | */ |
2417 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID || | ||
2418 | ref_generation != trans->transid) | ||
2419 | pin = 1; | ||
2968 | 2420 | ||
2969 | if (all && skipped) { | 2421 | if (ref_generation != trans->transid) |
2970 | INIT_LIST_HEAD(&delete_list); | 2422 | pin = 1; |
2971 | search = 0; | ||
2972 | nr = 0; | ||
2973 | goto again; | ||
2974 | } | ||
2975 | 2423 | ||
2976 | if (!err) | 2424 | return __free_extent(trans, root, bytenr, num_bytes, parent, |
2977 | finish_current_insert(trans, extent_root, 0); | 2425 | root_objectid, ref_generation, |
2978 | return err; | 2426 | owner_objectid, pin, pin == 0, refs_to_drop); |
2979 | } | 2427 | } |
2980 | 2428 | ||
2981 | /* | 2429 | /* |
2982 | * remove an extent from the root, returns 0 on success | 2430 | * when we free an extent, it is possible (and likely) that we free the last |
2431 | * delayed ref for that extent as well. This searches the delayed ref tree for | ||
2432 | * a given extent, and if there are no other delayed refs to be processed, it | ||
2433 | * removes it from the tree. | ||
2983 | */ | 2434 | */ |
2984 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 2435 | static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, |
2985 | struct btrfs_root *root, | 2436 | struct btrfs_root *root, u64 bytenr) |
2986 | u64 bytenr, u64 num_bytes, u64 parent, | ||
2987 | u64 root_objectid, u64 ref_generation, | ||
2988 | u64 owner_objectid, int pin) | ||
2989 | { | 2437 | { |
2990 | struct btrfs_root *extent_root = root->fs_info->extent_root; | 2438 | struct btrfs_delayed_ref_head *head; |
2991 | int pending_ret; | 2439 | struct btrfs_delayed_ref_root *delayed_refs; |
2440 | struct btrfs_delayed_ref_node *ref; | ||
2441 | struct rb_node *node; | ||
2992 | int ret; | 2442 | int ret; |
2993 | 2443 | ||
2994 | WARN_ON(num_bytes < root->sectorsize); | 2444 | delayed_refs = &trans->transaction->delayed_refs; |
2995 | if (root == extent_root) { | 2445 | spin_lock(&delayed_refs->lock); |
2996 | struct pending_extent_op *extent_op = NULL; | 2446 | head = btrfs_find_delayed_ref_head(trans, bytenr); |
2997 | 2447 | if (!head) | |
2998 | mutex_lock(&root->fs_info->extent_ins_mutex); | 2448 | goto out; |
2999 | if (test_range_bit(&root->fs_info->extent_ins, bytenr, | ||
3000 | bytenr + num_bytes - 1, EXTENT_WRITEBACK, 0)) { | ||
3001 | u64 priv; | ||
3002 | ret = get_state_private(&root->fs_info->extent_ins, | ||
3003 | bytenr, &priv); | ||
3004 | BUG_ON(ret); | ||
3005 | extent_op = (struct pending_extent_op *) | ||
3006 | (unsigned long)priv; | ||
3007 | 2449 | ||
3008 | extent_op->del = 1; | 2450 | node = rb_prev(&head->node.rb_node); |
3009 | if (extent_op->type == PENDING_EXTENT_INSERT) { | 2451 | if (!node) |
3010 | mutex_unlock(&root->fs_info->extent_ins_mutex); | 2452 | goto out; |
3011 | return 0; | ||
3012 | } | ||
3013 | } | ||
3014 | 2453 | ||
3015 | if (extent_op) { | 2454 | ref = rb_entry(node, struct btrfs_delayed_ref_node, rb_node); |
3016 | ref_generation = extent_op->orig_generation; | ||
3017 | parent = extent_op->orig_parent; | ||
3018 | } | ||
3019 | 2455 | ||
3020 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | 2456 | /* there are still entries for this ref, we can't drop it */ |
3021 | BUG_ON(!extent_op); | 2457 | if (ref->bytenr == bytenr) |
3022 | 2458 | goto out; | |
3023 | extent_op->type = PENDING_EXTENT_DELETE; | ||
3024 | extent_op->bytenr = bytenr; | ||
3025 | extent_op->num_bytes = num_bytes; | ||
3026 | extent_op->parent = parent; | ||
3027 | extent_op->orig_parent = parent; | ||
3028 | extent_op->generation = ref_generation; | ||
3029 | extent_op->orig_generation = ref_generation; | ||
3030 | extent_op->level = (int)owner_objectid; | ||
3031 | INIT_LIST_HEAD(&extent_op->list); | ||
3032 | extent_op->del = 0; | ||
3033 | |||
3034 | set_extent_bits(&root->fs_info->pending_del, | ||
3035 | bytenr, bytenr + num_bytes - 1, | ||
3036 | EXTENT_WRITEBACK, GFP_NOFS); | ||
3037 | set_state_private(&root->fs_info->pending_del, | ||
3038 | bytenr, (unsigned long)extent_op); | ||
3039 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
3040 | return 0; | ||
3041 | } | ||
3042 | /* if metadata always pin */ | ||
3043 | if (owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
3044 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) { | ||
3045 | mutex_lock(&root->fs_info->pinned_mutex); | ||
3046 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | ||
3047 | mutex_unlock(&root->fs_info->pinned_mutex); | ||
3048 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
3049 | return 0; | ||
3050 | } | ||
3051 | pin = 1; | ||
3052 | } | ||
3053 | 2459 | ||
3054 | /* if data pin when any transaction has committed this */ | 2460 | /* |
3055 | if (ref_generation != trans->transid) | 2461 | * waiting for the lock here would deadlock. If someone else has it |
3056 | pin = 1; | 2462 | * locked they are already in the process of dropping it anyway |
2463 | */ | ||
2464 | if (!mutex_trylock(&head->mutex)) | ||
2465 | goto out; | ||
3057 | 2466 | ||
3058 | ret = __free_extent(trans, root, bytenr, num_bytes, parent, | 2467 | /* |
3059 | root_objectid, ref_generation, | 2468 | * at this point we have a head with no other entries. Go |
3060 | owner_objectid, pin, pin == 0); | 2469 | * ahead and process it. |
2470 | */ | ||
2471 | head->node.in_tree = 0; | ||
2472 | rb_erase(&head->node.rb_node, &delayed_refs->root); | ||
3061 | 2473 | ||
3062 | finish_current_insert(trans, root->fs_info->extent_root, 0); | 2474 | delayed_refs->num_entries--; |
3063 | pending_ret = del_pending_extents(trans, root->fs_info->extent_root, 0); | 2475 | |
3064 | return ret ? ret : pending_ret; | 2476 | /* |
2477 | * we don't take a ref on the node because we're removing it from the | ||
2478 | * tree, so we just steal the ref the tree was holding. | ||
2479 | */ | ||
2480 | delayed_refs->num_heads--; | ||
2481 | if (list_empty(&head->cluster)) | ||
2482 | delayed_refs->num_heads_ready--; | ||
2483 | |||
2484 | list_del_init(&head->cluster); | ||
2485 | spin_unlock(&delayed_refs->lock); | ||
2486 | |||
2487 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | ||
2488 | &head->node, head->must_insert_reserved); | ||
2489 | BUG_ON(ret); | ||
2490 | btrfs_put_delayed_ref(&head->node); | ||
2491 | return 0; | ||
2492 | out: | ||
2493 | spin_unlock(&delayed_refs->lock); | ||
2494 | return 0; | ||
3065 | } | 2495 | } |
3066 | 2496 | ||
3067 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 2497 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
@@ -3072,9 +2502,30 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3072 | { | 2502 | { |
3073 | int ret; | 2503 | int ret; |
3074 | 2504 | ||
3075 | ret = __btrfs_free_extent(trans, root, bytenr, num_bytes, parent, | 2505 | /* |
3076 | root_objectid, ref_generation, | 2506 | * tree log blocks never actually go into the extent allocation |
3077 | owner_objectid, pin); | 2507 | * tree, just update pinning info and exit early. |
2508 | * | ||
2509 | * data extents referenced by the tree log do need to have | ||
2510 | * their reference counts bumped. | ||
2511 | */ | ||
2512 | if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID && | ||
2513 | owner_objectid < BTRFS_FIRST_FREE_OBJECTID) { | ||
2514 | mutex_lock(&root->fs_info->pinned_mutex); | ||
2515 | |||
2516 | /* unlocks the pinned mutex */ | ||
2517 | btrfs_update_pinned_extents(root, bytenr, num_bytes, 1); | ||
2518 | update_reserved_extents(root, bytenr, num_bytes, 0); | ||
2519 | ret = 0; | ||
2520 | } else { | ||
2521 | ret = btrfs_add_delayed_ref(trans, bytenr, num_bytes, parent, | ||
2522 | root_objectid, ref_generation, | ||
2523 | owner_objectid, | ||
2524 | BTRFS_DROP_DELAYED_REF, 1); | ||
2525 | BUG_ON(ret); | ||
2526 | ret = check_ref_cleanup(trans, root, bytenr); | ||
2527 | BUG_ON(ret); | ||
2528 | } | ||
3078 | return ret; | 2529 | return ret; |
3079 | } | 2530 | } |
3080 | 2531 | ||
@@ -3475,10 +2926,10 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | |||
3475 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | 2926 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, |
3476 | struct btrfs_root *root, u64 parent, | 2927 | struct btrfs_root *root, u64 parent, |
3477 | u64 root_objectid, u64 ref_generation, | 2928 | u64 root_objectid, u64 ref_generation, |
3478 | u64 owner, struct btrfs_key *ins) | 2929 | u64 owner, struct btrfs_key *ins, |
2930 | int ref_mod) | ||
3479 | { | 2931 | { |
3480 | int ret; | 2932 | int ret; |
3481 | int pending_ret; | ||
3482 | u64 super_used; | 2933 | u64 super_used; |
3483 | u64 root_used; | 2934 | u64 root_used; |
3484 | u64 num_bytes = ins->offset; | 2935 | u64 num_bytes = ins->offset; |
@@ -3503,33 +2954,6 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3503 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); | 2954 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); |
3504 | spin_unlock(&info->delalloc_lock); | 2955 | spin_unlock(&info->delalloc_lock); |
3505 | 2956 | ||
3506 | if (root == extent_root) { | ||
3507 | struct pending_extent_op *extent_op; | ||
3508 | |||
3509 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
3510 | BUG_ON(!extent_op); | ||
3511 | |||
3512 | extent_op->type = PENDING_EXTENT_INSERT; | ||
3513 | extent_op->bytenr = ins->objectid; | ||
3514 | extent_op->num_bytes = ins->offset; | ||
3515 | extent_op->parent = parent; | ||
3516 | extent_op->orig_parent = 0; | ||
3517 | extent_op->generation = ref_generation; | ||
3518 | extent_op->orig_generation = 0; | ||
3519 | extent_op->level = (int)owner; | ||
3520 | INIT_LIST_HEAD(&extent_op->list); | ||
3521 | extent_op->del = 0; | ||
3522 | |||
3523 | mutex_lock(&root->fs_info->extent_ins_mutex); | ||
3524 | set_extent_bits(&root->fs_info->extent_ins, ins->objectid, | ||
3525 | ins->objectid + ins->offset - 1, | ||
3526 | EXTENT_WRITEBACK, GFP_NOFS); | ||
3527 | set_state_private(&root->fs_info->extent_ins, | ||
3528 | ins->objectid, (unsigned long)extent_op); | ||
3529 | mutex_unlock(&root->fs_info->extent_ins_mutex); | ||
3530 | goto update_block; | ||
3531 | } | ||
3532 | |||
3533 | memcpy(&keys[0], ins, sizeof(*ins)); | 2957 | memcpy(&keys[0], ins, sizeof(*ins)); |
3534 | keys[1].objectid = ins->objectid; | 2958 | keys[1].objectid = ins->objectid; |
3535 | keys[1].type = BTRFS_EXTENT_REF_KEY; | 2959 | keys[1].type = BTRFS_EXTENT_REF_KEY; |
@@ -3540,37 +2964,31 @@ static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3540 | path = btrfs_alloc_path(); | 2964 | path = btrfs_alloc_path(); |
3541 | BUG_ON(!path); | 2965 | BUG_ON(!path); |
3542 | 2966 | ||
2967 | path->leave_spinning = 1; | ||
3543 | ret = btrfs_insert_empty_items(trans, extent_root, path, keys, | 2968 | ret = btrfs_insert_empty_items(trans, extent_root, path, keys, |
3544 | sizes, 2); | 2969 | sizes, 2); |
3545 | BUG_ON(ret); | 2970 | BUG_ON(ret); |
3546 | 2971 | ||
3547 | extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2972 | extent_item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3548 | struct btrfs_extent_item); | 2973 | struct btrfs_extent_item); |
3549 | btrfs_set_extent_refs(path->nodes[0], extent_item, 1); | 2974 | btrfs_set_extent_refs(path->nodes[0], extent_item, ref_mod); |
3550 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, | 2975 | ref = btrfs_item_ptr(path->nodes[0], path->slots[0] + 1, |
3551 | struct btrfs_extent_ref); | 2976 | struct btrfs_extent_ref); |
3552 | 2977 | ||
3553 | btrfs_set_ref_root(path->nodes[0], ref, root_objectid); | 2978 | btrfs_set_ref_root(path->nodes[0], ref, root_objectid); |
3554 | btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); | 2979 | btrfs_set_ref_generation(path->nodes[0], ref, ref_generation); |
3555 | btrfs_set_ref_objectid(path->nodes[0], ref, owner); | 2980 | btrfs_set_ref_objectid(path->nodes[0], ref, owner); |
3556 | btrfs_set_ref_num_refs(path->nodes[0], ref, 1); | 2981 | btrfs_set_ref_num_refs(path->nodes[0], ref, ref_mod); |
3557 | 2982 | ||
3558 | btrfs_mark_buffer_dirty(path->nodes[0]); | 2983 | btrfs_mark_buffer_dirty(path->nodes[0]); |
3559 | 2984 | ||
3560 | trans->alloc_exclude_start = 0; | 2985 | trans->alloc_exclude_start = 0; |
3561 | trans->alloc_exclude_nr = 0; | 2986 | trans->alloc_exclude_nr = 0; |
3562 | btrfs_free_path(path); | 2987 | btrfs_free_path(path); |
3563 | finish_current_insert(trans, extent_root, 0); | ||
3564 | pending_ret = del_pending_extents(trans, extent_root, 0); | ||
3565 | 2988 | ||
3566 | if (ret) | 2989 | if (ret) |
3567 | goto out; | 2990 | goto out; |
3568 | if (pending_ret) { | ||
3569 | ret = pending_ret; | ||
3570 | goto out; | ||
3571 | } | ||
3572 | 2991 | ||
3573 | update_block: | ||
3574 | ret = update_block_group(trans, root, ins->objectid, | 2992 | ret = update_block_group(trans, root, ins->objectid, |
3575 | ins->offset, 1, 0); | 2993 | ins->offset, 1, 0); |
3576 | if (ret) { | 2994 | if (ret) { |
@@ -3592,9 +3010,12 @@ int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | |||
3592 | 3010 | ||
3593 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) | 3011 | if (root_objectid == BTRFS_TREE_LOG_OBJECTID) |
3594 | return 0; | 3012 | return 0; |
3595 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | 3013 | |
3596 | ref_generation, owner, ins); | 3014 | ret = btrfs_add_delayed_ref(trans, ins->objectid, |
3597 | update_reserved_extents(root, ins->objectid, ins->offset, 0); | 3015 | ins->offset, parent, root_objectid, |
3016 | ref_generation, owner, | ||
3017 | BTRFS_ADD_DELAYED_EXTENT, 0); | ||
3018 | BUG_ON(ret); | ||
3598 | return ret; | 3019 | return ret; |
3599 | } | 3020 | } |
3600 | 3021 | ||
@@ -3621,7 +3042,7 @@ int btrfs_alloc_logged_extent(struct btrfs_trans_handle *trans, | |||
3621 | BUG_ON(ret); | 3042 | BUG_ON(ret); |
3622 | put_block_group(block_group); | 3043 | put_block_group(block_group); |
3623 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, | 3044 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, root_objectid, |
3624 | ref_generation, owner, ins); | 3045 | ref_generation, owner, ins, 1); |
3625 | return ret; | 3046 | return ret; |
3626 | } | 3047 | } |
3627 | 3048 | ||
@@ -3640,20 +3061,18 @@ int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | |||
3640 | u64 search_end, struct btrfs_key *ins, u64 data) | 3061 | u64 search_end, struct btrfs_key *ins, u64 data) |
3641 | { | 3062 | { |
3642 | int ret; | 3063 | int ret; |
3643 | |||
3644 | ret = __btrfs_reserve_extent(trans, root, num_bytes, | 3064 | ret = __btrfs_reserve_extent(trans, root, num_bytes, |
3645 | min_alloc_size, empty_size, hint_byte, | 3065 | min_alloc_size, empty_size, hint_byte, |
3646 | search_end, ins, data); | 3066 | search_end, ins, data); |
3647 | BUG_ON(ret); | 3067 | BUG_ON(ret); |
3648 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | 3068 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { |
3649 | ret = __btrfs_alloc_reserved_extent(trans, root, parent, | 3069 | ret = btrfs_add_delayed_ref(trans, ins->objectid, |
3650 | root_objectid, ref_generation, | 3070 | ins->offset, parent, root_objectid, |
3651 | owner_objectid, ins); | 3071 | ref_generation, owner_objectid, |
3072 | BTRFS_ADD_DELAYED_EXTENT, 0); | ||
3652 | BUG_ON(ret); | 3073 | BUG_ON(ret); |
3653 | |||
3654 | } else { | ||
3655 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
3656 | } | 3074 | } |
3075 | update_reserved_extents(root, ins->objectid, ins->offset, 1); | ||
3657 | return ret; | 3076 | return ret; |
3658 | } | 3077 | } |
3659 | 3078 | ||
@@ -3789,7 +3208,7 @@ int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3789 | 3208 | ||
3790 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | 3209 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); |
3791 | 3210 | ||
3792 | ret = __btrfs_free_extent(trans, root, disk_bytenr, | 3211 | ret = btrfs_free_extent(trans, root, disk_bytenr, |
3793 | btrfs_file_extent_disk_num_bytes(leaf, fi), | 3212 | btrfs_file_extent_disk_num_bytes(leaf, fi), |
3794 | leaf->start, leaf_owner, leaf_generation, | 3213 | leaf->start, leaf_owner, leaf_generation, |
3795 | key.objectid, 0); | 3214 | key.objectid, 0); |
@@ -3829,7 +3248,7 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3829 | */ | 3248 | */ |
3830 | for (i = 0; i < ref->nritems; i++) { | 3249 | for (i = 0; i < ref->nritems; i++) { |
3831 | info = ref->extents + sorted[i].slot; | 3250 | info = ref->extents + sorted[i].slot; |
3832 | ret = __btrfs_free_extent(trans, root, info->bytenr, | 3251 | ret = btrfs_free_extent(trans, root, info->bytenr, |
3833 | info->num_bytes, ref->bytenr, | 3252 | info->num_bytes, ref->bytenr, |
3834 | ref->owner, ref->generation, | 3253 | ref->owner, ref->generation, |
3835 | info->objectid, 0); | 3254 | info->objectid, 0); |
@@ -3846,12 +3265,13 @@ static noinline int cache_drop_leaf_ref(struct btrfs_trans_handle *trans, | |||
3846 | return 0; | 3265 | return 0; |
3847 | } | 3266 | } |
3848 | 3267 | ||
3849 | static int drop_snap_lookup_refcount(struct btrfs_root *root, u64 start, | 3268 | static int drop_snap_lookup_refcount(struct btrfs_trans_handle *trans, |
3269 | struct btrfs_root *root, u64 start, | ||
3850 | u64 len, u32 *refs) | 3270 | u64 len, u32 *refs) |
3851 | { | 3271 | { |
3852 | int ret; | 3272 | int ret; |
3853 | 3273 | ||
3854 | ret = btrfs_lookup_extent_ref(NULL, root, start, len, refs); | 3274 | ret = btrfs_lookup_extent_ref(trans, root, start, len, refs); |
3855 | BUG_ON(ret); | 3275 | BUG_ON(ret); |
3856 | 3276 | ||
3857 | #if 0 /* some debugging code in case we see problems here */ | 3277 | #if 0 /* some debugging code in case we see problems here */ |
@@ -3959,7 +3379,8 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | |||
3959 | * we just decrement it below and don't update any | 3379 | * we just decrement it below and don't update any |
3960 | * of the refs the leaf points to. | 3380 | * of the refs the leaf points to. |
3961 | */ | 3381 | */ |
3962 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | 3382 | ret = drop_snap_lookup_refcount(trans, root, bytenr, |
3383 | blocksize, &refs); | ||
3963 | BUG_ON(ret); | 3384 | BUG_ON(ret); |
3964 | if (refs != 1) | 3385 | if (refs != 1) |
3965 | continue; | 3386 | continue; |
@@ -4010,7 +3431,7 @@ static noinline int drop_level_one_refs(struct btrfs_trans_handle *trans, | |||
4010 | */ | 3431 | */ |
4011 | for (i = 0; i < refi; i++) { | 3432 | for (i = 0; i < refi; i++) { |
4012 | bytenr = sorted[i].bytenr; | 3433 | bytenr = sorted[i].bytenr; |
4013 | ret = __btrfs_free_extent(trans, root, bytenr, | 3434 | ret = btrfs_free_extent(trans, root, bytenr, |
4014 | blocksize, eb->start, | 3435 | blocksize, eb->start, |
4015 | root_owner, root_gen, 0, 1); | 3436 | root_owner, root_gen, 0, 1); |
4016 | BUG_ON(ret); | 3437 | BUG_ON(ret); |
@@ -4053,7 +3474,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4053 | 3474 | ||
4054 | WARN_ON(*level < 0); | 3475 | WARN_ON(*level < 0); |
4055 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 3476 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
4056 | ret = drop_snap_lookup_refcount(root, path->nodes[*level]->start, | 3477 | ret = drop_snap_lookup_refcount(trans, root, path->nodes[*level]->start, |
4057 | path->nodes[*level]->len, &refs); | 3478 | path->nodes[*level]->len, &refs); |
4058 | BUG_ON(ret); | 3479 | BUG_ON(ret); |
4059 | if (refs > 1) | 3480 | if (refs > 1) |
@@ -4104,7 +3525,8 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4104 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); | 3525 | ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]); |
4105 | blocksize = btrfs_level_size(root, *level - 1); | 3526 | blocksize = btrfs_level_size(root, *level - 1); |
4106 | 3527 | ||
4107 | ret = drop_snap_lookup_refcount(root, bytenr, blocksize, &refs); | 3528 | ret = drop_snap_lookup_refcount(trans, root, bytenr, |
3529 | blocksize, &refs); | ||
4108 | BUG_ON(ret); | 3530 | BUG_ON(ret); |
4109 | 3531 | ||
4110 | /* | 3532 | /* |
@@ -4119,7 +3541,7 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | |||
4119 | root_gen = btrfs_header_generation(parent); | 3541 | root_gen = btrfs_header_generation(parent); |
4120 | path->slots[*level]++; | 3542 | path->slots[*level]++; |
4121 | 3543 | ||
4122 | ret = __btrfs_free_extent(trans, root, bytenr, | 3544 | ret = btrfs_free_extent(trans, root, bytenr, |
4123 | blocksize, parent->start, | 3545 | blocksize, parent->start, |
4124 | root_owner, root_gen, | 3546 | root_owner, root_gen, |
4125 | *level - 1, 1); | 3547 | *level - 1, 1); |
@@ -4165,7 +3587,7 @@ out: | |||
4165 | * cleanup and free the reference on the last node | 3587 | * cleanup and free the reference on the last node |
4166 | * we processed | 3588 | * we processed |
4167 | */ | 3589 | */ |
4168 | ret = __btrfs_free_extent(trans, root, bytenr, blocksize, | 3590 | ret = btrfs_free_extent(trans, root, bytenr, blocksize, |
4169 | parent->start, root_owner, root_gen, | 3591 | parent->start, root_owner, root_gen, |
4170 | *level, 1); | 3592 | *level, 1); |
4171 | free_extent_buffer(path->nodes[*level]); | 3593 | free_extent_buffer(path->nodes[*level]); |
@@ -4354,6 +3776,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4354 | struct btrfs_path *path; | 3776 | struct btrfs_path *path; |
4355 | int i; | 3777 | int i; |
4356 | int orig_level; | 3778 | int orig_level; |
3779 | int update_count; | ||
4357 | struct btrfs_root_item *root_item = &root->root_item; | 3780 | struct btrfs_root_item *root_item = &root->root_item; |
4358 | 3781 | ||
4359 | WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); | 3782 | WARN_ON(!mutex_is_locked(&root->fs_info->drop_mutex)); |
@@ -4395,6 +3818,7 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4395 | } | 3818 | } |
4396 | } | 3819 | } |
4397 | while (1) { | 3820 | while (1) { |
3821 | unsigned long update; | ||
4398 | wret = walk_down_tree(trans, root, path, &level); | 3822 | wret = walk_down_tree(trans, root, path, &level); |
4399 | if (wret > 0) | 3823 | if (wret > 0) |
4400 | break; | 3824 | break; |
@@ -4407,12 +3831,21 @@ int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root | |||
4407 | break; | 3831 | break; |
4408 | if (wret < 0) | 3832 | if (wret < 0) |
4409 | ret = wret; | 3833 | ret = wret; |
4410 | if (trans->transaction->in_commit) { | 3834 | if (trans->transaction->in_commit || |
3835 | trans->transaction->delayed_refs.flushing) { | ||
4411 | ret = -EAGAIN; | 3836 | ret = -EAGAIN; |
4412 | break; | 3837 | break; |
4413 | } | 3838 | } |
4414 | atomic_inc(&root->fs_info->throttle_gen); | 3839 | atomic_inc(&root->fs_info->throttle_gen); |
4415 | wake_up(&root->fs_info->transaction_throttle); | 3840 | wake_up(&root->fs_info->transaction_throttle); |
3841 | for (update_count = 0; update_count < 16; update_count++) { | ||
3842 | update = trans->delayed_ref_updates; | ||
3843 | trans->delayed_ref_updates = 0; | ||
3844 | if (update) | ||
3845 | btrfs_run_delayed_refs(trans, root, update); | ||
3846 | else | ||
3847 | break; | ||
3848 | } | ||
4416 | } | 3849 | } |
4417 | for (i = 0; i <= orig_level; i++) { | 3850 | for (i = 0; i <= orig_level; i++) { |
4418 | if (path->nodes[i]) { | 3851 | if (path->nodes[i]) { |
@@ -5457,6 +4890,7 @@ static noinline int replace_extents_in_leaf(struct btrfs_trans_handle *trans, | |||
5457 | root->root_key.objectid, | 4890 | root->root_key.objectid, |
5458 | trans->transid, key.objectid); | 4891 | trans->transid, key.objectid); |
5459 | BUG_ON(ret); | 4892 | BUG_ON(ret); |
4893 | |||
5460 | ret = btrfs_free_extent(trans, root, | 4894 | ret = btrfs_free_extent(trans, root, |
5461 | bytenr, num_bytes, leaf->start, | 4895 | bytenr, num_bytes, leaf->start, |
5462 | btrfs_header_owner(leaf), | 4896 | btrfs_header_owner(leaf), |
@@ -5768,9 +5202,6 @@ static noinline int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
5768 | ref_path, NULL, NULL); | 5202 | ref_path, NULL, NULL); |
5769 | BUG_ON(ret); | 5203 | BUG_ON(ret); |
5770 | 5204 | ||
5771 | if (root == root->fs_info->extent_root) | ||
5772 | btrfs_extent_post_op(trans, root); | ||
5773 | |||
5774 | return 0; | 5205 | return 0; |
5775 | } | 5206 | } |
5776 | 5207 | ||
@@ -6038,6 +5469,7 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
6038 | if (!path) | 5469 | if (!path) |
6039 | return -ENOMEM; | 5470 | return -ENOMEM; |
6040 | 5471 | ||
5472 | path->leave_spinning = 1; | ||
6041 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); | 5473 | ret = btrfs_insert_empty_inode(trans, root, path, objectid); |
6042 | if (ret) | 5474 | if (ret) |
6043 | goto out; | 5475 | goto out; |
@@ -6208,6 +5640,9 @@ again: | |||
6208 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); | 5640 | btrfs_remove_leaf_refs(info->tree_root, (u64)-1, 1); |
6209 | mutex_unlock(&root->fs_info->cleaner_mutex); | 5641 | mutex_unlock(&root->fs_info->cleaner_mutex); |
6210 | 5642 | ||
5643 | trans = btrfs_start_transaction(info->tree_root, 1); | ||
5644 | btrfs_commit_transaction(trans, info->tree_root); | ||
5645 | |||
6211 | while (1) { | 5646 | while (1) { |
6212 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | 5647 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
6213 | if (ret < 0) | 5648 | if (ret < 0) |
@@ -6466,7 +5901,7 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6466 | 5901 | ||
6467 | extent_root = root->fs_info->extent_root; | 5902 | extent_root = root->fs_info->extent_root; |
6468 | 5903 | ||
6469 | root->fs_info->last_trans_new_blockgroup = trans->transid; | 5904 | root->fs_info->last_trans_log_full_commit = trans->transid; |
6470 | 5905 | ||
6471 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 5906 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
6472 | if (!cache) | 5907 | if (!cache) |
@@ -6500,9 +5935,6 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
6500 | sizeof(cache->item)); | 5935 | sizeof(cache->item)); |
6501 | BUG_ON(ret); | 5936 | BUG_ON(ret); |
6502 | 5937 | ||
6503 | finish_current_insert(trans, extent_root, 0); | ||
6504 | ret = del_pending_extents(trans, extent_root, 0); | ||
6505 | BUG_ON(ret); | ||
6506 | set_avail_alloc_bits(extent_root->fs_info, type); | 5938 | set_avail_alloc_bits(extent_root->fs_info, type); |
6507 | 5939 | ||
6508 | return 0; | 5940 | return 0; |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index ebe6b29e6069..08085af089e2 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -3124,20 +3124,15 @@ void free_extent_buffer(struct extent_buffer *eb) | |||
3124 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, | 3124 | int clear_extent_buffer_dirty(struct extent_io_tree *tree, |
3125 | struct extent_buffer *eb) | 3125 | struct extent_buffer *eb) |
3126 | { | 3126 | { |
3127 | int set; | ||
3128 | unsigned long i; | 3127 | unsigned long i; |
3129 | unsigned long num_pages; | 3128 | unsigned long num_pages; |
3130 | struct page *page; | 3129 | struct page *page; |
3131 | 3130 | ||
3132 | u64 start = eb->start; | ||
3133 | u64 end = start + eb->len - 1; | ||
3134 | |||
3135 | set = clear_extent_dirty(tree, start, end, GFP_NOFS); | ||
3136 | num_pages = num_extent_pages(eb->start, eb->len); | 3131 | num_pages = num_extent_pages(eb->start, eb->len); |
3137 | 3132 | ||
3138 | for (i = 0; i < num_pages; i++) { | 3133 | for (i = 0; i < num_pages; i++) { |
3139 | page = extent_buffer_page(eb, i); | 3134 | page = extent_buffer_page(eb, i); |
3140 | if (!set && !PageDirty(page)) | 3135 | if (!PageDirty(page)) |
3141 | continue; | 3136 | continue; |
3142 | 3137 | ||
3143 | lock_page(page); | 3138 | lock_page(page); |
@@ -3146,22 +3141,6 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3146 | else | 3141 | else |
3147 | set_page_private(page, EXTENT_PAGE_PRIVATE); | 3142 | set_page_private(page, EXTENT_PAGE_PRIVATE); |
3148 | 3143 | ||
3149 | /* | ||
3150 | * if we're on the last page or the first page and the | ||
3151 | * block isn't aligned on a page boundary, do extra checks | ||
3152 | * to make sure we don't clean page that is partially dirty | ||
3153 | */ | ||
3154 | if ((i == 0 && (eb->start & (PAGE_CACHE_SIZE - 1))) || | ||
3155 | ((i == num_pages - 1) && | ||
3156 | ((eb->start + eb->len) & (PAGE_CACHE_SIZE - 1)))) { | ||
3157 | start = (u64)page->index << PAGE_CACHE_SHIFT; | ||
3158 | end = start + PAGE_CACHE_SIZE - 1; | ||
3159 | if (test_range_bit(tree, start, end, | ||
3160 | EXTENT_DIRTY, 0)) { | ||
3161 | unlock_page(page); | ||
3162 | continue; | ||
3163 | } | ||
3164 | } | ||
3165 | clear_page_dirty_for_io(page); | 3144 | clear_page_dirty_for_io(page); |
3166 | spin_lock_irq(&page->mapping->tree_lock); | 3145 | spin_lock_irq(&page->mapping->tree_lock); |
3167 | if (!PageDirty(page)) { | 3146 | if (!PageDirty(page)) { |
@@ -3187,29 +3166,13 @@ int set_extent_buffer_dirty(struct extent_io_tree *tree, | |||
3187 | { | 3166 | { |
3188 | unsigned long i; | 3167 | unsigned long i; |
3189 | unsigned long num_pages; | 3168 | unsigned long num_pages; |
3169 | int was_dirty = 0; | ||
3190 | 3170 | ||
3171 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
3191 | num_pages = num_extent_pages(eb->start, eb->len); | 3172 | num_pages = num_extent_pages(eb->start, eb->len); |
3192 | for (i = 0; i < num_pages; i++) { | 3173 | for (i = 0; i < num_pages; i++) |
3193 | struct page *page = extent_buffer_page(eb, i); | ||
3194 | /* writepage may need to do something special for the | ||
3195 | * first page, we have to make sure page->private is | ||
3196 | * properly set. releasepage may drop page->private | ||
3197 | * on us if the page isn't already dirty. | ||
3198 | */ | ||
3199 | lock_page(page); | ||
3200 | if (i == 0) { | ||
3201 | set_page_extent_head(page, eb->len); | ||
3202 | } else if (PagePrivate(page) && | ||
3203 | page->private != EXTENT_PAGE_PRIVATE) { | ||
3204 | set_page_extent_mapped(page); | ||
3205 | } | ||
3206 | __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); | 3174 | __set_page_dirty_nobuffers(extent_buffer_page(eb, i)); |
3207 | set_extent_dirty(tree, page_offset(page), | 3175 | return was_dirty; |
3208 | page_offset(page) + PAGE_CACHE_SIZE - 1, | ||
3209 | GFP_NOFS); | ||
3210 | unlock_page(page); | ||
3211 | } | ||
3212 | return 0; | ||
3213 | } | 3176 | } |
3214 | 3177 | ||
3215 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 3178 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
@@ -3789,6 +3752,10 @@ int try_release_extent_buffer(struct extent_io_tree *tree, struct page *page) | |||
3789 | ret = 0; | 3752 | ret = 0; |
3790 | goto out; | 3753 | goto out; |
3791 | } | 3754 | } |
3755 | if (test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)) { | ||
3756 | ret = 0; | ||
3757 | goto out; | ||
3758 | } | ||
3792 | /* at this point we can safely release the extent buffer */ | 3759 | /* at this point we can safely release the extent buffer */ |
3793 | num_pages = num_extent_pages(eb->start, eb->len); | 3760 | num_pages = num_extent_pages(eb->start, eb->len); |
3794 | for (i = 0; i < num_pages; i++) | 3761 | for (i = 0; i < num_pages; i++) |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 1f9df88afbf6..5bc20abf3f3d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -25,6 +25,7 @@ | |||
25 | /* these are bit numbers for test/set bit */ | 25 | /* these are bit numbers for test/set bit */ |
26 | #define EXTENT_BUFFER_UPTODATE 0 | 26 | #define EXTENT_BUFFER_UPTODATE 0 |
27 | #define EXTENT_BUFFER_BLOCKING 1 | 27 | #define EXTENT_BUFFER_BLOCKING 1 |
28 | #define EXTENT_BUFFER_DIRTY 2 | ||
28 | 29 | ||
29 | /* | 30 | /* |
30 | * page->private values. Every page that is controlled by the extent | 31 | * page->private values. Every page that is controlled by the extent |
@@ -254,6 +255,8 @@ int clear_extent_buffer_dirty(struct extent_io_tree *tree, | |||
254 | struct extent_buffer *eb); | 255 | struct extent_buffer *eb); |
255 | int set_extent_buffer_dirty(struct extent_io_tree *tree, | 256 | int set_extent_buffer_dirty(struct extent_io_tree *tree, |
256 | struct extent_buffer *eb); | 257 | struct extent_buffer *eb); |
258 | int test_extent_buffer_dirty(struct extent_io_tree *tree, | ||
259 | struct extent_buffer *eb); | ||
257 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, | 260 | int set_extent_buffer_uptodate(struct extent_io_tree *tree, |
258 | struct extent_buffer *eb); | 261 | struct extent_buffer *eb); |
259 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, | 262 | int clear_extent_buffer_uptodate(struct extent_io_tree *tree, |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 964652435fd1..9b99886562d0 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -52,6 +52,7 @@ int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | |||
52 | file_key.offset = pos; | 52 | file_key.offset = pos; |
53 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); | 53 | btrfs_set_key_type(&file_key, BTRFS_EXTENT_DATA_KEY); |
54 | 54 | ||
55 | path->leave_spinning = 1; | ||
55 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | 56 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
56 | sizeof(*item)); | 57 | sizeof(*item)); |
57 | if (ret < 0) | 58 | if (ret < 0) |
@@ -523,6 +524,7 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
523 | key.offset = end_byte - 1; | 524 | key.offset = end_byte - 1; |
524 | key.type = BTRFS_EXTENT_CSUM_KEY; | 525 | key.type = BTRFS_EXTENT_CSUM_KEY; |
525 | 526 | ||
527 | path->leave_spinning = 1; | ||
526 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 528 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
527 | if (ret > 0) { | 529 | if (ret > 0) { |
528 | if (path->slots[0] == 0) | 530 | if (path->slots[0] == 0) |
@@ -757,8 +759,10 @@ insert: | |||
757 | } else { | 759 | } else { |
758 | ins_size = csum_size; | 760 | ins_size = csum_size; |
759 | } | 761 | } |
762 | path->leave_spinning = 1; | ||
760 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, | 763 | ret = btrfs_insert_empty_item(trans, root, path, &file_key, |
761 | ins_size); | 764 | ins_size); |
765 | path->leave_spinning = 0; | ||
762 | if (ret < 0) | 766 | if (ret < 0) |
763 | goto fail_unlock; | 767 | goto fail_unlock; |
764 | if (ret != 0) { | 768 | if (ret != 0) { |
@@ -776,7 +780,6 @@ found: | |||
776 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + | 780 | item_end = (struct btrfs_csum_item *)((unsigned char *)item_end + |
777 | btrfs_item_size_nr(leaf, path->slots[0])); | 781 | btrfs_item_size_nr(leaf, path->slots[0])); |
778 | eb_token = NULL; | 782 | eb_token = NULL; |
779 | cond_resched(); | ||
780 | next_sector: | 783 | next_sector: |
781 | 784 | ||
782 | if (!eb_token || | 785 | if (!eb_token || |
@@ -817,9 +820,9 @@ next_sector: | |||
817 | eb_token = NULL; | 820 | eb_token = NULL; |
818 | } | 821 | } |
819 | btrfs_mark_buffer_dirty(path->nodes[0]); | 822 | btrfs_mark_buffer_dirty(path->nodes[0]); |
820 | cond_resched(); | ||
821 | if (total_bytes < sums->len) { | 823 | if (total_bytes < sums->len) { |
822 | btrfs_release_path(root, path); | 824 | btrfs_release_path(root, path); |
825 | cond_resched(); | ||
823 | goto again; | 826 | goto again; |
824 | } | 827 | } |
825 | out: | 828 | out: |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index dc78954861b3..9c9fb46ccd08 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -606,6 +606,7 @@ next_slot: | |||
606 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); | 606 | btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY); |
607 | 607 | ||
608 | btrfs_release_path(root, path); | 608 | btrfs_release_path(root, path); |
609 | path->leave_spinning = 1; | ||
609 | ret = btrfs_insert_empty_item(trans, root, path, &ins, | 610 | ret = btrfs_insert_empty_item(trans, root, path, &ins, |
610 | sizeof(*extent)); | 611 | sizeof(*extent)); |
611 | BUG_ON(ret); | 612 | BUG_ON(ret); |
@@ -639,17 +640,22 @@ next_slot: | |||
639 | ram_bytes); | 640 | ram_bytes); |
640 | btrfs_set_file_extent_type(leaf, extent, found_type); | 641 | btrfs_set_file_extent_type(leaf, extent, found_type); |
641 | 642 | ||
643 | btrfs_unlock_up_safe(path, 1); | ||
642 | btrfs_mark_buffer_dirty(path->nodes[0]); | 644 | btrfs_mark_buffer_dirty(path->nodes[0]); |
645 | btrfs_set_lock_blocking(path->nodes[0]); | ||
643 | 646 | ||
644 | if (disk_bytenr != 0) { | 647 | if (disk_bytenr != 0) { |
645 | ret = btrfs_update_extent_ref(trans, root, | 648 | ret = btrfs_update_extent_ref(trans, root, |
646 | disk_bytenr, orig_parent, | 649 | disk_bytenr, |
650 | le64_to_cpu(old.disk_num_bytes), | ||
651 | orig_parent, | ||
647 | leaf->start, | 652 | leaf->start, |
648 | root->root_key.objectid, | 653 | root->root_key.objectid, |
649 | trans->transid, ins.objectid); | 654 | trans->transid, ins.objectid); |
650 | 655 | ||
651 | BUG_ON(ret); | 656 | BUG_ON(ret); |
652 | } | 657 | } |
658 | path->leave_spinning = 0; | ||
653 | btrfs_release_path(root, path); | 659 | btrfs_release_path(root, path); |
654 | if (disk_bytenr != 0) | 660 | if (disk_bytenr != 0) |
655 | inode_add_bytes(inode, extent_end - end); | 661 | inode_add_bytes(inode, extent_end - end); |
@@ -912,7 +918,7 @@ again: | |||
912 | btrfs_set_file_extent_other_encoding(leaf, fi, 0); | 918 | btrfs_set_file_extent_other_encoding(leaf, fi, 0); |
913 | 919 | ||
914 | if (orig_parent != leaf->start) { | 920 | if (orig_parent != leaf->start) { |
915 | ret = btrfs_update_extent_ref(trans, root, bytenr, | 921 | ret = btrfs_update_extent_ref(trans, root, bytenr, num_bytes, |
916 | orig_parent, leaf->start, | 922 | orig_parent, leaf->start, |
917 | root->root_key.objectid, | 923 | root->root_key.objectid, |
918 | trans->transid, inode->i_ino); | 924 | trans->transid, inode->i_ino); |
@@ -1155,6 +1161,20 @@ out_nolock: | |||
1155 | page_cache_release(pinned[1]); | 1161 | page_cache_release(pinned[1]); |
1156 | *ppos = pos; | 1162 | *ppos = pos; |
1157 | 1163 | ||
1164 | /* | ||
1165 | * we want to make sure fsync finds this change | ||
1166 | * but we haven't joined a transaction running right now. | ||
1167 | * | ||
1168 | * Later on, someone is sure to update the inode and get the | ||
1169 | * real transid recorded. | ||
1170 | * | ||
1171 | * We set last_trans now to the fs_info generation + 1, | ||
1172 | * this will either be one more than the running transaction | ||
1173 | * or the generation used for the next transaction if there isn't | ||
1174 | * one running right now. | ||
1175 | */ | ||
1176 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
1177 | |||
1158 | if (num_written > 0 && will_write) { | 1178 | if (num_written > 0 && will_write) { |
1159 | struct btrfs_trans_handle *trans; | 1179 | struct btrfs_trans_handle *trans; |
1160 | 1180 | ||
@@ -1167,8 +1187,11 @@ out_nolock: | |||
1167 | ret = btrfs_log_dentry_safe(trans, root, | 1187 | ret = btrfs_log_dentry_safe(trans, root, |
1168 | file->f_dentry); | 1188 | file->f_dentry); |
1169 | if (ret == 0) { | 1189 | if (ret == 0) { |
1170 | btrfs_sync_log(trans, root); | 1190 | ret = btrfs_sync_log(trans, root); |
1171 | btrfs_end_transaction(trans, root); | 1191 | if (ret == 0) |
1192 | btrfs_end_transaction(trans, root); | ||
1193 | else | ||
1194 | btrfs_commit_transaction(trans, root); | ||
1172 | } else { | 1195 | } else { |
1173 | btrfs_commit_transaction(trans, root); | 1196 | btrfs_commit_transaction(trans, root); |
1174 | } | 1197 | } |
@@ -1185,6 +1208,18 @@ out_nolock: | |||
1185 | 1208 | ||
1186 | int btrfs_release_file(struct inode *inode, struct file *filp) | 1209 | int btrfs_release_file(struct inode *inode, struct file *filp) |
1187 | { | 1210 | { |
1211 | /* | ||
1212 | * ordered_data_close is set by settattr when we are about to truncate | ||
1213 | * a file from a non-zero size to a zero size. This tries to | ||
1214 | * flush down new bytes that may have been written if the | ||
1215 | * application were using truncate to replace a file in place. | ||
1216 | */ | ||
1217 | if (BTRFS_I(inode)->ordered_data_close) { | ||
1218 | BTRFS_I(inode)->ordered_data_close = 0; | ||
1219 | btrfs_add_ordered_operation(NULL, BTRFS_I(inode)->root, inode); | ||
1220 | if (inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | ||
1221 | filemap_flush(inode->i_mapping); | ||
1222 | } | ||
1188 | if (filp->private_data) | 1223 | if (filp->private_data) |
1189 | btrfs_ioctl_trans_end(filp); | 1224 | btrfs_ioctl_trans_end(filp); |
1190 | return 0; | 1225 | return 0; |
@@ -1260,8 +1295,11 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1260 | if (ret > 0) { | 1295 | if (ret > 0) { |
1261 | ret = btrfs_commit_transaction(trans, root); | 1296 | ret = btrfs_commit_transaction(trans, root); |
1262 | } else { | 1297 | } else { |
1263 | btrfs_sync_log(trans, root); | 1298 | ret = btrfs_sync_log(trans, root); |
1264 | ret = btrfs_end_transaction(trans, root); | 1299 | if (ret == 0) |
1300 | ret = btrfs_end_transaction(trans, root); | ||
1301 | else | ||
1302 | ret = btrfs_commit_transaction(trans, root); | ||
1265 | } | 1303 | } |
1266 | mutex_lock(&dentry->d_inode->i_mutex); | 1304 | mutex_lock(&dentry->d_inode->i_mutex); |
1267 | out: | 1305 | out: |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 3d46fa1f29a4..6b627c611808 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -73,6 +73,8 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
73 | if (!path) | 73 | if (!path) |
74 | return -ENOMEM; | 74 | return -ENOMEM; |
75 | 75 | ||
76 | path->leave_spinning = 1; | ||
77 | |||
76 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 78 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
77 | if (ret > 0) { | 79 | if (ret > 0) { |
78 | ret = -ENOENT; | 80 | ret = -ENOENT; |
@@ -127,6 +129,7 @@ int btrfs_insert_inode_ref(struct btrfs_trans_handle *trans, | |||
127 | if (!path) | 129 | if (!path) |
128 | return -ENOMEM; | 130 | return -ENOMEM; |
129 | 131 | ||
132 | path->leave_spinning = 1; | ||
130 | ret = btrfs_insert_empty_item(trans, root, path, &key, | 133 | ret = btrfs_insert_empty_item(trans, root, path, &key, |
131 | ins_len); | 134 | ins_len); |
132 | if (ret == -EEXIST) { | 135 | if (ret == -EEXIST) { |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 17e608c4dc70..06d8db5afb08 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -134,6 +134,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
134 | if (!path) | 134 | if (!path) |
135 | return -ENOMEM; | 135 | return -ENOMEM; |
136 | 136 | ||
137 | path->leave_spinning = 1; | ||
137 | btrfs_set_trans_block_group(trans, inode); | 138 | btrfs_set_trans_block_group(trans, inode); |
138 | 139 | ||
139 | key.objectid = inode->i_ino; | 140 | key.objectid = inode->i_ino; |
@@ -167,9 +168,9 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, | |||
167 | cur_size = min_t(unsigned long, compressed_size, | 168 | cur_size = min_t(unsigned long, compressed_size, |
168 | PAGE_CACHE_SIZE); | 169 | PAGE_CACHE_SIZE); |
169 | 170 | ||
170 | kaddr = kmap(cpage); | 171 | kaddr = kmap_atomic(cpage, KM_USER0); |
171 | write_extent_buffer(leaf, kaddr, ptr, cur_size); | 172 | write_extent_buffer(leaf, kaddr, ptr, cur_size); |
172 | kunmap(cpage); | 173 | kunmap_atomic(kaddr, KM_USER0); |
173 | 174 | ||
174 | i++; | 175 | i++; |
175 | ptr += cur_size; | 176 | ptr += cur_size; |
@@ -204,7 +205,7 @@ fail: | |||
204 | * does the checks required to make sure the data is small enough | 205 | * does the checks required to make sure the data is small enough |
205 | * to fit as an inline extent. | 206 | * to fit as an inline extent. |
206 | */ | 207 | */ |
207 | static int cow_file_range_inline(struct btrfs_trans_handle *trans, | 208 | static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, |
208 | struct btrfs_root *root, | 209 | struct btrfs_root *root, |
209 | struct inode *inode, u64 start, u64 end, | 210 | struct inode *inode, u64 start, u64 end, |
210 | size_t compressed_size, | 211 | size_t compressed_size, |
@@ -854,11 +855,6 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, | |||
854 | u64 cur_end; | 855 | u64 cur_end; |
855 | int limit = 10 * 1024 * 1042; | 856 | int limit = 10 * 1024 * 1042; |
856 | 857 | ||
857 | if (!btrfs_test_opt(root, COMPRESS)) { | ||
858 | return cow_file_range(inode, locked_page, start, end, | ||
859 | page_started, nr_written, 1); | ||
860 | } | ||
861 | |||
862 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | | 858 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, end, EXTENT_LOCKED | |
863 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); | 859 | EXTENT_DELALLOC, 1, 0, GFP_NOFS); |
864 | while (start < end) { | 860 | while (start < end) { |
@@ -935,7 +931,8 @@ static noinline int csum_exist_in_range(struct btrfs_root *root, | |||
935 | * If no cow copies or snapshots exist, we write directly to the existing | 931 | * If no cow copies or snapshots exist, we write directly to the existing |
936 | * blocks on disk | 932 | * blocks on disk |
937 | */ | 933 | */ |
938 | static int run_delalloc_nocow(struct inode *inode, struct page *locked_page, | 934 | static noinline int run_delalloc_nocow(struct inode *inode, |
935 | struct page *locked_page, | ||
939 | u64 start, u64 end, int *page_started, int force, | 936 | u64 start, u64 end, int *page_started, int force, |
940 | unsigned long *nr_written) | 937 | unsigned long *nr_written) |
941 | { | 938 | { |
@@ -1133,6 +1130,7 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1133 | unsigned long *nr_written) | 1130 | unsigned long *nr_written) |
1134 | { | 1131 | { |
1135 | int ret; | 1132 | int ret; |
1133 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
1136 | 1134 | ||
1137 | if (btrfs_test_flag(inode, NODATACOW)) | 1135 | if (btrfs_test_flag(inode, NODATACOW)) |
1138 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1136 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
@@ -1140,10 +1138,12 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1140 | else if (btrfs_test_flag(inode, PREALLOC)) | 1138 | else if (btrfs_test_flag(inode, PREALLOC)) |
1141 | ret = run_delalloc_nocow(inode, locked_page, start, end, | 1139 | ret = run_delalloc_nocow(inode, locked_page, start, end, |
1142 | page_started, 0, nr_written); | 1140 | page_started, 0, nr_written); |
1141 | else if (!btrfs_test_opt(root, COMPRESS)) | ||
1142 | ret = cow_file_range(inode, locked_page, start, end, | ||
1143 | page_started, nr_written, 1); | ||
1143 | else | 1144 | else |
1144 | ret = cow_file_range_async(inode, locked_page, start, end, | 1145 | ret = cow_file_range_async(inode, locked_page, start, end, |
1145 | page_started, nr_written); | 1146 | page_started, nr_written); |
1146 | |||
1147 | return ret; | 1147 | return ret; |
1148 | } | 1148 | } |
1149 | 1149 | ||
@@ -1453,6 +1453,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1453 | path = btrfs_alloc_path(); | 1453 | path = btrfs_alloc_path(); |
1454 | BUG_ON(!path); | 1454 | BUG_ON(!path); |
1455 | 1455 | ||
1456 | path->leave_spinning = 1; | ||
1456 | ret = btrfs_drop_extents(trans, root, inode, file_pos, | 1457 | ret = btrfs_drop_extents(trans, root, inode, file_pos, |
1457 | file_pos + num_bytes, file_pos, &hint); | 1458 | file_pos + num_bytes, file_pos, &hint); |
1458 | BUG_ON(ret); | 1459 | BUG_ON(ret); |
@@ -1475,6 +1476,10 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1475 | btrfs_set_file_extent_compression(leaf, fi, compression); | 1476 | btrfs_set_file_extent_compression(leaf, fi, compression); |
1476 | btrfs_set_file_extent_encryption(leaf, fi, encryption); | 1477 | btrfs_set_file_extent_encryption(leaf, fi, encryption); |
1477 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); | 1478 | btrfs_set_file_extent_other_encoding(leaf, fi, other_encoding); |
1479 | |||
1480 | btrfs_unlock_up_safe(path, 1); | ||
1481 | btrfs_set_lock_blocking(leaf); | ||
1482 | |||
1478 | btrfs_mark_buffer_dirty(leaf); | 1483 | btrfs_mark_buffer_dirty(leaf); |
1479 | 1484 | ||
1480 | inode_add_bytes(inode, num_bytes); | 1485 | inode_add_bytes(inode, num_bytes); |
@@ -1487,11 +1492,35 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1487 | root->root_key.objectid, | 1492 | root->root_key.objectid, |
1488 | trans->transid, inode->i_ino, &ins); | 1493 | trans->transid, inode->i_ino, &ins); |
1489 | BUG_ON(ret); | 1494 | BUG_ON(ret); |
1490 | |||
1491 | btrfs_free_path(path); | 1495 | btrfs_free_path(path); |
1496 | |||
1492 | return 0; | 1497 | return 0; |
1493 | } | 1498 | } |
1494 | 1499 | ||
1500 | /* | ||
1501 | * helper function for btrfs_finish_ordered_io, this | ||
1502 | * just reads in some of the csum leaves to prime them into ram | ||
1503 | * before we start the transaction. It limits the amount of btree | ||
1504 | * reads required while inside the transaction. | ||
1505 | */ | ||
1506 | static noinline void reada_csum(struct btrfs_root *root, | ||
1507 | struct btrfs_path *path, | ||
1508 | struct btrfs_ordered_extent *ordered_extent) | ||
1509 | { | ||
1510 | struct btrfs_ordered_sum *sum; | ||
1511 | u64 bytenr; | ||
1512 | |||
1513 | sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum, | ||
1514 | list); | ||
1515 | bytenr = sum->sums[0].bytenr; | ||
1516 | |||
1517 | /* | ||
1518 | * we don't care about the results, the point of this search is | ||
1519 | * just to get the btree leaves into ram | ||
1520 | */ | ||
1521 | btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0); | ||
1522 | } | ||
1523 | |||
1495 | /* as ordered data IO finishes, this gets called so we can finish | 1524 | /* as ordered data IO finishes, this gets called so we can finish |
1496 | * an ordered extent if the range of bytes in the file it covers are | 1525 | * an ordered extent if the range of bytes in the file it covers are |
1497 | * fully written. | 1526 | * fully written. |
@@ -1500,8 +1529,9 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1500 | { | 1529 | { |
1501 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1530 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1502 | struct btrfs_trans_handle *trans; | 1531 | struct btrfs_trans_handle *trans; |
1503 | struct btrfs_ordered_extent *ordered_extent; | 1532 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1504 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1533 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1534 | struct btrfs_path *path; | ||
1505 | int compressed = 0; | 1535 | int compressed = 0; |
1506 | int ret; | 1536 | int ret; |
1507 | 1537 | ||
@@ -1509,9 +1539,33 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1509 | if (!ret) | 1539 | if (!ret) |
1510 | return 0; | 1540 | return 0; |
1511 | 1541 | ||
1542 | /* | ||
1543 | * before we join the transaction, try to do some of our IO. | ||
1544 | * This will limit the amount of IO that we have to do with | ||
1545 | * the transaction running. We're unlikely to need to do any | ||
1546 | * IO if the file extents are new, the disk_i_size checks | ||
1547 | * covers the most common case. | ||
1548 | */ | ||
1549 | if (start < BTRFS_I(inode)->disk_i_size) { | ||
1550 | path = btrfs_alloc_path(); | ||
1551 | if (path) { | ||
1552 | ret = btrfs_lookup_file_extent(NULL, root, path, | ||
1553 | inode->i_ino, | ||
1554 | start, 0); | ||
1555 | ordered_extent = btrfs_lookup_ordered_extent(inode, | ||
1556 | start); | ||
1557 | if (!list_empty(&ordered_extent->list)) { | ||
1558 | btrfs_release_path(root, path); | ||
1559 | reada_csum(root, path, ordered_extent); | ||
1560 | } | ||
1561 | btrfs_free_path(path); | ||
1562 | } | ||
1563 | } | ||
1564 | |||
1512 | trans = btrfs_join_transaction(root, 1); | 1565 | trans = btrfs_join_transaction(root, 1); |
1513 | 1566 | ||
1514 | ordered_extent = btrfs_lookup_ordered_extent(inode, start); | 1567 | if (!ordered_extent) |
1568 | ordered_extent = btrfs_lookup_ordered_extent(inode, start); | ||
1515 | BUG_ON(!ordered_extent); | 1569 | BUG_ON(!ordered_extent); |
1516 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) | 1570 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) |
1517 | goto nocow; | 1571 | goto nocow; |
@@ -2101,6 +2155,7 @@ noinline int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2101 | 2155 | ||
2102 | path = btrfs_alloc_path(); | 2156 | path = btrfs_alloc_path(); |
2103 | BUG_ON(!path); | 2157 | BUG_ON(!path); |
2158 | path->leave_spinning = 1; | ||
2104 | ret = btrfs_lookup_inode(trans, root, path, | 2159 | ret = btrfs_lookup_inode(trans, root, path, |
2105 | &BTRFS_I(inode)->location, 1); | 2160 | &BTRFS_I(inode)->location, 1); |
2106 | if (ret) { | 2161 | if (ret) { |
@@ -2147,6 +2202,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2147 | goto err; | 2202 | goto err; |
2148 | } | 2203 | } |
2149 | 2204 | ||
2205 | path->leave_spinning = 1; | ||
2150 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | 2206 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, |
2151 | name, name_len, -1); | 2207 | name, name_len, -1); |
2152 | if (IS_ERR(di)) { | 2208 | if (IS_ERR(di)) { |
@@ -2190,8 +2246,6 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, | |||
2190 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, | 2246 | ret = btrfs_del_inode_ref_in_log(trans, root, name, name_len, |
2191 | inode, dir->i_ino); | 2247 | inode, dir->i_ino); |
2192 | BUG_ON(ret != 0 && ret != -ENOENT); | 2248 | BUG_ON(ret != 0 && ret != -ENOENT); |
2193 | if (ret != -ENOENT) | ||
2194 | BTRFS_I(dir)->log_dirty_trans = trans->transid; | ||
2195 | 2249 | ||
2196 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, | 2250 | ret = btrfs_del_dir_entries_in_log(trans, root, name, name_len, |
2197 | dir, index); | 2251 | dir, index); |
@@ -2224,6 +2278,9 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2224 | trans = btrfs_start_transaction(root, 1); | 2278 | trans = btrfs_start_transaction(root, 1); |
2225 | 2279 | ||
2226 | btrfs_set_trans_block_group(trans, dir); | 2280 | btrfs_set_trans_block_group(trans, dir); |
2281 | |||
2282 | btrfs_record_unlink_dir(trans, dir, dentry->d_inode, 0); | ||
2283 | |||
2227 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2284 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2228 | dentry->d_name.name, dentry->d_name.len); | 2285 | dentry->d_name.name, dentry->d_name.len); |
2229 | 2286 | ||
@@ -2498,6 +2555,7 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2498 | key.type = (u8)-1; | 2555 | key.type = (u8)-1; |
2499 | 2556 | ||
2500 | search_again: | 2557 | search_again: |
2558 | path->leave_spinning = 1; | ||
2501 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); | 2559 | ret = btrfs_search_slot(trans, root, &key, path, -1, 1); |
2502 | if (ret < 0) | 2560 | if (ret < 0) |
2503 | goto error; | 2561 | goto error; |
@@ -2644,6 +2702,7 @@ delete: | |||
2644 | break; | 2702 | break; |
2645 | } | 2703 | } |
2646 | if (found_extent) { | 2704 | if (found_extent) { |
2705 | btrfs_set_path_blocking(path); | ||
2647 | ret = btrfs_free_extent(trans, root, extent_start, | 2706 | ret = btrfs_free_extent(trans, root, extent_start, |
2648 | extent_num_bytes, | 2707 | extent_num_bytes, |
2649 | leaf->start, root_owner, | 2708 | leaf->start, root_owner, |
@@ -2848,11 +2907,21 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr) | |||
2848 | if (err) | 2907 | if (err) |
2849 | return err; | 2908 | return err; |
2850 | 2909 | ||
2851 | if (S_ISREG(inode->i_mode) && | 2910 | if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { |
2852 | attr->ia_valid & ATTR_SIZE && attr->ia_size > inode->i_size) { | 2911 | if (attr->ia_size > inode->i_size) { |
2853 | err = btrfs_cont_expand(inode, attr->ia_size); | 2912 | err = btrfs_cont_expand(inode, attr->ia_size); |
2854 | if (err) | 2913 | if (err) |
2855 | return err; | 2914 | return err; |
2915 | } else if (inode->i_size > 0 && | ||
2916 | attr->ia_size == 0) { | ||
2917 | |||
2918 | /* we're truncating a file that used to have good | ||
2919 | * data down to zero. Make sure it gets into | ||
2920 | * the ordered flush list so that any new writes | ||
2921 | * get down to disk quickly. | ||
2922 | */ | ||
2923 | BTRFS_I(inode)->ordered_data_close = 1; | ||
2924 | } | ||
2856 | } | 2925 | } |
2857 | 2926 | ||
2858 | err = inode_setattr(inode, attr); | 2927 | err = inode_setattr(inode, attr); |
@@ -2984,13 +3053,14 @@ static noinline void init_btrfs_i(struct inode *inode) | |||
2984 | bi->disk_i_size = 0; | 3053 | bi->disk_i_size = 0; |
2985 | bi->flags = 0; | 3054 | bi->flags = 0; |
2986 | bi->index_cnt = (u64)-1; | 3055 | bi->index_cnt = (u64)-1; |
2987 | bi->log_dirty_trans = 0; | 3056 | bi->last_unlink_trans = 0; |
2988 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | 3057 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); |
2989 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | 3058 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, |
2990 | inode->i_mapping, GFP_NOFS); | 3059 | inode->i_mapping, GFP_NOFS); |
2991 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | 3060 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, |
2992 | inode->i_mapping, GFP_NOFS); | 3061 | inode->i_mapping, GFP_NOFS); |
2993 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | 3062 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); |
3063 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
2994 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | 3064 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); |
2995 | mutex_init(&BTRFS_I(inode)->extent_mutex); | 3065 | mutex_init(&BTRFS_I(inode)->extent_mutex); |
2996 | mutex_init(&BTRFS_I(inode)->log_mutex); | 3066 | mutex_init(&BTRFS_I(inode)->log_mutex); |
@@ -3449,6 +3519,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
3449 | sizes[0] = sizeof(struct btrfs_inode_item); | 3519 | sizes[0] = sizeof(struct btrfs_inode_item); |
3450 | sizes[1] = name_len + sizeof(*ref); | 3520 | sizes[1] = name_len + sizeof(*ref); |
3451 | 3521 | ||
3522 | path->leave_spinning = 1; | ||
3452 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); | 3523 | ret = btrfs_insert_empty_items(trans, root, path, key, sizes, 2); |
3453 | if (ret != 0) | 3524 | if (ret != 0) |
3454 | goto fail; | 3525 | goto fail; |
@@ -3727,6 +3798,8 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
3727 | drop_inode = 1; | 3798 | drop_inode = 1; |
3728 | 3799 | ||
3729 | nr = trans->blocks_used; | 3800 | nr = trans->blocks_used; |
3801 | |||
3802 | btrfs_log_new_name(trans, inode, NULL, dentry->d_parent); | ||
3730 | btrfs_end_transaction_throttle(trans, root); | 3803 | btrfs_end_transaction_throttle(trans, root); |
3731 | fail: | 3804 | fail: |
3732 | if (drop_inode) { | 3805 | if (drop_inode) { |
@@ -4363,6 +4436,8 @@ again: | |||
4363 | } | 4436 | } |
4364 | ClearPageChecked(page); | 4437 | ClearPageChecked(page); |
4365 | set_page_dirty(page); | 4438 | set_page_dirty(page); |
4439 | |||
4440 | BTRFS_I(inode)->last_trans = root->fs_info->generation + 1; | ||
4366 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); | 4441 | unlock_extent(io_tree, page_start, page_end, GFP_NOFS); |
4367 | 4442 | ||
4368 | out_unlock: | 4443 | out_unlock: |
@@ -4388,6 +4463,27 @@ static void btrfs_truncate(struct inode *inode) | |||
4388 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 4463 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
4389 | 4464 | ||
4390 | trans = btrfs_start_transaction(root, 1); | 4465 | trans = btrfs_start_transaction(root, 1); |
4466 | |||
4467 | /* | ||
4468 | * setattr is responsible for setting the ordered_data_close flag, | ||
4469 | * but that is only tested during the last file release. That | ||
4470 | * could happen well after the next commit, leaving a great big | ||
4471 | * window where new writes may get lost if someone chooses to write | ||
4472 | * to this file after truncating to zero | ||
4473 | * | ||
4474 | * The inode doesn't have any dirty data here, and so if we commit | ||
4475 | * this is a noop. If someone immediately starts writing to the inode | ||
4476 | * it is very likely we'll catch some of their writes in this | ||
4477 | * transaction, and the commit will find this file on the ordered | ||
4478 | * data list with good things to send down. | ||
4479 | * | ||
4480 | * This is a best effort solution, there is still a window where | ||
4481 | * using truncate to replace the contents of the file will | ||
4482 | * end up with a zero length file after a crash. | ||
4483 | */ | ||
4484 | if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close) | ||
4485 | btrfs_add_ordered_operation(trans, root, inode); | ||
4486 | |||
4391 | btrfs_set_trans_block_group(trans, inode); | 4487 | btrfs_set_trans_block_group(trans, inode); |
4392 | btrfs_i_size_write(inode, inode->i_size); | 4488 | btrfs_i_size_write(inode, inode->i_size); |
4393 | 4489 | ||
@@ -4464,12 +4560,15 @@ struct inode *btrfs_alloc_inode(struct super_block *sb) | |||
4464 | ei->i_acl = BTRFS_ACL_NOT_CACHED; | 4560 | ei->i_acl = BTRFS_ACL_NOT_CACHED; |
4465 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; | 4561 | ei->i_default_acl = BTRFS_ACL_NOT_CACHED; |
4466 | INIT_LIST_HEAD(&ei->i_orphan); | 4562 | INIT_LIST_HEAD(&ei->i_orphan); |
4563 | INIT_LIST_HEAD(&ei->ordered_operations); | ||
4467 | return &ei->vfs_inode; | 4564 | return &ei->vfs_inode; |
4468 | } | 4565 | } |
4469 | 4566 | ||
4470 | void btrfs_destroy_inode(struct inode *inode) | 4567 | void btrfs_destroy_inode(struct inode *inode) |
4471 | { | 4568 | { |
4472 | struct btrfs_ordered_extent *ordered; | 4569 | struct btrfs_ordered_extent *ordered; |
4570 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
4571 | |||
4473 | WARN_ON(!list_empty(&inode->i_dentry)); | 4572 | WARN_ON(!list_empty(&inode->i_dentry)); |
4474 | WARN_ON(inode->i_data.nrpages); | 4573 | WARN_ON(inode->i_data.nrpages); |
4475 | 4574 | ||
@@ -4480,13 +4579,24 @@ void btrfs_destroy_inode(struct inode *inode) | |||
4480 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) | 4579 | BTRFS_I(inode)->i_default_acl != BTRFS_ACL_NOT_CACHED) |
4481 | posix_acl_release(BTRFS_I(inode)->i_default_acl); | 4580 | posix_acl_release(BTRFS_I(inode)->i_default_acl); |
4482 | 4581 | ||
4483 | spin_lock(&BTRFS_I(inode)->root->list_lock); | 4582 | /* |
4583 | * Make sure we're properly removed from the ordered operation | ||
4584 | * lists. | ||
4585 | */ | ||
4586 | smp_mb(); | ||
4587 | if (!list_empty(&BTRFS_I(inode)->ordered_operations)) { | ||
4588 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
4589 | list_del_init(&BTRFS_I(inode)->ordered_operations); | ||
4590 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
4591 | } | ||
4592 | |||
4593 | spin_lock(&root->list_lock); | ||
4484 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 4594 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
4485 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" | 4595 | printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan" |
4486 | " list\n", inode->i_ino); | 4596 | " list\n", inode->i_ino); |
4487 | dump_stack(); | 4597 | dump_stack(); |
4488 | } | 4598 | } |
4489 | spin_unlock(&BTRFS_I(inode)->root->list_lock); | 4599 | spin_unlock(&root->list_lock); |
4490 | 4600 | ||
4491 | while (1) { | 4601 | while (1) { |
4492 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 4602 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -4611,8 +4721,36 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4611 | if (ret) | 4721 | if (ret) |
4612 | goto out_unlock; | 4722 | goto out_unlock; |
4613 | 4723 | ||
4724 | /* | ||
4725 | * we're using rename to replace one file with another. | ||
4726 | * and the replacement file is large. Start IO on it now so | ||
4727 | * we don't add too much work to the end of the transaction | ||
4728 | */ | ||
4729 | if (new_inode && old_inode && S_ISREG(old_inode->i_mode) && | ||
4730 | new_inode->i_size && | ||
4731 | old_inode->i_size > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT) | ||
4732 | filemap_flush(old_inode->i_mapping); | ||
4733 | |||
4614 | trans = btrfs_start_transaction(root, 1); | 4734 | trans = btrfs_start_transaction(root, 1); |
4615 | 4735 | ||
4736 | /* | ||
4737 | * make sure the inode gets flushed if it is replacing | ||
4738 | * something. | ||
4739 | */ | ||
4740 | if (new_inode && new_inode->i_size && | ||
4741 | old_inode && S_ISREG(old_inode->i_mode)) { | ||
4742 | btrfs_add_ordered_operation(trans, root, old_inode); | ||
4743 | } | ||
4744 | |||
4745 | /* | ||
4746 | * this is an ugly little race, but the rename is required to make | ||
4747 | * sure that if we crash, the inode is either at the old name | ||
4748 | * or the new one. pinning the log transaction lets us make sure | ||
4749 | * we don't allow a log commit to come in after we unlink the | ||
4750 | * name but before we add the new name back in. | ||
4751 | */ | ||
4752 | btrfs_pin_log_trans(root); | ||
4753 | |||
4616 | btrfs_set_trans_block_group(trans, new_dir); | 4754 | btrfs_set_trans_block_group(trans, new_dir); |
4617 | 4755 | ||
4618 | btrfs_inc_nlink(old_dentry->d_inode); | 4756 | btrfs_inc_nlink(old_dentry->d_inode); |
@@ -4620,6 +4758,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4620 | new_dir->i_ctime = new_dir->i_mtime = ctime; | 4758 | new_dir->i_ctime = new_dir->i_mtime = ctime; |
4621 | old_inode->i_ctime = ctime; | 4759 | old_inode->i_ctime = ctime; |
4622 | 4760 | ||
4761 | if (old_dentry->d_parent != new_dentry->d_parent) | ||
4762 | btrfs_record_unlink_dir(trans, old_dir, old_inode, 1); | ||
4763 | |||
4623 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, | 4764 | ret = btrfs_unlink_inode(trans, root, old_dir, old_dentry->d_inode, |
4624 | old_dentry->d_name.name, | 4765 | old_dentry->d_name.name, |
4625 | old_dentry->d_name.len); | 4766 | old_dentry->d_name.len); |
@@ -4651,7 +4792,14 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
4651 | if (ret) | 4792 | if (ret) |
4652 | goto out_fail; | 4793 | goto out_fail; |
4653 | 4794 | ||
4795 | btrfs_log_new_name(trans, old_inode, old_dir, | ||
4796 | new_dentry->d_parent); | ||
4654 | out_fail: | 4797 | out_fail: |
4798 | |||
4799 | /* this btrfs_end_log_trans just allows the current | ||
4800 | * log-sub transaction to complete | ||
4801 | */ | ||
4802 | btrfs_end_log_trans(root); | ||
4655 | btrfs_end_transaction_throttle(trans, root); | 4803 | btrfs_end_transaction_throttle(trans, root); |
4656 | out_unlock: | 4804 | out_unlock: |
4657 | return ret; | 4805 | return ret; |
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c index 47b0a88c12a2..a5310c0f41e2 100644 --- a/fs/btrfs/locking.c +++ b/fs/btrfs/locking.c | |||
@@ -71,12 +71,13 @@ void btrfs_clear_lock_blocking(struct extent_buffer *eb) | |||
71 | static int btrfs_spin_on_block(struct extent_buffer *eb) | 71 | static int btrfs_spin_on_block(struct extent_buffer *eb) |
72 | { | 72 | { |
73 | int i; | 73 | int i; |
74 | |||
74 | for (i = 0; i < 512; i++) { | 75 | for (i = 0; i < 512; i++) { |
75 | cpu_relax(); | ||
76 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 76 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
77 | return 1; | 77 | return 1; |
78 | if (need_resched()) | 78 | if (need_resched()) |
79 | break; | 79 | break; |
80 | cpu_relax(); | ||
80 | } | 81 | } |
81 | return 0; | 82 | return 0; |
82 | } | 83 | } |
@@ -95,13 +96,15 @@ int btrfs_try_spin_lock(struct extent_buffer *eb) | |||
95 | { | 96 | { |
96 | int i; | 97 | int i; |
97 | 98 | ||
98 | spin_nested(eb); | 99 | if (btrfs_spin_on_block(eb)) { |
99 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) | 100 | spin_nested(eb); |
100 | return 1; | 101 | if (!test_bit(EXTENT_BUFFER_BLOCKING, &eb->bflags)) |
101 | spin_unlock(&eb->lock); | 102 | return 1; |
102 | 103 | spin_unlock(&eb->lock); | |
104 | } | ||
103 | /* spin for a bit on the BLOCKING flag */ | 105 | /* spin for a bit on the BLOCKING flag */ |
104 | for (i = 0; i < 2; i++) { | 106 | for (i = 0; i < 2; i++) { |
107 | cpu_relax(); | ||
105 | if (!btrfs_spin_on_block(eb)) | 108 | if (!btrfs_spin_on_block(eb)) |
106 | break; | 109 | break; |
107 | 110 | ||
@@ -148,6 +151,9 @@ int btrfs_tree_lock(struct extent_buffer *eb) | |||
148 | DEFINE_WAIT(wait); | 151 | DEFINE_WAIT(wait); |
149 | wait.func = btrfs_wake_function; | 152 | wait.func = btrfs_wake_function; |
150 | 153 | ||
154 | if (!btrfs_spin_on_block(eb)) | ||
155 | goto sleep; | ||
156 | |||
151 | while(1) { | 157 | while(1) { |
152 | spin_nested(eb); | 158 | spin_nested(eb); |
153 | 159 | ||
@@ -165,9 +171,10 @@ int btrfs_tree_lock(struct extent_buffer *eb) | |||
165 | * spin for a bit, and if the blocking flag goes away, | 171 | * spin for a bit, and if the blocking flag goes away, |
166 | * loop around | 172 | * loop around |
167 | */ | 173 | */ |
174 | cpu_relax(); | ||
168 | if (btrfs_spin_on_block(eb)) | 175 | if (btrfs_spin_on_block(eb)) |
169 | continue; | 176 | continue; |
170 | 177 | sleep: | |
171 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, | 178 | prepare_to_wait_exclusive(&eb->lock_wq, &wait, |
172 | TASK_UNINTERRUPTIBLE); | 179 | TASK_UNINTERRUPTIBLE); |
173 | 180 | ||
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 77c2411a5f0f..53c87b197d70 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -310,6 +310,16 @@ int btrfs_remove_ordered_extent(struct inode *inode, | |||
310 | 310 | ||
311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 311 | spin_lock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
312 | list_del_init(&entry->root_extent_list); | 312 | list_del_init(&entry->root_extent_list); |
313 | |||
314 | /* | ||
315 | * we have no more ordered extents for this inode and | ||
316 | * no dirty pages. We can safely remove it from the | ||
317 | * list of ordered extents | ||
318 | */ | ||
319 | if (RB_EMPTY_ROOT(&tree->tree) && | ||
320 | !mapping_tagged(inode->i_mapping, PAGECACHE_TAG_DIRTY)) { | ||
321 | list_del_init(&BTRFS_I(inode)->ordered_operations); | ||
322 | } | ||
313 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); | 323 | spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock); |
314 | 324 | ||
315 | mutex_unlock(&tree->mutex); | 325 | mutex_unlock(&tree->mutex); |
@@ -370,6 +380,68 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only) | |||
370 | } | 380 | } |
371 | 381 | ||
372 | /* | 382 | /* |
383 | * this is used during transaction commit to write all the inodes | ||
384 | * added to the ordered operation list. These files must be fully on | ||
385 | * disk before the transaction commits. | ||
386 | * | ||
387 | * we have two modes here, one is to just start the IO via filemap_flush | ||
388 | * and the other is to wait for all the io. When we wait, we have an | ||
389 | * extra check to make sure the ordered operation list really is empty | ||
390 | * before we return | ||
391 | */ | ||
392 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait) | ||
393 | { | ||
394 | struct btrfs_inode *btrfs_inode; | ||
395 | struct inode *inode; | ||
396 | struct list_head splice; | ||
397 | |||
398 | INIT_LIST_HEAD(&splice); | ||
399 | |||
400 | mutex_lock(&root->fs_info->ordered_operations_mutex); | ||
401 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
402 | again: | ||
403 | list_splice_init(&root->fs_info->ordered_operations, &splice); | ||
404 | |||
405 | while (!list_empty(&splice)) { | ||
406 | btrfs_inode = list_entry(splice.next, struct btrfs_inode, | ||
407 | ordered_operations); | ||
408 | |||
409 | inode = &btrfs_inode->vfs_inode; | ||
410 | |||
411 | list_del_init(&btrfs_inode->ordered_operations); | ||
412 | |||
413 | /* | ||
414 | * the inode may be getting freed (in sys_unlink path). | ||
415 | */ | ||
416 | inode = igrab(inode); | ||
417 | |||
418 | if (!wait && inode) { | ||
419 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
420 | &root->fs_info->ordered_operations); | ||
421 | } | ||
422 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
423 | |||
424 | if (inode) { | ||
425 | if (wait) | ||
426 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
427 | else | ||
428 | filemap_flush(inode->i_mapping); | ||
429 | iput(inode); | ||
430 | } | ||
431 | |||
432 | cond_resched(); | ||
433 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
434 | } | ||
435 | if (wait && !list_empty(&root->fs_info->ordered_operations)) | ||
436 | goto again; | ||
437 | |||
438 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
439 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | ||
440 | |||
441 | return 0; | ||
442 | } | ||
443 | |||
444 | /* | ||
373 | * Used to start IO or wait for a given ordered extent to finish. | 445 | * Used to start IO or wait for a given ordered extent to finish. |
374 | * | 446 | * |
375 | * If wait is one, this effectively waits on page writeback for all the pages | 447 | * If wait is one, this effectively waits on page writeback for all the pages |
@@ -726,3 +798,49 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | |||
726 | 798 | ||
727 | return ret; | 799 | return ret; |
728 | } | 800 | } |
801 | |||
802 | /* | ||
803 | * add a given inode to the list of inodes that must be fully on | ||
804 | * disk before a transaction commit finishes. | ||
805 | * | ||
806 | * This basically gives us the ext3 style data=ordered mode, and it is mostly | ||
807 | * used to make sure renamed files are fully on disk. | ||
808 | * | ||
809 | * It is a noop if the inode is already fully on disk. | ||
810 | * | ||
811 | * If trans is not null, we'll do a friendly check for a transaction that | ||
812 | * is already flushing things and force the IO down ourselves. | ||
813 | */ | ||
814 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | ||
815 | struct btrfs_root *root, | ||
816 | struct inode *inode) | ||
817 | { | ||
818 | u64 last_mod; | ||
819 | |||
820 | last_mod = max(BTRFS_I(inode)->generation, BTRFS_I(inode)->last_trans); | ||
821 | |||
822 | /* | ||
823 | * if this file hasn't been changed since the last transaction | ||
824 | * commit, we can safely return without doing anything | ||
825 | */ | ||
826 | if (last_mod < root->fs_info->last_trans_committed) | ||
827 | return 0; | ||
828 | |||
829 | /* | ||
830 | * the transaction is already committing. Just start the IO and | ||
831 | * don't bother with all of this list nonsense | ||
832 | */ | ||
833 | if (trans && root->fs_info->running_transaction->blocked) { | ||
834 | btrfs_wait_ordered_range(inode, 0, (u64)-1); | ||
835 | return 0; | ||
836 | } | ||
837 | |||
838 | spin_lock(&root->fs_info->ordered_extent_lock); | ||
839 | if (list_empty(&BTRFS_I(inode)->ordered_operations)) { | ||
840 | list_add_tail(&BTRFS_I(inode)->ordered_operations, | ||
841 | &root->fs_info->ordered_operations); | ||
842 | } | ||
843 | spin_unlock(&root->fs_info->ordered_extent_lock); | ||
844 | |||
845 | return 0; | ||
846 | } | ||
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index ab66d5e8d6d6..3d31c8827b01 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -155,4 +155,8 @@ int btrfs_wait_on_page_writeback_range(struct address_space *mapping, | |||
155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, | 155 | int btrfs_fdatawrite_range(struct address_space *mapping, loff_t start, |
156 | loff_t end, int sync_mode); | 156 | loff_t end, int sync_mode); |
157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); | 157 | int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only); |
158 | int btrfs_run_ordered_operations(struct btrfs_root *root, int wait); | ||
159 | int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans, | ||
160 | struct btrfs_root *root, | ||
161 | struct inode *inode); | ||
158 | #endif | 162 | #endif |
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 4112d53d4f4d..664782c6a2df 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -65,6 +65,15 @@ static noinline int join_transaction(struct btrfs_root *root) | |||
65 | cur_trans->use_count = 1; | 65 | cur_trans->use_count = 1; |
66 | cur_trans->commit_done = 0; | 66 | cur_trans->commit_done = 0; |
67 | cur_trans->start_time = get_seconds(); | 67 | cur_trans->start_time = get_seconds(); |
68 | |||
69 | cur_trans->delayed_refs.root.rb_node = NULL; | ||
70 | cur_trans->delayed_refs.num_entries = 0; | ||
71 | cur_trans->delayed_refs.num_heads_ready = 0; | ||
72 | cur_trans->delayed_refs.num_heads = 0; | ||
73 | cur_trans->delayed_refs.flushing = 0; | ||
74 | cur_trans->delayed_refs.run_delayed_start = 0; | ||
75 | spin_lock_init(&cur_trans->delayed_refs.lock); | ||
76 | |||
68 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); | 77 | INIT_LIST_HEAD(&cur_trans->pending_snapshots); |
69 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); | 78 | list_add_tail(&cur_trans->list, &root->fs_info->trans_list); |
70 | extent_io_tree_init(&cur_trans->dirty_pages, | 79 | extent_io_tree_init(&cur_trans->dirty_pages, |
@@ -182,6 +191,8 @@ static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | |||
182 | h->block_group = 0; | 191 | h->block_group = 0; |
183 | h->alloc_exclude_nr = 0; | 192 | h->alloc_exclude_nr = 0; |
184 | h->alloc_exclude_start = 0; | 193 | h->alloc_exclude_start = 0; |
194 | h->delayed_ref_updates = 0; | ||
195 | |||
185 | root->fs_info->running_transaction->use_count++; | 196 | root->fs_info->running_transaction->use_count++; |
186 | mutex_unlock(&root->fs_info->trans_mutex); | 197 | mutex_unlock(&root->fs_info->trans_mutex); |
187 | return h; | 198 | return h; |
@@ -271,7 +282,6 @@ void btrfs_throttle(struct btrfs_root *root) | |||
271 | if (!root->fs_info->open_ioctl_trans) | 282 | if (!root->fs_info->open_ioctl_trans) |
272 | wait_current_trans(root); | 283 | wait_current_trans(root); |
273 | mutex_unlock(&root->fs_info->trans_mutex); | 284 | mutex_unlock(&root->fs_info->trans_mutex); |
274 | |||
275 | throttle_on_drops(root); | 285 | throttle_on_drops(root); |
276 | } | 286 | } |
277 | 287 | ||
@@ -280,6 +290,27 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
280 | { | 290 | { |
281 | struct btrfs_transaction *cur_trans; | 291 | struct btrfs_transaction *cur_trans; |
282 | struct btrfs_fs_info *info = root->fs_info; | 292 | struct btrfs_fs_info *info = root->fs_info; |
293 | int count = 0; | ||
294 | |||
295 | while (count < 4) { | ||
296 | unsigned long cur = trans->delayed_ref_updates; | ||
297 | trans->delayed_ref_updates = 0; | ||
298 | if (cur && | ||
299 | trans->transaction->delayed_refs.num_heads_ready > 64) { | ||
300 | trans->delayed_ref_updates = 0; | ||
301 | |||
302 | /* | ||
303 | * do a full flush if the transaction is trying | ||
304 | * to close | ||
305 | */ | ||
306 | if (trans->transaction->delayed_refs.flushing) | ||
307 | cur = 0; | ||
308 | btrfs_run_delayed_refs(trans, root, cur); | ||
309 | } else { | ||
310 | break; | ||
311 | } | ||
312 | count++; | ||
313 | } | ||
283 | 314 | ||
284 | mutex_lock(&info->trans_mutex); | 315 | mutex_lock(&info->trans_mutex); |
285 | cur_trans = info->running_transaction; | 316 | cur_trans = info->running_transaction; |
@@ -424,9 +455,10 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
424 | u64 old_root_bytenr; | 455 | u64 old_root_bytenr; |
425 | struct btrfs_root *tree_root = root->fs_info->tree_root; | 456 | struct btrfs_root *tree_root = root->fs_info->tree_root; |
426 | 457 | ||
427 | btrfs_extent_post_op(trans, root); | ||
428 | btrfs_write_dirty_block_groups(trans, root); | 458 | btrfs_write_dirty_block_groups(trans, root); |
429 | btrfs_extent_post_op(trans, root); | 459 | |
460 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
461 | BUG_ON(ret); | ||
430 | 462 | ||
431 | while (1) { | 463 | while (1) { |
432 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); | 464 | old_root_bytenr = btrfs_root_bytenr(&root->root_item); |
@@ -438,14 +470,14 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans, | |||
438 | btrfs_header_level(root->node)); | 470 | btrfs_header_level(root->node)); |
439 | btrfs_set_root_generation(&root->root_item, trans->transid); | 471 | btrfs_set_root_generation(&root->root_item, trans->transid); |
440 | 472 | ||
441 | btrfs_extent_post_op(trans, root); | ||
442 | |||
443 | ret = btrfs_update_root(trans, tree_root, | 473 | ret = btrfs_update_root(trans, tree_root, |
444 | &root->root_key, | 474 | &root->root_key, |
445 | &root->root_item); | 475 | &root->root_item); |
446 | BUG_ON(ret); | 476 | BUG_ON(ret); |
447 | btrfs_write_dirty_block_groups(trans, root); | 477 | btrfs_write_dirty_block_groups(trans, root); |
448 | btrfs_extent_post_op(trans, root); | 478 | |
479 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
480 | BUG_ON(ret); | ||
449 | } | 481 | } |
450 | return 0; | 482 | return 0; |
451 | } | 483 | } |
@@ -459,15 +491,18 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
459 | struct btrfs_fs_info *fs_info = root->fs_info; | 491 | struct btrfs_fs_info *fs_info = root->fs_info; |
460 | struct list_head *next; | 492 | struct list_head *next; |
461 | struct extent_buffer *eb; | 493 | struct extent_buffer *eb; |
494 | int ret; | ||
462 | 495 | ||
463 | btrfs_extent_post_op(trans, fs_info->tree_root); | 496 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
497 | BUG_ON(ret); | ||
464 | 498 | ||
465 | eb = btrfs_lock_root_node(fs_info->tree_root); | 499 | eb = btrfs_lock_root_node(fs_info->tree_root); |
466 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb, 0); | 500 | btrfs_cow_block(trans, fs_info->tree_root, eb, NULL, 0, &eb); |
467 | btrfs_tree_unlock(eb); | 501 | btrfs_tree_unlock(eb); |
468 | free_extent_buffer(eb); | 502 | free_extent_buffer(eb); |
469 | 503 | ||
470 | btrfs_extent_post_op(trans, fs_info->tree_root); | 504 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); |
505 | BUG_ON(ret); | ||
471 | 506 | ||
472 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { | 507 | while (!list_empty(&fs_info->dirty_cowonly_roots)) { |
473 | next = fs_info->dirty_cowonly_roots.next; | 508 | next = fs_info->dirty_cowonly_roots.next; |
@@ -475,6 +510,9 @@ int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | |||
475 | root = list_entry(next, struct btrfs_root, dirty_list); | 510 | root = list_entry(next, struct btrfs_root, dirty_list); |
476 | 511 | ||
477 | update_cowonly_root(trans, root); | 512 | update_cowonly_root(trans, root); |
513 | |||
514 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
515 | BUG_ON(ret); | ||
478 | } | 516 | } |
479 | return 0; | 517 | return 0; |
480 | } | 518 | } |
@@ -635,6 +673,31 @@ int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | |||
635 | } | 673 | } |
636 | 674 | ||
637 | /* | 675 | /* |
676 | * when dropping snapshots, we generate a ton of delayed refs, and it makes | ||
677 | * sense not to join the transaction while it is trying to flush the current | ||
678 | * queue of delayed refs out. | ||
679 | * | ||
680 | * This is used by the drop snapshot code only | ||
681 | */ | ||
682 | static noinline int wait_transaction_pre_flush(struct btrfs_fs_info *info) | ||
683 | { | ||
684 | DEFINE_WAIT(wait); | ||
685 | |||
686 | mutex_lock(&info->trans_mutex); | ||
687 | while (info->running_transaction && | ||
688 | info->running_transaction->delayed_refs.flushing) { | ||
689 | prepare_to_wait(&info->transaction_wait, &wait, | ||
690 | TASK_UNINTERRUPTIBLE); | ||
691 | mutex_unlock(&info->trans_mutex); | ||
692 | schedule(); | ||
693 | mutex_lock(&info->trans_mutex); | ||
694 | finish_wait(&info->transaction_wait, &wait); | ||
695 | } | ||
696 | mutex_unlock(&info->trans_mutex); | ||
697 | return 0; | ||
698 | } | ||
699 | |||
700 | /* | ||
638 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on | 701 | * Given a list of roots that need to be deleted, call btrfs_drop_snapshot on |
639 | * all of them | 702 | * all of them |
640 | */ | 703 | */ |
@@ -661,7 +724,22 @@ static noinline int drop_dirty_roots(struct btrfs_root *tree_root, | |||
661 | atomic_inc(&root->fs_info->throttles); | 724 | atomic_inc(&root->fs_info->throttles); |
662 | 725 | ||
663 | while (1) { | 726 | while (1) { |
727 | /* | ||
728 | * we don't want to jump in and create a bunch of | ||
729 | * delayed refs if the transaction is starting to close | ||
730 | */ | ||
731 | wait_transaction_pre_flush(tree_root->fs_info); | ||
664 | trans = btrfs_start_transaction(tree_root, 1); | 732 | trans = btrfs_start_transaction(tree_root, 1); |
733 | |||
734 | /* | ||
735 | * we've joined a transaction, make sure it isn't | ||
736 | * closing right now | ||
737 | */ | ||
738 | if (trans->transaction->delayed_refs.flushing) { | ||
739 | btrfs_end_transaction(trans, tree_root); | ||
740 | continue; | ||
741 | } | ||
742 | |||
665 | mutex_lock(&root->fs_info->drop_mutex); | 743 | mutex_lock(&root->fs_info->drop_mutex); |
666 | ret = btrfs_drop_snapshot(trans, dirty->root); | 744 | ret = btrfs_drop_snapshot(trans, dirty->root); |
667 | if (ret != -EAGAIN) | 745 | if (ret != -EAGAIN) |
@@ -766,7 +844,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
766 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | 844 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); |
767 | 845 | ||
768 | old = btrfs_lock_root_node(root); | 846 | old = btrfs_lock_root_node(root); |
769 | btrfs_cow_block(trans, root, old, NULL, 0, &old, 0); | 847 | btrfs_cow_block(trans, root, old, NULL, 0, &old); |
770 | 848 | ||
771 | btrfs_copy_root(trans, root, old, &tmp, objectid); | 849 | btrfs_copy_root(trans, root, old, &tmp, objectid); |
772 | btrfs_tree_unlock(old); | 850 | btrfs_tree_unlock(old); |
@@ -894,12 +972,31 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
894 | struct extent_io_tree *pinned_copy; | 972 | struct extent_io_tree *pinned_copy; |
895 | DEFINE_WAIT(wait); | 973 | DEFINE_WAIT(wait); |
896 | int ret; | 974 | int ret; |
975 | int should_grow = 0; | ||
976 | unsigned long now = get_seconds(); | ||
977 | |||
978 | btrfs_run_ordered_operations(root, 0); | ||
979 | |||
980 | /* make a pass through all the delayed refs we have so far | ||
981 | * any running procs may add more while we are here | ||
982 | */ | ||
983 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
984 | BUG_ON(ret); | ||
985 | |||
986 | cur_trans = trans->transaction; | ||
987 | /* | ||
988 | * set the flushing flag so procs in this transaction have to | ||
989 | * start sending their work down. | ||
990 | */ | ||
991 | cur_trans->delayed_refs.flushing = 1; | ||
992 | |||
993 | ret = btrfs_run_delayed_refs(trans, root, 0); | ||
994 | BUG_ON(ret); | ||
897 | 995 | ||
898 | INIT_LIST_HEAD(&dirty_fs_roots); | ||
899 | mutex_lock(&root->fs_info->trans_mutex); | 996 | mutex_lock(&root->fs_info->trans_mutex); |
900 | if (trans->transaction->in_commit) { | 997 | INIT_LIST_HEAD(&dirty_fs_roots); |
901 | cur_trans = trans->transaction; | 998 | if (cur_trans->in_commit) { |
902 | trans->transaction->use_count++; | 999 | cur_trans->use_count++; |
903 | mutex_unlock(&root->fs_info->trans_mutex); | 1000 | mutex_unlock(&root->fs_info->trans_mutex); |
904 | btrfs_end_transaction(trans, root); | 1001 | btrfs_end_transaction(trans, root); |
905 | 1002 | ||
@@ -922,7 +1019,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
922 | 1019 | ||
923 | trans->transaction->in_commit = 1; | 1020 | trans->transaction->in_commit = 1; |
924 | trans->transaction->blocked = 1; | 1021 | trans->transaction->blocked = 1; |
925 | cur_trans = trans->transaction; | ||
926 | if (cur_trans->list.prev != &root->fs_info->trans_list) { | 1022 | if (cur_trans->list.prev != &root->fs_info->trans_list) { |
927 | prev_trans = list_entry(cur_trans->list.prev, | 1023 | prev_trans = list_entry(cur_trans->list.prev, |
928 | struct btrfs_transaction, list); | 1024 | struct btrfs_transaction, list); |
@@ -937,6 +1033,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
937 | } | 1033 | } |
938 | } | 1034 | } |
939 | 1035 | ||
1036 | if (now < cur_trans->start_time || now - cur_trans->start_time < 1) | ||
1037 | should_grow = 1; | ||
1038 | |||
940 | do { | 1039 | do { |
941 | int snap_pending = 0; | 1040 | int snap_pending = 0; |
942 | joined = cur_trans->num_joined; | 1041 | joined = cur_trans->num_joined; |
@@ -949,7 +1048,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
949 | 1048 | ||
950 | if (cur_trans->num_writers > 1) | 1049 | if (cur_trans->num_writers > 1) |
951 | timeout = MAX_SCHEDULE_TIMEOUT; | 1050 | timeout = MAX_SCHEDULE_TIMEOUT; |
952 | else | 1051 | else if (should_grow) |
953 | timeout = 1; | 1052 | timeout = 1; |
954 | 1053 | ||
955 | mutex_unlock(&root->fs_info->trans_mutex); | 1054 | mutex_unlock(&root->fs_info->trans_mutex); |
@@ -959,16 +1058,30 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
959 | BUG_ON(ret); | 1058 | BUG_ON(ret); |
960 | } | 1059 | } |
961 | 1060 | ||
962 | schedule_timeout(timeout); | 1061 | /* |
1062 | * renames don't use btrfs_join_transaction, so, once we | ||
1063 | * set the transaction to blocked above, we aren't going | ||
1064 | * to get any new ordered operations. We can safely run | ||
1065 | * it here and know for sure that nothing new will be added | ||
1066 | * to the list | ||
1067 | */ | ||
1068 | btrfs_run_ordered_operations(root, 1); | ||
1069 | |||
1070 | smp_mb(); | ||
1071 | if (cur_trans->num_writers > 1 || should_grow) | ||
1072 | schedule_timeout(timeout); | ||
963 | 1073 | ||
964 | mutex_lock(&root->fs_info->trans_mutex); | 1074 | mutex_lock(&root->fs_info->trans_mutex); |
965 | finish_wait(&cur_trans->writer_wait, &wait); | 1075 | finish_wait(&cur_trans->writer_wait, &wait); |
966 | } while (cur_trans->num_writers > 1 || | 1076 | } while (cur_trans->num_writers > 1 || |
967 | (cur_trans->num_joined != joined)); | 1077 | (should_grow && cur_trans->num_joined != joined)); |
968 | 1078 | ||
969 | ret = create_pending_snapshots(trans, root->fs_info); | 1079 | ret = create_pending_snapshots(trans, root->fs_info); |
970 | BUG_ON(ret); | 1080 | BUG_ON(ret); |
971 | 1081 | ||
1082 | ret = btrfs_run_delayed_refs(trans, root, (unsigned long)-1); | ||
1083 | BUG_ON(ret); | ||
1084 | |||
972 | WARN_ON(cur_trans != trans->transaction); | 1085 | WARN_ON(cur_trans != trans->transaction); |
973 | 1086 | ||
974 | /* btrfs_commit_tree_roots is responsible for getting the | 1087 | /* btrfs_commit_tree_roots is responsible for getting the |
@@ -1032,6 +1145,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1032 | btrfs_copy_pinned(root, pinned_copy); | 1145 | btrfs_copy_pinned(root, pinned_copy); |
1033 | 1146 | ||
1034 | trans->transaction->blocked = 0; | 1147 | trans->transaction->blocked = 0; |
1148 | |||
1035 | wake_up(&root->fs_info->transaction_throttle); | 1149 | wake_up(&root->fs_info->transaction_throttle); |
1036 | wake_up(&root->fs_info->transaction_wait); | 1150 | wake_up(&root->fs_info->transaction_wait); |
1037 | 1151 | ||
@@ -1058,6 +1172,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
1058 | mutex_lock(&root->fs_info->trans_mutex); | 1172 | mutex_lock(&root->fs_info->trans_mutex); |
1059 | 1173 | ||
1060 | cur_trans->commit_done = 1; | 1174 | cur_trans->commit_done = 1; |
1175 | |||
1061 | root->fs_info->last_trans_committed = cur_trans->transid; | 1176 | root->fs_info->last_trans_committed = cur_trans->transid; |
1062 | wake_up(&cur_trans->commit_wait); | 1177 | wake_up(&cur_trans->commit_wait); |
1063 | 1178 | ||
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index ea292117f882..94f5bde2b58d 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -19,10 +19,16 @@ | |||
19 | #ifndef __BTRFS_TRANSACTION__ | 19 | #ifndef __BTRFS_TRANSACTION__ |
20 | #define __BTRFS_TRANSACTION__ | 20 | #define __BTRFS_TRANSACTION__ |
21 | #include "btrfs_inode.h" | 21 | #include "btrfs_inode.h" |
22 | #include "delayed-ref.h" | ||
22 | 23 | ||
23 | struct btrfs_transaction { | 24 | struct btrfs_transaction { |
24 | u64 transid; | 25 | u64 transid; |
26 | /* | ||
27 | * total writers in this transaction, it must be zero before the | ||
28 | * transaction can end | ||
29 | */ | ||
25 | unsigned long num_writers; | 30 | unsigned long num_writers; |
31 | |||
26 | unsigned long num_joined; | 32 | unsigned long num_joined; |
27 | int in_commit; | 33 | int in_commit; |
28 | int use_count; | 34 | int use_count; |
@@ -34,6 +40,7 @@ struct btrfs_transaction { | |||
34 | wait_queue_head_t writer_wait; | 40 | wait_queue_head_t writer_wait; |
35 | wait_queue_head_t commit_wait; | 41 | wait_queue_head_t commit_wait; |
36 | struct list_head pending_snapshots; | 42 | struct list_head pending_snapshots; |
43 | struct btrfs_delayed_ref_root delayed_refs; | ||
37 | }; | 44 | }; |
38 | 45 | ||
39 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
@@ -44,6 +51,7 @@ struct btrfs_trans_handle { | |||
44 | u64 block_group; | 51 | u64 block_group; |
45 | u64 alloc_exclude_start; | 52 | u64 alloc_exclude_start; |
46 | u64 alloc_exclude_nr; | 53 | u64 alloc_exclude_nr; |
54 | unsigned long delayed_ref_updates; | ||
47 | }; | 55 | }; |
48 | 56 | ||
49 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index 98d25fa4570e..b10eacdb1620 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -124,8 +124,6 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
124 | } | 124 | } |
125 | 125 | ||
126 | btrfs_release_path(root, path); | 126 | btrfs_release_path(root, path); |
127 | if (is_extent) | ||
128 | btrfs_extent_post_op(trans, root); | ||
129 | out: | 127 | out: |
130 | if (path) | 128 | if (path) |
131 | btrfs_free_path(path); | 129 | btrfs_free_path(path); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 9c462fbd60fa..fc9b87a7975b 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -35,6 +35,49 @@ | |||
35 | #define LOG_INODE_EXISTS 1 | 35 | #define LOG_INODE_EXISTS 1 |
36 | 36 | ||
37 | /* | 37 | /* |
38 | * directory trouble cases | ||
39 | * | ||
40 | * 1) on rename or unlink, if the inode being unlinked isn't in the fsync | ||
41 | * log, we must force a full commit before doing an fsync of the directory | ||
42 | * where the unlink was done. | ||
43 | * ---> record transid of last unlink/rename per directory | ||
44 | * | ||
45 | * mkdir foo/some_dir | ||
46 | * normal commit | ||
47 | * rename foo/some_dir foo2/some_dir | ||
48 | * mkdir foo/some_dir | ||
49 | * fsync foo/some_dir/some_file | ||
50 | * | ||
51 | * The fsync above will unlink the original some_dir without recording | ||
52 | * it in its new location (foo2). After a crash, some_dir will be gone | ||
53 | * unless the fsync of some_file forces a full commit | ||
54 | * | ||
55 | * 2) we must log any new names for any file or dir that is in the fsync | ||
56 | * log. ---> check inode while renaming/linking. | ||
57 | * | ||
58 | * 2a) we must log any new names for any file or dir during rename | ||
59 | * when the directory they are being removed from was logged. | ||
60 | * ---> check inode and old parent dir during rename | ||
61 | * | ||
62 | * 2a is actually the more important variant. Without the extra logging | ||
63 | * a crash might unlink the old name without recreating the new one | ||
64 | * | ||
65 | * 3) after a crash, we must go through any directories with a link count | ||
66 | * of zero and redo the rm -rf | ||
67 | * | ||
68 | * mkdir f1/foo | ||
69 | * normal commit | ||
70 | * rm -rf f1/foo | ||
71 | * fsync(f1) | ||
72 | * | ||
73 | * The directory f1 was fully removed from the FS, but fsync was never | ||
74 | * called on f1, only its parent dir. After a crash the rm -rf must | ||
75 | * be replayed. This must be able to recurse down the entire | ||
76 | * directory tree. The inode link count fixup code takes care of the | ||
77 | * ugly details. | ||
78 | */ | ||
79 | |||
80 | /* | ||
38 | * stages for the tree walking. The first | 81 | * stages for the tree walking. The first |
39 | * stage (0) is to only pin down the blocks we find | 82 | * stage (0) is to only pin down the blocks we find |
40 | * the second stage (1) is to make sure that all the inodes | 83 | * the second stage (1) is to make sure that all the inodes |
@@ -47,12 +90,17 @@ | |||
47 | #define LOG_WALK_REPLAY_INODES 1 | 90 | #define LOG_WALK_REPLAY_INODES 1 |
48 | #define LOG_WALK_REPLAY_ALL 2 | 91 | #define LOG_WALK_REPLAY_ALL 2 |
49 | 92 | ||
50 | static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | 93 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, |
51 | struct btrfs_root *root, struct inode *inode, | 94 | struct btrfs_root *root, struct inode *inode, |
52 | int inode_only); | 95 | int inode_only); |
53 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, | 96 | static int link_to_fixup_dir(struct btrfs_trans_handle *trans, |
54 | struct btrfs_root *root, | 97 | struct btrfs_root *root, |
55 | struct btrfs_path *path, u64 objectid); | 98 | struct btrfs_path *path, u64 objectid); |
99 | static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | ||
100 | struct btrfs_root *root, | ||
101 | struct btrfs_root *log, | ||
102 | struct btrfs_path *path, | ||
103 | u64 dirid, int del_all); | ||
56 | 104 | ||
57 | /* | 105 | /* |
58 | * tree logging is a special write ahead log used to make sure that | 106 | * tree logging is a special write ahead log used to make sure that |
@@ -133,10 +181,25 @@ static int join_running_log_trans(struct btrfs_root *root) | |||
133 | } | 181 | } |
134 | 182 | ||
135 | /* | 183 | /* |
184 | * This either makes the current running log transaction wait | ||
185 | * until you call btrfs_end_log_trans() or it makes any future | ||
186 | * log transactions wait until you call btrfs_end_log_trans() | ||
187 | */ | ||
188 | int btrfs_pin_log_trans(struct btrfs_root *root) | ||
189 | { | ||
190 | int ret = -ENOENT; | ||
191 | |||
192 | mutex_lock(&root->log_mutex); | ||
193 | atomic_inc(&root->log_writers); | ||
194 | mutex_unlock(&root->log_mutex); | ||
195 | return ret; | ||
196 | } | ||
197 | |||
198 | /* | ||
136 | * indicate we're done making changes to the log tree | 199 | * indicate we're done making changes to the log tree |
137 | * and wake up anyone waiting to do a sync | 200 | * and wake up anyone waiting to do a sync |
138 | */ | 201 | */ |
139 | static int end_log_trans(struct btrfs_root *root) | 202 | int btrfs_end_log_trans(struct btrfs_root *root) |
140 | { | 203 | { |
141 | if (atomic_dec_and_test(&root->log_writers)) { | 204 | if (atomic_dec_and_test(&root->log_writers)) { |
142 | smp_mb(); | 205 | smp_mb(); |
@@ -203,7 +266,6 @@ static int process_one_buffer(struct btrfs_root *log, | |||
203 | mutex_lock(&log->fs_info->pinned_mutex); | 266 | mutex_lock(&log->fs_info->pinned_mutex); |
204 | btrfs_update_pinned_extents(log->fs_info->extent_root, | 267 | btrfs_update_pinned_extents(log->fs_info->extent_root, |
205 | eb->start, eb->len, 1); | 268 | eb->start, eb->len, 1); |
206 | mutex_unlock(&log->fs_info->pinned_mutex); | ||
207 | } | 269 | } |
208 | 270 | ||
209 | if (btrfs_buffer_uptodate(eb, gen)) { | 271 | if (btrfs_buffer_uptodate(eb, gen)) { |
@@ -603,6 +665,7 @@ static noinline int drop_one_dir_item(struct btrfs_trans_handle *trans, | |||
603 | 665 | ||
604 | ret = link_to_fixup_dir(trans, root, path, location.objectid); | 666 | ret = link_to_fixup_dir(trans, root, path, location.objectid); |
605 | BUG_ON(ret); | 667 | BUG_ON(ret); |
668 | |||
606 | ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); | 669 | ret = btrfs_unlink_inode(trans, root, dir, inode, name, name_len); |
607 | BUG_ON(ret); | 670 | BUG_ON(ret); |
608 | kfree(name); | 671 | kfree(name); |
@@ -804,6 +867,7 @@ conflict_again: | |||
804 | victim_name_len)) { | 867 | victim_name_len)) { |
805 | btrfs_inc_nlink(inode); | 868 | btrfs_inc_nlink(inode); |
806 | btrfs_release_path(root, path); | 869 | btrfs_release_path(root, path); |
870 | |||
807 | ret = btrfs_unlink_inode(trans, root, dir, | 871 | ret = btrfs_unlink_inode(trans, root, dir, |
808 | inode, victim_name, | 872 | inode, victim_name, |
809 | victim_name_len); | 873 | victim_name_len); |
@@ -922,13 +986,20 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans, | |||
922 | key.offset--; | 986 | key.offset--; |
923 | btrfs_release_path(root, path); | 987 | btrfs_release_path(root, path); |
924 | } | 988 | } |
925 | btrfs_free_path(path); | 989 | btrfs_release_path(root, path); |
926 | if (nlink != inode->i_nlink) { | 990 | if (nlink != inode->i_nlink) { |
927 | inode->i_nlink = nlink; | 991 | inode->i_nlink = nlink; |
928 | btrfs_update_inode(trans, root, inode); | 992 | btrfs_update_inode(trans, root, inode); |
929 | } | 993 | } |
930 | BTRFS_I(inode)->index_cnt = (u64)-1; | 994 | BTRFS_I(inode)->index_cnt = (u64)-1; |
931 | 995 | ||
996 | if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) { | ||
997 | ret = replay_dir_deletes(trans, root, NULL, path, | ||
998 | inode->i_ino, 1); | ||
999 | BUG_ON(ret); | ||
1000 | } | ||
1001 | btrfs_free_path(path); | ||
1002 | |||
932 | return 0; | 1003 | return 0; |
933 | } | 1004 | } |
934 | 1005 | ||
@@ -971,9 +1042,12 @@ static noinline int fixup_inode_link_counts(struct btrfs_trans_handle *trans, | |||
971 | 1042 | ||
972 | iput(inode); | 1043 | iput(inode); |
973 | 1044 | ||
974 | if (key.offset == 0) | 1045 | /* |
975 | break; | 1046 | * fixup on a directory may create new entries, |
976 | key.offset--; | 1047 | * make sure we always look for the highest possible |
1048 | * offset | ||
1049 | */ | ||
1050 | key.offset = (u64)-1; | ||
977 | } | 1051 | } |
978 | btrfs_release_path(root, path); | 1052 | btrfs_release_path(root, path); |
979 | return 0; | 1053 | return 0; |
@@ -1313,11 +1387,11 @@ again: | |||
1313 | read_extent_buffer(eb, name, (unsigned long)(di + 1), | 1387 | read_extent_buffer(eb, name, (unsigned long)(di + 1), |
1314 | name_len); | 1388 | name_len); |
1315 | log_di = NULL; | 1389 | log_di = NULL; |
1316 | if (dir_key->type == BTRFS_DIR_ITEM_KEY) { | 1390 | if (log && dir_key->type == BTRFS_DIR_ITEM_KEY) { |
1317 | log_di = btrfs_lookup_dir_item(trans, log, log_path, | 1391 | log_di = btrfs_lookup_dir_item(trans, log, log_path, |
1318 | dir_key->objectid, | 1392 | dir_key->objectid, |
1319 | name, name_len, 0); | 1393 | name, name_len, 0); |
1320 | } else if (dir_key->type == BTRFS_DIR_INDEX_KEY) { | 1394 | } else if (log && dir_key->type == BTRFS_DIR_INDEX_KEY) { |
1321 | log_di = btrfs_lookup_dir_index_item(trans, log, | 1395 | log_di = btrfs_lookup_dir_index_item(trans, log, |
1322 | log_path, | 1396 | log_path, |
1323 | dir_key->objectid, | 1397 | dir_key->objectid, |
@@ -1378,7 +1452,7 @@ static noinline int replay_dir_deletes(struct btrfs_trans_handle *trans, | |||
1378 | struct btrfs_root *root, | 1452 | struct btrfs_root *root, |
1379 | struct btrfs_root *log, | 1453 | struct btrfs_root *log, |
1380 | struct btrfs_path *path, | 1454 | struct btrfs_path *path, |
1381 | u64 dirid) | 1455 | u64 dirid, int del_all) |
1382 | { | 1456 | { |
1383 | u64 range_start; | 1457 | u64 range_start; |
1384 | u64 range_end; | 1458 | u64 range_end; |
@@ -1408,10 +1482,14 @@ again: | |||
1408 | range_start = 0; | 1482 | range_start = 0; |
1409 | range_end = 0; | 1483 | range_end = 0; |
1410 | while (1) { | 1484 | while (1) { |
1411 | ret = find_dir_range(log, path, dirid, key_type, | 1485 | if (del_all) |
1412 | &range_start, &range_end); | 1486 | range_end = (u64)-1; |
1413 | if (ret != 0) | 1487 | else { |
1414 | break; | 1488 | ret = find_dir_range(log, path, dirid, key_type, |
1489 | &range_start, &range_end); | ||
1490 | if (ret != 0) | ||
1491 | break; | ||
1492 | } | ||
1415 | 1493 | ||
1416 | dir_key.offset = range_start; | 1494 | dir_key.offset = range_start; |
1417 | while (1) { | 1495 | while (1) { |
@@ -1437,7 +1515,8 @@ again: | |||
1437 | break; | 1515 | break; |
1438 | 1516 | ||
1439 | ret = check_item_in_log(trans, root, log, path, | 1517 | ret = check_item_in_log(trans, root, log, path, |
1440 | log_path, dir, &found_key); | 1518 | log_path, dir, |
1519 | &found_key); | ||
1441 | BUG_ON(ret); | 1520 | BUG_ON(ret); |
1442 | if (found_key.offset == (u64)-1) | 1521 | if (found_key.offset == (u64)-1) |
1443 | break; | 1522 | break; |
@@ -1514,7 +1593,7 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1514 | mode = btrfs_inode_mode(eb, inode_item); | 1593 | mode = btrfs_inode_mode(eb, inode_item); |
1515 | if (S_ISDIR(mode)) { | 1594 | if (S_ISDIR(mode)) { |
1516 | ret = replay_dir_deletes(wc->trans, | 1595 | ret = replay_dir_deletes(wc->trans, |
1517 | root, log, path, key.objectid); | 1596 | root, log, path, key.objectid, 0); |
1518 | BUG_ON(ret); | 1597 | BUG_ON(ret); |
1519 | } | 1598 | } |
1520 | ret = overwrite_item(wc->trans, root, path, | 1599 | ret = overwrite_item(wc->trans, root, path, |
@@ -1533,6 +1612,17 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb, | |||
1533 | root, inode, inode->i_size, | 1612 | root, inode, inode->i_size, |
1534 | BTRFS_EXTENT_DATA_KEY); | 1613 | BTRFS_EXTENT_DATA_KEY); |
1535 | BUG_ON(ret); | 1614 | BUG_ON(ret); |
1615 | |||
1616 | /* if the nlink count is zero here, the iput | ||
1617 | * will free the inode. We bump it to make | ||
1618 | * sure it doesn't get freed until the link | ||
1619 | * count fixup is done | ||
1620 | */ | ||
1621 | if (inode->i_nlink == 0) { | ||
1622 | btrfs_inc_nlink(inode); | ||
1623 | btrfs_update_inode(wc->trans, | ||
1624 | root, inode); | ||
1625 | } | ||
1536 | iput(inode); | 1626 | iput(inode); |
1537 | } | 1627 | } |
1538 | ret = link_to_fixup_dir(wc->trans, root, | 1628 | ret = link_to_fixup_dir(wc->trans, root, |
@@ -1840,7 +1930,8 @@ static int update_log_root(struct btrfs_trans_handle *trans, | |||
1840 | return ret; | 1930 | return ret; |
1841 | } | 1931 | } |
1842 | 1932 | ||
1843 | static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | 1933 | static int wait_log_commit(struct btrfs_trans_handle *trans, |
1934 | struct btrfs_root *root, unsigned long transid) | ||
1844 | { | 1935 | { |
1845 | DEFINE_WAIT(wait); | 1936 | DEFINE_WAIT(wait); |
1846 | int index = transid % 2; | 1937 | int index = transid % 2; |
@@ -1854,9 +1945,12 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | |||
1854 | prepare_to_wait(&root->log_commit_wait[index], | 1945 | prepare_to_wait(&root->log_commit_wait[index], |
1855 | &wait, TASK_UNINTERRUPTIBLE); | 1946 | &wait, TASK_UNINTERRUPTIBLE); |
1856 | mutex_unlock(&root->log_mutex); | 1947 | mutex_unlock(&root->log_mutex); |
1857 | if (root->log_transid < transid + 2 && | 1948 | |
1949 | if (root->fs_info->last_trans_log_full_commit != | ||
1950 | trans->transid && root->log_transid < transid + 2 && | ||
1858 | atomic_read(&root->log_commit[index])) | 1951 | atomic_read(&root->log_commit[index])) |
1859 | schedule(); | 1952 | schedule(); |
1953 | |||
1860 | finish_wait(&root->log_commit_wait[index], &wait); | 1954 | finish_wait(&root->log_commit_wait[index], &wait); |
1861 | mutex_lock(&root->log_mutex); | 1955 | mutex_lock(&root->log_mutex); |
1862 | } while (root->log_transid < transid + 2 && | 1956 | } while (root->log_transid < transid + 2 && |
@@ -1864,14 +1958,16 @@ static int wait_log_commit(struct btrfs_root *root, unsigned long transid) | |||
1864 | return 0; | 1958 | return 0; |
1865 | } | 1959 | } |
1866 | 1960 | ||
1867 | static int wait_for_writer(struct btrfs_root *root) | 1961 | static int wait_for_writer(struct btrfs_trans_handle *trans, |
1962 | struct btrfs_root *root) | ||
1868 | { | 1963 | { |
1869 | DEFINE_WAIT(wait); | 1964 | DEFINE_WAIT(wait); |
1870 | while (atomic_read(&root->log_writers)) { | 1965 | while (atomic_read(&root->log_writers)) { |
1871 | prepare_to_wait(&root->log_writer_wait, | 1966 | prepare_to_wait(&root->log_writer_wait, |
1872 | &wait, TASK_UNINTERRUPTIBLE); | 1967 | &wait, TASK_UNINTERRUPTIBLE); |
1873 | mutex_unlock(&root->log_mutex); | 1968 | mutex_unlock(&root->log_mutex); |
1874 | if (atomic_read(&root->log_writers)) | 1969 | if (root->fs_info->last_trans_log_full_commit != |
1970 | trans->transid && atomic_read(&root->log_writers)) | ||
1875 | schedule(); | 1971 | schedule(); |
1876 | mutex_lock(&root->log_mutex); | 1972 | mutex_lock(&root->log_mutex); |
1877 | finish_wait(&root->log_writer_wait, &wait); | 1973 | finish_wait(&root->log_writer_wait, &wait); |
@@ -1882,7 +1978,14 @@ static int wait_for_writer(struct btrfs_root *root) | |||
1882 | /* | 1978 | /* |
1883 | * btrfs_sync_log does sends a given tree log down to the disk and | 1979 | * btrfs_sync_log does sends a given tree log down to the disk and |
1884 | * updates the super blocks to record it. When this call is done, | 1980 | * updates the super blocks to record it. When this call is done, |
1885 | * you know that any inodes previously logged are safely on disk | 1981 | * you know that any inodes previously logged are safely on disk only |
1982 | * if it returns 0. | ||
1983 | * | ||
1984 | * Any other return value means you need to call btrfs_commit_transaction. | ||
1985 | * Some of the edge cases for fsyncing directories that have had unlinks | ||
1986 | * or renames done in the past mean that sometimes the only safe | ||
1987 | * fsync is to commit the whole FS. When btrfs_sync_log returns -EAGAIN, | ||
1988 | * that has happened. | ||
1886 | */ | 1989 | */ |
1887 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 1990 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
1888 | struct btrfs_root *root) | 1991 | struct btrfs_root *root) |
@@ -1896,7 +1999,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1896 | mutex_lock(&root->log_mutex); | 1999 | mutex_lock(&root->log_mutex); |
1897 | index1 = root->log_transid % 2; | 2000 | index1 = root->log_transid % 2; |
1898 | if (atomic_read(&root->log_commit[index1])) { | 2001 | if (atomic_read(&root->log_commit[index1])) { |
1899 | wait_log_commit(root, root->log_transid); | 2002 | wait_log_commit(trans, root, root->log_transid); |
1900 | mutex_unlock(&root->log_mutex); | 2003 | mutex_unlock(&root->log_mutex); |
1901 | return 0; | 2004 | return 0; |
1902 | } | 2005 | } |
@@ -1904,18 +2007,26 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1904 | 2007 | ||
1905 | /* wait for previous tree log sync to complete */ | 2008 | /* wait for previous tree log sync to complete */ |
1906 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) | 2009 | if (atomic_read(&root->log_commit[(index1 + 1) % 2])) |
1907 | wait_log_commit(root, root->log_transid - 1); | 2010 | wait_log_commit(trans, root, root->log_transid - 1); |
1908 | 2011 | ||
1909 | while (1) { | 2012 | while (1) { |
1910 | unsigned long batch = root->log_batch; | 2013 | unsigned long batch = root->log_batch; |
1911 | mutex_unlock(&root->log_mutex); | 2014 | mutex_unlock(&root->log_mutex); |
1912 | schedule_timeout_uninterruptible(1); | 2015 | schedule_timeout_uninterruptible(1); |
1913 | mutex_lock(&root->log_mutex); | 2016 | mutex_lock(&root->log_mutex); |
1914 | wait_for_writer(root); | 2017 | |
2018 | wait_for_writer(trans, root); | ||
1915 | if (batch == root->log_batch) | 2019 | if (batch == root->log_batch) |
1916 | break; | 2020 | break; |
1917 | } | 2021 | } |
1918 | 2022 | ||
2023 | /* bail out if we need to do a full commit */ | ||
2024 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | ||
2025 | ret = -EAGAIN; | ||
2026 | mutex_unlock(&root->log_mutex); | ||
2027 | goto out; | ||
2028 | } | ||
2029 | |||
1919 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); | 2030 | ret = btrfs_write_and_wait_marked_extents(log, &log->dirty_log_pages); |
1920 | BUG_ON(ret); | 2031 | BUG_ON(ret); |
1921 | 2032 | ||
@@ -1951,16 +2062,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1951 | 2062 | ||
1952 | index2 = log_root_tree->log_transid % 2; | 2063 | index2 = log_root_tree->log_transid % 2; |
1953 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2064 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
1954 | wait_log_commit(log_root_tree, log_root_tree->log_transid); | 2065 | wait_log_commit(trans, log_root_tree, |
2066 | log_root_tree->log_transid); | ||
1955 | mutex_unlock(&log_root_tree->log_mutex); | 2067 | mutex_unlock(&log_root_tree->log_mutex); |
1956 | goto out; | 2068 | goto out; |
1957 | } | 2069 | } |
1958 | atomic_set(&log_root_tree->log_commit[index2], 1); | 2070 | atomic_set(&log_root_tree->log_commit[index2], 1); |
1959 | 2071 | ||
1960 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) | 2072 | if (atomic_read(&log_root_tree->log_commit[(index2 + 1) % 2])) { |
1961 | wait_log_commit(log_root_tree, log_root_tree->log_transid - 1); | 2073 | wait_log_commit(trans, log_root_tree, |
2074 | log_root_tree->log_transid - 1); | ||
2075 | } | ||
2076 | |||
2077 | wait_for_writer(trans, log_root_tree); | ||
1962 | 2078 | ||
1963 | wait_for_writer(log_root_tree); | 2079 | /* |
2080 | * now that we've moved on to the tree of log tree roots, | ||
2081 | * check the full commit flag again | ||
2082 | */ | ||
2083 | if (root->fs_info->last_trans_log_full_commit == trans->transid) { | ||
2084 | mutex_unlock(&log_root_tree->log_mutex); | ||
2085 | ret = -EAGAIN; | ||
2086 | goto out_wake_log_root; | ||
2087 | } | ||
1964 | 2088 | ||
1965 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, | 2089 | ret = btrfs_write_and_wait_marked_extents(log_root_tree, |
1966 | &log_root_tree->dirty_log_pages); | 2090 | &log_root_tree->dirty_log_pages); |
@@ -1985,7 +2109,9 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
1985 | * in and cause problems either. | 2109 | * in and cause problems either. |
1986 | */ | 2110 | */ |
1987 | write_ctree_super(trans, root->fs_info->tree_root, 2); | 2111 | write_ctree_super(trans, root->fs_info->tree_root, 2); |
2112 | ret = 0; | ||
1988 | 2113 | ||
2114 | out_wake_log_root: | ||
1989 | atomic_set(&log_root_tree->log_commit[index2], 0); | 2115 | atomic_set(&log_root_tree->log_commit[index2], 0); |
1990 | smp_mb(); | 2116 | smp_mb(); |
1991 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) | 2117 | if (waitqueue_active(&log_root_tree->log_commit_wait[index2])) |
@@ -1998,7 +2124,8 @@ out: | |||
1998 | return 0; | 2124 | return 0; |
1999 | } | 2125 | } |
2000 | 2126 | ||
2001 | /* * free all the extents used by the tree log. This should be called | 2127 | /* |
2128 | * free all the extents used by the tree log. This should be called | ||
2002 | * at commit time of the full transaction | 2129 | * at commit time of the full transaction |
2003 | */ | 2130 | */ |
2004 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | 2131 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) |
@@ -2132,7 +2259,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2132 | 2259 | ||
2133 | btrfs_free_path(path); | 2260 | btrfs_free_path(path); |
2134 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2261 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2135 | end_log_trans(root); | 2262 | btrfs_end_log_trans(root); |
2136 | 2263 | ||
2137 | return 0; | 2264 | return 0; |
2138 | } | 2265 | } |
@@ -2159,7 +2286,7 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2159 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2286 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
2160 | dirid, &index); | 2287 | dirid, &index); |
2161 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2288 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2162 | end_log_trans(root); | 2289 | btrfs_end_log_trans(root); |
2163 | 2290 | ||
2164 | return ret; | 2291 | return ret; |
2165 | } | 2292 | } |
@@ -2559,7 +2686,7 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2559 | * | 2686 | * |
2560 | * This handles both files and directories. | 2687 | * This handles both files and directories. |
2561 | */ | 2688 | */ |
2562 | static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | 2689 | static int btrfs_log_inode(struct btrfs_trans_handle *trans, |
2563 | struct btrfs_root *root, struct inode *inode, | 2690 | struct btrfs_root *root, struct inode *inode, |
2564 | int inode_only) | 2691 | int inode_only) |
2565 | { | 2692 | { |
@@ -2585,28 +2712,17 @@ static int __btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2585 | min_key.offset = 0; | 2712 | min_key.offset = 0; |
2586 | 2713 | ||
2587 | max_key.objectid = inode->i_ino; | 2714 | max_key.objectid = inode->i_ino; |
2715 | |||
2716 | /* today the code can only do partial logging of directories */ | ||
2717 | if (!S_ISDIR(inode->i_mode)) | ||
2718 | inode_only = LOG_INODE_ALL; | ||
2719 | |||
2588 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) | 2720 | if (inode_only == LOG_INODE_EXISTS || S_ISDIR(inode->i_mode)) |
2589 | max_key.type = BTRFS_XATTR_ITEM_KEY; | 2721 | max_key.type = BTRFS_XATTR_ITEM_KEY; |
2590 | else | 2722 | else |
2591 | max_key.type = (u8)-1; | 2723 | max_key.type = (u8)-1; |
2592 | max_key.offset = (u64)-1; | 2724 | max_key.offset = (u64)-1; |
2593 | 2725 | ||
2594 | /* | ||
2595 | * if this inode has already been logged and we're in inode_only | ||
2596 | * mode, we don't want to delete the things that have already | ||
2597 | * been written to the log. | ||
2598 | * | ||
2599 | * But, if the inode has been through an inode_only log, | ||
2600 | * the logged_trans field is not set. This allows us to catch | ||
2601 | * any new names for this inode in the backrefs by logging it | ||
2602 | * again | ||
2603 | */ | ||
2604 | if (inode_only == LOG_INODE_EXISTS && | ||
2605 | BTRFS_I(inode)->logged_trans == trans->transid) { | ||
2606 | btrfs_free_path(path); | ||
2607 | btrfs_free_path(dst_path); | ||
2608 | goto out; | ||
2609 | } | ||
2610 | mutex_lock(&BTRFS_I(inode)->log_mutex); | 2726 | mutex_lock(&BTRFS_I(inode)->log_mutex); |
2611 | 2727 | ||
2612 | /* | 2728 | /* |
@@ -2693,7 +2809,6 @@ next_slot: | |||
2693 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { | 2809 | if (inode_only == LOG_INODE_ALL && S_ISDIR(inode->i_mode)) { |
2694 | btrfs_release_path(root, path); | 2810 | btrfs_release_path(root, path); |
2695 | btrfs_release_path(log, dst_path); | 2811 | btrfs_release_path(log, dst_path); |
2696 | BTRFS_I(inode)->log_dirty_trans = 0; | ||
2697 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2812 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
2698 | BUG_ON(ret); | 2813 | BUG_ON(ret); |
2699 | } | 2814 | } |
@@ -2702,19 +2817,69 @@ next_slot: | |||
2702 | 2817 | ||
2703 | btrfs_free_path(path); | 2818 | btrfs_free_path(path); |
2704 | btrfs_free_path(dst_path); | 2819 | btrfs_free_path(dst_path); |
2705 | out: | ||
2706 | return 0; | 2820 | return 0; |
2707 | } | 2821 | } |
2708 | 2822 | ||
2709 | int btrfs_log_inode(struct btrfs_trans_handle *trans, | 2823 | /* |
2710 | struct btrfs_root *root, struct inode *inode, | 2824 | * follow the dentry parent pointers up the chain and see if any |
2711 | int inode_only) | 2825 | * of the directories in it require a full commit before they can |
2826 | * be logged. Returns zero if nothing special needs to be done or 1 if | ||
2827 | * a full commit is required. | ||
2828 | */ | ||
2829 | static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans, | ||
2830 | struct inode *inode, | ||
2831 | struct dentry *parent, | ||
2832 | struct super_block *sb, | ||
2833 | u64 last_committed) | ||
2712 | { | 2834 | { |
2713 | int ret; | 2835 | int ret = 0; |
2836 | struct btrfs_root *root; | ||
2714 | 2837 | ||
2715 | start_log_trans(trans, root); | 2838 | /* |
2716 | ret = __btrfs_log_inode(trans, root, inode, inode_only); | 2839 | * for regular files, if its inode is already on disk, we don't |
2717 | end_log_trans(root); | 2840 | * have to worry about the parents at all. This is because |
2841 | * we can use the last_unlink_trans field to record renames | ||
2842 | * and other fun in this file. | ||
2843 | */ | ||
2844 | if (S_ISREG(inode->i_mode) && | ||
2845 | BTRFS_I(inode)->generation <= last_committed && | ||
2846 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | ||
2847 | goto out; | ||
2848 | |||
2849 | if (!S_ISDIR(inode->i_mode)) { | ||
2850 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2851 | goto out; | ||
2852 | inode = parent->d_inode; | ||
2853 | } | ||
2854 | |||
2855 | while (1) { | ||
2856 | BTRFS_I(inode)->logged_trans = trans->transid; | ||
2857 | smp_mb(); | ||
2858 | |||
2859 | if (BTRFS_I(inode)->last_unlink_trans > last_committed) { | ||
2860 | root = BTRFS_I(inode)->root; | ||
2861 | |||
2862 | /* | ||
2863 | * make sure any commits to the log are forced | ||
2864 | * to be full commits | ||
2865 | */ | ||
2866 | root->fs_info->last_trans_log_full_commit = | ||
2867 | trans->transid; | ||
2868 | ret = 1; | ||
2869 | break; | ||
2870 | } | ||
2871 | |||
2872 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2873 | break; | ||
2874 | |||
2875 | if (parent == sb->s_root) | ||
2876 | break; | ||
2877 | |||
2878 | parent = parent->d_parent; | ||
2879 | inode = parent->d_inode; | ||
2880 | |||
2881 | } | ||
2882 | out: | ||
2718 | return ret; | 2883 | return ret; |
2719 | } | 2884 | } |
2720 | 2885 | ||
@@ -2724,31 +2889,65 @@ int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2724 | * only logging is done of any parent directories that are older than | 2889 | * only logging is done of any parent directories that are older than |
2725 | * the last committed transaction | 2890 | * the last committed transaction |
2726 | */ | 2891 | */ |
2727 | int btrfs_log_dentry(struct btrfs_trans_handle *trans, | 2892 | int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, |
2728 | struct btrfs_root *root, struct dentry *dentry) | 2893 | struct btrfs_root *root, struct inode *inode, |
2894 | struct dentry *parent, int exists_only) | ||
2729 | { | 2895 | { |
2730 | int inode_only = LOG_INODE_ALL; | 2896 | int inode_only = exists_only ? LOG_INODE_EXISTS : LOG_INODE_ALL; |
2731 | struct super_block *sb; | 2897 | struct super_block *sb; |
2732 | int ret; | 2898 | int ret = 0; |
2899 | u64 last_committed = root->fs_info->last_trans_committed; | ||
2900 | |||
2901 | sb = inode->i_sb; | ||
2902 | |||
2903 | if (root->fs_info->last_trans_log_full_commit > | ||
2904 | root->fs_info->last_trans_committed) { | ||
2905 | ret = 1; | ||
2906 | goto end_no_trans; | ||
2907 | } | ||
2908 | |||
2909 | ret = check_parent_dirs_for_sync(trans, inode, parent, | ||
2910 | sb, last_committed); | ||
2911 | if (ret) | ||
2912 | goto end_no_trans; | ||
2733 | 2913 | ||
2734 | start_log_trans(trans, root); | 2914 | start_log_trans(trans, root); |
2735 | sb = dentry->d_inode->i_sb; | ||
2736 | while (1) { | ||
2737 | ret = __btrfs_log_inode(trans, root, dentry->d_inode, | ||
2738 | inode_only); | ||
2739 | BUG_ON(ret); | ||
2740 | inode_only = LOG_INODE_EXISTS; | ||
2741 | 2915 | ||
2742 | dentry = dentry->d_parent; | 2916 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2743 | if (!dentry || !dentry->d_inode || sb != dentry->d_inode->i_sb) | 2917 | BUG_ON(ret); |
2918 | |||
2919 | /* | ||
2920 | * for regular files, if its inode is already on disk, we don't | ||
2921 | * have to worry about the parents at all. This is because | ||
2922 | * we can use the last_unlink_trans field to record renames | ||
2923 | * and other fun in this file. | ||
2924 | */ | ||
2925 | if (S_ISREG(inode->i_mode) && | ||
2926 | BTRFS_I(inode)->generation <= last_committed && | ||
2927 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | ||
2928 | goto no_parent; | ||
2929 | |||
2930 | inode_only = LOG_INODE_EXISTS; | ||
2931 | while (1) { | ||
2932 | if (!parent || !parent->d_inode || sb != parent->d_inode->i_sb) | ||
2744 | break; | 2933 | break; |
2745 | 2934 | ||
2746 | if (BTRFS_I(dentry->d_inode)->generation <= | 2935 | inode = parent->d_inode; |
2747 | root->fs_info->last_trans_committed) | 2936 | if (BTRFS_I(inode)->generation > |
2937 | root->fs_info->last_trans_committed) { | ||
2938 | ret = btrfs_log_inode(trans, root, inode, inode_only); | ||
2939 | BUG_ON(ret); | ||
2940 | } | ||
2941 | if (parent == sb->s_root) | ||
2748 | break; | 2942 | break; |
2943 | |||
2944 | parent = parent->d_parent; | ||
2749 | } | 2945 | } |
2750 | end_log_trans(root); | 2946 | no_parent: |
2751 | return 0; | 2947 | ret = 0; |
2948 | btrfs_end_log_trans(root); | ||
2949 | end_no_trans: | ||
2950 | return ret; | ||
2752 | } | 2951 | } |
2753 | 2952 | ||
2754 | /* | 2953 | /* |
@@ -2760,12 +2959,8 @@ int btrfs_log_dentry(struct btrfs_trans_handle *trans, | |||
2760 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 2959 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
2761 | struct btrfs_root *root, struct dentry *dentry) | 2960 | struct btrfs_root *root, struct dentry *dentry) |
2762 | { | 2961 | { |
2763 | u64 gen; | 2962 | return btrfs_log_inode_parent(trans, root, dentry->d_inode, |
2764 | gen = root->fs_info->last_trans_new_blockgroup; | 2963 | dentry->d_parent, 0); |
2765 | if (gen > root->fs_info->last_trans_committed) | ||
2766 | return 1; | ||
2767 | else | ||
2768 | return btrfs_log_dentry(trans, root, dentry); | ||
2769 | } | 2964 | } |
2770 | 2965 | ||
2771 | /* | 2966 | /* |
@@ -2884,3 +3079,94 @@ again: | |||
2884 | kfree(log_root_tree); | 3079 | kfree(log_root_tree); |
2885 | return 0; | 3080 | return 0; |
2886 | } | 3081 | } |
3082 | |||
3083 | /* | ||
3084 | * there are some corner cases where we want to force a full | ||
3085 | * commit instead of allowing a directory to be logged. | ||
3086 | * | ||
3087 | * They revolve around files that were unlinked from the directory, and | ||
3088 | * this function updates the parent directory so that a full commit is | ||
3089 | * properly done if it is fsync'd later after the unlinks are done. | ||
3090 | */ | ||
3091 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | ||
3092 | struct inode *dir, struct inode *inode, | ||
3093 | int for_rename) | ||
3094 | { | ||
3095 | /* | ||
3096 | * when we're logging a file, if it hasn't been renamed | ||
3097 | * or unlinked, and its inode is fully committed on disk, | ||
3098 | * we don't have to worry about walking up the directory chain | ||
3099 | * to log its parents. | ||
3100 | * | ||
3101 | * So, we use the last_unlink_trans field to put this transid | ||
3102 | * into the file. When the file is logged we check it and | ||
3103 | * don't log the parents if the file is fully on disk. | ||
3104 | */ | ||
3105 | if (S_ISREG(inode->i_mode)) | ||
3106 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | ||
3107 | |||
3108 | /* | ||
3109 | * if this directory was already logged any new | ||
3110 | * names for this file/dir will get recorded | ||
3111 | */ | ||
3112 | smp_mb(); | ||
3113 | if (BTRFS_I(dir)->logged_trans == trans->transid) | ||
3114 | return; | ||
3115 | |||
3116 | /* | ||
3117 | * if the inode we're about to unlink was logged, | ||
3118 | * the log will be properly updated for any new names | ||
3119 | */ | ||
3120 | if (BTRFS_I(inode)->logged_trans == trans->transid) | ||
3121 | return; | ||
3122 | |||
3123 | /* | ||
3124 | * when renaming files across directories, if the directory | ||
3125 | * we're unlinking from gets fsync'd later on, there's | ||
3126 | * no way to find the destination directory later and fsync it | ||
3127 | * properly. So, we have to be conservative and force commits | ||
3128 | * so the new name gets discovered. | ||
3129 | */ | ||
3130 | if (for_rename) | ||
3131 | goto record; | ||
3132 | |||
3133 | /* we can safely do the unlink without any special recording */ | ||
3134 | return; | ||
3135 | |||
3136 | record: | ||
3137 | BTRFS_I(dir)->last_unlink_trans = trans->transid; | ||
3138 | } | ||
3139 | |||
3140 | /* | ||
3141 | * Call this after adding a new name for a file and it will properly | ||
3142 | * update the log to reflect the new name. | ||
3143 | * | ||
3144 | * It will return zero if all goes well, and it will return 1 if a | ||
3145 | * full transaction commit is required. | ||
3146 | */ | ||
3147 | int btrfs_log_new_name(struct btrfs_trans_handle *trans, | ||
3148 | struct inode *inode, struct inode *old_dir, | ||
3149 | struct dentry *parent) | ||
3150 | { | ||
3151 | struct btrfs_root * root = BTRFS_I(inode)->root; | ||
3152 | |||
3153 | /* | ||
3154 | * this will force the logging code to walk the dentry chain | ||
3155 | * up for the file | ||
3156 | */ | ||
3157 | if (S_ISREG(inode->i_mode)) | ||
3158 | BTRFS_I(inode)->last_unlink_trans = trans->transid; | ||
3159 | |||
3160 | /* | ||
3161 | * if this inode hasn't been logged and the directory we're renaming it | ||
3162 | * from hasn't been logged, we don't need to log it | ||
3163 | */ | ||
3164 | if (BTRFS_I(inode)->logged_trans <= | ||
3165 | root->fs_info->last_trans_committed && | ||
3166 | (!old_dir || BTRFS_I(old_dir)->logged_trans <= | ||
3167 | root->fs_info->last_trans_committed)) | ||
3168 | return 0; | ||
3169 | |||
3170 | return btrfs_log_inode_parent(trans, root, inode, parent, 1); | ||
3171 | } | ||
3172 | |||
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index b9409b32ed02..d09c7609e16b 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -22,14 +22,9 @@ | |||
22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 22 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
23 | struct btrfs_root *root); | 23 | struct btrfs_root *root); |
24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 24 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
25 | int btrfs_log_dentry(struct btrfs_trans_handle *trans, | ||
26 | struct btrfs_root *root, struct dentry *dentry); | ||
27 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 25 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
28 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 26 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
29 | struct btrfs_root *root, struct dentry *dentry); | 27 | struct btrfs_root *root, struct dentry *dentry); |
30 | int btrfs_log_inode(struct btrfs_trans_handle *trans, | ||
31 | struct btrfs_root *root, struct inode *inode, | ||
32 | int inode_only); | ||
33 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | 28 | int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, |
34 | struct btrfs_root *root, | 29 | struct btrfs_root *root, |
35 | const char *name, int name_len, | 30 | const char *name, int name_len, |
@@ -38,4 +33,16 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
38 | struct btrfs_root *root, | 33 | struct btrfs_root *root, |
39 | const char *name, int name_len, | 34 | const char *name, int name_len, |
40 | struct inode *inode, u64 dirid); | 35 | struct inode *inode, u64 dirid); |
36 | int btrfs_join_running_log_trans(struct btrfs_root *root); | ||
37 | int btrfs_end_log_trans(struct btrfs_root *root); | ||
38 | int btrfs_pin_log_trans(struct btrfs_root *root); | ||
39 | int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | ||
40 | struct btrfs_root *root, struct inode *inode, | ||
41 | struct dentry *parent, int exists_only); | ||
42 | void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans, | ||
43 | struct inode *dir, struct inode *inode, | ||
44 | int for_rename); | ||
45 | int btrfs_log_new_name(struct btrfs_trans_handle *trans, | ||
46 | struct inode *inode, struct inode *old_dir, | ||
47 | struct dentry *parent); | ||
41 | #endif | 48 | #endif |