diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 13:43:44 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 13:43:44 -0400 |
commit | 105a048a4f35f7a74c7cc20b36dd83658b6ec232 (patch) | |
tree | 043b1110cda0042ba35d8aae59382bb094d0af3f | |
parent | 00b9b0af5887fed54e899e3b7f5c2ccf5e739def (diff) | |
parent | 9aeead73782c4b8e2a91def36dbf95db28605c95 (diff) |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (27 commits)
Btrfs: add more error checking to btrfs_dirty_inode
Btrfs: allow unaligned DIO
Btrfs: drop verbose enospc printk
Btrfs: Fix block generation verification race
Btrfs: fix preallocation and nodatacow checks in O_DIRECT
Btrfs: avoid ENOSPC errors in btrfs_dirty_inode
Btrfs: move O_DIRECT space reservation to btrfs_direct_IO
Btrfs: rework O_DIRECT enospc handling
Btrfs: use async helpers for DIO write checksumming
Btrfs: don't walk around with task->state != TASK_RUNNING
Btrfs: do aio_write instead of write
Btrfs: add basic DIO read/write support
direct-io: do not merge logically non-contiguous requests
direct-io: add a hook for the fs to provide its own submit_bio function
fs: allow short direct-io reads to be completed via buffered IO
Btrfs: Metadata ENOSPC handling for balance
Btrfs: Pre-allocate space for data relocation
Btrfs: Metadata ENOSPC handling for tree log
Btrfs: Metadata reservation for orphan inodes
Btrfs: Introduce global metadata reservation
...
31 files changed, 5066 insertions, 2740 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 462859a30141..7ec14097fef1 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
@@ -377,6 +377,7 @@ again: | |||
377 | if (!list_empty(&worker->pending) || | 377 | if (!list_empty(&worker->pending) || |
378 | !list_empty(&worker->prio_pending)) { | 378 | !list_empty(&worker->prio_pending)) { |
379 | spin_unlock_irq(&worker->lock); | 379 | spin_unlock_irq(&worker->lock); |
380 | set_current_state(TASK_RUNNING); | ||
380 | goto again; | 381 | goto again; |
381 | } | 382 | } |
382 | 383 | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 7a4dee199832..6ad63f17eca0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
@@ -137,8 +137,8 @@ struct btrfs_inode { | |||
137 | * of extent items we've reserved metadata for. | 137 | * of extent items we've reserved metadata for. |
138 | */ | 138 | */ |
139 | spinlock_t accounting_lock; | 139 | spinlock_t accounting_lock; |
140 | atomic_t outstanding_extents; | ||
140 | int reserved_extents; | 141 | int reserved_extents; |
141 | int outstanding_extents; | ||
142 | 142 | ||
143 | /* | 143 | /* |
144 | * ordered_data_close is set by truncate when a file that used | 144 | * ordered_data_close is set by truncate when a file that used |
@@ -151,6 +151,7 @@ struct btrfs_inode { | |||
151 | * of these. | 151 | * of these. |
152 | */ | 152 | */ |
153 | unsigned ordered_data_close:1; | 153 | unsigned ordered_data_close:1; |
154 | unsigned orphan_meta_reserved:1; | ||
154 | unsigned dummy_inode:1; | 155 | unsigned dummy_inode:1; |
155 | 156 | ||
156 | /* | 157 | /* |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6795a713b205..0d1d966b0fe4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
@@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, | |||
280 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | 280 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, |
281 | struct btrfs_root *root, | 281 | struct btrfs_root *root, |
282 | struct extent_buffer *buf, | 282 | struct extent_buffer *buf, |
283 | struct extent_buffer *cow) | 283 | struct extent_buffer *cow, |
284 | int *last_ref) | ||
284 | { | 285 | { |
285 | u64 refs; | 286 | u64 refs; |
286 | u64 owner; | 287 | u64 owner; |
@@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
366 | BUG_ON(ret); | 367 | BUG_ON(ret); |
367 | } | 368 | } |
368 | clean_tree_block(trans, root, buf); | 369 | clean_tree_block(trans, root, buf); |
370 | *last_ref = 1; | ||
369 | } | 371 | } |
370 | return 0; | 372 | return 0; |
371 | } | 373 | } |
@@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
392 | struct btrfs_disk_key disk_key; | 394 | struct btrfs_disk_key disk_key; |
393 | struct extent_buffer *cow; | 395 | struct extent_buffer *cow; |
394 | int level; | 396 | int level; |
397 | int last_ref = 0; | ||
395 | int unlock_orig = 0; | 398 | int unlock_orig = 0; |
396 | u64 parent_start; | 399 | u64 parent_start; |
397 | 400 | ||
@@ -442,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
442 | (unsigned long)btrfs_header_fsid(cow), | 445 | (unsigned long)btrfs_header_fsid(cow), |
443 | BTRFS_FSID_SIZE); | 446 | BTRFS_FSID_SIZE); |
444 | 447 | ||
445 | update_ref_for_cow(trans, root, buf, cow); | 448 | update_ref_for_cow(trans, root, buf, cow, &last_ref); |
449 | |||
450 | if (root->ref_cows) | ||
451 | btrfs_reloc_cow_block(trans, root, buf, cow); | ||
446 | 452 | ||
447 | if (buf == root->node) { | 453 | if (buf == root->node) { |
448 | WARN_ON(parent && parent != buf); | 454 | WARN_ON(parent && parent != buf); |
@@ -457,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
457 | extent_buffer_get(cow); | 463 | extent_buffer_get(cow); |
458 | spin_unlock(&root->node_lock); | 464 | spin_unlock(&root->node_lock); |
459 | 465 | ||
460 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 466 | btrfs_free_tree_block(trans, root, buf, parent_start, |
461 | parent_start, root->root_key.objectid, level); | 467 | last_ref); |
462 | free_extent_buffer(buf); | 468 | free_extent_buffer(buf); |
463 | add_root_to_dirty_list(root); | 469 | add_root_to_dirty_list(root); |
464 | } else { | 470 | } else { |
@@ -473,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
473 | btrfs_set_node_ptr_generation(parent, parent_slot, | 479 | btrfs_set_node_ptr_generation(parent, parent_slot, |
474 | trans->transid); | 480 | trans->transid); |
475 | btrfs_mark_buffer_dirty(parent); | 481 | btrfs_mark_buffer_dirty(parent); |
476 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 482 | btrfs_free_tree_block(trans, root, buf, parent_start, |
477 | parent_start, root->root_key.objectid, level); | 483 | last_ref); |
478 | } | 484 | } |
479 | if (unlock_orig) | 485 | if (unlock_orig) |
480 | btrfs_tree_unlock(buf); | 486 | btrfs_tree_unlock(buf); |
@@ -949,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | |||
949 | return bin_search(eb, key, level, slot); | 955 | return bin_search(eb, key, level, slot); |
950 | } | 956 | } |
951 | 957 | ||
958 | static void root_add_used(struct btrfs_root *root, u32 size) | ||
959 | { | ||
960 | spin_lock(&root->accounting_lock); | ||
961 | btrfs_set_root_used(&root->root_item, | ||
962 | btrfs_root_used(&root->root_item) + size); | ||
963 | spin_unlock(&root->accounting_lock); | ||
964 | } | ||
965 | |||
966 | static void root_sub_used(struct btrfs_root *root, u32 size) | ||
967 | { | ||
968 | spin_lock(&root->accounting_lock); | ||
969 | btrfs_set_root_used(&root->root_item, | ||
970 | btrfs_root_used(&root->root_item) - size); | ||
971 | spin_unlock(&root->accounting_lock); | ||
972 | } | ||
973 | |||
952 | /* given a node and slot number, this reads the blocks it points to. The | 974 | /* given a node and slot number, this reads the blocks it points to. The |
953 | * extent buffer is returned with a reference taken (but unlocked). | 975 | * extent buffer is returned with a reference taken (but unlocked). |
954 | * NULL is returned on error. | 976 | * NULL is returned on error. |
@@ -1019,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1019 | btrfs_tree_lock(child); | 1041 | btrfs_tree_lock(child); |
1020 | btrfs_set_lock_blocking(child); | 1042 | btrfs_set_lock_blocking(child); |
1021 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); | 1043 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); |
1022 | BUG_ON(ret); | 1044 | if (ret) { |
1045 | btrfs_tree_unlock(child); | ||
1046 | free_extent_buffer(child); | ||
1047 | goto enospc; | ||
1048 | } | ||
1023 | 1049 | ||
1024 | spin_lock(&root->node_lock); | 1050 | spin_lock(&root->node_lock); |
1025 | root->node = child; | 1051 | root->node = child; |
@@ -1034,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1034 | btrfs_tree_unlock(mid); | 1060 | btrfs_tree_unlock(mid); |
1035 | /* once for the path */ | 1061 | /* once for the path */ |
1036 | free_extent_buffer(mid); | 1062 | free_extent_buffer(mid); |
1037 | ret = btrfs_free_tree_block(trans, root, mid->start, mid->len, | 1063 | |
1038 | 0, root->root_key.objectid, level); | 1064 | root_sub_used(root, mid->len); |
1065 | btrfs_free_tree_block(trans, root, mid, 0, 1); | ||
1039 | /* once for the root ptr */ | 1066 | /* once for the root ptr */ |
1040 | free_extent_buffer(mid); | 1067 | free_extent_buffer(mid); |
1041 | return ret; | 1068 | return 0; |
1042 | } | 1069 | } |
1043 | if (btrfs_header_nritems(mid) > | 1070 | if (btrfs_header_nritems(mid) > |
1044 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 1071 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
@@ -1088,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1088 | if (wret < 0 && wret != -ENOSPC) | 1115 | if (wret < 0 && wret != -ENOSPC) |
1089 | ret = wret; | 1116 | ret = wret; |
1090 | if (btrfs_header_nritems(right) == 0) { | 1117 | if (btrfs_header_nritems(right) == 0) { |
1091 | u64 bytenr = right->start; | ||
1092 | u32 blocksize = right->len; | ||
1093 | |||
1094 | clean_tree_block(trans, root, right); | 1118 | clean_tree_block(trans, root, right); |
1095 | btrfs_tree_unlock(right); | 1119 | btrfs_tree_unlock(right); |
1096 | free_extent_buffer(right); | ||
1097 | right = NULL; | ||
1098 | wret = del_ptr(trans, root, path, level + 1, pslot + | 1120 | wret = del_ptr(trans, root, path, level + 1, pslot + |
1099 | 1); | 1121 | 1); |
1100 | if (wret) | 1122 | if (wret) |
1101 | ret = wret; | 1123 | ret = wret; |
1102 | wret = btrfs_free_tree_block(trans, root, | 1124 | root_sub_used(root, right->len); |
1103 | bytenr, blocksize, 0, | 1125 | btrfs_free_tree_block(trans, root, right, 0, 1); |
1104 | root->root_key.objectid, | 1126 | free_extent_buffer(right); |
1105 | level); | 1127 | right = NULL; |
1106 | if (wret) | ||
1107 | ret = wret; | ||
1108 | } else { | 1128 | } else { |
1109 | struct btrfs_disk_key right_key; | 1129 | struct btrfs_disk_key right_key; |
1110 | btrfs_node_key(right, &right_key, 0); | 1130 | btrfs_node_key(right, &right_key, 0); |
@@ -1136,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
1136 | BUG_ON(wret == 1); | 1156 | BUG_ON(wret == 1); |
1137 | } | 1157 | } |
1138 | if (btrfs_header_nritems(mid) == 0) { | 1158 | if (btrfs_header_nritems(mid) == 0) { |
1139 | /* we've managed to empty the middle node, drop it */ | ||
1140 | u64 bytenr = mid->start; | ||
1141 | u32 blocksize = mid->len; | ||
1142 | |||
1143 | clean_tree_block(trans, root, mid); | 1159 | clean_tree_block(trans, root, mid); |
1144 | btrfs_tree_unlock(mid); | 1160 | btrfs_tree_unlock(mid); |
1145 | free_extent_buffer(mid); | ||
1146 | mid = NULL; | ||
1147 | wret = del_ptr(trans, root, path, level + 1, pslot); | 1161 | wret = del_ptr(trans, root, path, level + 1, pslot); |
1148 | if (wret) | 1162 | if (wret) |
1149 | ret = wret; | 1163 | ret = wret; |
1150 | wret = btrfs_free_tree_block(trans, root, bytenr, blocksize, | 1164 | root_sub_used(root, mid->len); |
1151 | 0, root->root_key.objectid, level); | 1165 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
1152 | if (wret) | 1166 | free_extent_buffer(mid); |
1153 | ret = wret; | 1167 | mid = NULL; |
1154 | } else { | 1168 | } else { |
1155 | /* update the parent key to reflect our changes */ | 1169 | /* update the parent key to reflect our changes */ |
1156 | struct btrfs_disk_key mid_key; | 1170 | struct btrfs_disk_key mid_key; |
@@ -1590,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
1590 | btrfs_release_path(NULL, p); | 1604 | btrfs_release_path(NULL, p); |
1591 | 1605 | ||
1592 | ret = -EAGAIN; | 1606 | ret = -EAGAIN; |
1593 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1607 | tmp = read_tree_block(root, blocknr, blocksize, 0); |
1594 | if (tmp) { | 1608 | if (tmp) { |
1595 | /* | 1609 | /* |
1596 | * If the read above didn't mark this buffer up to date, | 1610 | * If the read above didn't mark this buffer up to date, |
@@ -1740,7 +1754,6 @@ again: | |||
1740 | p->nodes[level + 1], | 1754 | p->nodes[level + 1], |
1741 | p->slots[level + 1], &b); | 1755 | p->slots[level + 1], &b); |
1742 | if (err) { | 1756 | if (err) { |
1743 | free_extent_buffer(b); | ||
1744 | ret = err; | 1757 | ret = err; |
1745 | goto done; | 1758 | goto done; |
1746 | } | 1759 | } |
@@ -2076,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
2076 | if (IS_ERR(c)) | 2089 | if (IS_ERR(c)) |
2077 | return PTR_ERR(c); | 2090 | return PTR_ERR(c); |
2078 | 2091 | ||
2092 | root_add_used(root, root->nodesize); | ||
2093 | |||
2079 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); | 2094 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); |
2080 | btrfs_set_header_nritems(c, 1); | 2095 | btrfs_set_header_nritems(c, 1); |
2081 | btrfs_set_header_level(c, level); | 2096 | btrfs_set_header_level(c, level); |
@@ -2134,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | |||
2134 | int nritems; | 2149 | int nritems; |
2135 | 2150 | ||
2136 | BUG_ON(!path->nodes[level]); | 2151 | BUG_ON(!path->nodes[level]); |
2152 | btrfs_assert_tree_locked(path->nodes[level]); | ||
2137 | lower = path->nodes[level]; | 2153 | lower = path->nodes[level]; |
2138 | nritems = btrfs_header_nritems(lower); | 2154 | nritems = btrfs_header_nritems(lower); |
2139 | BUG_ON(slot > nritems); | 2155 | BUG_ON(slot > nritems); |
@@ -2202,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
2202 | if (IS_ERR(split)) | 2218 | if (IS_ERR(split)) |
2203 | return PTR_ERR(split); | 2219 | return PTR_ERR(split); |
2204 | 2220 | ||
2221 | root_add_used(root, root->nodesize); | ||
2222 | |||
2205 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); | 2223 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); |
2206 | btrfs_set_header_level(split, btrfs_header_level(c)); | 2224 | btrfs_set_header_level(split, btrfs_header_level(c)); |
2207 | btrfs_set_header_bytenr(split, split->start); | 2225 | btrfs_set_header_bytenr(split, split->start); |
@@ -2415,6 +2433,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
2415 | 2433 | ||
2416 | if (left_nritems) | 2434 | if (left_nritems) |
2417 | btrfs_mark_buffer_dirty(left); | 2435 | btrfs_mark_buffer_dirty(left); |
2436 | else | ||
2437 | clean_tree_block(trans, root, left); | ||
2438 | |||
2418 | btrfs_mark_buffer_dirty(right); | 2439 | btrfs_mark_buffer_dirty(right); |
2419 | 2440 | ||
2420 | btrfs_item_key(right, &disk_key, 0); | 2441 | btrfs_item_key(right, &disk_key, 0); |
@@ -2660,6 +2681,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2660 | btrfs_mark_buffer_dirty(left); | 2681 | btrfs_mark_buffer_dirty(left); |
2661 | if (right_nritems) | 2682 | if (right_nritems) |
2662 | btrfs_mark_buffer_dirty(right); | 2683 | btrfs_mark_buffer_dirty(right); |
2684 | else | ||
2685 | clean_tree_block(trans, root, right); | ||
2663 | 2686 | ||
2664 | btrfs_item_key(right, &disk_key, 0); | 2687 | btrfs_item_key(right, &disk_key, 0); |
2665 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); | 2688 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); |
@@ -2669,8 +2692,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
2669 | /* then fixup the leaf pointer in the path */ | 2692 | /* then fixup the leaf pointer in the path */ |
2670 | if (path->slots[0] < push_items) { | 2693 | if (path->slots[0] < push_items) { |
2671 | path->slots[0] += old_left_nritems; | 2694 | path->slots[0] += old_left_nritems; |
2672 | if (btrfs_header_nritems(path->nodes[0]) == 0) | ||
2673 | clean_tree_block(trans, root, path->nodes[0]); | ||
2674 | btrfs_tree_unlock(path->nodes[0]); | 2695 | btrfs_tree_unlock(path->nodes[0]); |
2675 | free_extent_buffer(path->nodes[0]); | 2696 | free_extent_buffer(path->nodes[0]); |
2676 | path->nodes[0] = left; | 2697 | path->nodes[0] = left; |
@@ -2932,10 +2953,10 @@ again: | |||
2932 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 2953 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
2933 | root->root_key.objectid, | 2954 | root->root_key.objectid, |
2934 | &disk_key, 0, l->start, 0); | 2955 | &disk_key, 0, l->start, 0); |
2935 | if (IS_ERR(right)) { | 2956 | if (IS_ERR(right)) |
2936 | BUG_ON(1); | ||
2937 | return PTR_ERR(right); | 2957 | return PTR_ERR(right); |
2938 | } | 2958 | |
2959 | root_add_used(root, root->leafsize); | ||
2939 | 2960 | ||
2940 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 2961 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
2941 | btrfs_set_header_bytenr(right, right->start); | 2962 | btrfs_set_header_bytenr(right, right->start); |
@@ -3054,7 +3075,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
3054 | 3075 | ||
3055 | btrfs_set_path_blocking(path); | 3076 | btrfs_set_path_blocking(path); |
3056 | ret = split_leaf(trans, root, &key, path, ins_len, 1); | 3077 | ret = split_leaf(trans, root, &key, path, ins_len, 1); |
3057 | BUG_ON(ret); | 3078 | if (ret) |
3079 | goto err; | ||
3058 | 3080 | ||
3059 | path->keep_locks = 0; | 3081 | path->keep_locks = 0; |
3060 | btrfs_unlock_up_safe(path, 1); | 3082 | btrfs_unlock_up_safe(path, 1); |
@@ -3796,9 +3818,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
3796 | */ | 3818 | */ |
3797 | btrfs_unlock_up_safe(path, 0); | 3819 | btrfs_unlock_up_safe(path, 0); |
3798 | 3820 | ||
3799 | ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, | 3821 | root_sub_used(root, leaf->len); |
3800 | 0, root->root_key.objectid, 0); | 3822 | |
3801 | return ret; | 3823 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
3824 | return 0; | ||
3802 | } | 3825 | } |
3803 | /* | 3826 | /* |
3804 | * delete the item at the leaf level in path. If that empties | 3827 | * delete the item at the leaf level in path. If that empties |
@@ -3865,6 +3888,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
3865 | if (leaf == root->node) { | 3888 | if (leaf == root->node) { |
3866 | btrfs_set_header_level(leaf, 0); | 3889 | btrfs_set_header_level(leaf, 0); |
3867 | } else { | 3890 | } else { |
3891 | btrfs_set_path_blocking(path); | ||
3892 | clean_tree_block(trans, root, leaf); | ||
3868 | ret = btrfs_del_leaf(trans, root, path, leaf); | 3893 | ret = btrfs_del_leaf(trans, root, path, leaf); |
3869 | BUG_ON(ret); | 3894 | BUG_ON(ret); |
3870 | } | 3895 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 746a7248678e..e9bf86415e86 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -34,6 +34,7 @@ | |||
34 | 34 | ||
35 | struct btrfs_trans_handle; | 35 | struct btrfs_trans_handle; |
36 | struct btrfs_transaction; | 36 | struct btrfs_transaction; |
37 | struct btrfs_pending_snapshot; | ||
37 | extern struct kmem_cache *btrfs_trans_handle_cachep; | 38 | extern struct kmem_cache *btrfs_trans_handle_cachep; |
38 | extern struct kmem_cache *btrfs_transaction_cachep; | 39 | extern struct kmem_cache *btrfs_transaction_cachep; |
39 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 40 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
@@ -663,6 +664,7 @@ struct btrfs_csum_item { | |||
663 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | 664 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) |
664 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) | 665 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) |
665 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) | 666 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) |
667 | #define BTRFS_NR_RAID_TYPES 5 | ||
666 | 668 | ||
667 | struct btrfs_block_group_item { | 669 | struct btrfs_block_group_item { |
668 | __le64 used; | 670 | __le64 used; |
@@ -674,42 +676,46 @@ struct btrfs_space_info { | |||
674 | u64 flags; | 676 | u64 flags; |
675 | 677 | ||
676 | u64 total_bytes; /* total bytes in the space */ | 678 | u64 total_bytes; /* total bytes in the space */ |
677 | u64 bytes_used; /* total bytes used on disk */ | 679 | u64 bytes_used; /* total bytes used, |
680 | this doesn't take mirrors into account */ | ||
678 | u64 bytes_pinned; /* total bytes pinned, will be freed when the | 681 | u64 bytes_pinned; /* total bytes pinned, will be freed when the |
679 | transaction finishes */ | 682 | transaction finishes */ |
680 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 683 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
681 | current allocations */ | 684 | current allocations */ |
682 | u64 bytes_readonly; /* total bytes that are read only */ | 685 | u64 bytes_readonly; /* total bytes that are read only */ |
683 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 686 | |
684 | u64 bytes_root; /* the number of bytes needed to commit a | ||
685 | transaction */ | ||
686 | u64 bytes_may_use; /* number of bytes that may be used for | 687 | u64 bytes_may_use; /* number of bytes that may be used for |
687 | delalloc/allocations */ | 688 | delalloc/allocations */ |
688 | u64 bytes_delalloc; /* number of bytes currently reserved for | 689 | u64 disk_used; /* total bytes used on disk */ |
689 | delayed allocation */ | ||
690 | 690 | ||
691 | int full; /* indicates that we cannot allocate any more | 691 | int full; /* indicates that we cannot allocate any more |
692 | chunks for this space */ | 692 | chunks for this space */ |
693 | int force_alloc; /* set if we need to force a chunk alloc for | 693 | int force_alloc; /* set if we need to force a chunk alloc for |
694 | this space */ | 694 | this space */ |
695 | int force_delalloc; /* make people start doing filemap_flush until | ||
696 | we're under a threshold */ | ||
697 | 695 | ||
698 | struct list_head list; | 696 | struct list_head list; |
699 | 697 | ||
700 | /* for controlling how we free up space for allocations */ | ||
701 | wait_queue_head_t allocate_wait; | ||
702 | wait_queue_head_t flush_wait; | ||
703 | int allocating_chunk; | ||
704 | int flushing; | ||
705 | |||
706 | /* for block groups in our same type */ | 698 | /* for block groups in our same type */ |
707 | struct list_head block_groups; | 699 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
708 | spinlock_t lock; | 700 | spinlock_t lock; |
709 | struct rw_semaphore groups_sem; | 701 | struct rw_semaphore groups_sem; |
710 | atomic_t caching_threads; | 702 | atomic_t caching_threads; |
711 | }; | 703 | }; |
712 | 704 | ||
705 | struct btrfs_block_rsv { | ||
706 | u64 size; | ||
707 | u64 reserved; | ||
708 | u64 freed[2]; | ||
709 | struct btrfs_space_info *space_info; | ||
710 | struct list_head list; | ||
711 | spinlock_t lock; | ||
712 | atomic_t usage; | ||
713 | unsigned int priority:8; | ||
714 | unsigned int durable:1; | ||
715 | unsigned int refill_used:1; | ||
716 | unsigned int full:1; | ||
717 | }; | ||
718 | |||
713 | /* | 719 | /* |
714 | * free clusters are used to claim free space in relatively large chunks, | 720 | * free clusters are used to claim free space in relatively large chunks, |
715 | * allowing us to do less seeky writes. They are used for all metadata | 721 | * allowing us to do less seeky writes. They are used for all metadata |
@@ -760,6 +766,7 @@ struct btrfs_block_group_cache { | |||
760 | spinlock_t lock; | 766 | spinlock_t lock; |
761 | u64 pinned; | 767 | u64 pinned; |
762 | u64 reserved; | 768 | u64 reserved; |
769 | u64 reserved_pinned; | ||
763 | u64 bytes_super; | 770 | u64 bytes_super; |
764 | u64 flags; | 771 | u64 flags; |
765 | u64 sectorsize; | 772 | u64 sectorsize; |
@@ -825,6 +832,22 @@ struct btrfs_fs_info { | |||
825 | /* logical->physical extent mapping */ | 832 | /* logical->physical extent mapping */ |
826 | struct btrfs_mapping_tree mapping_tree; | 833 | struct btrfs_mapping_tree mapping_tree; |
827 | 834 | ||
835 | /* block reservation for extent, checksum and root tree */ | ||
836 | struct btrfs_block_rsv global_block_rsv; | ||
837 | /* block reservation for delay allocation */ | ||
838 | struct btrfs_block_rsv delalloc_block_rsv; | ||
839 | /* block reservation for metadata operations */ | ||
840 | struct btrfs_block_rsv trans_block_rsv; | ||
841 | /* block reservation for chunk tree */ | ||
842 | struct btrfs_block_rsv chunk_block_rsv; | ||
843 | |||
844 | struct btrfs_block_rsv empty_block_rsv; | ||
845 | |||
846 | /* list of block reservations that cross multiple transactions */ | ||
847 | struct list_head durable_block_rsv_list; | ||
848 | |||
849 | struct mutex durable_block_rsv_mutex; | ||
850 | |||
828 | u64 generation; | 851 | u64 generation; |
829 | u64 last_trans_committed; | 852 | u64 last_trans_committed; |
830 | 853 | ||
@@ -927,7 +950,6 @@ struct btrfs_fs_info { | |||
927 | struct btrfs_workers endio_meta_write_workers; | 950 | struct btrfs_workers endio_meta_write_workers; |
928 | struct btrfs_workers endio_write_workers; | 951 | struct btrfs_workers endio_write_workers; |
929 | struct btrfs_workers submit_workers; | 952 | struct btrfs_workers submit_workers; |
930 | struct btrfs_workers enospc_workers; | ||
931 | /* | 953 | /* |
932 | * fixup workers take dirty pages that didn't properly go through | 954 | * fixup workers take dirty pages that didn't properly go through |
933 | * the cow mechanism and make them safe to write. It happens | 955 | * the cow mechanism and make them safe to write. It happens |
@@ -943,6 +965,7 @@ struct btrfs_fs_info { | |||
943 | int do_barriers; | 965 | int do_barriers; |
944 | int closing; | 966 | int closing; |
945 | int log_root_recovering; | 967 | int log_root_recovering; |
968 | int enospc_unlink; | ||
946 | 969 | ||
947 | u64 total_pinned; | 970 | u64 total_pinned; |
948 | 971 | ||
@@ -1012,6 +1035,9 @@ struct btrfs_root { | |||
1012 | struct completion kobj_unregister; | 1035 | struct completion kobj_unregister; |
1013 | struct mutex objectid_mutex; | 1036 | struct mutex objectid_mutex; |
1014 | 1037 | ||
1038 | spinlock_t accounting_lock; | ||
1039 | struct btrfs_block_rsv *block_rsv; | ||
1040 | |||
1015 | struct mutex log_mutex; | 1041 | struct mutex log_mutex; |
1016 | wait_queue_head_t log_writer_wait; | 1042 | wait_queue_head_t log_writer_wait; |
1017 | wait_queue_head_t log_commit_wait[2]; | 1043 | wait_queue_head_t log_commit_wait[2]; |
@@ -1043,7 +1069,6 @@ struct btrfs_root { | |||
1043 | int ref_cows; | 1069 | int ref_cows; |
1044 | int track_dirty; | 1070 | int track_dirty; |
1045 | int in_radix; | 1071 | int in_radix; |
1046 | int clean_orphans; | ||
1047 | 1072 | ||
1048 | u64 defrag_trans_start; | 1073 | u64 defrag_trans_start; |
1049 | struct btrfs_key defrag_progress; | 1074 | struct btrfs_key defrag_progress; |
@@ -1057,8 +1082,11 @@ struct btrfs_root { | |||
1057 | 1082 | ||
1058 | struct list_head root_list; | 1083 | struct list_head root_list; |
1059 | 1084 | ||
1060 | spinlock_t list_lock; | 1085 | spinlock_t orphan_lock; |
1061 | struct list_head orphan_list; | 1086 | struct list_head orphan_list; |
1087 | struct btrfs_block_rsv *orphan_block_rsv; | ||
1088 | int orphan_item_inserted; | ||
1089 | int orphan_cleanup_state; | ||
1062 | 1090 | ||
1063 | spinlock_t inode_lock; | 1091 | spinlock_t inode_lock; |
1064 | /* red-black tree that keeps track of in-memory inodes */ | 1092 | /* red-black tree that keeps track of in-memory inodes */ |
@@ -1965,6 +1993,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
1965 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1993 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
1966 | struct btrfs_root *root, unsigned long count); | 1994 | struct btrfs_root *root, unsigned long count); |
1967 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1995 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
1996 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
1997 | struct btrfs_root *root, u64 bytenr, | ||
1998 | u64 num_bytes, u64 *refs, u64 *flags); | ||
1968 | int btrfs_pin_extent(struct btrfs_root *root, | 1999 | int btrfs_pin_extent(struct btrfs_root *root, |
1969 | u64 bytenr, u64 num, int reserved); | 2000 | u64 bytenr, u64 num, int reserved); |
1970 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 2001 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
@@ -1984,10 +2015,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
1984 | u64 parent, u64 root_objectid, | 2015 | u64 parent, u64 root_objectid, |
1985 | struct btrfs_disk_key *key, int level, | 2016 | struct btrfs_disk_key *key, int level, |
1986 | u64 hint, u64 empty_size); | 2017 | u64 hint, u64 empty_size); |
1987 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2018 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
1988 | struct btrfs_root *root, | 2019 | struct btrfs_root *root, |
1989 | u64 bytenr, u32 blocksize, | 2020 | struct extent_buffer *buf, |
1990 | u64 parent, u64 root_objectid, int level); | 2021 | u64 parent, int last_ref); |
1991 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2022 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
1992 | struct btrfs_root *root, | 2023 | struct btrfs_root *root, |
1993 | u64 bytenr, u32 blocksize, | 2024 | u64 bytenr, u32 blocksize, |
@@ -2041,27 +2072,49 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
2041 | u64 size); | 2072 | u64 size); |
2042 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2073 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
2043 | struct btrfs_root *root, u64 group_start); | 2074 | struct btrfs_root *root, u64 group_start); |
2044 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | ||
2045 | struct btrfs_block_group_cache *group); | ||
2046 | |||
2047 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2075 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
2048 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2076 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
2049 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2077 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2050 | 2078 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | |
2051 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); | 2079 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
2052 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | 2080 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
2053 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | 2081 | struct btrfs_root *root, |
2054 | struct inode *inode, int num_items); | 2082 | int num_items, int *retries); |
2055 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | 2083 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
2056 | struct inode *inode, int num_items); | 2084 | struct btrfs_root *root); |
2057 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2085 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
2058 | u64 bytes); | 2086 | struct inode *inode); |
2059 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2087 | void btrfs_orphan_release_metadata(struct inode *inode); |
2060 | struct inode *inode, u64 bytes); | 2088 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, |
2061 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | 2089 | struct btrfs_pending_snapshot *pending); |
2062 | u64 bytes); | 2090 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); |
2063 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2091 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); |
2064 | u64 bytes); | 2092 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); |
2093 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | ||
2094 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | ||
2095 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | ||
2096 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
2097 | struct btrfs_block_rsv *rsv); | ||
2098 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
2099 | struct btrfs_block_rsv *rsv); | ||
2100 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
2101 | struct btrfs_root *root, | ||
2102 | struct btrfs_block_rsv *block_rsv, | ||
2103 | u64 num_bytes, int *retries); | ||
2104 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
2105 | struct btrfs_root *root, | ||
2106 | struct btrfs_block_rsv *block_rsv, | ||
2107 | u64 min_reserved, int min_factor); | ||
2108 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
2109 | struct btrfs_block_rsv *dst_rsv, | ||
2110 | u64 num_bytes); | ||
2111 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
2112 | struct btrfs_block_rsv *block_rsv, | ||
2113 | u64 num_bytes); | ||
2114 | int btrfs_set_block_group_ro(struct btrfs_root *root, | ||
2115 | struct btrfs_block_group_cache *cache); | ||
2116 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
2117 | struct btrfs_block_group_cache *cache); | ||
2065 | /* ctree.c */ | 2118 | /* ctree.c */ |
2066 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2119 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
2067 | int level, int *slot); | 2120 | int level, int *slot); |
@@ -2152,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
2152 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2205 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2153 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2206 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
2154 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2207 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
2155 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); | 2208 | int btrfs_drop_snapshot(struct btrfs_root *root, |
2209 | struct btrfs_block_rsv *block_rsv, int update_ref); | ||
2156 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2210 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
2157 | struct btrfs_root *root, | 2211 | struct btrfs_root *root, |
2158 | struct extent_buffer *node, | 2212 | struct extent_buffer *node, |
@@ -2245,6 +2299,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
2245 | struct btrfs_root *root, | 2299 | struct btrfs_root *root, |
2246 | const char *name, int name_len, | 2300 | const char *name, int name_len, |
2247 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 2301 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
2302 | struct btrfs_inode_ref * | ||
2303 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
2304 | struct btrfs_root *root, | ||
2305 | struct btrfs_path *path, | ||
2306 | const char *name, int name_len, | ||
2307 | u64 inode_objectid, u64 ref_objectid, int mod); | ||
2248 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 2308 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
2249 | struct btrfs_root *root, | 2309 | struct btrfs_root *root, |
2250 | struct btrfs_path *path, u64 objectid); | 2310 | struct btrfs_path *path, u64 objectid); |
@@ -2257,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
2257 | struct btrfs_root *root, u64 bytenr, u64 len); | 2317 | struct btrfs_root *root, u64 bytenr, u64 len); |
2258 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 2318 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
2259 | struct bio *bio, u32 *dst); | 2319 | struct bio *bio, u32 *dst); |
2320 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
2321 | struct bio *bio, u64 logical_offset, u32 *dst); | ||
2260 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 2322 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
2261 | struct btrfs_root *root, | 2323 | struct btrfs_root *root, |
2262 | u64 objectid, u64 pos, | 2324 | u64 objectid, u64 pos, |
@@ -2311,6 +2373,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
2311 | u32 min_type); | 2373 | u32 min_type); |
2312 | 2374 | ||
2313 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2375 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
2376 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); | ||
2314 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 2377 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
2315 | struct extent_state **cached_state); | 2378 | struct extent_state **cached_state); |
2316 | int btrfs_writepages(struct address_space *mapping, | 2379 | int btrfs_writepages(struct address_space *mapping, |
@@ -2349,10 +2412,20 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
2349 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2412 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
2350 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2413 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
2351 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2414 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
2415 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2416 | struct btrfs_pending_snapshot *pending, | ||
2417 | u64 *bytes_to_reserve); | ||
2418 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2419 | struct btrfs_pending_snapshot *pending); | ||
2420 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
2421 | struct btrfs_root *root); | ||
2352 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2422 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
2353 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2423 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
2354 | void btrfs_add_delayed_iput(struct inode *inode); | 2424 | void btrfs_add_delayed_iput(struct inode *inode); |
2355 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2425 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
2426 | int btrfs_prealloc_file_range(struct inode *inode, int mode, | ||
2427 | u64 start, u64 num_bytes, u64 min_size, | ||
2428 | loff_t actual_len, u64 *alloc_hint); | ||
2356 | extern const struct dentry_operations btrfs_dentry_operations; | 2429 | extern const struct dentry_operations btrfs_dentry_operations; |
2357 | 2430 | ||
2358 | /* ioctl.c */ | 2431 | /* ioctl.c */ |
@@ -2409,4 +2482,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
2409 | struct btrfs_root *root); | 2482 | struct btrfs_root *root); |
2410 | int btrfs_recover_relocation(struct btrfs_root *root); | 2483 | int btrfs_recover_relocation(struct btrfs_root *root); |
2411 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | 2484 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); |
2485 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
2486 | struct btrfs_root *root, struct extent_buffer *buf, | ||
2487 | struct extent_buffer *cow); | ||
2488 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2489 | struct btrfs_pending_snapshot *pending, | ||
2490 | u64 *bytes_to_reserve); | ||
2491 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
2492 | struct btrfs_pending_snapshot *pending); | ||
2412 | #endif | 2493 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 902ce507c4e3..e807b143b857 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
@@ -319,107 +319,6 @@ out: | |||
319 | } | 319 | } |
320 | 320 | ||
321 | /* | 321 | /* |
322 | * helper function to lookup reference count and flags of extent. | ||
323 | * | ||
324 | * the head node for delayed ref is used to store the sum of all the | ||
325 | * reference count modifications queued up in the rbtree. the head | ||
326 | * node may also store the extent flags to set. This way you can check | ||
327 | * to see what the reference count and extent flags would be if all of | ||
328 | * the delayed refs are not processed. | ||
329 | */ | ||
330 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
331 | struct btrfs_root *root, u64 bytenr, | ||
332 | u64 num_bytes, u64 *refs, u64 *flags) | ||
333 | { | ||
334 | struct btrfs_delayed_ref_node *ref; | ||
335 | struct btrfs_delayed_ref_head *head; | ||
336 | struct btrfs_delayed_ref_root *delayed_refs; | ||
337 | struct btrfs_path *path; | ||
338 | struct btrfs_extent_item *ei; | ||
339 | struct extent_buffer *leaf; | ||
340 | struct btrfs_key key; | ||
341 | u32 item_size; | ||
342 | u64 num_refs; | ||
343 | u64 extent_flags; | ||
344 | int ret; | ||
345 | |||
346 | path = btrfs_alloc_path(); | ||
347 | if (!path) | ||
348 | return -ENOMEM; | ||
349 | |||
350 | key.objectid = bytenr; | ||
351 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
352 | key.offset = num_bytes; | ||
353 | delayed_refs = &trans->transaction->delayed_refs; | ||
354 | again: | ||
355 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
356 | &key, path, 0, 0); | ||
357 | if (ret < 0) | ||
358 | goto out; | ||
359 | |||
360 | if (ret == 0) { | ||
361 | leaf = path->nodes[0]; | ||
362 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
363 | if (item_size >= sizeof(*ei)) { | ||
364 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
365 | struct btrfs_extent_item); | ||
366 | num_refs = btrfs_extent_refs(leaf, ei); | ||
367 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
368 | } else { | ||
369 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
370 | struct btrfs_extent_item_v0 *ei0; | ||
371 | BUG_ON(item_size != sizeof(*ei0)); | ||
372 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
373 | struct btrfs_extent_item_v0); | ||
374 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
375 | /* FIXME: this isn't correct for data */ | ||
376 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
377 | #else | ||
378 | BUG(); | ||
379 | #endif | ||
380 | } | ||
381 | BUG_ON(num_refs == 0); | ||
382 | } else { | ||
383 | num_refs = 0; | ||
384 | extent_flags = 0; | ||
385 | ret = 0; | ||
386 | } | ||
387 | |||
388 | spin_lock(&delayed_refs->lock); | ||
389 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); | ||
390 | if (ref) { | ||
391 | head = btrfs_delayed_node_to_head(ref); | ||
392 | if (!mutex_trylock(&head->mutex)) { | ||
393 | atomic_inc(&ref->refs); | ||
394 | spin_unlock(&delayed_refs->lock); | ||
395 | |||
396 | btrfs_release_path(root->fs_info->extent_root, path); | ||
397 | |||
398 | mutex_lock(&head->mutex); | ||
399 | mutex_unlock(&head->mutex); | ||
400 | btrfs_put_delayed_ref(ref); | ||
401 | goto again; | ||
402 | } | ||
403 | if (head->extent_op && head->extent_op->update_flags) | ||
404 | extent_flags |= head->extent_op->flags_to_set; | ||
405 | else | ||
406 | BUG_ON(num_refs == 0); | ||
407 | |||
408 | num_refs += ref->ref_mod; | ||
409 | mutex_unlock(&head->mutex); | ||
410 | } | ||
411 | WARN_ON(num_refs == 0); | ||
412 | if (refs) | ||
413 | *refs = num_refs; | ||
414 | if (flags) | ||
415 | *flags = extent_flags; | ||
416 | out: | ||
417 | spin_unlock(&delayed_refs->lock); | ||
418 | btrfs_free_path(path); | ||
419 | return ret; | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * helper function to update an extent delayed ref in the | 322 | * helper function to update an extent delayed ref in the |
424 | * rbtree. existing and update must both have the same | 323 | * rbtree. existing and update must both have the same |
425 | * bytenr and parent | 324 | * bytenr and parent |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f6fc67ddad36..50e3cf92fbda 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
@@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
167 | struct btrfs_delayed_ref_head * | 167 | struct btrfs_delayed_ref_head * |
168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); |
170 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
171 | struct btrfs_root *root, u64 bytenr, | ||
172 | u64 num_bytes, u64 *refs, u64 *flags); | ||
173 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | 170 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, |
174 | u64 bytenr, u64 num_bytes, u64 orig_parent, | 171 | u64 bytenr, u64 num_bytes, u64 orig_parent, |
175 | u64 parent, u64 orig_ref_root, u64 ref_root, | 172 | u64 parent, u64 orig_ref_root, u64 ref_root, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index feca04197d02..f3b287c22caf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -74,6 +74,11 @@ struct async_submit_bio { | |||
74 | int rw; | 74 | int rw; |
75 | int mirror_num; | 75 | int mirror_num; |
76 | unsigned long bio_flags; | 76 | unsigned long bio_flags; |
77 | /* | ||
78 | * bio_offset is optional, can be used if the pages in the bio | ||
79 | * can't tell us where in the file the bio should go | ||
80 | */ | ||
81 | u64 bio_offset; | ||
77 | struct btrfs_work work; | 82 | struct btrfs_work work; |
78 | }; | 83 | }; |
79 | 84 | ||
@@ -534,7 +539,8 @@ static void run_one_async_start(struct btrfs_work *work) | |||
534 | async = container_of(work, struct async_submit_bio, work); | 539 | async = container_of(work, struct async_submit_bio, work); |
535 | fs_info = BTRFS_I(async->inode)->root->fs_info; | 540 | fs_info = BTRFS_I(async->inode)->root->fs_info; |
536 | async->submit_bio_start(async->inode, async->rw, async->bio, | 541 | async->submit_bio_start(async->inode, async->rw, async->bio, |
537 | async->mirror_num, async->bio_flags); | 542 | async->mirror_num, async->bio_flags, |
543 | async->bio_offset); | ||
538 | } | 544 | } |
539 | 545 | ||
540 | static void run_one_async_done(struct btrfs_work *work) | 546 | static void run_one_async_done(struct btrfs_work *work) |
@@ -556,7 +562,8 @@ static void run_one_async_done(struct btrfs_work *work) | |||
556 | wake_up(&fs_info->async_submit_wait); | 562 | wake_up(&fs_info->async_submit_wait); |
557 | 563 | ||
558 | async->submit_bio_done(async->inode, async->rw, async->bio, | 564 | async->submit_bio_done(async->inode, async->rw, async->bio, |
559 | async->mirror_num, async->bio_flags); | 565 | async->mirror_num, async->bio_flags, |
566 | async->bio_offset); | ||
560 | } | 567 | } |
561 | 568 | ||
562 | static void run_one_async_free(struct btrfs_work *work) | 569 | static void run_one_async_free(struct btrfs_work *work) |
@@ -570,6 +577,7 @@ static void run_one_async_free(struct btrfs_work *work) | |||
570 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 577 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
571 | int rw, struct bio *bio, int mirror_num, | 578 | int rw, struct bio *bio, int mirror_num, |
572 | unsigned long bio_flags, | 579 | unsigned long bio_flags, |
580 | u64 bio_offset, | ||
573 | extent_submit_bio_hook_t *submit_bio_start, | 581 | extent_submit_bio_hook_t *submit_bio_start, |
574 | extent_submit_bio_hook_t *submit_bio_done) | 582 | extent_submit_bio_hook_t *submit_bio_done) |
575 | { | 583 | { |
@@ -592,6 +600,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
592 | 600 | ||
593 | async->work.flags = 0; | 601 | async->work.flags = 0; |
594 | async->bio_flags = bio_flags; | 602 | async->bio_flags = bio_flags; |
603 | async->bio_offset = bio_offset; | ||
595 | 604 | ||
596 | atomic_inc(&fs_info->nr_async_submits); | 605 | atomic_inc(&fs_info->nr_async_submits); |
597 | 606 | ||
@@ -627,7 +636,8 @@ static int btree_csum_one_bio(struct bio *bio) | |||
627 | 636 | ||
628 | static int __btree_submit_bio_start(struct inode *inode, int rw, | 637 | static int __btree_submit_bio_start(struct inode *inode, int rw, |
629 | struct bio *bio, int mirror_num, | 638 | struct bio *bio, int mirror_num, |
630 | unsigned long bio_flags) | 639 | unsigned long bio_flags, |
640 | u64 bio_offset) | ||
631 | { | 641 | { |
632 | /* | 642 | /* |
633 | * when we're called for a write, we're already in the async | 643 | * when we're called for a write, we're already in the async |
@@ -638,7 +648,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, | |||
638 | } | 648 | } |
639 | 649 | ||
640 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 650 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
641 | int mirror_num, unsigned long bio_flags) | 651 | int mirror_num, unsigned long bio_flags, |
652 | u64 bio_offset) | ||
642 | { | 653 | { |
643 | /* | 654 | /* |
644 | * when we're called for a write, we're already in the async | 655 | * when we're called for a write, we're already in the async |
@@ -648,7 +659,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
648 | } | 659 | } |
649 | 660 | ||
650 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 661 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
651 | int mirror_num, unsigned long bio_flags) | 662 | int mirror_num, unsigned long bio_flags, |
663 | u64 bio_offset) | ||
652 | { | 664 | { |
653 | int ret; | 665 | int ret; |
654 | 666 | ||
@@ -671,6 +683,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
671 | */ | 683 | */ |
672 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 684 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
673 | inode, rw, bio, mirror_num, 0, | 685 | inode, rw, bio, mirror_num, 0, |
686 | bio_offset, | ||
674 | __btree_submit_bio_start, | 687 | __btree_submit_bio_start, |
675 | __btree_submit_bio_done); | 688 | __btree_submit_bio_done); |
676 | } | 689 | } |
@@ -894,7 +907,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
894 | root->ref_cows = 0; | 907 | root->ref_cows = 0; |
895 | root->track_dirty = 0; | 908 | root->track_dirty = 0; |
896 | root->in_radix = 0; | 909 | root->in_radix = 0; |
897 | root->clean_orphans = 0; | 910 | root->orphan_item_inserted = 0; |
911 | root->orphan_cleanup_state = 0; | ||
898 | 912 | ||
899 | root->fs_info = fs_info; | 913 | root->fs_info = fs_info; |
900 | root->objectid = objectid; | 914 | root->objectid = objectid; |
@@ -903,13 +917,16 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
903 | root->name = NULL; | 917 | root->name = NULL; |
904 | root->in_sysfs = 0; | 918 | root->in_sysfs = 0; |
905 | root->inode_tree = RB_ROOT; | 919 | root->inode_tree = RB_ROOT; |
920 | root->block_rsv = NULL; | ||
921 | root->orphan_block_rsv = NULL; | ||
906 | 922 | ||
907 | INIT_LIST_HEAD(&root->dirty_list); | 923 | INIT_LIST_HEAD(&root->dirty_list); |
908 | INIT_LIST_HEAD(&root->orphan_list); | 924 | INIT_LIST_HEAD(&root->orphan_list); |
909 | INIT_LIST_HEAD(&root->root_list); | 925 | INIT_LIST_HEAD(&root->root_list); |
910 | spin_lock_init(&root->node_lock); | 926 | spin_lock_init(&root->node_lock); |
911 | spin_lock_init(&root->list_lock); | 927 | spin_lock_init(&root->orphan_lock); |
912 | spin_lock_init(&root->inode_lock); | 928 | spin_lock_init(&root->inode_lock); |
929 | spin_lock_init(&root->accounting_lock); | ||
913 | mutex_init(&root->objectid_mutex); | 930 | mutex_init(&root->objectid_mutex); |
914 | mutex_init(&root->log_mutex); | 931 | mutex_init(&root->log_mutex); |
915 | init_waitqueue_head(&root->log_writer_wait); | 932 | init_waitqueue_head(&root->log_writer_wait); |
@@ -968,42 +985,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
968 | return 0; | 985 | return 0; |
969 | } | 986 | } |
970 | 987 | ||
971 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
972 | struct btrfs_fs_info *fs_info) | ||
973 | { | ||
974 | struct extent_buffer *eb; | ||
975 | struct btrfs_root *log_root_tree = fs_info->log_root_tree; | ||
976 | u64 start = 0; | ||
977 | u64 end = 0; | ||
978 | int ret; | ||
979 | |||
980 | if (!log_root_tree) | ||
981 | return 0; | ||
982 | |||
983 | while (1) { | ||
984 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | ||
985 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | ||
986 | if (ret) | ||
987 | break; | ||
988 | |||
989 | clear_extent_bits(&log_root_tree->dirty_log_pages, start, end, | ||
990 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | ||
991 | } | ||
992 | eb = fs_info->log_root_tree->node; | ||
993 | |||
994 | WARN_ON(btrfs_header_level(eb) != 0); | ||
995 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
996 | |||
997 | ret = btrfs_free_reserved_extent(fs_info->tree_root, | ||
998 | eb->start, eb->len); | ||
999 | BUG_ON(ret); | ||
1000 | |||
1001 | free_extent_buffer(eb); | ||
1002 | kfree(fs_info->log_root_tree); | ||
1003 | fs_info->log_root_tree = NULL; | ||
1004 | return 0; | ||
1005 | } | ||
1006 | |||
1007 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 988 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
1008 | struct btrfs_fs_info *fs_info) | 989 | struct btrfs_fs_info *fs_info) |
1009 | { | 990 | { |
@@ -1191,19 +1172,23 @@ again: | |||
1191 | if (root) | 1172 | if (root) |
1192 | return root; | 1173 | return root; |
1193 | 1174 | ||
1194 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1195 | if (ret == 0) | ||
1196 | ret = -ENOENT; | ||
1197 | if (ret < 0) | ||
1198 | return ERR_PTR(ret); | ||
1199 | |||
1200 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1175 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
1201 | if (IS_ERR(root)) | 1176 | if (IS_ERR(root)) |
1202 | return root; | 1177 | return root; |
1203 | 1178 | ||
1204 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
1205 | set_anon_super(&root->anon_super, NULL); | 1179 | set_anon_super(&root->anon_super, NULL); |
1206 | 1180 | ||
1181 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
1182 | ret = -ENOENT; | ||
1183 | goto fail; | ||
1184 | } | ||
1185 | |||
1186 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
1187 | if (ret < 0) | ||
1188 | goto fail; | ||
1189 | if (ret == 0) | ||
1190 | root->orphan_item_inserted = 1; | ||
1191 | |||
1207 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1192 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); |
1208 | if (ret) | 1193 | if (ret) |
1209 | goto fail; | 1194 | goto fail; |
@@ -1212,10 +1197,9 @@ again: | |||
1212 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1197 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
1213 | (unsigned long)root->root_key.objectid, | 1198 | (unsigned long)root->root_key.objectid, |
1214 | root); | 1199 | root); |
1215 | if (ret == 0) { | 1200 | if (ret == 0) |
1216 | root->in_radix = 1; | 1201 | root->in_radix = 1; |
1217 | root->clean_orphans = 1; | 1202 | |
1218 | } | ||
1219 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1203 | spin_unlock(&fs_info->fs_roots_radix_lock); |
1220 | radix_tree_preload_end(); | 1204 | radix_tree_preload_end(); |
1221 | if (ret) { | 1205 | if (ret) { |
@@ -1461,10 +1445,6 @@ static int cleaner_kthread(void *arg) | |||
1461 | struct btrfs_root *root = arg; | 1445 | struct btrfs_root *root = arg; |
1462 | 1446 | ||
1463 | do { | 1447 | do { |
1464 | smp_mb(); | ||
1465 | if (root->fs_info->closing) | ||
1466 | break; | ||
1467 | |||
1468 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1448 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1469 | 1449 | ||
1470 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1450 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
@@ -1477,11 +1457,9 @@ static int cleaner_kthread(void *arg) | |||
1477 | if (freezing(current)) { | 1457 | if (freezing(current)) { |
1478 | refrigerator(); | 1458 | refrigerator(); |
1479 | } else { | 1459 | } else { |
1480 | smp_mb(); | ||
1481 | if (root->fs_info->closing) | ||
1482 | break; | ||
1483 | set_current_state(TASK_INTERRUPTIBLE); | 1460 | set_current_state(TASK_INTERRUPTIBLE); |
1484 | schedule(); | 1461 | if (!kthread_should_stop()) |
1462 | schedule(); | ||
1485 | __set_current_state(TASK_RUNNING); | 1463 | __set_current_state(TASK_RUNNING); |
1486 | } | 1464 | } |
1487 | } while (!kthread_should_stop()); | 1465 | } while (!kthread_should_stop()); |
@@ -1493,36 +1471,40 @@ static int transaction_kthread(void *arg) | |||
1493 | struct btrfs_root *root = arg; | 1471 | struct btrfs_root *root = arg; |
1494 | struct btrfs_trans_handle *trans; | 1472 | struct btrfs_trans_handle *trans; |
1495 | struct btrfs_transaction *cur; | 1473 | struct btrfs_transaction *cur; |
1474 | u64 transid; | ||
1496 | unsigned long now; | 1475 | unsigned long now; |
1497 | unsigned long delay; | 1476 | unsigned long delay; |
1498 | int ret; | 1477 | int ret; |
1499 | 1478 | ||
1500 | do { | 1479 | do { |
1501 | smp_mb(); | ||
1502 | if (root->fs_info->closing) | ||
1503 | break; | ||
1504 | |||
1505 | delay = HZ * 30; | 1480 | delay = HZ * 30; |
1506 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1481 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1507 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1482 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
1508 | 1483 | ||
1509 | mutex_lock(&root->fs_info->trans_mutex); | 1484 | spin_lock(&root->fs_info->new_trans_lock); |
1510 | cur = root->fs_info->running_transaction; | 1485 | cur = root->fs_info->running_transaction; |
1511 | if (!cur) { | 1486 | if (!cur) { |
1512 | mutex_unlock(&root->fs_info->trans_mutex); | 1487 | spin_unlock(&root->fs_info->new_trans_lock); |
1513 | goto sleep; | 1488 | goto sleep; |
1514 | } | 1489 | } |
1515 | 1490 | ||
1516 | now = get_seconds(); | 1491 | now = get_seconds(); |
1517 | if (now < cur->start_time || now - cur->start_time < 30) { | 1492 | if (!cur->blocked && |
1518 | mutex_unlock(&root->fs_info->trans_mutex); | 1493 | (now < cur->start_time || now - cur->start_time < 30)) { |
1494 | spin_unlock(&root->fs_info->new_trans_lock); | ||
1519 | delay = HZ * 5; | 1495 | delay = HZ * 5; |
1520 | goto sleep; | 1496 | goto sleep; |
1521 | } | 1497 | } |
1522 | mutex_unlock(&root->fs_info->trans_mutex); | 1498 | transid = cur->transid; |
1523 | trans = btrfs_start_transaction(root, 1); | 1499 | spin_unlock(&root->fs_info->new_trans_lock); |
1524 | ret = btrfs_commit_transaction(trans, root); | ||
1525 | 1500 | ||
1501 | trans = btrfs_join_transaction(root, 1); | ||
1502 | if (transid == trans->transid) { | ||
1503 | ret = btrfs_commit_transaction(trans, root); | ||
1504 | BUG_ON(ret); | ||
1505 | } else { | ||
1506 | btrfs_end_transaction(trans, root); | ||
1507 | } | ||
1526 | sleep: | 1508 | sleep: |
1527 | wake_up_process(root->fs_info->cleaner_kthread); | 1509 | wake_up_process(root->fs_info->cleaner_kthread); |
1528 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1510 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
@@ -1530,10 +1512,10 @@ sleep: | |||
1530 | if (freezing(current)) { | 1512 | if (freezing(current)) { |
1531 | refrigerator(); | 1513 | refrigerator(); |
1532 | } else { | 1514 | } else { |
1533 | if (root->fs_info->closing) | ||
1534 | break; | ||
1535 | set_current_state(TASK_INTERRUPTIBLE); | 1515 | set_current_state(TASK_INTERRUPTIBLE); |
1536 | schedule_timeout(delay); | 1516 | if (!kthread_should_stop() && |
1517 | !btrfs_transaction_blocked(root->fs_info)) | ||
1518 | schedule_timeout(delay); | ||
1537 | __set_current_state(TASK_RUNNING); | 1519 | __set_current_state(TASK_RUNNING); |
1538 | } | 1520 | } |
1539 | } while (!kthread_should_stop()); | 1521 | } while (!kthread_should_stop()); |
@@ -1620,6 +1602,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1620 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1602 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
1621 | INIT_LIST_HEAD(&fs_info->space_info); | 1603 | INIT_LIST_HEAD(&fs_info->space_info); |
1622 | btrfs_mapping_init(&fs_info->mapping_tree); | 1604 | btrfs_mapping_init(&fs_info->mapping_tree); |
1605 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | ||
1606 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | ||
1607 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | ||
1608 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | ||
1609 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | ||
1610 | INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); | ||
1611 | mutex_init(&fs_info->durable_block_rsv_mutex); | ||
1623 | atomic_set(&fs_info->nr_async_submits, 0); | 1612 | atomic_set(&fs_info->nr_async_submits, 0); |
1624 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1613 | atomic_set(&fs_info->async_delalloc_pages, 0); |
1625 | atomic_set(&fs_info->async_submit_draining, 0); | 1614 | atomic_set(&fs_info->async_submit_draining, 0); |
@@ -1759,9 +1748,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1759 | min_t(u64, fs_devices->num_devices, | 1748 | min_t(u64, fs_devices->num_devices, |
1760 | fs_info->thread_pool_size), | 1749 | fs_info->thread_pool_size), |
1761 | &fs_info->generic_worker); | 1750 | &fs_info->generic_worker); |
1762 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
1763 | fs_info->thread_pool_size, | ||
1764 | &fs_info->generic_worker); | ||
1765 | 1751 | ||
1766 | /* a higher idle thresh on the submit workers makes it much more | 1752 | /* a higher idle thresh on the submit workers makes it much more |
1767 | * likely that bios will be send down in a sane order to the | 1753 | * likely that bios will be send down in a sane order to the |
@@ -1809,7 +1795,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1809 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1795 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
1810 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1796 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
1811 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1797 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
1812 | btrfs_start_workers(&fs_info->enospc_workers, 1); | ||
1813 | 1798 | ||
1814 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1799 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
1815 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1800 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
@@ -1912,17 +1897,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1912 | 1897 | ||
1913 | csum_root->track_dirty = 1; | 1898 | csum_root->track_dirty = 1; |
1914 | 1899 | ||
1900 | fs_info->generation = generation; | ||
1901 | fs_info->last_trans_committed = generation; | ||
1902 | fs_info->data_alloc_profile = (u64)-1; | ||
1903 | fs_info->metadata_alloc_profile = (u64)-1; | ||
1904 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
1905 | |||
1915 | ret = btrfs_read_block_groups(extent_root); | 1906 | ret = btrfs_read_block_groups(extent_root); |
1916 | if (ret) { | 1907 | if (ret) { |
1917 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 1908 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
1918 | goto fail_block_groups; | 1909 | goto fail_block_groups; |
1919 | } | 1910 | } |
1920 | 1911 | ||
1921 | fs_info->generation = generation; | ||
1922 | fs_info->last_trans_committed = generation; | ||
1923 | fs_info->data_alloc_profile = (u64)-1; | ||
1924 | fs_info->metadata_alloc_profile = (u64)-1; | ||
1925 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
1926 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 1912 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
1927 | "btrfs-cleaner"); | 1913 | "btrfs-cleaner"); |
1928 | if (IS_ERR(fs_info->cleaner_kthread)) | 1914 | if (IS_ERR(fs_info->cleaner_kthread)) |
@@ -1977,6 +1963,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
1977 | BUG_ON(ret); | 1963 | BUG_ON(ret); |
1978 | 1964 | ||
1979 | if (!(sb->s_flags & MS_RDONLY)) { | 1965 | if (!(sb->s_flags & MS_RDONLY)) { |
1966 | ret = btrfs_cleanup_fs_roots(fs_info); | ||
1967 | BUG_ON(ret); | ||
1968 | |||
1980 | ret = btrfs_recover_relocation(tree_root); | 1969 | ret = btrfs_recover_relocation(tree_root); |
1981 | if (ret < 0) { | 1970 | if (ret < 0) { |
1982 | printk(KERN_WARNING | 1971 | printk(KERN_WARNING |
@@ -2040,7 +2029,6 @@ fail_sb_buffer: | |||
2040 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2029 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2041 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2030 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2042 | btrfs_stop_workers(&fs_info->submit_workers); | 2031 | btrfs_stop_workers(&fs_info->submit_workers); |
2043 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2044 | fail_iput: | 2032 | fail_iput: |
2045 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2033 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
2046 | iput(fs_info->btree_inode); | 2034 | iput(fs_info->btree_inode); |
@@ -2405,11 +2393,11 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2405 | down_write(&root->fs_info->cleanup_work_sem); | 2393 | down_write(&root->fs_info->cleanup_work_sem); |
2406 | up_write(&root->fs_info->cleanup_work_sem); | 2394 | up_write(&root->fs_info->cleanup_work_sem); |
2407 | 2395 | ||
2408 | trans = btrfs_start_transaction(root, 1); | 2396 | trans = btrfs_join_transaction(root, 1); |
2409 | ret = btrfs_commit_transaction(trans, root); | 2397 | ret = btrfs_commit_transaction(trans, root); |
2410 | BUG_ON(ret); | 2398 | BUG_ON(ret); |
2411 | /* run commit again to drop the original snapshot */ | 2399 | /* run commit again to drop the original snapshot */ |
2412 | trans = btrfs_start_transaction(root, 1); | 2400 | trans = btrfs_join_transaction(root, 1); |
2413 | btrfs_commit_transaction(trans, root); | 2401 | btrfs_commit_transaction(trans, root); |
2414 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2402 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2415 | BUG_ON(ret); | 2403 | BUG_ON(ret); |
@@ -2426,15 +2414,15 @@ int close_ctree(struct btrfs_root *root) | |||
2426 | fs_info->closing = 1; | 2414 | fs_info->closing = 1; |
2427 | smp_mb(); | 2415 | smp_mb(); |
2428 | 2416 | ||
2429 | kthread_stop(root->fs_info->transaction_kthread); | ||
2430 | kthread_stop(root->fs_info->cleaner_kthread); | ||
2431 | |||
2432 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2417 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
2433 | ret = btrfs_commit_super(root); | 2418 | ret = btrfs_commit_super(root); |
2434 | if (ret) | 2419 | if (ret) |
2435 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2420 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
2436 | } | 2421 | } |
2437 | 2422 | ||
2423 | kthread_stop(root->fs_info->transaction_kthread); | ||
2424 | kthread_stop(root->fs_info->cleaner_kthread); | ||
2425 | |||
2438 | fs_info->closing = 2; | 2426 | fs_info->closing = 2; |
2439 | smp_mb(); | 2427 | smp_mb(); |
2440 | 2428 | ||
@@ -2473,7 +2461,6 @@ int close_ctree(struct btrfs_root *root) | |||
2473 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2461 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
2474 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2462 | btrfs_stop_workers(&fs_info->endio_write_workers); |
2475 | btrfs_stop_workers(&fs_info->submit_workers); | 2463 | btrfs_stop_workers(&fs_info->submit_workers); |
2476 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
2477 | 2464 | ||
2478 | btrfs_close_devices(fs_info->fs_devices); | 2465 | btrfs_close_devices(fs_info->fs_devices); |
2479 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2466 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c958ecbc1916..88e825a0bf21 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
@@ -87,7 +87,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
87 | int metadata); | 87 | int metadata); |
88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
89 | int rw, struct bio *bio, int mirror_num, | 89 | int rw, struct bio *bio, int mirror_num, |
90 | unsigned long bio_flags, | 90 | unsigned long bio_flags, u64 bio_offset, |
91 | extent_submit_bio_hook_t *submit_bio_start, | 91 | extent_submit_bio_hook_t *submit_bio_start, |
92 | extent_submit_bio_hook_t *submit_bio_done); | 92 | extent_submit_bio_hook_t *submit_bio_done); |
93 | 93 | ||
@@ -95,8 +95,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | |||
95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
96 | int btrfs_write_tree_block(struct extent_buffer *buf); | 96 | int btrfs_write_tree_block(struct extent_buffer *buf); |
97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | 97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); |
98 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
99 | struct btrfs_fs_info *fs_info); | ||
100 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | 98 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, |
101 | struct btrfs_fs_info *fs_info); | 99 | struct btrfs_fs_info *fs_info); |
102 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 100 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c6a4f459ad76..b9080d71991a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -35,10 +35,9 @@ | |||
35 | 35 | ||
36 | static int update_block_group(struct btrfs_trans_handle *trans, | 36 | static int update_block_group(struct btrfs_trans_handle *trans, |
37 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
38 | u64 bytenr, u64 num_bytes, int alloc, | 38 | u64 bytenr, u64 num_bytes, int alloc); |
39 | int mark_free); | 39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, |
40 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 40 | u64 num_bytes, int reserve, int sinfo); |
41 | u64 num_bytes, int reserve); | ||
42 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
43 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
44 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
63 | u64 flags, int force); | 62 | u64 flags, int force); |
64 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
65 | struct btrfs_root *root, | ||
66 | struct btrfs_path *path, | ||
67 | u64 bytenr, u64 num_bytes, | ||
68 | int is_data, int reserved, | ||
69 | struct extent_buffer **must_clean); | ||
70 | static int find_next_key(struct btrfs_path *path, int level, | 63 | static int find_next_key(struct btrfs_path *path, int level, |
71 | struct btrfs_key *key); | 64 | struct btrfs_key *key); |
72 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 65 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
@@ -91,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache) | |||
91 | 84 | ||
92 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | 85 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
93 | { | 86 | { |
94 | if (atomic_dec_and_test(&cache->count)) | 87 | if (atomic_dec_and_test(&cache->count)) { |
88 | WARN_ON(cache->pinned > 0); | ||
89 | WARN_ON(cache->reserved > 0); | ||
90 | WARN_ON(cache->reserved_pinned > 0); | ||
95 | kfree(cache); | 91 | kfree(cache); |
92 | } | ||
96 | } | 93 | } |
97 | 94 | ||
98 | /* | 95 | /* |
@@ -319,7 +316,7 @@ static int caching_kthread(void *data) | |||
319 | 316 | ||
320 | exclude_super_stripes(extent_root, block_group); | 317 | exclude_super_stripes(extent_root, block_group); |
321 | spin_lock(&block_group->space_info->lock); | 318 | spin_lock(&block_group->space_info->lock); |
322 | block_group->space_info->bytes_super += block_group->bytes_super; | 319 | block_group->space_info->bytes_readonly += block_group->bytes_super; |
323 | spin_unlock(&block_group->space_info->lock); | 320 | spin_unlock(&block_group->space_info->lock); |
324 | 321 | ||
325 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 322 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
@@ -507,6 +504,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
507 | struct list_head *head = &info->space_info; | 504 | struct list_head *head = &info->space_info; |
508 | struct btrfs_space_info *found; | 505 | struct btrfs_space_info *found; |
509 | 506 | ||
507 | flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | | ||
508 | BTRFS_BLOCK_GROUP_METADATA; | ||
509 | |||
510 | rcu_read_lock(); | 510 | rcu_read_lock(); |
511 | list_for_each_entry_rcu(found, head, list) { | 511 | list_for_each_entry_rcu(found, head, list) { |
512 | if (found->flags == flags) { | 512 | if (found->flags == flags) { |
@@ -610,6 +610,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
610 | } | 610 | } |
611 | 611 | ||
612 | /* | 612 | /* |
613 | * helper function to lookup reference count and flags of extent. | ||
614 | * | ||
615 | * the head node for delayed ref is used to store the sum of all the | ||
616 | * reference count modifications queued up in the rbtree. the head | ||
617 | * node may also store the extent flags to set. This way you can check | ||
618 | * to see what the reference count and extent flags would be if all of | ||
619 | * the delayed refs are not processed. | ||
620 | */ | ||
621 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
622 | struct btrfs_root *root, u64 bytenr, | ||
623 | u64 num_bytes, u64 *refs, u64 *flags) | ||
624 | { | ||
625 | struct btrfs_delayed_ref_head *head; | ||
626 | struct btrfs_delayed_ref_root *delayed_refs; | ||
627 | struct btrfs_path *path; | ||
628 | struct btrfs_extent_item *ei; | ||
629 | struct extent_buffer *leaf; | ||
630 | struct btrfs_key key; | ||
631 | u32 item_size; | ||
632 | u64 num_refs; | ||
633 | u64 extent_flags; | ||
634 | int ret; | ||
635 | |||
636 | path = btrfs_alloc_path(); | ||
637 | if (!path) | ||
638 | return -ENOMEM; | ||
639 | |||
640 | key.objectid = bytenr; | ||
641 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
642 | key.offset = num_bytes; | ||
643 | if (!trans) { | ||
644 | path->skip_locking = 1; | ||
645 | path->search_commit_root = 1; | ||
646 | } | ||
647 | again: | ||
648 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
649 | &key, path, 0, 0); | ||
650 | if (ret < 0) | ||
651 | goto out_free; | ||
652 | |||
653 | if (ret == 0) { | ||
654 | leaf = path->nodes[0]; | ||
655 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
656 | if (item_size >= sizeof(*ei)) { | ||
657 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
658 | struct btrfs_extent_item); | ||
659 | num_refs = btrfs_extent_refs(leaf, ei); | ||
660 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
661 | } else { | ||
662 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
663 | struct btrfs_extent_item_v0 *ei0; | ||
664 | BUG_ON(item_size != sizeof(*ei0)); | ||
665 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
666 | struct btrfs_extent_item_v0); | ||
667 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
668 | /* FIXME: this isn't correct for data */ | ||
669 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
670 | #else | ||
671 | BUG(); | ||
672 | #endif | ||
673 | } | ||
674 | BUG_ON(num_refs == 0); | ||
675 | } else { | ||
676 | num_refs = 0; | ||
677 | extent_flags = 0; | ||
678 | ret = 0; | ||
679 | } | ||
680 | |||
681 | if (!trans) | ||
682 | goto out; | ||
683 | |||
684 | delayed_refs = &trans->transaction->delayed_refs; | ||
685 | spin_lock(&delayed_refs->lock); | ||
686 | head = btrfs_find_delayed_ref_head(trans, bytenr); | ||
687 | if (head) { | ||
688 | if (!mutex_trylock(&head->mutex)) { | ||
689 | atomic_inc(&head->node.refs); | ||
690 | spin_unlock(&delayed_refs->lock); | ||
691 | |||
692 | btrfs_release_path(root->fs_info->extent_root, path); | ||
693 | |||
694 | mutex_lock(&head->mutex); | ||
695 | mutex_unlock(&head->mutex); | ||
696 | btrfs_put_delayed_ref(&head->node); | ||
697 | goto again; | ||
698 | } | ||
699 | if (head->extent_op && head->extent_op->update_flags) | ||
700 | extent_flags |= head->extent_op->flags_to_set; | ||
701 | else | ||
702 | BUG_ON(num_refs == 0); | ||
703 | |||
704 | num_refs += head->node.ref_mod; | ||
705 | mutex_unlock(&head->mutex); | ||
706 | } | ||
707 | spin_unlock(&delayed_refs->lock); | ||
708 | out: | ||
709 | WARN_ON(num_refs == 0); | ||
710 | if (refs) | ||
711 | *refs = num_refs; | ||
712 | if (flags) | ||
713 | *flags = extent_flags; | ||
714 | out_free: | ||
715 | btrfs_free_path(path); | ||
716 | return ret; | ||
717 | } | ||
718 | |||
719 | /* | ||
613 | * Back reference rules. Back refs have three main goals: | 720 | * Back reference rules. Back refs have three main goals: |
614 | * | 721 | * |
615 | * 1) differentiate between all holders of references to an extent so that | 722 | * 1) differentiate between all holders of references to an extent so that |
@@ -1871,7 +1978,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
1871 | return ret; | 1978 | return ret; |
1872 | } | 1979 | } |
1873 | 1980 | ||
1874 | |||
1875 | /* helper function to actually process a single delayed ref entry */ | 1981 | /* helper function to actually process a single delayed ref entry */ |
1876 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | 1982 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, |
1877 | struct btrfs_root *root, | 1983 | struct btrfs_root *root, |
@@ -1891,32 +1997,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
1891 | BUG_ON(extent_op); | 1997 | BUG_ON(extent_op); |
1892 | head = btrfs_delayed_node_to_head(node); | 1998 | head = btrfs_delayed_node_to_head(node); |
1893 | if (insert_reserved) { | 1999 | if (insert_reserved) { |
1894 | int mark_free = 0; | 2000 | btrfs_pin_extent(root, node->bytenr, |
1895 | struct extent_buffer *must_clean = NULL; | 2001 | node->num_bytes, 1); |
1896 | |||
1897 | ret = pin_down_bytes(trans, root, NULL, | ||
1898 | node->bytenr, node->num_bytes, | ||
1899 | head->is_data, 1, &must_clean); | ||
1900 | if (ret > 0) | ||
1901 | mark_free = 1; | ||
1902 | |||
1903 | if (must_clean) { | ||
1904 | clean_tree_block(NULL, root, must_clean); | ||
1905 | btrfs_tree_unlock(must_clean); | ||
1906 | free_extent_buffer(must_clean); | ||
1907 | } | ||
1908 | if (head->is_data) { | 2002 | if (head->is_data) { |
1909 | ret = btrfs_del_csums(trans, root, | 2003 | ret = btrfs_del_csums(trans, root, |
1910 | node->bytenr, | 2004 | node->bytenr, |
1911 | node->num_bytes); | 2005 | node->num_bytes); |
1912 | BUG_ON(ret); | 2006 | BUG_ON(ret); |
1913 | } | 2007 | } |
1914 | if (mark_free) { | ||
1915 | ret = btrfs_free_reserved_extent(root, | ||
1916 | node->bytenr, | ||
1917 | node->num_bytes); | ||
1918 | BUG_ON(ret); | ||
1919 | } | ||
1920 | } | 2008 | } |
1921 | mutex_unlock(&head->mutex); | 2009 | mutex_unlock(&head->mutex); |
1922 | return 0; | 2010 | return 0; |
@@ -2347,6 +2435,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | |||
2347 | ret = 0; | 2435 | ret = 0; |
2348 | out: | 2436 | out: |
2349 | btrfs_free_path(path); | 2437 | btrfs_free_path(path); |
2438 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
2439 | WARN_ON(ret > 0); | ||
2350 | return ret; | 2440 | return ret; |
2351 | } | 2441 | } |
2352 | 2442 | ||
@@ -2660,12 +2750,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2660 | struct btrfs_space_info **space_info) | 2750 | struct btrfs_space_info **space_info) |
2661 | { | 2751 | { |
2662 | struct btrfs_space_info *found; | 2752 | struct btrfs_space_info *found; |
2753 | int i; | ||
2754 | int factor; | ||
2755 | |||
2756 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | ||
2757 | BTRFS_BLOCK_GROUP_RAID10)) | ||
2758 | factor = 2; | ||
2759 | else | ||
2760 | factor = 1; | ||
2663 | 2761 | ||
2664 | found = __find_space_info(info, flags); | 2762 | found = __find_space_info(info, flags); |
2665 | if (found) { | 2763 | if (found) { |
2666 | spin_lock(&found->lock); | 2764 | spin_lock(&found->lock); |
2667 | found->total_bytes += total_bytes; | 2765 | found->total_bytes += total_bytes; |
2668 | found->bytes_used += bytes_used; | 2766 | found->bytes_used += bytes_used; |
2767 | found->disk_used += bytes_used * factor; | ||
2669 | found->full = 0; | 2768 | found->full = 0; |
2670 | spin_unlock(&found->lock); | 2769 | spin_unlock(&found->lock); |
2671 | *space_info = found; | 2770 | *space_info = found; |
@@ -2675,18 +2774,20 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
2675 | if (!found) | 2774 | if (!found) |
2676 | return -ENOMEM; | 2775 | return -ENOMEM; |
2677 | 2776 | ||
2678 | INIT_LIST_HEAD(&found->block_groups); | 2777 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
2778 | INIT_LIST_HEAD(&found->block_groups[i]); | ||
2679 | init_rwsem(&found->groups_sem); | 2779 | init_rwsem(&found->groups_sem); |
2680 | init_waitqueue_head(&found->flush_wait); | ||
2681 | init_waitqueue_head(&found->allocate_wait); | ||
2682 | spin_lock_init(&found->lock); | 2780 | spin_lock_init(&found->lock); |
2683 | found->flags = flags; | 2781 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | |
2782 | BTRFS_BLOCK_GROUP_SYSTEM | | ||
2783 | BTRFS_BLOCK_GROUP_METADATA); | ||
2684 | found->total_bytes = total_bytes; | 2784 | found->total_bytes = total_bytes; |
2685 | found->bytes_used = bytes_used; | 2785 | found->bytes_used = bytes_used; |
2786 | found->disk_used = bytes_used * factor; | ||
2686 | found->bytes_pinned = 0; | 2787 | found->bytes_pinned = 0; |
2687 | found->bytes_reserved = 0; | 2788 | found->bytes_reserved = 0; |
2688 | found->bytes_readonly = 0; | 2789 | found->bytes_readonly = 0; |
2689 | found->bytes_delalloc = 0; | 2790 | found->bytes_may_use = 0; |
2690 | found->full = 0; | 2791 | found->full = 0; |
2691 | found->force_alloc = 0; | 2792 | found->force_alloc = 0; |
2692 | *space_info = found; | 2793 | *space_info = found; |
@@ -2711,19 +2812,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
2711 | } | 2812 | } |
2712 | } | 2813 | } |
2713 | 2814 | ||
2714 | static void set_block_group_readonly(struct btrfs_block_group_cache *cache) | ||
2715 | { | ||
2716 | spin_lock(&cache->space_info->lock); | ||
2717 | spin_lock(&cache->lock); | ||
2718 | if (!cache->ro) { | ||
2719 | cache->space_info->bytes_readonly += cache->key.offset - | ||
2720 | btrfs_block_group_used(&cache->item); | ||
2721 | cache->ro = 1; | ||
2722 | } | ||
2723 | spin_unlock(&cache->lock); | ||
2724 | spin_unlock(&cache->space_info->lock); | ||
2725 | } | ||
2726 | |||
2727 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 2815 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
2728 | { | 2816 | { |
2729 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 2817 | u64 num_devices = root->fs_info->fs_devices->rw_devices; |
@@ -2752,491 +2840,50 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
2752 | return flags; | 2840 | return flags; |
2753 | } | 2841 | } |
2754 | 2842 | ||
2755 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) | 2843 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
2756 | { | ||
2757 | struct btrfs_fs_info *info = root->fs_info; | ||
2758 | u64 alloc_profile; | ||
2759 | |||
2760 | if (data) { | ||
2761 | alloc_profile = info->avail_data_alloc_bits & | ||
2762 | info->data_alloc_profile; | ||
2763 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
2764 | } else if (root == root->fs_info->chunk_root) { | ||
2765 | alloc_profile = info->avail_system_alloc_bits & | ||
2766 | info->system_alloc_profile; | ||
2767 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
2768 | } else { | ||
2769 | alloc_profile = info->avail_metadata_alloc_bits & | ||
2770 | info->metadata_alloc_profile; | ||
2771 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
2772 | } | ||
2773 | |||
2774 | return btrfs_reduce_alloc_profile(root, data); | ||
2775 | } | ||
2776 | |||
2777 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | ||
2778 | { | ||
2779 | u64 alloc_target; | ||
2780 | |||
2781 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
2782 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
2783 | alloc_target); | ||
2784 | } | ||
2785 | |||
2786 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
2787 | { | ||
2788 | u64 num_bytes; | ||
2789 | int level; | ||
2790 | |||
2791 | level = BTRFS_MAX_LEVEL - 2; | ||
2792 | /* | ||
2793 | * NOTE: these calculations are absolutely the worst possible case. | ||
2794 | * This assumes that _every_ item we insert will require a new leaf, and | ||
2795 | * that the tree has grown to its maximum level size. | ||
2796 | */ | ||
2797 | |||
2798 | /* | ||
2799 | * for every item we insert we could insert both an extent item and a | ||
2800 | * extent ref item. Then for ever item we insert, we will need to cow | ||
2801 | * both the original leaf, plus the leaf to the left and right of it. | ||
2802 | * | ||
2803 | * Unless we are talking about the extent root, then we just want the | ||
2804 | * number of items * 2, since we just need the extent item plus its ref. | ||
2805 | */ | ||
2806 | if (root == root->fs_info->extent_root) | ||
2807 | num_bytes = num_items * 2; | ||
2808 | else | ||
2809 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
2810 | |||
2811 | /* | ||
2812 | * num_bytes is total number of leaves we could need times the leaf | ||
2813 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
2814 | * level, down to the leaf level. | ||
2815 | */ | ||
2816 | num_bytes = (num_bytes * root->leafsize) + | ||
2817 | (num_bytes * (level * 2)) * root->nodesize; | ||
2818 | |||
2819 | return num_bytes; | ||
2820 | } | ||
2821 | |||
2822 | /* | ||
2823 | * Unreserve metadata space for delalloc. If we have less reserved credits than | ||
2824 | * we have extents, this function does nothing. | ||
2825 | */ | ||
2826 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2827 | struct inode *inode, int num_items) | ||
2828 | { | ||
2829 | struct btrfs_fs_info *info = root->fs_info; | ||
2830 | struct btrfs_space_info *meta_sinfo; | ||
2831 | u64 num_bytes; | ||
2832 | u64 alloc_target; | ||
2833 | bool bug = false; | ||
2834 | |||
2835 | /* get the space info for where the metadata will live */ | ||
2836 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
2837 | meta_sinfo = __find_space_info(info, alloc_target); | ||
2838 | |||
2839 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
2840 | num_items); | ||
2841 | |||
2842 | spin_lock(&meta_sinfo->lock); | ||
2843 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
2844 | if (BTRFS_I(inode)->reserved_extents <= | ||
2845 | BTRFS_I(inode)->outstanding_extents) { | ||
2846 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2847 | spin_unlock(&meta_sinfo->lock); | ||
2848 | return 0; | ||
2849 | } | ||
2850 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
2851 | |||
2852 | BTRFS_I(inode)->reserved_extents -= num_items; | ||
2853 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
2854 | |||
2855 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
2856 | bug = true; | ||
2857 | meta_sinfo->bytes_delalloc = 0; | ||
2858 | } else { | ||
2859 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
2860 | } | ||
2861 | spin_unlock(&meta_sinfo->lock); | ||
2862 | |||
2863 | BUG_ON(bug); | ||
2864 | |||
2865 | return 0; | ||
2866 | } | ||
2867 | |||
2868 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
2869 | { | 2844 | { |
2870 | u64 thresh; | 2845 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
2871 | 2846 | flags |= root->fs_info->avail_data_alloc_bits & | |
2872 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2847 | root->fs_info->data_alloc_profile; |
2873 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 2848 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
2874 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | 2849 | flags |= root->fs_info->avail_system_alloc_bits & |
2875 | meta_sinfo->bytes_may_use; | 2850 | root->fs_info->system_alloc_profile; |
2876 | 2851 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | |
2877 | thresh = meta_sinfo->total_bytes - thresh; | 2852 | flags |= root->fs_info->avail_metadata_alloc_bits & |
2878 | thresh *= 80; | 2853 | root->fs_info->metadata_alloc_profile; |
2879 | do_div(thresh, 100); | 2854 | return btrfs_reduce_alloc_profile(root, flags); |
2880 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
2881 | meta_sinfo->force_delalloc = 1; | ||
2882 | else | ||
2883 | meta_sinfo->force_delalloc = 0; | ||
2884 | } | 2855 | } |
2885 | 2856 | ||
2886 | struct async_flush { | 2857 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
2887 | struct btrfs_root *root; | ||
2888 | struct btrfs_space_info *info; | ||
2889 | struct btrfs_work work; | ||
2890 | }; | ||
2891 | |||
2892 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
2893 | { | 2858 | { |
2894 | struct async_flush *async; | 2859 | u64 flags; |
2895 | struct btrfs_root *root; | ||
2896 | struct btrfs_space_info *info; | ||
2897 | |||
2898 | async = container_of(work, struct async_flush, work); | ||
2899 | root = async->root; | ||
2900 | info = async->info; | ||
2901 | |||
2902 | btrfs_start_delalloc_inodes(root, 0); | ||
2903 | wake_up(&info->flush_wait); | ||
2904 | btrfs_wait_ordered_extents(root, 0, 0); | ||
2905 | |||
2906 | spin_lock(&info->lock); | ||
2907 | info->flushing = 0; | ||
2908 | spin_unlock(&info->lock); | ||
2909 | wake_up(&info->flush_wait); | ||
2910 | |||
2911 | kfree(async); | ||
2912 | } | ||
2913 | |||
2914 | static void wait_on_flush(struct btrfs_space_info *info) | ||
2915 | { | ||
2916 | DEFINE_WAIT(wait); | ||
2917 | u64 used; | ||
2918 | |||
2919 | while (1) { | ||
2920 | prepare_to_wait(&info->flush_wait, &wait, | ||
2921 | TASK_UNINTERRUPTIBLE); | ||
2922 | spin_lock(&info->lock); | ||
2923 | if (!info->flushing) { | ||
2924 | spin_unlock(&info->lock); | ||
2925 | break; | ||
2926 | } | ||
2927 | |||
2928 | used = info->bytes_used + info->bytes_reserved + | ||
2929 | info->bytes_pinned + info->bytes_readonly + | ||
2930 | info->bytes_super + info->bytes_root + | ||
2931 | info->bytes_may_use + info->bytes_delalloc; | ||
2932 | if (used < info->total_bytes) { | ||
2933 | spin_unlock(&info->lock); | ||
2934 | break; | ||
2935 | } | ||
2936 | spin_unlock(&info->lock); | ||
2937 | schedule(); | ||
2938 | } | ||
2939 | finish_wait(&info->flush_wait, &wait); | ||
2940 | } | ||
2941 | |||
2942 | static void flush_delalloc(struct btrfs_root *root, | ||
2943 | struct btrfs_space_info *info) | ||
2944 | { | ||
2945 | struct async_flush *async; | ||
2946 | bool wait = false; | ||
2947 | |||
2948 | spin_lock(&info->lock); | ||
2949 | 2860 | ||
2950 | if (!info->flushing) | 2861 | if (data) |
2951 | info->flushing = 1; | 2862 | flags = BTRFS_BLOCK_GROUP_DATA; |
2863 | else if (root == root->fs_info->chunk_root) | ||
2864 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
2952 | else | 2865 | else |
2953 | wait = true; | 2866 | flags = BTRFS_BLOCK_GROUP_METADATA; |
2954 | |||
2955 | spin_unlock(&info->lock); | ||
2956 | |||
2957 | if (wait) { | ||
2958 | wait_on_flush(info); | ||
2959 | return; | ||
2960 | } | ||
2961 | |||
2962 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
2963 | if (!async) | ||
2964 | goto flush; | ||
2965 | |||
2966 | async->root = root; | ||
2967 | async->info = info; | ||
2968 | async->work.func = flush_delalloc_async; | ||
2969 | 2867 | ||
2970 | btrfs_queue_worker(&root->fs_info->enospc_workers, | 2868 | return get_alloc_profile(root, flags); |
2971 | &async->work); | ||
2972 | wait_on_flush(info); | ||
2973 | return; | ||
2974 | |||
2975 | flush: | ||
2976 | btrfs_start_delalloc_inodes(root, 0); | ||
2977 | btrfs_wait_ordered_extents(root, 0, 0); | ||
2978 | |||
2979 | spin_lock(&info->lock); | ||
2980 | info->flushing = 0; | ||
2981 | spin_unlock(&info->lock); | ||
2982 | wake_up(&info->flush_wait); | ||
2983 | } | 2869 | } |
2984 | 2870 | ||
2985 | static int maybe_allocate_chunk(struct btrfs_root *root, | 2871 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) |
2986 | struct btrfs_space_info *info) | ||
2987 | { | ||
2988 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
2989 | struct btrfs_trans_handle *trans; | ||
2990 | bool wait = false; | ||
2991 | int ret = 0; | ||
2992 | u64 min_metadata; | ||
2993 | u64 free_space; | ||
2994 | |||
2995 | free_space = btrfs_super_total_bytes(disk_super); | ||
2996 | /* | ||
2997 | * we allow the metadata to grow to a max of either 10gb or 5% of the | ||
2998 | * space in the volume. | ||
2999 | */ | ||
3000 | min_metadata = min((u64)10 * 1024 * 1024 * 1024, | ||
3001 | div64_u64(free_space * 5, 100)); | ||
3002 | if (info->total_bytes >= min_metadata) { | ||
3003 | spin_unlock(&info->lock); | ||
3004 | return 0; | ||
3005 | } | ||
3006 | |||
3007 | if (info->full) { | ||
3008 | spin_unlock(&info->lock); | ||
3009 | return 0; | ||
3010 | } | ||
3011 | |||
3012 | if (!info->allocating_chunk) { | ||
3013 | info->force_alloc = 1; | ||
3014 | info->allocating_chunk = 1; | ||
3015 | } else { | ||
3016 | wait = true; | ||
3017 | } | ||
3018 | |||
3019 | spin_unlock(&info->lock); | ||
3020 | |||
3021 | if (wait) { | ||
3022 | wait_event(info->allocate_wait, | ||
3023 | !info->allocating_chunk); | ||
3024 | return 1; | ||
3025 | } | ||
3026 | |||
3027 | trans = btrfs_start_transaction(root, 1); | ||
3028 | if (!trans) { | ||
3029 | ret = -ENOMEM; | ||
3030 | goto out; | ||
3031 | } | ||
3032 | |||
3033 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3034 | 4096 + 2 * 1024 * 1024, | ||
3035 | info->flags, 0); | ||
3036 | btrfs_end_transaction(trans, root); | ||
3037 | if (ret) | ||
3038 | goto out; | ||
3039 | out: | ||
3040 | spin_lock(&info->lock); | ||
3041 | info->allocating_chunk = 0; | ||
3042 | spin_unlock(&info->lock); | ||
3043 | wake_up(&info->allocate_wait); | ||
3044 | |||
3045 | if (ret) | ||
3046 | return 0; | ||
3047 | return 1; | ||
3048 | } | ||
3049 | |||
3050 | /* | ||
3051 | * Reserve metadata space for delalloc. | ||
3052 | */ | ||
3053 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
3054 | struct inode *inode, int num_items) | ||
3055 | { | ||
3056 | struct btrfs_fs_info *info = root->fs_info; | ||
3057 | struct btrfs_space_info *meta_sinfo; | ||
3058 | u64 num_bytes; | ||
3059 | u64 used; | ||
3060 | u64 alloc_target; | ||
3061 | int flushed = 0; | ||
3062 | int force_delalloc; | ||
3063 | |||
3064 | /* get the space info for where the metadata will live */ | ||
3065 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3066 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3067 | |||
3068 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
3069 | num_items); | ||
3070 | again: | ||
3071 | spin_lock(&meta_sinfo->lock); | ||
3072 | |||
3073 | force_delalloc = meta_sinfo->force_delalloc; | ||
3074 | |||
3075 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3076 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3077 | |||
3078 | if (!flushed) | ||
3079 | meta_sinfo->bytes_delalloc += num_bytes; | ||
3080 | |||
3081 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3082 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3083 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3084 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3085 | |||
3086 | if (used > meta_sinfo->total_bytes) { | ||
3087 | flushed++; | ||
3088 | |||
3089 | if (flushed == 1) { | ||
3090 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3091 | goto again; | ||
3092 | flushed++; | ||
3093 | } else { | ||
3094 | spin_unlock(&meta_sinfo->lock); | ||
3095 | } | ||
3096 | |||
3097 | if (flushed == 2) { | ||
3098 | filemap_flush(inode->i_mapping); | ||
3099 | goto again; | ||
3100 | } else if (flushed == 3) { | ||
3101 | flush_delalloc(root, meta_sinfo); | ||
3102 | goto again; | ||
3103 | } | ||
3104 | spin_lock(&meta_sinfo->lock); | ||
3105 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
3106 | spin_unlock(&meta_sinfo->lock); | ||
3107 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
3108 | BTRFS_I(inode)->outstanding_extents, | ||
3109 | BTRFS_I(inode)->reserved_extents); | ||
3110 | dump_space_info(meta_sinfo, 0, 0); | ||
3111 | return -ENOSPC; | ||
3112 | } | ||
3113 | |||
3114 | BTRFS_I(inode)->reserved_extents += num_items; | ||
3115 | check_force_delalloc(meta_sinfo); | ||
3116 | spin_unlock(&meta_sinfo->lock); | ||
3117 | |||
3118 | if (!flushed && force_delalloc) | ||
3119 | filemap_flush(inode->i_mapping); | ||
3120 | |||
3121 | return 0; | ||
3122 | } | ||
3123 | |||
3124 | /* | ||
3125 | * unreserve num_items number of items worth of metadata space. This needs to | ||
3126 | * be paired with btrfs_reserve_metadata_space. | ||
3127 | * | ||
3128 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
3129 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
3130 | * operations which will result in more used metadata, so we want to make sure we | |
3131 | * can do that without issue. | ||
3132 | */ | ||
3133 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3134 | { | ||
3135 | struct btrfs_fs_info *info = root->fs_info; | ||
3136 | struct btrfs_space_info *meta_sinfo; | ||
3137 | u64 num_bytes; | ||
3138 | u64 alloc_target; | ||
3139 | bool bug = false; | ||
3140 | |||
3141 | /* get the space info for where the metadata will live */ | ||
3142 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3143 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3144 | |||
3145 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3146 | |||
3147 | spin_lock(&meta_sinfo->lock); | ||
3148 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
3149 | bug = true; | ||
3150 | meta_sinfo->bytes_may_use = 0; | ||
3151 | } else { | ||
3152 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3153 | } | ||
3154 | spin_unlock(&meta_sinfo->lock); | ||
3155 | |||
3156 | BUG_ON(bug); | ||
3157 | |||
3158 | return 0; | ||
3159 | } | ||
3160 | |||
3161 | /* | ||
3162 | * Reserve some metadata space for use. We'll calculate the worst case number | |
3163 | * of bytes that would be needed to modify num_items number of items. If we | ||
3164 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
3165 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
3166 | * items you reserved, since whatever metadata you needed should have already | ||
3167 | * been allocated. | ||
3168 | * | ||
3169 | * This will commit the transaction to make more space if we don't have enough | ||
3170 | * metadata space. The only time we don't do this is if we're reserving space | |
3171 | * inside of a transaction, then we will just return -ENOSPC and it is the | |
3172 | * caller's responsibility to handle it properly. | |
3173 | */ | ||
3174 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
3175 | { | 2872 | { |
3176 | struct btrfs_fs_info *info = root->fs_info; | 2873 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, |
3177 | struct btrfs_space_info *meta_sinfo; | 2874 | BTRFS_BLOCK_GROUP_DATA); |
3178 | u64 num_bytes; | ||
3179 | u64 used; | ||
3180 | u64 alloc_target; | ||
3181 | int retries = 0; | ||
3182 | |||
3183 | /* get the space info for where the metadata will live */ | ||
3184 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3185 | meta_sinfo = __find_space_info(info, alloc_target); | ||
3186 | |||
3187 | num_bytes = calculate_bytes_needed(root, num_items); | ||
3188 | again: | ||
3189 | spin_lock(&meta_sinfo->lock); | ||
3190 | |||
3191 | if (unlikely(!meta_sinfo->bytes_root)) | ||
3192 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
3193 | |||
3194 | if (!retries) | ||
3195 | meta_sinfo->bytes_may_use += num_bytes; | ||
3196 | |||
3197 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
3198 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
3199 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
3200 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
3201 | |||
3202 | if (used > meta_sinfo->total_bytes) { | ||
3203 | retries++; | ||
3204 | if (retries == 1) { | ||
3205 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
3206 | goto again; | ||
3207 | retries++; | ||
3208 | } else { | ||
3209 | spin_unlock(&meta_sinfo->lock); | ||
3210 | } | ||
3211 | |||
3212 | if (retries == 2) { | ||
3213 | flush_delalloc(root, meta_sinfo); | ||
3214 | goto again; | ||
3215 | } | ||
3216 | spin_lock(&meta_sinfo->lock); | ||
3217 | meta_sinfo->bytes_may_use -= num_bytes; | ||
3218 | spin_unlock(&meta_sinfo->lock); | ||
3219 | |||
3220 | dump_space_info(meta_sinfo, 0, 0); | ||
3221 | return -ENOSPC; | ||
3222 | } | ||
3223 | |||
3224 | check_force_delalloc(meta_sinfo); | ||
3225 | spin_unlock(&meta_sinfo->lock); | ||
3226 | |||
3227 | return 0; | ||
3228 | } | 2875 | } |
3229 | 2876 | ||
3230 | /* | 2877 | /* |
3231 | * This will check the space that the inode allocates from to make sure we have | 2878 | * This will check the space that the inode allocates from to make sure we have |
3232 | * enough space for bytes. | 2879 | * enough space for bytes. |
3233 | */ | 2880 | */ |
3234 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2881 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes) |
3235 | u64 bytes) | ||
3236 | { | 2882 | { |
3237 | struct btrfs_space_info *data_sinfo; | 2883 | struct btrfs_space_info *data_sinfo; |
2884 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3238 | u64 used; | 2885 | u64 used; |
3239 | int ret = 0, committed = 0, flushed = 0; | 2886 | int ret = 0, committed = 0; |
3240 | 2887 | ||
3241 | /* make sure bytes are sectorsize aligned */ | 2888 | /* make sure bytes are sectorsize aligned */ |
3242 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 2889 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
@@ -3248,21 +2895,13 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
3248 | again: | 2895 | again: |
3249 | /* make sure we have enough space to handle the data first */ | 2896 | /* make sure we have enough space to handle the data first */ |
3250 | spin_lock(&data_sinfo->lock); | 2897 | spin_lock(&data_sinfo->lock); |
3251 | used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc + | 2898 | used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + |
3252 | data_sinfo->bytes_reserved + data_sinfo->bytes_pinned + | 2899 | data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + |
3253 | data_sinfo->bytes_readonly + data_sinfo->bytes_may_use + | 2900 | data_sinfo->bytes_may_use; |
3254 | data_sinfo->bytes_super; | ||
3255 | 2901 | ||
3256 | if (used + bytes > data_sinfo->total_bytes) { | 2902 | if (used + bytes > data_sinfo->total_bytes) { |
3257 | struct btrfs_trans_handle *trans; | 2903 | struct btrfs_trans_handle *trans; |
3258 | 2904 | ||
3259 | if (!flushed) { | ||
3260 | spin_unlock(&data_sinfo->lock); | ||
3261 | flush_delalloc(root, data_sinfo); | ||
3262 | flushed = 1; | ||
3263 | goto again; | ||
3264 | } | ||
3265 | |||
3266 | /* | 2905 | /* |
3267 | * if we don't have enough free bytes in this space then we need | 2906 | * if we don't have enough free bytes in this space then we need |
3268 | * to alloc a new chunk. | 2907 | * to alloc a new chunk. |
@@ -3274,15 +2913,15 @@ again: | |||
3274 | spin_unlock(&data_sinfo->lock); | 2913 | spin_unlock(&data_sinfo->lock); |
3275 | alloc: | 2914 | alloc: |
3276 | alloc_target = btrfs_get_alloc_profile(root, 1); | 2915 | alloc_target = btrfs_get_alloc_profile(root, 1); |
3277 | trans = btrfs_start_transaction(root, 1); | 2916 | trans = btrfs_join_transaction(root, 1); |
3278 | if (!trans) | 2917 | if (IS_ERR(trans)) |
3279 | return -ENOMEM; | 2918 | return PTR_ERR(trans); |
3280 | 2919 | ||
3281 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2920 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
3282 | bytes + 2 * 1024 * 1024, | 2921 | bytes + 2 * 1024 * 1024, |
3283 | alloc_target, 0); | 2922 | alloc_target, 0); |
3284 | btrfs_end_transaction(trans, root); | 2923 | btrfs_end_transaction(trans, root); |
3285 | if (ret) | 2924 | if (ret < 0) |
3286 | return ret; | 2925 | return ret; |
3287 | 2926 | ||
3288 | if (!data_sinfo) { | 2927 | if (!data_sinfo) { |
@@ -3297,25 +2936,26 @@ alloc: | |||
3297 | if (!committed && !root->fs_info->open_ioctl_trans) { | 2936 | if (!committed && !root->fs_info->open_ioctl_trans) { |
3298 | committed = 1; | 2937 | committed = 1; |
3299 | trans = btrfs_join_transaction(root, 1); | 2938 | trans = btrfs_join_transaction(root, 1); |
3300 | if (!trans) | 2939 | if (IS_ERR(trans)) |
3301 | return -ENOMEM; | 2940 | return PTR_ERR(trans); |
3302 | ret = btrfs_commit_transaction(trans, root); | 2941 | ret = btrfs_commit_transaction(trans, root); |
3303 | if (ret) | 2942 | if (ret) |
3304 | return ret; | 2943 | return ret; |
3305 | goto again; | 2944 | goto again; |
3306 | } | 2945 | } |
3307 | 2946 | ||
3308 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 2947 | #if 0 /* I hope we never need this code again, just in case */ |
3309 | ", %llu bytes_used, %llu bytes_reserved, " | 2948 | printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " |
3310 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use " | 2949 | "%llu bytes_reserved, " "%llu bytes_pinned, " |
3311 | "%llu total\n", (unsigned long long)bytes, | 2950 | "%llu bytes_readonly, %llu may use %llu total\n", |
3312 | (unsigned long long)data_sinfo->bytes_delalloc, | 2951 | (unsigned long long)bytes, |
3313 | (unsigned long long)data_sinfo->bytes_used, | 2952 | (unsigned long long)data_sinfo->bytes_used, |
3314 | (unsigned long long)data_sinfo->bytes_reserved, | 2953 | (unsigned long long)data_sinfo->bytes_reserved, |
3315 | (unsigned long long)data_sinfo->bytes_pinned, | 2954 | (unsigned long long)data_sinfo->bytes_pinned, |
3316 | (unsigned long long)data_sinfo->bytes_readonly, | 2955 | (unsigned long long)data_sinfo->bytes_readonly, |
3317 | (unsigned long long)data_sinfo->bytes_may_use, | 2956 | (unsigned long long)data_sinfo->bytes_may_use, |
3318 | (unsigned long long)data_sinfo->total_bytes); | 2957 | (unsigned long long)data_sinfo->total_bytes); |
2958 | #endif | ||
3319 | return -ENOSPC; | 2959 | return -ENOSPC; |
3320 | } | 2960 | } |
3321 | data_sinfo->bytes_may_use += bytes; | 2961 | data_sinfo->bytes_may_use += bytes; |
@@ -3326,12 +2966,13 @@ alloc: | |||
3326 | } | 2966 | } |
3327 | 2967 | ||
3328 | /* | 2968 | /* |
3329 | * if there was an error for whatever reason after calling | 2969 | * called when we are clearing a delalloc extent from the |
3330 | * btrfs_check_data_free_space, call this so we can cleanup the counters. | 2970 | * inode's io_tree or there was an error for whatever reason |
2971 | * after calling btrfs_check_data_free_space | ||
3331 | */ | 2972 | */ |
3332 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2973 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) |
3333 | struct inode *inode, u64 bytes) | ||
3334 | { | 2974 | { |
2975 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3335 | struct btrfs_space_info *data_sinfo; | 2976 | struct btrfs_space_info *data_sinfo; |
3336 | 2977 | ||
3337 | /* make sure bytes are sectorsize aligned */ | 2978 | /* make sure bytes are sectorsize aligned */ |
@@ -3344,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root, | |||
3344 | spin_unlock(&data_sinfo->lock); | 2985 | spin_unlock(&data_sinfo->lock); |
3345 | } | 2986 | } |
3346 | 2987 | ||
3347 | /* called when we are adding a delalloc extent to the inode's io_tree */ | ||
3348 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
3349 | u64 bytes) | ||
3350 | { | ||
3351 | struct btrfs_space_info *data_sinfo; | ||
3352 | |||
3353 | /* get the space info for where this inode will be storing its data */ | ||
3354 | data_sinfo = BTRFS_I(inode)->space_info; | ||
3355 | |||
3356 | /* make sure we have enough space to handle the data first */ | ||
3357 | spin_lock(&data_sinfo->lock); | ||
3358 | data_sinfo->bytes_delalloc += bytes; | ||
3359 | |||
3360 | /* | ||
3361 | * we are adding a delalloc extent without calling | ||
3362 | * btrfs_check_data_free_space first. This happens on a weird | ||
3363 | * writepage condition, but shouldn't hurt our accounting | ||
3364 | */ | ||
3365 | if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { | ||
3366 | data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; | ||
3367 | BTRFS_I(inode)->reserved_bytes = 0; | ||
3368 | } else { | ||
3369 | data_sinfo->bytes_may_use -= bytes; | ||
3370 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
3371 | } | ||
3372 | |||
3373 | spin_unlock(&data_sinfo->lock); | ||
3374 | } | ||
3375 | |||
3376 | /* called when we are clearing an delalloc extent from the inode's io_tree */ | ||
3377 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
3378 | u64 bytes) | ||
3379 | { | ||
3380 | struct btrfs_space_info *info; | ||
3381 | |||
3382 | info = BTRFS_I(inode)->space_info; | ||
3383 | |||
3384 | spin_lock(&info->lock); | ||
3385 | info->bytes_delalloc -= bytes; | ||
3386 | spin_unlock(&info->lock); | ||
3387 | } | ||
3388 | |||
3389 | static void force_metadata_allocation(struct btrfs_fs_info *info) | 2988 | static void force_metadata_allocation(struct btrfs_fs_info *info) |
3390 | { | 2989 | { |
3391 | struct list_head *head = &info->space_info; | 2990 | struct list_head *head = &info->space_info; |
@@ -3399,13 +2998,28 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
3399 | rcu_read_unlock(); | 2998 | rcu_read_unlock(); |
3400 | } | 2999 | } |
3401 | 3000 | ||
3001 | static int should_alloc_chunk(struct btrfs_space_info *sinfo, | ||
3002 | u64 alloc_bytes) | ||
3003 | { | ||
3004 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | ||
3005 | |||
3006 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
3007 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
3008 | return 0; | ||
3009 | |||
3010 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
3011 | alloc_bytes < div_factor(num_bytes, 8)) | ||
3012 | return 0; | ||
3013 | |||
3014 | return 1; | ||
3015 | } | ||
3016 | |||
3402 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3017 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
3403 | struct btrfs_root *extent_root, u64 alloc_bytes, | 3018 | struct btrfs_root *extent_root, u64 alloc_bytes, |
3404 | u64 flags, int force) | 3019 | u64 flags, int force) |
3405 | { | 3020 | { |
3406 | struct btrfs_space_info *space_info; | 3021 | struct btrfs_space_info *space_info; |
3407 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3022 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3408 | u64 thresh; | ||
3409 | int ret = 0; | 3023 | int ret = 0; |
3410 | 3024 | ||
3411 | mutex_lock(&fs_info->chunk_mutex); | 3025 | mutex_lock(&fs_info->chunk_mutex); |
@@ -3428,11 +3042,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3428 | goto out; | 3042 | goto out; |
3429 | } | 3043 | } |
3430 | 3044 | ||
3431 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3045 | if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { |
3432 | thresh = div_factor(thresh, 8); | ||
3433 | if (!force && | ||
3434 | (space_info->bytes_used + space_info->bytes_pinned + | ||
3435 | space_info->bytes_reserved + alloc_bytes) < thresh) { | ||
3436 | spin_unlock(&space_info->lock); | 3046 | spin_unlock(&space_info->lock); |
3437 | goto out; | 3047 | goto out; |
3438 | } | 3048 | } |
@@ -3454,6 +3064,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
3454 | spin_lock(&space_info->lock); | 3064 | spin_lock(&space_info->lock); |
3455 | if (ret) | 3065 | if (ret) |
3456 | space_info->full = 1; | 3066 | space_info->full = 1; |
3067 | else | ||
3068 | ret = 1; | ||
3457 | space_info->force_alloc = 0; | 3069 | space_info->force_alloc = 0; |
3458 | spin_unlock(&space_info->lock); | 3070 | spin_unlock(&space_info->lock); |
3459 | out: | 3071 | out: |
@@ -3461,13 +3073,713 @@ out: | |||
3461 | return ret; | 3073 | return ret; |
3462 | } | 3074 | } |
3463 | 3075 | ||
3076 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | ||
3077 | struct btrfs_root *root, | ||
3078 | struct btrfs_space_info *sinfo, u64 num_bytes) | ||
3079 | { | ||
3080 | int ret; | ||
3081 | int end_trans = 0; | ||
3082 | |||
3083 | if (sinfo->full) | ||
3084 | return 0; | ||
3085 | |||
3086 | spin_lock(&sinfo->lock); | ||
3087 | ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024); | ||
3088 | spin_unlock(&sinfo->lock); | ||
3089 | if (!ret) | ||
3090 | return 0; | ||
3091 | |||
3092 | if (!trans) { | ||
3093 | trans = btrfs_join_transaction(root, 1); | ||
3094 | BUG_ON(IS_ERR(trans)); | ||
3095 | end_trans = 1; | ||
3096 | } | ||
3097 | |||
3098 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
3099 | num_bytes + 2 * 1024 * 1024, | ||
3100 | get_alloc_profile(root, sinfo->flags), 0); | ||
3101 | |||
3102 | if (end_trans) | ||
3103 | btrfs_end_transaction(trans, root); | ||
3104 | |||
3105 | return ret == 1 ? 1 : 0; | ||
3106 | } | ||
3107 | |||
3108 | /* | ||
3109 | * shrink metadata reservation for delalloc | ||
3110 | */ | ||
3111 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
3112 | struct btrfs_root *root, u64 to_reclaim) | ||
3113 | { | ||
3114 | struct btrfs_block_rsv *block_rsv; | ||
3115 | u64 reserved; | ||
3116 | u64 max_reclaim; | ||
3117 | u64 reclaimed = 0; | ||
3118 | int pause = 1; | ||
3119 | int ret; | ||
3120 | |||
3121 | block_rsv = &root->fs_info->delalloc_block_rsv; | ||
3122 | spin_lock(&block_rsv->lock); | ||
3123 | reserved = block_rsv->reserved; | ||
3124 | spin_unlock(&block_rsv->lock); | ||
3125 | |||
3126 | if (reserved == 0) | ||
3127 | return 0; | ||
3128 | |||
3129 | max_reclaim = min(reserved, to_reclaim); | ||
3130 | |||
3131 | while (1) { | ||
3132 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | ||
3133 | if (!ret) { | ||
3134 | __set_current_state(TASK_INTERRUPTIBLE); | ||
3135 | schedule_timeout(pause); | ||
3136 | pause <<= 1; | ||
3137 | if (pause > HZ / 10) | ||
3138 | pause = HZ / 10; | ||
3139 | } else { | ||
3140 | pause = 1; | ||
3141 | } | ||
3142 | |||
3143 | spin_lock(&block_rsv->lock); | ||
3144 | if (reserved > block_rsv->reserved) | ||
3145 | reclaimed = reserved - block_rsv->reserved; | ||
3146 | reserved = block_rsv->reserved; | ||
3147 | spin_unlock(&block_rsv->lock); | ||
3148 | |||
3149 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
3150 | break; | ||
3151 | |||
3152 | if (trans && trans->transaction->blocked) | ||
3153 | return -EAGAIN; | ||
3154 | } | ||
3155 | return reclaimed >= to_reclaim; | ||
3156 | } | ||
3157 | |||
3158 | static int should_retry_reserve(struct btrfs_trans_handle *trans, | ||
3159 | struct btrfs_root *root, | ||
3160 | struct btrfs_block_rsv *block_rsv, | ||
3161 | u64 num_bytes, int *retries) | ||
3162 | { | ||
3163 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3164 | int ret; | ||
3165 | |||
3166 | if ((*retries) > 2) | ||
3167 | return -ENOSPC; | ||
3168 | |||
3169 | ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); | ||
3170 | if (ret) | ||
3171 | return 1; | ||
3172 | |||
3173 | if (trans && trans->transaction->in_commit) | ||
3174 | return -ENOSPC; | ||
3175 | |||
3176 | ret = shrink_delalloc(trans, root, num_bytes); | ||
3177 | if (ret) | ||
3178 | return ret; | ||
3179 | |||
3180 | spin_lock(&space_info->lock); | ||
3181 | if (space_info->bytes_pinned < num_bytes) | ||
3182 | ret = 1; | ||
3183 | spin_unlock(&space_info->lock); | ||
3184 | if (ret) | ||
3185 | return -ENOSPC; | ||
3186 | |||
3187 | (*retries)++; | ||
3188 | |||
3189 | if (trans) | ||
3190 | return -EAGAIN; | ||
3191 | |||
3192 | trans = btrfs_join_transaction(root, 1); | ||
3193 | BUG_ON(IS_ERR(trans)); | ||
3194 | ret = btrfs_commit_transaction(trans, root); | ||
3195 | BUG_ON(ret); | ||
3196 | |||
3197 | return 1; | ||
3198 | } | ||
3199 | |||
3200 | static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, | ||
3201 | u64 num_bytes) | ||
3202 | { | ||
3203 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3204 | u64 unused; | ||
3205 | int ret = -ENOSPC; | ||
3206 | |||
3207 | spin_lock(&space_info->lock); | ||
3208 | unused = space_info->bytes_used + space_info->bytes_reserved + | ||
3209 | space_info->bytes_pinned + space_info->bytes_readonly; | ||
3210 | |||
3211 | if (unused < space_info->total_bytes) | ||
3212 | unused = space_info->total_bytes - unused; | ||
3213 | else | ||
3214 | unused = 0; | ||
3215 | |||
3216 | if (unused >= num_bytes) { | ||
3217 | if (block_rsv->priority >= 10) { | ||
3218 | space_info->bytes_reserved += num_bytes; | ||
3219 | ret = 0; | ||
3220 | } else { | ||
3221 | if ((unused + block_rsv->reserved) * | ||
3222 | block_rsv->priority >= | ||
3223 | (num_bytes + block_rsv->reserved) * 10) { | ||
3224 | space_info->bytes_reserved += num_bytes; | ||
3225 | ret = 0; | ||
3226 | } | ||
3227 | } | ||
3228 | } | ||
3229 | spin_unlock(&space_info->lock); | ||
3230 | |||
3231 | return ret; | ||
3232 | } | ||
3233 | |||
3234 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, | ||
3235 | struct btrfs_root *root) | ||
3236 | { | ||
3237 | struct btrfs_block_rsv *block_rsv; | ||
3238 | if (root->ref_cows) | ||
3239 | block_rsv = trans->block_rsv; | ||
3240 | else | ||
3241 | block_rsv = root->block_rsv; | ||
3242 | |||
3243 | if (!block_rsv) | ||
3244 | block_rsv = &root->fs_info->empty_block_rsv; | ||
3245 | |||
3246 | return block_rsv; | ||
3247 | } | ||
3248 | |||
3249 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
3250 | u64 num_bytes) | ||
3251 | { | ||
3252 | int ret = -ENOSPC; | ||
3253 | spin_lock(&block_rsv->lock); | ||
3254 | if (block_rsv->reserved >= num_bytes) { | ||
3255 | block_rsv->reserved -= num_bytes; | ||
3256 | if (block_rsv->reserved < block_rsv->size) | ||
3257 | block_rsv->full = 0; | ||
3258 | ret = 0; | ||
3259 | } | ||
3260 | spin_unlock(&block_rsv->lock); | ||
3261 | return ret; | ||
3262 | } | ||
3263 | |||
3264 | static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | ||
3265 | u64 num_bytes, int update_size) | ||
3266 | { | ||
3267 | spin_lock(&block_rsv->lock); | ||
3268 | block_rsv->reserved += num_bytes; | ||
3269 | if (update_size) | ||
3270 | block_rsv->size += num_bytes; | ||
3271 | else if (block_rsv->reserved >= block_rsv->size) | ||
3272 | block_rsv->full = 1; | ||
3273 | spin_unlock(&block_rsv->lock); | ||
3274 | } | ||
3275 | |||
3276 | void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | ||
3277 | struct btrfs_block_rsv *dest, u64 num_bytes) | ||
3278 | { | ||
3279 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
3280 | |||
3281 | spin_lock(&block_rsv->lock); | ||
3282 | if (num_bytes == (u64)-1) | ||
3283 | num_bytes = block_rsv->size; | ||
3284 | block_rsv->size -= num_bytes; | ||
3285 | if (block_rsv->reserved >= block_rsv->size) { | ||
3286 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
3287 | block_rsv->reserved = block_rsv->size; | ||
3288 | block_rsv->full = 1; | ||
3289 | } else { | ||
3290 | num_bytes = 0; | ||
3291 | } | ||
3292 | spin_unlock(&block_rsv->lock); | ||
3293 | |||
3294 | if (num_bytes > 0) { | ||
3295 | if (dest) { | ||
3296 | block_rsv_add_bytes(dest, num_bytes, 0); | ||
3297 | } else { | ||
3298 | spin_lock(&space_info->lock); | ||
3299 | space_info->bytes_reserved -= num_bytes; | ||
3300 | spin_unlock(&space_info->lock); | ||
3301 | } | ||
3302 | } | ||
3303 | } | ||
3304 | |||
3305 | static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | ||
3306 | struct btrfs_block_rsv *dst, u64 num_bytes) | ||
3307 | { | ||
3308 | int ret; | ||
3309 | |||
3310 | ret = block_rsv_use_bytes(src, num_bytes); | ||
3311 | if (ret) | ||
3312 | return ret; | ||
3313 | |||
3314 | block_rsv_add_bytes(dst, num_bytes, 1); | ||
3315 | return 0; | ||
3316 | } | ||
3317 | |||
3318 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv) | ||
3319 | { | ||
3320 | memset(rsv, 0, sizeof(*rsv)); | ||
3321 | spin_lock_init(&rsv->lock); | ||
3322 | atomic_set(&rsv->usage, 1); | ||
3323 | rsv->priority = 6; | ||
3324 | INIT_LIST_HEAD(&rsv->list); | ||
3325 | } | ||
3326 | |||
3327 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | ||
3328 | { | ||
3329 | struct btrfs_block_rsv *block_rsv; | ||
3330 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3331 | u64 alloc_target; | ||
3332 | |||
3333 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); | ||
3334 | if (!block_rsv) | ||
3335 | return NULL; | ||
3336 | |||
3337 | btrfs_init_block_rsv(block_rsv); | ||
3338 | |||
3339 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
3340 | block_rsv->space_info = __find_space_info(fs_info, | ||
3341 | BTRFS_BLOCK_GROUP_METADATA); | ||
3342 | |||
3343 | return block_rsv; | ||
3344 | } | ||
3345 | |||
3346 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
3347 | struct btrfs_block_rsv *rsv) | ||
3348 | { | ||
3349 | if (rsv && atomic_dec_and_test(&rsv->usage)) { | ||
3350 | btrfs_block_rsv_release(root, rsv, (u64)-1); | ||
3351 | if (!rsv->durable) | ||
3352 | kfree(rsv); | ||
3353 | } | ||
3354 | } | ||
3355 | |||
3356 | /* | ||
3357 | * make the block_rsv struct be able to capture freed space. | ||
3358 | * the captured space will re-add to the block_rsv struct | |
3359 | * after transaction commit | ||
3360 | */ | ||
3361 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
3362 | struct btrfs_block_rsv *block_rsv) | ||
3363 | { | ||
3364 | block_rsv->durable = 1; | ||
3365 | mutex_lock(&fs_info->durable_block_rsv_mutex); | ||
3366 | list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list); | ||
3367 | mutex_unlock(&fs_info->durable_block_rsv_mutex); | ||
3368 | } | ||
3369 | |||
3370 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
3371 | struct btrfs_root *root, | ||
3372 | struct btrfs_block_rsv *block_rsv, | ||
3373 | u64 num_bytes, int *retries) | ||
3374 | { | ||
3375 | int ret; | ||
3376 | |||
3377 | if (num_bytes == 0) | ||
3378 | return 0; | ||
3379 | again: | ||
3380 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
3381 | if (!ret) { | ||
3382 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
3383 | return 0; | ||
3384 | } | ||
3385 | |||
3386 | ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries); | ||
3387 | if (ret > 0) | ||
3388 | goto again; | ||
3389 | |||
3390 | return ret; | ||
3391 | } | ||
3392 | |||
3393 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
3394 | struct btrfs_root *root, | ||
3395 | struct btrfs_block_rsv *block_rsv, | ||
3396 | u64 min_reserved, int min_factor) | ||
3397 | { | ||
3398 | u64 num_bytes = 0; | ||
3399 | int commit_trans = 0; | ||
3400 | int ret = -ENOSPC; | ||
3401 | |||
3402 | if (!block_rsv) | ||
3403 | return 0; | ||
3404 | |||
3405 | spin_lock(&block_rsv->lock); | ||
3406 | if (min_factor > 0) | ||
3407 | num_bytes = div_factor(block_rsv->size, min_factor); | ||
3408 | if (min_reserved > num_bytes) | ||
3409 | num_bytes = min_reserved; | ||
3410 | |||
3411 | if (block_rsv->reserved >= num_bytes) { | ||
3412 | ret = 0; | ||
3413 | } else { | ||
3414 | num_bytes -= block_rsv->reserved; | ||
3415 | if (block_rsv->durable && | ||
3416 | block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) | ||
3417 | commit_trans = 1; | ||
3418 | } | ||
3419 | spin_unlock(&block_rsv->lock); | ||
3420 | if (!ret) | ||
3421 | return 0; | ||
3422 | |||
3423 | if (block_rsv->refill_used) { | ||
3424 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
3425 | if (!ret) { | ||
3426 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
3427 | return 0; | ||
3428 | } | ||
3429 | } | ||
3430 | |||
3431 | if (commit_trans) { | ||
3432 | if (trans) | ||
3433 | return -EAGAIN; | ||
3434 | |||
3435 | trans = btrfs_join_transaction(root, 1); | ||
3436 | BUG_ON(IS_ERR(trans)); | ||
3437 | ret = btrfs_commit_transaction(trans, root); | ||
3438 | return 0; | ||
3439 | } | ||
3440 | |||
3441 | WARN_ON(1); | ||
3442 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
3443 | block_rsv->size, block_rsv->reserved, | ||
3444 | block_rsv->freed[0], block_rsv->freed[1]); | ||
3445 | |||
3446 | return -ENOSPC; | ||
3447 | } | ||
3448 | |||
3449 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
3450 | struct btrfs_block_rsv *dst_rsv, | ||
3451 | u64 num_bytes) | ||
3452 | { | ||
3453 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3454 | } | ||
3455 | |||
3456 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
3457 | struct btrfs_block_rsv *block_rsv, | ||
3458 | u64 num_bytes) | ||
3459 | { | ||
3460 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
3461 | if (global_rsv->full || global_rsv == block_rsv || | ||
3462 | block_rsv->space_info != global_rsv->space_info) | ||
3463 | global_rsv = NULL; | ||
3464 | block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); | ||
3465 | } | ||
3466 | |||
3467 | /* | ||
3468 | * helper to calculate size of global block reservation. | ||
3469 | * the desired value is sum of space used by extent tree, | ||
3470 | * checksum tree and root tree | ||
3471 | */ | ||
3472 | static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | ||
3473 | { | ||
3474 | struct btrfs_space_info *sinfo; | ||
3475 | u64 num_bytes; | ||
3476 | u64 meta_used; | ||
3477 | u64 data_used; | ||
3478 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | ||
3479 | #if 0 | ||
3480 | /* | ||
3481 | * per tree used space accounting can be inaccuracy, so we | ||
3482 | * can't rely on it. | ||
3483 | */ | ||
3484 | spin_lock(&fs_info->extent_root->accounting_lock); | ||
3485 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); | ||
3486 | spin_unlock(&fs_info->extent_root->accounting_lock); | ||
3487 | |||
3488 | spin_lock(&fs_info->csum_root->accounting_lock); | ||
3489 | num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); | ||
3490 | spin_unlock(&fs_info->csum_root->accounting_lock); | ||
3491 | |||
3492 | spin_lock(&fs_info->tree_root->accounting_lock); | ||
3493 | num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); | ||
3494 | spin_unlock(&fs_info->tree_root->accounting_lock); | ||
3495 | #endif | ||
3496 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | ||
3497 | spin_lock(&sinfo->lock); | ||
3498 | data_used = sinfo->bytes_used; | ||
3499 | spin_unlock(&sinfo->lock); | ||
3500 | |||
3501 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
3502 | spin_lock(&sinfo->lock); | ||
3503 | meta_used = sinfo->bytes_used; | ||
3504 | spin_unlock(&sinfo->lock); | ||
3505 | |||
3506 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * | ||
3507 | csum_size * 2; | ||
3508 | num_bytes += div64_u64(data_used + meta_used, 50); | ||
3509 | |||
3510 | if (num_bytes * 3 > meta_used) | ||
3511 | num_bytes = div64_u64(meta_used, 3); | ||
3512 | |||
3513 | return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); | ||
3514 | } | ||
3515 | |||
3516 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3517 | { | ||
3518 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | ||
3519 | struct btrfs_space_info *sinfo = block_rsv->space_info; | ||
3520 | u64 num_bytes; | ||
3521 | |||
3522 | num_bytes = calc_global_metadata_size(fs_info); | ||
3523 | |||
3524 | spin_lock(&block_rsv->lock); | ||
3525 | spin_lock(&sinfo->lock); | ||
3526 | |||
3527 | block_rsv->size = num_bytes; | ||
3528 | |||
3529 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | ||
3530 | sinfo->bytes_reserved + sinfo->bytes_readonly; | ||
3531 | |||
3532 | if (sinfo->total_bytes > num_bytes) { | ||
3533 | num_bytes = sinfo->total_bytes - num_bytes; | ||
3534 | block_rsv->reserved += num_bytes; | ||
3535 | sinfo->bytes_reserved += num_bytes; | ||
3536 | } | ||
3537 | |||
3538 | if (block_rsv->reserved >= block_rsv->size) { | ||
3539 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
3540 | sinfo->bytes_reserved -= num_bytes; | ||
3541 | block_rsv->reserved = block_rsv->size; | ||
3542 | block_rsv->full = 1; | ||
3543 | } | ||
3544 | #if 0 | ||
3545 | printk(KERN_INFO"global block rsv size %llu reserved %llu\n", | ||
3546 | block_rsv->size, block_rsv->reserved); | ||
3547 | #endif | ||
3548 | spin_unlock(&sinfo->lock); | ||
3549 | spin_unlock(&block_rsv->lock); | ||
3550 | } | ||
3551 | |||
3552 | static void init_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3553 | { | ||
3554 | struct btrfs_space_info *space_info; | ||
3555 | |||
3556 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM); | ||
3557 | fs_info->chunk_block_rsv.space_info = space_info; | ||
3558 | fs_info->chunk_block_rsv.priority = 10; | ||
3559 | |||
3560 | space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
3561 | fs_info->global_block_rsv.space_info = space_info; | ||
3562 | fs_info->global_block_rsv.priority = 10; | ||
3563 | fs_info->global_block_rsv.refill_used = 1; | ||
3564 | fs_info->delalloc_block_rsv.space_info = space_info; | ||
3565 | fs_info->trans_block_rsv.space_info = space_info; | ||
3566 | fs_info->empty_block_rsv.space_info = space_info; | ||
3567 | fs_info->empty_block_rsv.priority = 10; | ||
3568 | |||
3569 | fs_info->extent_root->block_rsv = &fs_info->global_block_rsv; | ||
3570 | fs_info->csum_root->block_rsv = &fs_info->global_block_rsv; | ||
3571 | fs_info->dev_root->block_rsv = &fs_info->global_block_rsv; | ||
3572 | fs_info->tree_root->block_rsv = &fs_info->global_block_rsv; | ||
3573 | fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv; | ||
3574 | |||
3575 | btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv); | ||
3576 | |||
3577 | btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv); | ||
3578 | |||
3579 | update_global_block_rsv(fs_info); | ||
3580 | } | ||
3581 | |||
3582 | static void release_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
3583 | { | ||
3584 | block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1); | ||
3585 | WARN_ON(fs_info->delalloc_block_rsv.size > 0); | ||
3586 | WARN_ON(fs_info->delalloc_block_rsv.reserved > 0); | ||
3587 | WARN_ON(fs_info->trans_block_rsv.size > 0); | ||
3588 | WARN_ON(fs_info->trans_block_rsv.reserved > 0); | ||
3589 | WARN_ON(fs_info->chunk_block_rsv.size > 0); | ||
3590 | WARN_ON(fs_info->chunk_block_rsv.reserved > 0); | ||
3591 | } | ||
3592 | |||
3593 | static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) | ||
3594 | { | ||
3595 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | ||
3596 | 3 * num_items; | ||
3597 | } | ||
3598 | |||
3599 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3600 | struct btrfs_root *root, | ||
3601 | int num_items, int *retries) | ||
3602 | { | ||
3603 | u64 num_bytes; | ||
3604 | int ret; | ||
3605 | |||
3606 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
3607 | return 0; | ||
3608 | |||
3609 | num_bytes = calc_trans_metadata_size(root, num_items); | ||
3610 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
3611 | num_bytes, retries); | ||
3612 | if (!ret) { | ||
3613 | trans->bytes_reserved += num_bytes; | ||
3614 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
3615 | } | ||
3616 | return ret; | ||
3617 | } | ||
3618 | |||
3619 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, | ||
3620 | struct btrfs_root *root) | ||
3621 | { | ||
3622 | if (!trans->bytes_reserved) | ||
3623 | return; | ||
3624 | |||
3625 | BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv); | ||
3626 | btrfs_block_rsv_release(root, trans->block_rsv, | ||
3627 | trans->bytes_reserved); | ||
3628 | trans->bytes_reserved = 0; | ||
3629 | } | ||
3630 | |||
3631 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3632 | struct inode *inode) | ||
3633 | { | ||
3634 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3635 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
3636 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | ||
3637 | |||
3638 | /* | ||
3639 | * one for deleting orphan item, one for updating inode and | ||
3640 | * two for calling btrfs_truncate_inode_items. | ||
3641 | * | ||
3642 | * btrfs_truncate_inode_items is a delete operation, it frees | ||
3643 | * more space than it uses in most cases. So two units of | ||
3644 | * metadata space should be enough for calling it many times. | ||
3645 | * If all of the metadata space is used, we can commit | ||
3646 | * transaction and use space it freed. | ||
3647 | */ | ||
3648 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
3649 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3650 | } | ||
3651 | |||
3652 | void btrfs_orphan_release_metadata(struct inode *inode) | ||
3653 | { | ||
3654 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3655 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
3656 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | ||
3657 | } | ||
3658 | |||
3659 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | ||
3660 | struct btrfs_pending_snapshot *pending) | ||
3661 | { | ||
3662 | struct btrfs_root *root = pending->root; | ||
3663 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
3664 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | ||
3665 | /* | ||
3666 | * two for root back/forward refs, two for directory entries | ||
3667 | * and one for root of the snapshot. | ||
3668 | */ | ||
3669 | u64 num_bytes = calc_trans_metadata_size(root, 5); | ||
3670 | dst_rsv->space_info = src_rsv->space_info; | ||
3671 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
3672 | } | ||
3673 | |||
3674 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | ||
3675 | { | ||
3676 | return num_bytes >>= 3; | ||
3677 | } | ||
3678 | |||
3679 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes) | ||
3680 | { | ||
3681 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3682 | struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv; | ||
3683 | u64 to_reserve; | ||
3684 | int nr_extents; | ||
3685 | int retries = 0; | ||
3686 | int ret; | ||
3687 | |||
3688 | if (btrfs_transaction_in_commit(root->fs_info)) | ||
3689 | schedule_timeout(1); | ||
3690 | |||
3691 | num_bytes = ALIGN(num_bytes, root->sectorsize); | ||
3692 | again: | ||
3693 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3694 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1; | ||
3695 | if (nr_extents > BTRFS_I(inode)->reserved_extents) { | ||
3696 | nr_extents -= BTRFS_I(inode)->reserved_extents; | ||
3697 | to_reserve = calc_trans_metadata_size(root, nr_extents); | ||
3698 | } else { | ||
3699 | nr_extents = 0; | ||
3700 | to_reserve = 0; | ||
3701 | } | ||
3702 | |||
3703 | to_reserve += calc_csum_metadata_size(inode, num_bytes); | ||
3704 | ret = reserve_metadata_bytes(block_rsv, to_reserve); | ||
3705 | if (ret) { | ||
3706 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3707 | ret = should_retry_reserve(NULL, root, block_rsv, to_reserve, | ||
3708 | &retries); | ||
3709 | if (ret > 0) | ||
3710 | goto again; | ||
3711 | return ret; | ||
3712 | } | ||
3713 | |||
3714 | BTRFS_I(inode)->reserved_extents += nr_extents; | ||
3715 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); | ||
3716 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3717 | |||
3718 | block_rsv_add_bytes(block_rsv, to_reserve, 1); | ||
3719 | |||
3720 | if (block_rsv->size > 512 * 1024 * 1024) | ||
3721 | shrink_delalloc(NULL, root, to_reserve); | ||
3722 | |||
3723 | return 0; | ||
3724 | } | ||
3725 | |||
3726 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes) | ||
3727 | { | ||
3728 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
3729 | u64 to_free; | ||
3730 | int nr_extents; | ||
3731 | |||
3732 | num_bytes = ALIGN(num_bytes, root->sectorsize); | ||
3733 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); | ||
3734 | |||
3735 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
3736 | nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents); | ||
3737 | if (nr_extents < BTRFS_I(inode)->reserved_extents) { | ||
3738 | nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents; | ||
3739 | BTRFS_I(inode)->reserved_extents -= nr_extents; | ||
3740 | } else { | ||
3741 | nr_extents = 0; | ||
3742 | } | ||
3743 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
3744 | |||
3745 | to_free = calc_csum_metadata_size(inode, num_bytes); | ||
3746 | if (nr_extents > 0) | ||
3747 | to_free += calc_trans_metadata_size(root, nr_extents); | ||
3748 | |||
3749 | btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv, | ||
3750 | to_free); | ||
3751 | } | ||
3752 | |||
3753 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | ||
3754 | { | ||
3755 | int ret; | ||
3756 | |||
3757 | ret = btrfs_check_data_free_space(inode, num_bytes); | ||
3758 | if (ret) | ||
3759 | return ret; | ||
3760 | |||
3761 | ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); | ||
3762 | if (ret) { | ||
3763 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
3764 | return ret; | ||
3765 | } | ||
3766 | |||
3767 | return 0; | ||
3768 | } | ||
3769 | |||
3770 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes) | ||
3771 | { | ||
3772 | btrfs_delalloc_release_metadata(inode, num_bytes); | ||
3773 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
3774 | } | ||
3775 | |||
3464 | static int update_block_group(struct btrfs_trans_handle *trans, | 3776 | static int update_block_group(struct btrfs_trans_handle *trans, |
3465 | struct btrfs_root *root, | 3777 | struct btrfs_root *root, |
3466 | u64 bytenr, u64 num_bytes, int alloc, | 3778 | u64 bytenr, u64 num_bytes, int alloc) |
3467 | int mark_free) | ||
3468 | { | 3779 | { |
3469 | struct btrfs_block_group_cache *cache; | 3780 | struct btrfs_block_group_cache *cache; |
3470 | struct btrfs_fs_info *info = root->fs_info; | 3781 | struct btrfs_fs_info *info = root->fs_info; |
3782 | int factor; | ||
3471 | u64 total = num_bytes; | 3783 | u64 total = num_bytes; |
3472 | u64 old_val; | 3784 | u64 old_val; |
3473 | u64 byte_in_group; | 3785 | u64 byte_in_group; |
@@ -3486,6 +3798,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3486 | cache = btrfs_lookup_block_group(info, bytenr); | 3798 | cache = btrfs_lookup_block_group(info, bytenr); |
3487 | if (!cache) | 3799 | if (!cache) |
3488 | return -1; | 3800 | return -1; |
3801 | if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
3802 | BTRFS_BLOCK_GROUP_RAID1 | | ||
3803 | BTRFS_BLOCK_GROUP_RAID10)) | ||
3804 | factor = 2; | ||
3805 | else | ||
3806 | factor = 1; | ||
3489 | byte_in_group = bytenr - cache->key.objectid; | 3807 | byte_in_group = bytenr - cache->key.objectid; |
3490 | WARN_ON(byte_in_group > cache->key.offset); | 3808 | WARN_ON(byte_in_group > cache->key.offset); |
3491 | 3809 | ||
@@ -3498,31 +3816,24 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
3498 | old_val += num_bytes; | 3816 | old_val += num_bytes; |
3499 | btrfs_set_block_group_used(&cache->item, old_val); | 3817 | btrfs_set_block_group_used(&cache->item, old_val); |
3500 | cache->reserved -= num_bytes; | 3818 | cache->reserved -= num_bytes; |
3501 | cache->space_info->bytes_used += num_bytes; | ||
3502 | cache->space_info->bytes_reserved -= num_bytes; | 3819 | cache->space_info->bytes_reserved -= num_bytes; |
3503 | if (cache->ro) | 3820 | cache->space_info->bytes_used += num_bytes; |
3504 | cache->space_info->bytes_readonly -= num_bytes; | 3821 | cache->space_info->disk_used += num_bytes * factor; |
3505 | spin_unlock(&cache->lock); | 3822 | spin_unlock(&cache->lock); |
3506 | spin_unlock(&cache->space_info->lock); | 3823 | spin_unlock(&cache->space_info->lock); |
3507 | } else { | 3824 | } else { |
3508 | old_val -= num_bytes; | 3825 | old_val -= num_bytes; |
3509 | cache->space_info->bytes_used -= num_bytes; | ||
3510 | if (cache->ro) | ||
3511 | cache->space_info->bytes_readonly += num_bytes; | ||
3512 | btrfs_set_block_group_used(&cache->item, old_val); | 3826 | btrfs_set_block_group_used(&cache->item, old_val); |
3827 | cache->pinned += num_bytes; | ||
3828 | cache->space_info->bytes_pinned += num_bytes; | ||
3829 | cache->space_info->bytes_used -= num_bytes; | ||
3830 | cache->space_info->disk_used -= num_bytes * factor; | ||
3513 | spin_unlock(&cache->lock); | 3831 | spin_unlock(&cache->lock); |
3514 | spin_unlock(&cache->space_info->lock); | 3832 | spin_unlock(&cache->space_info->lock); |
3515 | if (mark_free) { | ||
3516 | int ret; | ||
3517 | 3833 | ||
3518 | ret = btrfs_discard_extent(root, bytenr, | 3834 | set_extent_dirty(info->pinned_extents, |
3519 | num_bytes); | 3835 | bytenr, bytenr + num_bytes - 1, |
3520 | WARN_ON(ret); | 3836 | GFP_NOFS | __GFP_NOFAIL); |
3521 | |||
3522 | ret = btrfs_add_free_space(cache, bytenr, | ||
3523 | num_bytes); | ||
3524 | WARN_ON(ret); | ||
3525 | } | ||
3526 | } | 3837 | } |
3527 | btrfs_put_block_group(cache); | 3838 | btrfs_put_block_group(cache); |
3528 | total -= num_bytes; | 3839 | total -= num_bytes; |
@@ -3546,18 +3857,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
3546 | return bytenr; | 3857 | return bytenr; |
3547 | } | 3858 | } |
3548 | 3859 | ||
3549 | /* | 3860 | static int pin_down_extent(struct btrfs_root *root, |
3550 | * this function must be called within transaction | 3861 | struct btrfs_block_group_cache *cache, |
3551 | */ | 3862 | u64 bytenr, u64 num_bytes, int reserved) |
3552 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3553 | u64 bytenr, u64 num_bytes, int reserved) | ||
3554 | { | 3863 | { |
3555 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
3556 | struct btrfs_block_group_cache *cache; | ||
3557 | |||
3558 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
3559 | BUG_ON(!cache); | ||
3560 | |||
3561 | spin_lock(&cache->space_info->lock); | 3864 | spin_lock(&cache->space_info->lock); |
3562 | spin_lock(&cache->lock); | 3865 | spin_lock(&cache->lock); |
3563 | cache->pinned += num_bytes; | 3866 | cache->pinned += num_bytes; |
@@ -3569,28 +3872,68 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
3569 | spin_unlock(&cache->lock); | 3872 | spin_unlock(&cache->lock); |
3570 | spin_unlock(&cache->space_info->lock); | 3873 | spin_unlock(&cache->space_info->lock); |
3571 | 3874 | ||
3572 | btrfs_put_block_group(cache); | 3875 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, |
3876 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | ||
3877 | return 0; | ||
3878 | } | ||
3879 | |||
3880 | /* | ||
3881 | * this function must be called within transaction | ||
3882 | */ | ||
3883 | int btrfs_pin_extent(struct btrfs_root *root, | ||
3884 | u64 bytenr, u64 num_bytes, int reserved) | ||
3885 | { | ||
3886 | struct btrfs_block_group_cache *cache; | ||
3887 | |||
3888 | cache = btrfs_lookup_block_group(root->fs_info, bytenr); | ||
3889 | BUG_ON(!cache); | ||
3890 | |||
3891 | pin_down_extent(root, cache, bytenr, num_bytes, reserved); | ||
3573 | 3892 | ||
3574 | set_extent_dirty(fs_info->pinned_extents, | 3893 | btrfs_put_block_group(cache); |
3575 | bytenr, bytenr + num_bytes - 1, GFP_NOFS); | ||
3576 | return 0; | 3894 | return 0; |
3577 | } | 3895 | } |
3578 | 3896 | ||
3579 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 3897 | /* |
3580 | u64 num_bytes, int reserve) | 3898 | * update size of reserved extents. this function may return -EAGAIN |
3899 | * if 'reserve' is true or 'sinfo' is false. | ||
3900 | */ | ||
3901 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
3902 | u64 num_bytes, int reserve, int sinfo) | ||
3581 | { | 3903 | { |
3582 | spin_lock(&cache->space_info->lock); | 3904 | int ret = 0; |
3583 | spin_lock(&cache->lock); | 3905 | if (sinfo) { |
3584 | if (reserve) { | 3906 | struct btrfs_space_info *space_info = cache->space_info; |
3585 | cache->reserved += num_bytes; | 3907 | spin_lock(&space_info->lock); |
3586 | cache->space_info->bytes_reserved += num_bytes; | 3908 | spin_lock(&cache->lock); |
3909 | if (reserve) { | ||
3910 | if (cache->ro) { | ||
3911 | ret = -EAGAIN; | ||
3912 | } else { | ||
3913 | cache->reserved += num_bytes; | ||
3914 | space_info->bytes_reserved += num_bytes; | ||
3915 | } | ||
3916 | } else { | ||
3917 | if (cache->ro) | ||
3918 | space_info->bytes_readonly += num_bytes; | ||
3919 | cache->reserved -= num_bytes; | ||
3920 | space_info->bytes_reserved -= num_bytes; | ||
3921 | } | ||
3922 | spin_unlock(&cache->lock); | ||
3923 | spin_unlock(&space_info->lock); | ||
3587 | } else { | 3924 | } else { |
3588 | cache->reserved -= num_bytes; | 3925 | spin_lock(&cache->lock); |
3589 | cache->space_info->bytes_reserved -= num_bytes; | 3926 | if (cache->ro) { |
3927 | ret = -EAGAIN; | ||
3928 | } else { | ||
3929 | if (reserve) | ||
3930 | cache->reserved += num_bytes; | ||
3931 | else | ||
3932 | cache->reserved -= num_bytes; | ||
3933 | } | ||
3934 | spin_unlock(&cache->lock); | ||
3590 | } | 3935 | } |
3591 | spin_unlock(&cache->lock); | 3936 | return ret; |
3592 | spin_unlock(&cache->space_info->lock); | ||
3593 | return 0; | ||
3594 | } | 3937 | } |
3595 | 3938 | ||
3596 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 3939 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
@@ -3621,6 +3964,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
3621 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 3964 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
3622 | 3965 | ||
3623 | up_write(&fs_info->extent_commit_sem); | 3966 | up_write(&fs_info->extent_commit_sem); |
3967 | |||
3968 | update_global_block_rsv(fs_info); | ||
3624 | return 0; | 3969 | return 0; |
3625 | } | 3970 | } |
3626 | 3971 | ||
@@ -3647,14 +3992,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
3647 | btrfs_add_free_space(cache, start, len); | 3992 | btrfs_add_free_space(cache, start, len); |
3648 | } | 3993 | } |
3649 | 3994 | ||
3995 | start += len; | ||
3996 | |||
3650 | spin_lock(&cache->space_info->lock); | 3997 | spin_lock(&cache->space_info->lock); |
3651 | spin_lock(&cache->lock); | 3998 | spin_lock(&cache->lock); |
3652 | cache->pinned -= len; | 3999 | cache->pinned -= len; |
3653 | cache->space_info->bytes_pinned -= len; | 4000 | cache->space_info->bytes_pinned -= len; |
4001 | if (cache->ro) { | ||
4002 | cache->space_info->bytes_readonly += len; | ||
4003 | } else if (cache->reserved_pinned > 0) { | ||
4004 | len = min(len, cache->reserved_pinned); | ||
4005 | cache->reserved_pinned -= len; | ||
4006 | cache->space_info->bytes_reserved += len; | ||
4007 | } | ||
3654 | spin_unlock(&cache->lock); | 4008 | spin_unlock(&cache->lock); |
3655 | spin_unlock(&cache->space_info->lock); | 4009 | spin_unlock(&cache->space_info->lock); |
3656 | |||
3657 | start += len; | ||
3658 | } | 4010 | } |
3659 | 4011 | ||
3660 | if (cache) | 4012 | if (cache) |
@@ -3667,8 +4019,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3667 | { | 4019 | { |
3668 | struct btrfs_fs_info *fs_info = root->fs_info; | 4020 | struct btrfs_fs_info *fs_info = root->fs_info; |
3669 | struct extent_io_tree *unpin; | 4021 | struct extent_io_tree *unpin; |
4022 | struct btrfs_block_rsv *block_rsv; | ||
4023 | struct btrfs_block_rsv *next_rsv; | ||
3670 | u64 start; | 4024 | u64 start; |
3671 | u64 end; | 4025 | u64 end; |
4026 | int idx; | ||
3672 | int ret; | 4027 | int ret; |
3673 | 4028 | ||
3674 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4029 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
@@ -3689,59 +4044,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
3689 | cond_resched(); | 4044 | cond_resched(); |
3690 | } | 4045 | } |
3691 | 4046 | ||
3692 | return ret; | 4047 | mutex_lock(&fs_info->durable_block_rsv_mutex); |
3693 | } | 4048 | list_for_each_entry_safe(block_rsv, next_rsv, |
4049 | &fs_info->durable_block_rsv_list, list) { | ||
3694 | 4050 | ||
3695 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 4051 | idx = trans->transid & 0x1; |
3696 | struct btrfs_root *root, | 4052 | if (block_rsv->freed[idx] > 0) { |
3697 | struct btrfs_path *path, | 4053 | block_rsv_add_bytes(block_rsv, |
3698 | u64 bytenr, u64 num_bytes, | 4054 | block_rsv->freed[idx], 0); |
3699 | int is_data, int reserved, | 4055 | block_rsv->freed[idx] = 0; |
3700 | struct extent_buffer **must_clean) | 4056 | } |
3701 | { | 4057 | if (atomic_read(&block_rsv->usage) == 0) { |
3702 | int err = 0; | 4058 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); |
3703 | struct extent_buffer *buf; | ||
3704 | |||
3705 | if (is_data) | ||
3706 | goto pinit; | ||
3707 | |||
3708 | /* | ||
3709 | * discard is sloooow, and so triggering discards on | ||
3710 | * individual btree blocks isn't a good plan. Just | ||
3711 | * pin everything in discard mode. | ||
3712 | */ | ||
3713 | if (btrfs_test_opt(root, DISCARD)) | ||
3714 | goto pinit; | ||
3715 | |||
3716 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | ||
3717 | if (!buf) | ||
3718 | goto pinit; | ||
3719 | 4059 | ||
3720 | /* we can reuse a block if it hasn't been written | 4060 | if (block_rsv->freed[0] == 0 && |
3721 | * and it is from this transaction. We can't | 4061 | block_rsv->freed[1] == 0) { |
3722 | * reuse anything from the tree log root because | 4062 | list_del_init(&block_rsv->list); |
3723 | * it has tiny sub-transactions. | 4063 | kfree(block_rsv); |
3724 | */ | 4064 | } |
3725 | if (btrfs_buffer_uptodate(buf, 0) && | 4065 | } else { |
3726 | btrfs_try_tree_lock(buf)) { | 4066 | btrfs_block_rsv_release(root, block_rsv, 0); |
3727 | u64 header_owner = btrfs_header_owner(buf); | ||
3728 | u64 header_transid = btrfs_header_generation(buf); | ||
3729 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | ||
3730 | header_transid == trans->transid && | ||
3731 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
3732 | *must_clean = buf; | ||
3733 | return 1; | ||
3734 | } | 4067 | } |
3735 | btrfs_tree_unlock(buf); | ||
3736 | } | 4068 | } |
3737 | free_extent_buffer(buf); | 4069 | mutex_unlock(&fs_info->durable_block_rsv_mutex); |
3738 | pinit: | ||
3739 | if (path) | ||
3740 | btrfs_set_path_blocking(path); | ||
3741 | /* unlocks the pinned mutex */ | ||
3742 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); | ||
3743 | 4070 | ||
3744 | BUG_ON(err < 0); | ||
3745 | return 0; | 4071 | return 0; |
3746 | } | 4072 | } |
3747 | 4073 | ||
@@ -3902,9 +4228,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3902 | BUG_ON(ret); | 4228 | BUG_ON(ret); |
3903 | } | 4229 | } |
3904 | } else { | 4230 | } else { |
3905 | int mark_free = 0; | ||
3906 | struct extent_buffer *must_clean = NULL; | ||
3907 | |||
3908 | if (found_extent) { | 4231 | if (found_extent) { |
3909 | BUG_ON(is_data && refs_to_drop != | 4232 | BUG_ON(is_data && refs_to_drop != |
3910 | extent_data_ref_count(root, path, iref)); | 4233 | extent_data_ref_count(root, path, iref)); |
@@ -3917,31 +4240,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3917 | } | 4240 | } |
3918 | } | 4241 | } |
3919 | 4242 | ||
3920 | ret = pin_down_bytes(trans, root, path, bytenr, | ||
3921 | num_bytes, is_data, 0, &must_clean); | ||
3922 | if (ret > 0) | ||
3923 | mark_free = 1; | ||
3924 | BUG_ON(ret < 0); | ||
3925 | /* | ||
3926 | * it is going to be very rare for someone to be waiting | ||
3927 | * on the block we're freeing. del_items might need to | ||
3928 | * schedule, so rather than get fancy, just force it | ||
3929 | * to blocking here | ||
3930 | */ | ||
3931 | if (must_clean) | ||
3932 | btrfs_set_lock_blocking(must_clean); | ||
3933 | |||
3934 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 4243 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
3935 | num_to_del); | 4244 | num_to_del); |
3936 | BUG_ON(ret); | 4245 | BUG_ON(ret); |
3937 | btrfs_release_path(extent_root, path); | 4246 | btrfs_release_path(extent_root, path); |
3938 | 4247 | ||
3939 | if (must_clean) { | ||
3940 | clean_tree_block(NULL, root, must_clean); | ||
3941 | btrfs_tree_unlock(must_clean); | ||
3942 | free_extent_buffer(must_clean); | ||
3943 | } | ||
3944 | |||
3945 | if (is_data) { | 4248 | if (is_data) { |
3946 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 4249 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
3947 | BUG_ON(ret); | 4250 | BUG_ON(ret); |
@@ -3951,8 +4254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3951 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | 4254 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); |
3952 | } | 4255 | } |
3953 | 4256 | ||
3954 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 4257 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
3955 | mark_free); | ||
3956 | BUG_ON(ret); | 4258 | BUG_ON(ret); |
3957 | } | 4259 | } |
3958 | btrfs_free_path(path); | 4260 | btrfs_free_path(path); |
@@ -3960,7 +4262,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
3960 | } | 4262 | } |
3961 | 4263 | ||
3962 | /* | 4264 | /* |
3963 | * when we free an extent, it is possible (and likely) that we free the last | 4265 | * when we free a block, it is possible (and likely) that we free the last |
3964 | * delayed ref for that extent as well. This searches the delayed ref tree for | 4266 | * delayed ref for that extent as well. This searches the delayed ref tree for |
3965 | * a given extent, and if there are no other delayed refs to be processed, it | 4267 | * a given extent, and if there are no other delayed refs to be processed, it |
3966 | * removes it from the tree. | 4268 | * removes it from the tree. |
@@ -3972,7 +4274,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
3972 | struct btrfs_delayed_ref_root *delayed_refs; | 4274 | struct btrfs_delayed_ref_root *delayed_refs; |
3973 | struct btrfs_delayed_ref_node *ref; | 4275 | struct btrfs_delayed_ref_node *ref; |
3974 | struct rb_node *node; | 4276 | struct rb_node *node; |
3975 | int ret; | 4277 | int ret = 0; |
3976 | 4278 | ||
3977 | delayed_refs = &trans->transaction->delayed_refs; | 4279 | delayed_refs = &trans->transaction->delayed_refs; |
3978 | spin_lock(&delayed_refs->lock); | 4280 | spin_lock(&delayed_refs->lock); |
@@ -4024,17 +4326,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
4024 | list_del_init(&head->cluster); | 4326 | list_del_init(&head->cluster); |
4025 | spin_unlock(&delayed_refs->lock); | 4327 | spin_unlock(&delayed_refs->lock); |
4026 | 4328 | ||
4027 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | 4329 | BUG_ON(head->extent_op); |
4028 | &head->node, head->extent_op, | 4330 | if (head->must_insert_reserved) |
4029 | head->must_insert_reserved); | 4331 | ret = 1; |
4030 | BUG_ON(ret); | 4332 | |
4333 | mutex_unlock(&head->mutex); | ||
4031 | btrfs_put_delayed_ref(&head->node); | 4334 | btrfs_put_delayed_ref(&head->node); |
4032 | return 0; | 4335 | return ret; |
4033 | out: | 4336 | out: |
4034 | spin_unlock(&delayed_refs->lock); | 4337 | spin_unlock(&delayed_refs->lock); |
4035 | return 0; | 4338 | return 0; |
4036 | } | 4339 | } |
4037 | 4340 | ||
4341 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
4342 | struct btrfs_root *root, | ||
4343 | struct extent_buffer *buf, | ||
4344 | u64 parent, int last_ref) | ||
4345 | { | ||
4346 | struct btrfs_block_rsv *block_rsv; | ||
4347 | struct btrfs_block_group_cache *cache = NULL; | ||
4348 | int ret; | ||
4349 | |||
4350 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4351 | ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, | ||
4352 | parent, root->root_key.objectid, | ||
4353 | btrfs_header_level(buf), | ||
4354 | BTRFS_DROP_DELAYED_REF, NULL); | ||
4355 | BUG_ON(ret); | ||
4356 | } | ||
4357 | |||
4358 | if (!last_ref) | ||
4359 | return; | ||
4360 | |||
4361 | block_rsv = get_block_rsv(trans, root); | ||
4362 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | ||
4363 | BUG_ON(block_rsv->space_info != cache->space_info); | ||
4364 | |||
4365 | if (btrfs_header_generation(buf) == trans->transid) { | ||
4366 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4367 | ret = check_ref_cleanup(trans, root, buf->start); | ||
4368 | if (!ret) | ||
4369 | goto pin; | ||
4370 | } | ||
4371 | |||
4372 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
4373 | pin_down_extent(root, cache, buf->start, buf->len, 1); | ||
4374 | goto pin; | ||
4375 | } | ||
4376 | |||
4377 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | ||
4378 | |||
4379 | btrfs_add_free_space(cache, buf->start, buf->len); | ||
4380 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | ||
4381 | if (ret == -EAGAIN) { | ||
4382 | /* block group became read-only */ | ||
4383 | update_reserved_bytes(cache, buf->len, 0, 1); | ||
4384 | goto out; | ||
4385 | } | ||
4386 | |||
4387 | ret = 1; | ||
4388 | spin_lock(&block_rsv->lock); | ||
4389 | if (block_rsv->reserved < block_rsv->size) { | ||
4390 | block_rsv->reserved += buf->len; | ||
4391 | ret = 0; | ||
4392 | } | ||
4393 | spin_unlock(&block_rsv->lock); | ||
4394 | |||
4395 | if (ret) { | ||
4396 | spin_lock(&cache->space_info->lock); | ||
4397 | cache->space_info->bytes_reserved -= buf->len; | ||
4398 | spin_unlock(&cache->space_info->lock); | ||
4399 | } | ||
4400 | goto out; | ||
4401 | } | ||
4402 | pin: | ||
4403 | if (block_rsv->durable && !cache->ro) { | ||
4404 | ret = 0; | ||
4405 | spin_lock(&cache->lock); | ||
4406 | if (!cache->ro) { | ||
4407 | cache->reserved_pinned += buf->len; | ||
4408 | ret = 1; | ||
4409 | } | ||
4410 | spin_unlock(&cache->lock); | ||
4411 | |||
4412 | if (ret) { | ||
4413 | spin_lock(&block_rsv->lock); | ||
4414 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
4415 | spin_unlock(&block_rsv->lock); | ||
4416 | } | ||
4417 | } | ||
4418 | out: | ||
4419 | btrfs_put_block_group(cache); | ||
4420 | } | ||
4421 | |||
4038 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 4422 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
4039 | struct btrfs_root *root, | 4423 | struct btrfs_root *root, |
4040 | u64 bytenr, u64 num_bytes, u64 parent, | 4424 | u64 bytenr, u64 num_bytes, u64 parent, |
@@ -4056,8 +4440,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4056 | parent, root_objectid, (int)owner, | 4440 | parent, root_objectid, (int)owner, |
4057 | BTRFS_DROP_DELAYED_REF, NULL); | 4441 | BTRFS_DROP_DELAYED_REF, NULL); |
4058 | BUG_ON(ret); | 4442 | BUG_ON(ret); |
4059 | ret = check_ref_cleanup(trans, root, bytenr); | ||
4060 | BUG_ON(ret); | ||
4061 | } else { | 4443 | } else { |
4062 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | 4444 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, |
4063 | parent, root_objectid, owner, | 4445 | parent, root_objectid, owner, |
@@ -4067,21 +4449,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
4067 | return ret; | 4449 | return ret; |
4068 | } | 4450 | } |
4069 | 4451 | ||
4070 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
4071 | struct btrfs_root *root, | ||
4072 | u64 bytenr, u32 blocksize, | ||
4073 | u64 parent, u64 root_objectid, int level) | ||
4074 | { | ||
4075 | u64 used; | ||
4076 | spin_lock(&root->node_lock); | ||
4077 | used = btrfs_root_used(&root->root_item) - blocksize; | ||
4078 | btrfs_set_root_used(&root->root_item, used); | ||
4079 | spin_unlock(&root->node_lock); | ||
4080 | |||
4081 | return btrfs_free_extent(trans, root, bytenr, blocksize, | ||
4082 | parent, root_objectid, level, 0); | ||
4083 | } | ||
4084 | |||
4085 | static u64 stripe_align(struct btrfs_root *root, u64 val) | 4452 | static u64 stripe_align(struct btrfs_root *root, u64 val) |
4086 | { | 4453 | { |
4087 | u64 mask = ((u64)root->stripesize - 1); | 4454 | u64 mask = ((u64)root->stripesize - 1); |
@@ -4134,6 +4501,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
4134 | return 0; | 4501 | return 0; |
4135 | } | 4502 | } |
4136 | 4503 | ||
4504 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | ||
4505 | { | ||
4506 | int index; | ||
4507 | if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) | ||
4508 | index = 0; | ||
4509 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) | ||
4510 | index = 1; | ||
4511 | else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) | ||
4512 | index = 2; | ||
4513 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) | ||
4514 | index = 3; | ||
4515 | else | ||
4516 | index = 4; | ||
4517 | return index; | ||
4518 | } | ||
4519 | |||
4137 | enum btrfs_loop_type { | 4520 | enum btrfs_loop_type { |
4138 | LOOP_FIND_IDEAL = 0, | 4521 | LOOP_FIND_IDEAL = 0, |
4139 | LOOP_CACHING_NOWAIT = 1, | 4522 | LOOP_CACHING_NOWAIT = 1, |
@@ -4155,7 +4538,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4155 | u64 num_bytes, u64 empty_size, | 4538 | u64 num_bytes, u64 empty_size, |
4156 | u64 search_start, u64 search_end, | 4539 | u64 search_start, u64 search_end, |
4157 | u64 hint_byte, struct btrfs_key *ins, | 4540 | u64 hint_byte, struct btrfs_key *ins, |
4158 | u64 exclude_start, u64 exclude_nr, | ||
4159 | int data) | 4541 | int data) |
4160 | { | 4542 | { |
4161 | int ret = 0; | 4543 | int ret = 0; |
@@ -4168,6 +4550,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
4168 | struct btrfs_space_info *space_info; | 4550 | struct btrfs_space_info *space_info; |
4169 | int last_ptr_loop = 0; | 4551 | int last_ptr_loop = 0; |
4170 | int loop = 0; | 4552 | int loop = 0; |
4553 | int index = 0; | ||
4171 | bool found_uncached_bg = false; | 4554 | bool found_uncached_bg = false; |
4172 | bool failed_cluster_refill = false; | 4555 | bool failed_cluster_refill = false; |
4173 | bool failed_alloc = false; | 4556 | bool failed_alloc = false; |
@@ -4237,6 +4620,7 @@ ideal_cache: | |||
4237 | btrfs_put_block_group(block_group); | 4620 | btrfs_put_block_group(block_group); |
4238 | up_read(&space_info->groups_sem); | 4621 | up_read(&space_info->groups_sem); |
4239 | } else { | 4622 | } else { |
4623 | index = get_block_group_index(block_group); | ||
4240 | goto have_block_group; | 4624 | goto have_block_group; |
4241 | } | 4625 | } |
4242 | } else if (block_group) { | 4626 | } else if (block_group) { |
@@ -4245,7 +4629,8 @@ ideal_cache: | |||
4245 | } | 4629 | } |
4246 | search: | 4630 | search: |
4247 | down_read(&space_info->groups_sem); | 4631 | down_read(&space_info->groups_sem); |
4248 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4632 | list_for_each_entry(block_group, &space_info->block_groups[index], |
4633 | list) { | ||
4249 | u64 offset; | 4634 | u64 offset; |
4250 | int cached; | 4635 | int cached; |
4251 | 4636 | ||
@@ -4436,23 +4821,22 @@ checks: | |||
4436 | goto loop; | 4821 | goto loop; |
4437 | } | 4822 | } |
4438 | 4823 | ||
4439 | if (exclude_nr > 0 && | 4824 | ins->objectid = search_start; |
4440 | (search_start + num_bytes > exclude_start && | 4825 | ins->offset = num_bytes; |
4441 | search_start < exclude_start + exclude_nr)) { | 4826 | |
4442 | search_start = exclude_start + exclude_nr; | 4827 | if (offset < search_start) |
4828 | btrfs_add_free_space(block_group, offset, | ||
4829 | search_start - offset); | ||
4830 | BUG_ON(offset > search_start); | ||
4443 | 4831 | ||
4832 | ret = update_reserved_bytes(block_group, num_bytes, 1, | ||
4833 | (data & BTRFS_BLOCK_GROUP_DATA)); | ||
4834 | if (ret == -EAGAIN) { | ||
4444 | btrfs_add_free_space(block_group, offset, num_bytes); | 4835 | btrfs_add_free_space(block_group, offset, num_bytes); |
4445 | /* | ||
4446 | * if search_start is still in this block group | ||
4447 | * then we just re-search this block group | ||
4448 | */ | ||
4449 | if (search_start >= block_group->key.objectid && | ||
4450 | search_start < (block_group->key.objectid + | ||
4451 | block_group->key.offset)) | ||
4452 | goto have_block_group; | ||
4453 | goto loop; | 4836 | goto loop; |
4454 | } | 4837 | } |
4455 | 4838 | ||
4839 | /* we are all good, lets return */ | ||
4456 | ins->objectid = search_start; | 4840 | ins->objectid = search_start; |
4457 | ins->offset = num_bytes; | 4841 | ins->offset = num_bytes; |
4458 | 4842 | ||
@@ -4460,18 +4844,18 @@ checks: | |||
4460 | btrfs_add_free_space(block_group, offset, | 4844 | btrfs_add_free_space(block_group, offset, |
4461 | search_start - offset); | 4845 | search_start - offset); |
4462 | BUG_ON(offset > search_start); | 4846 | BUG_ON(offset > search_start); |
4463 | |||
4464 | update_reserved_extents(block_group, num_bytes, 1); | ||
4465 | |||
4466 | /* we are all good, lets return */ | ||
4467 | break; | 4847 | break; |
4468 | loop: | 4848 | loop: |
4469 | failed_cluster_refill = false; | 4849 | failed_cluster_refill = false; |
4470 | failed_alloc = false; | 4850 | failed_alloc = false; |
4851 | BUG_ON(index != get_block_group_index(block_group)); | ||
4471 | btrfs_put_block_group(block_group); | 4852 | btrfs_put_block_group(block_group); |
4472 | } | 4853 | } |
4473 | up_read(&space_info->groups_sem); | 4854 | up_read(&space_info->groups_sem); |
4474 | 4855 | ||
4856 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | ||
4857 | goto search; | ||
4858 | |||
4475 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for | 4859 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for |
4476 | * for them to make caching progress. Also | 4860 | * for them to make caching progress. Also |
4477 | * determine the best possible bg to cache | 4861 | * determine the best possible bg to cache |
@@ -4485,6 +4869,7 @@ loop: | |||
4485 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 4869 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
4486 | (found_uncached_bg || empty_size || empty_cluster || | 4870 | (found_uncached_bg || empty_size || empty_cluster || |
4487 | allowed_chunk_alloc)) { | 4871 | allowed_chunk_alloc)) { |
4872 | index = 0; | ||
4488 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 4873 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
4489 | found_uncached_bg = false; | 4874 | found_uncached_bg = false; |
4490 | loop++; | 4875 | loop++; |
@@ -4567,31 +4952,30 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
4567 | int dump_block_groups) | 4952 | int dump_block_groups) |
4568 | { | 4953 | { |
4569 | struct btrfs_block_group_cache *cache; | 4954 | struct btrfs_block_group_cache *cache; |
4955 | int index = 0; | ||
4570 | 4956 | ||
4571 | spin_lock(&info->lock); | 4957 | spin_lock(&info->lock); |
4572 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4958 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
4573 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4959 | (unsigned long long)(info->total_bytes - info->bytes_used - |
4574 | info->bytes_pinned - info->bytes_reserved - | 4960 | info->bytes_pinned - info->bytes_reserved - |
4575 | info->bytes_super), | 4961 | info->bytes_readonly), |
4576 | (info->full) ? "" : "not "); | 4962 | (info->full) ? "" : "not "); |
4577 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4963 | printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " |
4578 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" | 4964 | "reserved=%llu, may_use=%llu, readonly=%llu\n", |
4579 | "\n", | ||
4580 | (unsigned long long)info->total_bytes, | 4965 | (unsigned long long)info->total_bytes, |
4966 | (unsigned long long)info->bytes_used, | ||
4581 | (unsigned long long)info->bytes_pinned, | 4967 | (unsigned long long)info->bytes_pinned, |
4582 | (unsigned long long)info->bytes_delalloc, | 4968 | (unsigned long long)info->bytes_reserved, |
4583 | (unsigned long long)info->bytes_may_use, | 4969 | (unsigned long long)info->bytes_may_use, |
4584 | (unsigned long long)info->bytes_used, | 4970 | (unsigned long long)info->bytes_readonly); |
4585 | (unsigned long long)info->bytes_root, | ||
4586 | (unsigned long long)info->bytes_super, | ||
4587 | (unsigned long long)info->bytes_reserved); | ||
4588 | spin_unlock(&info->lock); | 4971 | spin_unlock(&info->lock); |
4589 | 4972 | ||
4590 | if (!dump_block_groups) | 4973 | if (!dump_block_groups) |
4591 | return; | 4974 | return; |
4592 | 4975 | ||
4593 | down_read(&info->groups_sem); | 4976 | down_read(&info->groups_sem); |
4594 | list_for_each_entry(cache, &info->block_groups, list) { | 4977 | again: |
4978 | list_for_each_entry(cache, &info->block_groups[index], list) { | ||
4595 | spin_lock(&cache->lock); | 4979 | spin_lock(&cache->lock); |
4596 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 4980 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " |
4597 | "%llu pinned %llu reserved\n", | 4981 | "%llu pinned %llu reserved\n", |
@@ -4603,6 +4987,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
4603 | btrfs_dump_free_space(cache, bytes); | 4987 | btrfs_dump_free_space(cache, bytes); |
4604 | spin_unlock(&cache->lock); | 4988 | spin_unlock(&cache->lock); |
4605 | } | 4989 | } |
4990 | if (++index < BTRFS_NR_RAID_TYPES) | ||
4991 | goto again; | ||
4606 | up_read(&info->groups_sem); | 4992 | up_read(&info->groups_sem); |
4607 | } | 4993 | } |
4608 | 4994 | ||
@@ -4628,9 +5014,8 @@ again: | |||
4628 | 5014 | ||
4629 | WARN_ON(num_bytes < root->sectorsize); | 5015 | WARN_ON(num_bytes < root->sectorsize); |
4630 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5016 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
4631 | search_start, search_end, hint_byte, ins, | 5017 | search_start, search_end, hint_byte, |
4632 | trans->alloc_exclude_start, | 5018 | ins, data); |
4633 | trans->alloc_exclude_nr, data); | ||
4634 | 5019 | ||
4635 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | 5020 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { |
4636 | num_bytes = num_bytes >> 1; | 5021 | num_bytes = num_bytes >> 1; |
@@ -4668,7 +5053,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
4668 | ret = btrfs_discard_extent(root, start, len); | 5053 | ret = btrfs_discard_extent(root, start, len); |
4669 | 5054 | ||
4670 | btrfs_add_free_space(cache, start, len); | 5055 | btrfs_add_free_space(cache, start, len); |
4671 | update_reserved_extents(cache, len, 0); | 5056 | update_reserved_bytes(cache, len, 0, 1); |
4672 | btrfs_put_block_group(cache); | 5057 | btrfs_put_block_group(cache); |
4673 | 5058 | ||
4674 | return ret; | 5059 | return ret; |
@@ -4731,8 +5116,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
4731 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5116 | btrfs_mark_buffer_dirty(path->nodes[0]); |
4732 | btrfs_free_path(path); | 5117 | btrfs_free_path(path); |
4733 | 5118 | ||
4734 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5119 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
4735 | 1, 0); | ||
4736 | if (ret) { | 5120 | if (ret) { |
4737 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5121 | printk(KERN_ERR "btrfs update block group failed for %llu " |
4738 | "%llu\n", (unsigned long long)ins->objectid, | 5122 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -4792,8 +5176,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
4792 | btrfs_mark_buffer_dirty(leaf); | 5176 | btrfs_mark_buffer_dirty(leaf); |
4793 | btrfs_free_path(path); | 5177 | btrfs_free_path(path); |
4794 | 5178 | ||
4795 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5179 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
4796 | 1, 0); | ||
4797 | if (ret) { | 5180 | if (ret) { |
4798 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5181 | printk(KERN_ERR "btrfs update block group failed for %llu " |
4799 | "%llu\n", (unsigned long long)ins->objectid, | 5182 | "%llu\n", (unsigned long long)ins->objectid, |
@@ -4869,73 +5252,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
4869 | put_caching_control(caching_ctl); | 5252 | put_caching_control(caching_ctl); |
4870 | } | 5253 | } |
4871 | 5254 | ||
4872 | update_reserved_extents(block_group, ins->offset, 1); | 5255 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); |
5256 | BUG_ON(ret); | ||
4873 | btrfs_put_block_group(block_group); | 5257 | btrfs_put_block_group(block_group); |
4874 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5258 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
4875 | 0, owner, offset, ins, 1); | 5259 | 0, owner, offset, ins, 1); |
4876 | return ret; | 5260 | return ret; |
4877 | } | 5261 | } |
4878 | 5262 | ||
4879 | /* | ||
4880 | * finds a free extent and does all the dirty work required for allocation | ||
4881 | * returns the key for the extent through ins, and a tree buffer for | ||
4882 | * the first block of the extent through buf. | ||
4883 | * | ||
4884 | * returns 0 if everything worked, non-zero otherwise. | ||
4885 | */ | ||
4886 | static int alloc_tree_block(struct btrfs_trans_handle *trans, | ||
4887 | struct btrfs_root *root, | ||
4888 | u64 num_bytes, u64 parent, u64 root_objectid, | ||
4889 | struct btrfs_disk_key *key, int level, | ||
4890 | u64 empty_size, u64 hint_byte, u64 search_end, | ||
4891 | struct btrfs_key *ins) | ||
4892 | { | ||
4893 | int ret; | ||
4894 | u64 flags = 0; | ||
4895 | |||
4896 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | ||
4897 | empty_size, hint_byte, search_end, | ||
4898 | ins, 0); | ||
4899 | if (ret) | ||
4900 | return ret; | ||
4901 | |||
4902 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
4903 | if (parent == 0) | ||
4904 | parent = ins->objectid; | ||
4905 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
4906 | } else | ||
4907 | BUG_ON(parent > 0); | ||
4908 | |||
4909 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
4910 | struct btrfs_delayed_extent_op *extent_op; | ||
4911 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
4912 | BUG_ON(!extent_op); | ||
4913 | if (key) | ||
4914 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
4915 | else | ||
4916 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
4917 | extent_op->flags_to_set = flags; | ||
4918 | extent_op->update_key = 1; | ||
4919 | extent_op->update_flags = 1; | ||
4920 | extent_op->is_data = 0; | ||
4921 | |||
4922 | ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, | ||
4923 | ins->offset, parent, root_objectid, | ||
4924 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
4925 | extent_op); | ||
4926 | BUG_ON(ret); | ||
4927 | } | ||
4928 | |||
4929 | if (root_objectid == root->root_key.objectid) { | ||
4930 | u64 used; | ||
4931 | spin_lock(&root->node_lock); | ||
4932 | used = btrfs_root_used(&root->root_item) + num_bytes; | ||
4933 | btrfs_set_root_used(&root->root_item, used); | ||
4934 | spin_unlock(&root->node_lock); | ||
4935 | } | ||
4936 | return ret; | ||
4937 | } | ||
4938 | |||
4939 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 5263 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
4940 | struct btrfs_root *root, | 5264 | struct btrfs_root *root, |
4941 | u64 bytenr, u32 blocksize, | 5265 | u64 bytenr, u32 blocksize, |
@@ -4974,8 +5298,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
4974 | return buf; | 5298 | return buf; |
4975 | } | 5299 | } |
4976 | 5300 | ||
5301 | static struct btrfs_block_rsv * | ||
5302 | use_block_rsv(struct btrfs_trans_handle *trans, | ||
5303 | struct btrfs_root *root, u32 blocksize) | ||
5304 | { | ||
5305 | struct btrfs_block_rsv *block_rsv; | ||
5306 | int ret; | ||
5307 | |||
5308 | block_rsv = get_block_rsv(trans, root); | ||
5309 | |||
5310 | if (block_rsv->size == 0) { | ||
5311 | ret = reserve_metadata_bytes(block_rsv, blocksize); | ||
5312 | if (ret) | ||
5313 | return ERR_PTR(ret); | ||
5314 | return block_rsv; | ||
5315 | } | ||
5316 | |||
5317 | ret = block_rsv_use_bytes(block_rsv, blocksize); | ||
5318 | if (!ret) | ||
5319 | return block_rsv; | ||
5320 | |||
5321 | WARN_ON(1); | ||
5322 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
5323 | block_rsv->size, block_rsv->reserved, | ||
5324 | block_rsv->freed[0], block_rsv->freed[1]); | ||
5325 | |||
5326 | return ERR_PTR(-ENOSPC); | ||
5327 | } | ||
5328 | |||
5329 | static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) | ||
5330 | { | ||
5331 | block_rsv_add_bytes(block_rsv, blocksize, 0); | ||
5332 | block_rsv_release_bytes(block_rsv, NULL, 0); | ||
5333 | } | ||
5334 | |||
4977 | /* | 5335 | /* |
4978 | * helper function to allocate a block for a given tree | 5336 | * finds a free extent and does all the dirty work required for allocation |
5337 | * returns the key for the extent through ins, and a tree buffer for | ||
5338 | * the first block of the extent through buf. | ||
5339 | * | ||
4979 | * returns the tree buffer or NULL. | 5340 | * returns the tree buffer or NULL. |
4980 | */ | 5341 | */ |
4981 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 5342 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, |
@@ -4985,18 +5346,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
4985 | u64 hint, u64 empty_size) | 5346 | u64 hint, u64 empty_size) |
4986 | { | 5347 | { |
4987 | struct btrfs_key ins; | 5348 | struct btrfs_key ins; |
4988 | int ret; | 5349 | struct btrfs_block_rsv *block_rsv; |
4989 | struct extent_buffer *buf; | 5350 | struct extent_buffer *buf; |
5351 | u64 flags = 0; | ||
5352 | int ret; | ||
5353 | |||
4990 | 5354 | ||
4991 | ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, | 5355 | block_rsv = use_block_rsv(trans, root, blocksize); |
4992 | key, level, empty_size, hint, (u64)-1, &ins); | 5356 | if (IS_ERR(block_rsv)) |
5357 | return ERR_CAST(block_rsv); | ||
5358 | |||
5359 | ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, | ||
5360 | empty_size, hint, (u64)-1, &ins, 0); | ||
4993 | if (ret) { | 5361 | if (ret) { |
4994 | BUG_ON(ret > 0); | 5362 | unuse_block_rsv(block_rsv, blocksize); |
4995 | return ERR_PTR(ret); | 5363 | return ERR_PTR(ret); |
4996 | } | 5364 | } |
4997 | 5365 | ||
4998 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, | 5366 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
4999 | blocksize, level); | 5367 | blocksize, level); |
5368 | BUG_ON(IS_ERR(buf)); | ||
5369 | |||
5370 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
5371 | if (parent == 0) | ||
5372 | parent = ins.objectid; | ||
5373 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
5374 | } else | ||
5375 | BUG_ON(parent > 0); | ||
5376 | |||
5377 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
5378 | struct btrfs_delayed_extent_op *extent_op; | ||
5379 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
5380 | BUG_ON(!extent_op); | ||
5381 | if (key) | ||
5382 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
5383 | else | ||
5384 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
5385 | extent_op->flags_to_set = flags; | ||
5386 | extent_op->update_key = 1; | ||
5387 | extent_op->update_flags = 1; | ||
5388 | extent_op->is_data = 0; | ||
5389 | |||
5390 | ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, | ||
5391 | ins.offset, parent, root_objectid, | ||
5392 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
5393 | extent_op); | ||
5394 | BUG_ON(ret); | ||
5395 | } | ||
5000 | return buf; | 5396 | return buf; |
5001 | } | 5397 | } |
5002 | 5398 | ||
@@ -5321,7 +5717,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
5321 | struct btrfs_path *path, | 5717 | struct btrfs_path *path, |
5322 | struct walk_control *wc) | 5718 | struct walk_control *wc) |
5323 | { | 5719 | { |
5324 | int ret = 0; | 5720 | int ret; |
5325 | int level = wc->level; | 5721 | int level = wc->level; |
5326 | struct extent_buffer *eb = path->nodes[level]; | 5722 | struct extent_buffer *eb = path->nodes[level]; |
5327 | u64 parent = 0; | 5723 | u64 parent = 0; |
@@ -5399,13 +5795,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
5399 | btrfs_header_owner(path->nodes[level + 1])); | 5795 | btrfs_header_owner(path->nodes[level + 1])); |
5400 | } | 5796 | } |
5401 | 5797 | ||
5402 | ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, | 5798 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
5403 | root->root_key.objectid, level, 0); | ||
5404 | BUG_ON(ret); | ||
5405 | out: | 5799 | out: |
5406 | wc->refs[level] = 0; | 5800 | wc->refs[level] = 0; |
5407 | wc->flags[level] = 0; | 5801 | wc->flags[level] = 0; |
5408 | return ret; | 5802 | return 0; |
5409 | } | 5803 | } |
5410 | 5804 | ||
5411 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | 5805 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, |
@@ -5483,7 +5877,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
5483 | * also make sure backrefs for the shared block and all lower level | 5877 | * also make sure backrefs for the shared block and all lower level |
5484 | * blocks are properly updated. | 5878 | * blocks are properly updated. |
5485 | */ | 5879 | */ |
5486 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | 5880 | int btrfs_drop_snapshot(struct btrfs_root *root, |
5881 | struct btrfs_block_rsv *block_rsv, int update_ref) | ||
5487 | { | 5882 | { |
5488 | struct btrfs_path *path; | 5883 | struct btrfs_path *path; |
5489 | struct btrfs_trans_handle *trans; | 5884 | struct btrfs_trans_handle *trans; |
@@ -5501,7 +5896,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5501 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 5896 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
5502 | BUG_ON(!wc); | 5897 | BUG_ON(!wc); |
5503 | 5898 | ||
5504 | trans = btrfs_start_transaction(tree_root, 1); | 5899 | trans = btrfs_start_transaction(tree_root, 0); |
5900 | if (block_rsv) | ||
5901 | trans->block_rsv = block_rsv; | ||
5505 | 5902 | ||
5506 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 5903 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
5507 | level = btrfs_header_level(root->node); | 5904 | level = btrfs_header_level(root->node); |
@@ -5589,22 +5986,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5589 | } | 5986 | } |
5590 | 5987 | ||
5591 | BUG_ON(wc->level == 0); | 5988 | BUG_ON(wc->level == 0); |
5592 | if (trans->transaction->in_commit || | 5989 | if (btrfs_should_end_transaction(trans, tree_root)) { |
5593 | trans->transaction->delayed_refs.flushing) { | ||
5594 | ret = btrfs_update_root(trans, tree_root, | 5990 | ret = btrfs_update_root(trans, tree_root, |
5595 | &root->root_key, | 5991 | &root->root_key, |
5596 | root_item); | 5992 | root_item); |
5597 | BUG_ON(ret); | 5993 | BUG_ON(ret); |
5598 | 5994 | ||
5599 | btrfs_end_transaction(trans, tree_root); | 5995 | btrfs_end_transaction_throttle(trans, tree_root); |
5600 | trans = btrfs_start_transaction(tree_root, 1); | 5996 | trans = btrfs_start_transaction(tree_root, 0); |
5601 | } else { | 5997 | if (block_rsv) |
5602 | unsigned long update; | 5998 | trans->block_rsv = block_rsv; |
5603 | update = trans->delayed_ref_updates; | ||
5604 | trans->delayed_ref_updates = 0; | ||
5605 | if (update) | ||
5606 | btrfs_run_delayed_refs(trans, tree_root, | ||
5607 | update); | ||
5608 | } | 5999 | } |
5609 | } | 6000 | } |
5610 | btrfs_release_path(root, path); | 6001 | btrfs_release_path(root, path); |
@@ -5632,7 +6023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
5632 | kfree(root); | 6023 | kfree(root); |
5633 | } | 6024 | } |
5634 | out: | 6025 | out: |
5635 | btrfs_end_transaction(trans, tree_root); | 6026 | btrfs_end_transaction_throttle(trans, tree_root); |
5636 | kfree(wc); | 6027 | kfree(wc); |
5637 | btrfs_free_path(path); | 6028 | btrfs_free_path(path); |
5638 | return err; | 6029 | return err; |
@@ -7228,48 +7619,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
7228 | return flags; | 7619 | return flags; |
7229 | } | 7620 | } |
7230 | 7621 | ||
7231 | static int __alloc_chunk_for_shrink(struct btrfs_root *root, | 7622 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) |
7232 | struct btrfs_block_group_cache *shrink_block_group, | ||
7233 | int force) | ||
7234 | { | 7623 | { |
7235 | struct btrfs_trans_handle *trans; | 7624 | struct btrfs_space_info *sinfo = cache->space_info; |
7236 | u64 new_alloc_flags; | 7625 | u64 num_bytes; |
7237 | u64 calc; | 7626 | int ret = -ENOSPC; |
7238 | 7627 | ||
7239 | spin_lock(&shrink_block_group->lock); | 7628 | if (cache->ro) |
7240 | if (btrfs_block_group_used(&shrink_block_group->item) + | 7629 | return 0; |
7241 | shrink_block_group->reserved > 0) { | ||
7242 | spin_unlock(&shrink_block_group->lock); | ||
7243 | 7630 | ||
7244 | trans = btrfs_start_transaction(root, 1); | 7631 | spin_lock(&sinfo->lock); |
7245 | spin_lock(&shrink_block_group->lock); | 7632 | spin_lock(&cache->lock); |
7633 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
7634 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
7635 | |||
7636 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | ||
7637 | sinfo->bytes_may_use + sinfo->bytes_readonly + | ||
7638 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | ||
7639 | sinfo->bytes_readonly += num_bytes; | ||
7640 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
7641 | cache->reserved_pinned = 0; | ||
7642 | cache->ro = 1; | ||
7643 | ret = 0; | ||
7644 | } | ||
7645 | spin_unlock(&cache->lock); | ||
7646 | spin_unlock(&sinfo->lock); | ||
7647 | return ret; | ||
7648 | } | ||
7246 | 7649 | ||
7247 | new_alloc_flags = update_block_group_flags(root, | 7650 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
7248 | shrink_block_group->flags); | 7651 | struct btrfs_block_group_cache *cache) |
7249 | if (new_alloc_flags != shrink_block_group->flags) { | ||
7250 | calc = | ||
7251 | btrfs_block_group_used(&shrink_block_group->item); | ||
7252 | } else { | ||
7253 | calc = shrink_block_group->key.offset; | ||
7254 | } | ||
7255 | spin_unlock(&shrink_block_group->lock); | ||
7256 | 7652 | ||
7257 | do_chunk_alloc(trans, root->fs_info->extent_root, | 7653 | { |
7258 | calc + 2 * 1024 * 1024, new_alloc_flags, force); | 7654 | struct btrfs_trans_handle *trans; |
7655 | u64 alloc_flags; | ||
7656 | int ret; | ||
7259 | 7657 | ||
7260 | btrfs_end_transaction(trans, root); | 7658 | BUG_ON(cache->ro); |
7261 | } else | 7659 | |
7262 | spin_unlock(&shrink_block_group->lock); | 7660 | trans = btrfs_join_transaction(root, 1); |
7263 | return 0; | 7661 | BUG_ON(IS_ERR(trans)); |
7264 | } | ||
7265 | 7662 | ||
7663 | alloc_flags = update_block_group_flags(root, cache->flags); | ||
7664 | if (alloc_flags != cache->flags) | ||
7665 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
7266 | 7666 | ||
7267 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | 7667 | ret = set_block_group_ro(cache); |
7268 | struct btrfs_block_group_cache *group) | 7668 | if (!ret) |
7669 | goto out; | ||
7670 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | ||
7671 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
7672 | if (ret < 0) | ||
7673 | goto out; | ||
7674 | ret = set_block_group_ro(cache); | ||
7675 | out: | ||
7676 | btrfs_end_transaction(trans, root); | ||
7677 | return ret; | ||
7678 | } | ||
7269 | 7679 | ||
7680 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
7681 | struct btrfs_block_group_cache *cache) | ||
7270 | { | 7682 | { |
7271 | __alloc_chunk_for_shrink(root, group, 1); | 7683 | struct btrfs_space_info *sinfo = cache->space_info; |
7272 | set_block_group_readonly(group); | 7684 | u64 num_bytes; |
7685 | |||
7686 | BUG_ON(!cache->ro); | ||
7687 | |||
7688 | spin_lock(&sinfo->lock); | ||
7689 | spin_lock(&cache->lock); | ||
7690 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
7691 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
7692 | sinfo->bytes_readonly -= num_bytes; | ||
7693 | cache->ro = 0; | ||
7694 | spin_unlock(&cache->lock); | ||
7695 | spin_unlock(&sinfo->lock); | ||
7273 | return 0; | 7696 | return 0; |
7274 | } | 7697 | } |
7275 | 7698 | ||
@@ -7436,17 +7859,33 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
7436 | */ | 7859 | */ |
7437 | synchronize_rcu(); | 7860 | synchronize_rcu(); |
7438 | 7861 | ||
7862 | release_global_block_rsv(info); | ||
7863 | |||
7439 | while(!list_empty(&info->space_info)) { | 7864 | while(!list_empty(&info->space_info)) { |
7440 | space_info = list_entry(info->space_info.next, | 7865 | space_info = list_entry(info->space_info.next, |
7441 | struct btrfs_space_info, | 7866 | struct btrfs_space_info, |
7442 | list); | 7867 | list); |
7443 | 7868 | if (space_info->bytes_pinned > 0 || | |
7869 | space_info->bytes_reserved > 0) { | ||
7870 | WARN_ON(1); | ||
7871 | dump_space_info(space_info, 0, 0); | ||
7872 | } | ||
7444 | list_del(&space_info->list); | 7873 | list_del(&space_info->list); |
7445 | kfree(space_info); | 7874 | kfree(space_info); |
7446 | } | 7875 | } |
7447 | return 0; | 7876 | return 0; |
7448 | } | 7877 | } |
7449 | 7878 | ||
7879 | static void __link_block_group(struct btrfs_space_info *space_info, | ||
7880 | struct btrfs_block_group_cache *cache) | ||
7881 | { | ||
7882 | int index = get_block_group_index(cache); | ||
7883 | |||
7884 | down_write(&space_info->groups_sem); | ||
7885 | list_add_tail(&cache->list, &space_info->block_groups[index]); | ||
7886 | up_write(&space_info->groups_sem); | ||
7887 | } | ||
7888 | |||
7450 | int btrfs_read_block_groups(struct btrfs_root *root) | 7889 | int btrfs_read_block_groups(struct btrfs_root *root) |
7451 | { | 7890 | { |
7452 | struct btrfs_path *path; | 7891 | struct btrfs_path *path; |
@@ -7468,10 +7907,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7468 | 7907 | ||
7469 | while (1) { | 7908 | while (1) { |
7470 | ret = find_first_block_group(root, path, &key); | 7909 | ret = find_first_block_group(root, path, &key); |
7471 | if (ret > 0) { | 7910 | if (ret > 0) |
7472 | ret = 0; | 7911 | break; |
7473 | goto error; | ||
7474 | } | ||
7475 | if (ret != 0) | 7912 | if (ret != 0) |
7476 | goto error; | 7913 | goto error; |
7477 | 7914 | ||
@@ -7480,7 +7917,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7480 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 7917 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
7481 | if (!cache) { | 7918 | if (!cache) { |
7482 | ret = -ENOMEM; | 7919 | ret = -ENOMEM; |
7483 | break; | 7920 | goto error; |
7484 | } | 7921 | } |
7485 | 7922 | ||
7486 | atomic_set(&cache->count, 1); | 7923 | atomic_set(&cache->count, 1); |
@@ -7537,20 +7974,36 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
7537 | BUG_ON(ret); | 7974 | BUG_ON(ret); |
7538 | cache->space_info = space_info; | 7975 | cache->space_info = space_info; |
7539 | spin_lock(&cache->space_info->lock); | 7976 | spin_lock(&cache->space_info->lock); |
7540 | cache->space_info->bytes_super += cache->bytes_super; | 7977 | cache->space_info->bytes_readonly += cache->bytes_super; |
7541 | spin_unlock(&cache->space_info->lock); | 7978 | spin_unlock(&cache->space_info->lock); |
7542 | 7979 | ||
7543 | down_write(&space_info->groups_sem); | 7980 | __link_block_group(space_info, cache); |
7544 | list_add_tail(&cache->list, &space_info->block_groups); | ||
7545 | up_write(&space_info->groups_sem); | ||
7546 | 7981 | ||
7547 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 7982 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7548 | BUG_ON(ret); | 7983 | BUG_ON(ret); |
7549 | 7984 | ||
7550 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7985 | set_avail_alloc_bits(root->fs_info, cache->flags); |
7551 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7986 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
7552 | set_block_group_readonly(cache); | 7987 | set_block_group_ro(cache); |
7553 | } | 7988 | } |
7989 | |||
7990 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | ||
7991 | if (!(get_alloc_profile(root, space_info->flags) & | ||
7992 | (BTRFS_BLOCK_GROUP_RAID10 | | ||
7993 | BTRFS_BLOCK_GROUP_RAID1 | | ||
7994 | BTRFS_BLOCK_GROUP_DUP))) | ||
7995 | continue; | ||
7996 | /* | ||
7997 | * avoid allocating from un-mirrored block group if there are | ||
7998 | * mirrored block groups. | ||
7999 | */ | ||
8000 | list_for_each_entry(cache, &space_info->block_groups[3], list) | ||
8001 | set_block_group_ro(cache); | ||
8002 | list_for_each_entry(cache, &space_info->block_groups[4], list) | ||
8003 | set_block_group_ro(cache); | ||
8004 | } | ||
8005 | |||
8006 | init_global_block_rsv(info); | ||
7554 | ret = 0; | 8007 | ret = 0; |
7555 | error: | 8008 | error: |
7556 | btrfs_free_path(path); | 8009 | btrfs_free_path(path); |
@@ -7611,12 +8064,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
7611 | BUG_ON(ret); | 8064 | BUG_ON(ret); |
7612 | 8065 | ||
7613 | spin_lock(&cache->space_info->lock); | 8066 | spin_lock(&cache->space_info->lock); |
7614 | cache->space_info->bytes_super += cache->bytes_super; | 8067 | cache->space_info->bytes_readonly += cache->bytes_super; |
7615 | spin_unlock(&cache->space_info->lock); | 8068 | spin_unlock(&cache->space_info->lock); |
7616 | 8069 | ||
7617 | down_write(&cache->space_info->groups_sem); | 8070 | __link_block_group(cache->space_info, cache); |
7618 | list_add_tail(&cache->list, &cache->space_info->block_groups); | ||
7619 | up_write(&cache->space_info->groups_sem); | ||
7620 | 8071 | ||
7621 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 8072 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
7622 | BUG_ON(ret); | 8073 | BUG_ON(ret); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d2d03684fab2..a4080c21ec55 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
135 | return state; | 135 | return state; |
136 | } | 136 | } |
137 | 137 | ||
138 | static void free_extent_state(struct extent_state *state) | 138 | void free_extent_state(struct extent_state *state) |
139 | { | 139 | { |
140 | if (!state) | 140 | if (!state) |
141 | return; | 141 | return; |
@@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree, | |||
335 | } | 335 | } |
336 | 336 | ||
337 | static int set_state_cb(struct extent_io_tree *tree, | 337 | static int set_state_cb(struct extent_io_tree *tree, |
338 | struct extent_state *state, | 338 | struct extent_state *state, int *bits) |
339 | unsigned long bits) | ||
340 | { | 339 | { |
341 | if (tree->ops && tree->ops->set_bit_hook) { | 340 | if (tree->ops && tree->ops->set_bit_hook) { |
342 | return tree->ops->set_bit_hook(tree->mapping->host, | 341 | return tree->ops->set_bit_hook(tree->mapping->host, |
343 | state->start, state->end, | 342 | state, bits); |
344 | state->state, bits); | ||
345 | } | 343 | } |
346 | 344 | ||
347 | return 0; | 345 | return 0; |
348 | } | 346 | } |
349 | 347 | ||
350 | static void clear_state_cb(struct extent_io_tree *tree, | 348 | static void clear_state_cb(struct extent_io_tree *tree, |
351 | struct extent_state *state, | 349 | struct extent_state *state, int *bits) |
352 | unsigned long bits) | ||
353 | { | 350 | { |
354 | if (tree->ops && tree->ops->clear_bit_hook) | 351 | if (tree->ops && tree->ops->clear_bit_hook) |
355 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 352 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
@@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
367 | */ | 364 | */ |
368 | static int insert_state(struct extent_io_tree *tree, | 365 | static int insert_state(struct extent_io_tree *tree, |
369 | struct extent_state *state, u64 start, u64 end, | 366 | struct extent_state *state, u64 start, u64 end, |
370 | int bits) | 367 | int *bits) |
371 | { | 368 | { |
372 | struct rb_node *node; | 369 | struct rb_node *node; |
370 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
373 | int ret; | 371 | int ret; |
374 | 372 | ||
375 | if (end < start) { | 373 | if (end < start) { |
@@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
384 | if (ret) | 382 | if (ret) |
385 | return ret; | 383 | return ret; |
386 | 384 | ||
387 | if (bits & EXTENT_DIRTY) | 385 | if (bits_to_set & EXTENT_DIRTY) |
388 | tree->dirty_bytes += end - start + 1; | 386 | tree->dirty_bytes += end - start + 1; |
389 | state->state |= bits; | 387 | state->state |= bits_to_set; |
390 | node = tree_insert(&tree->state, end, &state->rb_node); | 388 | node = tree_insert(&tree->state, end, &state->rb_node); |
391 | if (node) { | 389 | if (node) { |
392 | struct extent_state *found; | 390 | struct extent_state *found; |
@@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
456 | * struct is freed and removed from the tree | 454 | * struct is freed and removed from the tree |
457 | */ | 455 | */ |
458 | static int clear_state_bit(struct extent_io_tree *tree, | 456 | static int clear_state_bit(struct extent_io_tree *tree, |
459 | struct extent_state *state, int bits, int wake, | 457 | struct extent_state *state, |
460 | int delete) | 458 | int *bits, int wake) |
461 | { | 459 | { |
462 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; | 460 | int bits_to_clear = *bits & ~EXTENT_CTLBITS; |
463 | int ret = state->state & bits_to_clear; | 461 | int ret = state->state & bits_to_clear; |
464 | 462 | ||
465 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 463 | if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
466 | u64 range = state->end - state->start + 1; | 464 | u64 range = state->end - state->start + 1; |
467 | WARN_ON(range > tree->dirty_bytes); | 465 | WARN_ON(range > tree->dirty_bytes); |
468 | tree->dirty_bytes -= range; | 466 | tree->dirty_bytes -= range; |
@@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
471 | state->state &= ~bits_to_clear; | 469 | state->state &= ~bits_to_clear; |
472 | if (wake) | 470 | if (wake) |
473 | wake_up(&state->wq); | 471 | wake_up(&state->wq); |
474 | if (delete || state->state == 0) { | 472 | if (state->state == 0) { |
475 | if (state->tree) { | 473 | if (state->tree) { |
476 | clear_state_cb(tree, state, state->state); | ||
477 | rb_erase(&state->rb_node, &tree->state); | 474 | rb_erase(&state->rb_node, &tree->state); |
478 | state->tree = NULL; | 475 | state->tree = NULL; |
479 | free_extent_state(state); | 476 | free_extent_state(state); |
@@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
514 | int set = 0; | 511 | int set = 0; |
515 | int clear = 0; | 512 | int clear = 0; |
516 | 513 | ||
514 | if (delete) | ||
515 | bits |= ~EXTENT_CTLBITS; | ||
516 | bits |= EXTENT_FIRST_DELALLOC; | ||
517 | |||
517 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 518 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
518 | clear = 1; | 519 | clear = 1; |
519 | again: | 520 | again: |
@@ -580,8 +581,7 @@ hit_next: | |||
580 | if (err) | 581 | if (err) |
581 | goto out; | 582 | goto out; |
582 | if (state->end <= end) { | 583 | if (state->end <= end) { |
583 | set |= clear_state_bit(tree, state, bits, wake, | 584 | set |= clear_state_bit(tree, state, &bits, wake); |
584 | delete); | ||
585 | if (last_end == (u64)-1) | 585 | if (last_end == (u64)-1) |
586 | goto out; | 586 | goto out; |
587 | start = last_end + 1; | 587 | start = last_end + 1; |
@@ -602,7 +602,7 @@ hit_next: | |||
602 | if (wake) | 602 | if (wake) |
603 | wake_up(&state->wq); | 603 | wake_up(&state->wq); |
604 | 604 | ||
605 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); | 605 | set |= clear_state_bit(tree, prealloc, &bits, wake); |
606 | 606 | ||
607 | prealloc = NULL; | 607 | prealloc = NULL; |
608 | goto out; | 608 | goto out; |
@@ -613,7 +613,7 @@ hit_next: | |||
613 | else | 613 | else |
614 | next_node = NULL; | 614 | next_node = NULL; |
615 | 615 | ||
616 | set |= clear_state_bit(tree, state, bits, wake, delete); | 616 | set |= clear_state_bit(tree, state, &bits, wake); |
617 | if (last_end == (u64)-1) | 617 | if (last_end == (u64)-1) |
618 | goto out; | 618 | goto out; |
619 | start = last_end + 1; | 619 | start = last_end + 1; |
@@ -706,19 +706,19 @@ out: | |||
706 | 706 | ||
707 | static int set_state_bits(struct extent_io_tree *tree, | 707 | static int set_state_bits(struct extent_io_tree *tree, |
708 | struct extent_state *state, | 708 | struct extent_state *state, |
709 | int bits) | 709 | int *bits) |
710 | { | 710 | { |
711 | int ret; | 711 | int ret; |
712 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
712 | 713 | ||
713 | ret = set_state_cb(tree, state, bits); | 714 | ret = set_state_cb(tree, state, bits); |
714 | if (ret) | 715 | if (ret) |
715 | return ret; | 716 | return ret; |
716 | 717 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | |
717 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | ||
718 | u64 range = state->end - state->start + 1; | 718 | u64 range = state->end - state->start + 1; |
719 | tree->dirty_bytes += range; | 719 | tree->dirty_bytes += range; |
720 | } | 720 | } |
721 | state->state |= bits; | 721 | state->state |= bits_to_set; |
722 | 722 | ||
723 | return 0; | 723 | return 0; |
724 | } | 724 | } |
@@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state, | |||
745 | * [start, end] is inclusive This takes the tree lock. | 745 | * [start, end] is inclusive This takes the tree lock. |
746 | */ | 746 | */ |
747 | 747 | ||
748 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 748 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
749 | int bits, int exclusive_bits, u64 *failed_start, | 749 | int bits, int exclusive_bits, u64 *failed_start, |
750 | struct extent_state **cached_state, | 750 | struct extent_state **cached_state, gfp_t mask) |
751 | gfp_t mask) | ||
752 | { | 751 | { |
753 | struct extent_state *state; | 752 | struct extent_state *state; |
754 | struct extent_state *prealloc = NULL; | 753 | struct extent_state *prealloc = NULL; |
@@ -757,6 +756,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
757 | u64 last_start; | 756 | u64 last_start; |
758 | u64 last_end; | 757 | u64 last_end; |
759 | 758 | ||
759 | bits |= EXTENT_FIRST_DELALLOC; | ||
760 | again: | 760 | again: |
761 | if (!prealloc && (mask & __GFP_WAIT)) { | 761 | if (!prealloc && (mask & __GFP_WAIT)) { |
762 | prealloc = alloc_extent_state(mask); | 762 | prealloc = alloc_extent_state(mask); |
@@ -778,7 +778,7 @@ again: | |||
778 | */ | 778 | */ |
779 | node = tree_search(tree, start); | 779 | node = tree_search(tree, start); |
780 | if (!node) { | 780 | if (!node) { |
781 | err = insert_state(tree, prealloc, start, end, bits); | 781 | err = insert_state(tree, prealloc, start, end, &bits); |
782 | prealloc = NULL; | 782 | prealloc = NULL; |
783 | BUG_ON(err == -EEXIST); | 783 | BUG_ON(err == -EEXIST); |
784 | goto out; | 784 | goto out; |
@@ -802,7 +802,7 @@ hit_next: | |||
802 | goto out; | 802 | goto out; |
803 | } | 803 | } |
804 | 804 | ||
805 | err = set_state_bits(tree, state, bits); | 805 | err = set_state_bits(tree, state, &bits); |
806 | if (err) | 806 | if (err) |
807 | goto out; | 807 | goto out; |
808 | 808 | ||
@@ -852,7 +852,7 @@ hit_next: | |||
852 | if (err) | 852 | if (err) |
853 | goto out; | 853 | goto out; |
854 | if (state->end <= end) { | 854 | if (state->end <= end) { |
855 | err = set_state_bits(tree, state, bits); | 855 | err = set_state_bits(tree, state, &bits); |
856 | if (err) | 856 | if (err) |
857 | goto out; | 857 | goto out; |
858 | cache_state(state, cached_state); | 858 | cache_state(state, cached_state); |
@@ -877,7 +877,7 @@ hit_next: | |||
877 | else | 877 | else |
878 | this_end = last_start - 1; | 878 | this_end = last_start - 1; |
879 | err = insert_state(tree, prealloc, start, this_end, | 879 | err = insert_state(tree, prealloc, start, this_end, |
880 | bits); | 880 | &bits); |
881 | BUG_ON(err == -EEXIST); | 881 | BUG_ON(err == -EEXIST); |
882 | if (err) { | 882 | if (err) { |
883 | prealloc = NULL; | 883 | prealloc = NULL; |
@@ -903,7 +903,7 @@ hit_next: | |||
903 | err = split_state(tree, state, prealloc, end + 1); | 903 | err = split_state(tree, state, prealloc, end + 1); |
904 | BUG_ON(err == -EEXIST); | 904 | BUG_ON(err == -EEXIST); |
905 | 905 | ||
906 | err = set_state_bits(tree, prealloc, bits); | 906 | err = set_state_bits(tree, prealloc, &bits); |
907 | if (err) { | 907 | if (err) { |
908 | prealloc = NULL; | 908 | prealloc = NULL; |
909 | goto out; | 909 | goto out; |
@@ -966,8 +966,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
966 | { | 966 | { |
967 | return clear_extent_bit(tree, start, end, | 967 | return clear_extent_bit(tree, start, end, |
968 | EXTENT_DIRTY | EXTENT_DELALLOC | | 968 | EXTENT_DIRTY | EXTENT_DELALLOC | |
969 | EXTENT_DO_ACCOUNTING, 0, 0, | 969 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); |
970 | NULL, mask); | ||
971 | } | 970 | } |
972 | 971 | ||
973 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 972 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -1435,9 +1434,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
1435 | if (op & EXTENT_CLEAR_DELALLOC) | 1434 | if (op & EXTENT_CLEAR_DELALLOC) |
1436 | clear_bits |= EXTENT_DELALLOC; | 1435 | clear_bits |= EXTENT_DELALLOC; |
1437 | 1436 | ||
1438 | if (op & EXTENT_CLEAR_ACCOUNTING) | ||
1439 | clear_bits |= EXTENT_DO_ACCOUNTING; | ||
1440 | |||
1441 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | 1437 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); |
1442 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 1438 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
1443 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | 1439 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | |
@@ -1916,7 +1912,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
1916 | 1912 | ||
1917 | if (tree->ops && tree->ops->submit_bio_hook) | 1913 | if (tree->ops && tree->ops->submit_bio_hook) |
1918 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1914 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
1919 | mirror_num, bio_flags); | 1915 | mirror_num, bio_flags, start); |
1920 | else | 1916 | else |
1921 | submit_bio(rw, bio); | 1917 | submit_bio(rw, bio); |
1922 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 1918 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
@@ -2020,6 +2016,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2020 | sector_t sector; | 2016 | sector_t sector; |
2021 | struct extent_map *em; | 2017 | struct extent_map *em; |
2022 | struct block_device *bdev; | 2018 | struct block_device *bdev; |
2019 | struct btrfs_ordered_extent *ordered; | ||
2023 | int ret; | 2020 | int ret; |
2024 | int nr = 0; | 2021 | int nr = 0; |
2025 | size_t page_offset = 0; | 2022 | size_t page_offset = 0; |
@@ -2031,7 +2028,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
2031 | set_page_extent_mapped(page); | 2028 | set_page_extent_mapped(page); |
2032 | 2029 | ||
2033 | end = page_end; | 2030 | end = page_end; |
2034 | lock_extent(tree, start, end, GFP_NOFS); | 2031 | while (1) { |
2032 | lock_extent(tree, start, end, GFP_NOFS); | ||
2033 | ordered = btrfs_lookup_ordered_extent(inode, start); | ||
2034 | if (!ordered) | ||
2035 | break; | ||
2036 | unlock_extent(tree, start, end, GFP_NOFS); | ||
2037 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
2038 | btrfs_put_ordered_extent(ordered); | ||
2039 | } | ||
2035 | 2040 | ||
2036 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | 2041 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { |
2037 | char *userpage; | 2042 | char *userpage; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bbab4813646f..5691c7b590da 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -16,7 +16,9 @@ | |||
16 | #define EXTENT_BOUNDARY (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
17 | #define EXTENT_NODATASUM (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | ||
19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | ||
20 | 22 | ||
21 | /* flags for bio submission */ | 23 | /* flags for bio submission */ |
22 | #define EXTENT_BIO_COMPRESSED 1 | 24 | #define EXTENT_BIO_COMPRESSED 1 |
@@ -47,7 +49,7 @@ struct extent_state; | |||
47 | 49 | ||
48 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 50 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
49 | struct bio *bio, int mirror_num, | 51 | struct bio *bio, int mirror_num, |
50 | unsigned long bio_flags); | 52 | unsigned long bio_flags, u64 bio_offset); |
51 | struct extent_io_ops { | 53 | struct extent_io_ops { |
52 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, | 54 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
53 | u64 start, u64 end, int *page_started, | 55 | u64 start, u64 end, int *page_started, |
@@ -69,10 +71,10 @@ struct extent_io_ops { | |||
69 | struct extent_state *state); | 71 | struct extent_state *state); |
70 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 72 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
71 | struct extent_state *state, int uptodate); | 73 | struct extent_state *state, int uptodate); |
72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 74 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
73 | unsigned long old, unsigned long bits); | 75 | int *bits); |
74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 76 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
75 | unsigned long bits); | 77 | int *bits); |
76 | int (*merge_extent_hook)(struct inode *inode, | 78 | int (*merge_extent_hook)(struct inode *inode, |
77 | struct extent_state *new, | 79 | struct extent_state *new, |
78 | struct extent_state *other); | 80 | struct extent_state *other); |
@@ -176,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
176 | u64 *start, u64 search_end, | 178 | u64 *start, u64 search_end, |
177 | u64 max_bytes, unsigned long bits); | 179 | u64 max_bytes, unsigned long bits); |
178 | 180 | ||
181 | void free_extent_state(struct extent_state *state); | ||
179 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 182 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
180 | int bits, int filled, struct extent_state *cached_state); | 183 | int bits, int filled, struct extent_state *cached_state); |
181 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -185,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
185 | gfp_t mask); | 188 | gfp_t mask); |
186 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 189 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
187 | int bits, gfp_t mask); | 190 | int bits, gfp_t mask); |
191 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
192 | int bits, int exclusive_bits, u64 *failed_start, | ||
193 | struct extent_state **cached_state, gfp_t mask); | ||
188 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 194 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
189 | gfp_t mask); | 195 | gfp_t mask); |
190 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 196 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54a255065aa3..a562a250ae77 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
149 | } | 149 | } |
150 | 150 | ||
151 | 151 | ||
152 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 152 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
153 | struct bio *bio, u32 *dst) | 153 | struct inode *inode, struct bio *bio, |
154 | u64 logical_offset, u32 *dst, int dio) | ||
154 | { | 155 | { |
155 | u32 sum; | 156 | u32 sum; |
156 | struct bio_vec *bvec = bio->bi_io_vec; | 157 | struct bio_vec *bvec = bio->bi_io_vec; |
157 | int bio_index = 0; | 158 | int bio_index = 0; |
158 | u64 offset; | 159 | u64 offset = 0; |
159 | u64 item_start_offset = 0; | 160 | u64 item_start_offset = 0; |
160 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
161 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
@@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
174 | WARN_ON(bio->bi_vcnt <= 0); | 175 | WARN_ON(bio->bi_vcnt <= 0); |
175 | 176 | ||
176 | disk_bytenr = (u64)bio->bi_sector << 9; | 177 | disk_bytenr = (u64)bio->bi_sector << 9; |
178 | if (dio) | ||
179 | offset = logical_offset; | ||
177 | while (bio_index < bio->bi_vcnt) { | 180 | while (bio_index < bio->bi_vcnt) { |
178 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 181 | if (!dio) |
182 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
179 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); | 183 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); |
180 | if (ret == 0) | 184 | if (ret == 0) |
181 | goto found; | 185 | goto found; |
@@ -238,6 +242,7 @@ found: | |||
238 | else | 242 | else |
239 | set_state_private(io_tree, offset, sum); | 243 | set_state_private(io_tree, offset, sum); |
240 | disk_bytenr += bvec->bv_len; | 244 | disk_bytenr += bvec->bv_len; |
245 | offset += bvec->bv_len; | ||
241 | bio_index++; | 246 | bio_index++; |
242 | bvec++; | 247 | bvec++; |
243 | } | 248 | } |
@@ -245,6 +250,18 @@ found: | |||
245 | return 0; | 250 | return 0; |
246 | } | 251 | } |
247 | 252 | ||
253 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
254 | struct bio *bio, u32 *dst) | ||
255 | { | ||
256 | return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0); | ||
257 | } | ||
258 | |||
259 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
260 | struct bio *bio, u64 offset, u32 *dst) | ||
261 | { | ||
262 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); | ||
263 | } | ||
264 | |||
248 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 265 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
249 | struct list_head *list) | 266 | struct list_head *list) |
250 | { | 267 | { |
@@ -657,6 +674,9 @@ again: | |||
657 | goto found; | 674 | goto found; |
658 | } | 675 | } |
659 | ret = PTR_ERR(item); | 676 | ret = PTR_ERR(item); |
677 | if (ret != -EFBIG && ret != -ENOENT) | ||
678 | goto fail_unlock; | ||
679 | |||
660 | if (ret == -EFBIG) { | 680 | if (ret == -EFBIG) { |
661 | u32 item_size; | 681 | u32 item_size; |
662 | /* we found one, but it isn't big enough yet */ | 682 | /* we found one, but it isn't big enough yet */ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 29ff749ff4ca..79437c5eeb1e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -46,32 +46,42 @@ | |||
46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
47 | int write_bytes, | 47 | int write_bytes, |
48 | struct page **prepared_pages, | 48 | struct page **prepared_pages, |
49 | const char __user *buf) | 49 | struct iov_iter *i) |
50 | { | 50 | { |
51 | long page_fault = 0; | 51 | size_t copied; |
52 | int i; | 52 | int pg = 0; |
53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
54 | 54 | ||
55 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 55 | while (write_bytes > 0) { |
56 | size_t count = min_t(size_t, | 56 | size_t count = min_t(size_t, |
57 | PAGE_CACHE_SIZE - offset, write_bytes); | 57 | PAGE_CACHE_SIZE - offset, write_bytes); |
58 | struct page *page = prepared_pages[i]; | 58 | struct page *page = prepared_pages[pg]; |
59 | fault_in_pages_readable(buf, count); | 59 | again: |
60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | ||
61 | return -EFAULT; | ||
60 | 62 | ||
61 | /* Copy data from userspace to the current page */ | 63 | /* Copy data from userspace to the current page */ |
62 | kmap(page); | 64 | copied = iov_iter_copy_from_user(page, i, offset, count); |
63 | page_fault = __copy_from_user(page_address(page) + offset, | 65 | |
64 | buf, count); | ||
65 | /* Flush processor's dcache for this page */ | 66 | /* Flush processor's dcache for this page */ |
66 | flush_dcache_page(page); | 67 | flush_dcache_page(page); |
67 | kunmap(page); | 68 | iov_iter_advance(i, copied); |
68 | buf += count; | 69 | write_bytes -= copied; |
69 | write_bytes -= count; | ||
70 | 70 | ||
71 | if (page_fault) | 71 | if (unlikely(copied == 0)) { |
72 | break; | 72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, |
73 | iov_iter_single_seg_count(i)); | ||
74 | goto again; | ||
75 | } | ||
76 | |||
77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
78 | offset += copied; | ||
79 | } else { | ||
80 | pg++; | ||
81 | offset = 0; | ||
82 | } | ||
73 | } | 83 | } |
74 | return page_fault ? -EFAULT : 0; | 84 | return 0; |
75 | } | 85 | } |
76 | 86 | ||
77 | /* | 87 | /* |
@@ -126,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
126 | end_of_last_block = start_pos + num_bytes - 1; | 136 | end_of_last_block = start_pos + num_bytes - 1; |
127 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 137 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
128 | NULL); | 138 | NULL); |
129 | if (err) | 139 | BUG_ON(err); |
130 | return err; | ||
131 | 140 | ||
132 | for (i = 0; i < num_pages; i++) { | 141 | for (i = 0; i < num_pages; i++) { |
133 | struct page *p = pages[i]; | 142 | struct page *p = pages[i]; |
@@ -142,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
142 | * at this time. | 151 | * at this time. |
143 | */ | 152 | */ |
144 | } | 153 | } |
145 | return err; | 154 | return 0; |
146 | } | 155 | } |
147 | 156 | ||
148 | /* | 157 | /* |
@@ -823,45 +832,46 @@ again: | |||
823 | return 0; | 832 | return 0; |
824 | } | 833 | } |
825 | 834 | ||
826 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 835 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
827 | size_t count, loff_t *ppos) | 836 | const struct iovec *iov, |
837 | unsigned long nr_segs, loff_t pos) | ||
828 | { | 838 | { |
829 | loff_t pos; | 839 | struct file *file = iocb->ki_filp; |
840 | struct inode *inode = fdentry(file)->d_inode; | ||
841 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
842 | struct page *pinned[2]; | ||
843 | struct page **pages = NULL; | ||
844 | struct iov_iter i; | ||
845 | loff_t *ppos = &iocb->ki_pos; | ||
830 | loff_t start_pos; | 846 | loff_t start_pos; |
831 | ssize_t num_written = 0; | 847 | ssize_t num_written = 0; |
832 | ssize_t err = 0; | 848 | ssize_t err = 0; |
849 | size_t count; | ||
850 | size_t ocount; | ||
833 | int ret = 0; | 851 | int ret = 0; |
834 | struct inode *inode = fdentry(file)->d_inode; | ||
835 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
836 | struct page **pages = NULL; | ||
837 | int nrptrs; | 852 | int nrptrs; |
838 | struct page *pinned[2]; | ||
839 | unsigned long first_index; | 853 | unsigned long first_index; |
840 | unsigned long last_index; | 854 | unsigned long last_index; |
841 | int will_write; | 855 | int will_write; |
856 | int buffered = 0; | ||
842 | 857 | ||
843 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
844 | (file->f_flags & O_DIRECT)); | 859 | (file->f_flags & O_DIRECT)); |
845 | 860 | ||
846 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
847 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
848 | pinned[0] = NULL; | 861 | pinned[0] = NULL; |
849 | pinned[1] = NULL; | 862 | pinned[1] = NULL; |
850 | 863 | ||
851 | pos = *ppos; | ||
852 | start_pos = pos; | 864 | start_pos = pos; |
853 | 865 | ||
854 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 866 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
855 | 867 | ||
856 | /* do the reserve before the mutex lock in case we have to do some | ||
857 | * flushing. We wouldn't deadlock, but this is more polite. | ||
858 | */ | ||
859 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
860 | if (err) | ||
861 | goto out_nolock; | ||
862 | |||
863 | mutex_lock(&inode->i_mutex); | 868 | mutex_lock(&inode->i_mutex); |
864 | 869 | ||
870 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
871 | if (err) | ||
872 | goto out; | ||
873 | count = ocount; | ||
874 | |||
865 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 875 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
866 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 876 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
867 | if (err) | 877 | if (err) |
@@ -875,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
875 | goto out; | 885 | goto out; |
876 | 886 | ||
877 | file_update_time(file); | 887 | file_update_time(file); |
888 | BTRFS_I(inode)->sequence++; | ||
889 | |||
890 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
891 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
892 | pos, ppos, count, | ||
893 | ocount); | ||
894 | /* | ||
895 | * the generic O_DIRECT will update in-memory i_size after the | ||
896 | * DIOs are done. But our endio handlers that update the on | ||
897 | * disk i_size never update past the in memory i_size. So we | ||
898 | * need one more update here to catch any additions to the | ||
899 | * file | ||
900 | */ | ||
901 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
902 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
903 | mark_inode_dirty(inode); | ||
904 | } | ||
878 | 905 | ||
906 | if (num_written < 0) { | ||
907 | ret = num_written; | ||
908 | num_written = 0; | ||
909 | goto out; | ||
910 | } else if (num_written == count) { | ||
911 | /* pick up pos changes done by the generic code */ | ||
912 | pos = *ppos; | ||
913 | goto out; | ||
914 | } | ||
915 | /* | ||
916 | * We are going to do buffered for the rest of the range, so we | ||
917 | * need to make sure to invalidate the buffered pages when we're | ||
918 | * done. | ||
919 | */ | ||
920 | buffered = 1; | ||
921 | pos += num_written; | ||
922 | } | ||
923 | |||
924 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
925 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
926 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
927 | (sizeof(struct page *))); | ||
879 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 928 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
880 | 929 | ||
881 | /* generic_write_checks can change our pos */ | 930 | /* generic_write_checks can change our pos */ |
882 | start_pos = pos; | 931 | start_pos = pos; |
883 | 932 | ||
884 | BTRFS_I(inode)->sequence++; | ||
885 | first_index = pos >> PAGE_CACHE_SHIFT; | 933 | first_index = pos >> PAGE_CACHE_SHIFT; |
886 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 934 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
887 | 935 | ||
888 | /* | 936 | /* |
889 | * there are lots of better ways to do this, but this code | 937 | * there are lots of better ways to do this, but this code |
@@ -900,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
900 | unlock_page(pinned[0]); | 948 | unlock_page(pinned[0]); |
901 | } | 949 | } |
902 | } | 950 | } |
903 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 951 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
904 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 952 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
905 | if (!PageUptodate(pinned[1])) { | 953 | if (!PageUptodate(pinned[1])) { |
906 | ret = btrfs_readpage(NULL, pinned[1]); | 954 | ret = btrfs_readpage(NULL, pinned[1]); |
@@ -911,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
911 | } | 959 | } |
912 | } | 960 | } |
913 | 961 | ||
914 | while (count > 0) { | 962 | while (iov_iter_count(&i) > 0) { |
915 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 963 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
916 | size_t write_bytes = min(count, nrptrs * | 964 | size_t write_bytes = min(iov_iter_count(&i), |
917 | (size_t)PAGE_CACHE_SIZE - | 965 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
918 | offset); | 966 | offset); |
919 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 967 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
920 | PAGE_CACHE_SHIFT; | 968 | PAGE_CACHE_SHIFT; |
@@ -922,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
922 | WARN_ON(num_pages > nrptrs); | 970 | WARN_ON(num_pages > nrptrs); |
923 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 971 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
924 | 972 | ||
925 | ret = btrfs_check_data_free_space(root, inode, write_bytes); | 973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); |
926 | if (ret) | 974 | if (ret) |
927 | goto out; | 975 | goto out; |
928 | 976 | ||
@@ -930,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
930 | pos, first_index, last_index, | 978 | pos, first_index, last_index, |
931 | write_bytes); | 979 | write_bytes); |
932 | if (ret) { | 980 | if (ret) { |
933 | btrfs_free_reserved_data_space(root, inode, | 981 | btrfs_delalloc_release_space(inode, write_bytes); |
934 | write_bytes); | ||
935 | goto out; | 982 | goto out; |
936 | } | 983 | } |
937 | 984 | ||
938 | ret = btrfs_copy_from_user(pos, num_pages, | 985 | ret = btrfs_copy_from_user(pos, num_pages, |
939 | write_bytes, pages, buf); | 986 | write_bytes, pages, &i); |
940 | if (ret) { | 987 | if (ret == 0) { |
941 | btrfs_free_reserved_data_space(root, inode, | 988 | dirty_and_release_pages(NULL, root, file, pages, |
942 | write_bytes); | 989 | num_pages, pos, write_bytes); |
943 | btrfs_drop_pages(pages, num_pages); | ||
944 | goto out; | ||
945 | } | 990 | } |
946 | 991 | ||
947 | ret = dirty_and_release_pages(NULL, root, file, pages, | ||
948 | num_pages, pos, write_bytes); | ||
949 | btrfs_drop_pages(pages, num_pages); | 992 | btrfs_drop_pages(pages, num_pages); |
950 | if (ret) { | 993 | if (ret) { |
951 | btrfs_free_reserved_data_space(root, inode, | 994 | btrfs_delalloc_release_space(inode, write_bytes); |
952 | write_bytes); | ||
953 | goto out; | 995 | goto out; |
954 | } | 996 | } |
955 | 997 | ||
@@ -965,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
965 | btrfs_throttle(root); | 1007 | btrfs_throttle(root); |
966 | } | 1008 | } |
967 | 1009 | ||
968 | buf += write_bytes; | ||
969 | count -= write_bytes; | ||
970 | pos += write_bytes; | 1010 | pos += write_bytes; |
971 | num_written += write_bytes; | 1011 | num_written += write_bytes; |
972 | 1012 | ||
@@ -976,9 +1016,7 @@ out: | |||
976 | mutex_unlock(&inode->i_mutex); | 1016 | mutex_unlock(&inode->i_mutex); |
977 | if (ret) | 1017 | if (ret) |
978 | err = ret; | 1018 | err = ret; |
979 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
980 | 1019 | ||
981 | out_nolock: | ||
982 | kfree(pages); | 1020 | kfree(pages); |
983 | if (pinned[0]) | 1021 | if (pinned[0]) |
984 | page_cache_release(pinned[0]); | 1022 | page_cache_release(pinned[0]); |
@@ -1008,7 +1046,7 @@ out_nolock: | |||
1008 | num_written = err; | 1046 | num_written = err; |
1009 | 1047 | ||
1010 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
1011 | trans = btrfs_start_transaction(root, 1); | 1049 | trans = btrfs_start_transaction(root, 0); |
1012 | ret = btrfs_log_dentry_safe(trans, root, | 1050 | ret = btrfs_log_dentry_safe(trans, root, |
1013 | file->f_dentry); | 1051 | file->f_dentry); |
1014 | if (ret == 0) { | 1052 | if (ret == 0) { |
@@ -1023,7 +1061,7 @@ out_nolock: | |||
1023 | btrfs_end_transaction(trans, root); | 1061 | btrfs_end_transaction(trans, root); |
1024 | } | 1062 | } |
1025 | } | 1063 | } |
1026 | if (file->f_flags & O_DIRECT) { | 1064 | if (file->f_flags & O_DIRECT && buffered) { |
1027 | invalidate_mapping_pages(inode->i_mapping, | 1065 | invalidate_mapping_pages(inode->i_mapping, |
1028 | start_pos >> PAGE_CACHE_SHIFT, | 1066 | start_pos >> PAGE_CACHE_SHIFT, |
1029 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
@@ -1104,9 +1142,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
1104 | if (file && file->private_data) | 1142 | if (file && file->private_data) |
1105 | btrfs_ioctl_trans_end(file); | 1143 | btrfs_ioctl_trans_end(file); |
1106 | 1144 | ||
1107 | trans = btrfs_start_transaction(root, 1); | 1145 | trans = btrfs_start_transaction(root, 0); |
1108 | if (!trans) { | 1146 | if (IS_ERR(trans)) { |
1109 | ret = -ENOMEM; | 1147 | ret = PTR_ERR(trans); |
1110 | goto out; | 1148 | goto out; |
1111 | } | 1149 | } |
1112 | 1150 | ||
@@ -1161,7 +1199,7 @@ const struct file_operations btrfs_file_operations = { | |||
1161 | .read = do_sync_read, | 1199 | .read = do_sync_read, |
1162 | .aio_read = generic_file_aio_read, | 1200 | .aio_read = generic_file_aio_read, |
1163 | .splice_read = generic_file_splice_read, | 1201 | .splice_read = generic_file_splice_read, |
1164 | .write = btrfs_file_write, | 1202 | .aio_write = btrfs_file_aio_write, |
1165 | .mmap = btrfs_file_mmap, | 1203 | .mmap = btrfs_file_mmap, |
1166 | .open = generic_file_open, | 1204 | .open = generic_file_open, |
1167 | .release = btrfs_release_file, | 1205 | .release = btrfs_release_file, |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 72ce3c173d6a..64f1150bb48d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
@@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
49 | return 0; | 49 | return 0; |
50 | } | 50 | } |
51 | 51 | ||
52 | struct btrfs_inode_ref * | ||
53 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
54 | struct btrfs_root *root, | ||
55 | struct btrfs_path *path, | ||
56 | const char *name, int name_len, | ||
57 | u64 inode_objectid, u64 ref_objectid, int mod) | ||
58 | { | ||
59 | struct btrfs_key key; | ||
60 | struct btrfs_inode_ref *ref; | ||
61 | int ins_len = mod < 0 ? -1 : 0; | ||
62 | int cow = mod != 0; | ||
63 | int ret; | ||
64 | |||
65 | key.objectid = inode_objectid; | ||
66 | key.type = BTRFS_INODE_REF_KEY; | ||
67 | key.offset = ref_objectid; | ||
68 | |||
69 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
70 | if (ret < 0) | ||
71 | return ERR_PTR(ret); | ||
72 | if (ret > 0) | ||
73 | return NULL; | ||
74 | if (!find_name_in_backref(path, name, name_len, &ref)) | ||
75 | return NULL; | ||
76 | return ref; | ||
77 | } | ||
78 | |||
52 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 79 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, |
53 | struct btrfs_root *root, | 80 | struct btrfs_root *root, |
54 | const char *name, int name_len, | 81 | const char *name, int name_len, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d601629b85d1..fa6ccc1bfe2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
252 | inline_len, compressed_size, | 252 | inline_len, compressed_size, |
253 | compressed_pages); | 253 | compressed_pages); |
254 | BUG_ON(ret); | 254 | BUG_ON(ret); |
255 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | ||
255 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 256 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
256 | return 0; | 257 | return 0; |
257 | } | 258 | } |
@@ -414,6 +415,7 @@ again: | |||
414 | trans = btrfs_join_transaction(root, 1); | 415 | trans = btrfs_join_transaction(root, 1); |
415 | BUG_ON(!trans); | 416 | BUG_ON(!trans); |
416 | btrfs_set_trans_block_group(trans, inode); | 417 | btrfs_set_trans_block_group(trans, inode); |
418 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
417 | 419 | ||
418 | /* lets try to make an inline extent */ | 420 | /* lets try to make an inline extent */ |
419 | if (ret || total_in < (actual_end - start)) { | 421 | if (ret || total_in < (actual_end - start)) { |
@@ -439,7 +441,6 @@ again: | |||
439 | start, end, NULL, | 441 | start, end, NULL, |
440 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 442 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
441 | EXTENT_CLEAR_DELALLOC | | 443 | EXTENT_CLEAR_DELALLOC | |
442 | EXTENT_CLEAR_ACCOUNTING | | ||
443 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | 444 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); |
444 | 445 | ||
445 | btrfs_end_transaction(trans, root); | 446 | btrfs_end_transaction(trans, root); |
@@ -697,6 +698,38 @@ retry: | |||
697 | return 0; | 698 | return 0; |
698 | } | 699 | } |
699 | 700 | ||
701 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | ||
702 | u64 num_bytes) | ||
703 | { | ||
704 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
705 | struct extent_map *em; | ||
706 | u64 alloc_hint = 0; | ||
707 | |||
708 | read_lock(&em_tree->lock); | ||
709 | em = search_extent_mapping(em_tree, start, num_bytes); | ||
710 | if (em) { | ||
711 | /* | ||
712 | * if block start isn't an actual block number then find the | ||
713 | * first block in this inode and use that as a hint. If that | ||
714 | * block is also bogus then just don't worry about it. | ||
715 | */ | ||
716 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
717 | free_extent_map(em); | ||
718 | em = search_extent_mapping(em_tree, 0, 0); | ||
719 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
720 | alloc_hint = em->block_start; | ||
721 | if (em) | ||
722 | free_extent_map(em); | ||
723 | } else { | ||
724 | alloc_hint = em->block_start; | ||
725 | free_extent_map(em); | ||
726 | } | ||
727 | } | ||
728 | read_unlock(&em_tree->lock); | ||
729 | |||
730 | return alloc_hint; | ||
731 | } | ||
732 | |||
700 | /* | 733 | /* |
701 | * when extent_io.c finds a delayed allocation range in the file, | 734 | * when extent_io.c finds a delayed allocation range in the file, |
702 | * the call backs end up in this code. The basic idea is to | 735 | * the call backs end up in this code. The basic idea is to |
@@ -734,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
734 | trans = btrfs_join_transaction(root, 1); | 767 | trans = btrfs_join_transaction(root, 1); |
735 | BUG_ON(!trans); | 768 | BUG_ON(!trans); |
736 | btrfs_set_trans_block_group(trans, inode); | 769 | btrfs_set_trans_block_group(trans, inode); |
770 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
737 | 771 | ||
738 | actual_end = min_t(u64, isize, end + 1); | 772 | actual_end = min_t(u64, isize, end + 1); |
739 | 773 | ||
@@ -753,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
753 | EXTENT_CLEAR_UNLOCK_PAGE | | 787 | EXTENT_CLEAR_UNLOCK_PAGE | |
754 | EXTENT_CLEAR_UNLOCK | | 788 | EXTENT_CLEAR_UNLOCK | |
755 | EXTENT_CLEAR_DELALLOC | | 789 | EXTENT_CLEAR_DELALLOC | |
756 | EXTENT_CLEAR_ACCOUNTING | | ||
757 | EXTENT_CLEAR_DIRTY | | 790 | EXTENT_CLEAR_DIRTY | |
758 | EXTENT_SET_WRITEBACK | | 791 | EXTENT_SET_WRITEBACK | |
759 | EXTENT_END_WRITEBACK); | 792 | EXTENT_END_WRITEBACK); |
@@ -769,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
769 | BUG_ON(disk_num_bytes > | 802 | BUG_ON(disk_num_bytes > |
770 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 803 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
771 | 804 | ||
772 | 805 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | |
773 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
774 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
775 | start, num_bytes); | ||
776 | if (em) { | ||
777 | /* | ||
778 | * if block start isn't an actual block number then find the | ||
779 | * first block in this inode and use that as a hint. If that | ||
780 | * block is also bogus then just don't worry about it. | ||
781 | */ | ||
782 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
783 | free_extent_map(em); | ||
784 | em = search_extent_mapping(em_tree, 0, 0); | ||
785 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
786 | alloc_hint = em->block_start; | ||
787 | if (em) | ||
788 | free_extent_map(em); | ||
789 | } else { | ||
790 | alloc_hint = em->block_start; | ||
791 | free_extent_map(em); | ||
792 | } | ||
793 | } | ||
794 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
795 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 806 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
796 | 807 | ||
797 | while (disk_num_bytes > 0) { | 808 | while (disk_num_bytes > 0) { |
@@ -1174,6 +1185,13 @@ out_check: | |||
1174 | num_bytes, num_bytes, type); | 1185 | num_bytes, num_bytes, type); |
1175 | BUG_ON(ret); | 1186 | BUG_ON(ret); |
1176 | 1187 | ||
1188 | if (root->root_key.objectid == | ||
1189 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
1190 | ret = btrfs_reloc_clone_csums(inode, cur_offset, | ||
1191 | num_bytes); | ||
1192 | BUG_ON(ret); | ||
1193 | } | ||
1194 | |||
1177 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1195 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
1178 | cur_offset, cur_offset + num_bytes - 1, | 1196 | cur_offset, cur_offset + num_bytes - 1, |
1179 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | | 1197 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
@@ -1226,15 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
1226 | } | 1244 | } |
1227 | 1245 | ||
1228 | static int btrfs_split_extent_hook(struct inode *inode, | 1246 | static int btrfs_split_extent_hook(struct inode *inode, |
1229 | struct extent_state *orig, u64 split) | 1247 | struct extent_state *orig, u64 split) |
1230 | { | 1248 | { |
1249 | /* not delalloc, ignore it */ | ||
1231 | if (!(orig->state & EXTENT_DELALLOC)) | 1250 | if (!(orig->state & EXTENT_DELALLOC)) |
1232 | return 0; | 1251 | return 0; |
1233 | 1252 | ||
1234 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1253 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
1235 | BTRFS_I(inode)->outstanding_extents++; | ||
1236 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1237 | |||
1238 | return 0; | 1254 | return 0; |
1239 | } | 1255 | } |
1240 | 1256 | ||
@@ -1252,10 +1268,7 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1252 | if (!(other->state & EXTENT_DELALLOC)) | 1268 | if (!(other->state & EXTENT_DELALLOC)) |
1253 | return 0; | 1269 | return 0; |
1254 | 1270 | ||
1255 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1271 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
1256 | BTRFS_I(inode)->outstanding_extents--; | ||
1257 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
1258 | |||
1259 | return 0; | 1272 | return 0; |
1260 | } | 1273 | } |
1261 | 1274 | ||
@@ -1264,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
1264 | * bytes in this file, and to maintain the list of inodes that | 1277 | * bytes in this file, and to maintain the list of inodes that |
1265 | * have pending delalloc work to be done. | 1278 | * have pending delalloc work to be done. |
1266 | */ | 1279 | */ |
1267 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1280 | static int btrfs_set_bit_hook(struct inode *inode, |
1268 | unsigned long old, unsigned long bits) | 1281 | struct extent_state *state, int *bits) |
1269 | { | 1282 | { |
1270 | 1283 | ||
1271 | /* | 1284 | /* |
@@ -1273,17 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1273 | * but in this case, we are only testeing for the DELALLOC | 1286 | * but in this case, we are only testeing for the DELALLOC |
1274 | * bit, which is only set or cleared with irqs on | 1287 | * bit, which is only set or cleared with irqs on |
1275 | */ | 1288 | */ |
1276 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1289 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1277 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1290 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1291 | u64 len = state->end + 1 - state->start; | ||
1278 | 1292 | ||
1279 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1293 | if (*bits & EXTENT_FIRST_DELALLOC) |
1280 | BTRFS_I(inode)->outstanding_extents++; | 1294 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1281 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1295 | else |
1282 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1296 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
1283 | 1297 | ||
1284 | spin_lock(&root->fs_info->delalloc_lock); | 1298 | spin_lock(&root->fs_info->delalloc_lock); |
1285 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1299 | BTRFS_I(inode)->delalloc_bytes += len; |
1286 | root->fs_info->delalloc_bytes += end - start + 1; | 1300 | root->fs_info->delalloc_bytes += len; |
1287 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1301 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1288 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | 1302 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, |
1289 | &root->fs_info->delalloc_inodes); | 1303 | &root->fs_info->delalloc_inodes); |
@@ -1297,45 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
1297 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1311 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
1298 | */ | 1312 | */ |
1299 | static int btrfs_clear_bit_hook(struct inode *inode, | 1313 | static int btrfs_clear_bit_hook(struct inode *inode, |
1300 | struct extent_state *state, unsigned long bits) | 1314 | struct extent_state *state, int *bits) |
1301 | { | 1315 | { |
1302 | /* | 1316 | /* |
1303 | * set_bit and clear bit hooks normally require _irqsave/restore | 1317 | * set_bit and clear bit hooks normally require _irqsave/restore |
1304 | * but in this case, we are only testeing for the DELALLOC | 1318 | * but in this case, we are only testeing for the DELALLOC |
1305 | * bit, which is only set or cleared with irqs on | 1319 | * bit, which is only set or cleared with irqs on |
1306 | */ | 1320 | */ |
1307 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1321 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
1308 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1322 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1323 | u64 len = state->end + 1 - state->start; | ||
1309 | 1324 | ||
1310 | if (bits & EXTENT_DO_ACCOUNTING) { | 1325 | if (*bits & EXTENT_FIRST_DELALLOC) |
1311 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1326 | *bits &= ~EXTENT_FIRST_DELALLOC; |
1312 | WARN_ON(!BTRFS_I(inode)->outstanding_extents); | 1327 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) |
1313 | BTRFS_I(inode)->outstanding_extents--; | 1328 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
1314 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1329 | |
1315 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | 1330 | if (*bits & EXTENT_DO_ACCOUNTING) |
1316 | } | 1331 | btrfs_delalloc_release_metadata(inode, len); |
1332 | |||
1333 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
1334 | btrfs_free_reserved_data_space(inode, len); | ||
1317 | 1335 | ||
1318 | spin_lock(&root->fs_info->delalloc_lock); | 1336 | spin_lock(&root->fs_info->delalloc_lock); |
1319 | if (state->end - state->start + 1 > | 1337 | root->fs_info->delalloc_bytes -= len; |
1320 | root->fs_info->delalloc_bytes) { | 1338 | BTRFS_I(inode)->delalloc_bytes -= len; |
1321 | printk(KERN_INFO "btrfs warning: delalloc account " | 1339 | |
1322 | "%llu %llu\n", | ||
1323 | (unsigned long long) | ||
1324 | state->end - state->start + 1, | ||
1325 | (unsigned long long) | ||
1326 | root->fs_info->delalloc_bytes); | ||
1327 | btrfs_delalloc_free_space(root, inode, (u64)-1); | ||
1328 | root->fs_info->delalloc_bytes = 0; | ||
1329 | BTRFS_I(inode)->delalloc_bytes = 0; | ||
1330 | } else { | ||
1331 | btrfs_delalloc_free_space(root, inode, | ||
1332 | state->end - | ||
1333 | state->start + 1); | ||
1334 | root->fs_info->delalloc_bytes -= state->end - | ||
1335 | state->start + 1; | ||
1336 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
1337 | state->start + 1; | ||
1338 | } | ||
1339 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1340 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
1340 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1341 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
1341 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | 1342 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); |
@@ -1384,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
1384 | */ | 1385 | */ |
1385 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, | 1386 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, |
1386 | struct bio *bio, int mirror_num, | 1387 | struct bio *bio, int mirror_num, |
1387 | unsigned long bio_flags) | 1388 | unsigned long bio_flags, |
1389 | u64 bio_offset) | ||
1388 | { | 1390 | { |
1389 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1391 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1390 | int ret = 0; | 1392 | int ret = 0; |
@@ -1403,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, | |||
1403 | * are inserted into the btree | 1405 | * are inserted into the btree |
1404 | */ | 1406 | */ |
1405 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 1407 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
1406 | int mirror_num, unsigned long bio_flags) | 1408 | int mirror_num, unsigned long bio_flags, |
1409 | u64 bio_offset) | ||
1407 | { | 1410 | { |
1408 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1411 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1409 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 1412 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); |
@@ -1414,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
1414 | * on write, or reading the csums from the tree before a read | 1417 | * on write, or reading the csums from the tree before a read |
1415 | */ | 1418 | */ |
1416 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 1419 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
1417 | int mirror_num, unsigned long bio_flags) | 1420 | int mirror_num, unsigned long bio_flags, |
1421 | u64 bio_offset) | ||
1418 | { | 1422 | { |
1419 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1423 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1420 | int ret = 0; | 1424 | int ret = 0; |
@@ -1439,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
1439 | /* we're doing a write, do the async checksumming */ | 1443 | /* we're doing a write, do the async checksumming */ |
1440 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1444 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
1441 | inode, rw, bio, mirror_num, | 1445 | inode, rw, bio, mirror_num, |
1442 | bio_flags, __btrfs_submit_bio_start, | 1446 | bio_flags, bio_offset, |
1447 | __btrfs_submit_bio_start, | ||
1443 | __btrfs_submit_bio_done); | 1448 | __btrfs_submit_bio_done); |
1444 | } | 1449 | } |
1445 | 1450 | ||
@@ -1520,6 +1525,7 @@ again: | |||
1520 | goto again; | 1525 | goto again; |
1521 | } | 1526 | } |
1522 | 1527 | ||
1528 | BUG(); | ||
1523 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); | 1529 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); |
1524 | ClearPageChecked(page); | 1530 | ClearPageChecked(page); |
1525 | out: | 1531 | out: |
@@ -1650,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
1650 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1656 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) |
1651 | { | 1657 | { |
1652 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1658 | struct btrfs_root *root = BTRFS_I(inode)->root; |
1653 | struct btrfs_trans_handle *trans; | 1659 | struct btrfs_trans_handle *trans = NULL; |
1654 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1660 | struct btrfs_ordered_extent *ordered_extent = NULL; |
1655 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1661 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
1656 | struct extent_state *cached_state = NULL; | 1662 | struct extent_state *cached_state = NULL; |
@@ -1668,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1668 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1674 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1669 | if (!ret) { | 1675 | if (!ret) { |
1670 | trans = btrfs_join_transaction(root, 1); | 1676 | trans = btrfs_join_transaction(root, 1); |
1677 | btrfs_set_trans_block_group(trans, inode); | ||
1678 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1671 | ret = btrfs_update_inode(trans, root, inode); | 1679 | ret = btrfs_update_inode(trans, root, inode); |
1672 | BUG_ON(ret); | 1680 | BUG_ON(ret); |
1673 | btrfs_end_transaction(trans, root); | ||
1674 | } | 1681 | } |
1675 | goto out; | 1682 | goto out; |
1676 | } | 1683 | } |
@@ -1680,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1680 | 0, &cached_state, GFP_NOFS); | 1687 | 0, &cached_state, GFP_NOFS); |
1681 | 1688 | ||
1682 | trans = btrfs_join_transaction(root, 1); | 1689 | trans = btrfs_join_transaction(root, 1); |
1690 | btrfs_set_trans_block_group(trans, inode); | ||
1691 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
1683 | 1692 | ||
1684 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1693 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
1685 | compressed = 1; | 1694 | compressed = 1; |
@@ -1711,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
1711 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1720 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
1712 | &ordered_extent->list); | 1721 | &ordered_extent->list); |
1713 | 1722 | ||
1714 | /* this also removes the ordered extent from the tree */ | ||
1715 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1723 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
1716 | ret = btrfs_update_inode(trans, root, inode); | 1724 | ret = btrfs_update_inode(trans, root, inode); |
1717 | BUG_ON(ret); | 1725 | BUG_ON(ret); |
1718 | btrfs_end_transaction(trans, root); | ||
1719 | out: | 1726 | out: |
1727 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | ||
1728 | if (trans) | ||
1729 | btrfs_end_transaction(trans, root); | ||
1720 | /* once for us */ | 1730 | /* once for us */ |
1721 | btrfs_put_ordered_extent(ordered_extent); | 1731 | btrfs_put_ordered_extent(ordered_extent); |
1722 | /* once for the tree */ | 1732 | /* once for the tree */ |
@@ -1838,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
1838 | 1848 | ||
1839 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1849 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
1840 | failrec->last_mirror, | 1850 | failrec->last_mirror, |
1841 | failrec->bio_flags); | 1851 | failrec->bio_flags, 0); |
1842 | return 0; | 1852 | return 0; |
1843 | } | 1853 | } |
1844 | 1854 | ||
@@ -1993,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
1993 | } | 2003 | } |
1994 | 2004 | ||
1995 | /* | 2005 | /* |
2006 | * calculate extra metadata reservation when snapshotting a subvolume | ||
2007 | * contains orphan files. | ||
2008 | */ | ||
2009 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
2010 | struct btrfs_pending_snapshot *pending, | ||
2011 | u64 *bytes_to_reserve) | ||
2012 | { | ||
2013 | struct btrfs_root *root; | ||
2014 | struct btrfs_block_rsv *block_rsv; | ||
2015 | u64 num_bytes; | ||
2016 | int index; | ||
2017 | |||
2018 | root = pending->root; | ||
2019 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2020 | return; | ||
2021 | |||
2022 | block_rsv = root->orphan_block_rsv; | ||
2023 | |||
2024 | /* orphan block reservation for the snapshot */ | ||
2025 | num_bytes = block_rsv->size; | ||
2026 | |||
2027 | /* | ||
2028 | * after the snapshot is created, COWing tree blocks may use more | ||
2029 | * space than it frees. So we should make sure there is enough | ||
2030 | * reserved space. | ||
2031 | */ | ||
2032 | index = trans->transid & 0x1; | ||
2033 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2034 | num_bytes += block_rsv->size - | ||
2035 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2036 | } | ||
2037 | |||
2038 | *bytes_to_reserve += num_bytes; | ||
2039 | } | ||
2040 | |||
2041 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
2042 | struct btrfs_pending_snapshot *pending) | ||
2043 | { | ||
2044 | struct btrfs_root *root = pending->root; | ||
2045 | struct btrfs_root *snap = pending->snap; | ||
2046 | struct btrfs_block_rsv *block_rsv; | ||
2047 | u64 num_bytes; | ||
2048 | int index; | ||
2049 | int ret; | ||
2050 | |||
2051 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
2052 | return; | ||
2053 | |||
2054 | /* refill source subvolume's orphan block reservation */ | ||
2055 | block_rsv = root->orphan_block_rsv; | ||
2056 | index = trans->transid & 0x1; | ||
2057 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
2058 | num_bytes = block_rsv->size - | ||
2059 | (block_rsv->reserved + block_rsv->freed[index]); | ||
2060 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2061 | root->orphan_block_rsv, | ||
2062 | num_bytes); | ||
2063 | BUG_ON(ret); | ||
2064 | } | ||
2065 | |||
2066 | /* setup orphan block reservation for the snapshot */ | ||
2067 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
2068 | BUG_ON(!block_rsv); | ||
2069 | |||
2070 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2071 | snap->orphan_block_rsv = block_rsv; | ||
2072 | |||
2073 | num_bytes = root->orphan_block_rsv->size; | ||
2074 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
2075 | block_rsv, num_bytes); | ||
2076 | BUG_ON(ret); | ||
2077 | |||
2078 | #if 0 | ||
2079 | /* insert orphan item for the snapshot */ | ||
2080 | WARN_ON(!root->orphan_item_inserted); | ||
2081 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2082 | snap->root_key.objectid); | ||
2083 | BUG_ON(ret); | ||
2084 | snap->orphan_item_inserted = 1; | ||
2085 | #endif | ||
2086 | } | ||
2087 | |||
2088 | enum btrfs_orphan_cleanup_state { | ||
2089 | ORPHAN_CLEANUP_STARTED = 1, | ||
2090 | ORPHAN_CLEANUP_DONE = 2, | ||
2091 | }; | ||
2092 | |||
2093 | /* | ||
2094 | * This is called in transaction commmit time. If there are no orphan | ||
2095 | * files in the subvolume, it removes orphan item and frees block_rsv | ||
2096 | * structure. | ||
2097 | */ | ||
2098 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
2099 | struct btrfs_root *root) | ||
2100 | { | ||
2101 | int ret; | ||
2102 | |||
2103 | if (!list_empty(&root->orphan_list) || | ||
2104 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | ||
2105 | return; | ||
2106 | |||
2107 | if (root->orphan_item_inserted && | ||
2108 | btrfs_root_refs(&root->root_item) > 0) { | ||
2109 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | ||
2110 | root->root_key.objectid); | ||
2111 | BUG_ON(ret); | ||
2112 | root->orphan_item_inserted = 0; | ||
2113 | } | ||
2114 | |||
2115 | if (root->orphan_block_rsv) { | ||
2116 | WARN_ON(root->orphan_block_rsv->size > 0); | ||
2117 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | ||
2118 | root->orphan_block_rsv = NULL; | ||
2119 | } | ||
2120 | } | ||
2121 | |||
2122 | /* | ||
1996 | * This creates an orphan entry for the given inode in case something goes | 2123 | * This creates an orphan entry for the given inode in case something goes |
1997 | * wrong in the middle of an unlink/truncate. | 2124 | * wrong in the middle of an unlink/truncate. |
2125 | * | ||
2126 | * NOTE: caller of this function should reserve 5 units of metadata for | ||
2127 | * this function. | ||
1998 | */ | 2128 | */ |
1999 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | 2129 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) |
2000 | { | 2130 | { |
2001 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2131 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2002 | int ret = 0; | 2132 | struct btrfs_block_rsv *block_rsv = NULL; |
2133 | int reserve = 0; | ||
2134 | int insert = 0; | ||
2135 | int ret; | ||
2003 | 2136 | ||
2004 | spin_lock(&root->list_lock); | 2137 | if (!root->orphan_block_rsv) { |
2138 | block_rsv = btrfs_alloc_block_rsv(root); | ||
2139 | BUG_ON(!block_rsv); | ||
2140 | } | ||
2005 | 2141 | ||
2006 | /* already on the orphan list, we're good */ | 2142 | spin_lock(&root->orphan_lock); |
2007 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2143 | if (!root->orphan_block_rsv) { |
2008 | spin_unlock(&root->list_lock); | 2144 | root->orphan_block_rsv = block_rsv; |
2009 | return 0; | 2145 | } else if (block_rsv) { |
2146 | btrfs_free_block_rsv(root, block_rsv); | ||
2147 | block_rsv = NULL; | ||
2148 | } | ||
2149 | |||
2150 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | ||
2151 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
2152 | #if 0 | ||
2153 | /* | ||
2154 | * For proper ENOSPC handling, we should do orphan | ||
2155 | * cleanup when mounting. But this introduces backward | ||
2156 | * compatibility issue. | ||
2157 | */ | ||
2158 | if (!xchg(&root->orphan_item_inserted, 1)) | ||
2159 | insert = 2; | ||
2160 | else | ||
2161 | insert = 1; | ||
2162 | #endif | ||
2163 | insert = 1; | ||
2164 | } else { | ||
2165 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
2010 | } | 2166 | } |
2011 | 2167 | ||
2012 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2168 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
2169 | BTRFS_I(inode)->orphan_meta_reserved = 1; | ||
2170 | reserve = 1; | ||
2171 | } | ||
2172 | spin_unlock(&root->orphan_lock); | ||
2013 | 2173 | ||
2014 | spin_unlock(&root->list_lock); | 2174 | if (block_rsv) |
2175 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
2015 | 2176 | ||
2016 | /* | 2177 | /* grab metadata reservation from transaction handle */ |
2017 | * insert an orphan item to track this unlinked/truncated file | 2178 | if (reserve) { |
2018 | */ | 2179 | ret = btrfs_orphan_reserve_metadata(trans, inode); |
2019 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | 2180 | BUG_ON(ret); |
2181 | } | ||
2020 | 2182 | ||
2021 | return ret; | 2183 | /* insert an orphan item to track this unlinked/truncated file */ |
2184 | if (insert >= 1) { | ||
2185 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | ||
2186 | BUG_ON(ret); | ||
2187 | } | ||
2188 | |||
2189 | /* insert an orphan item to track subvolume contains orphan files */ | ||
2190 | if (insert >= 2) { | ||
2191 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
2192 | root->root_key.objectid); | ||
2193 | BUG_ON(ret); | ||
2194 | } | ||
2195 | return 0; | ||
2022 | } | 2196 | } |
2023 | 2197 | ||
2024 | /* | 2198 | /* |
@@ -2028,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
2028 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | 2202 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) |
2029 | { | 2203 | { |
2030 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2204 | struct btrfs_root *root = BTRFS_I(inode)->root; |
2205 | int delete_item = 0; | ||
2206 | int release_rsv = 0; | ||
2031 | int ret = 0; | 2207 | int ret = 0; |
2032 | 2208 | ||
2033 | spin_lock(&root->list_lock); | 2209 | spin_lock(&root->orphan_lock); |
2034 | 2210 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | |
2035 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2211 | list_del_init(&BTRFS_I(inode)->i_orphan); |
2036 | spin_unlock(&root->list_lock); | 2212 | delete_item = 1; |
2037 | return 0; | ||
2038 | } | 2213 | } |
2039 | 2214 | ||
2040 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2215 | if (BTRFS_I(inode)->orphan_meta_reserved) { |
2041 | if (!trans) { | 2216 | BTRFS_I(inode)->orphan_meta_reserved = 0; |
2042 | spin_unlock(&root->list_lock); | 2217 | release_rsv = 1; |
2043 | return 0; | ||
2044 | } | 2218 | } |
2219 | spin_unlock(&root->orphan_lock); | ||
2045 | 2220 | ||
2046 | spin_unlock(&root->list_lock); | 2221 | if (trans && delete_item) { |
2222 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | ||
2223 | BUG_ON(ret); | ||
2224 | } | ||
2047 | 2225 | ||
2048 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | 2226 | if (release_rsv) |
2227 | btrfs_orphan_release_metadata(inode); | ||
2049 | 2228 | ||
2050 | return ret; | 2229 | return 0; |
2051 | } | 2230 | } |
2052 | 2231 | ||
2053 | /* | 2232 | /* |
@@ -2064,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2064 | struct inode *inode; | 2243 | struct inode *inode; |
2065 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2244 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
2066 | 2245 | ||
2067 | if (!xchg(&root->clean_orphans, 0)) | 2246 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
2068 | return; | 2247 | return; |
2069 | 2248 | ||
2070 | path = btrfs_alloc_path(); | 2249 | path = btrfs_alloc_path(); |
@@ -2117,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2117 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2296 | found_key.type = BTRFS_INODE_ITEM_KEY; |
2118 | found_key.offset = 0; | 2297 | found_key.offset = 0; |
2119 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2298 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
2120 | if (IS_ERR(inode)) | 2299 | BUG_ON(IS_ERR(inode)); |
2121 | break; | ||
2122 | 2300 | ||
2123 | /* | 2301 | /* |
2124 | * add this inode to the orphan list so btrfs_orphan_del does | 2302 | * add this inode to the orphan list so btrfs_orphan_del does |
2125 | * the proper thing when we hit it | 2303 | * the proper thing when we hit it |
2126 | */ | 2304 | */ |
2127 | spin_lock(&root->list_lock); | 2305 | spin_lock(&root->orphan_lock); |
2128 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2306 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); |
2129 | spin_unlock(&root->list_lock); | 2307 | spin_unlock(&root->orphan_lock); |
2130 | 2308 | ||
2131 | /* | 2309 | /* |
2132 | * if this is a bad inode, means we actually succeeded in | 2310 | * if this is a bad inode, means we actually succeeded in |
@@ -2135,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2135 | * do a destroy_inode | 2313 | * do a destroy_inode |
2136 | */ | 2314 | */ |
2137 | if (is_bad_inode(inode)) { | 2315 | if (is_bad_inode(inode)) { |
2138 | trans = btrfs_start_transaction(root, 1); | 2316 | trans = btrfs_start_transaction(root, 0); |
2139 | btrfs_orphan_del(trans, inode); | 2317 | btrfs_orphan_del(trans, inode); |
2140 | btrfs_end_transaction(trans, root); | 2318 | btrfs_end_transaction(trans, root); |
2141 | iput(inode); | 2319 | iput(inode); |
@@ -2153,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
2153 | /* this will do delete_inode and everything for us */ | 2331 | /* this will do delete_inode and everything for us */ |
2154 | iput(inode); | 2332 | iput(inode); |
2155 | } | 2333 | } |
2334 | btrfs_free_path(path); | ||
2335 | |||
2336 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | ||
2337 | |||
2338 | if (root->orphan_block_rsv) | ||
2339 | btrfs_block_rsv_release(root, root->orphan_block_rsv, | ||
2340 | (u64)-1); | ||
2341 | |||
2342 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | ||
2343 | trans = btrfs_join_transaction(root, 1); | ||
2344 | btrfs_end_transaction(trans, root); | ||
2345 | } | ||
2156 | 2346 | ||
2157 | if (nr_unlink) | 2347 | if (nr_unlink) |
2158 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2348 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
2159 | if (nr_truncate) | 2349 | if (nr_truncate) |
2160 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2350 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
2161 | |||
2162 | btrfs_free_path(path); | ||
2163 | } | 2351 | } |
2164 | 2352 | ||
2165 | /* | 2353 | /* |
@@ -2478,29 +2666,201 @@ out: | |||
2478 | return ret; | 2666 | return ret; |
2479 | } | 2667 | } |
2480 | 2668 | ||
2481 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 2669 | /* helper to check if there is any shared block in the path */ |
2670 | static int check_path_shared(struct btrfs_root *root, | ||
2671 | struct btrfs_path *path) | ||
2672 | { | ||
2673 | struct extent_buffer *eb; | ||
2674 | int level; | ||
2675 | int ret; | ||
2676 | u64 refs; | ||
2677 | |||
2678 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
2679 | if (!path->nodes[level]) | ||
2680 | break; | ||
2681 | eb = path->nodes[level]; | ||
2682 | if (!btrfs_block_can_be_shared(root, eb)) | ||
2683 | continue; | ||
2684 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, | ||
2685 | &refs, NULL); | ||
2686 | if (refs > 1) | ||
2687 | return 1; | ||
2688 | } | ||
2689 | return 0; | ||
2690 | } | ||
2691 | |||
2692 | /* | ||
2693 | * helper to start transaction for unlink and rmdir. | ||
2694 | * | ||
2695 | * unlink and rmdir are special in btrfs, they do not always free space. | ||
2696 | * so in enospc case, we should make sure they will free space before | ||
2697 | * allowing them to use the global metadata reservation. | ||
2698 | */ | ||
2699 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | ||
2700 | struct dentry *dentry) | ||
2482 | { | 2701 | { |
2483 | struct btrfs_root *root; | ||
2484 | struct btrfs_trans_handle *trans; | 2702 | struct btrfs_trans_handle *trans; |
2703 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
2704 | struct btrfs_path *path; | ||
2705 | struct btrfs_inode_ref *ref; | ||
2706 | struct btrfs_dir_item *di; | ||
2485 | struct inode *inode = dentry->d_inode; | 2707 | struct inode *inode = dentry->d_inode; |
2708 | u64 index; | ||
2709 | int check_link = 1; | ||
2710 | int err = -ENOSPC; | ||
2486 | int ret; | 2711 | int ret; |
2487 | unsigned long nr = 0; | ||
2488 | 2712 | ||
2489 | root = BTRFS_I(dir)->root; | 2713 | trans = btrfs_start_transaction(root, 10); |
2714 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | ||
2715 | return trans; | ||
2490 | 2716 | ||
2491 | /* | 2717 | if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
2492 | * 5 items for unlink inode | 2718 | return ERR_PTR(-ENOSPC); |
2493 | * 1 for orphan | 2719 | |
2494 | */ | 2720 | /* check if there is someone else holds reference */ |
2495 | ret = btrfs_reserve_metadata_space(root, 6); | 2721 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) |
2496 | if (ret) | 2722 | return ERR_PTR(-ENOSPC); |
2497 | return ret; | 2723 | |
2724 | if (atomic_read(&inode->i_count) > 2) | ||
2725 | return ERR_PTR(-ENOSPC); | ||
2726 | |||
2727 | if (xchg(&root->fs_info->enospc_unlink, 1)) | ||
2728 | return ERR_PTR(-ENOSPC); | ||
2498 | 2729 | ||
2499 | trans = btrfs_start_transaction(root, 1); | 2730 | path = btrfs_alloc_path(); |
2731 | if (!path) { | ||
2732 | root->fs_info->enospc_unlink = 0; | ||
2733 | return ERR_PTR(-ENOMEM); | ||
2734 | } | ||
2735 | |||
2736 | trans = btrfs_start_transaction(root, 0); | ||
2500 | if (IS_ERR(trans)) { | 2737 | if (IS_ERR(trans)) { |
2501 | btrfs_unreserve_metadata_space(root, 6); | 2738 | btrfs_free_path(path); |
2502 | return PTR_ERR(trans); | 2739 | root->fs_info->enospc_unlink = 0; |
2740 | return trans; | ||
2741 | } | ||
2742 | |||
2743 | path->skip_locking = 1; | ||
2744 | path->search_commit_root = 1; | ||
2745 | |||
2746 | ret = btrfs_lookup_inode(trans, root, path, | ||
2747 | &BTRFS_I(dir)->location, 0); | ||
2748 | if (ret < 0) { | ||
2749 | err = ret; | ||
2750 | goto out; | ||
2751 | } | ||
2752 | if (ret == 0) { | ||
2753 | if (check_path_shared(root, path)) | ||
2754 | goto out; | ||
2755 | } else { | ||
2756 | check_link = 0; | ||
2757 | } | ||
2758 | btrfs_release_path(root, path); | ||
2759 | |||
2760 | ret = btrfs_lookup_inode(trans, root, path, | ||
2761 | &BTRFS_I(inode)->location, 0); | ||
2762 | if (ret < 0) { | ||
2763 | err = ret; | ||
2764 | goto out; | ||
2765 | } | ||
2766 | if (ret == 0) { | ||
2767 | if (check_path_shared(root, path)) | ||
2768 | goto out; | ||
2769 | } else { | ||
2770 | check_link = 0; | ||
2771 | } | ||
2772 | btrfs_release_path(root, path); | ||
2773 | |||
2774 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
2775 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
2776 | inode->i_ino, (u64)-1, 0); | ||
2777 | if (ret < 0) { | ||
2778 | err = ret; | ||
2779 | goto out; | ||
2780 | } | ||
2781 | BUG_ON(ret == 0); | ||
2782 | if (check_path_shared(root, path)) | ||
2783 | goto out; | ||
2784 | btrfs_release_path(root, path); | ||
2785 | } | ||
2786 | |||
2787 | if (!check_link) { | ||
2788 | err = 0; | ||
2789 | goto out; | ||
2790 | } | ||
2791 | |||
2792 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
2793 | dentry->d_name.name, dentry->d_name.len, 0); | ||
2794 | if (IS_ERR(di)) { | ||
2795 | err = PTR_ERR(di); | ||
2796 | goto out; | ||
2797 | } | ||
2798 | if (di) { | ||
2799 | if (check_path_shared(root, path)) | ||
2800 | goto out; | ||
2801 | } else { | ||
2802 | err = 0; | ||
2803 | goto out; | ||
2503 | } | 2804 | } |
2805 | btrfs_release_path(root, path); | ||
2806 | |||
2807 | ref = btrfs_lookup_inode_ref(trans, root, path, | ||
2808 | dentry->d_name.name, dentry->d_name.len, | ||
2809 | inode->i_ino, dir->i_ino, 0); | ||
2810 | if (IS_ERR(ref)) { | ||
2811 | err = PTR_ERR(ref); | ||
2812 | goto out; | ||
2813 | } | ||
2814 | BUG_ON(!ref); | ||
2815 | if (check_path_shared(root, path)) | ||
2816 | goto out; | ||
2817 | index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
2818 | btrfs_release_path(root, path); | ||
2819 | |||
2820 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, | ||
2821 | dentry->d_name.name, dentry->d_name.len, 0); | ||
2822 | if (IS_ERR(di)) { | ||
2823 | err = PTR_ERR(di); | ||
2824 | goto out; | ||
2825 | } | ||
2826 | BUG_ON(ret == -ENOENT); | ||
2827 | if (check_path_shared(root, path)) | ||
2828 | goto out; | ||
2829 | |||
2830 | err = 0; | ||
2831 | out: | ||
2832 | btrfs_free_path(path); | ||
2833 | if (err) { | ||
2834 | btrfs_end_transaction(trans, root); | ||
2835 | root->fs_info->enospc_unlink = 0; | ||
2836 | return ERR_PTR(err); | ||
2837 | } | ||
2838 | |||
2839 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
2840 | return trans; | ||
2841 | } | ||
2842 | |||
2843 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
2844 | struct btrfs_root *root) | ||
2845 | { | ||
2846 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | ||
2847 | BUG_ON(!root->fs_info->enospc_unlink); | ||
2848 | root->fs_info->enospc_unlink = 0; | ||
2849 | } | ||
2850 | btrfs_end_transaction_throttle(trans, root); | ||
2851 | } | ||
2852 | |||
2853 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | ||
2854 | { | ||
2855 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
2856 | struct btrfs_trans_handle *trans; | ||
2857 | struct inode *inode = dentry->d_inode; | ||
2858 | int ret; | ||
2859 | unsigned long nr = 0; | ||
2860 | |||
2861 | trans = __unlink_start_trans(dir, dentry); | ||
2862 | if (IS_ERR(trans)) | ||
2863 | return PTR_ERR(trans); | ||
2504 | 2864 | ||
2505 | btrfs_set_trans_block_group(trans, dir); | 2865 | btrfs_set_trans_block_group(trans, dir); |
2506 | 2866 | ||
@@ -2508,14 +2868,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
2508 | 2868 | ||
2509 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2869 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
2510 | dentry->d_name.name, dentry->d_name.len); | 2870 | dentry->d_name.name, dentry->d_name.len); |
2871 | BUG_ON(ret); | ||
2511 | 2872 | ||
2512 | if (inode->i_nlink == 0) | 2873 | if (inode->i_nlink == 0) { |
2513 | ret = btrfs_orphan_add(trans, inode); | 2874 | ret = btrfs_orphan_add(trans, inode); |
2875 | BUG_ON(ret); | ||
2876 | } | ||
2514 | 2877 | ||
2515 | nr = trans->blocks_used; | 2878 | nr = trans->blocks_used; |
2516 | 2879 | __unlink_end_trans(trans, root); | |
2517 | btrfs_end_transaction_throttle(trans, root); | ||
2518 | btrfs_unreserve_metadata_space(root, 6); | ||
2519 | btrfs_btree_balance_dirty(root, nr); | 2880 | btrfs_btree_balance_dirty(root, nr); |
2520 | return ret; | 2881 | return ret; |
2521 | } | 2882 | } |
@@ -2587,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2587 | { | 2948 | { |
2588 | struct inode *inode = dentry->d_inode; | 2949 | struct inode *inode = dentry->d_inode; |
2589 | int err = 0; | 2950 | int err = 0; |
2590 | int ret; | ||
2591 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2951 | struct btrfs_root *root = BTRFS_I(dir)->root; |
2592 | struct btrfs_trans_handle *trans; | 2952 | struct btrfs_trans_handle *trans; |
2593 | unsigned long nr = 0; | 2953 | unsigned long nr = 0; |
@@ -2596,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2596 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 2956 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
2597 | return -ENOTEMPTY; | 2957 | return -ENOTEMPTY; |
2598 | 2958 | ||
2599 | ret = btrfs_reserve_metadata_space(root, 5); | 2959 | trans = __unlink_start_trans(dir, dentry); |
2600 | if (ret) | 2960 | if (IS_ERR(trans)) |
2601 | return ret; | ||
2602 | |||
2603 | trans = btrfs_start_transaction(root, 1); | ||
2604 | if (IS_ERR(trans)) { | ||
2605 | btrfs_unreserve_metadata_space(root, 5); | ||
2606 | return PTR_ERR(trans); | 2961 | return PTR_ERR(trans); |
2607 | } | ||
2608 | 2962 | ||
2609 | btrfs_set_trans_block_group(trans, dir); | 2963 | btrfs_set_trans_block_group(trans, dir); |
2610 | 2964 | ||
@@ -2627,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
2627 | btrfs_i_size_write(inode, 0); | 2981 | btrfs_i_size_write(inode, 0); |
2628 | out: | 2982 | out: |
2629 | nr = trans->blocks_used; | 2983 | nr = trans->blocks_used; |
2630 | ret = btrfs_end_transaction_throttle(trans, root); | 2984 | __unlink_end_trans(trans, root); |
2631 | btrfs_unreserve_metadata_space(root, 5); | ||
2632 | btrfs_btree_balance_dirty(root, nr); | 2985 | btrfs_btree_balance_dirty(root, nr); |
2633 | 2986 | ||
2634 | if (ret && !err) | ||
2635 | err = ret; | ||
2636 | return err; | 2987 | return err; |
2637 | } | 2988 | } |
2638 | 2989 | ||
@@ -3029,6 +3380,7 @@ out: | |||
3029 | if (pending_del_nr) { | 3380 | if (pending_del_nr) { |
3030 | ret = btrfs_del_items(trans, root, path, pending_del_slot, | 3381 | ret = btrfs_del_items(trans, root, path, pending_del_slot, |
3031 | pending_del_nr); | 3382 | pending_del_nr); |
3383 | BUG_ON(ret); | ||
3032 | } | 3384 | } |
3033 | btrfs_free_path(path); | 3385 | btrfs_free_path(path); |
3034 | return err; | 3386 | return err; |
@@ -3056,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3056 | 3408 | ||
3057 | if ((offset & (blocksize - 1)) == 0) | 3409 | if ((offset & (blocksize - 1)) == 0) |
3058 | goto out; | 3410 | goto out; |
3059 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 3411 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
3060 | if (ret) | ||
3061 | goto out; | ||
3062 | |||
3063 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
3064 | if (ret) | 3412 | if (ret) |
3065 | goto out; | 3413 | goto out; |
3066 | 3414 | ||
@@ -3068,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
3068 | again: | 3416 | again: |
3069 | page = grab_cache_page(mapping, index); | 3417 | page = grab_cache_page(mapping, index); |
3070 | if (!page) { | 3418 | if (!page) { |
3071 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3419 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3072 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
3073 | goto out; | 3420 | goto out; |
3074 | } | 3421 | } |
3075 | 3422 | ||
@@ -3132,8 +3479,7 @@ again: | |||
3132 | 3479 | ||
3133 | out_unlock: | 3480 | out_unlock: |
3134 | if (ret) | 3481 | if (ret) |
3135 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3482 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
3136 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
3137 | unlock_page(page); | 3483 | unlock_page(page); |
3138 | page_cache_release(page); | 3484 | page_cache_release(page); |
3139 | out: | 3485 | out: |
@@ -3145,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3145 | struct btrfs_trans_handle *trans; | 3491 | struct btrfs_trans_handle *trans; |
3146 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3492 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3147 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3493 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
3148 | struct extent_map *em; | 3494 | struct extent_map *em = NULL; |
3149 | struct extent_state *cached_state = NULL; | 3495 | struct extent_state *cached_state = NULL; |
3150 | u64 mask = root->sectorsize - 1; | 3496 | u64 mask = root->sectorsize - 1; |
3151 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3497 | u64 hole_start = (inode->i_size + mask) & ~mask; |
@@ -3183,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3183 | u64 hint_byte = 0; | 3529 | u64 hint_byte = 0; |
3184 | hole_size = last_byte - cur_offset; | 3530 | hole_size = last_byte - cur_offset; |
3185 | 3531 | ||
3186 | err = btrfs_reserve_metadata_space(root, 2); | 3532 | trans = btrfs_start_transaction(root, 2); |
3187 | if (err) | 3533 | if (IS_ERR(trans)) { |
3534 | err = PTR_ERR(trans); | ||
3188 | break; | 3535 | break; |
3189 | 3536 | } | |
3190 | trans = btrfs_start_transaction(root, 1); | ||
3191 | btrfs_set_trans_block_group(trans, inode); | 3537 | btrfs_set_trans_block_group(trans, inode); |
3192 | 3538 | ||
3193 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3539 | err = btrfs_drop_extents(trans, inode, cur_offset, |
@@ -3205,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
3205 | last_byte - 1, 0); | 3551 | last_byte - 1, 0); |
3206 | 3552 | ||
3207 | btrfs_end_transaction(trans, root); | 3553 | btrfs_end_transaction(trans, root); |
3208 | btrfs_unreserve_metadata_space(root, 2); | ||
3209 | } | 3554 | } |
3210 | free_extent_map(em); | 3555 | free_extent_map(em); |
3556 | em = NULL; | ||
3211 | cur_offset = last_byte; | 3557 | cur_offset = last_byte; |
3212 | if (cur_offset >= block_end) | 3558 | if (cur_offset >= block_end) |
3213 | break; | 3559 | break; |
3214 | } | 3560 | } |
3215 | 3561 | ||
3562 | free_extent_map(em); | ||
3216 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, | 3563 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, |
3217 | GFP_NOFS); | 3564 | GFP_NOFS); |
3218 | return err; | 3565 | return err; |
@@ -3239,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3239 | } | 3586 | } |
3240 | } | 3587 | } |
3241 | 3588 | ||
3242 | ret = btrfs_reserve_metadata_space(root, 1); | 3589 | trans = btrfs_start_transaction(root, 5); |
3243 | if (ret) | 3590 | if (IS_ERR(trans)) |
3244 | return ret; | 3591 | return PTR_ERR(trans); |
3245 | 3592 | ||
3246 | trans = btrfs_start_transaction(root, 1); | ||
3247 | btrfs_set_trans_block_group(trans, inode); | 3593 | btrfs_set_trans_block_group(trans, inode); |
3248 | 3594 | ||
3249 | ret = btrfs_orphan_add(trans, inode); | 3595 | ret = btrfs_orphan_add(trans, inode); |
@@ -3251,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3251 | 3597 | ||
3252 | nr = trans->blocks_used; | 3598 | nr = trans->blocks_used; |
3253 | btrfs_end_transaction(trans, root); | 3599 | btrfs_end_transaction(trans, root); |
3254 | btrfs_unreserve_metadata_space(root, 1); | ||
3255 | btrfs_btree_balance_dirty(root, nr); | 3600 | btrfs_btree_balance_dirty(root, nr); |
3256 | 3601 | ||
3257 | if (attr->ia_size > inode->i_size) { | 3602 | if (attr->ia_size > inode->i_size) { |
@@ -3264,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
3264 | i_size_write(inode, attr->ia_size); | 3609 | i_size_write(inode, attr->ia_size); |
3265 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3610 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
3266 | 3611 | ||
3267 | trans = btrfs_start_transaction(root, 1); | 3612 | trans = btrfs_start_transaction(root, 0); |
3613 | BUG_ON(IS_ERR(trans)); | ||
3268 | btrfs_set_trans_block_group(trans, inode); | 3614 | btrfs_set_trans_block_group(trans, inode); |
3615 | trans->block_rsv = root->orphan_block_rsv; | ||
3616 | BUG_ON(!trans->block_rsv); | ||
3269 | 3617 | ||
3270 | ret = btrfs_update_inode(trans, root, inode); | 3618 | ret = btrfs_update_inode(trans, root, inode); |
3271 | BUG_ON(ret); | 3619 | BUG_ON(ret); |
@@ -3345,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode) | |||
3345 | btrfs_i_size_write(inode, 0); | 3693 | btrfs_i_size_write(inode, 0); |
3346 | 3694 | ||
3347 | while (1) { | 3695 | while (1) { |
3348 | trans = btrfs_start_transaction(root, 1); | 3696 | trans = btrfs_start_transaction(root, 0); |
3697 | BUG_ON(IS_ERR(trans)); | ||
3349 | btrfs_set_trans_block_group(trans, inode); | 3698 | btrfs_set_trans_block_group(trans, inode); |
3350 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3699 | trans->block_rsv = root->orphan_block_rsv; |
3700 | |||
3701 | ret = btrfs_block_rsv_check(trans, root, | ||
3702 | root->orphan_block_rsv, 0, 5); | ||
3703 | if (ret) { | ||
3704 | BUG_ON(ret != -EAGAIN); | ||
3705 | ret = btrfs_commit_transaction(trans, root); | ||
3706 | BUG_ON(ret); | ||
3707 | continue; | ||
3708 | } | ||
3351 | 3709 | ||
3710 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | ||
3352 | if (ret != -EAGAIN) | 3711 | if (ret != -EAGAIN) |
3353 | break; | 3712 | break; |
3354 | 3713 | ||
@@ -3356,6 +3715,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
3356 | btrfs_end_transaction(trans, root); | 3715 | btrfs_end_transaction(trans, root); |
3357 | trans = NULL; | 3716 | trans = NULL; |
3358 | btrfs_btree_balance_dirty(root, nr); | 3717 | btrfs_btree_balance_dirty(root, nr); |
3718 | |||
3359 | } | 3719 | } |
3360 | 3720 | ||
3361 | if (ret == 0) { | 3721 | if (ret == 0) { |
@@ -3596,40 +3956,10 @@ again: | |||
3596 | return 0; | 3956 | return 0; |
3597 | } | 3957 | } |
3598 | 3958 | ||
3599 | static noinline void init_btrfs_i(struct inode *inode) | ||
3600 | { | ||
3601 | struct btrfs_inode *bi = BTRFS_I(inode); | ||
3602 | |||
3603 | bi->generation = 0; | ||
3604 | bi->sequence = 0; | ||
3605 | bi->last_trans = 0; | ||
3606 | bi->last_sub_trans = 0; | ||
3607 | bi->logged_trans = 0; | ||
3608 | bi->delalloc_bytes = 0; | ||
3609 | bi->reserved_bytes = 0; | ||
3610 | bi->disk_i_size = 0; | ||
3611 | bi->flags = 0; | ||
3612 | bi->index_cnt = (u64)-1; | ||
3613 | bi->last_unlink_trans = 0; | ||
3614 | bi->ordered_data_close = 0; | ||
3615 | bi->force_compress = 0; | ||
3616 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | ||
3617 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | ||
3618 | inode->i_mapping, GFP_NOFS); | ||
3619 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | ||
3620 | inode->i_mapping, GFP_NOFS); | ||
3621 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | ||
3622 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
3623 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||
3624 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
3625 | mutex_init(&BTRFS_I(inode)->log_mutex); | ||
3626 | } | ||
3627 | |||
3628 | static int btrfs_init_locked_inode(struct inode *inode, void *p) | 3959 | static int btrfs_init_locked_inode(struct inode *inode, void *p) |
3629 | { | 3960 | { |
3630 | struct btrfs_iget_args *args = p; | 3961 | struct btrfs_iget_args *args = p; |
3631 | inode->i_ino = args->ino; | 3962 | inode->i_ino = args->ino; |
3632 | init_btrfs_i(inode); | ||
3633 | BTRFS_I(inode)->root = args->root; | 3963 | BTRFS_I(inode)->root = args->root; |
3634 | btrfs_set_inode_space_info(args->root, inode); | 3964 | btrfs_set_inode_space_info(args->root, inode); |
3635 | return 0; | 3965 | return 0; |
@@ -3692,8 +4022,6 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
3692 | if (!inode) | 4022 | if (!inode) |
3693 | return ERR_PTR(-ENOMEM); | 4023 | return ERR_PTR(-ENOMEM); |
3694 | 4024 | ||
3695 | init_btrfs_i(inode); | ||
3696 | |||
3697 | BTRFS_I(inode)->root = root; | 4025 | BTRFS_I(inode)->root = root; |
3698 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4026 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
3699 | BTRFS_I(inode)->dummy_inode = 1; | 4027 | BTRFS_I(inode)->dummy_inode = 1; |
@@ -3950,7 +4278,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
3950 | struct btrfs_trans_handle *trans; | 4278 | struct btrfs_trans_handle *trans; |
3951 | int ret = 0; | 4279 | int ret = 0; |
3952 | 4280 | ||
3953 | if (root->fs_info->btree_inode == inode) | 4281 | if (BTRFS_I(inode)->dummy_inode) |
3954 | return 0; | 4282 | return 0; |
3955 | 4283 | ||
3956 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4284 | if (wbc->sync_mode == WB_SYNC_ALL) { |
@@ -3971,10 +4299,38 @@ void btrfs_dirty_inode(struct inode *inode) | |||
3971 | { | 4299 | { |
3972 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4300 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3973 | struct btrfs_trans_handle *trans; | 4301 | struct btrfs_trans_handle *trans; |
4302 | int ret; | ||
4303 | |||
4304 | if (BTRFS_I(inode)->dummy_inode) | ||
4305 | return; | ||
3974 | 4306 | ||
3975 | trans = btrfs_join_transaction(root, 1); | 4307 | trans = btrfs_join_transaction(root, 1); |
3976 | btrfs_set_trans_block_group(trans, inode); | 4308 | btrfs_set_trans_block_group(trans, inode); |
3977 | btrfs_update_inode(trans, root, inode); | 4309 | |
4310 | ret = btrfs_update_inode(trans, root, inode); | ||
4311 | if (ret && ret == -ENOSPC) { | ||
4312 | /* whoops, lets try again with the full transaction */ | ||
4313 | btrfs_end_transaction(trans, root); | ||
4314 | trans = btrfs_start_transaction(root, 1); | ||
4315 | if (IS_ERR(trans)) { | ||
4316 | if (printk_ratelimit()) { | ||
4317 | printk(KERN_ERR "btrfs: fail to " | ||
4318 | "dirty inode %lu error %ld\n", | ||
4319 | inode->i_ino, PTR_ERR(trans)); | ||
4320 | } | ||
4321 | return; | ||
4322 | } | ||
4323 | btrfs_set_trans_block_group(trans, inode); | ||
4324 | |||
4325 | ret = btrfs_update_inode(trans, root, inode); | ||
4326 | if (ret) { | ||
4327 | if (printk_ratelimit()) { | ||
4328 | printk(KERN_ERR "btrfs: fail to " | ||
4329 | "dirty inode %lu error %d\n", | ||
4330 | inode->i_ino, ret); | ||
4331 | } | ||
4332 | } | ||
4333 | } | ||
3978 | btrfs_end_transaction(trans, root); | 4334 | btrfs_end_transaction(trans, root); |
3979 | } | 4335 | } |
3980 | 4336 | ||
@@ -4092,7 +4448,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
4092 | * btrfs_get_inode_index_count has an explanation for the magic | 4448 | * btrfs_get_inode_index_count has an explanation for the magic |
4093 | * number | 4449 | * number |
4094 | */ | 4450 | */ |
4095 | init_btrfs_i(inode); | ||
4096 | BTRFS_I(inode)->index_cnt = 2; | 4451 | BTRFS_I(inode)->index_cnt = 2; |
4097 | BTRFS_I(inode)->root = root; | 4452 | BTRFS_I(inode)->root = root; |
4098 | BTRFS_I(inode)->generation = trans->transid; | 4453 | BTRFS_I(inode)->generation = trans->transid; |
@@ -4247,26 +4602,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4247 | if (!new_valid_dev(rdev)) | 4602 | if (!new_valid_dev(rdev)) |
4248 | return -EINVAL; | 4603 | return -EINVAL; |
4249 | 4604 | ||
4605 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4606 | if (err) | ||
4607 | return err; | ||
4608 | |||
4250 | /* | 4609 | /* |
4251 | * 2 for inode item and ref | 4610 | * 2 for inode item and ref |
4252 | * 2 for dir items | 4611 | * 2 for dir items |
4253 | * 1 for xattr if selinux is on | 4612 | * 1 for xattr if selinux is on |
4254 | */ | 4613 | */ |
4255 | err = btrfs_reserve_metadata_space(root, 5); | 4614 | trans = btrfs_start_transaction(root, 5); |
4256 | if (err) | 4615 | if (IS_ERR(trans)) |
4257 | return err; | 4616 | return PTR_ERR(trans); |
4258 | 4617 | ||
4259 | trans = btrfs_start_transaction(root, 1); | ||
4260 | if (!trans) | ||
4261 | goto fail; | ||
4262 | btrfs_set_trans_block_group(trans, dir); | 4618 | btrfs_set_trans_block_group(trans, dir); |
4263 | 4619 | ||
4264 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4265 | if (err) { | ||
4266 | err = -ENOSPC; | ||
4267 | goto out_unlock; | ||
4268 | } | ||
4269 | |||
4270 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4620 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4271 | dentry->d_name.len, | 4621 | dentry->d_name.len, |
4272 | dentry->d_parent->d_inode->i_ino, objectid, | 4622 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -4295,13 +4645,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
4295 | out_unlock: | 4645 | out_unlock: |
4296 | nr = trans->blocks_used; | 4646 | nr = trans->blocks_used; |
4297 | btrfs_end_transaction_throttle(trans, root); | 4647 | btrfs_end_transaction_throttle(trans, root); |
4298 | fail: | 4648 | btrfs_btree_balance_dirty(root, nr); |
4299 | btrfs_unreserve_metadata_space(root, 5); | ||
4300 | if (drop_inode) { | 4649 | if (drop_inode) { |
4301 | inode_dec_link_count(inode); | 4650 | inode_dec_link_count(inode); |
4302 | iput(inode); | 4651 | iput(inode); |
4303 | } | 4652 | } |
4304 | btrfs_btree_balance_dirty(root, nr); | ||
4305 | return err; | 4653 | return err; |
4306 | } | 4654 | } |
4307 | 4655 | ||
@@ -4311,32 +4659,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4311 | struct btrfs_trans_handle *trans; | 4659 | struct btrfs_trans_handle *trans; |
4312 | struct btrfs_root *root = BTRFS_I(dir)->root; | 4660 | struct btrfs_root *root = BTRFS_I(dir)->root; |
4313 | struct inode *inode = NULL; | 4661 | struct inode *inode = NULL; |
4314 | int err; | ||
4315 | int drop_inode = 0; | 4662 | int drop_inode = 0; |
4663 | int err; | ||
4316 | unsigned long nr = 0; | 4664 | unsigned long nr = 0; |
4317 | u64 objectid; | 4665 | u64 objectid; |
4318 | u64 index = 0; | 4666 | u64 index = 0; |
4319 | 4667 | ||
4668 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4669 | if (err) | ||
4670 | return err; | ||
4320 | /* | 4671 | /* |
4321 | * 2 for inode item and ref | 4672 | * 2 for inode item and ref |
4322 | * 2 for dir items | 4673 | * 2 for dir items |
4323 | * 1 for xattr if selinux is on | 4674 | * 1 for xattr if selinux is on |
4324 | */ | 4675 | */ |
4325 | err = btrfs_reserve_metadata_space(root, 5); | 4676 | trans = btrfs_start_transaction(root, 5); |
4326 | if (err) | 4677 | if (IS_ERR(trans)) |
4327 | return err; | 4678 | return PTR_ERR(trans); |
4328 | 4679 | ||
4329 | trans = btrfs_start_transaction(root, 1); | ||
4330 | if (!trans) | ||
4331 | goto fail; | ||
4332 | btrfs_set_trans_block_group(trans, dir); | 4680 | btrfs_set_trans_block_group(trans, dir); |
4333 | 4681 | ||
4334 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4335 | if (err) { | ||
4336 | err = -ENOSPC; | ||
4337 | goto out_unlock; | ||
4338 | } | ||
4339 | |||
4340 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4682 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4341 | dentry->d_name.len, | 4683 | dentry->d_name.len, |
4342 | dentry->d_parent->d_inode->i_ino, | 4684 | dentry->d_parent->d_inode->i_ino, |
@@ -4368,8 +4710,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
4368 | out_unlock: | 4710 | out_unlock: |
4369 | nr = trans->blocks_used; | 4711 | nr = trans->blocks_used; |
4370 | btrfs_end_transaction_throttle(trans, root); | 4712 | btrfs_end_transaction_throttle(trans, root); |
4371 | fail: | ||
4372 | btrfs_unreserve_metadata_space(root, 5); | ||
4373 | if (drop_inode) { | 4713 | if (drop_inode) { |
4374 | inode_dec_link_count(inode); | 4714 | inode_dec_link_count(inode); |
4375 | iput(inode); | 4715 | iput(inode); |
@@ -4396,21 +4736,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4396 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4736 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
4397 | return -EPERM; | 4737 | return -EPERM; |
4398 | 4738 | ||
4399 | /* | ||
4400 | * 1 item for inode ref | ||
4401 | * 2 items for dir items | ||
4402 | */ | ||
4403 | err = btrfs_reserve_metadata_space(root, 3); | ||
4404 | if (err) | ||
4405 | return err; | ||
4406 | |||
4407 | btrfs_inc_nlink(inode); | 4739 | btrfs_inc_nlink(inode); |
4408 | 4740 | ||
4409 | err = btrfs_set_inode_index(dir, &index); | 4741 | err = btrfs_set_inode_index(dir, &index); |
4410 | if (err) | 4742 | if (err) |
4411 | goto fail; | 4743 | goto fail; |
4412 | 4744 | ||
4413 | trans = btrfs_start_transaction(root, 1); | 4745 | /* |
4746 | * 1 item for inode ref | ||
4747 | * 2 items for dir items | ||
4748 | */ | ||
4749 | trans = btrfs_start_transaction(root, 3); | ||
4750 | if (IS_ERR(trans)) { | ||
4751 | err = PTR_ERR(trans); | ||
4752 | goto fail; | ||
4753 | } | ||
4414 | 4754 | ||
4415 | btrfs_set_trans_block_group(trans, dir); | 4755 | btrfs_set_trans_block_group(trans, dir); |
4416 | atomic_inc(&inode->i_count); | 4756 | atomic_inc(&inode->i_count); |
@@ -4429,7 +4769,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
4429 | nr = trans->blocks_used; | 4769 | nr = trans->blocks_used; |
4430 | btrfs_end_transaction_throttle(trans, root); | 4770 | btrfs_end_transaction_throttle(trans, root); |
4431 | fail: | 4771 | fail: |
4432 | btrfs_unreserve_metadata_space(root, 3); | ||
4433 | if (drop_inode) { | 4772 | if (drop_inode) { |
4434 | inode_dec_link_count(inode); | 4773 | inode_dec_link_count(inode); |
4435 | iput(inode); | 4774 | iput(inode); |
@@ -4449,28 +4788,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4449 | u64 index = 0; | 4788 | u64 index = 0; |
4450 | unsigned long nr = 1; | 4789 | unsigned long nr = 1; |
4451 | 4790 | ||
4791 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
4792 | if (err) | ||
4793 | return err; | ||
4794 | |||
4452 | /* | 4795 | /* |
4453 | * 2 items for inode and ref | 4796 | * 2 items for inode and ref |
4454 | * 2 items for dir items | 4797 | * 2 items for dir items |
4455 | * 1 for xattr if selinux is on | 4798 | * 1 for xattr if selinux is on |
4456 | */ | 4799 | */ |
4457 | err = btrfs_reserve_metadata_space(root, 5); | 4800 | trans = btrfs_start_transaction(root, 5); |
4458 | if (err) | 4801 | if (IS_ERR(trans)) |
4459 | return err; | 4802 | return PTR_ERR(trans); |
4460 | |||
4461 | trans = btrfs_start_transaction(root, 1); | ||
4462 | if (!trans) { | ||
4463 | err = -ENOMEM; | ||
4464 | goto out_unlock; | ||
4465 | } | ||
4466 | btrfs_set_trans_block_group(trans, dir); | 4803 | btrfs_set_trans_block_group(trans, dir); |
4467 | 4804 | ||
4468 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
4469 | if (err) { | ||
4470 | err = -ENOSPC; | ||
4471 | goto out_fail; | ||
4472 | } | ||
4473 | |||
4474 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4805 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
4475 | dentry->d_name.len, | 4806 | dentry->d_name.len, |
4476 | dentry->d_parent->d_inode->i_ino, objectid, | 4807 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -4510,9 +4841,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
4510 | out_fail: | 4841 | out_fail: |
4511 | nr = trans->blocks_used; | 4842 | nr = trans->blocks_used; |
4512 | btrfs_end_transaction_throttle(trans, root); | 4843 | btrfs_end_transaction_throttle(trans, root); |
4513 | |||
4514 | out_unlock: | ||
4515 | btrfs_unreserve_metadata_space(root, 5); | ||
4516 | if (drop_on_err) | 4844 | if (drop_on_err) |
4517 | iput(inode); | 4845 | iput(inode); |
4518 | btrfs_btree_balance_dirty(root, nr); | 4846 | btrfs_btree_balance_dirty(root, nr); |
@@ -4770,6 +5098,7 @@ again: | |||
4770 | } | 5098 | } |
4771 | flush_dcache_page(page); | 5099 | flush_dcache_page(page); |
4772 | } else if (create && PageUptodate(page)) { | 5100 | } else if (create && PageUptodate(page)) { |
5101 | WARN_ON(1); | ||
4773 | if (!trans) { | 5102 | if (!trans) { |
4774 | kunmap(page); | 5103 | kunmap(page); |
4775 | free_extent_map(em); | 5104 | free_extent_map(em); |
@@ -4866,11 +5195,651 @@ out: | |||
4866 | return em; | 5195 | return em; |
4867 | } | 5196 | } |
4868 | 5197 | ||
5198 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | ||
5199 | u64 start, u64 len) | ||
5200 | { | ||
5201 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5202 | struct btrfs_trans_handle *trans; | ||
5203 | struct extent_map *em; | ||
5204 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
5205 | struct btrfs_key ins; | ||
5206 | u64 alloc_hint; | ||
5207 | int ret; | ||
5208 | |||
5209 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
5210 | |||
5211 | trans = btrfs_join_transaction(root, 0); | ||
5212 | if (!trans) | ||
5213 | return ERR_PTR(-ENOMEM); | ||
5214 | |||
5215 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5216 | |||
5217 | alloc_hint = get_extent_allocation_hint(inode, start, len); | ||
5218 | ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, | ||
5219 | alloc_hint, (u64)-1, &ins, 1); | ||
5220 | if (ret) { | ||
5221 | em = ERR_PTR(ret); | ||
5222 | goto out; | ||
5223 | } | ||
5224 | |||
5225 | em = alloc_extent_map(GFP_NOFS); | ||
5226 | if (!em) { | ||
5227 | em = ERR_PTR(-ENOMEM); | ||
5228 | goto out; | ||
5229 | } | ||
5230 | |||
5231 | em->start = start; | ||
5232 | em->orig_start = em->start; | ||
5233 | em->len = ins.offset; | ||
5234 | |||
5235 | em->block_start = ins.objectid; | ||
5236 | em->block_len = ins.offset; | ||
5237 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
5238 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
5239 | |||
5240 | while (1) { | ||
5241 | write_lock(&em_tree->lock); | ||
5242 | ret = add_extent_mapping(em_tree, em); | ||
5243 | write_unlock(&em_tree->lock); | ||
5244 | if (ret != -EEXIST) | ||
5245 | break; | ||
5246 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
5247 | } | ||
5248 | |||
5249 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
5250 | ins.offset, ins.offset, 0); | ||
5251 | if (ret) { | ||
5252 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | ||
5253 | em = ERR_PTR(ret); | ||
5254 | } | ||
5255 | out: | ||
5256 | btrfs_end_transaction(trans, root); | ||
5257 | return em; | ||
5258 | } | ||
5259 | |||
5260 | /* | ||
5261 | * returns 1 when the nocow is safe, < 1 on error, 0 if the | ||
5262 | * block must be cow'd | ||
5263 | */ | ||
5264 | static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans, | ||
5265 | struct inode *inode, u64 offset, u64 len) | ||
5266 | { | ||
5267 | struct btrfs_path *path; | ||
5268 | int ret; | ||
5269 | struct extent_buffer *leaf; | ||
5270 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5271 | struct btrfs_file_extent_item *fi; | ||
5272 | struct btrfs_key key; | ||
5273 | u64 disk_bytenr; | ||
5274 | u64 backref_offset; | ||
5275 | u64 extent_end; | ||
5276 | u64 num_bytes; | ||
5277 | int slot; | ||
5278 | int found_type; | ||
5279 | |||
5280 | path = btrfs_alloc_path(); | ||
5281 | if (!path) | ||
5282 | return -ENOMEM; | ||
5283 | |||
5284 | ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino, | ||
5285 | offset, 0); | ||
5286 | if (ret < 0) | ||
5287 | goto out; | ||
5288 | |||
5289 | slot = path->slots[0]; | ||
5290 | if (ret == 1) { | ||
5291 | if (slot == 0) { | ||
5292 | /* can't find the item, must cow */ | ||
5293 | ret = 0; | ||
5294 | goto out; | ||
5295 | } | ||
5296 | slot--; | ||
5297 | } | ||
5298 | ret = 0; | ||
5299 | leaf = path->nodes[0]; | ||
5300 | btrfs_item_key_to_cpu(leaf, &key, slot); | ||
5301 | if (key.objectid != inode->i_ino || | ||
5302 | key.type != BTRFS_EXTENT_DATA_KEY) { | ||
5303 | /* not our file or wrong item type, must cow */ | ||
5304 | goto out; | ||
5305 | } | ||
5306 | |||
5307 | if (key.offset > offset) { | ||
5308 | /* Wrong offset, must cow */ | ||
5309 | goto out; | ||
5310 | } | ||
5311 | |||
5312 | fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item); | ||
5313 | found_type = btrfs_file_extent_type(leaf, fi); | ||
5314 | if (found_type != BTRFS_FILE_EXTENT_REG && | ||
5315 | found_type != BTRFS_FILE_EXTENT_PREALLOC) { | ||
5316 | /* not a regular extent, must cow */ | ||
5317 | goto out; | ||
5318 | } | ||
5319 | disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
5320 | backref_offset = btrfs_file_extent_offset(leaf, fi); | ||
5321 | |||
5322 | extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi); | ||
5323 | if (extent_end < offset + len) { | ||
5324 | /* extent doesn't include our full range, must cow */ | ||
5325 | goto out; | ||
5326 | } | ||
5327 | |||
5328 | if (btrfs_extent_readonly(root, disk_bytenr)) | ||
5329 | goto out; | ||
5330 | |||
5331 | /* | ||
5332 | * look for other files referencing this extent, if we | ||
5333 | * find any we must cow | ||
5334 | */ | ||
5335 | if (btrfs_cross_ref_exist(trans, root, inode->i_ino, | ||
5336 | key.offset - backref_offset, disk_bytenr)) | ||
5337 | goto out; | ||
5338 | |||
5339 | /* | ||
5340 | * adjust disk_bytenr and num_bytes to cover just the bytes | ||
5341 | * in this extent we are about to write. If there | ||
5342 | * are any csums in that range we have to cow in order | ||
5343 | * to keep the csums correct | ||
5344 | */ | ||
5345 | disk_bytenr += backref_offset; | ||
5346 | disk_bytenr += offset - key.offset; | ||
5347 | num_bytes = min(offset + len, extent_end) - offset; | ||
5348 | if (csum_exist_in_range(root, disk_bytenr, num_bytes)) | ||
5349 | goto out; | ||
5350 | /* | ||
5351 | * all of the above have passed, it is safe to overwrite this extent | ||
5352 | * without cow | ||
5353 | */ | ||
5354 | ret = 1; | ||
5355 | out: | ||
5356 | btrfs_free_path(path); | ||
5357 | return ret; | ||
5358 | } | ||
5359 | |||
5360 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | ||
5361 | struct buffer_head *bh_result, int create) | ||
5362 | { | ||
5363 | struct extent_map *em; | ||
5364 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5365 | u64 start = iblock << inode->i_blkbits; | ||
5366 | u64 len = bh_result->b_size; | ||
5367 | struct btrfs_trans_handle *trans; | ||
5368 | |||
5369 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | ||
5370 | if (IS_ERR(em)) | ||
5371 | return PTR_ERR(em); | ||
5372 | |||
5373 | /* | ||
5374 | * Ok for INLINE and COMPRESSED extents we need to fallback on buffered | ||
5375 | * io. INLINE is special, and we could probably kludge it in here, but | ||
5376 | * it's still buffered so for safety lets just fall back to the generic | ||
5377 | * buffered path. | ||
5378 | * | ||
5379 | * For COMPRESSED we _have_ to read the entire extent in so we can | ||
5380 | * decompress it, so there will be buffering required no matter what we | ||
5381 | * do, so go ahead and fallback to buffered. | ||
5382 | * | ||
5383 | * We return -ENOTBLK because thats what makes DIO go ahead and go back | ||
5384 | * to buffered IO. Don't blame me, this is the price we pay for using | ||
5385 | * the generic code. | ||
5386 | */ | ||
5387 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || | ||
5388 | em->block_start == EXTENT_MAP_INLINE) { | ||
5389 | free_extent_map(em); | ||
5390 | return -ENOTBLK; | ||
5391 | } | ||
5392 | |||
5393 | /* Just a good old fashioned hole, return */ | ||
5394 | if (!create && (em->block_start == EXTENT_MAP_HOLE || | ||
5395 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
5396 | free_extent_map(em); | ||
5397 | /* DIO will do one hole at a time, so just unlock a sector */ | ||
5398 | unlock_extent(&BTRFS_I(inode)->io_tree, start, | ||
5399 | start + root->sectorsize - 1, GFP_NOFS); | ||
5400 | return 0; | ||
5401 | } | ||
5402 | |||
5403 | /* | ||
5404 | * We don't allocate a new extent in the following cases | ||
5405 | * | ||
5406 | * 1) The inode is marked as NODATACOW. In this case we'll just use the | ||
5407 | * existing extent. | ||
5408 | * 2) The extent is marked as PREALLOC. We're good to go here and can | ||
5409 | * just use the extent. | ||
5410 | * | ||
5411 | */ | ||
5412 | if (!create) { | ||
5413 | len = em->len - (start - em->start); | ||
5414 | goto map; | ||
5415 | } | ||
5416 | |||
5417 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || | ||
5418 | ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | ||
5419 | em->block_start != EXTENT_MAP_HOLE)) { | ||
5420 | int type; | ||
5421 | int ret; | ||
5422 | u64 block_start; | ||
5423 | |||
5424 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5425 | type = BTRFS_ORDERED_PREALLOC; | ||
5426 | else | ||
5427 | type = BTRFS_ORDERED_NOCOW; | ||
5428 | len = min(len, em->len - (start - em->start)); | ||
5429 | block_start = em->block_start + (start - em->start); | ||
5430 | |||
5431 | /* | ||
5432 | * we're not going to log anything, but we do need | ||
5433 | * to make sure the current transaction stays open | ||
5434 | * while we look for nocow cross refs | ||
5435 | */ | ||
5436 | trans = btrfs_join_transaction(root, 0); | ||
5437 | if (!trans) | ||
5438 | goto must_cow; | ||
5439 | |||
5440 | if (can_nocow_odirect(trans, inode, start, len) == 1) { | ||
5441 | ret = btrfs_add_ordered_extent_dio(inode, start, | ||
5442 | block_start, len, len, type); | ||
5443 | btrfs_end_transaction(trans, root); | ||
5444 | if (ret) { | ||
5445 | free_extent_map(em); | ||
5446 | return ret; | ||
5447 | } | ||
5448 | goto unlock; | ||
5449 | } | ||
5450 | btrfs_end_transaction(trans, root); | ||
5451 | } | ||
5452 | must_cow: | ||
5453 | /* | ||
5454 | * this will cow the extent, reset the len in case we changed | ||
5455 | * it above | ||
5456 | */ | ||
5457 | len = bh_result->b_size; | ||
5458 | free_extent_map(em); | ||
5459 | em = btrfs_new_extent_direct(inode, start, len); | ||
5460 | if (IS_ERR(em)) | ||
5461 | return PTR_ERR(em); | ||
5462 | len = min(len, em->len - (start - em->start)); | ||
5463 | unlock: | ||
5464 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, | ||
5465 | EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, | ||
5466 | 0, NULL, GFP_NOFS); | ||
5467 | map: | ||
5468 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> | ||
5469 | inode->i_blkbits; | ||
5470 | bh_result->b_size = len; | ||
5471 | bh_result->b_bdev = em->bdev; | ||
5472 | set_buffer_mapped(bh_result); | ||
5473 | if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5474 | set_buffer_new(bh_result); | ||
5475 | |||
5476 | free_extent_map(em); | ||
5477 | |||
5478 | return 0; | ||
5479 | } | ||
5480 | |||
5481 | struct btrfs_dio_private { | ||
5482 | struct inode *inode; | ||
5483 | u64 logical_offset; | ||
5484 | u64 disk_bytenr; | ||
5485 | u64 bytes; | ||
5486 | u32 *csums; | ||
5487 | void *private; | ||
5488 | }; | ||
5489 | |||
5490 | static void btrfs_endio_direct_read(struct bio *bio, int err) | ||
5491 | { | ||
5492 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
5493 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5494 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5495 | struct inode *inode = dip->inode; | ||
5496 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5497 | u64 start; | ||
5498 | u32 *private = dip->csums; | ||
5499 | |||
5500 | start = dip->logical_offset; | ||
5501 | do { | ||
5502 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | ||
5503 | struct page *page = bvec->bv_page; | ||
5504 | char *kaddr; | ||
5505 | u32 csum = ~(u32)0; | ||
5506 | unsigned long flags; | ||
5507 | |||
5508 | local_irq_save(flags); | ||
5509 | kaddr = kmap_atomic(page, KM_IRQ0); | ||
5510 | csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, | ||
5511 | csum, bvec->bv_len); | ||
5512 | btrfs_csum_final(csum, (char *)&csum); | ||
5513 | kunmap_atomic(kaddr, KM_IRQ0); | ||
5514 | local_irq_restore(flags); | ||
5515 | |||
5516 | flush_dcache_page(bvec->bv_page); | ||
5517 | if (csum != *private) { | ||
5518 | printk(KERN_ERR "btrfs csum failed ino %lu off" | ||
5519 | " %llu csum %u private %u\n", | ||
5520 | inode->i_ino, (unsigned long long)start, | ||
5521 | csum, *private); | ||
5522 | err = -EIO; | ||
5523 | } | ||
5524 | } | ||
5525 | |||
5526 | start += bvec->bv_len; | ||
5527 | private++; | ||
5528 | bvec++; | ||
5529 | } while (bvec <= bvec_end); | ||
5530 | |||
5531 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | ||
5532 | dip->logical_offset + dip->bytes - 1, GFP_NOFS); | ||
5533 | bio->bi_private = dip->private; | ||
5534 | |||
5535 | kfree(dip->csums); | ||
5536 | kfree(dip); | ||
5537 | dio_end_io(bio, err); | ||
5538 | } | ||
5539 | |||
5540 | static void btrfs_endio_direct_write(struct bio *bio, int err) | ||
5541 | { | ||
5542 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5543 | struct inode *inode = dip->inode; | ||
5544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5545 | struct btrfs_trans_handle *trans; | ||
5546 | struct btrfs_ordered_extent *ordered = NULL; | ||
5547 | struct extent_state *cached_state = NULL; | ||
5548 | int ret; | ||
5549 | |||
5550 | if (err) | ||
5551 | goto out_done; | ||
5552 | |||
5553 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | ||
5554 | dip->logical_offset, dip->bytes); | ||
5555 | if (!ret) | ||
5556 | goto out_done; | ||
5557 | |||
5558 | BUG_ON(!ordered); | ||
5559 | |||
5560 | trans = btrfs_join_transaction(root, 1); | ||
5561 | if (!trans) { | ||
5562 | err = -ENOMEM; | ||
5563 | goto out; | ||
5564 | } | ||
5565 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5566 | |||
5567 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
5568 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5569 | if (!ret) | ||
5570 | ret = btrfs_update_inode(trans, root, inode); | ||
5571 | err = ret; | ||
5572 | goto out; | ||
5573 | } | ||
5574 | |||
5575 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5576 | ordered->file_offset + ordered->len - 1, 0, | ||
5577 | &cached_state, GFP_NOFS); | ||
5578 | |||
5579 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
5580 | ret = btrfs_mark_extent_written(trans, inode, | ||
5581 | ordered->file_offset, | ||
5582 | ordered->file_offset + | ||
5583 | ordered->len); | ||
5584 | if (ret) { | ||
5585 | err = ret; | ||
5586 | goto out_unlock; | ||
5587 | } | ||
5588 | } else { | ||
5589 | ret = insert_reserved_file_extent(trans, inode, | ||
5590 | ordered->file_offset, | ||
5591 | ordered->start, | ||
5592 | ordered->disk_len, | ||
5593 | ordered->len, | ||
5594 | ordered->len, | ||
5595 | 0, 0, 0, | ||
5596 | BTRFS_FILE_EXTENT_REG); | ||
5597 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
5598 | ordered->file_offset, ordered->len); | ||
5599 | if (ret) { | ||
5600 | err = ret; | ||
5601 | WARN_ON(1); | ||
5602 | goto out_unlock; | ||
5603 | } | ||
5604 | } | ||
5605 | |||
5606 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
5607 | btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5608 | btrfs_update_inode(trans, root, inode); | ||
5609 | out_unlock: | ||
5610 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5611 | ordered->file_offset + ordered->len - 1, | ||
5612 | &cached_state, GFP_NOFS); | ||
5613 | out: | ||
5614 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
5615 | btrfs_end_transaction(trans, root); | ||
5616 | btrfs_put_ordered_extent(ordered); | ||
5617 | btrfs_put_ordered_extent(ordered); | ||
5618 | out_done: | ||
5619 | bio->bi_private = dip->private; | ||
5620 | |||
5621 | kfree(dip->csums); | ||
5622 | kfree(dip); | ||
5623 | dio_end_io(bio, err); | ||
5624 | } | ||
5625 | |||
5626 | static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | ||
5627 | struct bio *bio, int mirror_num, | ||
5628 | unsigned long bio_flags, u64 offset) | ||
5629 | { | ||
5630 | int ret; | ||
5631 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5632 | ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); | ||
5633 | BUG_ON(ret); | ||
5634 | return 0; | ||
5635 | } | ||
5636 | |||
5637 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | ||
5638 | loff_t file_offset) | ||
5639 | { | ||
5640 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5641 | struct btrfs_dio_private *dip; | ||
5642 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5643 | u64 start; | ||
5644 | int skip_sum; | ||
5645 | int write = rw & (1 << BIO_RW); | ||
5646 | int ret = 0; | ||
5647 | |||
5648 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
5649 | |||
5650 | dip = kmalloc(sizeof(*dip), GFP_NOFS); | ||
5651 | if (!dip) { | ||
5652 | ret = -ENOMEM; | ||
5653 | goto free_ordered; | ||
5654 | } | ||
5655 | dip->csums = NULL; | ||
5656 | |||
5657 | if (!skip_sum) { | ||
5658 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | ||
5659 | if (!dip->csums) { | ||
5660 | ret = -ENOMEM; | ||
5661 | goto free_ordered; | ||
5662 | } | ||
5663 | } | ||
5664 | |||
5665 | dip->private = bio->bi_private; | ||
5666 | dip->inode = inode; | ||
5667 | dip->logical_offset = file_offset; | ||
5668 | |||
5669 | start = dip->logical_offset; | ||
5670 | dip->bytes = 0; | ||
5671 | do { | ||
5672 | dip->bytes += bvec->bv_len; | ||
5673 | bvec++; | ||
5674 | } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); | ||
5675 | |||
5676 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | ||
5677 | bio->bi_private = dip; | ||
5678 | |||
5679 | if (write) | ||
5680 | bio->bi_end_io = btrfs_endio_direct_write; | ||
5681 | else | ||
5682 | bio->bi_end_io = btrfs_endio_direct_read; | ||
5683 | |||
5684 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5685 | if (ret) | ||
5686 | goto out_err; | ||
5687 | |||
5688 | if (write && !skip_sum) { | ||
5689 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
5690 | inode, rw, bio, 0, 0, | ||
5691 | dip->logical_offset, | ||
5692 | __btrfs_submit_bio_start_direct_io, | ||
5693 | __btrfs_submit_bio_done); | ||
5694 | if (ret) | ||
5695 | goto out_err; | ||
5696 | return; | ||
5697 | } else if (!skip_sum) | ||
5698 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5699 | dip->logical_offset, dip->csums); | ||
5700 | |||
5701 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
5702 | if (ret) | ||
5703 | goto out_err; | ||
5704 | return; | ||
5705 | out_err: | ||
5706 | kfree(dip->csums); | ||
5707 | kfree(dip); | ||
5708 | free_ordered: | ||
5709 | /* | ||
5710 | * If this is a write, we need to clean up the reserved space and kill | ||
5711 | * the ordered extent. | ||
5712 | */ | ||
5713 | if (write) { | ||
5714 | struct btrfs_ordered_extent *ordered; | ||
5715 | ordered = btrfs_lookup_ordered_extent(inode, | ||
5716 | dip->logical_offset); | ||
5717 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | ||
5718 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | ||
5719 | btrfs_free_reserved_extent(root, ordered->start, | ||
5720 | ordered->disk_len); | ||
5721 | btrfs_put_ordered_extent(ordered); | ||
5722 | btrfs_put_ordered_extent(ordered); | ||
5723 | } | ||
5724 | bio_endio(bio, ret); | ||
5725 | } | ||
5726 | |||
5727 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, | ||
5728 | const struct iovec *iov, loff_t offset, | ||
5729 | unsigned long nr_segs) | ||
5730 | { | ||
5731 | int seg; | ||
5732 | size_t size; | ||
5733 | unsigned long addr; | ||
5734 | unsigned blocksize_mask = root->sectorsize - 1; | ||
5735 | ssize_t retval = -EINVAL; | ||
5736 | loff_t end = offset; | ||
5737 | |||
5738 | if (offset & blocksize_mask) | ||
5739 | goto out; | ||
5740 | |||
5741 | /* Check the memory alignment. Blocks cannot straddle pages */ | ||
5742 | for (seg = 0; seg < nr_segs; seg++) { | ||
5743 | addr = (unsigned long)iov[seg].iov_base; | ||
5744 | size = iov[seg].iov_len; | ||
5745 | end += size; | ||
5746 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
5747 | goto out; | ||
5748 | } | ||
5749 | retval = 0; | ||
5750 | out: | ||
5751 | return retval; | ||
5752 | } | ||
4869 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 5753 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
4870 | const struct iovec *iov, loff_t offset, | 5754 | const struct iovec *iov, loff_t offset, |
4871 | unsigned long nr_segs) | 5755 | unsigned long nr_segs) |
4872 | { | 5756 | { |
4873 | return -EINVAL; | 5757 | struct file *file = iocb->ki_filp; |
5758 | struct inode *inode = file->f_mapping->host; | ||
5759 | struct btrfs_ordered_extent *ordered; | ||
5760 | struct extent_state *cached_state = NULL; | ||
5761 | u64 lockstart, lockend; | ||
5762 | ssize_t ret; | ||
5763 | int writing = rw & WRITE; | ||
5764 | int write_bits = 0; | ||
5765 | size_t count = iov_length(iov, nr_segs); | ||
5766 | |||
5767 | if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, | ||
5768 | offset, nr_segs)) { | ||
5769 | return 0; | ||
5770 | } | ||
5771 | |||
5772 | lockstart = offset; | ||
5773 | lockend = offset + count - 1; | ||
5774 | |||
5775 | if (writing) { | ||
5776 | ret = btrfs_delalloc_reserve_space(inode, count); | ||
5777 | if (ret) | ||
5778 | goto out; | ||
5779 | } | ||
5780 | |||
5781 | while (1) { | ||
5782 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5783 | 0, &cached_state, GFP_NOFS); | ||
5784 | /* | ||
5785 | * We're concerned with the entire range that we're going to be | ||
5786 | * doing DIO to, so we need to make sure theres no ordered | ||
5787 | * extents in this range. | ||
5788 | */ | ||
5789 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
5790 | lockend - lockstart + 1); | ||
5791 | if (!ordered) | ||
5792 | break; | ||
5793 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5794 | &cached_state, GFP_NOFS); | ||
5795 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
5796 | btrfs_put_ordered_extent(ordered); | ||
5797 | cond_resched(); | ||
5798 | } | ||
5799 | |||
5800 | /* | ||
5801 | * we don't use btrfs_set_extent_delalloc because we don't want | ||
5802 | * the dirty or uptodate bits | ||
5803 | */ | ||
5804 | if (writing) { | ||
5805 | write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; | ||
5806 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5807 | EXTENT_DELALLOC, 0, NULL, &cached_state, | ||
5808 | GFP_NOFS); | ||
5809 | if (ret) { | ||
5810 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
5811 | lockend, EXTENT_LOCKED | write_bits, | ||
5812 | 1, 0, &cached_state, GFP_NOFS); | ||
5813 | goto out; | ||
5814 | } | ||
5815 | } | ||
5816 | |||
5817 | free_extent_state(cached_state); | ||
5818 | cached_state = NULL; | ||
5819 | |||
5820 | ret = __blockdev_direct_IO(rw, iocb, inode, | ||
5821 | BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, | ||
5822 | iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, | ||
5823 | btrfs_submit_direct, 0); | ||
5824 | |||
5825 | if (ret < 0 && ret != -EIOCBQUEUED) { | ||
5826 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, | ||
5827 | offset + iov_length(iov, nr_segs) - 1, | ||
5828 | EXTENT_LOCKED | write_bits, 1, 0, | ||
5829 | &cached_state, GFP_NOFS); | ||
5830 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { | ||
5831 | /* | ||
5832 | * We're falling back to buffered, unlock the section we didn't | ||
5833 | * do IO on. | ||
5834 | */ | ||
5835 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, | ||
5836 | offset + iov_length(iov, nr_segs) - 1, | ||
5837 | EXTENT_LOCKED | write_bits, 1, 0, | ||
5838 | &cached_state, GFP_NOFS); | ||
5839 | } | ||
5840 | out: | ||
5841 | free_extent_state(cached_state); | ||
5842 | return ret; | ||
4874 | } | 5843 | } |
4875 | 5844 | ||
4876 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 5845 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
@@ -5034,7 +6003,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5034 | u64 page_start; | 6003 | u64 page_start; |
5035 | u64 page_end; | 6004 | u64 page_end; |
5036 | 6005 | ||
5037 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 6006 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
5038 | if (ret) { | 6007 | if (ret) { |
5039 | if (ret == -ENOMEM) | 6008 | if (ret == -ENOMEM) |
5040 | ret = VM_FAULT_OOM; | 6009 | ret = VM_FAULT_OOM; |
@@ -5043,13 +6012,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
5043 | goto out; | 6012 | goto out; |
5044 | } | 6013 | } |
5045 | 6014 | ||
5046 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
5047 | if (ret) { | ||
5048 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5049 | ret = VM_FAULT_SIGBUS; | ||
5050 | goto out; | ||
5051 | } | ||
5052 | |||
5053 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 6015 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
5054 | again: | 6016 | again: |
5055 | lock_page(page); | 6017 | lock_page(page); |
@@ -5059,7 +6021,6 @@ again: | |||
5059 | 6021 | ||
5060 | if ((page->mapping != inode->i_mapping) || | 6022 | if ((page->mapping != inode->i_mapping) || |
5061 | (page_start >= size)) { | 6023 | (page_start >= size)) { |
5062 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5063 | /* page got truncated out from underneath us */ | 6024 | /* page got truncated out from underneath us */ |
5064 | goto out_unlock; | 6025 | goto out_unlock; |
5065 | } | 6026 | } |
@@ -5100,7 +6061,6 @@ again: | |||
5100 | unlock_extent_cached(io_tree, page_start, page_end, | 6061 | unlock_extent_cached(io_tree, page_start, page_end, |
5101 | &cached_state, GFP_NOFS); | 6062 | &cached_state, GFP_NOFS); |
5102 | ret = VM_FAULT_SIGBUS; | 6063 | ret = VM_FAULT_SIGBUS; |
5103 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
5104 | goto out_unlock; | 6064 | goto out_unlock; |
5105 | } | 6065 | } |
5106 | ret = 0; | 6066 | ret = 0; |
@@ -5127,10 +6087,10 @@ again: | |||
5127 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6087 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
5128 | 6088 | ||
5129 | out_unlock: | 6089 | out_unlock: |
5130 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
5131 | if (!ret) | 6090 | if (!ret) |
5132 | return VM_FAULT_LOCKED; | 6091 | return VM_FAULT_LOCKED; |
5133 | unlock_page(page); | 6092 | unlock_page(page); |
6093 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
5134 | out: | 6094 | out: |
5135 | return ret; | 6095 | return ret; |
5136 | } | 6096 | } |
@@ -5155,8 +6115,10 @@ static void btrfs_truncate(struct inode *inode) | |||
5155 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6115 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
5156 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6116 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
5157 | 6117 | ||
5158 | trans = btrfs_start_transaction(root, 1); | 6118 | trans = btrfs_start_transaction(root, 0); |
6119 | BUG_ON(IS_ERR(trans)); | ||
5159 | btrfs_set_trans_block_group(trans, inode); | 6120 | btrfs_set_trans_block_group(trans, inode); |
6121 | trans->block_rsv = root->orphan_block_rsv; | ||
5160 | 6122 | ||
5161 | /* | 6123 | /* |
5162 | * setattr is responsible for setting the ordered_data_close flag, | 6124 | * setattr is responsible for setting the ordered_data_close flag, |
@@ -5179,6 +6141,23 @@ static void btrfs_truncate(struct inode *inode) | |||
5179 | btrfs_add_ordered_operation(trans, root, inode); | 6141 | btrfs_add_ordered_operation(trans, root, inode); |
5180 | 6142 | ||
5181 | while (1) { | 6143 | while (1) { |
6144 | if (!trans) { | ||
6145 | trans = btrfs_start_transaction(root, 0); | ||
6146 | BUG_ON(IS_ERR(trans)); | ||
6147 | btrfs_set_trans_block_group(trans, inode); | ||
6148 | trans->block_rsv = root->orphan_block_rsv; | ||
6149 | } | ||
6150 | |||
6151 | ret = btrfs_block_rsv_check(trans, root, | ||
6152 | root->orphan_block_rsv, 0, 5); | ||
6153 | if (ret) { | ||
6154 | BUG_ON(ret != -EAGAIN); | ||
6155 | ret = btrfs_commit_transaction(trans, root); | ||
6156 | BUG_ON(ret); | ||
6157 | trans = NULL; | ||
6158 | continue; | ||
6159 | } | ||
6160 | |||
5182 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6161 | ret = btrfs_truncate_inode_items(trans, root, inode, |
5183 | inode->i_size, | 6162 | inode->i_size, |
5184 | BTRFS_EXTENT_DATA_KEY); | 6163 | BTRFS_EXTENT_DATA_KEY); |
@@ -5190,10 +6169,8 @@ static void btrfs_truncate(struct inode *inode) | |||
5190 | 6169 | ||
5191 | nr = trans->blocks_used; | 6170 | nr = trans->blocks_used; |
5192 | btrfs_end_transaction(trans, root); | 6171 | btrfs_end_transaction(trans, root); |
6172 | trans = NULL; | ||
5193 | btrfs_btree_balance_dirty(root, nr); | 6173 | btrfs_btree_balance_dirty(root, nr); |
5194 | |||
5195 | trans = btrfs_start_transaction(root, 1); | ||
5196 | btrfs_set_trans_block_group(trans, inode); | ||
5197 | } | 6174 | } |
5198 | 6175 | ||
5199 | if (ret == 0 && inode->i_nlink > 0) { | 6176 | if (ret == 0 && inode->i_nlink > 0) { |
@@ -5254,21 +6231,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
5254 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6231 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
5255 | { | 6232 | { |
5256 | struct btrfs_inode *ei; | 6233 | struct btrfs_inode *ei; |
6234 | struct inode *inode; | ||
5257 | 6235 | ||
5258 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); | 6236 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); |
5259 | if (!ei) | 6237 | if (!ei) |
5260 | return NULL; | 6238 | return NULL; |
6239 | |||
6240 | ei->root = NULL; | ||
6241 | ei->space_info = NULL; | ||
6242 | ei->generation = 0; | ||
6243 | ei->sequence = 0; | ||
5261 | ei->last_trans = 0; | 6244 | ei->last_trans = 0; |
5262 | ei->last_sub_trans = 0; | 6245 | ei->last_sub_trans = 0; |
5263 | ei->logged_trans = 0; | 6246 | ei->logged_trans = 0; |
5264 | ei->outstanding_extents = 0; | 6247 | ei->delalloc_bytes = 0; |
5265 | ei->reserved_extents = 0; | 6248 | ei->reserved_bytes = 0; |
5266 | ei->root = NULL; | 6249 | ei->disk_i_size = 0; |
6250 | ei->flags = 0; | ||
6251 | ei->index_cnt = (u64)-1; | ||
6252 | ei->last_unlink_trans = 0; | ||
6253 | |||
5267 | spin_lock_init(&ei->accounting_lock); | 6254 | spin_lock_init(&ei->accounting_lock); |
6255 | atomic_set(&ei->outstanding_extents, 0); | ||
6256 | ei->reserved_extents = 0; | ||
6257 | |||
6258 | ei->ordered_data_close = 0; | ||
6259 | ei->orphan_meta_reserved = 0; | ||
6260 | ei->dummy_inode = 0; | ||
6261 | ei->force_compress = 0; | ||
6262 | |||
6263 | inode = &ei->vfs_inode; | ||
6264 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | ||
6265 | extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); | ||
6266 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); | ||
6267 | mutex_init(&ei->log_mutex); | ||
5268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
5269 | INIT_LIST_HEAD(&ei->i_orphan); | 6269 | INIT_LIST_HEAD(&ei->i_orphan); |
6270 | INIT_LIST_HEAD(&ei->delalloc_inodes); | ||
5270 | INIT_LIST_HEAD(&ei->ordered_operations); | 6271 | INIT_LIST_HEAD(&ei->ordered_operations); |
5271 | return &ei->vfs_inode; | 6272 | RB_CLEAR_NODE(&ei->rb_node); |
6273 | |||
6274 | return inode; | ||
5272 | } | 6275 | } |
5273 | 6276 | ||
5274 | void btrfs_destroy_inode(struct inode *inode) | 6277 | void btrfs_destroy_inode(struct inode *inode) |
@@ -5278,6 +6281,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5278 | 6281 | ||
5279 | WARN_ON(!list_empty(&inode->i_dentry)); | 6282 | WARN_ON(!list_empty(&inode->i_dentry)); |
5280 | WARN_ON(inode->i_data.nrpages); | 6283 | WARN_ON(inode->i_data.nrpages); |
6284 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | ||
6285 | WARN_ON(BTRFS_I(inode)->reserved_extents); | ||
5281 | 6286 | ||
5282 | /* | 6287 | /* |
5283 | * This can happen where we create an inode, but somebody else also | 6288 | * This can happen where we create an inode, but somebody else also |
@@ -5298,13 +6303,13 @@ void btrfs_destroy_inode(struct inode *inode) | |||
5298 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6303 | spin_unlock(&root->fs_info->ordered_extent_lock); |
5299 | } | 6304 | } |
5300 | 6305 | ||
5301 | spin_lock(&root->list_lock); | 6306 | spin_lock(&root->orphan_lock); |
5302 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6307 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
5303 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", | 6308 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", |
5304 | inode->i_ino); | 6309 | inode->i_ino); |
5305 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6310 | list_del_init(&BTRFS_I(inode)->i_orphan); |
5306 | } | 6311 | } |
5307 | spin_unlock(&root->list_lock); | 6312 | spin_unlock(&root->orphan_lock); |
5308 | 6313 | ||
5309 | while (1) { | 6314 | while (1) { |
5310 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6315 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
@@ -5425,19 +6430,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5425 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 6430 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
5426 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 6431 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
5427 | return -ENOTEMPTY; | 6432 | return -ENOTEMPTY; |
5428 | |||
5429 | /* | ||
5430 | * We want to reserve the absolute worst case amount of items. So if | ||
5431 | * both inodes are subvols and we need to unlink them then that would | ||
5432 | * require 4 item modifications, but if they are both normal inodes it | ||
5433 | * would require 5 item modifications, so we'll assume their normal | ||
5434 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
5435 | * should cover the worst case number of items we'll modify. | ||
5436 | */ | ||
5437 | ret = btrfs_reserve_metadata_space(root, 11); | ||
5438 | if (ret) | ||
5439 | return ret; | ||
5440 | |||
5441 | /* | 6433 | /* |
5442 | * we're using rename to replace one file with another. | 6434 | * we're using rename to replace one file with another. |
5443 | * and the replacement file is large. Start IO on it now so | 6435 | * and the replacement file is large. Start IO on it now so |
@@ -5450,8 +6442,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
5450 | /* close the racy window with snapshot create/destroy ioctl */ | 6442 | /* close the racy window with snapshot create/destroy ioctl */ |
5451 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6443 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
5452 | down_read(&root->fs_info->subvol_sem); | 6444 | down_read(&root->fs_info->subvol_sem); |
6445 | /* | ||
6446 | * We want to reserve the absolute worst case amount of items. So if | ||
6447 | * both inodes are subvols and we need to unlink them then that would | ||
6448 | * require 4 item modifications, but if they are both normal inodes it | ||
6449 | * would require 5 item modifications, so we'll assume their normal | ||
6450 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
6451 | * should cover the worst case number of items we'll modify. | ||
6452 | */ | ||
6453 | trans = btrfs_start_transaction(root, 20); | ||
6454 | if (IS_ERR(trans)) | ||
6455 | return PTR_ERR(trans); | ||
5453 | 6456 | ||
5454 | trans = btrfs_start_transaction(root, 1); | ||
5455 | btrfs_set_trans_block_group(trans, new_dir); | 6457 | btrfs_set_trans_block_group(trans, new_dir); |
5456 | 6458 | ||
5457 | if (dest != root) | 6459 | if (dest != root) |
@@ -5550,7 +6552,6 @@ out_fail: | |||
5550 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6552 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
5551 | up_read(&root->fs_info->subvol_sem); | 6553 | up_read(&root->fs_info->subvol_sem); |
5552 | 6554 | ||
5553 | btrfs_unreserve_metadata_space(root, 11); | ||
5554 | return ret; | 6555 | return ret; |
5555 | } | 6556 | } |
5556 | 6557 | ||
@@ -5602,6 +6603,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
5602 | return 0; | 6603 | return 0; |
5603 | } | 6604 | } |
5604 | 6605 | ||
6606 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) | ||
6607 | { | ||
6608 | struct btrfs_inode *binode; | ||
6609 | struct inode *inode = NULL; | ||
6610 | |||
6611 | spin_lock(&root->fs_info->delalloc_lock); | ||
6612 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
6613 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
6614 | struct btrfs_inode, delalloc_inodes); | ||
6615 | inode = igrab(&binode->vfs_inode); | ||
6616 | if (inode) { | ||
6617 | list_move_tail(&binode->delalloc_inodes, | ||
6618 | &root->fs_info->delalloc_inodes); | ||
6619 | break; | ||
6620 | } | ||
6621 | |||
6622 | list_del_init(&binode->delalloc_inodes); | ||
6623 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
6624 | } | ||
6625 | spin_unlock(&root->fs_info->delalloc_lock); | ||
6626 | |||
6627 | if (inode) { | ||
6628 | write_inode_now(inode, 0); | ||
6629 | if (delay_iput) | ||
6630 | btrfs_add_delayed_iput(inode); | ||
6631 | else | ||
6632 | iput(inode); | ||
6633 | return 1; | ||
6634 | } | ||
6635 | return 0; | ||
6636 | } | ||
6637 | |||
5605 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 6638 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
5606 | const char *symname) | 6639 | const char *symname) |
5607 | { | 6640 | { |
@@ -5625,26 +6658,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5625 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 6658 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
5626 | return -ENAMETOOLONG; | 6659 | return -ENAMETOOLONG; |
5627 | 6660 | ||
6661 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
6662 | if (err) | ||
6663 | return err; | ||
5628 | /* | 6664 | /* |
5629 | * 2 items for inode item and ref | 6665 | * 2 items for inode item and ref |
5630 | * 2 items for dir items | 6666 | * 2 items for dir items |
5631 | * 1 item for xattr if selinux is on | 6667 | * 1 item for xattr if selinux is on |
5632 | */ | 6668 | */ |
5633 | err = btrfs_reserve_metadata_space(root, 5); | 6669 | trans = btrfs_start_transaction(root, 5); |
5634 | if (err) | 6670 | if (IS_ERR(trans)) |
5635 | return err; | 6671 | return PTR_ERR(trans); |
5636 | 6672 | ||
5637 | trans = btrfs_start_transaction(root, 1); | ||
5638 | if (!trans) | ||
5639 | goto out_fail; | ||
5640 | btrfs_set_trans_block_group(trans, dir); | 6673 | btrfs_set_trans_block_group(trans, dir); |
5641 | 6674 | ||
5642 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
5643 | if (err) { | ||
5644 | err = -ENOSPC; | ||
5645 | goto out_unlock; | ||
5646 | } | ||
5647 | |||
5648 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 6675 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
5649 | dentry->d_name.len, | 6676 | dentry->d_name.len, |
5650 | dentry->d_parent->d_inode->i_ino, objectid, | 6677 | dentry->d_parent->d_inode->i_ino, objectid, |
@@ -5716,8 +6743,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
5716 | out_unlock: | 6743 | out_unlock: |
5717 | nr = trans->blocks_used; | 6744 | nr = trans->blocks_used; |
5718 | btrfs_end_transaction_throttle(trans, root); | 6745 | btrfs_end_transaction_throttle(trans, root); |
5719 | out_fail: | ||
5720 | btrfs_unreserve_metadata_space(root, 5); | ||
5721 | if (drop_inode) { | 6746 | if (drop_inode) { |
5722 | inode_dec_link_count(inode); | 6747 | inode_dec_link_count(inode); |
5723 | iput(inode); | 6748 | iput(inode); |
@@ -5726,33 +6751,28 @@ out_fail: | |||
5726 | return err; | 6751 | return err; |
5727 | } | 6752 | } |
5728 | 6753 | ||
5729 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 6754 | int btrfs_prealloc_file_range(struct inode *inode, int mode, |
5730 | u64 alloc_hint, int mode, loff_t actual_len) | 6755 | u64 start, u64 num_bytes, u64 min_size, |
6756 | loff_t actual_len, u64 *alloc_hint) | ||
5731 | { | 6757 | { |
5732 | struct btrfs_trans_handle *trans; | 6758 | struct btrfs_trans_handle *trans; |
5733 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6759 | struct btrfs_root *root = BTRFS_I(inode)->root; |
5734 | struct btrfs_key ins; | 6760 | struct btrfs_key ins; |
5735 | u64 cur_offset = start; | 6761 | u64 cur_offset = start; |
5736 | u64 num_bytes = end - start; | ||
5737 | int ret = 0; | 6762 | int ret = 0; |
5738 | u64 i_size; | ||
5739 | 6763 | ||
5740 | while (num_bytes > 0) { | 6764 | while (num_bytes > 0) { |
5741 | trans = btrfs_start_transaction(root, 1); | 6765 | trans = btrfs_start_transaction(root, 3); |
5742 | 6766 | if (IS_ERR(trans)) { | |
5743 | ret = btrfs_reserve_extent(trans, root, num_bytes, | 6767 | ret = PTR_ERR(trans); |
5744 | root->sectorsize, 0, alloc_hint, | 6768 | break; |
5745 | (u64)-1, &ins, 1); | ||
5746 | if (ret) { | ||
5747 | WARN_ON(1); | ||
5748 | goto stop_trans; | ||
5749 | } | 6769 | } |
5750 | 6770 | ||
5751 | ret = btrfs_reserve_metadata_space(root, 3); | 6771 | ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, |
6772 | 0, *alloc_hint, (u64)-1, &ins, 1); | ||
5752 | if (ret) { | 6773 | if (ret) { |
5753 | btrfs_free_reserved_extent(root, ins.objectid, | 6774 | btrfs_end_transaction(trans, root); |
5754 | ins.offset); | 6775 | break; |
5755 | goto stop_trans; | ||
5756 | } | 6776 | } |
5757 | 6777 | ||
5758 | ret = insert_reserved_file_extent(trans, inode, | 6778 | ret = insert_reserved_file_extent(trans, inode, |
@@ -5766,34 +6786,27 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
5766 | 6786 | ||
5767 | num_bytes -= ins.offset; | 6787 | num_bytes -= ins.offset; |
5768 | cur_offset += ins.offset; | 6788 | cur_offset += ins.offset; |
5769 | alloc_hint = ins.objectid + ins.offset; | 6789 | *alloc_hint = ins.objectid + ins.offset; |
5770 | 6790 | ||
5771 | inode->i_ctime = CURRENT_TIME; | 6791 | inode->i_ctime = CURRENT_TIME; |
5772 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 6792 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
5773 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 6793 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
5774 | (actual_len > inode->i_size) && | 6794 | (actual_len > inode->i_size) && |
5775 | (cur_offset > inode->i_size)) { | 6795 | (cur_offset > inode->i_size)) { |
5776 | |||
5777 | if (cur_offset > actual_len) | 6796 | if (cur_offset > actual_len) |
5778 | i_size = actual_len; | 6797 | i_size_write(inode, actual_len); |
5779 | else | 6798 | else |
5780 | i_size = cur_offset; | 6799 | i_size_write(inode, cur_offset); |
5781 | i_size_write(inode, i_size); | 6800 | i_size_write(inode, cur_offset); |
5782 | btrfs_ordered_update_i_size(inode, i_size, NULL); | 6801 | btrfs_ordered_update_i_size(inode, cur_offset, NULL); |
5783 | } | 6802 | } |
5784 | 6803 | ||
5785 | ret = btrfs_update_inode(trans, root, inode); | 6804 | ret = btrfs_update_inode(trans, root, inode); |
5786 | BUG_ON(ret); | 6805 | BUG_ON(ret); |
5787 | 6806 | ||
5788 | btrfs_end_transaction(trans, root); | 6807 | btrfs_end_transaction(trans, root); |
5789 | btrfs_unreserve_metadata_space(root, 3); | ||
5790 | } | 6808 | } |
5791 | return ret; | 6809 | return ret; |
5792 | |||
5793 | stop_trans: | ||
5794 | btrfs_end_transaction(trans, root); | ||
5795 | return ret; | ||
5796 | |||
5797 | } | 6810 | } |
5798 | 6811 | ||
5799 | static long btrfs_fallocate(struct inode *inode, int mode, | 6812 | static long btrfs_fallocate(struct inode *inode, int mode, |
@@ -5826,8 +6839,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5826 | goto out; | 6839 | goto out; |
5827 | } | 6840 | } |
5828 | 6841 | ||
5829 | ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, | 6842 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); |
5830 | alloc_end - alloc_start); | ||
5831 | if (ret) | 6843 | if (ret) |
5832 | goto out; | 6844 | goto out; |
5833 | 6845 | ||
@@ -5872,16 +6884,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5872 | if (em->block_start == EXTENT_MAP_HOLE || | 6884 | if (em->block_start == EXTENT_MAP_HOLE || |
5873 | (cur_offset >= inode->i_size && | 6885 | (cur_offset >= inode->i_size && |
5874 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 6886 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
5875 | ret = prealloc_file_range(inode, | 6887 | ret = btrfs_prealloc_file_range(inode, 0, cur_offset, |
5876 | cur_offset, last_byte, | 6888 | last_byte - cur_offset, |
5877 | alloc_hint, mode, offset+len); | 6889 | 1 << inode->i_blkbits, |
6890 | offset + len, | ||
6891 | &alloc_hint); | ||
5878 | if (ret < 0) { | 6892 | if (ret < 0) { |
5879 | free_extent_map(em); | 6893 | free_extent_map(em); |
5880 | break; | 6894 | break; |
5881 | } | 6895 | } |
5882 | } | 6896 | } |
5883 | if (em->block_start <= EXTENT_MAP_LAST_BYTE) | ||
5884 | alloc_hint = em->block_start; | ||
5885 | free_extent_map(em); | 6897 | free_extent_map(em); |
5886 | 6898 | ||
5887 | cur_offset = last_byte; | 6899 | cur_offset = last_byte; |
@@ -5893,8 +6905,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
5893 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 6905 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
5894 | &cached_state, GFP_NOFS); | 6906 | &cached_state, GFP_NOFS); |
5895 | 6907 | ||
5896 | btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, | 6908 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
5897 | alloc_end - alloc_start); | ||
5898 | out: | 6909 | out: |
5899 | mutex_unlock(&inode->i_mutex); | 6910 | mutex_unlock(&inode->i_mutex); |
5900 | return ret; | 6911 | return ret; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 97a97839a867..4cdb98cf26de 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
@@ -239,23 +239,19 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | 239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
240 | u64 index = 0; | 240 | u64 index = 0; |
241 | 241 | ||
242 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | ||
243 | 0, &objectid); | ||
244 | if (ret) | ||
245 | return ret; | ||
242 | /* | 246 | /* |
243 | * 1 - inode item | 247 | * 1 - inode item |
244 | * 2 - refs | 248 | * 2 - refs |
245 | * 1 - root item | 249 | * 1 - root item |
246 | * 2 - dir items | 250 | * 2 - dir items |
247 | */ | 251 | */ |
248 | ret = btrfs_reserve_metadata_space(root, 6); | 252 | trans = btrfs_start_transaction(root, 6); |
249 | if (ret) | 253 | if (IS_ERR(trans)) |
250 | return ret; | 254 | return PTR_ERR(trans); |
251 | |||
252 | trans = btrfs_start_transaction(root, 1); | ||
253 | BUG_ON(!trans); | ||
254 | |||
255 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | ||
256 | 0, &objectid); | ||
257 | if (ret) | ||
258 | goto fail; | ||
259 | 255 | ||
260 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 256 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
261 | 0, objectid, NULL, 0, 0, 0); | 257 | 0, objectid, NULL, 0, 0, 0); |
@@ -345,13 +341,10 @@ fail: | |||
345 | err = btrfs_commit_transaction(trans, root); | 341 | err = btrfs_commit_transaction(trans, root); |
346 | if (err && !ret) | 342 | if (err && !ret) |
347 | ret = err; | 343 | ret = err; |
348 | |||
349 | btrfs_unreserve_metadata_space(root, 6); | ||
350 | return ret; | 344 | return ret; |
351 | } | 345 | } |
352 | 346 | ||
353 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 347 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) |
354 | char *name, int namelen) | ||
355 | { | 348 | { |
356 | struct inode *inode; | 349 | struct inode *inode; |
357 | struct btrfs_pending_snapshot *pending_snapshot; | 350 | struct btrfs_pending_snapshot *pending_snapshot; |
@@ -361,40 +354,33 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
361 | if (!root->ref_cows) | 354 | if (!root->ref_cows) |
362 | return -EINVAL; | 355 | return -EINVAL; |
363 | 356 | ||
364 | /* | ||
365 | * 1 - inode item | ||
366 | * 2 - refs | ||
367 | * 1 - root item | ||
368 | * 2 - dir items | ||
369 | */ | ||
370 | ret = btrfs_reserve_metadata_space(root, 6); | ||
371 | if (ret) | ||
372 | goto fail; | ||
373 | |||
374 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 357 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
375 | if (!pending_snapshot) { | 358 | if (!pending_snapshot) |
376 | ret = -ENOMEM; | 359 | return -ENOMEM; |
377 | btrfs_unreserve_metadata_space(root, 6); | 360 | |
378 | goto fail; | 361 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
379 | } | ||
380 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | ||
381 | if (!pending_snapshot->name) { | ||
382 | ret = -ENOMEM; | ||
383 | kfree(pending_snapshot); | ||
384 | btrfs_unreserve_metadata_space(root, 6); | ||
385 | goto fail; | ||
386 | } | ||
387 | memcpy(pending_snapshot->name, name, namelen); | ||
388 | pending_snapshot->name[namelen] = '\0'; | ||
389 | pending_snapshot->dentry = dentry; | 362 | pending_snapshot->dentry = dentry; |
390 | trans = btrfs_start_transaction(root, 1); | ||
391 | BUG_ON(!trans); | ||
392 | pending_snapshot->root = root; | 363 | pending_snapshot->root = root; |
364 | |||
365 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | ||
366 | if (IS_ERR(trans)) { | ||
367 | ret = PTR_ERR(trans); | ||
368 | goto fail; | ||
369 | } | ||
370 | |||
371 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); | ||
372 | BUG_ON(ret); | ||
373 | |||
393 | list_add(&pending_snapshot->list, | 374 | list_add(&pending_snapshot->list, |
394 | &trans->transaction->pending_snapshots); | 375 | &trans->transaction->pending_snapshots); |
395 | ret = btrfs_commit_transaction(trans, root); | 376 | ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); |
396 | BUG_ON(ret); | 377 | BUG_ON(ret); |
397 | btrfs_unreserve_metadata_space(root, 6); | 378 | |
379 | ret = pending_snapshot->error; | ||
380 | if (ret) | ||
381 | goto fail; | ||
382 | |||
383 | btrfs_orphan_cleanup(pending_snapshot->snap); | ||
398 | 384 | ||
399 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 385 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
400 | if (IS_ERR(inode)) { | 386 | if (IS_ERR(inode)) { |
@@ -405,6 +391,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
405 | d_instantiate(dentry, inode); | 391 | d_instantiate(dentry, inode); |
406 | ret = 0; | 392 | ret = 0; |
407 | fail: | 393 | fail: |
394 | kfree(pending_snapshot); | ||
408 | return ret; | 395 | return ret; |
409 | } | 396 | } |
410 | 397 | ||
@@ -456,8 +443,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
456 | goto out_up_read; | 443 | goto out_up_read; |
457 | 444 | ||
458 | if (snap_src) { | 445 | if (snap_src) { |
459 | error = create_snapshot(snap_src, dentry, | 446 | error = create_snapshot(snap_src, dentry); |
460 | name, namelen); | ||
461 | } else { | 447 | } else { |
462 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 448 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
463 | name, namelen); | 449 | name, namelen); |
@@ -601,19 +587,9 @@ static int btrfs_defrag_file(struct file *file, | |||
601 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 587 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
602 | BTRFS_I(inode)->force_compress = 1; | 588 | BTRFS_I(inode)->force_compress = 1; |
603 | 589 | ||
604 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 590 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
605 | if (ret) { | 591 | if (ret) |
606 | ret = -ENOSPC; | 592 | goto err_unlock; |
607 | break; | ||
608 | } | ||
609 | |||
610 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
611 | if (ret) { | ||
612 | btrfs_free_reserved_data_space(root, inode, | ||
613 | PAGE_CACHE_SIZE); | ||
614 | ret = -ENOSPC; | ||
615 | break; | ||
616 | } | ||
617 | again: | 593 | again: |
618 | if (inode->i_size == 0 || | 594 | if (inode->i_size == 0 || |
619 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { | 595 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { |
@@ -622,8 +598,10 @@ again: | |||
622 | } | 598 | } |
623 | 599 | ||
624 | page = grab_cache_page(inode->i_mapping, i); | 600 | page = grab_cache_page(inode->i_mapping, i); |
625 | if (!page) | 601 | if (!page) { |
602 | ret = -ENOMEM; | ||
626 | goto err_reservations; | 603 | goto err_reservations; |
604 | } | ||
627 | 605 | ||
628 | if (!PageUptodate(page)) { | 606 | if (!PageUptodate(page)) { |
629 | btrfs_readpage(NULL, page); | 607 | btrfs_readpage(NULL, page); |
@@ -631,6 +609,7 @@ again: | |||
631 | if (!PageUptodate(page)) { | 609 | if (!PageUptodate(page)) { |
632 | unlock_page(page); | 610 | unlock_page(page); |
633 | page_cache_release(page); | 611 | page_cache_release(page); |
612 | ret = -EIO; | ||
634 | goto err_reservations; | 613 | goto err_reservations; |
635 | } | 614 | } |
636 | } | 615 | } |
@@ -644,8 +623,7 @@ again: | |||
644 | wait_on_page_writeback(page); | 623 | wait_on_page_writeback(page); |
645 | 624 | ||
646 | if (PageDirty(page)) { | 625 | if (PageDirty(page)) { |
647 | btrfs_free_reserved_data_space(root, inode, | 626 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
648 | PAGE_CACHE_SIZE); | ||
649 | goto loop_unlock; | 627 | goto loop_unlock; |
650 | } | 628 | } |
651 | 629 | ||
@@ -683,7 +661,6 @@ loop_unlock: | |||
683 | page_cache_release(page); | 661 | page_cache_release(page); |
684 | mutex_unlock(&inode->i_mutex); | 662 | mutex_unlock(&inode->i_mutex); |
685 | 663 | ||
686 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
687 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 664 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
688 | i++; | 665 | i++; |
689 | } | 666 | } |
@@ -713,9 +690,9 @@ loop_unlock: | |||
713 | return 0; | 690 | return 0; |
714 | 691 | ||
715 | err_reservations: | 692 | err_reservations: |
693 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
694 | err_unlock: | ||
716 | mutex_unlock(&inode->i_mutex); | 695 | mutex_unlock(&inode->i_mutex); |
717 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
718 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
719 | return ret; | 696 | return ret; |
720 | } | 697 | } |
721 | 698 | ||
@@ -811,7 +788,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
811 | device->name, (unsigned long long)new_size); | 788 | device->name, (unsigned long long)new_size); |
812 | 789 | ||
813 | if (new_size > old_size) { | 790 | if (new_size > old_size) { |
814 | trans = btrfs_start_transaction(root, 1); | 791 | trans = btrfs_start_transaction(root, 0); |
815 | ret = btrfs_grow_device(trans, device, new_size); | 792 | ret = btrfs_grow_device(trans, device, new_size); |
816 | btrfs_commit_transaction(trans, root); | 793 | btrfs_commit_transaction(trans, root); |
817 | } else { | 794 | } else { |
@@ -1300,7 +1277,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1300 | if (err) | 1277 | if (err) |
1301 | goto out_up_write; | 1278 | goto out_up_write; |
1302 | 1279 | ||
1303 | trans = btrfs_start_transaction(root, 1); | 1280 | trans = btrfs_start_transaction(root, 0); |
1281 | if (IS_ERR(trans)) { | ||
1282 | err = PTR_ERR(trans); | ||
1283 | goto out; | ||
1284 | } | ||
1285 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
1286 | |||
1304 | ret = btrfs_unlink_subvol(trans, root, dir, | 1287 | ret = btrfs_unlink_subvol(trans, root, dir, |
1305 | dest->root_key.objectid, | 1288 | dest->root_key.objectid, |
1306 | dentry->d_name.name, | 1289 | dentry->d_name.name, |
@@ -1314,10 +1297,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
1314 | dest->root_item.drop_level = 0; | 1297 | dest->root_item.drop_level = 0; |
1315 | btrfs_set_root_refs(&dest->root_item, 0); | 1298 | btrfs_set_root_refs(&dest->root_item, 0); |
1316 | 1299 | ||
1317 | ret = btrfs_insert_orphan_item(trans, | 1300 | if (!xchg(&dest->orphan_item_inserted, 1)) { |
1318 | root->fs_info->tree_root, | 1301 | ret = btrfs_insert_orphan_item(trans, |
1319 | dest->root_key.objectid); | 1302 | root->fs_info->tree_root, |
1320 | BUG_ON(ret); | 1303 | dest->root_key.objectid); |
1304 | BUG_ON(ret); | ||
1305 | } | ||
1321 | 1306 | ||
1322 | ret = btrfs_commit_transaction(trans, root); | 1307 | ret = btrfs_commit_transaction(trans, root); |
1323 | BUG_ON(ret); | 1308 | BUG_ON(ret); |
@@ -1358,8 +1343,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1358 | ret = -EPERM; | 1343 | ret = -EPERM; |
1359 | goto out; | 1344 | goto out; |
1360 | } | 1345 | } |
1361 | btrfs_defrag_root(root, 0); | 1346 | ret = btrfs_defrag_root(root, 0); |
1362 | btrfs_defrag_root(root->fs_info->extent_root, 0); | 1347 | if (ret) |
1348 | goto out; | ||
1349 | ret = btrfs_defrag_root(root->fs_info->extent_root, 0); | ||
1363 | break; | 1350 | break; |
1364 | case S_IFREG: | 1351 | case S_IFREG: |
1365 | if (!(file->f_mode & FMODE_WRITE)) { | 1352 | if (!(file->f_mode & FMODE_WRITE)) { |
@@ -1389,9 +1376,11 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
1389 | /* the rest are all set to zero by kzalloc */ | 1376 | /* the rest are all set to zero by kzalloc */ |
1390 | range->len = (u64)-1; | 1377 | range->len = (u64)-1; |
1391 | } | 1378 | } |
1392 | btrfs_defrag_file(file, range); | 1379 | ret = btrfs_defrag_file(file, range); |
1393 | kfree(range); | 1380 | kfree(range); |
1394 | break; | 1381 | break; |
1382 | default: | ||
1383 | ret = -EINVAL; | ||
1395 | } | 1384 | } |
1396 | out: | 1385 | out: |
1397 | mnt_drop_write(file->f_path.mnt); | 1386 | mnt_drop_write(file->f_path.mnt); |
@@ -1550,12 +1539,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1550 | btrfs_wait_ordered_range(src, off, off+len); | 1539 | btrfs_wait_ordered_range(src, off, off+len); |
1551 | } | 1540 | } |
1552 | 1541 | ||
1553 | trans = btrfs_start_transaction(root, 1); | ||
1554 | BUG_ON(!trans); | ||
1555 | |||
1556 | /* punch hole in destination first */ | ||
1557 | btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); | ||
1558 | |||
1559 | /* clone data */ | 1542 | /* clone data */ |
1560 | key.objectid = src->i_ino; | 1543 | key.objectid = src->i_ino; |
1561 | key.type = BTRFS_EXTENT_DATA_KEY; | 1544 | key.type = BTRFS_EXTENT_DATA_KEY; |
@@ -1566,7 +1549,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1566 | * note the key will change type as we walk through the | 1549 | * note the key will change type as we walk through the |
1567 | * tree. | 1550 | * tree. |
1568 | */ | 1551 | */ |
1569 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 1552 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
1570 | if (ret < 0) | 1553 | if (ret < 0) |
1571 | goto out; | 1554 | goto out; |
1572 | 1555 | ||
@@ -1629,12 +1612,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1629 | new_key.objectid = inode->i_ino; | 1612 | new_key.objectid = inode->i_ino; |
1630 | new_key.offset = key.offset + destoff - off; | 1613 | new_key.offset = key.offset + destoff - off; |
1631 | 1614 | ||
1615 | trans = btrfs_start_transaction(root, 1); | ||
1616 | if (IS_ERR(trans)) { | ||
1617 | ret = PTR_ERR(trans); | ||
1618 | goto out; | ||
1619 | } | ||
1620 | |||
1632 | if (type == BTRFS_FILE_EXTENT_REG || | 1621 | if (type == BTRFS_FILE_EXTENT_REG || |
1633 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 1622 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
1623 | if (off > key.offset) { | ||
1624 | datao += off - key.offset; | ||
1625 | datal -= off - key.offset; | ||
1626 | } | ||
1627 | |||
1628 | if (key.offset + datal > off + len) | ||
1629 | datal = off + len - key.offset; | ||
1630 | |||
1631 | ret = btrfs_drop_extents(trans, inode, | ||
1632 | new_key.offset, | ||
1633 | new_key.offset + datal, | ||
1634 | &hint_byte, 1); | ||
1635 | BUG_ON(ret); | ||
1636 | |||
1634 | ret = btrfs_insert_empty_item(trans, root, path, | 1637 | ret = btrfs_insert_empty_item(trans, root, path, |
1635 | &new_key, size); | 1638 | &new_key, size); |
1636 | if (ret) | 1639 | BUG_ON(ret); |
1637 | goto out; | ||
1638 | 1640 | ||
1639 | leaf = path->nodes[0]; | 1641 | leaf = path->nodes[0]; |
1640 | slot = path->slots[0]; | 1642 | slot = path->slots[0]; |
@@ -1645,14 +1647,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1645 | extent = btrfs_item_ptr(leaf, slot, | 1647 | extent = btrfs_item_ptr(leaf, slot, |
1646 | struct btrfs_file_extent_item); | 1648 | struct btrfs_file_extent_item); |
1647 | 1649 | ||
1648 | if (off > key.offset) { | ||
1649 | datao += off - key.offset; | ||
1650 | datal -= off - key.offset; | ||
1651 | } | ||
1652 | |||
1653 | if (key.offset + datal > off + len) | ||
1654 | datal = off + len - key.offset; | ||
1655 | |||
1656 | /* disko == 0 means it's a hole */ | 1650 | /* disko == 0 means it's a hole */ |
1657 | if (!disko) | 1651 | if (!disko) |
1658 | datao = 0; | 1652 | datao = 0; |
@@ -1683,14 +1677,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1683 | 1677 | ||
1684 | if (comp && (skip || trim)) { | 1678 | if (comp && (skip || trim)) { |
1685 | ret = -EINVAL; | 1679 | ret = -EINVAL; |
1680 | btrfs_end_transaction(trans, root); | ||
1686 | goto out; | 1681 | goto out; |
1687 | } | 1682 | } |
1688 | size -= skip + trim; | 1683 | size -= skip + trim; |
1689 | datal -= skip + trim; | 1684 | datal -= skip + trim; |
1685 | |||
1686 | ret = btrfs_drop_extents(trans, inode, | ||
1687 | new_key.offset, | ||
1688 | new_key.offset + datal, | ||
1689 | &hint_byte, 1); | ||
1690 | BUG_ON(ret); | ||
1691 | |||
1690 | ret = btrfs_insert_empty_item(trans, root, path, | 1692 | ret = btrfs_insert_empty_item(trans, root, path, |
1691 | &new_key, size); | 1693 | &new_key, size); |
1692 | if (ret) | 1694 | BUG_ON(ret); |
1693 | goto out; | ||
1694 | 1695 | ||
1695 | if (skip) { | 1696 | if (skip) { |
1696 | u32 start = | 1697 | u32 start = |
@@ -1708,8 +1709,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
1708 | } | 1709 | } |
1709 | 1710 | ||
1710 | btrfs_mark_buffer_dirty(leaf); | 1711 | btrfs_mark_buffer_dirty(leaf); |
1711 | } | 1712 | btrfs_release_path(root, path); |
1712 | 1713 | ||
1714 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
1715 | if (new_key.offset + datal > inode->i_size) | ||
1716 | btrfs_i_size_write(inode, | ||
1717 | new_key.offset + datal); | ||
1718 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
1719 | ret = btrfs_update_inode(trans, root, inode); | ||
1720 | BUG_ON(ret); | ||
1721 | btrfs_end_transaction(trans, root); | ||
1722 | } | ||
1713 | next: | 1723 | next: |
1714 | btrfs_release_path(root, path); | 1724 | btrfs_release_path(root, path); |
1715 | key.offset++; | 1725 | key.offset++; |
@@ -1717,17 +1727,7 @@ next: | |||
1717 | ret = 0; | 1727 | ret = 0; |
1718 | out: | 1728 | out: |
1719 | btrfs_release_path(root, path); | 1729 | btrfs_release_path(root, path); |
1720 | if (ret == 0) { | ||
1721 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
1722 | if (destoff + olen > inode->i_size) | ||
1723 | btrfs_i_size_write(inode, destoff + olen); | ||
1724 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
1725 | ret = btrfs_update_inode(trans, root, inode); | ||
1726 | } | ||
1727 | btrfs_end_transaction(trans, root); | ||
1728 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 1730 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
1729 | if (ret) | ||
1730 | vmtruncate(inode, 0); | ||
1731 | out_unlock: | 1731 | out_unlock: |
1732 | mutex_unlock(&src->i_mutex); | 1732 | mutex_unlock(&src->i_mutex); |
1733 | mutex_unlock(&inode->i_mutex); | 1733 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a127c0ebb2dc..e56c72bc5add 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | 126 | ||
127 | static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, | ||
128 | u64 len) | ||
129 | { | ||
130 | if (file_offset + len <= entry->file_offset || | ||
131 | entry->file_offset + entry->len <= file_offset) | ||
132 | return 0; | ||
133 | return 1; | ||
134 | } | ||
135 | |||
127 | /* | 136 | /* |
128 | * look find the first ordered struct that has this offset, otherwise | 137 | * look find the first ordered struct that has this offset, otherwise |
129 | * the first one less than this offset | 138 | * the first one less than this offset |
@@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
161 | * The tree is given a single reference on the ordered extent that was | 170 | * The tree is given a single reference on the ordered extent that was |
162 | * inserted. | 171 | * inserted. |
163 | */ | 172 | */ |
164 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
165 | u64 start, u64 len, u64 disk_len, int type) | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | ||
166 | { | 176 | { |
167 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
168 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
182 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
183 | set_bit(type, &entry->flags); | 193 | set_bit(type, &entry->flags); |
184 | 194 | ||
195 | if (dio) | ||
196 | set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); | ||
197 | |||
185 | /* one ref for the tree */ | 198 | /* one ref for the tree */ |
186 | atomic_set(&entry->refs, 1); | 199 | atomic_set(&entry->refs, 1); |
187 | init_waitqueue_head(&entry->wait); | 200 | init_waitqueue_head(&entry->wait); |
@@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
203 | return 0; | 216 | return 0; |
204 | } | 217 | } |
205 | 218 | ||
219 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
220 | u64 start, u64 len, u64 disk_len, int type) | ||
221 | { | ||
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
223 | disk_len, type, 0); | ||
224 | } | ||
225 | |||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
227 | u64 start, u64 len, u64 disk_len, int type) | ||
228 | { | ||
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
230 | disk_len, type, 1); | ||
231 | } | ||
232 | |||
206 | /* | 233 | /* |
207 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | 234 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted |
208 | * when an ordered extent is finished. If the list covers more than one | 235 | * when an ordered extent is finished. If the list covers more than one |
@@ -311,13 +338,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
311 | tree->last = NULL; | 338 | tree->last = NULL; |
312 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 339 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
313 | 340 | ||
314 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
315 | WARN_ON(!BTRFS_I(inode)->outstanding_extents); | ||
316 | BTRFS_I(inode)->outstanding_extents--; | ||
317 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
318 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
319 | inode, 1); | ||
320 | |||
321 | spin_lock(&root->fs_info->ordered_extent_lock); | 341 | spin_lock(&root->fs_info->ordered_extent_lock); |
322 | list_del_init(&entry->root_extent_list); | 342 | list_del_init(&entry->root_extent_list); |
323 | 343 | ||
@@ -491,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
491 | * start IO on any dirty ones so the wait doesn't stall waiting | 511 | * start IO on any dirty ones so the wait doesn't stall waiting |
492 | * for pdflush to find them | 512 | * for pdflush to find them |
493 | */ | 513 | */ |
494 | filemap_fdatawrite_range(inode->i_mapping, start, end); | 514 | if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) |
515 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
495 | if (wait) { | 516 | if (wait) { |
496 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 517 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
497 | &entry->flags)); | 518 | &entry->flags)); |
@@ -588,6 +609,47 @@ out: | |||
588 | return entry; | 609 | return entry; |
589 | } | 610 | } |
590 | 611 | ||
612 | /* Since the DIO code tries to lock a wide area we need to look for any ordered | ||
613 | * extents that exist in the range, rather than just the start of the range. | ||
614 | */ | ||
615 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
616 | u64 file_offset, | ||
617 | u64 len) | ||
618 | { | ||
619 | struct btrfs_ordered_inode_tree *tree; | ||
620 | struct rb_node *node; | ||
621 | struct btrfs_ordered_extent *entry = NULL; | ||
622 | |||
623 | tree = &BTRFS_I(inode)->ordered_tree; | ||
624 | spin_lock(&tree->lock); | ||
625 | node = tree_search(tree, file_offset); | ||
626 | if (!node) { | ||
627 | node = tree_search(tree, file_offset + len); | ||
628 | if (!node) | ||
629 | goto out; | ||
630 | } | ||
631 | |||
632 | while (1) { | ||
633 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
634 | if (range_overlaps(entry, file_offset, len)) | ||
635 | break; | ||
636 | |||
637 | if (entry->file_offset >= file_offset + len) { | ||
638 | entry = NULL; | ||
639 | break; | ||
640 | } | ||
641 | entry = NULL; | ||
642 | node = rb_next(node); | ||
643 | if (!node) | ||
644 | break; | ||
645 | } | ||
646 | out: | ||
647 | if (entry) | ||
648 | atomic_inc(&entry->refs); | ||
649 | spin_unlock(&tree->lock); | ||
650 | return entry; | ||
651 | } | ||
652 | |||
591 | /* | 653 | /* |
592 | * lookup and return any extent before 'file_offset'. NULL is returned | 654 | * lookup and return any extent before 'file_offset'. NULL is returned |
593 | * if none is found | 655 | * if none is found |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c82f76a9f040..8ac365492a3f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -72,6 +72,8 @@ struct btrfs_ordered_sum { | |||
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | ||
76 | |||
75 | struct btrfs_ordered_extent { | 77 | struct btrfs_ordered_extent { |
76 | /* logical offset in the file */ | 78 | /* logical offset in the file */ |
77 | u64 file_offset; | 79 | u64 file_offset; |
@@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
140 | struct btrfs_ordered_extent **cached, | 142 | struct btrfs_ordered_extent **cached, |
141 | u64 file_offset, u64 io_size); | 143 | u64 file_offset, u64 io_size); |
142 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
143 | u64 start, u64 len, u64 disk_len, int tyep); | 145 | u64 start, u64 len, u64 disk_len, int type); |
146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
147 | u64 start, u64 len, u64 disk_len, int type); | ||
144 | int btrfs_add_ordered_sum(struct inode *inode, | 148 | int btrfs_add_ordered_sum(struct inode *inode, |
145 | struct btrfs_ordered_extent *entry, | 149 | struct btrfs_ordered_extent *entry, |
146 | struct btrfs_ordered_sum *sum); | 150 | struct btrfs_ordered_sum *sum); |
@@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
151 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | 155 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); |
152 | struct btrfs_ordered_extent * | 156 | struct btrfs_ordered_extent * |
153 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | 157 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); |
158 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
159 | u64 file_offset, | ||
160 | u64 len); | ||
154 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 161 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
155 | struct btrfs_ordered_extent *ordered); | 162 | struct btrfs_ordered_extent *ordered); |
156 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 163 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e558dd941ded..05d41e569236 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
@@ -44,8 +44,12 @@ struct tree_entry { | |||
44 | struct backref_node { | 44 | struct backref_node { |
45 | struct rb_node rb_node; | 45 | struct rb_node rb_node; |
46 | u64 bytenr; | 46 | u64 bytenr; |
47 | /* objectid tree block owner */ | 47 | |
48 | u64 new_bytenr; | ||
49 | /* objectid of tree block owner, can be not uptodate */ | ||
48 | u64 owner; | 50 | u64 owner; |
51 | /* link to pending, changed or detached list */ | ||
52 | struct list_head list; | ||
49 | /* list of upper level blocks reference this block */ | 53 | /* list of upper level blocks reference this block */ |
50 | struct list_head upper; | 54 | struct list_head upper; |
51 | /* list of child blocks in the cache */ | 55 | /* list of child blocks in the cache */ |
@@ -56,9 +60,9 @@ struct backref_node { | |||
56 | struct extent_buffer *eb; | 60 | struct extent_buffer *eb; |
57 | /* level of tree block */ | 61 | /* level of tree block */ |
58 | unsigned int level:8; | 62 | unsigned int level:8; |
59 | /* 1 if the block is root of old snapshot */ | 63 | /* is the block in non-reference counted tree */ |
60 | unsigned int old_root:1; | 64 | unsigned int cowonly:1; |
61 | /* 1 if no child blocks in the cache */ | 65 | /* 1 if no child node in the cache */ |
62 | unsigned int lowest:1; | 66 | unsigned int lowest:1; |
63 | /* is the extent buffer locked */ | 67 | /* is the extent buffer locked */ |
64 | unsigned int locked:1; | 68 | unsigned int locked:1; |
@@ -66,6 +70,16 @@ struct backref_node { | |||
66 | unsigned int processed:1; | 70 | unsigned int processed:1; |
67 | /* have backrefs of this block been checked */ | 71 | /* have backrefs of this block been checked */ |
68 | unsigned int checked:1; | 72 | unsigned int checked:1; |
73 | /* | ||
74 | * 1 if corresponding block has been cowed but some upper | ||
75 | * level block pointers may not point to the new location | ||
76 | */ | ||
77 | unsigned int pending:1; | ||
78 | /* | ||
79 | * 1 if the backref node isn't connected to any other | ||
80 | * backref node. | ||
81 | */ | ||
82 | unsigned int detached:1; | ||
69 | }; | 83 | }; |
70 | 84 | ||
71 | /* | 85 | /* |
@@ -74,7 +88,6 @@ struct backref_node { | |||
74 | struct backref_edge { | 88 | struct backref_edge { |
75 | struct list_head list[2]; | 89 | struct list_head list[2]; |
76 | struct backref_node *node[2]; | 90 | struct backref_node *node[2]; |
77 | u64 blockptr; | ||
78 | }; | 91 | }; |
79 | 92 | ||
80 | #define LOWER 0 | 93 | #define LOWER 0 |
@@ -83,9 +96,25 @@ struct backref_edge { | |||
83 | struct backref_cache { | 96 | struct backref_cache { |
84 | /* red black tree of all backref nodes in the cache */ | 97 | /* red black tree of all backref nodes in the cache */ |
85 | struct rb_root rb_root; | 98 | struct rb_root rb_root; |
86 | /* list of backref nodes with no child block in the cache */ | 99 | /* for passing backref nodes to btrfs_reloc_cow_block */ |
100 | struct backref_node *path[BTRFS_MAX_LEVEL]; | ||
101 | /* | ||
102 | * list of blocks that have been cowed but some block | ||
103 | * pointers in upper level blocks may not reflect the | ||
104 | * new location | ||
105 | */ | ||
87 | struct list_head pending[BTRFS_MAX_LEVEL]; | 106 | struct list_head pending[BTRFS_MAX_LEVEL]; |
88 | spinlock_t lock; | 107 | /* list of backref nodes with no child node */ |
108 | struct list_head leaves; | ||
109 | /* list of blocks that have been cowed in current transaction */ | ||
110 | struct list_head changed; | ||
111 | /* list of detached backref node. */ | ||
112 | struct list_head detached; | ||
113 | |||
114 | u64 last_trans; | ||
115 | |||
116 | int nr_nodes; | ||
117 | int nr_edges; | ||
89 | }; | 118 | }; |
90 | 119 | ||
91 | /* | 120 | /* |
@@ -113,15 +142,6 @@ struct tree_block { | |||
113 | unsigned int key_ready:1; | 142 | unsigned int key_ready:1; |
114 | }; | 143 | }; |
115 | 144 | ||
116 | /* inode vector */ | ||
117 | #define INODEVEC_SIZE 16 | ||
118 | |||
119 | struct inodevec { | ||
120 | struct list_head list; | ||
121 | struct inode *inode[INODEVEC_SIZE]; | ||
122 | int nr; | ||
123 | }; | ||
124 | |||
125 | #define MAX_EXTENTS 128 | 145 | #define MAX_EXTENTS 128 |
126 | 146 | ||
127 | struct file_extent_cluster { | 147 | struct file_extent_cluster { |
@@ -138,36 +158,43 @@ struct reloc_control { | |||
138 | struct btrfs_root *extent_root; | 158 | struct btrfs_root *extent_root; |
139 | /* inode for moving data */ | 159 | /* inode for moving data */ |
140 | struct inode *data_inode; | 160 | struct inode *data_inode; |
141 | struct btrfs_workers workers; | 161 | |
162 | struct btrfs_block_rsv *block_rsv; | ||
163 | |||
164 | struct backref_cache backref_cache; | ||
165 | |||
166 | struct file_extent_cluster cluster; | ||
142 | /* tree blocks have been processed */ | 167 | /* tree blocks have been processed */ |
143 | struct extent_io_tree processed_blocks; | 168 | struct extent_io_tree processed_blocks; |
144 | /* map start of tree root to corresponding reloc tree */ | 169 | /* map start of tree root to corresponding reloc tree */ |
145 | struct mapping_tree reloc_root_tree; | 170 | struct mapping_tree reloc_root_tree; |
146 | /* list of reloc trees */ | 171 | /* list of reloc trees */ |
147 | struct list_head reloc_roots; | 172 | struct list_head reloc_roots; |
173 | /* size of metadata reservation for merging reloc trees */ | ||
174 | u64 merging_rsv_size; | ||
175 | /* size of relocated tree nodes */ | ||
176 | u64 nodes_relocated; | ||
177 | |||
148 | u64 search_start; | 178 | u64 search_start; |
149 | u64 extents_found; | 179 | u64 extents_found; |
150 | u64 extents_skipped; | 180 | |
151 | int stage; | 181 | int block_rsv_retries; |
152 | int create_reloc_root; | 182 | |
183 | unsigned int stage:8; | ||
184 | unsigned int create_reloc_tree:1; | ||
185 | unsigned int merge_reloc_tree:1; | ||
153 | unsigned int found_file_extent:1; | 186 | unsigned int found_file_extent:1; |
154 | unsigned int found_old_snapshot:1; | 187 | unsigned int commit_transaction:1; |
155 | }; | 188 | }; |
156 | 189 | ||
157 | /* stages of data relocation */ | 190 | /* stages of data relocation */ |
158 | #define MOVE_DATA_EXTENTS 0 | 191 | #define MOVE_DATA_EXTENTS 0 |
159 | #define UPDATE_DATA_PTRS 1 | 192 | #define UPDATE_DATA_PTRS 1 |
160 | 193 | ||
161 | /* | 194 | static void remove_backref_node(struct backref_cache *cache, |
162 | * merge reloc tree to corresponding fs tree in worker threads | 195 | struct backref_node *node); |
163 | */ | 196 | static void __mark_block_processed(struct reloc_control *rc, |
164 | struct async_merge { | 197 | struct backref_node *node); |
165 | struct btrfs_work work; | ||
166 | struct reloc_control *rc; | ||
167 | struct btrfs_root *root; | ||
168 | struct completion *done; | ||
169 | atomic_t *num_pending; | ||
170 | }; | ||
171 | 198 | ||
172 | static void mapping_tree_init(struct mapping_tree *tree) | 199 | static void mapping_tree_init(struct mapping_tree *tree) |
173 | { | 200 | { |
@@ -181,15 +208,80 @@ static void backref_cache_init(struct backref_cache *cache) | |||
181 | cache->rb_root = RB_ROOT; | 208 | cache->rb_root = RB_ROOT; |
182 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | 209 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) |
183 | INIT_LIST_HEAD(&cache->pending[i]); | 210 | INIT_LIST_HEAD(&cache->pending[i]); |
184 | spin_lock_init(&cache->lock); | 211 | INIT_LIST_HEAD(&cache->changed); |
212 | INIT_LIST_HEAD(&cache->detached); | ||
213 | INIT_LIST_HEAD(&cache->leaves); | ||
214 | } | ||
215 | |||
216 | static void backref_cache_cleanup(struct backref_cache *cache) | ||
217 | { | ||
218 | struct backref_node *node; | ||
219 | int i; | ||
220 | |||
221 | while (!list_empty(&cache->detached)) { | ||
222 | node = list_entry(cache->detached.next, | ||
223 | struct backref_node, list); | ||
224 | remove_backref_node(cache, node); | ||
225 | } | ||
226 | |||
227 | while (!list_empty(&cache->leaves)) { | ||
228 | node = list_entry(cache->leaves.next, | ||
229 | struct backref_node, lower); | ||
230 | remove_backref_node(cache, node); | ||
231 | } | ||
232 | |||
233 | cache->last_trans = 0; | ||
234 | |||
235 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | ||
236 | BUG_ON(!list_empty(&cache->pending[i])); | ||
237 | BUG_ON(!list_empty(&cache->changed)); | ||
238 | BUG_ON(!list_empty(&cache->detached)); | ||
239 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
240 | BUG_ON(cache->nr_nodes); | ||
241 | BUG_ON(cache->nr_edges); | ||
242 | } | ||
243 | |||
244 | static struct backref_node *alloc_backref_node(struct backref_cache *cache) | ||
245 | { | ||
246 | struct backref_node *node; | ||
247 | |||
248 | node = kzalloc(sizeof(*node), GFP_NOFS); | ||
249 | if (node) { | ||
250 | INIT_LIST_HEAD(&node->list); | ||
251 | INIT_LIST_HEAD(&node->upper); | ||
252 | INIT_LIST_HEAD(&node->lower); | ||
253 | RB_CLEAR_NODE(&node->rb_node); | ||
254 | cache->nr_nodes++; | ||
255 | } | ||
256 | return node; | ||
257 | } | ||
258 | |||
259 | static void free_backref_node(struct backref_cache *cache, | ||
260 | struct backref_node *node) | ||
261 | { | ||
262 | if (node) { | ||
263 | cache->nr_nodes--; | ||
264 | kfree(node); | ||
265 | } | ||
266 | } | ||
267 | |||
268 | static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) | ||
269 | { | ||
270 | struct backref_edge *edge; | ||
271 | |||
272 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | ||
273 | if (edge) | ||
274 | cache->nr_edges++; | ||
275 | return edge; | ||
185 | } | 276 | } |
186 | 277 | ||
187 | static void backref_node_init(struct backref_node *node) | 278 | static void free_backref_edge(struct backref_cache *cache, |
279 | struct backref_edge *edge) | ||
188 | { | 280 | { |
189 | memset(node, 0, sizeof(*node)); | 281 | if (edge) { |
190 | INIT_LIST_HEAD(&node->upper); | 282 | cache->nr_edges--; |
191 | INIT_LIST_HEAD(&node->lower); | 283 | kfree(edge); |
192 | RB_CLEAR_NODE(&node->rb_node); | 284 | } |
193 | } | 285 | } |
194 | 286 | ||
195 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | 287 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, |
@@ -250,6 +342,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node, | |||
250 | edges[idx++] = edge; | 342 | edges[idx++] = edge; |
251 | node = edge->node[UPPER]; | 343 | node = edge->node[UPPER]; |
252 | } | 344 | } |
345 | BUG_ON(node->detached); | ||
253 | *index = idx; | 346 | *index = idx; |
254 | return node; | 347 | return node; |
255 | } | 348 | } |
@@ -281,13 +374,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[], | |||
281 | return NULL; | 374 | return NULL; |
282 | } | 375 | } |
283 | 376 | ||
377 | static void unlock_node_buffer(struct backref_node *node) | ||
378 | { | ||
379 | if (node->locked) { | ||
380 | btrfs_tree_unlock(node->eb); | ||
381 | node->locked = 0; | ||
382 | } | ||
383 | } | ||
384 | |||
284 | static void drop_node_buffer(struct backref_node *node) | 385 | static void drop_node_buffer(struct backref_node *node) |
285 | { | 386 | { |
286 | if (node->eb) { | 387 | if (node->eb) { |
287 | if (node->locked) { | 388 | unlock_node_buffer(node); |
288 | btrfs_tree_unlock(node->eb); | ||
289 | node->locked = 0; | ||
290 | } | ||
291 | free_extent_buffer(node->eb); | 389 | free_extent_buffer(node->eb); |
292 | node->eb = NULL; | 390 | node->eb = NULL; |
293 | } | 391 | } |
@@ -296,14 +394,14 @@ static void drop_node_buffer(struct backref_node *node) | |||
296 | static void drop_backref_node(struct backref_cache *tree, | 394 | static void drop_backref_node(struct backref_cache *tree, |
297 | struct backref_node *node) | 395 | struct backref_node *node) |
298 | { | 396 | { |
299 | BUG_ON(!node->lowest); | ||
300 | BUG_ON(!list_empty(&node->upper)); | 397 | BUG_ON(!list_empty(&node->upper)); |
301 | 398 | ||
302 | drop_node_buffer(node); | 399 | drop_node_buffer(node); |
400 | list_del(&node->list); | ||
303 | list_del(&node->lower); | 401 | list_del(&node->lower); |
304 | 402 | if (!RB_EMPTY_NODE(&node->rb_node)) | |
305 | rb_erase(&node->rb_node, &tree->rb_root); | 403 | rb_erase(&node->rb_node, &tree->rb_root); |
306 | kfree(node); | 404 | free_backref_node(tree, node); |
307 | } | 405 | } |
308 | 406 | ||
309 | /* | 407 | /* |
@@ -318,27 +416,121 @@ static void remove_backref_node(struct backref_cache *cache, | |||
318 | if (!node) | 416 | if (!node) |
319 | return; | 417 | return; |
320 | 418 | ||
321 | BUG_ON(!node->lowest); | 419 | BUG_ON(!node->lowest && !node->detached); |
322 | while (!list_empty(&node->upper)) { | 420 | while (!list_empty(&node->upper)) { |
323 | edge = list_entry(node->upper.next, struct backref_edge, | 421 | edge = list_entry(node->upper.next, struct backref_edge, |
324 | list[LOWER]); | 422 | list[LOWER]); |
325 | upper = edge->node[UPPER]; | 423 | upper = edge->node[UPPER]; |
326 | list_del(&edge->list[LOWER]); | 424 | list_del(&edge->list[LOWER]); |
327 | list_del(&edge->list[UPPER]); | 425 | list_del(&edge->list[UPPER]); |
328 | kfree(edge); | 426 | free_backref_edge(cache, edge); |
427 | |||
428 | if (RB_EMPTY_NODE(&upper->rb_node)) { | ||
429 | BUG_ON(!list_empty(&node->upper)); | ||
430 | drop_backref_node(cache, node); | ||
431 | node = upper; | ||
432 | node->lowest = 1; | ||
433 | continue; | ||
434 | } | ||
329 | /* | 435 | /* |
330 | * add the node to pending list if no other | 436 | * add the node to leaf node list if no other |
331 | * child block cached. | 437 | * child block cached. |
332 | */ | 438 | */ |
333 | if (list_empty(&upper->lower)) { | 439 | if (list_empty(&upper->lower)) { |
334 | list_add_tail(&upper->lower, | 440 | list_add_tail(&upper->lower, &cache->leaves); |
335 | &cache->pending[upper->level]); | ||
336 | upper->lowest = 1; | 441 | upper->lowest = 1; |
337 | } | 442 | } |
338 | } | 443 | } |
444 | |||
339 | drop_backref_node(cache, node); | 445 | drop_backref_node(cache, node); |
340 | } | 446 | } |
341 | 447 | ||
448 | static void update_backref_node(struct backref_cache *cache, | ||
449 | struct backref_node *node, u64 bytenr) | ||
450 | { | ||
451 | struct rb_node *rb_node; | ||
452 | rb_erase(&node->rb_node, &cache->rb_root); | ||
453 | node->bytenr = bytenr; | ||
454 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | ||
455 | BUG_ON(rb_node); | ||
456 | } | ||
457 | |||
458 | /* | ||
459 | * update backref cache after a transaction commit | ||
460 | */ | ||
461 | static int update_backref_cache(struct btrfs_trans_handle *trans, | ||
462 | struct backref_cache *cache) | ||
463 | { | ||
464 | struct backref_node *node; | ||
465 | int level = 0; | ||
466 | |||
467 | if (cache->last_trans == 0) { | ||
468 | cache->last_trans = trans->transid; | ||
469 | return 0; | ||
470 | } | ||
471 | |||
472 | if (cache->last_trans == trans->transid) | ||
473 | return 0; | ||
474 | |||
475 | /* | ||
476 | * detached nodes are used to avoid unnecessary backref | ||
477 | * lookup. transaction commit changes the extent tree. | ||
478 | * so the detached nodes are no longer useful. | ||
479 | */ | ||
480 | while (!list_empty(&cache->detached)) { | ||
481 | node = list_entry(cache->detached.next, | ||
482 | struct backref_node, list); | ||
483 | remove_backref_node(cache, node); | ||
484 | } | ||
485 | |||
486 | while (!list_empty(&cache->changed)) { | ||
487 | node = list_entry(cache->changed.next, | ||
488 | struct backref_node, list); | ||
489 | list_del_init(&node->list); | ||
490 | BUG_ON(node->pending); | ||
491 | update_backref_node(cache, node, node->new_bytenr); | ||
492 | } | ||
493 | |||
494 | /* | ||
495 | * some nodes can be left in the pending list if there were | ||
496 | * errors during processing the pending nodes. | ||
497 | */ | ||
498 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
499 | list_for_each_entry(node, &cache->pending[level], list) { | ||
500 | BUG_ON(!node->pending); | ||
501 | if (node->bytenr == node->new_bytenr) | ||
502 | continue; | ||
503 | update_backref_node(cache, node, node->new_bytenr); | ||
504 | } | ||
505 | } | ||
506 | |||
507 | cache->last_trans = 0; | ||
508 | return 1; | ||
509 | } | ||
510 | |||
511 | static int should_ignore_root(struct btrfs_root *root) | ||
512 | { | ||
513 | struct btrfs_root *reloc_root; | ||
514 | |||
515 | if (!root->ref_cows) | ||
516 | return 0; | ||
517 | |||
518 | reloc_root = root->reloc_root; | ||
519 | if (!reloc_root) | ||
520 | return 0; | ||
521 | |||
522 | if (btrfs_root_last_snapshot(&reloc_root->root_item) == | ||
523 | root->fs_info->running_transaction->transid - 1) | ||
524 | return 0; | ||
525 | /* | ||
526 | * if there is reloc tree and it was created in previous | ||
527 | * transaction backref lookup can find the reloc tree, | ||
528 | * so backref node for the fs tree root is useless for | ||
529 | * relocation. | ||
530 | */ | ||
531 | return 1; | ||
532 | } | ||
533 | |||
342 | /* | 534 | /* |
343 | * find reloc tree by address of tree root | 535 | * find reloc tree by address of tree root |
344 | */ | 536 | */ |
@@ -453,11 +645,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, | |||
453 | * for all upper level blocks that directly/indirectly reference the | 645 | * for all upper level blocks that directly/indirectly reference the |
454 | * block are also cached. | 646 | * block are also cached. |
455 | */ | 647 | */ |
456 | static struct backref_node *build_backref_tree(struct reloc_control *rc, | 648 | static noinline_for_stack |
457 | struct backref_cache *cache, | 649 | struct backref_node *build_backref_tree(struct reloc_control *rc, |
458 | struct btrfs_key *node_key, | 650 | struct btrfs_key *node_key, |
459 | int level, u64 bytenr) | 651 | int level, u64 bytenr) |
460 | { | 652 | { |
653 | struct backref_cache *cache = &rc->backref_cache; | ||
461 | struct btrfs_path *path1; | 654 | struct btrfs_path *path1; |
462 | struct btrfs_path *path2; | 655 | struct btrfs_path *path2; |
463 | struct extent_buffer *eb; | 656 | struct extent_buffer *eb; |
@@ -473,6 +666,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
473 | unsigned long end; | 666 | unsigned long end; |
474 | unsigned long ptr; | 667 | unsigned long ptr; |
475 | LIST_HEAD(list); | 668 | LIST_HEAD(list); |
669 | LIST_HEAD(useless); | ||
670 | int cowonly; | ||
476 | int ret; | 671 | int ret; |
477 | int err = 0; | 672 | int err = 0; |
478 | 673 | ||
@@ -483,15 +678,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
483 | goto out; | 678 | goto out; |
484 | } | 679 | } |
485 | 680 | ||
486 | node = kmalloc(sizeof(*node), GFP_NOFS); | 681 | node = alloc_backref_node(cache); |
487 | if (!node) { | 682 | if (!node) { |
488 | err = -ENOMEM; | 683 | err = -ENOMEM; |
489 | goto out; | 684 | goto out; |
490 | } | 685 | } |
491 | 686 | ||
492 | backref_node_init(node); | ||
493 | node->bytenr = bytenr; | 687 | node->bytenr = bytenr; |
494 | node->owner = 0; | ||
495 | node->level = level; | 688 | node->level = level; |
496 | node->lowest = 1; | 689 | node->lowest = 1; |
497 | cur = node; | 690 | cur = node; |
@@ -587,17 +780,20 @@ again: | |||
587 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 780 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
588 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || | 781 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || |
589 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | 782 | key.type == BTRFS_EXTENT_REF_V0_KEY) { |
590 | if (key.objectid == key.offset && | 783 | if (key.type == BTRFS_EXTENT_REF_V0_KEY) { |
591 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
592 | struct btrfs_extent_ref_v0 *ref0; | 784 | struct btrfs_extent_ref_v0 *ref0; |
593 | ref0 = btrfs_item_ptr(eb, path1->slots[0], | 785 | ref0 = btrfs_item_ptr(eb, path1->slots[0], |
594 | struct btrfs_extent_ref_v0); | 786 | struct btrfs_extent_ref_v0); |
595 | root = find_tree_root(rc, eb, ref0); | 787 | root = find_tree_root(rc, eb, ref0); |
596 | if (root) | 788 | if (!root->ref_cows) |
597 | cur->root = root; | 789 | cur->cowonly = 1; |
598 | else | 790 | if (key.objectid == key.offset) { |
599 | cur->old_root = 1; | 791 | if (root && !should_ignore_root(root)) |
600 | break; | 792 | cur->root = root; |
793 | else | ||
794 | list_add(&cur->list, &useless); | ||
795 | break; | ||
796 | } | ||
601 | } | 797 | } |
602 | #else | 798 | #else |
603 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | 799 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); |
@@ -614,22 +810,20 @@ again: | |||
614 | break; | 810 | break; |
615 | } | 811 | } |
616 | 812 | ||
617 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 813 | edge = alloc_backref_edge(cache); |
618 | if (!edge) { | 814 | if (!edge) { |
619 | err = -ENOMEM; | 815 | err = -ENOMEM; |
620 | goto out; | 816 | goto out; |
621 | } | 817 | } |
622 | rb_node = tree_search(&cache->rb_root, key.offset); | 818 | rb_node = tree_search(&cache->rb_root, key.offset); |
623 | if (!rb_node) { | 819 | if (!rb_node) { |
624 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 820 | upper = alloc_backref_node(cache); |
625 | if (!upper) { | 821 | if (!upper) { |
626 | kfree(edge); | 822 | free_backref_edge(cache, edge); |
627 | err = -ENOMEM; | 823 | err = -ENOMEM; |
628 | goto out; | 824 | goto out; |
629 | } | 825 | } |
630 | backref_node_init(upper); | ||
631 | upper->bytenr = key.offset; | 826 | upper->bytenr = key.offset; |
632 | upper->owner = 0; | ||
633 | upper->level = cur->level + 1; | 827 | upper->level = cur->level + 1; |
634 | /* | 828 | /* |
635 | * backrefs for the upper level block isn't | 829 | * backrefs for the upper level block isn't |
@@ -639,11 +833,12 @@ again: | |||
639 | } else { | 833 | } else { |
640 | upper = rb_entry(rb_node, struct backref_node, | 834 | upper = rb_entry(rb_node, struct backref_node, |
641 | rb_node); | 835 | rb_node); |
836 | BUG_ON(!upper->checked); | ||
642 | INIT_LIST_HEAD(&edge->list[UPPER]); | 837 | INIT_LIST_HEAD(&edge->list[UPPER]); |
643 | } | 838 | } |
644 | list_add(&edge->list[LOWER], &cur->upper); | 839 | list_add_tail(&edge->list[LOWER], &cur->upper); |
645 | edge->node[UPPER] = upper; | ||
646 | edge->node[LOWER] = cur; | 840 | edge->node[LOWER] = cur; |
841 | edge->node[UPPER] = upper; | ||
647 | 842 | ||
648 | goto next; | 843 | goto next; |
649 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { | 844 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { |
@@ -657,11 +852,17 @@ again: | |||
657 | goto out; | 852 | goto out; |
658 | } | 853 | } |
659 | 854 | ||
855 | if (!root->ref_cows) | ||
856 | cur->cowonly = 1; | ||
857 | |||
660 | if (btrfs_root_level(&root->root_item) == cur->level) { | 858 | if (btrfs_root_level(&root->root_item) == cur->level) { |
661 | /* tree root */ | 859 | /* tree root */ |
662 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 860 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
663 | cur->bytenr); | 861 | cur->bytenr); |
664 | cur->root = root; | 862 | if (should_ignore_root(root)) |
863 | list_add(&cur->list, &useless); | ||
864 | else | ||
865 | cur->root = root; | ||
665 | break; | 866 | break; |
666 | } | 867 | } |
667 | 868 | ||
@@ -692,11 +893,14 @@ again: | |||
692 | if (!path2->nodes[level]) { | 893 | if (!path2->nodes[level]) { |
693 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 894 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
694 | lower->bytenr); | 895 | lower->bytenr); |
695 | lower->root = root; | 896 | if (should_ignore_root(root)) |
897 | list_add(&lower->list, &useless); | ||
898 | else | ||
899 | lower->root = root; | ||
696 | break; | 900 | break; |
697 | } | 901 | } |
698 | 902 | ||
699 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 903 | edge = alloc_backref_edge(cache); |
700 | if (!edge) { | 904 | if (!edge) { |
701 | err = -ENOMEM; | 905 | err = -ENOMEM; |
702 | goto out; | 906 | goto out; |
@@ -705,16 +909,17 @@ again: | |||
705 | eb = path2->nodes[level]; | 909 | eb = path2->nodes[level]; |
706 | rb_node = tree_search(&cache->rb_root, eb->start); | 910 | rb_node = tree_search(&cache->rb_root, eb->start); |
707 | if (!rb_node) { | 911 | if (!rb_node) { |
708 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 912 | upper = alloc_backref_node(cache); |
709 | if (!upper) { | 913 | if (!upper) { |
710 | kfree(edge); | 914 | free_backref_edge(cache, edge); |
711 | err = -ENOMEM; | 915 | err = -ENOMEM; |
712 | goto out; | 916 | goto out; |
713 | } | 917 | } |
714 | backref_node_init(upper); | ||
715 | upper->bytenr = eb->start; | 918 | upper->bytenr = eb->start; |
716 | upper->owner = btrfs_header_owner(eb); | 919 | upper->owner = btrfs_header_owner(eb); |
717 | upper->level = lower->level + 1; | 920 | upper->level = lower->level + 1; |
921 | if (!root->ref_cows) | ||
922 | upper->cowonly = 1; | ||
718 | 923 | ||
719 | /* | 924 | /* |
720 | * if we know the block isn't shared | 925 | * if we know the block isn't shared |
@@ -744,10 +949,12 @@ again: | |||
744 | rb_node); | 949 | rb_node); |
745 | BUG_ON(!upper->checked); | 950 | BUG_ON(!upper->checked); |
746 | INIT_LIST_HEAD(&edge->list[UPPER]); | 951 | INIT_LIST_HEAD(&edge->list[UPPER]); |
952 | if (!upper->owner) | ||
953 | upper->owner = btrfs_header_owner(eb); | ||
747 | } | 954 | } |
748 | list_add_tail(&edge->list[LOWER], &lower->upper); | 955 | list_add_tail(&edge->list[LOWER], &lower->upper); |
749 | edge->node[UPPER] = upper; | ||
750 | edge->node[LOWER] = lower; | 956 | edge->node[LOWER] = lower; |
957 | edge->node[UPPER] = upper; | ||
751 | 958 | ||
752 | if (rb_node) | 959 | if (rb_node) |
753 | break; | 960 | break; |
@@ -785,8 +992,13 @@ next: | |||
785 | * into the cache. | 992 | * into the cache. |
786 | */ | 993 | */ |
787 | BUG_ON(!node->checked); | 994 | BUG_ON(!node->checked); |
788 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | 995 | cowonly = node->cowonly; |
789 | BUG_ON(rb_node); | 996 | if (!cowonly) { |
997 | rb_node = tree_insert(&cache->rb_root, node->bytenr, | ||
998 | &node->rb_node); | ||
999 | BUG_ON(rb_node); | ||
1000 | list_add_tail(&node->lower, &cache->leaves); | ||
1001 | } | ||
790 | 1002 | ||
791 | list_for_each_entry(edge, &node->upper, list[LOWER]) | 1003 | list_for_each_entry(edge, &node->upper, list[LOWER]) |
792 | list_add_tail(&edge->list[UPPER], &list); | 1004 | list_add_tail(&edge->list[UPPER], &list); |
@@ -795,6 +1007,14 @@ next: | |||
795 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); | 1007 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); |
796 | list_del_init(&edge->list[UPPER]); | 1008 | list_del_init(&edge->list[UPPER]); |
797 | upper = edge->node[UPPER]; | 1009 | upper = edge->node[UPPER]; |
1010 | if (upper->detached) { | ||
1011 | list_del(&edge->list[LOWER]); | ||
1012 | lower = edge->node[LOWER]; | ||
1013 | free_backref_edge(cache, edge); | ||
1014 | if (list_empty(&lower->upper)) | ||
1015 | list_add(&lower->list, &useless); | ||
1016 | continue; | ||
1017 | } | ||
798 | 1018 | ||
799 | if (!RB_EMPTY_NODE(&upper->rb_node)) { | 1019 | if (!RB_EMPTY_NODE(&upper->rb_node)) { |
800 | if (upper->lowest) { | 1020 | if (upper->lowest) { |
@@ -807,25 +1027,69 @@ next: | |||
807 | } | 1027 | } |
808 | 1028 | ||
809 | BUG_ON(!upper->checked); | 1029 | BUG_ON(!upper->checked); |
810 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | 1030 | BUG_ON(cowonly != upper->cowonly); |
811 | &upper->rb_node); | 1031 | if (!cowonly) { |
812 | BUG_ON(rb_node); | 1032 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, |
1033 | &upper->rb_node); | ||
1034 | BUG_ON(rb_node); | ||
1035 | } | ||
813 | 1036 | ||
814 | list_add_tail(&edge->list[UPPER], &upper->lower); | 1037 | list_add_tail(&edge->list[UPPER], &upper->lower); |
815 | 1038 | ||
816 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | 1039 | list_for_each_entry(edge, &upper->upper, list[LOWER]) |
817 | list_add_tail(&edge->list[UPPER], &list); | 1040 | list_add_tail(&edge->list[UPPER], &list); |
818 | } | 1041 | } |
1042 | /* | ||
1043 | * process useless backref nodes. backref nodes for tree leaves | ||
1044 | * are deleted from the cache. backref nodes for upper level | ||
1045 | * tree blocks are left in the cache to avoid unnecessary backref | ||
1046 | * lookup. | ||
1047 | */ | ||
1048 | while (!list_empty(&useless)) { | ||
1049 | upper = list_entry(useless.next, struct backref_node, list); | ||
1050 | list_del_init(&upper->list); | ||
1051 | BUG_ON(!list_empty(&upper->upper)); | ||
1052 | if (upper == node) | ||
1053 | node = NULL; | ||
1054 | if (upper->lowest) { | ||
1055 | list_del_init(&upper->lower); | ||
1056 | upper->lowest = 0; | ||
1057 | } | ||
1058 | while (!list_empty(&upper->lower)) { | ||
1059 | edge = list_entry(upper->lower.next, | ||
1060 | struct backref_edge, list[UPPER]); | ||
1061 | list_del(&edge->list[UPPER]); | ||
1062 | list_del(&edge->list[LOWER]); | ||
1063 | lower = edge->node[LOWER]; | ||
1064 | free_backref_edge(cache, edge); | ||
1065 | |||
1066 | if (list_empty(&lower->upper)) | ||
1067 | list_add(&lower->list, &useless); | ||
1068 | } | ||
1069 | __mark_block_processed(rc, upper); | ||
1070 | if (upper->level > 0) { | ||
1071 | list_add(&upper->list, &cache->detached); | ||
1072 | upper->detached = 1; | ||
1073 | } else { | ||
1074 | rb_erase(&upper->rb_node, &cache->rb_root); | ||
1075 | free_backref_node(cache, upper); | ||
1076 | } | ||
1077 | } | ||
819 | out: | 1078 | out: |
820 | btrfs_free_path(path1); | 1079 | btrfs_free_path(path1); |
821 | btrfs_free_path(path2); | 1080 | btrfs_free_path(path2); |
822 | if (err) { | 1081 | if (err) { |
823 | INIT_LIST_HEAD(&list); | 1082 | while (!list_empty(&useless)) { |
1083 | lower = list_entry(useless.next, | ||
1084 | struct backref_node, upper); | ||
1085 | list_del_init(&lower->upper); | ||
1086 | } | ||
824 | upper = node; | 1087 | upper = node; |
1088 | INIT_LIST_HEAD(&list); | ||
825 | while (upper) { | 1089 | while (upper) { |
826 | if (RB_EMPTY_NODE(&upper->rb_node)) { | 1090 | if (RB_EMPTY_NODE(&upper->rb_node)) { |
827 | list_splice_tail(&upper->upper, &list); | 1091 | list_splice_tail(&upper->upper, &list); |
828 | kfree(upper); | 1092 | free_backref_node(cache, upper); |
829 | } | 1093 | } |
830 | 1094 | ||
831 | if (list_empty(&list)) | 1095 | if (list_empty(&list)) |
@@ -833,15 +1097,104 @@ out: | |||
833 | 1097 | ||
834 | edge = list_entry(list.next, struct backref_edge, | 1098 | edge = list_entry(list.next, struct backref_edge, |
835 | list[LOWER]); | 1099 | list[LOWER]); |
1100 | list_del(&edge->list[LOWER]); | ||
836 | upper = edge->node[UPPER]; | 1101 | upper = edge->node[UPPER]; |
837 | kfree(edge); | 1102 | free_backref_edge(cache, edge); |
838 | } | 1103 | } |
839 | return ERR_PTR(err); | 1104 | return ERR_PTR(err); |
840 | } | 1105 | } |
1106 | BUG_ON(node && node->detached); | ||
841 | return node; | 1107 | return node; |
842 | } | 1108 | } |
843 | 1109 | ||
844 | /* | 1110 | /* |
1111 | * helper to add backref node for the newly created snapshot. | ||
1112 | * the backref node is created by cloning backref node that | ||
1113 | * corresponds to root of source tree | ||
1114 | */ | ||
1115 | static int clone_backref_node(struct btrfs_trans_handle *trans, | ||
1116 | struct reloc_control *rc, | ||
1117 | struct btrfs_root *src, | ||
1118 | struct btrfs_root *dest) | ||
1119 | { | ||
1120 | struct btrfs_root *reloc_root = src->reloc_root; | ||
1121 | struct backref_cache *cache = &rc->backref_cache; | ||
1122 | struct backref_node *node = NULL; | ||
1123 | struct backref_node *new_node; | ||
1124 | struct backref_edge *edge; | ||
1125 | struct backref_edge *new_edge; | ||
1126 | struct rb_node *rb_node; | ||
1127 | |||
1128 | if (cache->last_trans > 0) | ||
1129 | update_backref_cache(trans, cache); | ||
1130 | |||
1131 | rb_node = tree_search(&cache->rb_root, src->commit_root->start); | ||
1132 | if (rb_node) { | ||
1133 | node = rb_entry(rb_node, struct backref_node, rb_node); | ||
1134 | if (node->detached) | ||
1135 | node = NULL; | ||
1136 | else | ||
1137 | BUG_ON(node->new_bytenr != reloc_root->node->start); | ||
1138 | } | ||
1139 | |||
1140 | if (!node) { | ||
1141 | rb_node = tree_search(&cache->rb_root, | ||
1142 | reloc_root->commit_root->start); | ||
1143 | if (rb_node) { | ||
1144 | node = rb_entry(rb_node, struct backref_node, | ||
1145 | rb_node); | ||
1146 | BUG_ON(node->detached); | ||
1147 | } | ||
1148 | } | ||
1149 | |||
1150 | if (!node) | ||
1151 | return 0; | ||
1152 | |||
1153 | new_node = alloc_backref_node(cache); | ||
1154 | if (!new_node) | ||
1155 | return -ENOMEM; | ||
1156 | |||
1157 | new_node->bytenr = dest->node->start; | ||
1158 | new_node->level = node->level; | ||
1159 | new_node->lowest = node->lowest; | ||
1160 | new_node->root = dest; | ||
1161 | |||
1162 | if (!node->lowest) { | ||
1163 | list_for_each_entry(edge, &node->lower, list[UPPER]) { | ||
1164 | new_edge = alloc_backref_edge(cache); | ||
1165 | if (!new_edge) | ||
1166 | goto fail; | ||
1167 | |||
1168 | new_edge->node[UPPER] = new_node; | ||
1169 | new_edge->node[LOWER] = edge->node[LOWER]; | ||
1170 | list_add_tail(&new_edge->list[UPPER], | ||
1171 | &new_node->lower); | ||
1172 | } | ||
1173 | } | ||
1174 | |||
1175 | rb_node = tree_insert(&cache->rb_root, new_node->bytenr, | ||
1176 | &new_node->rb_node); | ||
1177 | BUG_ON(rb_node); | ||
1178 | |||
1179 | if (!new_node->lowest) { | ||
1180 | list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { | ||
1181 | list_add_tail(&new_edge->list[LOWER], | ||
1182 | &new_edge->node[LOWER]->upper); | ||
1183 | } | ||
1184 | } | ||
1185 | return 0; | ||
1186 | fail: | ||
1187 | while (!list_empty(&new_node->lower)) { | ||
1188 | new_edge = list_entry(new_node->lower.next, | ||
1189 | struct backref_edge, list[UPPER]); | ||
1190 | list_del(&new_edge->list[UPPER]); | ||
1191 | free_backref_edge(cache, new_edge); | ||
1192 | } | ||
1193 | free_backref_node(cache, new_node); | ||
1194 | return -ENOMEM; | ||
1195 | } | ||
1196 | |||
1197 | /* | ||
845 | * helper to add 'address of tree root -> reloc tree' mapping | 1198 | * helper to add 'address of tree root -> reloc tree' mapping |
846 | */ | 1199 | */ |
847 | static int __add_reloc_root(struct btrfs_root *root) | 1200 | static int __add_reloc_root(struct btrfs_root *root) |
@@ -901,12 +1254,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) | |||
901 | return 0; | 1254 | return 0; |
902 | } | 1255 | } |
903 | 1256 | ||
904 | /* | 1257 | static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, |
905 | * create reloc tree for a given fs tree. reloc tree is just a | 1258 | struct btrfs_root *root, u64 objectid) |
906 | * snapshot of the fs tree with special root objectid. | ||
907 | */ | ||
908 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
909 | struct btrfs_root *root) | ||
910 | { | 1259 | { |
911 | struct btrfs_root *reloc_root; | 1260 | struct btrfs_root *reloc_root; |
912 | struct extent_buffer *eb; | 1261 | struct extent_buffer *eb; |
@@ -914,36 +1263,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
914 | struct btrfs_key root_key; | 1263 | struct btrfs_key root_key; |
915 | int ret; | 1264 | int ret; |
916 | 1265 | ||
917 | if (root->reloc_root) { | ||
918 | reloc_root = root->reloc_root; | ||
919 | reloc_root->last_trans = trans->transid; | ||
920 | return 0; | ||
921 | } | ||
922 | |||
923 | if (!root->fs_info->reloc_ctl || | ||
924 | !root->fs_info->reloc_ctl->create_reloc_root || | ||
925 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
926 | return 0; | ||
927 | |||
928 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 1266 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
929 | BUG_ON(!root_item); | 1267 | BUG_ON(!root_item); |
930 | 1268 | ||
931 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 1269 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
932 | root_key.type = BTRFS_ROOT_ITEM_KEY; | 1270 | root_key.type = BTRFS_ROOT_ITEM_KEY; |
933 | root_key.offset = root->root_key.objectid; | 1271 | root_key.offset = objectid; |
934 | 1272 | ||
935 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, | 1273 | if (root->root_key.objectid == objectid) { |
936 | BTRFS_TREE_RELOC_OBJECTID); | 1274 | /* called by btrfs_init_reloc_root */ |
937 | BUG_ON(ret); | 1275 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, |
1276 | BTRFS_TREE_RELOC_OBJECTID); | ||
1277 | BUG_ON(ret); | ||
1278 | |||
1279 | btrfs_set_root_last_snapshot(&root->root_item, | ||
1280 | trans->transid - 1); | ||
1281 | } else { | ||
1282 | /* | ||
1283 | * called by btrfs_reloc_post_snapshot_hook. | ||
1284 | * the source tree is a reloc tree, all tree blocks | ||
1285 | * modified after it was created have RELOC flag | ||
1286 | * set in their headers. so it's OK to not update | ||
1287 | * the 'last_snapshot'. | ||
1288 | */ | ||
1289 | ret = btrfs_copy_root(trans, root, root->node, &eb, | ||
1290 | BTRFS_TREE_RELOC_OBJECTID); | ||
1291 | BUG_ON(ret); | ||
1292 | } | ||
938 | 1293 | ||
939 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); | ||
940 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | 1294 | memcpy(root_item, &root->root_item, sizeof(*root_item)); |
941 | btrfs_set_root_refs(root_item, 1); | ||
942 | btrfs_set_root_bytenr(root_item, eb->start); | 1295 | btrfs_set_root_bytenr(root_item, eb->start); |
943 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | 1296 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); |
944 | btrfs_set_root_generation(root_item, trans->transid); | 1297 | btrfs_set_root_generation(root_item, trans->transid); |
945 | memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); | 1298 | |
946 | root_item->drop_level = 0; | 1299 | if (root->root_key.objectid == objectid) { |
1300 | btrfs_set_root_refs(root_item, 0); | ||
1301 | memset(&root_item->drop_progress, 0, | ||
1302 | sizeof(struct btrfs_disk_key)); | ||
1303 | root_item->drop_level = 0; | ||
1304 | } | ||
947 | 1305 | ||
948 | btrfs_tree_unlock(eb); | 1306 | btrfs_tree_unlock(eb); |
949 | free_extent_buffer(eb); | 1307 | free_extent_buffer(eb); |
@@ -957,6 +1315,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
957 | &root_key); | 1315 | &root_key); |
958 | BUG_ON(IS_ERR(reloc_root)); | 1316 | BUG_ON(IS_ERR(reloc_root)); |
959 | reloc_root->last_trans = trans->transid; | 1317 | reloc_root->last_trans = trans->transid; |
1318 | return reloc_root; | ||
1319 | } | ||
1320 | |||
1321 | /* | ||
1322 | * create reloc tree for a given fs tree. reloc tree is just a | ||
1323 | * snapshot of the fs tree with special root objectid. | ||
1324 | */ | ||
1325 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
1326 | struct btrfs_root *root) | ||
1327 | { | ||
1328 | struct btrfs_root *reloc_root; | ||
1329 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
1330 | int clear_rsv = 0; | ||
1331 | |||
1332 | if (root->reloc_root) { | ||
1333 | reloc_root = root->reloc_root; | ||
1334 | reloc_root->last_trans = trans->transid; | ||
1335 | return 0; | ||
1336 | } | ||
1337 | |||
1338 | if (!rc || !rc->create_reloc_tree || | ||
1339 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
1340 | return 0; | ||
1341 | |||
1342 | if (!trans->block_rsv) { | ||
1343 | trans->block_rsv = rc->block_rsv; | ||
1344 | clear_rsv = 1; | ||
1345 | } | ||
1346 | reloc_root = create_reloc_root(trans, root, root->root_key.objectid); | ||
1347 | if (clear_rsv) | ||
1348 | trans->block_rsv = NULL; | ||
960 | 1349 | ||
961 | __add_reloc_root(reloc_root); | 1350 | __add_reloc_root(reloc_root); |
962 | root->reloc_root = reloc_root; | 1351 | root->reloc_root = reloc_root; |
@@ -980,7 +1369,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
980 | reloc_root = root->reloc_root; | 1369 | reloc_root = root->reloc_root; |
981 | root_item = &reloc_root->root_item; | 1370 | root_item = &reloc_root->root_item; |
982 | 1371 | ||
983 | if (btrfs_root_refs(root_item) == 0) { | 1372 | if (root->fs_info->reloc_ctl->merge_reloc_tree && |
1373 | btrfs_root_refs(root_item) == 0) { | ||
984 | root->reloc_root = NULL; | 1374 | root->reloc_root = NULL; |
985 | del = 1; | 1375 | del = 1; |
986 | } | 1376 | } |
@@ -1102,8 +1492,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, | |||
1102 | goto out; | 1492 | goto out; |
1103 | } | 1493 | } |
1104 | 1494 | ||
1105 | if (new_bytenr) | 1495 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
1106 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
1107 | ret = 0; | 1496 | ret = 0; |
1108 | out: | 1497 | out: |
1109 | btrfs_free_path(path); | 1498 | btrfs_free_path(path); |
@@ -1114,19 +1503,18 @@ out: | |||
1114 | * update file extent items in the tree leaf to point to | 1503 | * update file extent items in the tree leaf to point to |
1115 | * the new locations. | 1504 | * the new locations. |
1116 | */ | 1505 | */ |
1117 | static int replace_file_extents(struct btrfs_trans_handle *trans, | 1506 | static noinline_for_stack |
1118 | struct reloc_control *rc, | 1507 | int replace_file_extents(struct btrfs_trans_handle *trans, |
1119 | struct btrfs_root *root, | 1508 | struct reloc_control *rc, |
1120 | struct extent_buffer *leaf, | 1509 | struct btrfs_root *root, |
1121 | struct list_head *inode_list) | 1510 | struct extent_buffer *leaf) |
1122 | { | 1511 | { |
1123 | struct btrfs_key key; | 1512 | struct btrfs_key key; |
1124 | struct btrfs_file_extent_item *fi; | 1513 | struct btrfs_file_extent_item *fi; |
1125 | struct inode *inode = NULL; | 1514 | struct inode *inode = NULL; |
1126 | struct inodevec *ivec = NULL; | ||
1127 | u64 parent; | 1515 | u64 parent; |
1128 | u64 bytenr; | 1516 | u64 bytenr; |
1129 | u64 new_bytenr; | 1517 | u64 new_bytenr = 0; |
1130 | u64 num_bytes; | 1518 | u64 num_bytes; |
1131 | u64 end; | 1519 | u64 end; |
1132 | u32 nritems; | 1520 | u32 nritems; |
@@ -1166,21 +1554,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1166 | * to complete and drop the extent cache | 1554 | * to complete and drop the extent cache |
1167 | */ | 1555 | */ |
1168 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 1556 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
1169 | if (!ivec || ivec->nr == INODEVEC_SIZE) { | ||
1170 | ivec = kmalloc(sizeof(*ivec), GFP_NOFS); | ||
1171 | BUG_ON(!ivec); | ||
1172 | ivec->nr = 0; | ||
1173 | list_add_tail(&ivec->list, inode_list); | ||
1174 | } | ||
1175 | if (first) { | 1557 | if (first) { |
1176 | inode = find_next_inode(root, key.objectid); | 1558 | inode = find_next_inode(root, key.objectid); |
1177 | if (inode) | ||
1178 | ivec->inode[ivec->nr++] = inode; | ||
1179 | first = 0; | 1559 | first = 0; |
1180 | } else if (inode && inode->i_ino < key.objectid) { | 1560 | } else if (inode && inode->i_ino < key.objectid) { |
1561 | btrfs_add_delayed_iput(inode); | ||
1181 | inode = find_next_inode(root, key.objectid); | 1562 | inode = find_next_inode(root, key.objectid); |
1182 | if (inode) | ||
1183 | ivec->inode[ivec->nr++] = inode; | ||
1184 | } | 1563 | } |
1185 | if (inode && inode->i_ino == key.objectid) { | 1564 | if (inode && inode->i_ino == key.objectid) { |
1186 | end = key.offset + | 1565 | end = key.offset + |
@@ -1204,8 +1583,10 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1204 | 1583 | ||
1205 | ret = get_new_location(rc->data_inode, &new_bytenr, | 1584 | ret = get_new_location(rc->data_inode, &new_bytenr, |
1206 | bytenr, num_bytes); | 1585 | bytenr, num_bytes); |
1207 | if (ret > 0) | 1586 | if (ret > 0) { |
1587 | WARN_ON(1); | ||
1208 | continue; | 1588 | continue; |
1589 | } | ||
1209 | BUG_ON(ret < 0); | 1590 | BUG_ON(ret < 0); |
1210 | 1591 | ||
1211 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); | 1592 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); |
@@ -1225,6 +1606,8 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
1225 | } | 1606 | } |
1226 | if (dirty) | 1607 | if (dirty) |
1227 | btrfs_mark_buffer_dirty(leaf); | 1608 | btrfs_mark_buffer_dirty(leaf); |
1609 | if (inode) | ||
1610 | btrfs_add_delayed_iput(inode); | ||
1228 | return 0; | 1611 | return 0; |
1229 | } | 1612 | } |
1230 | 1613 | ||
@@ -1248,11 +1631,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot, | |||
1248 | * if no block got replaced, 0 is returned. if there are other | 1631 | * if no block got replaced, 0 is returned. if there are other |
1249 | * errors, a negative error number is returned. | 1632 | * errors, a negative error number is returned. |
1250 | */ | 1633 | */ |
1251 | static int replace_path(struct btrfs_trans_handle *trans, | 1634 | static noinline_for_stack |
1252 | struct btrfs_root *dest, struct btrfs_root *src, | 1635 | int replace_path(struct btrfs_trans_handle *trans, |
1253 | struct btrfs_path *path, struct btrfs_key *next_key, | 1636 | struct btrfs_root *dest, struct btrfs_root *src, |
1254 | struct extent_buffer **leaf, | 1637 | struct btrfs_path *path, struct btrfs_key *next_key, |
1255 | int lowest_level, int max_level) | 1638 | int lowest_level, int max_level) |
1256 | { | 1639 | { |
1257 | struct extent_buffer *eb; | 1640 | struct extent_buffer *eb; |
1258 | struct extent_buffer *parent; | 1641 | struct extent_buffer *parent; |
@@ -1263,16 +1646,16 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1263 | u64 new_ptr_gen; | 1646 | u64 new_ptr_gen; |
1264 | u64 last_snapshot; | 1647 | u64 last_snapshot; |
1265 | u32 blocksize; | 1648 | u32 blocksize; |
1649 | int cow = 0; | ||
1266 | int level; | 1650 | int level; |
1267 | int ret; | 1651 | int ret; |
1268 | int slot; | 1652 | int slot; |
1269 | 1653 | ||
1270 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 1654 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
1271 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); | 1655 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); |
1272 | BUG_ON(lowest_level > 1 && leaf); | ||
1273 | 1656 | ||
1274 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); | 1657 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); |
1275 | 1658 | again: | |
1276 | slot = path->slots[lowest_level]; | 1659 | slot = path->slots[lowest_level]; |
1277 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); | 1660 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); |
1278 | 1661 | ||
@@ -1286,8 +1669,10 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1286 | return 0; | 1669 | return 0; |
1287 | } | 1670 | } |
1288 | 1671 | ||
1289 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); | 1672 | if (cow) { |
1290 | BUG_ON(ret); | 1673 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); |
1674 | BUG_ON(ret); | ||
1675 | } | ||
1291 | btrfs_set_lock_blocking(eb); | 1676 | btrfs_set_lock_blocking(eb); |
1292 | 1677 | ||
1293 | if (next_key) { | 1678 | if (next_key) { |
@@ -1331,7 +1716,7 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1331 | 1716 | ||
1332 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || | 1717 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || |
1333 | memcmp_node_keys(parent, slot, path, level)) { | 1718 | memcmp_node_keys(parent, slot, path, level)) { |
1334 | if (level <= lowest_level && !leaf) { | 1719 | if (level <= lowest_level) { |
1335 | ret = 0; | 1720 | ret = 0; |
1336 | break; | 1721 | break; |
1337 | } | 1722 | } |
@@ -1339,16 +1724,12 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1339 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1724 | eb = read_tree_block(dest, old_bytenr, blocksize, |
1340 | old_ptr_gen); | 1725 | old_ptr_gen); |
1341 | btrfs_tree_lock(eb); | 1726 | btrfs_tree_lock(eb); |
1342 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1727 | if (cow) { |
1343 | slot, &eb); | 1728 | ret = btrfs_cow_block(trans, dest, eb, parent, |
1344 | BUG_ON(ret); | 1729 | slot, &eb); |
1345 | btrfs_set_lock_blocking(eb); | 1730 | BUG_ON(ret); |
1346 | |||
1347 | if (level <= lowest_level) { | ||
1348 | *leaf = eb; | ||
1349 | ret = 0; | ||
1350 | break; | ||
1351 | } | 1731 | } |
1732 | btrfs_set_lock_blocking(eb); | ||
1352 | 1733 | ||
1353 | btrfs_tree_unlock(parent); | 1734 | btrfs_tree_unlock(parent); |
1354 | free_extent_buffer(parent); | 1735 | free_extent_buffer(parent); |
@@ -1357,6 +1738,13 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
1357 | continue; | 1738 | continue; |
1358 | } | 1739 | } |
1359 | 1740 | ||
1741 | if (!cow) { | ||
1742 | btrfs_tree_unlock(parent); | ||
1743 | free_extent_buffer(parent); | ||
1744 | cow = 1; | ||
1745 | goto again; | ||
1746 | } | ||
1747 | |||
1360 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 1748 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1361 | path->slots[level]); | 1749 | path->slots[level]); |
1362 | btrfs_release_path(src, path); | 1750 | btrfs_release_path(src, path); |
@@ -1562,20 +1950,6 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
1562 | return 0; | 1950 | return 0; |
1563 | } | 1951 | } |
1564 | 1952 | ||
1565 | static void put_inodes(struct list_head *list) | ||
1566 | { | ||
1567 | struct inodevec *ivec; | ||
1568 | while (!list_empty(list)) { | ||
1569 | ivec = list_entry(list->next, struct inodevec, list); | ||
1570 | list_del(&ivec->list); | ||
1571 | while (ivec->nr > 0) { | ||
1572 | ivec->nr--; | ||
1573 | iput(ivec->inode[ivec->nr]); | ||
1574 | } | ||
1575 | kfree(ivec); | ||
1576 | } | ||
1577 | } | ||
1578 | |||
1579 | static int find_next_key(struct btrfs_path *path, int level, | 1953 | static int find_next_key(struct btrfs_path *path, int level, |
1580 | struct btrfs_key *key) | 1954 | struct btrfs_key *key) |
1581 | 1955 | ||
@@ -1608,13 +1982,14 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1608 | struct btrfs_root *reloc_root; | 1982 | struct btrfs_root *reloc_root; |
1609 | struct btrfs_root_item *root_item; | 1983 | struct btrfs_root_item *root_item; |
1610 | struct btrfs_path *path; | 1984 | struct btrfs_path *path; |
1611 | struct extent_buffer *leaf = NULL; | 1985 | struct extent_buffer *leaf; |
1612 | unsigned long nr; | 1986 | unsigned long nr; |
1613 | int level; | 1987 | int level; |
1614 | int max_level; | 1988 | int max_level; |
1615 | int replaced = 0; | 1989 | int replaced = 0; |
1616 | int ret; | 1990 | int ret; |
1617 | int err = 0; | 1991 | int err = 0; |
1992 | u32 min_reserved; | ||
1618 | 1993 | ||
1619 | path = btrfs_alloc_path(); | 1994 | path = btrfs_alloc_path(); |
1620 | if (!path) | 1995 | if (!path) |
@@ -1648,34 +2023,23 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1648 | btrfs_unlock_up_safe(path, 0); | 2023 | btrfs_unlock_up_safe(path, 0); |
1649 | } | 2024 | } |
1650 | 2025 | ||
1651 | if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { | 2026 | min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
1652 | trans = btrfs_start_transaction(root, 1); | 2027 | memset(&next_key, 0, sizeof(next_key)); |
1653 | 2028 | ||
1654 | leaf = path->nodes[0]; | 2029 | while (1) { |
1655 | btrfs_item_key_to_cpu(leaf, &key, 0); | 2030 | trans = btrfs_start_transaction(root, 0); |
1656 | btrfs_release_path(reloc_root, path); | 2031 | trans->block_rsv = rc->block_rsv; |
1657 | 2032 | ||
1658 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 2033 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
1659 | if (ret < 0) { | 2034 | min_reserved, 0); |
1660 | err = ret; | 2035 | if (ret) { |
1661 | goto out; | 2036 | BUG_ON(ret != -EAGAIN); |
2037 | ret = btrfs_commit_transaction(trans, root); | ||
2038 | BUG_ON(ret); | ||
2039 | continue; | ||
1662 | } | 2040 | } |
1663 | 2041 | ||
1664 | leaf = path->nodes[0]; | ||
1665 | btrfs_unlock_up_safe(path, 1); | ||
1666 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1667 | &inode_list); | ||
1668 | if (ret < 0) | ||
1669 | err = ret; | ||
1670 | goto out; | ||
1671 | } | ||
1672 | |||
1673 | memset(&next_key, 0, sizeof(next_key)); | ||
1674 | |||
1675 | while (1) { | ||
1676 | leaf = NULL; | ||
1677 | replaced = 0; | 2042 | replaced = 0; |
1678 | trans = btrfs_start_transaction(root, 1); | ||
1679 | max_level = level; | 2043 | max_level = level; |
1680 | 2044 | ||
1681 | ret = walk_down_reloc_tree(reloc_root, path, &level); | 2045 | ret = walk_down_reloc_tree(reloc_root, path, &level); |
@@ -1689,14 +2053,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1689 | if (!find_next_key(path, level, &key) && | 2053 | if (!find_next_key(path, level, &key) && |
1690 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { | 2054 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { |
1691 | ret = 0; | 2055 | ret = 0; |
1692 | } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { | ||
1693 | ret = replace_path(trans, root, reloc_root, | ||
1694 | path, &next_key, &leaf, | ||
1695 | level, max_level); | ||
1696 | } else { | 2056 | } else { |
1697 | ret = replace_path(trans, root, reloc_root, | 2057 | ret = replace_path(trans, root, reloc_root, path, |
1698 | path, &next_key, NULL, | 2058 | &next_key, level, max_level); |
1699 | level, max_level); | ||
1700 | } | 2059 | } |
1701 | if (ret < 0) { | 2060 | if (ret < 0) { |
1702 | err = ret; | 2061 | err = ret; |
@@ -1708,16 +2067,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1708 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 2067 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
1709 | path->slots[level]); | 2068 | path->slots[level]); |
1710 | replaced = 1; | 2069 | replaced = 1; |
1711 | } else if (leaf) { | ||
1712 | /* | ||
1713 | * no block got replaced, try replacing file extents | ||
1714 | */ | ||
1715 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
1716 | ret = replace_file_extents(trans, rc, root, leaf, | ||
1717 | &inode_list); | ||
1718 | btrfs_tree_unlock(leaf); | ||
1719 | free_extent_buffer(leaf); | ||
1720 | BUG_ON(ret < 0); | ||
1721 | } | 2070 | } |
1722 | 2071 | ||
1723 | ret = walk_up_reloc_tree(reloc_root, path, &level); | 2072 | ret = walk_up_reloc_tree(reloc_root, path, &level); |
@@ -1734,15 +2083,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
1734 | root_item->drop_level = level; | 2083 | root_item->drop_level = level; |
1735 | 2084 | ||
1736 | nr = trans->blocks_used; | 2085 | nr = trans->blocks_used; |
1737 | btrfs_end_transaction(trans, root); | 2086 | btrfs_end_transaction_throttle(trans, root); |
1738 | 2087 | ||
1739 | btrfs_btree_balance_dirty(root, nr); | 2088 | btrfs_btree_balance_dirty(root, nr); |
1740 | 2089 | ||
1741 | /* | ||
1742 | * put inodes outside transaction, otherwise we may deadlock. | ||
1743 | */ | ||
1744 | put_inodes(&inode_list); | ||
1745 | |||
1746 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2090 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
1747 | invalidate_extent_cache(root, &key, &next_key); | 2091 | invalidate_extent_cache(root, &key, &next_key); |
1748 | } | 2092 | } |
@@ -1765,87 +2109,125 @@ out: | |||
1765 | sizeof(root_item->drop_progress)); | 2109 | sizeof(root_item->drop_progress)); |
1766 | root_item->drop_level = 0; | 2110 | root_item->drop_level = 0; |
1767 | btrfs_set_root_refs(root_item, 0); | 2111 | btrfs_set_root_refs(root_item, 0); |
2112 | btrfs_update_reloc_root(trans, root); | ||
1768 | } | 2113 | } |
1769 | 2114 | ||
1770 | nr = trans->blocks_used; | 2115 | nr = trans->blocks_used; |
1771 | btrfs_end_transaction(trans, root); | 2116 | btrfs_end_transaction_throttle(trans, root); |
1772 | 2117 | ||
1773 | btrfs_btree_balance_dirty(root, nr); | 2118 | btrfs_btree_balance_dirty(root, nr); |
1774 | 2119 | ||
1775 | put_inodes(&inode_list); | ||
1776 | |||
1777 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2120 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
1778 | invalidate_extent_cache(root, &key, &next_key); | 2121 | invalidate_extent_cache(root, &key, &next_key); |
1779 | 2122 | ||
1780 | return err; | 2123 | return err; |
1781 | } | 2124 | } |
1782 | 2125 | ||
1783 | /* | 2126 | static noinline_for_stack |
1784 | * callback for the work threads. | 2127 | int prepare_to_merge(struct reloc_control *rc, int err) |
1785 | * this function merges reloc tree with corresponding fs tree, | ||
1786 | * and then drops the reloc tree. | ||
1787 | */ | ||
1788 | static void merge_func(struct btrfs_work *work) | ||
1789 | { | 2128 | { |
1790 | struct btrfs_trans_handle *trans; | 2129 | struct btrfs_root *root = rc->extent_root; |
1791 | struct btrfs_root *root; | ||
1792 | struct btrfs_root *reloc_root; | 2130 | struct btrfs_root *reloc_root; |
1793 | struct async_merge *async; | 2131 | struct btrfs_trans_handle *trans; |
2132 | LIST_HEAD(reloc_roots); | ||
2133 | u64 num_bytes = 0; | ||
2134 | int ret; | ||
2135 | int retries = 0; | ||
2136 | |||
2137 | mutex_lock(&root->fs_info->trans_mutex); | ||
2138 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | ||
2139 | rc->merging_rsv_size += rc->nodes_relocated * 2; | ||
2140 | mutex_unlock(&root->fs_info->trans_mutex); | ||
2141 | again: | ||
2142 | if (!err) { | ||
2143 | num_bytes = rc->merging_rsv_size; | ||
2144 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | ||
2145 | num_bytes, &retries); | ||
2146 | if (ret) | ||
2147 | err = ret; | ||
2148 | } | ||
2149 | |||
2150 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
2151 | |||
2152 | if (!err) { | ||
2153 | if (num_bytes != rc->merging_rsv_size) { | ||
2154 | btrfs_end_transaction(trans, rc->extent_root); | ||
2155 | btrfs_block_rsv_release(rc->extent_root, | ||
2156 | rc->block_rsv, num_bytes); | ||
2157 | retries = 0; | ||
2158 | goto again; | ||
2159 | } | ||
2160 | } | ||
1794 | 2161 | ||
1795 | async = container_of(work, struct async_merge, work); | 2162 | rc->merge_reloc_tree = 1; |
1796 | reloc_root = async->root; | 2163 | |
2164 | while (!list_empty(&rc->reloc_roots)) { | ||
2165 | reloc_root = list_entry(rc->reloc_roots.next, | ||
2166 | struct btrfs_root, root_list); | ||
2167 | list_del_init(&reloc_root->root_list); | ||
1797 | 2168 | ||
1798 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { | ||
1799 | root = read_fs_root(reloc_root->fs_info, | 2169 | root = read_fs_root(reloc_root->fs_info, |
1800 | reloc_root->root_key.offset); | 2170 | reloc_root->root_key.offset); |
1801 | BUG_ON(IS_ERR(root)); | 2171 | BUG_ON(IS_ERR(root)); |
1802 | BUG_ON(root->reloc_root != reloc_root); | 2172 | BUG_ON(root->reloc_root != reloc_root); |
1803 | 2173 | ||
1804 | merge_reloc_root(async->rc, root); | 2174 | /* |
1805 | 2175 | * set reference count to 1, so btrfs_recover_relocation | |
1806 | trans = btrfs_start_transaction(root, 1); | 2176 | * knows it should resumes merging |
2177 | */ | ||
2178 | if (!err) | ||
2179 | btrfs_set_root_refs(&reloc_root->root_item, 1); | ||
1807 | btrfs_update_reloc_root(trans, root); | 2180 | btrfs_update_reloc_root(trans, root); |
1808 | btrfs_end_transaction(trans, root); | ||
1809 | } | ||
1810 | 2181 | ||
1811 | btrfs_drop_snapshot(reloc_root, 0); | 2182 | list_add(&reloc_root->root_list, &reloc_roots); |
2183 | } | ||
1812 | 2184 | ||
1813 | if (atomic_dec_and_test(async->num_pending)) | 2185 | list_splice(&reloc_roots, &rc->reloc_roots); |
1814 | complete(async->done); | ||
1815 | 2186 | ||
1816 | kfree(async); | 2187 | if (!err) |
2188 | btrfs_commit_transaction(trans, rc->extent_root); | ||
2189 | else | ||
2190 | btrfs_end_transaction(trans, rc->extent_root); | ||
2191 | return err; | ||
1817 | } | 2192 | } |
1818 | 2193 | ||
1819 | static int merge_reloc_roots(struct reloc_control *rc) | 2194 | static noinline_for_stack |
2195 | int merge_reloc_roots(struct reloc_control *rc) | ||
1820 | { | 2196 | { |
1821 | struct async_merge *async; | ||
1822 | struct btrfs_root *root; | 2197 | struct btrfs_root *root; |
1823 | struct completion done; | 2198 | struct btrfs_root *reloc_root; |
1824 | atomic_t num_pending; | 2199 | LIST_HEAD(reloc_roots); |
2200 | int found = 0; | ||
2201 | int ret; | ||
2202 | again: | ||
2203 | root = rc->extent_root; | ||
2204 | mutex_lock(&root->fs_info->trans_mutex); | ||
2205 | list_splice_init(&rc->reloc_roots, &reloc_roots); | ||
2206 | mutex_unlock(&root->fs_info->trans_mutex); | ||
1825 | 2207 | ||
1826 | init_completion(&done); | 2208 | while (!list_empty(&reloc_roots)) { |
1827 | atomic_set(&num_pending, 1); | 2209 | found = 1; |
2210 | reloc_root = list_entry(reloc_roots.next, | ||
2211 | struct btrfs_root, root_list); | ||
1828 | 2212 | ||
1829 | while (!list_empty(&rc->reloc_roots)) { | 2213 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { |
1830 | root = list_entry(rc->reloc_roots.next, | 2214 | root = read_fs_root(reloc_root->fs_info, |
1831 | struct btrfs_root, root_list); | 2215 | reloc_root->root_key.offset); |
1832 | list_del_init(&root->root_list); | 2216 | BUG_ON(IS_ERR(root)); |
2217 | BUG_ON(root->reloc_root != reloc_root); | ||
1833 | 2218 | ||
1834 | async = kmalloc(sizeof(*async), GFP_NOFS); | 2219 | ret = merge_reloc_root(rc, root); |
1835 | BUG_ON(!async); | 2220 | BUG_ON(ret); |
1836 | async->work.func = merge_func; | 2221 | } else { |
1837 | async->work.flags = 0; | 2222 | list_del_init(&reloc_root->root_list); |
1838 | async->rc = rc; | 2223 | } |
1839 | async->root = root; | 2224 | btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); |
1840 | async->done = &done; | ||
1841 | async->num_pending = &num_pending; | ||
1842 | atomic_inc(&num_pending); | ||
1843 | btrfs_queue_worker(&rc->workers, &async->work); | ||
1844 | } | 2225 | } |
1845 | 2226 | ||
1846 | if (!atomic_dec_and_test(&num_pending)) | 2227 | if (found) { |
1847 | wait_for_completion(&done); | 2228 | found = 0; |
1848 | 2229 | goto again; | |
2230 | } | ||
1849 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2231 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
1850 | return 0; | 2232 | return 0; |
1851 | } | 2233 | } |
@@ -1876,119 +2258,169 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, | |||
1876 | return btrfs_record_root_in_trans(trans, root); | 2258 | return btrfs_record_root_in_trans(trans, root); |
1877 | } | 2259 | } |
1878 | 2260 | ||
1879 | /* | 2261 | static noinline_for_stack |
1880 | * select one tree from trees that references the block. | 2262 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, |
1881 | * for blocks in refernce counted trees, we preper reloc tree. | 2263 | struct reloc_control *rc, |
1882 | * if no reloc tree found and reloc_only is true, NULL is returned. | 2264 | struct backref_node *node, |
1883 | */ | 2265 | struct backref_edge *edges[], int *nr) |
1884 | static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, | ||
1885 | struct backref_node *node, | ||
1886 | struct backref_edge *edges[], | ||
1887 | int *nr, int reloc_only) | ||
1888 | { | 2266 | { |
1889 | struct backref_node *next; | 2267 | struct backref_node *next; |
1890 | struct btrfs_root *root; | 2268 | struct btrfs_root *root; |
1891 | int index; | 2269 | int index = 0; |
1892 | int loop = 0; | 2270 | |
1893 | again: | ||
1894 | index = 0; | ||
1895 | next = node; | 2271 | next = node; |
1896 | while (1) { | 2272 | while (1) { |
1897 | cond_resched(); | 2273 | cond_resched(); |
1898 | next = walk_up_backref(next, edges, &index); | 2274 | next = walk_up_backref(next, edges, &index); |
1899 | root = next->root; | 2275 | root = next->root; |
1900 | if (!root) { | 2276 | BUG_ON(!root); |
1901 | BUG_ON(!node->old_root); | 2277 | BUG_ON(!root->ref_cows); |
1902 | goto skip; | ||
1903 | } | ||
1904 | |||
1905 | /* no other choice for non-refernce counted tree */ | ||
1906 | if (!root->ref_cows) { | ||
1907 | BUG_ON(reloc_only); | ||
1908 | break; | ||
1909 | } | ||
1910 | 2278 | ||
1911 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2279 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
1912 | record_reloc_root_in_trans(trans, root); | 2280 | record_reloc_root_in_trans(trans, root); |
1913 | break; | 2281 | break; |
1914 | } | 2282 | } |
1915 | 2283 | ||
1916 | if (loop) { | 2284 | btrfs_record_root_in_trans(trans, root); |
1917 | btrfs_record_root_in_trans(trans, root); | 2285 | root = root->reloc_root; |
2286 | |||
2287 | if (next->new_bytenr != root->node->start) { | ||
2288 | BUG_ON(next->new_bytenr); | ||
2289 | BUG_ON(!list_empty(&next->list)); | ||
2290 | next->new_bytenr = root->node->start; | ||
2291 | next->root = root; | ||
2292 | list_add_tail(&next->list, | ||
2293 | &rc->backref_cache.changed); | ||
2294 | __mark_block_processed(rc, next); | ||
1918 | break; | 2295 | break; |
1919 | } | 2296 | } |
1920 | 2297 | ||
1921 | if (reloc_only || next != node) { | 2298 | WARN_ON(1); |
1922 | if (!root->reloc_root) | ||
1923 | btrfs_record_root_in_trans(trans, root); | ||
1924 | root = root->reloc_root; | ||
1925 | /* | ||
1926 | * if the reloc tree was created in current | ||
1927 | * transation, there is no node in backref tree | ||
1928 | * corresponds to the root of the reloc tree. | ||
1929 | */ | ||
1930 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
1931 | trans->transid - 1) | ||
1932 | break; | ||
1933 | } | ||
1934 | skip: | ||
1935 | root = NULL; | 2299 | root = NULL; |
1936 | next = walk_down_backref(edges, &index); | 2300 | next = walk_down_backref(edges, &index); |
1937 | if (!next || next->level <= node->level) | 2301 | if (!next || next->level <= node->level) |
1938 | break; | 2302 | break; |
1939 | } | 2303 | } |
2304 | if (!root) | ||
2305 | return NULL; | ||
1940 | 2306 | ||
1941 | if (!root && !loop && !reloc_only) { | 2307 | *nr = index; |
1942 | loop = 1; | 2308 | next = node; |
1943 | goto again; | 2309 | /* setup backref node path for btrfs_reloc_cow_block */ |
2310 | while (1) { | ||
2311 | rc->backref_cache.path[next->level] = next; | ||
2312 | if (--index < 0) | ||
2313 | break; | ||
2314 | next = edges[index]->node[UPPER]; | ||
1944 | } | 2315 | } |
1945 | |||
1946 | if (root) | ||
1947 | *nr = index; | ||
1948 | else | ||
1949 | *nr = 0; | ||
1950 | |||
1951 | return root; | 2316 | return root; |
1952 | } | 2317 | } |
1953 | 2318 | ||
2319 | /* | ||
2320 | * select a tree root for relocation. return NULL if the block | ||
2321 | * is reference counted. we should use do_relocation() in this | ||
2322 | * case. return a tree root pointer if the block isn't reference | ||
2323 | * counted. return -ENOENT if the block is root of reloc tree. | ||
2324 | */ | ||
1954 | static noinline_for_stack | 2325 | static noinline_for_stack |
1955 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | 2326 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, |
1956 | struct backref_node *node) | 2327 | struct backref_node *node) |
1957 | { | 2328 | { |
2329 | struct backref_node *next; | ||
2330 | struct btrfs_root *root; | ||
2331 | struct btrfs_root *fs_root = NULL; | ||
1958 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | 2332 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; |
1959 | int nr; | 2333 | int index = 0; |
1960 | return __select_one_root(trans, node, edges, &nr, 0); | 2334 | |
2335 | next = node; | ||
2336 | while (1) { | ||
2337 | cond_resched(); | ||
2338 | next = walk_up_backref(next, edges, &index); | ||
2339 | root = next->root; | ||
2340 | BUG_ON(!root); | ||
2341 | |||
2342 | /* no other choice for non-refernce counted tree */ | ||
2343 | if (!root->ref_cows) | ||
2344 | return root; | ||
2345 | |||
2346 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) | ||
2347 | fs_root = root; | ||
2348 | |||
2349 | if (next != node) | ||
2350 | return NULL; | ||
2351 | |||
2352 | next = walk_down_backref(edges, &index); | ||
2353 | if (!next || next->level <= node->level) | ||
2354 | break; | ||
2355 | } | ||
2356 | |||
2357 | if (!fs_root) | ||
2358 | return ERR_PTR(-ENOENT); | ||
2359 | return fs_root; | ||
1961 | } | 2360 | } |
1962 | 2361 | ||
1963 | static noinline_for_stack | 2362 | static noinline_for_stack |
1964 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | 2363 | u64 calcu_metadata_size(struct reloc_control *rc, |
1965 | struct backref_node *node, | 2364 | struct backref_node *node, int reserve) |
1966 | struct backref_edge *edges[], int *nr) | ||
1967 | { | 2365 | { |
1968 | return __select_one_root(trans, node, edges, nr, 1); | 2366 | struct backref_node *next = node; |
2367 | struct backref_edge *edge; | ||
2368 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
2369 | u64 num_bytes = 0; | ||
2370 | int index = 0; | ||
2371 | |||
2372 | BUG_ON(reserve && node->processed); | ||
2373 | |||
2374 | while (next) { | ||
2375 | cond_resched(); | ||
2376 | while (1) { | ||
2377 | if (next->processed && (reserve || next != node)) | ||
2378 | break; | ||
2379 | |||
2380 | num_bytes += btrfs_level_size(rc->extent_root, | ||
2381 | next->level); | ||
2382 | |||
2383 | if (list_empty(&next->upper)) | ||
2384 | break; | ||
2385 | |||
2386 | edge = list_entry(next->upper.next, | ||
2387 | struct backref_edge, list[LOWER]); | ||
2388 | edges[index++] = edge; | ||
2389 | next = edge->node[UPPER]; | ||
2390 | } | ||
2391 | next = walk_down_backref(edges, &index); | ||
2392 | } | ||
2393 | return num_bytes; | ||
1969 | } | 2394 | } |
1970 | 2395 | ||
1971 | static void grab_path_buffers(struct btrfs_path *path, | 2396 | static int reserve_metadata_space(struct btrfs_trans_handle *trans, |
1972 | struct backref_node *node, | 2397 | struct reloc_control *rc, |
1973 | struct backref_edge *edges[], int nr) | 2398 | struct backref_node *node) |
1974 | { | 2399 | { |
1975 | int i = 0; | 2400 | struct btrfs_root *root = rc->extent_root; |
1976 | while (1) { | 2401 | u64 num_bytes; |
1977 | drop_node_buffer(node); | 2402 | int ret; |
1978 | node->eb = path->nodes[node->level]; | 2403 | |
1979 | BUG_ON(!node->eb); | 2404 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
1980 | if (path->locks[node->level]) | ||
1981 | node->locked = 1; | ||
1982 | path->nodes[node->level] = NULL; | ||
1983 | path->locks[node->level] = 0; | ||
1984 | |||
1985 | if (i >= nr) | ||
1986 | break; | ||
1987 | 2405 | ||
1988 | edges[i]->blockptr = node->eb->start; | 2406 | trans->block_rsv = rc->block_rsv; |
1989 | node = edges[i]->node[UPPER]; | 2407 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, |
1990 | i++; | 2408 | &rc->block_rsv_retries); |
2409 | if (ret) { | ||
2410 | if (ret == -EAGAIN) | ||
2411 | rc->commit_transaction = 1; | ||
2412 | return ret; | ||
1991 | } | 2413 | } |
2414 | |||
2415 | rc->block_rsv_retries = 0; | ||
2416 | return 0; | ||
2417 | } | ||
2418 | |||
2419 | static void release_metadata_space(struct reloc_control *rc, | ||
2420 | struct backref_node *node) | ||
2421 | { | ||
2422 | u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; | ||
2423 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); | ||
1992 | } | 2424 | } |
1993 | 2425 | ||
1994 | /* | 2426 | /* |
@@ -1999,6 +2431,7 @@ static void grab_path_buffers(struct btrfs_path *path, | |||
1999 | * in that case this function just updates pointers. | 2431 | * in that case this function just updates pointers. |
2000 | */ | 2432 | */ |
2001 | static int do_relocation(struct btrfs_trans_handle *trans, | 2433 | static int do_relocation(struct btrfs_trans_handle *trans, |
2434 | struct reloc_control *rc, | ||
2002 | struct backref_node *node, | 2435 | struct backref_node *node, |
2003 | struct btrfs_key *key, | 2436 | struct btrfs_key *key, |
2004 | struct btrfs_path *path, int lowest) | 2437 | struct btrfs_path *path, int lowest) |
@@ -2019,18 +2452,25 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2019 | BUG_ON(lowest && node->eb); | 2452 | BUG_ON(lowest && node->eb); |
2020 | 2453 | ||
2021 | path->lowest_level = node->level + 1; | 2454 | path->lowest_level = node->level + 1; |
2455 | rc->backref_cache.path[node->level] = node; | ||
2022 | list_for_each_entry(edge, &node->upper, list[LOWER]) { | 2456 | list_for_each_entry(edge, &node->upper, list[LOWER]) { |
2023 | cond_resched(); | 2457 | cond_resched(); |
2024 | if (node->eb && node->eb->start == edge->blockptr) | ||
2025 | continue; | ||
2026 | 2458 | ||
2027 | upper = edge->node[UPPER]; | 2459 | upper = edge->node[UPPER]; |
2028 | root = select_reloc_root(trans, upper, edges, &nr); | 2460 | root = select_reloc_root(trans, rc, upper, edges, &nr); |
2029 | if (!root) | 2461 | BUG_ON(!root); |
2030 | continue; | 2462 | |
2031 | 2463 | if (upper->eb && !upper->locked) { | |
2032 | if (upper->eb && !upper->locked) | 2464 | if (!lowest) { |
2465 | ret = btrfs_bin_search(upper->eb, key, | ||
2466 | upper->level, &slot); | ||
2467 | BUG_ON(ret); | ||
2468 | bytenr = btrfs_node_blockptr(upper->eb, slot); | ||
2469 | if (node->eb->start == bytenr) | ||
2470 | goto next; | ||
2471 | } | ||
2033 | drop_node_buffer(upper); | 2472 | drop_node_buffer(upper); |
2473 | } | ||
2034 | 2474 | ||
2035 | if (!upper->eb) { | 2475 | if (!upper->eb) { |
2036 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 2476 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
@@ -2040,11 +2480,17 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2040 | } | 2480 | } |
2041 | BUG_ON(ret > 0); | 2481 | BUG_ON(ret > 0); |
2042 | 2482 | ||
2043 | slot = path->slots[upper->level]; | 2483 | if (!upper->eb) { |
2484 | upper->eb = path->nodes[upper->level]; | ||
2485 | path->nodes[upper->level] = NULL; | ||
2486 | } else { | ||
2487 | BUG_ON(upper->eb != path->nodes[upper->level]); | ||
2488 | } | ||
2044 | 2489 | ||
2045 | btrfs_unlock_up_safe(path, upper->level + 1); | 2490 | upper->locked = 1; |
2046 | grab_path_buffers(path, upper, edges, nr); | 2491 | path->locks[upper->level] = 0; |
2047 | 2492 | ||
2493 | slot = path->slots[upper->level]; | ||
2048 | btrfs_release_path(NULL, path); | 2494 | btrfs_release_path(NULL, path); |
2049 | } else { | 2495 | } else { |
2050 | ret = btrfs_bin_search(upper->eb, key, upper->level, | 2496 | ret = btrfs_bin_search(upper->eb, key, upper->level, |
@@ -2053,14 +2499,11 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2053 | } | 2499 | } |
2054 | 2500 | ||
2055 | bytenr = btrfs_node_blockptr(upper->eb, slot); | 2501 | bytenr = btrfs_node_blockptr(upper->eb, slot); |
2056 | if (!lowest) { | 2502 | if (lowest) { |
2057 | if (node->eb->start == bytenr) { | 2503 | BUG_ON(bytenr != node->bytenr); |
2058 | btrfs_tree_unlock(upper->eb); | ||
2059 | upper->locked = 0; | ||
2060 | continue; | ||
2061 | } | ||
2062 | } else { | 2504 | } else { |
2063 | BUG_ON(node->bytenr != bytenr); | 2505 | if (node->eb->start == bytenr) |
2506 | goto next; | ||
2064 | } | 2507 | } |
2065 | 2508 | ||
2066 | blocksize = btrfs_level_size(root, node->level); | 2509 | blocksize = btrfs_level_size(root, node->level); |
@@ -2072,13 +2515,13 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2072 | if (!node->eb) { | 2515 | if (!node->eb) { |
2073 | ret = btrfs_cow_block(trans, root, eb, upper->eb, | 2516 | ret = btrfs_cow_block(trans, root, eb, upper->eb, |
2074 | slot, &eb); | 2517 | slot, &eb); |
2518 | btrfs_tree_unlock(eb); | ||
2519 | free_extent_buffer(eb); | ||
2075 | if (ret < 0) { | 2520 | if (ret < 0) { |
2076 | err = ret; | 2521 | err = ret; |
2077 | break; | 2522 | goto next; |
2078 | } | 2523 | } |
2079 | btrfs_set_lock_blocking(eb); | 2524 | BUG_ON(node->eb != eb); |
2080 | node->eb = eb; | ||
2081 | node->locked = 1; | ||
2082 | } else { | 2525 | } else { |
2083 | btrfs_set_node_blockptr(upper->eb, slot, | 2526 | btrfs_set_node_blockptr(upper->eb, slot, |
2084 | node->eb->start); | 2527 | node->eb->start); |
@@ -2096,67 +2539,80 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
2096 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); | 2539 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); |
2097 | BUG_ON(ret); | 2540 | BUG_ON(ret); |
2098 | } | 2541 | } |
2099 | if (!lowest) { | 2542 | next: |
2100 | btrfs_tree_unlock(upper->eb); | 2543 | if (!upper->pending) |
2101 | upper->locked = 0; | 2544 | drop_node_buffer(upper); |
2102 | } | 2545 | else |
2546 | unlock_node_buffer(upper); | ||
2547 | if (err) | ||
2548 | break; | ||
2103 | } | 2549 | } |
2550 | |||
2551 | if (!err && node->pending) { | ||
2552 | drop_node_buffer(node); | ||
2553 | list_move_tail(&node->list, &rc->backref_cache.changed); | ||
2554 | node->pending = 0; | ||
2555 | } | ||
2556 | |||
2104 | path->lowest_level = 0; | 2557 | path->lowest_level = 0; |
2558 | BUG_ON(err == -ENOSPC); | ||
2105 | return err; | 2559 | return err; |
2106 | } | 2560 | } |
2107 | 2561 | ||
2108 | static int link_to_upper(struct btrfs_trans_handle *trans, | 2562 | static int link_to_upper(struct btrfs_trans_handle *trans, |
2563 | struct reloc_control *rc, | ||
2109 | struct backref_node *node, | 2564 | struct backref_node *node, |
2110 | struct btrfs_path *path) | 2565 | struct btrfs_path *path) |
2111 | { | 2566 | { |
2112 | struct btrfs_key key; | 2567 | struct btrfs_key key; |
2113 | if (!node->eb || list_empty(&node->upper)) | ||
2114 | return 0; | ||
2115 | 2568 | ||
2116 | btrfs_node_key_to_cpu(node->eb, &key, 0); | 2569 | btrfs_node_key_to_cpu(node->eb, &key, 0); |
2117 | return do_relocation(trans, node, &key, path, 0); | 2570 | return do_relocation(trans, rc, node, &key, path, 0); |
2118 | } | 2571 | } |
2119 | 2572 | ||
2120 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, | 2573 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, |
2121 | struct backref_cache *cache, | 2574 | struct reloc_control *rc, |
2122 | struct btrfs_path *path) | 2575 | struct btrfs_path *path, int err) |
2123 | { | 2576 | { |
2577 | LIST_HEAD(list); | ||
2578 | struct backref_cache *cache = &rc->backref_cache; | ||
2124 | struct backref_node *node; | 2579 | struct backref_node *node; |
2125 | int level; | 2580 | int level; |
2126 | int ret; | 2581 | int ret; |
2127 | int err = 0; | ||
2128 | 2582 | ||
2129 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2583 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
2130 | while (!list_empty(&cache->pending[level])) { | 2584 | while (!list_empty(&cache->pending[level])) { |
2131 | node = list_entry(cache->pending[level].next, | 2585 | node = list_entry(cache->pending[level].next, |
2132 | struct backref_node, lower); | 2586 | struct backref_node, list); |
2133 | BUG_ON(node->level != level); | 2587 | list_move_tail(&node->list, &list); |
2588 | BUG_ON(!node->pending); | ||
2134 | 2589 | ||
2135 | ret = link_to_upper(trans, node, path); | 2590 | if (!err) { |
2136 | if (ret < 0) | 2591 | ret = link_to_upper(trans, rc, node, path); |
2137 | err = ret; | 2592 | if (ret < 0) |
2138 | /* | 2593 | err = ret; |
2139 | * this remove the node from the pending list and | 2594 | } |
2140 | * may add some other nodes to the level + 1 | ||
2141 | * pending list | ||
2142 | */ | ||
2143 | remove_backref_node(cache, node); | ||
2144 | } | 2595 | } |
2596 | list_splice_init(&list, &cache->pending[level]); | ||
2145 | } | 2597 | } |
2146 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
2147 | return err; | 2598 | return err; |
2148 | } | 2599 | } |
2149 | 2600 | ||
2150 | static void mark_block_processed(struct reloc_control *rc, | 2601 | static void mark_block_processed(struct reloc_control *rc, |
2151 | struct backref_node *node) | 2602 | u64 bytenr, u32 blocksize) |
2603 | { | ||
2604 | set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, | ||
2605 | EXTENT_DIRTY, GFP_NOFS); | ||
2606 | } | ||
2607 | |||
2608 | static void __mark_block_processed(struct reloc_control *rc, | ||
2609 | struct backref_node *node) | ||
2152 | { | 2610 | { |
2153 | u32 blocksize; | 2611 | u32 blocksize; |
2154 | if (node->level == 0 || | 2612 | if (node->level == 0 || |
2155 | in_block_group(node->bytenr, rc->block_group)) { | 2613 | in_block_group(node->bytenr, rc->block_group)) { |
2156 | blocksize = btrfs_level_size(rc->extent_root, node->level); | 2614 | blocksize = btrfs_level_size(rc->extent_root, node->level); |
2157 | set_extent_bits(&rc->processed_blocks, node->bytenr, | 2615 | mark_block_processed(rc, node->bytenr, blocksize); |
2158 | node->bytenr + blocksize - 1, EXTENT_DIRTY, | ||
2159 | GFP_NOFS); | ||
2160 | } | 2616 | } |
2161 | node->processed = 1; | 2617 | node->processed = 1; |
2162 | } | 2618 | } |
@@ -2179,7 +2635,7 @@ static void update_processed_blocks(struct reloc_control *rc, | |||
2179 | if (next->processed) | 2635 | if (next->processed) |
2180 | break; | 2636 | break; |
2181 | 2637 | ||
2182 | mark_block_processed(rc, next); | 2638 | __mark_block_processed(rc, next); |
2183 | 2639 | ||
2184 | if (list_empty(&next->upper)) | 2640 | if (list_empty(&next->upper)) |
2185 | break; | 2641 | break; |
@@ -2202,138 +2658,6 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
2202 | return 0; | 2658 | return 0; |
2203 | } | 2659 | } |
2204 | 2660 | ||
2205 | /* | ||
2206 | * check if there are any file extent pointers in the leaf point to | ||
2207 | * data require processing | ||
2208 | */ | ||
2209 | static int check_file_extents(struct reloc_control *rc, | ||
2210 | u64 bytenr, u32 blocksize, u64 ptr_gen) | ||
2211 | { | ||
2212 | struct btrfs_key found_key; | ||
2213 | struct btrfs_file_extent_item *fi; | ||
2214 | struct extent_buffer *leaf; | ||
2215 | u32 nritems; | ||
2216 | int i; | ||
2217 | int ret = 0; | ||
2218 | |||
2219 | leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); | ||
2220 | |||
2221 | nritems = btrfs_header_nritems(leaf); | ||
2222 | for (i = 0; i < nritems; i++) { | ||
2223 | cond_resched(); | ||
2224 | btrfs_item_key_to_cpu(leaf, &found_key, i); | ||
2225 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
2226 | continue; | ||
2227 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
2228 | if (btrfs_file_extent_type(leaf, fi) == | ||
2229 | BTRFS_FILE_EXTENT_INLINE) | ||
2230 | continue; | ||
2231 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
2232 | if (bytenr == 0) | ||
2233 | continue; | ||
2234 | if (in_block_group(bytenr, rc->block_group)) { | ||
2235 | ret = 1; | ||
2236 | break; | ||
2237 | } | ||
2238 | } | ||
2239 | free_extent_buffer(leaf); | ||
2240 | return ret; | ||
2241 | } | ||
2242 | |||
2243 | /* | ||
2244 | * scan child blocks of a given block to find blocks require processing | ||
2245 | */ | ||
2246 | static int add_child_blocks(struct btrfs_trans_handle *trans, | ||
2247 | struct reloc_control *rc, | ||
2248 | struct backref_node *node, | ||
2249 | struct rb_root *blocks) | ||
2250 | { | ||
2251 | struct tree_block *block; | ||
2252 | struct rb_node *rb_node; | ||
2253 | u64 bytenr; | ||
2254 | u64 ptr_gen; | ||
2255 | u32 blocksize; | ||
2256 | u32 nritems; | ||
2257 | int i; | ||
2258 | int err = 0; | ||
2259 | |||
2260 | nritems = btrfs_header_nritems(node->eb); | ||
2261 | blocksize = btrfs_level_size(rc->extent_root, node->level - 1); | ||
2262 | for (i = 0; i < nritems; i++) { | ||
2263 | cond_resched(); | ||
2264 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2265 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2266 | if (ptr_gen == trans->transid) | ||
2267 | continue; | ||
2268 | if (!in_block_group(bytenr, rc->block_group) && | ||
2269 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2270 | continue; | ||
2271 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2272 | continue; | ||
2273 | |||
2274 | readahead_tree_block(rc->extent_root, | ||
2275 | bytenr, blocksize, ptr_gen); | ||
2276 | } | ||
2277 | |||
2278 | for (i = 0; i < nritems; i++) { | ||
2279 | cond_resched(); | ||
2280 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
2281 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
2282 | if (ptr_gen == trans->transid) | ||
2283 | continue; | ||
2284 | if (!in_block_group(bytenr, rc->block_group) && | ||
2285 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
2286 | continue; | ||
2287 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
2288 | continue; | ||
2289 | if (!in_block_group(bytenr, rc->block_group) && | ||
2290 | !check_file_extents(rc, bytenr, blocksize, ptr_gen)) | ||
2291 | continue; | ||
2292 | |||
2293 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
2294 | if (!block) { | ||
2295 | err = -ENOMEM; | ||
2296 | break; | ||
2297 | } | ||
2298 | block->bytenr = bytenr; | ||
2299 | btrfs_node_key_to_cpu(node->eb, &block->key, i); | ||
2300 | block->level = node->level - 1; | ||
2301 | block->key_ready = 1; | ||
2302 | rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); | ||
2303 | BUG_ON(rb_node); | ||
2304 | } | ||
2305 | if (err) | ||
2306 | free_block_list(blocks); | ||
2307 | return err; | ||
2308 | } | ||
2309 | |||
2310 | /* | ||
2311 | * find adjacent blocks require processing | ||
2312 | */ | ||
2313 | static noinline_for_stack | ||
2314 | int add_adjacent_blocks(struct btrfs_trans_handle *trans, | ||
2315 | struct reloc_control *rc, | ||
2316 | struct backref_cache *cache, | ||
2317 | struct rb_root *blocks, int level, | ||
2318 | struct backref_node **upper) | ||
2319 | { | ||
2320 | struct backref_node *node; | ||
2321 | int ret = 0; | ||
2322 | |||
2323 | WARN_ON(!list_empty(&cache->pending[level])); | ||
2324 | |||
2325 | if (list_empty(&cache->pending[level + 1])) | ||
2326 | return 1; | ||
2327 | |||
2328 | node = list_entry(cache->pending[level + 1].next, | ||
2329 | struct backref_node, lower); | ||
2330 | if (node->eb) | ||
2331 | ret = add_child_blocks(trans, rc, node, blocks); | ||
2332 | |||
2333 | *upper = node; | ||
2334 | return ret; | ||
2335 | } | ||
2336 | |||
2337 | static int get_tree_block_key(struct reloc_control *rc, | 2661 | static int get_tree_block_key(struct reloc_control *rc, |
2338 | struct tree_block *block) | 2662 | struct tree_block *block) |
2339 | { | 2663 | { |
@@ -2371,40 +2695,53 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
2371 | struct btrfs_path *path) | 2695 | struct btrfs_path *path) |
2372 | { | 2696 | { |
2373 | struct btrfs_root *root; | 2697 | struct btrfs_root *root; |
2374 | int ret; | 2698 | int release = 0; |
2699 | int ret = 0; | ||
2375 | 2700 | ||
2701 | if (!node) | ||
2702 | return 0; | ||
2703 | |||
2704 | BUG_ON(node->processed); | ||
2376 | root = select_one_root(trans, node); | 2705 | root = select_one_root(trans, node); |
2377 | if (unlikely(!root)) { | 2706 | if (root == ERR_PTR(-ENOENT)) { |
2378 | rc->found_old_snapshot = 1; | ||
2379 | update_processed_blocks(rc, node); | 2707 | update_processed_blocks(rc, node); |
2380 | return 0; | 2708 | goto out; |
2381 | } | 2709 | } |
2382 | 2710 | ||
2383 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2711 | if (!root || root->ref_cows) { |
2384 | ret = do_relocation(trans, node, key, path, 1); | 2712 | ret = reserve_metadata_space(trans, rc, node); |
2385 | if (ret < 0) | 2713 | if (ret) |
2386 | goto out; | ||
2387 | if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { | ||
2388 | ret = replace_file_extents(trans, rc, root, | ||
2389 | node->eb, NULL); | ||
2390 | if (ret < 0) | ||
2391 | goto out; | ||
2392 | } | ||
2393 | drop_node_buffer(node); | ||
2394 | } else if (!root->ref_cows) { | ||
2395 | path->lowest_level = node->level; | ||
2396 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2397 | btrfs_release_path(root, path); | ||
2398 | if (ret < 0) | ||
2399 | goto out; | 2714 | goto out; |
2400 | } else if (root != node->root) { | 2715 | release = 1; |
2401 | WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); | ||
2402 | } | 2716 | } |
2403 | 2717 | ||
2404 | update_processed_blocks(rc, node); | 2718 | if (root) { |
2405 | ret = 0; | 2719 | if (root->ref_cows) { |
2720 | BUG_ON(node->new_bytenr); | ||
2721 | BUG_ON(!list_empty(&node->list)); | ||
2722 | btrfs_record_root_in_trans(trans, root); | ||
2723 | root = root->reloc_root; | ||
2724 | node->new_bytenr = root->node->start; | ||
2725 | node->root = root; | ||
2726 | list_add_tail(&node->list, &rc->backref_cache.changed); | ||
2727 | } else { | ||
2728 | path->lowest_level = node->level; | ||
2729 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
2730 | btrfs_release_path(root, path); | ||
2731 | if (ret > 0) | ||
2732 | ret = 0; | ||
2733 | } | ||
2734 | if (!ret) | ||
2735 | update_processed_blocks(rc, node); | ||
2736 | } else { | ||
2737 | ret = do_relocation(trans, rc, node, key, path, 1); | ||
2738 | } | ||
2406 | out: | 2739 | out: |
2407 | drop_node_buffer(node); | 2740 | if (ret || node->level == 0 || node->cowonly) { |
2741 | if (release) | ||
2742 | release_metadata_space(rc, node); | ||
2743 | remove_backref_node(&rc->backref_cache, node); | ||
2744 | } | ||
2408 | return ret; | 2745 | return ret; |
2409 | } | 2746 | } |
2410 | 2747 | ||
@@ -2415,12 +2752,10 @@ static noinline_for_stack | |||
2415 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, | 2752 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, |
2416 | struct reloc_control *rc, struct rb_root *blocks) | 2753 | struct reloc_control *rc, struct rb_root *blocks) |
2417 | { | 2754 | { |
2418 | struct backref_cache *cache; | ||
2419 | struct backref_node *node; | 2755 | struct backref_node *node; |
2420 | struct btrfs_path *path; | 2756 | struct btrfs_path *path; |
2421 | struct tree_block *block; | 2757 | struct tree_block *block; |
2422 | struct rb_node *rb_node; | 2758 | struct rb_node *rb_node; |
2423 | int level = -1; | ||
2424 | int ret; | 2759 | int ret; |
2425 | int err = 0; | 2760 | int err = 0; |
2426 | 2761 | ||
@@ -2428,21 +2763,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2428 | if (!path) | 2763 | if (!path) |
2429 | return -ENOMEM; | 2764 | return -ENOMEM; |
2430 | 2765 | ||
2431 | cache = kmalloc(sizeof(*cache), GFP_NOFS); | ||
2432 | if (!cache) { | ||
2433 | btrfs_free_path(path); | ||
2434 | return -ENOMEM; | ||
2435 | } | ||
2436 | |||
2437 | backref_cache_init(cache); | ||
2438 | |||
2439 | rb_node = rb_first(blocks); | 2766 | rb_node = rb_first(blocks); |
2440 | while (rb_node) { | 2767 | while (rb_node) { |
2441 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2768 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2442 | if (level == -1) | ||
2443 | level = block->level; | ||
2444 | else | ||
2445 | BUG_ON(level != block->level); | ||
2446 | if (!block->key_ready) | 2769 | if (!block->key_ready) |
2447 | reada_tree_block(rc, block); | 2770 | reada_tree_block(rc, block); |
2448 | rb_node = rb_next(rb_node); | 2771 | rb_node = rb_next(rb_node); |
@@ -2460,7 +2783,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2460 | while (rb_node) { | 2783 | while (rb_node) { |
2461 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2784 | block = rb_entry(rb_node, struct tree_block, rb_node); |
2462 | 2785 | ||
2463 | node = build_backref_tree(rc, cache, &block->key, | 2786 | node = build_backref_tree(rc, &block->key, |
2464 | block->level, block->bytenr); | 2787 | block->level, block->bytenr); |
2465 | if (IS_ERR(node)) { | 2788 | if (IS_ERR(node)) { |
2466 | err = PTR_ERR(node); | 2789 | err = PTR_ERR(node); |
@@ -2470,79 +2793,62 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
2470 | ret = relocate_tree_block(trans, rc, node, &block->key, | 2793 | ret = relocate_tree_block(trans, rc, node, &block->key, |
2471 | path); | 2794 | path); |
2472 | if (ret < 0) { | 2795 | if (ret < 0) { |
2473 | err = ret; | 2796 | if (ret != -EAGAIN || rb_node == rb_first(blocks)) |
2797 | err = ret; | ||
2474 | goto out; | 2798 | goto out; |
2475 | } | 2799 | } |
2476 | remove_backref_node(cache, node); | ||
2477 | rb_node = rb_next(rb_node); | 2800 | rb_node = rb_next(rb_node); |
2478 | } | 2801 | } |
2479 | 2802 | out: | |
2480 | if (level > 0) | ||
2481 | goto out; | ||
2482 | |||
2483 | free_block_list(blocks); | 2803 | free_block_list(blocks); |
2804 | err = finish_pending_nodes(trans, rc, path, err); | ||
2484 | 2805 | ||
2485 | /* | 2806 | btrfs_free_path(path); |
2486 | * now backrefs of some upper level tree blocks have been cached, | 2807 | return err; |
2487 | * try relocating blocks referenced by these upper level blocks. | 2808 | } |
2488 | */ | ||
2489 | while (1) { | ||
2490 | struct backref_node *upper = NULL; | ||
2491 | if (trans->transaction->in_commit || | ||
2492 | trans->transaction->delayed_refs.flushing) | ||
2493 | break; | ||
2494 | 2809 | ||
2495 | ret = add_adjacent_blocks(trans, rc, cache, blocks, level, | 2810 | static noinline_for_stack |
2496 | &upper); | 2811 | int prealloc_file_extent_cluster(struct inode *inode, |
2497 | if (ret < 0) | 2812 | struct file_extent_cluster *cluster) |
2498 | err = ret; | 2813 | { |
2499 | if (ret != 0) | 2814 | u64 alloc_hint = 0; |
2500 | break; | 2815 | u64 start; |
2816 | u64 end; | ||
2817 | u64 offset = BTRFS_I(inode)->index_cnt; | ||
2818 | u64 num_bytes; | ||
2819 | int nr = 0; | ||
2820 | int ret = 0; | ||
2501 | 2821 | ||
2502 | rb_node = rb_first(blocks); | 2822 | BUG_ON(cluster->start != cluster->boundary[0]); |
2503 | while (rb_node) { | 2823 | mutex_lock(&inode->i_mutex); |
2504 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
2505 | if (trans->transaction->in_commit || | ||
2506 | trans->transaction->delayed_refs.flushing) | ||
2507 | goto out; | ||
2508 | BUG_ON(!block->key_ready); | ||
2509 | node = build_backref_tree(rc, cache, &block->key, | ||
2510 | level, block->bytenr); | ||
2511 | if (IS_ERR(node)) { | ||
2512 | err = PTR_ERR(node); | ||
2513 | goto out; | ||
2514 | } | ||
2515 | 2824 | ||
2516 | ret = relocate_tree_block(trans, rc, node, | 2825 | ret = btrfs_check_data_free_space(inode, cluster->end + |
2517 | &block->key, path); | 2826 | 1 - cluster->start); |
2518 | if (ret < 0) { | 2827 | if (ret) |
2519 | err = ret; | 2828 | goto out; |
2520 | goto out; | ||
2521 | } | ||
2522 | remove_backref_node(cache, node); | ||
2523 | rb_node = rb_next(rb_node); | ||
2524 | } | ||
2525 | free_block_list(blocks); | ||
2526 | 2829 | ||
2527 | if (upper) { | 2830 | while (nr < cluster->nr) { |
2528 | ret = link_to_upper(trans, upper, path); | 2831 | start = cluster->boundary[nr] - offset; |
2529 | if (ret < 0) { | 2832 | if (nr + 1 < cluster->nr) |
2530 | err = ret; | 2833 | end = cluster->boundary[nr + 1] - 1 - offset; |
2531 | break; | 2834 | else |
2532 | } | 2835 | end = cluster->end - offset; |
2533 | remove_backref_node(cache, upper); | 2836 | |
2534 | } | 2837 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); |
2838 | num_bytes = end + 1 - start; | ||
2839 | ret = btrfs_prealloc_file_range(inode, 0, start, | ||
2840 | num_bytes, num_bytes, | ||
2841 | end + 1, &alloc_hint); | ||
2842 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
2843 | if (ret) | ||
2844 | break; | ||
2845 | nr++; | ||
2535 | } | 2846 | } |
2847 | btrfs_free_reserved_data_space(inode, cluster->end + | ||
2848 | 1 - cluster->start); | ||
2536 | out: | 2849 | out: |
2537 | free_block_list(blocks); | 2850 | mutex_unlock(&inode->i_mutex); |
2538 | 2851 | return ret; | |
2539 | ret = finish_pending_nodes(trans, cache, path); | ||
2540 | if (ret < 0) | ||
2541 | err = ret; | ||
2542 | |||
2543 | kfree(cache); | ||
2544 | btrfs_free_path(path); | ||
2545 | return err; | ||
2546 | } | 2852 | } |
2547 | 2853 | ||
2548 | static noinline_for_stack | 2854 | static noinline_for_stack |
@@ -2588,7 +2894,6 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2588 | u64 offset = BTRFS_I(inode)->index_cnt; | 2894 | u64 offset = BTRFS_I(inode)->index_cnt; |
2589 | unsigned long index; | 2895 | unsigned long index; |
2590 | unsigned long last_index; | 2896 | unsigned long last_index; |
2591 | unsigned int dirty_page = 0; | ||
2592 | struct page *page; | 2897 | struct page *page; |
2593 | struct file_ra_state *ra; | 2898 | struct file_ra_state *ra; |
2594 | int nr = 0; | 2899 | int nr = 0; |
@@ -2601,21 +2906,24 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2601 | if (!ra) | 2906 | if (!ra) |
2602 | return -ENOMEM; | 2907 | return -ENOMEM; |
2603 | 2908 | ||
2604 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | 2909 | ret = prealloc_file_extent_cluster(inode, cluster); |
2605 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | 2910 | if (ret) |
2911 | goto out; | ||
2606 | 2912 | ||
2607 | mutex_lock(&inode->i_mutex); | 2913 | file_ra_state_init(ra, inode->i_mapping); |
2608 | 2914 | ||
2609 | i_size_write(inode, cluster->end + 1 - offset); | ||
2610 | ret = setup_extent_mapping(inode, cluster->start - offset, | 2915 | ret = setup_extent_mapping(inode, cluster->start - offset, |
2611 | cluster->end - offset, cluster->start); | 2916 | cluster->end - offset, cluster->start); |
2612 | if (ret) | 2917 | if (ret) |
2613 | goto out_unlock; | 2918 | goto out; |
2614 | |||
2615 | file_ra_state_init(ra, inode->i_mapping); | ||
2616 | 2919 | ||
2617 | WARN_ON(cluster->start != cluster->boundary[0]); | 2920 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; |
2921 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
2618 | while (index <= last_index) { | 2922 | while (index <= last_index) { |
2923 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); | ||
2924 | if (ret) | ||
2925 | goto out; | ||
2926 | |||
2619 | page = find_lock_page(inode->i_mapping, index); | 2927 | page = find_lock_page(inode->i_mapping, index); |
2620 | if (!page) { | 2928 | if (!page) { |
2621 | page_cache_sync_readahead(inode->i_mapping, | 2929 | page_cache_sync_readahead(inode->i_mapping, |
@@ -2623,8 +2931,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2623 | last_index + 1 - index); | 2931 | last_index + 1 - index); |
2624 | page = grab_cache_page(inode->i_mapping, index); | 2932 | page = grab_cache_page(inode->i_mapping, index); |
2625 | if (!page) { | 2933 | if (!page) { |
2934 | btrfs_delalloc_release_metadata(inode, | ||
2935 | PAGE_CACHE_SIZE); | ||
2626 | ret = -ENOMEM; | 2936 | ret = -ENOMEM; |
2627 | goto out_unlock; | 2937 | goto out; |
2628 | } | 2938 | } |
2629 | } | 2939 | } |
2630 | 2940 | ||
@@ -2640,8 +2950,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2640 | if (!PageUptodate(page)) { | 2950 | if (!PageUptodate(page)) { |
2641 | unlock_page(page); | 2951 | unlock_page(page); |
2642 | page_cache_release(page); | 2952 | page_cache_release(page); |
2953 | btrfs_delalloc_release_metadata(inode, | ||
2954 | PAGE_CACHE_SIZE); | ||
2643 | ret = -EIO; | 2955 | ret = -EIO; |
2644 | goto out_unlock; | 2956 | goto out; |
2645 | } | 2957 | } |
2646 | } | 2958 | } |
2647 | 2959 | ||
@@ -2660,10 +2972,9 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2660 | EXTENT_BOUNDARY, GFP_NOFS); | 2972 | EXTENT_BOUNDARY, GFP_NOFS); |
2661 | nr++; | 2973 | nr++; |
2662 | } | 2974 | } |
2663 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
2664 | 2975 | ||
2976 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
2665 | set_page_dirty(page); | 2977 | set_page_dirty(page); |
2666 | dirty_page++; | ||
2667 | 2978 | ||
2668 | unlock_extent(&BTRFS_I(inode)->io_tree, | 2979 | unlock_extent(&BTRFS_I(inode)->io_tree, |
2669 | page_start, page_end, GFP_NOFS); | 2980 | page_start, page_end, GFP_NOFS); |
@@ -2671,20 +2982,11 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
2671 | page_cache_release(page); | 2982 | page_cache_release(page); |
2672 | 2983 | ||
2673 | index++; | 2984 | index++; |
2674 | if (nr < cluster->nr && | 2985 | balance_dirty_pages_ratelimited(inode->i_mapping); |
2675 | page_end + 1 + offset == cluster->boundary[nr]) { | 2986 | btrfs_throttle(BTRFS_I(inode)->root); |
2676 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2677 | dirty_page); | ||
2678 | dirty_page = 0; | ||
2679 | } | ||
2680 | } | ||
2681 | if (dirty_page) { | ||
2682 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
2683 | dirty_page); | ||
2684 | } | 2987 | } |
2685 | WARN_ON(nr != cluster->nr); | 2988 | WARN_ON(nr != cluster->nr); |
2686 | out_unlock: | 2989 | out: |
2687 | mutex_unlock(&inode->i_mutex); | ||
2688 | kfree(ra); | 2990 | kfree(ra); |
2689 | return ret; | 2991 | return ret; |
2690 | } | 2992 | } |
@@ -2870,9 +3172,6 @@ out: | |||
2870 | static int block_use_full_backref(struct reloc_control *rc, | 3172 | static int block_use_full_backref(struct reloc_control *rc, |
2871 | struct extent_buffer *eb) | 3173 | struct extent_buffer *eb) |
2872 | { | 3174 | { |
2873 | struct btrfs_path *path; | ||
2874 | struct btrfs_extent_item *ei; | ||
2875 | struct btrfs_key key; | ||
2876 | u64 flags; | 3175 | u64 flags; |
2877 | int ret; | 3176 | int ret; |
2878 | 3177 | ||
@@ -2880,28 +3179,14 @@ static int block_use_full_backref(struct reloc_control *rc, | |||
2880 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) | 3179 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) |
2881 | return 1; | 3180 | return 1; |
2882 | 3181 | ||
2883 | path = btrfs_alloc_path(); | 3182 | ret = btrfs_lookup_extent_info(NULL, rc->extent_root, |
2884 | BUG_ON(!path); | 3183 | eb->start, eb->len, NULL, &flags); |
2885 | |||
2886 | key.objectid = eb->start; | ||
2887 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
2888 | key.offset = eb->len; | ||
2889 | |||
2890 | path->search_commit_root = 1; | ||
2891 | path->skip_locking = 1; | ||
2892 | ret = btrfs_search_slot(NULL, rc->extent_root, | ||
2893 | &key, path, 0, 0); | ||
2894 | BUG_ON(ret); | 3184 | BUG_ON(ret); |
2895 | 3185 | ||
2896 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
2897 | struct btrfs_extent_item); | ||
2898 | flags = btrfs_extent_flags(path->nodes[0], ei); | ||
2899 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
2900 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) | 3186 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
2901 | ret = 1; | 3187 | ret = 1; |
2902 | else | 3188 | else |
2903 | ret = 0; | 3189 | ret = 0; |
2904 | btrfs_free_path(path); | ||
2905 | return ret; | 3190 | return ret; |
2906 | } | 3191 | } |
2907 | 3192 | ||
@@ -3074,22 +3359,10 @@ int add_data_references(struct reloc_control *rc, | |||
3074 | struct btrfs_extent_inline_ref *iref; | 3359 | struct btrfs_extent_inline_ref *iref; |
3075 | unsigned long ptr; | 3360 | unsigned long ptr; |
3076 | unsigned long end; | 3361 | unsigned long end; |
3077 | u32 blocksize; | 3362 | u32 blocksize = btrfs_level_size(rc->extent_root, 0); |
3078 | int ret; | 3363 | int ret; |
3079 | int err = 0; | 3364 | int err = 0; |
3080 | 3365 | ||
3081 | ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, | ||
3082 | extent_key->offset); | ||
3083 | BUG_ON(ret < 0); | ||
3084 | if (ret > 0) { | ||
3085 | /* the relocated data is fragmented */ | ||
3086 | rc->extents_skipped++; | ||
3087 | btrfs_release_path(rc->extent_root, path); | ||
3088 | return 0; | ||
3089 | } | ||
3090 | |||
3091 | blocksize = btrfs_level_size(rc->extent_root, 0); | ||
3092 | |||
3093 | eb = path->nodes[0]; | 3366 | eb = path->nodes[0]; |
3094 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); | 3367 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); |
3095 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); | 3368 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); |
@@ -3170,7 +3443,8 @@ int add_data_references(struct reloc_control *rc, | |||
3170 | */ | 3443 | */ |
3171 | static noinline_for_stack | 3444 | static noinline_for_stack |
3172 | int find_next_extent(struct btrfs_trans_handle *trans, | 3445 | int find_next_extent(struct btrfs_trans_handle *trans, |
3173 | struct reloc_control *rc, struct btrfs_path *path) | 3446 | struct reloc_control *rc, struct btrfs_path *path, |
3447 | struct btrfs_key *extent_key) | ||
3174 | { | 3448 | { |
3175 | struct btrfs_key key; | 3449 | struct btrfs_key key; |
3176 | struct extent_buffer *leaf; | 3450 | struct extent_buffer *leaf; |
@@ -3225,6 +3499,7 @@ next: | |||
3225 | rc->search_start = end + 1; | 3499 | rc->search_start = end + 1; |
3226 | } else { | 3500 | } else { |
3227 | rc->search_start = key.objectid + key.offset; | 3501 | rc->search_start = key.objectid + key.offset; |
3502 | memcpy(extent_key, &key, sizeof(key)); | ||
3228 | return 0; | 3503 | return 0; |
3229 | } | 3504 | } |
3230 | } | 3505 | } |
@@ -3262,12 +3537,49 @@ static int check_extent_flags(u64 flags) | |||
3262 | return 0; | 3537 | return 0; |
3263 | } | 3538 | } |
3264 | 3539 | ||
3540 | static noinline_for_stack | ||
3541 | int prepare_to_relocate(struct reloc_control *rc) | ||
3542 | { | ||
3543 | struct btrfs_trans_handle *trans; | ||
3544 | int ret; | ||
3545 | |||
3546 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | ||
3547 | if (!rc->block_rsv) | ||
3548 | return -ENOMEM; | ||
3549 | |||
3550 | /* | ||
3551 | * reserve some space for creating reloc trees. | ||
3552 | * btrfs_init_reloc_root will use them when there | ||
3553 | * is no reservation in transaction handle. | ||
3554 | */ | ||
3555 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | ||
3556 | rc->extent_root->nodesize * 256, | ||
3557 | &rc->block_rsv_retries); | ||
3558 | if (ret) | ||
3559 | return ret; | ||
3560 | |||
3561 | rc->block_rsv->refill_used = 1; | ||
3562 | btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); | ||
3563 | |||
3564 | memset(&rc->cluster, 0, sizeof(rc->cluster)); | ||
3565 | rc->search_start = rc->block_group->key.objectid; | ||
3566 | rc->extents_found = 0; | ||
3567 | rc->nodes_relocated = 0; | ||
3568 | rc->merging_rsv_size = 0; | ||
3569 | rc->block_rsv_retries = 0; | ||
3570 | |||
3571 | rc->create_reloc_tree = 1; | ||
3572 | set_reloc_control(rc); | ||
3573 | |||
3574 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
3575 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3576 | return 0; | ||
3577 | } | ||
3265 | 3578 | ||
3266 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3579 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
3267 | { | 3580 | { |
3268 | struct rb_root blocks = RB_ROOT; | 3581 | struct rb_root blocks = RB_ROOT; |
3269 | struct btrfs_key key; | 3582 | struct btrfs_key key; |
3270 | struct file_extent_cluster *cluster; | ||
3271 | struct btrfs_trans_handle *trans = NULL; | 3583 | struct btrfs_trans_handle *trans = NULL; |
3272 | struct btrfs_path *path; | 3584 | struct btrfs_path *path; |
3273 | struct btrfs_extent_item *ei; | 3585 | struct btrfs_extent_item *ei; |
@@ -3277,33 +3589,25 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3277 | int ret; | 3589 | int ret; |
3278 | int err = 0; | 3590 | int err = 0; |
3279 | 3591 | ||
3280 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
3281 | if (!cluster) | ||
3282 | return -ENOMEM; | ||
3283 | |||
3284 | path = btrfs_alloc_path(); | 3592 | path = btrfs_alloc_path(); |
3285 | if (!path) { | 3593 | if (!path) |
3286 | kfree(cluster); | ||
3287 | return -ENOMEM; | 3594 | return -ENOMEM; |
3288 | } | ||
3289 | |||
3290 | rc->extents_found = 0; | ||
3291 | rc->extents_skipped = 0; | ||
3292 | |||
3293 | rc->search_start = rc->block_group->key.objectid; | ||
3294 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
3295 | GFP_NOFS); | ||
3296 | |||
3297 | rc->create_reloc_root = 1; | ||
3298 | set_reloc_control(rc); | ||
3299 | 3595 | ||
3300 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3596 | ret = prepare_to_relocate(rc); |
3301 | btrfs_commit_transaction(trans, rc->extent_root); | 3597 | if (ret) { |
3598 | err = ret; | ||
3599 | goto out_free; | ||
3600 | } | ||
3302 | 3601 | ||
3303 | while (1) { | 3602 | while (1) { |
3304 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3603 | trans = btrfs_start_transaction(rc->extent_root, 0); |
3604 | |||
3605 | if (update_backref_cache(trans, &rc->backref_cache)) { | ||
3606 | btrfs_end_transaction(trans, rc->extent_root); | ||
3607 | continue; | ||
3608 | } | ||
3305 | 3609 | ||
3306 | ret = find_next_extent(trans, rc, path); | 3610 | ret = find_next_extent(trans, rc, path, &key); |
3307 | if (ret < 0) | 3611 | if (ret < 0) |
3308 | err = ret; | 3612 | err = ret; |
3309 | if (ret != 0) | 3613 | if (ret != 0) |
@@ -3313,9 +3617,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3313 | 3617 | ||
3314 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3618 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
3315 | struct btrfs_extent_item); | 3619 | struct btrfs_extent_item); |
3316 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | 3620 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); |
3317 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
3318 | path->slots[0]); | ||
3319 | if (item_size >= sizeof(*ei)) { | 3621 | if (item_size >= sizeof(*ei)) { |
3320 | flags = btrfs_extent_flags(path->nodes[0], ei); | 3622 | flags = btrfs_extent_flags(path->nodes[0], ei); |
3321 | ret = check_extent_flags(flags); | 3623 | ret = check_extent_flags(flags); |
@@ -3356,73 +3658,100 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
3356 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 3658 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
3357 | ret = add_tree_block(rc, &key, path, &blocks); | 3659 | ret = add_tree_block(rc, &key, path, &blocks); |
3358 | } else if (rc->stage == UPDATE_DATA_PTRS && | 3660 | } else if (rc->stage == UPDATE_DATA_PTRS && |
3359 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3661 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3360 | ret = add_data_references(rc, &key, path, &blocks); | 3662 | ret = add_data_references(rc, &key, path, &blocks); |
3361 | } else { | 3663 | } else { |
3362 | btrfs_release_path(rc->extent_root, path); | 3664 | btrfs_release_path(rc->extent_root, path); |
3363 | ret = 0; | 3665 | ret = 0; |
3364 | } | 3666 | } |
3365 | if (ret < 0) { | 3667 | if (ret < 0) { |
3366 | err = 0; | 3668 | err = ret; |
3367 | break; | 3669 | break; |
3368 | } | 3670 | } |
3369 | 3671 | ||
3370 | if (!RB_EMPTY_ROOT(&blocks)) { | 3672 | if (!RB_EMPTY_ROOT(&blocks)) { |
3371 | ret = relocate_tree_blocks(trans, rc, &blocks); | 3673 | ret = relocate_tree_blocks(trans, rc, &blocks); |
3372 | if (ret < 0) { | 3674 | if (ret < 0) { |
3675 | if (ret != -EAGAIN) { | ||
3676 | err = ret; | ||
3677 | break; | ||
3678 | } | ||
3679 | rc->extents_found--; | ||
3680 | rc->search_start = key.objectid; | ||
3681 | } | ||
3682 | } | ||
3683 | |||
3684 | ret = btrfs_block_rsv_check(trans, rc->extent_root, | ||
3685 | rc->block_rsv, 0, 5); | ||
3686 | if (ret < 0) { | ||
3687 | if (ret != -EAGAIN) { | ||
3373 | err = ret; | 3688 | err = ret; |
3689 | WARN_ON(1); | ||
3374 | break; | 3690 | break; |
3375 | } | 3691 | } |
3692 | rc->commit_transaction = 1; | ||
3376 | } | 3693 | } |
3377 | 3694 | ||
3378 | nr = trans->blocks_used; | 3695 | if (rc->commit_transaction) { |
3379 | btrfs_end_transaction(trans, rc->extent_root); | 3696 | rc->commit_transaction = 0; |
3697 | ret = btrfs_commit_transaction(trans, rc->extent_root); | ||
3698 | BUG_ON(ret); | ||
3699 | } else { | ||
3700 | nr = trans->blocks_used; | ||
3701 | btrfs_end_transaction_throttle(trans, rc->extent_root); | ||
3702 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3703 | } | ||
3380 | trans = NULL; | 3704 | trans = NULL; |
3381 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
3382 | 3705 | ||
3383 | if (rc->stage == MOVE_DATA_EXTENTS && | 3706 | if (rc->stage == MOVE_DATA_EXTENTS && |
3384 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3707 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
3385 | rc->found_file_extent = 1; | 3708 | rc->found_file_extent = 1; |
3386 | ret = relocate_data_extent(rc->data_inode, | 3709 | ret = relocate_data_extent(rc->data_inode, |
3387 | &key, cluster); | 3710 | &key, &rc->cluster); |
3388 | if (ret < 0) { | 3711 | if (ret < 0) { |
3389 | err = ret; | 3712 | err = ret; |
3390 | break; | 3713 | break; |
3391 | } | 3714 | } |
3392 | } | 3715 | } |
3393 | } | 3716 | } |
3394 | btrfs_free_path(path); | 3717 | |
3718 | btrfs_release_path(rc->extent_root, path); | ||
3719 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
3720 | GFP_NOFS); | ||
3395 | 3721 | ||
3396 | if (trans) { | 3722 | if (trans) { |
3397 | nr = trans->blocks_used; | 3723 | nr = trans->blocks_used; |
3398 | btrfs_end_transaction(trans, rc->extent_root); | 3724 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
3399 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3725 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
3400 | } | 3726 | } |
3401 | 3727 | ||
3402 | if (!err) { | 3728 | if (!err) { |
3403 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | 3729 | ret = relocate_file_extent_cluster(rc->data_inode, |
3730 | &rc->cluster); | ||
3404 | if (ret < 0) | 3731 | if (ret < 0) |
3405 | err = ret; | 3732 | err = ret; |
3406 | } | 3733 | } |
3407 | 3734 | ||
3408 | kfree(cluster); | 3735 | rc->create_reloc_tree = 0; |
3736 | set_reloc_control(rc); | ||
3409 | 3737 | ||
3410 | rc->create_reloc_root = 0; | 3738 | backref_cache_cleanup(&rc->backref_cache); |
3411 | smp_mb(); | 3739 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
3412 | 3740 | ||
3413 | if (rc->extents_found > 0) { | 3741 | err = prepare_to_merge(rc, err); |
3414 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3415 | btrfs_commit_transaction(trans, rc->extent_root); | ||
3416 | } | ||
3417 | 3742 | ||
3418 | merge_reloc_roots(rc); | 3743 | merge_reloc_roots(rc); |
3419 | 3744 | ||
3745 | rc->merge_reloc_tree = 0; | ||
3420 | unset_reloc_control(rc); | 3746 | unset_reloc_control(rc); |
3747 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | ||
3421 | 3748 | ||
3422 | /* get rid of pinned extents */ | 3749 | /* get rid of pinned extents */ |
3423 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3750 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3424 | btrfs_commit_transaction(trans, rc->extent_root); | 3751 | btrfs_commit_transaction(trans, rc->extent_root); |
3425 | 3752 | out_free: | |
3753 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | ||
3754 | btrfs_free_path(path); | ||
3426 | return err; | 3755 | return err; |
3427 | } | 3756 | } |
3428 | 3757 | ||
@@ -3448,7 +3777,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
3448 | btrfs_set_inode_generation(leaf, item, 1); | 3777 | btrfs_set_inode_generation(leaf, item, 1); |
3449 | btrfs_set_inode_size(leaf, item, 0); | 3778 | btrfs_set_inode_size(leaf, item, 0); |
3450 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3779 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
3451 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3780 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
3781 | BTRFS_INODE_PREALLOC); | ||
3452 | btrfs_mark_buffer_dirty(leaf); | 3782 | btrfs_mark_buffer_dirty(leaf); |
3453 | btrfs_release_path(root, path); | 3783 | btrfs_release_path(root, path); |
3454 | out: | 3784 | out: |
@@ -3460,8 +3790,9 @@ out: | |||
3460 | * helper to create inode for data relocation. | 3790 | * helper to create inode for data relocation. |
3461 | * the inode is in data relocation tree and its link count is 0 | 3791 | * the inode is in data relocation tree and its link count is 0 |
3462 | */ | 3792 | */ |
3463 | static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | 3793 | static noinline_for_stack |
3464 | struct btrfs_block_group_cache *group) | 3794 | struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, |
3795 | struct btrfs_block_group_cache *group) | ||
3465 | { | 3796 | { |
3466 | struct inode *inode = NULL; | 3797 | struct inode *inode = NULL; |
3467 | struct btrfs_trans_handle *trans; | 3798 | struct btrfs_trans_handle *trans; |
@@ -3475,8 +3806,9 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3475 | if (IS_ERR(root)) | 3806 | if (IS_ERR(root)) |
3476 | return ERR_CAST(root); | 3807 | return ERR_CAST(root); |
3477 | 3808 | ||
3478 | trans = btrfs_start_transaction(root, 1); | 3809 | trans = btrfs_start_transaction(root, 6); |
3479 | BUG_ON(!trans); | 3810 | if (IS_ERR(trans)) |
3811 | return ERR_CAST(trans); | ||
3480 | 3812 | ||
3481 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 3813 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); |
3482 | if (err) | 3814 | if (err) |
@@ -3496,7 +3828,6 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
3496 | out: | 3828 | out: |
3497 | nr = trans->blocks_used; | 3829 | nr = trans->blocks_used; |
3498 | btrfs_end_transaction(trans, root); | 3830 | btrfs_end_transaction(trans, root); |
3499 | |||
3500 | btrfs_btree_balance_dirty(root, nr); | 3831 | btrfs_btree_balance_dirty(root, nr); |
3501 | if (err) { | 3832 | if (err) { |
3502 | if (inode) | 3833 | if (inode) |
@@ -3506,6 +3837,21 @@ out: | |||
3506 | return inode; | 3837 | return inode; |
3507 | } | 3838 | } |
3508 | 3839 | ||
3840 | static struct reloc_control *alloc_reloc_control(void) | ||
3841 | { | ||
3842 | struct reloc_control *rc; | ||
3843 | |||
3844 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
3845 | if (!rc) | ||
3846 | return NULL; | ||
3847 | |||
3848 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3849 | backref_cache_init(&rc->backref_cache); | ||
3850 | mapping_tree_init(&rc->reloc_root_tree); | ||
3851 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3852 | return rc; | ||
3853 | } | ||
3854 | |||
3509 | /* | 3855 | /* |
3510 | * function to relocate all extents in a block group. | 3856 | * function to relocate all extents in a block group. |
3511 | */ | 3857 | */ |
@@ -3514,24 +3860,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3514 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3860 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
3515 | struct reloc_control *rc; | 3861 | struct reloc_control *rc; |
3516 | int ret; | 3862 | int ret; |
3863 | int rw = 0; | ||
3517 | int err = 0; | 3864 | int err = 0; |
3518 | 3865 | ||
3519 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 3866 | rc = alloc_reloc_control(); |
3520 | if (!rc) | 3867 | if (!rc) |
3521 | return -ENOMEM; | 3868 | return -ENOMEM; |
3522 | 3869 | ||
3523 | mapping_tree_init(&rc->reloc_root_tree); | 3870 | rc->extent_root = extent_root; |
3524 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
3525 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3526 | 3871 | ||
3527 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); | 3872 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); |
3528 | BUG_ON(!rc->block_group); | 3873 | BUG_ON(!rc->block_group); |
3529 | 3874 | ||
3530 | btrfs_init_workers(&rc->workers, "relocate", | 3875 | if (!rc->block_group->ro) { |
3531 | fs_info->thread_pool_size, NULL); | 3876 | ret = btrfs_set_block_group_ro(extent_root, rc->block_group); |
3532 | 3877 | if (ret) { | |
3533 | rc->extent_root = extent_root; | 3878 | err = ret; |
3534 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3879 | goto out; |
3880 | } | ||
3881 | rw = 1; | ||
3882 | } | ||
3535 | 3883 | ||
3536 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); | 3884 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); |
3537 | if (IS_ERR(rc->data_inode)) { | 3885 | if (IS_ERR(rc->data_inode)) { |
@@ -3548,9 +3896,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3548 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 3896 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); |
3549 | 3897 | ||
3550 | while (1) { | 3898 | while (1) { |
3551 | rc->extents_found = 0; | ||
3552 | rc->extents_skipped = 0; | ||
3553 | |||
3554 | mutex_lock(&fs_info->cleaner_mutex); | 3899 | mutex_lock(&fs_info->cleaner_mutex); |
3555 | 3900 | ||
3556 | btrfs_clean_old_snapshots(fs_info->tree_root); | 3901 | btrfs_clean_old_snapshots(fs_info->tree_root); |
@@ -3559,7 +3904,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3559 | mutex_unlock(&fs_info->cleaner_mutex); | 3904 | mutex_unlock(&fs_info->cleaner_mutex); |
3560 | if (ret < 0) { | 3905 | if (ret < 0) { |
3561 | err = ret; | 3906 | err = ret; |
3562 | break; | 3907 | goto out; |
3563 | } | 3908 | } |
3564 | 3909 | ||
3565 | if (rc->extents_found == 0) | 3910 | if (rc->extents_found == 0) |
@@ -3573,18 +3918,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3573 | invalidate_mapping_pages(rc->data_inode->i_mapping, | 3918 | invalidate_mapping_pages(rc->data_inode->i_mapping, |
3574 | 0, -1); | 3919 | 0, -1); |
3575 | rc->stage = UPDATE_DATA_PTRS; | 3920 | rc->stage = UPDATE_DATA_PTRS; |
3576 | } else if (rc->stage == UPDATE_DATA_PTRS && | ||
3577 | rc->extents_skipped >= rc->extents_found) { | ||
3578 | iput(rc->data_inode); | ||
3579 | rc->data_inode = create_reloc_inode(fs_info, | ||
3580 | rc->block_group); | ||
3581 | if (IS_ERR(rc->data_inode)) { | ||
3582 | err = PTR_ERR(rc->data_inode); | ||
3583 | rc->data_inode = NULL; | ||
3584 | break; | ||
3585 | } | ||
3586 | rc->stage = MOVE_DATA_EXTENTS; | ||
3587 | rc->found_file_extent = 0; | ||
3588 | } | 3921 | } |
3589 | } | 3922 | } |
3590 | 3923 | ||
@@ -3597,8 +3930,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
3597 | WARN_ON(rc->block_group->reserved > 0); | 3930 | WARN_ON(rc->block_group->reserved > 0); |
3598 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); | 3931 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); |
3599 | out: | 3932 | out: |
3933 | if (err && rw) | ||
3934 | btrfs_set_block_group_rw(extent_root, rc->block_group); | ||
3600 | iput(rc->data_inode); | 3935 | iput(rc->data_inode); |
3601 | btrfs_stop_workers(&rc->workers); | ||
3602 | btrfs_put_block_group(rc->block_group); | 3936 | btrfs_put_block_group(rc->block_group); |
3603 | kfree(rc); | 3937 | kfree(rc); |
3604 | return err; | 3938 | return err; |
@@ -3609,7 +3943,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
3609 | struct btrfs_trans_handle *trans; | 3943 | struct btrfs_trans_handle *trans; |
3610 | int ret; | 3944 | int ret; |
3611 | 3945 | ||
3612 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | 3946 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
3613 | 3947 | ||
3614 | memset(&root->root_item.drop_progress, 0, | 3948 | memset(&root->root_item.drop_progress, 0, |
3615 | sizeof(root->root_item.drop_progress)); | 3949 | sizeof(root->root_item.drop_progress)); |
@@ -3702,20 +4036,20 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3702 | if (list_empty(&reloc_roots)) | 4036 | if (list_empty(&reloc_roots)) |
3703 | goto out; | 4037 | goto out; |
3704 | 4038 | ||
3705 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 4039 | rc = alloc_reloc_control(); |
3706 | if (!rc) { | 4040 | if (!rc) { |
3707 | err = -ENOMEM; | 4041 | err = -ENOMEM; |
3708 | goto out; | 4042 | goto out; |
3709 | } | 4043 | } |
3710 | 4044 | ||
3711 | mapping_tree_init(&rc->reloc_root_tree); | ||
3712 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
3713 | btrfs_init_workers(&rc->workers, "relocate", | ||
3714 | root->fs_info->thread_pool_size, NULL); | ||
3715 | rc->extent_root = root->fs_info->extent_root; | 4045 | rc->extent_root = root->fs_info->extent_root; |
3716 | 4046 | ||
3717 | set_reloc_control(rc); | 4047 | set_reloc_control(rc); |
3718 | 4048 | ||
4049 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
4050 | |||
4051 | rc->merge_reloc_tree = 1; | ||
4052 | |||
3719 | while (!list_empty(&reloc_roots)) { | 4053 | while (!list_empty(&reloc_roots)) { |
3720 | reloc_root = list_entry(reloc_roots.next, | 4054 | reloc_root = list_entry(reloc_roots.next, |
3721 | struct btrfs_root, root_list); | 4055 | struct btrfs_root, root_list); |
@@ -3735,20 +4069,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
3735 | fs_root->reloc_root = reloc_root; | 4069 | fs_root->reloc_root = reloc_root; |
3736 | } | 4070 | } |
3737 | 4071 | ||
3738 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
3739 | btrfs_commit_transaction(trans, rc->extent_root); | 4072 | btrfs_commit_transaction(trans, rc->extent_root); |
3740 | 4073 | ||
3741 | merge_reloc_roots(rc); | 4074 | merge_reloc_roots(rc); |
3742 | 4075 | ||
3743 | unset_reloc_control(rc); | 4076 | unset_reloc_control(rc); |
3744 | 4077 | ||
3745 | trans = btrfs_start_transaction(rc->extent_root, 1); | 4078 | trans = btrfs_join_transaction(rc->extent_root, 1); |
3746 | btrfs_commit_transaction(trans, rc->extent_root); | 4079 | btrfs_commit_transaction(trans, rc->extent_root); |
3747 | out: | 4080 | out: |
3748 | if (rc) { | 4081 | kfree(rc); |
3749 | btrfs_stop_workers(&rc->workers); | ||
3750 | kfree(rc); | ||
3751 | } | ||
3752 | while (!list_empty(&reloc_roots)) { | 4082 | while (!list_empty(&reloc_roots)) { |
3753 | reloc_root = list_entry(reloc_roots.next, | 4083 | reloc_root = list_entry(reloc_roots.next, |
3754 | struct btrfs_root, root_list); | 4084 | struct btrfs_root, root_list); |
@@ -3814,3 +4144,130 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
3814 | btrfs_put_ordered_extent(ordered); | 4144 | btrfs_put_ordered_extent(ordered); |
3815 | return 0; | 4145 | return 0; |
3816 | } | 4146 | } |
4147 | |||
4148 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
4149 | struct btrfs_root *root, struct extent_buffer *buf, | ||
4150 | struct extent_buffer *cow) | ||
4151 | { | ||
4152 | struct reloc_control *rc; | ||
4153 | struct backref_node *node; | ||
4154 | int first_cow = 0; | ||
4155 | int level; | ||
4156 | int ret; | ||
4157 | |||
4158 | rc = root->fs_info->reloc_ctl; | ||
4159 | if (!rc) | ||
4160 | return; | ||
4161 | |||
4162 | BUG_ON(rc->stage == UPDATE_DATA_PTRS && | ||
4163 | root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); | ||
4164 | |||
4165 | level = btrfs_header_level(buf); | ||
4166 | if (btrfs_header_generation(buf) <= | ||
4167 | btrfs_root_last_snapshot(&root->root_item)) | ||
4168 | first_cow = 1; | ||
4169 | |||
4170 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && | ||
4171 | rc->create_reloc_tree) { | ||
4172 | WARN_ON(!first_cow && level == 0); | ||
4173 | |||
4174 | node = rc->backref_cache.path[level]; | ||
4175 | BUG_ON(node->bytenr != buf->start && | ||
4176 | node->new_bytenr != buf->start); | ||
4177 | |||
4178 | drop_node_buffer(node); | ||
4179 | extent_buffer_get(cow); | ||
4180 | node->eb = cow; | ||
4181 | node->new_bytenr = cow->start; | ||
4182 | |||
4183 | if (!node->pending) { | ||
4184 | list_move_tail(&node->list, | ||
4185 | &rc->backref_cache.pending[level]); | ||
4186 | node->pending = 1; | ||
4187 | } | ||
4188 | |||
4189 | if (first_cow) | ||
4190 | __mark_block_processed(rc, node); | ||
4191 | |||
4192 | if (first_cow && level > 0) | ||
4193 | rc->nodes_relocated += buf->len; | ||
4194 | } | ||
4195 | |||
4196 | if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { | ||
4197 | ret = replace_file_extents(trans, rc, root, cow); | ||
4198 | BUG_ON(ret); | ||
4199 | } | ||
4200 | } | ||
4201 | |||
4202 | /* | ||
4203 | * called before creating snapshot. it calculates metadata reservation | ||
4204 | * required for relocating tree blocks in the snapshot | ||
4205 | */ | ||
4206 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
4207 | struct btrfs_pending_snapshot *pending, | ||
4208 | u64 *bytes_to_reserve) | ||
4209 | { | ||
4210 | struct btrfs_root *root; | ||
4211 | struct reloc_control *rc; | ||
4212 | |||
4213 | root = pending->root; | ||
4214 | if (!root->reloc_root) | ||
4215 | return; | ||
4216 | |||
4217 | rc = root->fs_info->reloc_ctl; | ||
4218 | if (!rc->merge_reloc_tree) | ||
4219 | return; | ||
4220 | |||
4221 | root = root->reloc_root; | ||
4222 | BUG_ON(btrfs_root_refs(&root->root_item) == 0); | ||
4223 | /* | ||
4224 | * relocation is in the stage of merging trees. the space | ||
4225 | * used by merging a reloc tree is twice the size of | ||
4226 | * relocated tree nodes in the worst case. half for cowing | ||
4227 | * the reloc tree, half for cowing the fs tree. the space | ||
4228 | * used by cowing the reloc tree will be freed after the | ||
4229 | * tree is dropped. if we create snapshot, cowing the fs | ||
4230 | * tree may use more space than it frees. so we need | ||
4231 | * reserve extra space. | ||
4232 | */ | ||
4233 | *bytes_to_reserve += rc->nodes_relocated; | ||
4234 | } | ||
4235 | |||
4236 | /* | ||
4237 | * called after snapshot is created. migrate block reservation | ||
4238 | * and create reloc root for the newly created snapshot | ||
4239 | */ | ||
4240 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
4241 | struct btrfs_pending_snapshot *pending) | ||
4242 | { | ||
4243 | struct btrfs_root *root = pending->root; | ||
4244 | struct btrfs_root *reloc_root; | ||
4245 | struct btrfs_root *new_root; | ||
4246 | struct reloc_control *rc; | ||
4247 | int ret; | ||
4248 | |||
4249 | if (!root->reloc_root) | ||
4250 | return; | ||
4251 | |||
4252 | rc = root->fs_info->reloc_ctl; | ||
4253 | rc->merging_rsv_size += rc->nodes_relocated; | ||
4254 | |||
4255 | if (rc->merge_reloc_tree) { | ||
4256 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
4257 | rc->block_rsv, | ||
4258 | rc->nodes_relocated); | ||
4259 | BUG_ON(ret); | ||
4260 | } | ||
4261 | |||
4262 | new_root = pending->snap; | ||
4263 | reloc_root = create_reloc_root(trans, root->reloc_root, | ||
4264 | new_root->root_key.objectid); | ||
4265 | |||
4266 | __add_reloc_root(reloc_root); | ||
4267 | new_root->reloc_root = reloc_root; | ||
4268 | |||
4269 | if (rc->create_reloc_tree) { | ||
4270 | ret = clone_backref_node(trans, rc, root, reloc_root); | ||
4271 | BUG_ON(ret); | ||
4272 | } | ||
4273 | } | ||
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 67fa2d29d663..b91ccd972644 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
@@ -259,6 +259,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
259 | struct extent_buffer *leaf; | 259 | struct extent_buffer *leaf; |
260 | struct btrfs_path *path; | 260 | struct btrfs_path *path; |
261 | struct btrfs_key key; | 261 | struct btrfs_key key; |
262 | struct btrfs_key root_key; | ||
263 | struct btrfs_root *root; | ||
262 | int err = 0; | 264 | int err = 0; |
263 | int ret; | 265 | int ret; |
264 | 266 | ||
@@ -270,6 +272,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
270 | key.type = BTRFS_ORPHAN_ITEM_KEY; | 272 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
271 | key.offset = 0; | 273 | key.offset = 0; |
272 | 274 | ||
275 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
276 | root_key.offset = (u64)-1; | ||
277 | |||
273 | while (1) { | 278 | while (1) { |
274 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | 279 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); |
275 | if (ret < 0) { | 280 | if (ret < 0) { |
@@ -294,13 +299,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
294 | key.type != BTRFS_ORPHAN_ITEM_KEY) | 299 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
295 | break; | 300 | break; |
296 | 301 | ||
297 | ret = btrfs_find_dead_roots(tree_root, key.offset); | 302 | root_key.objectid = key.offset; |
298 | if (ret) { | 303 | key.offset++; |
304 | |||
305 | root = btrfs_read_fs_root_no_name(tree_root->fs_info, | ||
306 | &root_key); | ||
307 | if (!IS_ERR(root)) | ||
308 | continue; | ||
309 | |||
310 | ret = PTR_ERR(root); | ||
311 | if (ret != -ENOENT) { | ||
299 | err = ret; | 312 | err = ret; |
300 | break; | 313 | break; |
301 | } | 314 | } |
302 | 315 | ||
303 | key.offset++; | 316 | ret = btrfs_find_dead_roots(tree_root, root_key.objectid); |
317 | if (ret) { | ||
318 | err = ret; | ||
319 | break; | ||
320 | } | ||
304 | } | 321 | } |
305 | 322 | ||
306 | btrfs_free_path(path); | 323 | btrfs_free_path(path); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2909a03e5230..d34b2dfc9628 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
@@ -498,7 +498,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
498 | btrfs_start_delalloc_inodes(root, 0); | 498 | btrfs_start_delalloc_inodes(root, 0); |
499 | btrfs_wait_ordered_extents(root, 0, 0); | 499 | btrfs_wait_ordered_extents(root, 0, 0); |
500 | 500 | ||
501 | trans = btrfs_start_transaction(root, 1); | 501 | trans = btrfs_start_transaction(root, 0); |
502 | ret = btrfs_commit_transaction(trans, root); | 502 | ret = btrfs_commit_transaction(trans, root); |
503 | return ret; | 503 | return ret; |
504 | } | 504 | } |
@@ -694,11 +694,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
694 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 694 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) |
695 | return -EINVAL; | 695 | return -EINVAL; |
696 | 696 | ||
697 | /* recover relocation */ | 697 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
698 | ret = btrfs_recover_relocation(root); | ||
699 | WARN_ON(ret); | 698 | WARN_ON(ret); |
700 | 699 | ||
701 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 700 | /* recover relocation */ |
701 | ret = btrfs_recover_relocation(root); | ||
702 | WARN_ON(ret); | 702 | WARN_ON(ret); |
703 | 703 | ||
704 | sb->s_flags &= ~MS_RDONLY; | 704 | sb->s_flags &= ~MS_RDONLY; |
@@ -714,34 +714,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
714 | struct list_head *head = &root->fs_info->space_info; | 714 | struct list_head *head = &root->fs_info->space_info; |
715 | struct btrfs_space_info *found; | 715 | struct btrfs_space_info *found; |
716 | u64 total_used = 0; | 716 | u64 total_used = 0; |
717 | u64 data_used = 0; | ||
718 | int bits = dentry->d_sb->s_blocksize_bits; | 717 | int bits = dentry->d_sb->s_blocksize_bits; |
719 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 718 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
720 | 719 | ||
721 | rcu_read_lock(); | 720 | rcu_read_lock(); |
722 | list_for_each_entry_rcu(found, head, list) { | 721 | list_for_each_entry_rcu(found, head, list) |
723 | if (found->flags & (BTRFS_BLOCK_GROUP_DUP| | 722 | total_used += found->disk_used; |
724 | BTRFS_BLOCK_GROUP_RAID10| | ||
725 | BTRFS_BLOCK_GROUP_RAID1)) { | ||
726 | total_used += found->bytes_used; | ||
727 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
728 | data_used += found->bytes_used; | ||
729 | else | ||
730 | data_used += found->total_bytes; | ||
731 | } | ||
732 | |||
733 | total_used += found->bytes_used; | ||
734 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
735 | data_used += found->bytes_used; | ||
736 | else | ||
737 | data_used += found->total_bytes; | ||
738 | } | ||
739 | rcu_read_unlock(); | 723 | rcu_read_unlock(); |
740 | 724 | ||
741 | buf->f_namelen = BTRFS_NAME_LEN; | 725 | buf->f_namelen = BTRFS_NAME_LEN; |
742 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 726 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
743 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 727 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
744 | buf->f_bavail = buf->f_blocks - (data_used >> bits); | 728 | buf->f_bavail = buf->f_bfree; |
745 | buf->f_bsize = dentry->d_sb->s_blocksize; | 729 | buf->f_bsize = dentry->d_sb->s_blocksize; |
746 | buf->f_type = BTRFS_SUPER_MAGIC; | 730 | buf->f_type = BTRFS_SUPER_MAGIC; |
747 | 731 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2cb116099b90..66e4c66cc63b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
@@ -165,54 +165,89 @@ enum btrfs_trans_type { | |||
165 | TRANS_USERSPACE, | 165 | TRANS_USERSPACE, |
166 | }; | 166 | }; |
167 | 167 | ||
168 | static int may_wait_transaction(struct btrfs_root *root, int type) | ||
169 | { | ||
170 | if (!root->fs_info->log_root_recovering && | ||
171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
172 | type == TRANS_USERSPACE)) | ||
173 | return 1; | ||
174 | return 0; | ||
175 | } | ||
176 | |||
168 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 177 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
169 | int num_blocks, int type) | 178 | u64 num_items, int type) |
170 | { | 179 | { |
171 | struct btrfs_trans_handle *h = | 180 | struct btrfs_trans_handle *h; |
172 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 181 | struct btrfs_transaction *cur_trans; |
182 | int retries = 0; | ||
173 | int ret; | 183 | int ret; |
184 | again: | ||
185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | ||
186 | if (!h) | ||
187 | return ERR_PTR(-ENOMEM); | ||
174 | 188 | ||
175 | mutex_lock(&root->fs_info->trans_mutex); | 189 | mutex_lock(&root->fs_info->trans_mutex); |
176 | if (!root->fs_info->log_root_recovering && | 190 | if (may_wait_transaction(root, type)) |
177 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
178 | type == TRANS_USERSPACE)) | ||
179 | wait_current_trans(root); | 191 | wait_current_trans(root); |
192 | |||
180 | ret = join_transaction(root); | 193 | ret = join_transaction(root); |
181 | BUG_ON(ret); | 194 | BUG_ON(ret); |
182 | 195 | ||
183 | h->transid = root->fs_info->running_transaction->transid; | 196 | cur_trans = root->fs_info->running_transaction; |
184 | h->transaction = root->fs_info->running_transaction; | 197 | cur_trans->use_count++; |
185 | h->blocks_reserved = num_blocks; | 198 | mutex_unlock(&root->fs_info->trans_mutex); |
199 | |||
200 | h->transid = cur_trans->transid; | ||
201 | h->transaction = cur_trans; | ||
186 | h->blocks_used = 0; | 202 | h->blocks_used = 0; |
187 | h->block_group = 0; | 203 | h->block_group = 0; |
188 | h->alloc_exclude_nr = 0; | 204 | h->bytes_reserved = 0; |
189 | h->alloc_exclude_start = 0; | ||
190 | h->delayed_ref_updates = 0; | 205 | h->delayed_ref_updates = 0; |
206 | h->block_rsv = NULL; | ||
191 | 207 | ||
192 | if (!current->journal_info && type != TRANS_USERSPACE) | 208 | smp_mb(); |
193 | current->journal_info = h; | 209 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
210 | btrfs_commit_transaction(h, root); | ||
211 | goto again; | ||
212 | } | ||
213 | |||
214 | if (num_items > 0) { | ||
215 | ret = btrfs_trans_reserve_metadata(h, root, num_items, | ||
216 | &retries); | ||
217 | if (ret == -EAGAIN) { | ||
218 | btrfs_commit_transaction(h, root); | ||
219 | goto again; | ||
220 | } | ||
221 | if (ret < 0) { | ||
222 | btrfs_end_transaction(h, root); | ||
223 | return ERR_PTR(ret); | ||
224 | } | ||
225 | } | ||
194 | 226 | ||
195 | root->fs_info->running_transaction->use_count++; | 227 | mutex_lock(&root->fs_info->trans_mutex); |
196 | record_root_in_trans(h, root); | 228 | record_root_in_trans(h, root); |
197 | mutex_unlock(&root->fs_info->trans_mutex); | 229 | mutex_unlock(&root->fs_info->trans_mutex); |
230 | |||
231 | if (!current->journal_info && type != TRANS_USERSPACE) | ||
232 | current->journal_info = h; | ||
198 | return h; | 233 | return h; |
199 | } | 234 | } |
200 | 235 | ||
201 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 236 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
202 | int num_blocks) | 237 | int num_items) |
203 | { | 238 | { |
204 | return start_transaction(root, num_blocks, TRANS_START); | 239 | return start_transaction(root, num_items, TRANS_START); |
205 | } | 240 | } |
206 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 241 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
207 | int num_blocks) | 242 | int num_blocks) |
208 | { | 243 | { |
209 | return start_transaction(root, num_blocks, TRANS_JOIN); | 244 | return start_transaction(root, 0, TRANS_JOIN); |
210 | } | 245 | } |
211 | 246 | ||
212 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 247 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
213 | int num_blocks) | 248 | int num_blocks) |
214 | { | 249 | { |
215 | return start_transaction(r, num_blocks, TRANS_USERSPACE); | 250 | return start_transaction(r, 0, TRANS_USERSPACE); |
216 | } | 251 | } |
217 | 252 | ||
218 | /* wait for a transaction commit to be fully complete */ | 253 | /* wait for a transaction commit to be fully complete */ |
@@ -286,10 +321,36 @@ void btrfs_throttle(struct btrfs_root *root) | |||
286 | mutex_unlock(&root->fs_info->trans_mutex); | 321 | mutex_unlock(&root->fs_info->trans_mutex); |
287 | } | 322 | } |
288 | 323 | ||
324 | static int should_end_transaction(struct btrfs_trans_handle *trans, | ||
325 | struct btrfs_root *root) | ||
326 | { | ||
327 | int ret; | ||
328 | ret = btrfs_block_rsv_check(trans, root, | ||
329 | &root->fs_info->global_block_rsv, 0, 5); | ||
330 | return ret ? 1 : 0; | ||
331 | } | ||
332 | |||
333 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
334 | struct btrfs_root *root) | ||
335 | { | ||
336 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
337 | int updates; | ||
338 | |||
339 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | ||
340 | return 1; | ||
341 | |||
342 | updates = trans->delayed_ref_updates; | ||
343 | trans->delayed_ref_updates = 0; | ||
344 | if (updates) | ||
345 | btrfs_run_delayed_refs(trans, root, updates); | ||
346 | |||
347 | return should_end_transaction(trans, root); | ||
348 | } | ||
349 | |||
289 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 350 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
290 | struct btrfs_root *root, int throttle) | 351 | struct btrfs_root *root, int throttle) |
291 | { | 352 | { |
292 | struct btrfs_transaction *cur_trans; | 353 | struct btrfs_transaction *cur_trans = trans->transaction; |
293 | struct btrfs_fs_info *info = root->fs_info; | 354 | struct btrfs_fs_info *info = root->fs_info; |
294 | int count = 0; | 355 | int count = 0; |
295 | 356 | ||
@@ -313,9 +374,21 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
313 | count++; | 374 | count++; |
314 | } | 375 | } |
315 | 376 | ||
377 | btrfs_trans_release_metadata(trans, root); | ||
378 | |||
379 | if (!root->fs_info->open_ioctl_trans && | ||
380 | should_end_transaction(trans, root)) | ||
381 | trans->transaction->blocked = 1; | ||
382 | |||
383 | if (cur_trans->blocked && !cur_trans->in_commit) { | ||
384 | if (throttle) | ||
385 | return btrfs_commit_transaction(trans, root); | ||
386 | else | ||
387 | wake_up_process(info->transaction_kthread); | ||
388 | } | ||
389 | |||
316 | mutex_lock(&info->trans_mutex); | 390 | mutex_lock(&info->trans_mutex); |
317 | cur_trans = info->running_transaction; | 391 | WARN_ON(cur_trans != info->running_transaction); |
318 | WARN_ON(cur_trans != trans->transaction); | ||
319 | WARN_ON(cur_trans->num_writers < 1); | 392 | WARN_ON(cur_trans->num_writers < 1); |
320 | cur_trans->num_writers--; | 393 | cur_trans->num_writers--; |
321 | 394 | ||
@@ -603,6 +676,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
603 | 676 | ||
604 | btrfs_free_log(trans, root); | 677 | btrfs_free_log(trans, root); |
605 | btrfs_update_reloc_root(trans, root); | 678 | btrfs_update_reloc_root(trans, root); |
679 | btrfs_orphan_commit_root(trans, root); | ||
606 | 680 | ||
607 | if (root->commit_root != root->node) { | 681 | if (root->commit_root != root->node) { |
608 | switch_commit_root(root); | 682 | switch_commit_root(root); |
@@ -627,30 +701,30 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
627 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | 701 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) |
628 | { | 702 | { |
629 | struct btrfs_fs_info *info = root->fs_info; | 703 | struct btrfs_fs_info *info = root->fs_info; |
630 | int ret; | ||
631 | struct btrfs_trans_handle *trans; | 704 | struct btrfs_trans_handle *trans; |
705 | int ret; | ||
632 | unsigned long nr; | 706 | unsigned long nr; |
633 | 707 | ||
634 | smp_mb(); | 708 | if (xchg(&root->defrag_running, 1)) |
635 | if (root->defrag_running) | ||
636 | return 0; | 709 | return 0; |
637 | trans = btrfs_start_transaction(root, 1); | 710 | |
638 | while (1) { | 711 | while (1) { |
639 | root->defrag_running = 1; | 712 | trans = btrfs_start_transaction(root, 0); |
713 | if (IS_ERR(trans)) | ||
714 | return PTR_ERR(trans); | ||
715 | |||
640 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 716 | ret = btrfs_defrag_leaves(trans, root, cacheonly); |
717 | |||
641 | nr = trans->blocks_used; | 718 | nr = trans->blocks_used; |
642 | btrfs_end_transaction(trans, root); | 719 | btrfs_end_transaction(trans, root); |
643 | btrfs_btree_balance_dirty(info->tree_root, nr); | 720 | btrfs_btree_balance_dirty(info->tree_root, nr); |
644 | cond_resched(); | 721 | cond_resched(); |
645 | 722 | ||
646 | trans = btrfs_start_transaction(root, 1); | ||
647 | if (root->fs_info->closing || ret != -EAGAIN) | 723 | if (root->fs_info->closing || ret != -EAGAIN) |
648 | break; | 724 | break; |
649 | } | 725 | } |
650 | root->defrag_running = 0; | 726 | root->defrag_running = 0; |
651 | smp_mb(); | 727 | return ret; |
652 | btrfs_end_transaction(trans, root); | ||
653 | return 0; | ||
654 | } | 728 | } |
655 | 729 | ||
656 | #if 0 | 730 | #if 0 |
@@ -758,47 +832,63 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
758 | struct btrfs_root *root = pending->root; | 832 | struct btrfs_root *root = pending->root; |
759 | struct btrfs_root *parent_root; | 833 | struct btrfs_root *parent_root; |
760 | struct inode *parent_inode; | 834 | struct inode *parent_inode; |
835 | struct dentry *dentry; | ||
761 | struct extent_buffer *tmp; | 836 | struct extent_buffer *tmp; |
762 | struct extent_buffer *old; | 837 | struct extent_buffer *old; |
763 | int ret; | 838 | int ret; |
764 | u64 objectid; | 839 | int retries = 0; |
765 | int namelen; | 840 | u64 to_reserve = 0; |
766 | u64 index = 0; | 841 | u64 index = 0; |
767 | 842 | u64 objectid; | |
768 | parent_inode = pending->dentry->d_parent->d_inode; | ||
769 | parent_root = BTRFS_I(parent_inode)->root; | ||
770 | 843 | ||
771 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 844 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
772 | if (!new_root_item) { | 845 | if (!new_root_item) { |
773 | ret = -ENOMEM; | 846 | pending->error = -ENOMEM; |
774 | goto fail; | 847 | goto fail; |
775 | } | 848 | } |
849 | |||
776 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | 850 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); |
777 | if (ret) | 851 | if (ret) { |
852 | pending->error = ret; | ||
778 | goto fail; | 853 | goto fail; |
854 | } | ||
855 | |||
856 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | ||
857 | btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); | ||
858 | |||
859 | if (to_reserve > 0) { | ||
860 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | ||
861 | to_reserve, &retries); | ||
862 | if (ret) { | ||
863 | pending->error = ret; | ||
864 | goto fail; | ||
865 | } | ||
866 | } | ||
779 | 867 | ||
780 | key.objectid = objectid; | 868 | key.objectid = objectid; |
781 | /* record when the snapshot was created in key.offset */ | 869 | key.offset = (u64)-1; |
782 | key.offset = trans->transid; | 870 | key.type = BTRFS_ROOT_ITEM_KEY; |
783 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
784 | 871 | ||
785 | memcpy(&pending->root_key, &key, sizeof(key)); | 872 | trans->block_rsv = &pending->block_rsv; |
786 | pending->root_key.offset = (u64)-1; | ||
787 | 873 | ||
874 | dentry = pending->dentry; | ||
875 | parent_inode = dentry->d_parent->d_inode; | ||
876 | parent_root = BTRFS_I(parent_inode)->root; | ||
788 | record_root_in_trans(trans, parent_root); | 877 | record_root_in_trans(trans, parent_root); |
878 | |||
789 | /* | 879 | /* |
790 | * insert the directory item | 880 | * insert the directory item |
791 | */ | 881 | */ |
792 | namelen = strlen(pending->name); | ||
793 | ret = btrfs_set_inode_index(parent_inode, &index); | 882 | ret = btrfs_set_inode_index(parent_inode, &index); |
794 | BUG_ON(ret); | 883 | BUG_ON(ret); |
795 | ret = btrfs_insert_dir_item(trans, parent_root, | 884 | ret = btrfs_insert_dir_item(trans, parent_root, |
796 | pending->name, namelen, | 885 | dentry->d_name.name, dentry->d_name.len, |
797 | parent_inode->i_ino, | 886 | parent_inode->i_ino, &key, |
798 | &pending->root_key, BTRFS_FT_DIR, index); | 887 | BTRFS_FT_DIR, index); |
799 | BUG_ON(ret); | 888 | BUG_ON(ret); |
800 | 889 | ||
801 | btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); | 890 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
891 | dentry->d_name.len * 2); | ||
802 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 892 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
803 | BUG_ON(ret); | 893 | BUG_ON(ret); |
804 | 894 | ||
@@ -815,22 +905,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
815 | free_extent_buffer(old); | 905 | free_extent_buffer(old); |
816 | 906 | ||
817 | btrfs_set_root_node(new_root_item, tmp); | 907 | btrfs_set_root_node(new_root_item, tmp); |
818 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 908 | /* record when the snapshot was created in key.offset */ |
819 | new_root_item); | 909 | key.offset = trans->transid; |
820 | BUG_ON(ret); | 910 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); |
821 | btrfs_tree_unlock(tmp); | 911 | btrfs_tree_unlock(tmp); |
822 | free_extent_buffer(tmp); | 912 | free_extent_buffer(tmp); |
913 | BUG_ON(ret); | ||
823 | 914 | ||
824 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | 915 | /* |
825 | pending->root_key.objectid, | 916 | * insert root back/forward references |
917 | */ | ||
918 | ret = btrfs_add_root_ref(trans, tree_root, objectid, | ||
826 | parent_root->root_key.objectid, | 919 | parent_root->root_key.objectid, |
827 | parent_inode->i_ino, index, pending->name, | 920 | parent_inode->i_ino, index, |
828 | namelen); | 921 | dentry->d_name.name, dentry->d_name.len); |
829 | BUG_ON(ret); | 922 | BUG_ON(ret); |
830 | 923 | ||
924 | key.offset = (u64)-1; | ||
925 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
926 | BUG_ON(IS_ERR(pending->snap)); | ||
927 | |||
928 | btrfs_reloc_post_snapshot(trans, pending); | ||
929 | btrfs_orphan_post_snapshot(trans, pending); | ||
831 | fail: | 930 | fail: |
832 | kfree(new_root_item); | 931 | kfree(new_root_item); |
833 | return ret; | 932 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
933 | return 0; | ||
834 | } | 934 | } |
835 | 935 | ||
836 | /* | 936 | /* |
@@ -878,6 +978,16 @@ int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | |||
878 | return ret; | 978 | return ret; |
879 | } | 979 | } |
880 | 980 | ||
981 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | ||
982 | { | ||
983 | int ret = 0; | ||
984 | spin_lock(&info->new_trans_lock); | ||
985 | if (info->running_transaction) | ||
986 | ret = info->running_transaction->blocked; | ||
987 | spin_unlock(&info->new_trans_lock); | ||
988 | return ret; | ||
989 | } | ||
990 | |||
881 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 991 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
882 | struct btrfs_root *root) | 992 | struct btrfs_root *root) |
883 | { | 993 | { |
@@ -899,6 +1009,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
899 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1009 | ret = btrfs_run_delayed_refs(trans, root, 0); |
900 | BUG_ON(ret); | 1010 | BUG_ON(ret); |
901 | 1011 | ||
1012 | btrfs_trans_release_metadata(trans, root); | ||
1013 | |||
902 | cur_trans = trans->transaction; | 1014 | cur_trans = trans->transaction; |
903 | /* | 1015 | /* |
904 | * set the flushing flag so procs in this transaction have to | 1016 | * set the flushing flag so procs in this transaction have to |
@@ -951,9 +1063,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
951 | snap_pending = 1; | 1063 | snap_pending = 1; |
952 | 1064 | ||
953 | WARN_ON(cur_trans != trans->transaction); | 1065 | WARN_ON(cur_trans != trans->transaction); |
954 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
955 | TASK_UNINTERRUPTIBLE); | ||
956 | |||
957 | if (cur_trans->num_writers > 1) | 1066 | if (cur_trans->num_writers > 1) |
958 | timeout = MAX_SCHEDULE_TIMEOUT; | 1067 | timeout = MAX_SCHEDULE_TIMEOUT; |
959 | else if (should_grow) | 1068 | else if (should_grow) |
@@ -976,6 +1085,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
976 | */ | 1085 | */ |
977 | btrfs_run_ordered_operations(root, 1); | 1086 | btrfs_run_ordered_operations(root, 1); |
978 | 1087 | ||
1088 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
1089 | TASK_UNINTERRUPTIBLE); | ||
1090 | |||
979 | smp_mb(); | 1091 | smp_mb(); |
980 | if (cur_trans->num_writers > 1 || should_grow) | 1092 | if (cur_trans->num_writers > 1 || should_grow) |
981 | schedule_timeout(timeout); | 1093 | schedule_timeout(timeout); |
@@ -1103,9 +1215,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
1103 | 1215 | ||
1104 | if (btrfs_header_backref_rev(root->node) < | 1216 | if (btrfs_header_backref_rev(root->node) < |
1105 | BTRFS_MIXED_BACKREF_REV) | 1217 | BTRFS_MIXED_BACKREF_REV) |
1106 | btrfs_drop_snapshot(root, 0); | 1218 | btrfs_drop_snapshot(root, NULL, 0); |
1107 | else | 1219 | else |
1108 | btrfs_drop_snapshot(root, 1); | 1220 | btrfs_drop_snapshot(root, NULL, 1); |
1109 | } | 1221 | } |
1110 | return 0; | 1222 | return 0; |
1111 | } | 1223 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 93c7ccb33118..e104986d0bfd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
@@ -45,20 +45,23 @@ struct btrfs_transaction { | |||
45 | 45 | ||
46 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
47 | u64 transid; | 47 | u64 transid; |
48 | u64 block_group; | ||
49 | u64 bytes_reserved; | ||
48 | unsigned long blocks_reserved; | 50 | unsigned long blocks_reserved; |
49 | unsigned long blocks_used; | 51 | unsigned long blocks_used; |
50 | struct btrfs_transaction *transaction; | ||
51 | u64 block_group; | ||
52 | u64 alloc_exclude_start; | ||
53 | u64 alloc_exclude_nr; | ||
54 | unsigned long delayed_ref_updates; | 52 | unsigned long delayed_ref_updates; |
53 | struct btrfs_transaction *transaction; | ||
54 | struct btrfs_block_rsv *block_rsv; | ||
55 | }; | 55 | }; |
56 | 56 | ||
57 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
58 | struct dentry *dentry; | 58 | struct dentry *dentry; |
59 | struct btrfs_root *root; | 59 | struct btrfs_root *root; |
60 | char *name; | 60 | struct btrfs_root *snap; |
61 | struct btrfs_key root_key; | 61 | /* block reservation for the operation */ |
62 | struct btrfs_block_rsv block_rsv; | ||
63 | /* extra metadata reservation for relocation */ | ||
64 | int error; | ||
62 | struct list_head list; | 65 | struct list_head list; |
63 | }; | 66 | }; |
64 | 67 | ||
@@ -85,11 +88,11 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
85 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 88 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
86 | struct btrfs_root *root); | 89 | struct btrfs_root *root); |
87 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 90 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
88 | int num_blocks); | 91 | int num_items); |
89 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 92 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
90 | int num_blocks); | 93 | int num_blocks); |
91 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 94 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
92 | int num_blocks); | 95 | int num_blocks); |
93 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 96 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
94 | struct btrfs_root *root); | 97 | struct btrfs_root *root); |
95 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 98 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, |
@@ -103,6 +106,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
103 | struct btrfs_root *root); | 106 | struct btrfs_root *root); |
104 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 107 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
105 | struct btrfs_root *root); | 108 | struct btrfs_root *root); |
109 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
110 | struct btrfs_root *root); | ||
106 | void btrfs_throttle(struct btrfs_root *root); | 111 | void btrfs_throttle(struct btrfs_root *root); |
107 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 112 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
108 | struct btrfs_root *root); | 113 | struct btrfs_root *root); |
@@ -112,5 +117,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
112 | struct extent_io_tree *dirty_pages, int mark); | 117 | struct extent_io_tree *dirty_pages, int mark); |
113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 118 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
114 | struct extent_io_tree *dirty_pages, int mark); | 119 | struct extent_io_tree *dirty_pages, int mark); |
120 | int btrfs_transaction_blocked(struct btrfs_fs_info *info); | ||
115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 121 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
116 | #endif | 122 | #endif |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b10eacdb1620..f7ac8e013ed7 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
@@ -117,13 +117,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
117 | path->nodes[1], 0, | 117 | path->nodes[1], 0, |
118 | cache_only, &last_ret, | 118 | cache_only, &last_ret, |
119 | &root->defrag_progress); | 119 | &root->defrag_progress); |
120 | WARN_ON(ret && ret != -EAGAIN); | 120 | if (ret) { |
121 | WARN_ON(ret == -EAGAIN); | ||
122 | goto out; | ||
123 | } | ||
121 | if (next_key_ret == 0) { | 124 | if (next_key_ret == 0) { |
122 | memcpy(&root->defrag_progress, &key, sizeof(key)); | 125 | memcpy(&root->defrag_progress, &key, sizeof(key)); |
123 | ret = -EAGAIN; | 126 | ret = -EAGAIN; |
124 | } | 127 | } |
125 | |||
126 | btrfs_release_path(root, path); | ||
127 | out: | 128 | out: |
128 | if (path) | 129 | if (path) |
129 | btrfs_free_path(path); | 130 | btrfs_free_path(path); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index af57dd2b43d4..fb102a9aee9c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
@@ -135,6 +135,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
135 | struct btrfs_root *root) | 135 | struct btrfs_root *root) |
136 | { | 136 | { |
137 | int ret; | 137 | int ret; |
138 | int err = 0; | ||
138 | 139 | ||
139 | mutex_lock(&root->log_mutex); | 140 | mutex_lock(&root->log_mutex); |
140 | if (root->log_root) { | 141 | if (root->log_root) { |
@@ -155,17 +156,19 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
155 | mutex_lock(&root->fs_info->tree_log_mutex); | 156 | mutex_lock(&root->fs_info->tree_log_mutex); |
156 | if (!root->fs_info->log_root_tree) { | 157 | if (!root->fs_info->log_root_tree) { |
157 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 158 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
158 | BUG_ON(ret); | 159 | if (ret) |
160 | err = ret; | ||
159 | } | 161 | } |
160 | if (!root->log_root) { | 162 | if (err == 0 && !root->log_root) { |
161 | ret = btrfs_add_log_tree(trans, root); | 163 | ret = btrfs_add_log_tree(trans, root); |
162 | BUG_ON(ret); | 164 | if (ret) |
165 | err = ret; | ||
163 | } | 166 | } |
164 | mutex_unlock(&root->fs_info->tree_log_mutex); | 167 | mutex_unlock(&root->fs_info->tree_log_mutex); |
165 | root->log_batch++; | 168 | root->log_batch++; |
166 | atomic_inc(&root->log_writers); | 169 | atomic_inc(&root->log_writers); |
167 | mutex_unlock(&root->log_mutex); | 170 | mutex_unlock(&root->log_mutex); |
168 | return 0; | 171 | return err; |
169 | } | 172 | } |
170 | 173 | ||
171 | /* | 174 | /* |
@@ -376,7 +379,7 @@ insert: | |||
376 | BUG_ON(ret); | 379 | BUG_ON(ret); |
377 | } | 380 | } |
378 | } else if (ret) { | 381 | } else if (ret) { |
379 | BUG(); | 382 | return ret; |
380 | } | 383 | } |
381 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], | 384 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], |
382 | path->slots[0]); | 385 | path->slots[0]); |
@@ -1699,9 +1702,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1699 | 1702 | ||
1700 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1703 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
1701 | 1704 | ||
1702 | wc->process_func(root, next, wc, ptr_gen); | ||
1703 | |||
1704 | if (*level == 1) { | 1705 | if (*level == 1) { |
1706 | wc->process_func(root, next, wc, ptr_gen); | ||
1707 | |||
1705 | path->slots[*level]++; | 1708 | path->slots[*level]++; |
1706 | if (wc->free) { | 1709 | if (wc->free) { |
1707 | btrfs_read_buffer(next, ptr_gen); | 1710 | btrfs_read_buffer(next, ptr_gen); |
@@ -1734,35 +1737,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
1734 | WARN_ON(*level < 0); | 1737 | WARN_ON(*level < 0); |
1735 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 1738 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
1736 | 1739 | ||
1737 | if (path->nodes[*level] == root->node) | 1740 | path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); |
1738 | parent = path->nodes[*level]; | ||
1739 | else | ||
1740 | parent = path->nodes[*level + 1]; | ||
1741 | |||
1742 | bytenr = path->nodes[*level]->start; | ||
1743 | |||
1744 | blocksize = btrfs_level_size(root, *level); | ||
1745 | root_owner = btrfs_header_owner(parent); | ||
1746 | root_gen = btrfs_header_generation(parent); | ||
1747 | |||
1748 | wc->process_func(root, path->nodes[*level], wc, | ||
1749 | btrfs_header_generation(path->nodes[*level])); | ||
1750 | |||
1751 | if (wc->free) { | ||
1752 | next = path->nodes[*level]; | ||
1753 | btrfs_tree_lock(next); | ||
1754 | clean_tree_block(trans, root, next); | ||
1755 | btrfs_set_lock_blocking(next); | ||
1756 | btrfs_wait_tree_block_writeback(next); | ||
1757 | btrfs_tree_unlock(next); | ||
1758 | |||
1759 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | ||
1760 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); | ||
1761 | BUG_ON(ret); | ||
1762 | } | ||
1763 | free_extent_buffer(path->nodes[*level]); | ||
1764 | path->nodes[*level] = NULL; | ||
1765 | *level += 1; | ||
1766 | 1741 | ||
1767 | cond_resched(); | 1742 | cond_resched(); |
1768 | return 0; | 1743 | return 0; |
@@ -1781,7 +1756,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
1781 | 1756 | ||
1782 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 1757 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { |
1783 | slot = path->slots[i]; | 1758 | slot = path->slots[i]; |
1784 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 1759 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { |
1785 | struct extent_buffer *node; | 1760 | struct extent_buffer *node; |
1786 | node = path->nodes[i]; | 1761 | node = path->nodes[i]; |
1787 | path->slots[i]++; | 1762 | path->slots[i]++; |
@@ -2047,7 +2022,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2047 | mutex_unlock(&log_root_tree->log_mutex); | 2022 | mutex_unlock(&log_root_tree->log_mutex); |
2048 | 2023 | ||
2049 | ret = update_log_root(trans, log); | 2024 | ret = update_log_root(trans, log); |
2050 | BUG_ON(ret); | ||
2051 | 2025 | ||
2052 | mutex_lock(&log_root_tree->log_mutex); | 2026 | mutex_lock(&log_root_tree->log_mutex); |
2053 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { | 2027 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { |
@@ -2056,6 +2030,15 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
2056 | wake_up(&log_root_tree->log_writer_wait); | 2030 | wake_up(&log_root_tree->log_writer_wait); |
2057 | } | 2031 | } |
2058 | 2032 | ||
2033 | if (ret) { | ||
2034 | BUG_ON(ret != -ENOSPC); | ||
2035 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2036 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | ||
2037 | mutex_unlock(&log_root_tree->log_mutex); | ||
2038 | ret = -EAGAIN; | ||
2039 | goto out; | ||
2040 | } | ||
2041 | |||
2059 | index2 = log_root_tree->log_transid % 2; | 2042 | index2 = log_root_tree->log_transid % 2; |
2060 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2043 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
2061 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2044 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
@@ -2129,15 +2112,10 @@ out: | |||
2129 | return 0; | 2112 | return 0; |
2130 | } | 2113 | } |
2131 | 2114 | ||
2132 | /* | 2115 | static void free_log_tree(struct btrfs_trans_handle *trans, |
2133 | * free all the extents used by the tree log. This should be called | 2116 | struct btrfs_root *log) |
2134 | * at commit time of the full transaction | ||
2135 | */ | ||
2136 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
2137 | { | 2117 | { |
2138 | int ret; | 2118 | int ret; |
2139 | struct btrfs_root *log; | ||
2140 | struct key; | ||
2141 | u64 start; | 2119 | u64 start; |
2142 | u64 end; | 2120 | u64 end; |
2143 | struct walk_control wc = { | 2121 | struct walk_control wc = { |
@@ -2145,10 +2123,6 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2145 | .process_func = process_one_buffer | 2123 | .process_func = process_one_buffer |
2146 | }; | 2124 | }; |
2147 | 2125 | ||
2148 | if (!root->log_root || root->fs_info->log_root_recovering) | ||
2149 | return 0; | ||
2150 | |||
2151 | log = root->log_root; | ||
2152 | ret = walk_log_tree(trans, log, &wc); | 2126 | ret = walk_log_tree(trans, log, &wc); |
2153 | BUG_ON(ret); | 2127 | BUG_ON(ret); |
2154 | 2128 | ||
@@ -2162,14 +2136,30 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
2162 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | 2136 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
2163 | } | 2137 | } |
2164 | 2138 | ||
2165 | if (log->log_transid > 0) { | ||
2166 | ret = btrfs_del_root(trans, root->fs_info->log_root_tree, | ||
2167 | &log->root_key); | ||
2168 | BUG_ON(ret); | ||
2169 | } | ||
2170 | root->log_root = NULL; | ||
2171 | free_extent_buffer(log->node); | 2139 | free_extent_buffer(log->node); |
2172 | kfree(log); | 2140 | kfree(log); |
2141 | } | ||
2142 | |||
2143 | /* | ||
2144 | * free all the extents used by the tree log. This should be called | ||
2145 | * at commit time of the full transaction | ||
2146 | */ | ||
2147 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
2148 | { | ||
2149 | if (root->log_root) { | ||
2150 | free_log_tree(trans, root->log_root); | ||
2151 | root->log_root = NULL; | ||
2152 | } | ||
2153 | return 0; | ||
2154 | } | ||
2155 | |||
2156 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
2157 | struct btrfs_fs_info *fs_info) | ||
2158 | { | ||
2159 | if (fs_info->log_root_tree) { | ||
2160 | free_log_tree(trans, fs_info->log_root_tree); | ||
2161 | fs_info->log_root_tree = NULL; | ||
2162 | } | ||
2173 | return 0; | 2163 | return 0; |
2174 | } | 2164 | } |
2175 | 2165 | ||
@@ -2203,6 +2193,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2203 | struct btrfs_dir_item *di; | 2193 | struct btrfs_dir_item *di; |
2204 | struct btrfs_path *path; | 2194 | struct btrfs_path *path; |
2205 | int ret; | 2195 | int ret; |
2196 | int err = 0; | ||
2206 | int bytes_del = 0; | 2197 | int bytes_del = 0; |
2207 | 2198 | ||
2208 | if (BTRFS_I(dir)->logged_trans < trans->transid) | 2199 | if (BTRFS_I(dir)->logged_trans < trans->transid) |
@@ -2218,7 +2209,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2218 | path = btrfs_alloc_path(); | 2209 | path = btrfs_alloc_path(); |
2219 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2210 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, |
2220 | name, name_len, -1); | 2211 | name, name_len, -1); |
2221 | if (di && !IS_ERR(di)) { | 2212 | if (IS_ERR(di)) { |
2213 | err = PTR_ERR(di); | ||
2214 | goto fail; | ||
2215 | } | ||
2216 | if (di) { | ||
2222 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2217 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
2223 | bytes_del += name_len; | 2218 | bytes_del += name_len; |
2224 | BUG_ON(ret); | 2219 | BUG_ON(ret); |
@@ -2226,7 +2221,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2226 | btrfs_release_path(log, path); | 2221 | btrfs_release_path(log, path); |
2227 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, | 2222 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, |
2228 | index, name, name_len, -1); | 2223 | index, name, name_len, -1); |
2229 | if (di && !IS_ERR(di)) { | 2224 | if (IS_ERR(di)) { |
2225 | err = PTR_ERR(di); | ||
2226 | goto fail; | ||
2227 | } | ||
2228 | if (di) { | ||
2230 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2229 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
2231 | bytes_del += name_len; | 2230 | bytes_del += name_len; |
2232 | BUG_ON(ret); | 2231 | BUG_ON(ret); |
@@ -2244,6 +2243,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2244 | btrfs_release_path(log, path); | 2243 | btrfs_release_path(log, path); |
2245 | 2244 | ||
2246 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); | 2245 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); |
2246 | if (ret < 0) { | ||
2247 | err = ret; | ||
2248 | goto fail; | ||
2249 | } | ||
2247 | if (ret == 0) { | 2250 | if (ret == 0) { |
2248 | struct btrfs_inode_item *item; | 2251 | struct btrfs_inode_item *item; |
2249 | u64 i_size; | 2252 | u64 i_size; |
@@ -2261,9 +2264,13 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
2261 | ret = 0; | 2264 | ret = 0; |
2262 | btrfs_release_path(log, path); | 2265 | btrfs_release_path(log, path); |
2263 | } | 2266 | } |
2264 | 2267 | fail: | |
2265 | btrfs_free_path(path); | 2268 | btrfs_free_path(path); |
2266 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2269 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
2270 | if (ret == -ENOSPC) { | ||
2271 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2272 | ret = 0; | ||
2273 | } | ||
2267 | btrfs_end_log_trans(root); | 2274 | btrfs_end_log_trans(root); |
2268 | 2275 | ||
2269 | return 0; | 2276 | return 0; |
@@ -2291,6 +2298,10 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
2291 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2298 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
2292 | dirid, &index); | 2299 | dirid, &index); |
2293 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2300 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2301 | if (ret == -ENOSPC) { | ||
2302 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
2303 | ret = 0; | ||
2304 | } | ||
2294 | btrfs_end_log_trans(root); | 2305 | btrfs_end_log_trans(root); |
2295 | 2306 | ||
2296 | return ret; | 2307 | return ret; |
@@ -2318,7 +2329,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, | |||
2318 | else | 2329 | else |
2319 | key.type = BTRFS_DIR_LOG_INDEX_KEY; | 2330 | key.type = BTRFS_DIR_LOG_INDEX_KEY; |
2320 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); | 2331 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); |
2321 | BUG_ON(ret); | 2332 | if (ret) |
2333 | return ret; | ||
2322 | 2334 | ||
2323 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2335 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
2324 | struct btrfs_dir_log_item); | 2336 | struct btrfs_dir_log_item); |
@@ -2343,6 +2355,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2343 | struct btrfs_key max_key; | 2355 | struct btrfs_key max_key; |
2344 | struct btrfs_root *log = root->log_root; | 2356 | struct btrfs_root *log = root->log_root; |
2345 | struct extent_buffer *src; | 2357 | struct extent_buffer *src; |
2358 | int err = 0; | ||
2346 | int ret; | 2359 | int ret; |
2347 | int i; | 2360 | int i; |
2348 | int nritems; | 2361 | int nritems; |
@@ -2405,6 +2418,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2405 | ret = overwrite_item(trans, log, dst_path, | 2418 | ret = overwrite_item(trans, log, dst_path, |
2406 | path->nodes[0], path->slots[0], | 2419 | path->nodes[0], path->slots[0], |
2407 | &tmp); | 2420 | &tmp); |
2421 | if (ret) { | ||
2422 | err = ret; | ||
2423 | goto done; | ||
2424 | } | ||
2408 | } | 2425 | } |
2409 | } | 2426 | } |
2410 | btrfs_release_path(root, path); | 2427 | btrfs_release_path(root, path); |
@@ -2432,7 +2449,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2432 | goto done; | 2449 | goto done; |
2433 | ret = overwrite_item(trans, log, dst_path, src, i, | 2450 | ret = overwrite_item(trans, log, dst_path, src, i, |
2434 | &min_key); | 2451 | &min_key); |
2435 | BUG_ON(ret); | 2452 | if (ret) { |
2453 | err = ret; | ||
2454 | goto done; | ||
2455 | } | ||
2436 | } | 2456 | } |
2437 | path->slots[0] = nritems; | 2457 | path->slots[0] = nritems; |
2438 | 2458 | ||
@@ -2454,22 +2474,30 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
2454 | ret = overwrite_item(trans, log, dst_path, | 2474 | ret = overwrite_item(trans, log, dst_path, |
2455 | path->nodes[0], path->slots[0], | 2475 | path->nodes[0], path->slots[0], |
2456 | &tmp); | 2476 | &tmp); |
2457 | 2477 | if (ret) | |
2458 | BUG_ON(ret); | 2478 | err = ret; |
2459 | last_offset = tmp.offset; | 2479 | else |
2480 | last_offset = tmp.offset; | ||
2460 | goto done; | 2481 | goto done; |
2461 | } | 2482 | } |
2462 | } | 2483 | } |
2463 | done: | 2484 | done: |
2464 | *last_offset_ret = last_offset; | ||
2465 | btrfs_release_path(root, path); | 2485 | btrfs_release_path(root, path); |
2466 | btrfs_release_path(log, dst_path); | 2486 | btrfs_release_path(log, dst_path); |
2467 | 2487 | ||
2468 | /* insert the log range keys to indicate where the log is valid */ | 2488 | if (err == 0) { |
2469 | ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, | 2489 | *last_offset_ret = last_offset; |
2470 | first_offset, last_offset); | 2490 | /* |
2471 | BUG_ON(ret); | 2491 | * insert the log range keys to indicate where the log |
2472 | return 0; | 2492 | * is valid |
2493 | */ | ||
2494 | ret = insert_dir_log_key(trans, log, path, key_type, | ||
2495 | inode->i_ino, first_offset, | ||
2496 | last_offset); | ||
2497 | if (ret) | ||
2498 | err = ret; | ||
2499 | } | ||
2500 | return err; | ||
2473 | } | 2501 | } |
2474 | 2502 | ||
2475 | /* | 2503 | /* |
@@ -2501,7 +2529,8 @@ again: | |||
2501 | ret = log_dir_items(trans, root, inode, path, | 2529 | ret = log_dir_items(trans, root, inode, path, |
2502 | dst_path, key_type, min_key, | 2530 | dst_path, key_type, min_key, |
2503 | &max_key); | 2531 | &max_key); |
2504 | BUG_ON(ret); | 2532 | if (ret) |
2533 | return ret; | ||
2505 | if (max_key == (u64)-1) | 2534 | if (max_key == (u64)-1) |
2506 | break; | 2535 | break; |
2507 | min_key = max_key + 1; | 2536 | min_key = max_key + 1; |
@@ -2535,8 +2564,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2535 | 2564 | ||
2536 | while (1) { | 2565 | while (1) { |
2537 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); | 2566 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); |
2538 | 2567 | BUG_ON(ret == 0); | |
2539 | if (ret != 1) | 2568 | if (ret < 0) |
2540 | break; | 2569 | break; |
2541 | 2570 | ||
2542 | if (path->slots[0] == 0) | 2571 | if (path->slots[0] == 0) |
@@ -2554,7 +2583,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
2554 | btrfs_release_path(log, path); | 2583 | btrfs_release_path(log, path); |
2555 | } | 2584 | } |
2556 | btrfs_release_path(log, path); | 2585 | btrfs_release_path(log, path); |
2557 | return 0; | 2586 | return ret; |
2558 | } | 2587 | } |
2559 | 2588 | ||
2560 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2589 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
@@ -2587,7 +2616,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2587 | } | 2616 | } |
2588 | ret = btrfs_insert_empty_items(trans, log, dst_path, | 2617 | ret = btrfs_insert_empty_items(trans, log, dst_path, |
2589 | ins_keys, ins_sizes, nr); | 2618 | ins_keys, ins_sizes, nr); |
2590 | BUG_ON(ret); | 2619 | if (ret) { |
2620 | kfree(ins_data); | ||
2621 | return ret; | ||
2622 | } | ||
2591 | 2623 | ||
2592 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { | 2624 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { |
2593 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], | 2625 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], |
@@ -2660,16 +2692,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
2660 | * we have to do this after the loop above to avoid changing the | 2692 | * we have to do this after the loop above to avoid changing the |
2661 | * log tree while trying to change the log tree. | 2693 | * log tree while trying to change the log tree. |
2662 | */ | 2694 | */ |
2695 | ret = 0; | ||
2663 | while (!list_empty(&ordered_sums)) { | 2696 | while (!list_empty(&ordered_sums)) { |
2664 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, | 2697 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, |
2665 | struct btrfs_ordered_sum, | 2698 | struct btrfs_ordered_sum, |
2666 | list); | 2699 | list); |
2667 | ret = btrfs_csum_file_blocks(trans, log, sums); | 2700 | if (!ret) |
2668 | BUG_ON(ret); | 2701 | ret = btrfs_csum_file_blocks(trans, log, sums); |
2669 | list_del(&sums->list); | 2702 | list_del(&sums->list); |
2670 | kfree(sums); | 2703 | kfree(sums); |
2671 | } | 2704 | } |
2672 | return 0; | 2705 | return ret; |
2673 | } | 2706 | } |
2674 | 2707 | ||
2675 | /* log a single inode in the tree log. | 2708 | /* log a single inode in the tree log. |
@@ -2697,6 +2730,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2697 | struct btrfs_root *log = root->log_root; | 2730 | struct btrfs_root *log = root->log_root; |
2698 | struct extent_buffer *src = NULL; | 2731 | struct extent_buffer *src = NULL; |
2699 | u32 size; | 2732 | u32 size; |
2733 | int err = 0; | ||
2700 | int ret; | 2734 | int ret; |
2701 | int nritems; | 2735 | int nritems; |
2702 | int ins_start_slot = 0; | 2736 | int ins_start_slot = 0; |
@@ -2739,7 +2773,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
2739 | } else { | 2773 | } else { |
2740 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 2774 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); |
2741 | } | 2775 | } |
2742 | BUG_ON(ret); | 2776 | if (ret) { |
2777 | err = ret; | ||
2778 | goto out_unlock; | ||
2779 | } | ||
2743 | path->keep_locks = 1; | 2780 | path->keep_locks = 1; |
2744 | 2781 | ||
2745 | while (1) { | 2782 | while (1) { |
@@ -2768,7 +2805,10 @@ again: | |||
2768 | 2805 | ||
2769 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 2806 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, |
2770 | ins_nr, inode_only); | 2807 | ins_nr, inode_only); |
2771 | BUG_ON(ret); | 2808 | if (ret) { |
2809 | err = ret; | ||
2810 | goto out_unlock; | ||
2811 | } | ||
2772 | ins_nr = 1; | 2812 | ins_nr = 1; |
2773 | ins_start_slot = path->slots[0]; | 2813 | ins_start_slot = path->slots[0]; |
2774 | next_slot: | 2814 | next_slot: |
@@ -2784,7 +2824,10 @@ next_slot: | |||
2784 | ret = copy_items(trans, log, dst_path, src, | 2824 | ret = copy_items(trans, log, dst_path, src, |
2785 | ins_start_slot, | 2825 | ins_start_slot, |
2786 | ins_nr, inode_only); | 2826 | ins_nr, inode_only); |
2787 | BUG_ON(ret); | 2827 | if (ret) { |
2828 | err = ret; | ||
2829 | goto out_unlock; | ||
2830 | } | ||
2788 | ins_nr = 0; | 2831 | ins_nr = 0; |
2789 | } | 2832 | } |
2790 | btrfs_release_path(root, path); | 2833 | btrfs_release_path(root, path); |
@@ -2802,7 +2845,10 @@ next_slot: | |||
2802 | ret = copy_items(trans, log, dst_path, src, | 2845 | ret = copy_items(trans, log, dst_path, src, |
2803 | ins_start_slot, | 2846 | ins_start_slot, |
2804 | ins_nr, inode_only); | 2847 | ins_nr, inode_only); |
2805 | BUG_ON(ret); | 2848 | if (ret) { |
2849 | err = ret; | ||
2850 | goto out_unlock; | ||
2851 | } | ||
2806 | ins_nr = 0; | 2852 | ins_nr = 0; |
2807 | } | 2853 | } |
2808 | WARN_ON(ins_nr); | 2854 | WARN_ON(ins_nr); |
@@ -2810,14 +2856,18 @@ next_slot: | |||
2810 | btrfs_release_path(root, path); | 2856 | btrfs_release_path(root, path); |
2811 | btrfs_release_path(log, dst_path); | 2857 | btrfs_release_path(log, dst_path); |
2812 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2858 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
2813 | BUG_ON(ret); | 2859 | if (ret) { |
2860 | err = ret; | ||
2861 | goto out_unlock; | ||
2862 | } | ||
2814 | } | 2863 | } |
2815 | BTRFS_I(inode)->logged_trans = trans->transid; | 2864 | BTRFS_I(inode)->logged_trans = trans->transid; |
2865 | out_unlock: | ||
2816 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2866 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
2817 | 2867 | ||
2818 | btrfs_free_path(path); | 2868 | btrfs_free_path(path); |
2819 | btrfs_free_path(dst_path); | 2869 | btrfs_free_path(dst_path); |
2820 | return 0; | 2870 | return err; |
2821 | } | 2871 | } |
2822 | 2872 | ||
2823 | /* | 2873 | /* |
@@ -2942,10 +2992,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2942 | goto end_no_trans; | 2992 | goto end_no_trans; |
2943 | } | 2993 | } |
2944 | 2994 | ||
2945 | start_log_trans(trans, root); | 2995 | ret = start_log_trans(trans, root); |
2996 | if (ret) | ||
2997 | goto end_trans; | ||
2946 | 2998 | ||
2947 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2999 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2948 | BUG_ON(ret); | 3000 | if (ret) |
3001 | goto end_trans; | ||
2949 | 3002 | ||
2950 | /* | 3003 | /* |
2951 | * for regular files, if its inode is already on disk, we don't | 3004 | * for regular files, if its inode is already on disk, we don't |
@@ -2955,8 +3008,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2955 | */ | 3008 | */ |
2956 | if (S_ISREG(inode->i_mode) && | 3009 | if (S_ISREG(inode->i_mode) && |
2957 | BTRFS_I(inode)->generation <= last_committed && | 3010 | BTRFS_I(inode)->generation <= last_committed && |
2958 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | 3011 | BTRFS_I(inode)->last_unlink_trans <= last_committed) { |
2959 | goto no_parent; | 3012 | ret = 0; |
3013 | goto end_trans; | ||
3014 | } | ||
2960 | 3015 | ||
2961 | inode_only = LOG_INODE_EXISTS; | 3016 | inode_only = LOG_INODE_EXISTS; |
2962 | while (1) { | 3017 | while (1) { |
@@ -2970,15 +3025,21 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
2970 | if (BTRFS_I(inode)->generation > | 3025 | if (BTRFS_I(inode)->generation > |
2971 | root->fs_info->last_trans_committed) { | 3026 | root->fs_info->last_trans_committed) { |
2972 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 3027 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
2973 | BUG_ON(ret); | 3028 | if (ret) |
3029 | goto end_trans; | ||
2974 | } | 3030 | } |
2975 | if (IS_ROOT(parent)) | 3031 | if (IS_ROOT(parent)) |
2976 | break; | 3032 | break; |
2977 | 3033 | ||
2978 | parent = parent->d_parent; | 3034 | parent = parent->d_parent; |
2979 | } | 3035 | } |
2980 | no_parent: | ||
2981 | ret = 0; | 3036 | ret = 0; |
3037 | end_trans: | ||
3038 | if (ret < 0) { | ||
3039 | BUG_ON(ret != -ENOSPC); | ||
3040 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
3041 | ret = 1; | ||
3042 | } | ||
2982 | btrfs_end_log_trans(root); | 3043 | btrfs_end_log_trans(root); |
2983 | end_no_trans: | 3044 | end_no_trans: |
2984 | return ret; | 3045 | return ret; |
@@ -3020,7 +3081,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
3020 | path = btrfs_alloc_path(); | 3081 | path = btrfs_alloc_path(); |
3021 | BUG_ON(!path); | 3082 | BUG_ON(!path); |
3022 | 3083 | ||
3023 | trans = btrfs_start_transaction(fs_info->tree_root, 1); | 3084 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
3024 | 3085 | ||
3025 | wc.trans = trans; | 3086 | wc.trans = trans; |
3026 | wc.pin = 1; | 3087 | wc.pin = 1; |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 0776eacb5083..3dfae84c8cc8 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
@@ -25,6 +25,8 @@ | |||
25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
26 | struct btrfs_root *root); | 26 | struct btrfs_root *root); |
27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
29 | struct btrfs_fs_info *fs_info); | ||
28 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
29 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
30 | struct btrfs_root *root, struct dentry *dentry); | 32 | struct btrfs_root *root, struct dentry *dentry); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8db7b14bbae8..d6e3af8be95b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
@@ -1097,7 +1097,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
1097 | if (!path) | 1097 | if (!path) |
1098 | return -ENOMEM; | 1098 | return -ENOMEM; |
1099 | 1099 | ||
1100 | trans = btrfs_start_transaction(root, 1); | 1100 | trans = btrfs_start_transaction(root, 0); |
1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
1102 | key.type = BTRFS_DEV_ITEM_KEY; | 1102 | key.type = BTRFS_DEV_ITEM_KEY; |
1103 | key.offset = device->devid; | 1103 | key.offset = device->devid; |
@@ -1486,7 +1486,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
1486 | goto error; | 1486 | goto error; |
1487 | } | 1487 | } |
1488 | 1488 | ||
1489 | trans = btrfs_start_transaction(root, 1); | 1489 | trans = btrfs_start_transaction(root, 0); |
1490 | lock_chunks(root); | 1490 | lock_chunks(root); |
1491 | 1491 | ||
1492 | device->barriers = 1; | 1492 | device->barriers = 1; |
@@ -1751,9 +1751,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
1751 | 1751 | ||
1752 | /* step one, relocate all the extents inside this chunk */ | 1752 | /* step one, relocate all the extents inside this chunk */ |
1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
1754 | BUG_ON(ret); | 1754 | if (ret) |
1755 | return ret; | ||
1755 | 1756 | ||
1756 | trans = btrfs_start_transaction(root, 1); | 1757 | trans = btrfs_start_transaction(root, 0); |
1757 | BUG_ON(!trans); | 1758 | BUG_ON(!trans); |
1758 | 1759 | ||
1759 | lock_chunks(root); | 1760 | lock_chunks(root); |
@@ -1925,7 +1926,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
1925 | break; | 1926 | break; |
1926 | BUG_ON(ret); | 1927 | BUG_ON(ret); |
1927 | 1928 | ||
1928 | trans = btrfs_start_transaction(dev_root, 1); | 1929 | trans = btrfs_start_transaction(dev_root, 0); |
1929 | BUG_ON(!trans); | 1930 | BUG_ON(!trans); |
1930 | 1931 | ||
1931 | ret = btrfs_grow_device(trans, device, old_size); | 1932 | ret = btrfs_grow_device(trans, device, old_size); |
@@ -2094,11 +2095,7 @@ again: | |||
2094 | } | 2095 | } |
2095 | 2096 | ||
2096 | /* Shrinking succeeded, else we would be at "done". */ | 2097 | /* Shrinking succeeded, else we would be at "done". */ |
2097 | trans = btrfs_start_transaction(root, 1); | 2098 | trans = btrfs_start_transaction(root, 0); |
2098 | if (!trans) { | ||
2099 | ret = -ENOMEM; | ||
2100 | goto done; | ||
2101 | } | ||
2102 | lock_chunks(root); | 2099 | lock_chunks(root); |
2103 | 2100 | ||
2104 | device->disk_total_bytes = new_size; | 2101 | device->disk_total_bytes = new_size; |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 59acd3eb288a..88ecbb215878 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
@@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
154 | if (trans) | 154 | if (trans) |
155 | return do_setxattr(trans, inode, name, value, size, flags); | 155 | return do_setxattr(trans, inode, name, value, size, flags); |
156 | 156 | ||
157 | ret = btrfs_reserve_metadata_space(root, 2); | 157 | trans = btrfs_start_transaction(root, 2); |
158 | if (ret) | 158 | if (IS_ERR(trans)) |
159 | return ret; | 159 | return PTR_ERR(trans); |
160 | 160 | ||
161 | trans = btrfs_start_transaction(root, 1); | ||
162 | if (!trans) { | ||
163 | ret = -ENOMEM; | ||
164 | goto out; | ||
165 | } | ||
166 | btrfs_set_trans_block_group(trans, inode); | 161 | btrfs_set_trans_block_group(trans, inode); |
167 | 162 | ||
168 | ret = do_setxattr(trans, inode, name, value, size, flags); | 163 | ret = do_setxattr(trans, inode, name, value, size, flags); |
@@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
174 | BUG_ON(ret); | 169 | BUG_ON(ret); |
175 | out: | 170 | out: |
176 | btrfs_end_transaction_throttle(trans, root); | 171 | btrfs_end_transaction_throttle(trans, root); |
177 | btrfs_unreserve_metadata_space(root, 2); | ||
178 | return ret; | 172 | return ret; |
179 | } | 173 | } |
180 | 174 | ||
diff --git a/fs/direct-io.c b/fs/direct-io.c index e82adc2debb7..da111aacb46e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -82,6 +82,8 @@ struct dio { | |||
82 | int reap_counter; /* rate limit reaping */ | 82 | int reap_counter; /* rate limit reaping */ |
83 | get_block_t *get_block; /* block mapping function */ | 83 | get_block_t *get_block; /* block mapping function */ |
84 | dio_iodone_t *end_io; /* IO completion function */ | 84 | dio_iodone_t *end_io; /* IO completion function */ |
85 | dio_submit_t *submit_io; /* IO submission function */ | ||
86 | loff_t logical_offset_in_bio; /* current first logical block in bio */ | ||
85 | sector_t final_block_in_bio; /* current final block in bio + 1 */ | 87 | sector_t final_block_in_bio; /* current final block in bio + 1 */ |
86 | sector_t next_block_for_io; /* next block to be put under IO, | 88 | sector_t next_block_for_io; /* next block to be put under IO, |
87 | in dio_blocks units */ | 89 | in dio_blocks units */ |
@@ -96,6 +98,7 @@ struct dio { | |||
96 | unsigned cur_page_offset; /* Offset into it, in bytes */ | 98 | unsigned cur_page_offset; /* Offset into it, in bytes */ |
97 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ | 99 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ |
98 | sector_t cur_page_block; /* Where it starts */ | 100 | sector_t cur_page_block; /* Where it starts */ |
101 | loff_t cur_page_fs_offset; /* Offset in file */ | ||
99 | 102 | ||
100 | /* BIO completion state */ | 103 | /* BIO completion state */ |
101 | spinlock_t bio_lock; /* protects BIO fields below */ | 104 | spinlock_t bio_lock; /* protects BIO fields below */ |
@@ -300,6 +303,26 @@ static void dio_bio_end_io(struct bio *bio, int error) | |||
300 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 303 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
301 | } | 304 | } |
302 | 305 | ||
306 | /** | ||
307 | * dio_end_io - handle the end io action for the given bio | ||
308 | * @bio: The direct io bio that's being completed | ||
309 | * @error: Error if there was one | ||
310 | * | ||
311 | * This is meant to be called by any filesystem that uses their own dio_submit_t | ||
312 | * so that the DIO specific endio actions are dealt with after the filesystem | ||
313 | * has done its completion work. | ||
314 | */ | ||
315 | void dio_end_io(struct bio *bio, int error) | ||
316 | { | ||
317 | struct dio *dio = bio->bi_private; | ||
318 | |||
319 | if (dio->is_async) | ||
320 | dio_bio_end_aio(bio, error); | ||
321 | else | ||
322 | dio_bio_end_io(bio, error); | ||
323 | } | ||
324 | EXPORT_SYMBOL_GPL(dio_end_io); | ||
325 | |||
303 | static int | 326 | static int |
304 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, | 327 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, |
305 | sector_t first_sector, int nr_vecs) | 328 | sector_t first_sector, int nr_vecs) |
@@ -316,6 +339,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
316 | bio->bi_end_io = dio_bio_end_io; | 339 | bio->bi_end_io = dio_bio_end_io; |
317 | 340 | ||
318 | dio->bio = bio; | 341 | dio->bio = bio; |
342 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; | ||
319 | return 0; | 343 | return 0; |
320 | } | 344 | } |
321 | 345 | ||
@@ -340,10 +364,15 @@ static void dio_bio_submit(struct dio *dio) | |||
340 | if (dio->is_async && dio->rw == READ) | 364 | if (dio->is_async && dio->rw == READ) |
341 | bio_set_pages_dirty(bio); | 365 | bio_set_pages_dirty(bio); |
342 | 366 | ||
343 | submit_bio(dio->rw, bio); | 367 | if (dio->submit_io) |
368 | dio->submit_io(dio->rw, bio, dio->inode, | ||
369 | dio->logical_offset_in_bio); | ||
370 | else | ||
371 | submit_bio(dio->rw, bio); | ||
344 | 372 | ||
345 | dio->bio = NULL; | 373 | dio->bio = NULL; |
346 | dio->boundary = 0; | 374 | dio->boundary = 0; |
375 | dio->logical_offset_in_bio = 0; | ||
347 | } | 376 | } |
348 | 377 | ||
349 | /* | 378 | /* |
@@ -603,10 +632,26 @@ static int dio_send_cur_page(struct dio *dio) | |||
603 | int ret = 0; | 632 | int ret = 0; |
604 | 633 | ||
605 | if (dio->bio) { | 634 | if (dio->bio) { |
635 | loff_t cur_offset = dio->block_in_file << dio->blkbits; | ||
636 | loff_t bio_next_offset = dio->logical_offset_in_bio + | ||
637 | dio->bio->bi_size; | ||
638 | |||
606 | /* | 639 | /* |
607 | * See whether this new request is contiguous with the old | 640 | * See whether this new request is contiguous with the old. |
641 | * | ||
642 | * Btrfs cannot handle having logically non-contiguous requests | ||
643 | * submitted. For example if you have | ||
644 | * | ||
645 | * Logical: [0-4095][HOLE][8192-12287] | ||
646 | * Physical: [0-4095] [4096-8191] | ||
647 | * | ||
648 | * We cannot submit those pages together as one BIO. So if our | ||
649 | * current logical offset in the file does not equal what would | ||
650 | * be the next logical offset in the bio, submit the bio we | ||
651 | * have. | ||
608 | */ | 652 | */ |
609 | if (dio->final_block_in_bio != dio->cur_page_block) | 653 | if (dio->final_block_in_bio != dio->cur_page_block || |
654 | cur_offset != bio_next_offset) | ||
610 | dio_bio_submit(dio); | 655 | dio_bio_submit(dio); |
611 | /* | 656 | /* |
612 | * Submit now if the underlying fs is about to perform a | 657 | * Submit now if the underlying fs is about to perform a |
@@ -701,6 +746,7 @@ submit_page_section(struct dio *dio, struct page *page, | |||
701 | dio->cur_page_offset = offset; | 746 | dio->cur_page_offset = offset; |
702 | dio->cur_page_len = len; | 747 | dio->cur_page_len = len; |
703 | dio->cur_page_block = blocknr; | 748 | dio->cur_page_block = blocknr; |
749 | dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; | ||
704 | out: | 750 | out: |
705 | return ret; | 751 | return ret; |
706 | } | 752 | } |
@@ -935,7 +981,7 @@ static ssize_t | |||
935 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | 981 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, |
936 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | 982 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, |
937 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, | 983 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, |
938 | struct dio *dio) | 984 | dio_submit_t submit_io, struct dio *dio) |
939 | { | 985 | { |
940 | unsigned long user_addr; | 986 | unsigned long user_addr; |
941 | unsigned long flags; | 987 | unsigned long flags; |
@@ -952,6 +998,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
952 | 998 | ||
953 | dio->get_block = get_block; | 999 | dio->get_block = get_block; |
954 | dio->end_io = end_io; | 1000 | dio->end_io = end_io; |
1001 | dio->submit_io = submit_io; | ||
955 | dio->final_block_in_bio = -1; | 1002 | dio->final_block_in_bio = -1; |
956 | dio->next_block_for_io = -1; | 1003 | dio->next_block_for_io = -1; |
957 | 1004 | ||
@@ -1008,7 +1055,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1008 | } | 1055 | } |
1009 | } /* end iovec loop */ | 1056 | } /* end iovec loop */ |
1010 | 1057 | ||
1011 | if (ret == -ENOTBLK && (rw & WRITE)) { | 1058 | if (ret == -ENOTBLK) { |
1012 | /* | 1059 | /* |
1013 | * The remaining part of the request will be | 1060 | * The remaining part of the request will be |
1014 | * handled by buffered I/O when we return | 1061 | * handled by buffered I/O when we return |
@@ -1110,7 +1157,7 @@ ssize_t | |||
1110 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1157 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
1111 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1158 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
1112 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1159 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
1113 | int flags) | 1160 | dio_submit_t submit_io, int flags) |
1114 | { | 1161 | { |
1115 | int seg; | 1162 | int seg; |
1116 | size_t size; | 1163 | size_t size; |
@@ -1197,7 +1244,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1197 | (end > i_size_read(inode))); | 1244 | (end > i_size_read(inode))); |
1198 | 1245 | ||
1199 | retval = direct_io_worker(rw, iocb, inode, iov, offset, | 1246 | retval = direct_io_worker(rw, iocb, inode, iov, offset, |
1200 | nr_segs, blkbits, get_block, end_io, dio); | 1247 | nr_segs, blkbits, get_block, end_io, |
1248 | submit_io, dio); | ||
1201 | 1249 | ||
1202 | /* | 1250 | /* |
1203 | * In case of error extending write may have instantiated a few | 1251 | * In case of error extending write may have instantiated a few |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 9682d52d1507..85e823adcd4a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -2251,10 +2251,15 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from) | |||
2251 | #endif | 2251 | #endif |
2252 | 2252 | ||
2253 | #ifdef CONFIG_BLOCK | 2253 | #ifdef CONFIG_BLOCK |
2254 | struct bio; | ||
2255 | typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, | ||
2256 | loff_t file_offset); | ||
2257 | void dio_end_io(struct bio *bio, int error); | ||
2258 | |||
2254 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 2259 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
2255 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 2260 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
2256 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 2261 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
2257 | int lock_type); | 2262 | dio_submit_t submit_io, int lock_type); |
2258 | 2263 | ||
2259 | enum { | 2264 | enum { |
2260 | /* need locking between buffered and direct access */ | 2265 | /* need locking between buffered and direct access */ |
@@ -2270,7 +2275,7 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, | |||
2270 | dio_iodone_t end_io) | 2275 | dio_iodone_t end_io) |
2271 | { | 2276 | { |
2272 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2277 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2273 | nr_segs, get_block, end_io, | 2278 | nr_segs, get_block, end_io, NULL, |
2274 | DIO_LOCKING | DIO_SKIP_HOLES); | 2279 | DIO_LOCKING | DIO_SKIP_HOLES); |
2275 | } | 2280 | } |
2276 | 2281 | ||
@@ -2280,7 +2285,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, | |||
2280 | dio_iodone_t end_io) | 2285 | dio_iodone_t end_io) |
2281 | { | 2286 | { |
2282 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2287 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2283 | nr_segs, get_block, end_io, 0); | 2288 | nr_segs, get_block, end_io, NULL, 0); |
2284 | } | 2289 | } |
2285 | #endif | 2290 | #endif |
2286 | 2291 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 35e12d186566..45a2d18df849 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
@@ -1275,7 +1275,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1275 | { | 1275 | { |
1276 | struct file *filp = iocb->ki_filp; | 1276 | struct file *filp = iocb->ki_filp; |
1277 | ssize_t retval; | 1277 | ssize_t retval; |
1278 | unsigned long seg; | 1278 | unsigned long seg = 0; |
1279 | size_t count; | 1279 | size_t count; |
1280 | loff_t *ppos = &iocb->ki_pos; | 1280 | loff_t *ppos = &iocb->ki_pos; |
1281 | 1281 | ||
@@ -1302,21 +1302,47 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
1302 | retval = mapping->a_ops->direct_IO(READ, iocb, | 1302 | retval = mapping->a_ops->direct_IO(READ, iocb, |
1303 | iov, pos, nr_segs); | 1303 | iov, pos, nr_segs); |
1304 | } | 1304 | } |
1305 | if (retval > 0) | 1305 | if (retval > 0) { |
1306 | *ppos = pos + retval; | 1306 | *ppos = pos + retval; |
1307 | if (retval) { | 1307 | count -= retval; |
1308 | } | ||
1309 | |||
1310 | /* | ||
1311 | * Btrfs can have a short DIO read if we encounter | ||
1312 | * compressed extents, so if there was an error, or if | ||
1313 | * we've already read everything we wanted to, or if | ||
1314 | * there was a short read because we hit EOF, go ahead | ||
1315 | * and return. Otherwise fallthrough to buffered io for | ||
1316 | * the rest of the read. | ||
1317 | */ | ||
1318 | if (retval < 0 || !count || *ppos >= size) { | ||
1308 | file_accessed(filp); | 1319 | file_accessed(filp); |
1309 | goto out; | 1320 | goto out; |
1310 | } | 1321 | } |
1311 | } | 1322 | } |
1312 | } | 1323 | } |
1313 | 1324 | ||
1325 | count = retval; | ||
1314 | for (seg = 0; seg < nr_segs; seg++) { | 1326 | for (seg = 0; seg < nr_segs; seg++) { |
1315 | read_descriptor_t desc; | 1327 | read_descriptor_t desc; |
1328 | loff_t offset = 0; | ||
1329 | |||
1330 | /* | ||
1331 | * If we did a short DIO read we need to skip the section of the | ||
1332 | * iov that we've already read data into. | ||
1333 | */ | ||
1334 | if (count) { | ||
1335 | if (count > iov[seg].iov_len) { | ||
1336 | count -= iov[seg].iov_len; | ||
1337 | continue; | ||
1338 | } | ||
1339 | offset = count; | ||
1340 | count = 0; | ||
1341 | } | ||
1316 | 1342 | ||
1317 | desc.written = 0; | 1343 | desc.written = 0; |
1318 | desc.arg.buf = iov[seg].iov_base; | 1344 | desc.arg.buf = iov[seg].iov_base + offset; |
1319 | desc.count = iov[seg].iov_len; | 1345 | desc.count = iov[seg].iov_len - offset; |
1320 | if (desc.count == 0) | 1346 | if (desc.count == 0) |
1321 | continue; | 1347 | continue; |
1322 | desc.error = 0; | 1348 | desc.error = 0; |