diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 13:43:44 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-05-27 13:43:44 -0400 |
| commit | 105a048a4f35f7a74c7cc20b36dd83658b6ec232 (patch) | |
| tree | 043b1110cda0042ba35d8aae59382bb094d0af3f | |
| parent | 00b9b0af5887fed54e899e3b7f5c2ccf5e739def (diff) | |
| parent | 9aeead73782c4b8e2a91def36dbf95db28605c95 (diff) | |
Merge git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable
* git://git.kernel.org/pub/scm/linux/kernel/git/mason/btrfs-unstable: (27 commits)
Btrfs: add more error checking to btrfs_dirty_inode
Btrfs: allow unaligned DIO
Btrfs: drop verbose enospc printk
Btrfs: Fix block generation verification race
Btrfs: fix preallocation and nodatacow checks in O_DIRECT
Btrfs: avoid ENOSPC errors in btrfs_dirty_inode
Btrfs: move O_DIRECT space reservation to btrfs_direct_IO
Btrfs: rework O_DIRECT enospc handling
Btrfs: use async helpers for DIO write checksumming
Btrfs: don't walk around with task->state != TASK_RUNNING
Btrfs: do aio_write instead of write
Btrfs: add basic DIO read/write support
direct-io: do not merge logically non-contiguous requests
direct-io: add a hook for the fs to provide its own submit_bio function
fs: allow short direct-io reads to be completed via buffered IO
Btrfs: Metadata ENOSPC handling for balance
Btrfs: Pre-allocate space for data relocation
Btrfs: Metadata ENOSPC handling for tree log
Btrfs: Metadata reservation for orphan inodes
Btrfs: Introduce global metadata reservation
...
31 files changed, 5066 insertions, 2740 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c index 462859a30141..7ec14097fef1 100644 --- a/fs/btrfs/async-thread.c +++ b/fs/btrfs/async-thread.c | |||
| @@ -377,6 +377,7 @@ again: | |||
| 377 | if (!list_empty(&worker->pending) || | 377 | if (!list_empty(&worker->pending) || |
| 378 | !list_empty(&worker->prio_pending)) { | 378 | !list_empty(&worker->prio_pending)) { |
| 379 | spin_unlock_irq(&worker->lock); | 379 | spin_unlock_irq(&worker->lock); |
| 380 | set_current_state(TASK_RUNNING); | ||
| 380 | goto again; | 381 | goto again; |
| 381 | } | 382 | } |
| 382 | 383 | ||
diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h index 7a4dee199832..6ad63f17eca0 100644 --- a/fs/btrfs/btrfs_inode.h +++ b/fs/btrfs/btrfs_inode.h | |||
| @@ -137,8 +137,8 @@ struct btrfs_inode { | |||
| 137 | * of extent items we've reserved metadata for. | 137 | * of extent items we've reserved metadata for. |
| 138 | */ | 138 | */ |
| 139 | spinlock_t accounting_lock; | 139 | spinlock_t accounting_lock; |
| 140 | atomic_t outstanding_extents; | ||
| 140 | int reserved_extents; | 141 | int reserved_extents; |
| 141 | int outstanding_extents; | ||
| 142 | 142 | ||
| 143 | /* | 143 | /* |
| 144 | * ordered_data_close is set by truncate when a file that used | 144 | * ordered_data_close is set by truncate when a file that used |
| @@ -151,6 +151,7 @@ struct btrfs_inode { | |||
| 151 | * of these. | 151 | * of these. |
| 152 | */ | 152 | */ |
| 153 | unsigned ordered_data_close:1; | 153 | unsigned ordered_data_close:1; |
| 154 | unsigned orphan_meta_reserved:1; | ||
| 154 | unsigned dummy_inode:1; | 155 | unsigned dummy_inode:1; |
| 155 | 156 | ||
| 156 | /* | 157 | /* |
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 6795a713b205..0d1d966b0fe4 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c | |||
| @@ -280,7 +280,8 @@ int btrfs_block_can_be_shared(struct btrfs_root *root, | |||
| 280 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | 280 | static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, |
| 281 | struct btrfs_root *root, | 281 | struct btrfs_root *root, |
| 282 | struct extent_buffer *buf, | 282 | struct extent_buffer *buf, |
| 283 | struct extent_buffer *cow) | 283 | struct extent_buffer *cow, |
| 284 | int *last_ref) | ||
| 284 | { | 285 | { |
| 285 | u64 refs; | 286 | u64 refs; |
| 286 | u64 owner; | 287 | u64 owner; |
| @@ -366,6 +367,7 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans, | |||
| 366 | BUG_ON(ret); | 367 | BUG_ON(ret); |
| 367 | } | 368 | } |
| 368 | clean_tree_block(trans, root, buf); | 369 | clean_tree_block(trans, root, buf); |
| 370 | *last_ref = 1; | ||
| 369 | } | 371 | } |
| 370 | return 0; | 372 | return 0; |
| 371 | } | 373 | } |
| @@ -392,6 +394,7 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 392 | struct btrfs_disk_key disk_key; | 394 | struct btrfs_disk_key disk_key; |
| 393 | struct extent_buffer *cow; | 395 | struct extent_buffer *cow; |
| 394 | int level; | 396 | int level; |
| 397 | int last_ref = 0; | ||
| 395 | int unlock_orig = 0; | 398 | int unlock_orig = 0; |
| 396 | u64 parent_start; | 399 | u64 parent_start; |
| 397 | 400 | ||
| @@ -442,7 +445,10 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 442 | (unsigned long)btrfs_header_fsid(cow), | 445 | (unsigned long)btrfs_header_fsid(cow), |
| 443 | BTRFS_FSID_SIZE); | 446 | BTRFS_FSID_SIZE); |
| 444 | 447 | ||
| 445 | update_ref_for_cow(trans, root, buf, cow); | 448 | update_ref_for_cow(trans, root, buf, cow, &last_ref); |
| 449 | |||
| 450 | if (root->ref_cows) | ||
| 451 | btrfs_reloc_cow_block(trans, root, buf, cow); | ||
| 446 | 452 | ||
| 447 | if (buf == root->node) { | 453 | if (buf == root->node) { |
| 448 | WARN_ON(parent && parent != buf); | 454 | WARN_ON(parent && parent != buf); |
| @@ -457,8 +463,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 457 | extent_buffer_get(cow); | 463 | extent_buffer_get(cow); |
| 458 | spin_unlock(&root->node_lock); | 464 | spin_unlock(&root->node_lock); |
| 459 | 465 | ||
| 460 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 466 | btrfs_free_tree_block(trans, root, buf, parent_start, |
| 461 | parent_start, root->root_key.objectid, level); | 467 | last_ref); |
| 462 | free_extent_buffer(buf); | 468 | free_extent_buffer(buf); |
| 463 | add_root_to_dirty_list(root); | 469 | add_root_to_dirty_list(root); |
| 464 | } else { | 470 | } else { |
| @@ -473,8 +479,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans, | |||
| 473 | btrfs_set_node_ptr_generation(parent, parent_slot, | 479 | btrfs_set_node_ptr_generation(parent, parent_slot, |
| 474 | trans->transid); | 480 | trans->transid); |
| 475 | btrfs_mark_buffer_dirty(parent); | 481 | btrfs_mark_buffer_dirty(parent); |
| 476 | btrfs_free_tree_block(trans, root, buf->start, buf->len, | 482 | btrfs_free_tree_block(trans, root, buf, parent_start, |
| 477 | parent_start, root->root_key.objectid, level); | 483 | last_ref); |
| 478 | } | 484 | } |
| 479 | if (unlock_orig) | 485 | if (unlock_orig) |
| 480 | btrfs_tree_unlock(buf); | 486 | btrfs_tree_unlock(buf); |
| @@ -949,6 +955,22 @@ int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | |||
| 949 | return bin_search(eb, key, level, slot); | 955 | return bin_search(eb, key, level, slot); |
| 950 | } | 956 | } |
| 951 | 957 | ||
| 958 | static void root_add_used(struct btrfs_root *root, u32 size) | ||
| 959 | { | ||
| 960 | spin_lock(&root->accounting_lock); | ||
| 961 | btrfs_set_root_used(&root->root_item, | ||
| 962 | btrfs_root_used(&root->root_item) + size); | ||
| 963 | spin_unlock(&root->accounting_lock); | ||
| 964 | } | ||
| 965 | |||
| 966 | static void root_sub_used(struct btrfs_root *root, u32 size) | ||
| 967 | { | ||
| 968 | spin_lock(&root->accounting_lock); | ||
| 969 | btrfs_set_root_used(&root->root_item, | ||
| 970 | btrfs_root_used(&root->root_item) - size); | ||
| 971 | spin_unlock(&root->accounting_lock); | ||
| 972 | } | ||
| 973 | |||
| 952 | /* given a node and slot number, this reads the blocks it points to. The | 974 | /* given a node and slot number, this reads the blocks it points to. The |
| 953 | * extent buffer is returned with a reference taken (but unlocked). | 975 | * extent buffer is returned with a reference taken (but unlocked). |
| 954 | * NULL is returned on error. | 976 | * NULL is returned on error. |
| @@ -1019,7 +1041,11 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1019 | btrfs_tree_lock(child); | 1041 | btrfs_tree_lock(child); |
| 1020 | btrfs_set_lock_blocking(child); | 1042 | btrfs_set_lock_blocking(child); |
| 1021 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); | 1043 | ret = btrfs_cow_block(trans, root, child, mid, 0, &child); |
| 1022 | BUG_ON(ret); | 1044 | if (ret) { |
| 1045 | btrfs_tree_unlock(child); | ||
| 1046 | free_extent_buffer(child); | ||
| 1047 | goto enospc; | ||
| 1048 | } | ||
| 1023 | 1049 | ||
| 1024 | spin_lock(&root->node_lock); | 1050 | spin_lock(&root->node_lock); |
| 1025 | root->node = child; | 1051 | root->node = child; |
| @@ -1034,11 +1060,12 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1034 | btrfs_tree_unlock(mid); | 1060 | btrfs_tree_unlock(mid); |
| 1035 | /* once for the path */ | 1061 | /* once for the path */ |
| 1036 | free_extent_buffer(mid); | 1062 | free_extent_buffer(mid); |
| 1037 | ret = btrfs_free_tree_block(trans, root, mid->start, mid->len, | 1063 | |
| 1038 | 0, root->root_key.objectid, level); | 1064 | root_sub_used(root, mid->len); |
| 1065 | btrfs_free_tree_block(trans, root, mid, 0, 1); | ||
| 1039 | /* once for the root ptr */ | 1066 | /* once for the root ptr */ |
| 1040 | free_extent_buffer(mid); | 1067 | free_extent_buffer(mid); |
| 1041 | return ret; | 1068 | return 0; |
| 1042 | } | 1069 | } |
| 1043 | if (btrfs_header_nritems(mid) > | 1070 | if (btrfs_header_nritems(mid) > |
| 1044 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) | 1071 | BTRFS_NODEPTRS_PER_BLOCK(root) / 4) |
| @@ -1088,23 +1115,16 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1088 | if (wret < 0 && wret != -ENOSPC) | 1115 | if (wret < 0 && wret != -ENOSPC) |
| 1089 | ret = wret; | 1116 | ret = wret; |
| 1090 | if (btrfs_header_nritems(right) == 0) { | 1117 | if (btrfs_header_nritems(right) == 0) { |
| 1091 | u64 bytenr = right->start; | ||
| 1092 | u32 blocksize = right->len; | ||
| 1093 | |||
| 1094 | clean_tree_block(trans, root, right); | 1118 | clean_tree_block(trans, root, right); |
| 1095 | btrfs_tree_unlock(right); | 1119 | btrfs_tree_unlock(right); |
| 1096 | free_extent_buffer(right); | ||
| 1097 | right = NULL; | ||
| 1098 | wret = del_ptr(trans, root, path, level + 1, pslot + | 1120 | wret = del_ptr(trans, root, path, level + 1, pslot + |
| 1099 | 1); | 1121 | 1); |
| 1100 | if (wret) | 1122 | if (wret) |
| 1101 | ret = wret; | 1123 | ret = wret; |
| 1102 | wret = btrfs_free_tree_block(trans, root, | 1124 | root_sub_used(root, right->len); |
| 1103 | bytenr, blocksize, 0, | 1125 | btrfs_free_tree_block(trans, root, right, 0, 1); |
| 1104 | root->root_key.objectid, | 1126 | free_extent_buffer(right); |
| 1105 | level); | 1127 | right = NULL; |
| 1106 | if (wret) | ||
| 1107 | ret = wret; | ||
| 1108 | } else { | 1128 | } else { |
| 1109 | struct btrfs_disk_key right_key; | 1129 | struct btrfs_disk_key right_key; |
| 1110 | btrfs_node_key(right, &right_key, 0); | 1130 | btrfs_node_key(right, &right_key, 0); |
| @@ -1136,21 +1156,15 @@ static noinline int balance_level(struct btrfs_trans_handle *trans, | |||
| 1136 | BUG_ON(wret == 1); | 1156 | BUG_ON(wret == 1); |
| 1137 | } | 1157 | } |
| 1138 | if (btrfs_header_nritems(mid) == 0) { | 1158 | if (btrfs_header_nritems(mid) == 0) { |
| 1139 | /* we've managed to empty the middle node, drop it */ | ||
| 1140 | u64 bytenr = mid->start; | ||
| 1141 | u32 blocksize = mid->len; | ||
| 1142 | |||
| 1143 | clean_tree_block(trans, root, mid); | 1159 | clean_tree_block(trans, root, mid); |
| 1144 | btrfs_tree_unlock(mid); | 1160 | btrfs_tree_unlock(mid); |
| 1145 | free_extent_buffer(mid); | ||
| 1146 | mid = NULL; | ||
| 1147 | wret = del_ptr(trans, root, path, level + 1, pslot); | 1161 | wret = del_ptr(trans, root, path, level + 1, pslot); |
| 1148 | if (wret) | 1162 | if (wret) |
| 1149 | ret = wret; | 1163 | ret = wret; |
| 1150 | wret = btrfs_free_tree_block(trans, root, bytenr, blocksize, | 1164 | root_sub_used(root, mid->len); |
| 1151 | 0, root->root_key.objectid, level); | 1165 | btrfs_free_tree_block(trans, root, mid, 0, 1); |
| 1152 | if (wret) | 1166 | free_extent_buffer(mid); |
| 1153 | ret = wret; | 1167 | mid = NULL; |
| 1154 | } else { | 1168 | } else { |
| 1155 | /* update the parent key to reflect our changes */ | 1169 | /* update the parent key to reflect our changes */ |
| 1156 | struct btrfs_disk_key mid_key; | 1170 | struct btrfs_disk_key mid_key; |
| @@ -1590,7 +1604,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, | |||
| 1590 | btrfs_release_path(NULL, p); | 1604 | btrfs_release_path(NULL, p); |
| 1591 | 1605 | ||
| 1592 | ret = -EAGAIN; | 1606 | ret = -EAGAIN; |
| 1593 | tmp = read_tree_block(root, blocknr, blocksize, gen); | 1607 | tmp = read_tree_block(root, blocknr, blocksize, 0); |
| 1594 | if (tmp) { | 1608 | if (tmp) { |
| 1595 | /* | 1609 | /* |
| 1596 | * If the read above didn't mark this buffer up to date, | 1610 | * If the read above didn't mark this buffer up to date, |
| @@ -1740,7 +1754,6 @@ again: | |||
| 1740 | p->nodes[level + 1], | 1754 | p->nodes[level + 1], |
| 1741 | p->slots[level + 1], &b); | 1755 | p->slots[level + 1], &b); |
| 1742 | if (err) { | 1756 | if (err) { |
| 1743 | free_extent_buffer(b); | ||
| 1744 | ret = err; | 1757 | ret = err; |
| 1745 | goto done; | 1758 | goto done; |
| 1746 | } | 1759 | } |
| @@ -2076,6 +2089,8 @@ static noinline int insert_new_root(struct btrfs_trans_handle *trans, | |||
| 2076 | if (IS_ERR(c)) | 2089 | if (IS_ERR(c)) |
| 2077 | return PTR_ERR(c); | 2090 | return PTR_ERR(c); |
| 2078 | 2091 | ||
| 2092 | root_add_used(root, root->nodesize); | ||
| 2093 | |||
| 2079 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); | 2094 | memset_extent_buffer(c, 0, 0, sizeof(struct btrfs_header)); |
| 2080 | btrfs_set_header_nritems(c, 1); | 2095 | btrfs_set_header_nritems(c, 1); |
| 2081 | btrfs_set_header_level(c, level); | 2096 | btrfs_set_header_level(c, level); |
| @@ -2134,6 +2149,7 @@ static int insert_ptr(struct btrfs_trans_handle *trans, struct btrfs_root | |||
| 2134 | int nritems; | 2149 | int nritems; |
| 2135 | 2150 | ||
| 2136 | BUG_ON(!path->nodes[level]); | 2151 | BUG_ON(!path->nodes[level]); |
| 2152 | btrfs_assert_tree_locked(path->nodes[level]); | ||
| 2137 | lower = path->nodes[level]; | 2153 | lower = path->nodes[level]; |
| 2138 | nritems = btrfs_header_nritems(lower); | 2154 | nritems = btrfs_header_nritems(lower); |
| 2139 | BUG_ON(slot > nritems); | 2155 | BUG_ON(slot > nritems); |
| @@ -2202,6 +2218,8 @@ static noinline int split_node(struct btrfs_trans_handle *trans, | |||
| 2202 | if (IS_ERR(split)) | 2218 | if (IS_ERR(split)) |
| 2203 | return PTR_ERR(split); | 2219 | return PTR_ERR(split); |
| 2204 | 2220 | ||
| 2221 | root_add_used(root, root->nodesize); | ||
| 2222 | |||
| 2205 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); | 2223 | memset_extent_buffer(split, 0, 0, sizeof(struct btrfs_header)); |
| 2206 | btrfs_set_header_level(split, btrfs_header_level(c)); | 2224 | btrfs_set_header_level(split, btrfs_header_level(c)); |
| 2207 | btrfs_set_header_bytenr(split, split->start); | 2225 | btrfs_set_header_bytenr(split, split->start); |
| @@ -2415,6 +2433,9 @@ static noinline int __push_leaf_right(struct btrfs_trans_handle *trans, | |||
| 2415 | 2433 | ||
| 2416 | if (left_nritems) | 2434 | if (left_nritems) |
| 2417 | btrfs_mark_buffer_dirty(left); | 2435 | btrfs_mark_buffer_dirty(left); |
| 2436 | else | ||
| 2437 | clean_tree_block(trans, root, left); | ||
| 2438 | |||
| 2418 | btrfs_mark_buffer_dirty(right); | 2439 | btrfs_mark_buffer_dirty(right); |
| 2419 | 2440 | ||
| 2420 | btrfs_item_key(right, &disk_key, 0); | 2441 | btrfs_item_key(right, &disk_key, 0); |
| @@ -2660,6 +2681,8 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
| 2660 | btrfs_mark_buffer_dirty(left); | 2681 | btrfs_mark_buffer_dirty(left); |
| 2661 | if (right_nritems) | 2682 | if (right_nritems) |
| 2662 | btrfs_mark_buffer_dirty(right); | 2683 | btrfs_mark_buffer_dirty(right); |
| 2684 | else | ||
| 2685 | clean_tree_block(trans, root, right); | ||
| 2663 | 2686 | ||
| 2664 | btrfs_item_key(right, &disk_key, 0); | 2687 | btrfs_item_key(right, &disk_key, 0); |
| 2665 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); | 2688 | wret = fixup_low_keys(trans, root, path, &disk_key, 1); |
| @@ -2669,8 +2692,6 @@ static noinline int __push_leaf_left(struct btrfs_trans_handle *trans, | |||
| 2669 | /* then fixup the leaf pointer in the path */ | 2692 | /* then fixup the leaf pointer in the path */ |
| 2670 | if (path->slots[0] < push_items) { | 2693 | if (path->slots[0] < push_items) { |
| 2671 | path->slots[0] += old_left_nritems; | 2694 | path->slots[0] += old_left_nritems; |
| 2672 | if (btrfs_header_nritems(path->nodes[0]) == 0) | ||
| 2673 | clean_tree_block(trans, root, path->nodes[0]); | ||
| 2674 | btrfs_tree_unlock(path->nodes[0]); | 2695 | btrfs_tree_unlock(path->nodes[0]); |
| 2675 | free_extent_buffer(path->nodes[0]); | 2696 | free_extent_buffer(path->nodes[0]); |
| 2676 | path->nodes[0] = left; | 2697 | path->nodes[0] = left; |
| @@ -2932,10 +2953,10 @@ again: | |||
| 2932 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, | 2953 | right = btrfs_alloc_free_block(trans, root, root->leafsize, 0, |
| 2933 | root->root_key.objectid, | 2954 | root->root_key.objectid, |
| 2934 | &disk_key, 0, l->start, 0); | 2955 | &disk_key, 0, l->start, 0); |
| 2935 | if (IS_ERR(right)) { | 2956 | if (IS_ERR(right)) |
| 2936 | BUG_ON(1); | ||
| 2937 | return PTR_ERR(right); | 2957 | return PTR_ERR(right); |
| 2938 | } | 2958 | |
| 2959 | root_add_used(root, root->leafsize); | ||
| 2939 | 2960 | ||
| 2940 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); | 2961 | memset_extent_buffer(right, 0, 0, sizeof(struct btrfs_header)); |
| 2941 | btrfs_set_header_bytenr(right, right->start); | 2962 | btrfs_set_header_bytenr(right, right->start); |
| @@ -3054,7 +3075,8 @@ static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans, | |||
| 3054 | 3075 | ||
| 3055 | btrfs_set_path_blocking(path); | 3076 | btrfs_set_path_blocking(path); |
| 3056 | ret = split_leaf(trans, root, &key, path, ins_len, 1); | 3077 | ret = split_leaf(trans, root, &key, path, ins_len, 1); |
| 3057 | BUG_ON(ret); | 3078 | if (ret) |
| 3079 | goto err; | ||
| 3058 | 3080 | ||
| 3059 | path->keep_locks = 0; | 3081 | path->keep_locks = 0; |
| 3060 | btrfs_unlock_up_safe(path, 1); | 3082 | btrfs_unlock_up_safe(path, 1); |
| @@ -3796,9 +3818,10 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans, | |||
| 3796 | */ | 3818 | */ |
| 3797 | btrfs_unlock_up_safe(path, 0); | 3819 | btrfs_unlock_up_safe(path, 0); |
| 3798 | 3820 | ||
| 3799 | ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len, | 3821 | root_sub_used(root, leaf->len); |
| 3800 | 0, root->root_key.objectid, 0); | 3822 | |
| 3801 | return ret; | 3823 | btrfs_free_tree_block(trans, root, leaf, 0, 1); |
| 3824 | return 0; | ||
| 3802 | } | 3825 | } |
| 3803 | /* | 3826 | /* |
| 3804 | * delete the item at the leaf level in path. If that empties | 3827 | * delete the item at the leaf level in path. If that empties |
| @@ -3865,6 +3888,8 @@ int btrfs_del_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
| 3865 | if (leaf == root->node) { | 3888 | if (leaf == root->node) { |
| 3866 | btrfs_set_header_level(leaf, 0); | 3889 | btrfs_set_header_level(leaf, 0); |
| 3867 | } else { | 3890 | } else { |
| 3891 | btrfs_set_path_blocking(path); | ||
| 3892 | clean_tree_block(trans, root, leaf); | ||
| 3868 | ret = btrfs_del_leaf(trans, root, path, leaf); | 3893 | ret = btrfs_del_leaf(trans, root, path, leaf); |
| 3869 | BUG_ON(ret); | 3894 | BUG_ON(ret); |
| 3870 | } | 3895 | } |
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 746a7248678e..e9bf86415e86 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
| @@ -34,6 +34,7 @@ | |||
| 34 | 34 | ||
| 35 | struct btrfs_trans_handle; | 35 | struct btrfs_trans_handle; |
| 36 | struct btrfs_transaction; | 36 | struct btrfs_transaction; |
| 37 | struct btrfs_pending_snapshot; | ||
| 37 | extern struct kmem_cache *btrfs_trans_handle_cachep; | 38 | extern struct kmem_cache *btrfs_trans_handle_cachep; |
| 38 | extern struct kmem_cache *btrfs_transaction_cachep; | 39 | extern struct kmem_cache *btrfs_transaction_cachep; |
| 39 | extern struct kmem_cache *btrfs_bit_radix_cachep; | 40 | extern struct kmem_cache *btrfs_bit_radix_cachep; |
| @@ -663,6 +664,7 @@ struct btrfs_csum_item { | |||
| 663 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) | 664 | #define BTRFS_BLOCK_GROUP_RAID1 (1 << 4) |
| 664 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) | 665 | #define BTRFS_BLOCK_GROUP_DUP (1 << 5) |
| 665 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) | 666 | #define BTRFS_BLOCK_GROUP_RAID10 (1 << 6) |
| 667 | #define BTRFS_NR_RAID_TYPES 5 | ||
| 666 | 668 | ||
| 667 | struct btrfs_block_group_item { | 669 | struct btrfs_block_group_item { |
| 668 | __le64 used; | 670 | __le64 used; |
| @@ -674,42 +676,46 @@ struct btrfs_space_info { | |||
| 674 | u64 flags; | 676 | u64 flags; |
| 675 | 677 | ||
| 676 | u64 total_bytes; /* total bytes in the space */ | 678 | u64 total_bytes; /* total bytes in the space */ |
| 677 | u64 bytes_used; /* total bytes used on disk */ | 679 | u64 bytes_used; /* total bytes used, |
| 680 | this does't take mirrors into account */ | ||
| 678 | u64 bytes_pinned; /* total bytes pinned, will be freed when the | 681 | u64 bytes_pinned; /* total bytes pinned, will be freed when the |
| 679 | transaction finishes */ | 682 | transaction finishes */ |
| 680 | u64 bytes_reserved; /* total bytes the allocator has reserved for | 683 | u64 bytes_reserved; /* total bytes the allocator has reserved for |
| 681 | current allocations */ | 684 | current allocations */ |
| 682 | u64 bytes_readonly; /* total bytes that are read only */ | 685 | u64 bytes_readonly; /* total bytes that are read only */ |
| 683 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 686 | |
| 684 | u64 bytes_root; /* the number of bytes needed to commit a | ||
| 685 | transaction */ | ||
| 686 | u64 bytes_may_use; /* number of bytes that may be used for | 687 | u64 bytes_may_use; /* number of bytes that may be used for |
| 687 | delalloc/allocations */ | 688 | delalloc/allocations */ |
| 688 | u64 bytes_delalloc; /* number of bytes currently reserved for | 689 | u64 disk_used; /* total bytes used on disk */ |
| 689 | delayed allocation */ | ||
| 690 | 690 | ||
| 691 | int full; /* indicates that we cannot allocate any more | 691 | int full; /* indicates that we cannot allocate any more |
| 692 | chunks for this space */ | 692 | chunks for this space */ |
| 693 | int force_alloc; /* set if we need to force a chunk alloc for | 693 | int force_alloc; /* set if we need to force a chunk alloc for |
| 694 | this space */ | 694 | this space */ |
| 695 | int force_delalloc; /* make people start doing filemap_flush until | ||
| 696 | we're under a threshold */ | ||
| 697 | 695 | ||
| 698 | struct list_head list; | 696 | struct list_head list; |
| 699 | 697 | ||
| 700 | /* for controlling how we free up space for allocations */ | ||
| 701 | wait_queue_head_t allocate_wait; | ||
| 702 | wait_queue_head_t flush_wait; | ||
| 703 | int allocating_chunk; | ||
| 704 | int flushing; | ||
| 705 | |||
| 706 | /* for block groups in our same type */ | 698 | /* for block groups in our same type */ |
| 707 | struct list_head block_groups; | 699 | struct list_head block_groups[BTRFS_NR_RAID_TYPES]; |
| 708 | spinlock_t lock; | 700 | spinlock_t lock; |
| 709 | struct rw_semaphore groups_sem; | 701 | struct rw_semaphore groups_sem; |
| 710 | atomic_t caching_threads; | 702 | atomic_t caching_threads; |
| 711 | }; | 703 | }; |
| 712 | 704 | ||
| 705 | struct btrfs_block_rsv { | ||
| 706 | u64 size; | ||
| 707 | u64 reserved; | ||
| 708 | u64 freed[2]; | ||
| 709 | struct btrfs_space_info *space_info; | ||
| 710 | struct list_head list; | ||
| 711 | spinlock_t lock; | ||
| 712 | atomic_t usage; | ||
| 713 | unsigned int priority:8; | ||
| 714 | unsigned int durable:1; | ||
| 715 | unsigned int refill_used:1; | ||
| 716 | unsigned int full:1; | ||
| 717 | }; | ||
| 718 | |||
| 713 | /* | 719 | /* |
| 714 | * free clusters are used to claim free space in relatively large chunks, | 720 | * free clusters are used to claim free space in relatively large chunks, |
| 715 | * allowing us to do less seeky writes. They are used for all metadata | 721 | * allowing us to do less seeky writes. They are used for all metadata |
| @@ -760,6 +766,7 @@ struct btrfs_block_group_cache { | |||
| 760 | spinlock_t lock; | 766 | spinlock_t lock; |
| 761 | u64 pinned; | 767 | u64 pinned; |
| 762 | u64 reserved; | 768 | u64 reserved; |
| 769 | u64 reserved_pinned; | ||
| 763 | u64 bytes_super; | 770 | u64 bytes_super; |
| 764 | u64 flags; | 771 | u64 flags; |
| 765 | u64 sectorsize; | 772 | u64 sectorsize; |
| @@ -825,6 +832,22 @@ struct btrfs_fs_info { | |||
| 825 | /* logical->physical extent mapping */ | 832 | /* logical->physical extent mapping */ |
| 826 | struct btrfs_mapping_tree mapping_tree; | 833 | struct btrfs_mapping_tree mapping_tree; |
| 827 | 834 | ||
| 835 | /* block reservation for extent, checksum and root tree */ | ||
| 836 | struct btrfs_block_rsv global_block_rsv; | ||
| 837 | /* block reservation for delay allocation */ | ||
| 838 | struct btrfs_block_rsv delalloc_block_rsv; | ||
| 839 | /* block reservation for metadata operations */ | ||
| 840 | struct btrfs_block_rsv trans_block_rsv; | ||
| 841 | /* block reservation for chunk tree */ | ||
| 842 | struct btrfs_block_rsv chunk_block_rsv; | ||
| 843 | |||
| 844 | struct btrfs_block_rsv empty_block_rsv; | ||
| 845 | |||
| 846 | /* list of block reservations that cross multiple transactions */ | ||
| 847 | struct list_head durable_block_rsv_list; | ||
| 848 | |||
| 849 | struct mutex durable_block_rsv_mutex; | ||
| 850 | |||
| 828 | u64 generation; | 851 | u64 generation; |
| 829 | u64 last_trans_committed; | 852 | u64 last_trans_committed; |
| 830 | 853 | ||
| @@ -927,7 +950,6 @@ struct btrfs_fs_info { | |||
| 927 | struct btrfs_workers endio_meta_write_workers; | 950 | struct btrfs_workers endio_meta_write_workers; |
| 928 | struct btrfs_workers endio_write_workers; | 951 | struct btrfs_workers endio_write_workers; |
| 929 | struct btrfs_workers submit_workers; | 952 | struct btrfs_workers submit_workers; |
| 930 | struct btrfs_workers enospc_workers; | ||
| 931 | /* | 953 | /* |
| 932 | * fixup workers take dirty pages that didn't properly go through | 954 | * fixup workers take dirty pages that didn't properly go through |
| 933 | * the cow mechanism and make them safe to write. It happens | 955 | * the cow mechanism and make them safe to write. It happens |
| @@ -943,6 +965,7 @@ struct btrfs_fs_info { | |||
| 943 | int do_barriers; | 965 | int do_barriers; |
| 944 | int closing; | 966 | int closing; |
| 945 | int log_root_recovering; | 967 | int log_root_recovering; |
| 968 | int enospc_unlink; | ||
| 946 | 969 | ||
| 947 | u64 total_pinned; | 970 | u64 total_pinned; |
| 948 | 971 | ||
| @@ -1012,6 +1035,9 @@ struct btrfs_root { | |||
| 1012 | struct completion kobj_unregister; | 1035 | struct completion kobj_unregister; |
| 1013 | struct mutex objectid_mutex; | 1036 | struct mutex objectid_mutex; |
| 1014 | 1037 | ||
| 1038 | spinlock_t accounting_lock; | ||
| 1039 | struct btrfs_block_rsv *block_rsv; | ||
| 1040 | |||
| 1015 | struct mutex log_mutex; | 1041 | struct mutex log_mutex; |
| 1016 | wait_queue_head_t log_writer_wait; | 1042 | wait_queue_head_t log_writer_wait; |
| 1017 | wait_queue_head_t log_commit_wait[2]; | 1043 | wait_queue_head_t log_commit_wait[2]; |
| @@ -1043,7 +1069,6 @@ struct btrfs_root { | |||
| 1043 | int ref_cows; | 1069 | int ref_cows; |
| 1044 | int track_dirty; | 1070 | int track_dirty; |
| 1045 | int in_radix; | 1071 | int in_radix; |
| 1046 | int clean_orphans; | ||
| 1047 | 1072 | ||
| 1048 | u64 defrag_trans_start; | 1073 | u64 defrag_trans_start; |
| 1049 | struct btrfs_key defrag_progress; | 1074 | struct btrfs_key defrag_progress; |
| @@ -1057,8 +1082,11 @@ struct btrfs_root { | |||
| 1057 | 1082 | ||
| 1058 | struct list_head root_list; | 1083 | struct list_head root_list; |
| 1059 | 1084 | ||
| 1060 | spinlock_t list_lock; | 1085 | spinlock_t orphan_lock; |
| 1061 | struct list_head orphan_list; | 1086 | struct list_head orphan_list; |
| 1087 | struct btrfs_block_rsv *orphan_block_rsv; | ||
| 1088 | int orphan_item_inserted; | ||
| 1089 | int orphan_cleanup_state; | ||
| 1062 | 1090 | ||
| 1063 | spinlock_t inode_lock; | 1091 | spinlock_t inode_lock; |
| 1064 | /* red-black tree that keeps track of in-memory inodes */ | 1092 | /* red-black tree that keeps track of in-memory inodes */ |
| @@ -1965,6 +1993,9 @@ void btrfs_put_block_group(struct btrfs_block_group_cache *cache); | |||
| 1965 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, | 1993 | int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans, |
| 1966 | struct btrfs_root *root, unsigned long count); | 1994 | struct btrfs_root *root, unsigned long count); |
| 1967 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); | 1995 | int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); |
| 1996 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
| 1997 | struct btrfs_root *root, u64 bytenr, | ||
| 1998 | u64 num_bytes, u64 *refs, u64 *flags); | ||
| 1968 | int btrfs_pin_extent(struct btrfs_root *root, | 1999 | int btrfs_pin_extent(struct btrfs_root *root, |
| 1969 | u64 bytenr, u64 num, int reserved); | 2000 | u64 bytenr, u64 num, int reserved); |
| 1970 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, | 2001 | int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, |
| @@ -1984,10 +2015,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 1984 | u64 parent, u64 root_objectid, | 2015 | u64 parent, u64 root_objectid, |
| 1985 | struct btrfs_disk_key *key, int level, | 2016 | struct btrfs_disk_key *key, int level, |
| 1986 | u64 hint, u64 empty_size); | 2017 | u64 hint, u64 empty_size); |
| 1987 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | 2018 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, |
| 1988 | struct btrfs_root *root, | 2019 | struct btrfs_root *root, |
| 1989 | u64 bytenr, u32 blocksize, | 2020 | struct extent_buffer *buf, |
| 1990 | u64 parent, u64 root_objectid, int level); | 2021 | u64 parent, int last_ref); |
| 1991 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 2022 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
| 1992 | struct btrfs_root *root, | 2023 | struct btrfs_root *root, |
| 1993 | u64 bytenr, u32 blocksize, | 2024 | u64 bytenr, u32 blocksize, |
| @@ -2041,27 +2072,49 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 2041 | u64 size); | 2072 | u64 size); |
| 2042 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, | 2073 | int btrfs_remove_block_group(struct btrfs_trans_handle *trans, |
| 2043 | struct btrfs_root *root, u64 group_start); | 2074 | struct btrfs_root *root, u64 group_start); |
| 2044 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | ||
| 2045 | struct btrfs_block_group_cache *group); | ||
| 2046 | |||
| 2047 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | 2075 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); |
| 2048 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2076 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
| 2049 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2077 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
| 2050 | 2078 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes); | |
| 2051 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); | 2079 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes); |
| 2052 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | 2080 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, |
| 2053 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | 2081 | struct btrfs_root *root, |
| 2054 | struct inode *inode, int num_items); | 2082 | int num_items, int *retries); |
| 2055 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | 2083 | void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans, |
| 2056 | struct inode *inode, int num_items); | 2084 | struct btrfs_root *root); |
| 2057 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2085 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, |
| 2058 | u64 bytes); | 2086 | struct inode *inode); |
| 2059 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2087 | void btrfs_orphan_release_metadata(struct inode *inode); |
| 2060 | struct inode *inode, u64 bytes); | 2088 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, |
| 2061 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | 2089 | struct btrfs_pending_snapshot *pending); |
| 2062 | u64 bytes); | 2090 | int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes); |
| 2063 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | 2091 | void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes); |
| 2064 | u64 bytes); | 2092 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes); |
| 2093 | void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes); | ||
| 2094 | void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv); | ||
| 2095 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root); | ||
| 2096 | void btrfs_free_block_rsv(struct btrfs_root *root, | ||
| 2097 | struct btrfs_block_rsv *rsv); | ||
| 2098 | void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info, | ||
| 2099 | struct btrfs_block_rsv *rsv); | ||
| 2100 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
| 2101 | struct btrfs_root *root, | ||
| 2102 | struct btrfs_block_rsv *block_rsv, | ||
| 2103 | u64 num_bytes, int *retries); | ||
| 2104 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
| 2105 | struct btrfs_root *root, | ||
| 2106 | struct btrfs_block_rsv *block_rsv, | ||
| 2107 | u64 min_reserved, int min_factor); | ||
| 2108 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
| 2109 | struct btrfs_block_rsv *dst_rsv, | ||
| 2110 | u64 num_bytes); | ||
| 2111 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
| 2112 | struct btrfs_block_rsv *block_rsv, | ||
| 2113 | u64 num_bytes); | ||
| 2114 | int btrfs_set_block_group_ro(struct btrfs_root *root, | ||
| 2115 | struct btrfs_block_group_cache *cache); | ||
| 2116 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
| 2117 | struct btrfs_block_group_cache *cache); | ||
| 2065 | /* ctree.c */ | 2118 | /* ctree.c */ |
| 2066 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, | 2119 | int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key, |
| 2067 | int level, int *slot); | 2120 | int level, int *slot); |
| @@ -2152,7 +2205,8 @@ static inline int btrfs_insert_empty_item(struct btrfs_trans_handle *trans, | |||
| 2152 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2205 | int btrfs_next_leaf(struct btrfs_root *root, struct btrfs_path *path); |
| 2153 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); | 2206 | int btrfs_prev_leaf(struct btrfs_root *root, struct btrfs_path *path); |
| 2154 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); | 2207 | int btrfs_leaf_free_space(struct btrfs_root *root, struct extent_buffer *leaf); |
| 2155 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref); | 2208 | int btrfs_drop_snapshot(struct btrfs_root *root, |
| 2209 | struct btrfs_block_rsv *block_rsv, int update_ref); | ||
| 2156 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, | 2210 | int btrfs_drop_subtree(struct btrfs_trans_handle *trans, |
| 2157 | struct btrfs_root *root, | 2211 | struct btrfs_root *root, |
| 2158 | struct extent_buffer *node, | 2212 | struct extent_buffer *node, |
| @@ -2245,6 +2299,12 @@ int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | |||
| 2245 | struct btrfs_root *root, | 2299 | struct btrfs_root *root, |
| 2246 | const char *name, int name_len, | 2300 | const char *name, int name_len, |
| 2247 | u64 inode_objectid, u64 ref_objectid, u64 *index); | 2301 | u64 inode_objectid, u64 ref_objectid, u64 *index); |
| 2302 | struct btrfs_inode_ref * | ||
| 2303 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
| 2304 | struct btrfs_root *root, | ||
| 2305 | struct btrfs_path *path, | ||
| 2306 | const char *name, int name_len, | ||
| 2307 | u64 inode_objectid, u64 ref_objectid, int mod); | ||
| 2248 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, | 2308 | int btrfs_insert_empty_inode(struct btrfs_trans_handle *trans, |
| 2249 | struct btrfs_root *root, | 2309 | struct btrfs_root *root, |
| 2250 | struct btrfs_path *path, u64 objectid); | 2310 | struct btrfs_path *path, u64 objectid); |
| @@ -2257,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
| 2257 | struct btrfs_root *root, u64 bytenr, u64 len); | 2317 | struct btrfs_root *root, u64 bytenr, u64 len); |
| 2258 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 2318 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
| 2259 | struct bio *bio, u32 *dst); | 2319 | struct bio *bio, u32 *dst); |
| 2320 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
| 2321 | struct bio *bio, u64 logical_offset, u32 *dst); | ||
| 2260 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 2322 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
| 2261 | struct btrfs_root *root, | 2323 | struct btrfs_root *root, |
| 2262 | u64 objectid, u64 pos, | 2324 | u64 objectid, u64 pos, |
| @@ -2311,6 +2373,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans, | |||
| 2311 | u32 min_type); | 2373 | u32 min_type); |
| 2312 | 2374 | ||
| 2313 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); | 2375 | int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput); |
| 2376 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput); | ||
| 2314 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, | 2377 | int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, |
| 2315 | struct extent_state **cached_state); | 2378 | struct extent_state **cached_state); |
| 2316 | int btrfs_writepages(struct address_space *mapping, | 2379 | int btrfs_writepages(struct address_space *mapping, |
| @@ -2349,10 +2412,20 @@ int btrfs_update_inode(struct btrfs_trans_handle *trans, | |||
| 2349 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); | 2412 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode); |
| 2350 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); | 2413 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode); |
| 2351 | void btrfs_orphan_cleanup(struct btrfs_root *root); | 2414 | void btrfs_orphan_cleanup(struct btrfs_root *root); |
| 2415 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 2416 | struct btrfs_pending_snapshot *pending, | ||
| 2417 | u64 *bytes_to_reserve); | ||
| 2418 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 2419 | struct btrfs_pending_snapshot *pending); | ||
| 2420 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
| 2421 | struct btrfs_root *root); | ||
| 2352 | int btrfs_cont_expand(struct inode *inode, loff_t size); | 2422 | int btrfs_cont_expand(struct inode *inode, loff_t size); |
| 2353 | int btrfs_invalidate_inodes(struct btrfs_root *root); | 2423 | int btrfs_invalidate_inodes(struct btrfs_root *root); |
| 2354 | void btrfs_add_delayed_iput(struct inode *inode); | 2424 | void btrfs_add_delayed_iput(struct inode *inode); |
| 2355 | void btrfs_run_delayed_iputs(struct btrfs_root *root); | 2425 | void btrfs_run_delayed_iputs(struct btrfs_root *root); |
| 2426 | int btrfs_prealloc_file_range(struct inode *inode, int mode, | ||
| 2427 | u64 start, u64 num_bytes, u64 min_size, | ||
| 2428 | loff_t actual_len, u64 *alloc_hint); | ||
| 2356 | extern const struct dentry_operations btrfs_dentry_operations; | 2429 | extern const struct dentry_operations btrfs_dentry_operations; |
| 2357 | 2430 | ||
| 2358 | /* ioctl.c */ | 2431 | /* ioctl.c */ |
| @@ -2409,4 +2482,12 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
| 2409 | struct btrfs_root *root); | 2482 | struct btrfs_root *root); |
| 2410 | int btrfs_recover_relocation(struct btrfs_root *root); | 2483 | int btrfs_recover_relocation(struct btrfs_root *root); |
| 2411 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); | 2484 | int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len); |
| 2485 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
| 2486 | struct btrfs_root *root, struct extent_buffer *buf, | ||
| 2487 | struct extent_buffer *cow); | ||
| 2488 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 2489 | struct btrfs_pending_snapshot *pending, | ||
| 2490 | u64 *bytes_to_reserve); | ||
| 2491 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 2492 | struct btrfs_pending_snapshot *pending); | ||
| 2412 | #endif | 2493 | #endif |
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 902ce507c4e3..e807b143b857 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c | |||
| @@ -319,107 +319,6 @@ out: | |||
| 319 | } | 319 | } |
| 320 | 320 | ||
| 321 | /* | 321 | /* |
| 322 | * helper function to lookup reference count and flags of extent. | ||
| 323 | * | ||
| 324 | * the head node for delayed ref is used to store the sum of all the | ||
| 325 | * reference count modifications queued up in the rbtree. the head | ||
| 326 | * node may also store the extent flags to set. This way you can check | ||
| 327 | * to see what the reference count and extent flags would be if all of | ||
| 328 | * the delayed refs are not processed. | ||
| 329 | */ | ||
| 330 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
| 331 | struct btrfs_root *root, u64 bytenr, | ||
| 332 | u64 num_bytes, u64 *refs, u64 *flags) | ||
| 333 | { | ||
| 334 | struct btrfs_delayed_ref_node *ref; | ||
| 335 | struct btrfs_delayed_ref_head *head; | ||
| 336 | struct btrfs_delayed_ref_root *delayed_refs; | ||
| 337 | struct btrfs_path *path; | ||
| 338 | struct btrfs_extent_item *ei; | ||
| 339 | struct extent_buffer *leaf; | ||
| 340 | struct btrfs_key key; | ||
| 341 | u32 item_size; | ||
| 342 | u64 num_refs; | ||
| 343 | u64 extent_flags; | ||
| 344 | int ret; | ||
| 345 | |||
| 346 | path = btrfs_alloc_path(); | ||
| 347 | if (!path) | ||
| 348 | return -ENOMEM; | ||
| 349 | |||
| 350 | key.objectid = bytenr; | ||
| 351 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 352 | key.offset = num_bytes; | ||
| 353 | delayed_refs = &trans->transaction->delayed_refs; | ||
| 354 | again: | ||
| 355 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
| 356 | &key, path, 0, 0); | ||
| 357 | if (ret < 0) | ||
| 358 | goto out; | ||
| 359 | |||
| 360 | if (ret == 0) { | ||
| 361 | leaf = path->nodes[0]; | ||
| 362 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 363 | if (item_size >= sizeof(*ei)) { | ||
| 364 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 365 | struct btrfs_extent_item); | ||
| 366 | num_refs = btrfs_extent_refs(leaf, ei); | ||
| 367 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
| 368 | } else { | ||
| 369 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
| 370 | struct btrfs_extent_item_v0 *ei0; | ||
| 371 | BUG_ON(item_size != sizeof(*ei0)); | ||
| 372 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
| 373 | struct btrfs_extent_item_v0); | ||
| 374 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
| 375 | /* FIXME: this isn't correct for data */ | ||
| 376 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
| 377 | #else | ||
| 378 | BUG(); | ||
| 379 | #endif | ||
| 380 | } | ||
| 381 | BUG_ON(num_refs == 0); | ||
| 382 | } else { | ||
| 383 | num_refs = 0; | ||
| 384 | extent_flags = 0; | ||
| 385 | ret = 0; | ||
| 386 | } | ||
| 387 | |||
| 388 | spin_lock(&delayed_refs->lock); | ||
| 389 | ref = find_ref_head(&delayed_refs->root, bytenr, NULL); | ||
| 390 | if (ref) { | ||
| 391 | head = btrfs_delayed_node_to_head(ref); | ||
| 392 | if (!mutex_trylock(&head->mutex)) { | ||
| 393 | atomic_inc(&ref->refs); | ||
| 394 | spin_unlock(&delayed_refs->lock); | ||
| 395 | |||
| 396 | btrfs_release_path(root->fs_info->extent_root, path); | ||
| 397 | |||
| 398 | mutex_lock(&head->mutex); | ||
| 399 | mutex_unlock(&head->mutex); | ||
| 400 | btrfs_put_delayed_ref(ref); | ||
| 401 | goto again; | ||
| 402 | } | ||
| 403 | if (head->extent_op && head->extent_op->update_flags) | ||
| 404 | extent_flags |= head->extent_op->flags_to_set; | ||
| 405 | else | ||
| 406 | BUG_ON(num_refs == 0); | ||
| 407 | |||
| 408 | num_refs += ref->ref_mod; | ||
| 409 | mutex_unlock(&head->mutex); | ||
| 410 | } | ||
| 411 | WARN_ON(num_refs == 0); | ||
| 412 | if (refs) | ||
| 413 | *refs = num_refs; | ||
| 414 | if (flags) | ||
| 415 | *flags = extent_flags; | ||
| 416 | out: | ||
| 417 | spin_unlock(&delayed_refs->lock); | ||
| 418 | btrfs_free_path(path); | ||
| 419 | return ret; | ||
| 420 | } | ||
| 421 | |||
| 422 | /* | ||
| 423 | * helper function to update an extent delayed ref in the | 322 | * helper function to update an extent delayed ref in the |
| 424 | * rbtree. existing and update must both have the same | 323 | * rbtree. existing and update must both have the same |
| 425 | * bytenr and parent | 324 | * bytenr and parent |
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index f6fc67ddad36..50e3cf92fbda 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h | |||
| @@ -167,9 +167,6 @@ int btrfs_add_delayed_extent_op(struct btrfs_trans_handle *trans, | |||
| 167 | struct btrfs_delayed_ref_head * | 167 | struct btrfs_delayed_ref_head * |
| 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); | 168 | btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr); |
| 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); | 169 | int btrfs_delayed_ref_pending(struct btrfs_trans_handle *trans, u64 bytenr); |
| 170 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
| 171 | struct btrfs_root *root, u64 bytenr, | ||
| 172 | u64 num_bytes, u64 *refs, u64 *flags); | ||
| 173 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, | 170 | int btrfs_update_delayed_ref(struct btrfs_trans_handle *trans, |
| 174 | u64 bytenr, u64 num_bytes, u64 orig_parent, | 171 | u64 bytenr, u64 num_bytes, u64 orig_parent, |
| 175 | u64 parent, u64 orig_ref_root, u64 ref_root, | 172 | u64 parent, u64 orig_ref_root, u64 ref_root, |
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index feca04197d02..f3b287c22caf 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
| @@ -74,6 +74,11 @@ struct async_submit_bio { | |||
| 74 | int rw; | 74 | int rw; |
| 75 | int mirror_num; | 75 | int mirror_num; |
| 76 | unsigned long bio_flags; | 76 | unsigned long bio_flags; |
| 77 | /* | ||
| 78 | * bio_offset is optional, can be used if the pages in the bio | ||
| 79 | * can't tell us where in the file the bio should go | ||
| 80 | */ | ||
| 81 | u64 bio_offset; | ||
| 77 | struct btrfs_work work; | 82 | struct btrfs_work work; |
| 78 | }; | 83 | }; |
| 79 | 84 | ||
| @@ -534,7 +539,8 @@ static void run_one_async_start(struct btrfs_work *work) | |||
| 534 | async = container_of(work, struct async_submit_bio, work); | 539 | async = container_of(work, struct async_submit_bio, work); |
| 535 | fs_info = BTRFS_I(async->inode)->root->fs_info; | 540 | fs_info = BTRFS_I(async->inode)->root->fs_info; |
| 536 | async->submit_bio_start(async->inode, async->rw, async->bio, | 541 | async->submit_bio_start(async->inode, async->rw, async->bio, |
| 537 | async->mirror_num, async->bio_flags); | 542 | async->mirror_num, async->bio_flags, |
| 543 | async->bio_offset); | ||
| 538 | } | 544 | } |
| 539 | 545 | ||
| 540 | static void run_one_async_done(struct btrfs_work *work) | 546 | static void run_one_async_done(struct btrfs_work *work) |
| @@ -556,7 +562,8 @@ static void run_one_async_done(struct btrfs_work *work) | |||
| 556 | wake_up(&fs_info->async_submit_wait); | 562 | wake_up(&fs_info->async_submit_wait); |
| 557 | 563 | ||
| 558 | async->submit_bio_done(async->inode, async->rw, async->bio, | 564 | async->submit_bio_done(async->inode, async->rw, async->bio, |
| 559 | async->mirror_num, async->bio_flags); | 565 | async->mirror_num, async->bio_flags, |
| 566 | async->bio_offset); | ||
| 560 | } | 567 | } |
| 561 | 568 | ||
| 562 | static void run_one_async_free(struct btrfs_work *work) | 569 | static void run_one_async_free(struct btrfs_work *work) |
| @@ -570,6 +577,7 @@ static void run_one_async_free(struct btrfs_work *work) | |||
| 570 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 577 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
| 571 | int rw, struct bio *bio, int mirror_num, | 578 | int rw, struct bio *bio, int mirror_num, |
| 572 | unsigned long bio_flags, | 579 | unsigned long bio_flags, |
| 580 | u64 bio_offset, | ||
| 573 | extent_submit_bio_hook_t *submit_bio_start, | 581 | extent_submit_bio_hook_t *submit_bio_start, |
| 574 | extent_submit_bio_hook_t *submit_bio_done) | 582 | extent_submit_bio_hook_t *submit_bio_done) |
| 575 | { | 583 | { |
| @@ -592,6 +600,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
| 592 | 600 | ||
| 593 | async->work.flags = 0; | 601 | async->work.flags = 0; |
| 594 | async->bio_flags = bio_flags; | 602 | async->bio_flags = bio_flags; |
| 603 | async->bio_offset = bio_offset; | ||
| 595 | 604 | ||
| 596 | atomic_inc(&fs_info->nr_async_submits); | 605 | atomic_inc(&fs_info->nr_async_submits); |
| 597 | 606 | ||
| @@ -627,7 +636,8 @@ static int btree_csum_one_bio(struct bio *bio) | |||
| 627 | 636 | ||
| 628 | static int __btree_submit_bio_start(struct inode *inode, int rw, | 637 | static int __btree_submit_bio_start(struct inode *inode, int rw, |
| 629 | struct bio *bio, int mirror_num, | 638 | struct bio *bio, int mirror_num, |
| 630 | unsigned long bio_flags) | 639 | unsigned long bio_flags, |
| 640 | u64 bio_offset) | ||
| 631 | { | 641 | { |
| 632 | /* | 642 | /* |
| 633 | * when we're called for a write, we're already in the async | 643 | * when we're called for a write, we're already in the async |
| @@ -638,7 +648,8 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, | |||
| 638 | } | 648 | } |
| 639 | 649 | ||
| 640 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 650 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
| 641 | int mirror_num, unsigned long bio_flags) | 651 | int mirror_num, unsigned long bio_flags, |
| 652 | u64 bio_offset) | ||
| 642 | { | 653 | { |
| 643 | /* | 654 | /* |
| 644 | * when we're called for a write, we're already in the async | 655 | * when we're called for a write, we're already in the async |
| @@ -648,7 +659,8 @@ static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 648 | } | 659 | } |
| 649 | 660 | ||
| 650 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 661 | static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 651 | int mirror_num, unsigned long bio_flags) | 662 | int mirror_num, unsigned long bio_flags, |
| 663 | u64 bio_offset) | ||
| 652 | { | 664 | { |
| 653 | int ret; | 665 | int ret; |
| 654 | 666 | ||
| @@ -671,6 +683,7 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 671 | */ | 683 | */ |
| 672 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 684 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
| 673 | inode, rw, bio, mirror_num, 0, | 685 | inode, rw, bio, mirror_num, 0, |
| 686 | bio_offset, | ||
| 674 | __btree_submit_bio_start, | 687 | __btree_submit_bio_start, |
| 675 | __btree_submit_bio_done); | 688 | __btree_submit_bio_done); |
| 676 | } | 689 | } |
| @@ -894,7 +907,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 894 | root->ref_cows = 0; | 907 | root->ref_cows = 0; |
| 895 | root->track_dirty = 0; | 908 | root->track_dirty = 0; |
| 896 | root->in_radix = 0; | 909 | root->in_radix = 0; |
| 897 | root->clean_orphans = 0; | 910 | root->orphan_item_inserted = 0; |
| 911 | root->orphan_cleanup_state = 0; | ||
| 898 | 912 | ||
| 899 | root->fs_info = fs_info; | 913 | root->fs_info = fs_info; |
| 900 | root->objectid = objectid; | 914 | root->objectid = objectid; |
| @@ -903,13 +917,16 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
| 903 | root->name = NULL; | 917 | root->name = NULL; |
| 904 | root->in_sysfs = 0; | 918 | root->in_sysfs = 0; |
| 905 | root->inode_tree = RB_ROOT; | 919 | root->inode_tree = RB_ROOT; |
| 920 | root->block_rsv = NULL; | ||
| 921 | root->orphan_block_rsv = NULL; | ||
| 906 | 922 | ||
| 907 | INIT_LIST_HEAD(&root->dirty_list); | 923 | INIT_LIST_HEAD(&root->dirty_list); |
| 908 | INIT_LIST_HEAD(&root->orphan_list); | 924 | INIT_LIST_HEAD(&root->orphan_list); |
| 909 | INIT_LIST_HEAD(&root->root_list); | 925 | INIT_LIST_HEAD(&root->root_list); |
| 910 | spin_lock_init(&root->node_lock); | 926 | spin_lock_init(&root->node_lock); |
| 911 | spin_lock_init(&root->list_lock); | 927 | spin_lock_init(&root->orphan_lock); |
| 912 | spin_lock_init(&root->inode_lock); | 928 | spin_lock_init(&root->inode_lock); |
| 929 | spin_lock_init(&root->accounting_lock); | ||
| 913 | mutex_init(&root->objectid_mutex); | 930 | mutex_init(&root->objectid_mutex); |
| 914 | mutex_init(&root->log_mutex); | 931 | mutex_init(&root->log_mutex); |
| 915 | init_waitqueue_head(&root->log_writer_wait); | 932 | init_waitqueue_head(&root->log_writer_wait); |
| @@ -968,42 +985,6 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
| 968 | return 0; | 985 | return 0; |
| 969 | } | 986 | } |
| 970 | 987 | ||
| 971 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 972 | struct btrfs_fs_info *fs_info) | ||
| 973 | { | ||
| 974 | struct extent_buffer *eb; | ||
| 975 | struct btrfs_root *log_root_tree = fs_info->log_root_tree; | ||
| 976 | u64 start = 0; | ||
| 977 | u64 end = 0; | ||
| 978 | int ret; | ||
| 979 | |||
| 980 | if (!log_root_tree) | ||
| 981 | return 0; | ||
| 982 | |||
| 983 | while (1) { | ||
| 984 | ret = find_first_extent_bit(&log_root_tree->dirty_log_pages, | ||
| 985 | 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW); | ||
| 986 | if (ret) | ||
| 987 | break; | ||
| 988 | |||
| 989 | clear_extent_bits(&log_root_tree->dirty_log_pages, start, end, | ||
| 990 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | ||
| 991 | } | ||
| 992 | eb = fs_info->log_root_tree->node; | ||
| 993 | |||
| 994 | WARN_ON(btrfs_header_level(eb) != 0); | ||
| 995 | WARN_ON(btrfs_header_nritems(eb) != 0); | ||
| 996 | |||
| 997 | ret = btrfs_free_reserved_extent(fs_info->tree_root, | ||
| 998 | eb->start, eb->len); | ||
| 999 | BUG_ON(ret); | ||
| 1000 | |||
| 1001 | free_extent_buffer(eb); | ||
| 1002 | kfree(fs_info->log_root_tree); | ||
| 1003 | fs_info->log_root_tree = NULL; | ||
| 1004 | return 0; | ||
| 1005 | } | ||
| 1006 | |||
| 1007 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, | 988 | static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans, |
| 1008 | struct btrfs_fs_info *fs_info) | 989 | struct btrfs_fs_info *fs_info) |
| 1009 | { | 990 | { |
| @@ -1191,19 +1172,23 @@ again: | |||
| 1191 | if (root) | 1172 | if (root) |
| 1192 | return root; | 1173 | return root; |
| 1193 | 1174 | ||
| 1194 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
| 1195 | if (ret == 0) | ||
| 1196 | ret = -ENOENT; | ||
| 1197 | if (ret < 0) | ||
| 1198 | return ERR_PTR(ret); | ||
| 1199 | |||
| 1200 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); | 1175 | root = btrfs_read_fs_root_no_radix(fs_info->tree_root, location); |
| 1201 | if (IS_ERR(root)) | 1176 | if (IS_ERR(root)) |
| 1202 | return root; | 1177 | return root; |
| 1203 | 1178 | ||
| 1204 | WARN_ON(btrfs_root_refs(&root->root_item) == 0); | ||
| 1205 | set_anon_super(&root->anon_super, NULL); | 1179 | set_anon_super(&root->anon_super, NULL); |
| 1206 | 1180 | ||
| 1181 | if (btrfs_root_refs(&root->root_item) == 0) { | ||
| 1182 | ret = -ENOENT; | ||
| 1183 | goto fail; | ||
| 1184 | } | ||
| 1185 | |||
| 1186 | ret = btrfs_find_orphan_item(fs_info->tree_root, location->objectid); | ||
| 1187 | if (ret < 0) | ||
| 1188 | goto fail; | ||
| 1189 | if (ret == 0) | ||
| 1190 | root->orphan_item_inserted = 1; | ||
| 1191 | |||
| 1207 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); | 1192 | ret = radix_tree_preload(GFP_NOFS & ~__GFP_HIGHMEM); |
| 1208 | if (ret) | 1193 | if (ret) |
| 1209 | goto fail; | 1194 | goto fail; |
| @@ -1212,10 +1197,9 @@ again: | |||
| 1212 | ret = radix_tree_insert(&fs_info->fs_roots_radix, | 1197 | ret = radix_tree_insert(&fs_info->fs_roots_radix, |
| 1213 | (unsigned long)root->root_key.objectid, | 1198 | (unsigned long)root->root_key.objectid, |
| 1214 | root); | 1199 | root); |
| 1215 | if (ret == 0) { | 1200 | if (ret == 0) |
| 1216 | root->in_radix = 1; | 1201 | root->in_radix = 1; |
| 1217 | root->clean_orphans = 1; | 1202 | |
| 1218 | } | ||
| 1219 | spin_unlock(&fs_info->fs_roots_radix_lock); | 1203 | spin_unlock(&fs_info->fs_roots_radix_lock); |
| 1220 | radix_tree_preload_end(); | 1204 | radix_tree_preload_end(); |
| 1221 | if (ret) { | 1205 | if (ret) { |
| @@ -1461,10 +1445,6 @@ static int cleaner_kthread(void *arg) | |||
| 1461 | struct btrfs_root *root = arg; | 1445 | struct btrfs_root *root = arg; |
| 1462 | 1446 | ||
| 1463 | do { | 1447 | do { |
| 1464 | smp_mb(); | ||
| 1465 | if (root->fs_info->closing) | ||
| 1466 | break; | ||
| 1467 | |||
| 1468 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1448 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
| 1469 | 1449 | ||
| 1470 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && | 1450 | if (!(root->fs_info->sb->s_flags & MS_RDONLY) && |
| @@ -1477,11 +1457,9 @@ static int cleaner_kthread(void *arg) | |||
| 1477 | if (freezing(current)) { | 1457 | if (freezing(current)) { |
| 1478 | refrigerator(); | 1458 | refrigerator(); |
| 1479 | } else { | 1459 | } else { |
| 1480 | smp_mb(); | ||
| 1481 | if (root->fs_info->closing) | ||
| 1482 | break; | ||
| 1483 | set_current_state(TASK_INTERRUPTIBLE); | 1460 | set_current_state(TASK_INTERRUPTIBLE); |
| 1484 | schedule(); | 1461 | if (!kthread_should_stop()) |
| 1462 | schedule(); | ||
| 1485 | __set_current_state(TASK_RUNNING); | 1463 | __set_current_state(TASK_RUNNING); |
| 1486 | } | 1464 | } |
| 1487 | } while (!kthread_should_stop()); | 1465 | } while (!kthread_should_stop()); |
| @@ -1493,36 +1471,40 @@ static int transaction_kthread(void *arg) | |||
| 1493 | struct btrfs_root *root = arg; | 1471 | struct btrfs_root *root = arg; |
| 1494 | struct btrfs_trans_handle *trans; | 1472 | struct btrfs_trans_handle *trans; |
| 1495 | struct btrfs_transaction *cur; | 1473 | struct btrfs_transaction *cur; |
| 1474 | u64 transid; | ||
| 1496 | unsigned long now; | 1475 | unsigned long now; |
| 1497 | unsigned long delay; | 1476 | unsigned long delay; |
| 1498 | int ret; | 1477 | int ret; |
| 1499 | 1478 | ||
| 1500 | do { | 1479 | do { |
| 1501 | smp_mb(); | ||
| 1502 | if (root->fs_info->closing) | ||
| 1503 | break; | ||
| 1504 | |||
| 1505 | delay = HZ * 30; | 1480 | delay = HZ * 30; |
| 1506 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1481 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
| 1507 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1482 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
| 1508 | 1483 | ||
| 1509 | mutex_lock(&root->fs_info->trans_mutex); | 1484 | spin_lock(&root->fs_info->new_trans_lock); |
| 1510 | cur = root->fs_info->running_transaction; | 1485 | cur = root->fs_info->running_transaction; |
| 1511 | if (!cur) { | 1486 | if (!cur) { |
| 1512 | mutex_unlock(&root->fs_info->trans_mutex); | 1487 | spin_unlock(&root->fs_info->new_trans_lock); |
| 1513 | goto sleep; | 1488 | goto sleep; |
| 1514 | } | 1489 | } |
| 1515 | 1490 | ||
| 1516 | now = get_seconds(); | 1491 | now = get_seconds(); |
| 1517 | if (now < cur->start_time || now - cur->start_time < 30) { | 1492 | if (!cur->blocked && |
| 1518 | mutex_unlock(&root->fs_info->trans_mutex); | 1493 | (now < cur->start_time || now - cur->start_time < 30)) { |
| 1494 | spin_unlock(&root->fs_info->new_trans_lock); | ||
| 1519 | delay = HZ * 5; | 1495 | delay = HZ * 5; |
| 1520 | goto sleep; | 1496 | goto sleep; |
| 1521 | } | 1497 | } |
| 1522 | mutex_unlock(&root->fs_info->trans_mutex); | 1498 | transid = cur->transid; |
| 1523 | trans = btrfs_start_transaction(root, 1); | 1499 | spin_unlock(&root->fs_info->new_trans_lock); |
| 1524 | ret = btrfs_commit_transaction(trans, root); | ||
| 1525 | 1500 | ||
| 1501 | trans = btrfs_join_transaction(root, 1); | ||
| 1502 | if (transid == trans->transid) { | ||
| 1503 | ret = btrfs_commit_transaction(trans, root); | ||
| 1504 | BUG_ON(ret); | ||
| 1505 | } else { | ||
| 1506 | btrfs_end_transaction(trans, root); | ||
| 1507 | } | ||
| 1526 | sleep: | 1508 | sleep: |
| 1527 | wake_up_process(root->fs_info->cleaner_kthread); | 1509 | wake_up_process(root->fs_info->cleaner_kthread); |
| 1528 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); | 1510 | mutex_unlock(&root->fs_info->transaction_kthread_mutex); |
| @@ -1530,10 +1512,10 @@ sleep: | |||
| 1530 | if (freezing(current)) { | 1512 | if (freezing(current)) { |
| 1531 | refrigerator(); | 1513 | refrigerator(); |
| 1532 | } else { | 1514 | } else { |
| 1533 | if (root->fs_info->closing) | ||
| 1534 | break; | ||
| 1535 | set_current_state(TASK_INTERRUPTIBLE); | 1515 | set_current_state(TASK_INTERRUPTIBLE); |
| 1536 | schedule_timeout(delay); | 1516 | if (!kthread_should_stop() && |
| 1517 | !btrfs_transaction_blocked(root->fs_info)) | ||
| 1518 | schedule_timeout(delay); | ||
| 1537 | __set_current_state(TASK_RUNNING); | 1519 | __set_current_state(TASK_RUNNING); |
| 1538 | } | 1520 | } |
| 1539 | } while (!kthread_should_stop()); | 1521 | } while (!kthread_should_stop()); |
| @@ -1620,6 +1602,13 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1620 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); | 1602 | INIT_LIST_HEAD(&fs_info->dirty_cowonly_roots); |
| 1621 | INIT_LIST_HEAD(&fs_info->space_info); | 1603 | INIT_LIST_HEAD(&fs_info->space_info); |
| 1622 | btrfs_mapping_init(&fs_info->mapping_tree); | 1604 | btrfs_mapping_init(&fs_info->mapping_tree); |
| 1605 | btrfs_init_block_rsv(&fs_info->global_block_rsv); | ||
| 1606 | btrfs_init_block_rsv(&fs_info->delalloc_block_rsv); | ||
| 1607 | btrfs_init_block_rsv(&fs_info->trans_block_rsv); | ||
| 1608 | btrfs_init_block_rsv(&fs_info->chunk_block_rsv); | ||
| 1609 | btrfs_init_block_rsv(&fs_info->empty_block_rsv); | ||
| 1610 | INIT_LIST_HEAD(&fs_info->durable_block_rsv_list); | ||
| 1611 | mutex_init(&fs_info->durable_block_rsv_mutex); | ||
| 1623 | atomic_set(&fs_info->nr_async_submits, 0); | 1612 | atomic_set(&fs_info->nr_async_submits, 0); |
| 1624 | atomic_set(&fs_info->async_delalloc_pages, 0); | 1613 | atomic_set(&fs_info->async_delalloc_pages, 0); |
| 1625 | atomic_set(&fs_info->async_submit_draining, 0); | 1614 | atomic_set(&fs_info->async_submit_draining, 0); |
| @@ -1759,9 +1748,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1759 | min_t(u64, fs_devices->num_devices, | 1748 | min_t(u64, fs_devices->num_devices, |
| 1760 | fs_info->thread_pool_size), | 1749 | fs_info->thread_pool_size), |
| 1761 | &fs_info->generic_worker); | 1750 | &fs_info->generic_worker); |
| 1762 | btrfs_init_workers(&fs_info->enospc_workers, "enospc", | ||
| 1763 | fs_info->thread_pool_size, | ||
| 1764 | &fs_info->generic_worker); | ||
| 1765 | 1751 | ||
| 1766 | /* a higher idle thresh on the submit workers makes it much more | 1752 | /* a higher idle thresh on the submit workers makes it much more |
| 1767 | * likely that bios will be send down in a sane order to the | 1753 | * likely that bios will be send down in a sane order to the |
| @@ -1809,7 +1795,6 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1809 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); | 1795 | btrfs_start_workers(&fs_info->endio_meta_workers, 1); |
| 1810 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); | 1796 | btrfs_start_workers(&fs_info->endio_meta_write_workers, 1); |
| 1811 | btrfs_start_workers(&fs_info->endio_write_workers, 1); | 1797 | btrfs_start_workers(&fs_info->endio_write_workers, 1); |
| 1812 | btrfs_start_workers(&fs_info->enospc_workers, 1); | ||
| 1813 | 1798 | ||
| 1814 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); | 1799 | fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super); |
| 1815 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 1800 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
| @@ -1912,17 +1897,18 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1912 | 1897 | ||
| 1913 | csum_root->track_dirty = 1; | 1898 | csum_root->track_dirty = 1; |
| 1914 | 1899 | ||
| 1900 | fs_info->generation = generation; | ||
| 1901 | fs_info->last_trans_committed = generation; | ||
| 1902 | fs_info->data_alloc_profile = (u64)-1; | ||
| 1903 | fs_info->metadata_alloc_profile = (u64)-1; | ||
| 1904 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
| 1905 | |||
| 1915 | ret = btrfs_read_block_groups(extent_root); | 1906 | ret = btrfs_read_block_groups(extent_root); |
| 1916 | if (ret) { | 1907 | if (ret) { |
| 1917 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); | 1908 | printk(KERN_ERR "Failed to read block groups: %d\n", ret); |
| 1918 | goto fail_block_groups; | 1909 | goto fail_block_groups; |
| 1919 | } | 1910 | } |
| 1920 | 1911 | ||
| 1921 | fs_info->generation = generation; | ||
| 1922 | fs_info->last_trans_committed = generation; | ||
| 1923 | fs_info->data_alloc_profile = (u64)-1; | ||
| 1924 | fs_info->metadata_alloc_profile = (u64)-1; | ||
| 1925 | fs_info->system_alloc_profile = fs_info->metadata_alloc_profile; | ||
| 1926 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, | 1912 | fs_info->cleaner_kthread = kthread_run(cleaner_kthread, tree_root, |
| 1927 | "btrfs-cleaner"); | 1913 | "btrfs-cleaner"); |
| 1928 | if (IS_ERR(fs_info->cleaner_kthread)) | 1914 | if (IS_ERR(fs_info->cleaner_kthread)) |
| @@ -1977,6 +1963,9 @@ struct btrfs_root *open_ctree(struct super_block *sb, | |||
| 1977 | BUG_ON(ret); | 1963 | BUG_ON(ret); |
| 1978 | 1964 | ||
| 1979 | if (!(sb->s_flags & MS_RDONLY)) { | 1965 | if (!(sb->s_flags & MS_RDONLY)) { |
| 1966 | ret = btrfs_cleanup_fs_roots(fs_info); | ||
| 1967 | BUG_ON(ret); | ||
| 1968 | |||
| 1980 | ret = btrfs_recover_relocation(tree_root); | 1969 | ret = btrfs_recover_relocation(tree_root); |
| 1981 | if (ret < 0) { | 1970 | if (ret < 0) { |
| 1982 | printk(KERN_WARNING | 1971 | printk(KERN_WARNING |
| @@ -2040,7 +2029,6 @@ fail_sb_buffer: | |||
| 2040 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2029 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 2041 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2030 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 2042 | btrfs_stop_workers(&fs_info->submit_workers); | 2031 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2043 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 2044 | fail_iput: | 2032 | fail_iput: |
| 2045 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); | 2033 | invalidate_inode_pages2(fs_info->btree_inode->i_mapping); |
| 2046 | iput(fs_info->btree_inode); | 2034 | iput(fs_info->btree_inode); |
| @@ -2405,11 +2393,11 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
| 2405 | down_write(&root->fs_info->cleanup_work_sem); | 2393 | down_write(&root->fs_info->cleanup_work_sem); |
| 2406 | up_write(&root->fs_info->cleanup_work_sem); | 2394 | up_write(&root->fs_info->cleanup_work_sem); |
| 2407 | 2395 | ||
| 2408 | trans = btrfs_start_transaction(root, 1); | 2396 | trans = btrfs_join_transaction(root, 1); |
| 2409 | ret = btrfs_commit_transaction(trans, root); | 2397 | ret = btrfs_commit_transaction(trans, root); |
| 2410 | BUG_ON(ret); | 2398 | BUG_ON(ret); |
| 2411 | /* run commit again to drop the original snapshot */ | 2399 | /* run commit again to drop the original snapshot */ |
| 2412 | trans = btrfs_start_transaction(root, 1); | 2400 | trans = btrfs_join_transaction(root, 1); |
| 2413 | btrfs_commit_transaction(trans, root); | 2401 | btrfs_commit_transaction(trans, root); |
| 2414 | ret = btrfs_write_and_wait_transaction(NULL, root); | 2402 | ret = btrfs_write_and_wait_transaction(NULL, root); |
| 2415 | BUG_ON(ret); | 2403 | BUG_ON(ret); |
| @@ -2426,15 +2414,15 @@ int close_ctree(struct btrfs_root *root) | |||
| 2426 | fs_info->closing = 1; | 2414 | fs_info->closing = 1; |
| 2427 | smp_mb(); | 2415 | smp_mb(); |
| 2428 | 2416 | ||
| 2429 | kthread_stop(root->fs_info->transaction_kthread); | ||
| 2430 | kthread_stop(root->fs_info->cleaner_kthread); | ||
| 2431 | |||
| 2432 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { | 2417 | if (!(fs_info->sb->s_flags & MS_RDONLY)) { |
| 2433 | ret = btrfs_commit_super(root); | 2418 | ret = btrfs_commit_super(root); |
| 2434 | if (ret) | 2419 | if (ret) |
| 2435 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); | 2420 | printk(KERN_ERR "btrfs: commit super ret %d\n", ret); |
| 2436 | } | 2421 | } |
| 2437 | 2422 | ||
| 2423 | kthread_stop(root->fs_info->transaction_kthread); | ||
| 2424 | kthread_stop(root->fs_info->cleaner_kthread); | ||
| 2425 | |||
| 2438 | fs_info->closing = 2; | 2426 | fs_info->closing = 2; |
| 2439 | smp_mb(); | 2427 | smp_mb(); |
| 2440 | 2428 | ||
| @@ -2473,7 +2461,6 @@ int close_ctree(struct btrfs_root *root) | |||
| 2473 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); | 2461 | btrfs_stop_workers(&fs_info->endio_meta_write_workers); |
| 2474 | btrfs_stop_workers(&fs_info->endio_write_workers); | 2462 | btrfs_stop_workers(&fs_info->endio_write_workers); |
| 2475 | btrfs_stop_workers(&fs_info->submit_workers); | 2463 | btrfs_stop_workers(&fs_info->submit_workers); |
| 2476 | btrfs_stop_workers(&fs_info->enospc_workers); | ||
| 2477 | 2464 | ||
| 2478 | btrfs_close_devices(fs_info->fs_devices); | 2465 | btrfs_close_devices(fs_info->fs_devices); |
| 2479 | btrfs_mapping_tree_free(&fs_info->mapping_tree); | 2466 | btrfs_mapping_tree_free(&fs_info->mapping_tree); |
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index c958ecbc1916..88e825a0bf21 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h | |||
| @@ -87,7 +87,7 @@ int btrfs_bio_wq_end_io(struct btrfs_fs_info *info, struct bio *bio, | |||
| 87 | int metadata); | 87 | int metadata); |
| 88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | 88 | int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, |
| 89 | int rw, struct bio *bio, int mirror_num, | 89 | int rw, struct bio *bio, int mirror_num, |
| 90 | unsigned long bio_flags, | 90 | unsigned long bio_flags, u64 bio_offset, |
| 91 | extent_submit_bio_hook_t *submit_bio_start, | 91 | extent_submit_bio_hook_t *submit_bio_start, |
| 92 | extent_submit_bio_hook_t *submit_bio_done); | 92 | extent_submit_bio_hook_t *submit_bio_done); |
| 93 | 93 | ||
| @@ -95,8 +95,6 @@ int btrfs_congested_async(struct btrfs_fs_info *info, int iodone); | |||
| 95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); | 95 | unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info); |
| 96 | int btrfs_write_tree_block(struct extent_buffer *buf); | 96 | int btrfs_write_tree_block(struct extent_buffer *buf); |
| 97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); | 97 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf); |
| 98 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 99 | struct btrfs_fs_info *fs_info); | ||
| 100 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, | 98 | int btrfs_init_log_root_tree(struct btrfs_trans_handle *trans, |
| 101 | struct btrfs_fs_info *fs_info); | 99 | struct btrfs_fs_info *fs_info); |
| 102 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, | 100 | int btrfs_add_log_tree(struct btrfs_trans_handle *trans, |
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index c6a4f459ad76..b9080d71991a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
| @@ -35,10 +35,9 @@ | |||
| 35 | 35 | ||
| 36 | static int update_block_group(struct btrfs_trans_handle *trans, | 36 | static int update_block_group(struct btrfs_trans_handle *trans, |
| 37 | struct btrfs_root *root, | 37 | struct btrfs_root *root, |
| 38 | u64 bytenr, u64 num_bytes, int alloc, | 38 | u64 bytenr, u64 num_bytes, int alloc); |
| 39 | int mark_free); | 39 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, |
| 40 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 40 | u64 num_bytes, int reserve, int sinfo); |
| 41 | u64 num_bytes, int reserve); | ||
| 42 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | 41 | static int __btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 43 | struct btrfs_root *root, | 42 | struct btrfs_root *root, |
| 44 | u64 bytenr, u64 num_bytes, u64 parent, | 43 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -61,12 +60,6 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 61 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 60 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 62 | struct btrfs_root *extent_root, u64 alloc_bytes, | 61 | struct btrfs_root *extent_root, u64 alloc_bytes, |
| 63 | u64 flags, int force); | 62 | u64 flags, int force); |
| 64 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | ||
| 65 | struct btrfs_root *root, | ||
| 66 | struct btrfs_path *path, | ||
| 67 | u64 bytenr, u64 num_bytes, | ||
| 68 | int is_data, int reserved, | ||
| 69 | struct extent_buffer **must_clean); | ||
| 70 | static int find_next_key(struct btrfs_path *path, int level, | 63 | static int find_next_key(struct btrfs_path *path, int level, |
| 71 | struct btrfs_key *key); | 64 | struct btrfs_key *key); |
| 72 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | 65 | static void dump_space_info(struct btrfs_space_info *info, u64 bytes, |
| @@ -91,8 +84,12 @@ void btrfs_get_block_group(struct btrfs_block_group_cache *cache) | |||
| 91 | 84 | ||
| 92 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) | 85 | void btrfs_put_block_group(struct btrfs_block_group_cache *cache) |
| 93 | { | 86 | { |
| 94 | if (atomic_dec_and_test(&cache->count)) | 87 | if (atomic_dec_and_test(&cache->count)) { |
| 88 | WARN_ON(cache->pinned > 0); | ||
| 89 | WARN_ON(cache->reserved > 0); | ||
| 90 | WARN_ON(cache->reserved_pinned > 0); | ||
| 95 | kfree(cache); | 91 | kfree(cache); |
| 92 | } | ||
| 96 | } | 93 | } |
| 97 | 94 | ||
| 98 | /* | 95 | /* |
| @@ -319,7 +316,7 @@ static int caching_kthread(void *data) | |||
| 319 | 316 | ||
| 320 | exclude_super_stripes(extent_root, block_group); | 317 | exclude_super_stripes(extent_root, block_group); |
| 321 | spin_lock(&block_group->space_info->lock); | 318 | spin_lock(&block_group->space_info->lock); |
| 322 | block_group->space_info->bytes_super += block_group->bytes_super; | 319 | block_group->space_info->bytes_readonly += block_group->bytes_super; |
| 323 | spin_unlock(&block_group->space_info->lock); | 320 | spin_unlock(&block_group->space_info->lock); |
| 324 | 321 | ||
| 325 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); | 322 | last = max_t(u64, block_group->key.objectid, BTRFS_SUPER_INFO_OFFSET); |
| @@ -507,6 +504,9 @@ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info, | |||
| 507 | struct list_head *head = &info->space_info; | 504 | struct list_head *head = &info->space_info; |
| 508 | struct btrfs_space_info *found; | 505 | struct btrfs_space_info *found; |
| 509 | 506 | ||
| 507 | flags &= BTRFS_BLOCK_GROUP_DATA | BTRFS_BLOCK_GROUP_SYSTEM | | ||
| 508 | BTRFS_BLOCK_GROUP_METADATA; | ||
| 509 | |||
| 510 | rcu_read_lock(); | 510 | rcu_read_lock(); |
| 511 | list_for_each_entry_rcu(found, head, list) { | 511 | list_for_each_entry_rcu(found, head, list) { |
| 512 | if (found->flags == flags) { | 512 | if (found->flags == flags) { |
| @@ -610,6 +610,113 @@ int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 610 | } | 610 | } |
| 611 | 611 | ||
| 612 | /* | 612 | /* |
| 613 | * helper function to lookup reference count and flags of extent. | ||
| 614 | * | ||
| 615 | * the head node for delayed ref is used to store the sum of all the | ||
| 616 | * reference count modifications queued up in the rbtree. the head | ||
| 617 | * node may also store the extent flags to set. This way you can check | ||
| 618 | * to see what the reference count and extent flags would be if all of | ||
| 619 | * the delayed refs are not processed. | ||
| 620 | */ | ||
| 621 | int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans, | ||
| 622 | struct btrfs_root *root, u64 bytenr, | ||
| 623 | u64 num_bytes, u64 *refs, u64 *flags) | ||
| 624 | { | ||
| 625 | struct btrfs_delayed_ref_head *head; | ||
| 626 | struct btrfs_delayed_ref_root *delayed_refs; | ||
| 627 | struct btrfs_path *path; | ||
| 628 | struct btrfs_extent_item *ei; | ||
| 629 | struct extent_buffer *leaf; | ||
| 630 | struct btrfs_key key; | ||
| 631 | u32 item_size; | ||
| 632 | u64 num_refs; | ||
| 633 | u64 extent_flags; | ||
| 634 | int ret; | ||
| 635 | |||
| 636 | path = btrfs_alloc_path(); | ||
| 637 | if (!path) | ||
| 638 | return -ENOMEM; | ||
| 639 | |||
| 640 | key.objectid = bytenr; | ||
| 641 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 642 | key.offset = num_bytes; | ||
| 643 | if (!trans) { | ||
| 644 | path->skip_locking = 1; | ||
| 645 | path->search_commit_root = 1; | ||
| 646 | } | ||
| 647 | again: | ||
| 648 | ret = btrfs_search_slot(trans, root->fs_info->extent_root, | ||
| 649 | &key, path, 0, 0); | ||
| 650 | if (ret < 0) | ||
| 651 | goto out_free; | ||
| 652 | |||
| 653 | if (ret == 0) { | ||
| 654 | leaf = path->nodes[0]; | ||
| 655 | item_size = btrfs_item_size_nr(leaf, path->slots[0]); | ||
| 656 | if (item_size >= sizeof(*ei)) { | ||
| 657 | ei = btrfs_item_ptr(leaf, path->slots[0], | ||
| 658 | struct btrfs_extent_item); | ||
| 659 | num_refs = btrfs_extent_refs(leaf, ei); | ||
| 660 | extent_flags = btrfs_extent_flags(leaf, ei); | ||
| 661 | } else { | ||
| 662 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | ||
| 663 | struct btrfs_extent_item_v0 *ei0; | ||
| 664 | BUG_ON(item_size != sizeof(*ei0)); | ||
| 665 | ei0 = btrfs_item_ptr(leaf, path->slots[0], | ||
| 666 | struct btrfs_extent_item_v0); | ||
| 667 | num_refs = btrfs_extent_refs_v0(leaf, ei0); | ||
| 668 | /* FIXME: this isn't correct for data */ | ||
| 669 | extent_flags = BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
| 670 | #else | ||
| 671 | BUG(); | ||
| 672 | #endif | ||
| 673 | } | ||
| 674 | BUG_ON(num_refs == 0); | ||
| 675 | } else { | ||
| 676 | num_refs = 0; | ||
| 677 | extent_flags = 0; | ||
| 678 | ret = 0; | ||
| 679 | } | ||
| 680 | |||
| 681 | if (!trans) | ||
| 682 | goto out; | ||
| 683 | |||
| 684 | delayed_refs = &trans->transaction->delayed_refs; | ||
| 685 | spin_lock(&delayed_refs->lock); | ||
| 686 | head = btrfs_find_delayed_ref_head(trans, bytenr); | ||
| 687 | if (head) { | ||
| 688 | if (!mutex_trylock(&head->mutex)) { | ||
| 689 | atomic_inc(&head->node.refs); | ||
| 690 | spin_unlock(&delayed_refs->lock); | ||
| 691 | |||
| 692 | btrfs_release_path(root->fs_info->extent_root, path); | ||
| 693 | |||
| 694 | mutex_lock(&head->mutex); | ||
| 695 | mutex_unlock(&head->mutex); | ||
| 696 | btrfs_put_delayed_ref(&head->node); | ||
| 697 | goto again; | ||
| 698 | } | ||
| 699 | if (head->extent_op && head->extent_op->update_flags) | ||
| 700 | extent_flags |= head->extent_op->flags_to_set; | ||
| 701 | else | ||
| 702 | BUG_ON(num_refs == 0); | ||
| 703 | |||
| 704 | num_refs += head->node.ref_mod; | ||
| 705 | mutex_unlock(&head->mutex); | ||
| 706 | } | ||
| 707 | spin_unlock(&delayed_refs->lock); | ||
| 708 | out: | ||
| 709 | WARN_ON(num_refs == 0); | ||
| 710 | if (refs) | ||
| 711 | *refs = num_refs; | ||
| 712 | if (flags) | ||
| 713 | *flags = extent_flags; | ||
| 714 | out_free: | ||
| 715 | btrfs_free_path(path); | ||
| 716 | return ret; | ||
| 717 | } | ||
| 718 | |||
| 719 | /* | ||
| 613 | * Back reference rules. Back refs have three main goals: | 720 | * Back reference rules. Back refs have three main goals: |
| 614 | * | 721 | * |
| 615 | * 1) differentiate between all holders of references to an extent so that | 722 | * 1) differentiate between all holders of references to an extent so that |
| @@ -1871,7 +1978,6 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans, | |||
| 1871 | return ret; | 1978 | return ret; |
| 1872 | } | 1979 | } |
| 1873 | 1980 | ||
| 1874 | |||
| 1875 | /* helper function to actually process a single delayed ref entry */ | 1981 | /* helper function to actually process a single delayed ref entry */ |
| 1876 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | 1982 | static int run_one_delayed_ref(struct btrfs_trans_handle *trans, |
| 1877 | struct btrfs_root *root, | 1983 | struct btrfs_root *root, |
| @@ -1891,32 +1997,14 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans, | |||
| 1891 | BUG_ON(extent_op); | 1997 | BUG_ON(extent_op); |
| 1892 | head = btrfs_delayed_node_to_head(node); | 1998 | head = btrfs_delayed_node_to_head(node); |
| 1893 | if (insert_reserved) { | 1999 | if (insert_reserved) { |
| 1894 | int mark_free = 0; | 2000 | btrfs_pin_extent(root, node->bytenr, |
| 1895 | struct extent_buffer *must_clean = NULL; | 2001 | node->num_bytes, 1); |
| 1896 | |||
| 1897 | ret = pin_down_bytes(trans, root, NULL, | ||
| 1898 | node->bytenr, node->num_bytes, | ||
| 1899 | head->is_data, 1, &must_clean); | ||
| 1900 | if (ret > 0) | ||
| 1901 | mark_free = 1; | ||
| 1902 | |||
| 1903 | if (must_clean) { | ||
| 1904 | clean_tree_block(NULL, root, must_clean); | ||
| 1905 | btrfs_tree_unlock(must_clean); | ||
| 1906 | free_extent_buffer(must_clean); | ||
| 1907 | } | ||
| 1908 | if (head->is_data) { | 2002 | if (head->is_data) { |
| 1909 | ret = btrfs_del_csums(trans, root, | 2003 | ret = btrfs_del_csums(trans, root, |
| 1910 | node->bytenr, | 2004 | node->bytenr, |
| 1911 | node->num_bytes); | 2005 | node->num_bytes); |
| 1912 | BUG_ON(ret); | 2006 | BUG_ON(ret); |
| 1913 | } | 2007 | } |
| 1914 | if (mark_free) { | ||
| 1915 | ret = btrfs_free_reserved_extent(root, | ||
| 1916 | node->bytenr, | ||
| 1917 | node->num_bytes); | ||
| 1918 | BUG_ON(ret); | ||
| 1919 | } | ||
| 1920 | } | 2008 | } |
| 1921 | mutex_unlock(&head->mutex); | 2009 | mutex_unlock(&head->mutex); |
| 1922 | return 0; | 2010 | return 0; |
| @@ -2347,6 +2435,8 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, | |||
| 2347 | ret = 0; | 2435 | ret = 0; |
| 2348 | out: | 2436 | out: |
| 2349 | btrfs_free_path(path); | 2437 | btrfs_free_path(path); |
| 2438 | if (root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
| 2439 | WARN_ON(ret > 0); | ||
| 2350 | return ret; | 2440 | return ret; |
| 2351 | } | 2441 | } |
| 2352 | 2442 | ||
| @@ -2660,12 +2750,21 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 2660 | struct btrfs_space_info **space_info) | 2750 | struct btrfs_space_info **space_info) |
| 2661 | { | 2751 | { |
| 2662 | struct btrfs_space_info *found; | 2752 | struct btrfs_space_info *found; |
| 2753 | int i; | ||
| 2754 | int factor; | ||
| 2755 | |||
| 2756 | if (flags & (BTRFS_BLOCK_GROUP_DUP | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 2757 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 2758 | factor = 2; | ||
| 2759 | else | ||
| 2760 | factor = 1; | ||
| 2663 | 2761 | ||
| 2664 | found = __find_space_info(info, flags); | 2762 | found = __find_space_info(info, flags); |
| 2665 | if (found) { | 2763 | if (found) { |
| 2666 | spin_lock(&found->lock); | 2764 | spin_lock(&found->lock); |
| 2667 | found->total_bytes += total_bytes; | 2765 | found->total_bytes += total_bytes; |
| 2668 | found->bytes_used += bytes_used; | 2766 | found->bytes_used += bytes_used; |
| 2767 | found->disk_used += bytes_used * factor; | ||
| 2669 | found->full = 0; | 2768 | found->full = 0; |
| 2670 | spin_unlock(&found->lock); | 2769 | spin_unlock(&found->lock); |
| 2671 | *space_info = found; | 2770 | *space_info = found; |
| @@ -2675,18 +2774,20 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, | |||
| 2675 | if (!found) | 2774 | if (!found) |
| 2676 | return -ENOMEM; | 2775 | return -ENOMEM; |
| 2677 | 2776 | ||
| 2678 | INIT_LIST_HEAD(&found->block_groups); | 2777 | for (i = 0; i < BTRFS_NR_RAID_TYPES; i++) |
| 2778 | INIT_LIST_HEAD(&found->block_groups[i]); | ||
| 2679 | init_rwsem(&found->groups_sem); | 2779 | init_rwsem(&found->groups_sem); |
| 2680 | init_waitqueue_head(&found->flush_wait); | ||
| 2681 | init_waitqueue_head(&found->allocate_wait); | ||
| 2682 | spin_lock_init(&found->lock); | 2780 | spin_lock_init(&found->lock); |
| 2683 | found->flags = flags; | 2781 | found->flags = flags & (BTRFS_BLOCK_GROUP_DATA | |
| 2782 | BTRFS_BLOCK_GROUP_SYSTEM | | ||
| 2783 | BTRFS_BLOCK_GROUP_METADATA); | ||
| 2684 | found->total_bytes = total_bytes; | 2784 | found->total_bytes = total_bytes; |
| 2685 | found->bytes_used = bytes_used; | 2785 | found->bytes_used = bytes_used; |
| 2786 | found->disk_used = bytes_used * factor; | ||
| 2686 | found->bytes_pinned = 0; | 2787 | found->bytes_pinned = 0; |
| 2687 | found->bytes_reserved = 0; | 2788 | found->bytes_reserved = 0; |
| 2688 | found->bytes_readonly = 0; | 2789 | found->bytes_readonly = 0; |
| 2689 | found->bytes_delalloc = 0; | 2790 | found->bytes_may_use = 0; |
| 2690 | found->full = 0; | 2791 | found->full = 0; |
| 2691 | found->force_alloc = 0; | 2792 | found->force_alloc = 0; |
| 2692 | *space_info = found; | 2793 | *space_info = found; |
| @@ -2711,19 +2812,6 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) | |||
| 2711 | } | 2812 | } |
| 2712 | } | 2813 | } |
| 2713 | 2814 | ||
| 2714 | static void set_block_group_readonly(struct btrfs_block_group_cache *cache) | ||
| 2715 | { | ||
| 2716 | spin_lock(&cache->space_info->lock); | ||
| 2717 | spin_lock(&cache->lock); | ||
| 2718 | if (!cache->ro) { | ||
| 2719 | cache->space_info->bytes_readonly += cache->key.offset - | ||
| 2720 | btrfs_block_group_used(&cache->item); | ||
| 2721 | cache->ro = 1; | ||
| 2722 | } | ||
| 2723 | spin_unlock(&cache->lock); | ||
| 2724 | spin_unlock(&cache->space_info->lock); | ||
| 2725 | } | ||
| 2726 | |||
| 2727 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | 2815 | u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) |
| 2728 | { | 2816 | { |
| 2729 | u64 num_devices = root->fs_info->fs_devices->rw_devices; | 2817 | u64 num_devices = root->fs_info->fs_devices->rw_devices; |
| @@ -2752,491 +2840,50 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags) | |||
| 2752 | return flags; | 2840 | return flags; |
| 2753 | } | 2841 | } |
| 2754 | 2842 | ||
| 2755 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, u64 data) | 2843 | static u64 get_alloc_profile(struct btrfs_root *root, u64 flags) |
| 2756 | { | ||
| 2757 | struct btrfs_fs_info *info = root->fs_info; | ||
| 2758 | u64 alloc_profile; | ||
| 2759 | |||
| 2760 | if (data) { | ||
| 2761 | alloc_profile = info->avail_data_alloc_bits & | ||
| 2762 | info->data_alloc_profile; | ||
| 2763 | data = BTRFS_BLOCK_GROUP_DATA | alloc_profile; | ||
| 2764 | } else if (root == root->fs_info->chunk_root) { | ||
| 2765 | alloc_profile = info->avail_system_alloc_bits & | ||
| 2766 | info->system_alloc_profile; | ||
| 2767 | data = BTRFS_BLOCK_GROUP_SYSTEM | alloc_profile; | ||
| 2768 | } else { | ||
| 2769 | alloc_profile = info->avail_metadata_alloc_bits & | ||
| 2770 | info->metadata_alloc_profile; | ||
| 2771 | data = BTRFS_BLOCK_GROUP_METADATA | alloc_profile; | ||
| 2772 | } | ||
| 2773 | |||
| 2774 | return btrfs_reduce_alloc_profile(root, data); | ||
| 2775 | } | ||
| 2776 | |||
| 2777 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) | ||
| 2778 | { | ||
| 2779 | u64 alloc_target; | ||
| 2780 | |||
| 2781 | alloc_target = btrfs_get_alloc_profile(root, 1); | ||
| 2782 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, | ||
| 2783 | alloc_target); | ||
| 2784 | } | ||
| 2785 | |||
| 2786 | static u64 calculate_bytes_needed(struct btrfs_root *root, int num_items) | ||
| 2787 | { | ||
| 2788 | u64 num_bytes; | ||
| 2789 | int level; | ||
| 2790 | |||
| 2791 | level = BTRFS_MAX_LEVEL - 2; | ||
| 2792 | /* | ||
| 2793 | * NOTE: these calculations are absolutely the worst possible case. | ||
| 2794 | * This assumes that _every_ item we insert will require a new leaf, and | ||
| 2795 | * that the tree has grown to its maximum level size. | ||
| 2796 | */ | ||
| 2797 | |||
| 2798 | /* | ||
| 2799 | * for every item we insert we could insert both an extent item and a | ||
| 2800 | * extent ref item. Then for ever item we insert, we will need to cow | ||
| 2801 | * both the original leaf, plus the leaf to the left and right of it. | ||
| 2802 | * | ||
| 2803 | * Unless we are talking about the extent root, then we just want the | ||
| 2804 | * number of items * 2, since we just need the extent item plus its ref. | ||
| 2805 | */ | ||
| 2806 | if (root == root->fs_info->extent_root) | ||
| 2807 | num_bytes = num_items * 2; | ||
| 2808 | else | ||
| 2809 | num_bytes = (num_items + (2 * num_items)) * 3; | ||
| 2810 | |||
| 2811 | /* | ||
| 2812 | * num_bytes is total number of leaves we could need times the leaf | ||
| 2813 | * size, and then for every leaf we could end up cow'ing 2 nodes per | ||
| 2814 | * level, down to the leaf level. | ||
| 2815 | */ | ||
| 2816 | num_bytes = (num_bytes * root->leafsize) + | ||
| 2817 | (num_bytes * (level * 2)) * root->nodesize; | ||
| 2818 | |||
| 2819 | return num_bytes; | ||
| 2820 | } | ||
| 2821 | |||
| 2822 | /* | ||
| 2823 | * Unreserve metadata space for delalloc. If we have less reserved credits than | ||
| 2824 | * we have extents, this function does nothing. | ||
| 2825 | */ | ||
| 2826 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 2827 | struct inode *inode, int num_items) | ||
| 2828 | { | ||
| 2829 | struct btrfs_fs_info *info = root->fs_info; | ||
| 2830 | struct btrfs_space_info *meta_sinfo; | ||
| 2831 | u64 num_bytes; | ||
| 2832 | u64 alloc_target; | ||
| 2833 | bool bug = false; | ||
| 2834 | |||
| 2835 | /* get the space info for where the metadata will live */ | ||
| 2836 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 2837 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 2838 | |||
| 2839 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
| 2840 | num_items); | ||
| 2841 | |||
| 2842 | spin_lock(&meta_sinfo->lock); | ||
| 2843 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 2844 | if (BTRFS_I(inode)->reserved_extents <= | ||
| 2845 | BTRFS_I(inode)->outstanding_extents) { | ||
| 2846 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 2847 | spin_unlock(&meta_sinfo->lock); | ||
| 2848 | return 0; | ||
| 2849 | } | ||
| 2850 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 2851 | |||
| 2852 | BTRFS_I(inode)->reserved_extents -= num_items; | ||
| 2853 | BUG_ON(BTRFS_I(inode)->reserved_extents < 0); | ||
| 2854 | |||
| 2855 | if (meta_sinfo->bytes_delalloc < num_bytes) { | ||
| 2856 | bug = true; | ||
| 2857 | meta_sinfo->bytes_delalloc = 0; | ||
| 2858 | } else { | ||
| 2859 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 2860 | } | ||
| 2861 | spin_unlock(&meta_sinfo->lock); | ||
| 2862 | |||
| 2863 | BUG_ON(bug); | ||
| 2864 | |||
| 2865 | return 0; | ||
| 2866 | } | ||
| 2867 | |||
| 2868 | static void check_force_delalloc(struct btrfs_space_info *meta_sinfo) | ||
| 2869 | { | 2844 | { |
| 2870 | u64 thresh; | 2845 | if (flags & BTRFS_BLOCK_GROUP_DATA) |
| 2871 | 2846 | flags |= root->fs_info->avail_data_alloc_bits & | |
| 2872 | thresh = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | 2847 | root->fs_info->data_alloc_profile; |
| 2873 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | 2848 | else if (flags & BTRFS_BLOCK_GROUP_SYSTEM) |
| 2874 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | 2849 | flags |= root->fs_info->avail_system_alloc_bits & |
| 2875 | meta_sinfo->bytes_may_use; | 2850 | root->fs_info->system_alloc_profile; |
| 2876 | 2851 | else if (flags & BTRFS_BLOCK_GROUP_METADATA) | |
| 2877 | thresh = meta_sinfo->total_bytes - thresh; | 2852 | flags |= root->fs_info->avail_metadata_alloc_bits & |
| 2878 | thresh *= 80; | 2853 | root->fs_info->metadata_alloc_profile; |
| 2879 | do_div(thresh, 100); | 2854 | return btrfs_reduce_alloc_profile(root, flags); |
| 2880 | if (thresh <= meta_sinfo->bytes_delalloc) | ||
| 2881 | meta_sinfo->force_delalloc = 1; | ||
| 2882 | else | ||
| 2883 | meta_sinfo->force_delalloc = 0; | ||
| 2884 | } | 2855 | } |
| 2885 | 2856 | ||
| 2886 | struct async_flush { | 2857 | static u64 btrfs_get_alloc_profile(struct btrfs_root *root, int data) |
| 2887 | struct btrfs_root *root; | ||
| 2888 | struct btrfs_space_info *info; | ||
| 2889 | struct btrfs_work work; | ||
| 2890 | }; | ||
| 2891 | |||
| 2892 | static noinline void flush_delalloc_async(struct btrfs_work *work) | ||
| 2893 | { | 2858 | { |
| 2894 | struct async_flush *async; | 2859 | u64 flags; |
| 2895 | struct btrfs_root *root; | ||
| 2896 | struct btrfs_space_info *info; | ||
| 2897 | |||
| 2898 | async = container_of(work, struct async_flush, work); | ||
| 2899 | root = async->root; | ||
| 2900 | info = async->info; | ||
| 2901 | |||
| 2902 | btrfs_start_delalloc_inodes(root, 0); | ||
| 2903 | wake_up(&info->flush_wait); | ||
| 2904 | btrfs_wait_ordered_extents(root, 0, 0); | ||
| 2905 | |||
| 2906 | spin_lock(&info->lock); | ||
| 2907 | info->flushing = 0; | ||
| 2908 | spin_unlock(&info->lock); | ||
| 2909 | wake_up(&info->flush_wait); | ||
| 2910 | |||
| 2911 | kfree(async); | ||
| 2912 | } | ||
| 2913 | |||
| 2914 | static void wait_on_flush(struct btrfs_space_info *info) | ||
| 2915 | { | ||
| 2916 | DEFINE_WAIT(wait); | ||
| 2917 | u64 used; | ||
| 2918 | |||
| 2919 | while (1) { | ||
| 2920 | prepare_to_wait(&info->flush_wait, &wait, | ||
| 2921 | TASK_UNINTERRUPTIBLE); | ||
| 2922 | spin_lock(&info->lock); | ||
| 2923 | if (!info->flushing) { | ||
| 2924 | spin_unlock(&info->lock); | ||
| 2925 | break; | ||
| 2926 | } | ||
| 2927 | |||
| 2928 | used = info->bytes_used + info->bytes_reserved + | ||
| 2929 | info->bytes_pinned + info->bytes_readonly + | ||
| 2930 | info->bytes_super + info->bytes_root + | ||
| 2931 | info->bytes_may_use + info->bytes_delalloc; | ||
| 2932 | if (used < info->total_bytes) { | ||
| 2933 | spin_unlock(&info->lock); | ||
| 2934 | break; | ||
| 2935 | } | ||
| 2936 | spin_unlock(&info->lock); | ||
| 2937 | schedule(); | ||
| 2938 | } | ||
| 2939 | finish_wait(&info->flush_wait, &wait); | ||
| 2940 | } | ||
| 2941 | |||
| 2942 | static void flush_delalloc(struct btrfs_root *root, | ||
| 2943 | struct btrfs_space_info *info) | ||
| 2944 | { | ||
| 2945 | struct async_flush *async; | ||
| 2946 | bool wait = false; | ||
| 2947 | |||
| 2948 | spin_lock(&info->lock); | ||
| 2949 | 2860 | ||
| 2950 | if (!info->flushing) | 2861 | if (data) |
| 2951 | info->flushing = 1; | 2862 | flags = BTRFS_BLOCK_GROUP_DATA; |
| 2863 | else if (root == root->fs_info->chunk_root) | ||
| 2864 | flags = BTRFS_BLOCK_GROUP_SYSTEM; | ||
| 2952 | else | 2865 | else |
| 2953 | wait = true; | 2866 | flags = BTRFS_BLOCK_GROUP_METADATA; |
| 2954 | |||
| 2955 | spin_unlock(&info->lock); | ||
| 2956 | |||
| 2957 | if (wait) { | ||
| 2958 | wait_on_flush(info); | ||
| 2959 | return; | ||
| 2960 | } | ||
| 2961 | |||
| 2962 | async = kzalloc(sizeof(*async), GFP_NOFS); | ||
| 2963 | if (!async) | ||
| 2964 | goto flush; | ||
| 2965 | |||
| 2966 | async->root = root; | ||
| 2967 | async->info = info; | ||
| 2968 | async->work.func = flush_delalloc_async; | ||
| 2969 | 2867 | ||
| 2970 | btrfs_queue_worker(&root->fs_info->enospc_workers, | 2868 | return get_alloc_profile(root, flags); |
| 2971 | &async->work); | ||
| 2972 | wait_on_flush(info); | ||
| 2973 | return; | ||
| 2974 | |||
| 2975 | flush: | ||
| 2976 | btrfs_start_delalloc_inodes(root, 0); | ||
| 2977 | btrfs_wait_ordered_extents(root, 0, 0); | ||
| 2978 | |||
| 2979 | spin_lock(&info->lock); | ||
| 2980 | info->flushing = 0; | ||
| 2981 | spin_unlock(&info->lock); | ||
| 2982 | wake_up(&info->flush_wait); | ||
| 2983 | } | 2869 | } |
| 2984 | 2870 | ||
| 2985 | static int maybe_allocate_chunk(struct btrfs_root *root, | 2871 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *inode) |
| 2986 | struct btrfs_space_info *info) | ||
| 2987 | { | ||
| 2988 | struct btrfs_super_block *disk_super = &root->fs_info->super_copy; | ||
| 2989 | struct btrfs_trans_handle *trans; | ||
| 2990 | bool wait = false; | ||
| 2991 | int ret = 0; | ||
| 2992 | u64 min_metadata; | ||
| 2993 | u64 free_space; | ||
| 2994 | |||
| 2995 | free_space = btrfs_super_total_bytes(disk_super); | ||
| 2996 | /* | ||
| 2997 | * we allow the metadata to grow to a max of either 10gb or 5% of the | ||
| 2998 | * space in the volume. | ||
| 2999 | */ | ||
| 3000 | min_metadata = min((u64)10 * 1024 * 1024 * 1024, | ||
| 3001 | div64_u64(free_space * 5, 100)); | ||
| 3002 | if (info->total_bytes >= min_metadata) { | ||
| 3003 | spin_unlock(&info->lock); | ||
| 3004 | return 0; | ||
| 3005 | } | ||
| 3006 | |||
| 3007 | if (info->full) { | ||
| 3008 | spin_unlock(&info->lock); | ||
| 3009 | return 0; | ||
| 3010 | } | ||
| 3011 | |||
| 3012 | if (!info->allocating_chunk) { | ||
| 3013 | info->force_alloc = 1; | ||
| 3014 | info->allocating_chunk = 1; | ||
| 3015 | } else { | ||
| 3016 | wait = true; | ||
| 3017 | } | ||
| 3018 | |||
| 3019 | spin_unlock(&info->lock); | ||
| 3020 | |||
| 3021 | if (wait) { | ||
| 3022 | wait_event(info->allocate_wait, | ||
| 3023 | !info->allocating_chunk); | ||
| 3024 | return 1; | ||
| 3025 | } | ||
| 3026 | |||
| 3027 | trans = btrfs_start_transaction(root, 1); | ||
| 3028 | if (!trans) { | ||
| 3029 | ret = -ENOMEM; | ||
| 3030 | goto out; | ||
| 3031 | } | ||
| 3032 | |||
| 3033 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 3034 | 4096 + 2 * 1024 * 1024, | ||
| 3035 | info->flags, 0); | ||
| 3036 | btrfs_end_transaction(trans, root); | ||
| 3037 | if (ret) | ||
| 3038 | goto out; | ||
| 3039 | out: | ||
| 3040 | spin_lock(&info->lock); | ||
| 3041 | info->allocating_chunk = 0; | ||
| 3042 | spin_unlock(&info->lock); | ||
| 3043 | wake_up(&info->allocate_wait); | ||
| 3044 | |||
| 3045 | if (ret) | ||
| 3046 | return 0; | ||
| 3047 | return 1; | ||
| 3048 | } | ||
| 3049 | |||
| 3050 | /* | ||
| 3051 | * Reserve metadata space for delalloc. | ||
| 3052 | */ | ||
| 3053 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
| 3054 | struct inode *inode, int num_items) | ||
| 3055 | { | ||
| 3056 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3057 | struct btrfs_space_info *meta_sinfo; | ||
| 3058 | u64 num_bytes; | ||
| 3059 | u64 used; | ||
| 3060 | u64 alloc_target; | ||
| 3061 | int flushed = 0; | ||
| 3062 | int force_delalloc; | ||
| 3063 | |||
| 3064 | /* get the space info for where the metadata will live */ | ||
| 3065 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3066 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3067 | |||
| 3068 | num_bytes = calculate_bytes_needed(root->fs_info->extent_root, | ||
| 3069 | num_items); | ||
| 3070 | again: | ||
| 3071 | spin_lock(&meta_sinfo->lock); | ||
| 3072 | |||
| 3073 | force_delalloc = meta_sinfo->force_delalloc; | ||
| 3074 | |||
| 3075 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3076 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3077 | |||
| 3078 | if (!flushed) | ||
| 3079 | meta_sinfo->bytes_delalloc += num_bytes; | ||
| 3080 | |||
| 3081 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3082 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3083 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3084 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3085 | |||
| 3086 | if (used > meta_sinfo->total_bytes) { | ||
| 3087 | flushed++; | ||
| 3088 | |||
| 3089 | if (flushed == 1) { | ||
| 3090 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3091 | goto again; | ||
| 3092 | flushed++; | ||
| 3093 | } else { | ||
| 3094 | spin_unlock(&meta_sinfo->lock); | ||
| 3095 | } | ||
| 3096 | |||
| 3097 | if (flushed == 2) { | ||
| 3098 | filemap_flush(inode->i_mapping); | ||
| 3099 | goto again; | ||
| 3100 | } else if (flushed == 3) { | ||
| 3101 | flush_delalloc(root, meta_sinfo); | ||
| 3102 | goto again; | ||
| 3103 | } | ||
| 3104 | spin_lock(&meta_sinfo->lock); | ||
| 3105 | meta_sinfo->bytes_delalloc -= num_bytes; | ||
| 3106 | spin_unlock(&meta_sinfo->lock); | ||
| 3107 | printk(KERN_ERR "enospc, has %d, reserved %d\n", | ||
| 3108 | BTRFS_I(inode)->outstanding_extents, | ||
| 3109 | BTRFS_I(inode)->reserved_extents); | ||
| 3110 | dump_space_info(meta_sinfo, 0, 0); | ||
| 3111 | return -ENOSPC; | ||
| 3112 | } | ||
| 3113 | |||
| 3114 | BTRFS_I(inode)->reserved_extents += num_items; | ||
| 3115 | check_force_delalloc(meta_sinfo); | ||
| 3116 | spin_unlock(&meta_sinfo->lock); | ||
| 3117 | |||
| 3118 | if (!flushed && force_delalloc) | ||
| 3119 | filemap_flush(inode->i_mapping); | ||
| 3120 | |||
| 3121 | return 0; | ||
| 3122 | } | ||
| 3123 | |||
| 3124 | /* | ||
| 3125 | * unreserve num_items number of items worth of metadata space. This needs to | ||
| 3126 | * be paired with btrfs_reserve_metadata_space. | ||
| 3127 | * | ||
| 3128 | * NOTE: if you have the option, run this _AFTER_ you do a | ||
| 3129 | * btrfs_end_transaction, since btrfs_end_transaction will run delayed ref | ||
| 3130 | * oprations which will result in more used metadata, so we want to make sure we | ||
| 3131 | * can do that without issue. | ||
| 3132 | */ | ||
| 3133 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3134 | { | ||
| 3135 | struct btrfs_fs_info *info = root->fs_info; | ||
| 3136 | struct btrfs_space_info *meta_sinfo; | ||
| 3137 | u64 num_bytes; | ||
| 3138 | u64 alloc_target; | ||
| 3139 | bool bug = false; | ||
| 3140 | |||
| 3141 | /* get the space info for where the metadata will live */ | ||
| 3142 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3143 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3144 | |||
| 3145 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3146 | |||
| 3147 | spin_lock(&meta_sinfo->lock); | ||
| 3148 | if (meta_sinfo->bytes_may_use < num_bytes) { | ||
| 3149 | bug = true; | ||
| 3150 | meta_sinfo->bytes_may_use = 0; | ||
| 3151 | } else { | ||
| 3152 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3153 | } | ||
| 3154 | spin_unlock(&meta_sinfo->lock); | ||
| 3155 | |||
| 3156 | BUG_ON(bug); | ||
| 3157 | |||
| 3158 | return 0; | ||
| 3159 | } | ||
| 3160 | |||
| 3161 | /* | ||
| 3162 | * Reserve some metadata space for use. We'll calculate the worste case number | ||
| 3163 | * of bytes that would be needed to modify num_items number of items. If we | ||
| 3164 | * have space, fantastic, if not, you get -ENOSPC. Please call | ||
| 3165 | * btrfs_unreserve_metadata_space when you are done for the _SAME_ number of | ||
| 3166 | * items you reserved, since whatever metadata you needed should have already | ||
| 3167 | * been allocated. | ||
| 3168 | * | ||
| 3169 | * This will commit the transaction to make more space if we don't have enough | ||
| 3170 | * metadata space. THe only time we don't do this is if we're reserving space | ||
| 3171 | * inside of a transaction, then we will just return -ENOSPC and it is the | ||
| 3172 | * callers responsibility to handle it properly. | ||
| 3173 | */ | ||
| 3174 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items) | ||
| 3175 | { | 2872 | { |
| 3176 | struct btrfs_fs_info *info = root->fs_info; | 2873 | BTRFS_I(inode)->space_info = __find_space_info(root->fs_info, |
| 3177 | struct btrfs_space_info *meta_sinfo; | 2874 | BTRFS_BLOCK_GROUP_DATA); |
| 3178 | u64 num_bytes; | ||
| 3179 | u64 used; | ||
| 3180 | u64 alloc_target; | ||
| 3181 | int retries = 0; | ||
| 3182 | |||
| 3183 | /* get the space info for where the metadata will live */ | ||
| 3184 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3185 | meta_sinfo = __find_space_info(info, alloc_target); | ||
| 3186 | |||
| 3187 | num_bytes = calculate_bytes_needed(root, num_items); | ||
| 3188 | again: | ||
| 3189 | spin_lock(&meta_sinfo->lock); | ||
| 3190 | |||
| 3191 | if (unlikely(!meta_sinfo->bytes_root)) | ||
| 3192 | meta_sinfo->bytes_root = calculate_bytes_needed(root, 6); | ||
| 3193 | |||
| 3194 | if (!retries) | ||
| 3195 | meta_sinfo->bytes_may_use += num_bytes; | ||
| 3196 | |||
| 3197 | used = meta_sinfo->bytes_used + meta_sinfo->bytes_reserved + | ||
| 3198 | meta_sinfo->bytes_pinned + meta_sinfo->bytes_readonly + | ||
| 3199 | meta_sinfo->bytes_super + meta_sinfo->bytes_root + | ||
| 3200 | meta_sinfo->bytes_may_use + meta_sinfo->bytes_delalloc; | ||
| 3201 | |||
| 3202 | if (used > meta_sinfo->total_bytes) { | ||
| 3203 | retries++; | ||
| 3204 | if (retries == 1) { | ||
| 3205 | if (maybe_allocate_chunk(root, meta_sinfo)) | ||
| 3206 | goto again; | ||
| 3207 | retries++; | ||
| 3208 | } else { | ||
| 3209 | spin_unlock(&meta_sinfo->lock); | ||
| 3210 | } | ||
| 3211 | |||
| 3212 | if (retries == 2) { | ||
| 3213 | flush_delalloc(root, meta_sinfo); | ||
| 3214 | goto again; | ||
| 3215 | } | ||
| 3216 | spin_lock(&meta_sinfo->lock); | ||
| 3217 | meta_sinfo->bytes_may_use -= num_bytes; | ||
| 3218 | spin_unlock(&meta_sinfo->lock); | ||
| 3219 | |||
| 3220 | dump_space_info(meta_sinfo, 0, 0); | ||
| 3221 | return -ENOSPC; | ||
| 3222 | } | ||
| 3223 | |||
| 3224 | check_force_delalloc(meta_sinfo); | ||
| 3225 | spin_unlock(&meta_sinfo->lock); | ||
| 3226 | |||
| 3227 | return 0; | ||
| 3228 | } | 2875 | } |
| 3229 | 2876 | ||
| 3230 | /* | 2877 | /* |
| 3231 | * This will check the space that the inode allocates from to make sure we have | 2878 | * This will check the space that the inode allocates from to make sure we have |
| 3232 | * enough space for bytes. | 2879 | * enough space for bytes. |
| 3233 | */ | 2880 | */ |
| 3234 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2881 | int btrfs_check_data_free_space(struct inode *inode, u64 bytes) |
| 3235 | u64 bytes) | ||
| 3236 | { | 2882 | { |
| 3237 | struct btrfs_space_info *data_sinfo; | 2883 | struct btrfs_space_info *data_sinfo; |
| 2884 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3238 | u64 used; | 2885 | u64 used; |
| 3239 | int ret = 0, committed = 0, flushed = 0; | 2886 | int ret = 0, committed = 0; |
| 3240 | 2887 | ||
| 3241 | /* make sure bytes are sectorsize aligned */ | 2888 | /* make sure bytes are sectorsize aligned */ |
| 3242 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); | 2889 | bytes = (bytes + root->sectorsize - 1) & ~((u64)root->sectorsize - 1); |
| @@ -3248,21 +2895,13 @@ int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | |||
| 3248 | again: | 2895 | again: |
| 3249 | /* make sure we have enough space to handle the data first */ | 2896 | /* make sure we have enough space to handle the data first */ |
| 3250 | spin_lock(&data_sinfo->lock); | 2897 | spin_lock(&data_sinfo->lock); |
| 3251 | used = data_sinfo->bytes_used + data_sinfo->bytes_delalloc + | 2898 | used = data_sinfo->bytes_used + data_sinfo->bytes_reserved + |
| 3252 | data_sinfo->bytes_reserved + data_sinfo->bytes_pinned + | 2899 | data_sinfo->bytes_pinned + data_sinfo->bytes_readonly + |
| 3253 | data_sinfo->bytes_readonly + data_sinfo->bytes_may_use + | 2900 | data_sinfo->bytes_may_use; |
| 3254 | data_sinfo->bytes_super; | ||
| 3255 | 2901 | ||
| 3256 | if (used + bytes > data_sinfo->total_bytes) { | 2902 | if (used + bytes > data_sinfo->total_bytes) { |
| 3257 | struct btrfs_trans_handle *trans; | 2903 | struct btrfs_trans_handle *trans; |
| 3258 | 2904 | ||
| 3259 | if (!flushed) { | ||
| 3260 | spin_unlock(&data_sinfo->lock); | ||
| 3261 | flush_delalloc(root, data_sinfo); | ||
| 3262 | flushed = 1; | ||
| 3263 | goto again; | ||
| 3264 | } | ||
| 3265 | |||
| 3266 | /* | 2905 | /* |
| 3267 | * if we don't have enough free bytes in this space then we need | 2906 | * if we don't have enough free bytes in this space then we need |
| 3268 | * to alloc a new chunk. | 2907 | * to alloc a new chunk. |
| @@ -3274,15 +2913,15 @@ again: | |||
| 3274 | spin_unlock(&data_sinfo->lock); | 2913 | spin_unlock(&data_sinfo->lock); |
| 3275 | alloc: | 2914 | alloc: |
| 3276 | alloc_target = btrfs_get_alloc_profile(root, 1); | 2915 | alloc_target = btrfs_get_alloc_profile(root, 1); |
| 3277 | trans = btrfs_start_transaction(root, 1); | 2916 | trans = btrfs_join_transaction(root, 1); |
| 3278 | if (!trans) | 2917 | if (IS_ERR(trans)) |
| 3279 | return -ENOMEM; | 2918 | return PTR_ERR(trans); |
| 3280 | 2919 | ||
| 3281 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | 2920 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, |
| 3282 | bytes + 2 * 1024 * 1024, | 2921 | bytes + 2 * 1024 * 1024, |
| 3283 | alloc_target, 0); | 2922 | alloc_target, 0); |
| 3284 | btrfs_end_transaction(trans, root); | 2923 | btrfs_end_transaction(trans, root); |
| 3285 | if (ret) | 2924 | if (ret < 0) |
| 3286 | return ret; | 2925 | return ret; |
| 3287 | 2926 | ||
| 3288 | if (!data_sinfo) { | 2927 | if (!data_sinfo) { |
| @@ -3297,25 +2936,26 @@ alloc: | |||
| 3297 | if (!committed && !root->fs_info->open_ioctl_trans) { | 2936 | if (!committed && !root->fs_info->open_ioctl_trans) { |
| 3298 | committed = 1; | 2937 | committed = 1; |
| 3299 | trans = btrfs_join_transaction(root, 1); | 2938 | trans = btrfs_join_transaction(root, 1); |
| 3300 | if (!trans) | 2939 | if (IS_ERR(trans)) |
| 3301 | return -ENOMEM; | 2940 | return PTR_ERR(trans); |
| 3302 | ret = btrfs_commit_transaction(trans, root); | 2941 | ret = btrfs_commit_transaction(trans, root); |
| 3303 | if (ret) | 2942 | if (ret) |
| 3304 | return ret; | 2943 | return ret; |
| 3305 | goto again; | 2944 | goto again; |
| 3306 | } | 2945 | } |
| 3307 | 2946 | ||
| 3308 | printk(KERN_ERR "no space left, need %llu, %llu delalloc bytes" | 2947 | #if 0 /* I hope we never need this code again, just in case */ |
| 3309 | ", %llu bytes_used, %llu bytes_reserved, " | 2948 | printk(KERN_ERR "no space left, need %llu, %llu bytes_used, " |
| 3310 | "%llu bytes_pinned, %llu bytes_readonly, %llu may use " | 2949 | "%llu bytes_reserved, " "%llu bytes_pinned, " |
| 3311 | "%llu total\n", (unsigned long long)bytes, | 2950 | "%llu bytes_readonly, %llu may use %llu total\n", |
| 3312 | (unsigned long long)data_sinfo->bytes_delalloc, | 2951 | (unsigned long long)bytes, |
| 3313 | (unsigned long long)data_sinfo->bytes_used, | 2952 | (unsigned long long)data_sinfo->bytes_used, |
| 3314 | (unsigned long long)data_sinfo->bytes_reserved, | 2953 | (unsigned long long)data_sinfo->bytes_reserved, |
| 3315 | (unsigned long long)data_sinfo->bytes_pinned, | 2954 | (unsigned long long)data_sinfo->bytes_pinned, |
| 3316 | (unsigned long long)data_sinfo->bytes_readonly, | 2955 | (unsigned long long)data_sinfo->bytes_readonly, |
| 3317 | (unsigned long long)data_sinfo->bytes_may_use, | 2956 | (unsigned long long)data_sinfo->bytes_may_use, |
| 3318 | (unsigned long long)data_sinfo->total_bytes); | 2957 | (unsigned long long)data_sinfo->total_bytes); |
| 2958 | #endif | ||
| 3319 | return -ENOSPC; | 2959 | return -ENOSPC; |
| 3320 | } | 2960 | } |
| 3321 | data_sinfo->bytes_may_use += bytes; | 2961 | data_sinfo->bytes_may_use += bytes; |
| @@ -3326,12 +2966,13 @@ alloc: | |||
| 3326 | } | 2966 | } |
| 3327 | 2967 | ||
| 3328 | /* | 2968 | /* |
| 3329 | * if there was an error for whatever reason after calling | 2969 | * called when we are clearing an delalloc extent from the |
| 3330 | * btrfs_check_data_free_space, call this so we can cleanup the counters. | 2970 | * inode's io_tree or there was an error for whatever reason |
| 2971 | * after calling btrfs_check_data_free_space | ||
| 3331 | */ | 2972 | */ |
| 3332 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2973 | void btrfs_free_reserved_data_space(struct inode *inode, u64 bytes) |
| 3333 | struct inode *inode, u64 bytes) | ||
| 3334 | { | 2974 | { |
| 2975 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3335 | struct btrfs_space_info *data_sinfo; | 2976 | struct btrfs_space_info *data_sinfo; |
| 3336 | 2977 | ||
| 3337 | /* make sure bytes are sectorsize aligned */ | 2978 | /* make sure bytes are sectorsize aligned */ |
| @@ -3344,48 +2985,6 @@ void btrfs_free_reserved_data_space(struct btrfs_root *root, | |||
| 3344 | spin_unlock(&data_sinfo->lock); | 2985 | spin_unlock(&data_sinfo->lock); |
| 3345 | } | 2986 | } |
| 3346 | 2987 | ||
| 3347 | /* called when we are adding a delalloc extent to the inode's io_tree */ | ||
| 3348 | void btrfs_delalloc_reserve_space(struct btrfs_root *root, struct inode *inode, | ||
| 3349 | u64 bytes) | ||
| 3350 | { | ||
| 3351 | struct btrfs_space_info *data_sinfo; | ||
| 3352 | |||
| 3353 | /* get the space info for where this inode will be storing its data */ | ||
| 3354 | data_sinfo = BTRFS_I(inode)->space_info; | ||
| 3355 | |||
| 3356 | /* make sure we have enough space to handle the data first */ | ||
| 3357 | spin_lock(&data_sinfo->lock); | ||
| 3358 | data_sinfo->bytes_delalloc += bytes; | ||
| 3359 | |||
| 3360 | /* | ||
| 3361 | * we are adding a delalloc extent without calling | ||
| 3362 | * btrfs_check_data_free_space first. This happens on a weird | ||
| 3363 | * writepage condition, but shouldn't hurt our accounting | ||
| 3364 | */ | ||
| 3365 | if (unlikely(bytes > BTRFS_I(inode)->reserved_bytes)) { | ||
| 3366 | data_sinfo->bytes_may_use -= BTRFS_I(inode)->reserved_bytes; | ||
| 3367 | BTRFS_I(inode)->reserved_bytes = 0; | ||
| 3368 | } else { | ||
| 3369 | data_sinfo->bytes_may_use -= bytes; | ||
| 3370 | BTRFS_I(inode)->reserved_bytes -= bytes; | ||
| 3371 | } | ||
| 3372 | |||
| 3373 | spin_unlock(&data_sinfo->lock); | ||
| 3374 | } | ||
| 3375 | |||
| 3376 | /* called when we are clearing an delalloc extent from the inode's io_tree */ | ||
| 3377 | void btrfs_delalloc_free_space(struct btrfs_root *root, struct inode *inode, | ||
| 3378 | u64 bytes) | ||
| 3379 | { | ||
| 3380 | struct btrfs_space_info *info; | ||
| 3381 | |||
| 3382 | info = BTRFS_I(inode)->space_info; | ||
| 3383 | |||
| 3384 | spin_lock(&info->lock); | ||
| 3385 | info->bytes_delalloc -= bytes; | ||
| 3386 | spin_unlock(&info->lock); | ||
| 3387 | } | ||
| 3388 | |||
| 3389 | static void force_metadata_allocation(struct btrfs_fs_info *info) | 2988 | static void force_metadata_allocation(struct btrfs_fs_info *info) |
| 3390 | { | 2989 | { |
| 3391 | struct list_head *head = &info->space_info; | 2990 | struct list_head *head = &info->space_info; |
| @@ -3399,13 +2998,28 @@ static void force_metadata_allocation(struct btrfs_fs_info *info) | |||
| 3399 | rcu_read_unlock(); | 2998 | rcu_read_unlock(); |
| 3400 | } | 2999 | } |
| 3401 | 3000 | ||
| 3001 | static int should_alloc_chunk(struct btrfs_space_info *sinfo, | ||
| 3002 | u64 alloc_bytes) | ||
| 3003 | { | ||
| 3004 | u64 num_bytes = sinfo->total_bytes - sinfo->bytes_readonly; | ||
| 3005 | |||
| 3006 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
| 3007 | alloc_bytes + 256 * 1024 * 1024 < num_bytes) | ||
| 3008 | return 0; | ||
| 3009 | |||
| 3010 | if (sinfo->bytes_used + sinfo->bytes_reserved + | ||
| 3011 | alloc_bytes < div_factor(num_bytes, 8)) | ||
| 3012 | return 0; | ||
| 3013 | |||
| 3014 | return 1; | ||
| 3015 | } | ||
| 3016 | |||
| 3402 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, | 3017 | static int do_chunk_alloc(struct btrfs_trans_handle *trans, |
| 3403 | struct btrfs_root *extent_root, u64 alloc_bytes, | 3018 | struct btrfs_root *extent_root, u64 alloc_bytes, |
| 3404 | u64 flags, int force) | 3019 | u64 flags, int force) |
| 3405 | { | 3020 | { |
| 3406 | struct btrfs_space_info *space_info; | 3021 | struct btrfs_space_info *space_info; |
| 3407 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3022 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
| 3408 | u64 thresh; | ||
| 3409 | int ret = 0; | 3023 | int ret = 0; |
| 3410 | 3024 | ||
| 3411 | mutex_lock(&fs_info->chunk_mutex); | 3025 | mutex_lock(&fs_info->chunk_mutex); |
| @@ -3428,11 +3042,7 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3428 | goto out; | 3042 | goto out; |
| 3429 | } | 3043 | } |
| 3430 | 3044 | ||
| 3431 | thresh = space_info->total_bytes - space_info->bytes_readonly; | 3045 | if (!force && !should_alloc_chunk(space_info, alloc_bytes)) { |
| 3432 | thresh = div_factor(thresh, 8); | ||
| 3433 | if (!force && | ||
| 3434 | (space_info->bytes_used + space_info->bytes_pinned + | ||
| 3435 | space_info->bytes_reserved + alloc_bytes) < thresh) { | ||
| 3436 | spin_unlock(&space_info->lock); | 3046 | spin_unlock(&space_info->lock); |
| 3437 | goto out; | 3047 | goto out; |
| 3438 | } | 3048 | } |
| @@ -3454,6 +3064,8 @@ static int do_chunk_alloc(struct btrfs_trans_handle *trans, | |||
| 3454 | spin_lock(&space_info->lock); | 3064 | spin_lock(&space_info->lock); |
| 3455 | if (ret) | 3065 | if (ret) |
| 3456 | space_info->full = 1; | 3066 | space_info->full = 1; |
| 3067 | else | ||
| 3068 | ret = 1; | ||
| 3457 | space_info->force_alloc = 0; | 3069 | space_info->force_alloc = 0; |
| 3458 | spin_unlock(&space_info->lock); | 3070 | spin_unlock(&space_info->lock); |
| 3459 | out: | 3071 | out: |
| @@ -3461,13 +3073,713 @@ out: | |||
| 3461 | return ret; | 3073 | return ret; |
| 3462 | } | 3074 | } |
| 3463 | 3075 | ||
| 3076 | static int maybe_allocate_chunk(struct btrfs_trans_handle *trans, | ||
| 3077 | struct btrfs_root *root, | ||
| 3078 | struct btrfs_space_info *sinfo, u64 num_bytes) | ||
| 3079 | { | ||
| 3080 | int ret; | ||
| 3081 | int end_trans = 0; | ||
| 3082 | |||
| 3083 | if (sinfo->full) | ||
| 3084 | return 0; | ||
| 3085 | |||
| 3086 | spin_lock(&sinfo->lock); | ||
| 3087 | ret = should_alloc_chunk(sinfo, num_bytes + 2 * 1024 * 1024); | ||
| 3088 | spin_unlock(&sinfo->lock); | ||
| 3089 | if (!ret) | ||
| 3090 | return 0; | ||
| 3091 | |||
| 3092 | if (!trans) { | ||
| 3093 | trans = btrfs_join_transaction(root, 1); | ||
| 3094 | BUG_ON(IS_ERR(trans)); | ||
| 3095 | end_trans = 1; | ||
| 3096 | } | ||
| 3097 | |||
| 3098 | ret = do_chunk_alloc(trans, root->fs_info->extent_root, | ||
| 3099 | num_bytes + 2 * 1024 * 1024, | ||
| 3100 | get_alloc_profile(root, sinfo->flags), 0); | ||
| 3101 | |||
| 3102 | if (end_trans) | ||
| 3103 | btrfs_end_transaction(trans, root); | ||
| 3104 | |||
| 3105 | return ret == 1 ? 1 : 0; | ||
| 3106 | } | ||
| 3107 | |||
| 3108 | /* | ||
| 3109 | * shrink metadata reservation for delalloc | ||
| 3110 | */ | ||
| 3111 | static int shrink_delalloc(struct btrfs_trans_handle *trans, | ||
| 3112 | struct btrfs_root *root, u64 to_reclaim) | ||
| 3113 | { | ||
| 3114 | struct btrfs_block_rsv *block_rsv; | ||
| 3115 | u64 reserved; | ||
| 3116 | u64 max_reclaim; | ||
| 3117 | u64 reclaimed = 0; | ||
| 3118 | int pause = 1; | ||
| 3119 | int ret; | ||
| 3120 | |||
| 3121 | block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 3122 | spin_lock(&block_rsv->lock); | ||
| 3123 | reserved = block_rsv->reserved; | ||
| 3124 | spin_unlock(&block_rsv->lock); | ||
| 3125 | |||
| 3126 | if (reserved == 0) | ||
| 3127 | return 0; | ||
| 3128 | |||
| 3129 | max_reclaim = min(reserved, to_reclaim); | ||
| 3130 | |||
| 3131 | while (1) { | ||
| 3132 | ret = btrfs_start_one_delalloc_inode(root, trans ? 1 : 0); | ||
| 3133 | if (!ret) { | ||
| 3134 | __set_current_state(TASK_INTERRUPTIBLE); | ||
| 3135 | schedule_timeout(pause); | ||
| 3136 | pause <<= 1; | ||
| 3137 | if (pause > HZ / 10) | ||
| 3138 | pause = HZ / 10; | ||
| 3139 | } else { | ||
| 3140 | pause = 1; | ||
| 3141 | } | ||
| 3142 | |||
| 3143 | spin_lock(&block_rsv->lock); | ||
| 3144 | if (reserved > block_rsv->reserved) | ||
| 3145 | reclaimed = reserved - block_rsv->reserved; | ||
| 3146 | reserved = block_rsv->reserved; | ||
| 3147 | spin_unlock(&block_rsv->lock); | ||
| 3148 | |||
| 3149 | if (reserved == 0 || reclaimed >= max_reclaim) | ||
| 3150 | break; | ||
| 3151 | |||
| 3152 | if (trans && trans->transaction->blocked) | ||
| 3153 | return -EAGAIN; | ||
| 3154 | } | ||
| 3155 | return reclaimed >= to_reclaim; | ||
| 3156 | } | ||
| 3157 | |||
| 3158 | static int should_retry_reserve(struct btrfs_trans_handle *trans, | ||
| 3159 | struct btrfs_root *root, | ||
| 3160 | struct btrfs_block_rsv *block_rsv, | ||
| 3161 | u64 num_bytes, int *retries) | ||
| 3162 | { | ||
| 3163 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
| 3164 | int ret; | ||
| 3165 | |||
| 3166 | if ((*retries) > 2) | ||
| 3167 | return -ENOSPC; | ||
| 3168 | |||
| 3169 | ret = maybe_allocate_chunk(trans, root, space_info, num_bytes); | ||
| 3170 | if (ret) | ||
| 3171 | return 1; | ||
| 3172 | |||
| 3173 | if (trans && trans->transaction->in_commit) | ||
| 3174 | return -ENOSPC; | ||
| 3175 | |||
| 3176 | ret = shrink_delalloc(trans, root, num_bytes); | ||
| 3177 | if (ret) | ||
| 3178 | return ret; | ||
| 3179 | |||
| 3180 | spin_lock(&space_info->lock); | ||
| 3181 | if (space_info->bytes_pinned < num_bytes) | ||
| 3182 | ret = 1; | ||
| 3183 | spin_unlock(&space_info->lock); | ||
| 3184 | if (ret) | ||
| 3185 | return -ENOSPC; | ||
| 3186 | |||
| 3187 | (*retries)++; | ||
| 3188 | |||
| 3189 | if (trans) | ||
| 3190 | return -EAGAIN; | ||
| 3191 | |||
| 3192 | trans = btrfs_join_transaction(root, 1); | ||
| 3193 | BUG_ON(IS_ERR(trans)); | ||
| 3194 | ret = btrfs_commit_transaction(trans, root); | ||
| 3195 | BUG_ON(ret); | ||
| 3196 | |||
| 3197 | return 1; | ||
| 3198 | } | ||
| 3199 | |||
| 3200 | static int reserve_metadata_bytes(struct btrfs_block_rsv *block_rsv, | ||
| 3201 | u64 num_bytes) | ||
| 3202 | { | ||
| 3203 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
| 3204 | u64 unused; | ||
| 3205 | int ret = -ENOSPC; | ||
| 3206 | |||
| 3207 | spin_lock(&space_info->lock); | ||
| 3208 | unused = space_info->bytes_used + space_info->bytes_reserved + | ||
| 3209 | space_info->bytes_pinned + space_info->bytes_readonly; | ||
| 3210 | |||
| 3211 | if (unused < space_info->total_bytes) | ||
| 3212 | unused = space_info->total_bytes - unused; | ||
| 3213 | else | ||
| 3214 | unused = 0; | ||
| 3215 | |||
| 3216 | if (unused >= num_bytes) { | ||
| 3217 | if (block_rsv->priority >= 10) { | ||
| 3218 | space_info->bytes_reserved += num_bytes; | ||
| 3219 | ret = 0; | ||
| 3220 | } else { | ||
| 3221 | if ((unused + block_rsv->reserved) * | ||
| 3222 | block_rsv->priority >= | ||
| 3223 | (num_bytes + block_rsv->reserved) * 10) { | ||
| 3224 | space_info->bytes_reserved += num_bytes; | ||
| 3225 | ret = 0; | ||
| 3226 | } | ||
| 3227 | } | ||
| 3228 | } | ||
| 3229 | spin_unlock(&space_info->lock); | ||
| 3230 | |||
| 3231 | return ret; | ||
| 3232 | } | ||
| 3233 | |||
| 3234 | static struct btrfs_block_rsv *get_block_rsv(struct btrfs_trans_handle *trans, | ||
| 3235 | struct btrfs_root *root) | ||
| 3236 | { | ||
| 3237 | struct btrfs_block_rsv *block_rsv; | ||
| 3238 | if (root->ref_cows) | ||
| 3239 | block_rsv = trans->block_rsv; | ||
| 3240 | else | ||
| 3241 | block_rsv = root->block_rsv; | ||
| 3242 | |||
| 3243 | if (!block_rsv) | ||
| 3244 | block_rsv = &root->fs_info->empty_block_rsv; | ||
| 3245 | |||
| 3246 | return block_rsv; | ||
| 3247 | } | ||
| 3248 | |||
| 3249 | static int block_rsv_use_bytes(struct btrfs_block_rsv *block_rsv, | ||
| 3250 | u64 num_bytes) | ||
| 3251 | { | ||
| 3252 | int ret = -ENOSPC; | ||
| 3253 | spin_lock(&block_rsv->lock); | ||
| 3254 | if (block_rsv->reserved >= num_bytes) { | ||
| 3255 | block_rsv->reserved -= num_bytes; | ||
| 3256 | if (block_rsv->reserved < block_rsv->size) | ||
| 3257 | block_rsv->full = 0; | ||
| 3258 | ret = 0; | ||
| 3259 | } | ||
| 3260 | spin_unlock(&block_rsv->lock); | ||
| 3261 | return ret; | ||
| 3262 | } | ||
| 3263 | |||
| 3264 | static void block_rsv_add_bytes(struct btrfs_block_rsv *block_rsv, | ||
| 3265 | u64 num_bytes, int update_size) | ||
| 3266 | { | ||
| 3267 | spin_lock(&block_rsv->lock); | ||
| 3268 | block_rsv->reserved += num_bytes; | ||
| 3269 | if (update_size) | ||
| 3270 | block_rsv->size += num_bytes; | ||
| 3271 | else if (block_rsv->reserved >= block_rsv->size) | ||
| 3272 | block_rsv->full = 1; | ||
| 3273 | spin_unlock(&block_rsv->lock); | ||
| 3274 | } | ||
| 3275 | |||
| 3276 | void block_rsv_release_bytes(struct btrfs_block_rsv *block_rsv, | ||
| 3277 | struct btrfs_block_rsv *dest, u64 num_bytes) | ||
| 3278 | { | ||
| 3279 | struct btrfs_space_info *space_info = block_rsv->space_info; | ||
| 3280 | |||
| 3281 | spin_lock(&block_rsv->lock); | ||
| 3282 | if (num_bytes == (u64)-1) | ||
| 3283 | num_bytes = block_rsv->size; | ||
| 3284 | block_rsv->size -= num_bytes; | ||
| 3285 | if (block_rsv->reserved >= block_rsv->size) { | ||
| 3286 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
| 3287 | block_rsv->reserved = block_rsv->size; | ||
| 3288 | block_rsv->full = 1; | ||
| 3289 | } else { | ||
| 3290 | num_bytes = 0; | ||
| 3291 | } | ||
| 3292 | spin_unlock(&block_rsv->lock); | ||
| 3293 | |||
| 3294 | if (num_bytes > 0) { | ||
| 3295 | if (dest) { | ||
| 3296 | block_rsv_add_bytes(dest, num_bytes, 0); | ||
| 3297 | } else { | ||
| 3298 | spin_lock(&space_info->lock); | ||
| 3299 | space_info->bytes_reserved -= num_bytes; | ||
| 3300 | spin_unlock(&space_info->lock); | ||
| 3301 | } | ||
| 3302 | } | ||
| 3303 | } | ||
| 3304 | |||
| 3305 | static int block_rsv_migrate_bytes(struct btrfs_block_rsv *src, | ||
| 3306 | struct btrfs_block_rsv *dst, u64 num_bytes) | ||
| 3307 | { | ||
| 3308 | int ret; | ||
| 3309 | |||
| 3310 | ret = block_rsv_use_bytes(src, num_bytes); | ||
| 3311 | if (ret) | ||
| 3312 | return ret; | ||
| 3313 | |||
| 3314 | block_rsv_add_bytes(dst, num_bytes, 1); | ||
| 3315 | return 0; | ||
| 3316 | } | ||
| 3317 | |||
/*
 * Initialize an embedded or freshly allocated block_rsv: zero all
 * fields, then set up the lock, the list head, the initial usage
 * reference and the default priority.  The memset must come first.
 */
void btrfs_init_block_rsv(struct btrfs_block_rsv *rsv)
{
	memset(rsv, 0, sizeof(*rsv));
	spin_lock_init(&rsv->lock);
	/* creator holds the first usage reference */
	atomic_set(&rsv->usage, 1);
	rsv->priority = 6;
	INIT_LIST_HEAD(&rsv->list);
}
| 3326 | |||
| 3327 | struct btrfs_block_rsv *btrfs_alloc_block_rsv(struct btrfs_root *root) | ||
| 3328 | { | ||
| 3329 | struct btrfs_block_rsv *block_rsv; | ||
| 3330 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 3331 | u64 alloc_target; | ||
| 3332 | |||
| 3333 | block_rsv = kmalloc(sizeof(*block_rsv), GFP_NOFS); | ||
| 3334 | if (!block_rsv) | ||
| 3335 | return NULL; | ||
| 3336 | |||
| 3337 | btrfs_init_block_rsv(block_rsv); | ||
| 3338 | |||
| 3339 | alloc_target = btrfs_get_alloc_profile(root, 0); | ||
| 3340 | block_rsv->space_info = __find_space_info(fs_info, | ||
| 3341 | BTRFS_BLOCK_GROUP_METADATA); | ||
| 3342 | |||
| 3343 | return block_rsv; | ||
| 3344 | } | ||
| 3345 | |||
/*
 * Drop one usage reference on @rsv; the last reference releases the
 * whole reservation back to the space_info / global rsv.
 */
void btrfs_free_block_rsv(struct btrfs_root *root,
			  struct btrfs_block_rsv *rsv)
{
	if (rsv && atomic_dec_and_test(&rsv->usage)) {
		btrfs_block_rsv_release(root, rsv, (u64)-1);
		/*
		 * Durable rsvs stay on fs_info->durable_block_rsv_list and
		 * are kfree'd later by the commit path once their freed[]
		 * counters drain (see btrfs_finish_extent_commit), so they
		 * must not be freed here.
		 */
		if (!rsv->durable)
			kfree(rsv);
	}
}
| 3355 | |||
/*
 * make the block_rsv struct be able to capture freed space.
 * the captured space will be re-added to the block_rsv struct
 * after transaction commit
 */
void btrfs_add_durable_block_rsv(struct btrfs_fs_info *fs_info,
				 struct btrfs_block_rsv *block_rsv)
{
	block_rsv->durable = 1;
	/* the list is walked at commit time under this same mutex */
	mutex_lock(&fs_info->durable_block_rsv_mutex);
	list_add_tail(&block_rsv->list, &fs_info->durable_block_rsv_list);
	mutex_unlock(&fs_info->durable_block_rsv_mutex);
}
| 3369 | |||
| 3370 | int btrfs_block_rsv_add(struct btrfs_trans_handle *trans, | ||
| 3371 | struct btrfs_root *root, | ||
| 3372 | struct btrfs_block_rsv *block_rsv, | ||
| 3373 | u64 num_bytes, int *retries) | ||
| 3374 | { | ||
| 3375 | int ret; | ||
| 3376 | |||
| 3377 | if (num_bytes == 0) | ||
| 3378 | return 0; | ||
| 3379 | again: | ||
| 3380 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
| 3381 | if (!ret) { | ||
| 3382 | block_rsv_add_bytes(block_rsv, num_bytes, 1); | ||
| 3383 | return 0; | ||
| 3384 | } | ||
| 3385 | |||
| 3386 | ret = should_retry_reserve(trans, root, block_rsv, num_bytes, retries); | ||
| 3387 | if (ret > 0) | ||
| 3388 | goto again; | ||
| 3389 | |||
| 3390 | return ret; | ||
| 3391 | } | ||
| 3392 | |||
| 3393 | int btrfs_block_rsv_check(struct btrfs_trans_handle *trans, | ||
| 3394 | struct btrfs_root *root, | ||
| 3395 | struct btrfs_block_rsv *block_rsv, | ||
| 3396 | u64 min_reserved, int min_factor) | ||
| 3397 | { | ||
| 3398 | u64 num_bytes = 0; | ||
| 3399 | int commit_trans = 0; | ||
| 3400 | int ret = -ENOSPC; | ||
| 3401 | |||
| 3402 | if (!block_rsv) | ||
| 3403 | return 0; | ||
| 3404 | |||
| 3405 | spin_lock(&block_rsv->lock); | ||
| 3406 | if (min_factor > 0) | ||
| 3407 | num_bytes = div_factor(block_rsv->size, min_factor); | ||
| 3408 | if (min_reserved > num_bytes) | ||
| 3409 | num_bytes = min_reserved; | ||
| 3410 | |||
| 3411 | if (block_rsv->reserved >= num_bytes) { | ||
| 3412 | ret = 0; | ||
| 3413 | } else { | ||
| 3414 | num_bytes -= block_rsv->reserved; | ||
| 3415 | if (block_rsv->durable && | ||
| 3416 | block_rsv->freed[0] + block_rsv->freed[1] >= num_bytes) | ||
| 3417 | commit_trans = 1; | ||
| 3418 | } | ||
| 3419 | spin_unlock(&block_rsv->lock); | ||
| 3420 | if (!ret) | ||
| 3421 | return 0; | ||
| 3422 | |||
| 3423 | if (block_rsv->refill_used) { | ||
| 3424 | ret = reserve_metadata_bytes(block_rsv, num_bytes); | ||
| 3425 | if (!ret) { | ||
| 3426 | block_rsv_add_bytes(block_rsv, num_bytes, 0); | ||
| 3427 | return 0; | ||
| 3428 | } | ||
| 3429 | } | ||
| 3430 | |||
| 3431 | if (commit_trans) { | ||
| 3432 | if (trans) | ||
| 3433 | return -EAGAIN; | ||
| 3434 | |||
| 3435 | trans = btrfs_join_transaction(root, 1); | ||
| 3436 | BUG_ON(IS_ERR(trans)); | ||
| 3437 | ret = btrfs_commit_transaction(trans, root); | ||
| 3438 | return 0; | ||
| 3439 | } | ||
| 3440 | |||
| 3441 | WARN_ON(1); | ||
| 3442 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
| 3443 | block_rsv->size, block_rsv->reserved, | ||
| 3444 | block_rsv->freed[0], block_rsv->freed[1]); | ||
| 3445 | |||
| 3446 | return -ENOSPC; | ||
| 3447 | } | ||
| 3448 | |||
| 3449 | int btrfs_block_rsv_migrate(struct btrfs_block_rsv *src_rsv, | ||
| 3450 | struct btrfs_block_rsv *dst_rsv, | ||
| 3451 | u64 num_bytes) | ||
| 3452 | { | ||
| 3453 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
| 3454 | } | ||
| 3455 | |||
| 3456 | void btrfs_block_rsv_release(struct btrfs_root *root, | ||
| 3457 | struct btrfs_block_rsv *block_rsv, | ||
| 3458 | u64 num_bytes) | ||
| 3459 | { | ||
| 3460 | struct btrfs_block_rsv *global_rsv = &root->fs_info->global_block_rsv; | ||
| 3461 | if (global_rsv->full || global_rsv == block_rsv || | ||
| 3462 | block_rsv->space_info != global_rsv->space_info) | ||
| 3463 | global_rsv = NULL; | ||
| 3464 | block_rsv_release_bytes(block_rsv, global_rsv, num_bytes); | ||
| 3465 | } | ||
| 3466 | |||
| 3467 | /* | ||
| 3468 | * helper to calculate size of global block reservation. | ||
| 3469 | * the desired value is sum of space used by extent tree, | ||
| 3470 | * checksum tree and root tree | ||
| 3471 | */ | ||
| 3472 | static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) | ||
| 3473 | { | ||
| 3474 | struct btrfs_space_info *sinfo; | ||
| 3475 | u64 num_bytes; | ||
| 3476 | u64 meta_used; | ||
| 3477 | u64 data_used; | ||
| 3478 | int csum_size = btrfs_super_csum_size(&fs_info->super_copy); | ||
| 3479 | #if 0 | ||
| 3480 | /* | ||
| 3481 | * per tree used space accounting can be inaccuracy, so we | ||
| 3482 | * can't rely on it. | ||
| 3483 | */ | ||
| 3484 | spin_lock(&fs_info->extent_root->accounting_lock); | ||
| 3485 | num_bytes = btrfs_root_used(&fs_info->extent_root->root_item); | ||
| 3486 | spin_unlock(&fs_info->extent_root->accounting_lock); | ||
| 3487 | |||
| 3488 | spin_lock(&fs_info->csum_root->accounting_lock); | ||
| 3489 | num_bytes += btrfs_root_used(&fs_info->csum_root->root_item); | ||
| 3490 | spin_unlock(&fs_info->csum_root->accounting_lock); | ||
| 3491 | |||
| 3492 | spin_lock(&fs_info->tree_root->accounting_lock); | ||
| 3493 | num_bytes += btrfs_root_used(&fs_info->tree_root->root_item); | ||
| 3494 | spin_unlock(&fs_info->tree_root->accounting_lock); | ||
| 3495 | #endif | ||
| 3496 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_DATA); | ||
| 3497 | spin_lock(&sinfo->lock); | ||
| 3498 | data_used = sinfo->bytes_used; | ||
| 3499 | spin_unlock(&sinfo->lock); | ||
| 3500 | |||
| 3501 | sinfo = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA); | ||
| 3502 | spin_lock(&sinfo->lock); | ||
| 3503 | meta_used = sinfo->bytes_used; | ||
| 3504 | spin_unlock(&sinfo->lock); | ||
| 3505 | |||
| 3506 | num_bytes = (data_used >> fs_info->sb->s_blocksize_bits) * | ||
| 3507 | csum_size * 2; | ||
| 3508 | num_bytes += div64_u64(data_used + meta_used, 50); | ||
| 3509 | |||
| 3510 | if (num_bytes * 3 > meta_used) | ||
| 3511 | num_bytes = div64_u64(meta_used, 3); | ||
| 3512 | |||
| 3513 | return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); | ||
| 3514 | } | ||
| 3515 | |||
| 3516 | static void update_global_block_rsv(struct btrfs_fs_info *fs_info) | ||
| 3517 | { | ||
| 3518 | struct btrfs_block_rsv *block_rsv = &fs_info->global_block_rsv; | ||
| 3519 | struct btrfs_space_info *sinfo = block_rsv->space_info; | ||
| 3520 | u64 num_bytes; | ||
| 3521 | |||
| 3522 | num_bytes = calc_global_metadata_size(fs_info); | ||
| 3523 | |||
| 3524 | spin_lock(&block_rsv->lock); | ||
| 3525 | spin_lock(&sinfo->lock); | ||
| 3526 | |||
| 3527 | block_rsv->size = num_bytes; | ||
| 3528 | |||
| 3529 | num_bytes = sinfo->bytes_used + sinfo->bytes_pinned + | ||
| 3530 | sinfo->bytes_reserved + sinfo->bytes_readonly; | ||
| 3531 | |||
| 3532 | if (sinfo->total_bytes > num_bytes) { | ||
| 3533 | num_bytes = sinfo->total_bytes - num_bytes; | ||
| 3534 | block_rsv->reserved += num_bytes; | ||
| 3535 | sinfo->bytes_reserved += num_bytes; | ||
| 3536 | } | ||
| 3537 | |||
| 3538 | if (block_rsv->reserved >= block_rsv->size) { | ||
| 3539 | num_bytes = block_rsv->reserved - block_rsv->size; | ||
| 3540 | sinfo->bytes_reserved -= num_bytes; | ||
| 3541 | block_rsv->reserved = block_rsv->size; | ||
| 3542 | block_rsv->full = 1; | ||
| 3543 | } | ||
| 3544 | #if 0 | ||
| 3545 | printk(KERN_INFO"global block rsv size %llu reserved %llu\n", | ||
| 3546 | block_rsv->size, block_rsv->reserved); | ||
| 3547 | #endif | ||
| 3548 | spin_unlock(&sinfo->lock); | ||
| 3549 | spin_unlock(&block_rsv->lock); | ||
| 3550 | } | ||
| 3551 | |||
/*
 * Wire up the standing per-fs block rsvs at mount time: attach each to
 * its space_info, point the core trees at the global rsv, register the
 * durable rsvs, and give the global rsv its first sizing.
 */
static void init_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	struct btrfs_space_info *space_info;

	/* the chunk rsv draws from SYSTEM space */
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_SYSTEM);
	fs_info->chunk_block_rsv.space_info = space_info;
	fs_info->chunk_block_rsv.priority = 10;

	/* every other standing rsv draws from METADATA space */
	space_info = __find_space_info(fs_info, BTRFS_BLOCK_GROUP_METADATA);
	fs_info->global_block_rsv.space_info = space_info;
	fs_info->global_block_rsv.priority = 10;
	/* the global rsv may refill itself straight from the space_info */
	fs_info->global_block_rsv.refill_used = 1;
	fs_info->delalloc_block_rsv.space_info = space_info;
	fs_info->trans_block_rsv.space_info = space_info;
	fs_info->empty_block_rsv.space_info = space_info;
	fs_info->empty_block_rsv.priority = 10;

	/* the core trees allocate out of the global rsv ... */
	fs_info->extent_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->csum_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->dev_root->block_rsv = &fs_info->global_block_rsv;
	fs_info->tree_root->block_rsv = &fs_info->global_block_rsv;
	/* ... except the chunk tree, which uses the SYSTEM rsv */
	fs_info->chunk_root->block_rsv = &fs_info->chunk_block_rsv;

	btrfs_add_durable_block_rsv(fs_info, &fs_info->global_block_rsv);

	btrfs_add_durable_block_rsv(fs_info, &fs_info->delalloc_block_rsv);

	update_global_block_rsv(fs_info);
}
| 3581 | |||
/*
 * Unmount-time teardown: hand everything the global rsv still holds
 * back to the space_info, and warn if any of the other standing rsvs
 * were not already fully drained.
 */
static void release_global_block_rsv(struct btrfs_fs_info *fs_info)
{
	block_rsv_release_bytes(&fs_info->global_block_rsv, NULL, (u64)-1);
	WARN_ON(fs_info->delalloc_block_rsv.size > 0);
	WARN_ON(fs_info->delalloc_block_rsv.reserved > 0);
	WARN_ON(fs_info->trans_block_rsv.size > 0);
	WARN_ON(fs_info->trans_block_rsv.reserved > 0);
	WARN_ON(fs_info->chunk_block_rsv.size > 0);
	WARN_ON(fs_info->chunk_block_rsv.reserved > 0);
}
| 3592 | |||
| 3593 | static u64 calc_trans_metadata_size(struct btrfs_root *root, int num_items) | ||
| 3594 | { | ||
| 3595 | return (root->leafsize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) * | ||
| 3596 | 3 * num_items; | ||
| 3597 | } | ||
| 3598 | |||
| 3599 | int btrfs_trans_reserve_metadata(struct btrfs_trans_handle *trans, | ||
| 3600 | struct btrfs_root *root, | ||
| 3601 | int num_items, int *retries) | ||
| 3602 | { | ||
| 3603 | u64 num_bytes; | ||
| 3604 | int ret; | ||
| 3605 | |||
| 3606 | if (num_items == 0 || root->fs_info->chunk_root == root) | ||
| 3607 | return 0; | ||
| 3608 | |||
| 3609 | num_bytes = calc_trans_metadata_size(root, num_items); | ||
| 3610 | ret = btrfs_block_rsv_add(trans, root, &root->fs_info->trans_block_rsv, | ||
| 3611 | num_bytes, retries); | ||
| 3612 | if (!ret) { | ||
| 3613 | trans->bytes_reserved += num_bytes; | ||
| 3614 | trans->block_rsv = &root->fs_info->trans_block_rsv; | ||
| 3615 | } | ||
| 3616 | return ret; | ||
| 3617 | } | ||
| 3618 | |||
/*
 * Give back whatever btrfs_trans_reserve_metadata() charged to @trans
 * when the transaction ends.
 */
void btrfs_trans_release_metadata(struct btrfs_trans_handle *trans,
				  struct btrfs_root *root)
{
	if (!trans->bytes_reserved)
		return;

	/* bytes_reserved is only ever charged against the trans rsv */
	BUG_ON(trans->block_rsv != &root->fs_info->trans_block_rsv);
	btrfs_block_rsv_release(root, trans->block_rsv,
				trans->bytes_reserved);
	trans->bytes_reserved = 0;
}
| 3630 | |||
| 3631 | int btrfs_orphan_reserve_metadata(struct btrfs_trans_handle *trans, | ||
| 3632 | struct inode *inode) | ||
| 3633 | { | ||
| 3634 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3635 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
| 3636 | struct btrfs_block_rsv *dst_rsv = root->orphan_block_rsv; | ||
| 3637 | |||
| 3638 | /* | ||
| 3639 | * one for deleting orphan item, one for updating inode and | ||
| 3640 | * two for calling btrfs_truncate_inode_items. | ||
| 3641 | * | ||
| 3642 | * btrfs_truncate_inode_items is a delete operation, it frees | ||
| 3643 | * more space than it uses in most cases. So two units of | ||
| 3644 | * metadata space should be enough for calling it many times. | ||
| 3645 | * If all of the metadata space is used, we can commit | ||
| 3646 | * transaction and use space it freed. | ||
| 3647 | */ | ||
| 3648 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
| 3649 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
| 3650 | } | ||
| 3651 | |||
| 3652 | void btrfs_orphan_release_metadata(struct inode *inode) | ||
| 3653 | { | ||
| 3654 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 3655 | u64 num_bytes = calc_trans_metadata_size(root, 4); | ||
| 3656 | btrfs_block_rsv_release(root, root->orphan_block_rsv, num_bytes); | ||
| 3657 | } | ||
| 3658 | |||
| 3659 | int btrfs_snap_reserve_metadata(struct btrfs_trans_handle *trans, | ||
| 3660 | struct btrfs_pending_snapshot *pending) | ||
| 3661 | { | ||
| 3662 | struct btrfs_root *root = pending->root; | ||
| 3663 | struct btrfs_block_rsv *src_rsv = get_block_rsv(trans, root); | ||
| 3664 | struct btrfs_block_rsv *dst_rsv = &pending->block_rsv; | ||
| 3665 | /* | ||
| 3666 | * two for root back/forward refs, two for directory entries | ||
| 3667 | * and one for root of the snapshot. | ||
| 3668 | */ | ||
| 3669 | u64 num_bytes = calc_trans_metadata_size(root, 5); | ||
| 3670 | dst_rsv->space_info = src_rsv->space_info; | ||
| 3671 | return block_rsv_migrate_bytes(src_rsv, dst_rsv, num_bytes); | ||
| 3672 | } | ||
| 3673 | |||
| 3674 | static u64 calc_csum_metadata_size(struct inode *inode, u64 num_bytes) | ||
| 3675 | { | ||
| 3676 | return num_bytes >>= 3; | ||
| 3677 | } | ||
| 3678 | |||
/*
 * Reserve metadata space for @num_bytes of delalloc data on @inode:
 * enough for any extent items this write may add beyond what is
 * already reserved, plus checksum space for the data itself.
 *
 * Returns 0 on success or a negative errno.
 */
int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_block_rsv *block_rsv = &root->fs_info->delalloc_block_rsv;
	u64 to_reserve;
	int nr_extents;
	int retries = 0;
	int ret;

	/*
	 * NOTE(review): back off briefly while a commit is running;
	 * schedule_timeout() is called without setting a non-running
	 * task state, so this looks like a plain yield — confirm that
	 * is the intent.
	 */
	if (btrfs_transaction_in_commit(root->fs_info))
		schedule_timeout(1);

	num_bytes = ALIGN(num_bytes, root->sectorsize);
again:
	spin_lock(&BTRFS_I(inode)->accounting_lock);
	/* this write may create one extent beyond those outstanding */
	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents) + 1;
	if (nr_extents > BTRFS_I(inode)->reserved_extents) {
		/* only the extents not already covered need reserving */
		nr_extents -= BTRFS_I(inode)->reserved_extents;
		to_reserve = calc_trans_metadata_size(root, nr_extents);
	} else {
		nr_extents = 0;
		to_reserve = 0;
	}

	to_reserve += calc_csum_metadata_size(inode, num_bytes);
	/*
	 * NOTE(review): reserve_metadata_bytes() is called while
	 * holding accounting_lock — confirm it cannot sleep.
	 */
	ret = reserve_metadata_bytes(block_rsv, to_reserve);
	if (ret) {
		spin_unlock(&BTRFS_I(inode)->accounting_lock);
		ret = should_retry_reserve(NULL, root, block_rsv, to_reserve,
					   &retries);
		/* > 0 means flushing/committing made a retry worthwhile */
		if (ret > 0)
			goto again;
		return ret;
	}

	BTRFS_I(inode)->reserved_extents += nr_extents;
	atomic_inc(&BTRFS_I(inode)->outstanding_extents);
	spin_unlock(&BTRFS_I(inode)->accounting_lock);

	block_rsv_add_bytes(block_rsv, to_reserve, 1);

	/* once the delalloc rsv grows past 512M, push some dirty data out */
	if (block_rsv->size > 512 * 1024 * 1024)
		shrink_delalloc(NULL, root, to_reserve);

	return 0;
}
| 3725 | |||
/*
 * Release the delalloc metadata reservation for @num_bytes of data on
 * @inode: one outstanding extent completes, and any reserved extents
 * no longer outstanding are released along with the checksum space.
 */
void btrfs_delalloc_release_metadata(struct inode *inode, u64 num_bytes)
{
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 to_free;
	int nr_extents;

	num_bytes = ALIGN(num_bytes, root->sectorsize);
	atomic_dec(&BTRFS_I(inode)->outstanding_extents);

	spin_lock(&BTRFS_I(inode)->accounting_lock);
	nr_extents = atomic_read(&BTRFS_I(inode)->outstanding_extents);
	if (nr_extents < BTRFS_I(inode)->reserved_extents) {
		/* drop reservations for extents no longer outstanding */
		nr_extents = BTRFS_I(inode)->reserved_extents - nr_extents;
		BTRFS_I(inode)->reserved_extents -= nr_extents;
	} else {
		nr_extents = 0;
	}
	spin_unlock(&BTRFS_I(inode)->accounting_lock);

	/* checksum space for the data, plus the per-extent metadata */
	to_free = calc_csum_metadata_size(inode, num_bytes);
	if (nr_extents > 0)
		to_free += calc_trans_metadata_size(root, nr_extents);

	btrfs_block_rsv_release(root, &root->fs_info->delalloc_block_rsv,
				to_free);
}
| 3752 | |||
| 3753 | int btrfs_delalloc_reserve_space(struct inode *inode, u64 num_bytes) | ||
| 3754 | { | ||
| 3755 | int ret; | ||
| 3756 | |||
| 3757 | ret = btrfs_check_data_free_space(inode, num_bytes); | ||
| 3758 | if (ret) | ||
| 3759 | return ret; | ||
| 3760 | |||
| 3761 | ret = btrfs_delalloc_reserve_metadata(inode, num_bytes); | ||
| 3762 | if (ret) { | ||
| 3763 | btrfs_free_reserved_data_space(inode, num_bytes); | ||
| 3764 | return ret; | ||
| 3765 | } | ||
| 3766 | |||
| 3767 | return 0; | ||
| 3768 | } | ||
| 3769 | |||
/*
 * Undo btrfs_delalloc_reserve_space(): release the metadata
 * reservation, then the data reservation.
 */
void btrfs_delalloc_release_space(struct inode *inode, u64 num_bytes)
{
	btrfs_delalloc_release_metadata(inode, num_bytes);
	btrfs_free_reserved_data_space(inode, num_bytes);
}
| 3775 | |||
| 3464 | static int update_block_group(struct btrfs_trans_handle *trans, | 3776 | static int update_block_group(struct btrfs_trans_handle *trans, |
| 3465 | struct btrfs_root *root, | 3777 | struct btrfs_root *root, |
| 3466 | u64 bytenr, u64 num_bytes, int alloc, | 3778 | u64 bytenr, u64 num_bytes, int alloc) |
| 3467 | int mark_free) | ||
| 3468 | { | 3779 | { |
| 3469 | struct btrfs_block_group_cache *cache; | 3780 | struct btrfs_block_group_cache *cache; |
| 3470 | struct btrfs_fs_info *info = root->fs_info; | 3781 | struct btrfs_fs_info *info = root->fs_info; |
| 3782 | int factor; | ||
| 3471 | u64 total = num_bytes; | 3783 | u64 total = num_bytes; |
| 3472 | u64 old_val; | 3784 | u64 old_val; |
| 3473 | u64 byte_in_group; | 3785 | u64 byte_in_group; |
| @@ -3486,6 +3798,12 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 3486 | cache = btrfs_lookup_block_group(info, bytenr); | 3798 | cache = btrfs_lookup_block_group(info, bytenr); |
| 3487 | if (!cache) | 3799 | if (!cache) |
| 3488 | return -1; | 3800 | return -1; |
| 3801 | if (cache->flags & (BTRFS_BLOCK_GROUP_DUP | | ||
| 3802 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 3803 | BTRFS_BLOCK_GROUP_RAID10)) | ||
| 3804 | factor = 2; | ||
| 3805 | else | ||
| 3806 | factor = 1; | ||
| 3489 | byte_in_group = bytenr - cache->key.objectid; | 3807 | byte_in_group = bytenr - cache->key.objectid; |
| 3490 | WARN_ON(byte_in_group > cache->key.offset); | 3808 | WARN_ON(byte_in_group > cache->key.offset); |
| 3491 | 3809 | ||
| @@ -3498,31 +3816,24 @@ static int update_block_group(struct btrfs_trans_handle *trans, | |||
| 3498 | old_val += num_bytes; | 3816 | old_val += num_bytes; |
| 3499 | btrfs_set_block_group_used(&cache->item, old_val); | 3817 | btrfs_set_block_group_used(&cache->item, old_val); |
| 3500 | cache->reserved -= num_bytes; | 3818 | cache->reserved -= num_bytes; |
| 3501 | cache->space_info->bytes_used += num_bytes; | ||
| 3502 | cache->space_info->bytes_reserved -= num_bytes; | 3819 | cache->space_info->bytes_reserved -= num_bytes; |
| 3503 | if (cache->ro) | 3820 | cache->space_info->bytes_used += num_bytes; |
| 3504 | cache->space_info->bytes_readonly -= num_bytes; | 3821 | cache->space_info->disk_used += num_bytes * factor; |
| 3505 | spin_unlock(&cache->lock); | 3822 | spin_unlock(&cache->lock); |
| 3506 | spin_unlock(&cache->space_info->lock); | 3823 | spin_unlock(&cache->space_info->lock); |
| 3507 | } else { | 3824 | } else { |
| 3508 | old_val -= num_bytes; | 3825 | old_val -= num_bytes; |
| 3509 | cache->space_info->bytes_used -= num_bytes; | ||
| 3510 | if (cache->ro) | ||
| 3511 | cache->space_info->bytes_readonly += num_bytes; | ||
| 3512 | btrfs_set_block_group_used(&cache->item, old_val); | 3826 | btrfs_set_block_group_used(&cache->item, old_val); |
| 3827 | cache->pinned += num_bytes; | ||
| 3828 | cache->space_info->bytes_pinned += num_bytes; | ||
| 3829 | cache->space_info->bytes_used -= num_bytes; | ||
| 3830 | cache->space_info->disk_used -= num_bytes * factor; | ||
| 3513 | spin_unlock(&cache->lock); | 3831 | spin_unlock(&cache->lock); |
| 3514 | spin_unlock(&cache->space_info->lock); | 3832 | spin_unlock(&cache->space_info->lock); |
| 3515 | if (mark_free) { | ||
| 3516 | int ret; | ||
| 3517 | 3833 | ||
| 3518 | ret = btrfs_discard_extent(root, bytenr, | 3834 | set_extent_dirty(info->pinned_extents, |
| 3519 | num_bytes); | 3835 | bytenr, bytenr + num_bytes - 1, |
| 3520 | WARN_ON(ret); | 3836 | GFP_NOFS | __GFP_NOFAIL); |
| 3521 | |||
| 3522 | ret = btrfs_add_free_space(cache, bytenr, | ||
| 3523 | num_bytes); | ||
| 3524 | WARN_ON(ret); | ||
| 3525 | } | ||
| 3526 | } | 3837 | } |
| 3527 | btrfs_put_block_group(cache); | 3838 | btrfs_put_block_group(cache); |
| 3528 | total -= num_bytes; | 3839 | total -= num_bytes; |
| @@ -3546,18 +3857,10 @@ static u64 first_logical_byte(struct btrfs_root *root, u64 search_start) | |||
| 3546 | return bytenr; | 3857 | return bytenr; |
| 3547 | } | 3858 | } |
| 3548 | 3859 | ||
| 3549 | /* | 3860 | static int pin_down_extent(struct btrfs_root *root, |
| 3550 | * this function must be called within transaction | 3861 | struct btrfs_block_group_cache *cache, |
| 3551 | */ | 3862 | u64 bytenr, u64 num_bytes, int reserved) |
| 3552 | int btrfs_pin_extent(struct btrfs_root *root, | ||
| 3553 | u64 bytenr, u64 num_bytes, int reserved) | ||
| 3554 | { | 3863 | { |
| 3555 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
| 3556 | struct btrfs_block_group_cache *cache; | ||
| 3557 | |||
| 3558 | cache = btrfs_lookup_block_group(fs_info, bytenr); | ||
| 3559 | BUG_ON(!cache); | ||
| 3560 | |||
| 3561 | spin_lock(&cache->space_info->lock); | 3864 | spin_lock(&cache->space_info->lock); |
| 3562 | spin_lock(&cache->lock); | 3865 | spin_lock(&cache->lock); |
| 3563 | cache->pinned += num_bytes; | 3866 | cache->pinned += num_bytes; |
| @@ -3569,28 +3872,68 @@ int btrfs_pin_extent(struct btrfs_root *root, | |||
| 3569 | spin_unlock(&cache->lock); | 3872 | spin_unlock(&cache->lock); |
| 3570 | spin_unlock(&cache->space_info->lock); | 3873 | spin_unlock(&cache->space_info->lock); |
| 3571 | 3874 | ||
| 3572 | btrfs_put_block_group(cache); | 3875 | set_extent_dirty(root->fs_info->pinned_extents, bytenr, |
| 3876 | bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); | ||
| 3877 | return 0; | ||
| 3878 | } | ||
| 3879 | |||
/*
 * this function must be called within transaction
 *
 * Pin the extent at @bytenr for @num_bytes; @reserved is forwarded to
 * pin_down_extent, which does the per-block-group accounting.
 */
int btrfs_pin_extent(struct btrfs_root *root,
		     u64 bytenr, u64 num_bytes, int reserved)
{
	struct btrfs_block_group_cache *cache;

	/* the extent must belong to some block group */
	cache = btrfs_lookup_block_group(root->fs_info, bytenr);
	BUG_ON(!cache);

	pin_down_extent(root, cache, bytenr, num_bytes, reserved);

	btrfs_put_block_group(cache);
	return 0;
}
| 3578 | 3896 | ||
| 3579 | static int update_reserved_extents(struct btrfs_block_group_cache *cache, | 3897 | /* |
| 3580 | u64 num_bytes, int reserve) | 3898 | * update size of reserved extents. this function may return -EAGAIN |
| 3899 | * if 'reserve' is true or 'sinfo' is false. | ||
| 3900 | */ | ||
| 3901 | static int update_reserved_bytes(struct btrfs_block_group_cache *cache, | ||
| 3902 | u64 num_bytes, int reserve, int sinfo) | ||
| 3581 | { | 3903 | { |
| 3582 | spin_lock(&cache->space_info->lock); | 3904 | int ret = 0; |
| 3583 | spin_lock(&cache->lock); | 3905 | if (sinfo) { |
| 3584 | if (reserve) { | 3906 | struct btrfs_space_info *space_info = cache->space_info; |
| 3585 | cache->reserved += num_bytes; | 3907 | spin_lock(&space_info->lock); |
| 3586 | cache->space_info->bytes_reserved += num_bytes; | 3908 | spin_lock(&cache->lock); |
| 3909 | if (reserve) { | ||
| 3910 | if (cache->ro) { | ||
| 3911 | ret = -EAGAIN; | ||
| 3912 | } else { | ||
| 3913 | cache->reserved += num_bytes; | ||
| 3914 | space_info->bytes_reserved += num_bytes; | ||
| 3915 | } | ||
| 3916 | } else { | ||
| 3917 | if (cache->ro) | ||
| 3918 | space_info->bytes_readonly += num_bytes; | ||
| 3919 | cache->reserved -= num_bytes; | ||
| 3920 | space_info->bytes_reserved -= num_bytes; | ||
| 3921 | } | ||
| 3922 | spin_unlock(&cache->lock); | ||
| 3923 | spin_unlock(&space_info->lock); | ||
| 3587 | } else { | 3924 | } else { |
| 3588 | cache->reserved -= num_bytes; | 3925 | spin_lock(&cache->lock); |
| 3589 | cache->space_info->bytes_reserved -= num_bytes; | 3926 | if (cache->ro) { |
| 3927 | ret = -EAGAIN; | ||
| 3928 | } else { | ||
| 3929 | if (reserve) | ||
| 3930 | cache->reserved += num_bytes; | ||
| 3931 | else | ||
| 3932 | cache->reserved -= num_bytes; | ||
| 3933 | } | ||
| 3934 | spin_unlock(&cache->lock); | ||
| 3590 | } | 3935 | } |
| 3591 | spin_unlock(&cache->lock); | 3936 | return ret; |
| 3592 | spin_unlock(&cache->space_info->lock); | ||
| 3593 | return 0; | ||
| 3594 | } | 3937 | } |
| 3595 | 3938 | ||
| 3596 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | 3939 | int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, |
| @@ -3621,6 +3964,8 @@ int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3621 | fs_info->pinned_extents = &fs_info->freed_extents[0]; | 3964 | fs_info->pinned_extents = &fs_info->freed_extents[0]; |
| 3622 | 3965 | ||
| 3623 | up_write(&fs_info->extent_commit_sem); | 3966 | up_write(&fs_info->extent_commit_sem); |
| 3967 | |||
| 3968 | update_global_block_rsv(fs_info); | ||
| 3624 | return 0; | 3969 | return 0; |
| 3625 | } | 3970 | } |
| 3626 | 3971 | ||
| @@ -3647,14 +3992,21 @@ static int unpin_extent_range(struct btrfs_root *root, u64 start, u64 end) | |||
| 3647 | btrfs_add_free_space(cache, start, len); | 3992 | btrfs_add_free_space(cache, start, len); |
| 3648 | } | 3993 | } |
| 3649 | 3994 | ||
| 3995 | start += len; | ||
| 3996 | |||
| 3650 | spin_lock(&cache->space_info->lock); | 3997 | spin_lock(&cache->space_info->lock); |
| 3651 | spin_lock(&cache->lock); | 3998 | spin_lock(&cache->lock); |
| 3652 | cache->pinned -= len; | 3999 | cache->pinned -= len; |
| 3653 | cache->space_info->bytes_pinned -= len; | 4000 | cache->space_info->bytes_pinned -= len; |
| 4001 | if (cache->ro) { | ||
| 4002 | cache->space_info->bytes_readonly += len; | ||
| 4003 | } else if (cache->reserved_pinned > 0) { | ||
| 4004 | len = min(len, cache->reserved_pinned); | ||
| 4005 | cache->reserved_pinned -= len; | ||
| 4006 | cache->space_info->bytes_reserved += len; | ||
| 4007 | } | ||
| 3654 | spin_unlock(&cache->lock); | 4008 | spin_unlock(&cache->lock); |
| 3655 | spin_unlock(&cache->space_info->lock); | 4009 | spin_unlock(&cache->space_info->lock); |
| 3656 | |||
| 3657 | start += len; | ||
| 3658 | } | 4010 | } |
| 3659 | 4011 | ||
| 3660 | if (cache) | 4012 | if (cache) |
| @@ -3667,8 +4019,11 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3667 | { | 4019 | { |
| 3668 | struct btrfs_fs_info *fs_info = root->fs_info; | 4020 | struct btrfs_fs_info *fs_info = root->fs_info; |
| 3669 | struct extent_io_tree *unpin; | 4021 | struct extent_io_tree *unpin; |
| 4022 | struct btrfs_block_rsv *block_rsv; | ||
| 4023 | struct btrfs_block_rsv *next_rsv; | ||
| 3670 | u64 start; | 4024 | u64 start; |
| 3671 | u64 end; | 4025 | u64 end; |
| 4026 | int idx; | ||
| 3672 | int ret; | 4027 | int ret; |
| 3673 | 4028 | ||
| 3674 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) | 4029 | if (fs_info->pinned_extents == &fs_info->freed_extents[0]) |
| @@ -3689,59 +4044,30 @@ int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, | |||
| 3689 | cond_resched(); | 4044 | cond_resched(); |
| 3690 | } | 4045 | } |
| 3691 | 4046 | ||
| 3692 | return ret; | 4047 | mutex_lock(&fs_info->durable_block_rsv_mutex); |
| 3693 | } | 4048 | list_for_each_entry_safe(block_rsv, next_rsv, |
| 4049 | &fs_info->durable_block_rsv_list, list) { | ||
| 3694 | 4050 | ||
| 3695 | static int pin_down_bytes(struct btrfs_trans_handle *trans, | 4051 | idx = trans->transid & 0x1; |
| 3696 | struct btrfs_root *root, | 4052 | if (block_rsv->freed[idx] > 0) { |
| 3697 | struct btrfs_path *path, | 4053 | block_rsv_add_bytes(block_rsv, |
| 3698 | u64 bytenr, u64 num_bytes, | 4054 | block_rsv->freed[idx], 0); |
| 3699 | int is_data, int reserved, | 4055 | block_rsv->freed[idx] = 0; |
| 3700 | struct extent_buffer **must_clean) | 4056 | } |
| 3701 | { | 4057 | if (atomic_read(&block_rsv->usage) == 0) { |
| 3702 | int err = 0; | 4058 | btrfs_block_rsv_release(root, block_rsv, (u64)-1); |
| 3703 | struct extent_buffer *buf; | ||
| 3704 | |||
| 3705 | if (is_data) | ||
| 3706 | goto pinit; | ||
| 3707 | |||
| 3708 | /* | ||
| 3709 | * discard is sloooow, and so triggering discards on | ||
| 3710 | * individual btree blocks isn't a good plan. Just | ||
| 3711 | * pin everything in discard mode. | ||
| 3712 | */ | ||
| 3713 | if (btrfs_test_opt(root, DISCARD)) | ||
| 3714 | goto pinit; | ||
| 3715 | |||
| 3716 | buf = btrfs_find_tree_block(root, bytenr, num_bytes); | ||
| 3717 | if (!buf) | ||
| 3718 | goto pinit; | ||
| 3719 | 4059 | ||
| 3720 | /* we can reuse a block if it hasn't been written | 4060 | if (block_rsv->freed[0] == 0 && |
| 3721 | * and it is from this transaction. We can't | 4061 | block_rsv->freed[1] == 0) { |
| 3722 | * reuse anything from the tree log root because | 4062 | list_del_init(&block_rsv->list); |
| 3723 | * it has tiny sub-transactions. | 4063 | kfree(block_rsv); |
| 3724 | */ | 4064 | } |
| 3725 | if (btrfs_buffer_uptodate(buf, 0) && | 4065 | } else { |
| 3726 | btrfs_try_tree_lock(buf)) { | 4066 | btrfs_block_rsv_release(root, block_rsv, 0); |
| 3727 | u64 header_owner = btrfs_header_owner(buf); | ||
| 3728 | u64 header_transid = btrfs_header_generation(buf); | ||
| 3729 | if (header_owner != BTRFS_TREE_LOG_OBJECTID && | ||
| 3730 | header_transid == trans->transid && | ||
| 3731 | !btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
| 3732 | *must_clean = buf; | ||
| 3733 | return 1; | ||
| 3734 | } | 4067 | } |
| 3735 | btrfs_tree_unlock(buf); | ||
| 3736 | } | 4068 | } |
| 3737 | free_extent_buffer(buf); | 4069 | mutex_unlock(&fs_info->durable_block_rsv_mutex); |
| 3738 | pinit: | ||
| 3739 | if (path) | ||
| 3740 | btrfs_set_path_blocking(path); | ||
| 3741 | /* unlocks the pinned mutex */ | ||
| 3742 | btrfs_pin_extent(root, bytenr, num_bytes, reserved); | ||
| 3743 | 4070 | ||
| 3744 | BUG_ON(err < 0); | ||
| 3745 | return 0; | 4071 | return 0; |
| 3746 | } | 4072 | } |
| 3747 | 4073 | ||
| @@ -3902,9 +4228,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3902 | BUG_ON(ret); | 4228 | BUG_ON(ret); |
| 3903 | } | 4229 | } |
| 3904 | } else { | 4230 | } else { |
| 3905 | int mark_free = 0; | ||
| 3906 | struct extent_buffer *must_clean = NULL; | ||
| 3907 | |||
| 3908 | if (found_extent) { | 4231 | if (found_extent) { |
| 3909 | BUG_ON(is_data && refs_to_drop != | 4232 | BUG_ON(is_data && refs_to_drop != |
| 3910 | extent_data_ref_count(root, path, iref)); | 4233 | extent_data_ref_count(root, path, iref)); |
| @@ -3917,31 +4240,11 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3917 | } | 4240 | } |
| 3918 | } | 4241 | } |
| 3919 | 4242 | ||
| 3920 | ret = pin_down_bytes(trans, root, path, bytenr, | ||
| 3921 | num_bytes, is_data, 0, &must_clean); | ||
| 3922 | if (ret > 0) | ||
| 3923 | mark_free = 1; | ||
| 3924 | BUG_ON(ret < 0); | ||
| 3925 | /* | ||
| 3926 | * it is going to be very rare for someone to be waiting | ||
| 3927 | * on the block we're freeing. del_items might need to | ||
| 3928 | * schedule, so rather than get fancy, just force it | ||
| 3929 | * to blocking here | ||
| 3930 | */ | ||
| 3931 | if (must_clean) | ||
| 3932 | btrfs_set_lock_blocking(must_clean); | ||
| 3933 | |||
| 3934 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], | 4243 | ret = btrfs_del_items(trans, extent_root, path, path->slots[0], |
| 3935 | num_to_del); | 4244 | num_to_del); |
| 3936 | BUG_ON(ret); | 4245 | BUG_ON(ret); |
| 3937 | btrfs_release_path(extent_root, path); | 4246 | btrfs_release_path(extent_root, path); |
| 3938 | 4247 | ||
| 3939 | if (must_clean) { | ||
| 3940 | clean_tree_block(NULL, root, must_clean); | ||
| 3941 | btrfs_tree_unlock(must_clean); | ||
| 3942 | free_extent_buffer(must_clean); | ||
| 3943 | } | ||
| 3944 | |||
| 3945 | if (is_data) { | 4248 | if (is_data) { |
| 3946 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); | 4249 | ret = btrfs_del_csums(trans, root, bytenr, num_bytes); |
| 3947 | BUG_ON(ret); | 4250 | BUG_ON(ret); |
| @@ -3951,8 +4254,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3951 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); | 4254 | (bytenr + num_bytes - 1) >> PAGE_CACHE_SHIFT); |
| 3952 | } | 4255 | } |
| 3953 | 4256 | ||
| 3954 | ret = update_block_group(trans, root, bytenr, num_bytes, 0, | 4257 | ret = update_block_group(trans, root, bytenr, num_bytes, 0); |
| 3955 | mark_free); | ||
| 3956 | BUG_ON(ret); | 4258 | BUG_ON(ret); |
| 3957 | } | 4259 | } |
| 3958 | btrfs_free_path(path); | 4260 | btrfs_free_path(path); |
| @@ -3960,7 +4262,7 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 3960 | } | 4262 | } |
| 3961 | 4263 | ||
| 3962 | /* | 4264 | /* |
| 3963 | * when we free an extent, it is possible (and likely) that we free the last | 4265 | * when we free an block, it is possible (and likely) that we free the last |
| 3964 | * delayed ref for that extent as well. This searches the delayed ref tree for | 4266 | * delayed ref for that extent as well. This searches the delayed ref tree for |
| 3965 | * a given extent, and if there are no other delayed refs to be processed, it | 4267 | * a given extent, and if there are no other delayed refs to be processed, it |
| 3966 | * removes it from the tree. | 4268 | * removes it from the tree. |
| @@ -3972,7 +4274,7 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
| 3972 | struct btrfs_delayed_ref_root *delayed_refs; | 4274 | struct btrfs_delayed_ref_root *delayed_refs; |
| 3973 | struct btrfs_delayed_ref_node *ref; | 4275 | struct btrfs_delayed_ref_node *ref; |
| 3974 | struct rb_node *node; | 4276 | struct rb_node *node; |
| 3975 | int ret; | 4277 | int ret = 0; |
| 3976 | 4278 | ||
| 3977 | delayed_refs = &trans->transaction->delayed_refs; | 4279 | delayed_refs = &trans->transaction->delayed_refs; |
| 3978 | spin_lock(&delayed_refs->lock); | 4280 | spin_lock(&delayed_refs->lock); |
| @@ -4024,17 +4326,99 @@ static noinline int check_ref_cleanup(struct btrfs_trans_handle *trans, | |||
| 4024 | list_del_init(&head->cluster); | 4326 | list_del_init(&head->cluster); |
| 4025 | spin_unlock(&delayed_refs->lock); | 4327 | spin_unlock(&delayed_refs->lock); |
| 4026 | 4328 | ||
| 4027 | ret = run_one_delayed_ref(trans, root->fs_info->tree_root, | 4329 | BUG_ON(head->extent_op); |
| 4028 | &head->node, head->extent_op, | 4330 | if (head->must_insert_reserved) |
| 4029 | head->must_insert_reserved); | 4331 | ret = 1; |
| 4030 | BUG_ON(ret); | 4332 | |
| 4333 | mutex_unlock(&head->mutex); | ||
| 4031 | btrfs_put_delayed_ref(&head->node); | 4334 | btrfs_put_delayed_ref(&head->node); |
| 4032 | return 0; | 4335 | return ret; |
| 4033 | out: | 4336 | out: |
| 4034 | spin_unlock(&delayed_refs->lock); | 4337 | spin_unlock(&delayed_refs->lock); |
| 4035 | return 0; | 4338 | return 0; |
| 4036 | } | 4339 | } |
| 4037 | 4340 | ||
| 4341 | void btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
| 4342 | struct btrfs_root *root, | ||
| 4343 | struct extent_buffer *buf, | ||
| 4344 | u64 parent, int last_ref) | ||
| 4345 | { | ||
| 4346 | struct btrfs_block_rsv *block_rsv; | ||
| 4347 | struct btrfs_block_group_cache *cache = NULL; | ||
| 4348 | int ret; | ||
| 4349 | |||
| 4350 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 4351 | ret = btrfs_add_delayed_tree_ref(trans, buf->start, buf->len, | ||
| 4352 | parent, root->root_key.objectid, | ||
| 4353 | btrfs_header_level(buf), | ||
| 4354 | BTRFS_DROP_DELAYED_REF, NULL); | ||
| 4355 | BUG_ON(ret); | ||
| 4356 | } | ||
| 4357 | |||
| 4358 | if (!last_ref) | ||
| 4359 | return; | ||
| 4360 | |||
| 4361 | block_rsv = get_block_rsv(trans, root); | ||
| 4362 | cache = btrfs_lookup_block_group(root->fs_info, buf->start); | ||
| 4363 | BUG_ON(block_rsv->space_info != cache->space_info); | ||
| 4364 | |||
| 4365 | if (btrfs_header_generation(buf) == trans->transid) { | ||
| 4366 | if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 4367 | ret = check_ref_cleanup(trans, root, buf->start); | ||
| 4368 | if (!ret) | ||
| 4369 | goto pin; | ||
| 4370 | } | ||
| 4371 | |||
| 4372 | if (btrfs_header_flag(buf, BTRFS_HEADER_FLAG_WRITTEN)) { | ||
| 4373 | pin_down_extent(root, cache, buf->start, buf->len, 1); | ||
| 4374 | goto pin; | ||
| 4375 | } | ||
| 4376 | |||
| 4377 | WARN_ON(test_bit(EXTENT_BUFFER_DIRTY, &buf->bflags)); | ||
| 4378 | |||
| 4379 | btrfs_add_free_space(cache, buf->start, buf->len); | ||
| 4380 | ret = update_reserved_bytes(cache, buf->len, 0, 0); | ||
| 4381 | if (ret == -EAGAIN) { | ||
| 4382 | /* block group became read-only */ | ||
| 4383 | update_reserved_bytes(cache, buf->len, 0, 1); | ||
| 4384 | goto out; | ||
| 4385 | } | ||
| 4386 | |||
| 4387 | ret = 1; | ||
| 4388 | spin_lock(&block_rsv->lock); | ||
| 4389 | if (block_rsv->reserved < block_rsv->size) { | ||
| 4390 | block_rsv->reserved += buf->len; | ||
| 4391 | ret = 0; | ||
| 4392 | } | ||
| 4393 | spin_unlock(&block_rsv->lock); | ||
| 4394 | |||
| 4395 | if (ret) { | ||
| 4396 | spin_lock(&cache->space_info->lock); | ||
| 4397 | cache->space_info->bytes_reserved -= buf->len; | ||
| 4398 | spin_unlock(&cache->space_info->lock); | ||
| 4399 | } | ||
| 4400 | goto out; | ||
| 4401 | } | ||
| 4402 | pin: | ||
| 4403 | if (block_rsv->durable && !cache->ro) { | ||
| 4404 | ret = 0; | ||
| 4405 | spin_lock(&cache->lock); | ||
| 4406 | if (!cache->ro) { | ||
| 4407 | cache->reserved_pinned += buf->len; | ||
| 4408 | ret = 1; | ||
| 4409 | } | ||
| 4410 | spin_unlock(&cache->lock); | ||
| 4411 | |||
| 4412 | if (ret) { | ||
| 4413 | spin_lock(&block_rsv->lock); | ||
| 4414 | block_rsv->freed[trans->transid & 0x1] += buf->len; | ||
| 4415 | spin_unlock(&block_rsv->lock); | ||
| 4416 | } | ||
| 4417 | } | ||
| 4418 | out: | ||
| 4419 | btrfs_put_block_group(cache); | ||
| 4420 | } | ||
| 4421 | |||
| 4038 | int btrfs_free_extent(struct btrfs_trans_handle *trans, | 4422 | int btrfs_free_extent(struct btrfs_trans_handle *trans, |
| 4039 | struct btrfs_root *root, | 4423 | struct btrfs_root *root, |
| 4040 | u64 bytenr, u64 num_bytes, u64 parent, | 4424 | u64 bytenr, u64 num_bytes, u64 parent, |
| @@ -4056,8 +4440,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 4056 | parent, root_objectid, (int)owner, | 4440 | parent, root_objectid, (int)owner, |
| 4057 | BTRFS_DROP_DELAYED_REF, NULL); | 4441 | BTRFS_DROP_DELAYED_REF, NULL); |
| 4058 | BUG_ON(ret); | 4442 | BUG_ON(ret); |
| 4059 | ret = check_ref_cleanup(trans, root, bytenr); | ||
| 4060 | BUG_ON(ret); | ||
| 4061 | } else { | 4443 | } else { |
| 4062 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, | 4444 | ret = btrfs_add_delayed_data_ref(trans, bytenr, num_bytes, |
| 4063 | parent, root_objectid, owner, | 4445 | parent, root_objectid, owner, |
| @@ -4067,21 +4449,6 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans, | |||
| 4067 | return ret; | 4449 | return ret; |
| 4068 | } | 4450 | } |
| 4069 | 4451 | ||
| 4070 | int btrfs_free_tree_block(struct btrfs_trans_handle *trans, | ||
| 4071 | struct btrfs_root *root, | ||
| 4072 | u64 bytenr, u32 blocksize, | ||
| 4073 | u64 parent, u64 root_objectid, int level) | ||
| 4074 | { | ||
| 4075 | u64 used; | ||
| 4076 | spin_lock(&root->node_lock); | ||
| 4077 | used = btrfs_root_used(&root->root_item) - blocksize; | ||
| 4078 | btrfs_set_root_used(&root->root_item, used); | ||
| 4079 | spin_unlock(&root->node_lock); | ||
| 4080 | |||
| 4081 | return btrfs_free_extent(trans, root, bytenr, blocksize, | ||
| 4082 | parent, root_objectid, level, 0); | ||
| 4083 | } | ||
| 4084 | |||
| 4085 | static u64 stripe_align(struct btrfs_root *root, u64 val) | 4452 | static u64 stripe_align(struct btrfs_root *root, u64 val) |
| 4086 | { | 4453 | { |
| 4087 | u64 mask = ((u64)root->stripesize - 1); | 4454 | u64 mask = ((u64)root->stripesize - 1); |
| @@ -4134,6 +4501,22 @@ wait_block_group_cache_done(struct btrfs_block_group_cache *cache) | |||
| 4134 | return 0; | 4501 | return 0; |
| 4135 | } | 4502 | } |
| 4136 | 4503 | ||
| 4504 | static int get_block_group_index(struct btrfs_block_group_cache *cache) | ||
| 4505 | { | ||
| 4506 | int index; | ||
| 4507 | if (cache->flags & BTRFS_BLOCK_GROUP_RAID10) | ||
| 4508 | index = 0; | ||
| 4509 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID1) | ||
| 4510 | index = 1; | ||
| 4511 | else if (cache->flags & BTRFS_BLOCK_GROUP_DUP) | ||
| 4512 | index = 2; | ||
| 4513 | else if (cache->flags & BTRFS_BLOCK_GROUP_RAID0) | ||
| 4514 | index = 3; | ||
| 4515 | else | ||
| 4516 | index = 4; | ||
| 4517 | return index; | ||
| 4518 | } | ||
| 4519 | |||
| 4137 | enum btrfs_loop_type { | 4520 | enum btrfs_loop_type { |
| 4138 | LOOP_FIND_IDEAL = 0, | 4521 | LOOP_FIND_IDEAL = 0, |
| 4139 | LOOP_CACHING_NOWAIT = 1, | 4522 | LOOP_CACHING_NOWAIT = 1, |
| @@ -4155,7 +4538,6 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 4155 | u64 num_bytes, u64 empty_size, | 4538 | u64 num_bytes, u64 empty_size, |
| 4156 | u64 search_start, u64 search_end, | 4539 | u64 search_start, u64 search_end, |
| 4157 | u64 hint_byte, struct btrfs_key *ins, | 4540 | u64 hint_byte, struct btrfs_key *ins, |
| 4158 | u64 exclude_start, u64 exclude_nr, | ||
| 4159 | int data) | 4541 | int data) |
| 4160 | { | 4542 | { |
| 4161 | int ret = 0; | 4543 | int ret = 0; |
| @@ -4168,6 +4550,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans, | |||
| 4168 | struct btrfs_space_info *space_info; | 4550 | struct btrfs_space_info *space_info; |
| 4169 | int last_ptr_loop = 0; | 4551 | int last_ptr_loop = 0; |
| 4170 | int loop = 0; | 4552 | int loop = 0; |
| 4553 | int index = 0; | ||
| 4171 | bool found_uncached_bg = false; | 4554 | bool found_uncached_bg = false; |
| 4172 | bool failed_cluster_refill = false; | 4555 | bool failed_cluster_refill = false; |
| 4173 | bool failed_alloc = false; | 4556 | bool failed_alloc = false; |
| @@ -4237,6 +4620,7 @@ ideal_cache: | |||
| 4237 | btrfs_put_block_group(block_group); | 4620 | btrfs_put_block_group(block_group); |
| 4238 | up_read(&space_info->groups_sem); | 4621 | up_read(&space_info->groups_sem); |
| 4239 | } else { | 4622 | } else { |
| 4623 | index = get_block_group_index(block_group); | ||
| 4240 | goto have_block_group; | 4624 | goto have_block_group; |
| 4241 | } | 4625 | } |
| 4242 | } else if (block_group) { | 4626 | } else if (block_group) { |
| @@ -4245,7 +4629,8 @@ ideal_cache: | |||
| 4245 | } | 4629 | } |
| 4246 | search: | 4630 | search: |
| 4247 | down_read(&space_info->groups_sem); | 4631 | down_read(&space_info->groups_sem); |
| 4248 | list_for_each_entry(block_group, &space_info->block_groups, list) { | 4632 | list_for_each_entry(block_group, &space_info->block_groups[index], |
| 4633 | list) { | ||
| 4249 | u64 offset; | 4634 | u64 offset; |
| 4250 | int cached; | 4635 | int cached; |
| 4251 | 4636 | ||
| @@ -4436,23 +4821,22 @@ checks: | |||
| 4436 | goto loop; | 4821 | goto loop; |
| 4437 | } | 4822 | } |
| 4438 | 4823 | ||
| 4439 | if (exclude_nr > 0 && | 4824 | ins->objectid = search_start; |
| 4440 | (search_start + num_bytes > exclude_start && | 4825 | ins->offset = num_bytes; |
| 4441 | search_start < exclude_start + exclude_nr)) { | 4826 | |
| 4442 | search_start = exclude_start + exclude_nr; | 4827 | if (offset < search_start) |
| 4828 | btrfs_add_free_space(block_group, offset, | ||
| 4829 | search_start - offset); | ||
| 4830 | BUG_ON(offset > search_start); | ||
| 4443 | 4831 | ||
| 4832 | ret = update_reserved_bytes(block_group, num_bytes, 1, | ||
| 4833 | (data & BTRFS_BLOCK_GROUP_DATA)); | ||
| 4834 | if (ret == -EAGAIN) { | ||
| 4444 | btrfs_add_free_space(block_group, offset, num_bytes); | 4835 | btrfs_add_free_space(block_group, offset, num_bytes); |
| 4445 | /* | ||
| 4446 | * if search_start is still in this block group | ||
| 4447 | * then we just re-search this block group | ||
| 4448 | */ | ||
| 4449 | if (search_start >= block_group->key.objectid && | ||
| 4450 | search_start < (block_group->key.objectid + | ||
| 4451 | block_group->key.offset)) | ||
| 4452 | goto have_block_group; | ||
| 4453 | goto loop; | 4836 | goto loop; |
| 4454 | } | 4837 | } |
| 4455 | 4838 | ||
| 4839 | /* we are all good, lets return */ | ||
| 4456 | ins->objectid = search_start; | 4840 | ins->objectid = search_start; |
| 4457 | ins->offset = num_bytes; | 4841 | ins->offset = num_bytes; |
| 4458 | 4842 | ||
| @@ -4460,18 +4844,18 @@ checks: | |||
| 4460 | btrfs_add_free_space(block_group, offset, | 4844 | btrfs_add_free_space(block_group, offset, |
| 4461 | search_start - offset); | 4845 | search_start - offset); |
| 4462 | BUG_ON(offset > search_start); | 4846 | BUG_ON(offset > search_start); |
| 4463 | |||
| 4464 | update_reserved_extents(block_group, num_bytes, 1); | ||
| 4465 | |||
| 4466 | /* we are all good, lets return */ | ||
| 4467 | break; | 4847 | break; |
| 4468 | loop: | 4848 | loop: |
| 4469 | failed_cluster_refill = false; | 4849 | failed_cluster_refill = false; |
| 4470 | failed_alloc = false; | 4850 | failed_alloc = false; |
| 4851 | BUG_ON(index != get_block_group_index(block_group)); | ||
| 4471 | btrfs_put_block_group(block_group); | 4852 | btrfs_put_block_group(block_group); |
| 4472 | } | 4853 | } |
| 4473 | up_read(&space_info->groups_sem); | 4854 | up_read(&space_info->groups_sem); |
| 4474 | 4855 | ||
| 4856 | if (!ins->objectid && ++index < BTRFS_NR_RAID_TYPES) | ||
| 4857 | goto search; | ||
| 4858 | |||
| 4475 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for | 4859 | /* LOOP_FIND_IDEAL, only search caching/cached bg's, and don't wait for |
| 4476 | * for them to make caching progress. Also | 4860 | * for them to make caching progress. Also |
| 4477 | * determine the best possible bg to cache | 4861 | * determine the best possible bg to cache |
| @@ -4485,6 +4869,7 @@ loop: | |||
| 4485 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && | 4869 | if (!ins->objectid && loop < LOOP_NO_EMPTY_SIZE && |
| 4486 | (found_uncached_bg || empty_size || empty_cluster || | 4870 | (found_uncached_bg || empty_size || empty_cluster || |
| 4487 | allowed_chunk_alloc)) { | 4871 | allowed_chunk_alloc)) { |
| 4872 | index = 0; | ||
| 4488 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { | 4873 | if (loop == LOOP_FIND_IDEAL && found_uncached_bg) { |
| 4489 | found_uncached_bg = false; | 4874 | found_uncached_bg = false; |
| 4490 | loop++; | 4875 | loop++; |
| @@ -4567,31 +4952,30 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
| 4567 | int dump_block_groups) | 4952 | int dump_block_groups) |
| 4568 | { | 4953 | { |
| 4569 | struct btrfs_block_group_cache *cache; | 4954 | struct btrfs_block_group_cache *cache; |
| 4955 | int index = 0; | ||
| 4570 | 4956 | ||
| 4571 | spin_lock(&info->lock); | 4957 | spin_lock(&info->lock); |
| 4572 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", | 4958 | printk(KERN_INFO "space_info has %llu free, is %sfull\n", |
| 4573 | (unsigned long long)(info->total_bytes - info->bytes_used - | 4959 | (unsigned long long)(info->total_bytes - info->bytes_used - |
| 4574 | info->bytes_pinned - info->bytes_reserved - | 4960 | info->bytes_pinned - info->bytes_reserved - |
| 4575 | info->bytes_super), | 4961 | info->bytes_readonly), |
| 4576 | (info->full) ? "" : "not "); | 4962 | (info->full) ? "" : "not "); |
| 4577 | printk(KERN_INFO "space_info total=%llu, pinned=%llu, delalloc=%llu," | 4963 | printk(KERN_INFO "space_info total=%llu, used=%llu, pinned=%llu, " |
| 4578 | " may_use=%llu, used=%llu, root=%llu, super=%llu, reserved=%llu" | 4964 | "reserved=%llu, may_use=%llu, readonly=%llu\n", |
| 4579 | "\n", | ||
| 4580 | (unsigned long long)info->total_bytes, | 4965 | (unsigned long long)info->total_bytes, |
| 4966 | (unsigned long long)info->bytes_used, | ||
| 4581 | (unsigned long long)info->bytes_pinned, | 4967 | (unsigned long long)info->bytes_pinned, |
| 4582 | (unsigned long long)info->bytes_delalloc, | 4968 | (unsigned long long)info->bytes_reserved, |
| 4583 | (unsigned long long)info->bytes_may_use, | 4969 | (unsigned long long)info->bytes_may_use, |
| 4584 | (unsigned long long)info->bytes_used, | 4970 | (unsigned long long)info->bytes_readonly); |
| 4585 | (unsigned long long)info->bytes_root, | ||
| 4586 | (unsigned long long)info->bytes_super, | ||
| 4587 | (unsigned long long)info->bytes_reserved); | ||
| 4588 | spin_unlock(&info->lock); | 4971 | spin_unlock(&info->lock); |
| 4589 | 4972 | ||
| 4590 | if (!dump_block_groups) | 4973 | if (!dump_block_groups) |
| 4591 | return; | 4974 | return; |
| 4592 | 4975 | ||
| 4593 | down_read(&info->groups_sem); | 4976 | down_read(&info->groups_sem); |
| 4594 | list_for_each_entry(cache, &info->block_groups, list) { | 4977 | again: |
| 4978 | list_for_each_entry(cache, &info->block_groups[index], list) { | ||
| 4595 | spin_lock(&cache->lock); | 4979 | spin_lock(&cache->lock); |
| 4596 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " | 4980 | printk(KERN_INFO "block group %llu has %llu bytes, %llu used " |
| 4597 | "%llu pinned %llu reserved\n", | 4981 | "%llu pinned %llu reserved\n", |
| @@ -4603,6 +4987,8 @@ static void dump_space_info(struct btrfs_space_info *info, u64 bytes, | |||
| 4603 | btrfs_dump_free_space(cache, bytes); | 4987 | btrfs_dump_free_space(cache, bytes); |
| 4604 | spin_unlock(&cache->lock); | 4988 | spin_unlock(&cache->lock); |
| 4605 | } | 4989 | } |
| 4990 | if (++index < BTRFS_NR_RAID_TYPES) | ||
| 4991 | goto again; | ||
| 4606 | up_read(&info->groups_sem); | 4992 | up_read(&info->groups_sem); |
| 4607 | } | 4993 | } |
| 4608 | 4994 | ||
| @@ -4628,9 +5014,8 @@ again: | |||
| 4628 | 5014 | ||
| 4629 | WARN_ON(num_bytes < root->sectorsize); | 5015 | WARN_ON(num_bytes < root->sectorsize); |
| 4630 | ret = find_free_extent(trans, root, num_bytes, empty_size, | 5016 | ret = find_free_extent(trans, root, num_bytes, empty_size, |
| 4631 | search_start, search_end, hint_byte, ins, | 5017 | search_start, search_end, hint_byte, |
| 4632 | trans->alloc_exclude_start, | 5018 | ins, data); |
| 4633 | trans->alloc_exclude_nr, data); | ||
| 4634 | 5019 | ||
| 4635 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { | 5020 | if (ret == -ENOSPC && num_bytes > min_alloc_size) { |
| 4636 | num_bytes = num_bytes >> 1; | 5021 | num_bytes = num_bytes >> 1; |
| @@ -4668,7 +5053,7 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) | |||
| 4668 | ret = btrfs_discard_extent(root, start, len); | 5053 | ret = btrfs_discard_extent(root, start, len); |
| 4669 | 5054 | ||
| 4670 | btrfs_add_free_space(cache, start, len); | 5055 | btrfs_add_free_space(cache, start, len); |
| 4671 | update_reserved_extents(cache, len, 0); | 5056 | update_reserved_bytes(cache, len, 0, 1); |
| 4672 | btrfs_put_block_group(cache); | 5057 | btrfs_put_block_group(cache); |
| 4673 | 5058 | ||
| 4674 | return ret; | 5059 | return ret; |
| @@ -4731,8 +5116,7 @@ static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 4731 | btrfs_mark_buffer_dirty(path->nodes[0]); | 5116 | btrfs_mark_buffer_dirty(path->nodes[0]); |
| 4732 | btrfs_free_path(path); | 5117 | btrfs_free_path(path); |
| 4733 | 5118 | ||
| 4734 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5119 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
| 4735 | 1, 0); | ||
| 4736 | if (ret) { | 5120 | if (ret) { |
| 4737 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5121 | printk(KERN_ERR "btrfs update block group failed for %llu " |
| 4738 | "%llu\n", (unsigned long long)ins->objectid, | 5122 | "%llu\n", (unsigned long long)ins->objectid, |
| @@ -4792,8 +5176,7 @@ static int alloc_reserved_tree_block(struct btrfs_trans_handle *trans, | |||
| 4792 | btrfs_mark_buffer_dirty(leaf); | 5176 | btrfs_mark_buffer_dirty(leaf); |
| 4793 | btrfs_free_path(path); | 5177 | btrfs_free_path(path); |
| 4794 | 5178 | ||
| 4795 | ret = update_block_group(trans, root, ins->objectid, ins->offset, | 5179 | ret = update_block_group(trans, root, ins->objectid, ins->offset, 1); |
| 4796 | 1, 0); | ||
| 4797 | if (ret) { | 5180 | if (ret) { |
| 4798 | printk(KERN_ERR "btrfs update block group failed for %llu " | 5181 | printk(KERN_ERR "btrfs update block group failed for %llu " |
| 4799 | "%llu\n", (unsigned long long)ins->objectid, | 5182 | "%llu\n", (unsigned long long)ins->objectid, |
| @@ -4869,73 +5252,14 @@ int btrfs_alloc_logged_file_extent(struct btrfs_trans_handle *trans, | |||
| 4869 | put_caching_control(caching_ctl); | 5252 | put_caching_control(caching_ctl); |
| 4870 | } | 5253 | } |
| 4871 | 5254 | ||
| 4872 | update_reserved_extents(block_group, ins->offset, 1); | 5255 | ret = update_reserved_bytes(block_group, ins->offset, 1, 1); |
| 5256 | BUG_ON(ret); | ||
| 4873 | btrfs_put_block_group(block_group); | 5257 | btrfs_put_block_group(block_group); |
| 4874 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, | 5258 | ret = alloc_reserved_file_extent(trans, root, 0, root_objectid, |
| 4875 | 0, owner, offset, ins, 1); | 5259 | 0, owner, offset, ins, 1); |
| 4876 | return ret; | 5260 | return ret; |
| 4877 | } | 5261 | } |
| 4878 | 5262 | ||
| 4879 | /* | ||
| 4880 | * finds a free extent and does all the dirty work required for allocation | ||
| 4881 | * returns the key for the extent through ins, and a tree buffer for | ||
| 4882 | * the first block of the extent through buf. | ||
| 4883 | * | ||
| 4884 | * returns 0 if everything worked, non-zero otherwise. | ||
| 4885 | */ | ||
| 4886 | static int alloc_tree_block(struct btrfs_trans_handle *trans, | ||
| 4887 | struct btrfs_root *root, | ||
| 4888 | u64 num_bytes, u64 parent, u64 root_objectid, | ||
| 4889 | struct btrfs_disk_key *key, int level, | ||
| 4890 | u64 empty_size, u64 hint_byte, u64 search_end, | ||
| 4891 | struct btrfs_key *ins) | ||
| 4892 | { | ||
| 4893 | int ret; | ||
| 4894 | u64 flags = 0; | ||
| 4895 | |||
| 4896 | ret = btrfs_reserve_extent(trans, root, num_bytes, num_bytes, | ||
| 4897 | empty_size, hint_byte, search_end, | ||
| 4898 | ins, 0); | ||
| 4899 | if (ret) | ||
| 4900 | return ret; | ||
| 4901 | |||
| 4902 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
| 4903 | if (parent == 0) | ||
| 4904 | parent = ins->objectid; | ||
| 4905 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
| 4906 | } else | ||
| 4907 | BUG_ON(parent > 0); | ||
| 4908 | |||
| 4909 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 4910 | struct btrfs_delayed_extent_op *extent_op; | ||
| 4911 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
| 4912 | BUG_ON(!extent_op); | ||
| 4913 | if (key) | ||
| 4914 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
| 4915 | else | ||
| 4916 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
| 4917 | extent_op->flags_to_set = flags; | ||
| 4918 | extent_op->update_key = 1; | ||
| 4919 | extent_op->update_flags = 1; | ||
| 4920 | extent_op->is_data = 0; | ||
| 4921 | |||
| 4922 | ret = btrfs_add_delayed_tree_ref(trans, ins->objectid, | ||
| 4923 | ins->offset, parent, root_objectid, | ||
| 4924 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
| 4925 | extent_op); | ||
| 4926 | BUG_ON(ret); | ||
| 4927 | } | ||
| 4928 | |||
| 4929 | if (root_objectid == root->root_key.objectid) { | ||
| 4930 | u64 used; | ||
| 4931 | spin_lock(&root->node_lock); | ||
| 4932 | used = btrfs_root_used(&root->root_item) + num_bytes; | ||
| 4933 | btrfs_set_root_used(&root->root_item, used); | ||
| 4934 | spin_unlock(&root->node_lock); | ||
| 4935 | } | ||
| 4936 | return ret; | ||
| 4937 | } | ||
| 4938 | |||
| 4939 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | 5263 | struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, |
| 4940 | struct btrfs_root *root, | 5264 | struct btrfs_root *root, |
| 4941 | u64 bytenr, u32 blocksize, | 5265 | u64 bytenr, u32 blocksize, |
| @@ -4974,8 +5298,45 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans, | |||
| 4974 | return buf; | 5298 | return buf; |
| 4975 | } | 5299 | } |
| 4976 | 5300 | ||
| 5301 | static struct btrfs_block_rsv * | ||
| 5302 | use_block_rsv(struct btrfs_trans_handle *trans, | ||
| 5303 | struct btrfs_root *root, u32 blocksize) | ||
| 5304 | { | ||
| 5305 | struct btrfs_block_rsv *block_rsv; | ||
| 5306 | int ret; | ||
| 5307 | |||
| 5308 | block_rsv = get_block_rsv(trans, root); | ||
| 5309 | |||
| 5310 | if (block_rsv->size == 0) { | ||
| 5311 | ret = reserve_metadata_bytes(block_rsv, blocksize); | ||
| 5312 | if (ret) | ||
| 5313 | return ERR_PTR(ret); | ||
| 5314 | return block_rsv; | ||
| 5315 | } | ||
| 5316 | |||
| 5317 | ret = block_rsv_use_bytes(block_rsv, blocksize); | ||
| 5318 | if (!ret) | ||
| 5319 | return block_rsv; | ||
| 5320 | |||
| 5321 | WARN_ON(1); | ||
| 5322 | printk(KERN_INFO"block_rsv size %llu reserved %llu freed %llu %llu\n", | ||
| 5323 | block_rsv->size, block_rsv->reserved, | ||
| 5324 | block_rsv->freed[0], block_rsv->freed[1]); | ||
| 5325 | |||
| 5326 | return ERR_PTR(-ENOSPC); | ||
| 5327 | } | ||
| 5328 | |||
| 5329 | static void unuse_block_rsv(struct btrfs_block_rsv *block_rsv, u32 blocksize) | ||
| 5330 | { | ||
| 5331 | block_rsv_add_bytes(block_rsv, blocksize, 0); | ||
| 5332 | block_rsv_release_bytes(block_rsv, NULL, 0); | ||
| 5333 | } | ||
| 5334 | |||
| 4977 | /* | 5335 | /* |
| 4978 | * helper function to allocate a block for a given tree | 5336 | * finds a free extent and does all the dirty work required for allocation |
| 5337 | * returns the key for the extent through ins, and a tree buffer for | ||
| 5338 | * the first block of the extent through buf. | ||
| 5339 | * | ||
| 4979 | * returns the tree buffer or NULL. | 5340 | * returns the tree buffer or NULL. |
| 4980 | */ | 5341 | */ |
| 4981 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | 5342 | struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, |
| @@ -4985,18 +5346,53 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans, | |||
| 4985 | u64 hint, u64 empty_size) | 5346 | u64 hint, u64 empty_size) |
| 4986 | { | 5347 | { |
| 4987 | struct btrfs_key ins; | 5348 | struct btrfs_key ins; |
| 4988 | int ret; | 5349 | struct btrfs_block_rsv *block_rsv; |
| 4989 | struct extent_buffer *buf; | 5350 | struct extent_buffer *buf; |
| 5351 | u64 flags = 0; | ||
| 5352 | int ret; | ||
| 5353 | |||
| 4990 | 5354 | ||
| 4991 | ret = alloc_tree_block(trans, root, blocksize, parent, root_objectid, | 5355 | block_rsv = use_block_rsv(trans, root, blocksize); |
| 4992 | key, level, empty_size, hint, (u64)-1, &ins); | 5356 | if (IS_ERR(block_rsv)) |
| 5357 | return ERR_CAST(block_rsv); | ||
| 5358 | |||
| 5359 | ret = btrfs_reserve_extent(trans, root, blocksize, blocksize, | ||
| 5360 | empty_size, hint, (u64)-1, &ins, 0); | ||
| 4993 | if (ret) { | 5361 | if (ret) { |
| 4994 | BUG_ON(ret > 0); | 5362 | unuse_block_rsv(block_rsv, blocksize); |
| 4995 | return ERR_PTR(ret); | 5363 | return ERR_PTR(ret); |
| 4996 | } | 5364 | } |
| 4997 | 5365 | ||
| 4998 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, | 5366 | buf = btrfs_init_new_buffer(trans, root, ins.objectid, |
| 4999 | blocksize, level); | 5367 | blocksize, level); |
| 5368 | BUG_ON(IS_ERR(buf)); | ||
| 5369 | |||
| 5370 | if (root_objectid == BTRFS_TREE_RELOC_OBJECTID) { | ||
| 5371 | if (parent == 0) | ||
| 5372 | parent = ins.objectid; | ||
| 5373 | flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; | ||
| 5374 | } else | ||
| 5375 | BUG_ON(parent > 0); | ||
| 5376 | |||
| 5377 | if (root_objectid != BTRFS_TREE_LOG_OBJECTID) { | ||
| 5378 | struct btrfs_delayed_extent_op *extent_op; | ||
| 5379 | extent_op = kmalloc(sizeof(*extent_op), GFP_NOFS); | ||
| 5380 | BUG_ON(!extent_op); | ||
| 5381 | if (key) | ||
| 5382 | memcpy(&extent_op->key, key, sizeof(extent_op->key)); | ||
| 5383 | else | ||
| 5384 | memset(&extent_op->key, 0, sizeof(extent_op->key)); | ||
| 5385 | extent_op->flags_to_set = flags; | ||
| 5386 | extent_op->update_key = 1; | ||
| 5387 | extent_op->update_flags = 1; | ||
| 5388 | extent_op->is_data = 0; | ||
| 5389 | |||
| 5390 | ret = btrfs_add_delayed_tree_ref(trans, ins.objectid, | ||
| 5391 | ins.offset, parent, root_objectid, | ||
| 5392 | level, BTRFS_ADD_DELAYED_EXTENT, | ||
| 5393 | extent_op); | ||
| 5394 | BUG_ON(ret); | ||
| 5395 | } | ||
| 5000 | return buf; | 5396 | return buf; |
| 5001 | } | 5397 | } |
| 5002 | 5398 | ||
| @@ -5321,7 +5717,7 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 5321 | struct btrfs_path *path, | 5717 | struct btrfs_path *path, |
| 5322 | struct walk_control *wc) | 5718 | struct walk_control *wc) |
| 5323 | { | 5719 | { |
| 5324 | int ret = 0; | 5720 | int ret; |
| 5325 | int level = wc->level; | 5721 | int level = wc->level; |
| 5326 | struct extent_buffer *eb = path->nodes[level]; | 5722 | struct extent_buffer *eb = path->nodes[level]; |
| 5327 | u64 parent = 0; | 5723 | u64 parent = 0; |
| @@ -5399,13 +5795,11 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans, | |||
| 5399 | btrfs_header_owner(path->nodes[level + 1])); | 5795 | btrfs_header_owner(path->nodes[level + 1])); |
| 5400 | } | 5796 | } |
| 5401 | 5797 | ||
| 5402 | ret = btrfs_free_extent(trans, root, eb->start, eb->len, parent, | 5798 | btrfs_free_tree_block(trans, root, eb, parent, wc->refs[level] == 1); |
| 5403 | root->root_key.objectid, level, 0); | ||
| 5404 | BUG_ON(ret); | ||
| 5405 | out: | 5799 | out: |
| 5406 | wc->refs[level] = 0; | 5800 | wc->refs[level] = 0; |
| 5407 | wc->flags[level] = 0; | 5801 | wc->flags[level] = 0; |
| 5408 | return ret; | 5802 | return 0; |
| 5409 | } | 5803 | } |
| 5410 | 5804 | ||
| 5411 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, | 5805 | static noinline int walk_down_tree(struct btrfs_trans_handle *trans, |
| @@ -5483,7 +5877,8 @@ static noinline int walk_up_tree(struct btrfs_trans_handle *trans, | |||
| 5483 | * also make sure backrefs for the shared block and all lower level | 5877 | * also make sure backrefs for the shared block and all lower level |
| 5484 | * blocks are properly updated. | 5878 | * blocks are properly updated. |
| 5485 | */ | 5879 | */ |
| 5486 | int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | 5880 | int btrfs_drop_snapshot(struct btrfs_root *root, |
| 5881 | struct btrfs_block_rsv *block_rsv, int update_ref) | ||
| 5487 | { | 5882 | { |
| 5488 | struct btrfs_path *path; | 5883 | struct btrfs_path *path; |
| 5489 | struct btrfs_trans_handle *trans; | 5884 | struct btrfs_trans_handle *trans; |
| @@ -5501,7 +5896,9 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5501 | wc = kzalloc(sizeof(*wc), GFP_NOFS); | 5896 | wc = kzalloc(sizeof(*wc), GFP_NOFS); |
| 5502 | BUG_ON(!wc); | 5897 | BUG_ON(!wc); |
| 5503 | 5898 | ||
| 5504 | trans = btrfs_start_transaction(tree_root, 1); | 5899 | trans = btrfs_start_transaction(tree_root, 0); |
| 5900 | if (block_rsv) | ||
| 5901 | trans->block_rsv = block_rsv; | ||
| 5505 | 5902 | ||
| 5506 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { | 5903 | if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) { |
| 5507 | level = btrfs_header_level(root->node); | 5904 | level = btrfs_header_level(root->node); |
| @@ -5589,22 +5986,16 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5589 | } | 5986 | } |
| 5590 | 5987 | ||
| 5591 | BUG_ON(wc->level == 0); | 5988 | BUG_ON(wc->level == 0); |
| 5592 | if (trans->transaction->in_commit || | 5989 | if (btrfs_should_end_transaction(trans, tree_root)) { |
| 5593 | trans->transaction->delayed_refs.flushing) { | ||
| 5594 | ret = btrfs_update_root(trans, tree_root, | 5990 | ret = btrfs_update_root(trans, tree_root, |
| 5595 | &root->root_key, | 5991 | &root->root_key, |
| 5596 | root_item); | 5992 | root_item); |
| 5597 | BUG_ON(ret); | 5993 | BUG_ON(ret); |
| 5598 | 5994 | ||
| 5599 | btrfs_end_transaction(trans, tree_root); | 5995 | btrfs_end_transaction_throttle(trans, tree_root); |
| 5600 | trans = btrfs_start_transaction(tree_root, 1); | 5996 | trans = btrfs_start_transaction(tree_root, 0); |
| 5601 | } else { | 5997 | if (block_rsv) |
| 5602 | unsigned long update; | 5998 | trans->block_rsv = block_rsv; |
| 5603 | update = trans->delayed_ref_updates; | ||
| 5604 | trans->delayed_ref_updates = 0; | ||
| 5605 | if (update) | ||
| 5606 | btrfs_run_delayed_refs(trans, tree_root, | ||
| 5607 | update); | ||
| 5608 | } | 5999 | } |
| 5609 | } | 6000 | } |
| 5610 | btrfs_release_path(root, path); | 6001 | btrfs_release_path(root, path); |
| @@ -5632,7 +6023,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root, int update_ref) | |||
| 5632 | kfree(root); | 6023 | kfree(root); |
| 5633 | } | 6024 | } |
| 5634 | out: | 6025 | out: |
| 5635 | btrfs_end_transaction(trans, tree_root); | 6026 | btrfs_end_transaction_throttle(trans, tree_root); |
| 5636 | kfree(wc); | 6027 | kfree(wc); |
| 5637 | btrfs_free_path(path); | 6028 | btrfs_free_path(path); |
| 5638 | return err; | 6029 | return err; |
| @@ -7228,48 +7619,80 @@ static u64 update_block_group_flags(struct btrfs_root *root, u64 flags) | |||
| 7228 | return flags; | 7619 | return flags; |
| 7229 | } | 7620 | } |
| 7230 | 7621 | ||
| 7231 | static int __alloc_chunk_for_shrink(struct btrfs_root *root, | 7622 | static int set_block_group_ro(struct btrfs_block_group_cache *cache) |
| 7232 | struct btrfs_block_group_cache *shrink_block_group, | ||
| 7233 | int force) | ||
| 7234 | { | 7623 | { |
| 7235 | struct btrfs_trans_handle *trans; | 7624 | struct btrfs_space_info *sinfo = cache->space_info; |
| 7236 | u64 new_alloc_flags; | 7625 | u64 num_bytes; |
| 7237 | u64 calc; | 7626 | int ret = -ENOSPC; |
| 7238 | 7627 | ||
| 7239 | spin_lock(&shrink_block_group->lock); | 7628 | if (cache->ro) |
| 7240 | if (btrfs_block_group_used(&shrink_block_group->item) + | 7629 | return 0; |
| 7241 | shrink_block_group->reserved > 0) { | ||
| 7242 | spin_unlock(&shrink_block_group->lock); | ||
| 7243 | 7630 | ||
| 7244 | trans = btrfs_start_transaction(root, 1); | 7631 | spin_lock(&sinfo->lock); |
| 7245 | spin_lock(&shrink_block_group->lock); | 7632 | spin_lock(&cache->lock); |
| 7633 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
| 7634 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
| 7635 | |||
| 7636 | if (sinfo->bytes_used + sinfo->bytes_reserved + sinfo->bytes_pinned + | ||
| 7637 | sinfo->bytes_may_use + sinfo->bytes_readonly + | ||
| 7638 | cache->reserved_pinned + num_bytes < sinfo->total_bytes) { | ||
| 7639 | sinfo->bytes_readonly += num_bytes; | ||
| 7640 | sinfo->bytes_reserved += cache->reserved_pinned; | ||
| 7641 | cache->reserved_pinned = 0; | ||
| 7642 | cache->ro = 1; | ||
| 7643 | ret = 0; | ||
| 7644 | } | ||
| 7645 | spin_unlock(&cache->lock); | ||
| 7646 | spin_unlock(&sinfo->lock); | ||
| 7647 | return ret; | ||
| 7648 | } | ||
| 7246 | 7649 | ||
| 7247 | new_alloc_flags = update_block_group_flags(root, | 7650 | int btrfs_set_block_group_ro(struct btrfs_root *root, |
| 7248 | shrink_block_group->flags); | 7651 | struct btrfs_block_group_cache *cache) |
| 7249 | if (new_alloc_flags != shrink_block_group->flags) { | ||
| 7250 | calc = | ||
| 7251 | btrfs_block_group_used(&shrink_block_group->item); | ||
| 7252 | } else { | ||
| 7253 | calc = shrink_block_group->key.offset; | ||
| 7254 | } | ||
| 7255 | spin_unlock(&shrink_block_group->lock); | ||
| 7256 | 7652 | ||
| 7257 | do_chunk_alloc(trans, root->fs_info->extent_root, | 7653 | { |
| 7258 | calc + 2 * 1024 * 1024, new_alloc_flags, force); | 7654 | struct btrfs_trans_handle *trans; |
| 7655 | u64 alloc_flags; | ||
| 7656 | int ret; | ||
| 7259 | 7657 | ||
| 7260 | btrfs_end_transaction(trans, root); | 7658 | BUG_ON(cache->ro); |
| 7261 | } else | 7659 | |
| 7262 | spin_unlock(&shrink_block_group->lock); | 7660 | trans = btrfs_join_transaction(root, 1); |
| 7263 | return 0; | 7661 | BUG_ON(IS_ERR(trans)); |
| 7264 | } | ||
| 7265 | 7662 | ||
| 7663 | alloc_flags = update_block_group_flags(root, cache->flags); | ||
| 7664 | if (alloc_flags != cache->flags) | ||
| 7665 | do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
| 7266 | 7666 | ||
| 7267 | int btrfs_prepare_block_group_relocation(struct btrfs_root *root, | 7667 | ret = set_block_group_ro(cache); |
| 7268 | struct btrfs_block_group_cache *group) | 7668 | if (!ret) |
| 7669 | goto out; | ||
| 7670 | alloc_flags = get_alloc_profile(root, cache->space_info->flags); | ||
| 7671 | ret = do_chunk_alloc(trans, root, 2 * 1024 * 1024, alloc_flags, 1); | ||
| 7672 | if (ret < 0) | ||
| 7673 | goto out; | ||
| 7674 | ret = set_block_group_ro(cache); | ||
| 7675 | out: | ||
| 7676 | btrfs_end_transaction(trans, root); | ||
| 7677 | return ret; | ||
| 7678 | } | ||
| 7269 | 7679 | ||
| 7680 | int btrfs_set_block_group_rw(struct btrfs_root *root, | ||
| 7681 | struct btrfs_block_group_cache *cache) | ||
| 7270 | { | 7682 | { |
| 7271 | __alloc_chunk_for_shrink(root, group, 1); | 7683 | struct btrfs_space_info *sinfo = cache->space_info; |
| 7272 | set_block_group_readonly(group); | 7684 | u64 num_bytes; |
| 7685 | |||
| 7686 | BUG_ON(!cache->ro); | ||
| 7687 | |||
| 7688 | spin_lock(&sinfo->lock); | ||
| 7689 | spin_lock(&cache->lock); | ||
| 7690 | num_bytes = cache->key.offset - cache->reserved - cache->pinned - | ||
| 7691 | cache->bytes_super - btrfs_block_group_used(&cache->item); | ||
| 7692 | sinfo->bytes_readonly -= num_bytes; | ||
| 7693 | cache->ro = 0; | ||
| 7694 | spin_unlock(&cache->lock); | ||
| 7695 | spin_unlock(&sinfo->lock); | ||
| 7273 | return 0; | 7696 | return 0; |
| 7274 | } | 7697 | } |
| 7275 | 7698 | ||
| @@ -7436,17 +7859,33 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info) | |||
| 7436 | */ | 7859 | */ |
| 7437 | synchronize_rcu(); | 7860 | synchronize_rcu(); |
| 7438 | 7861 | ||
| 7862 | release_global_block_rsv(info); | ||
| 7863 | |||
| 7439 | while(!list_empty(&info->space_info)) { | 7864 | while(!list_empty(&info->space_info)) { |
| 7440 | space_info = list_entry(info->space_info.next, | 7865 | space_info = list_entry(info->space_info.next, |
| 7441 | struct btrfs_space_info, | 7866 | struct btrfs_space_info, |
| 7442 | list); | 7867 | list); |
| 7443 | 7868 | if (space_info->bytes_pinned > 0 || | |
| 7869 | space_info->bytes_reserved > 0) { | ||
| 7870 | WARN_ON(1); | ||
| 7871 | dump_space_info(space_info, 0, 0); | ||
| 7872 | } | ||
| 7444 | list_del(&space_info->list); | 7873 | list_del(&space_info->list); |
| 7445 | kfree(space_info); | 7874 | kfree(space_info); |
| 7446 | } | 7875 | } |
| 7447 | return 0; | 7876 | return 0; |
| 7448 | } | 7877 | } |
| 7449 | 7878 | ||
| 7879 | static void __link_block_group(struct btrfs_space_info *space_info, | ||
| 7880 | struct btrfs_block_group_cache *cache) | ||
| 7881 | { | ||
| 7882 | int index = get_block_group_index(cache); | ||
| 7883 | |||
| 7884 | down_write(&space_info->groups_sem); | ||
| 7885 | list_add_tail(&cache->list, &space_info->block_groups[index]); | ||
| 7886 | up_write(&space_info->groups_sem); | ||
| 7887 | } | ||
| 7888 | |||
| 7450 | int btrfs_read_block_groups(struct btrfs_root *root) | 7889 | int btrfs_read_block_groups(struct btrfs_root *root) |
| 7451 | { | 7890 | { |
| 7452 | struct btrfs_path *path; | 7891 | struct btrfs_path *path; |
| @@ -7468,10 +7907,8 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7468 | 7907 | ||
| 7469 | while (1) { | 7908 | while (1) { |
| 7470 | ret = find_first_block_group(root, path, &key); | 7909 | ret = find_first_block_group(root, path, &key); |
| 7471 | if (ret > 0) { | 7910 | if (ret > 0) |
| 7472 | ret = 0; | 7911 | break; |
| 7473 | goto error; | ||
| 7474 | } | ||
| 7475 | if (ret != 0) | 7912 | if (ret != 0) |
| 7476 | goto error; | 7913 | goto error; |
| 7477 | 7914 | ||
| @@ -7480,7 +7917,7 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7480 | cache = kzalloc(sizeof(*cache), GFP_NOFS); | 7917 | cache = kzalloc(sizeof(*cache), GFP_NOFS); |
| 7481 | if (!cache) { | 7918 | if (!cache) { |
| 7482 | ret = -ENOMEM; | 7919 | ret = -ENOMEM; |
| 7483 | break; | 7920 | goto error; |
| 7484 | } | 7921 | } |
| 7485 | 7922 | ||
| 7486 | atomic_set(&cache->count, 1); | 7923 | atomic_set(&cache->count, 1); |
| @@ -7537,20 +7974,36 @@ int btrfs_read_block_groups(struct btrfs_root *root) | |||
| 7537 | BUG_ON(ret); | 7974 | BUG_ON(ret); |
| 7538 | cache->space_info = space_info; | 7975 | cache->space_info = space_info; |
| 7539 | spin_lock(&cache->space_info->lock); | 7976 | spin_lock(&cache->space_info->lock); |
| 7540 | cache->space_info->bytes_super += cache->bytes_super; | 7977 | cache->space_info->bytes_readonly += cache->bytes_super; |
| 7541 | spin_unlock(&cache->space_info->lock); | 7978 | spin_unlock(&cache->space_info->lock); |
| 7542 | 7979 | ||
| 7543 | down_write(&space_info->groups_sem); | 7980 | __link_block_group(space_info, cache); |
| 7544 | list_add_tail(&cache->list, &space_info->block_groups); | ||
| 7545 | up_write(&space_info->groups_sem); | ||
| 7546 | 7981 | ||
| 7547 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 7982 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
| 7548 | BUG_ON(ret); | 7983 | BUG_ON(ret); |
| 7549 | 7984 | ||
| 7550 | set_avail_alloc_bits(root->fs_info, cache->flags); | 7985 | set_avail_alloc_bits(root->fs_info, cache->flags); |
| 7551 | if (btrfs_chunk_readonly(root, cache->key.objectid)) | 7986 | if (btrfs_chunk_readonly(root, cache->key.objectid)) |
| 7552 | set_block_group_readonly(cache); | 7987 | set_block_group_ro(cache); |
| 7553 | } | 7988 | } |
| 7989 | |||
| 7990 | list_for_each_entry_rcu(space_info, &root->fs_info->space_info, list) { | ||
| 7991 | if (!(get_alloc_profile(root, space_info->flags) & | ||
| 7992 | (BTRFS_BLOCK_GROUP_RAID10 | | ||
| 7993 | BTRFS_BLOCK_GROUP_RAID1 | | ||
| 7994 | BTRFS_BLOCK_GROUP_DUP))) | ||
| 7995 | continue; | ||
| 7996 | /* | ||
| 7997 | * avoid allocating from un-mirrored block group if there are | ||
| 7998 | * mirrored block groups. | ||
| 7999 | */ | ||
| 8000 | list_for_each_entry(cache, &space_info->block_groups[3], list) | ||
| 8001 | set_block_group_ro(cache); | ||
| 8002 | list_for_each_entry(cache, &space_info->block_groups[4], list) | ||
| 8003 | set_block_group_ro(cache); | ||
| 8004 | } | ||
| 8005 | |||
| 8006 | init_global_block_rsv(info); | ||
| 7554 | ret = 0; | 8007 | ret = 0; |
| 7555 | error: | 8008 | error: |
| 7556 | btrfs_free_path(path); | 8009 | btrfs_free_path(path); |
| @@ -7611,12 +8064,10 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, | |||
| 7611 | BUG_ON(ret); | 8064 | BUG_ON(ret); |
| 7612 | 8065 | ||
| 7613 | spin_lock(&cache->space_info->lock); | 8066 | spin_lock(&cache->space_info->lock); |
| 7614 | cache->space_info->bytes_super += cache->bytes_super; | 8067 | cache->space_info->bytes_readonly += cache->bytes_super; |
| 7615 | spin_unlock(&cache->space_info->lock); | 8068 | spin_unlock(&cache->space_info->lock); |
| 7616 | 8069 | ||
| 7617 | down_write(&cache->space_info->groups_sem); | 8070 | __link_block_group(cache->space_info, cache); |
| 7618 | list_add_tail(&cache->list, &cache->space_info->block_groups); | ||
| 7619 | up_write(&cache->space_info->groups_sem); | ||
| 7620 | 8071 | ||
| 7621 | ret = btrfs_add_block_group_cache(root->fs_info, cache); | 8072 | ret = btrfs_add_block_group_cache(root->fs_info, cache); |
| 7622 | BUG_ON(ret); | 8073 | BUG_ON(ret); |
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index d2d03684fab2..a4080c21ec55 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
| @@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
| 135 | return state; | 135 | return state; |
| 136 | } | 136 | } |
| 137 | 137 | ||
| 138 | static void free_extent_state(struct extent_state *state) | 138 | void free_extent_state(struct extent_state *state) |
| 139 | { | 139 | { |
| 140 | if (!state) | 140 | if (!state) |
| 141 | return; | 141 | return; |
| @@ -335,21 +335,18 @@ static int merge_state(struct extent_io_tree *tree, | |||
| 335 | } | 335 | } |
| 336 | 336 | ||
| 337 | static int set_state_cb(struct extent_io_tree *tree, | 337 | static int set_state_cb(struct extent_io_tree *tree, |
| 338 | struct extent_state *state, | 338 | struct extent_state *state, int *bits) |
| 339 | unsigned long bits) | ||
| 340 | { | 339 | { |
| 341 | if (tree->ops && tree->ops->set_bit_hook) { | 340 | if (tree->ops && tree->ops->set_bit_hook) { |
| 342 | return tree->ops->set_bit_hook(tree->mapping->host, | 341 | return tree->ops->set_bit_hook(tree->mapping->host, |
| 343 | state->start, state->end, | 342 | state, bits); |
| 344 | state->state, bits); | ||
| 345 | } | 343 | } |
| 346 | 344 | ||
| 347 | return 0; | 345 | return 0; |
| 348 | } | 346 | } |
| 349 | 347 | ||
| 350 | static void clear_state_cb(struct extent_io_tree *tree, | 348 | static void clear_state_cb(struct extent_io_tree *tree, |
| 351 | struct extent_state *state, | 349 | struct extent_state *state, int *bits) |
| 352 | unsigned long bits) | ||
| 353 | { | 350 | { |
| 354 | if (tree->ops && tree->ops->clear_bit_hook) | 351 | if (tree->ops && tree->ops->clear_bit_hook) |
| 355 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); | 352 | tree->ops->clear_bit_hook(tree->mapping->host, state, bits); |
| @@ -367,9 +364,10 @@ static void clear_state_cb(struct extent_io_tree *tree, | |||
| 367 | */ | 364 | */ |
| 368 | static int insert_state(struct extent_io_tree *tree, | 365 | static int insert_state(struct extent_io_tree *tree, |
| 369 | struct extent_state *state, u64 start, u64 end, | 366 | struct extent_state *state, u64 start, u64 end, |
| 370 | int bits) | 367 | int *bits) |
| 371 | { | 368 | { |
| 372 | struct rb_node *node; | 369 | struct rb_node *node; |
| 370 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
| 373 | int ret; | 371 | int ret; |
| 374 | 372 | ||
| 375 | if (end < start) { | 373 | if (end < start) { |
| @@ -384,9 +382,9 @@ static int insert_state(struct extent_io_tree *tree, | |||
| 384 | if (ret) | 382 | if (ret) |
| 385 | return ret; | 383 | return ret; |
| 386 | 384 | ||
| 387 | if (bits & EXTENT_DIRTY) | 385 | if (bits_to_set & EXTENT_DIRTY) |
| 388 | tree->dirty_bytes += end - start + 1; | 386 | tree->dirty_bytes += end - start + 1; |
| 389 | state->state |= bits; | 387 | state->state |= bits_to_set; |
| 390 | node = tree_insert(&tree->state, end, &state->rb_node); | 388 | node = tree_insert(&tree->state, end, &state->rb_node); |
| 391 | if (node) { | 389 | if (node) { |
| 392 | struct extent_state *found; | 390 | struct extent_state *found; |
| @@ -456,13 +454,13 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, | |||
| 456 | * struct is freed and removed from the tree | 454 | * struct is freed and removed from the tree |
| 457 | */ | 455 | */ |
| 458 | static int clear_state_bit(struct extent_io_tree *tree, | 456 | static int clear_state_bit(struct extent_io_tree *tree, |
| 459 | struct extent_state *state, int bits, int wake, | 457 | struct extent_state *state, |
| 460 | int delete) | 458 | int *bits, int wake) |
| 461 | { | 459 | { |
| 462 | int bits_to_clear = bits & ~EXTENT_DO_ACCOUNTING; | 460 | int bits_to_clear = *bits & ~EXTENT_CTLBITS; |
| 463 | int ret = state->state & bits_to_clear; | 461 | int ret = state->state & bits_to_clear; |
| 464 | 462 | ||
| 465 | if ((bits & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { | 463 | if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { |
| 466 | u64 range = state->end - state->start + 1; | 464 | u64 range = state->end - state->start + 1; |
| 467 | WARN_ON(range > tree->dirty_bytes); | 465 | WARN_ON(range > tree->dirty_bytes); |
| 468 | tree->dirty_bytes -= range; | 466 | tree->dirty_bytes -= range; |
| @@ -471,9 +469,8 @@ static int clear_state_bit(struct extent_io_tree *tree, | |||
| 471 | state->state &= ~bits_to_clear; | 469 | state->state &= ~bits_to_clear; |
| 472 | if (wake) | 470 | if (wake) |
| 473 | wake_up(&state->wq); | 471 | wake_up(&state->wq); |
| 474 | if (delete || state->state == 0) { | 472 | if (state->state == 0) { |
| 475 | if (state->tree) { | 473 | if (state->tree) { |
| 476 | clear_state_cb(tree, state, state->state); | ||
| 477 | rb_erase(&state->rb_node, &tree->state); | 474 | rb_erase(&state->rb_node, &tree->state); |
| 478 | state->tree = NULL; | 475 | state->tree = NULL; |
| 479 | free_extent_state(state); | 476 | free_extent_state(state); |
| @@ -514,6 +511,10 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 514 | int set = 0; | 511 | int set = 0; |
| 515 | int clear = 0; | 512 | int clear = 0; |
| 516 | 513 | ||
| 514 | if (delete) | ||
| 515 | bits |= ~EXTENT_CTLBITS; | ||
| 516 | bits |= EXTENT_FIRST_DELALLOC; | ||
| 517 | |||
| 517 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) | 518 | if (bits & (EXTENT_IOBITS | EXTENT_BOUNDARY)) |
| 518 | clear = 1; | 519 | clear = 1; |
| 519 | again: | 520 | again: |
| @@ -580,8 +581,7 @@ hit_next: | |||
| 580 | if (err) | 581 | if (err) |
| 581 | goto out; | 582 | goto out; |
| 582 | if (state->end <= end) { | 583 | if (state->end <= end) { |
| 583 | set |= clear_state_bit(tree, state, bits, wake, | 584 | set |= clear_state_bit(tree, state, &bits, wake); |
| 584 | delete); | ||
| 585 | if (last_end == (u64)-1) | 585 | if (last_end == (u64)-1) |
| 586 | goto out; | 586 | goto out; |
| 587 | start = last_end + 1; | 587 | start = last_end + 1; |
| @@ -602,7 +602,7 @@ hit_next: | |||
| 602 | if (wake) | 602 | if (wake) |
| 603 | wake_up(&state->wq); | 603 | wake_up(&state->wq); |
| 604 | 604 | ||
| 605 | set |= clear_state_bit(tree, prealloc, bits, wake, delete); | 605 | set |= clear_state_bit(tree, prealloc, &bits, wake); |
| 606 | 606 | ||
| 607 | prealloc = NULL; | 607 | prealloc = NULL; |
| 608 | goto out; | 608 | goto out; |
| @@ -613,7 +613,7 @@ hit_next: | |||
| 613 | else | 613 | else |
| 614 | next_node = NULL; | 614 | next_node = NULL; |
| 615 | 615 | ||
| 616 | set |= clear_state_bit(tree, state, bits, wake, delete); | 616 | set |= clear_state_bit(tree, state, &bits, wake); |
| 617 | if (last_end == (u64)-1) | 617 | if (last_end == (u64)-1) |
| 618 | goto out; | 618 | goto out; |
| 619 | start = last_end + 1; | 619 | start = last_end + 1; |
| @@ -706,19 +706,19 @@ out: | |||
| 706 | 706 | ||
| 707 | static int set_state_bits(struct extent_io_tree *tree, | 707 | static int set_state_bits(struct extent_io_tree *tree, |
| 708 | struct extent_state *state, | 708 | struct extent_state *state, |
| 709 | int bits) | 709 | int *bits) |
| 710 | { | 710 | { |
| 711 | int ret; | 711 | int ret; |
| 712 | int bits_to_set = *bits & ~EXTENT_CTLBITS; | ||
| 712 | 713 | ||
| 713 | ret = set_state_cb(tree, state, bits); | 714 | ret = set_state_cb(tree, state, bits); |
| 714 | if (ret) | 715 | if (ret) |
| 715 | return ret; | 716 | return ret; |
| 716 | 717 | if ((bits_to_set & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | |
| 717 | if ((bits & EXTENT_DIRTY) && !(state->state & EXTENT_DIRTY)) { | ||
| 718 | u64 range = state->end - state->start + 1; | 718 | u64 range = state->end - state->start + 1; |
| 719 | tree->dirty_bytes += range; | 719 | tree->dirty_bytes += range; |
| 720 | } | 720 | } |
| 721 | state->state |= bits; | 721 | state->state |= bits_to_set; |
| 722 | 722 | ||
| 723 | return 0; | 723 | return 0; |
| 724 | } | 724 | } |
| @@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state, | |||
| 745 | * [start, end] is inclusive This takes the tree lock. | 745 | * [start, end] is inclusive This takes the tree lock. |
| 746 | */ | 746 | */ |
| 747 | 747 | ||
| 748 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 748 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 749 | int bits, int exclusive_bits, u64 *failed_start, | 749 | int bits, int exclusive_bits, u64 *failed_start, |
| 750 | struct extent_state **cached_state, | 750 | struct extent_state **cached_state, gfp_t mask) |
| 751 | gfp_t mask) | ||
| 752 | { | 751 | { |
| 753 | struct extent_state *state; | 752 | struct extent_state *state; |
| 754 | struct extent_state *prealloc = NULL; | 753 | struct extent_state *prealloc = NULL; |
| @@ -757,6 +756,7 @@ static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 757 | u64 last_start; | 756 | u64 last_start; |
| 758 | u64 last_end; | 757 | u64 last_end; |
| 759 | 758 | ||
| 759 | bits |= EXTENT_FIRST_DELALLOC; | ||
| 760 | again: | 760 | again: |
| 761 | if (!prealloc && (mask & __GFP_WAIT)) { | 761 | if (!prealloc && (mask & __GFP_WAIT)) { |
| 762 | prealloc = alloc_extent_state(mask); | 762 | prealloc = alloc_extent_state(mask); |
| @@ -778,7 +778,7 @@ again: | |||
| 778 | */ | 778 | */ |
| 779 | node = tree_search(tree, start); | 779 | node = tree_search(tree, start); |
| 780 | if (!node) { | 780 | if (!node) { |
| 781 | err = insert_state(tree, prealloc, start, end, bits); | 781 | err = insert_state(tree, prealloc, start, end, &bits); |
| 782 | prealloc = NULL; | 782 | prealloc = NULL; |
| 783 | BUG_ON(err == -EEXIST); | 783 | BUG_ON(err == -EEXIST); |
| 784 | goto out; | 784 | goto out; |
| @@ -802,7 +802,7 @@ hit_next: | |||
| 802 | goto out; | 802 | goto out; |
| 803 | } | 803 | } |
| 804 | 804 | ||
| 805 | err = set_state_bits(tree, state, bits); | 805 | err = set_state_bits(tree, state, &bits); |
| 806 | if (err) | 806 | if (err) |
| 807 | goto out; | 807 | goto out; |
| 808 | 808 | ||
| @@ -852,7 +852,7 @@ hit_next: | |||
| 852 | if (err) | 852 | if (err) |
| 853 | goto out; | 853 | goto out; |
| 854 | if (state->end <= end) { | 854 | if (state->end <= end) { |
| 855 | err = set_state_bits(tree, state, bits); | 855 | err = set_state_bits(tree, state, &bits); |
| 856 | if (err) | 856 | if (err) |
| 857 | goto out; | 857 | goto out; |
| 858 | cache_state(state, cached_state); | 858 | cache_state(state, cached_state); |
| @@ -877,7 +877,7 @@ hit_next: | |||
| 877 | else | 877 | else |
| 878 | this_end = last_start - 1; | 878 | this_end = last_start - 1; |
| 879 | err = insert_state(tree, prealloc, start, this_end, | 879 | err = insert_state(tree, prealloc, start, this_end, |
| 880 | bits); | 880 | &bits); |
| 881 | BUG_ON(err == -EEXIST); | 881 | BUG_ON(err == -EEXIST); |
| 882 | if (err) { | 882 | if (err) { |
| 883 | prealloc = NULL; | 883 | prealloc = NULL; |
| @@ -903,7 +903,7 @@ hit_next: | |||
| 903 | err = split_state(tree, state, prealloc, end + 1); | 903 | err = split_state(tree, state, prealloc, end + 1); |
| 904 | BUG_ON(err == -EEXIST); | 904 | BUG_ON(err == -EEXIST); |
| 905 | 905 | ||
| 906 | err = set_state_bits(tree, prealloc, bits); | 906 | err = set_state_bits(tree, prealloc, &bits); |
| 907 | if (err) { | 907 | if (err) { |
| 908 | prealloc = NULL; | 908 | prealloc = NULL; |
| 909 | goto out; | 909 | goto out; |
| @@ -966,8 +966,7 @@ int clear_extent_dirty(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 966 | { | 966 | { |
| 967 | return clear_extent_bit(tree, start, end, | 967 | return clear_extent_bit(tree, start, end, |
| 968 | EXTENT_DIRTY | EXTENT_DELALLOC | | 968 | EXTENT_DIRTY | EXTENT_DELALLOC | |
| 969 | EXTENT_DO_ACCOUNTING, 0, 0, | 969 | EXTENT_DO_ACCOUNTING, 0, 0, NULL, mask); |
| 970 | NULL, mask); | ||
| 971 | } | 970 | } |
| 972 | 971 | ||
| 973 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 972 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
| @@ -1435,9 +1434,6 @@ int extent_clear_unlock_delalloc(struct inode *inode, | |||
| 1435 | if (op & EXTENT_CLEAR_DELALLOC) | 1434 | if (op & EXTENT_CLEAR_DELALLOC) |
| 1436 | clear_bits |= EXTENT_DELALLOC; | 1435 | clear_bits |= EXTENT_DELALLOC; |
| 1437 | 1436 | ||
| 1438 | if (op & EXTENT_CLEAR_ACCOUNTING) | ||
| 1439 | clear_bits |= EXTENT_DO_ACCOUNTING; | ||
| 1440 | |||
| 1441 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); | 1437 | clear_extent_bit(tree, start, end, clear_bits, 1, 0, NULL, GFP_NOFS); |
| 1442 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 1438 | if (!(op & (EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
| 1443 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | | 1439 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK | |
| @@ -1916,7 +1912,7 @@ static int submit_one_bio(int rw, struct bio *bio, int mirror_num, | |||
| 1916 | 1912 | ||
| 1917 | if (tree->ops && tree->ops->submit_bio_hook) | 1913 | if (tree->ops && tree->ops->submit_bio_hook) |
| 1918 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, | 1914 | tree->ops->submit_bio_hook(page->mapping->host, rw, bio, |
| 1919 | mirror_num, bio_flags); | 1915 | mirror_num, bio_flags, start); |
| 1920 | else | 1916 | else |
| 1921 | submit_bio(rw, bio); | 1917 | submit_bio(rw, bio); |
| 1922 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) | 1918 | if (bio_flagged(bio, BIO_EOPNOTSUPP)) |
| @@ -2020,6 +2016,7 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2020 | sector_t sector; | 2016 | sector_t sector; |
| 2021 | struct extent_map *em; | 2017 | struct extent_map *em; |
| 2022 | struct block_device *bdev; | 2018 | struct block_device *bdev; |
| 2019 | struct btrfs_ordered_extent *ordered; | ||
| 2023 | int ret; | 2020 | int ret; |
| 2024 | int nr = 0; | 2021 | int nr = 0; |
| 2025 | size_t page_offset = 0; | 2022 | size_t page_offset = 0; |
| @@ -2031,7 +2028,15 @@ static int __extent_read_full_page(struct extent_io_tree *tree, | |||
| 2031 | set_page_extent_mapped(page); | 2028 | set_page_extent_mapped(page); |
| 2032 | 2029 | ||
| 2033 | end = page_end; | 2030 | end = page_end; |
| 2034 | lock_extent(tree, start, end, GFP_NOFS); | 2031 | while (1) { |
| 2032 | lock_extent(tree, start, end, GFP_NOFS); | ||
| 2033 | ordered = btrfs_lookup_ordered_extent(inode, start); | ||
| 2034 | if (!ordered) | ||
| 2035 | break; | ||
| 2036 | unlock_extent(tree, start, end, GFP_NOFS); | ||
| 2037 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
| 2038 | btrfs_put_ordered_extent(ordered); | ||
| 2039 | } | ||
| 2035 | 2040 | ||
| 2036 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { | 2041 | if (page->index == last_byte >> PAGE_CACHE_SHIFT) { |
| 2037 | char *userpage; | 2042 | char *userpage; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index bbab4813646f..5691c7b590da 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
| @@ -16,7 +16,9 @@ | |||
| 16 | #define EXTENT_BOUNDARY (1 << 9) | 16 | #define EXTENT_BOUNDARY (1 << 9) |
| 17 | #define EXTENT_NODATASUM (1 << 10) | 17 | #define EXTENT_NODATASUM (1 << 10) |
| 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) | 18 | #define EXTENT_DO_ACCOUNTING (1 << 11) |
| 19 | #define EXTENT_FIRST_DELALLOC (1 << 12) | ||
| 19 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) | 20 | #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) |
| 21 | #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING | EXTENT_FIRST_DELALLOC) | ||
| 20 | 22 | ||
| 21 | /* flags for bio submission */ | 23 | /* flags for bio submission */ |
| 22 | #define EXTENT_BIO_COMPRESSED 1 | 24 | #define EXTENT_BIO_COMPRESSED 1 |
| @@ -47,7 +49,7 @@ struct extent_state; | |||
| 47 | 49 | ||
| 48 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, | 50 | typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw, |
| 49 | struct bio *bio, int mirror_num, | 51 | struct bio *bio, int mirror_num, |
| 50 | unsigned long bio_flags); | 52 | unsigned long bio_flags, u64 bio_offset); |
| 51 | struct extent_io_ops { | 53 | struct extent_io_ops { |
| 52 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, | 54 | int (*fill_delalloc)(struct inode *inode, struct page *locked_page, |
| 53 | u64 start, u64 end, int *page_started, | 55 | u64 start, u64 end, int *page_started, |
| @@ -69,10 +71,10 @@ struct extent_io_ops { | |||
| 69 | struct extent_state *state); | 71 | struct extent_state *state); |
| 70 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, | 72 | int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, |
| 71 | struct extent_state *state, int uptodate); | 73 | struct extent_state *state, int uptodate); |
| 72 | int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, | 74 | int (*set_bit_hook)(struct inode *inode, struct extent_state *state, |
| 73 | unsigned long old, unsigned long bits); | 75 | int *bits); |
| 74 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, | 76 | int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, |
| 75 | unsigned long bits); | 77 | int *bits); |
| 76 | int (*merge_extent_hook)(struct inode *inode, | 78 | int (*merge_extent_hook)(struct inode *inode, |
| 77 | struct extent_state *new, | 79 | struct extent_state *new, |
| 78 | struct extent_state *other); | 80 | struct extent_state *other); |
| @@ -176,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
| 176 | u64 *start, u64 search_end, | 178 | u64 *start, u64 search_end, |
| 177 | u64 max_bytes, unsigned long bits); | 179 | u64 max_bytes, unsigned long bits); |
| 178 | 180 | ||
| 181 | void free_extent_state(struct extent_state *state); | ||
| 179 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 182 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
| 180 | int bits, int filled, struct extent_state *cached_state); | 183 | int bits, int filled, struct extent_state *cached_state); |
| 181 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| @@ -185,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
| 185 | gfp_t mask); | 188 | gfp_t mask); |
| 186 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 189 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
| 187 | int bits, gfp_t mask); | 190 | int bits, gfp_t mask); |
| 191 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
| 192 | int bits, int exclusive_bits, u64 *failed_start, | ||
| 193 | struct extent_state **cached_state, gfp_t mask); | ||
| 188 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 194 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
| 189 | gfp_t mask); | 195 | gfp_t mask); |
| 190 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 196 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 54a255065aa3..a562a250ae77 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
| @@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
| 149 | } | 149 | } |
| 150 | 150 | ||
| 151 | 151 | ||
| 152 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 152 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
| 153 | struct bio *bio, u32 *dst) | 153 | struct inode *inode, struct bio *bio, |
| 154 | u64 logical_offset, u32 *dst, int dio) | ||
| 154 | { | 155 | { |
| 155 | u32 sum; | 156 | u32 sum; |
| 156 | struct bio_vec *bvec = bio->bi_io_vec; | 157 | struct bio_vec *bvec = bio->bi_io_vec; |
| 157 | int bio_index = 0; | 158 | int bio_index = 0; |
| 158 | u64 offset; | 159 | u64 offset = 0; |
| 159 | u64 item_start_offset = 0; | 160 | u64 item_start_offset = 0; |
| 160 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
| 161 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
| @@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
| 174 | WARN_ON(bio->bi_vcnt <= 0); | 175 | WARN_ON(bio->bi_vcnt <= 0); |
| 175 | 176 | ||
| 176 | disk_bytenr = (u64)bio->bi_sector << 9; | 177 | disk_bytenr = (u64)bio->bi_sector << 9; |
| 178 | if (dio) | ||
| 179 | offset = logical_offset; | ||
| 177 | while (bio_index < bio->bi_vcnt) { | 180 | while (bio_index < bio->bi_vcnt) { |
| 178 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 181 | if (!dio) |
| 182 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
| 179 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); | 183 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); |
| 180 | if (ret == 0) | 184 | if (ret == 0) |
| 181 | goto found; | 185 | goto found; |
| @@ -238,6 +242,7 @@ found: | |||
| 238 | else | 242 | else |
| 239 | set_state_private(io_tree, offset, sum); | 243 | set_state_private(io_tree, offset, sum); |
| 240 | disk_bytenr += bvec->bv_len; | 244 | disk_bytenr += bvec->bv_len; |
| 245 | offset += bvec->bv_len; | ||
| 241 | bio_index++; | 246 | bio_index++; |
| 242 | bvec++; | 247 | bvec++; |
| 243 | } | 248 | } |
| @@ -245,6 +250,18 @@ found: | |||
| 245 | return 0; | 250 | return 0; |
| 246 | } | 251 | } |
| 247 | 252 | ||
| 253 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
| 254 | struct bio *bio, u32 *dst) | ||
| 255 | { | ||
| 256 | return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0); | ||
| 257 | } | ||
| 258 | |||
| 259 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
| 260 | struct bio *bio, u64 offset, u32 *dst) | ||
| 261 | { | ||
| 262 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); | ||
| 263 | } | ||
| 264 | |||
| 248 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 265 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
| 249 | struct list_head *list) | 266 | struct list_head *list) |
| 250 | { | 267 | { |
| @@ -657,6 +674,9 @@ again: | |||
| 657 | goto found; | 674 | goto found; |
| 658 | } | 675 | } |
| 659 | ret = PTR_ERR(item); | 676 | ret = PTR_ERR(item); |
| 677 | if (ret != -EFBIG && ret != -ENOENT) | ||
| 678 | goto fail_unlock; | ||
| 679 | |||
| 660 | if (ret == -EFBIG) { | 680 | if (ret == -EFBIG) { |
| 661 | u32 item_size; | 681 | u32 item_size; |
| 662 | /* we found one, but it isn't big enough yet */ | 682 | /* we found one, but it isn't big enough yet */ |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 29ff749ff4ca..79437c5eeb1e 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
| @@ -46,32 +46,42 @@ | |||
| 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, | 46 | static noinline int btrfs_copy_from_user(loff_t pos, int num_pages, |
| 47 | int write_bytes, | 47 | int write_bytes, |
| 48 | struct page **prepared_pages, | 48 | struct page **prepared_pages, |
| 49 | const char __user *buf) | 49 | struct iov_iter *i) |
| 50 | { | 50 | { |
| 51 | long page_fault = 0; | 51 | size_t copied; |
| 52 | int i; | 52 | int pg = 0; |
| 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); | 53 | int offset = pos & (PAGE_CACHE_SIZE - 1); |
| 54 | 54 | ||
| 55 | for (i = 0; i < num_pages && write_bytes > 0; i++, offset = 0) { | 55 | while (write_bytes > 0) { |
| 56 | size_t count = min_t(size_t, | 56 | size_t count = min_t(size_t, |
| 57 | PAGE_CACHE_SIZE - offset, write_bytes); | 57 | PAGE_CACHE_SIZE - offset, write_bytes); |
| 58 | struct page *page = prepared_pages[i]; | 58 | struct page *page = prepared_pages[pg]; |
| 59 | fault_in_pages_readable(buf, count); | 59 | again: |
| 60 | if (unlikely(iov_iter_fault_in_readable(i, count))) | ||
| 61 | return -EFAULT; | ||
| 60 | 62 | ||
| 61 | /* Copy data from userspace to the current page */ | 63 | /* Copy data from userspace to the current page */ |
| 62 | kmap(page); | 64 | copied = iov_iter_copy_from_user(page, i, offset, count); |
| 63 | page_fault = __copy_from_user(page_address(page) + offset, | 65 | |
| 64 | buf, count); | ||
| 65 | /* Flush processor's dcache for this page */ | 66 | /* Flush processor's dcache for this page */ |
| 66 | flush_dcache_page(page); | 67 | flush_dcache_page(page); |
| 67 | kunmap(page); | 68 | iov_iter_advance(i, copied); |
| 68 | buf += count; | 69 | write_bytes -= copied; |
| 69 | write_bytes -= count; | ||
| 70 | 70 | ||
| 71 | if (page_fault) | 71 | if (unlikely(copied == 0)) { |
| 72 | break; | 72 | count = min_t(size_t, PAGE_CACHE_SIZE - offset, |
| 73 | iov_iter_single_seg_count(i)); | ||
| 74 | goto again; | ||
| 75 | } | ||
| 76 | |||
| 77 | if (unlikely(copied < PAGE_CACHE_SIZE - offset)) { | ||
| 78 | offset += copied; | ||
| 79 | } else { | ||
| 80 | pg++; | ||
| 81 | offset = 0; | ||
| 82 | } | ||
| 73 | } | 83 | } |
| 74 | return page_fault ? -EFAULT : 0; | 84 | return 0; |
| 75 | } | 85 | } |
| 76 | 86 | ||
| 77 | /* | 87 | /* |
| @@ -126,8 +136,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 126 | end_of_last_block = start_pos + num_bytes - 1; | 136 | end_of_last_block = start_pos + num_bytes - 1; |
| 127 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, | 137 | err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block, |
| 128 | NULL); | 138 | NULL); |
| 129 | if (err) | 139 | BUG_ON(err); |
| 130 | return err; | ||
| 131 | 140 | ||
| 132 | for (i = 0; i < num_pages; i++) { | 141 | for (i = 0; i < num_pages; i++) { |
| 133 | struct page *p = pages[i]; | 142 | struct page *p = pages[i]; |
| @@ -142,7 +151,7 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans, | |||
| 142 | * at this time. | 151 | * at this time. |
| 143 | */ | 152 | */ |
| 144 | } | 153 | } |
| 145 | return err; | 154 | return 0; |
| 146 | } | 155 | } |
| 147 | 156 | ||
| 148 | /* | 157 | /* |
| @@ -823,45 +832,46 @@ again: | |||
| 823 | return 0; | 832 | return 0; |
| 824 | } | 833 | } |
| 825 | 834 | ||
| 826 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 835 | static ssize_t btrfs_file_aio_write(struct kiocb *iocb, |
| 827 | size_t count, loff_t *ppos) | 836 | const struct iovec *iov, |
| 837 | unsigned long nr_segs, loff_t pos) | ||
| 828 | { | 838 | { |
| 829 | loff_t pos; | 839 | struct file *file = iocb->ki_filp; |
| 840 | struct inode *inode = fdentry(file)->d_inode; | ||
| 841 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 842 | struct page *pinned[2]; | ||
| 843 | struct page **pages = NULL; | ||
| 844 | struct iov_iter i; | ||
| 845 | loff_t *ppos = &iocb->ki_pos; | ||
| 830 | loff_t start_pos; | 846 | loff_t start_pos; |
| 831 | ssize_t num_written = 0; | 847 | ssize_t num_written = 0; |
| 832 | ssize_t err = 0; | 848 | ssize_t err = 0; |
| 849 | size_t count; | ||
| 850 | size_t ocount; | ||
| 833 | int ret = 0; | 851 | int ret = 0; |
| 834 | struct inode *inode = fdentry(file)->d_inode; | ||
| 835 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 836 | struct page **pages = NULL; | ||
| 837 | int nrptrs; | 852 | int nrptrs; |
| 838 | struct page *pinned[2]; | ||
| 839 | unsigned long first_index; | 853 | unsigned long first_index; |
| 840 | unsigned long last_index; | 854 | unsigned long last_index; |
| 841 | int will_write; | 855 | int will_write; |
| 856 | int buffered = 0; | ||
| 842 | 857 | ||
| 843 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 858 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
| 844 | (file->f_flags & O_DIRECT)); | 859 | (file->f_flags & O_DIRECT)); |
| 845 | 860 | ||
| 846 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
| 847 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
| 848 | pinned[0] = NULL; | 861 | pinned[0] = NULL; |
| 849 | pinned[1] = NULL; | 862 | pinned[1] = NULL; |
| 850 | 863 | ||
| 851 | pos = *ppos; | ||
| 852 | start_pos = pos; | 864 | start_pos = pos; |
| 853 | 865 | ||
| 854 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); | 866 | vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); |
| 855 | 867 | ||
| 856 | /* do the reserve before the mutex lock in case we have to do some | ||
| 857 | * flushing. We wouldn't deadlock, but this is more polite. | ||
| 858 | */ | ||
| 859 | err = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 860 | if (err) | ||
| 861 | goto out_nolock; | ||
| 862 | |||
| 863 | mutex_lock(&inode->i_mutex); | 868 | mutex_lock(&inode->i_mutex); |
| 864 | 869 | ||
| 870 | err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); | ||
| 871 | if (err) | ||
| 872 | goto out; | ||
| 873 | count = ocount; | ||
| 874 | |||
| 865 | current->backing_dev_info = inode->i_mapping->backing_dev_info; | 875 | current->backing_dev_info = inode->i_mapping->backing_dev_info; |
| 866 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); | 876 | err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode)); |
| 867 | if (err) | 877 | if (err) |
| @@ -875,15 +885,53 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 875 | goto out; | 885 | goto out; |
| 876 | 886 | ||
| 877 | file_update_time(file); | 887 | file_update_time(file); |
| 888 | BTRFS_I(inode)->sequence++; | ||
| 889 | |||
| 890 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
| 891 | num_written = generic_file_direct_write(iocb, iov, &nr_segs, | ||
| 892 | pos, ppos, count, | ||
| 893 | ocount); | ||
| 894 | /* | ||
| 895 | * the generic O_DIRECT will update in-memory i_size after the | ||
| 896 | * DIOs are done. But our endio handlers that update the on | ||
| 897 | * disk i_size never update past the in memory i_size. So we | ||
| 898 | * need one more update here to catch any additions to the | ||
| 899 | * file | ||
| 900 | */ | ||
| 901 | if (inode->i_size != BTRFS_I(inode)->disk_i_size) { | ||
| 902 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | ||
| 903 | mark_inode_dirty(inode); | ||
| 904 | } | ||
| 878 | 905 | ||
| 906 | if (num_written < 0) { | ||
| 907 | ret = num_written; | ||
| 908 | num_written = 0; | ||
| 909 | goto out; | ||
| 910 | } else if (num_written == count) { | ||
| 911 | /* pick up pos changes done by the generic code */ | ||
| 912 | pos = *ppos; | ||
| 913 | goto out; | ||
| 914 | } | ||
| 915 | /* | ||
| 916 | * We are going to do buffered for the rest of the range, so we | ||
| 917 | * need to make sure to invalidate the buffered pages when we're | ||
| 918 | * done. | ||
| 919 | */ | ||
| 920 | buffered = 1; | ||
| 921 | pos += num_written; | ||
| 922 | } | ||
| 923 | |||
| 924 | iov_iter_init(&i, iov, nr_segs, count, num_written); | ||
| 925 | nrptrs = min((iov_iter_count(&i) + PAGE_CACHE_SIZE - 1) / | ||
| 926 | PAGE_CACHE_SIZE, PAGE_CACHE_SIZE / | ||
| 927 | (sizeof(struct page *))); | ||
| 879 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 928 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
| 880 | 929 | ||
| 881 | /* generic_write_checks can change our pos */ | 930 | /* generic_write_checks can change our pos */ |
| 882 | start_pos = pos; | 931 | start_pos = pos; |
| 883 | 932 | ||
| 884 | BTRFS_I(inode)->sequence++; | ||
| 885 | first_index = pos >> PAGE_CACHE_SHIFT; | 933 | first_index = pos >> PAGE_CACHE_SHIFT; |
| 886 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 934 | last_index = (pos + iov_iter_count(&i)) >> PAGE_CACHE_SHIFT; |
| 887 | 935 | ||
| 888 | /* | 936 | /* |
| 889 | * there are lots of better ways to do this, but this code | 937 | * there are lots of better ways to do this, but this code |
| @@ -900,7 +948,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 900 | unlock_page(pinned[0]); | 948 | unlock_page(pinned[0]); |
| 901 | } | 949 | } |
| 902 | } | 950 | } |
| 903 | if ((pos + count) & (PAGE_CACHE_SIZE - 1)) { | 951 | if ((pos + iov_iter_count(&i)) & (PAGE_CACHE_SIZE - 1)) { |
| 904 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); | 952 | pinned[1] = grab_cache_page(inode->i_mapping, last_index); |
| 905 | if (!PageUptodate(pinned[1])) { | 953 | if (!PageUptodate(pinned[1])) { |
| 906 | ret = btrfs_readpage(NULL, pinned[1]); | 954 | ret = btrfs_readpage(NULL, pinned[1]); |
| @@ -911,10 +959,10 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 911 | } | 959 | } |
| 912 | } | 960 | } |
| 913 | 961 | ||
| 914 | while (count > 0) { | 962 | while (iov_iter_count(&i) > 0) { |
| 915 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); | 963 | size_t offset = pos & (PAGE_CACHE_SIZE - 1); |
| 916 | size_t write_bytes = min(count, nrptrs * | 964 | size_t write_bytes = min(iov_iter_count(&i), |
| 917 | (size_t)PAGE_CACHE_SIZE - | 965 | nrptrs * (size_t)PAGE_CACHE_SIZE - |
| 918 | offset); | 966 | offset); |
| 919 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> | 967 | size_t num_pages = (write_bytes + PAGE_CACHE_SIZE - 1) >> |
| 920 | PAGE_CACHE_SHIFT; | 968 | PAGE_CACHE_SHIFT; |
| @@ -922,7 +970,7 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 922 | WARN_ON(num_pages > nrptrs); | 970 | WARN_ON(num_pages > nrptrs); |
| 923 | memset(pages, 0, sizeof(struct page *) * nrptrs); | 971 | memset(pages, 0, sizeof(struct page *) * nrptrs); |
| 924 | 972 | ||
| 925 | ret = btrfs_check_data_free_space(root, inode, write_bytes); | 973 | ret = btrfs_delalloc_reserve_space(inode, write_bytes); |
| 926 | if (ret) | 974 | if (ret) |
| 927 | goto out; | 975 | goto out; |
| 928 | 976 | ||
| @@ -930,26 +978,20 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 930 | pos, first_index, last_index, | 978 | pos, first_index, last_index, |
| 931 | write_bytes); | 979 | write_bytes); |
| 932 | if (ret) { | 980 | if (ret) { |
| 933 | btrfs_free_reserved_data_space(root, inode, | 981 | btrfs_delalloc_release_space(inode, write_bytes); |
| 934 | write_bytes); | ||
| 935 | goto out; | 982 | goto out; |
| 936 | } | 983 | } |
| 937 | 984 | ||
| 938 | ret = btrfs_copy_from_user(pos, num_pages, | 985 | ret = btrfs_copy_from_user(pos, num_pages, |
| 939 | write_bytes, pages, buf); | 986 | write_bytes, pages, &i); |
| 940 | if (ret) { | 987 | if (ret == 0) { |
| 941 | btrfs_free_reserved_data_space(root, inode, | 988 | dirty_and_release_pages(NULL, root, file, pages, |
| 942 | write_bytes); | 989 | num_pages, pos, write_bytes); |
| 943 | btrfs_drop_pages(pages, num_pages); | ||
| 944 | goto out; | ||
| 945 | } | 990 | } |
| 946 | 991 | ||
| 947 | ret = dirty_and_release_pages(NULL, root, file, pages, | ||
| 948 | num_pages, pos, write_bytes); | ||
| 949 | btrfs_drop_pages(pages, num_pages); | 992 | btrfs_drop_pages(pages, num_pages); |
| 950 | if (ret) { | 993 | if (ret) { |
| 951 | btrfs_free_reserved_data_space(root, inode, | 994 | btrfs_delalloc_release_space(inode, write_bytes); |
| 952 | write_bytes); | ||
| 953 | goto out; | 995 | goto out; |
| 954 | } | 996 | } |
| 955 | 997 | ||
| @@ -965,8 +1007,6 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
| 965 | btrfs_throttle(root); | 1007 | btrfs_throttle(root); |
| 966 | } | 1008 | } |
| 967 | 1009 | ||
| 968 | buf += write_bytes; | ||
| 969 | count -= write_bytes; | ||
| 970 | pos += write_bytes; | 1010 | pos += write_bytes; |
| 971 | num_written += write_bytes; | 1011 | num_written += write_bytes; |
| 972 | 1012 | ||
| @@ -976,9 +1016,7 @@ out: | |||
| 976 | mutex_unlock(&inode->i_mutex); | 1016 | mutex_unlock(&inode->i_mutex); |
| 977 | if (ret) | 1017 | if (ret) |
| 978 | err = ret; | 1018 | err = ret; |
| 979 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 980 | 1019 | ||
| 981 | out_nolock: | ||
| 982 | kfree(pages); | 1020 | kfree(pages); |
| 983 | if (pinned[0]) | 1021 | if (pinned[0]) |
| 984 | page_cache_release(pinned[0]); | 1022 | page_cache_release(pinned[0]); |
| @@ -1008,7 +1046,7 @@ out_nolock: | |||
| 1008 | num_written = err; | 1046 | num_written = err; |
| 1009 | 1047 | ||
| 1010 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { | 1048 | if ((file->f_flags & O_DSYNC) || IS_SYNC(inode)) { |
| 1011 | trans = btrfs_start_transaction(root, 1); | 1049 | trans = btrfs_start_transaction(root, 0); |
| 1012 | ret = btrfs_log_dentry_safe(trans, root, | 1050 | ret = btrfs_log_dentry_safe(trans, root, |
| 1013 | file->f_dentry); | 1051 | file->f_dentry); |
| 1014 | if (ret == 0) { | 1052 | if (ret == 0) { |
| @@ -1023,7 +1061,7 @@ out_nolock: | |||
| 1023 | btrfs_end_transaction(trans, root); | 1061 | btrfs_end_transaction(trans, root); |
| 1024 | } | 1062 | } |
| 1025 | } | 1063 | } |
| 1026 | if (file->f_flags & O_DIRECT) { | 1064 | if (file->f_flags & O_DIRECT && buffered) { |
| 1027 | invalidate_mapping_pages(inode->i_mapping, | 1065 | invalidate_mapping_pages(inode->i_mapping, |
| 1028 | start_pos >> PAGE_CACHE_SHIFT, | 1066 | start_pos >> PAGE_CACHE_SHIFT, |
| 1029 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1067 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
| @@ -1104,9 +1142,9 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync) | |||
| 1104 | if (file && file->private_data) | 1142 | if (file && file->private_data) |
| 1105 | btrfs_ioctl_trans_end(file); | 1143 | btrfs_ioctl_trans_end(file); |
| 1106 | 1144 | ||
| 1107 | trans = btrfs_start_transaction(root, 1); | 1145 | trans = btrfs_start_transaction(root, 0); |
| 1108 | if (!trans) { | 1146 | if (IS_ERR(trans)) { |
| 1109 | ret = -ENOMEM; | 1147 | ret = PTR_ERR(trans); |
| 1110 | goto out; | 1148 | goto out; |
| 1111 | } | 1149 | } |
| 1112 | 1150 | ||
| @@ -1161,7 +1199,7 @@ const struct file_operations btrfs_file_operations = { | |||
| 1161 | .read = do_sync_read, | 1199 | .read = do_sync_read, |
| 1162 | .aio_read = generic_file_aio_read, | 1200 | .aio_read = generic_file_aio_read, |
| 1163 | .splice_read = generic_file_splice_read, | 1201 | .splice_read = generic_file_splice_read, |
| 1164 | .write = btrfs_file_write, | 1202 | .aio_write = btrfs_file_aio_write, |
| 1165 | .mmap = btrfs_file_mmap, | 1203 | .mmap = btrfs_file_mmap, |
| 1166 | .open = generic_file_open, | 1204 | .open = generic_file_open, |
| 1167 | .release = btrfs_release_file, | 1205 | .release = btrfs_release_file, |
diff --git a/fs/btrfs/inode-item.c b/fs/btrfs/inode-item.c index 72ce3c173d6a..64f1150bb48d 100644 --- a/fs/btrfs/inode-item.c +++ b/fs/btrfs/inode-item.c | |||
| @@ -49,6 +49,33 @@ static int find_name_in_backref(struct btrfs_path *path, const char *name, | |||
| 49 | return 0; | 49 | return 0; |
| 50 | } | 50 | } |
| 51 | 51 | ||
| 52 | struct btrfs_inode_ref * | ||
| 53 | btrfs_lookup_inode_ref(struct btrfs_trans_handle *trans, | ||
| 54 | struct btrfs_root *root, | ||
| 55 | struct btrfs_path *path, | ||
| 56 | const char *name, int name_len, | ||
| 57 | u64 inode_objectid, u64 ref_objectid, int mod) | ||
| 58 | { | ||
| 59 | struct btrfs_key key; | ||
| 60 | struct btrfs_inode_ref *ref; | ||
| 61 | int ins_len = mod < 0 ? -1 : 0; | ||
| 62 | int cow = mod != 0; | ||
| 63 | int ret; | ||
| 64 | |||
| 65 | key.objectid = inode_objectid; | ||
| 66 | key.type = BTRFS_INODE_REF_KEY; | ||
| 67 | key.offset = ref_objectid; | ||
| 68 | |||
| 69 | ret = btrfs_search_slot(trans, root, &key, path, ins_len, cow); | ||
| 70 | if (ret < 0) | ||
| 71 | return ERR_PTR(ret); | ||
| 72 | if (ret > 0) | ||
| 73 | return NULL; | ||
| 74 | if (!find_name_in_backref(path, name, name_len, &ref)) | ||
| 75 | return NULL; | ||
| 76 | return ref; | ||
| 77 | } | ||
| 78 | |||
| 52 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, | 79 | int btrfs_del_inode_ref(struct btrfs_trans_handle *trans, |
| 53 | struct btrfs_root *root, | 80 | struct btrfs_root *root, |
| 54 | const char *name, int name_len, | 81 | const char *name, int name_len, |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d601629b85d1..fa6ccc1bfe2a 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
| @@ -252,6 +252,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans, | |||
| 252 | inline_len, compressed_size, | 252 | inline_len, compressed_size, |
| 253 | compressed_pages); | 253 | compressed_pages); |
| 254 | BUG_ON(ret); | 254 | BUG_ON(ret); |
| 255 | btrfs_delalloc_release_metadata(inode, end + 1 - start); | ||
| 255 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); | 256 | btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); |
| 256 | return 0; | 257 | return 0; |
| 257 | } | 258 | } |
| @@ -414,6 +415,7 @@ again: | |||
| 414 | trans = btrfs_join_transaction(root, 1); | 415 | trans = btrfs_join_transaction(root, 1); |
| 415 | BUG_ON(!trans); | 416 | BUG_ON(!trans); |
| 416 | btrfs_set_trans_block_group(trans, inode); | 417 | btrfs_set_trans_block_group(trans, inode); |
| 418 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 417 | 419 | ||
| 418 | /* lets try to make an inline extent */ | 420 | /* lets try to make an inline extent */ |
| 419 | if (ret || total_in < (actual_end - start)) { | 421 | if (ret || total_in < (actual_end - start)) { |
| @@ -439,7 +441,6 @@ again: | |||
| 439 | start, end, NULL, | 441 | start, end, NULL, |
| 440 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | | 442 | EXTENT_CLEAR_UNLOCK_PAGE | EXTENT_CLEAR_DIRTY | |
| 441 | EXTENT_CLEAR_DELALLOC | | 443 | EXTENT_CLEAR_DELALLOC | |
| 442 | EXTENT_CLEAR_ACCOUNTING | | ||
| 443 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); | 444 | EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK); |
| 444 | 445 | ||
| 445 | btrfs_end_transaction(trans, root); | 446 | btrfs_end_transaction(trans, root); |
| @@ -697,6 +698,38 @@ retry: | |||
| 697 | return 0; | 698 | return 0; |
| 698 | } | 699 | } |
| 699 | 700 | ||
| 701 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | ||
| 702 | u64 num_bytes) | ||
| 703 | { | ||
| 704 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 705 | struct extent_map *em; | ||
| 706 | u64 alloc_hint = 0; | ||
| 707 | |||
| 708 | read_lock(&em_tree->lock); | ||
| 709 | em = search_extent_mapping(em_tree, start, num_bytes); | ||
| 710 | if (em) { | ||
| 711 | /* | ||
| 712 | * if block start isn't an actual block number then find the | ||
| 713 | * first block in this inode and use that as a hint. If that | ||
| 714 | * block is also bogus then just don't worry about it. | ||
| 715 | */ | ||
| 716 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
| 717 | free_extent_map(em); | ||
| 718 | em = search_extent_mapping(em_tree, 0, 0); | ||
| 719 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
| 720 | alloc_hint = em->block_start; | ||
| 721 | if (em) | ||
| 722 | free_extent_map(em); | ||
| 723 | } else { | ||
| 724 | alloc_hint = em->block_start; | ||
| 725 | free_extent_map(em); | ||
| 726 | } | ||
| 727 | } | ||
| 728 | read_unlock(&em_tree->lock); | ||
| 729 | |||
| 730 | return alloc_hint; | ||
| 731 | } | ||
| 732 | |||
| 700 | /* | 733 | /* |
| 701 | * when extent_io.c finds a delayed allocation range in the file, | 734 | * when extent_io.c finds a delayed allocation range in the file, |
| 702 | * the call backs end up in this code. The basic idea is to | 735 | * the call backs end up in this code. The basic idea is to |
| @@ -734,6 +767,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 734 | trans = btrfs_join_transaction(root, 1); | 767 | trans = btrfs_join_transaction(root, 1); |
| 735 | BUG_ON(!trans); | 768 | BUG_ON(!trans); |
| 736 | btrfs_set_trans_block_group(trans, inode); | 769 | btrfs_set_trans_block_group(trans, inode); |
| 770 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 737 | 771 | ||
| 738 | actual_end = min_t(u64, isize, end + 1); | 772 | actual_end = min_t(u64, isize, end + 1); |
| 739 | 773 | ||
| @@ -753,7 +787,6 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 753 | EXTENT_CLEAR_UNLOCK_PAGE | | 787 | EXTENT_CLEAR_UNLOCK_PAGE | |
| 754 | EXTENT_CLEAR_UNLOCK | | 788 | EXTENT_CLEAR_UNLOCK | |
| 755 | EXTENT_CLEAR_DELALLOC | | 789 | EXTENT_CLEAR_DELALLOC | |
| 756 | EXTENT_CLEAR_ACCOUNTING | | ||
| 757 | EXTENT_CLEAR_DIRTY | | 790 | EXTENT_CLEAR_DIRTY | |
| 758 | EXTENT_SET_WRITEBACK | | 791 | EXTENT_SET_WRITEBACK | |
| 759 | EXTENT_END_WRITEBACK); | 792 | EXTENT_END_WRITEBACK); |
| @@ -769,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
| 769 | BUG_ON(disk_num_bytes > | 802 | BUG_ON(disk_num_bytes > |
| 770 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 803 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
| 771 | 804 | ||
| 772 | 805 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | |
| 773 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
| 774 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
| 775 | start, num_bytes); | ||
| 776 | if (em) { | ||
| 777 | /* | ||
| 778 | * if block start isn't an actual block number then find the | ||
| 779 | * first block in this inode and use that as a hint. If that | ||
| 780 | * block is also bogus then just don't worry about it. | ||
| 781 | */ | ||
| 782 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
| 783 | free_extent_map(em); | ||
| 784 | em = search_extent_mapping(em_tree, 0, 0); | ||
| 785 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
| 786 | alloc_hint = em->block_start; | ||
| 787 | if (em) | ||
| 788 | free_extent_map(em); | ||
| 789 | } else { | ||
| 790 | alloc_hint = em->block_start; | ||
| 791 | free_extent_map(em); | ||
| 792 | } | ||
| 793 | } | ||
| 794 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
| 795 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 806 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
| 796 | 807 | ||
| 797 | while (disk_num_bytes > 0) { | 808 | while (disk_num_bytes > 0) { |
| @@ -1174,6 +1185,13 @@ out_check: | |||
| 1174 | num_bytes, num_bytes, type); | 1185 | num_bytes, num_bytes, type); |
| 1175 | BUG_ON(ret); | 1186 | BUG_ON(ret); |
| 1176 | 1187 | ||
| 1188 | if (root->root_key.objectid == | ||
| 1189 | BTRFS_DATA_RELOC_TREE_OBJECTID) { | ||
| 1190 | ret = btrfs_reloc_clone_csums(inode, cur_offset, | ||
| 1191 | num_bytes); | ||
| 1192 | BUG_ON(ret); | ||
| 1193 | } | ||
| 1194 | |||
| 1177 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, | 1195 | extent_clear_unlock_delalloc(inode, &BTRFS_I(inode)->io_tree, |
| 1178 | cur_offset, cur_offset + num_bytes - 1, | 1196 | cur_offset, cur_offset + num_bytes - 1, |
| 1179 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | | 1197 | locked_page, EXTENT_CLEAR_UNLOCK_PAGE | |
| @@ -1226,15 +1244,13 @@ static int run_delalloc_range(struct inode *inode, struct page *locked_page, | |||
| 1226 | } | 1244 | } |
| 1227 | 1245 | ||
| 1228 | static int btrfs_split_extent_hook(struct inode *inode, | 1246 | static int btrfs_split_extent_hook(struct inode *inode, |
| 1229 | struct extent_state *orig, u64 split) | 1247 | struct extent_state *orig, u64 split) |
| 1230 | { | 1248 | { |
| 1249 | /* not delalloc, ignore it */ | ||
| 1231 | if (!(orig->state & EXTENT_DELALLOC)) | 1250 | if (!(orig->state & EXTENT_DELALLOC)) |
| 1232 | return 0; | 1251 | return 0; |
| 1233 | 1252 | ||
| 1234 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1253 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
| 1235 | BTRFS_I(inode)->outstanding_extents++; | ||
| 1236 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1237 | |||
| 1238 | return 0; | 1254 | return 0; |
| 1239 | } | 1255 | } |
| 1240 | 1256 | ||
| @@ -1252,10 +1268,7 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
| 1252 | if (!(other->state & EXTENT_DELALLOC)) | 1268 | if (!(other->state & EXTENT_DELALLOC)) |
| 1253 | return 0; | 1269 | return 0; |
| 1254 | 1270 | ||
| 1255 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1271 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
| 1256 | BTRFS_I(inode)->outstanding_extents--; | ||
| 1257 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 1258 | |||
| 1259 | return 0; | 1272 | return 0; |
| 1260 | } | 1273 | } |
| 1261 | 1274 | ||
| @@ -1264,8 +1277,8 @@ static int btrfs_merge_extent_hook(struct inode *inode, | |||
| 1264 | * bytes in this file, and to maintain the list of inodes that | 1277 | * bytes in this file, and to maintain the list of inodes that |
| 1265 | * have pending delalloc work to be done. | 1278 | * have pending delalloc work to be done. |
| 1266 | */ | 1279 | */ |
| 1267 | static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | 1280 | static int btrfs_set_bit_hook(struct inode *inode, |
| 1268 | unsigned long old, unsigned long bits) | 1281 | struct extent_state *state, int *bits) |
| 1269 | { | 1282 | { |
| 1270 | 1283 | ||
| 1271 | /* | 1284 | /* |
| @@ -1273,17 +1286,18 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1273 | * but in this case, we are only testeing for the DELALLOC | 1286 | * but in this case, we are only testeing for the DELALLOC |
| 1274 | * bit, which is only set or cleared with irqs on | 1287 | * bit, which is only set or cleared with irqs on |
| 1275 | */ | 1288 | */ |
| 1276 | if (!(old & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1289 | if (!(state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
| 1277 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1290 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1291 | u64 len = state->end + 1 - state->start; | ||
| 1278 | 1292 | ||
| 1279 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1293 | if (*bits & EXTENT_FIRST_DELALLOC) |
| 1280 | BTRFS_I(inode)->outstanding_extents++; | 1294 | *bits &= ~EXTENT_FIRST_DELALLOC; |
| 1281 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1295 | else |
| 1282 | btrfs_delalloc_reserve_space(root, inode, end - start + 1); | 1296 | atomic_inc(&BTRFS_I(inode)->outstanding_extents); |
| 1283 | 1297 | ||
| 1284 | spin_lock(&root->fs_info->delalloc_lock); | 1298 | spin_lock(&root->fs_info->delalloc_lock); |
| 1285 | BTRFS_I(inode)->delalloc_bytes += end - start + 1; | 1299 | BTRFS_I(inode)->delalloc_bytes += len; |
| 1286 | root->fs_info->delalloc_bytes += end - start + 1; | 1300 | root->fs_info->delalloc_bytes += len; |
| 1287 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1301 | if (list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
| 1288 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, | 1302 | list_add_tail(&BTRFS_I(inode)->delalloc_inodes, |
| 1289 | &root->fs_info->delalloc_inodes); | 1303 | &root->fs_info->delalloc_inodes); |
| @@ -1297,45 +1311,32 @@ static int btrfs_set_bit_hook(struct inode *inode, u64 start, u64 end, | |||
| 1297 | * extent_io.c clear_bit_hook, see set_bit_hook for why | 1311 | * extent_io.c clear_bit_hook, see set_bit_hook for why |
| 1298 | */ | 1312 | */ |
| 1299 | static int btrfs_clear_bit_hook(struct inode *inode, | 1313 | static int btrfs_clear_bit_hook(struct inode *inode, |
| 1300 | struct extent_state *state, unsigned long bits) | 1314 | struct extent_state *state, int *bits) |
| 1301 | { | 1315 | { |
| 1302 | /* | 1316 | /* |
| 1303 | * set_bit and clear bit hooks normally require _irqsave/restore | 1317 | * set_bit and clear bit hooks normally require _irqsave/restore |
| 1304 | * but in this case, we are only testeing for the DELALLOC | 1318 | * but in this case, we are only testeing for the DELALLOC |
| 1305 | * bit, which is only set or cleared with irqs on | 1319 | * bit, which is only set or cleared with irqs on |
| 1306 | */ | 1320 | */ |
| 1307 | if ((state->state & EXTENT_DELALLOC) && (bits & EXTENT_DELALLOC)) { | 1321 | if ((state->state & EXTENT_DELALLOC) && (*bits & EXTENT_DELALLOC)) { |
| 1308 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1322 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1323 | u64 len = state->end + 1 - state->start; | ||
| 1309 | 1324 | ||
| 1310 | if (bits & EXTENT_DO_ACCOUNTING) { | 1325 | if (*bits & EXTENT_FIRST_DELALLOC) |
| 1311 | spin_lock(&BTRFS_I(inode)->accounting_lock); | 1326 | *bits &= ~EXTENT_FIRST_DELALLOC; |
| 1312 | WARN_ON(!BTRFS_I(inode)->outstanding_extents); | 1327 | else if (!(*bits & EXTENT_DO_ACCOUNTING)) |
| 1313 | BTRFS_I(inode)->outstanding_extents--; | 1328 | atomic_dec(&BTRFS_I(inode)->outstanding_extents); |
| 1314 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | 1329 | |
| 1315 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | 1330 | if (*bits & EXTENT_DO_ACCOUNTING) |
| 1316 | } | 1331 | btrfs_delalloc_release_metadata(inode, len); |
| 1332 | |||
| 1333 | if (root->root_key.objectid != BTRFS_DATA_RELOC_TREE_OBJECTID) | ||
| 1334 | btrfs_free_reserved_data_space(inode, len); | ||
| 1317 | 1335 | ||
| 1318 | spin_lock(&root->fs_info->delalloc_lock); | 1336 | spin_lock(&root->fs_info->delalloc_lock); |
| 1319 | if (state->end - state->start + 1 > | 1337 | root->fs_info->delalloc_bytes -= len; |
| 1320 | root->fs_info->delalloc_bytes) { | 1338 | BTRFS_I(inode)->delalloc_bytes -= len; |
| 1321 | printk(KERN_INFO "btrfs warning: delalloc account " | 1339 | |
| 1322 | "%llu %llu\n", | ||
| 1323 | (unsigned long long) | ||
| 1324 | state->end - state->start + 1, | ||
| 1325 | (unsigned long long) | ||
| 1326 | root->fs_info->delalloc_bytes); | ||
| 1327 | btrfs_delalloc_free_space(root, inode, (u64)-1); | ||
| 1328 | root->fs_info->delalloc_bytes = 0; | ||
| 1329 | BTRFS_I(inode)->delalloc_bytes = 0; | ||
| 1330 | } else { | ||
| 1331 | btrfs_delalloc_free_space(root, inode, | ||
| 1332 | state->end - | ||
| 1333 | state->start + 1); | ||
| 1334 | root->fs_info->delalloc_bytes -= state->end - | ||
| 1335 | state->start + 1; | ||
| 1336 | BTRFS_I(inode)->delalloc_bytes -= state->end - | ||
| 1337 | state->start + 1; | ||
| 1338 | } | ||
| 1339 | if (BTRFS_I(inode)->delalloc_bytes == 0 && | 1340 | if (BTRFS_I(inode)->delalloc_bytes == 0 && |
| 1340 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { | 1341 | !list_empty(&BTRFS_I(inode)->delalloc_inodes)) { |
| 1341 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); | 1342 | list_del_init(&BTRFS_I(inode)->delalloc_inodes); |
| @@ -1384,7 +1385,8 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset, | |||
| 1384 | */ | 1385 | */ |
| 1385 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, | 1386 | static int __btrfs_submit_bio_start(struct inode *inode, int rw, |
| 1386 | struct bio *bio, int mirror_num, | 1387 | struct bio *bio, int mirror_num, |
| 1387 | unsigned long bio_flags) | 1388 | unsigned long bio_flags, |
| 1389 | u64 bio_offset) | ||
| 1388 | { | 1390 | { |
| 1389 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1391 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1390 | int ret = 0; | 1392 | int ret = 0; |
| @@ -1403,7 +1405,8 @@ static int __btrfs_submit_bio_start(struct inode *inode, int rw, | |||
| 1403 | * are inserted into the btree | 1405 | * are inserted into the btree |
| 1404 | */ | 1406 | */ |
| 1405 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 1407 | static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
| 1406 | int mirror_num, unsigned long bio_flags) | 1408 | int mirror_num, unsigned long bio_flags, |
| 1409 | u64 bio_offset) | ||
| 1407 | { | 1410 | { |
| 1408 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1411 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1409 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); | 1412 | return btrfs_map_bio(root, rw, bio, mirror_num, 1); |
| @@ -1414,7 +1417,8 @@ static int __btrfs_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | |||
| 1414 | * on write, or reading the csums from the tree before a read | 1417 | * on write, or reading the csums from the tree before a read |
| 1415 | */ | 1418 | */ |
| 1416 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | 1419 | static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, |
| 1417 | int mirror_num, unsigned long bio_flags) | 1420 | int mirror_num, unsigned long bio_flags, |
| 1421 | u64 bio_offset) | ||
| 1418 | { | 1422 | { |
| 1419 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1423 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1420 | int ret = 0; | 1424 | int ret = 0; |
| @@ -1439,7 +1443,8 @@ static int btrfs_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
| 1439 | /* we're doing a write, do the async checksumming */ | 1443 | /* we're doing a write, do the async checksumming */ |
| 1440 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | 1444 | return btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, |
| 1441 | inode, rw, bio, mirror_num, | 1445 | inode, rw, bio, mirror_num, |
| 1442 | bio_flags, __btrfs_submit_bio_start, | 1446 | bio_flags, bio_offset, |
| 1447 | __btrfs_submit_bio_start, | ||
| 1443 | __btrfs_submit_bio_done); | 1448 | __btrfs_submit_bio_done); |
| 1444 | } | 1449 | } |
| 1445 | 1450 | ||
| @@ -1520,6 +1525,7 @@ again: | |||
| 1520 | goto again; | 1525 | goto again; |
| 1521 | } | 1526 | } |
| 1522 | 1527 | ||
| 1528 | BUG(); | ||
| 1523 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); | 1529 | btrfs_set_extent_delalloc(inode, page_start, page_end, &cached_state); |
| 1524 | ClearPageChecked(page); | 1530 | ClearPageChecked(page); |
| 1525 | out: | 1531 | out: |
| @@ -1650,7 +1656,7 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans, | |||
| 1650 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | 1656 | static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) |
| 1651 | { | 1657 | { |
| 1652 | struct btrfs_root *root = BTRFS_I(inode)->root; | 1658 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 1653 | struct btrfs_trans_handle *trans; | 1659 | struct btrfs_trans_handle *trans = NULL; |
| 1654 | struct btrfs_ordered_extent *ordered_extent = NULL; | 1660 | struct btrfs_ordered_extent *ordered_extent = NULL; |
| 1655 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 1661 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 1656 | struct extent_state *cached_state = NULL; | 1662 | struct extent_state *cached_state = NULL; |
| @@ -1668,9 +1674,10 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1668 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1674 | ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| 1669 | if (!ret) { | 1675 | if (!ret) { |
| 1670 | trans = btrfs_join_transaction(root, 1); | 1676 | trans = btrfs_join_transaction(root, 1); |
| 1677 | btrfs_set_trans_block_group(trans, inode); | ||
| 1678 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 1671 | ret = btrfs_update_inode(trans, root, inode); | 1679 | ret = btrfs_update_inode(trans, root, inode); |
| 1672 | BUG_ON(ret); | 1680 | BUG_ON(ret); |
| 1673 | btrfs_end_transaction(trans, root); | ||
| 1674 | } | 1681 | } |
| 1675 | goto out; | 1682 | goto out; |
| 1676 | } | 1683 | } |
| @@ -1680,6 +1687,8 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1680 | 0, &cached_state, GFP_NOFS); | 1687 | 0, &cached_state, GFP_NOFS); |
| 1681 | 1688 | ||
| 1682 | trans = btrfs_join_transaction(root, 1); | 1689 | trans = btrfs_join_transaction(root, 1); |
| 1690 | btrfs_set_trans_block_group(trans, inode); | ||
| 1691 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 1683 | 1692 | ||
| 1684 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) | 1693 | if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags)) |
| 1685 | compressed = 1; | 1694 | compressed = 1; |
| @@ -1711,12 +1720,13 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end) | |||
| 1711 | add_pending_csums(trans, inode, ordered_extent->file_offset, | 1720 | add_pending_csums(trans, inode, ordered_extent->file_offset, |
| 1712 | &ordered_extent->list); | 1721 | &ordered_extent->list); |
| 1713 | 1722 | ||
| 1714 | /* this also removes the ordered extent from the tree */ | ||
| 1715 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); | 1723 | btrfs_ordered_update_i_size(inode, 0, ordered_extent); |
| 1716 | ret = btrfs_update_inode(trans, root, inode); | 1724 | ret = btrfs_update_inode(trans, root, inode); |
| 1717 | BUG_ON(ret); | 1725 | BUG_ON(ret); |
| 1718 | btrfs_end_transaction(trans, root); | ||
| 1719 | out: | 1726 | out: |
| 1727 | btrfs_delalloc_release_metadata(inode, ordered_extent->len); | ||
| 1728 | if (trans) | ||
| 1729 | btrfs_end_transaction(trans, root); | ||
| 1720 | /* once for us */ | 1730 | /* once for us */ |
| 1721 | btrfs_put_ordered_extent(ordered_extent); | 1731 | btrfs_put_ordered_extent(ordered_extent); |
| 1722 | /* once for the tree */ | 1732 | /* once for the tree */ |
| @@ -1838,7 +1848,7 @@ static int btrfs_io_failed_hook(struct bio *failed_bio, | |||
| 1838 | 1848 | ||
| 1839 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, | 1849 | BTRFS_I(inode)->io_tree.ops->submit_bio_hook(inode, rw, bio, |
| 1840 | failrec->last_mirror, | 1850 | failrec->last_mirror, |
| 1841 | failrec->bio_flags); | 1851 | failrec->bio_flags, 0); |
| 1842 | return 0; | 1852 | return 0; |
| 1843 | } | 1853 | } |
| 1844 | 1854 | ||
| @@ -1993,32 +2003,196 @@ void btrfs_run_delayed_iputs(struct btrfs_root *root) | |||
| 1993 | } | 2003 | } |
| 1994 | 2004 | ||
| 1995 | /* | 2005 | /* |
| 2006 | * calculate extra metadata reservation when snapshotting a subvolume | ||
| 2007 | * contains orphan files. | ||
| 2008 | */ | ||
| 2009 | void btrfs_orphan_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 2010 | struct btrfs_pending_snapshot *pending, | ||
| 2011 | u64 *bytes_to_reserve) | ||
| 2012 | { | ||
| 2013 | struct btrfs_root *root; | ||
| 2014 | struct btrfs_block_rsv *block_rsv; | ||
| 2015 | u64 num_bytes; | ||
| 2016 | int index; | ||
| 2017 | |||
| 2018 | root = pending->root; | ||
| 2019 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
| 2020 | return; | ||
| 2021 | |||
| 2022 | block_rsv = root->orphan_block_rsv; | ||
| 2023 | |||
| 2024 | /* orphan block reservation for the snapshot */ | ||
| 2025 | num_bytes = block_rsv->size; | ||
| 2026 | |||
| 2027 | /* | ||
| 2028 | * after the snapshot is created, COWing tree blocks may use more | ||
| 2029 | * space than it frees. So we should make sure there is enough | ||
| 2030 | * reserved space. | ||
| 2031 | */ | ||
| 2032 | index = trans->transid & 0x1; | ||
| 2033 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
| 2034 | num_bytes += block_rsv->size - | ||
| 2035 | (block_rsv->reserved + block_rsv->freed[index]); | ||
| 2036 | } | ||
| 2037 | |||
| 2038 | *bytes_to_reserve += num_bytes; | ||
| 2039 | } | ||
| 2040 | |||
| 2041 | void btrfs_orphan_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 2042 | struct btrfs_pending_snapshot *pending) | ||
| 2043 | { | ||
| 2044 | struct btrfs_root *root = pending->root; | ||
| 2045 | struct btrfs_root *snap = pending->snap; | ||
| 2046 | struct btrfs_block_rsv *block_rsv; | ||
| 2047 | u64 num_bytes; | ||
| 2048 | int index; | ||
| 2049 | int ret; | ||
| 2050 | |||
| 2051 | if (!root->orphan_block_rsv || list_empty(&root->orphan_list)) | ||
| 2052 | return; | ||
| 2053 | |||
| 2054 | /* refill source subvolume's orphan block reservation */ | ||
| 2055 | block_rsv = root->orphan_block_rsv; | ||
| 2056 | index = trans->transid & 0x1; | ||
| 2057 | if (block_rsv->reserved + block_rsv->freed[index] < block_rsv->size) { | ||
| 2058 | num_bytes = block_rsv->size - | ||
| 2059 | (block_rsv->reserved + block_rsv->freed[index]); | ||
| 2060 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
| 2061 | root->orphan_block_rsv, | ||
| 2062 | num_bytes); | ||
| 2063 | BUG_ON(ret); | ||
| 2064 | } | ||
| 2065 | |||
| 2066 | /* setup orphan block reservation for the snapshot */ | ||
| 2067 | block_rsv = btrfs_alloc_block_rsv(snap); | ||
| 2068 | BUG_ON(!block_rsv); | ||
| 2069 | |||
| 2070 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
| 2071 | snap->orphan_block_rsv = block_rsv; | ||
| 2072 | |||
| 2073 | num_bytes = root->orphan_block_rsv->size; | ||
| 2074 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
| 2075 | block_rsv, num_bytes); | ||
| 2076 | BUG_ON(ret); | ||
| 2077 | |||
| 2078 | #if 0 | ||
| 2079 | /* insert orphan item for the snapshot */ | ||
| 2080 | WARN_ON(!root->orphan_item_inserted); | ||
| 2081 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
| 2082 | snap->root_key.objectid); | ||
| 2083 | BUG_ON(ret); | ||
| 2084 | snap->orphan_item_inserted = 1; | ||
| 2085 | #endif | ||
| 2086 | } | ||
| 2087 | |||
| 2088 | enum btrfs_orphan_cleanup_state { | ||
| 2089 | ORPHAN_CLEANUP_STARTED = 1, | ||
| 2090 | ORPHAN_CLEANUP_DONE = 2, | ||
| 2091 | }; | ||
| 2092 | |||
| 2093 | /* | ||
| 2094 | * This is called in transaction commmit time. If there are no orphan | ||
| 2095 | * files in the subvolume, it removes orphan item and frees block_rsv | ||
| 2096 | * structure. | ||
| 2097 | */ | ||
| 2098 | void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans, | ||
| 2099 | struct btrfs_root *root) | ||
| 2100 | { | ||
| 2101 | int ret; | ||
| 2102 | |||
| 2103 | if (!list_empty(&root->orphan_list) || | ||
| 2104 | root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE) | ||
| 2105 | return; | ||
| 2106 | |||
| 2107 | if (root->orphan_item_inserted && | ||
| 2108 | btrfs_root_refs(&root->root_item) > 0) { | ||
| 2109 | ret = btrfs_del_orphan_item(trans, root->fs_info->tree_root, | ||
| 2110 | root->root_key.objectid); | ||
| 2111 | BUG_ON(ret); | ||
| 2112 | root->orphan_item_inserted = 0; | ||
| 2113 | } | ||
| 2114 | |||
| 2115 | if (root->orphan_block_rsv) { | ||
| 2116 | WARN_ON(root->orphan_block_rsv->size > 0); | ||
| 2117 | btrfs_free_block_rsv(root, root->orphan_block_rsv); | ||
| 2118 | root->orphan_block_rsv = NULL; | ||
| 2119 | } | ||
| 2120 | } | ||
| 2121 | |||
| 2122 | /* | ||
| 1996 | * This creates an orphan entry for the given inode in case something goes | 2123 | * This creates an orphan entry for the given inode in case something goes |
| 1997 | * wrong in the middle of an unlink/truncate. | 2124 | * wrong in the middle of an unlink/truncate. |
| 2125 | * | ||
| 2126 | * NOTE: caller of this function should reserve 5 units of metadata for | ||
| 2127 | * this function. | ||
| 1998 | */ | 2128 | */ |
| 1999 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | 2129 | int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) |
| 2000 | { | 2130 | { |
| 2001 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2131 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2002 | int ret = 0; | 2132 | struct btrfs_block_rsv *block_rsv = NULL; |
| 2133 | int reserve = 0; | ||
| 2134 | int insert = 0; | ||
| 2135 | int ret; | ||
| 2003 | 2136 | ||
| 2004 | spin_lock(&root->list_lock); | 2137 | if (!root->orphan_block_rsv) { |
| 2138 | block_rsv = btrfs_alloc_block_rsv(root); | ||
| 2139 | BUG_ON(!block_rsv); | ||
| 2140 | } | ||
| 2005 | 2141 | ||
| 2006 | /* already on the orphan list, we're good */ | 2142 | spin_lock(&root->orphan_lock); |
| 2007 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 2143 | if (!root->orphan_block_rsv) { |
| 2008 | spin_unlock(&root->list_lock); | 2144 | root->orphan_block_rsv = block_rsv; |
| 2009 | return 0; | 2145 | } else if (block_rsv) { |
| 2146 | btrfs_free_block_rsv(root, block_rsv); | ||
| 2147 | block_rsv = NULL; | ||
| 2148 | } | ||
| 2149 | |||
| 2150 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | ||
| 2151 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | ||
| 2152 | #if 0 | ||
| 2153 | /* | ||
| 2154 | * For proper ENOSPC handling, we should do orphan | ||
| 2155 | * cleanup when mounting. But this introduces backward | ||
| 2156 | * compatibility issue. | ||
| 2157 | */ | ||
| 2158 | if (!xchg(&root->orphan_item_inserted, 1)) | ||
| 2159 | insert = 2; | ||
| 2160 | else | ||
| 2161 | insert = 1; | ||
| 2162 | #endif | ||
| 2163 | insert = 1; | ||
| 2164 | } else { | ||
| 2165 | WARN_ON(!BTRFS_I(inode)->orphan_meta_reserved); | ||
| 2010 | } | 2166 | } |
| 2011 | 2167 | ||
| 2012 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2168 | if (!BTRFS_I(inode)->orphan_meta_reserved) { |
| 2169 | BTRFS_I(inode)->orphan_meta_reserved = 1; | ||
| 2170 | reserve = 1; | ||
| 2171 | } | ||
| 2172 | spin_unlock(&root->orphan_lock); | ||
| 2013 | 2173 | ||
| 2014 | spin_unlock(&root->list_lock); | 2174 | if (block_rsv) |
| 2175 | btrfs_add_durable_block_rsv(root->fs_info, block_rsv); | ||
| 2015 | 2176 | ||
| 2016 | /* | 2177 | /* grab metadata reservation from transaction handle */ |
| 2017 | * insert an orphan item to track this unlinked/truncated file | 2178 | if (reserve) { |
| 2018 | */ | 2179 | ret = btrfs_orphan_reserve_metadata(trans, inode); |
| 2019 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | 2180 | BUG_ON(ret); |
| 2181 | } | ||
| 2020 | 2182 | ||
| 2021 | return ret; | 2183 | /* insert an orphan item to track this unlinked/truncated file */ |
| 2184 | if (insert >= 1) { | ||
| 2185 | ret = btrfs_insert_orphan_item(trans, root, inode->i_ino); | ||
| 2186 | BUG_ON(ret); | ||
| 2187 | } | ||
| 2188 | |||
| 2189 | /* insert an orphan item to track subvolume contains orphan files */ | ||
| 2190 | if (insert >= 2) { | ||
| 2191 | ret = btrfs_insert_orphan_item(trans, root->fs_info->tree_root, | ||
| 2192 | root->root_key.objectid); | ||
| 2193 | BUG_ON(ret); | ||
| 2194 | } | ||
| 2195 | return 0; | ||
| 2022 | } | 2196 | } |
| 2023 | 2197 | ||
| 2024 | /* | 2198 | /* |
| @@ -2028,26 +2202,31 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode) | |||
| 2028 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) | 2202 | int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode) |
| 2029 | { | 2203 | { |
| 2030 | struct btrfs_root *root = BTRFS_I(inode)->root; | 2204 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 2205 | int delete_item = 0; | ||
| 2206 | int release_rsv = 0; | ||
| 2031 | int ret = 0; | 2207 | int ret = 0; |
| 2032 | 2208 | ||
| 2033 | spin_lock(&root->list_lock); | 2209 | spin_lock(&root->orphan_lock); |
| 2034 | 2210 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | |
| 2035 | if (list_empty(&BTRFS_I(inode)->i_orphan)) { | 2211 | list_del_init(&BTRFS_I(inode)->i_orphan); |
| 2036 | spin_unlock(&root->list_lock); | 2212 | delete_item = 1; |
| 2037 | return 0; | ||
| 2038 | } | 2213 | } |
| 2039 | 2214 | ||
| 2040 | list_del_init(&BTRFS_I(inode)->i_orphan); | 2215 | if (BTRFS_I(inode)->orphan_meta_reserved) { |
| 2041 | if (!trans) { | 2216 | BTRFS_I(inode)->orphan_meta_reserved = 0; |
| 2042 | spin_unlock(&root->list_lock); | 2217 | release_rsv = 1; |
| 2043 | return 0; | ||
| 2044 | } | 2218 | } |
| 2219 | spin_unlock(&root->orphan_lock); | ||
| 2045 | 2220 | ||
| 2046 | spin_unlock(&root->list_lock); | 2221 | if (trans && delete_item) { |
| 2222 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | ||
| 2223 | BUG_ON(ret); | ||
| 2224 | } | ||
| 2047 | 2225 | ||
| 2048 | ret = btrfs_del_orphan_item(trans, root, inode->i_ino); | 2226 | if (release_rsv) |
| 2227 | btrfs_orphan_release_metadata(inode); | ||
| 2049 | 2228 | ||
| 2050 | return ret; | 2229 | return 0; |
| 2051 | } | 2230 | } |
| 2052 | 2231 | ||
| 2053 | /* | 2232 | /* |
| @@ -2064,7 +2243,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2064 | struct inode *inode; | 2243 | struct inode *inode; |
| 2065 | int ret = 0, nr_unlink = 0, nr_truncate = 0; | 2244 | int ret = 0, nr_unlink = 0, nr_truncate = 0; |
| 2066 | 2245 | ||
| 2067 | if (!xchg(&root->clean_orphans, 0)) | 2246 | if (cmpxchg(&root->orphan_cleanup_state, 0, ORPHAN_CLEANUP_STARTED)) |
| 2068 | return; | 2247 | return; |
| 2069 | 2248 | ||
| 2070 | path = btrfs_alloc_path(); | 2249 | path = btrfs_alloc_path(); |
| @@ -2117,16 +2296,15 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2117 | found_key.type = BTRFS_INODE_ITEM_KEY; | 2296 | found_key.type = BTRFS_INODE_ITEM_KEY; |
| 2118 | found_key.offset = 0; | 2297 | found_key.offset = 0; |
| 2119 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); | 2298 | inode = btrfs_iget(root->fs_info->sb, &found_key, root, NULL); |
| 2120 | if (IS_ERR(inode)) | 2299 | BUG_ON(IS_ERR(inode)); |
| 2121 | break; | ||
| 2122 | 2300 | ||
| 2123 | /* | 2301 | /* |
| 2124 | * add this inode to the orphan list so btrfs_orphan_del does | 2302 | * add this inode to the orphan list so btrfs_orphan_del does |
| 2125 | * the proper thing when we hit it | 2303 | * the proper thing when we hit it |
| 2126 | */ | 2304 | */ |
| 2127 | spin_lock(&root->list_lock); | 2305 | spin_lock(&root->orphan_lock); |
| 2128 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); | 2306 | list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list); |
| 2129 | spin_unlock(&root->list_lock); | 2307 | spin_unlock(&root->orphan_lock); |
| 2130 | 2308 | ||
| 2131 | /* | 2309 | /* |
| 2132 | * if this is a bad inode, means we actually succeeded in | 2310 | * if this is a bad inode, means we actually succeeded in |
| @@ -2135,7 +2313,7 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2135 | * do a destroy_inode | 2313 | * do a destroy_inode |
| 2136 | */ | 2314 | */ |
| 2137 | if (is_bad_inode(inode)) { | 2315 | if (is_bad_inode(inode)) { |
| 2138 | trans = btrfs_start_transaction(root, 1); | 2316 | trans = btrfs_start_transaction(root, 0); |
| 2139 | btrfs_orphan_del(trans, inode); | 2317 | btrfs_orphan_del(trans, inode); |
| 2140 | btrfs_end_transaction(trans, root); | 2318 | btrfs_end_transaction(trans, root); |
| 2141 | iput(inode); | 2319 | iput(inode); |
| @@ -2153,13 +2331,23 @@ void btrfs_orphan_cleanup(struct btrfs_root *root) | |||
| 2153 | /* this will do delete_inode and everything for us */ | 2331 | /* this will do delete_inode and everything for us */ |
| 2154 | iput(inode); | 2332 | iput(inode); |
| 2155 | } | 2333 | } |
| 2334 | btrfs_free_path(path); | ||
| 2335 | |||
| 2336 | root->orphan_cleanup_state = ORPHAN_CLEANUP_DONE; | ||
| 2337 | |||
| 2338 | if (root->orphan_block_rsv) | ||
| 2339 | btrfs_block_rsv_release(root, root->orphan_block_rsv, | ||
| 2340 | (u64)-1); | ||
| 2341 | |||
| 2342 | if (root->orphan_block_rsv || root->orphan_item_inserted) { | ||
| 2343 | trans = btrfs_join_transaction(root, 1); | ||
| 2344 | btrfs_end_transaction(trans, root); | ||
| 2345 | } | ||
| 2156 | 2346 | ||
| 2157 | if (nr_unlink) | 2347 | if (nr_unlink) |
| 2158 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); | 2348 | printk(KERN_INFO "btrfs: unlinked %d orphans\n", nr_unlink); |
| 2159 | if (nr_truncate) | 2349 | if (nr_truncate) |
| 2160 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); | 2350 | printk(KERN_INFO "btrfs: truncated %d orphans\n", nr_truncate); |
| 2161 | |||
| 2162 | btrfs_free_path(path); | ||
| 2163 | } | 2351 | } |
| 2164 | 2352 | ||
| 2165 | /* | 2353 | /* |
| @@ -2478,29 +2666,201 @@ out: | |||
| 2478 | return ret; | 2666 | return ret; |
| 2479 | } | 2667 | } |
| 2480 | 2668 | ||
| 2481 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | 2669 | /* helper to check if there is any shared block in the path */ |
| 2670 | static int check_path_shared(struct btrfs_root *root, | ||
| 2671 | struct btrfs_path *path) | ||
| 2672 | { | ||
| 2673 | struct extent_buffer *eb; | ||
| 2674 | int level; | ||
| 2675 | int ret; | ||
| 2676 | u64 refs; | ||
| 2677 | |||
| 2678 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
| 2679 | if (!path->nodes[level]) | ||
| 2680 | break; | ||
| 2681 | eb = path->nodes[level]; | ||
| 2682 | if (!btrfs_block_can_be_shared(root, eb)) | ||
| 2683 | continue; | ||
| 2684 | ret = btrfs_lookup_extent_info(NULL, root, eb->start, eb->len, | ||
| 2685 | &refs, NULL); | ||
| 2686 | if (refs > 1) | ||
| 2687 | return 1; | ||
| 2688 | } | ||
| 2689 | return 0; | ||
| 2690 | } | ||
| 2691 | |||
| 2692 | /* | ||
| 2693 | * helper to start transaction for unlink and rmdir. | ||
| 2694 | * | ||
| 2695 | * unlink and rmdir are special in btrfs, they do not always free space. | ||
| 2696 | * so in enospc case, we should make sure they will free space before | ||
| 2697 | * allowing them to use the global metadata reservation. | ||
| 2698 | */ | ||
| 2699 | static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir, | ||
| 2700 | struct dentry *dentry) | ||
| 2482 | { | 2701 | { |
| 2483 | struct btrfs_root *root; | ||
| 2484 | struct btrfs_trans_handle *trans; | 2702 | struct btrfs_trans_handle *trans; |
| 2703 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2704 | struct btrfs_path *path; | ||
| 2705 | struct btrfs_inode_ref *ref; | ||
| 2706 | struct btrfs_dir_item *di; | ||
| 2485 | struct inode *inode = dentry->d_inode; | 2707 | struct inode *inode = dentry->d_inode; |
| 2708 | u64 index; | ||
| 2709 | int check_link = 1; | ||
| 2710 | int err = -ENOSPC; | ||
| 2486 | int ret; | 2711 | int ret; |
| 2487 | unsigned long nr = 0; | ||
| 2488 | 2712 | ||
| 2489 | root = BTRFS_I(dir)->root; | 2713 | trans = btrfs_start_transaction(root, 10); |
| 2714 | if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) | ||
| 2715 | return trans; | ||
| 2490 | 2716 | ||
| 2491 | /* | 2717 | if (inode->i_ino == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) |
| 2492 | * 5 items for unlink inode | 2718 | return ERR_PTR(-ENOSPC); |
| 2493 | * 1 for orphan | 2719 | |
| 2494 | */ | 2720 | /* check if there is someone else holds reference */ |
| 2495 | ret = btrfs_reserve_metadata_space(root, 6); | 2721 | if (S_ISDIR(inode->i_mode) && atomic_read(&inode->i_count) > 1) |
| 2496 | if (ret) | 2722 | return ERR_PTR(-ENOSPC); |
| 2497 | return ret; | 2723 | |
| 2724 | if (atomic_read(&inode->i_count) > 2) | ||
| 2725 | return ERR_PTR(-ENOSPC); | ||
| 2726 | |||
| 2727 | if (xchg(&root->fs_info->enospc_unlink, 1)) | ||
| 2728 | return ERR_PTR(-ENOSPC); | ||
| 2498 | 2729 | ||
| 2499 | trans = btrfs_start_transaction(root, 1); | 2730 | path = btrfs_alloc_path(); |
| 2731 | if (!path) { | ||
| 2732 | root->fs_info->enospc_unlink = 0; | ||
| 2733 | return ERR_PTR(-ENOMEM); | ||
| 2734 | } | ||
| 2735 | |||
| 2736 | trans = btrfs_start_transaction(root, 0); | ||
| 2500 | if (IS_ERR(trans)) { | 2737 | if (IS_ERR(trans)) { |
| 2501 | btrfs_unreserve_metadata_space(root, 6); | 2738 | btrfs_free_path(path); |
| 2502 | return PTR_ERR(trans); | 2739 | root->fs_info->enospc_unlink = 0; |
| 2740 | return trans; | ||
| 2741 | } | ||
| 2742 | |||
| 2743 | path->skip_locking = 1; | ||
| 2744 | path->search_commit_root = 1; | ||
| 2745 | |||
| 2746 | ret = btrfs_lookup_inode(trans, root, path, | ||
| 2747 | &BTRFS_I(dir)->location, 0); | ||
| 2748 | if (ret < 0) { | ||
| 2749 | err = ret; | ||
| 2750 | goto out; | ||
| 2751 | } | ||
| 2752 | if (ret == 0) { | ||
| 2753 | if (check_path_shared(root, path)) | ||
| 2754 | goto out; | ||
| 2755 | } else { | ||
| 2756 | check_link = 0; | ||
| 2757 | } | ||
| 2758 | btrfs_release_path(root, path); | ||
| 2759 | |||
| 2760 | ret = btrfs_lookup_inode(trans, root, path, | ||
| 2761 | &BTRFS_I(inode)->location, 0); | ||
| 2762 | if (ret < 0) { | ||
| 2763 | err = ret; | ||
| 2764 | goto out; | ||
| 2765 | } | ||
| 2766 | if (ret == 0) { | ||
| 2767 | if (check_path_shared(root, path)) | ||
| 2768 | goto out; | ||
| 2769 | } else { | ||
| 2770 | check_link = 0; | ||
| 2771 | } | ||
| 2772 | btrfs_release_path(root, path); | ||
| 2773 | |||
| 2774 | if (ret == 0 && S_ISREG(inode->i_mode)) { | ||
| 2775 | ret = btrfs_lookup_file_extent(trans, root, path, | ||
| 2776 | inode->i_ino, (u64)-1, 0); | ||
| 2777 | if (ret < 0) { | ||
| 2778 | err = ret; | ||
| 2779 | goto out; | ||
| 2780 | } | ||
| 2781 | BUG_ON(ret == 0); | ||
| 2782 | if (check_path_shared(root, path)) | ||
| 2783 | goto out; | ||
| 2784 | btrfs_release_path(root, path); | ||
| 2785 | } | ||
| 2786 | |||
| 2787 | if (!check_link) { | ||
| 2788 | err = 0; | ||
| 2789 | goto out; | ||
| 2790 | } | ||
| 2791 | |||
| 2792 | di = btrfs_lookup_dir_item(trans, root, path, dir->i_ino, | ||
| 2793 | dentry->d_name.name, dentry->d_name.len, 0); | ||
| 2794 | if (IS_ERR(di)) { | ||
| 2795 | err = PTR_ERR(di); | ||
| 2796 | goto out; | ||
| 2797 | } | ||
| 2798 | if (di) { | ||
| 2799 | if (check_path_shared(root, path)) | ||
| 2800 | goto out; | ||
| 2801 | } else { | ||
| 2802 | err = 0; | ||
| 2803 | goto out; | ||
| 2503 | } | 2804 | } |
| 2805 | btrfs_release_path(root, path); | ||
| 2806 | |||
| 2807 | ref = btrfs_lookup_inode_ref(trans, root, path, | ||
| 2808 | dentry->d_name.name, dentry->d_name.len, | ||
| 2809 | inode->i_ino, dir->i_ino, 0); | ||
| 2810 | if (IS_ERR(ref)) { | ||
| 2811 | err = PTR_ERR(ref); | ||
| 2812 | goto out; | ||
| 2813 | } | ||
| 2814 | BUG_ON(!ref); | ||
| 2815 | if (check_path_shared(root, path)) | ||
| 2816 | goto out; | ||
| 2817 | index = btrfs_inode_ref_index(path->nodes[0], ref); | ||
| 2818 | btrfs_release_path(root, path); | ||
| 2819 | |||
| 2820 | di = btrfs_lookup_dir_index_item(trans, root, path, dir->i_ino, index, | ||
| 2821 | dentry->d_name.name, dentry->d_name.len, 0); | ||
| 2822 | if (IS_ERR(di)) { | ||
| 2823 | err = PTR_ERR(di); | ||
| 2824 | goto out; | ||
| 2825 | } | ||
| 2826 | BUG_ON(ret == -ENOENT); | ||
| 2827 | if (check_path_shared(root, path)) | ||
| 2828 | goto out; | ||
| 2829 | |||
| 2830 | err = 0; | ||
| 2831 | out: | ||
| 2832 | btrfs_free_path(path); | ||
| 2833 | if (err) { | ||
| 2834 | btrfs_end_transaction(trans, root); | ||
| 2835 | root->fs_info->enospc_unlink = 0; | ||
| 2836 | return ERR_PTR(err); | ||
| 2837 | } | ||
| 2838 | |||
| 2839 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
| 2840 | return trans; | ||
| 2841 | } | ||
| 2842 | |||
| 2843 | static void __unlink_end_trans(struct btrfs_trans_handle *trans, | ||
| 2844 | struct btrfs_root *root) | ||
| 2845 | { | ||
| 2846 | if (trans->block_rsv == &root->fs_info->global_block_rsv) { | ||
| 2847 | BUG_ON(!root->fs_info->enospc_unlink); | ||
| 2848 | root->fs_info->enospc_unlink = 0; | ||
| 2849 | } | ||
| 2850 | btrfs_end_transaction_throttle(trans, root); | ||
| 2851 | } | ||
| 2852 | |||
| 2853 | static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | ||
| 2854 | { | ||
| 2855 | struct btrfs_root *root = BTRFS_I(dir)->root; | ||
| 2856 | struct btrfs_trans_handle *trans; | ||
| 2857 | struct inode *inode = dentry->d_inode; | ||
| 2858 | int ret; | ||
| 2859 | unsigned long nr = 0; | ||
| 2860 | |||
| 2861 | trans = __unlink_start_trans(dir, dentry); | ||
| 2862 | if (IS_ERR(trans)) | ||
| 2863 | return PTR_ERR(trans); | ||
| 2504 | 2864 | ||
| 2505 | btrfs_set_trans_block_group(trans, dir); | 2865 | btrfs_set_trans_block_group(trans, dir); |
| 2506 | 2866 | ||
| @@ -2508,14 +2868,15 @@ static int btrfs_unlink(struct inode *dir, struct dentry *dentry) | |||
| 2508 | 2868 | ||
| 2509 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, | 2869 | ret = btrfs_unlink_inode(trans, root, dir, dentry->d_inode, |
| 2510 | dentry->d_name.name, dentry->d_name.len); | 2870 | dentry->d_name.name, dentry->d_name.len); |
| 2871 | BUG_ON(ret); | ||
| 2511 | 2872 | ||
| 2512 | if (inode->i_nlink == 0) | 2873 | if (inode->i_nlink == 0) { |
| 2513 | ret = btrfs_orphan_add(trans, inode); | 2874 | ret = btrfs_orphan_add(trans, inode); |
| 2875 | BUG_ON(ret); | ||
| 2876 | } | ||
| 2514 | 2877 | ||
| 2515 | nr = trans->blocks_used; | 2878 | nr = trans->blocks_used; |
| 2516 | 2879 | __unlink_end_trans(trans, root); | |
| 2517 | btrfs_end_transaction_throttle(trans, root); | ||
| 2518 | btrfs_unreserve_metadata_space(root, 6); | ||
| 2519 | btrfs_btree_balance_dirty(root, nr); | 2880 | btrfs_btree_balance_dirty(root, nr); |
| 2520 | return ret; | 2881 | return ret; |
| 2521 | } | 2882 | } |
| @@ -2587,7 +2948,6 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 2587 | { | 2948 | { |
| 2588 | struct inode *inode = dentry->d_inode; | 2949 | struct inode *inode = dentry->d_inode; |
| 2589 | int err = 0; | 2950 | int err = 0; |
| 2590 | int ret; | ||
| 2591 | struct btrfs_root *root = BTRFS_I(dir)->root; | 2951 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 2592 | struct btrfs_trans_handle *trans; | 2952 | struct btrfs_trans_handle *trans; |
| 2593 | unsigned long nr = 0; | 2953 | unsigned long nr = 0; |
| @@ -2596,15 +2956,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 2596 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 2956 | inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 2597 | return -ENOTEMPTY; | 2957 | return -ENOTEMPTY; |
| 2598 | 2958 | ||
| 2599 | ret = btrfs_reserve_metadata_space(root, 5); | 2959 | trans = __unlink_start_trans(dir, dentry); |
| 2600 | if (ret) | 2960 | if (IS_ERR(trans)) |
| 2601 | return ret; | ||
| 2602 | |||
| 2603 | trans = btrfs_start_transaction(root, 1); | ||
| 2604 | if (IS_ERR(trans)) { | ||
| 2605 | btrfs_unreserve_metadata_space(root, 5); | ||
| 2606 | return PTR_ERR(trans); | 2961 | return PTR_ERR(trans); |
| 2607 | } | ||
| 2608 | 2962 | ||
| 2609 | btrfs_set_trans_block_group(trans, dir); | 2963 | btrfs_set_trans_block_group(trans, dir); |
| 2610 | 2964 | ||
| @@ -2627,12 +2981,9 @@ static int btrfs_rmdir(struct inode *dir, struct dentry *dentry) | |||
| 2627 | btrfs_i_size_write(inode, 0); | 2981 | btrfs_i_size_write(inode, 0); |
| 2628 | out: | 2982 | out: |
| 2629 | nr = trans->blocks_used; | 2983 | nr = trans->blocks_used; |
| 2630 | ret = btrfs_end_transaction_throttle(trans, root); | 2984 | __unlink_end_trans(trans, root); |
| 2631 | btrfs_unreserve_metadata_space(root, 5); | ||
| 2632 | btrfs_btree_balance_dirty(root, nr); | 2985 | btrfs_btree_balance_dirty(root, nr); |
| 2633 | 2986 | ||
| 2634 | if (ret && !err) | ||
| 2635 | err = ret; | ||
| 2636 | return err; | 2987 | return err; |
| 2637 | } | 2988 | } |
| 2638 | 2989 | ||
| @@ -3029,6 +3380,7 @@ out: | |||
| 3029 | if (pending_del_nr) { | 3380 | if (pending_del_nr) { |
| 3030 | ret = btrfs_del_items(trans, root, path, pending_del_slot, | 3381 | ret = btrfs_del_items(trans, root, path, pending_del_slot, |
| 3031 | pending_del_nr); | 3382 | pending_del_nr); |
| 3383 | BUG_ON(ret); | ||
| 3032 | } | 3384 | } |
| 3033 | btrfs_free_path(path); | 3385 | btrfs_free_path(path); |
| 3034 | return err; | 3386 | return err; |
| @@ -3056,11 +3408,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 3056 | 3408 | ||
| 3057 | if ((offset & (blocksize - 1)) == 0) | 3409 | if ((offset & (blocksize - 1)) == 0) |
| 3058 | goto out; | 3410 | goto out; |
| 3059 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 3411 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 3060 | if (ret) | ||
| 3061 | goto out; | ||
| 3062 | |||
| 3063 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 3064 | if (ret) | 3412 | if (ret) |
| 3065 | goto out; | 3413 | goto out; |
| 3066 | 3414 | ||
| @@ -3068,8 +3416,7 @@ static int btrfs_truncate_page(struct address_space *mapping, loff_t from) | |||
| 3068 | again: | 3416 | again: |
| 3069 | page = grab_cache_page(mapping, index); | 3417 | page = grab_cache_page(mapping, index); |
| 3070 | if (!page) { | 3418 | if (!page) { |
| 3071 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3419 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 3072 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 3073 | goto out; | 3420 | goto out; |
| 3074 | } | 3421 | } |
| 3075 | 3422 | ||
| @@ -3132,8 +3479,7 @@ again: | |||
| 3132 | 3479 | ||
| 3133 | out_unlock: | 3480 | out_unlock: |
| 3134 | if (ret) | 3481 | if (ret) |
| 3135 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | 3482 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 3136 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 3137 | unlock_page(page); | 3483 | unlock_page(page); |
| 3138 | page_cache_release(page); | 3484 | page_cache_release(page); |
| 3139 | out: | 3485 | out: |
| @@ -3145,7 +3491,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 3145 | struct btrfs_trans_handle *trans; | 3491 | struct btrfs_trans_handle *trans; |
| 3146 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3492 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3147 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | 3493 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; |
| 3148 | struct extent_map *em; | 3494 | struct extent_map *em = NULL; |
| 3149 | struct extent_state *cached_state = NULL; | 3495 | struct extent_state *cached_state = NULL; |
| 3150 | u64 mask = root->sectorsize - 1; | 3496 | u64 mask = root->sectorsize - 1; |
| 3151 | u64 hole_start = (inode->i_size + mask) & ~mask; | 3497 | u64 hole_start = (inode->i_size + mask) & ~mask; |
| @@ -3183,11 +3529,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 3183 | u64 hint_byte = 0; | 3529 | u64 hint_byte = 0; |
| 3184 | hole_size = last_byte - cur_offset; | 3530 | hole_size = last_byte - cur_offset; |
| 3185 | 3531 | ||
| 3186 | err = btrfs_reserve_metadata_space(root, 2); | 3532 | trans = btrfs_start_transaction(root, 2); |
| 3187 | if (err) | 3533 | if (IS_ERR(trans)) { |
| 3534 | err = PTR_ERR(trans); | ||
| 3188 | break; | 3535 | break; |
| 3189 | 3536 | } | |
| 3190 | trans = btrfs_start_transaction(root, 1); | ||
| 3191 | btrfs_set_trans_block_group(trans, inode); | 3537 | btrfs_set_trans_block_group(trans, inode); |
| 3192 | 3538 | ||
| 3193 | err = btrfs_drop_extents(trans, inode, cur_offset, | 3539 | err = btrfs_drop_extents(trans, inode, cur_offset, |
| @@ -3205,14 +3551,15 @@ int btrfs_cont_expand(struct inode *inode, loff_t size) | |||
| 3205 | last_byte - 1, 0); | 3551 | last_byte - 1, 0); |
| 3206 | 3552 | ||
| 3207 | btrfs_end_transaction(trans, root); | 3553 | btrfs_end_transaction(trans, root); |
| 3208 | btrfs_unreserve_metadata_space(root, 2); | ||
| 3209 | } | 3554 | } |
| 3210 | free_extent_map(em); | 3555 | free_extent_map(em); |
| 3556 | em = NULL; | ||
| 3211 | cur_offset = last_byte; | 3557 | cur_offset = last_byte; |
| 3212 | if (cur_offset >= block_end) | 3558 | if (cur_offset >= block_end) |
| 3213 | break; | 3559 | break; |
| 3214 | } | 3560 | } |
| 3215 | 3561 | ||
| 3562 | free_extent_map(em); | ||
| 3216 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, | 3563 | unlock_extent_cached(io_tree, hole_start, block_end - 1, &cached_state, |
| 3217 | GFP_NOFS); | 3564 | GFP_NOFS); |
| 3218 | return err; | 3565 | return err; |
| @@ -3239,11 +3586,10 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
| 3239 | } | 3586 | } |
| 3240 | } | 3587 | } |
| 3241 | 3588 | ||
| 3242 | ret = btrfs_reserve_metadata_space(root, 1); | 3589 | trans = btrfs_start_transaction(root, 5); |
| 3243 | if (ret) | 3590 | if (IS_ERR(trans)) |
| 3244 | return ret; | 3591 | return PTR_ERR(trans); |
| 3245 | 3592 | ||
| 3246 | trans = btrfs_start_transaction(root, 1); | ||
| 3247 | btrfs_set_trans_block_group(trans, inode); | 3593 | btrfs_set_trans_block_group(trans, inode); |
| 3248 | 3594 | ||
| 3249 | ret = btrfs_orphan_add(trans, inode); | 3595 | ret = btrfs_orphan_add(trans, inode); |
| @@ -3251,7 +3597,6 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
| 3251 | 3597 | ||
| 3252 | nr = trans->blocks_used; | 3598 | nr = trans->blocks_used; |
| 3253 | btrfs_end_transaction(trans, root); | 3599 | btrfs_end_transaction(trans, root); |
| 3254 | btrfs_unreserve_metadata_space(root, 1); | ||
| 3255 | btrfs_btree_balance_dirty(root, nr); | 3600 | btrfs_btree_balance_dirty(root, nr); |
| 3256 | 3601 | ||
| 3257 | if (attr->ia_size > inode->i_size) { | 3602 | if (attr->ia_size > inode->i_size) { |
| @@ -3264,8 +3609,11 @@ static int btrfs_setattr_size(struct inode *inode, struct iattr *attr) | |||
| 3264 | i_size_write(inode, attr->ia_size); | 3609 | i_size_write(inode, attr->ia_size); |
| 3265 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 3610 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
| 3266 | 3611 | ||
| 3267 | trans = btrfs_start_transaction(root, 1); | 3612 | trans = btrfs_start_transaction(root, 0); |
| 3613 | BUG_ON(IS_ERR(trans)); | ||
| 3268 | btrfs_set_trans_block_group(trans, inode); | 3614 | btrfs_set_trans_block_group(trans, inode); |
| 3615 | trans->block_rsv = root->orphan_block_rsv; | ||
| 3616 | BUG_ON(!trans->block_rsv); | ||
| 3269 | 3617 | ||
| 3270 | ret = btrfs_update_inode(trans, root, inode); | 3618 | ret = btrfs_update_inode(trans, root, inode); |
| 3271 | BUG_ON(ret); | 3619 | BUG_ON(ret); |
| @@ -3345,10 +3693,21 @@ void btrfs_delete_inode(struct inode *inode) | |||
| 3345 | btrfs_i_size_write(inode, 0); | 3693 | btrfs_i_size_write(inode, 0); |
| 3346 | 3694 | ||
| 3347 | while (1) { | 3695 | while (1) { |
| 3348 | trans = btrfs_start_transaction(root, 1); | 3696 | trans = btrfs_start_transaction(root, 0); |
| 3697 | BUG_ON(IS_ERR(trans)); | ||
| 3349 | btrfs_set_trans_block_group(trans, inode); | 3698 | btrfs_set_trans_block_group(trans, inode); |
| 3350 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | 3699 | trans->block_rsv = root->orphan_block_rsv; |
| 3700 | |||
| 3701 | ret = btrfs_block_rsv_check(trans, root, | ||
| 3702 | root->orphan_block_rsv, 0, 5); | ||
| 3703 | if (ret) { | ||
| 3704 | BUG_ON(ret != -EAGAIN); | ||
| 3705 | ret = btrfs_commit_transaction(trans, root); | ||
| 3706 | BUG_ON(ret); | ||
| 3707 | continue; | ||
| 3708 | } | ||
| 3351 | 3709 | ||
| 3710 | ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0); | ||
| 3352 | if (ret != -EAGAIN) | 3711 | if (ret != -EAGAIN) |
| 3353 | break; | 3712 | break; |
| 3354 | 3713 | ||
| @@ -3356,6 +3715,7 @@ void btrfs_delete_inode(struct inode *inode) | |||
| 3356 | btrfs_end_transaction(trans, root); | 3715 | btrfs_end_transaction(trans, root); |
| 3357 | trans = NULL; | 3716 | trans = NULL; |
| 3358 | btrfs_btree_balance_dirty(root, nr); | 3717 | btrfs_btree_balance_dirty(root, nr); |
| 3718 | |||
| 3359 | } | 3719 | } |
| 3360 | 3720 | ||
| 3361 | if (ret == 0) { | 3721 | if (ret == 0) { |
| @@ -3596,40 +3956,10 @@ again: | |||
| 3596 | return 0; | 3956 | return 0; |
| 3597 | } | 3957 | } |
| 3598 | 3958 | ||
| 3599 | static noinline void init_btrfs_i(struct inode *inode) | ||
| 3600 | { | ||
| 3601 | struct btrfs_inode *bi = BTRFS_I(inode); | ||
| 3602 | |||
| 3603 | bi->generation = 0; | ||
| 3604 | bi->sequence = 0; | ||
| 3605 | bi->last_trans = 0; | ||
| 3606 | bi->last_sub_trans = 0; | ||
| 3607 | bi->logged_trans = 0; | ||
| 3608 | bi->delalloc_bytes = 0; | ||
| 3609 | bi->reserved_bytes = 0; | ||
| 3610 | bi->disk_i_size = 0; | ||
| 3611 | bi->flags = 0; | ||
| 3612 | bi->index_cnt = (u64)-1; | ||
| 3613 | bi->last_unlink_trans = 0; | ||
| 3614 | bi->ordered_data_close = 0; | ||
| 3615 | bi->force_compress = 0; | ||
| 3616 | extent_map_tree_init(&BTRFS_I(inode)->extent_tree, GFP_NOFS); | ||
| 3617 | extent_io_tree_init(&BTRFS_I(inode)->io_tree, | ||
| 3618 | inode->i_mapping, GFP_NOFS); | ||
| 3619 | extent_io_tree_init(&BTRFS_I(inode)->io_failure_tree, | ||
| 3620 | inode->i_mapping, GFP_NOFS); | ||
| 3621 | INIT_LIST_HEAD(&BTRFS_I(inode)->delalloc_inodes); | ||
| 3622 | INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations); | ||
| 3623 | RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node); | ||
| 3624 | btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree); | ||
| 3625 | mutex_init(&BTRFS_I(inode)->log_mutex); | ||
| 3626 | } | ||
| 3627 | |||
| 3628 | static int btrfs_init_locked_inode(struct inode *inode, void *p) | 3959 | static int btrfs_init_locked_inode(struct inode *inode, void *p) |
| 3629 | { | 3960 | { |
| 3630 | struct btrfs_iget_args *args = p; | 3961 | struct btrfs_iget_args *args = p; |
| 3631 | inode->i_ino = args->ino; | 3962 | inode->i_ino = args->ino; |
| 3632 | init_btrfs_i(inode); | ||
| 3633 | BTRFS_I(inode)->root = args->root; | 3963 | BTRFS_I(inode)->root = args->root; |
| 3634 | btrfs_set_inode_space_info(args->root, inode); | 3964 | btrfs_set_inode_space_info(args->root, inode); |
| 3635 | return 0; | 3965 | return 0; |
| @@ -3692,8 +4022,6 @@ static struct inode *new_simple_dir(struct super_block *s, | |||
| 3692 | if (!inode) | 4022 | if (!inode) |
| 3693 | return ERR_PTR(-ENOMEM); | 4023 | return ERR_PTR(-ENOMEM); |
| 3694 | 4024 | ||
| 3695 | init_btrfs_i(inode); | ||
| 3696 | |||
| 3697 | BTRFS_I(inode)->root = root; | 4025 | BTRFS_I(inode)->root = root; |
| 3698 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); | 4026 | memcpy(&BTRFS_I(inode)->location, key, sizeof(*key)); |
| 3699 | BTRFS_I(inode)->dummy_inode = 1; | 4027 | BTRFS_I(inode)->dummy_inode = 1; |
| @@ -3950,7 +4278,7 @@ int btrfs_write_inode(struct inode *inode, struct writeback_control *wbc) | |||
| 3950 | struct btrfs_trans_handle *trans; | 4278 | struct btrfs_trans_handle *trans; |
| 3951 | int ret = 0; | 4279 | int ret = 0; |
| 3952 | 4280 | ||
| 3953 | if (root->fs_info->btree_inode == inode) | 4281 | if (BTRFS_I(inode)->dummy_inode) |
| 3954 | return 0; | 4282 | return 0; |
| 3955 | 4283 | ||
| 3956 | if (wbc->sync_mode == WB_SYNC_ALL) { | 4284 | if (wbc->sync_mode == WB_SYNC_ALL) { |
| @@ -3971,10 +4299,38 @@ void btrfs_dirty_inode(struct inode *inode) | |||
| 3971 | { | 4299 | { |
| 3972 | struct btrfs_root *root = BTRFS_I(inode)->root; | 4300 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 3973 | struct btrfs_trans_handle *trans; | 4301 | struct btrfs_trans_handle *trans; |
| 4302 | int ret; | ||
| 4303 | |||
| 4304 | if (BTRFS_I(inode)->dummy_inode) | ||
| 4305 | return; | ||
| 3974 | 4306 | ||
| 3975 | trans = btrfs_join_transaction(root, 1); | 4307 | trans = btrfs_join_transaction(root, 1); |
| 3976 | btrfs_set_trans_block_group(trans, inode); | 4308 | btrfs_set_trans_block_group(trans, inode); |
| 3977 | btrfs_update_inode(trans, root, inode); | 4309 | |
| 4310 | ret = btrfs_update_inode(trans, root, inode); | ||
| 4311 | if (ret && ret == -ENOSPC) { | ||
| 4312 | /* whoops, lets try again with the full transaction */ | ||
| 4313 | btrfs_end_transaction(trans, root); | ||
| 4314 | trans = btrfs_start_transaction(root, 1); | ||
| 4315 | if (IS_ERR(trans)) { | ||
| 4316 | if (printk_ratelimit()) { | ||
| 4317 | printk(KERN_ERR "btrfs: fail to " | ||
| 4318 | "dirty inode %lu error %ld\n", | ||
| 4319 | inode->i_ino, PTR_ERR(trans)); | ||
| 4320 | } | ||
| 4321 | return; | ||
| 4322 | } | ||
| 4323 | btrfs_set_trans_block_group(trans, inode); | ||
| 4324 | |||
| 4325 | ret = btrfs_update_inode(trans, root, inode); | ||
| 4326 | if (ret) { | ||
| 4327 | if (printk_ratelimit()) { | ||
| 4328 | printk(KERN_ERR "btrfs: fail to " | ||
| 4329 | "dirty inode %lu error %d\n", | ||
| 4330 | inode->i_ino, ret); | ||
| 4331 | } | ||
| 4332 | } | ||
| 4333 | } | ||
| 3978 | btrfs_end_transaction(trans, root); | 4334 | btrfs_end_transaction(trans, root); |
| 3979 | } | 4335 | } |
| 3980 | 4336 | ||
| @@ -4092,7 +4448,6 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans, | |||
| 4092 | * btrfs_get_inode_index_count has an explanation for the magic | 4448 | * btrfs_get_inode_index_count has an explanation for the magic |
| 4093 | * number | 4449 | * number |
| 4094 | */ | 4450 | */ |
| 4095 | init_btrfs_i(inode); | ||
| 4096 | BTRFS_I(inode)->index_cnt = 2; | 4451 | BTRFS_I(inode)->index_cnt = 2; |
| 4097 | BTRFS_I(inode)->root = root; | 4452 | BTRFS_I(inode)->root = root; |
| 4098 | BTRFS_I(inode)->generation = trans->transid; | 4453 | BTRFS_I(inode)->generation = trans->transid; |
| @@ -4247,26 +4602,21 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4247 | if (!new_valid_dev(rdev)) | 4602 | if (!new_valid_dev(rdev)) |
| 4248 | return -EINVAL; | 4603 | return -EINVAL; |
| 4249 | 4604 | ||
| 4605 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
| 4606 | if (err) | ||
| 4607 | return err; | ||
| 4608 | |||
| 4250 | /* | 4609 | /* |
| 4251 | * 2 for inode item and ref | 4610 | * 2 for inode item and ref |
| 4252 | * 2 for dir items | 4611 | * 2 for dir items |
| 4253 | * 1 for xattr if selinux is on | 4612 | * 1 for xattr if selinux is on |
| 4254 | */ | 4613 | */ |
| 4255 | err = btrfs_reserve_metadata_space(root, 5); | 4614 | trans = btrfs_start_transaction(root, 5); |
| 4256 | if (err) | 4615 | if (IS_ERR(trans)) |
| 4257 | return err; | 4616 | return PTR_ERR(trans); |
| 4258 | 4617 | ||
| 4259 | trans = btrfs_start_transaction(root, 1); | ||
| 4260 | if (!trans) | ||
| 4261 | goto fail; | ||
| 4262 | btrfs_set_trans_block_group(trans, dir); | 4618 | btrfs_set_trans_block_group(trans, dir); |
| 4263 | 4619 | ||
| 4264 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 4265 | if (err) { | ||
| 4266 | err = -ENOSPC; | ||
| 4267 | goto out_unlock; | ||
| 4268 | } | ||
| 4269 | |||
| 4270 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4620 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
| 4271 | dentry->d_name.len, | 4621 | dentry->d_name.len, |
| 4272 | dentry->d_parent->d_inode->i_ino, objectid, | 4622 | dentry->d_parent->d_inode->i_ino, objectid, |
| @@ -4295,13 +4645,11 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, | |||
| 4295 | out_unlock: | 4645 | out_unlock: |
| 4296 | nr = trans->blocks_used; | 4646 | nr = trans->blocks_used; |
| 4297 | btrfs_end_transaction_throttle(trans, root); | 4647 | btrfs_end_transaction_throttle(trans, root); |
| 4298 | fail: | 4648 | btrfs_btree_balance_dirty(root, nr); |
| 4299 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4300 | if (drop_inode) { | 4649 | if (drop_inode) { |
| 4301 | inode_dec_link_count(inode); | 4650 | inode_dec_link_count(inode); |
| 4302 | iput(inode); | 4651 | iput(inode); |
| 4303 | } | 4652 | } |
| 4304 | btrfs_btree_balance_dirty(root, nr); | ||
| 4305 | return err; | 4653 | return err; |
| 4306 | } | 4654 | } |
| 4307 | 4655 | ||
| @@ -4311,32 +4659,26 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4311 | struct btrfs_trans_handle *trans; | 4659 | struct btrfs_trans_handle *trans; |
| 4312 | struct btrfs_root *root = BTRFS_I(dir)->root; | 4660 | struct btrfs_root *root = BTRFS_I(dir)->root; |
| 4313 | struct inode *inode = NULL; | 4661 | struct inode *inode = NULL; |
| 4314 | int err; | ||
| 4315 | int drop_inode = 0; | 4662 | int drop_inode = 0; |
| 4663 | int err; | ||
| 4316 | unsigned long nr = 0; | 4664 | unsigned long nr = 0; |
| 4317 | u64 objectid; | 4665 | u64 objectid; |
| 4318 | u64 index = 0; | 4666 | u64 index = 0; |
| 4319 | 4667 | ||
| 4668 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
| 4669 | if (err) | ||
| 4670 | return err; | ||
| 4320 | /* | 4671 | /* |
| 4321 | * 2 for inode item and ref | 4672 | * 2 for inode item and ref |
| 4322 | * 2 for dir items | 4673 | * 2 for dir items |
| 4323 | * 1 for xattr if selinux is on | 4674 | * 1 for xattr if selinux is on |
| 4324 | */ | 4675 | */ |
| 4325 | err = btrfs_reserve_metadata_space(root, 5); | 4676 | trans = btrfs_start_transaction(root, 5); |
| 4326 | if (err) | 4677 | if (IS_ERR(trans)) |
| 4327 | return err; | 4678 | return PTR_ERR(trans); |
| 4328 | 4679 | ||
| 4329 | trans = btrfs_start_transaction(root, 1); | ||
| 4330 | if (!trans) | ||
| 4331 | goto fail; | ||
| 4332 | btrfs_set_trans_block_group(trans, dir); | 4680 | btrfs_set_trans_block_group(trans, dir); |
| 4333 | 4681 | ||
| 4334 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 4335 | if (err) { | ||
| 4336 | err = -ENOSPC; | ||
| 4337 | goto out_unlock; | ||
| 4338 | } | ||
| 4339 | |||
| 4340 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4682 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
| 4341 | dentry->d_name.len, | 4683 | dentry->d_name.len, |
| 4342 | dentry->d_parent->d_inode->i_ino, | 4684 | dentry->d_parent->d_inode->i_ino, |
| @@ -4368,8 +4710,6 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry, | |||
| 4368 | out_unlock: | 4710 | out_unlock: |
| 4369 | nr = trans->blocks_used; | 4711 | nr = trans->blocks_used; |
| 4370 | btrfs_end_transaction_throttle(trans, root); | 4712 | btrfs_end_transaction_throttle(trans, root); |
| 4371 | fail: | ||
| 4372 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4373 | if (drop_inode) { | 4713 | if (drop_inode) { |
| 4374 | inode_dec_link_count(inode); | 4714 | inode_dec_link_count(inode); |
| 4375 | iput(inode); | 4715 | iput(inode); |
| @@ -4396,21 +4736,21 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4396 | if (root->objectid != BTRFS_I(inode)->root->objectid) | 4736 | if (root->objectid != BTRFS_I(inode)->root->objectid) |
| 4397 | return -EPERM; | 4737 | return -EPERM; |
| 4398 | 4738 | ||
| 4399 | /* | ||
| 4400 | * 1 item for inode ref | ||
| 4401 | * 2 items for dir items | ||
| 4402 | */ | ||
| 4403 | err = btrfs_reserve_metadata_space(root, 3); | ||
| 4404 | if (err) | ||
| 4405 | return err; | ||
| 4406 | |||
| 4407 | btrfs_inc_nlink(inode); | 4739 | btrfs_inc_nlink(inode); |
| 4408 | 4740 | ||
| 4409 | err = btrfs_set_inode_index(dir, &index); | 4741 | err = btrfs_set_inode_index(dir, &index); |
| 4410 | if (err) | 4742 | if (err) |
| 4411 | goto fail; | 4743 | goto fail; |
| 4412 | 4744 | ||
| 4413 | trans = btrfs_start_transaction(root, 1); | 4745 | /* |
| 4746 | * 1 item for inode ref | ||
| 4747 | * 2 items for dir items | ||
| 4748 | */ | ||
| 4749 | trans = btrfs_start_transaction(root, 3); | ||
| 4750 | if (IS_ERR(trans)) { | ||
| 4751 | err = PTR_ERR(trans); | ||
| 4752 | goto fail; | ||
| 4753 | } | ||
| 4414 | 4754 | ||
| 4415 | btrfs_set_trans_block_group(trans, dir); | 4755 | btrfs_set_trans_block_group(trans, dir); |
| 4416 | atomic_inc(&inode->i_count); | 4756 | atomic_inc(&inode->i_count); |
| @@ -4429,7 +4769,6 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, | |||
| 4429 | nr = trans->blocks_used; | 4769 | nr = trans->blocks_used; |
| 4430 | btrfs_end_transaction_throttle(trans, root); | 4770 | btrfs_end_transaction_throttle(trans, root); |
| 4431 | fail: | 4771 | fail: |
| 4432 | btrfs_unreserve_metadata_space(root, 3); | ||
| 4433 | if (drop_inode) { | 4772 | if (drop_inode) { |
| 4434 | inode_dec_link_count(inode); | 4773 | inode_dec_link_count(inode); |
| 4435 | iput(inode); | 4774 | iput(inode); |
| @@ -4449,28 +4788,20 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 4449 | u64 index = 0; | 4788 | u64 index = 0; |
| 4450 | unsigned long nr = 1; | 4789 | unsigned long nr = 1; |
| 4451 | 4790 | ||
| 4791 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
| 4792 | if (err) | ||
| 4793 | return err; | ||
| 4794 | |||
| 4452 | /* | 4795 | /* |
| 4453 | * 2 items for inode and ref | 4796 | * 2 items for inode and ref |
| 4454 | * 2 items for dir items | 4797 | * 2 items for dir items |
| 4455 | * 1 for xattr if selinux is on | 4798 | * 1 for xattr if selinux is on |
| 4456 | */ | 4799 | */ |
| 4457 | err = btrfs_reserve_metadata_space(root, 5); | 4800 | trans = btrfs_start_transaction(root, 5); |
| 4458 | if (err) | 4801 | if (IS_ERR(trans)) |
| 4459 | return err; | 4802 | return PTR_ERR(trans); |
| 4460 | |||
| 4461 | trans = btrfs_start_transaction(root, 1); | ||
| 4462 | if (!trans) { | ||
| 4463 | err = -ENOMEM; | ||
| 4464 | goto out_unlock; | ||
| 4465 | } | ||
| 4466 | btrfs_set_trans_block_group(trans, dir); | 4803 | btrfs_set_trans_block_group(trans, dir); |
| 4467 | 4804 | ||
| 4468 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 4469 | if (err) { | ||
| 4470 | err = -ENOSPC; | ||
| 4471 | goto out_fail; | ||
| 4472 | } | ||
| 4473 | |||
| 4474 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 4805 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
| 4475 | dentry->d_name.len, | 4806 | dentry->d_name.len, |
| 4476 | dentry->d_parent->d_inode->i_ino, objectid, | 4807 | dentry->d_parent->d_inode->i_ino, objectid, |
| @@ -4510,9 +4841,6 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode) | |||
| 4510 | out_fail: | 4841 | out_fail: |
| 4511 | nr = trans->blocks_used; | 4842 | nr = trans->blocks_used; |
| 4512 | btrfs_end_transaction_throttle(trans, root); | 4843 | btrfs_end_transaction_throttle(trans, root); |
| 4513 | |||
| 4514 | out_unlock: | ||
| 4515 | btrfs_unreserve_metadata_space(root, 5); | ||
| 4516 | if (drop_on_err) | 4844 | if (drop_on_err) |
| 4517 | iput(inode); | 4845 | iput(inode); |
| 4518 | btrfs_btree_balance_dirty(root, nr); | 4846 | btrfs_btree_balance_dirty(root, nr); |
| @@ -4770,6 +5098,7 @@ again: | |||
| 4770 | } | 5098 | } |
| 4771 | flush_dcache_page(page); | 5099 | flush_dcache_page(page); |
| 4772 | } else if (create && PageUptodate(page)) { | 5100 | } else if (create && PageUptodate(page)) { |
| 5101 | WARN_ON(1); | ||
| 4773 | if (!trans) { | 5102 | if (!trans) { |
| 4774 | kunmap(page); | 5103 | kunmap(page); |
| 4775 | free_extent_map(em); | 5104 | free_extent_map(em); |
| @@ -4866,11 +5195,651 @@ out: | |||
| 4866 | return em; | 5195 | return em; |
| 4867 | } | 5196 | } |
| 4868 | 5197 | ||
| 5198 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | ||
| 5199 | u64 start, u64 len) | ||
| 5200 | { | ||
| 5201 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5202 | struct btrfs_trans_handle *trans; | ||
| 5203 | struct extent_map *em; | ||
| 5204 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
| 5205 | struct btrfs_key ins; | ||
| 5206 | u64 alloc_hint; | ||
| 5207 | int ret; | ||
| 5208 | |||
| 5209 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
| 5210 | |||
| 5211 | trans = btrfs_join_transaction(root, 0); | ||
| 5212 | if (!trans) | ||
| 5213 | return ERR_PTR(-ENOMEM); | ||
| 5214 | |||
| 5215 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 5216 | |||
| 5217 | alloc_hint = get_extent_allocation_hint(inode, start, len); | ||
| 5218 | ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, | ||
| 5219 | alloc_hint, (u64)-1, &ins, 1); | ||
| 5220 | if (ret) { | ||
| 5221 | em = ERR_PTR(ret); | ||
| 5222 | goto out; | ||
| 5223 | } | ||
| 5224 | |||
| 5225 | em = alloc_extent_map(GFP_NOFS); | ||
| 5226 | if (!em) { | ||
| 5227 | em = ERR_PTR(-ENOMEM); | ||
| 5228 | goto out; | ||
| 5229 | } | ||
| 5230 | |||
| 5231 | em->start = start; | ||
| 5232 | em->orig_start = em->start; | ||
| 5233 | em->len = ins.offset; | ||
| 5234 | |||
| 5235 | em->block_start = ins.objectid; | ||
| 5236 | em->block_len = ins.offset; | ||
| 5237 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
| 5238 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
| 5239 | |||
| 5240 | while (1) { | ||
| 5241 | write_lock(&em_tree->lock); | ||
| 5242 | ret = add_extent_mapping(em_tree, em); | ||
| 5243 | write_unlock(&em_tree->lock); | ||
| 5244 | if (ret != -EEXIST) | ||
| 5245 | break; | ||
| 5246 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
| 5247 | } | ||
| 5248 | |||
| 5249 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
| 5250 | ins.offset, ins.offset, 0); | ||
| 5251 | if (ret) { | ||
| 5252 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | ||
| 5253 | em = ERR_PTR(ret); | ||
| 5254 | } | ||
| 5255 | out: | ||
| 5256 | btrfs_end_transaction(trans, root); | ||
| 5257 | return em; | ||
| 5258 | } | ||
| 5259 | |||
/*
 * returns 1 when the nocow is safe, < 1 on error, 0 if the
 * block must be cow'd
 *
 * Decides whether the byte range [offset, offset + len) of @inode may be
 * overwritten in place for O_DIRECT, by checking the file extent item that
 * covers @offset.  Runs under @trans so the extent and backref state cannot
 * change while we look at it.
 */
static noinline int can_nocow_odirect(struct btrfs_trans_handle *trans,
				      struct inode *inode, u64 offset, u64 len)
{
	struct btrfs_path *path;
	int ret;
	struct extent_buffer *leaf;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	struct btrfs_file_extent_item *fi;
	struct btrfs_key key;
	u64 disk_bytenr;
	u64 backref_offset;
	u64 extent_end;
	u64 num_bytes;
	int slot;
	int found_type;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;

	/* find the extent item at or before @offset */
	ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
				       offset, 0);
	if (ret < 0)
		goto out;

	slot = path->slots[0];
	if (ret == 1) {
		/* no exact match; the previous slot may still cover us */
		if (slot == 0) {
			/* can't find the item, must cow */
			ret = 0;
			goto out;
		}
		slot--;
	}
	ret = 0;
	leaf = path->nodes[0];
	btrfs_item_key_to_cpu(leaf, &key, slot);
	if (key.objectid != inode->i_ino ||
	    key.type != BTRFS_EXTENT_DATA_KEY) {
		/* not our file or wrong item type, must cow */
		goto out;
	}

	if (key.offset > offset) {
		/* Wrong offset, must cow */
		goto out;
	}

	fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
	found_type = btrfs_file_extent_type(leaf, fi);
	if (found_type != BTRFS_FILE_EXTENT_REG &&
	    found_type != BTRFS_FILE_EXTENT_PREALLOC) {
		/* not a regular extent, must cow */
		goto out;
	}
	disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
	backref_offset = btrfs_file_extent_offset(leaf, fi);

	extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
	if (extent_end < offset + len) {
		/* extent doesn't include our full range, must cow */
		goto out;
	}

	/* readonly extents (e.g. snapshotted space) can never be rewritten */
	if (btrfs_extent_readonly(root, disk_bytenr))
		goto out;

	/*
	 * look for other files referencing this extent, if we
	 * find any we must cow
	 */
	if (btrfs_cross_ref_exist(trans, root, inode->i_ino,
				  key.offset - backref_offset, disk_bytenr))
		goto out;

	/*
	 * adjust disk_bytenr and num_bytes to cover just the bytes
	 * in this extent we are about to write.  If there
	 * are any csums in that range we have to cow in order
	 * to keep the csums correct
	 */
	disk_bytenr += backref_offset;
	disk_bytenr += offset - key.offset;
	num_bytes = min(offset + len, extent_end) - offset;
	if (csum_exist_in_range(root, disk_bytenr, num_bytes))
		goto out;
	/*
	 * all of the above have passed, it is safe to overwrite this extent
	 * without cow
	 */
	ret = 1;
out:
	btrfs_free_path(path);
	return ret;
}
| 5359 | |||
/*
 * get_block callback for btrfs O_DIRECT: map the file range starting at
 * @iblock into @bh_result.  For writes it either reuses an existing
 * nocow/prealloc extent or allocates a new one; for reads it just maps
 * the existing extent.  Returns 0 on success, -ENOTBLK to force fallback
 * to buffered IO, or a negative errno.
 *
 * NOTE(review): the extent range is expected to already be locked by
 * btrfs_direct_IO(); the unlock/clear paths below rely on that.
 */
static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
				   struct buffer_head *bh_result, int create)
{
	struct extent_map *em;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start = iblock << inode->i_blkbits;
	u64 len = bh_result->b_size;
	struct btrfs_trans_handle *trans;

	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
	if (IS_ERR(em))
		return PTR_ERR(em);

	/*
	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
	 * io.  INLINE is special, and we could probably kludge it in here, but
	 * it's still buffered so for safety lets just fall back to the generic
	 * buffered path.
	 *
	 * For COMPRESSED we _have_ to read the entire extent in so we can
	 * decompress it, so there will be buffering required no matter what we
	 * do, so go ahead and fallback to buffered.
	 *
	 * We return -ENOTBLK because thats what makes DIO go ahead and go back
	 * to buffered IO.  Don't blame me, this is the price we pay for using
	 * the generic code.
	 */
	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
	    em->block_start == EXTENT_MAP_INLINE) {
		free_extent_map(em);
		return -ENOTBLK;
	}

	/* Just a good old fashioned hole, return */
	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
		free_extent_map(em);
		/* DIO will do one hole at a time, so just unlock a sector */
		unlock_extent(&BTRFS_I(inode)->io_tree, start,
			      start + root->sectorsize - 1, GFP_NOFS);
		return 0;
	}

	/*
	 * We don't allocate a new extent in the following cases
	 *
	 * 1) The inode is marked as NODATACOW.  In this case we'll just use the
	 * existing extent.
	 * 2) The extent is marked as PREALLOC.  We're good to go here and can
	 * just use the extent.
	 *
	 */
	if (!create) {
		/* read of an existing extent: clamp len and map it */
		len = em->len - (start - em->start);
		goto map;
	}

	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
	    ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) &&
	     em->block_start != EXTENT_MAP_HOLE)) {
		int type;
		int ret;
		u64 block_start;

		if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
			type = BTRFS_ORDERED_PREALLOC;
		else
			type = BTRFS_ORDERED_NOCOW;
		len = min(len, em->len - (start - em->start));
		block_start = em->block_start + (start - em->start);

		/*
		 * we're not going to log anything, but we do need
		 * to make sure the current transaction stays open
		 * while we look for nocow cross refs
		 */
		trans = btrfs_join_transaction(root, 0);
		if (!trans)
			goto must_cow;

		if (can_nocow_odirect(trans, inode, start, len) == 1) {
			/* safe to overwrite in place: record an ordered extent */
			ret = btrfs_add_ordered_extent_dio(inode, start,
					   block_start, len, len, type);
			btrfs_end_transaction(trans, root);
			if (ret) {
				free_extent_map(em);
				return ret;
			}
			goto unlock;
		}
		btrfs_end_transaction(trans, root);
	}
must_cow:
	/*
	 * this will cow the extent, reset the len in case we changed
	 * it above
	 */
	len = bh_result->b_size;
	free_extent_map(em);
	em = btrfs_new_extent_direct(inode, start, len);
	if (IS_ERR(em))
		return PTR_ERR(em);
	len = min(len, em->len - (start - em->start));
unlock:
	/* drop the lock and any stale delalloc/dirty bits over the IO range */
	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
			 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
			 0, NULL, GFP_NOFS);
map:
	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
		inode->i_blkbits;
	bh_result->b_size = len;
	bh_result->b_bdev = em->bdev;
	set_buffer_mapped(bh_result);
	if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
		set_buffer_new(bh_result);

	free_extent_map(em);

	return 0;
}
| 5480 | |||
/*
 * Per-bio private state for btrfs O_DIRECT, hung on bio->bi_private
 * between btrfs_submit_direct() and the endio handlers.
 */
struct btrfs_dio_private {
	struct inode *inode;	/* inode this IO is against */
	u64 logical_offset;	/* file offset of the first byte of the bio */
	u64 disk_bytenr;	/* on-disk start (bi_sector << 9 at submit) */
	u64 bytes;		/* total length: sum of all bvec lengths */
	u32 *csums;		/* one csum per bvec for reads; NULL if nodatasum */
	void *private;		/* original bi_private, restored before dio_end_io() */
};
| 5489 | |||
/*
 * bio completion for O_DIRECT reads: verify the per-bvec checksums that
 * btrfs_lookup_bio_sums_dio() stashed in dip->csums, unlock the extent
 * range and hand the bio back to the generic DIO code.
 *
 * May run in interrupt context, hence local_irq_save() + KM_IRQ0 around
 * the atomic kmap below.
 */
static void btrfs_endio_direct_read(struct bio *bio, int err)
{
	struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1;
	struct bio_vec *bvec = bio->bi_io_vec;
	struct btrfs_dio_private *dip = bio->bi_private;
	struct inode *inode = dip->inode;
	struct btrfs_root *root = BTRFS_I(inode)->root;
	u64 start;
	u32 *private = dip->csums;

	start = dip->logical_offset;
	do {
		if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) {
			struct page *page = bvec->bv_page;
			char *kaddr;
			u32 csum = ~(u32)0;
			unsigned long flags;

			/* checksum the page data and compare to the stored csum */
			local_irq_save(flags);
			kaddr = kmap_atomic(page, KM_IRQ0);
			csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
					       csum, bvec->bv_len);
			btrfs_csum_final(csum, (char *)&csum);
			kunmap_atomic(kaddr, KM_IRQ0);
			local_irq_restore(flags);

			flush_dcache_page(bvec->bv_page);
			if (csum != *private) {
				printk(KERN_ERR "btrfs csum failed ino %lu off"
				      " %llu csum %u private %u\n",
				      inode->i_ino, (unsigned long long)start,
				      csum, *private);
				err = -EIO;
			}
		}

		/* advance file offset, stored csum and bvec in lockstep */
		start += bvec->bv_len;
		private++;
		bvec++;
	} while (bvec <= bvec_end);

	unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset,
		      dip->logical_offset + dip->bytes - 1, GFP_NOFS);
	/* restore the caller's bi_private before completing the DIO */
	bio->bi_private = dip->private;

	kfree(dip->csums);
	kfree(dip);
	dio_end_io(bio, err);
}
| 5539 | |||
| 5540 | static void btrfs_endio_direct_write(struct bio *bio, int err) | ||
| 5541 | { | ||
| 5542 | struct btrfs_dio_private *dip = bio->bi_private; | ||
| 5543 | struct inode *inode = dip->inode; | ||
| 5544 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5545 | struct btrfs_trans_handle *trans; | ||
| 5546 | struct btrfs_ordered_extent *ordered = NULL; | ||
| 5547 | struct extent_state *cached_state = NULL; | ||
| 5548 | int ret; | ||
| 5549 | |||
| 5550 | if (err) | ||
| 5551 | goto out_done; | ||
| 5552 | |||
| 5553 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | ||
| 5554 | dip->logical_offset, dip->bytes); | ||
| 5555 | if (!ret) | ||
| 5556 | goto out_done; | ||
| 5557 | |||
| 5558 | BUG_ON(!ordered); | ||
| 5559 | |||
| 5560 | trans = btrfs_join_transaction(root, 1); | ||
| 5561 | if (!trans) { | ||
| 5562 | err = -ENOMEM; | ||
| 5563 | goto out; | ||
| 5564 | } | ||
| 5565 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
| 5566 | |||
| 5567 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
| 5568 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
| 5569 | if (!ret) | ||
| 5570 | ret = btrfs_update_inode(trans, root, inode); | ||
| 5571 | err = ret; | ||
| 5572 | goto out; | ||
| 5573 | } | ||
| 5574 | |||
| 5575 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
| 5576 | ordered->file_offset + ordered->len - 1, 0, | ||
| 5577 | &cached_state, GFP_NOFS); | ||
| 5578 | |||
| 5579 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
| 5580 | ret = btrfs_mark_extent_written(trans, inode, | ||
| 5581 | ordered->file_offset, | ||
| 5582 | ordered->file_offset + | ||
| 5583 | ordered->len); | ||
| 5584 | if (ret) { | ||
| 5585 | err = ret; | ||
| 5586 | goto out_unlock; | ||
| 5587 | } | ||
| 5588 | } else { | ||
| 5589 | ret = insert_reserved_file_extent(trans, inode, | ||
| 5590 | ordered->file_offset, | ||
| 5591 | ordered->start, | ||
| 5592 | ordered->disk_len, | ||
| 5593 | ordered->len, | ||
| 5594 | ordered->len, | ||
| 5595 | 0, 0, 0, | ||
| 5596 | BTRFS_FILE_EXTENT_REG); | ||
| 5597 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
| 5598 | ordered->file_offset, ordered->len); | ||
| 5599 | if (ret) { | ||
| 5600 | err = ret; | ||
| 5601 | WARN_ON(1); | ||
| 5602 | goto out_unlock; | ||
| 5603 | } | ||
| 5604 | } | ||
| 5605 | |||
| 5606 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
| 5607 | btrfs_ordered_update_i_size(inode, 0, ordered); | ||
| 5608 | btrfs_update_inode(trans, root, inode); | ||
| 5609 | out_unlock: | ||
| 5610 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
| 5611 | ordered->file_offset + ordered->len - 1, | ||
| 5612 | &cached_state, GFP_NOFS); | ||
| 5613 | out: | ||
| 5614 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
| 5615 | btrfs_end_transaction(trans, root); | ||
| 5616 | btrfs_put_ordered_extent(ordered); | ||
| 5617 | btrfs_put_ordered_extent(ordered); | ||
| 5618 | out_done: | ||
| 5619 | bio->bi_private = dip->private; | ||
| 5620 | |||
| 5621 | kfree(dip->csums); | ||
| 5622 | kfree(dip); | ||
| 5623 | dio_end_io(bio, err); | ||
| 5624 | } | ||
| 5625 | |||
| 5626 | static int __btrfs_submit_bio_start_direct_io(struct inode *inode, int rw, | ||
| 5627 | struct bio *bio, int mirror_num, | ||
| 5628 | unsigned long bio_flags, u64 offset) | ||
| 5629 | { | ||
| 5630 | int ret; | ||
| 5631 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5632 | ret = btrfs_csum_one_bio(root, inode, bio, offset, 1); | ||
| 5633 | BUG_ON(ret); | ||
| 5634 | return 0; | ||
| 5635 | } | ||
| 5636 | |||
| 5637 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | ||
| 5638 | loff_t file_offset) | ||
| 5639 | { | ||
| 5640 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
| 5641 | struct btrfs_dio_private *dip; | ||
| 5642 | struct bio_vec *bvec = bio->bi_io_vec; | ||
| 5643 | u64 start; | ||
| 5644 | int skip_sum; | ||
| 5645 | int write = rw & (1 << BIO_RW); | ||
| 5646 | int ret = 0; | ||
| 5647 | |||
| 5648 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
| 5649 | |||
| 5650 | dip = kmalloc(sizeof(*dip), GFP_NOFS); | ||
| 5651 | if (!dip) { | ||
| 5652 | ret = -ENOMEM; | ||
| 5653 | goto free_ordered; | ||
| 5654 | } | ||
| 5655 | dip->csums = NULL; | ||
| 5656 | |||
| 5657 | if (!skip_sum) { | ||
| 5658 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | ||
| 5659 | if (!dip->csums) { | ||
| 5660 | ret = -ENOMEM; | ||
| 5661 | goto free_ordered; | ||
| 5662 | } | ||
| 5663 | } | ||
| 5664 | |||
| 5665 | dip->private = bio->bi_private; | ||
| 5666 | dip->inode = inode; | ||
| 5667 | dip->logical_offset = file_offset; | ||
| 5668 | |||
| 5669 | start = dip->logical_offset; | ||
| 5670 | dip->bytes = 0; | ||
| 5671 | do { | ||
| 5672 | dip->bytes += bvec->bv_len; | ||
| 5673 | bvec++; | ||
| 5674 | } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); | ||
| 5675 | |||
| 5676 | dip->disk_bytenr = (u64)bio->bi_sector << 9; | ||
| 5677 | bio->bi_private = dip; | ||
| 5678 | |||
| 5679 | if (write) | ||
| 5680 | bio->bi_end_io = btrfs_endio_direct_write; | ||
| 5681 | else | ||
| 5682 | bio->bi_end_io = btrfs_endio_direct_read; | ||
| 5683 | |||
| 5684 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
| 5685 | if (ret) | ||
| 5686 | goto out_err; | ||
| 5687 | |||
| 5688 | if (write && !skip_sum) { | ||
| 5689 | ret = btrfs_wq_submit_bio(BTRFS_I(inode)->root->fs_info, | ||
| 5690 | inode, rw, bio, 0, 0, | ||
| 5691 | dip->logical_offset, | ||
| 5692 | __btrfs_submit_bio_start_direct_io, | ||
| 5693 | __btrfs_submit_bio_done); | ||
| 5694 | if (ret) | ||
| 5695 | goto out_err; | ||
| 5696 | return; | ||
| 5697 | } else if (!skip_sum) | ||
| 5698 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
| 5699 | dip->logical_offset, dip->csums); | ||
| 5700 | |||
| 5701 | ret = btrfs_map_bio(root, rw, bio, 0, 1); | ||
| 5702 | if (ret) | ||
| 5703 | goto out_err; | ||
| 5704 | return; | ||
| 5705 | out_err: | ||
| 5706 | kfree(dip->csums); | ||
| 5707 | kfree(dip); | ||
| 5708 | free_ordered: | ||
| 5709 | /* | ||
| 5710 | * If this is a write, we need to clean up the reserved space and kill | ||
| 5711 | * the ordered extent. | ||
| 5712 | */ | ||
| 5713 | if (write) { | ||
| 5714 | struct btrfs_ordered_extent *ordered; | ||
| 5715 | ordered = btrfs_lookup_ordered_extent(inode, | ||
| 5716 | dip->logical_offset); | ||
| 5717 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | ||
| 5718 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | ||
| 5719 | btrfs_free_reserved_extent(root, ordered->start, | ||
| 5720 | ordered->disk_len); | ||
| 5721 | btrfs_put_ordered_extent(ordered); | ||
| 5722 | btrfs_put_ordered_extent(ordered); | ||
| 5723 | } | ||
| 5724 | bio_endio(bio, ret); | ||
| 5725 | } | ||
| 5726 | |||
| 5727 | static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *iocb, | ||
| 5728 | const struct iovec *iov, loff_t offset, | ||
| 5729 | unsigned long nr_segs) | ||
| 5730 | { | ||
| 5731 | int seg; | ||
| 5732 | size_t size; | ||
| 5733 | unsigned long addr; | ||
| 5734 | unsigned blocksize_mask = root->sectorsize - 1; | ||
| 5735 | ssize_t retval = -EINVAL; | ||
| 5736 | loff_t end = offset; | ||
| 5737 | |||
| 5738 | if (offset & blocksize_mask) | ||
| 5739 | goto out; | ||
| 5740 | |||
| 5741 | /* Check the memory alignment. Blocks cannot straddle pages */ | ||
| 5742 | for (seg = 0; seg < nr_segs; seg++) { | ||
| 5743 | addr = (unsigned long)iov[seg].iov_base; | ||
| 5744 | size = iov[seg].iov_len; | ||
| 5745 | end += size; | ||
| 5746 | if ((addr & blocksize_mask) || (size & blocksize_mask)) | ||
| 5747 | goto out; | ||
| 5748 | } | ||
| 5749 | retval = 0; | ||
| 5750 | out: | ||
| 5751 | return retval; | ||
| 5752 | } | ||
/*
 * address_space direct_IO entry point: check alignment, reserve space for
 * writes, lock the extent range (waiting out any ordered extents), tag it
 * delalloc, and hand off to __blockdev_direct_IO with the btrfs get_block
 * and submit hooks.  Returning 0 (on alignment failure) makes the VFS
 * fall back to buffered IO.
 */
static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
			const struct iovec *iov, loff_t offset,
			unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;
	struct btrfs_ordered_extent *ordered;
	struct extent_state *cached_state = NULL;
	u64 lockstart, lockend;
	ssize_t ret;
	int writing = rw & WRITE;
	int write_bits = 0;
	size_t count = iov_length(iov, nr_segs);

	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
			    offset, nr_segs)) {
		/* unaligned: 0 tells the caller to do buffered IO instead */
		return 0;
	}

	lockstart = offset;
	lockend = offset + count - 1;

	if (writing) {
		/* reserve data + metadata space up front for the whole write */
		ret = btrfs_delalloc_reserve_space(inode, count);
		if (ret)
			goto out;
	}

	while (1) {
		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				 0, &cached_state, GFP_NOFS);
		/*
		 * We're concerned with the entire range that we're going to be
		 * doing DIO to, so we need to make sure theres no ordered
		 * extents in this range.
		 */
		ordered = btrfs_lookup_ordered_range(inode, lockstart,
						     lockend - lockstart + 1);
		if (!ordered)
			break;
		/* drop the lock, wait for the ordered extent, and retry */
		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				     &cached_state, GFP_NOFS);
		btrfs_start_ordered_extent(inode, ordered, 1);
		btrfs_put_ordered_extent(ordered);
		cond_resched();
	}

	/*
	 * we don't use btrfs_set_extent_delalloc because we don't want
	 * the dirty or uptodate bits
	 */
	if (writing) {
		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
				     EXTENT_DELALLOC, 0, NULL, &cached_state,
				     GFP_NOFS);
		if (ret) {
			/* undo the lock and delalloc tagging before bailing */
			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
					 lockend, EXTENT_LOCKED | write_bits,
					 1, 0, &cached_state, GFP_NOFS);
			goto out;
		}
	}

	free_extent_state(cached_state);
	cached_state = NULL;

	ret = __blockdev_direct_IO(rw, iocb, inode,
		   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
		   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
		   btrfs_submit_direct, 0);

	if (ret < 0 && ret != -EIOCBQUEUED) {
		/* hard error: unlock/clear the whole range */
		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
			      offset + iov_length(iov, nr_segs) - 1,
			      EXTENT_LOCKED | write_bits, 1, 0,
			      &cached_state, GFP_NOFS);
	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
		/*
		 * We're falling back to buffered, unlock the section we didn't
		 * do IO on.
		 */
		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
			      offset + iov_length(iov, nr_segs) - 1,
			      EXTENT_LOCKED | write_bits, 1, 0,
			      &cached_state, GFP_NOFS);
	}
out:
	free_extent_state(cached_state);
	return ret;
}
| 4875 | 5844 | ||
| 4876 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 5845 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
| @@ -5034,7 +6003,7 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 5034 | u64 page_start; | 6003 | u64 page_start; |
| 5035 | u64 page_end; | 6004 | u64 page_end; |
| 5036 | 6005 | ||
| 5037 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 6006 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 5038 | if (ret) { | 6007 | if (ret) { |
| 5039 | if (ret == -ENOMEM) | 6008 | if (ret == -ENOMEM) |
| 5040 | ret = VM_FAULT_OOM; | 6009 | ret = VM_FAULT_OOM; |
| @@ -5043,13 +6012,6 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf) | |||
| 5043 | goto out; | 6012 | goto out; |
| 5044 | } | 6013 | } |
| 5045 | 6014 | ||
| 5046 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 5047 | if (ret) { | ||
| 5048 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 5049 | ret = VM_FAULT_SIGBUS; | ||
| 5050 | goto out; | ||
| 5051 | } | ||
| 5052 | |||
| 5053 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ | 6015 | ret = VM_FAULT_NOPAGE; /* make the VM retry the fault */ |
| 5054 | again: | 6016 | again: |
| 5055 | lock_page(page); | 6017 | lock_page(page); |
| @@ -5059,7 +6021,6 @@ again: | |||
| 5059 | 6021 | ||
| 5060 | if ((page->mapping != inode->i_mapping) || | 6022 | if ((page->mapping != inode->i_mapping) || |
| 5061 | (page_start >= size)) { | 6023 | (page_start >= size)) { |
| 5062 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 5063 | /* page got truncated out from underneath us */ | 6024 | /* page got truncated out from underneath us */ |
| 5064 | goto out_unlock; | 6025 | goto out_unlock; |
| 5065 | } | 6026 | } |
| @@ -5100,7 +6061,6 @@ again: | |||
| 5100 | unlock_extent_cached(io_tree, page_start, page_end, | 6061 | unlock_extent_cached(io_tree, page_start, page_end, |
| 5101 | &cached_state, GFP_NOFS); | 6062 | &cached_state, GFP_NOFS); |
| 5102 | ret = VM_FAULT_SIGBUS; | 6063 | ret = VM_FAULT_SIGBUS; |
| 5103 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 5104 | goto out_unlock; | 6064 | goto out_unlock; |
| 5105 | } | 6065 | } |
| 5106 | ret = 0; | 6066 | ret = 0; |
| @@ -5127,10 +6087,10 @@ again: | |||
| 5127 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); | 6087 | unlock_extent_cached(io_tree, page_start, page_end, &cached_state, GFP_NOFS); |
| 5128 | 6088 | ||
| 5129 | out_unlock: | 6089 | out_unlock: |
| 5130 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 5131 | if (!ret) | 6090 | if (!ret) |
| 5132 | return VM_FAULT_LOCKED; | 6091 | return VM_FAULT_LOCKED; |
| 5133 | unlock_page(page); | 6092 | unlock_page(page); |
| 6093 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
| 5134 | out: | 6094 | out: |
| 5135 | return ret; | 6095 | return ret; |
| 5136 | } | 6096 | } |
| @@ -5155,8 +6115,10 @@ static void btrfs_truncate(struct inode *inode) | |||
| 5155 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); | 6115 | btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1); |
| 5156 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); | 6116 | btrfs_ordered_update_i_size(inode, inode->i_size, NULL); |
| 5157 | 6117 | ||
| 5158 | trans = btrfs_start_transaction(root, 1); | 6118 | trans = btrfs_start_transaction(root, 0); |
| 6119 | BUG_ON(IS_ERR(trans)); | ||
| 5159 | btrfs_set_trans_block_group(trans, inode); | 6120 | btrfs_set_trans_block_group(trans, inode); |
| 6121 | trans->block_rsv = root->orphan_block_rsv; | ||
| 5160 | 6122 | ||
| 5161 | /* | 6123 | /* |
| 5162 | * setattr is responsible for setting the ordered_data_close flag, | 6124 | * setattr is responsible for setting the ordered_data_close flag, |
| @@ -5179,6 +6141,23 @@ static void btrfs_truncate(struct inode *inode) | |||
| 5179 | btrfs_add_ordered_operation(trans, root, inode); | 6141 | btrfs_add_ordered_operation(trans, root, inode); |
| 5180 | 6142 | ||
| 5181 | while (1) { | 6143 | while (1) { |
| 6144 | if (!trans) { | ||
| 6145 | trans = btrfs_start_transaction(root, 0); | ||
| 6146 | BUG_ON(IS_ERR(trans)); | ||
| 6147 | btrfs_set_trans_block_group(trans, inode); | ||
| 6148 | trans->block_rsv = root->orphan_block_rsv; | ||
| 6149 | } | ||
| 6150 | |||
| 6151 | ret = btrfs_block_rsv_check(trans, root, | ||
| 6152 | root->orphan_block_rsv, 0, 5); | ||
| 6153 | if (ret) { | ||
| 6154 | BUG_ON(ret != -EAGAIN); | ||
| 6155 | ret = btrfs_commit_transaction(trans, root); | ||
| 6156 | BUG_ON(ret); | ||
| 6157 | trans = NULL; | ||
| 6158 | continue; | ||
| 6159 | } | ||
| 6160 | |||
| 5182 | ret = btrfs_truncate_inode_items(trans, root, inode, | 6161 | ret = btrfs_truncate_inode_items(trans, root, inode, |
| 5183 | inode->i_size, | 6162 | inode->i_size, |
| 5184 | BTRFS_EXTENT_DATA_KEY); | 6163 | BTRFS_EXTENT_DATA_KEY); |
| @@ -5190,10 +6169,8 @@ static void btrfs_truncate(struct inode *inode) | |||
| 5190 | 6169 | ||
| 5191 | nr = trans->blocks_used; | 6170 | nr = trans->blocks_used; |
| 5192 | btrfs_end_transaction(trans, root); | 6171 | btrfs_end_transaction(trans, root); |
| 6172 | trans = NULL; | ||
| 5193 | btrfs_btree_balance_dirty(root, nr); | 6173 | btrfs_btree_balance_dirty(root, nr); |
| 5194 | |||
| 5195 | trans = btrfs_start_transaction(root, 1); | ||
| 5196 | btrfs_set_trans_block_group(trans, inode); | ||
| 5197 | } | 6174 | } |
| 5198 | 6175 | ||
| 5199 | if (ret == 0 && inode->i_nlink > 0) { | 6176 | if (ret == 0 && inode->i_nlink > 0) { |
| @@ -5254,21 +6231,47 @@ unsigned long btrfs_force_ra(struct address_space *mapping, | |||
| 5254 | struct inode *btrfs_alloc_inode(struct super_block *sb) | 6231 | struct inode *btrfs_alloc_inode(struct super_block *sb) |
| 5255 | { | 6232 | { |
| 5256 | struct btrfs_inode *ei; | 6233 | struct btrfs_inode *ei; |
| 6234 | struct inode *inode; | ||
| 5257 | 6235 | ||
| 5258 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); | 6236 | ei = kmem_cache_alloc(btrfs_inode_cachep, GFP_NOFS); |
| 5259 | if (!ei) | 6237 | if (!ei) |
| 5260 | return NULL; | 6238 | return NULL; |
| 6239 | |||
| 6240 | ei->root = NULL; | ||
| 6241 | ei->space_info = NULL; | ||
| 6242 | ei->generation = 0; | ||
| 6243 | ei->sequence = 0; | ||
| 5261 | ei->last_trans = 0; | 6244 | ei->last_trans = 0; |
| 5262 | ei->last_sub_trans = 0; | 6245 | ei->last_sub_trans = 0; |
| 5263 | ei->logged_trans = 0; | 6246 | ei->logged_trans = 0; |
| 5264 | ei->outstanding_extents = 0; | 6247 | ei->delalloc_bytes = 0; |
| 5265 | ei->reserved_extents = 0; | 6248 | ei->reserved_bytes = 0; |
| 5266 | ei->root = NULL; | 6249 | ei->disk_i_size = 0; |
| 6250 | ei->flags = 0; | ||
| 6251 | ei->index_cnt = (u64)-1; | ||
| 6252 | ei->last_unlink_trans = 0; | ||
| 6253 | |||
| 5267 | spin_lock_init(&ei->accounting_lock); | 6254 | spin_lock_init(&ei->accounting_lock); |
| 6255 | atomic_set(&ei->outstanding_extents, 0); | ||
| 6256 | ei->reserved_extents = 0; | ||
| 6257 | |||
| 6258 | ei->ordered_data_close = 0; | ||
| 6259 | ei->orphan_meta_reserved = 0; | ||
| 6260 | ei->dummy_inode = 0; | ||
| 6261 | ei->force_compress = 0; | ||
| 6262 | |||
| 6263 | inode = &ei->vfs_inode; | ||
| 6264 | extent_map_tree_init(&ei->extent_tree, GFP_NOFS); | ||
| 6265 | extent_io_tree_init(&ei->io_tree, &inode->i_data, GFP_NOFS); | ||
| 6266 | extent_io_tree_init(&ei->io_failure_tree, &inode->i_data, GFP_NOFS); | ||
| 6267 | mutex_init(&ei->log_mutex); | ||
| 5268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); | 6268 | btrfs_ordered_inode_tree_init(&ei->ordered_tree); |
| 5269 | INIT_LIST_HEAD(&ei->i_orphan); | 6269 | INIT_LIST_HEAD(&ei->i_orphan); |
| 6270 | INIT_LIST_HEAD(&ei->delalloc_inodes); | ||
| 5270 | INIT_LIST_HEAD(&ei->ordered_operations); | 6271 | INIT_LIST_HEAD(&ei->ordered_operations); |
| 5271 | return &ei->vfs_inode; | 6272 | RB_CLEAR_NODE(&ei->rb_node); |
| 6273 | |||
| 6274 | return inode; | ||
| 5272 | } | 6275 | } |
| 5273 | 6276 | ||
| 5274 | void btrfs_destroy_inode(struct inode *inode) | 6277 | void btrfs_destroy_inode(struct inode *inode) |
| @@ -5278,6 +6281,8 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 5278 | 6281 | ||
| 5279 | WARN_ON(!list_empty(&inode->i_dentry)); | 6282 | WARN_ON(!list_empty(&inode->i_dentry)); |
| 5280 | WARN_ON(inode->i_data.nrpages); | 6283 | WARN_ON(inode->i_data.nrpages); |
| 6284 | WARN_ON(atomic_read(&BTRFS_I(inode)->outstanding_extents)); | ||
| 6285 | WARN_ON(BTRFS_I(inode)->reserved_extents); | ||
| 5281 | 6286 | ||
| 5282 | /* | 6287 | /* |
| 5283 | * This can happen where we create an inode, but somebody else also | 6288 | * This can happen where we create an inode, but somebody else also |
| @@ -5298,13 +6303,13 @@ void btrfs_destroy_inode(struct inode *inode) | |||
| 5298 | spin_unlock(&root->fs_info->ordered_extent_lock); | 6303 | spin_unlock(&root->fs_info->ordered_extent_lock); |
| 5299 | } | 6304 | } |
| 5300 | 6305 | ||
| 5301 | spin_lock(&root->list_lock); | 6306 | spin_lock(&root->orphan_lock); |
| 5302 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { | 6307 | if (!list_empty(&BTRFS_I(inode)->i_orphan)) { |
| 5303 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", | 6308 | printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n", |
| 5304 | inode->i_ino); | 6309 | inode->i_ino); |
| 5305 | list_del_init(&BTRFS_I(inode)->i_orphan); | 6310 | list_del_init(&BTRFS_I(inode)->i_orphan); |
| 5306 | } | 6311 | } |
| 5307 | spin_unlock(&root->list_lock); | 6312 | spin_unlock(&root->orphan_lock); |
| 5308 | 6313 | ||
| 5309 | while (1) { | 6314 | while (1) { |
| 5310 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); | 6315 | ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1); |
| @@ -5425,19 +6430,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 5425 | if (S_ISDIR(old_inode->i_mode) && new_inode && | 6430 | if (S_ISDIR(old_inode->i_mode) && new_inode && |
| 5426 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) | 6431 | new_inode->i_size > BTRFS_EMPTY_DIR_SIZE) |
| 5427 | return -ENOTEMPTY; | 6432 | return -ENOTEMPTY; |
| 5428 | |||
| 5429 | /* | ||
| 5430 | * We want to reserve the absolute worst case amount of items. So if | ||
| 5431 | * both inodes are subvols and we need to unlink them then that would | ||
| 5432 | * require 4 item modifications, but if they are both normal inodes it | ||
| 5433 | * would require 5 item modifications, so we'll assume their normal | ||
| 5434 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
| 5435 | * should cover the worst case number of items we'll modify. | ||
| 5436 | */ | ||
| 5437 | ret = btrfs_reserve_metadata_space(root, 11); | ||
| 5438 | if (ret) | ||
| 5439 | return ret; | ||
| 5440 | |||
| 5441 | /* | 6433 | /* |
| 5442 | * we're using rename to replace one file with another. | 6434 | * we're using rename to replace one file with another. |
| 5443 | * and the replacement file is large. Start IO on it now so | 6435 | * and the replacement file is large. Start IO on it now so |
| @@ -5450,8 +6442,18 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
| 5450 | /* close the racy window with snapshot create/destroy ioctl */ | 6442 | /* close the racy window with snapshot create/destroy ioctl */ |
| 5451 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6443 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 5452 | down_read(&root->fs_info->subvol_sem); | 6444 | down_read(&root->fs_info->subvol_sem); |
| 6445 | /* | ||
| 6446 | * We want to reserve the absolute worst case amount of items. So if | ||
| 6447 | * both inodes are subvols and we need to unlink them then that would | ||
| 6448 | * require 4 item modifications, but if they are both normal inodes it | ||
| 6449 | * would require 5 item modifications, so we'll assume their normal | ||
| 6450 | * inodes. So 5 * 2 is 10, plus 1 for the new link, so 11 total items | ||
| 6451 | * should cover the worst case number of items we'll modify. | ||
| 6452 | */ | ||
| 6453 | trans = btrfs_start_transaction(root, 20); | ||
| 6454 | if (IS_ERR(trans)) | ||
| 6455 | return PTR_ERR(trans); | ||
| 5453 | 6456 | ||
| 5454 | trans = btrfs_start_transaction(root, 1); | ||
| 5455 | btrfs_set_trans_block_group(trans, new_dir); | 6457 | btrfs_set_trans_block_group(trans, new_dir); |
| 5456 | 6458 | ||
| 5457 | if (dest != root) | 6459 | if (dest != root) |
| @@ -5550,7 +6552,6 @@ out_fail: | |||
| 5550 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) | 6552 | if (old_inode->i_ino == BTRFS_FIRST_FREE_OBJECTID) |
| 5551 | up_read(&root->fs_info->subvol_sem); | 6553 | up_read(&root->fs_info->subvol_sem); |
| 5552 | 6554 | ||
| 5553 | btrfs_unreserve_metadata_space(root, 11); | ||
| 5554 | return ret; | 6555 | return ret; |
| 5555 | } | 6556 | } |
| 5556 | 6557 | ||
| @@ -5602,6 +6603,38 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) | |||
| 5602 | return 0; | 6603 | return 0; |
| 5603 | } | 6604 | } |
| 5604 | 6605 | ||
| 6606 | int btrfs_start_one_delalloc_inode(struct btrfs_root *root, int delay_iput) | ||
| 6607 | { | ||
| 6608 | struct btrfs_inode *binode; | ||
| 6609 | struct inode *inode = NULL; | ||
| 6610 | |||
| 6611 | spin_lock(&root->fs_info->delalloc_lock); | ||
| 6612 | while (!list_empty(&root->fs_info->delalloc_inodes)) { | ||
| 6613 | binode = list_entry(root->fs_info->delalloc_inodes.next, | ||
| 6614 | struct btrfs_inode, delalloc_inodes); | ||
| 6615 | inode = igrab(&binode->vfs_inode); | ||
| 6616 | if (inode) { | ||
| 6617 | list_move_tail(&binode->delalloc_inodes, | ||
| 6618 | &root->fs_info->delalloc_inodes); | ||
| 6619 | break; | ||
| 6620 | } | ||
| 6621 | |||
| 6622 | list_del_init(&binode->delalloc_inodes); | ||
| 6623 | cond_resched_lock(&root->fs_info->delalloc_lock); | ||
| 6624 | } | ||
| 6625 | spin_unlock(&root->fs_info->delalloc_lock); | ||
| 6626 | |||
| 6627 | if (inode) { | ||
| 6628 | write_inode_now(inode, 0); | ||
| 6629 | if (delay_iput) | ||
| 6630 | btrfs_add_delayed_iput(inode); | ||
| 6631 | else | ||
| 6632 | iput(inode); | ||
| 6633 | return 1; | ||
| 6634 | } | ||
| 6635 | return 0; | ||
| 6636 | } | ||
| 6637 | |||
| 5605 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | 6638 | static int btrfs_symlink(struct inode *dir, struct dentry *dentry, |
| 5606 | const char *symname) | 6639 | const char *symname) |
| 5607 | { | 6640 | { |
| @@ -5625,26 +6658,20 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 5625 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) | 6658 | if (name_len > BTRFS_MAX_INLINE_DATA_SIZE(root)) |
| 5626 | return -ENAMETOOLONG; | 6659 | return -ENAMETOOLONG; |
| 5627 | 6660 | ||
| 6661 | err = btrfs_find_free_objectid(NULL, root, dir->i_ino, &objectid); | ||
| 6662 | if (err) | ||
| 6663 | return err; | ||
| 5628 | /* | 6664 | /* |
| 5629 | * 2 items for inode item and ref | 6665 | * 2 items for inode item and ref |
| 5630 | * 2 items for dir items | 6666 | * 2 items for dir items |
| 5631 | * 1 item for xattr if selinux is on | 6667 | * 1 item for xattr if selinux is on |
| 5632 | */ | 6668 | */ |
| 5633 | err = btrfs_reserve_metadata_space(root, 5); | 6669 | trans = btrfs_start_transaction(root, 5); |
| 5634 | if (err) | 6670 | if (IS_ERR(trans)) |
| 5635 | return err; | 6671 | return PTR_ERR(trans); |
| 5636 | 6672 | ||
| 5637 | trans = btrfs_start_transaction(root, 1); | ||
| 5638 | if (!trans) | ||
| 5639 | goto out_fail; | ||
| 5640 | btrfs_set_trans_block_group(trans, dir); | 6673 | btrfs_set_trans_block_group(trans, dir); |
| 5641 | 6674 | ||
| 5642 | err = btrfs_find_free_objectid(trans, root, dir->i_ino, &objectid); | ||
| 5643 | if (err) { | ||
| 5644 | err = -ENOSPC; | ||
| 5645 | goto out_unlock; | ||
| 5646 | } | ||
| 5647 | |||
| 5648 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, | 6675 | inode = btrfs_new_inode(trans, root, dir, dentry->d_name.name, |
| 5649 | dentry->d_name.len, | 6676 | dentry->d_name.len, |
| 5650 | dentry->d_parent->d_inode->i_ino, objectid, | 6677 | dentry->d_parent->d_inode->i_ino, objectid, |
| @@ -5716,8 +6743,6 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry, | |||
| 5716 | out_unlock: | 6743 | out_unlock: |
| 5717 | nr = trans->blocks_used; | 6744 | nr = trans->blocks_used; |
| 5718 | btrfs_end_transaction_throttle(trans, root); | 6745 | btrfs_end_transaction_throttle(trans, root); |
| 5719 | out_fail: | ||
| 5720 | btrfs_unreserve_metadata_space(root, 5); | ||
| 5721 | if (drop_inode) { | 6746 | if (drop_inode) { |
| 5722 | inode_dec_link_count(inode); | 6747 | inode_dec_link_count(inode); |
| 5723 | iput(inode); | 6748 | iput(inode); |
| @@ -5726,33 +6751,28 @@ out_fail: | |||
| 5726 | return err; | 6751 | return err; |
| 5727 | } | 6752 | } |
| 5728 | 6753 | ||
| 5729 | static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | 6754 | int btrfs_prealloc_file_range(struct inode *inode, int mode, |
| 5730 | u64 alloc_hint, int mode, loff_t actual_len) | 6755 | u64 start, u64 num_bytes, u64 min_size, |
| 6756 | loff_t actual_len, u64 *alloc_hint) | ||
| 5731 | { | 6757 | { |
| 5732 | struct btrfs_trans_handle *trans; | 6758 | struct btrfs_trans_handle *trans; |
| 5733 | struct btrfs_root *root = BTRFS_I(inode)->root; | 6759 | struct btrfs_root *root = BTRFS_I(inode)->root; |
| 5734 | struct btrfs_key ins; | 6760 | struct btrfs_key ins; |
| 5735 | u64 cur_offset = start; | 6761 | u64 cur_offset = start; |
| 5736 | u64 num_bytes = end - start; | ||
| 5737 | int ret = 0; | 6762 | int ret = 0; |
| 5738 | u64 i_size; | ||
| 5739 | 6763 | ||
| 5740 | while (num_bytes > 0) { | 6764 | while (num_bytes > 0) { |
| 5741 | trans = btrfs_start_transaction(root, 1); | 6765 | trans = btrfs_start_transaction(root, 3); |
| 5742 | 6766 | if (IS_ERR(trans)) { | |
| 5743 | ret = btrfs_reserve_extent(trans, root, num_bytes, | 6767 | ret = PTR_ERR(trans); |
| 5744 | root->sectorsize, 0, alloc_hint, | 6768 | break; |
| 5745 | (u64)-1, &ins, 1); | ||
| 5746 | if (ret) { | ||
| 5747 | WARN_ON(1); | ||
| 5748 | goto stop_trans; | ||
| 5749 | } | 6769 | } |
| 5750 | 6770 | ||
| 5751 | ret = btrfs_reserve_metadata_space(root, 3); | 6771 | ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, |
| 6772 | 0, *alloc_hint, (u64)-1, &ins, 1); | ||
| 5752 | if (ret) { | 6773 | if (ret) { |
| 5753 | btrfs_free_reserved_extent(root, ins.objectid, | 6774 | btrfs_end_transaction(trans, root); |
| 5754 | ins.offset); | 6775 | break; |
| 5755 | goto stop_trans; | ||
| 5756 | } | 6776 | } |
| 5757 | 6777 | ||
| 5758 | ret = insert_reserved_file_extent(trans, inode, | 6778 | ret = insert_reserved_file_extent(trans, inode, |
| @@ -5766,34 +6786,27 @@ static int prealloc_file_range(struct inode *inode, u64 start, u64 end, | |||
| 5766 | 6786 | ||
| 5767 | num_bytes -= ins.offset; | 6787 | num_bytes -= ins.offset; |
| 5768 | cur_offset += ins.offset; | 6788 | cur_offset += ins.offset; |
| 5769 | alloc_hint = ins.objectid + ins.offset; | 6789 | *alloc_hint = ins.objectid + ins.offset; |
| 5770 | 6790 | ||
| 5771 | inode->i_ctime = CURRENT_TIME; | 6791 | inode->i_ctime = CURRENT_TIME; |
| 5772 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; | 6792 | BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC; |
| 5773 | if (!(mode & FALLOC_FL_KEEP_SIZE) && | 6793 | if (!(mode & FALLOC_FL_KEEP_SIZE) && |
| 5774 | (actual_len > inode->i_size) && | 6794 | (actual_len > inode->i_size) && |
| 5775 | (cur_offset > inode->i_size)) { | 6795 | (cur_offset > inode->i_size)) { |
| 5776 | |||
| 5777 | if (cur_offset > actual_len) | 6796 | if (cur_offset > actual_len) |
| 5778 | i_size = actual_len; | 6797 | i_size_write(inode, actual_len); |
| 5779 | else | 6798 | else |
| 5780 | i_size = cur_offset; | 6799 | i_size_write(inode, cur_offset); |
| 5781 | i_size_write(inode, i_size); | 6800 | i_size_write(inode, cur_offset); |
| 5782 | btrfs_ordered_update_i_size(inode, i_size, NULL); | 6801 | btrfs_ordered_update_i_size(inode, cur_offset, NULL); |
| 5783 | } | 6802 | } |
| 5784 | 6803 | ||
| 5785 | ret = btrfs_update_inode(trans, root, inode); | 6804 | ret = btrfs_update_inode(trans, root, inode); |
| 5786 | BUG_ON(ret); | 6805 | BUG_ON(ret); |
| 5787 | 6806 | ||
| 5788 | btrfs_end_transaction(trans, root); | 6807 | btrfs_end_transaction(trans, root); |
| 5789 | btrfs_unreserve_metadata_space(root, 3); | ||
| 5790 | } | 6808 | } |
| 5791 | return ret; | 6809 | return ret; |
| 5792 | |||
| 5793 | stop_trans: | ||
| 5794 | btrfs_end_transaction(trans, root); | ||
| 5795 | return ret; | ||
| 5796 | |||
| 5797 | } | 6810 | } |
| 5798 | 6811 | ||
| 5799 | static long btrfs_fallocate(struct inode *inode, int mode, | 6812 | static long btrfs_fallocate(struct inode *inode, int mode, |
| @@ -5826,8 +6839,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
| 5826 | goto out; | 6839 | goto out; |
| 5827 | } | 6840 | } |
| 5828 | 6841 | ||
| 5829 | ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode, | 6842 | ret = btrfs_check_data_free_space(inode, alloc_end - alloc_start); |
| 5830 | alloc_end - alloc_start); | ||
| 5831 | if (ret) | 6843 | if (ret) |
| 5832 | goto out; | 6844 | goto out; |
| 5833 | 6845 | ||
| @@ -5872,16 +6884,16 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
| 5872 | if (em->block_start == EXTENT_MAP_HOLE || | 6884 | if (em->block_start == EXTENT_MAP_HOLE || |
| 5873 | (cur_offset >= inode->i_size && | 6885 | (cur_offset >= inode->i_size && |
| 5874 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | 6886 | !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { |
| 5875 | ret = prealloc_file_range(inode, | 6887 | ret = btrfs_prealloc_file_range(inode, 0, cur_offset, |
| 5876 | cur_offset, last_byte, | 6888 | last_byte - cur_offset, |
| 5877 | alloc_hint, mode, offset+len); | 6889 | 1 << inode->i_blkbits, |
| 6890 | offset + len, | ||
| 6891 | &alloc_hint); | ||
| 5878 | if (ret < 0) { | 6892 | if (ret < 0) { |
| 5879 | free_extent_map(em); | 6893 | free_extent_map(em); |
| 5880 | break; | 6894 | break; |
| 5881 | } | 6895 | } |
| 5882 | } | 6896 | } |
| 5883 | if (em->block_start <= EXTENT_MAP_LAST_BYTE) | ||
| 5884 | alloc_hint = em->block_start; | ||
| 5885 | free_extent_map(em); | 6897 | free_extent_map(em); |
| 5886 | 6898 | ||
| 5887 | cur_offset = last_byte; | 6899 | cur_offset = last_byte; |
| @@ -5893,8 +6905,7 @@ static long btrfs_fallocate(struct inode *inode, int mode, | |||
| 5893 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, | 6905 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end, |
| 5894 | &cached_state, GFP_NOFS); | 6906 | &cached_state, GFP_NOFS); |
| 5895 | 6907 | ||
| 5896 | btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode, | 6908 | btrfs_free_reserved_data_space(inode, alloc_end - alloc_start); |
| 5897 | alloc_end - alloc_start); | ||
| 5898 | out: | 6909 | out: |
| 5899 | mutex_unlock(&inode->i_mutex); | 6910 | mutex_unlock(&inode->i_mutex); |
| 5900 | return ret; | 6911 | return ret; |
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 97a97839a867..4cdb98cf26de 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c | |||
| @@ -239,23 +239,19 @@ static noinline int create_subvol(struct btrfs_root *root, | |||
| 239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; | 239 | u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID; |
| 240 | u64 index = 0; | 240 | u64 index = 0; |
| 241 | 241 | ||
| 242 | ret = btrfs_find_free_objectid(NULL, root->fs_info->tree_root, | ||
| 243 | 0, &objectid); | ||
| 244 | if (ret) | ||
| 245 | return ret; | ||
| 242 | /* | 246 | /* |
| 243 | * 1 - inode item | 247 | * 1 - inode item |
| 244 | * 2 - refs | 248 | * 2 - refs |
| 245 | * 1 - root item | 249 | * 1 - root item |
| 246 | * 2 - dir items | 250 | * 2 - dir items |
| 247 | */ | 251 | */ |
| 248 | ret = btrfs_reserve_metadata_space(root, 6); | 252 | trans = btrfs_start_transaction(root, 6); |
| 249 | if (ret) | 253 | if (IS_ERR(trans)) |
| 250 | return ret; | 254 | return PTR_ERR(trans); |
| 251 | |||
| 252 | trans = btrfs_start_transaction(root, 1); | ||
| 253 | BUG_ON(!trans); | ||
| 254 | |||
| 255 | ret = btrfs_find_free_objectid(trans, root->fs_info->tree_root, | ||
| 256 | 0, &objectid); | ||
| 257 | if (ret) | ||
| 258 | goto fail; | ||
| 259 | 255 | ||
| 260 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, | 256 | leaf = btrfs_alloc_free_block(trans, root, root->leafsize, |
| 261 | 0, objectid, NULL, 0, 0, 0); | 257 | 0, objectid, NULL, 0, 0, 0); |
| @@ -345,13 +341,10 @@ fail: | |||
| 345 | err = btrfs_commit_transaction(trans, root); | 341 | err = btrfs_commit_transaction(trans, root); |
| 346 | if (err && !ret) | 342 | if (err && !ret) |
| 347 | ret = err; | 343 | ret = err; |
| 348 | |||
| 349 | btrfs_unreserve_metadata_space(root, 6); | ||
| 350 | return ret; | 344 | return ret; |
| 351 | } | 345 | } |
| 352 | 346 | ||
| 353 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | 347 | static int create_snapshot(struct btrfs_root *root, struct dentry *dentry) |
| 354 | char *name, int namelen) | ||
| 355 | { | 348 | { |
| 356 | struct inode *inode; | 349 | struct inode *inode; |
| 357 | struct btrfs_pending_snapshot *pending_snapshot; | 350 | struct btrfs_pending_snapshot *pending_snapshot; |
| @@ -361,40 +354,33 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 361 | if (!root->ref_cows) | 354 | if (!root->ref_cows) |
| 362 | return -EINVAL; | 355 | return -EINVAL; |
| 363 | 356 | ||
| 364 | /* | ||
| 365 | * 1 - inode item | ||
| 366 | * 2 - refs | ||
| 367 | * 1 - root item | ||
| 368 | * 2 - dir items | ||
| 369 | */ | ||
| 370 | ret = btrfs_reserve_metadata_space(root, 6); | ||
| 371 | if (ret) | ||
| 372 | goto fail; | ||
| 373 | |||
| 374 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); | 357 | pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS); |
| 375 | if (!pending_snapshot) { | 358 | if (!pending_snapshot) |
| 376 | ret = -ENOMEM; | 359 | return -ENOMEM; |
| 377 | btrfs_unreserve_metadata_space(root, 6); | 360 | |
| 378 | goto fail; | 361 | btrfs_init_block_rsv(&pending_snapshot->block_rsv); |
| 379 | } | ||
| 380 | pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS); | ||
| 381 | if (!pending_snapshot->name) { | ||
| 382 | ret = -ENOMEM; | ||
| 383 | kfree(pending_snapshot); | ||
| 384 | btrfs_unreserve_metadata_space(root, 6); | ||
| 385 | goto fail; | ||
| 386 | } | ||
| 387 | memcpy(pending_snapshot->name, name, namelen); | ||
| 388 | pending_snapshot->name[namelen] = '\0'; | ||
| 389 | pending_snapshot->dentry = dentry; | 362 | pending_snapshot->dentry = dentry; |
| 390 | trans = btrfs_start_transaction(root, 1); | ||
| 391 | BUG_ON(!trans); | ||
| 392 | pending_snapshot->root = root; | 363 | pending_snapshot->root = root; |
| 364 | |||
| 365 | trans = btrfs_start_transaction(root->fs_info->extent_root, 5); | ||
| 366 | if (IS_ERR(trans)) { | ||
| 367 | ret = PTR_ERR(trans); | ||
| 368 | goto fail; | ||
| 369 | } | ||
| 370 | |||
| 371 | ret = btrfs_snap_reserve_metadata(trans, pending_snapshot); | ||
| 372 | BUG_ON(ret); | ||
| 373 | |||
| 393 | list_add(&pending_snapshot->list, | 374 | list_add(&pending_snapshot->list, |
| 394 | &trans->transaction->pending_snapshots); | 375 | &trans->transaction->pending_snapshots); |
| 395 | ret = btrfs_commit_transaction(trans, root); | 376 | ret = btrfs_commit_transaction(trans, root->fs_info->extent_root); |
| 396 | BUG_ON(ret); | 377 | BUG_ON(ret); |
| 397 | btrfs_unreserve_metadata_space(root, 6); | 378 | |
| 379 | ret = pending_snapshot->error; | ||
| 380 | if (ret) | ||
| 381 | goto fail; | ||
| 382 | |||
| 383 | btrfs_orphan_cleanup(pending_snapshot->snap); | ||
| 398 | 384 | ||
| 399 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); | 385 | inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry); |
| 400 | if (IS_ERR(inode)) { | 386 | if (IS_ERR(inode)) { |
| @@ -405,6 +391,7 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry, | |||
| 405 | d_instantiate(dentry, inode); | 391 | d_instantiate(dentry, inode); |
| 406 | ret = 0; | 392 | ret = 0; |
| 407 | fail: | 393 | fail: |
| 394 | kfree(pending_snapshot); | ||
| 408 | return ret; | 395 | return ret; |
| 409 | } | 396 | } |
| 410 | 397 | ||
| @@ -456,8 +443,7 @@ static noinline int btrfs_mksubvol(struct path *parent, | |||
| 456 | goto out_up_read; | 443 | goto out_up_read; |
| 457 | 444 | ||
| 458 | if (snap_src) { | 445 | if (snap_src) { |
| 459 | error = create_snapshot(snap_src, dentry, | 446 | error = create_snapshot(snap_src, dentry); |
| 460 | name, namelen); | ||
| 461 | } else { | 447 | } else { |
| 462 | error = create_subvol(BTRFS_I(dir)->root, dentry, | 448 | error = create_subvol(BTRFS_I(dir)->root, dentry, |
| 463 | name, namelen); | 449 | name, namelen); |
| @@ -601,19 +587,9 @@ static int btrfs_defrag_file(struct file *file, | |||
| 601 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) | 587 | if (range->flags & BTRFS_DEFRAG_RANGE_COMPRESS) |
| 602 | BTRFS_I(inode)->force_compress = 1; | 588 | BTRFS_I(inode)->force_compress = 1; |
| 603 | 589 | ||
| 604 | ret = btrfs_check_data_free_space(root, inode, PAGE_CACHE_SIZE); | 590 | ret = btrfs_delalloc_reserve_space(inode, PAGE_CACHE_SIZE); |
| 605 | if (ret) { | 591 | if (ret) |
| 606 | ret = -ENOSPC; | 592 | goto err_unlock; |
| 607 | break; | ||
| 608 | } | ||
| 609 | |||
| 610 | ret = btrfs_reserve_metadata_for_delalloc(root, inode, 1); | ||
| 611 | if (ret) { | ||
| 612 | btrfs_free_reserved_data_space(root, inode, | ||
| 613 | PAGE_CACHE_SIZE); | ||
| 614 | ret = -ENOSPC; | ||
| 615 | break; | ||
| 616 | } | ||
| 617 | again: | 593 | again: |
| 618 | if (inode->i_size == 0 || | 594 | if (inode->i_size == 0 || |
| 619 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { | 595 | i > ((inode->i_size - 1) >> PAGE_CACHE_SHIFT)) { |
| @@ -622,8 +598,10 @@ again: | |||
| 622 | } | 598 | } |
| 623 | 599 | ||
| 624 | page = grab_cache_page(inode->i_mapping, i); | 600 | page = grab_cache_page(inode->i_mapping, i); |
| 625 | if (!page) | 601 | if (!page) { |
| 602 | ret = -ENOMEM; | ||
| 626 | goto err_reservations; | 603 | goto err_reservations; |
| 604 | } | ||
| 627 | 605 | ||
| 628 | if (!PageUptodate(page)) { | 606 | if (!PageUptodate(page)) { |
| 629 | btrfs_readpage(NULL, page); | 607 | btrfs_readpage(NULL, page); |
| @@ -631,6 +609,7 @@ again: | |||
| 631 | if (!PageUptodate(page)) { | 609 | if (!PageUptodate(page)) { |
| 632 | unlock_page(page); | 610 | unlock_page(page); |
| 633 | page_cache_release(page); | 611 | page_cache_release(page); |
| 612 | ret = -EIO; | ||
| 634 | goto err_reservations; | 613 | goto err_reservations; |
| 635 | } | 614 | } |
| 636 | } | 615 | } |
| @@ -644,8 +623,7 @@ again: | |||
| 644 | wait_on_page_writeback(page); | 623 | wait_on_page_writeback(page); |
| 645 | 624 | ||
| 646 | if (PageDirty(page)) { | 625 | if (PageDirty(page)) { |
| 647 | btrfs_free_reserved_data_space(root, inode, | 626 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); |
| 648 | PAGE_CACHE_SIZE); | ||
| 649 | goto loop_unlock; | 627 | goto loop_unlock; |
| 650 | } | 628 | } |
| 651 | 629 | ||
| @@ -683,7 +661,6 @@ loop_unlock: | |||
| 683 | page_cache_release(page); | 661 | page_cache_release(page); |
| 684 | mutex_unlock(&inode->i_mutex); | 662 | mutex_unlock(&inode->i_mutex); |
| 685 | 663 | ||
| 686 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 687 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); | 664 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, 1); |
| 688 | i++; | 665 | i++; |
| 689 | } | 666 | } |
| @@ -713,9 +690,9 @@ loop_unlock: | |||
| 713 | return 0; | 690 | return 0; |
| 714 | 691 | ||
| 715 | err_reservations: | 692 | err_reservations: |
| 693 | btrfs_delalloc_release_space(inode, PAGE_CACHE_SIZE); | ||
| 694 | err_unlock: | ||
| 716 | mutex_unlock(&inode->i_mutex); | 695 | mutex_unlock(&inode->i_mutex); |
| 717 | btrfs_free_reserved_data_space(root, inode, PAGE_CACHE_SIZE); | ||
| 718 | btrfs_unreserve_metadata_for_delalloc(root, inode, 1); | ||
| 719 | return ret; | 696 | return ret; |
| 720 | } | 697 | } |
| 721 | 698 | ||
| @@ -811,7 +788,7 @@ static noinline int btrfs_ioctl_resize(struct btrfs_root *root, | |||
| 811 | device->name, (unsigned long long)new_size); | 788 | device->name, (unsigned long long)new_size); |
| 812 | 789 | ||
| 813 | if (new_size > old_size) { | 790 | if (new_size > old_size) { |
| 814 | trans = btrfs_start_transaction(root, 1); | 791 | trans = btrfs_start_transaction(root, 0); |
| 815 | ret = btrfs_grow_device(trans, device, new_size); | 792 | ret = btrfs_grow_device(trans, device, new_size); |
| 816 | btrfs_commit_transaction(trans, root); | 793 | btrfs_commit_transaction(trans, root); |
| 817 | } else { | 794 | } else { |
| @@ -1300,7 +1277,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 1300 | if (err) | 1277 | if (err) |
| 1301 | goto out_up_write; | 1278 | goto out_up_write; |
| 1302 | 1279 | ||
| 1303 | trans = btrfs_start_transaction(root, 1); | 1280 | trans = btrfs_start_transaction(root, 0); |
| 1281 | if (IS_ERR(trans)) { | ||
| 1282 | err = PTR_ERR(trans); | ||
| 1283 | goto out; | ||
| 1284 | } | ||
| 1285 | trans->block_rsv = &root->fs_info->global_block_rsv; | ||
| 1286 | |||
| 1304 | ret = btrfs_unlink_subvol(trans, root, dir, | 1287 | ret = btrfs_unlink_subvol(trans, root, dir, |
| 1305 | dest->root_key.objectid, | 1288 | dest->root_key.objectid, |
| 1306 | dentry->d_name.name, | 1289 | dentry->d_name.name, |
| @@ -1314,10 +1297,12 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file, | |||
| 1314 | dest->root_item.drop_level = 0; | 1297 | dest->root_item.drop_level = 0; |
| 1315 | btrfs_set_root_refs(&dest->root_item, 0); | 1298 | btrfs_set_root_refs(&dest->root_item, 0); |
| 1316 | 1299 | ||
| 1317 | ret = btrfs_insert_orphan_item(trans, | 1300 | if (!xchg(&dest->orphan_item_inserted, 1)) { |
| 1318 | root->fs_info->tree_root, | 1301 | ret = btrfs_insert_orphan_item(trans, |
| 1319 | dest->root_key.objectid); | 1302 | root->fs_info->tree_root, |
| 1320 | BUG_ON(ret); | 1303 | dest->root_key.objectid); |
| 1304 | BUG_ON(ret); | ||
| 1305 | } | ||
| 1321 | 1306 | ||
| 1322 | ret = btrfs_commit_transaction(trans, root); | 1307 | ret = btrfs_commit_transaction(trans, root); |
| 1323 | BUG_ON(ret); | 1308 | BUG_ON(ret); |
| @@ -1358,8 +1343,10 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 1358 | ret = -EPERM; | 1343 | ret = -EPERM; |
| 1359 | goto out; | 1344 | goto out; |
| 1360 | } | 1345 | } |
| 1361 | btrfs_defrag_root(root, 0); | 1346 | ret = btrfs_defrag_root(root, 0); |
| 1362 | btrfs_defrag_root(root->fs_info->extent_root, 0); | 1347 | if (ret) |
| 1348 | goto out; | ||
| 1349 | ret = btrfs_defrag_root(root->fs_info->extent_root, 0); | ||
| 1363 | break; | 1350 | break; |
| 1364 | case S_IFREG: | 1351 | case S_IFREG: |
| 1365 | if (!(file->f_mode & FMODE_WRITE)) { | 1352 | if (!(file->f_mode & FMODE_WRITE)) { |
| @@ -1389,9 +1376,11 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp) | |||
| 1389 | /* the rest are all set to zero by kzalloc */ | 1376 | /* the rest are all set to zero by kzalloc */ |
| 1390 | range->len = (u64)-1; | 1377 | range->len = (u64)-1; |
| 1391 | } | 1378 | } |
| 1392 | btrfs_defrag_file(file, range); | 1379 | ret = btrfs_defrag_file(file, range); |
| 1393 | kfree(range); | 1380 | kfree(range); |
| 1394 | break; | 1381 | break; |
| 1382 | default: | ||
| 1383 | ret = -EINVAL; | ||
| 1395 | } | 1384 | } |
| 1396 | out: | 1385 | out: |
| 1397 | mnt_drop_write(file->f_path.mnt); | 1386 | mnt_drop_write(file->f_path.mnt); |
| @@ -1550,12 +1539,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1550 | btrfs_wait_ordered_range(src, off, off+len); | 1539 | btrfs_wait_ordered_range(src, off, off+len); |
| 1551 | } | 1540 | } |
| 1552 | 1541 | ||
| 1553 | trans = btrfs_start_transaction(root, 1); | ||
| 1554 | BUG_ON(!trans); | ||
| 1555 | |||
| 1556 | /* punch hole in destination first */ | ||
| 1557 | btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1); | ||
| 1558 | |||
| 1559 | /* clone data */ | 1542 | /* clone data */ |
| 1560 | key.objectid = src->i_ino; | 1543 | key.objectid = src->i_ino; |
| 1561 | key.type = BTRFS_EXTENT_DATA_KEY; | 1544 | key.type = BTRFS_EXTENT_DATA_KEY; |
| @@ -1566,7 +1549,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1566 | * note the key will change type as we walk through the | 1549 | * note the key will change type as we walk through the |
| 1567 | * tree. | 1550 | * tree. |
| 1568 | */ | 1551 | */ |
| 1569 | ret = btrfs_search_slot(trans, root, &key, path, 0, 0); | 1552 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); |
| 1570 | if (ret < 0) | 1553 | if (ret < 0) |
| 1571 | goto out; | 1554 | goto out; |
| 1572 | 1555 | ||
| @@ -1629,12 +1612,31 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1629 | new_key.objectid = inode->i_ino; | 1612 | new_key.objectid = inode->i_ino; |
| 1630 | new_key.offset = key.offset + destoff - off; | 1613 | new_key.offset = key.offset + destoff - off; |
| 1631 | 1614 | ||
| 1615 | trans = btrfs_start_transaction(root, 1); | ||
| 1616 | if (IS_ERR(trans)) { | ||
| 1617 | ret = PTR_ERR(trans); | ||
| 1618 | goto out; | ||
| 1619 | } | ||
| 1620 | |||
| 1632 | if (type == BTRFS_FILE_EXTENT_REG || | 1621 | if (type == BTRFS_FILE_EXTENT_REG || |
| 1633 | type == BTRFS_FILE_EXTENT_PREALLOC) { | 1622 | type == BTRFS_FILE_EXTENT_PREALLOC) { |
| 1623 | if (off > key.offset) { | ||
| 1624 | datao += off - key.offset; | ||
| 1625 | datal -= off - key.offset; | ||
| 1626 | } | ||
| 1627 | |||
| 1628 | if (key.offset + datal > off + len) | ||
| 1629 | datal = off + len - key.offset; | ||
| 1630 | |||
| 1631 | ret = btrfs_drop_extents(trans, inode, | ||
| 1632 | new_key.offset, | ||
| 1633 | new_key.offset + datal, | ||
| 1634 | &hint_byte, 1); | ||
| 1635 | BUG_ON(ret); | ||
| 1636 | |||
| 1634 | ret = btrfs_insert_empty_item(trans, root, path, | 1637 | ret = btrfs_insert_empty_item(trans, root, path, |
| 1635 | &new_key, size); | 1638 | &new_key, size); |
| 1636 | if (ret) | 1639 | BUG_ON(ret); |
| 1637 | goto out; | ||
| 1638 | 1640 | ||
| 1639 | leaf = path->nodes[0]; | 1641 | leaf = path->nodes[0]; |
| 1640 | slot = path->slots[0]; | 1642 | slot = path->slots[0]; |
| @@ -1645,14 +1647,6 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1645 | extent = btrfs_item_ptr(leaf, slot, | 1647 | extent = btrfs_item_ptr(leaf, slot, |
| 1646 | struct btrfs_file_extent_item); | 1648 | struct btrfs_file_extent_item); |
| 1647 | 1649 | ||
| 1648 | if (off > key.offset) { | ||
| 1649 | datao += off - key.offset; | ||
| 1650 | datal -= off - key.offset; | ||
| 1651 | } | ||
| 1652 | |||
| 1653 | if (key.offset + datal > off + len) | ||
| 1654 | datal = off + len - key.offset; | ||
| 1655 | |||
| 1656 | /* disko == 0 means it's a hole */ | 1650 | /* disko == 0 means it's a hole */ |
| 1657 | if (!disko) | 1651 | if (!disko) |
| 1658 | datao = 0; | 1652 | datao = 0; |
| @@ -1683,14 +1677,21 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1683 | 1677 | ||
| 1684 | if (comp && (skip || trim)) { | 1678 | if (comp && (skip || trim)) { |
| 1685 | ret = -EINVAL; | 1679 | ret = -EINVAL; |
| 1680 | btrfs_end_transaction(trans, root); | ||
| 1686 | goto out; | 1681 | goto out; |
| 1687 | } | 1682 | } |
| 1688 | size -= skip + trim; | 1683 | size -= skip + trim; |
| 1689 | datal -= skip + trim; | 1684 | datal -= skip + trim; |
| 1685 | |||
| 1686 | ret = btrfs_drop_extents(trans, inode, | ||
| 1687 | new_key.offset, | ||
| 1688 | new_key.offset + datal, | ||
| 1689 | &hint_byte, 1); | ||
| 1690 | BUG_ON(ret); | ||
| 1691 | |||
| 1690 | ret = btrfs_insert_empty_item(trans, root, path, | 1692 | ret = btrfs_insert_empty_item(trans, root, path, |
| 1691 | &new_key, size); | 1693 | &new_key, size); |
| 1692 | if (ret) | 1694 | BUG_ON(ret); |
| 1693 | goto out; | ||
| 1694 | 1695 | ||
| 1695 | if (skip) { | 1696 | if (skip) { |
| 1696 | u32 start = | 1697 | u32 start = |
| @@ -1708,8 +1709,17 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd, | |||
| 1708 | } | 1709 | } |
| 1709 | 1710 | ||
| 1710 | btrfs_mark_buffer_dirty(leaf); | 1711 | btrfs_mark_buffer_dirty(leaf); |
| 1711 | } | 1712 | btrfs_release_path(root, path); |
| 1712 | 1713 | ||
| 1714 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 1715 | if (new_key.offset + datal > inode->i_size) | ||
| 1716 | btrfs_i_size_write(inode, | ||
| 1717 | new_key.offset + datal); | ||
| 1718 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
| 1719 | ret = btrfs_update_inode(trans, root, inode); | ||
| 1720 | BUG_ON(ret); | ||
| 1721 | btrfs_end_transaction(trans, root); | ||
| 1722 | } | ||
| 1713 | next: | 1723 | next: |
| 1714 | btrfs_release_path(root, path); | 1724 | btrfs_release_path(root, path); |
| 1715 | key.offset++; | 1725 | key.offset++; |
| @@ -1717,17 +1727,7 @@ next: | |||
| 1717 | ret = 0; | 1727 | ret = 0; |
| 1718 | out: | 1728 | out: |
| 1719 | btrfs_release_path(root, path); | 1729 | btrfs_release_path(root, path); |
| 1720 | if (ret == 0) { | ||
| 1721 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
| 1722 | if (destoff + olen > inode->i_size) | ||
| 1723 | btrfs_i_size_write(inode, destoff + olen); | ||
| 1724 | BTRFS_I(inode)->flags = BTRFS_I(src)->flags; | ||
| 1725 | ret = btrfs_update_inode(trans, root, inode); | ||
| 1726 | } | ||
| 1727 | btrfs_end_transaction(trans, root); | ||
| 1728 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); | 1730 | unlock_extent(&BTRFS_I(src)->io_tree, off, off+len, GFP_NOFS); |
| 1729 | if (ret) | ||
| 1730 | vmtruncate(inode, 0); | ||
| 1731 | out_unlock: | 1731 | out_unlock: |
| 1732 | mutex_unlock(&src->i_mutex); | 1732 | mutex_unlock(&src->i_mutex); |
| 1733 | mutex_unlock(&inode->i_mutex); | 1733 | mutex_unlock(&inode->i_mutex); |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index a127c0ebb2dc..e56c72bc5add 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
| @@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
| 124 | return 1; | 124 | return 1; |
| 125 | } | 125 | } |
| 126 | 126 | ||
| 127 | static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, | ||
| 128 | u64 len) | ||
| 129 | { | ||
| 130 | if (file_offset + len <= entry->file_offset || | ||
| 131 | entry->file_offset + entry->len <= file_offset) | ||
| 132 | return 0; | ||
| 133 | return 1; | ||
| 134 | } | ||
| 135 | |||
| 127 | /* | 136 | /* |
| 128 | * look find the first ordered struct that has this offset, otherwise | 137 | * look find the first ordered struct that has this offset, otherwise |
| 129 | * the first one less than this offset | 138 | * the first one less than this offset |
| @@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
| 161 | * The tree is given a single reference on the ordered extent that was | 170 | * The tree is given a single reference on the ordered extent that was |
| 162 | * inserted. | 171 | * inserted. |
| 163 | */ | 172 | */ |
| 164 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
| 165 | u64 start, u64 len, u64 disk_len, int type) | 174 | u64 start, u64 len, u64 disk_len, |
| 175 | int type, int dio) | ||
| 166 | { | 176 | { |
| 167 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
| 168 | struct rb_node *node; | 178 | struct rb_node *node; |
| @@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 182 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
| 183 | set_bit(type, &entry->flags); | 193 | set_bit(type, &entry->flags); |
| 184 | 194 | ||
| 195 | if (dio) | ||
| 196 | set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); | ||
| 197 | |||
| 185 | /* one ref for the tree */ | 198 | /* one ref for the tree */ |
| 186 | atomic_set(&entry->refs, 1); | 199 | atomic_set(&entry->refs, 1); |
| 187 | init_waitqueue_head(&entry->wait); | 200 | init_waitqueue_head(&entry->wait); |
| @@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
| 203 | return 0; | 216 | return 0; |
| 204 | } | 217 | } |
| 205 | 218 | ||
| 219 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
| 220 | u64 start, u64 len, u64 disk_len, int type) | ||
| 221 | { | ||
| 222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
| 223 | disk_len, type, 0); | ||
| 224 | } | ||
| 225 | |||
| 226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
| 227 | u64 start, u64 len, u64 disk_len, int type) | ||
| 228 | { | ||
| 229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
| 230 | disk_len, type, 1); | ||
| 231 | } | ||
| 232 | |||
| 206 | /* | 233 | /* |
| 207 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | 234 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted |
| 208 | * when an ordered extent is finished. If the list covers more than one | 235 | * when an ordered extent is finished. If the list covers more than one |
| @@ -311,13 +338,6 @@ static int __btrfs_remove_ordered_extent(struct inode *inode, | |||
| 311 | tree->last = NULL; | 338 | tree->last = NULL; |
| 312 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); | 339 | set_bit(BTRFS_ORDERED_COMPLETE, &entry->flags); |
| 313 | 340 | ||
| 314 | spin_lock(&BTRFS_I(inode)->accounting_lock); | ||
| 315 | WARN_ON(!BTRFS_I(inode)->outstanding_extents); | ||
| 316 | BTRFS_I(inode)->outstanding_extents--; | ||
| 317 | spin_unlock(&BTRFS_I(inode)->accounting_lock); | ||
| 318 | btrfs_unreserve_metadata_for_delalloc(BTRFS_I(inode)->root, | ||
| 319 | inode, 1); | ||
| 320 | |||
| 321 | spin_lock(&root->fs_info->ordered_extent_lock); | 341 | spin_lock(&root->fs_info->ordered_extent_lock); |
| 322 | list_del_init(&entry->root_extent_list); | 342 | list_del_init(&entry->root_extent_list); |
| 323 | 343 | ||
| @@ -491,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
| 491 | * start IO on any dirty ones so the wait doesn't stall waiting | 511 | * start IO on any dirty ones so the wait doesn't stall waiting |
| 492 | * for pdflush to find them | 512 | * for pdflush to find them |
| 493 | */ | 513 | */ |
| 494 | filemap_fdatawrite_range(inode->i_mapping, start, end); | 514 | if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) |
| 515 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
| 495 | if (wait) { | 516 | if (wait) { |
| 496 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 517 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
| 497 | &entry->flags)); | 518 | &entry->flags)); |
| @@ -588,6 +609,47 @@ out: | |||
| 588 | return entry; | 609 | return entry; |
| 589 | } | 610 | } |
| 590 | 611 | ||
| 612 | /* Since the DIO code tries to lock a wide area we need to look for any ordered | ||
| 613 | * extents that exist in the range, rather than just the start of the range. | ||
| 614 | */ | ||
| 615 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
| 616 | u64 file_offset, | ||
| 617 | u64 len) | ||
| 618 | { | ||
| 619 | struct btrfs_ordered_inode_tree *tree; | ||
| 620 | struct rb_node *node; | ||
| 621 | struct btrfs_ordered_extent *entry = NULL; | ||
| 622 | |||
| 623 | tree = &BTRFS_I(inode)->ordered_tree; | ||
| 624 | spin_lock(&tree->lock); | ||
| 625 | node = tree_search(tree, file_offset); | ||
| 626 | if (!node) { | ||
| 627 | node = tree_search(tree, file_offset + len); | ||
| 628 | if (!node) | ||
| 629 | goto out; | ||
| 630 | } | ||
| 631 | |||
| 632 | while (1) { | ||
| 633 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
| 634 | if (range_overlaps(entry, file_offset, len)) | ||
| 635 | break; | ||
| 636 | |||
| 637 | if (entry->file_offset >= file_offset + len) { | ||
| 638 | entry = NULL; | ||
| 639 | break; | ||
| 640 | } | ||
| 641 | entry = NULL; | ||
| 642 | node = rb_next(node); | ||
| 643 | if (!node) | ||
| 644 | break; | ||
| 645 | } | ||
| 646 | out: | ||
| 647 | if (entry) | ||
| 648 | atomic_inc(&entry->refs); | ||
| 649 | spin_unlock(&tree->lock); | ||
| 650 | return entry; | ||
| 651 | } | ||
| 652 | |||
| 591 | /* | 653 | /* |
| 592 | * lookup and return any extent before 'file_offset'. NULL is returned | 654 | * lookup and return any extent before 'file_offset'. NULL is returned |
| 593 | * if none is found | 655 | * if none is found |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c82f76a9f040..8ac365492a3f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
| @@ -72,6 +72,8 @@ struct btrfs_ordered_sum { | |||
| 72 | 72 | ||
| 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
| 74 | 74 | ||
| 75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | ||
| 76 | |||
| 75 | struct btrfs_ordered_extent { | 77 | struct btrfs_ordered_extent { |
| 76 | /* logical offset in the file */ | 78 | /* logical offset in the file */ |
| 77 | u64 file_offset; | 79 | u64 file_offset; |
| @@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
| 140 | struct btrfs_ordered_extent **cached, | 142 | struct btrfs_ordered_extent **cached, |
| 141 | u64 file_offset, u64 io_size); | 143 | u64 file_offset, u64 io_size); |
| 142 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
| 143 | u64 start, u64 len, u64 disk_len, int tyep); | 145 | u64 start, u64 len, u64 disk_len, int type); |
| 146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
| 147 | u64 start, u64 len, u64 disk_len, int type); | ||
| 144 | int btrfs_add_ordered_sum(struct inode *inode, | 148 | int btrfs_add_ordered_sum(struct inode *inode, |
| 145 | struct btrfs_ordered_extent *entry, | 149 | struct btrfs_ordered_extent *entry, |
| 146 | struct btrfs_ordered_sum *sum); | 150 | struct btrfs_ordered_sum *sum); |
| @@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
| 151 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | 155 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); |
| 152 | struct btrfs_ordered_extent * | 156 | struct btrfs_ordered_extent * |
| 153 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | 157 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); |
| 158 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
| 159 | u64 file_offset, | ||
| 160 | u64 len); | ||
| 154 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 161 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
| 155 | struct btrfs_ordered_extent *ordered); | 162 | struct btrfs_ordered_extent *ordered); |
| 156 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 163 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index e558dd941ded..05d41e569236 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c | |||
| @@ -44,8 +44,12 @@ struct tree_entry { | |||
| 44 | struct backref_node { | 44 | struct backref_node { |
| 45 | struct rb_node rb_node; | 45 | struct rb_node rb_node; |
| 46 | u64 bytenr; | 46 | u64 bytenr; |
| 47 | /* objectid tree block owner */ | 47 | |
| 48 | u64 new_bytenr; | ||
| 49 | /* objectid of tree block owner, can be not uptodate */ | ||
| 48 | u64 owner; | 50 | u64 owner; |
| 51 | /* link to pending, changed or detached list */ | ||
| 52 | struct list_head list; | ||
| 49 | /* list of upper level blocks reference this block */ | 53 | /* list of upper level blocks reference this block */ |
| 50 | struct list_head upper; | 54 | struct list_head upper; |
| 51 | /* list of child blocks in the cache */ | 55 | /* list of child blocks in the cache */ |
| @@ -56,9 +60,9 @@ struct backref_node { | |||
| 56 | struct extent_buffer *eb; | 60 | struct extent_buffer *eb; |
| 57 | /* level of tree block */ | 61 | /* level of tree block */ |
| 58 | unsigned int level:8; | 62 | unsigned int level:8; |
| 59 | /* 1 if the block is root of old snapshot */ | 63 | /* is the block in non-reference counted tree */ |
| 60 | unsigned int old_root:1; | 64 | unsigned int cowonly:1; |
| 61 | /* 1 if no child blocks in the cache */ | 65 | /* 1 if no child node in the cache */ |
| 62 | unsigned int lowest:1; | 66 | unsigned int lowest:1; |
| 63 | /* is the extent buffer locked */ | 67 | /* is the extent buffer locked */ |
| 64 | unsigned int locked:1; | 68 | unsigned int locked:1; |
| @@ -66,6 +70,16 @@ struct backref_node { | |||
| 66 | unsigned int processed:1; | 70 | unsigned int processed:1; |
| 67 | /* have backrefs of this block been checked */ | 71 | /* have backrefs of this block been checked */ |
| 68 | unsigned int checked:1; | 72 | unsigned int checked:1; |
| 73 | /* | ||
| 74 | * 1 if corresponding block has been cowed but some upper | ||
| 75 | * level block pointers may not point to the new location | ||
| 76 | */ | ||
| 77 | unsigned int pending:1; | ||
| 78 | /* | ||
| 79 | * 1 if the backref node isn't connected to any other | ||
| 80 | * backref node. | ||
| 81 | */ | ||
| 82 | unsigned int detached:1; | ||
| 69 | }; | 83 | }; |
| 70 | 84 | ||
| 71 | /* | 85 | /* |
| @@ -74,7 +88,6 @@ struct backref_node { | |||
| 74 | struct backref_edge { | 88 | struct backref_edge { |
| 75 | struct list_head list[2]; | 89 | struct list_head list[2]; |
| 76 | struct backref_node *node[2]; | 90 | struct backref_node *node[2]; |
| 77 | u64 blockptr; | ||
| 78 | }; | 91 | }; |
| 79 | 92 | ||
| 80 | #define LOWER 0 | 93 | #define LOWER 0 |
| @@ -83,9 +96,25 @@ struct backref_edge { | |||
| 83 | struct backref_cache { | 96 | struct backref_cache { |
| 84 | /* red black tree of all backref nodes in the cache */ | 97 | /* red black tree of all backref nodes in the cache */ |
| 85 | struct rb_root rb_root; | 98 | struct rb_root rb_root; |
| 86 | /* list of backref nodes with no child block in the cache */ | 99 | /* for passing backref nodes to btrfs_reloc_cow_block */ |
| 100 | struct backref_node *path[BTRFS_MAX_LEVEL]; | ||
| 101 | /* | ||
| 102 | * list of blocks that have been cowed but some block | ||
| 103 | * pointers in upper level blocks may not reflect the | ||
| 104 | * new location | ||
| 105 | */ | ||
| 87 | struct list_head pending[BTRFS_MAX_LEVEL]; | 106 | struct list_head pending[BTRFS_MAX_LEVEL]; |
| 88 | spinlock_t lock; | 107 | /* list of backref nodes with no child node */ |
| 108 | struct list_head leaves; | ||
| 109 | /* list of blocks that have been cowed in current transaction */ | ||
| 110 | struct list_head changed; | ||
| 111 | /* list of detached backref node. */ | ||
| 112 | struct list_head detached; | ||
| 113 | |||
| 114 | u64 last_trans; | ||
| 115 | |||
| 116 | int nr_nodes; | ||
| 117 | int nr_edges; | ||
| 89 | }; | 118 | }; |
| 90 | 119 | ||
| 91 | /* | 120 | /* |
| @@ -113,15 +142,6 @@ struct tree_block { | |||
| 113 | unsigned int key_ready:1; | 142 | unsigned int key_ready:1; |
| 114 | }; | 143 | }; |
| 115 | 144 | ||
| 116 | /* inode vector */ | ||
| 117 | #define INODEVEC_SIZE 16 | ||
| 118 | |||
| 119 | struct inodevec { | ||
| 120 | struct list_head list; | ||
| 121 | struct inode *inode[INODEVEC_SIZE]; | ||
| 122 | int nr; | ||
| 123 | }; | ||
| 124 | |||
| 125 | #define MAX_EXTENTS 128 | 145 | #define MAX_EXTENTS 128 |
| 126 | 146 | ||
| 127 | struct file_extent_cluster { | 147 | struct file_extent_cluster { |
| @@ -138,36 +158,43 @@ struct reloc_control { | |||
| 138 | struct btrfs_root *extent_root; | 158 | struct btrfs_root *extent_root; |
| 139 | /* inode for moving data */ | 159 | /* inode for moving data */ |
| 140 | struct inode *data_inode; | 160 | struct inode *data_inode; |
| 141 | struct btrfs_workers workers; | 161 | |
| 162 | struct btrfs_block_rsv *block_rsv; | ||
| 163 | |||
| 164 | struct backref_cache backref_cache; | ||
| 165 | |||
| 166 | struct file_extent_cluster cluster; | ||
| 142 | /* tree blocks have been processed */ | 167 | /* tree blocks have been processed */ |
| 143 | struct extent_io_tree processed_blocks; | 168 | struct extent_io_tree processed_blocks; |
| 144 | /* map start of tree root to corresponding reloc tree */ | 169 | /* map start of tree root to corresponding reloc tree */ |
| 145 | struct mapping_tree reloc_root_tree; | 170 | struct mapping_tree reloc_root_tree; |
| 146 | /* list of reloc trees */ | 171 | /* list of reloc trees */ |
| 147 | struct list_head reloc_roots; | 172 | struct list_head reloc_roots; |
| 173 | /* size of metadata reservation for merging reloc trees */ | ||
| 174 | u64 merging_rsv_size; | ||
| 175 | /* size of relocated tree nodes */ | ||
| 176 | u64 nodes_relocated; | ||
| 177 | |||
| 148 | u64 search_start; | 178 | u64 search_start; |
| 149 | u64 extents_found; | 179 | u64 extents_found; |
| 150 | u64 extents_skipped; | 180 | |
| 151 | int stage; | 181 | int block_rsv_retries; |
| 152 | int create_reloc_root; | 182 | |
| 183 | unsigned int stage:8; | ||
| 184 | unsigned int create_reloc_tree:1; | ||
| 185 | unsigned int merge_reloc_tree:1; | ||
| 153 | unsigned int found_file_extent:1; | 186 | unsigned int found_file_extent:1; |
| 154 | unsigned int found_old_snapshot:1; | 187 | unsigned int commit_transaction:1; |
| 155 | }; | 188 | }; |
| 156 | 189 | ||
| 157 | /* stages of data relocation */ | 190 | /* stages of data relocation */ |
| 158 | #define MOVE_DATA_EXTENTS 0 | 191 | #define MOVE_DATA_EXTENTS 0 |
| 159 | #define UPDATE_DATA_PTRS 1 | 192 | #define UPDATE_DATA_PTRS 1 |
| 160 | 193 | ||
| 161 | /* | 194 | static void remove_backref_node(struct backref_cache *cache, |
| 162 | * merge reloc tree to corresponding fs tree in worker threads | 195 | struct backref_node *node); |
| 163 | */ | 196 | static void __mark_block_processed(struct reloc_control *rc, |
| 164 | struct async_merge { | 197 | struct backref_node *node); |
| 165 | struct btrfs_work work; | ||
| 166 | struct reloc_control *rc; | ||
| 167 | struct btrfs_root *root; | ||
| 168 | struct completion *done; | ||
| 169 | atomic_t *num_pending; | ||
| 170 | }; | ||
| 171 | 198 | ||
| 172 | static void mapping_tree_init(struct mapping_tree *tree) | 199 | static void mapping_tree_init(struct mapping_tree *tree) |
| 173 | { | 200 | { |
| @@ -181,15 +208,80 @@ static void backref_cache_init(struct backref_cache *cache) | |||
| 181 | cache->rb_root = RB_ROOT; | 208 | cache->rb_root = RB_ROOT; |
| 182 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | 209 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) |
| 183 | INIT_LIST_HEAD(&cache->pending[i]); | 210 | INIT_LIST_HEAD(&cache->pending[i]); |
| 184 | spin_lock_init(&cache->lock); | 211 | INIT_LIST_HEAD(&cache->changed); |
| 212 | INIT_LIST_HEAD(&cache->detached); | ||
| 213 | INIT_LIST_HEAD(&cache->leaves); | ||
| 214 | } | ||
| 215 | |||
| 216 | static void backref_cache_cleanup(struct backref_cache *cache) | ||
| 217 | { | ||
| 218 | struct backref_node *node; | ||
| 219 | int i; | ||
| 220 | |||
| 221 | while (!list_empty(&cache->detached)) { | ||
| 222 | node = list_entry(cache->detached.next, | ||
| 223 | struct backref_node, list); | ||
| 224 | remove_backref_node(cache, node); | ||
| 225 | } | ||
| 226 | |||
| 227 | while (!list_empty(&cache->leaves)) { | ||
| 228 | node = list_entry(cache->leaves.next, | ||
| 229 | struct backref_node, lower); | ||
| 230 | remove_backref_node(cache, node); | ||
| 231 | } | ||
| 232 | |||
| 233 | cache->last_trans = 0; | ||
| 234 | |||
| 235 | for (i = 0; i < BTRFS_MAX_LEVEL; i++) | ||
| 236 | BUG_ON(!list_empty(&cache->pending[i])); | ||
| 237 | BUG_ON(!list_empty(&cache->changed)); | ||
| 238 | BUG_ON(!list_empty(&cache->detached)); | ||
| 239 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
| 240 | BUG_ON(cache->nr_nodes); | ||
| 241 | BUG_ON(cache->nr_edges); | ||
| 242 | } | ||
| 243 | |||
| 244 | static struct backref_node *alloc_backref_node(struct backref_cache *cache) | ||
| 245 | { | ||
| 246 | struct backref_node *node; | ||
| 247 | |||
| 248 | node = kzalloc(sizeof(*node), GFP_NOFS); | ||
| 249 | if (node) { | ||
| 250 | INIT_LIST_HEAD(&node->list); | ||
| 251 | INIT_LIST_HEAD(&node->upper); | ||
| 252 | INIT_LIST_HEAD(&node->lower); | ||
| 253 | RB_CLEAR_NODE(&node->rb_node); | ||
| 254 | cache->nr_nodes++; | ||
| 255 | } | ||
| 256 | return node; | ||
| 257 | } | ||
| 258 | |||
| 259 | static void free_backref_node(struct backref_cache *cache, | ||
| 260 | struct backref_node *node) | ||
| 261 | { | ||
| 262 | if (node) { | ||
| 263 | cache->nr_nodes--; | ||
| 264 | kfree(node); | ||
| 265 | } | ||
| 266 | } | ||
| 267 | |||
| 268 | static struct backref_edge *alloc_backref_edge(struct backref_cache *cache) | ||
| 269 | { | ||
| 270 | struct backref_edge *edge; | ||
| 271 | |||
| 272 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | ||
| 273 | if (edge) | ||
| 274 | cache->nr_edges++; | ||
| 275 | return edge; | ||
| 185 | } | 276 | } |
| 186 | 277 | ||
| 187 | static void backref_node_init(struct backref_node *node) | 278 | static void free_backref_edge(struct backref_cache *cache, |
| 279 | struct backref_edge *edge) | ||
| 188 | { | 280 | { |
| 189 | memset(node, 0, sizeof(*node)); | 281 | if (edge) { |
| 190 | INIT_LIST_HEAD(&node->upper); | 282 | cache->nr_edges--; |
| 191 | INIT_LIST_HEAD(&node->lower); | 283 | kfree(edge); |
| 192 | RB_CLEAR_NODE(&node->rb_node); | 284 | } |
| 193 | } | 285 | } |
| 194 | 286 | ||
| 195 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, | 287 | static struct rb_node *tree_insert(struct rb_root *root, u64 bytenr, |
| @@ -250,6 +342,7 @@ static struct backref_node *walk_up_backref(struct backref_node *node, | |||
| 250 | edges[idx++] = edge; | 342 | edges[idx++] = edge; |
| 251 | node = edge->node[UPPER]; | 343 | node = edge->node[UPPER]; |
| 252 | } | 344 | } |
| 345 | BUG_ON(node->detached); | ||
| 253 | *index = idx; | 346 | *index = idx; |
| 254 | return node; | 347 | return node; |
| 255 | } | 348 | } |
| @@ -281,13 +374,18 @@ static struct backref_node *walk_down_backref(struct backref_edge *edges[], | |||
| 281 | return NULL; | 374 | return NULL; |
| 282 | } | 375 | } |
| 283 | 376 | ||
| 377 | static void unlock_node_buffer(struct backref_node *node) | ||
| 378 | { | ||
| 379 | if (node->locked) { | ||
| 380 | btrfs_tree_unlock(node->eb); | ||
| 381 | node->locked = 0; | ||
| 382 | } | ||
| 383 | } | ||
| 384 | |||
| 284 | static void drop_node_buffer(struct backref_node *node) | 385 | static void drop_node_buffer(struct backref_node *node) |
| 285 | { | 386 | { |
| 286 | if (node->eb) { | 387 | if (node->eb) { |
| 287 | if (node->locked) { | 388 | unlock_node_buffer(node); |
| 288 | btrfs_tree_unlock(node->eb); | ||
| 289 | node->locked = 0; | ||
| 290 | } | ||
| 291 | free_extent_buffer(node->eb); | 389 | free_extent_buffer(node->eb); |
| 292 | node->eb = NULL; | 390 | node->eb = NULL; |
| 293 | } | 391 | } |
| @@ -296,14 +394,14 @@ static void drop_node_buffer(struct backref_node *node) | |||
| 296 | static void drop_backref_node(struct backref_cache *tree, | 394 | static void drop_backref_node(struct backref_cache *tree, |
| 297 | struct backref_node *node) | 395 | struct backref_node *node) |
| 298 | { | 396 | { |
| 299 | BUG_ON(!node->lowest); | ||
| 300 | BUG_ON(!list_empty(&node->upper)); | 397 | BUG_ON(!list_empty(&node->upper)); |
| 301 | 398 | ||
| 302 | drop_node_buffer(node); | 399 | drop_node_buffer(node); |
| 400 | list_del(&node->list); | ||
| 303 | list_del(&node->lower); | 401 | list_del(&node->lower); |
| 304 | 402 | if (!RB_EMPTY_NODE(&node->rb_node)) | |
| 305 | rb_erase(&node->rb_node, &tree->rb_root); | 403 | rb_erase(&node->rb_node, &tree->rb_root); |
| 306 | kfree(node); | 404 | free_backref_node(tree, node); |
| 307 | } | 405 | } |
| 308 | 406 | ||
| 309 | /* | 407 | /* |
| @@ -318,27 +416,121 @@ static void remove_backref_node(struct backref_cache *cache, | |||
| 318 | if (!node) | 416 | if (!node) |
| 319 | return; | 417 | return; |
| 320 | 418 | ||
| 321 | BUG_ON(!node->lowest); | 419 | BUG_ON(!node->lowest && !node->detached); |
| 322 | while (!list_empty(&node->upper)) { | 420 | while (!list_empty(&node->upper)) { |
| 323 | edge = list_entry(node->upper.next, struct backref_edge, | 421 | edge = list_entry(node->upper.next, struct backref_edge, |
| 324 | list[LOWER]); | 422 | list[LOWER]); |
| 325 | upper = edge->node[UPPER]; | 423 | upper = edge->node[UPPER]; |
| 326 | list_del(&edge->list[LOWER]); | 424 | list_del(&edge->list[LOWER]); |
| 327 | list_del(&edge->list[UPPER]); | 425 | list_del(&edge->list[UPPER]); |
| 328 | kfree(edge); | 426 | free_backref_edge(cache, edge); |
| 427 | |||
| 428 | if (RB_EMPTY_NODE(&upper->rb_node)) { | ||
| 429 | BUG_ON(!list_empty(&node->upper)); | ||
| 430 | drop_backref_node(cache, node); | ||
| 431 | node = upper; | ||
| 432 | node->lowest = 1; | ||
| 433 | continue; | ||
| 434 | } | ||
| 329 | /* | 435 | /* |
| 330 | * add the node to pending list if no other | 436 | * add the node to leaf node list if no other |
| 331 | * child block cached. | 437 | * child block cached. |
| 332 | */ | 438 | */ |
| 333 | if (list_empty(&upper->lower)) { | 439 | if (list_empty(&upper->lower)) { |
| 334 | list_add_tail(&upper->lower, | 440 | list_add_tail(&upper->lower, &cache->leaves); |
| 335 | &cache->pending[upper->level]); | ||
| 336 | upper->lowest = 1; | 441 | upper->lowest = 1; |
| 337 | } | 442 | } |
| 338 | } | 443 | } |
| 444 | |||
| 339 | drop_backref_node(cache, node); | 445 | drop_backref_node(cache, node); |
| 340 | } | 446 | } |
| 341 | 447 | ||
| 448 | static void update_backref_node(struct backref_cache *cache, | ||
| 449 | struct backref_node *node, u64 bytenr) | ||
| 450 | { | ||
| 451 | struct rb_node *rb_node; | ||
| 452 | rb_erase(&node->rb_node, &cache->rb_root); | ||
| 453 | node->bytenr = bytenr; | ||
| 454 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | ||
| 455 | BUG_ON(rb_node); | ||
| 456 | } | ||
| 457 | |||
| 458 | /* | ||
| 459 | * update backref cache after a transaction commit | ||
| 460 | */ | ||
| 461 | static int update_backref_cache(struct btrfs_trans_handle *trans, | ||
| 462 | struct backref_cache *cache) | ||
| 463 | { | ||
| 464 | struct backref_node *node; | ||
| 465 | int level = 0; | ||
| 466 | |||
| 467 | if (cache->last_trans == 0) { | ||
| 468 | cache->last_trans = trans->transid; | ||
| 469 | return 0; | ||
| 470 | } | ||
| 471 | |||
| 472 | if (cache->last_trans == trans->transid) | ||
| 473 | return 0; | ||
| 474 | |||
| 475 | /* | ||
| 476 | * detached nodes are used to avoid unnecessary backref | ||
| 477 | * lookup. transaction commit changes the extent tree. | ||
| 478 | * so the detached nodes are no longer useful. | ||
| 479 | */ | ||
| 480 | while (!list_empty(&cache->detached)) { | ||
| 481 | node = list_entry(cache->detached.next, | ||
| 482 | struct backref_node, list); | ||
| 483 | remove_backref_node(cache, node); | ||
| 484 | } | ||
| 485 | |||
| 486 | while (!list_empty(&cache->changed)) { | ||
| 487 | node = list_entry(cache->changed.next, | ||
| 488 | struct backref_node, list); | ||
| 489 | list_del_init(&node->list); | ||
| 490 | BUG_ON(node->pending); | ||
| 491 | update_backref_node(cache, node, node->new_bytenr); | ||
| 492 | } | ||
| 493 | |||
| 494 | /* | ||
| 495 | * some nodes can be left in the pending list if there were | ||
| 496 | * errors during processing the pending nodes. | ||
| 497 | */ | ||
| 498 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | ||
| 499 | list_for_each_entry(node, &cache->pending[level], list) { | ||
| 500 | BUG_ON(!node->pending); | ||
| 501 | if (node->bytenr == node->new_bytenr) | ||
| 502 | continue; | ||
| 503 | update_backref_node(cache, node, node->new_bytenr); | ||
| 504 | } | ||
| 505 | } | ||
| 506 | |||
| 507 | cache->last_trans = 0; | ||
| 508 | return 1; | ||
| 509 | } | ||
| 510 | |||
| 511 | static int should_ignore_root(struct btrfs_root *root) | ||
| 512 | { | ||
| 513 | struct btrfs_root *reloc_root; | ||
| 514 | |||
| 515 | if (!root->ref_cows) | ||
| 516 | return 0; | ||
| 517 | |||
| 518 | reloc_root = root->reloc_root; | ||
| 519 | if (!reloc_root) | ||
| 520 | return 0; | ||
| 521 | |||
| 522 | if (btrfs_root_last_snapshot(&reloc_root->root_item) == | ||
| 523 | root->fs_info->running_transaction->transid - 1) | ||
| 524 | return 0; | ||
| 525 | /* | ||
| 526 | * if there is reloc tree and it was created in previous | ||
| 527 | * transaction backref lookup can find the reloc tree, | ||
| 528 | * so backref node for the fs tree root is useless for | ||
| 529 | * relocation. | ||
| 530 | */ | ||
| 531 | return 1; | ||
| 532 | } | ||
| 533 | |||
| 342 | /* | 534 | /* |
| 343 | * find reloc tree by address of tree root | 535 | * find reloc tree by address of tree root |
| 344 | */ | 536 | */ |
| @@ -453,11 +645,12 @@ int find_inline_backref(struct extent_buffer *leaf, int slot, | |||
| 453 | * for all upper level blocks that directly/indirectly reference the | 645 | * for all upper level blocks that directly/indirectly reference the |
| 454 | * block are also cached. | 646 | * block are also cached. |
| 455 | */ | 647 | */ |
| 456 | static struct backref_node *build_backref_tree(struct reloc_control *rc, | 648 | static noinline_for_stack |
| 457 | struct backref_cache *cache, | 649 | struct backref_node *build_backref_tree(struct reloc_control *rc, |
| 458 | struct btrfs_key *node_key, | 650 | struct btrfs_key *node_key, |
| 459 | int level, u64 bytenr) | 651 | int level, u64 bytenr) |
| 460 | { | 652 | { |
| 653 | struct backref_cache *cache = &rc->backref_cache; | ||
| 461 | struct btrfs_path *path1; | 654 | struct btrfs_path *path1; |
| 462 | struct btrfs_path *path2; | 655 | struct btrfs_path *path2; |
| 463 | struct extent_buffer *eb; | 656 | struct extent_buffer *eb; |
| @@ -473,6 +666,8 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
| 473 | unsigned long end; | 666 | unsigned long end; |
| 474 | unsigned long ptr; | 667 | unsigned long ptr; |
| 475 | LIST_HEAD(list); | 668 | LIST_HEAD(list); |
| 669 | LIST_HEAD(useless); | ||
| 670 | int cowonly; | ||
| 476 | int ret; | 671 | int ret; |
| 477 | int err = 0; | 672 | int err = 0; |
| 478 | 673 | ||
| @@ -483,15 +678,13 @@ static struct backref_node *build_backref_tree(struct reloc_control *rc, | |||
| 483 | goto out; | 678 | goto out; |
| 484 | } | 679 | } |
| 485 | 680 | ||
| 486 | node = kmalloc(sizeof(*node), GFP_NOFS); | 681 | node = alloc_backref_node(cache); |
| 487 | if (!node) { | 682 | if (!node) { |
| 488 | err = -ENOMEM; | 683 | err = -ENOMEM; |
| 489 | goto out; | 684 | goto out; |
| 490 | } | 685 | } |
| 491 | 686 | ||
| 492 | backref_node_init(node); | ||
| 493 | node->bytenr = bytenr; | 687 | node->bytenr = bytenr; |
| 494 | node->owner = 0; | ||
| 495 | node->level = level; | 688 | node->level = level; |
| 496 | node->lowest = 1; | 689 | node->lowest = 1; |
| 497 | cur = node; | 690 | cur = node; |
| @@ -587,17 +780,20 @@ again: | |||
| 587 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 | 780 | #ifdef BTRFS_COMPAT_EXTENT_TREE_V0 |
| 588 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || | 781 | if (key.type == BTRFS_SHARED_BLOCK_REF_KEY || |
| 589 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | 782 | key.type == BTRFS_EXTENT_REF_V0_KEY) { |
| 590 | if (key.objectid == key.offset && | 783 | if (key.type == BTRFS_EXTENT_REF_V0_KEY) { |
| 591 | key.type == BTRFS_EXTENT_REF_V0_KEY) { | ||
| 592 | struct btrfs_extent_ref_v0 *ref0; | 784 | struct btrfs_extent_ref_v0 *ref0; |
| 593 | ref0 = btrfs_item_ptr(eb, path1->slots[0], | 785 | ref0 = btrfs_item_ptr(eb, path1->slots[0], |
| 594 | struct btrfs_extent_ref_v0); | 786 | struct btrfs_extent_ref_v0); |
| 595 | root = find_tree_root(rc, eb, ref0); | 787 | root = find_tree_root(rc, eb, ref0); |
| 596 | if (root) | 788 | if (!root->ref_cows) |
| 597 | cur->root = root; | 789 | cur->cowonly = 1; |
| 598 | else | 790 | if (key.objectid == key.offset) { |
| 599 | cur->old_root = 1; | 791 | if (root && !should_ignore_root(root)) |
| 600 | break; | 792 | cur->root = root; |
| 793 | else | ||
| 794 | list_add(&cur->list, &useless); | ||
| 795 | break; | ||
| 796 | } | ||
| 601 | } | 797 | } |
| 602 | #else | 798 | #else |
| 603 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); | 799 | BUG_ON(key.type == BTRFS_EXTENT_REF_V0_KEY); |
| @@ -614,22 +810,20 @@ again: | |||
| 614 | break; | 810 | break; |
| 615 | } | 811 | } |
| 616 | 812 | ||
| 617 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 813 | edge = alloc_backref_edge(cache); |
| 618 | if (!edge) { | 814 | if (!edge) { |
| 619 | err = -ENOMEM; | 815 | err = -ENOMEM; |
| 620 | goto out; | 816 | goto out; |
| 621 | } | 817 | } |
| 622 | rb_node = tree_search(&cache->rb_root, key.offset); | 818 | rb_node = tree_search(&cache->rb_root, key.offset); |
| 623 | if (!rb_node) { | 819 | if (!rb_node) { |
| 624 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 820 | upper = alloc_backref_node(cache); |
| 625 | if (!upper) { | 821 | if (!upper) { |
| 626 | kfree(edge); | 822 | free_backref_edge(cache, edge); |
| 627 | err = -ENOMEM; | 823 | err = -ENOMEM; |
| 628 | goto out; | 824 | goto out; |
| 629 | } | 825 | } |
| 630 | backref_node_init(upper); | ||
| 631 | upper->bytenr = key.offset; | 826 | upper->bytenr = key.offset; |
| 632 | upper->owner = 0; | ||
| 633 | upper->level = cur->level + 1; | 827 | upper->level = cur->level + 1; |
| 634 | /* | 828 | /* |
| 635 | * backrefs for the upper level block isn't | 829 | * backrefs for the upper level block isn't |
| @@ -639,11 +833,12 @@ again: | |||
| 639 | } else { | 833 | } else { |
| 640 | upper = rb_entry(rb_node, struct backref_node, | 834 | upper = rb_entry(rb_node, struct backref_node, |
| 641 | rb_node); | 835 | rb_node); |
| 836 | BUG_ON(!upper->checked); | ||
| 642 | INIT_LIST_HEAD(&edge->list[UPPER]); | 837 | INIT_LIST_HEAD(&edge->list[UPPER]); |
| 643 | } | 838 | } |
| 644 | list_add(&edge->list[LOWER], &cur->upper); | 839 | list_add_tail(&edge->list[LOWER], &cur->upper); |
| 645 | edge->node[UPPER] = upper; | ||
| 646 | edge->node[LOWER] = cur; | 840 | edge->node[LOWER] = cur; |
| 841 | edge->node[UPPER] = upper; | ||
| 647 | 842 | ||
| 648 | goto next; | 843 | goto next; |
| 649 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { | 844 | } else if (key.type != BTRFS_TREE_BLOCK_REF_KEY) { |
| @@ -657,11 +852,17 @@ again: | |||
| 657 | goto out; | 852 | goto out; |
| 658 | } | 853 | } |
| 659 | 854 | ||
| 855 | if (!root->ref_cows) | ||
| 856 | cur->cowonly = 1; | ||
| 857 | |||
| 660 | if (btrfs_root_level(&root->root_item) == cur->level) { | 858 | if (btrfs_root_level(&root->root_item) == cur->level) { |
| 661 | /* tree root */ | 859 | /* tree root */ |
| 662 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 860 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
| 663 | cur->bytenr); | 861 | cur->bytenr); |
| 664 | cur->root = root; | 862 | if (should_ignore_root(root)) |
| 863 | list_add(&cur->list, &useless); | ||
| 864 | else | ||
| 865 | cur->root = root; | ||
| 665 | break; | 866 | break; |
| 666 | } | 867 | } |
| 667 | 868 | ||
| @@ -692,11 +893,14 @@ again: | |||
| 692 | if (!path2->nodes[level]) { | 893 | if (!path2->nodes[level]) { |
| 693 | BUG_ON(btrfs_root_bytenr(&root->root_item) != | 894 | BUG_ON(btrfs_root_bytenr(&root->root_item) != |
| 694 | lower->bytenr); | 895 | lower->bytenr); |
| 695 | lower->root = root; | 896 | if (should_ignore_root(root)) |
| 897 | list_add(&lower->list, &useless); | ||
| 898 | else | ||
| 899 | lower->root = root; | ||
| 696 | break; | 900 | break; |
| 697 | } | 901 | } |
| 698 | 902 | ||
| 699 | edge = kzalloc(sizeof(*edge), GFP_NOFS); | 903 | edge = alloc_backref_edge(cache); |
| 700 | if (!edge) { | 904 | if (!edge) { |
| 701 | err = -ENOMEM; | 905 | err = -ENOMEM; |
| 702 | goto out; | 906 | goto out; |
| @@ -705,16 +909,17 @@ again: | |||
| 705 | eb = path2->nodes[level]; | 909 | eb = path2->nodes[level]; |
| 706 | rb_node = tree_search(&cache->rb_root, eb->start); | 910 | rb_node = tree_search(&cache->rb_root, eb->start); |
| 707 | if (!rb_node) { | 911 | if (!rb_node) { |
| 708 | upper = kmalloc(sizeof(*upper), GFP_NOFS); | 912 | upper = alloc_backref_node(cache); |
| 709 | if (!upper) { | 913 | if (!upper) { |
| 710 | kfree(edge); | 914 | free_backref_edge(cache, edge); |
| 711 | err = -ENOMEM; | 915 | err = -ENOMEM; |
| 712 | goto out; | 916 | goto out; |
| 713 | } | 917 | } |
| 714 | backref_node_init(upper); | ||
| 715 | upper->bytenr = eb->start; | 918 | upper->bytenr = eb->start; |
| 716 | upper->owner = btrfs_header_owner(eb); | 919 | upper->owner = btrfs_header_owner(eb); |
| 717 | upper->level = lower->level + 1; | 920 | upper->level = lower->level + 1; |
| 921 | if (!root->ref_cows) | ||
| 922 | upper->cowonly = 1; | ||
| 718 | 923 | ||
| 719 | /* | 924 | /* |
| 720 | * if we know the block isn't shared | 925 | * if we know the block isn't shared |
| @@ -744,10 +949,12 @@ again: | |||
| 744 | rb_node); | 949 | rb_node); |
| 745 | BUG_ON(!upper->checked); | 950 | BUG_ON(!upper->checked); |
| 746 | INIT_LIST_HEAD(&edge->list[UPPER]); | 951 | INIT_LIST_HEAD(&edge->list[UPPER]); |
| 952 | if (!upper->owner) | ||
| 953 | upper->owner = btrfs_header_owner(eb); | ||
| 747 | } | 954 | } |
| 748 | list_add_tail(&edge->list[LOWER], &lower->upper); | 955 | list_add_tail(&edge->list[LOWER], &lower->upper); |
| 749 | edge->node[UPPER] = upper; | ||
| 750 | edge->node[LOWER] = lower; | 956 | edge->node[LOWER] = lower; |
| 957 | edge->node[UPPER] = upper; | ||
| 751 | 958 | ||
| 752 | if (rb_node) | 959 | if (rb_node) |
| 753 | break; | 960 | break; |
| @@ -785,8 +992,13 @@ next: | |||
| 785 | * into the cache. | 992 | * into the cache. |
| 786 | */ | 993 | */ |
| 787 | BUG_ON(!node->checked); | 994 | BUG_ON(!node->checked); |
| 788 | rb_node = tree_insert(&cache->rb_root, node->bytenr, &node->rb_node); | 995 | cowonly = node->cowonly; |
| 789 | BUG_ON(rb_node); | 996 | if (!cowonly) { |
| 997 | rb_node = tree_insert(&cache->rb_root, node->bytenr, | ||
| 998 | &node->rb_node); | ||
| 999 | BUG_ON(rb_node); | ||
| 1000 | list_add_tail(&node->lower, &cache->leaves); | ||
| 1001 | } | ||
| 790 | 1002 | ||
| 791 | list_for_each_entry(edge, &node->upper, list[LOWER]) | 1003 | list_for_each_entry(edge, &node->upper, list[LOWER]) |
| 792 | list_add_tail(&edge->list[UPPER], &list); | 1004 | list_add_tail(&edge->list[UPPER], &list); |
| @@ -795,6 +1007,14 @@ next: | |||
| 795 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); | 1007 | edge = list_entry(list.next, struct backref_edge, list[UPPER]); |
| 796 | list_del_init(&edge->list[UPPER]); | 1008 | list_del_init(&edge->list[UPPER]); |
| 797 | upper = edge->node[UPPER]; | 1009 | upper = edge->node[UPPER]; |
| 1010 | if (upper->detached) { | ||
| 1011 | list_del(&edge->list[LOWER]); | ||
| 1012 | lower = edge->node[LOWER]; | ||
| 1013 | free_backref_edge(cache, edge); | ||
| 1014 | if (list_empty(&lower->upper)) | ||
| 1015 | list_add(&lower->list, &useless); | ||
| 1016 | continue; | ||
| 1017 | } | ||
| 798 | 1018 | ||
| 799 | if (!RB_EMPTY_NODE(&upper->rb_node)) { | 1019 | if (!RB_EMPTY_NODE(&upper->rb_node)) { |
| 800 | if (upper->lowest) { | 1020 | if (upper->lowest) { |
| @@ -807,25 +1027,69 @@ next: | |||
| 807 | } | 1027 | } |
| 808 | 1028 | ||
| 809 | BUG_ON(!upper->checked); | 1029 | BUG_ON(!upper->checked); |
| 810 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, | 1030 | BUG_ON(cowonly != upper->cowonly); |
| 811 | &upper->rb_node); | 1031 | if (!cowonly) { |
| 812 | BUG_ON(rb_node); | 1032 | rb_node = tree_insert(&cache->rb_root, upper->bytenr, |
| 1033 | &upper->rb_node); | ||
| 1034 | BUG_ON(rb_node); | ||
| 1035 | } | ||
| 813 | 1036 | ||
| 814 | list_add_tail(&edge->list[UPPER], &upper->lower); | 1037 | list_add_tail(&edge->list[UPPER], &upper->lower); |
| 815 | 1038 | ||
| 816 | list_for_each_entry(edge, &upper->upper, list[LOWER]) | 1039 | list_for_each_entry(edge, &upper->upper, list[LOWER]) |
| 817 | list_add_tail(&edge->list[UPPER], &list); | 1040 | list_add_tail(&edge->list[UPPER], &list); |
| 818 | } | 1041 | } |
| 1042 | /* | ||
| 1043 | * process useless backref nodes. backref nodes for tree leaves | ||
| 1044 | * are deleted from the cache. backref nodes for upper level | ||
| 1045 | * tree blocks are left in the cache to avoid unnecessary backref | ||
| 1046 | * lookup. | ||
| 1047 | */ | ||
| 1048 | while (!list_empty(&useless)) { | ||
| 1049 | upper = list_entry(useless.next, struct backref_node, list); | ||
| 1050 | list_del_init(&upper->list); | ||
| 1051 | BUG_ON(!list_empty(&upper->upper)); | ||
| 1052 | if (upper == node) | ||
| 1053 | node = NULL; | ||
| 1054 | if (upper->lowest) { | ||
| 1055 | list_del_init(&upper->lower); | ||
| 1056 | upper->lowest = 0; | ||
| 1057 | } | ||
| 1058 | while (!list_empty(&upper->lower)) { | ||
| 1059 | edge = list_entry(upper->lower.next, | ||
| 1060 | struct backref_edge, list[UPPER]); | ||
| 1061 | list_del(&edge->list[UPPER]); | ||
| 1062 | list_del(&edge->list[LOWER]); | ||
| 1063 | lower = edge->node[LOWER]; | ||
| 1064 | free_backref_edge(cache, edge); | ||
| 1065 | |||
| 1066 | if (list_empty(&lower->upper)) | ||
| 1067 | list_add(&lower->list, &useless); | ||
| 1068 | } | ||
| 1069 | __mark_block_processed(rc, upper); | ||
| 1070 | if (upper->level > 0) { | ||
| 1071 | list_add(&upper->list, &cache->detached); | ||
| 1072 | upper->detached = 1; | ||
| 1073 | } else { | ||
| 1074 | rb_erase(&upper->rb_node, &cache->rb_root); | ||
| 1075 | free_backref_node(cache, upper); | ||
| 1076 | } | ||
| 1077 | } | ||
| 819 | out: | 1078 | out: |
| 820 | btrfs_free_path(path1); | 1079 | btrfs_free_path(path1); |
| 821 | btrfs_free_path(path2); | 1080 | btrfs_free_path(path2); |
| 822 | if (err) { | 1081 | if (err) { |
| 823 | INIT_LIST_HEAD(&list); | 1082 | while (!list_empty(&useless)) { |
| 1083 | lower = list_entry(useless.next, | ||
| 1084 | struct backref_node, upper); | ||
| 1085 | list_del_init(&lower->upper); | ||
| 1086 | } | ||
| 824 | upper = node; | 1087 | upper = node; |
| 1088 | INIT_LIST_HEAD(&list); | ||
| 825 | while (upper) { | 1089 | while (upper) { |
| 826 | if (RB_EMPTY_NODE(&upper->rb_node)) { | 1090 | if (RB_EMPTY_NODE(&upper->rb_node)) { |
| 827 | list_splice_tail(&upper->upper, &list); | 1091 | list_splice_tail(&upper->upper, &list); |
| 828 | kfree(upper); | 1092 | free_backref_node(cache, upper); |
| 829 | } | 1093 | } |
| 830 | 1094 | ||
| 831 | if (list_empty(&list)) | 1095 | if (list_empty(&list)) |
| @@ -833,15 +1097,104 @@ out: | |||
| 833 | 1097 | ||
| 834 | edge = list_entry(list.next, struct backref_edge, | 1098 | edge = list_entry(list.next, struct backref_edge, |
| 835 | list[LOWER]); | 1099 | list[LOWER]); |
| 1100 | list_del(&edge->list[LOWER]); | ||
| 836 | upper = edge->node[UPPER]; | 1101 | upper = edge->node[UPPER]; |
| 837 | kfree(edge); | 1102 | free_backref_edge(cache, edge); |
| 838 | } | 1103 | } |
| 839 | return ERR_PTR(err); | 1104 | return ERR_PTR(err); |
| 840 | } | 1105 | } |
| 1106 | BUG_ON(node && node->detached); | ||
| 841 | return node; | 1107 | return node; |
| 842 | } | 1108 | } |
| 843 | 1109 | ||
| 844 | /* | 1110 | /* |
| 1111 | * helper to add backref node for the newly created snapshot. | ||
| 1112 | * the backref node is created by cloning backref node that | ||
| 1113 | * corresponds to root of source tree | ||
| 1114 | */ | ||
| 1115 | static int clone_backref_node(struct btrfs_trans_handle *trans, | ||
| 1116 | struct reloc_control *rc, | ||
| 1117 | struct btrfs_root *src, | ||
| 1118 | struct btrfs_root *dest) | ||
| 1119 | { | ||
| 1120 | struct btrfs_root *reloc_root = src->reloc_root; | ||
| 1121 | struct backref_cache *cache = &rc->backref_cache; | ||
| 1122 | struct backref_node *node = NULL; | ||
| 1123 | struct backref_node *new_node; | ||
| 1124 | struct backref_edge *edge; | ||
| 1125 | struct backref_edge *new_edge; | ||
| 1126 | struct rb_node *rb_node; | ||
| 1127 | |||
| 1128 | if (cache->last_trans > 0) | ||
| 1129 | update_backref_cache(trans, cache); | ||
| 1130 | |||
| 1131 | rb_node = tree_search(&cache->rb_root, src->commit_root->start); | ||
| 1132 | if (rb_node) { | ||
| 1133 | node = rb_entry(rb_node, struct backref_node, rb_node); | ||
| 1134 | if (node->detached) | ||
| 1135 | node = NULL; | ||
| 1136 | else | ||
| 1137 | BUG_ON(node->new_bytenr != reloc_root->node->start); | ||
| 1138 | } | ||
| 1139 | |||
| 1140 | if (!node) { | ||
| 1141 | rb_node = tree_search(&cache->rb_root, | ||
| 1142 | reloc_root->commit_root->start); | ||
| 1143 | if (rb_node) { | ||
| 1144 | node = rb_entry(rb_node, struct backref_node, | ||
| 1145 | rb_node); | ||
| 1146 | BUG_ON(node->detached); | ||
| 1147 | } | ||
| 1148 | } | ||
| 1149 | |||
| 1150 | if (!node) | ||
| 1151 | return 0; | ||
| 1152 | |||
| 1153 | new_node = alloc_backref_node(cache); | ||
| 1154 | if (!new_node) | ||
| 1155 | return -ENOMEM; | ||
| 1156 | |||
| 1157 | new_node->bytenr = dest->node->start; | ||
| 1158 | new_node->level = node->level; | ||
| 1159 | new_node->lowest = node->lowest; | ||
| 1160 | new_node->root = dest; | ||
| 1161 | |||
| 1162 | if (!node->lowest) { | ||
| 1163 | list_for_each_entry(edge, &node->lower, list[UPPER]) { | ||
| 1164 | new_edge = alloc_backref_edge(cache); | ||
| 1165 | if (!new_edge) | ||
| 1166 | goto fail; | ||
| 1167 | |||
| 1168 | new_edge->node[UPPER] = new_node; | ||
| 1169 | new_edge->node[LOWER] = edge->node[LOWER]; | ||
| 1170 | list_add_tail(&new_edge->list[UPPER], | ||
| 1171 | &new_node->lower); | ||
| 1172 | } | ||
| 1173 | } | ||
| 1174 | |||
| 1175 | rb_node = tree_insert(&cache->rb_root, new_node->bytenr, | ||
| 1176 | &new_node->rb_node); | ||
| 1177 | BUG_ON(rb_node); | ||
| 1178 | |||
| 1179 | if (!new_node->lowest) { | ||
| 1180 | list_for_each_entry(new_edge, &new_node->lower, list[UPPER]) { | ||
| 1181 | list_add_tail(&new_edge->list[LOWER], | ||
| 1182 | &new_edge->node[LOWER]->upper); | ||
| 1183 | } | ||
| 1184 | } | ||
| 1185 | return 0; | ||
| 1186 | fail: | ||
| 1187 | while (!list_empty(&new_node->lower)) { | ||
| 1188 | new_edge = list_entry(new_node->lower.next, | ||
| 1189 | struct backref_edge, list[UPPER]); | ||
| 1190 | list_del(&new_edge->list[UPPER]); | ||
| 1191 | free_backref_edge(cache, new_edge); | ||
| 1192 | } | ||
| 1193 | free_backref_node(cache, new_node); | ||
| 1194 | return -ENOMEM; | ||
| 1195 | } | ||
| 1196 | |||
| 1197 | /* | ||
| 845 | * helper to add 'address of tree root -> reloc tree' mapping | 1198 | * helper to add 'address of tree root -> reloc tree' mapping |
| 846 | */ | 1199 | */ |
| 847 | static int __add_reloc_root(struct btrfs_root *root) | 1200 | static int __add_reloc_root(struct btrfs_root *root) |
| @@ -901,12 +1254,8 @@ static int __update_reloc_root(struct btrfs_root *root, int del) | |||
| 901 | return 0; | 1254 | return 0; |
| 902 | } | 1255 | } |
| 903 | 1256 | ||
| 904 | /* | 1257 | static struct btrfs_root *create_reloc_root(struct btrfs_trans_handle *trans, |
| 905 | * create reloc tree for a given fs tree. reloc tree is just a | 1258 | struct btrfs_root *root, u64 objectid) |
| 906 | * snapshot of the fs tree with special root objectid. | ||
| 907 | */ | ||
| 908 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
| 909 | struct btrfs_root *root) | ||
| 910 | { | 1259 | { |
| 911 | struct btrfs_root *reloc_root; | 1260 | struct btrfs_root *reloc_root; |
| 912 | struct extent_buffer *eb; | 1261 | struct extent_buffer *eb; |
| @@ -914,36 +1263,45 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
| 914 | struct btrfs_key root_key; | 1263 | struct btrfs_key root_key; |
| 915 | int ret; | 1264 | int ret; |
| 916 | 1265 | ||
| 917 | if (root->reloc_root) { | ||
| 918 | reloc_root = root->reloc_root; | ||
| 919 | reloc_root->last_trans = trans->transid; | ||
| 920 | return 0; | ||
| 921 | } | ||
| 922 | |||
| 923 | if (!root->fs_info->reloc_ctl || | ||
| 924 | !root->fs_info->reloc_ctl->create_reloc_root || | ||
| 925 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
| 926 | return 0; | ||
| 927 | |||
| 928 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); | 1266 | root_item = kmalloc(sizeof(*root_item), GFP_NOFS); |
| 929 | BUG_ON(!root_item); | 1267 | BUG_ON(!root_item); |
| 930 | 1268 | ||
| 931 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; | 1269 | root_key.objectid = BTRFS_TREE_RELOC_OBJECTID; |
| 932 | root_key.type = BTRFS_ROOT_ITEM_KEY; | 1270 | root_key.type = BTRFS_ROOT_ITEM_KEY; |
| 933 | root_key.offset = root->root_key.objectid; | 1271 | root_key.offset = objectid; |
| 934 | 1272 | ||
| 935 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, | 1273 | if (root->root_key.objectid == objectid) { |
| 936 | BTRFS_TREE_RELOC_OBJECTID); | 1274 | /* called by btrfs_init_reloc_root */ |
| 937 | BUG_ON(ret); | 1275 | ret = btrfs_copy_root(trans, root, root->commit_root, &eb, |
| 1276 | BTRFS_TREE_RELOC_OBJECTID); | ||
| 1277 | BUG_ON(ret); | ||
| 1278 | |||
| 1279 | btrfs_set_root_last_snapshot(&root->root_item, | ||
| 1280 | trans->transid - 1); | ||
| 1281 | } else { | ||
| 1282 | /* | ||
| 1283 | * called by btrfs_reloc_post_snapshot_hook. | ||
| 1284 | * the source tree is a reloc tree, all tree blocks | ||
| 1285 | * modified after it was created have RELOC flag | ||
| 1286 | * set in their headers. so it's OK to not update | ||
| 1287 | * the 'last_snapshot'. | ||
| 1288 | */ | ||
| 1289 | ret = btrfs_copy_root(trans, root, root->node, &eb, | ||
| 1290 | BTRFS_TREE_RELOC_OBJECTID); | ||
| 1291 | BUG_ON(ret); | ||
| 1292 | } | ||
| 938 | 1293 | ||
| 939 | btrfs_set_root_last_snapshot(&root->root_item, trans->transid - 1); | ||
| 940 | memcpy(root_item, &root->root_item, sizeof(*root_item)); | 1294 | memcpy(root_item, &root->root_item, sizeof(*root_item)); |
| 941 | btrfs_set_root_refs(root_item, 1); | ||
| 942 | btrfs_set_root_bytenr(root_item, eb->start); | 1295 | btrfs_set_root_bytenr(root_item, eb->start); |
| 943 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); | 1296 | btrfs_set_root_level(root_item, btrfs_header_level(eb)); |
| 944 | btrfs_set_root_generation(root_item, trans->transid); | 1297 | btrfs_set_root_generation(root_item, trans->transid); |
| 945 | memset(&root_item->drop_progress, 0, sizeof(struct btrfs_disk_key)); | 1298 | |
| 946 | root_item->drop_level = 0; | 1299 | if (root->root_key.objectid == objectid) { |
| 1300 | btrfs_set_root_refs(root_item, 0); | ||
| 1301 | memset(&root_item->drop_progress, 0, | ||
| 1302 | sizeof(struct btrfs_disk_key)); | ||
| 1303 | root_item->drop_level = 0; | ||
| 1304 | } | ||
| 947 | 1305 | ||
| 948 | btrfs_tree_unlock(eb); | 1306 | btrfs_tree_unlock(eb); |
| 949 | free_extent_buffer(eb); | 1307 | free_extent_buffer(eb); |
| @@ -957,6 +1315,37 @@ int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | |||
| 957 | &root_key); | 1315 | &root_key); |
| 958 | BUG_ON(IS_ERR(reloc_root)); | 1316 | BUG_ON(IS_ERR(reloc_root)); |
| 959 | reloc_root->last_trans = trans->transid; | 1317 | reloc_root->last_trans = trans->transid; |
| 1318 | return reloc_root; | ||
| 1319 | } | ||
| 1320 | |||
| 1321 | /* | ||
| 1322 | * create reloc tree for a given fs tree. reloc tree is just a | ||
| 1323 | * snapshot of the fs tree with special root objectid. | ||
| 1324 | */ | ||
| 1325 | int btrfs_init_reloc_root(struct btrfs_trans_handle *trans, | ||
| 1326 | struct btrfs_root *root) | ||
| 1327 | { | ||
| 1328 | struct btrfs_root *reloc_root; | ||
| 1329 | struct reloc_control *rc = root->fs_info->reloc_ctl; | ||
| 1330 | int clear_rsv = 0; | ||
| 1331 | |||
| 1332 | if (root->reloc_root) { | ||
| 1333 | reloc_root = root->reloc_root; | ||
| 1334 | reloc_root->last_trans = trans->transid; | ||
| 1335 | return 0; | ||
| 1336 | } | ||
| 1337 | |||
| 1338 | if (!rc || !rc->create_reloc_tree || | ||
| 1339 | root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) | ||
| 1340 | return 0; | ||
| 1341 | |||
| 1342 | if (!trans->block_rsv) { | ||
| 1343 | trans->block_rsv = rc->block_rsv; | ||
| 1344 | clear_rsv = 1; | ||
| 1345 | } | ||
| 1346 | reloc_root = create_reloc_root(trans, root, root->root_key.objectid); | ||
| 1347 | if (clear_rsv) | ||
| 1348 | trans->block_rsv = NULL; | ||
| 960 | 1349 | ||
| 961 | __add_reloc_root(reloc_root); | 1350 | __add_reloc_root(reloc_root); |
| 962 | root->reloc_root = reloc_root; | 1351 | root->reloc_root = reloc_root; |
| @@ -980,7 +1369,8 @@ int btrfs_update_reloc_root(struct btrfs_trans_handle *trans, | |||
| 980 | reloc_root = root->reloc_root; | 1369 | reloc_root = root->reloc_root; |
| 981 | root_item = &reloc_root->root_item; | 1370 | root_item = &reloc_root->root_item; |
| 982 | 1371 | ||
| 983 | if (btrfs_root_refs(root_item) == 0) { | 1372 | if (root->fs_info->reloc_ctl->merge_reloc_tree && |
| 1373 | btrfs_root_refs(root_item) == 0) { | ||
| 984 | root->reloc_root = NULL; | 1374 | root->reloc_root = NULL; |
| 985 | del = 1; | 1375 | del = 1; |
| 986 | } | 1376 | } |
| @@ -1102,8 +1492,7 @@ static int get_new_location(struct inode *reloc_inode, u64 *new_bytenr, | |||
| 1102 | goto out; | 1492 | goto out; |
| 1103 | } | 1493 | } |
| 1104 | 1494 | ||
| 1105 | if (new_bytenr) | 1495 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); |
| 1106 | *new_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 1107 | ret = 0; | 1496 | ret = 0; |
| 1108 | out: | 1497 | out: |
| 1109 | btrfs_free_path(path); | 1498 | btrfs_free_path(path); |
| @@ -1114,19 +1503,18 @@ out: | |||
| 1114 | * update file extent items in the tree leaf to point to | 1503 | * update file extent items in the tree leaf to point to |
| 1115 | * the new locations. | 1504 | * the new locations. |
| 1116 | */ | 1505 | */ |
| 1117 | static int replace_file_extents(struct btrfs_trans_handle *trans, | 1506 | static noinline_for_stack |
| 1118 | struct reloc_control *rc, | 1507 | int replace_file_extents(struct btrfs_trans_handle *trans, |
| 1119 | struct btrfs_root *root, | 1508 | struct reloc_control *rc, |
| 1120 | struct extent_buffer *leaf, | 1509 | struct btrfs_root *root, |
| 1121 | struct list_head *inode_list) | 1510 | struct extent_buffer *leaf) |
| 1122 | { | 1511 | { |
| 1123 | struct btrfs_key key; | 1512 | struct btrfs_key key; |
| 1124 | struct btrfs_file_extent_item *fi; | 1513 | struct btrfs_file_extent_item *fi; |
| 1125 | struct inode *inode = NULL; | 1514 | struct inode *inode = NULL; |
| 1126 | struct inodevec *ivec = NULL; | ||
| 1127 | u64 parent; | 1515 | u64 parent; |
| 1128 | u64 bytenr; | 1516 | u64 bytenr; |
| 1129 | u64 new_bytenr; | 1517 | u64 new_bytenr = 0; |
| 1130 | u64 num_bytes; | 1518 | u64 num_bytes; |
| 1131 | u64 end; | 1519 | u64 end; |
| 1132 | u32 nritems; | 1520 | u32 nritems; |
| @@ -1166,21 +1554,12 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 1166 | * to complete and drop the extent cache | 1554 | * to complete and drop the extent cache |
| 1167 | */ | 1555 | */ |
| 1168 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { | 1556 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) { |
| 1169 | if (!ivec || ivec->nr == INODEVEC_SIZE) { | ||
| 1170 | ivec = kmalloc(sizeof(*ivec), GFP_NOFS); | ||
| 1171 | BUG_ON(!ivec); | ||
| 1172 | ivec->nr = 0; | ||
| 1173 | list_add_tail(&ivec->list, inode_list); | ||
| 1174 | } | ||
| 1175 | if (first) { | 1557 | if (first) { |
| 1176 | inode = find_next_inode(root, key.objectid); | 1558 | inode = find_next_inode(root, key.objectid); |
| 1177 | if (inode) | ||
| 1178 | ivec->inode[ivec->nr++] = inode; | ||
| 1179 | first = 0; | 1559 | first = 0; |
| 1180 | } else if (inode && inode->i_ino < key.objectid) { | 1560 | } else if (inode && inode->i_ino < key.objectid) { |
| 1561 | btrfs_add_delayed_iput(inode); | ||
| 1181 | inode = find_next_inode(root, key.objectid); | 1562 | inode = find_next_inode(root, key.objectid); |
| 1182 | if (inode) | ||
| 1183 | ivec->inode[ivec->nr++] = inode; | ||
| 1184 | } | 1563 | } |
| 1185 | if (inode && inode->i_ino == key.objectid) { | 1564 | if (inode && inode->i_ino == key.objectid) { |
| 1186 | end = key.offset + | 1565 | end = key.offset + |
| @@ -1204,8 +1583,10 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 1204 | 1583 | ||
| 1205 | ret = get_new_location(rc->data_inode, &new_bytenr, | 1584 | ret = get_new_location(rc->data_inode, &new_bytenr, |
| 1206 | bytenr, num_bytes); | 1585 | bytenr, num_bytes); |
| 1207 | if (ret > 0) | 1586 | if (ret > 0) { |
| 1587 | WARN_ON(1); | ||
| 1208 | continue; | 1588 | continue; |
| 1589 | } | ||
| 1209 | BUG_ON(ret < 0); | 1590 | BUG_ON(ret < 0); |
| 1210 | 1591 | ||
| 1211 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); | 1592 | btrfs_set_file_extent_disk_bytenr(leaf, fi, new_bytenr); |
| @@ -1225,6 +1606,8 @@ static int replace_file_extents(struct btrfs_trans_handle *trans, | |||
| 1225 | } | 1606 | } |
| 1226 | if (dirty) | 1607 | if (dirty) |
| 1227 | btrfs_mark_buffer_dirty(leaf); | 1608 | btrfs_mark_buffer_dirty(leaf); |
| 1609 | if (inode) | ||
| 1610 | btrfs_add_delayed_iput(inode); | ||
| 1228 | return 0; | 1611 | return 0; |
| 1229 | } | 1612 | } |
| 1230 | 1613 | ||
| @@ -1248,11 +1631,11 @@ int memcmp_node_keys(struct extent_buffer *eb, int slot, | |||
| 1248 | * if no block got replaced, 0 is returned. if there are other | 1631 | * if no block got replaced, 0 is returned. if there are other |
| 1249 | * errors, a negative error number is returned. | 1632 | * errors, a negative error number is returned. |
| 1250 | */ | 1633 | */ |
| 1251 | static int replace_path(struct btrfs_trans_handle *trans, | 1634 | static noinline_for_stack |
| 1252 | struct btrfs_root *dest, struct btrfs_root *src, | 1635 | int replace_path(struct btrfs_trans_handle *trans, |
| 1253 | struct btrfs_path *path, struct btrfs_key *next_key, | 1636 | struct btrfs_root *dest, struct btrfs_root *src, |
| 1254 | struct extent_buffer **leaf, | 1637 | struct btrfs_path *path, struct btrfs_key *next_key, |
| 1255 | int lowest_level, int max_level) | 1638 | int lowest_level, int max_level) |
| 1256 | { | 1639 | { |
| 1257 | struct extent_buffer *eb; | 1640 | struct extent_buffer *eb; |
| 1258 | struct extent_buffer *parent; | 1641 | struct extent_buffer *parent; |
| @@ -1263,16 +1646,16 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1263 | u64 new_ptr_gen; | 1646 | u64 new_ptr_gen; |
| 1264 | u64 last_snapshot; | 1647 | u64 last_snapshot; |
| 1265 | u32 blocksize; | 1648 | u32 blocksize; |
| 1649 | int cow = 0; | ||
| 1266 | int level; | 1650 | int level; |
| 1267 | int ret; | 1651 | int ret; |
| 1268 | int slot; | 1652 | int slot; |
| 1269 | 1653 | ||
| 1270 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); | 1654 | BUG_ON(src->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID); |
| 1271 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); | 1655 | BUG_ON(dest->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID); |
| 1272 | BUG_ON(lowest_level > 1 && leaf); | ||
| 1273 | 1656 | ||
| 1274 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); | 1657 | last_snapshot = btrfs_root_last_snapshot(&src->root_item); |
| 1275 | 1658 | again: | |
| 1276 | slot = path->slots[lowest_level]; | 1659 | slot = path->slots[lowest_level]; |
| 1277 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); | 1660 | btrfs_node_key_to_cpu(path->nodes[lowest_level], &key, slot); |
| 1278 | 1661 | ||
| @@ -1286,8 +1669,10 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1286 | return 0; | 1669 | return 0; |
| 1287 | } | 1670 | } |
| 1288 | 1671 | ||
| 1289 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); | 1672 | if (cow) { |
| 1290 | BUG_ON(ret); | 1673 | ret = btrfs_cow_block(trans, dest, eb, NULL, 0, &eb); |
| 1674 | BUG_ON(ret); | ||
| 1675 | } | ||
| 1291 | btrfs_set_lock_blocking(eb); | 1676 | btrfs_set_lock_blocking(eb); |
| 1292 | 1677 | ||
| 1293 | if (next_key) { | 1678 | if (next_key) { |
| @@ -1331,7 +1716,7 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1331 | 1716 | ||
| 1332 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || | 1717 | if (new_bytenr == 0 || old_ptr_gen > last_snapshot || |
| 1333 | memcmp_node_keys(parent, slot, path, level)) { | 1718 | memcmp_node_keys(parent, slot, path, level)) { |
| 1334 | if (level <= lowest_level && !leaf) { | 1719 | if (level <= lowest_level) { |
| 1335 | ret = 0; | 1720 | ret = 0; |
| 1336 | break; | 1721 | break; |
| 1337 | } | 1722 | } |
| @@ -1339,16 +1724,12 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1339 | eb = read_tree_block(dest, old_bytenr, blocksize, | 1724 | eb = read_tree_block(dest, old_bytenr, blocksize, |
| 1340 | old_ptr_gen); | 1725 | old_ptr_gen); |
| 1341 | btrfs_tree_lock(eb); | 1726 | btrfs_tree_lock(eb); |
| 1342 | ret = btrfs_cow_block(trans, dest, eb, parent, | 1727 | if (cow) { |
| 1343 | slot, &eb); | 1728 | ret = btrfs_cow_block(trans, dest, eb, parent, |
| 1344 | BUG_ON(ret); | 1729 | slot, &eb); |
| 1345 | btrfs_set_lock_blocking(eb); | 1730 | BUG_ON(ret); |
| 1346 | |||
| 1347 | if (level <= lowest_level) { | ||
| 1348 | *leaf = eb; | ||
| 1349 | ret = 0; | ||
| 1350 | break; | ||
| 1351 | } | 1731 | } |
| 1732 | btrfs_set_lock_blocking(eb); | ||
| 1352 | 1733 | ||
| 1353 | btrfs_tree_unlock(parent); | 1734 | btrfs_tree_unlock(parent); |
| 1354 | free_extent_buffer(parent); | 1735 | free_extent_buffer(parent); |
| @@ -1357,6 +1738,13 @@ static int replace_path(struct btrfs_trans_handle *trans, | |||
| 1357 | continue; | 1738 | continue; |
| 1358 | } | 1739 | } |
| 1359 | 1740 | ||
| 1741 | if (!cow) { | ||
| 1742 | btrfs_tree_unlock(parent); | ||
| 1743 | free_extent_buffer(parent); | ||
| 1744 | cow = 1; | ||
| 1745 | goto again; | ||
| 1746 | } | ||
| 1747 | |||
| 1360 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 1748 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
| 1361 | path->slots[level]); | 1749 | path->slots[level]); |
| 1362 | btrfs_release_path(src, path); | 1750 | btrfs_release_path(src, path); |
| @@ -1562,20 +1950,6 @@ static int invalidate_extent_cache(struct btrfs_root *root, | |||
| 1562 | return 0; | 1950 | return 0; |
| 1563 | } | 1951 | } |
| 1564 | 1952 | ||
| 1565 | static void put_inodes(struct list_head *list) | ||
| 1566 | { | ||
| 1567 | struct inodevec *ivec; | ||
| 1568 | while (!list_empty(list)) { | ||
| 1569 | ivec = list_entry(list->next, struct inodevec, list); | ||
| 1570 | list_del(&ivec->list); | ||
| 1571 | while (ivec->nr > 0) { | ||
| 1572 | ivec->nr--; | ||
| 1573 | iput(ivec->inode[ivec->nr]); | ||
| 1574 | } | ||
| 1575 | kfree(ivec); | ||
| 1576 | } | ||
| 1577 | } | ||
| 1578 | |||
| 1579 | static int find_next_key(struct btrfs_path *path, int level, | 1953 | static int find_next_key(struct btrfs_path *path, int level, |
| 1580 | struct btrfs_key *key) | 1954 | struct btrfs_key *key) |
| 1581 | 1955 | ||
| @@ -1608,13 +1982,14 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1608 | struct btrfs_root *reloc_root; | 1982 | struct btrfs_root *reloc_root; |
| 1609 | struct btrfs_root_item *root_item; | 1983 | struct btrfs_root_item *root_item; |
| 1610 | struct btrfs_path *path; | 1984 | struct btrfs_path *path; |
| 1611 | struct extent_buffer *leaf = NULL; | 1985 | struct extent_buffer *leaf; |
| 1612 | unsigned long nr; | 1986 | unsigned long nr; |
| 1613 | int level; | 1987 | int level; |
| 1614 | int max_level; | 1988 | int max_level; |
| 1615 | int replaced = 0; | 1989 | int replaced = 0; |
| 1616 | int ret; | 1990 | int ret; |
| 1617 | int err = 0; | 1991 | int err = 0; |
| 1992 | u32 min_reserved; | ||
| 1618 | 1993 | ||
| 1619 | path = btrfs_alloc_path(); | 1994 | path = btrfs_alloc_path(); |
| 1620 | if (!path) | 1995 | if (!path) |
| @@ -1648,34 +2023,23 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1648 | btrfs_unlock_up_safe(path, 0); | 2023 | btrfs_unlock_up_safe(path, 0); |
| 1649 | } | 2024 | } |
| 1650 | 2025 | ||
| 1651 | if (level == 0 && rc->stage == UPDATE_DATA_PTRS) { | 2026 | min_reserved = root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; |
| 1652 | trans = btrfs_start_transaction(root, 1); | 2027 | memset(&next_key, 0, sizeof(next_key)); |
| 1653 | 2028 | ||
| 1654 | leaf = path->nodes[0]; | 2029 | while (1) { |
| 1655 | btrfs_item_key_to_cpu(leaf, &key, 0); | 2030 | trans = btrfs_start_transaction(root, 0); |
| 1656 | btrfs_release_path(reloc_root, path); | 2031 | trans->block_rsv = rc->block_rsv; |
| 1657 | 2032 | ||
| 1658 | ret = btrfs_search_slot(trans, root, &key, path, 0, 1); | 2033 | ret = btrfs_block_rsv_check(trans, root, rc->block_rsv, |
| 1659 | if (ret < 0) { | 2034 | min_reserved, 0); |
| 1660 | err = ret; | 2035 | if (ret) { |
| 1661 | goto out; | 2036 | BUG_ON(ret != -EAGAIN); |
| 2037 | ret = btrfs_commit_transaction(trans, root); | ||
| 2038 | BUG_ON(ret); | ||
| 2039 | continue; | ||
| 1662 | } | 2040 | } |
| 1663 | 2041 | ||
| 1664 | leaf = path->nodes[0]; | ||
| 1665 | btrfs_unlock_up_safe(path, 1); | ||
| 1666 | ret = replace_file_extents(trans, rc, root, leaf, | ||
| 1667 | &inode_list); | ||
| 1668 | if (ret < 0) | ||
| 1669 | err = ret; | ||
| 1670 | goto out; | ||
| 1671 | } | ||
| 1672 | |||
| 1673 | memset(&next_key, 0, sizeof(next_key)); | ||
| 1674 | |||
| 1675 | while (1) { | ||
| 1676 | leaf = NULL; | ||
| 1677 | replaced = 0; | 2042 | replaced = 0; |
| 1678 | trans = btrfs_start_transaction(root, 1); | ||
| 1679 | max_level = level; | 2043 | max_level = level; |
| 1680 | 2044 | ||
| 1681 | ret = walk_down_reloc_tree(reloc_root, path, &level); | 2045 | ret = walk_down_reloc_tree(reloc_root, path, &level); |
| @@ -1689,14 +2053,9 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1689 | if (!find_next_key(path, level, &key) && | 2053 | if (!find_next_key(path, level, &key) && |
| 1690 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { | 2054 | btrfs_comp_cpu_keys(&next_key, &key) >= 0) { |
| 1691 | ret = 0; | 2055 | ret = 0; |
| 1692 | } else if (level == 1 && rc->stage == UPDATE_DATA_PTRS) { | ||
| 1693 | ret = replace_path(trans, root, reloc_root, | ||
| 1694 | path, &next_key, &leaf, | ||
| 1695 | level, max_level); | ||
| 1696 | } else { | 2056 | } else { |
| 1697 | ret = replace_path(trans, root, reloc_root, | 2057 | ret = replace_path(trans, root, reloc_root, path, |
| 1698 | path, &next_key, NULL, | 2058 | &next_key, level, max_level); |
| 1699 | level, max_level); | ||
| 1700 | } | 2059 | } |
| 1701 | if (ret < 0) { | 2060 | if (ret < 0) { |
| 1702 | err = ret; | 2061 | err = ret; |
| @@ -1708,16 +2067,6 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1708 | btrfs_node_key_to_cpu(path->nodes[level], &key, | 2067 | btrfs_node_key_to_cpu(path->nodes[level], &key, |
| 1709 | path->slots[level]); | 2068 | path->slots[level]); |
| 1710 | replaced = 1; | 2069 | replaced = 1; |
| 1711 | } else if (leaf) { | ||
| 1712 | /* | ||
| 1713 | * no block got replaced, try replacing file extents | ||
| 1714 | */ | ||
| 1715 | btrfs_item_key_to_cpu(leaf, &key, 0); | ||
| 1716 | ret = replace_file_extents(trans, rc, root, leaf, | ||
| 1717 | &inode_list); | ||
| 1718 | btrfs_tree_unlock(leaf); | ||
| 1719 | free_extent_buffer(leaf); | ||
| 1720 | BUG_ON(ret < 0); | ||
| 1721 | } | 2070 | } |
| 1722 | 2071 | ||
| 1723 | ret = walk_up_reloc_tree(reloc_root, path, &level); | 2072 | ret = walk_up_reloc_tree(reloc_root, path, &level); |
| @@ -1734,15 +2083,10 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc, | |||
| 1734 | root_item->drop_level = level; | 2083 | root_item->drop_level = level; |
| 1735 | 2084 | ||
| 1736 | nr = trans->blocks_used; | 2085 | nr = trans->blocks_used; |
| 1737 | btrfs_end_transaction(trans, root); | 2086 | btrfs_end_transaction_throttle(trans, root); |
| 1738 | 2087 | ||
| 1739 | btrfs_btree_balance_dirty(root, nr); | 2088 | btrfs_btree_balance_dirty(root, nr); |
| 1740 | 2089 | ||
| 1741 | /* | ||
| 1742 | * put inodes outside transaction, otherwise we may deadlock. | ||
| 1743 | */ | ||
| 1744 | put_inodes(&inode_list); | ||
| 1745 | |||
| 1746 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2090 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
| 1747 | invalidate_extent_cache(root, &key, &next_key); | 2091 | invalidate_extent_cache(root, &key, &next_key); |
| 1748 | } | 2092 | } |
| @@ -1765,87 +2109,125 @@ out: | |||
| 1765 | sizeof(root_item->drop_progress)); | 2109 | sizeof(root_item->drop_progress)); |
| 1766 | root_item->drop_level = 0; | 2110 | root_item->drop_level = 0; |
| 1767 | btrfs_set_root_refs(root_item, 0); | 2111 | btrfs_set_root_refs(root_item, 0); |
| 2112 | btrfs_update_reloc_root(trans, root); | ||
| 1768 | } | 2113 | } |
| 1769 | 2114 | ||
| 1770 | nr = trans->blocks_used; | 2115 | nr = trans->blocks_used; |
| 1771 | btrfs_end_transaction(trans, root); | 2116 | btrfs_end_transaction_throttle(trans, root); |
| 1772 | 2117 | ||
| 1773 | btrfs_btree_balance_dirty(root, nr); | 2118 | btrfs_btree_balance_dirty(root, nr); |
| 1774 | 2119 | ||
| 1775 | put_inodes(&inode_list); | ||
| 1776 | |||
| 1777 | if (replaced && rc->stage == UPDATE_DATA_PTRS) | 2120 | if (replaced && rc->stage == UPDATE_DATA_PTRS) |
| 1778 | invalidate_extent_cache(root, &key, &next_key); | 2121 | invalidate_extent_cache(root, &key, &next_key); |
| 1779 | 2122 | ||
| 1780 | return err; | 2123 | return err; |
| 1781 | } | 2124 | } |
| 1782 | 2125 | ||
| 1783 | /* | 2126 | static noinline_for_stack |
| 1784 | * callback for the work threads. | 2127 | int prepare_to_merge(struct reloc_control *rc, int err) |
| 1785 | * this function merges reloc tree with corresponding fs tree, | ||
| 1786 | * and then drops the reloc tree. | ||
| 1787 | */ | ||
| 1788 | static void merge_func(struct btrfs_work *work) | ||
| 1789 | { | 2128 | { |
| 1790 | struct btrfs_trans_handle *trans; | 2129 | struct btrfs_root *root = rc->extent_root; |
| 1791 | struct btrfs_root *root; | ||
| 1792 | struct btrfs_root *reloc_root; | 2130 | struct btrfs_root *reloc_root; |
| 1793 | struct async_merge *async; | 2131 | struct btrfs_trans_handle *trans; |
| 2132 | LIST_HEAD(reloc_roots); | ||
| 2133 | u64 num_bytes = 0; | ||
| 2134 | int ret; | ||
| 2135 | int retries = 0; | ||
| 2136 | |||
| 2137 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 2138 | rc->merging_rsv_size += root->nodesize * (BTRFS_MAX_LEVEL - 1) * 2; | ||
| 2139 | rc->merging_rsv_size += rc->nodes_relocated * 2; | ||
| 2140 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 2141 | again: | ||
| 2142 | if (!err) { | ||
| 2143 | num_bytes = rc->merging_rsv_size; | ||
| 2144 | ret = btrfs_block_rsv_add(NULL, root, rc->block_rsv, | ||
| 2145 | num_bytes, &retries); | ||
| 2146 | if (ret) | ||
| 2147 | err = ret; | ||
| 2148 | } | ||
| 2149 | |||
| 2150 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
| 2151 | |||
| 2152 | if (!err) { | ||
| 2153 | if (num_bytes != rc->merging_rsv_size) { | ||
| 2154 | btrfs_end_transaction(trans, rc->extent_root); | ||
| 2155 | btrfs_block_rsv_release(rc->extent_root, | ||
| 2156 | rc->block_rsv, num_bytes); | ||
| 2157 | retries = 0; | ||
| 2158 | goto again; | ||
| 2159 | } | ||
| 2160 | } | ||
| 1794 | 2161 | ||
| 1795 | async = container_of(work, struct async_merge, work); | 2162 | rc->merge_reloc_tree = 1; |
| 1796 | reloc_root = async->root; | 2163 | |
| 2164 | while (!list_empty(&rc->reloc_roots)) { | ||
| 2165 | reloc_root = list_entry(rc->reloc_roots.next, | ||
| 2166 | struct btrfs_root, root_list); | ||
| 2167 | list_del_init(&reloc_root->root_list); | ||
| 1797 | 2168 | ||
| 1798 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { | ||
| 1799 | root = read_fs_root(reloc_root->fs_info, | 2169 | root = read_fs_root(reloc_root->fs_info, |
| 1800 | reloc_root->root_key.offset); | 2170 | reloc_root->root_key.offset); |
| 1801 | BUG_ON(IS_ERR(root)); | 2171 | BUG_ON(IS_ERR(root)); |
| 1802 | BUG_ON(root->reloc_root != reloc_root); | 2172 | BUG_ON(root->reloc_root != reloc_root); |
| 1803 | 2173 | ||
| 1804 | merge_reloc_root(async->rc, root); | 2174 | /* |
| 1805 | 2175 | * set reference count to 1, so btrfs_recover_relocation | |
| 1806 | trans = btrfs_start_transaction(root, 1); | 2176 | * knows it should resumes merging |
| 2177 | */ | ||
| 2178 | if (!err) | ||
| 2179 | btrfs_set_root_refs(&reloc_root->root_item, 1); | ||
| 1807 | btrfs_update_reloc_root(trans, root); | 2180 | btrfs_update_reloc_root(trans, root); |
| 1808 | btrfs_end_transaction(trans, root); | ||
| 1809 | } | ||
| 1810 | 2181 | ||
| 1811 | btrfs_drop_snapshot(reloc_root, 0); | 2182 | list_add(&reloc_root->root_list, &reloc_roots); |
| 2183 | } | ||
| 1812 | 2184 | ||
| 1813 | if (atomic_dec_and_test(async->num_pending)) | 2185 | list_splice(&reloc_roots, &rc->reloc_roots); |
| 1814 | complete(async->done); | ||
| 1815 | 2186 | ||
| 1816 | kfree(async); | 2187 | if (!err) |
| 2188 | btrfs_commit_transaction(trans, rc->extent_root); | ||
| 2189 | else | ||
| 2190 | btrfs_end_transaction(trans, rc->extent_root); | ||
| 2191 | return err; | ||
| 1817 | } | 2192 | } |
| 1818 | 2193 | ||
| 1819 | static int merge_reloc_roots(struct reloc_control *rc) | 2194 | static noinline_for_stack |
| 2195 | int merge_reloc_roots(struct reloc_control *rc) | ||
| 1820 | { | 2196 | { |
| 1821 | struct async_merge *async; | ||
| 1822 | struct btrfs_root *root; | 2197 | struct btrfs_root *root; |
| 1823 | struct completion done; | 2198 | struct btrfs_root *reloc_root; |
| 1824 | atomic_t num_pending; | 2199 | LIST_HEAD(reloc_roots); |
| 2200 | int found = 0; | ||
| 2201 | int ret; | ||
| 2202 | again: | ||
| 2203 | root = rc->extent_root; | ||
| 2204 | mutex_lock(&root->fs_info->trans_mutex); | ||
| 2205 | list_splice_init(&rc->reloc_roots, &reloc_roots); | ||
| 2206 | mutex_unlock(&root->fs_info->trans_mutex); | ||
| 1825 | 2207 | ||
| 1826 | init_completion(&done); | 2208 | while (!list_empty(&reloc_roots)) { |
| 1827 | atomic_set(&num_pending, 1); | 2209 | found = 1; |
| 2210 | reloc_root = list_entry(reloc_roots.next, | ||
| 2211 | struct btrfs_root, root_list); | ||
| 1828 | 2212 | ||
| 1829 | while (!list_empty(&rc->reloc_roots)) { | 2213 | if (btrfs_root_refs(&reloc_root->root_item) > 0) { |
| 1830 | root = list_entry(rc->reloc_roots.next, | 2214 | root = read_fs_root(reloc_root->fs_info, |
| 1831 | struct btrfs_root, root_list); | 2215 | reloc_root->root_key.offset); |
| 1832 | list_del_init(&root->root_list); | 2216 | BUG_ON(IS_ERR(root)); |
| 2217 | BUG_ON(root->reloc_root != reloc_root); | ||
| 1833 | 2218 | ||
| 1834 | async = kmalloc(sizeof(*async), GFP_NOFS); | 2219 | ret = merge_reloc_root(rc, root); |
| 1835 | BUG_ON(!async); | 2220 | BUG_ON(ret); |
| 1836 | async->work.func = merge_func; | 2221 | } else { |
| 1837 | async->work.flags = 0; | 2222 | list_del_init(&reloc_root->root_list); |
| 1838 | async->rc = rc; | 2223 | } |
| 1839 | async->root = root; | 2224 | btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0); |
| 1840 | async->done = &done; | ||
| 1841 | async->num_pending = &num_pending; | ||
| 1842 | atomic_inc(&num_pending); | ||
| 1843 | btrfs_queue_worker(&rc->workers, &async->work); | ||
| 1844 | } | 2225 | } |
| 1845 | 2226 | ||
| 1846 | if (!atomic_dec_and_test(&num_pending)) | 2227 | if (found) { |
| 1847 | wait_for_completion(&done); | 2228 | found = 0; |
| 1848 | 2229 | goto again; | |
| 2230 | } | ||
| 1849 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); | 2231 | BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root)); |
| 1850 | return 0; | 2232 | return 0; |
| 1851 | } | 2233 | } |
| @@ -1876,119 +2258,169 @@ static int record_reloc_root_in_trans(struct btrfs_trans_handle *trans, | |||
| 1876 | return btrfs_record_root_in_trans(trans, root); | 2258 | return btrfs_record_root_in_trans(trans, root); |
| 1877 | } | 2259 | } |
| 1878 | 2260 | ||
| 1879 | /* | 2261 | static noinline_for_stack |
| 1880 | * select one tree from trees that references the block. | 2262 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, |
| 1881 | * for blocks in refernce counted trees, we preper reloc tree. | 2263 | struct reloc_control *rc, |
| 1882 | * if no reloc tree found and reloc_only is true, NULL is returned. | 2264 | struct backref_node *node, |
| 1883 | */ | 2265 | struct backref_edge *edges[], int *nr) |
| 1884 | static struct btrfs_root *__select_one_root(struct btrfs_trans_handle *trans, | ||
| 1885 | struct backref_node *node, | ||
| 1886 | struct backref_edge *edges[], | ||
| 1887 | int *nr, int reloc_only) | ||
| 1888 | { | 2266 | { |
| 1889 | struct backref_node *next; | 2267 | struct backref_node *next; |
| 1890 | struct btrfs_root *root; | 2268 | struct btrfs_root *root; |
| 1891 | int index; | 2269 | int index = 0; |
| 1892 | int loop = 0; | 2270 | |
| 1893 | again: | ||
| 1894 | index = 0; | ||
| 1895 | next = node; | 2271 | next = node; |
| 1896 | while (1) { | 2272 | while (1) { |
| 1897 | cond_resched(); | 2273 | cond_resched(); |
| 1898 | next = walk_up_backref(next, edges, &index); | 2274 | next = walk_up_backref(next, edges, &index); |
| 1899 | root = next->root; | 2275 | root = next->root; |
| 1900 | if (!root) { | 2276 | BUG_ON(!root); |
| 1901 | BUG_ON(!node->old_root); | 2277 | BUG_ON(!root->ref_cows); |
| 1902 | goto skip; | ||
| 1903 | } | ||
| 1904 | |||
| 1905 | /* no other choice for non-refernce counted tree */ | ||
| 1906 | if (!root->ref_cows) { | ||
| 1907 | BUG_ON(reloc_only); | ||
| 1908 | break; | ||
| 1909 | } | ||
| 1910 | 2278 | ||
| 1911 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2279 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { |
| 1912 | record_reloc_root_in_trans(trans, root); | 2280 | record_reloc_root_in_trans(trans, root); |
| 1913 | break; | 2281 | break; |
| 1914 | } | 2282 | } |
| 1915 | 2283 | ||
| 1916 | if (loop) { | 2284 | btrfs_record_root_in_trans(trans, root); |
| 1917 | btrfs_record_root_in_trans(trans, root); | 2285 | root = root->reloc_root; |
| 2286 | |||
| 2287 | if (next->new_bytenr != root->node->start) { | ||
| 2288 | BUG_ON(next->new_bytenr); | ||
| 2289 | BUG_ON(!list_empty(&next->list)); | ||
| 2290 | next->new_bytenr = root->node->start; | ||
| 2291 | next->root = root; | ||
| 2292 | list_add_tail(&next->list, | ||
| 2293 | &rc->backref_cache.changed); | ||
| 2294 | __mark_block_processed(rc, next); | ||
| 1918 | break; | 2295 | break; |
| 1919 | } | 2296 | } |
| 1920 | 2297 | ||
| 1921 | if (reloc_only || next != node) { | 2298 | WARN_ON(1); |
| 1922 | if (!root->reloc_root) | ||
| 1923 | btrfs_record_root_in_trans(trans, root); | ||
| 1924 | root = root->reloc_root; | ||
| 1925 | /* | ||
| 1926 | * if the reloc tree was created in current | ||
| 1927 | * transation, there is no node in backref tree | ||
| 1928 | * corresponds to the root of the reloc tree. | ||
| 1929 | */ | ||
| 1930 | if (btrfs_root_last_snapshot(&root->root_item) == | ||
| 1931 | trans->transid - 1) | ||
| 1932 | break; | ||
| 1933 | } | ||
| 1934 | skip: | ||
| 1935 | root = NULL; | 2299 | root = NULL; |
| 1936 | next = walk_down_backref(edges, &index); | 2300 | next = walk_down_backref(edges, &index); |
| 1937 | if (!next || next->level <= node->level) | 2301 | if (!next || next->level <= node->level) |
| 1938 | break; | 2302 | break; |
| 1939 | } | 2303 | } |
| 2304 | if (!root) | ||
| 2305 | return NULL; | ||
| 1940 | 2306 | ||
| 1941 | if (!root && !loop && !reloc_only) { | 2307 | *nr = index; |
| 1942 | loop = 1; | 2308 | next = node; |
| 1943 | goto again; | 2309 | /* setup backref node path for btrfs_reloc_cow_block */ |
| 2310 | while (1) { | ||
| 2311 | rc->backref_cache.path[next->level] = next; | ||
| 2312 | if (--index < 0) | ||
| 2313 | break; | ||
| 2314 | next = edges[index]->node[UPPER]; | ||
| 1944 | } | 2315 | } |
| 1945 | |||
| 1946 | if (root) | ||
| 1947 | *nr = index; | ||
| 1948 | else | ||
| 1949 | *nr = 0; | ||
| 1950 | |||
| 1951 | return root; | 2316 | return root; |
| 1952 | } | 2317 | } |
| 1953 | 2318 | ||
| 2319 | /* | ||
| 2320 | * select a tree root for relocation. return NULL if the block | ||
| 2321 | * is reference counted. we should use do_relocation() in this | ||
| 2322 | * case. return a tree root pointer if the block isn't reference | ||
| 2323 | * counted. return -ENOENT if the block is root of reloc tree. | ||
| 2324 | */ | ||
| 1954 | static noinline_for_stack | 2325 | static noinline_for_stack |
| 1955 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, | 2326 | struct btrfs_root *select_one_root(struct btrfs_trans_handle *trans, |
| 1956 | struct backref_node *node) | 2327 | struct backref_node *node) |
| 1957 | { | 2328 | { |
| 2329 | struct backref_node *next; | ||
| 2330 | struct btrfs_root *root; | ||
| 2331 | struct btrfs_root *fs_root = NULL; | ||
| 1958 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | 2332 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; |
| 1959 | int nr; | 2333 | int index = 0; |
| 1960 | return __select_one_root(trans, node, edges, &nr, 0); | 2334 | |
| 2335 | next = node; | ||
| 2336 | while (1) { | ||
| 2337 | cond_resched(); | ||
| 2338 | next = walk_up_backref(next, edges, &index); | ||
| 2339 | root = next->root; | ||
| 2340 | BUG_ON(!root); | ||
| 2341 | |||
| 2342 | /* no other choice for non-refernce counted tree */ | ||
| 2343 | if (!root->ref_cows) | ||
| 2344 | return root; | ||
| 2345 | |||
| 2346 | if (root->root_key.objectid != BTRFS_TREE_RELOC_OBJECTID) | ||
| 2347 | fs_root = root; | ||
| 2348 | |||
| 2349 | if (next != node) | ||
| 2350 | return NULL; | ||
| 2351 | |||
| 2352 | next = walk_down_backref(edges, &index); | ||
| 2353 | if (!next || next->level <= node->level) | ||
| 2354 | break; | ||
| 2355 | } | ||
| 2356 | |||
| 2357 | if (!fs_root) | ||
| 2358 | return ERR_PTR(-ENOENT); | ||
| 2359 | return fs_root; | ||
| 1961 | } | 2360 | } |
| 1962 | 2361 | ||
| 1963 | static noinline_for_stack | 2362 | static noinline_for_stack |
| 1964 | struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans, | 2363 | u64 calcu_metadata_size(struct reloc_control *rc, |
| 1965 | struct backref_node *node, | 2364 | struct backref_node *node, int reserve) |
| 1966 | struct backref_edge *edges[], int *nr) | ||
| 1967 | { | 2365 | { |
| 1968 | return __select_one_root(trans, node, edges, nr, 1); | 2366 | struct backref_node *next = node; |
| 2367 | struct backref_edge *edge; | ||
| 2368 | struct backref_edge *edges[BTRFS_MAX_LEVEL - 1]; | ||
| 2369 | u64 num_bytes = 0; | ||
| 2370 | int index = 0; | ||
| 2371 | |||
| 2372 | BUG_ON(reserve && node->processed); | ||
| 2373 | |||
| 2374 | while (next) { | ||
| 2375 | cond_resched(); | ||
| 2376 | while (1) { | ||
| 2377 | if (next->processed && (reserve || next != node)) | ||
| 2378 | break; | ||
| 2379 | |||
| 2380 | num_bytes += btrfs_level_size(rc->extent_root, | ||
| 2381 | next->level); | ||
| 2382 | |||
| 2383 | if (list_empty(&next->upper)) | ||
| 2384 | break; | ||
| 2385 | |||
| 2386 | edge = list_entry(next->upper.next, | ||
| 2387 | struct backref_edge, list[LOWER]); | ||
| 2388 | edges[index++] = edge; | ||
| 2389 | next = edge->node[UPPER]; | ||
| 2390 | } | ||
| 2391 | next = walk_down_backref(edges, &index); | ||
| 2392 | } | ||
| 2393 | return num_bytes; | ||
| 1969 | } | 2394 | } |
| 1970 | 2395 | ||
| 1971 | static void grab_path_buffers(struct btrfs_path *path, | 2396 | static int reserve_metadata_space(struct btrfs_trans_handle *trans, |
| 1972 | struct backref_node *node, | 2397 | struct reloc_control *rc, |
| 1973 | struct backref_edge *edges[], int nr) | 2398 | struct backref_node *node) |
| 1974 | { | 2399 | { |
| 1975 | int i = 0; | 2400 | struct btrfs_root *root = rc->extent_root; |
| 1976 | while (1) { | 2401 | u64 num_bytes; |
| 1977 | drop_node_buffer(node); | 2402 | int ret; |
| 1978 | node->eb = path->nodes[node->level]; | 2403 | |
| 1979 | BUG_ON(!node->eb); | 2404 | num_bytes = calcu_metadata_size(rc, node, 1) * 2; |
| 1980 | if (path->locks[node->level]) | ||
| 1981 | node->locked = 1; | ||
| 1982 | path->nodes[node->level] = NULL; | ||
| 1983 | path->locks[node->level] = 0; | ||
| 1984 | |||
| 1985 | if (i >= nr) | ||
| 1986 | break; | ||
| 1987 | 2405 | ||
| 1988 | edges[i]->blockptr = node->eb->start; | 2406 | trans->block_rsv = rc->block_rsv; |
| 1989 | node = edges[i]->node[UPPER]; | 2407 | ret = btrfs_block_rsv_add(trans, root, rc->block_rsv, num_bytes, |
| 1990 | i++; | 2408 | &rc->block_rsv_retries); |
| 2409 | if (ret) { | ||
| 2410 | if (ret == -EAGAIN) | ||
| 2411 | rc->commit_transaction = 1; | ||
| 2412 | return ret; | ||
| 1991 | } | 2413 | } |
| 2414 | |||
| 2415 | rc->block_rsv_retries = 0; | ||
| 2416 | return 0; | ||
| 2417 | } | ||
| 2418 | |||
| 2419 | static void release_metadata_space(struct reloc_control *rc, | ||
| 2420 | struct backref_node *node) | ||
| 2421 | { | ||
| 2422 | u64 num_bytes = calcu_metadata_size(rc, node, 0) * 2; | ||
| 2423 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, num_bytes); | ||
| 1992 | } | 2424 | } |
| 1993 | 2425 | ||
| 1994 | /* | 2426 | /* |
| @@ -1999,6 +2431,7 @@ static void grab_path_buffers(struct btrfs_path *path, | |||
| 1999 | * in that case this function just updates pointers. | 2431 | * in that case this function just updates pointers. |
| 2000 | */ | 2432 | */ |
| 2001 | static int do_relocation(struct btrfs_trans_handle *trans, | 2433 | static int do_relocation(struct btrfs_trans_handle *trans, |
| 2434 | struct reloc_control *rc, | ||
| 2002 | struct backref_node *node, | 2435 | struct backref_node *node, |
| 2003 | struct btrfs_key *key, | 2436 | struct btrfs_key *key, |
| 2004 | struct btrfs_path *path, int lowest) | 2437 | struct btrfs_path *path, int lowest) |
| @@ -2019,18 +2452,25 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2019 | BUG_ON(lowest && node->eb); | 2452 | BUG_ON(lowest && node->eb); |
| 2020 | 2453 | ||
| 2021 | path->lowest_level = node->level + 1; | 2454 | path->lowest_level = node->level + 1; |
| 2455 | rc->backref_cache.path[node->level] = node; | ||
| 2022 | list_for_each_entry(edge, &node->upper, list[LOWER]) { | 2456 | list_for_each_entry(edge, &node->upper, list[LOWER]) { |
| 2023 | cond_resched(); | 2457 | cond_resched(); |
| 2024 | if (node->eb && node->eb->start == edge->blockptr) | ||
| 2025 | continue; | ||
| 2026 | 2458 | ||
| 2027 | upper = edge->node[UPPER]; | 2459 | upper = edge->node[UPPER]; |
| 2028 | root = select_reloc_root(trans, upper, edges, &nr); | 2460 | root = select_reloc_root(trans, rc, upper, edges, &nr); |
| 2029 | if (!root) | 2461 | BUG_ON(!root); |
| 2030 | continue; | 2462 | |
| 2031 | 2463 | if (upper->eb && !upper->locked) { | |
| 2032 | if (upper->eb && !upper->locked) | 2464 | if (!lowest) { |
| 2465 | ret = btrfs_bin_search(upper->eb, key, | ||
| 2466 | upper->level, &slot); | ||
| 2467 | BUG_ON(ret); | ||
| 2468 | bytenr = btrfs_node_blockptr(upper->eb, slot); | ||
| 2469 | if (node->eb->start == bytenr) | ||
| 2470 | goto next; | ||
| 2471 | } | ||
| 2033 | drop_node_buffer(upper); | 2472 | drop_node_buffer(upper); |
| 2473 | } | ||
| 2034 | 2474 | ||
| 2035 | if (!upper->eb) { | 2475 | if (!upper->eb) { |
| 2036 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | 2476 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); |
| @@ -2040,11 +2480,17 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2040 | } | 2480 | } |
| 2041 | BUG_ON(ret > 0); | 2481 | BUG_ON(ret > 0); |
| 2042 | 2482 | ||
| 2043 | slot = path->slots[upper->level]; | 2483 | if (!upper->eb) { |
| 2484 | upper->eb = path->nodes[upper->level]; | ||
| 2485 | path->nodes[upper->level] = NULL; | ||
| 2486 | } else { | ||
| 2487 | BUG_ON(upper->eb != path->nodes[upper->level]); | ||
| 2488 | } | ||
| 2044 | 2489 | ||
| 2045 | btrfs_unlock_up_safe(path, upper->level + 1); | 2490 | upper->locked = 1; |
| 2046 | grab_path_buffers(path, upper, edges, nr); | 2491 | path->locks[upper->level] = 0; |
| 2047 | 2492 | ||
| 2493 | slot = path->slots[upper->level]; | ||
| 2048 | btrfs_release_path(NULL, path); | 2494 | btrfs_release_path(NULL, path); |
| 2049 | } else { | 2495 | } else { |
| 2050 | ret = btrfs_bin_search(upper->eb, key, upper->level, | 2496 | ret = btrfs_bin_search(upper->eb, key, upper->level, |
| @@ -2053,14 +2499,11 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2053 | } | 2499 | } |
| 2054 | 2500 | ||
| 2055 | bytenr = btrfs_node_blockptr(upper->eb, slot); | 2501 | bytenr = btrfs_node_blockptr(upper->eb, slot); |
| 2056 | if (!lowest) { | 2502 | if (lowest) { |
| 2057 | if (node->eb->start == bytenr) { | 2503 | BUG_ON(bytenr != node->bytenr); |
| 2058 | btrfs_tree_unlock(upper->eb); | ||
| 2059 | upper->locked = 0; | ||
| 2060 | continue; | ||
| 2061 | } | ||
| 2062 | } else { | 2504 | } else { |
| 2063 | BUG_ON(node->bytenr != bytenr); | 2505 | if (node->eb->start == bytenr) |
| 2506 | goto next; | ||
| 2064 | } | 2507 | } |
| 2065 | 2508 | ||
| 2066 | blocksize = btrfs_level_size(root, node->level); | 2509 | blocksize = btrfs_level_size(root, node->level); |
| @@ -2072,13 +2515,13 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2072 | if (!node->eb) { | 2515 | if (!node->eb) { |
| 2073 | ret = btrfs_cow_block(trans, root, eb, upper->eb, | 2516 | ret = btrfs_cow_block(trans, root, eb, upper->eb, |
| 2074 | slot, &eb); | 2517 | slot, &eb); |
| 2518 | btrfs_tree_unlock(eb); | ||
| 2519 | free_extent_buffer(eb); | ||
| 2075 | if (ret < 0) { | 2520 | if (ret < 0) { |
| 2076 | err = ret; | 2521 | err = ret; |
| 2077 | break; | 2522 | goto next; |
| 2078 | } | 2523 | } |
| 2079 | btrfs_set_lock_blocking(eb); | 2524 | BUG_ON(node->eb != eb); |
| 2080 | node->eb = eb; | ||
| 2081 | node->locked = 1; | ||
| 2082 | } else { | 2525 | } else { |
| 2083 | btrfs_set_node_blockptr(upper->eb, slot, | 2526 | btrfs_set_node_blockptr(upper->eb, slot, |
| 2084 | node->eb->start); | 2527 | node->eb->start); |
| @@ -2096,67 +2539,80 @@ static int do_relocation(struct btrfs_trans_handle *trans, | |||
| 2096 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); | 2539 | ret = btrfs_drop_subtree(trans, root, eb, upper->eb); |
| 2097 | BUG_ON(ret); | 2540 | BUG_ON(ret); |
| 2098 | } | 2541 | } |
| 2099 | if (!lowest) { | 2542 | next: |
| 2100 | btrfs_tree_unlock(upper->eb); | 2543 | if (!upper->pending) |
| 2101 | upper->locked = 0; | 2544 | drop_node_buffer(upper); |
| 2102 | } | 2545 | else |
| 2546 | unlock_node_buffer(upper); | ||
| 2547 | if (err) | ||
| 2548 | break; | ||
| 2103 | } | 2549 | } |
| 2550 | |||
| 2551 | if (!err && node->pending) { | ||
| 2552 | drop_node_buffer(node); | ||
| 2553 | list_move_tail(&node->list, &rc->backref_cache.changed); | ||
| 2554 | node->pending = 0; | ||
| 2555 | } | ||
| 2556 | |||
| 2104 | path->lowest_level = 0; | 2557 | path->lowest_level = 0; |
| 2558 | BUG_ON(err == -ENOSPC); | ||
| 2105 | return err; | 2559 | return err; |
| 2106 | } | 2560 | } |
| 2107 | 2561 | ||
| 2108 | static int link_to_upper(struct btrfs_trans_handle *trans, | 2562 | static int link_to_upper(struct btrfs_trans_handle *trans, |
| 2563 | struct reloc_control *rc, | ||
| 2109 | struct backref_node *node, | 2564 | struct backref_node *node, |
| 2110 | struct btrfs_path *path) | 2565 | struct btrfs_path *path) |
| 2111 | { | 2566 | { |
| 2112 | struct btrfs_key key; | 2567 | struct btrfs_key key; |
| 2113 | if (!node->eb || list_empty(&node->upper)) | ||
| 2114 | return 0; | ||
| 2115 | 2568 | ||
| 2116 | btrfs_node_key_to_cpu(node->eb, &key, 0); | 2569 | btrfs_node_key_to_cpu(node->eb, &key, 0); |
| 2117 | return do_relocation(trans, node, &key, path, 0); | 2570 | return do_relocation(trans, rc, node, &key, path, 0); |
| 2118 | } | 2571 | } |
| 2119 | 2572 | ||
| 2120 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, | 2573 | static int finish_pending_nodes(struct btrfs_trans_handle *trans, |
| 2121 | struct backref_cache *cache, | 2574 | struct reloc_control *rc, |
| 2122 | struct btrfs_path *path) | 2575 | struct btrfs_path *path, int err) |
| 2123 | { | 2576 | { |
| 2577 | LIST_HEAD(list); | ||
| 2578 | struct backref_cache *cache = &rc->backref_cache; | ||
| 2124 | struct backref_node *node; | 2579 | struct backref_node *node; |
| 2125 | int level; | 2580 | int level; |
| 2126 | int ret; | 2581 | int ret; |
| 2127 | int err = 0; | ||
| 2128 | 2582 | ||
| 2129 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { | 2583 | for (level = 0; level < BTRFS_MAX_LEVEL; level++) { |
| 2130 | while (!list_empty(&cache->pending[level])) { | 2584 | while (!list_empty(&cache->pending[level])) { |
| 2131 | node = list_entry(cache->pending[level].next, | 2585 | node = list_entry(cache->pending[level].next, |
| 2132 | struct backref_node, lower); | 2586 | struct backref_node, list); |
| 2133 | BUG_ON(node->level != level); | 2587 | list_move_tail(&node->list, &list); |
| 2588 | BUG_ON(!node->pending); | ||
| 2134 | 2589 | ||
| 2135 | ret = link_to_upper(trans, node, path); | 2590 | if (!err) { |
| 2136 | if (ret < 0) | 2591 | ret = link_to_upper(trans, rc, node, path); |
| 2137 | err = ret; | 2592 | if (ret < 0) |
| 2138 | /* | 2593 | err = ret; |
| 2139 | * this remove the node from the pending list and | 2594 | } |
| 2140 | * may add some other nodes to the level + 1 | ||
| 2141 | * pending list | ||
| 2142 | */ | ||
| 2143 | remove_backref_node(cache, node); | ||
| 2144 | } | 2595 | } |
| 2596 | list_splice_init(&list, &cache->pending[level]); | ||
| 2145 | } | 2597 | } |
| 2146 | BUG_ON(!RB_EMPTY_ROOT(&cache->rb_root)); | ||
| 2147 | return err; | 2598 | return err; |
| 2148 | } | 2599 | } |
| 2149 | 2600 | ||
| 2150 | static void mark_block_processed(struct reloc_control *rc, | 2601 | static void mark_block_processed(struct reloc_control *rc, |
| 2151 | struct backref_node *node) | 2602 | u64 bytenr, u32 blocksize) |
| 2603 | { | ||
| 2604 | set_extent_bits(&rc->processed_blocks, bytenr, bytenr + blocksize - 1, | ||
| 2605 | EXTENT_DIRTY, GFP_NOFS); | ||
| 2606 | } | ||
| 2607 | |||
| 2608 | static void __mark_block_processed(struct reloc_control *rc, | ||
| 2609 | struct backref_node *node) | ||
| 2152 | { | 2610 | { |
| 2153 | u32 blocksize; | 2611 | u32 blocksize; |
| 2154 | if (node->level == 0 || | 2612 | if (node->level == 0 || |
| 2155 | in_block_group(node->bytenr, rc->block_group)) { | 2613 | in_block_group(node->bytenr, rc->block_group)) { |
| 2156 | blocksize = btrfs_level_size(rc->extent_root, node->level); | 2614 | blocksize = btrfs_level_size(rc->extent_root, node->level); |
| 2157 | set_extent_bits(&rc->processed_blocks, node->bytenr, | 2615 | mark_block_processed(rc, node->bytenr, blocksize); |
| 2158 | node->bytenr + blocksize - 1, EXTENT_DIRTY, | ||
| 2159 | GFP_NOFS); | ||
| 2160 | } | 2616 | } |
| 2161 | node->processed = 1; | 2617 | node->processed = 1; |
| 2162 | } | 2618 | } |
| @@ -2179,7 +2635,7 @@ static void update_processed_blocks(struct reloc_control *rc, | |||
| 2179 | if (next->processed) | 2635 | if (next->processed) |
| 2180 | break; | 2636 | break; |
| 2181 | 2637 | ||
| 2182 | mark_block_processed(rc, next); | 2638 | __mark_block_processed(rc, next); |
| 2183 | 2639 | ||
| 2184 | if (list_empty(&next->upper)) | 2640 | if (list_empty(&next->upper)) |
| 2185 | break; | 2641 | break; |
| @@ -2202,138 +2658,6 @@ static int tree_block_processed(u64 bytenr, u32 blocksize, | |||
| 2202 | return 0; | 2658 | return 0; |
| 2203 | } | 2659 | } |
| 2204 | 2660 | ||
| 2205 | /* | ||
| 2206 | * check if there are any file extent pointers in the leaf point to | ||
| 2207 | * data require processing | ||
| 2208 | */ | ||
| 2209 | static int check_file_extents(struct reloc_control *rc, | ||
| 2210 | u64 bytenr, u32 blocksize, u64 ptr_gen) | ||
| 2211 | { | ||
| 2212 | struct btrfs_key found_key; | ||
| 2213 | struct btrfs_file_extent_item *fi; | ||
| 2214 | struct extent_buffer *leaf; | ||
| 2215 | u32 nritems; | ||
| 2216 | int i; | ||
| 2217 | int ret = 0; | ||
| 2218 | |||
| 2219 | leaf = read_tree_block(rc->extent_root, bytenr, blocksize, ptr_gen); | ||
| 2220 | |||
| 2221 | nritems = btrfs_header_nritems(leaf); | ||
| 2222 | for (i = 0; i < nritems; i++) { | ||
| 2223 | cond_resched(); | ||
| 2224 | btrfs_item_key_to_cpu(leaf, &found_key, i); | ||
| 2225 | if (found_key.type != BTRFS_EXTENT_DATA_KEY) | ||
| 2226 | continue; | ||
| 2227 | fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item); | ||
| 2228 | if (btrfs_file_extent_type(leaf, fi) == | ||
| 2229 | BTRFS_FILE_EXTENT_INLINE) | ||
| 2230 | continue; | ||
| 2231 | bytenr = btrfs_file_extent_disk_bytenr(leaf, fi); | ||
| 2232 | if (bytenr == 0) | ||
| 2233 | continue; | ||
| 2234 | if (in_block_group(bytenr, rc->block_group)) { | ||
| 2235 | ret = 1; | ||
| 2236 | break; | ||
| 2237 | } | ||
| 2238 | } | ||
| 2239 | free_extent_buffer(leaf); | ||
| 2240 | return ret; | ||
| 2241 | } | ||
| 2242 | |||
| 2243 | /* | ||
| 2244 | * scan child blocks of a given block to find blocks require processing | ||
| 2245 | */ | ||
| 2246 | static int add_child_blocks(struct btrfs_trans_handle *trans, | ||
| 2247 | struct reloc_control *rc, | ||
| 2248 | struct backref_node *node, | ||
| 2249 | struct rb_root *blocks) | ||
| 2250 | { | ||
| 2251 | struct tree_block *block; | ||
| 2252 | struct rb_node *rb_node; | ||
| 2253 | u64 bytenr; | ||
| 2254 | u64 ptr_gen; | ||
| 2255 | u32 blocksize; | ||
| 2256 | u32 nritems; | ||
| 2257 | int i; | ||
| 2258 | int err = 0; | ||
| 2259 | |||
| 2260 | nritems = btrfs_header_nritems(node->eb); | ||
| 2261 | blocksize = btrfs_level_size(rc->extent_root, node->level - 1); | ||
| 2262 | for (i = 0; i < nritems; i++) { | ||
| 2263 | cond_resched(); | ||
| 2264 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
| 2265 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
| 2266 | if (ptr_gen == trans->transid) | ||
| 2267 | continue; | ||
| 2268 | if (!in_block_group(bytenr, rc->block_group) && | ||
| 2269 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
| 2270 | continue; | ||
| 2271 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
| 2272 | continue; | ||
| 2273 | |||
| 2274 | readahead_tree_block(rc->extent_root, | ||
| 2275 | bytenr, blocksize, ptr_gen); | ||
| 2276 | } | ||
| 2277 | |||
| 2278 | for (i = 0; i < nritems; i++) { | ||
| 2279 | cond_resched(); | ||
| 2280 | bytenr = btrfs_node_blockptr(node->eb, i); | ||
| 2281 | ptr_gen = btrfs_node_ptr_generation(node->eb, i); | ||
| 2282 | if (ptr_gen == trans->transid) | ||
| 2283 | continue; | ||
| 2284 | if (!in_block_group(bytenr, rc->block_group) && | ||
| 2285 | (node->level > 1 || rc->stage == MOVE_DATA_EXTENTS)) | ||
| 2286 | continue; | ||
| 2287 | if (tree_block_processed(bytenr, blocksize, rc)) | ||
| 2288 | continue; | ||
| 2289 | if (!in_block_group(bytenr, rc->block_group) && | ||
| 2290 | !check_file_extents(rc, bytenr, blocksize, ptr_gen)) | ||
| 2291 | continue; | ||
| 2292 | |||
| 2293 | block = kmalloc(sizeof(*block), GFP_NOFS); | ||
| 2294 | if (!block) { | ||
| 2295 | err = -ENOMEM; | ||
| 2296 | break; | ||
| 2297 | } | ||
| 2298 | block->bytenr = bytenr; | ||
| 2299 | btrfs_node_key_to_cpu(node->eb, &block->key, i); | ||
| 2300 | block->level = node->level - 1; | ||
| 2301 | block->key_ready = 1; | ||
| 2302 | rb_node = tree_insert(blocks, block->bytenr, &block->rb_node); | ||
| 2303 | BUG_ON(rb_node); | ||
| 2304 | } | ||
| 2305 | if (err) | ||
| 2306 | free_block_list(blocks); | ||
| 2307 | return err; | ||
| 2308 | } | ||
| 2309 | |||
| 2310 | /* | ||
| 2311 | * find adjacent blocks require processing | ||
| 2312 | */ | ||
| 2313 | static noinline_for_stack | ||
| 2314 | int add_adjacent_blocks(struct btrfs_trans_handle *trans, | ||
| 2315 | struct reloc_control *rc, | ||
| 2316 | struct backref_cache *cache, | ||
| 2317 | struct rb_root *blocks, int level, | ||
| 2318 | struct backref_node **upper) | ||
| 2319 | { | ||
| 2320 | struct backref_node *node; | ||
| 2321 | int ret = 0; | ||
| 2322 | |||
| 2323 | WARN_ON(!list_empty(&cache->pending[level])); | ||
| 2324 | |||
| 2325 | if (list_empty(&cache->pending[level + 1])) | ||
| 2326 | return 1; | ||
| 2327 | |||
| 2328 | node = list_entry(cache->pending[level + 1].next, | ||
| 2329 | struct backref_node, lower); | ||
| 2330 | if (node->eb) | ||
| 2331 | ret = add_child_blocks(trans, rc, node, blocks); | ||
| 2332 | |||
| 2333 | *upper = node; | ||
| 2334 | return ret; | ||
| 2335 | } | ||
| 2336 | |||
| 2337 | static int get_tree_block_key(struct reloc_control *rc, | 2661 | static int get_tree_block_key(struct reloc_control *rc, |
| 2338 | struct tree_block *block) | 2662 | struct tree_block *block) |
| 2339 | { | 2663 | { |
| @@ -2371,40 +2695,53 @@ static int relocate_tree_block(struct btrfs_trans_handle *trans, | |||
| 2371 | struct btrfs_path *path) | 2695 | struct btrfs_path *path) |
| 2372 | { | 2696 | { |
| 2373 | struct btrfs_root *root; | 2697 | struct btrfs_root *root; |
| 2374 | int ret; | 2698 | int release = 0; |
| 2699 | int ret = 0; | ||
| 2375 | 2700 | ||
| 2701 | if (!node) | ||
| 2702 | return 0; | ||
| 2703 | |||
| 2704 | BUG_ON(node->processed); | ||
| 2376 | root = select_one_root(trans, node); | 2705 | root = select_one_root(trans, node); |
| 2377 | if (unlikely(!root)) { | 2706 | if (root == ERR_PTR(-ENOENT)) { |
| 2378 | rc->found_old_snapshot = 1; | ||
| 2379 | update_processed_blocks(rc, node); | 2707 | update_processed_blocks(rc, node); |
| 2380 | return 0; | 2708 | goto out; |
| 2381 | } | 2709 | } |
| 2382 | 2710 | ||
| 2383 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) { | 2711 | if (!root || root->ref_cows) { |
| 2384 | ret = do_relocation(trans, node, key, path, 1); | 2712 | ret = reserve_metadata_space(trans, rc, node); |
| 2385 | if (ret < 0) | 2713 | if (ret) |
| 2386 | goto out; | ||
| 2387 | if (node->level == 0 && rc->stage == UPDATE_DATA_PTRS) { | ||
| 2388 | ret = replace_file_extents(trans, rc, root, | ||
| 2389 | node->eb, NULL); | ||
| 2390 | if (ret < 0) | ||
| 2391 | goto out; | ||
| 2392 | } | ||
| 2393 | drop_node_buffer(node); | ||
| 2394 | } else if (!root->ref_cows) { | ||
| 2395 | path->lowest_level = node->level; | ||
| 2396 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
| 2397 | btrfs_release_path(root, path); | ||
| 2398 | if (ret < 0) | ||
| 2399 | goto out; | 2714 | goto out; |
| 2400 | } else if (root != node->root) { | 2715 | release = 1; |
| 2401 | WARN_ON(node->level > 0 || rc->stage != UPDATE_DATA_PTRS); | ||
| 2402 | } | 2716 | } |
| 2403 | 2717 | ||
| 2404 | update_processed_blocks(rc, node); | 2718 | if (root) { |
| 2405 | ret = 0; | 2719 | if (root->ref_cows) { |
| 2720 | BUG_ON(node->new_bytenr); | ||
| 2721 | BUG_ON(!list_empty(&node->list)); | ||
| 2722 | btrfs_record_root_in_trans(trans, root); | ||
| 2723 | root = root->reloc_root; | ||
| 2724 | node->new_bytenr = root->node->start; | ||
| 2725 | node->root = root; | ||
| 2726 | list_add_tail(&node->list, &rc->backref_cache.changed); | ||
| 2727 | } else { | ||
| 2728 | path->lowest_level = node->level; | ||
| 2729 | ret = btrfs_search_slot(trans, root, key, path, 0, 1); | ||
| 2730 | btrfs_release_path(root, path); | ||
| 2731 | if (ret > 0) | ||
| 2732 | ret = 0; | ||
| 2733 | } | ||
| 2734 | if (!ret) | ||
| 2735 | update_processed_blocks(rc, node); | ||
| 2736 | } else { | ||
| 2737 | ret = do_relocation(trans, rc, node, key, path, 1); | ||
| 2738 | } | ||
| 2406 | out: | 2739 | out: |
| 2407 | drop_node_buffer(node); | 2740 | if (ret || node->level == 0 || node->cowonly) { |
| 2741 | if (release) | ||
| 2742 | release_metadata_space(rc, node); | ||
| 2743 | remove_backref_node(&rc->backref_cache, node); | ||
| 2744 | } | ||
| 2408 | return ret; | 2745 | return ret; |
| 2409 | } | 2746 | } |
| 2410 | 2747 | ||
| @@ -2415,12 +2752,10 @@ static noinline_for_stack | |||
| 2415 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, | 2752 | int relocate_tree_blocks(struct btrfs_trans_handle *trans, |
| 2416 | struct reloc_control *rc, struct rb_root *blocks) | 2753 | struct reloc_control *rc, struct rb_root *blocks) |
| 2417 | { | 2754 | { |
| 2418 | struct backref_cache *cache; | ||
| 2419 | struct backref_node *node; | 2755 | struct backref_node *node; |
| 2420 | struct btrfs_path *path; | 2756 | struct btrfs_path *path; |
| 2421 | struct tree_block *block; | 2757 | struct tree_block *block; |
| 2422 | struct rb_node *rb_node; | 2758 | struct rb_node *rb_node; |
| 2423 | int level = -1; | ||
| 2424 | int ret; | 2759 | int ret; |
| 2425 | int err = 0; | 2760 | int err = 0; |
| 2426 | 2761 | ||
| @@ -2428,21 +2763,9 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
| 2428 | if (!path) | 2763 | if (!path) |
| 2429 | return -ENOMEM; | 2764 | return -ENOMEM; |
| 2430 | 2765 | ||
| 2431 | cache = kmalloc(sizeof(*cache), GFP_NOFS); | ||
| 2432 | if (!cache) { | ||
| 2433 | btrfs_free_path(path); | ||
| 2434 | return -ENOMEM; | ||
| 2435 | } | ||
| 2436 | |||
| 2437 | backref_cache_init(cache); | ||
| 2438 | |||
| 2439 | rb_node = rb_first(blocks); | 2766 | rb_node = rb_first(blocks); |
| 2440 | while (rb_node) { | 2767 | while (rb_node) { |
| 2441 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2768 | block = rb_entry(rb_node, struct tree_block, rb_node); |
| 2442 | if (level == -1) | ||
| 2443 | level = block->level; | ||
| 2444 | else | ||
| 2445 | BUG_ON(level != block->level); | ||
| 2446 | if (!block->key_ready) | 2769 | if (!block->key_ready) |
| 2447 | reada_tree_block(rc, block); | 2770 | reada_tree_block(rc, block); |
| 2448 | rb_node = rb_next(rb_node); | 2771 | rb_node = rb_next(rb_node); |
| @@ -2460,7 +2783,7 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
| 2460 | while (rb_node) { | 2783 | while (rb_node) { |
| 2461 | block = rb_entry(rb_node, struct tree_block, rb_node); | 2784 | block = rb_entry(rb_node, struct tree_block, rb_node); |
| 2462 | 2785 | ||
| 2463 | node = build_backref_tree(rc, cache, &block->key, | 2786 | node = build_backref_tree(rc, &block->key, |
| 2464 | block->level, block->bytenr); | 2787 | block->level, block->bytenr); |
| 2465 | if (IS_ERR(node)) { | 2788 | if (IS_ERR(node)) { |
| 2466 | err = PTR_ERR(node); | 2789 | err = PTR_ERR(node); |
| @@ -2470,79 +2793,62 @@ int relocate_tree_blocks(struct btrfs_trans_handle *trans, | |||
| 2470 | ret = relocate_tree_block(trans, rc, node, &block->key, | 2793 | ret = relocate_tree_block(trans, rc, node, &block->key, |
| 2471 | path); | 2794 | path); |
| 2472 | if (ret < 0) { | 2795 | if (ret < 0) { |
| 2473 | err = ret; | 2796 | if (ret != -EAGAIN || rb_node == rb_first(blocks)) |
| 2797 | err = ret; | ||
| 2474 | goto out; | 2798 | goto out; |
| 2475 | } | 2799 | } |
| 2476 | remove_backref_node(cache, node); | ||
| 2477 | rb_node = rb_next(rb_node); | 2800 | rb_node = rb_next(rb_node); |
| 2478 | } | 2801 | } |
| 2479 | 2802 | out: | |
| 2480 | if (level > 0) | ||
| 2481 | goto out; | ||
| 2482 | |||
| 2483 | free_block_list(blocks); | 2803 | free_block_list(blocks); |
| 2804 | err = finish_pending_nodes(trans, rc, path, err); | ||
| 2484 | 2805 | ||
| 2485 | /* | 2806 | btrfs_free_path(path); |
| 2486 | * now backrefs of some upper level tree blocks have been cached, | 2807 | return err; |
| 2487 | * try relocating blocks referenced by these upper level blocks. | 2808 | } |
| 2488 | */ | ||
| 2489 | while (1) { | ||
| 2490 | struct backref_node *upper = NULL; | ||
| 2491 | if (trans->transaction->in_commit || | ||
| 2492 | trans->transaction->delayed_refs.flushing) | ||
| 2493 | break; | ||
| 2494 | 2809 | ||
| 2495 | ret = add_adjacent_blocks(trans, rc, cache, blocks, level, | 2810 | static noinline_for_stack |
| 2496 | &upper); | 2811 | int prealloc_file_extent_cluster(struct inode *inode, |
| 2497 | if (ret < 0) | 2812 | struct file_extent_cluster *cluster) |
| 2498 | err = ret; | 2813 | { |
| 2499 | if (ret != 0) | 2814 | u64 alloc_hint = 0; |
| 2500 | break; | 2815 | u64 start; |
| 2816 | u64 end; | ||
| 2817 | u64 offset = BTRFS_I(inode)->index_cnt; | ||
| 2818 | u64 num_bytes; | ||
| 2819 | int nr = 0; | ||
| 2820 | int ret = 0; | ||
| 2501 | 2821 | ||
| 2502 | rb_node = rb_first(blocks); | 2822 | BUG_ON(cluster->start != cluster->boundary[0]); |
| 2503 | while (rb_node) { | 2823 | mutex_lock(&inode->i_mutex); |
| 2504 | block = rb_entry(rb_node, struct tree_block, rb_node); | ||
| 2505 | if (trans->transaction->in_commit || | ||
| 2506 | trans->transaction->delayed_refs.flushing) | ||
| 2507 | goto out; | ||
| 2508 | BUG_ON(!block->key_ready); | ||
| 2509 | node = build_backref_tree(rc, cache, &block->key, | ||
| 2510 | level, block->bytenr); | ||
| 2511 | if (IS_ERR(node)) { | ||
| 2512 | err = PTR_ERR(node); | ||
| 2513 | goto out; | ||
| 2514 | } | ||
| 2515 | 2824 | ||
| 2516 | ret = relocate_tree_block(trans, rc, node, | 2825 | ret = btrfs_check_data_free_space(inode, cluster->end + |
| 2517 | &block->key, path); | 2826 | 1 - cluster->start); |
| 2518 | if (ret < 0) { | 2827 | if (ret) |
| 2519 | err = ret; | 2828 | goto out; |
| 2520 | goto out; | ||
| 2521 | } | ||
| 2522 | remove_backref_node(cache, node); | ||
| 2523 | rb_node = rb_next(rb_node); | ||
| 2524 | } | ||
| 2525 | free_block_list(blocks); | ||
| 2526 | 2829 | ||
| 2527 | if (upper) { | 2830 | while (nr < cluster->nr) { |
| 2528 | ret = link_to_upper(trans, upper, path); | 2831 | start = cluster->boundary[nr] - offset; |
| 2529 | if (ret < 0) { | 2832 | if (nr + 1 < cluster->nr) |
| 2530 | err = ret; | 2833 | end = cluster->boundary[nr + 1] - 1 - offset; |
| 2531 | break; | 2834 | else |
| 2532 | } | 2835 | end = cluster->end - offset; |
| 2533 | remove_backref_node(cache, upper); | 2836 | |
| 2534 | } | 2837 | lock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); |
| 2838 | num_bytes = end + 1 - start; | ||
| 2839 | ret = btrfs_prealloc_file_range(inode, 0, start, | ||
| 2840 | num_bytes, num_bytes, | ||
| 2841 | end + 1, &alloc_hint); | ||
| 2842 | unlock_extent(&BTRFS_I(inode)->io_tree, start, end, GFP_NOFS); | ||
| 2843 | if (ret) | ||
| 2844 | break; | ||
| 2845 | nr++; | ||
| 2535 | } | 2846 | } |
| 2847 | btrfs_free_reserved_data_space(inode, cluster->end + | ||
| 2848 | 1 - cluster->start); | ||
| 2536 | out: | 2849 | out: |
| 2537 | free_block_list(blocks); | 2850 | mutex_unlock(&inode->i_mutex); |
| 2538 | 2851 | return ret; | |
| 2539 | ret = finish_pending_nodes(trans, cache, path); | ||
| 2540 | if (ret < 0) | ||
| 2541 | err = ret; | ||
| 2542 | |||
| 2543 | kfree(cache); | ||
| 2544 | btrfs_free_path(path); | ||
| 2545 | return err; | ||
| 2546 | } | 2852 | } |
| 2547 | 2853 | ||
| 2548 | static noinline_for_stack | 2854 | static noinline_for_stack |
| @@ -2588,7 +2894,6 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2588 | u64 offset = BTRFS_I(inode)->index_cnt; | 2894 | u64 offset = BTRFS_I(inode)->index_cnt; |
| 2589 | unsigned long index; | 2895 | unsigned long index; |
| 2590 | unsigned long last_index; | 2896 | unsigned long last_index; |
| 2591 | unsigned int dirty_page = 0; | ||
| 2592 | struct page *page; | 2897 | struct page *page; |
| 2593 | struct file_ra_state *ra; | 2898 | struct file_ra_state *ra; |
| 2594 | int nr = 0; | 2899 | int nr = 0; |
| @@ -2601,21 +2906,24 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2601 | if (!ra) | 2906 | if (!ra) |
| 2602 | return -ENOMEM; | 2907 | return -ENOMEM; |
| 2603 | 2908 | ||
| 2604 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; | 2909 | ret = prealloc_file_extent_cluster(inode, cluster); |
| 2605 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | 2910 | if (ret) |
| 2911 | goto out; | ||
| 2606 | 2912 | ||
| 2607 | mutex_lock(&inode->i_mutex); | 2913 | file_ra_state_init(ra, inode->i_mapping); |
| 2608 | 2914 | ||
| 2609 | i_size_write(inode, cluster->end + 1 - offset); | ||
| 2610 | ret = setup_extent_mapping(inode, cluster->start - offset, | 2915 | ret = setup_extent_mapping(inode, cluster->start - offset, |
| 2611 | cluster->end - offset, cluster->start); | 2916 | cluster->end - offset, cluster->start); |
| 2612 | if (ret) | 2917 | if (ret) |
| 2613 | goto out_unlock; | 2918 | goto out; |
| 2614 | |||
| 2615 | file_ra_state_init(ra, inode->i_mapping); | ||
| 2616 | 2919 | ||
| 2617 | WARN_ON(cluster->start != cluster->boundary[0]); | 2920 | index = (cluster->start - offset) >> PAGE_CACHE_SHIFT; |
| 2921 | last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT; | ||
| 2618 | while (index <= last_index) { | 2922 | while (index <= last_index) { |
| 2923 | ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE); | ||
| 2924 | if (ret) | ||
| 2925 | goto out; | ||
| 2926 | |||
| 2619 | page = find_lock_page(inode->i_mapping, index); | 2927 | page = find_lock_page(inode->i_mapping, index); |
| 2620 | if (!page) { | 2928 | if (!page) { |
| 2621 | page_cache_sync_readahead(inode->i_mapping, | 2929 | page_cache_sync_readahead(inode->i_mapping, |
| @@ -2623,8 +2931,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2623 | last_index + 1 - index); | 2931 | last_index + 1 - index); |
| 2624 | page = grab_cache_page(inode->i_mapping, index); | 2932 | page = grab_cache_page(inode->i_mapping, index); |
| 2625 | if (!page) { | 2933 | if (!page) { |
| 2934 | btrfs_delalloc_release_metadata(inode, | ||
| 2935 | PAGE_CACHE_SIZE); | ||
| 2626 | ret = -ENOMEM; | 2936 | ret = -ENOMEM; |
| 2627 | goto out_unlock; | 2937 | goto out; |
| 2628 | } | 2938 | } |
| 2629 | } | 2939 | } |
| 2630 | 2940 | ||
| @@ -2640,8 +2950,10 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2640 | if (!PageUptodate(page)) { | 2950 | if (!PageUptodate(page)) { |
| 2641 | unlock_page(page); | 2951 | unlock_page(page); |
| 2642 | page_cache_release(page); | 2952 | page_cache_release(page); |
| 2953 | btrfs_delalloc_release_metadata(inode, | ||
| 2954 | PAGE_CACHE_SIZE); | ||
| 2643 | ret = -EIO; | 2955 | ret = -EIO; |
| 2644 | goto out_unlock; | 2956 | goto out; |
| 2645 | } | 2957 | } |
| 2646 | } | 2958 | } |
| 2647 | 2959 | ||
| @@ -2660,10 +2972,9 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2660 | EXTENT_BOUNDARY, GFP_NOFS); | 2972 | EXTENT_BOUNDARY, GFP_NOFS); |
| 2661 | nr++; | 2973 | nr++; |
| 2662 | } | 2974 | } |
| 2663 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
| 2664 | 2975 | ||
| 2976 | btrfs_set_extent_delalloc(inode, page_start, page_end, NULL); | ||
| 2665 | set_page_dirty(page); | 2977 | set_page_dirty(page); |
| 2666 | dirty_page++; | ||
| 2667 | 2978 | ||
| 2668 | unlock_extent(&BTRFS_I(inode)->io_tree, | 2979 | unlock_extent(&BTRFS_I(inode)->io_tree, |
| 2669 | page_start, page_end, GFP_NOFS); | 2980 | page_start, page_end, GFP_NOFS); |
| @@ -2671,20 +2982,11 @@ static int relocate_file_extent_cluster(struct inode *inode, | |||
| 2671 | page_cache_release(page); | 2982 | page_cache_release(page); |
| 2672 | 2983 | ||
| 2673 | index++; | 2984 | index++; |
| 2674 | if (nr < cluster->nr && | 2985 | balance_dirty_pages_ratelimited(inode->i_mapping); |
| 2675 | page_end + 1 + offset == cluster->boundary[nr]) { | 2986 | btrfs_throttle(BTRFS_I(inode)->root); |
| 2676 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 2677 | dirty_page); | ||
| 2678 | dirty_page = 0; | ||
| 2679 | } | ||
| 2680 | } | ||
| 2681 | if (dirty_page) { | ||
| 2682 | balance_dirty_pages_ratelimited_nr(inode->i_mapping, | ||
| 2683 | dirty_page); | ||
| 2684 | } | 2987 | } |
| 2685 | WARN_ON(nr != cluster->nr); | 2988 | WARN_ON(nr != cluster->nr); |
| 2686 | out_unlock: | 2989 | out: |
| 2687 | mutex_unlock(&inode->i_mutex); | ||
| 2688 | kfree(ra); | 2990 | kfree(ra); |
| 2689 | return ret; | 2991 | return ret; |
| 2690 | } | 2992 | } |
| @@ -2870,9 +3172,6 @@ out: | |||
| 2870 | static int block_use_full_backref(struct reloc_control *rc, | 3172 | static int block_use_full_backref(struct reloc_control *rc, |
| 2871 | struct extent_buffer *eb) | 3173 | struct extent_buffer *eb) |
| 2872 | { | 3174 | { |
| 2873 | struct btrfs_path *path; | ||
| 2874 | struct btrfs_extent_item *ei; | ||
| 2875 | struct btrfs_key key; | ||
| 2876 | u64 flags; | 3175 | u64 flags; |
| 2877 | int ret; | 3176 | int ret; |
| 2878 | 3177 | ||
| @@ -2880,28 +3179,14 @@ static int block_use_full_backref(struct reloc_control *rc, | |||
| 2880 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) | 3179 | btrfs_header_backref_rev(eb) < BTRFS_MIXED_BACKREF_REV) |
| 2881 | return 1; | 3180 | return 1; |
| 2882 | 3181 | ||
| 2883 | path = btrfs_alloc_path(); | 3182 | ret = btrfs_lookup_extent_info(NULL, rc->extent_root, |
| 2884 | BUG_ON(!path); | 3183 | eb->start, eb->len, NULL, &flags); |
| 2885 | |||
| 2886 | key.objectid = eb->start; | ||
| 2887 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
| 2888 | key.offset = eb->len; | ||
| 2889 | |||
| 2890 | path->search_commit_root = 1; | ||
| 2891 | path->skip_locking = 1; | ||
| 2892 | ret = btrfs_search_slot(NULL, rc->extent_root, | ||
| 2893 | &key, path, 0, 0); | ||
| 2894 | BUG_ON(ret); | 3184 | BUG_ON(ret); |
| 2895 | 3185 | ||
| 2896 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | ||
| 2897 | struct btrfs_extent_item); | ||
| 2898 | flags = btrfs_extent_flags(path->nodes[0], ei); | ||
| 2899 | BUG_ON(!(flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)); | ||
| 2900 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) | 3186 | if (flags & BTRFS_BLOCK_FLAG_FULL_BACKREF) |
| 2901 | ret = 1; | 3187 | ret = 1; |
| 2902 | else | 3188 | else |
| 2903 | ret = 0; | 3189 | ret = 0; |
| 2904 | btrfs_free_path(path); | ||
| 2905 | return ret; | 3190 | return ret; |
| 2906 | } | 3191 | } |
| 2907 | 3192 | ||
| @@ -3074,22 +3359,10 @@ int add_data_references(struct reloc_control *rc, | |||
| 3074 | struct btrfs_extent_inline_ref *iref; | 3359 | struct btrfs_extent_inline_ref *iref; |
| 3075 | unsigned long ptr; | 3360 | unsigned long ptr; |
| 3076 | unsigned long end; | 3361 | unsigned long end; |
| 3077 | u32 blocksize; | 3362 | u32 blocksize = btrfs_level_size(rc->extent_root, 0); |
| 3078 | int ret; | 3363 | int ret; |
| 3079 | int err = 0; | 3364 | int err = 0; |
| 3080 | 3365 | ||
| 3081 | ret = get_new_location(rc->data_inode, NULL, extent_key->objectid, | ||
| 3082 | extent_key->offset); | ||
| 3083 | BUG_ON(ret < 0); | ||
| 3084 | if (ret > 0) { | ||
| 3085 | /* the relocated data is fragmented */ | ||
| 3086 | rc->extents_skipped++; | ||
| 3087 | btrfs_release_path(rc->extent_root, path); | ||
| 3088 | return 0; | ||
| 3089 | } | ||
| 3090 | |||
| 3091 | blocksize = btrfs_level_size(rc->extent_root, 0); | ||
| 3092 | |||
| 3093 | eb = path->nodes[0]; | 3366 | eb = path->nodes[0]; |
| 3094 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); | 3367 | ptr = btrfs_item_ptr_offset(eb, path->slots[0]); |
| 3095 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); | 3368 | end = ptr + btrfs_item_size_nr(eb, path->slots[0]); |
| @@ -3170,7 +3443,8 @@ int add_data_references(struct reloc_control *rc, | |||
| 3170 | */ | 3443 | */ |
| 3171 | static noinline_for_stack | 3444 | static noinline_for_stack |
| 3172 | int find_next_extent(struct btrfs_trans_handle *trans, | 3445 | int find_next_extent(struct btrfs_trans_handle *trans, |
| 3173 | struct reloc_control *rc, struct btrfs_path *path) | 3446 | struct reloc_control *rc, struct btrfs_path *path, |
| 3447 | struct btrfs_key *extent_key) | ||
| 3174 | { | 3448 | { |
| 3175 | struct btrfs_key key; | 3449 | struct btrfs_key key; |
| 3176 | struct extent_buffer *leaf; | 3450 | struct extent_buffer *leaf; |
| @@ -3225,6 +3499,7 @@ next: | |||
| 3225 | rc->search_start = end + 1; | 3499 | rc->search_start = end + 1; |
| 3226 | } else { | 3500 | } else { |
| 3227 | rc->search_start = key.objectid + key.offset; | 3501 | rc->search_start = key.objectid + key.offset; |
| 3502 | memcpy(extent_key, &key, sizeof(key)); | ||
| 3228 | return 0; | 3503 | return 0; |
| 3229 | } | 3504 | } |
| 3230 | } | 3505 | } |
| @@ -3262,12 +3537,49 @@ static int check_extent_flags(u64 flags) | |||
| 3262 | return 0; | 3537 | return 0; |
| 3263 | } | 3538 | } |
| 3264 | 3539 | ||
| 3540 | static noinline_for_stack | ||
| 3541 | int prepare_to_relocate(struct reloc_control *rc) | ||
| 3542 | { | ||
| 3543 | struct btrfs_trans_handle *trans; | ||
| 3544 | int ret; | ||
| 3545 | |||
| 3546 | rc->block_rsv = btrfs_alloc_block_rsv(rc->extent_root); | ||
| 3547 | if (!rc->block_rsv) | ||
| 3548 | return -ENOMEM; | ||
| 3549 | |||
| 3550 | /* | ||
| 3551 | * reserve some space for creating reloc trees. | ||
| 3552 | * btrfs_init_reloc_root will use them when there | ||
| 3553 | * is no reservation in transaction handle. | ||
| 3554 | */ | ||
| 3555 | ret = btrfs_block_rsv_add(NULL, rc->extent_root, rc->block_rsv, | ||
| 3556 | rc->extent_root->nodesize * 256, | ||
| 3557 | &rc->block_rsv_retries); | ||
| 3558 | if (ret) | ||
| 3559 | return ret; | ||
| 3560 | |||
| 3561 | rc->block_rsv->refill_used = 1; | ||
| 3562 | btrfs_add_durable_block_rsv(rc->extent_root->fs_info, rc->block_rsv); | ||
| 3563 | |||
| 3564 | memset(&rc->cluster, 0, sizeof(rc->cluster)); | ||
| 3565 | rc->search_start = rc->block_group->key.objectid; | ||
| 3566 | rc->extents_found = 0; | ||
| 3567 | rc->nodes_relocated = 0; | ||
| 3568 | rc->merging_rsv_size = 0; | ||
| 3569 | rc->block_rsv_retries = 0; | ||
| 3570 | |||
| 3571 | rc->create_reloc_tree = 1; | ||
| 3572 | set_reloc_control(rc); | ||
| 3573 | |||
| 3574 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
| 3575 | btrfs_commit_transaction(trans, rc->extent_root); | ||
| 3576 | return 0; | ||
| 3577 | } | ||
| 3265 | 3578 | ||
| 3266 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | 3579 | static noinline_for_stack int relocate_block_group(struct reloc_control *rc) |
| 3267 | { | 3580 | { |
| 3268 | struct rb_root blocks = RB_ROOT; | 3581 | struct rb_root blocks = RB_ROOT; |
| 3269 | struct btrfs_key key; | 3582 | struct btrfs_key key; |
| 3270 | struct file_extent_cluster *cluster; | ||
| 3271 | struct btrfs_trans_handle *trans = NULL; | 3583 | struct btrfs_trans_handle *trans = NULL; |
| 3272 | struct btrfs_path *path; | 3584 | struct btrfs_path *path; |
| 3273 | struct btrfs_extent_item *ei; | 3585 | struct btrfs_extent_item *ei; |
| @@ -3277,33 +3589,25 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3277 | int ret; | 3589 | int ret; |
| 3278 | int err = 0; | 3590 | int err = 0; |
| 3279 | 3591 | ||
| 3280 | cluster = kzalloc(sizeof(*cluster), GFP_NOFS); | ||
| 3281 | if (!cluster) | ||
| 3282 | return -ENOMEM; | ||
| 3283 | |||
| 3284 | path = btrfs_alloc_path(); | 3592 | path = btrfs_alloc_path(); |
| 3285 | if (!path) { | 3593 | if (!path) |
| 3286 | kfree(cluster); | ||
| 3287 | return -ENOMEM; | 3594 | return -ENOMEM; |
| 3288 | } | ||
| 3289 | |||
| 3290 | rc->extents_found = 0; | ||
| 3291 | rc->extents_skipped = 0; | ||
| 3292 | |||
| 3293 | rc->search_start = rc->block_group->key.objectid; | ||
| 3294 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
| 3295 | GFP_NOFS); | ||
| 3296 | |||
| 3297 | rc->create_reloc_root = 1; | ||
| 3298 | set_reloc_control(rc); | ||
| 3299 | 3595 | ||
| 3300 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3596 | ret = prepare_to_relocate(rc); |
| 3301 | btrfs_commit_transaction(trans, rc->extent_root); | 3597 | if (ret) { |
| 3598 | err = ret; | ||
| 3599 | goto out_free; | ||
| 3600 | } | ||
| 3302 | 3601 | ||
| 3303 | while (1) { | 3602 | while (1) { |
| 3304 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3603 | trans = btrfs_start_transaction(rc->extent_root, 0); |
| 3604 | |||
| 3605 | if (update_backref_cache(trans, &rc->backref_cache)) { | ||
| 3606 | btrfs_end_transaction(trans, rc->extent_root); | ||
| 3607 | continue; | ||
| 3608 | } | ||
| 3305 | 3609 | ||
| 3306 | ret = find_next_extent(trans, rc, path); | 3610 | ret = find_next_extent(trans, rc, path, &key); |
| 3307 | if (ret < 0) | 3611 | if (ret < 0) |
| 3308 | err = ret; | 3612 | err = ret; |
| 3309 | if (ret != 0) | 3613 | if (ret != 0) |
| @@ -3313,9 +3617,7 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3313 | 3617 | ||
| 3314 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], | 3618 | ei = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 3315 | struct btrfs_extent_item); | 3619 | struct btrfs_extent_item); |
| 3316 | btrfs_item_key_to_cpu(path->nodes[0], &key, path->slots[0]); | 3620 | item_size = btrfs_item_size_nr(path->nodes[0], path->slots[0]); |
| 3317 | item_size = btrfs_item_size_nr(path->nodes[0], | ||
| 3318 | path->slots[0]); | ||
| 3319 | if (item_size >= sizeof(*ei)) { | 3621 | if (item_size >= sizeof(*ei)) { |
| 3320 | flags = btrfs_extent_flags(path->nodes[0], ei); | 3622 | flags = btrfs_extent_flags(path->nodes[0], ei); |
| 3321 | ret = check_extent_flags(flags); | 3623 | ret = check_extent_flags(flags); |
| @@ -3356,73 +3658,100 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc) | |||
| 3356 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 3658 | if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
| 3357 | ret = add_tree_block(rc, &key, path, &blocks); | 3659 | ret = add_tree_block(rc, &key, path, &blocks); |
| 3358 | } else if (rc->stage == UPDATE_DATA_PTRS && | 3660 | } else if (rc->stage == UPDATE_DATA_PTRS && |
| 3359 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3661 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
| 3360 | ret = add_data_references(rc, &key, path, &blocks); | 3662 | ret = add_data_references(rc, &key, path, &blocks); |
| 3361 | } else { | 3663 | } else { |
| 3362 | btrfs_release_path(rc->extent_root, path); | 3664 | btrfs_release_path(rc->extent_root, path); |
| 3363 | ret = 0; | 3665 | ret = 0; |
| 3364 | } | 3666 | } |
| 3365 | if (ret < 0) { | 3667 | if (ret < 0) { |
| 3366 | err = 0; | 3668 | err = ret; |
| 3367 | break; | 3669 | break; |
| 3368 | } | 3670 | } |
| 3369 | 3671 | ||
| 3370 | if (!RB_EMPTY_ROOT(&blocks)) { | 3672 | if (!RB_EMPTY_ROOT(&blocks)) { |
| 3371 | ret = relocate_tree_blocks(trans, rc, &blocks); | 3673 | ret = relocate_tree_blocks(trans, rc, &blocks); |
| 3372 | if (ret < 0) { | 3674 | if (ret < 0) { |
| 3675 | if (ret != -EAGAIN) { | ||
| 3676 | err = ret; | ||
| 3677 | break; | ||
| 3678 | } | ||
| 3679 | rc->extents_found--; | ||
| 3680 | rc->search_start = key.objectid; | ||
| 3681 | } | ||
| 3682 | } | ||
| 3683 | |||
| 3684 | ret = btrfs_block_rsv_check(trans, rc->extent_root, | ||
| 3685 | rc->block_rsv, 0, 5); | ||
| 3686 | if (ret < 0) { | ||
| 3687 | if (ret != -EAGAIN) { | ||
| 3373 | err = ret; | 3688 | err = ret; |
| 3689 | WARN_ON(1); | ||
| 3374 | break; | 3690 | break; |
| 3375 | } | 3691 | } |
| 3692 | rc->commit_transaction = 1; | ||
| 3376 | } | 3693 | } |
| 3377 | 3694 | ||
| 3378 | nr = trans->blocks_used; | 3695 | if (rc->commit_transaction) { |
| 3379 | btrfs_end_transaction(trans, rc->extent_root); | 3696 | rc->commit_transaction = 0; |
| 3697 | ret = btrfs_commit_transaction(trans, rc->extent_root); | ||
| 3698 | BUG_ON(ret); | ||
| 3699 | } else { | ||
| 3700 | nr = trans->blocks_used; | ||
| 3701 | btrfs_end_transaction_throttle(trans, rc->extent_root); | ||
| 3702 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
| 3703 | } | ||
| 3380 | trans = NULL; | 3704 | trans = NULL; |
| 3381 | btrfs_btree_balance_dirty(rc->extent_root, nr); | ||
| 3382 | 3705 | ||
| 3383 | if (rc->stage == MOVE_DATA_EXTENTS && | 3706 | if (rc->stage == MOVE_DATA_EXTENTS && |
| 3384 | (flags & BTRFS_EXTENT_FLAG_DATA)) { | 3707 | (flags & BTRFS_EXTENT_FLAG_DATA)) { |
| 3385 | rc->found_file_extent = 1; | 3708 | rc->found_file_extent = 1; |
| 3386 | ret = relocate_data_extent(rc->data_inode, | 3709 | ret = relocate_data_extent(rc->data_inode, |
| 3387 | &key, cluster); | 3710 | &key, &rc->cluster); |
| 3388 | if (ret < 0) { | 3711 | if (ret < 0) { |
| 3389 | err = ret; | 3712 | err = ret; |
| 3390 | break; | 3713 | break; |
| 3391 | } | 3714 | } |
| 3392 | } | 3715 | } |
| 3393 | } | 3716 | } |
| 3394 | btrfs_free_path(path); | 3717 | |
| 3718 | btrfs_release_path(rc->extent_root, path); | ||
| 3719 | clear_extent_bits(&rc->processed_blocks, 0, (u64)-1, EXTENT_DIRTY, | ||
| 3720 | GFP_NOFS); | ||
| 3395 | 3721 | ||
| 3396 | if (trans) { | 3722 | if (trans) { |
| 3397 | nr = trans->blocks_used; | 3723 | nr = trans->blocks_used; |
| 3398 | btrfs_end_transaction(trans, rc->extent_root); | 3724 | btrfs_end_transaction_throttle(trans, rc->extent_root); |
| 3399 | btrfs_btree_balance_dirty(rc->extent_root, nr); | 3725 | btrfs_btree_balance_dirty(rc->extent_root, nr); |
| 3400 | } | 3726 | } |
| 3401 | 3727 | ||
| 3402 | if (!err) { | 3728 | if (!err) { |
| 3403 | ret = relocate_file_extent_cluster(rc->data_inode, cluster); | 3729 | ret = relocate_file_extent_cluster(rc->data_inode, |
| 3730 | &rc->cluster); | ||
| 3404 | if (ret < 0) | 3731 | if (ret < 0) |
| 3405 | err = ret; | 3732 | err = ret; |
| 3406 | } | 3733 | } |
| 3407 | 3734 | ||
| 3408 | kfree(cluster); | 3735 | rc->create_reloc_tree = 0; |
| 3736 | set_reloc_control(rc); | ||
| 3409 | 3737 | ||
| 3410 | rc->create_reloc_root = 0; | 3738 | backref_cache_cleanup(&rc->backref_cache); |
| 3411 | smp_mb(); | 3739 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); |
| 3412 | 3740 | ||
| 3413 | if (rc->extents_found > 0) { | 3741 | err = prepare_to_merge(rc, err); |
| 3414 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
| 3415 | btrfs_commit_transaction(trans, rc->extent_root); | ||
| 3416 | } | ||
| 3417 | 3742 | ||
| 3418 | merge_reloc_roots(rc); | 3743 | merge_reloc_roots(rc); |
| 3419 | 3744 | ||
| 3745 | rc->merge_reloc_tree = 0; | ||
| 3420 | unset_reloc_control(rc); | 3746 | unset_reloc_control(rc); |
| 3747 | btrfs_block_rsv_release(rc->extent_root, rc->block_rsv, (u64)-1); | ||
| 3421 | 3748 | ||
| 3422 | /* get rid of pinned extents */ | 3749 | /* get rid of pinned extents */ |
| 3423 | trans = btrfs_start_transaction(rc->extent_root, 1); | 3750 | trans = btrfs_join_transaction(rc->extent_root, 1); |
| 3424 | btrfs_commit_transaction(trans, rc->extent_root); | 3751 | btrfs_commit_transaction(trans, rc->extent_root); |
| 3425 | 3752 | out_free: | |
| 3753 | btrfs_free_block_rsv(rc->extent_root, rc->block_rsv); | ||
| 3754 | btrfs_free_path(path); | ||
| 3426 | return err; | 3755 | return err; |
| 3427 | } | 3756 | } |
| 3428 | 3757 | ||
| @@ -3448,7 +3777,8 @@ static int __insert_orphan_inode(struct btrfs_trans_handle *trans, | |||
| 3448 | btrfs_set_inode_generation(leaf, item, 1); | 3777 | btrfs_set_inode_generation(leaf, item, 1); |
| 3449 | btrfs_set_inode_size(leaf, item, 0); | 3778 | btrfs_set_inode_size(leaf, item, 0); |
| 3450 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); | 3779 | btrfs_set_inode_mode(leaf, item, S_IFREG | 0600); |
| 3451 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS); | 3780 | btrfs_set_inode_flags(leaf, item, BTRFS_INODE_NOCOMPRESS | |
| 3781 | BTRFS_INODE_PREALLOC); | ||
| 3452 | btrfs_mark_buffer_dirty(leaf); | 3782 | btrfs_mark_buffer_dirty(leaf); |
| 3453 | btrfs_release_path(root, path); | 3783 | btrfs_release_path(root, path); |
| 3454 | out: | 3784 | out: |
| @@ -3460,8 +3790,9 @@ out: | |||
| 3460 | * helper to create inode for data relocation. | 3790 | * helper to create inode for data relocation. |
| 3461 | * the inode is in data relocation tree and its link count is 0 | 3791 | * the inode is in data relocation tree and its link count is 0 |
| 3462 | */ | 3792 | */ |
| 3463 | static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | 3793 | static noinline_for_stack |
| 3464 | struct btrfs_block_group_cache *group) | 3794 | struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, |
| 3795 | struct btrfs_block_group_cache *group) | ||
| 3465 | { | 3796 | { |
| 3466 | struct inode *inode = NULL; | 3797 | struct inode *inode = NULL; |
| 3467 | struct btrfs_trans_handle *trans; | 3798 | struct btrfs_trans_handle *trans; |
| @@ -3475,8 +3806,9 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3475 | if (IS_ERR(root)) | 3806 | if (IS_ERR(root)) |
| 3476 | return ERR_CAST(root); | 3807 | return ERR_CAST(root); |
| 3477 | 3808 | ||
| 3478 | trans = btrfs_start_transaction(root, 1); | 3809 | trans = btrfs_start_transaction(root, 6); |
| 3479 | BUG_ON(!trans); | 3810 | if (IS_ERR(trans)) |
| 3811 | return ERR_CAST(trans); | ||
| 3480 | 3812 | ||
| 3481 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); | 3813 | err = btrfs_find_free_objectid(trans, root, objectid, &objectid); |
| 3482 | if (err) | 3814 | if (err) |
| @@ -3496,7 +3828,6 @@ static struct inode *create_reloc_inode(struct btrfs_fs_info *fs_info, | |||
| 3496 | out: | 3828 | out: |
| 3497 | nr = trans->blocks_used; | 3829 | nr = trans->blocks_used; |
| 3498 | btrfs_end_transaction(trans, root); | 3830 | btrfs_end_transaction(trans, root); |
| 3499 | |||
| 3500 | btrfs_btree_balance_dirty(root, nr); | 3831 | btrfs_btree_balance_dirty(root, nr); |
| 3501 | if (err) { | 3832 | if (err) { |
| 3502 | if (inode) | 3833 | if (inode) |
| @@ -3506,6 +3837,21 @@ out: | |||
| 3506 | return inode; | 3837 | return inode; |
| 3507 | } | 3838 | } |
| 3508 | 3839 | ||
| 3840 | static struct reloc_control *alloc_reloc_control(void) | ||
| 3841 | { | ||
| 3842 | struct reloc_control *rc; | ||
| 3843 | |||
| 3844 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | ||
| 3845 | if (!rc) | ||
| 3846 | return NULL; | ||
| 3847 | |||
| 3848 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
| 3849 | backref_cache_init(&rc->backref_cache); | ||
| 3850 | mapping_tree_init(&rc->reloc_root_tree); | ||
| 3851 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
| 3852 | return rc; | ||
| 3853 | } | ||
| 3854 | |||
| 3509 | /* | 3855 | /* |
| 3510 | * function to relocate all extents in a block group. | 3856 | * function to relocate all extents in a block group. |
| 3511 | */ | 3857 | */ |
| @@ -3514,24 +3860,26 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3514 | struct btrfs_fs_info *fs_info = extent_root->fs_info; | 3860 | struct btrfs_fs_info *fs_info = extent_root->fs_info; |
| 3515 | struct reloc_control *rc; | 3861 | struct reloc_control *rc; |
| 3516 | int ret; | 3862 | int ret; |
| 3863 | int rw = 0; | ||
| 3517 | int err = 0; | 3864 | int err = 0; |
| 3518 | 3865 | ||
| 3519 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 3866 | rc = alloc_reloc_control(); |
| 3520 | if (!rc) | 3867 | if (!rc) |
| 3521 | return -ENOMEM; | 3868 | return -ENOMEM; |
| 3522 | 3869 | ||
| 3523 | mapping_tree_init(&rc->reloc_root_tree); | 3870 | rc->extent_root = extent_root; |
| 3524 | extent_io_tree_init(&rc->processed_blocks, NULL, GFP_NOFS); | ||
| 3525 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
| 3526 | 3871 | ||
| 3527 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); | 3872 | rc->block_group = btrfs_lookup_block_group(fs_info, group_start); |
| 3528 | BUG_ON(!rc->block_group); | 3873 | BUG_ON(!rc->block_group); |
| 3529 | 3874 | ||
| 3530 | btrfs_init_workers(&rc->workers, "relocate", | 3875 | if (!rc->block_group->ro) { |
| 3531 | fs_info->thread_pool_size, NULL); | 3876 | ret = btrfs_set_block_group_ro(extent_root, rc->block_group); |
| 3532 | 3877 | if (ret) { | |
| 3533 | rc->extent_root = extent_root; | 3878 | err = ret; |
| 3534 | btrfs_prepare_block_group_relocation(extent_root, rc->block_group); | 3879 | goto out; |
| 3880 | } | ||
| 3881 | rw = 1; | ||
| 3882 | } | ||
| 3535 | 3883 | ||
| 3536 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); | 3884 | rc->data_inode = create_reloc_inode(fs_info, rc->block_group); |
| 3537 | if (IS_ERR(rc->data_inode)) { | 3885 | if (IS_ERR(rc->data_inode)) { |
| @@ -3548,9 +3896,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3548 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); | 3896 | btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0); |
| 3549 | 3897 | ||
| 3550 | while (1) { | 3898 | while (1) { |
| 3551 | rc->extents_found = 0; | ||
| 3552 | rc->extents_skipped = 0; | ||
| 3553 | |||
| 3554 | mutex_lock(&fs_info->cleaner_mutex); | 3899 | mutex_lock(&fs_info->cleaner_mutex); |
| 3555 | 3900 | ||
| 3556 | btrfs_clean_old_snapshots(fs_info->tree_root); | 3901 | btrfs_clean_old_snapshots(fs_info->tree_root); |
| @@ -3559,7 +3904,7 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3559 | mutex_unlock(&fs_info->cleaner_mutex); | 3904 | mutex_unlock(&fs_info->cleaner_mutex); |
| 3560 | if (ret < 0) { | 3905 | if (ret < 0) { |
| 3561 | err = ret; | 3906 | err = ret; |
| 3562 | break; | 3907 | goto out; |
| 3563 | } | 3908 | } |
| 3564 | 3909 | ||
| 3565 | if (rc->extents_found == 0) | 3910 | if (rc->extents_found == 0) |
| @@ -3573,18 +3918,6 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3573 | invalidate_mapping_pages(rc->data_inode->i_mapping, | 3918 | invalidate_mapping_pages(rc->data_inode->i_mapping, |
| 3574 | 0, -1); | 3919 | 0, -1); |
| 3575 | rc->stage = UPDATE_DATA_PTRS; | 3920 | rc->stage = UPDATE_DATA_PTRS; |
| 3576 | } else if (rc->stage == UPDATE_DATA_PTRS && | ||
| 3577 | rc->extents_skipped >= rc->extents_found) { | ||
| 3578 | iput(rc->data_inode); | ||
| 3579 | rc->data_inode = create_reloc_inode(fs_info, | ||
| 3580 | rc->block_group); | ||
| 3581 | if (IS_ERR(rc->data_inode)) { | ||
| 3582 | err = PTR_ERR(rc->data_inode); | ||
| 3583 | rc->data_inode = NULL; | ||
| 3584 | break; | ||
| 3585 | } | ||
| 3586 | rc->stage = MOVE_DATA_EXTENTS; | ||
| 3587 | rc->found_file_extent = 0; | ||
| 3588 | } | 3921 | } |
| 3589 | } | 3922 | } |
| 3590 | 3923 | ||
| @@ -3597,8 +3930,9 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start) | |||
| 3597 | WARN_ON(rc->block_group->reserved > 0); | 3930 | WARN_ON(rc->block_group->reserved > 0); |
| 3598 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); | 3931 | WARN_ON(btrfs_block_group_used(&rc->block_group->item) > 0); |
| 3599 | out: | 3932 | out: |
| 3933 | if (err && rw) | ||
| 3934 | btrfs_set_block_group_rw(extent_root, rc->block_group); | ||
| 3600 | iput(rc->data_inode); | 3935 | iput(rc->data_inode); |
| 3601 | btrfs_stop_workers(&rc->workers); | ||
| 3602 | btrfs_put_block_group(rc->block_group); | 3936 | btrfs_put_block_group(rc->block_group); |
| 3603 | kfree(rc); | 3937 | kfree(rc); |
| 3604 | return err; | 3938 | return err; |
| @@ -3609,7 +3943,7 @@ static noinline_for_stack int mark_garbage_root(struct btrfs_root *root) | |||
| 3609 | struct btrfs_trans_handle *trans; | 3943 | struct btrfs_trans_handle *trans; |
| 3610 | int ret; | 3944 | int ret; |
| 3611 | 3945 | ||
| 3612 | trans = btrfs_start_transaction(root->fs_info->tree_root, 1); | 3946 | trans = btrfs_start_transaction(root->fs_info->tree_root, 0); |
| 3613 | 3947 | ||
| 3614 | memset(&root->root_item.drop_progress, 0, | 3948 | memset(&root->root_item.drop_progress, 0, |
| 3615 | sizeof(root->root_item.drop_progress)); | 3949 | sizeof(root->root_item.drop_progress)); |
| @@ -3702,20 +4036,20 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 3702 | if (list_empty(&reloc_roots)) | 4036 | if (list_empty(&reloc_roots)) |
| 3703 | goto out; | 4037 | goto out; |
| 3704 | 4038 | ||
| 3705 | rc = kzalloc(sizeof(*rc), GFP_NOFS); | 4039 | rc = alloc_reloc_control(); |
| 3706 | if (!rc) { | 4040 | if (!rc) { |
| 3707 | err = -ENOMEM; | 4041 | err = -ENOMEM; |
| 3708 | goto out; | 4042 | goto out; |
| 3709 | } | 4043 | } |
| 3710 | 4044 | ||
| 3711 | mapping_tree_init(&rc->reloc_root_tree); | ||
| 3712 | INIT_LIST_HEAD(&rc->reloc_roots); | ||
| 3713 | btrfs_init_workers(&rc->workers, "relocate", | ||
| 3714 | root->fs_info->thread_pool_size, NULL); | ||
| 3715 | rc->extent_root = root->fs_info->extent_root; | 4045 | rc->extent_root = root->fs_info->extent_root; |
| 3716 | 4046 | ||
| 3717 | set_reloc_control(rc); | 4047 | set_reloc_control(rc); |
| 3718 | 4048 | ||
| 4049 | trans = btrfs_join_transaction(rc->extent_root, 1); | ||
| 4050 | |||
| 4051 | rc->merge_reloc_tree = 1; | ||
| 4052 | |||
| 3719 | while (!list_empty(&reloc_roots)) { | 4053 | while (!list_empty(&reloc_roots)) { |
| 3720 | reloc_root = list_entry(reloc_roots.next, | 4054 | reloc_root = list_entry(reloc_roots.next, |
| 3721 | struct btrfs_root, root_list); | 4055 | struct btrfs_root, root_list); |
| @@ -3735,20 +4069,16 @@ int btrfs_recover_relocation(struct btrfs_root *root) | |||
| 3735 | fs_root->reloc_root = reloc_root; | 4069 | fs_root->reloc_root = reloc_root; |
| 3736 | } | 4070 | } |
| 3737 | 4071 | ||
| 3738 | trans = btrfs_start_transaction(rc->extent_root, 1); | ||
| 3739 | btrfs_commit_transaction(trans, rc->extent_root); | 4072 | btrfs_commit_transaction(trans, rc->extent_root); |
| 3740 | 4073 | ||
| 3741 | merge_reloc_roots(rc); | 4074 | merge_reloc_roots(rc); |
| 3742 | 4075 | ||
| 3743 | unset_reloc_control(rc); | 4076 | unset_reloc_control(rc); |
| 3744 | 4077 | ||
| 3745 | trans = btrfs_start_transaction(rc->extent_root, 1); | 4078 | trans = btrfs_join_transaction(rc->extent_root, 1); |
| 3746 | btrfs_commit_transaction(trans, rc->extent_root); | 4079 | btrfs_commit_transaction(trans, rc->extent_root); |
| 3747 | out: | 4080 | out: |
| 3748 | if (rc) { | 4081 | kfree(rc); |
| 3749 | btrfs_stop_workers(&rc->workers); | ||
| 3750 | kfree(rc); | ||
| 3751 | } | ||
| 3752 | while (!list_empty(&reloc_roots)) { | 4082 | while (!list_empty(&reloc_roots)) { |
| 3753 | reloc_root = list_entry(reloc_roots.next, | 4083 | reloc_root = list_entry(reloc_roots.next, |
| 3754 | struct btrfs_root, root_list); | 4084 | struct btrfs_root, root_list); |
| @@ -3814,3 +4144,130 @@ int btrfs_reloc_clone_csums(struct inode *inode, u64 file_pos, u64 len) | |||
| 3814 | btrfs_put_ordered_extent(ordered); | 4144 | btrfs_put_ordered_extent(ordered); |
| 3815 | return 0; | 4145 | return 0; |
| 3816 | } | 4146 | } |
| 4147 | |||
| 4148 | void btrfs_reloc_cow_block(struct btrfs_trans_handle *trans, | ||
| 4149 | struct btrfs_root *root, struct extent_buffer *buf, | ||
| 4150 | struct extent_buffer *cow) | ||
| 4151 | { | ||
| 4152 | struct reloc_control *rc; | ||
| 4153 | struct backref_node *node; | ||
| 4154 | int first_cow = 0; | ||
| 4155 | int level; | ||
| 4156 | int ret; | ||
| 4157 | |||
| 4158 | rc = root->fs_info->reloc_ctl; | ||
| 4159 | if (!rc) | ||
| 4160 | return; | ||
| 4161 | |||
| 4162 | BUG_ON(rc->stage == UPDATE_DATA_PTRS && | ||
| 4163 | root->root_key.objectid == BTRFS_DATA_RELOC_TREE_OBJECTID); | ||
| 4164 | |||
| 4165 | level = btrfs_header_level(buf); | ||
| 4166 | if (btrfs_header_generation(buf) <= | ||
| 4167 | btrfs_root_last_snapshot(&root->root_item)) | ||
| 4168 | first_cow = 1; | ||
| 4169 | |||
| 4170 | if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID && | ||
| 4171 | rc->create_reloc_tree) { | ||
| 4172 | WARN_ON(!first_cow && level == 0); | ||
| 4173 | |||
| 4174 | node = rc->backref_cache.path[level]; | ||
| 4175 | BUG_ON(node->bytenr != buf->start && | ||
| 4176 | node->new_bytenr != buf->start); | ||
| 4177 | |||
| 4178 | drop_node_buffer(node); | ||
| 4179 | extent_buffer_get(cow); | ||
| 4180 | node->eb = cow; | ||
| 4181 | node->new_bytenr = cow->start; | ||
| 4182 | |||
| 4183 | if (!node->pending) { | ||
| 4184 | list_move_tail(&node->list, | ||
| 4185 | &rc->backref_cache.pending[level]); | ||
| 4186 | node->pending = 1; | ||
| 4187 | } | ||
| 4188 | |||
| 4189 | if (first_cow) | ||
| 4190 | __mark_block_processed(rc, node); | ||
| 4191 | |||
| 4192 | if (first_cow && level > 0) | ||
| 4193 | rc->nodes_relocated += buf->len; | ||
| 4194 | } | ||
| 4195 | |||
| 4196 | if (level == 0 && first_cow && rc->stage == UPDATE_DATA_PTRS) { | ||
| 4197 | ret = replace_file_extents(trans, rc, root, cow); | ||
| 4198 | BUG_ON(ret); | ||
| 4199 | } | ||
| 4200 | } | ||
| 4201 | |||
| 4202 | /* | ||
| 4203 | * called before creating snapshot. it calculates metadata reservation | ||
| 4204 | * requried for relocating tree blocks in the snapshot | ||
| 4205 | */ | ||
| 4206 | void btrfs_reloc_pre_snapshot(struct btrfs_trans_handle *trans, | ||
| 4207 | struct btrfs_pending_snapshot *pending, | ||
| 4208 | u64 *bytes_to_reserve) | ||
| 4209 | { | ||
| 4210 | struct btrfs_root *root; | ||
| 4211 | struct reloc_control *rc; | ||
| 4212 | |||
| 4213 | root = pending->root; | ||
| 4214 | if (!root->reloc_root) | ||
| 4215 | return; | ||
| 4216 | |||
| 4217 | rc = root->fs_info->reloc_ctl; | ||
| 4218 | if (!rc->merge_reloc_tree) | ||
| 4219 | return; | ||
| 4220 | |||
| 4221 | root = root->reloc_root; | ||
| 4222 | BUG_ON(btrfs_root_refs(&root->root_item) == 0); | ||
| 4223 | /* | ||
| 4224 | * relocation is in the stage of merging trees. the space | ||
| 4225 | * used by merging a reloc tree is twice the size of | ||
| 4226 | * relocated tree nodes in the worst case. half for cowing | ||
| 4227 | * the reloc tree, half for cowing the fs tree. the space | ||
| 4228 | * used by cowing the reloc tree will be freed after the | ||
| 4229 | * tree is dropped. if we create snapshot, cowing the fs | ||
| 4230 | * tree may use more space than it frees. so we need | ||
| 4231 | * reserve extra space. | ||
| 4232 | */ | ||
| 4233 | *bytes_to_reserve += rc->nodes_relocated; | ||
| 4234 | } | ||
| 4235 | |||
| 4236 | /* | ||
| 4237 | * called after snapshot is created. migrate block reservation | ||
| 4238 | * and create reloc root for the newly created snapshot | ||
| 4239 | */ | ||
| 4240 | void btrfs_reloc_post_snapshot(struct btrfs_trans_handle *trans, | ||
| 4241 | struct btrfs_pending_snapshot *pending) | ||
| 4242 | { | ||
| 4243 | struct btrfs_root *root = pending->root; | ||
| 4244 | struct btrfs_root *reloc_root; | ||
| 4245 | struct btrfs_root *new_root; | ||
| 4246 | struct reloc_control *rc; | ||
| 4247 | int ret; | ||
| 4248 | |||
| 4249 | if (!root->reloc_root) | ||
| 4250 | return; | ||
| 4251 | |||
| 4252 | rc = root->fs_info->reloc_ctl; | ||
| 4253 | rc->merging_rsv_size += rc->nodes_relocated; | ||
| 4254 | |||
| 4255 | if (rc->merge_reloc_tree) { | ||
| 4256 | ret = btrfs_block_rsv_migrate(&pending->block_rsv, | ||
| 4257 | rc->block_rsv, | ||
| 4258 | rc->nodes_relocated); | ||
| 4259 | BUG_ON(ret); | ||
| 4260 | } | ||
| 4261 | |||
| 4262 | new_root = pending->snap; | ||
| 4263 | reloc_root = create_reloc_root(trans, root->reloc_root, | ||
| 4264 | new_root->root_key.objectid); | ||
| 4265 | |||
| 4266 | __add_reloc_root(reloc_root); | ||
| 4267 | new_root->reloc_root = reloc_root; | ||
| 4268 | |||
| 4269 | if (rc->create_reloc_tree) { | ||
| 4270 | ret = clone_backref_node(trans, rc, root, reloc_root); | ||
| 4271 | BUG_ON(ret); | ||
| 4272 | } | ||
| 4273 | } | ||
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c index 67fa2d29d663..b91ccd972644 100644 --- a/fs/btrfs/root-tree.c +++ b/fs/btrfs/root-tree.c | |||
| @@ -259,6 +259,8 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 259 | struct extent_buffer *leaf; | 259 | struct extent_buffer *leaf; |
| 260 | struct btrfs_path *path; | 260 | struct btrfs_path *path; |
| 261 | struct btrfs_key key; | 261 | struct btrfs_key key; |
| 262 | struct btrfs_key root_key; | ||
| 263 | struct btrfs_root *root; | ||
| 262 | int err = 0; | 264 | int err = 0; |
| 263 | int ret; | 265 | int ret; |
| 264 | 266 | ||
| @@ -270,6 +272,9 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 270 | key.type = BTRFS_ORPHAN_ITEM_KEY; | 272 | key.type = BTRFS_ORPHAN_ITEM_KEY; |
| 271 | key.offset = 0; | 273 | key.offset = 0; |
| 272 | 274 | ||
| 275 | root_key.type = BTRFS_ROOT_ITEM_KEY; | ||
| 276 | root_key.offset = (u64)-1; | ||
| 277 | |||
| 273 | while (1) { | 278 | while (1) { |
| 274 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); | 279 | ret = btrfs_search_slot(NULL, tree_root, &key, path, 0, 0); |
| 275 | if (ret < 0) { | 280 | if (ret < 0) { |
| @@ -294,13 +299,25 @@ int btrfs_find_orphan_roots(struct btrfs_root *tree_root) | |||
| 294 | key.type != BTRFS_ORPHAN_ITEM_KEY) | 299 | key.type != BTRFS_ORPHAN_ITEM_KEY) |
| 295 | break; | 300 | break; |
| 296 | 301 | ||
| 297 | ret = btrfs_find_dead_roots(tree_root, key.offset); | 302 | root_key.objectid = key.offset; |
| 298 | if (ret) { | 303 | key.offset++; |
| 304 | |||
| 305 | root = btrfs_read_fs_root_no_name(tree_root->fs_info, | ||
| 306 | &root_key); | ||
| 307 | if (!IS_ERR(root)) | ||
| 308 | continue; | ||
| 309 | |||
| 310 | ret = PTR_ERR(root); | ||
| 311 | if (ret != -ENOENT) { | ||
| 299 | err = ret; | 312 | err = ret; |
| 300 | break; | 313 | break; |
| 301 | } | 314 | } |
| 302 | 315 | ||
| 303 | key.offset++; | 316 | ret = btrfs_find_dead_roots(tree_root, root_key.objectid); |
| 317 | if (ret) { | ||
| 318 | err = ret; | ||
| 319 | break; | ||
| 320 | } | ||
| 304 | } | 321 | } |
| 305 | 322 | ||
| 306 | btrfs_free_path(path); | 323 | btrfs_free_path(path); |
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 2909a03e5230..d34b2dfc9628 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c | |||
| @@ -498,7 +498,7 @@ int btrfs_sync_fs(struct super_block *sb, int wait) | |||
| 498 | btrfs_start_delalloc_inodes(root, 0); | 498 | btrfs_start_delalloc_inodes(root, 0); |
| 499 | btrfs_wait_ordered_extents(root, 0, 0); | 499 | btrfs_wait_ordered_extents(root, 0, 0); |
| 500 | 500 | ||
| 501 | trans = btrfs_start_transaction(root, 1); | 501 | trans = btrfs_start_transaction(root, 0); |
| 502 | ret = btrfs_commit_transaction(trans, root); | 502 | ret = btrfs_commit_transaction(trans, root); |
| 503 | return ret; | 503 | return ret; |
| 504 | } | 504 | } |
| @@ -694,11 +694,11 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) | |||
| 694 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) | 694 | if (btrfs_super_log_root(&root->fs_info->super_copy) != 0) |
| 695 | return -EINVAL; | 695 | return -EINVAL; |
| 696 | 696 | ||
| 697 | /* recover relocation */ | 697 | ret = btrfs_cleanup_fs_roots(root->fs_info); |
| 698 | ret = btrfs_recover_relocation(root); | ||
| 699 | WARN_ON(ret); | 698 | WARN_ON(ret); |
| 700 | 699 | ||
| 701 | ret = btrfs_cleanup_fs_roots(root->fs_info); | 700 | /* recover relocation */ |
| 701 | ret = btrfs_recover_relocation(root); | ||
| 702 | WARN_ON(ret); | 702 | WARN_ON(ret); |
| 703 | 703 | ||
| 704 | sb->s_flags &= ~MS_RDONLY; | 704 | sb->s_flags &= ~MS_RDONLY; |
| @@ -714,34 +714,18 @@ static int btrfs_statfs(struct dentry *dentry, struct kstatfs *buf) | |||
| 714 | struct list_head *head = &root->fs_info->space_info; | 714 | struct list_head *head = &root->fs_info->space_info; |
| 715 | struct btrfs_space_info *found; | 715 | struct btrfs_space_info *found; |
| 716 | u64 total_used = 0; | 716 | u64 total_used = 0; |
| 717 | u64 data_used = 0; | ||
| 718 | int bits = dentry->d_sb->s_blocksize_bits; | 717 | int bits = dentry->d_sb->s_blocksize_bits; |
| 719 | __be32 *fsid = (__be32 *)root->fs_info->fsid; | 718 | __be32 *fsid = (__be32 *)root->fs_info->fsid; |
| 720 | 719 | ||
| 721 | rcu_read_lock(); | 720 | rcu_read_lock(); |
| 722 | list_for_each_entry_rcu(found, head, list) { | 721 | list_for_each_entry_rcu(found, head, list) |
| 723 | if (found->flags & (BTRFS_BLOCK_GROUP_DUP| | 722 | total_used += found->disk_used; |
| 724 | BTRFS_BLOCK_GROUP_RAID10| | ||
| 725 | BTRFS_BLOCK_GROUP_RAID1)) { | ||
| 726 | total_used += found->bytes_used; | ||
| 727 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
| 728 | data_used += found->bytes_used; | ||
| 729 | else | ||
| 730 | data_used += found->total_bytes; | ||
| 731 | } | ||
| 732 | |||
| 733 | total_used += found->bytes_used; | ||
| 734 | if (found->flags & BTRFS_BLOCK_GROUP_DATA) | ||
| 735 | data_used += found->bytes_used; | ||
| 736 | else | ||
| 737 | data_used += found->total_bytes; | ||
| 738 | } | ||
| 739 | rcu_read_unlock(); | 723 | rcu_read_unlock(); |
| 740 | 724 | ||
| 741 | buf->f_namelen = BTRFS_NAME_LEN; | 725 | buf->f_namelen = BTRFS_NAME_LEN; |
| 742 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; | 726 | buf->f_blocks = btrfs_super_total_bytes(disk_super) >> bits; |
| 743 | buf->f_bfree = buf->f_blocks - (total_used >> bits); | 727 | buf->f_bfree = buf->f_blocks - (total_used >> bits); |
| 744 | buf->f_bavail = buf->f_blocks - (data_used >> bits); | 728 | buf->f_bavail = buf->f_bfree; |
| 745 | buf->f_bsize = dentry->d_sb->s_blocksize; | 729 | buf->f_bsize = dentry->d_sb->s_blocksize; |
| 746 | buf->f_type = BTRFS_SUPER_MAGIC; | 730 | buf->f_type = BTRFS_SUPER_MAGIC; |
| 747 | 731 | ||
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 2cb116099b90..66e4c66cc63b 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c | |||
| @@ -165,54 +165,89 @@ enum btrfs_trans_type { | |||
| 165 | TRANS_USERSPACE, | 165 | TRANS_USERSPACE, |
| 166 | }; | 166 | }; |
| 167 | 167 | ||
| 168 | static int may_wait_transaction(struct btrfs_root *root, int type) | ||
| 169 | { | ||
| 170 | if (!root->fs_info->log_root_recovering && | ||
| 171 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
| 172 | type == TRANS_USERSPACE)) | ||
| 173 | return 1; | ||
| 174 | return 0; | ||
| 175 | } | ||
| 176 | |||
| 168 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, | 177 | static struct btrfs_trans_handle *start_transaction(struct btrfs_root *root, |
| 169 | int num_blocks, int type) | 178 | u64 num_items, int type) |
| 170 | { | 179 | { |
| 171 | struct btrfs_trans_handle *h = | 180 | struct btrfs_trans_handle *h; |
| 172 | kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | 181 | struct btrfs_transaction *cur_trans; |
| 182 | int retries = 0; | ||
| 173 | int ret; | 183 | int ret; |
| 184 | again: | ||
| 185 | h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); | ||
| 186 | if (!h) | ||
| 187 | return ERR_PTR(-ENOMEM); | ||
| 174 | 188 | ||
| 175 | mutex_lock(&root->fs_info->trans_mutex); | 189 | mutex_lock(&root->fs_info->trans_mutex); |
| 176 | if (!root->fs_info->log_root_recovering && | 190 | if (may_wait_transaction(root, type)) |
| 177 | ((type == TRANS_START && !root->fs_info->open_ioctl_trans) || | ||
| 178 | type == TRANS_USERSPACE)) | ||
| 179 | wait_current_trans(root); | 191 | wait_current_trans(root); |
| 192 | |||
| 180 | ret = join_transaction(root); | 193 | ret = join_transaction(root); |
| 181 | BUG_ON(ret); | 194 | BUG_ON(ret); |
| 182 | 195 | ||
| 183 | h->transid = root->fs_info->running_transaction->transid; | 196 | cur_trans = root->fs_info->running_transaction; |
| 184 | h->transaction = root->fs_info->running_transaction; | 197 | cur_trans->use_count++; |
| 185 | h->blocks_reserved = num_blocks; | 198 | mutex_unlock(&root->fs_info->trans_mutex); |
| 199 | |||
| 200 | h->transid = cur_trans->transid; | ||
| 201 | h->transaction = cur_trans; | ||
| 186 | h->blocks_used = 0; | 202 | h->blocks_used = 0; |
| 187 | h->block_group = 0; | 203 | h->block_group = 0; |
| 188 | h->alloc_exclude_nr = 0; | 204 | h->bytes_reserved = 0; |
| 189 | h->alloc_exclude_start = 0; | ||
| 190 | h->delayed_ref_updates = 0; | 205 | h->delayed_ref_updates = 0; |
| 206 | h->block_rsv = NULL; | ||
| 191 | 207 | ||
| 192 | if (!current->journal_info && type != TRANS_USERSPACE) | 208 | smp_mb(); |
| 193 | current->journal_info = h; | 209 | if (cur_trans->blocked && may_wait_transaction(root, type)) { |
| 210 | btrfs_commit_transaction(h, root); | ||
| 211 | goto again; | ||
| 212 | } | ||
| 213 | |||
| 214 | if (num_items > 0) { | ||
| 215 | ret = btrfs_trans_reserve_metadata(h, root, num_items, | ||
| 216 | &retries); | ||
| 217 | if (ret == -EAGAIN) { | ||
| 218 | btrfs_commit_transaction(h, root); | ||
| 219 | goto again; | ||
| 220 | } | ||
| 221 | if (ret < 0) { | ||
| 222 | btrfs_end_transaction(h, root); | ||
| 223 | return ERR_PTR(ret); | ||
| 224 | } | ||
| 225 | } | ||
| 194 | 226 | ||
| 195 | root->fs_info->running_transaction->use_count++; | 227 | mutex_lock(&root->fs_info->trans_mutex); |
| 196 | record_root_in_trans(h, root); | 228 | record_root_in_trans(h, root); |
| 197 | mutex_unlock(&root->fs_info->trans_mutex); | 229 | mutex_unlock(&root->fs_info->trans_mutex); |
| 230 | |||
| 231 | if (!current->journal_info && type != TRANS_USERSPACE) | ||
| 232 | current->journal_info = h; | ||
| 198 | return h; | 233 | return h; |
| 199 | } | 234 | } |
| 200 | 235 | ||
| 201 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 236 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 202 | int num_blocks) | 237 | int num_items) |
| 203 | { | 238 | { |
| 204 | return start_transaction(root, num_blocks, TRANS_START); | 239 | return start_transaction(root, num_items, TRANS_START); |
| 205 | } | 240 | } |
| 206 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 241 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
| 207 | int num_blocks) | 242 | int num_blocks) |
| 208 | { | 243 | { |
| 209 | return start_transaction(root, num_blocks, TRANS_JOIN); | 244 | return start_transaction(root, 0, TRANS_JOIN); |
| 210 | } | 245 | } |
| 211 | 246 | ||
| 212 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 247 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
| 213 | int num_blocks) | 248 | int num_blocks) |
| 214 | { | 249 | { |
| 215 | return start_transaction(r, num_blocks, TRANS_USERSPACE); | 250 | return start_transaction(r, 0, TRANS_USERSPACE); |
| 216 | } | 251 | } |
| 217 | 252 | ||
| 218 | /* wait for a transaction commit to be fully complete */ | 253 | /* wait for a transaction commit to be fully complete */ |
| @@ -286,10 +321,36 @@ void btrfs_throttle(struct btrfs_root *root) | |||
| 286 | mutex_unlock(&root->fs_info->trans_mutex); | 321 | mutex_unlock(&root->fs_info->trans_mutex); |
| 287 | } | 322 | } |
| 288 | 323 | ||
| 324 | static int should_end_transaction(struct btrfs_trans_handle *trans, | ||
| 325 | struct btrfs_root *root) | ||
| 326 | { | ||
| 327 | int ret; | ||
| 328 | ret = btrfs_block_rsv_check(trans, root, | ||
| 329 | &root->fs_info->global_block_rsv, 0, 5); | ||
| 330 | return ret ? 1 : 0; | ||
| 331 | } | ||
| 332 | |||
| 333 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
| 334 | struct btrfs_root *root) | ||
| 335 | { | ||
| 336 | struct btrfs_transaction *cur_trans = trans->transaction; | ||
| 337 | int updates; | ||
| 338 | |||
| 339 | if (cur_trans->blocked || cur_trans->delayed_refs.flushing) | ||
| 340 | return 1; | ||
| 341 | |||
| 342 | updates = trans->delayed_ref_updates; | ||
| 343 | trans->delayed_ref_updates = 0; | ||
| 344 | if (updates) | ||
| 345 | btrfs_run_delayed_refs(trans, root, updates); | ||
| 346 | |||
| 347 | return should_end_transaction(trans, root); | ||
| 348 | } | ||
| 349 | |||
| 289 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | 350 | static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| 290 | struct btrfs_root *root, int throttle) | 351 | struct btrfs_root *root, int throttle) |
| 291 | { | 352 | { |
| 292 | struct btrfs_transaction *cur_trans; | 353 | struct btrfs_transaction *cur_trans = trans->transaction; |
| 293 | struct btrfs_fs_info *info = root->fs_info; | 354 | struct btrfs_fs_info *info = root->fs_info; |
| 294 | int count = 0; | 355 | int count = 0; |
| 295 | 356 | ||
| @@ -313,9 +374,21 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, | |||
| 313 | count++; | 374 | count++; |
| 314 | } | 375 | } |
| 315 | 376 | ||
| 377 | btrfs_trans_release_metadata(trans, root); | ||
| 378 | |||
| 379 | if (!root->fs_info->open_ioctl_trans && | ||
| 380 | should_end_transaction(trans, root)) | ||
| 381 | trans->transaction->blocked = 1; | ||
| 382 | |||
| 383 | if (cur_trans->blocked && !cur_trans->in_commit) { | ||
| 384 | if (throttle) | ||
| 385 | return btrfs_commit_transaction(trans, root); | ||
| 386 | else | ||
| 387 | wake_up_process(info->transaction_kthread); | ||
| 388 | } | ||
| 389 | |||
| 316 | mutex_lock(&info->trans_mutex); | 390 | mutex_lock(&info->trans_mutex); |
| 317 | cur_trans = info->running_transaction; | 391 | WARN_ON(cur_trans != info->running_transaction); |
| 318 | WARN_ON(cur_trans != trans->transaction); | ||
| 319 | WARN_ON(cur_trans->num_writers < 1); | 392 | WARN_ON(cur_trans->num_writers < 1); |
| 320 | cur_trans->num_writers--; | 393 | cur_trans->num_writers--; |
| 321 | 394 | ||
| @@ -603,6 +676,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
| 603 | 676 | ||
| 604 | btrfs_free_log(trans, root); | 677 | btrfs_free_log(trans, root); |
| 605 | btrfs_update_reloc_root(trans, root); | 678 | btrfs_update_reloc_root(trans, root); |
| 679 | btrfs_orphan_commit_root(trans, root); | ||
| 606 | 680 | ||
| 607 | if (root->commit_root != root->node) { | 681 | if (root->commit_root != root->node) { |
| 608 | switch_commit_root(root); | 682 | switch_commit_root(root); |
| @@ -627,30 +701,30 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans, | |||
| 627 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) | 701 | int btrfs_defrag_root(struct btrfs_root *root, int cacheonly) |
| 628 | { | 702 | { |
| 629 | struct btrfs_fs_info *info = root->fs_info; | 703 | struct btrfs_fs_info *info = root->fs_info; |
| 630 | int ret; | ||
| 631 | struct btrfs_trans_handle *trans; | 704 | struct btrfs_trans_handle *trans; |
| 705 | int ret; | ||
| 632 | unsigned long nr; | 706 | unsigned long nr; |
| 633 | 707 | ||
| 634 | smp_mb(); | 708 | if (xchg(&root->defrag_running, 1)) |
| 635 | if (root->defrag_running) | ||
| 636 | return 0; | 709 | return 0; |
| 637 | trans = btrfs_start_transaction(root, 1); | 710 | |
| 638 | while (1) { | 711 | while (1) { |
| 639 | root->defrag_running = 1; | 712 | trans = btrfs_start_transaction(root, 0); |
| 713 | if (IS_ERR(trans)) | ||
| 714 | return PTR_ERR(trans); | ||
| 715 | |||
| 640 | ret = btrfs_defrag_leaves(trans, root, cacheonly); | 716 | ret = btrfs_defrag_leaves(trans, root, cacheonly); |
| 717 | |||
| 641 | nr = trans->blocks_used; | 718 | nr = trans->blocks_used; |
| 642 | btrfs_end_transaction(trans, root); | 719 | btrfs_end_transaction(trans, root); |
| 643 | btrfs_btree_balance_dirty(info->tree_root, nr); | 720 | btrfs_btree_balance_dirty(info->tree_root, nr); |
| 644 | cond_resched(); | 721 | cond_resched(); |
| 645 | 722 | ||
| 646 | trans = btrfs_start_transaction(root, 1); | ||
| 647 | if (root->fs_info->closing || ret != -EAGAIN) | 723 | if (root->fs_info->closing || ret != -EAGAIN) |
| 648 | break; | 724 | break; |
| 649 | } | 725 | } |
| 650 | root->defrag_running = 0; | 726 | root->defrag_running = 0; |
| 651 | smp_mb(); | 727 | return ret; |
| 652 | btrfs_end_transaction(trans, root); | ||
| 653 | return 0; | ||
| 654 | } | 728 | } |
| 655 | 729 | ||
| 656 | #if 0 | 730 | #if 0 |
| @@ -758,47 +832,63 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 758 | struct btrfs_root *root = pending->root; | 832 | struct btrfs_root *root = pending->root; |
| 759 | struct btrfs_root *parent_root; | 833 | struct btrfs_root *parent_root; |
| 760 | struct inode *parent_inode; | 834 | struct inode *parent_inode; |
| 835 | struct dentry *dentry; | ||
| 761 | struct extent_buffer *tmp; | 836 | struct extent_buffer *tmp; |
| 762 | struct extent_buffer *old; | 837 | struct extent_buffer *old; |
| 763 | int ret; | 838 | int ret; |
| 764 | u64 objectid; | 839 | int retries = 0; |
| 765 | int namelen; | 840 | u64 to_reserve = 0; |
| 766 | u64 index = 0; | 841 | u64 index = 0; |
| 767 | 842 | u64 objectid; | |
| 768 | parent_inode = pending->dentry->d_parent->d_inode; | ||
| 769 | parent_root = BTRFS_I(parent_inode)->root; | ||
| 770 | 843 | ||
| 771 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); | 844 | new_root_item = kmalloc(sizeof(*new_root_item), GFP_NOFS); |
| 772 | if (!new_root_item) { | 845 | if (!new_root_item) { |
| 773 | ret = -ENOMEM; | 846 | pending->error = -ENOMEM; |
| 774 | goto fail; | 847 | goto fail; |
| 775 | } | 848 | } |
| 849 | |||
| 776 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); | 850 | ret = btrfs_find_free_objectid(trans, tree_root, 0, &objectid); |
| 777 | if (ret) | 851 | if (ret) { |
| 852 | pending->error = ret; | ||
| 778 | goto fail; | 853 | goto fail; |
| 854 | } | ||
| 855 | |||
| 856 | btrfs_reloc_pre_snapshot(trans, pending, &to_reserve); | ||
| 857 | btrfs_orphan_pre_snapshot(trans, pending, &to_reserve); | ||
| 858 | |||
| 859 | if (to_reserve > 0) { | ||
| 860 | ret = btrfs_block_rsv_add(trans, root, &pending->block_rsv, | ||
| 861 | to_reserve, &retries); | ||
| 862 | if (ret) { | ||
| 863 | pending->error = ret; | ||
| 864 | goto fail; | ||
| 865 | } | ||
| 866 | } | ||
| 779 | 867 | ||
| 780 | key.objectid = objectid; | 868 | key.objectid = objectid; |
| 781 | /* record when the snapshot was created in key.offset */ | 869 | key.offset = (u64)-1; |
| 782 | key.offset = trans->transid; | 870 | key.type = BTRFS_ROOT_ITEM_KEY; |
| 783 | btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); | ||
| 784 | 871 | ||
| 785 | memcpy(&pending->root_key, &key, sizeof(key)); | 872 | trans->block_rsv = &pending->block_rsv; |
| 786 | pending->root_key.offset = (u64)-1; | ||
| 787 | 873 | ||
| 874 | dentry = pending->dentry; | ||
| 875 | parent_inode = dentry->d_parent->d_inode; | ||
| 876 | parent_root = BTRFS_I(parent_inode)->root; | ||
| 788 | record_root_in_trans(trans, parent_root); | 877 | record_root_in_trans(trans, parent_root); |
| 878 | |||
| 789 | /* | 879 | /* |
| 790 | * insert the directory item | 880 | * insert the directory item |
| 791 | */ | 881 | */ |
| 792 | namelen = strlen(pending->name); | ||
| 793 | ret = btrfs_set_inode_index(parent_inode, &index); | 882 | ret = btrfs_set_inode_index(parent_inode, &index); |
| 794 | BUG_ON(ret); | 883 | BUG_ON(ret); |
| 795 | ret = btrfs_insert_dir_item(trans, parent_root, | 884 | ret = btrfs_insert_dir_item(trans, parent_root, |
| 796 | pending->name, namelen, | 885 | dentry->d_name.name, dentry->d_name.len, |
| 797 | parent_inode->i_ino, | 886 | parent_inode->i_ino, &key, |
| 798 | &pending->root_key, BTRFS_FT_DIR, index); | 887 | BTRFS_FT_DIR, index); |
| 799 | BUG_ON(ret); | 888 | BUG_ON(ret); |
| 800 | 889 | ||
| 801 | btrfs_i_size_write(parent_inode, parent_inode->i_size + namelen * 2); | 890 | btrfs_i_size_write(parent_inode, parent_inode->i_size + |
| 891 | dentry->d_name.len * 2); | ||
| 802 | ret = btrfs_update_inode(trans, parent_root, parent_inode); | 892 | ret = btrfs_update_inode(trans, parent_root, parent_inode); |
| 803 | BUG_ON(ret); | 893 | BUG_ON(ret); |
| 804 | 894 | ||
| @@ -815,22 +905,32 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans, | |||
| 815 | free_extent_buffer(old); | 905 | free_extent_buffer(old); |
| 816 | 906 | ||
| 817 | btrfs_set_root_node(new_root_item, tmp); | 907 | btrfs_set_root_node(new_root_item, tmp); |
| 818 | ret = btrfs_insert_root(trans, root->fs_info->tree_root, &key, | 908 | /* record when the snapshot was created in key.offset */ |
| 819 | new_root_item); | 909 | key.offset = trans->transid; |
| 820 | BUG_ON(ret); | 910 | ret = btrfs_insert_root(trans, tree_root, &key, new_root_item); |
| 821 | btrfs_tree_unlock(tmp); | 911 | btrfs_tree_unlock(tmp); |
| 822 | free_extent_buffer(tmp); | 912 | free_extent_buffer(tmp); |
| 913 | BUG_ON(ret); | ||
| 823 | 914 | ||
| 824 | ret = btrfs_add_root_ref(trans, parent_root->fs_info->tree_root, | 915 | /* |
| 825 | pending->root_key.objectid, | 916 | * insert root back/forward references |
| 917 | */ | ||
| 918 | ret = btrfs_add_root_ref(trans, tree_root, objectid, | ||
| 826 | parent_root->root_key.objectid, | 919 | parent_root->root_key.objectid, |
| 827 | parent_inode->i_ino, index, pending->name, | 920 | parent_inode->i_ino, index, |
| 828 | namelen); | 921 | dentry->d_name.name, dentry->d_name.len); |
| 829 | BUG_ON(ret); | 922 | BUG_ON(ret); |
| 830 | 923 | ||
| 924 | key.offset = (u64)-1; | ||
| 925 | pending->snap = btrfs_read_fs_root_no_name(root->fs_info, &key); | ||
| 926 | BUG_ON(IS_ERR(pending->snap)); | ||
| 927 | |||
| 928 | btrfs_reloc_post_snapshot(trans, pending); | ||
| 929 | btrfs_orphan_post_snapshot(trans, pending); | ||
| 831 | fail: | 930 | fail: |
| 832 | kfree(new_root_item); | 931 | kfree(new_root_item); |
| 833 | return ret; | 932 | btrfs_block_rsv_release(root, &pending->block_rsv, (u64)-1); |
| 933 | return 0; | ||
| 834 | } | 934 | } |
| 835 | 935 | ||
| 836 | /* | 936 | /* |
| @@ -878,6 +978,16 @@ int btrfs_transaction_in_commit(struct btrfs_fs_info *info) | |||
| 878 | return ret; | 978 | return ret; |
| 879 | } | 979 | } |
| 880 | 980 | ||
| 981 | int btrfs_transaction_blocked(struct btrfs_fs_info *info) | ||
| 982 | { | ||
| 983 | int ret = 0; | ||
| 984 | spin_lock(&info->new_trans_lock); | ||
| 985 | if (info->running_transaction) | ||
| 986 | ret = info->running_transaction->blocked; | ||
| 987 | spin_unlock(&info->new_trans_lock); | ||
| 988 | return ret; | ||
| 989 | } | ||
| 990 | |||
| 881 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | 991 | int btrfs_commit_transaction(struct btrfs_trans_handle *trans, |
| 882 | struct btrfs_root *root) | 992 | struct btrfs_root *root) |
| 883 | { | 993 | { |
| @@ -899,6 +1009,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 899 | ret = btrfs_run_delayed_refs(trans, root, 0); | 1009 | ret = btrfs_run_delayed_refs(trans, root, 0); |
| 900 | BUG_ON(ret); | 1010 | BUG_ON(ret); |
| 901 | 1011 | ||
| 1012 | btrfs_trans_release_metadata(trans, root); | ||
| 1013 | |||
| 902 | cur_trans = trans->transaction; | 1014 | cur_trans = trans->transaction; |
| 903 | /* | 1015 | /* |
| 904 | * set the flushing flag so procs in this transaction have to | 1016 | * set the flushing flag so procs in this transaction have to |
| @@ -951,9 +1063,6 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 951 | snap_pending = 1; | 1063 | snap_pending = 1; |
| 952 | 1064 | ||
| 953 | WARN_ON(cur_trans != trans->transaction); | 1065 | WARN_ON(cur_trans != trans->transaction); |
| 954 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
| 955 | TASK_UNINTERRUPTIBLE); | ||
| 956 | |||
| 957 | if (cur_trans->num_writers > 1) | 1066 | if (cur_trans->num_writers > 1) |
| 958 | timeout = MAX_SCHEDULE_TIMEOUT; | 1067 | timeout = MAX_SCHEDULE_TIMEOUT; |
| 959 | else if (should_grow) | 1068 | else if (should_grow) |
| @@ -976,6 +1085,9 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 976 | */ | 1085 | */ |
| 977 | btrfs_run_ordered_operations(root, 1); | 1086 | btrfs_run_ordered_operations(root, 1); |
| 978 | 1087 | ||
| 1088 | prepare_to_wait(&cur_trans->writer_wait, &wait, | ||
| 1089 | TASK_UNINTERRUPTIBLE); | ||
| 1090 | |||
| 979 | smp_mb(); | 1091 | smp_mb(); |
| 980 | if (cur_trans->num_writers > 1 || should_grow) | 1092 | if (cur_trans->num_writers > 1 || should_grow) |
| 981 | schedule_timeout(timeout); | 1093 | schedule_timeout(timeout); |
| @@ -1103,9 +1215,9 @@ int btrfs_clean_old_snapshots(struct btrfs_root *root) | |||
| 1103 | 1215 | ||
| 1104 | if (btrfs_header_backref_rev(root->node) < | 1216 | if (btrfs_header_backref_rev(root->node) < |
| 1105 | BTRFS_MIXED_BACKREF_REV) | 1217 | BTRFS_MIXED_BACKREF_REV) |
| 1106 | btrfs_drop_snapshot(root, 0); | 1218 | btrfs_drop_snapshot(root, NULL, 0); |
| 1107 | else | 1219 | else |
| 1108 | btrfs_drop_snapshot(root, 1); | 1220 | btrfs_drop_snapshot(root, NULL, 1); |
| 1109 | } | 1221 | } |
| 1110 | return 0; | 1222 | return 0; |
| 1111 | } | 1223 | } |
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index 93c7ccb33118..e104986d0bfd 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h | |||
| @@ -45,20 +45,23 @@ struct btrfs_transaction { | |||
| 45 | 45 | ||
| 46 | struct btrfs_trans_handle { | 46 | struct btrfs_trans_handle { |
| 47 | u64 transid; | 47 | u64 transid; |
| 48 | u64 block_group; | ||
| 49 | u64 bytes_reserved; | ||
| 48 | unsigned long blocks_reserved; | 50 | unsigned long blocks_reserved; |
| 49 | unsigned long blocks_used; | 51 | unsigned long blocks_used; |
| 50 | struct btrfs_transaction *transaction; | ||
| 51 | u64 block_group; | ||
| 52 | u64 alloc_exclude_start; | ||
| 53 | u64 alloc_exclude_nr; | ||
| 54 | unsigned long delayed_ref_updates; | 52 | unsigned long delayed_ref_updates; |
| 53 | struct btrfs_transaction *transaction; | ||
| 54 | struct btrfs_block_rsv *block_rsv; | ||
| 55 | }; | 55 | }; |
| 56 | 56 | ||
| 57 | struct btrfs_pending_snapshot { | 57 | struct btrfs_pending_snapshot { |
| 58 | struct dentry *dentry; | 58 | struct dentry *dentry; |
| 59 | struct btrfs_root *root; | 59 | struct btrfs_root *root; |
| 60 | char *name; | 60 | struct btrfs_root *snap; |
| 61 | struct btrfs_key root_key; | 61 | /* block reservation for the operation */ |
| 62 | struct btrfs_block_rsv block_rsv; | ||
| 63 | /* extra metadata reseration for relocation */ | ||
| 64 | int error; | ||
| 62 | struct list_head list; | 65 | struct list_head list; |
| 63 | }; | 66 | }; |
| 64 | 67 | ||
| @@ -85,11 +88,11 @@ static inline void btrfs_set_inode_last_trans(struct btrfs_trans_handle *trans, | |||
| 85 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, | 88 | int btrfs_end_transaction(struct btrfs_trans_handle *trans, |
| 86 | struct btrfs_root *root); | 89 | struct btrfs_root *root); |
| 87 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, | 90 | struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, |
| 88 | int num_blocks); | 91 | int num_items); |
| 89 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, | 92 | struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root, |
| 90 | int num_blocks); | 93 | int num_blocks); |
| 91 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, | 94 | struct btrfs_trans_handle *btrfs_start_ioctl_transaction(struct btrfs_root *r, |
| 92 | int num_blocks); | 95 | int num_blocks); |
| 93 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, | 96 | int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans, |
| 94 | struct btrfs_root *root); | 97 | struct btrfs_root *root); |
| 95 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, | 98 | int btrfs_commit_tree_roots(struct btrfs_trans_handle *trans, |
| @@ -103,6 +106,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, | |||
| 103 | struct btrfs_root *root); | 106 | struct btrfs_root *root); |
| 104 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, | 107 | int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans, |
| 105 | struct btrfs_root *root); | 108 | struct btrfs_root *root); |
| 109 | int btrfs_should_end_transaction(struct btrfs_trans_handle *trans, | ||
| 110 | struct btrfs_root *root); | ||
| 106 | void btrfs_throttle(struct btrfs_root *root); | 111 | void btrfs_throttle(struct btrfs_root *root); |
| 107 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, | 112 | int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans, |
| 108 | struct btrfs_root *root); | 113 | struct btrfs_root *root); |
| @@ -112,5 +117,6 @@ int btrfs_write_marked_extents(struct btrfs_root *root, | |||
| 112 | struct extent_io_tree *dirty_pages, int mark); | 117 | struct extent_io_tree *dirty_pages, int mark); |
| 113 | int btrfs_wait_marked_extents(struct btrfs_root *root, | 118 | int btrfs_wait_marked_extents(struct btrfs_root *root, |
| 114 | struct extent_io_tree *dirty_pages, int mark); | 119 | struct extent_io_tree *dirty_pages, int mark); |
| 120 | int btrfs_transaction_blocked(struct btrfs_fs_info *info); | ||
| 115 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); | 121 | int btrfs_transaction_in_commit(struct btrfs_fs_info *info); |
| 116 | #endif | 122 | #endif |
diff --git a/fs/btrfs/tree-defrag.c b/fs/btrfs/tree-defrag.c index b10eacdb1620..f7ac8e013ed7 100644 --- a/fs/btrfs/tree-defrag.c +++ b/fs/btrfs/tree-defrag.c | |||
| @@ -117,13 +117,14 @@ int btrfs_defrag_leaves(struct btrfs_trans_handle *trans, | |||
| 117 | path->nodes[1], 0, | 117 | path->nodes[1], 0, |
| 118 | cache_only, &last_ret, | 118 | cache_only, &last_ret, |
| 119 | &root->defrag_progress); | 119 | &root->defrag_progress); |
| 120 | WARN_ON(ret && ret != -EAGAIN); | 120 | if (ret) { |
| 121 | WARN_ON(ret == -EAGAIN); | ||
| 122 | goto out; | ||
| 123 | } | ||
| 121 | if (next_key_ret == 0) { | 124 | if (next_key_ret == 0) { |
| 122 | memcpy(&root->defrag_progress, &key, sizeof(key)); | 125 | memcpy(&root->defrag_progress, &key, sizeof(key)); |
| 123 | ret = -EAGAIN; | 126 | ret = -EAGAIN; |
| 124 | } | 127 | } |
| 125 | |||
| 126 | btrfs_release_path(root, path); | ||
| 127 | out: | 128 | out: |
| 128 | if (path) | 129 | if (path) |
| 129 | btrfs_free_path(path); | 130 | btrfs_free_path(path); |
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index af57dd2b43d4..fb102a9aee9c 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c | |||
| @@ -135,6 +135,7 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 135 | struct btrfs_root *root) | 135 | struct btrfs_root *root) |
| 136 | { | 136 | { |
| 137 | int ret; | 137 | int ret; |
| 138 | int err = 0; | ||
| 138 | 139 | ||
| 139 | mutex_lock(&root->log_mutex); | 140 | mutex_lock(&root->log_mutex); |
| 140 | if (root->log_root) { | 141 | if (root->log_root) { |
| @@ -155,17 +156,19 @@ static int start_log_trans(struct btrfs_trans_handle *trans, | |||
| 155 | mutex_lock(&root->fs_info->tree_log_mutex); | 156 | mutex_lock(&root->fs_info->tree_log_mutex); |
| 156 | if (!root->fs_info->log_root_tree) { | 157 | if (!root->fs_info->log_root_tree) { |
| 157 | ret = btrfs_init_log_root_tree(trans, root->fs_info); | 158 | ret = btrfs_init_log_root_tree(trans, root->fs_info); |
| 158 | BUG_ON(ret); | 159 | if (ret) |
| 160 | err = ret; | ||
| 159 | } | 161 | } |
| 160 | if (!root->log_root) { | 162 | if (err == 0 && !root->log_root) { |
| 161 | ret = btrfs_add_log_tree(trans, root); | 163 | ret = btrfs_add_log_tree(trans, root); |
| 162 | BUG_ON(ret); | 164 | if (ret) |
| 165 | err = ret; | ||
| 163 | } | 166 | } |
| 164 | mutex_unlock(&root->fs_info->tree_log_mutex); | 167 | mutex_unlock(&root->fs_info->tree_log_mutex); |
| 165 | root->log_batch++; | 168 | root->log_batch++; |
| 166 | atomic_inc(&root->log_writers); | 169 | atomic_inc(&root->log_writers); |
| 167 | mutex_unlock(&root->log_mutex); | 170 | mutex_unlock(&root->log_mutex); |
| 168 | return 0; | 171 | return err; |
| 169 | } | 172 | } |
| 170 | 173 | ||
| 171 | /* | 174 | /* |
| @@ -376,7 +379,7 @@ insert: | |||
| 376 | BUG_ON(ret); | 379 | BUG_ON(ret); |
| 377 | } | 380 | } |
| 378 | } else if (ret) { | 381 | } else if (ret) { |
| 379 | BUG(); | 382 | return ret; |
| 380 | } | 383 | } |
| 381 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], | 384 | dst_ptr = btrfs_item_ptr_offset(path->nodes[0], |
| 382 | path->slots[0]); | 385 | path->slots[0]); |
| @@ -1699,9 +1702,9 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
| 1699 | 1702 | ||
| 1700 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); | 1703 | next = btrfs_find_create_tree_block(root, bytenr, blocksize); |
| 1701 | 1704 | ||
| 1702 | wc->process_func(root, next, wc, ptr_gen); | ||
| 1703 | |||
| 1704 | if (*level == 1) { | 1705 | if (*level == 1) { |
| 1706 | wc->process_func(root, next, wc, ptr_gen); | ||
| 1707 | |||
| 1705 | path->slots[*level]++; | 1708 | path->slots[*level]++; |
| 1706 | if (wc->free) { | 1709 | if (wc->free) { |
| 1707 | btrfs_read_buffer(next, ptr_gen); | 1710 | btrfs_read_buffer(next, ptr_gen); |
| @@ -1734,35 +1737,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans, | |||
| 1734 | WARN_ON(*level < 0); | 1737 | WARN_ON(*level < 0); |
| 1735 | WARN_ON(*level >= BTRFS_MAX_LEVEL); | 1738 | WARN_ON(*level >= BTRFS_MAX_LEVEL); |
| 1736 | 1739 | ||
| 1737 | if (path->nodes[*level] == root->node) | 1740 | path->slots[*level] = btrfs_header_nritems(path->nodes[*level]); |
| 1738 | parent = path->nodes[*level]; | ||
| 1739 | else | ||
| 1740 | parent = path->nodes[*level + 1]; | ||
| 1741 | |||
| 1742 | bytenr = path->nodes[*level]->start; | ||
| 1743 | |||
| 1744 | blocksize = btrfs_level_size(root, *level); | ||
| 1745 | root_owner = btrfs_header_owner(parent); | ||
| 1746 | root_gen = btrfs_header_generation(parent); | ||
| 1747 | |||
| 1748 | wc->process_func(root, path->nodes[*level], wc, | ||
| 1749 | btrfs_header_generation(path->nodes[*level])); | ||
| 1750 | |||
| 1751 | if (wc->free) { | ||
| 1752 | next = path->nodes[*level]; | ||
| 1753 | btrfs_tree_lock(next); | ||
| 1754 | clean_tree_block(trans, root, next); | ||
| 1755 | btrfs_set_lock_blocking(next); | ||
| 1756 | btrfs_wait_tree_block_writeback(next); | ||
| 1757 | btrfs_tree_unlock(next); | ||
| 1758 | |||
| 1759 | WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); | ||
| 1760 | ret = btrfs_free_reserved_extent(root, bytenr, blocksize); | ||
| 1761 | BUG_ON(ret); | ||
| 1762 | } | ||
| 1763 | free_extent_buffer(path->nodes[*level]); | ||
| 1764 | path->nodes[*level] = NULL; | ||
| 1765 | *level += 1; | ||
| 1766 | 1741 | ||
| 1767 | cond_resched(); | 1742 | cond_resched(); |
| 1768 | return 0; | 1743 | return 0; |
| @@ -1781,7 +1756,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans, | |||
| 1781 | 1756 | ||
| 1782 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { | 1757 | for (i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) { |
| 1783 | slot = path->slots[i]; | 1758 | slot = path->slots[i]; |
| 1784 | if (slot < btrfs_header_nritems(path->nodes[i]) - 1) { | 1759 | if (slot + 1 < btrfs_header_nritems(path->nodes[i])) { |
| 1785 | struct extent_buffer *node; | 1760 | struct extent_buffer *node; |
| 1786 | node = path->nodes[i]; | 1761 | node = path->nodes[i]; |
| 1787 | path->slots[i]++; | 1762 | path->slots[i]++; |
| @@ -2047,7 +2022,6 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2047 | mutex_unlock(&log_root_tree->log_mutex); | 2022 | mutex_unlock(&log_root_tree->log_mutex); |
| 2048 | 2023 | ||
| 2049 | ret = update_log_root(trans, log); | 2024 | ret = update_log_root(trans, log); |
| 2050 | BUG_ON(ret); | ||
| 2051 | 2025 | ||
| 2052 | mutex_lock(&log_root_tree->log_mutex); | 2026 | mutex_lock(&log_root_tree->log_mutex); |
| 2053 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { | 2027 | if (atomic_dec_and_test(&log_root_tree->log_writers)) { |
| @@ -2056,6 +2030,15 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans, | |||
| 2056 | wake_up(&log_root_tree->log_writer_wait); | 2030 | wake_up(&log_root_tree->log_writer_wait); |
| 2057 | } | 2031 | } |
| 2058 | 2032 | ||
| 2033 | if (ret) { | ||
| 2034 | BUG_ON(ret != -ENOSPC); | ||
| 2035 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2036 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | ||
| 2037 | mutex_unlock(&log_root_tree->log_mutex); | ||
| 2038 | ret = -EAGAIN; | ||
| 2039 | goto out; | ||
| 2040 | } | ||
| 2041 | |||
| 2059 | index2 = log_root_tree->log_transid % 2; | 2042 | index2 = log_root_tree->log_transid % 2; |
| 2060 | if (atomic_read(&log_root_tree->log_commit[index2])) { | 2043 | if (atomic_read(&log_root_tree->log_commit[index2])) { |
| 2061 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); | 2044 | btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark); |
| @@ -2129,15 +2112,10 @@ out: | |||
| 2129 | return 0; | 2112 | return 0; |
| 2130 | } | 2113 | } |
| 2131 | 2114 | ||
| 2132 | /* | 2115 | static void free_log_tree(struct btrfs_trans_handle *trans, |
| 2133 | * free all the extents used by the tree log. This should be called | 2116 | struct btrfs_root *log) |
| 2134 | * at commit time of the full transaction | ||
| 2135 | */ | ||
| 2136 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
| 2137 | { | 2117 | { |
| 2138 | int ret; | 2118 | int ret; |
| 2139 | struct btrfs_root *log; | ||
| 2140 | struct key; | ||
| 2141 | u64 start; | 2119 | u64 start; |
| 2142 | u64 end; | 2120 | u64 end; |
| 2143 | struct walk_control wc = { | 2121 | struct walk_control wc = { |
| @@ -2145,10 +2123,6 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
| 2145 | .process_func = process_one_buffer | 2123 | .process_func = process_one_buffer |
| 2146 | }; | 2124 | }; |
| 2147 | 2125 | ||
| 2148 | if (!root->log_root || root->fs_info->log_root_recovering) | ||
| 2149 | return 0; | ||
| 2150 | |||
| 2151 | log = root->log_root; | ||
| 2152 | ret = walk_log_tree(trans, log, &wc); | 2126 | ret = walk_log_tree(trans, log, &wc); |
| 2153 | BUG_ON(ret); | 2127 | BUG_ON(ret); |
| 2154 | 2128 | ||
| @@ -2162,14 +2136,30 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | |||
| 2162 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); | 2136 | EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS); |
| 2163 | } | 2137 | } |
| 2164 | 2138 | ||
| 2165 | if (log->log_transid > 0) { | ||
| 2166 | ret = btrfs_del_root(trans, root->fs_info->log_root_tree, | ||
| 2167 | &log->root_key); | ||
| 2168 | BUG_ON(ret); | ||
| 2169 | } | ||
| 2170 | root->log_root = NULL; | ||
| 2171 | free_extent_buffer(log->node); | 2139 | free_extent_buffer(log->node); |
| 2172 | kfree(log); | 2140 | kfree(log); |
| 2141 | } | ||
| 2142 | |||
| 2143 | /* | ||
| 2144 | * free all the extents used by the tree log. This should be called | ||
| 2145 | * at commit time of the full transaction | ||
| 2146 | */ | ||
| 2147 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root) | ||
| 2148 | { | ||
| 2149 | if (root->log_root) { | ||
| 2150 | free_log_tree(trans, root->log_root); | ||
| 2151 | root->log_root = NULL; | ||
| 2152 | } | ||
| 2153 | return 0; | ||
| 2154 | } | ||
| 2155 | |||
| 2156 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 2157 | struct btrfs_fs_info *fs_info) | ||
| 2158 | { | ||
| 2159 | if (fs_info->log_root_tree) { | ||
| 2160 | free_log_tree(trans, fs_info->log_root_tree); | ||
| 2161 | fs_info->log_root_tree = NULL; | ||
| 2162 | } | ||
| 2173 | return 0; | 2163 | return 0; |
| 2174 | } | 2164 | } |
| 2175 | 2165 | ||
| @@ -2203,6 +2193,7 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2203 | struct btrfs_dir_item *di; | 2193 | struct btrfs_dir_item *di; |
| 2204 | struct btrfs_path *path; | 2194 | struct btrfs_path *path; |
| 2205 | int ret; | 2195 | int ret; |
| 2196 | int err = 0; | ||
| 2206 | int bytes_del = 0; | 2197 | int bytes_del = 0; |
| 2207 | 2198 | ||
| 2208 | if (BTRFS_I(dir)->logged_trans < trans->transid) | 2199 | if (BTRFS_I(dir)->logged_trans < trans->transid) |
| @@ -2218,7 +2209,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2218 | path = btrfs_alloc_path(); | 2209 | path = btrfs_alloc_path(); |
| 2219 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, | 2210 | di = btrfs_lookup_dir_item(trans, log, path, dir->i_ino, |
| 2220 | name, name_len, -1); | 2211 | name, name_len, -1); |
| 2221 | if (di && !IS_ERR(di)) { | 2212 | if (IS_ERR(di)) { |
| 2213 | err = PTR_ERR(di); | ||
| 2214 | goto fail; | ||
| 2215 | } | ||
| 2216 | if (di) { | ||
| 2222 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2217 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
| 2223 | bytes_del += name_len; | 2218 | bytes_del += name_len; |
| 2224 | BUG_ON(ret); | 2219 | BUG_ON(ret); |
| @@ -2226,7 +2221,11 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2226 | btrfs_release_path(log, path); | 2221 | btrfs_release_path(log, path); |
| 2227 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, | 2222 | di = btrfs_lookup_dir_index_item(trans, log, path, dir->i_ino, |
| 2228 | index, name, name_len, -1); | 2223 | index, name, name_len, -1); |
| 2229 | if (di && !IS_ERR(di)) { | 2224 | if (IS_ERR(di)) { |
| 2225 | err = PTR_ERR(di); | ||
| 2226 | goto fail; | ||
| 2227 | } | ||
| 2228 | if (di) { | ||
| 2230 | ret = btrfs_delete_one_dir_name(trans, log, path, di); | 2229 | ret = btrfs_delete_one_dir_name(trans, log, path, di); |
| 2231 | bytes_del += name_len; | 2230 | bytes_del += name_len; |
| 2232 | BUG_ON(ret); | 2231 | BUG_ON(ret); |
| @@ -2244,6 +2243,10 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2244 | btrfs_release_path(log, path); | 2243 | btrfs_release_path(log, path); |
| 2245 | 2244 | ||
| 2246 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); | 2245 | ret = btrfs_search_slot(trans, log, &key, path, 0, 1); |
| 2246 | if (ret < 0) { | ||
| 2247 | err = ret; | ||
| 2248 | goto fail; | ||
| 2249 | } | ||
| 2247 | if (ret == 0) { | 2250 | if (ret == 0) { |
| 2248 | struct btrfs_inode_item *item; | 2251 | struct btrfs_inode_item *item; |
| 2249 | u64 i_size; | 2252 | u64 i_size; |
| @@ -2261,9 +2264,13 @@ int btrfs_del_dir_entries_in_log(struct btrfs_trans_handle *trans, | |||
| 2261 | ret = 0; | 2264 | ret = 0; |
| 2262 | btrfs_release_path(log, path); | 2265 | btrfs_release_path(log, path); |
| 2263 | } | 2266 | } |
| 2264 | 2267 | fail: | |
| 2265 | btrfs_free_path(path); | 2268 | btrfs_free_path(path); |
| 2266 | mutex_unlock(&BTRFS_I(dir)->log_mutex); | 2269 | mutex_unlock(&BTRFS_I(dir)->log_mutex); |
| 2270 | if (ret == -ENOSPC) { | ||
| 2271 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2272 | ret = 0; | ||
| 2273 | } | ||
| 2267 | btrfs_end_log_trans(root); | 2274 | btrfs_end_log_trans(root); |
| 2268 | 2275 | ||
| 2269 | return 0; | 2276 | return 0; |
| @@ -2291,6 +2298,10 @@ int btrfs_del_inode_ref_in_log(struct btrfs_trans_handle *trans, | |||
| 2291 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, | 2298 | ret = btrfs_del_inode_ref(trans, log, name, name_len, inode->i_ino, |
| 2292 | dirid, &index); | 2299 | dirid, &index); |
| 2293 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2300 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 2301 | if (ret == -ENOSPC) { | ||
| 2302 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 2303 | ret = 0; | ||
| 2304 | } | ||
| 2294 | btrfs_end_log_trans(root); | 2305 | btrfs_end_log_trans(root); |
| 2295 | 2306 | ||
| 2296 | return ret; | 2307 | return ret; |
| @@ -2318,7 +2329,8 @@ static noinline int insert_dir_log_key(struct btrfs_trans_handle *trans, | |||
| 2318 | else | 2329 | else |
| 2319 | key.type = BTRFS_DIR_LOG_INDEX_KEY; | 2330 | key.type = BTRFS_DIR_LOG_INDEX_KEY; |
| 2320 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); | 2331 | ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*item)); |
| 2321 | BUG_ON(ret); | 2332 | if (ret) |
| 2333 | return ret; | ||
| 2322 | 2334 | ||
| 2323 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], | 2335 | item = btrfs_item_ptr(path->nodes[0], path->slots[0], |
| 2324 | struct btrfs_dir_log_item); | 2336 | struct btrfs_dir_log_item); |
| @@ -2343,6 +2355,7 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
| 2343 | struct btrfs_key max_key; | 2355 | struct btrfs_key max_key; |
| 2344 | struct btrfs_root *log = root->log_root; | 2356 | struct btrfs_root *log = root->log_root; |
| 2345 | struct extent_buffer *src; | 2357 | struct extent_buffer *src; |
| 2358 | int err = 0; | ||
| 2346 | int ret; | 2359 | int ret; |
| 2347 | int i; | 2360 | int i; |
| 2348 | int nritems; | 2361 | int nritems; |
| @@ -2405,6 +2418,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
| 2405 | ret = overwrite_item(trans, log, dst_path, | 2418 | ret = overwrite_item(trans, log, dst_path, |
| 2406 | path->nodes[0], path->slots[0], | 2419 | path->nodes[0], path->slots[0], |
| 2407 | &tmp); | 2420 | &tmp); |
| 2421 | if (ret) { | ||
| 2422 | err = ret; | ||
| 2423 | goto done; | ||
| 2424 | } | ||
| 2408 | } | 2425 | } |
| 2409 | } | 2426 | } |
| 2410 | btrfs_release_path(root, path); | 2427 | btrfs_release_path(root, path); |
| @@ -2432,7 +2449,10 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
| 2432 | goto done; | 2449 | goto done; |
| 2433 | ret = overwrite_item(trans, log, dst_path, src, i, | 2450 | ret = overwrite_item(trans, log, dst_path, src, i, |
| 2434 | &min_key); | 2451 | &min_key); |
| 2435 | BUG_ON(ret); | 2452 | if (ret) { |
| 2453 | err = ret; | ||
| 2454 | goto done; | ||
| 2455 | } | ||
| 2436 | } | 2456 | } |
| 2437 | path->slots[0] = nritems; | 2457 | path->slots[0] = nritems; |
| 2438 | 2458 | ||
| @@ -2454,22 +2474,30 @@ static noinline int log_dir_items(struct btrfs_trans_handle *trans, | |||
| 2454 | ret = overwrite_item(trans, log, dst_path, | 2474 | ret = overwrite_item(trans, log, dst_path, |
| 2455 | path->nodes[0], path->slots[0], | 2475 | path->nodes[0], path->slots[0], |
| 2456 | &tmp); | 2476 | &tmp); |
| 2457 | 2477 | if (ret) | |
| 2458 | BUG_ON(ret); | 2478 | err = ret; |
| 2459 | last_offset = tmp.offset; | 2479 | else |
| 2480 | last_offset = tmp.offset; | ||
| 2460 | goto done; | 2481 | goto done; |
| 2461 | } | 2482 | } |
| 2462 | } | 2483 | } |
| 2463 | done: | 2484 | done: |
| 2464 | *last_offset_ret = last_offset; | ||
| 2465 | btrfs_release_path(root, path); | 2485 | btrfs_release_path(root, path); |
| 2466 | btrfs_release_path(log, dst_path); | 2486 | btrfs_release_path(log, dst_path); |
| 2467 | 2487 | ||
| 2468 | /* insert the log range keys to indicate where the log is valid */ | 2488 | if (err == 0) { |
| 2469 | ret = insert_dir_log_key(trans, log, path, key_type, inode->i_ino, | 2489 | *last_offset_ret = last_offset; |
| 2470 | first_offset, last_offset); | 2490 | /* |
| 2471 | BUG_ON(ret); | 2491 | * insert the log range keys to indicate where the log |
| 2472 | return 0; | 2492 | * is valid |
| 2493 | */ | ||
| 2494 | ret = insert_dir_log_key(trans, log, path, key_type, | ||
| 2495 | inode->i_ino, first_offset, | ||
| 2496 | last_offset); | ||
| 2497 | if (ret) | ||
| 2498 | err = ret; | ||
| 2499 | } | ||
| 2500 | return err; | ||
| 2473 | } | 2501 | } |
| 2474 | 2502 | ||
| 2475 | /* | 2503 | /* |
| @@ -2501,7 +2529,8 @@ again: | |||
| 2501 | ret = log_dir_items(trans, root, inode, path, | 2529 | ret = log_dir_items(trans, root, inode, path, |
| 2502 | dst_path, key_type, min_key, | 2530 | dst_path, key_type, min_key, |
| 2503 | &max_key); | 2531 | &max_key); |
| 2504 | BUG_ON(ret); | 2532 | if (ret) |
| 2533 | return ret; | ||
| 2505 | if (max_key == (u64)-1) | 2534 | if (max_key == (u64)-1) |
| 2506 | break; | 2535 | break; |
| 2507 | min_key = max_key + 1; | 2536 | min_key = max_key + 1; |
| @@ -2535,8 +2564,8 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2535 | 2564 | ||
| 2536 | while (1) { | 2565 | while (1) { |
| 2537 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); | 2566 | ret = btrfs_search_slot(trans, log, &key, path, -1, 1); |
| 2538 | 2567 | BUG_ON(ret == 0); | |
| 2539 | if (ret != 1) | 2568 | if (ret < 0) |
| 2540 | break; | 2569 | break; |
| 2541 | 2570 | ||
| 2542 | if (path->slots[0] == 0) | 2571 | if (path->slots[0] == 0) |
| @@ -2554,7 +2583,7 @@ static int drop_objectid_items(struct btrfs_trans_handle *trans, | |||
| 2554 | btrfs_release_path(log, path); | 2583 | btrfs_release_path(log, path); |
| 2555 | } | 2584 | } |
| 2556 | btrfs_release_path(log, path); | 2585 | btrfs_release_path(log, path); |
| 2557 | return 0; | 2586 | return ret; |
| 2558 | } | 2587 | } |
| 2559 | 2588 | ||
| 2560 | static noinline int copy_items(struct btrfs_trans_handle *trans, | 2589 | static noinline int copy_items(struct btrfs_trans_handle *trans, |
| @@ -2587,7 +2616,10 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2587 | } | 2616 | } |
| 2588 | ret = btrfs_insert_empty_items(trans, log, dst_path, | 2617 | ret = btrfs_insert_empty_items(trans, log, dst_path, |
| 2589 | ins_keys, ins_sizes, nr); | 2618 | ins_keys, ins_sizes, nr); |
| 2590 | BUG_ON(ret); | 2619 | if (ret) { |
| 2620 | kfree(ins_data); | ||
| 2621 | return ret; | ||
| 2622 | } | ||
| 2591 | 2623 | ||
| 2592 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { | 2624 | for (i = 0; i < nr; i++, dst_path->slots[0]++) { |
| 2593 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], | 2625 | dst_offset = btrfs_item_ptr_offset(dst_path->nodes[0], |
| @@ -2660,16 +2692,17 @@ static noinline int copy_items(struct btrfs_trans_handle *trans, | |||
| 2660 | * we have to do this after the loop above to avoid changing the | 2692 | * we have to do this after the loop above to avoid changing the |
| 2661 | * log tree while trying to change the log tree. | 2693 | * log tree while trying to change the log tree. |
| 2662 | */ | 2694 | */ |
| 2695 | ret = 0; | ||
| 2663 | while (!list_empty(&ordered_sums)) { | 2696 | while (!list_empty(&ordered_sums)) { |
| 2664 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, | 2697 | struct btrfs_ordered_sum *sums = list_entry(ordered_sums.next, |
| 2665 | struct btrfs_ordered_sum, | 2698 | struct btrfs_ordered_sum, |
| 2666 | list); | 2699 | list); |
| 2667 | ret = btrfs_csum_file_blocks(trans, log, sums); | 2700 | if (!ret) |
| 2668 | BUG_ON(ret); | 2701 | ret = btrfs_csum_file_blocks(trans, log, sums); |
| 2669 | list_del(&sums->list); | 2702 | list_del(&sums->list); |
| 2670 | kfree(sums); | 2703 | kfree(sums); |
| 2671 | } | 2704 | } |
| 2672 | return 0; | 2705 | return ret; |
| 2673 | } | 2706 | } |
| 2674 | 2707 | ||
| 2675 | /* log a single inode in the tree log. | 2708 | /* log a single inode in the tree log. |
| @@ -2697,6 +2730,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2697 | struct btrfs_root *log = root->log_root; | 2730 | struct btrfs_root *log = root->log_root; |
| 2698 | struct extent_buffer *src = NULL; | 2731 | struct extent_buffer *src = NULL; |
| 2699 | u32 size; | 2732 | u32 size; |
| 2733 | int err = 0; | ||
| 2700 | int ret; | 2734 | int ret; |
| 2701 | int nritems; | 2735 | int nritems; |
| 2702 | int ins_start_slot = 0; | 2736 | int ins_start_slot = 0; |
| @@ -2739,7 +2773,10 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, | |||
| 2739 | } else { | 2773 | } else { |
| 2740 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); | 2774 | ret = btrfs_truncate_inode_items(trans, log, inode, 0, 0); |
| 2741 | } | 2775 | } |
| 2742 | BUG_ON(ret); | 2776 | if (ret) { |
| 2777 | err = ret; | ||
| 2778 | goto out_unlock; | ||
| 2779 | } | ||
| 2743 | path->keep_locks = 1; | 2780 | path->keep_locks = 1; |
| 2744 | 2781 | ||
| 2745 | while (1) { | 2782 | while (1) { |
| @@ -2768,7 +2805,10 @@ again: | |||
| 2768 | 2805 | ||
| 2769 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, | 2806 | ret = copy_items(trans, log, dst_path, src, ins_start_slot, |
| 2770 | ins_nr, inode_only); | 2807 | ins_nr, inode_only); |
| 2771 | BUG_ON(ret); | 2808 | if (ret) { |
| 2809 | err = ret; | ||
| 2810 | goto out_unlock; | ||
| 2811 | } | ||
| 2772 | ins_nr = 1; | 2812 | ins_nr = 1; |
| 2773 | ins_start_slot = path->slots[0]; | 2813 | ins_start_slot = path->slots[0]; |
| 2774 | next_slot: | 2814 | next_slot: |
| @@ -2784,7 +2824,10 @@ next_slot: | |||
| 2784 | ret = copy_items(trans, log, dst_path, src, | 2824 | ret = copy_items(trans, log, dst_path, src, |
| 2785 | ins_start_slot, | 2825 | ins_start_slot, |
| 2786 | ins_nr, inode_only); | 2826 | ins_nr, inode_only); |
| 2787 | BUG_ON(ret); | 2827 | if (ret) { |
| 2828 | err = ret; | ||
| 2829 | goto out_unlock; | ||
| 2830 | } | ||
| 2788 | ins_nr = 0; | 2831 | ins_nr = 0; |
| 2789 | } | 2832 | } |
| 2790 | btrfs_release_path(root, path); | 2833 | btrfs_release_path(root, path); |
| @@ -2802,7 +2845,10 @@ next_slot: | |||
| 2802 | ret = copy_items(trans, log, dst_path, src, | 2845 | ret = copy_items(trans, log, dst_path, src, |
| 2803 | ins_start_slot, | 2846 | ins_start_slot, |
| 2804 | ins_nr, inode_only); | 2847 | ins_nr, inode_only); |
| 2805 | BUG_ON(ret); | 2848 | if (ret) { |
| 2849 | err = ret; | ||
| 2850 | goto out_unlock; | ||
| 2851 | } | ||
| 2806 | ins_nr = 0; | 2852 | ins_nr = 0; |
| 2807 | } | 2853 | } |
| 2808 | WARN_ON(ins_nr); | 2854 | WARN_ON(ins_nr); |
| @@ -2810,14 +2856,18 @@ next_slot: | |||
| 2810 | btrfs_release_path(root, path); | 2856 | btrfs_release_path(root, path); |
| 2811 | btrfs_release_path(log, dst_path); | 2857 | btrfs_release_path(log, dst_path); |
| 2812 | ret = log_directory_changes(trans, root, inode, path, dst_path); | 2858 | ret = log_directory_changes(trans, root, inode, path, dst_path); |
| 2813 | BUG_ON(ret); | 2859 | if (ret) { |
| 2860 | err = ret; | ||
| 2861 | goto out_unlock; | ||
| 2862 | } | ||
| 2814 | } | 2863 | } |
| 2815 | BTRFS_I(inode)->logged_trans = trans->transid; | 2864 | BTRFS_I(inode)->logged_trans = trans->transid; |
| 2865 | out_unlock: | ||
| 2816 | mutex_unlock(&BTRFS_I(inode)->log_mutex); | 2866 | mutex_unlock(&BTRFS_I(inode)->log_mutex); |
| 2817 | 2867 | ||
| 2818 | btrfs_free_path(path); | 2868 | btrfs_free_path(path); |
| 2819 | btrfs_free_path(dst_path); | 2869 | btrfs_free_path(dst_path); |
| 2820 | return 0; | 2870 | return err; |
| 2821 | } | 2871 | } |
| 2822 | 2872 | ||
| 2823 | /* | 2873 | /* |
| @@ -2942,10 +2992,13 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2942 | goto end_no_trans; | 2992 | goto end_no_trans; |
| 2943 | } | 2993 | } |
| 2944 | 2994 | ||
| 2945 | start_log_trans(trans, root); | 2995 | ret = start_log_trans(trans, root); |
| 2996 | if (ret) | ||
| 2997 | goto end_trans; | ||
| 2946 | 2998 | ||
| 2947 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 2999 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 2948 | BUG_ON(ret); | 3000 | if (ret) |
| 3001 | goto end_trans; | ||
| 2949 | 3002 | ||
| 2950 | /* | 3003 | /* |
| 2951 | * for regular files, if its inode is already on disk, we don't | 3004 | * for regular files, if its inode is already on disk, we don't |
| @@ -2955,8 +3008,10 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2955 | */ | 3008 | */ |
| 2956 | if (S_ISREG(inode->i_mode) && | 3009 | if (S_ISREG(inode->i_mode) && |
| 2957 | BTRFS_I(inode)->generation <= last_committed && | 3010 | BTRFS_I(inode)->generation <= last_committed && |
| 2958 | BTRFS_I(inode)->last_unlink_trans <= last_committed) | 3011 | BTRFS_I(inode)->last_unlink_trans <= last_committed) { |
| 2959 | goto no_parent; | 3012 | ret = 0; |
| 3013 | goto end_trans; | ||
| 3014 | } | ||
| 2960 | 3015 | ||
| 2961 | inode_only = LOG_INODE_EXISTS; | 3016 | inode_only = LOG_INODE_EXISTS; |
| 2962 | while (1) { | 3017 | while (1) { |
| @@ -2970,15 +3025,21 @@ int btrfs_log_inode_parent(struct btrfs_trans_handle *trans, | |||
| 2970 | if (BTRFS_I(inode)->generation > | 3025 | if (BTRFS_I(inode)->generation > |
| 2971 | root->fs_info->last_trans_committed) { | 3026 | root->fs_info->last_trans_committed) { |
| 2972 | ret = btrfs_log_inode(trans, root, inode, inode_only); | 3027 | ret = btrfs_log_inode(trans, root, inode, inode_only); |
| 2973 | BUG_ON(ret); | 3028 | if (ret) |
| 3029 | goto end_trans; | ||
| 2974 | } | 3030 | } |
| 2975 | if (IS_ROOT(parent)) | 3031 | if (IS_ROOT(parent)) |
| 2976 | break; | 3032 | break; |
| 2977 | 3033 | ||
| 2978 | parent = parent->d_parent; | 3034 | parent = parent->d_parent; |
| 2979 | } | 3035 | } |
| 2980 | no_parent: | ||
| 2981 | ret = 0; | 3036 | ret = 0; |
| 3037 | end_trans: | ||
| 3038 | if (ret < 0) { | ||
| 3039 | BUG_ON(ret != -ENOSPC); | ||
| 3040 | root->fs_info->last_trans_log_full_commit = trans->transid; | ||
| 3041 | ret = 1; | ||
| 3042 | } | ||
| 2982 | btrfs_end_log_trans(root); | 3043 | btrfs_end_log_trans(root); |
| 2983 | end_no_trans: | 3044 | end_no_trans: |
| 2984 | return ret; | 3045 | return ret; |
| @@ -3020,7 +3081,7 @@ int btrfs_recover_log_trees(struct btrfs_root *log_root_tree) | |||
| 3020 | path = btrfs_alloc_path(); | 3081 | path = btrfs_alloc_path(); |
| 3021 | BUG_ON(!path); | 3082 | BUG_ON(!path); |
| 3022 | 3083 | ||
| 3023 | trans = btrfs_start_transaction(fs_info->tree_root, 1); | 3084 | trans = btrfs_start_transaction(fs_info->tree_root, 0); |
| 3024 | 3085 | ||
| 3025 | wc.trans = trans; | 3086 | wc.trans = trans; |
| 3026 | wc.pin = 1; | 3087 | wc.pin = 1; |
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h index 0776eacb5083..3dfae84c8cc8 100644 --- a/fs/btrfs/tree-log.h +++ b/fs/btrfs/tree-log.h | |||
| @@ -25,6 +25,8 @@ | |||
| 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, | 25 | int btrfs_sync_log(struct btrfs_trans_handle *trans, |
| 26 | struct btrfs_root *root); | 26 | struct btrfs_root *root); |
| 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); | 27 | int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root); |
| 28 | int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans, | ||
| 29 | struct btrfs_fs_info *fs_info); | ||
| 28 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); | 30 | int btrfs_recover_log_trees(struct btrfs_root *tree_root); |
| 29 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, | 31 | int btrfs_log_dentry_safe(struct btrfs_trans_handle *trans, |
| 30 | struct btrfs_root *root, struct dentry *dentry); | 32 | struct btrfs_root *root, struct dentry *dentry); |
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8db7b14bbae8..d6e3af8be95b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c | |||
| @@ -1097,7 +1097,7 @@ static int btrfs_rm_dev_item(struct btrfs_root *root, | |||
| 1097 | if (!path) | 1097 | if (!path) |
| 1098 | return -ENOMEM; | 1098 | return -ENOMEM; |
| 1099 | 1099 | ||
| 1100 | trans = btrfs_start_transaction(root, 1); | 1100 | trans = btrfs_start_transaction(root, 0); |
| 1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; | 1101 | key.objectid = BTRFS_DEV_ITEMS_OBJECTID; |
| 1102 | key.type = BTRFS_DEV_ITEM_KEY; | 1102 | key.type = BTRFS_DEV_ITEM_KEY; |
| 1103 | key.offset = device->devid; | 1103 | key.offset = device->devid; |
| @@ -1486,7 +1486,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path) | |||
| 1486 | goto error; | 1486 | goto error; |
| 1487 | } | 1487 | } |
| 1488 | 1488 | ||
| 1489 | trans = btrfs_start_transaction(root, 1); | 1489 | trans = btrfs_start_transaction(root, 0); |
| 1490 | lock_chunks(root); | 1490 | lock_chunks(root); |
| 1491 | 1491 | ||
| 1492 | device->barriers = 1; | 1492 | device->barriers = 1; |
| @@ -1751,9 +1751,10 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, | |||
| 1751 | 1751 | ||
| 1752 | /* step one, relocate all the extents inside this chunk */ | 1752 | /* step one, relocate all the extents inside this chunk */ |
| 1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); | 1753 | ret = btrfs_relocate_block_group(extent_root, chunk_offset); |
| 1754 | BUG_ON(ret); | 1754 | if (ret) |
| 1755 | return ret; | ||
| 1755 | 1756 | ||
| 1756 | trans = btrfs_start_transaction(root, 1); | 1757 | trans = btrfs_start_transaction(root, 0); |
| 1757 | BUG_ON(!trans); | 1758 | BUG_ON(!trans); |
| 1758 | 1759 | ||
| 1759 | lock_chunks(root); | 1760 | lock_chunks(root); |
| @@ -1925,7 +1926,7 @@ int btrfs_balance(struct btrfs_root *dev_root) | |||
| 1925 | break; | 1926 | break; |
| 1926 | BUG_ON(ret); | 1927 | BUG_ON(ret); |
| 1927 | 1928 | ||
| 1928 | trans = btrfs_start_transaction(dev_root, 1); | 1929 | trans = btrfs_start_transaction(dev_root, 0); |
| 1929 | BUG_ON(!trans); | 1930 | BUG_ON(!trans); |
| 1930 | 1931 | ||
| 1931 | ret = btrfs_grow_device(trans, device, old_size); | 1932 | ret = btrfs_grow_device(trans, device, old_size); |
| @@ -2094,11 +2095,7 @@ again: | |||
| 2094 | } | 2095 | } |
| 2095 | 2096 | ||
| 2096 | /* Shrinking succeeded, else we would be at "done". */ | 2097 | /* Shrinking succeeded, else we would be at "done". */ |
| 2097 | trans = btrfs_start_transaction(root, 1); | 2098 | trans = btrfs_start_transaction(root, 0); |
| 2098 | if (!trans) { | ||
| 2099 | ret = -ENOMEM; | ||
| 2100 | goto done; | ||
| 2101 | } | ||
| 2102 | lock_chunks(root); | 2099 | lock_chunks(root); |
| 2103 | 2100 | ||
| 2104 | device->disk_total_bytes = new_size; | 2101 | device->disk_total_bytes = new_size; |
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c index 59acd3eb288a..88ecbb215878 100644 --- a/fs/btrfs/xattr.c +++ b/fs/btrfs/xattr.c | |||
| @@ -154,15 +154,10 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
| 154 | if (trans) | 154 | if (trans) |
| 155 | return do_setxattr(trans, inode, name, value, size, flags); | 155 | return do_setxattr(trans, inode, name, value, size, flags); |
| 156 | 156 | ||
| 157 | ret = btrfs_reserve_metadata_space(root, 2); | 157 | trans = btrfs_start_transaction(root, 2); |
| 158 | if (ret) | 158 | if (IS_ERR(trans)) |
| 159 | return ret; | 159 | return PTR_ERR(trans); |
| 160 | 160 | ||
| 161 | trans = btrfs_start_transaction(root, 1); | ||
| 162 | if (!trans) { | ||
| 163 | ret = -ENOMEM; | ||
| 164 | goto out; | ||
| 165 | } | ||
| 166 | btrfs_set_trans_block_group(trans, inode); | 161 | btrfs_set_trans_block_group(trans, inode); |
| 167 | 162 | ||
| 168 | ret = do_setxattr(trans, inode, name, value, size, flags); | 163 | ret = do_setxattr(trans, inode, name, value, size, flags); |
| @@ -174,7 +169,6 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans, | |||
| 174 | BUG_ON(ret); | 169 | BUG_ON(ret); |
| 175 | out: | 170 | out: |
| 176 | btrfs_end_transaction_throttle(trans, root); | 171 | btrfs_end_transaction_throttle(trans, root); |
| 177 | btrfs_unreserve_metadata_space(root, 2); | ||
| 178 | return ret; | 172 | return ret; |
| 179 | } | 173 | } |
| 180 | 174 | ||
diff --git a/fs/direct-io.c b/fs/direct-io.c index e82adc2debb7..da111aacb46e 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
| @@ -82,6 +82,8 @@ struct dio { | |||
| 82 | int reap_counter; /* rate limit reaping */ | 82 | int reap_counter; /* rate limit reaping */ |
| 83 | get_block_t *get_block; /* block mapping function */ | 83 | get_block_t *get_block; /* block mapping function */ |
| 84 | dio_iodone_t *end_io; /* IO completion function */ | 84 | dio_iodone_t *end_io; /* IO completion function */ |
| 85 | dio_submit_t *submit_io; /* IO submission function */ ||
| 86 | loff_t logical_offset_in_bio; /* current first logical block in bio */ | ||
| 85 | sector_t final_block_in_bio; /* current final block in bio + 1 */ | 87 | sector_t final_block_in_bio; /* current final block in bio + 1 */ |
| 86 | sector_t next_block_for_io; /* next block to be put under IO, | 88 | sector_t next_block_for_io; /* next block to be put under IO, |
| 87 | in dio_blocks units */ | 89 | in dio_blocks units */ |
| @@ -96,6 +98,7 @@ struct dio { | |||
| 96 | unsigned cur_page_offset; /* Offset into it, in bytes */ | 98 | unsigned cur_page_offset; /* Offset into it, in bytes */ |
| 97 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ | 99 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ |
| 98 | sector_t cur_page_block; /* Where it starts */ | 100 | sector_t cur_page_block; /* Where it starts */ |
| 101 | loff_t cur_page_fs_offset; /* Offset in file */ | ||
| 99 | 102 | ||
| 100 | /* BIO completion state */ | 103 | /* BIO completion state */ |
| 101 | spinlock_t bio_lock; /* protects BIO fields below */ | 104 | spinlock_t bio_lock; /* protects BIO fields below */ |
| @@ -300,6 +303,26 @@ static void dio_bio_end_io(struct bio *bio, int error) | |||
| 300 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 303 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
| 301 | } | 304 | } |
| 302 | 305 | ||
| 306 | /** | ||
| 307 | * dio_end_io - handle the end io action for the given bio | ||
| 308 | * @bio: The direct io bio thats being completed | ||
| 309 | * @error: Error if there was one | ||
| 310 | * | ||
| 311 | * This is meant to be called by any filesystem that uses their own dio_submit_t | ||
| 312 | * so that the DIO specific endio actions are dealt with after the filesystem | ||
| 313 | * has done its completion work. ||
| 314 | */ | ||
| 315 | void dio_end_io(struct bio *bio, int error) | ||
| 316 | { | ||
| 317 | struct dio *dio = bio->bi_private; | ||
| 318 | |||
| 319 | if (dio->is_async) | ||
| 320 | dio_bio_end_aio(bio, error); | ||
| 321 | else | ||
| 322 | dio_bio_end_io(bio, error); | ||
| 323 | } | ||
| 324 | EXPORT_SYMBOL_GPL(dio_end_io); | ||
| 325 | |||
| 303 | static int | 326 | static int |
| 304 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, | 327 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, |
| 305 | sector_t first_sector, int nr_vecs) | 328 | sector_t first_sector, int nr_vecs) |
| @@ -316,6 +339,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
| 316 | bio->bi_end_io = dio_bio_end_io; | 339 | bio->bi_end_io = dio_bio_end_io; |
| 317 | 340 | ||
| 318 | dio->bio = bio; | 341 | dio->bio = bio; |
| 342 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; | ||
| 319 | return 0; | 343 | return 0; |
| 320 | } | 344 | } |
| 321 | 345 | ||
| @@ -340,10 +364,15 @@ static void dio_bio_submit(struct dio *dio) | |||
| 340 | if (dio->is_async && dio->rw == READ) | 364 | if (dio->is_async && dio->rw == READ) |
| 341 | bio_set_pages_dirty(bio); | 365 | bio_set_pages_dirty(bio); |
| 342 | 366 | ||
| 343 | submit_bio(dio->rw, bio); | 367 | if (dio->submit_io) |
| 368 | dio->submit_io(dio->rw, bio, dio->inode, | ||
| 369 | dio->logical_offset_in_bio); | ||
| 370 | else | ||
| 371 | submit_bio(dio->rw, bio); | ||
| 344 | 372 | ||
| 345 | dio->bio = NULL; | 373 | dio->bio = NULL; |
| 346 | dio->boundary = 0; | 374 | dio->boundary = 0; |
| 375 | dio->logical_offset_in_bio = 0; | ||
| 347 | } | 376 | } |
| 348 | 377 | ||
| 349 | /* | 378 | /* |
| @@ -603,10 +632,26 @@ static int dio_send_cur_page(struct dio *dio) | |||
| 603 | int ret = 0; | 632 | int ret = 0; |
| 604 | 633 | ||
| 605 | if (dio->bio) { | 634 | if (dio->bio) { |
| 635 | loff_t cur_offset = dio->block_in_file << dio->blkbits; | ||
| 636 | loff_t bio_next_offset = dio->logical_offset_in_bio + | ||
| 637 | dio->bio->bi_size; | ||
| 638 | |||
| 606 | /* | 639 | /* |
| 607 | * See whether this new request is contiguous with the old | 640 | * See whether this new request is contiguous with the old. |
| 641 | * | ||
| 642 | * Btrfs cannot handle having logically non-contiguous requests ||
| 643 | * submitted. For example if you have ||
| 644 | * | ||
| 645 | * Logical: [0-4095][HOLE][8192-12287] | ||
| 646 | * Physical: [0-4095] [4096-8191] ||
| 647 | * | ||
| 648 | * We cannot submit those pages together as one BIO. So if our | ||
| 649 | * current logical offset in the file does not equal what would | ||
| 650 | * be the next logical offset in the bio, submit the bio we | ||
| 651 | * have. | ||
| 608 | */ | 652 | */ |
| 609 | if (dio->final_block_in_bio != dio->cur_page_block) | 653 | if (dio->final_block_in_bio != dio->cur_page_block || |
| 654 | cur_offset != bio_next_offset) | ||
| 610 | dio_bio_submit(dio); | 655 | dio_bio_submit(dio); |
| 611 | /* | 656 | /* |
| 612 | * Submit now if the underlying fs is about to perform a | 657 | * Submit now if the underlying fs is about to perform a |
| @@ -701,6 +746,7 @@ submit_page_section(struct dio *dio, struct page *page, | |||
| 701 | dio->cur_page_offset = offset; | 746 | dio->cur_page_offset = offset; |
| 702 | dio->cur_page_len = len; | 747 | dio->cur_page_len = len; |
| 703 | dio->cur_page_block = blocknr; | 748 | dio->cur_page_block = blocknr; |
| 749 | dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; | ||
| 704 | out: | 750 | out: |
| 705 | return ret; | 751 | return ret; |
| 706 | } | 752 | } |
| @@ -935,7 +981,7 @@ static ssize_t | |||
| 935 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | 981 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, |
| 936 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | 982 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, |
| 937 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, | 983 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, |
| 938 | struct dio *dio) | 984 | dio_submit_t submit_io, struct dio *dio) |
| 939 | { | 985 | { |
| 940 | unsigned long user_addr; | 986 | unsigned long user_addr; |
| 941 | unsigned long flags; | 987 | unsigned long flags; |
| @@ -952,6 +998,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
| 952 | 998 | ||
| 953 | dio->get_block = get_block; | 999 | dio->get_block = get_block; |
| 954 | dio->end_io = end_io; | 1000 | dio->end_io = end_io; |
| 1001 | dio->submit_io = submit_io; | ||
| 955 | dio->final_block_in_bio = -1; | 1002 | dio->final_block_in_bio = -1; |
| 956 | dio->next_block_for_io = -1; | 1003 | dio->next_block_for_io = -1; |
| 957 | 1004 | ||
| @@ -1008,7 +1055,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
| 1008 | } | 1055 | } |
| 1009 | } /* end iovec loop */ | 1056 | } /* end iovec loop */ |
| 1010 | 1057 | ||
| 1011 | if (ret == -ENOTBLK && (rw & WRITE)) { | 1058 | if (ret == -ENOTBLK) { |
| 1012 | /* | 1059 | /* |
| 1013 | * The remaining part of the request will be | 1060 | * The remaining part of the request will be |
| 1014 | * be handled by buffered I/O when we return | 1061 | * be handled by buffered I/O when we return |
| @@ -1110,7 +1157,7 @@ ssize_t | |||
| 1110 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1157 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
| 1111 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1158 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
| 1112 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1159 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
| 1113 | int flags) | 1160 | dio_submit_t submit_io, int flags) |
| 1114 | { | 1161 | { |
| 1115 | int seg; | 1162 | int seg; |
| 1116 | size_t size; | 1163 | size_t size; |
| @@ -1197,7 +1244,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
| 1197 | (end > i_size_read(inode))); | 1244 | (end > i_size_read(inode))); |
| 1198 | 1245 | ||
| 1199 | retval = direct_io_worker(rw, iocb, inode, iov, offset, | 1246 | retval = direct_io_worker(rw, iocb, inode, iov, offset, |
| 1200 | nr_segs, blkbits, get_block, end_io, dio); | 1247 | nr_segs, blkbits, get_block, end_io, |
| 1248 | submit_io, dio); | ||
| 1201 | 1249 | ||
| 1202 | /* | 1250 | /* |
| 1203 | * In case of error extending write may have instantiated a few | 1251 | * In case of error extending write may have instantiated a few |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 9682d52d1507..85e823adcd4a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
| @@ -2251,10 +2251,15 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from) | |||
| 2251 | #endif | 2251 | #endif |
| 2252 | 2252 | ||
| 2253 | #ifdef CONFIG_BLOCK | 2253 | #ifdef CONFIG_BLOCK |
| 2254 | struct bio; | ||
| 2255 | typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, | ||
| 2256 | loff_t file_offset); | ||
| 2257 | void dio_end_io(struct bio *bio, int error); | ||
| 2258 | |||
| 2254 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 2259 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
| 2255 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 2260 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
| 2256 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 2261 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
| 2257 | int lock_type); | 2262 | dio_submit_t submit_io, int lock_type); |
| 2258 | 2263 | ||
| 2259 | enum { | 2264 | enum { |
| 2260 | /* need locking between buffered and direct access */ | 2265 | /* need locking between buffered and direct access */ |
| @@ -2270,7 +2275,7 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, | |||
| 2270 | dio_iodone_t end_io) | 2275 | dio_iodone_t end_io) |
| 2271 | { | 2276 | { |
| 2272 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2277 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
| 2273 | nr_segs, get_block, end_io, | 2278 | nr_segs, get_block, end_io, NULL, |
| 2274 | DIO_LOCKING | DIO_SKIP_HOLES); | 2279 | DIO_LOCKING | DIO_SKIP_HOLES); |
| 2275 | } | 2280 | } |
| 2276 | 2281 | ||
| @@ -2280,7 +2285,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, | |||
| 2280 | dio_iodone_t end_io) | 2285 | dio_iodone_t end_io) |
| 2281 | { | 2286 | { |
| 2282 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2287 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
| 2283 | nr_segs, get_block, end_io, 0); | 2288 | nr_segs, get_block, end_io, NULL, 0); |
| 2284 | } | 2289 | } |
| 2285 | #endif | 2290 | #endif |
| 2286 | 2291 | ||
diff --git a/mm/filemap.c b/mm/filemap.c index 35e12d186566..45a2d18df849 100644 --- a/mm/filemap.c +++ b/mm/filemap.c | |||
| @@ -1275,7 +1275,7 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 1275 | { | 1275 | { |
| 1276 | struct file *filp = iocb->ki_filp; | 1276 | struct file *filp = iocb->ki_filp; |
| 1277 | ssize_t retval; | 1277 | ssize_t retval; |
| 1278 | unsigned long seg; | 1278 | unsigned long seg = 0; |
| 1279 | size_t count; | 1279 | size_t count; |
| 1280 | loff_t *ppos = &iocb->ki_pos; | 1280 | loff_t *ppos = &iocb->ki_pos; |
| 1281 | 1281 | ||
| @@ -1302,21 +1302,47 @@ generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, | |||
| 1302 | retval = mapping->a_ops->direct_IO(READ, iocb, | 1302 | retval = mapping->a_ops->direct_IO(READ, iocb, |
| 1303 | iov, pos, nr_segs); | 1303 | iov, pos, nr_segs); |
| 1304 | } | 1304 | } |
| 1305 | if (retval > 0) | 1305 | if (retval > 0) { |
| 1306 | *ppos = pos + retval; | 1306 | *ppos = pos + retval; |
| 1307 | if (retval) { | 1307 | count -= retval; |
| 1308 | } | ||
| 1309 | |||
| 1310 | /* | ||
| 1311 | * Btrfs can have a short DIO read if we encounter | ||
| 1312 | * compressed extents, so if there was an error, or if | ||
| 1313 | * we've already read everything we wanted to, or if | ||
| 1314 | * there was a short read because we hit EOF, go ahead | ||
| 1315 | * and return. Otherwise fallthrough to buffered io for | ||
| 1316 | * the rest of the read. | ||
| 1317 | */ | ||
| 1318 | if (retval < 0 || !count || *ppos >= size) { | ||
| 1308 | file_accessed(filp); | 1319 | file_accessed(filp); |
| 1309 | goto out; | 1320 | goto out; |
| 1310 | } | 1321 | } |
| 1311 | } | 1322 | } |
| 1312 | } | 1323 | } |
| 1313 | 1324 | ||
| 1325 | count = retval; | ||
| 1314 | for (seg = 0; seg < nr_segs; seg++) { | 1326 | for (seg = 0; seg < nr_segs; seg++) { |
| 1315 | read_descriptor_t desc; | 1327 | read_descriptor_t desc; |
| 1328 | loff_t offset = 0; | ||
| 1329 | |||
| 1330 | /* | ||
| 1331 | * If we did a short DIO read we need to skip the section of the | ||
| 1332 | * iov that we've already read data into. | ||
| 1333 | */ | ||
| 1334 | if (count) { | ||
| 1335 | if (count > iov[seg].iov_len) { | ||
| 1336 | count -= iov[seg].iov_len; | ||
| 1337 | continue; | ||
| 1338 | } | ||
| 1339 | offset = count; | ||
| 1340 | count = 0; | ||
| 1341 | } | ||
| 1316 | 1342 | ||
| 1317 | desc.written = 0; | 1343 | desc.written = 0; |
| 1318 | desc.arg.buf = iov[seg].iov_base; | 1344 | desc.arg.buf = iov[seg].iov_base + offset; |
| 1319 | desc.count = iov[seg].iov_len; | 1345 | desc.count = iov[seg].iov_len - offset; |
| 1320 | if (desc.count == 0) | 1346 | if (desc.count == 0) |
| 1321 | continue; | 1347 | continue; |
| 1322 | desc.error = 0; | 1348 | desc.error = 0; |
