aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Omar Sandoval <osandov@fb.com> 2017-06-06 19:45:31 -0400
committer David Sterba <dsterba@suse.com> 2017-06-29 14:17:01 -0400
commit d7eae3403f46646889a9d172476e61a7aa822cc7 (patch)
tree 09ccf5334909215a4f05d2eec857b802746a85fc /fs/btrfs
parent 7be07912b32d103d9789082f27dd54b47c89c744 (diff)
Btrfs: rework delayed ref total_bytes_pinned accounting
The total_bytes_pinned counter is completely broken when accounting delayed refs:

- If two drops for the same extent are merged, we will decrement total_bytes_pinned twice but only increment it once.
- If an add is merged into a drop or vice versa, we will decrement the total_bytes_pinned counter but never increment it.
- If multiple references to an extent are dropped, we will account it multiple times, potentially vastly over-estimating the number of bytes that will be freed by a commit and doing unnecessary work when we're close to ENOSPC.

The last issue is relatively minor, but the first two make the total_bytes_pinned counter leak or underflow very often. These accounting issues were introduced in b150a4f10d87 ("Btrfs: use a percpu to keep track of possibly pinned bytes"), but they were papered over by zeroing out the counter on every commit until d288db5dc011 ("Btrfs: fix race of using total_bytes_pinned").

We need to make sure that an extent is accounted as pinned exactly once if and only if we will drop references to it when the transaction is committed. Ideally we would only add to total_bytes_pinned when the *last* reference is dropped, but this information isn't readily available for data extents. Again, this over-estimation can lead to extra commits when we're close to ENOSPC, but it's not as bad as before.

The fix implemented here is to increment total_bytes_pinned when the total refmod count for an extent goes negative and decrement it if the refmod count goes back to non-negative or after we've run all of the delayed refs for that extent.

Signed-off-by: Omar Sandoval <osandov@fb.com>
Tested-by: Holger Hoffstätte <holger@applied-asynchrony.com>
Reviewed-by: Liu Bo <bo.li.liu@oracle.com>
Signed-off-by: David Sterba <dsterba@suse.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/extent-tree.c 41
1 files changed, 32 insertions, 9 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 8121a78f6cbd..f2a6a59da20a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2113,6 +2113,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2113 u64 bytenr, u64 num_bytes, u64 parent, 2113 u64 bytenr, u64 num_bytes, u64 parent,
2114 u64 root_objectid, u64 owner, u64 offset) 2114 u64 root_objectid, u64 owner, u64 offset)
2115{ 2115{
2116 int old_ref_mod, new_ref_mod;
2116 int ret; 2117 int ret;
2117 2118
2118 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID && 2119 BUG_ON(owner < BTRFS_FIRST_FREE_OBJECTID &&
@@ -2123,14 +2124,18 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
2123 num_bytes, parent, 2124 num_bytes, parent,
2124 root_objectid, (int)owner, 2125 root_objectid, (int)owner,
2125 BTRFS_ADD_DELAYED_REF, NULL, 2126 BTRFS_ADD_DELAYED_REF, NULL,
2126 NULL, NULL); 2127 &old_ref_mod, &new_ref_mod);
2127 } else { 2128 } else {
2128 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 2129 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
2129 num_bytes, parent, 2130 num_bytes, parent,
2130 root_objectid, owner, offset, 2131 root_objectid, owner, offset,
2131 0, BTRFS_ADD_DELAYED_REF, NULL, 2132 0, BTRFS_ADD_DELAYED_REF,
2132 NULL); 2133 &old_ref_mod, &new_ref_mod);
2133 } 2134 }
2135
2136 if (ret == 0 && old_ref_mod < 0 && new_ref_mod >= 0)
2137 add_pinned_bytes(fs_info, -num_bytes, owner, root_objectid);
2138
2134 return ret; 2139 return ret;
2135} 2140}
2136 2141
@@ -2434,6 +2439,16 @@ static int run_one_delayed_ref(struct btrfs_trans_handle *trans,
2434 head = btrfs_delayed_node_to_head(node); 2439 head = btrfs_delayed_node_to_head(node);
2435 trace_run_delayed_ref_head(fs_info, node, head, node->action); 2440 trace_run_delayed_ref_head(fs_info, node, head, node->action);
2436 2441
2442 if (head->total_ref_mod < 0) {
2443 struct btrfs_block_group_cache *cache;
2444
2445 cache = btrfs_lookup_block_group(fs_info, node->bytenr);
2446 ASSERT(cache);
2447 percpu_counter_add(&cache->space_info->total_bytes_pinned,
2448 -node->num_bytes);
2449 btrfs_put_block_group(cache);
2450 }
2451
2437 if (insert_reserved) { 2452 if (insert_reserved) {
2438 btrfs_pin_extent(fs_info, node->bytenr, 2453 btrfs_pin_extent(fs_info, node->bytenr,
2439 node->num_bytes, 1); 2454 node->num_bytes, 1);
@@ -6284,6 +6299,8 @@ static int update_block_group(struct btrfs_trans_handle *trans,
6284 trace_btrfs_space_reservation(info, "pinned", 6299 trace_btrfs_space_reservation(info, "pinned",
6285 cache->space_info->flags, 6300 cache->space_info->flags,
6286 num_bytes, 1); 6301 num_bytes, 1);
6302 percpu_counter_add(&cache->space_info->total_bytes_pinned,
6303 num_bytes);
6287 set_extent_dirty(info->pinned_extents, 6304 set_extent_dirty(info->pinned_extents,
6288 bytenr, bytenr + num_bytes - 1, 6305 bytenr, bytenr + num_bytes - 1,
6289 GFP_NOFS | __GFP_NOFAIL); 6306 GFP_NOFS | __GFP_NOFAIL);
@@ -7053,8 +7070,6 @@ static int __btrfs_free_extent(struct btrfs_trans_handle *trans,
7053 goto out; 7070 goto out;
7054 } 7071 }
7055 } 7072 }
7056 add_pinned_bytes(info, -num_bytes, owner_objectid,
7057 root_objectid);
7058 } else { 7073 } else {
7059 if (found_extent) { 7074 if (found_extent) {
7060 BUG_ON(is_data && refs_to_drop != 7075 BUG_ON(is_data && refs_to_drop !=
@@ -7186,13 +7201,16 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
7186 int ret; 7201 int ret;
7187 7202
7188 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) { 7203 if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
7204 int old_ref_mod, new_ref_mod;
7205
7189 ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start, 7206 ret = btrfs_add_delayed_tree_ref(fs_info, trans, buf->start,
7190 buf->len, parent, 7207 buf->len, parent,
7191 root->root_key.objectid, 7208 root->root_key.objectid,
7192 btrfs_header_level(buf), 7209 btrfs_header_level(buf),
7193 BTRFS_DROP_DELAYED_REF, NULL, 7210 BTRFS_DROP_DELAYED_REF, NULL,
7194 NULL, NULL); 7211 &old_ref_mod, &new_ref_mod);
7195 BUG_ON(ret); /* -ENOMEM */ 7212 BUG_ON(ret); /* -ENOMEM */
7213 pin = old_ref_mod >= 0 && new_ref_mod < 0;
7196 } 7214 }
7197 7215
7198 if (last_ref && btrfs_header_generation(buf) == trans->transid) { 7216 if (last_ref && btrfs_header_generation(buf) == trans->transid) {
@@ -7241,12 +7259,12 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
7241 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid, 7259 u64 bytenr, u64 num_bytes, u64 parent, u64 root_objectid,
7242 u64 owner, u64 offset) 7260 u64 owner, u64 offset)
7243{ 7261{
7262 int old_ref_mod, new_ref_mod;
7244 int ret; 7263 int ret;
7245 7264
7246 if (btrfs_is_testing(fs_info)) 7265 if (btrfs_is_testing(fs_info))
7247 return 0; 7266 return 0;
7248 7267
7249 add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
7250 7268
7251 /* 7269 /*
7252 * tree log blocks never actually go into the extent allocation 7270 * tree log blocks never actually go into the extent allocation
@@ -7256,20 +7274,25 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
7256 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID); 7274 WARN_ON(owner >= BTRFS_FIRST_FREE_OBJECTID);
7257 /* unlocks the pinned mutex */ 7275 /* unlocks the pinned mutex */
7258 btrfs_pin_extent(fs_info, bytenr, num_bytes, 1); 7276 btrfs_pin_extent(fs_info, bytenr, num_bytes, 1);
7277 old_ref_mod = new_ref_mod = 0;
7259 ret = 0; 7278 ret = 0;
7260 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) { 7279 } else if (owner < BTRFS_FIRST_FREE_OBJECTID) {
7261 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr, 7280 ret = btrfs_add_delayed_tree_ref(fs_info, trans, bytenr,
7262 num_bytes, parent, 7281 num_bytes, parent,
7263 root_objectid, (int)owner, 7282 root_objectid, (int)owner,
7264 BTRFS_DROP_DELAYED_REF, NULL, 7283 BTRFS_DROP_DELAYED_REF, NULL,
7265 NULL, NULL); 7284 &old_ref_mod, &new_ref_mod);
7266 } else { 7285 } else {
7267 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr, 7286 ret = btrfs_add_delayed_data_ref(fs_info, trans, bytenr,
7268 num_bytes, parent, 7287 num_bytes, parent,
7269 root_objectid, owner, offset, 7288 root_objectid, owner, offset,
7270 0, BTRFS_DROP_DELAYED_REF, 7289 0, BTRFS_DROP_DELAYED_REF,
7271 NULL, NULL); 7290 &old_ref_mod, &new_ref_mod);
7272 } 7291 }
7292
7293 if (ret == 0 && old_ref_mod >= 0 && new_ref_mod < 0)
7294 add_pinned_bytes(fs_info, num_bytes, owner, root_objectid);
7295
7273 return ret; 7296 return ret;
7274} 7297}
7275 7298