about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs
diff options
context:
space:
mode:
author Chris Mason <chris.mason@oracle.com> 2011-10-31 20:52:39 -0400
committer Chris Mason <chris.mason@oracle.com> 2011-11-06 03:03:48 -0500
commit e688b7252f784c2479d559f9f70ca8354752c5e7 (patch)
tree 3934b0a9c348b2900e08e8fc9c0e6819e80d0fff /fs/btrfs
parent 1eae31e918972bbeefc119d23c1d67674f49a301 (diff)
Btrfs: fix extent pinning bugs in the tree log
The tree log had two important bugs that could cause corruptions after a crash.

Sometimes we were allowing tree log blocks to be reused after the tree log was committed but before the transaction commit was done. This allowed a future metadata write to overwrite the tree log data. It is fixed by adding a new variant of freeing reserved extents that always pins them. Credit goes to Stefan Behrens and Arne Jansen for many, many hours spent tracking this bug down.

During tree log replay, we do a pass through the tree log and pin all the extents we find. This makes sure the replay code won't go in and use any of those blocks for new allocations during replay. The problem is the free space cache isn't honoring these pinned extents. So the allocator can end up handing them out, leading to all kinds of problems during replay.

The fix here is to force any free space cache to load while we pin the extents, and then to make sure we remove the pinned extents from the free space rbtree.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
Reported-by: Stefan Behrens <sbehrens@giantdisaster.de>
Diffstat (limited to 'fs/btrfs')
-rw-r--r-- fs/btrfs/ctree.h 5
-rw-r--r-- fs/btrfs/extent-tree.c 51
-rw-r--r-- fs/btrfs/tree-log.c 11
3 files changed, 59 insertions, 8 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 227620993bc..f63c9b3f6e0 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -2156,6 +2156,9 @@ int btrfs_lookup_extent_info(struct btrfs_trans_handle *trans,
2156 u64 num_bytes, u64 *refs, u64 *flags); 2156 u64 num_bytes, u64 *refs, u64 *flags);
2157int btrfs_pin_extent(struct btrfs_root *root, 2157int btrfs_pin_extent(struct btrfs_root *root,
2158 u64 bytenr, u64 num, int reserved); 2158 u64 bytenr, u64 num, int reserved);
2159int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
2160 struct btrfs_root *root,
2161 u64 bytenr, u64 num_bytes);
2159int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 2162int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
2160 struct btrfs_root *root, 2163 struct btrfs_root *root,
2161 u64 objectid, u64 offset, u64 bytenr); 2164 u64 objectid, u64 offset, u64 bytenr);
@@ -2206,6 +2209,8 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
2206 u64 root_objectid, u64 owner, u64 offset); 2209 u64 root_objectid, u64 owner, u64 offset);
2207 2210
2208int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len); 2211int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len);
2212int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
2213 u64 start, u64 len);
2209int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans, 2214int btrfs_prepare_extent_commit(struct btrfs_trans_handle *trans,
2210 struct btrfs_root *root); 2215 struct btrfs_root *root);
2211int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, 2216int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 28c4809851a..cb7626646bb 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4344,6 +4344,34 @@ int btrfs_pin_extent(struct btrfs_root *root,
4344 return 0; 4344 return 0;
4345} 4345}
4346 4346
4347/*
4348 * this function must be called within transaction
4349 */
4350int btrfs_pin_extent_for_log_replay(struct btrfs_trans_handle *trans,
4351 struct btrfs_root *root,
4352 u64 bytenr, u64 num_bytes)
4353{
4354 struct btrfs_block_group_cache *cache;
4355
4356 cache = btrfs_lookup_block_group(root->fs_info, bytenr);
4357 BUG_ON(!cache);
4358
4359 /*
4360 * pull in the free space cache (if any) so that our pin
4361 * removes the free space from the cache. We have load_only set
4362 * to one because the slow code to read in the free extents does check
4363 * the pinned extents.
4364 */
4365 cache_block_group(cache, trans, root, 1);
4366
4367 pin_down_extent(root, cache, bytenr, num_bytes, 0);
4368
4369 /* remove us from the free space cache (if we're there at all) */
4370 btrfs_remove_free_space(cache, bytenr, num_bytes);
4371 btrfs_put_block_group(cache);
4372 return 0;
4373}
4374
4347/** 4375/**
4348 * btrfs_update_reserved_bytes - update the block_group and space info counters 4376 * btrfs_update_reserved_bytes - update the block_group and space info counters
4349 * @cache: The cache we are manipulating 4377 * @cache: The cache we are manipulating
@@ -5487,7 +5515,8 @@ again:
5487 return ret; 5515 return ret;
5488} 5516}
5489 5517
5490int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len) 5518static int __btrfs_free_reserved_extent(struct btrfs_root *root,
5519 u64 start, u64 len, int pin)
5491{ 5520{
5492 struct btrfs_block_group_cache *cache; 5521 struct btrfs_block_group_cache *cache;
5493 int ret = 0; 5522 int ret = 0;
@@ -5502,8 +5531,12 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5502 if (btrfs_test_opt(root, DISCARD)) 5531 if (btrfs_test_opt(root, DISCARD))
5503 ret = btrfs_discard_extent(root, start, len, NULL); 5532 ret = btrfs_discard_extent(root, start, len, NULL);
5504 5533
5505 btrfs_add_free_space(cache, start, len); 5534 if (pin)
5506 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE); 5535 pin_down_extent(root, cache, start, len, 1);
5536 else {
5537 btrfs_add_free_space(cache, start, len);
5538 btrfs_update_reserved_bytes(cache, len, RESERVE_FREE);
5539 }
5507 btrfs_put_block_group(cache); 5540 btrfs_put_block_group(cache);
5508 5541
5509 trace_btrfs_reserved_extent_free(root, start, len); 5542 trace_btrfs_reserved_extent_free(root, start, len);
@@ -5511,6 +5544,18 @@ int btrfs_free_reserved_extent(struct btrfs_root *root, u64 start, u64 len)
5511 return ret; 5544 return ret;
5512} 5545}
5513 5546
5547int btrfs_free_reserved_extent(struct btrfs_root *root,
5548 u64 start, u64 len)
5549{
5550 return __btrfs_free_reserved_extent(root, start, len, 0);
5551}
5552
5553int btrfs_free_and_pin_reserved_extent(struct btrfs_root *root,
5554 u64 start, u64 len)
5555{
5556 return __btrfs_free_reserved_extent(root, start, len, 1);
5557}
5558
5514static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans, 5559static int alloc_reserved_file_extent(struct btrfs_trans_handle *trans,
5515 struct btrfs_root *root, 5560 struct btrfs_root *root,
5516 u64 parent, u64 root_objectid, 5561 u64 parent, u64 root_objectid,
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 310ab22cfe5..8ca1b6b83bd 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -276,8 +276,9 @@ static int process_one_buffer(struct btrfs_root *log,
276 struct walk_control *wc, u64 gen) 276 struct walk_control *wc, u64 gen)
277{ 277{
278 if (wc->pin) 278 if (wc->pin)
279 btrfs_pin_extent(log->fs_info->extent_root, 279 btrfs_pin_extent_for_log_replay(wc->trans,
280 eb->start, eb->len, 0); 280 log->fs_info->extent_root,
281 eb->start, eb->len);
281 282
282 if (btrfs_buffer_uptodate(eb, gen)) { 283 if (btrfs_buffer_uptodate(eb, gen)) {
283 if (wc->write) 284 if (wc->write)
@@ -1760,7 +1761,7 @@ static noinline int walk_down_log_tree(struct btrfs_trans_handle *trans,
1760 1761
1761 WARN_ON(root_owner != 1762 WARN_ON(root_owner !=
1762 BTRFS_TREE_LOG_OBJECTID); 1763 BTRFS_TREE_LOG_OBJECTID);
1763 ret = btrfs_free_reserved_extent(root, 1764 ret = btrfs_free_and_pin_reserved_extent(root,
1764 bytenr, blocksize); 1765 bytenr, blocksize);
1765 BUG_ON(ret); 1766 BUG_ON(ret);
1766 } 1767 }
@@ -1828,7 +1829,7 @@ static noinline int walk_up_log_tree(struct btrfs_trans_handle *trans,
1828 btrfs_tree_unlock(next); 1829 btrfs_tree_unlock(next);
1829 1830
1830 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID); 1831 WARN_ON(root_owner != BTRFS_TREE_LOG_OBJECTID);
1831 ret = btrfs_free_reserved_extent(root, 1832 ret = btrfs_free_and_pin_reserved_extent(root,
1832 path->nodes[*level]->start, 1833 path->nodes[*level]->start,
1833 path->nodes[*level]->len); 1834 path->nodes[*level]->len);
1834 BUG_ON(ret); 1835 BUG_ON(ret);
@@ -1897,7 +1898,7 @@ static int walk_log_tree(struct btrfs_trans_handle *trans,
1897 1898
1898 WARN_ON(log->root_key.objectid != 1899 WARN_ON(log->root_key.objectid !=
1899 BTRFS_TREE_LOG_OBJECTID); 1900 BTRFS_TREE_LOG_OBJECTID);
1900 ret = btrfs_free_reserved_extent(log, next->start, 1901 ret = btrfs_free_and_pin_reserved_extent(log, next->start,
1901 next->len); 1902 next->len);
1902 BUG_ON(ret); 1903 BUG_ON(ret);
1903 } 1904 }