From 5349d6c3ffead27d693fdac21270541fa95ef33d Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 19 Jun 2014 10:42:49 +0800 Subject: Btrfs: make free space cache write out functions more readable This patch makes the free space cache write out functions more readable, and beisdes that, it also reduces the stack space that the function -- __btrfs_write_out_cache uses from 194bytes to 144bytes. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 159 ++++++++++++++++++++++++++------------------ 1 file changed, 93 insertions(+), 66 deletions(-) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 372b05ff1943..a852e15173e5 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -274,18 +274,32 @@ struct io_ctl { }; static int io_ctl_init(struct io_ctl *io_ctl, struct inode *inode, - struct btrfs_root *root) + struct btrfs_root *root, int write) { + int num_pages; + int check_crcs = 0; + + num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> + PAGE_CACHE_SHIFT; + + if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) + check_crcs = 1; + + /* Make sure we can fit our crcs into the first page */ + if (write && check_crcs && + (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) + return -ENOSPC; + memset(io_ctl, 0, sizeof(struct io_ctl)); - io_ctl->num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; - io_ctl->pages = kzalloc(sizeof(struct page *) * io_ctl->num_pages, - GFP_NOFS); + + io_ctl->pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS); if (!io_ctl->pages) return -ENOMEM; + + io_ctl->num_pages = num_pages; io_ctl->root = root; - if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID) - io_ctl->check_crcs = 1; + io_ctl->check_crcs = check_crcs; + return 0; } @@ -677,7 +691,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, if (!num_entries) return 0; - ret = io_ctl_init(&io_ctl, inode, root); + ret = io_ctl_init(&io_ctl, inode, root, 0); if (ret) return ret; @@ -957,19 +971,18 @@ fail: } static noinline_for_stack int -add_ioctl_entries(struct btrfs_root *root, - struct inode *inode, - struct btrfs_block_group_cache *block_group, - struct io_ctl *io_ctl, - struct extent_state **cached_state, - struct list_head *bitmap_list, - int *entries) +write_pinned_extent_entries(struct btrfs_root *root, + struct btrfs_block_group_cache *block_group, + struct io_ctl *io_ctl, + int *entries) { u64 start, extent_start, extent_end, len; - struct list_head *pos, *n; struct extent_io_tree *unpin = NULL; int ret; + if (!block_group) + return 0; + /* * We want to add any pinned extents to our free space cache * so we don't leak the space @@ -979,23 +992,19 @@ add_ioctl_entries(struct btrfs_root *root, */ unpin = root->fs_info->pinned_extents; - if (block_group) - start = block_group->key.objectid; + start = block_group->key.objectid; - while (block_group && (start < block_group->key.objectid + - block_group->key.offset)) { + while (start < block_group->key.objectid + block_group->key.offset) { ret = find_first_extent_bit(unpin, start, &extent_start, &extent_end, EXTENT_DIRTY, NULL); - if (ret) { - ret = 0; - break; - } + if (ret) + return 0; /* This pinned extent is out of our range */ if (extent_start >= block_group->key.objectid + block_group->key.offset) - break; + return 0; extent_start = max(extent_start, start); extent_end = min(block_group->key.objectid + @@ -1005,11 +1014,20 @@ add_ioctl_entries(struct btrfs_root *root, *entries += 1; ret = io_ctl_add_entry(io_ctl, extent_start, len, NULL); if (ret) - goto out_nospc; + return -ENOSPC; start = extent_end; } + return 0; +} + +static noinline_for_stack int +write_bitmap_entries(struct io_ctl *io_ctl, struct list_head *bitmap_list) +{ + struct list_head *pos, *n; + int ret; + /* Write out the bitmaps */ list_for_each_safe(pos, n, bitmap_list) { struct btrfs_free_space *entry = @@ -1017,36 +1035,24 @@ add_ioctl_entries(struct btrfs_root *root, ret = io_ctl_add_bitmap(io_ctl, entry->bitmap); if (ret) - goto out_nospc; + return -ENOSPC; list_del_init(&entry->list); } - /* Zero out the rest of the pages just to make sure */ - io_ctl_zero_remaining_pages(io_ctl); - - ret = btrfs_dirty_pages(root, inode, io_ctl->pages, io_ctl->num_pages, - 0, i_size_read(inode), cached_state); - io_ctl_drop_pages(io_ctl); - unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, - i_size_read(inode) - 1, cached_state, GFP_NOFS); + return 0; +} - if (ret) - goto fail; +static int flush_dirty_cache(struct inode *inode) +{ + int ret; ret = btrfs_wait_ordered_range(inode, 0, (u64)-1); - if (ret) { + if (ret) clear_extent_bit(&BTRFS_I(inode)->io_tree, 0, inode->i_size - 1, EXTENT_DIRTY | EXTENT_DELALLOC, 0, 0, NULL, GFP_NOFS); - goto fail; - } - return 0; -fail: - return -1; - -out_nospc: - return -ENOSPC; + return ret; } static void noinline_for_stack @@ -1056,6 +1062,7 @@ cleanup_write_cache_enospc(struct inode *inode, struct list_head *bitmap_list) { struct list_head *pos, *n; + list_for_each_safe(pos, n, bitmap_list) { struct btrfs_free_space *entry = list_entry(pos, struct btrfs_free_space, list); @@ -1088,18 +1095,15 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, { struct extent_state *cached_state = NULL; struct io_ctl io_ctl; - struct list_head bitmap_list; + LIST_HEAD(bitmap_list); int entries = 0; int bitmaps = 0; int ret; - int err = -1; - - INIT_LIST_HEAD(&bitmap_list); if (!i_size_read(inode)) return -1; - ret = io_ctl_init(&io_ctl, inode, root); + ret = io_ctl_init(&io_ctl, inode, root, 1); if (ret) return -1; @@ -1109,42 +1113,65 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 0, &cached_state); - - /* Make sure we can fit our crcs into the first page */ - if (io_ctl.check_crcs && - (io_ctl.num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE) - goto out_nospc; - io_ctl_set_generation(&io_ctl, trans->transid); + /* Write out the extent entries in the free space cache */ ret = write_cache_extent_entries(&io_ctl, ctl, block_group, &entries, &bitmaps, &bitmap_list); if (ret) goto out_nospc; - ret = add_ioctl_entries(root, inode, block_group, &io_ctl, - &cached_state, &bitmap_list, &entries); + /* + * Some spaces that are freed in the current transaction are pinned, + * they will be added into free space cache after the transaction is + * committed, we shouldn't lose them. + */ + ret = write_pinned_extent_entries(root, block_group, &io_ctl, &entries); + if (ret) + goto out_nospc; + + /* At last, we write out all the bitmaps. */ + ret = write_bitmap_entries(&io_ctl, &bitmap_list); + if (ret) + goto out_nospc; - if (ret == -ENOSPC) + /* Zero out the rest of the pages just to make sure */ + io_ctl_zero_remaining_pages(&io_ctl); + + /* Everything is written out, now we dirty the pages in the file. */ + ret = btrfs_dirty_pages(root, inode, io_ctl.pages, io_ctl.num_pages, + 0, i_size_read(inode), &cached_state); + if (ret) goto out_nospc; - else if (ret) + + /* + * Release the pages and unlock the extent, we will flush + * them out later + */ + io_ctl_drop_pages(&io_ctl); + + unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, + i_size_read(inode) - 1, &cached_state, GFP_NOFS); + + /* Flush the dirty pages in the cache file. */ + ret = flush_dirty_cache(inode); + if (ret) goto out; - err = update_cache_item(trans, root, inode, path, offset, + /* Update the cache item to tell everyone this cache file is valid. */ + ret = update_cache_item(trans, root, inode, path, offset, entries, bitmaps); - out: io_ctl_free(&io_ctl); - if (err) { + if (ret) { invalidate_inode_pages2(inode->i_mapping); BTRFS_I(inode)->generation = 0; } btrfs_update_inode(trans, root, inode); - return err; + return ret; out_nospc: - cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); goto out; } -- cgit v1.2.2 From e570fd27f2c5d7eac3876bccf99e9838d7f911a3 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 19 Jun 2014 10:42:50 +0800 Subject: Btrfs: fix broken free space cache after the system crashed When we mounted the filesystem after the crash, we got the following message: BTRFS error (device xxx): block group xxxx has wrong amount of free space BTRFS error (device xxx): failed to load free space cache for block group xxx It is because we didn't update the metadata of the allocated space (in extent tree) until the file data was written into the disk. During this time, there was no information about the allocated spaces in either the extent tree nor the free space cache. when we wrote out the free space cache at this time (commit transaction), those spaces were lost. In fact, only the free space that is used to store the file data had this problem, the others didn't because the metadata of them is updated in the same transaction context. There are many methods which can fix the above problem - track the allocated space, and write it out when we write out the free space cache - account the size of the allocated space that is used to store the file data, if the size is not zero, don't write out the free space cache. The first one is complex and may make the performance drop down. This patch chose the second method, we use a per-block-group variant to account the size of that allocated space. Besides that, we also introduce a per-block-group read-write semaphore to avoid the race between the allocation and the free space cache write out. Signed-off-by: Miao Xie Signed-off-by: Chris Mason --- fs/btrfs/free-space-cache.c | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'fs/btrfs/free-space-cache.c') diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index a852e15173e5..2b0a627cb5f9 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -680,6 +680,13 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode, generation = btrfs_free_space_generation(leaf, header); btrfs_release_path(path); + if (!BTRFS_I(inode)->generation) { + btrfs_info(root->fs_info, + "The free space cache file (%llu) is invalid. skip it\n", + offset); + return 0; + } + if (BTRFS_I(inode)->generation != generation) { btrfs_err(root->fs_info, "free space inode generation (%llu) " @@ -1107,6 +1114,20 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, if (ret) return -1; + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) { + down_write(&block_group->data_rwsem); + spin_lock(&block_group->lock); + if (block_group->delalloc_bytes) { + block_group->disk_cache_state = BTRFS_DC_WRITTEN; + spin_unlock(&block_group->lock); + up_write(&block_group->data_rwsem); + BTRFS_I(inode)->generation = 0; + ret = 0; + goto out; + } + spin_unlock(&block_group->lock); + } + /* Lock all pages first so we can lock the extent safely. */ io_ctl_prepare_pages(&io_ctl, inode, 0); @@ -1145,6 +1166,8 @@ static int __btrfs_write_out_cache(struct btrfs_root *root, struct inode *inode, if (ret) goto out_nospc; + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) + up_write(&block_group->data_rwsem); /* * Release the pages and unlock the extent, we will flush * them out later @@ -1173,6 +1196,10 @@ out: out_nospc: cleanup_write_cache_enospc(inode, &io_ctl, &cached_state, &bitmap_list); + + if (block_group && (block_group->flags & BTRFS_BLOCK_GROUP_DATA)) + up_write(&block_group->data_rwsem); + goto out; } @@ -1192,6 +1219,12 @@ int btrfs_write_out_cache(struct btrfs_root *root, spin_unlock(&block_group->lock); return 0; } + + if (block_group->delalloc_bytes) { + block_group->disk_cache_state = BTRFS_DC_WRITTEN; + spin_unlock(&block_group->lock); + return 0; + } spin_unlock(&block_group->lock); inode = lookup_free_space_inode(root, block_group, path); -- cgit v1.2.2