From c581afc8db4e9aaa8af2246bb72c1bf72825014d Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 6 Feb 2014 16:06:06 -0500 Subject: Btrfs: balance delayed inode updates While trying to reproduce a delayed ref problem I noticed the box kept falling over using all 80gb of my ram with btrfs_inode's and btrfs_delayed_node's. Turns out this is because we only throttle delayed inode updates in btrfs_dirty_inode, which doesn't actually get called that often, especially when all you are doing is creating a bunch of files. So balance delayed inode updates everytime we create a new inode. With this patch we no longer use up all of our ram with delayed inode updates. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 4ffb6d79f9f0..a7e6690e0946 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -5795,6 +5795,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry, } out_unlock: btrfs_end_transaction(trans, root); + btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); if (drop_inode) { inode_dec_link_count(inode); @@ -5868,6 +5869,7 @@ out_unlock: inode_dec_link_count(inode); iput(inode); } + btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; } @@ -5926,6 +5928,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir, } btrfs_end_transaction(trans, root); + btrfs_balance_delayed_items(root); fail: if (drop_inode) { inode_dec_link_count(inode); @@ -5992,6 +5995,7 @@ out_fail: btrfs_end_transaction(trans, root); if (drop_on_err) iput(inode); + btrfs_balance_delayed_items(root); btrfs_btree_balance_dirty(root); return err; } -- cgit v1.2.2 From 29bce2f3997a8dc5195b7a7724362d1e55df7bb2 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 7 Feb 2014 12:21:23 -0500 Subject: Btrfs: unlock extent and pages on error in cow_file_range When I converted the BUG_ON() for the 
free_space_cache_inode in cow_file_range I made it so we just return an error instead of unlocking all of our various stuff. This is a mistake and causes us to hang when we run into this. This patch fixes this problem. Thanks, Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a7e6690e0946..5b8925003090 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -864,7 +864,8 @@ static noinline int cow_file_range(struct inode *inode, if (btrfs_is_free_space_inode(inode)) { WARN_ON_ONCE(1); - return -EINVAL; + ret = -EINVAL; + goto out_unlock; } num_bytes = ALIGN(end - start + 1, blocksize); -- cgit v1.2.2 From bcbba5e6593281adc234938b42d3c3d3570335db Mon Sep 17 00:00:00 2001 From: Wang Shilong Date: Sat, 8 Feb 2014 23:46:35 +0800 Subject: Btrfs: skip readonly root for snapshot-aware defragment Btrfs send is assuming readonly root won't change, let's skip readonly root. 
Signed-off-by: Wang Shilong Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 5b8925003090..b88f6221b48b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2240,6 +2240,11 @@ static noinline int relink_extent_backref(struct btrfs_path *path, return PTR_ERR(root); } + if (btrfs_root_readonly(root)) { + srcu_read_unlock(&fs_info->subvol_srcu, index); + return 0; + } + /* step 2: get inode */ key.objectid = backref->inum; key.type = BTRFS_INODE_ITEM_KEY; -- cgit v1.2.2 From 7813b3db0a9ec77ff1f4b3ee3fb4925848395d59 Mon Sep 17 00:00:00 2001 From: Liu Bo Date: Mon, 10 Feb 2014 17:37:25 +0800 Subject: Btrfs: avoid warning bomb of btrfs_invalidate_inodes After a transaction is aborted, we need to clean up inode resources by calling btrfs_invalidate_inodes(). btrfs_invalidate_inodes() has historically expected the root's refs to be zero and sets a WARN_ON() for that; however, this is not always true while cleaning up an aborted transaction, so we detect the transaction abort and do not warn in that case. 
Signed-off-by: Liu Bo Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b88f6221b48b..8dba152883d3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4926,7 +4926,8 @@ void btrfs_invalidate_inodes(struct btrfs_root *root) struct inode *inode; u64 objectid = 0; - WARN_ON(btrfs_root_refs(&root->root_item) != 0); + if (!test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) + WARN_ON(btrfs_root_refs(&root->root_item) != 0); spin_lock(&root->inode_lock); again: -- cgit v1.2.2 From 7b2b70851f862b68714f357d2926adbb6c574fdd Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 27 Feb 2014 13:58:05 +0800 Subject: Btrfs: fix preallocate vs double nocow write We cannot release the reserved metadata space for the first write if we find the write position is pre-allocated, because the kernel might write the data to the disk before we do the second write but after the can-nocow check; if we release the space for the first write, we might fail to update the metadata for lack of space. Fix this problem by ending the nocow write if there is dirty data in the range whose space is pre-allocated. 
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 8dba152883d3..0182f081d499 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6557,6 +6557,7 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, int ret; struct extent_buffer *leaf; struct btrfs_root *root = BTRFS_I(inode)->root; + struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; struct btrfs_file_extent_item *fi; struct btrfs_key key; u64 disk_bytenr; @@ -6633,6 +6634,20 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, if (btrfs_extent_readonly(root, disk_bytenr)) goto out; + + num_bytes = min(offset + *len, extent_end) - offset; + if (!nocow && found_type == BTRFS_FILE_EXTENT_PREALLOC) { + u64 range_end; + + range_end = round_up(offset + num_bytes, root->sectorsize) - 1; + ret = test_range_bit(io_tree, offset, range_end, + EXTENT_DELALLOC, 0, NULL); + if (ret) { + ret = -EAGAIN; + goto out; + } + } + btrfs_release_path(path); /* @@ -6661,7 +6676,6 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, */ disk_bytenr += backref_offset; disk_bytenr += offset - key.offset; - num_bytes = min(offset + *len, extent_end) - offset; if (csum_exist_in_range(root, disk_bytenr, num_bytes)) goto out; /* -- cgit v1.2.2 From afe3d24267926eb78ba863016bdd65cfe718aef5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:07 +0800 Subject: btrfs: Replace fs_info->delalloc_workers with btrfs_workqueue Much like the fs_info->workers, replace the fs_info->delalloc_workers use the same btrfs_workqueue. 
Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0182f081d499..a41a5a7aa3cb 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -324,7 +324,7 @@ struct async_cow { u64 start; u64 end; struct list_head extents; - struct btrfs_work work; + struct btrfs_work_struct work; }; static noinline int add_async_extent(struct async_cow *cow, @@ -1000,7 +1000,7 @@ out_unlock: /* * work queue call back to started compression on a file and pages */ -static noinline void async_cow_start(struct btrfs_work *work) +static noinline void async_cow_start(struct btrfs_work_struct *work) { struct async_cow *async_cow; int num_added = 0; @@ -1018,7 +1018,7 @@ static noinline void async_cow_start(struct btrfs_work *work) /* * work queue call back to submit previously compressed pages */ -static noinline void async_cow_submit(struct btrfs_work *work) +static noinline void async_cow_submit(struct btrfs_work_struct *work) { struct async_cow *async_cow; struct btrfs_root *root; @@ -1039,7 +1039,7 @@ static noinline void async_cow_submit(struct btrfs_work *work) submit_compressed_extents(async_cow->inode, async_cow); } -static noinline void async_cow_free(struct btrfs_work *work) +static noinline void async_cow_free(struct btrfs_work_struct *work) { struct async_cow *async_cow; async_cow = container_of(work, struct async_cow, work); @@ -1076,17 +1076,15 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page, async_cow->end = cur_end; INIT_LIST_HEAD(&async_cow->extents); - async_cow->work.func = async_cow_start; - async_cow->work.ordered_func = async_cow_submit; - async_cow->work.ordered_free = async_cow_free; - async_cow->work.flags = 0; + btrfs_init_work(&async_cow->work, async_cow_start, + async_cow_submit, async_cow_free); nr_pages = (cur_end - start + 
PAGE_CACHE_SIZE) >> PAGE_CACHE_SHIFT; atomic_add(nr_pages, &root->fs_info->async_delalloc_pages); - btrfs_queue_worker(&root->fs_info->delalloc_workers, - &async_cow->work); + btrfs_queue_work(root->fs_info->delalloc_workers, + &async_cow->work); if (atomic_read(&root->fs_info->async_delalloc_pages) > limit) { wait_event(root->fs_info->async_submit_wait, -- cgit v1.2.2 From a44903abe9dc23ffa305898368a7a910dbae13c5 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:09 +0800 Subject: btrfs: Replace fs_info->flush_workers with btrfs_workqueue. Replace the fs_info->flush_workers with the newly created btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index a41a5a7aa3cb..6c043bed0c32 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8386,7 +8386,7 @@ out_notrans: return ret; } -static void btrfs_run_delalloc_work(struct btrfs_work *work) +static void btrfs_run_delalloc_work(struct btrfs_work_struct *work) { struct btrfs_delalloc_work *delalloc_work; struct inode *inode; @@ -8424,7 +8424,7 @@ struct btrfs_delalloc_work *btrfs_alloc_delalloc_work(struct inode *inode, work->inode = inode; work->wait = wait; work->delay_iput = delay_iput; - work->work.func = btrfs_run_delalloc_work; + btrfs_init_work(&work->work, btrfs_run_delalloc_work, NULL, NULL); return work; } @@ -8476,8 +8476,8 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) goto out; } list_add_tail(&work->list, &works); - btrfs_queue_worker(&root->fs_info->flush_workers, - &work->work); + btrfs_queue_work(root->fs_info->flush_workers, + &work->work); cond_resched(); spin_lock(&root->delalloc_lock); -- cgit v1.2.2 From fccb5d86d8f52161e013025ccf3101d8fab99a32 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:10 +0800 Subject: btrfs: 
Replace fs_info->endio_* workqueue with btrfs_workqueue. Replace the fs_info->endio_* workqueues with the newly created btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 6c043bed0c32..ce3f73046605 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2750,7 +2750,7 @@ out: return ret; } -static void finish_ordered_fn(struct btrfs_work *work) +static void finish_ordered_fn(struct btrfs_work_struct *work) { struct btrfs_ordered_extent *ordered_extent; ordered_extent = container_of(work, struct btrfs_ordered_extent, work); @@ -2763,7 +2763,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workers *workers; + struct btrfs_workqueue_struct *workers; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -2772,14 +2772,13 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, end - start + 1, uptodate)) return 0; - ordered_extent->work.func = finish_ordered_fn; - ordered_extent->work.flags = 0; + btrfs_init_work(&ordered_extent->work, finish_ordered_fn, NULL, NULL); if (btrfs_is_free_space_inode(inode)) - workers = &root->fs_info->endio_freespace_worker; + workers = root->fs_info->endio_freespace_worker; else - workers = &root->fs_info->endio_write_workers; - btrfs_queue_worker(workers, &ordered_extent->work); + workers = root->fs_info->endio_write_workers; + btrfs_queue_work(workers, &ordered_extent->work); return 0; } @@ -7046,10 +7045,9 @@ again: if (!ret) goto out_test; - ordered->work.func = finish_ordered_fn; - ordered->work.flags = 0; - btrfs_queue_worker(&root->fs_info->endio_write_workers, - &ordered->work); + 
btrfs_init_work(&ordered->work, finish_ordered_fn, NULL, NULL); + btrfs_queue_work(root->fs_info->endio_write_workers, + &ordered->work); out_test: /* * our bio might span multiple ordered extents. If we haven't -- cgit v1.2.2 From dc6e320998fb907e4c19032d545d461bfe5040d1 Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:14 +0800 Subject: btrfs: Replace fs_info->fixup_workers workqueue with btrfs_workqueue. Replace the fs_info->fixup_workers with the newly created btrfs_workqueue. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ce3f73046605..0885f333574d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1748,10 +1748,10 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, /* see btrfs_writepage_start_hook for details on why this is required */ struct btrfs_writepage_fixup { struct page *page; - struct btrfs_work work; + struct btrfs_work_struct work; }; -static void btrfs_writepage_fixup_worker(struct btrfs_work *work) +static void btrfs_writepage_fixup_worker(struct btrfs_work_struct *work) { struct btrfs_writepage_fixup *fixup; struct btrfs_ordered_extent *ordered; @@ -1842,9 +1842,9 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end) SetPageChecked(page); page_cache_get(page); - fixup->work.func = btrfs_writepage_fixup_worker; + btrfs_init_work(&fixup->work, btrfs_writepage_fixup_worker, NULL, NULL); fixup->page = page; - btrfs_queue_worker(&root->fs_info->fixup_workers, &fixup->work); + btrfs_queue_work(root->fs_info->fixup_workers, &fixup->work); return -EBUSY; } -- cgit v1.2.2 From d458b0540ebd728b4d6ef47cc5ef0dbfd4dd361a Mon Sep 17 00:00:00 2001 From: Qu Wenruo Date: Fri, 28 Feb 2014 10:46:19 +0800 Subject: btrfs: Cleanup the "_struct" suffix in btrfs_workequeue Since the "_struct" suffix 
is mainly used to distinguish the different btrfs_work between the original and the newly created one, there is no need to use the suffix now that all btrfs_workers have been changed into btrfs_workqueue. This patch also fixes some code whose style was changed because of the overly long "_struct" suffix. Signed-off-by: Qu Wenruo Tested-by: David Sterba Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 0885f333574d..53697a80b849 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -324,7 +324,7 @@ struct async_cow { u64 start; u64 end; struct list_head extents; - struct btrfs_work_struct work; + struct btrfs_work work; }; static noinline int add_async_extent(struct async_cow *cow, @@ -1000,7 +1000,7 @@ out_unlock: /* * work queue call back to started compression on a file and pages */ -static noinline void async_cow_start(struct btrfs_work_struct *work) +static noinline void async_cow_start(struct btrfs_work *work) { struct async_cow *async_cow; int num_added = 0; @@ -1018,7 +1018,7 @@ static noinline void async_cow_start(struct btrfs_work_struct *work) /* * work queue call back to submit previously compressed pages */ -static noinline void async_cow_submit(struct btrfs_work_struct *work) +static noinline void async_cow_submit(struct btrfs_work *work) { struct async_cow *async_cow; struct btrfs_root *root; @@ -1039,7 +1039,7 @@ static noinline void async_cow_submit(struct btrfs_work_struct *work) submit_compressed_extents(async_cow->inode, async_cow); } -static noinline void async_cow_free(struct btrfs_work_struct *work) +static noinline void async_cow_free(struct btrfs_work *work) { struct async_cow *async_cow; async_cow = container_of(work, struct async_cow, work); @@ -1748,10 +1748,10 @@ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end, /* see btrfs_writepage_start_hook for details on why this is 
required */ struct btrfs_writepage_fixup { struct page *page; - struct btrfs_work_struct work; + struct btrfs_work work; }; -static void btrfs_writepage_fixup_worker(struct btrfs_work_struct *work) +static void btrfs_writepage_fixup_worker(struct btrfs_work *work) { struct btrfs_writepage_fixup *fixup; struct btrfs_ordered_extent *ordered; @@ -2750,7 +2750,7 @@ out: return ret; } -static void finish_ordered_fn(struct btrfs_work_struct *work) +static void finish_ordered_fn(struct btrfs_work *work) { struct btrfs_ordered_extent *ordered_extent; ordered_extent = container_of(work, struct btrfs_ordered_extent, work); @@ -2763,7 +2763,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, struct inode *inode = page->mapping->host; struct btrfs_root *root = BTRFS_I(inode)->root; struct btrfs_ordered_extent *ordered_extent = NULL; - struct btrfs_workqueue_struct *workers; + struct btrfs_workqueue *workers; trace_btrfs_writepage_end_io_hook(page, start, end, uptodate); @@ -8384,7 +8384,7 @@ out_notrans: return ret; } -static void btrfs_run_delalloc_work(struct btrfs_work_struct *work) +static void btrfs_run_delalloc_work(struct btrfs_work *work) { struct btrfs_delalloc_work *delalloc_work; struct inode *inode; -- cgit v1.2.2 From 41bd9ca459a007cc5588563bb08de9677c8d23fd Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Mar 2014 13:54:57 +0800 Subject: Btrfs: just do dirty page flush for the inode with compression before direct IO As the comment in the btrfs_direct_IO says, only the compressed pages need be flush again to make sure they are on the disk, but the common pages needn't, so we add a if statement to check if the inode has compressed pages or not, if no, skip the flush. And in order to prevent the write ranges from intersecting, we need wait for the running ordered extents. 
But the current code waits for them twice: once before the direct IO starts (in btrfs_wait_ordered_range()), and once before we get the blocks. The first wait is unnecessary: because we can do the direct IO without holding i_mutex, intersecting ordered extents may appear during the direct IO, so the first wait cannot avoid this problem. So we use filemap_fdatawrite_range() instead of btrfs_wait_ordered_range() to remove the first wait. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 53697a80b849..f5e623371bf3 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -7422,15 +7422,15 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, smp_mb__after_atomic_inc(); /* - * The generic stuff only does filemap_write_and_wait_range, which isn't - * enough if we've written compressed pages to this area, so we need to - * call btrfs_wait_ordered_range to make absolutely sure that any - * outstanding dirty pages are on disk. + * The generic stuff only does filemap_write_and_wait_range, which + * isn't enough if we've written compressed pages to this area, so + * we need to flush the dirty pages again to make absolutely sure + * that any outstanding dirty pages are on disk. 
*/ count = iov_length(iov, nr_segs); - ret = btrfs_wait_ordered_range(inode, offset, count); - if (ret) - return ret; + if (test_bit(BTRFS_INODE_HAS_ASYNC_EXTENT, + &BTRFS_I(inode)->runtime_flags)) + filemap_fdatawrite_range(inode->i_mapping, offset, count); if (rw & WRITE) { /* -- cgit v1.2.2 From 6c255e67cec1c38a0569c7f823eba63f9449ccf8 Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Mar 2014 13:55:01 +0800 Subject: Btrfs: don't flush all delalloc inodes when we doesn't get s_umount lock We needn't flush all delalloc inodes when we don't get the s_umount lock, or we would make the tasks wait for a long time. Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 34 +++++++++++++++++++--------------- 1 file changed, 19 insertions(+), 15 deletions(-) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index f5e623371bf3..fbaf1ac3941b 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8437,7 +8437,8 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work) * some fairly slow code that needs optimization. This walks the list * of all the inodes with pending delalloc and forces them to disk. 
*/ -static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) +static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, + int nr) { struct btrfs_inode *binode; struct inode *inode; @@ -8471,23 +8472,19 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput) else iput(inode); ret = -ENOMEM; - goto out; + break; } list_add_tail(&work->list, &works); btrfs_queue_work(root->fs_info->flush_workers, &work->work); - + ret++; + if (nr != -1 && ret >= nr) + break; cond_resched(); spin_lock(&root->delalloc_lock); } spin_unlock(&root->delalloc_lock); - list_for_each_entry_safe(work, next, &works, list) { - list_del_init(&work->list); - btrfs_wait_and_free_delalloc_work(work); - } - return 0; -out: list_for_each_entry_safe(work, next, &works, list) { list_del_init(&work->list); btrfs_wait_and_free_delalloc_work(work); @@ -8508,7 +8505,9 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) if (test_bit(BTRFS_FS_STATE_ERROR, &root->fs_info->fs_state)) return -EROFS; - ret = __start_delalloc_inodes(root, delay_iput); + ret = __start_delalloc_inodes(root, delay_iput, -1); + if (ret > 0) + ret = 0; /* * the filemap_flush will queue IO into the worker threads, but * we have to make sure the IO is actually started and that @@ -8525,7 +8524,8 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) return ret; } -int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) +int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, + int nr) { struct btrfs_root *root; struct list_head splice; @@ -8538,7 +8538,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); - while (!list_empty(&splice)) { + while (!list_empty(&splice) && nr) { root = list_first_entry(&splice, struct btrfs_root, delalloc_root); root = 
btrfs_grab_fs_root(root); @@ -8547,15 +8547,20 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); - ret = __start_delalloc_inodes(root, delay_iput); + ret = __start_delalloc_inodes(root, delay_iput, nr); btrfs_put_fs_root(root); - if (ret) + if (ret < 0) goto out; + if (nr != -1) { + nr -= ret; + WARN_ON(nr < 0); + } spin_lock(&fs_info->delalloc_root_lock); } spin_unlock(&fs_info->delalloc_root_lock); + ret = 0; atomic_inc(&fs_info->async_submit_draining); while (atomic_read(&fs_info->nr_async_submits) || atomic_read(&fs_info->async_delalloc_pages)) { @@ -8564,7 +8569,6 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput) atomic_read(&fs_info->async_delalloc_pages) == 0)); } atomic_dec(&fs_info->async_submit_draining); - return 0; out: if (!list_empty_careful(&splice)) { spin_lock(&fs_info->delalloc_root_lock); -- cgit v1.2.2 From 573bfb72f7608eb7097d2dd036a714a6ab20cffe Mon Sep 17 00:00:00 2001 From: Miao Xie Date: Thu, 6 Mar 2014 13:55:03 +0800 Subject: Btrfs: fix possible empty list access when flushing the delalloc inodes We didn't have a lock to protect access to the delalloc inodes list; that is, we might access an empty delalloc inodes list if someone starts flushing delalloc inodes, because the delalloc inodes are moved onto another list temporarily. Fix it by wrapping the access with a lock. 
Signed-off-by: Miao Xie Signed-off-by: Josef Bacik --- fs/btrfs/inode.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'fs/btrfs/inode.c') diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index fbaf1ac3941b..0ec876657923 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -8450,6 +8450,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, INIT_LIST_HEAD(&works); INIT_LIST_HEAD(&splice); + mutex_lock(&root->delalloc_mutex); spin_lock(&root->delalloc_lock); list_splice_init(&root->delalloc_inodes, &splice); while (!list_empty(&splice)) { @@ -8495,6 +8496,7 @@ static int __start_delalloc_inodes(struct btrfs_root *root, int delay_iput, list_splice_tail(&splice, &root->delalloc_inodes); spin_unlock(&root->delalloc_lock); } + mutex_unlock(&root->delalloc_mutex); return ret; } @@ -8536,6 +8538,7 @@ int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, int delay_iput, INIT_LIST_HEAD(&splice); + mutex_lock(&fs_info->delalloc_root_mutex); spin_lock(&fs_info->delalloc_root_lock); list_splice_init(&fs_info->delalloc_roots, &splice); while (!list_empty(&splice) && nr) { @@ -8575,6 +8578,7 @@ out: list_splice_tail(&splice, &fs_info->delalloc_roots); spin_unlock(&fs_info->delalloc_root_lock); } + mutex_unlock(&fs_info->delalloc_root_mutex); return ret; } -- cgit v1.2.2