about summary refs log tree commit diff stats
path: root/fs/btrfs
diff options
context:
space:
mode:
authorFilipe Manana <fdmanana@suse.com>2015-02-09 16:14:24 -0500
committerChris Mason <clm@fb.com>2015-02-14 11:19:14 -0500
commitf55985f4dda5cfb6967c17e96237f3c859076eb3 (patch)
treea821d5a175b47ecf2896ad0f0da451454c69f40e /fs/btrfs
parent575849ecf5d103ca9bbf0a6b9e89eba335d4e750 (diff)
Btrfs: scrub, fix sleep in atomic context
My previous patch "Btrfs: fix scrub race leading to use-after-free" introduced the possibility to sleep in an atomic context, which happens when the scrub_lock mutex is held at the time scrub_pending_bio_dec() is called - this function can be called under an atomic context. Chris ran into this in a debug kernel which gave the following trace: [ 1928.950319] BUG: sleeping function called from invalid context at kernel/locking/mutex.c:621 [ 1928.967334] in_atomic(): 1, irqs_disabled(): 0, pid: 149670, name: fsstress [ 1928.981324] INFO: lockdep is turned off. [ 1928.989244] CPU: 24 PID: 149670 Comm: fsstress Tainted: G W 3.19.0-rc7-mason+ #41 [ 1929.006418] Hardware name: ZTSYSTEMS Echo Ridge T4 /A9DRPF-10D, BIOS 1.07 05/10/2012 [ 1929.022207] ffffffff81a22cf8 ffff881076e03b78 ffffffff816b8dd9 ffff881076e03b78 [ 1929.037267] ffff880d8e828710 ffff881076e03ba8 ffffffff810856c4 ffff881076e03bc8 [ 1929.052315] 0000000000000000 000000000000026d ffffffff81a22cf8 ffff881076e03bd8 [ 1929.067381] Call Trace: [ 1929.072344] <IRQ> [<ffffffff816b8dd9>] dump_stack+0x4f/0x6e [ 1929.083968] [<ffffffff810856c4>] ___might_sleep+0x174/0x230 [ 1929.095352] [<ffffffff810857d2>] __might_sleep+0x52/0x90 [ 1929.106223] [<ffffffff816bb68f>] mutex_lock_nested+0x2f/0x3b0 [ 1929.117951] [<ffffffff810ab37d>] ? trace_hardirqs_on+0xd/0x10 [ 1929.129708] [<ffffffffa05dc838>] scrub_pending_bio_dec+0x38/0x70 [btrfs] [ 1929.143370] [<ffffffffa05dd0e0>] scrub_parity_bio_endio+0x50/0x70 [btrfs] [ 1929.157191] [<ffffffff812fa603>] bio_endio+0x53/0xa0 [ 1929.167382] [<ffffffffa05f96bc>] rbio_orig_end_io+0x7c/0xa0 [btrfs] [ 1929.180161] [<ffffffffa05f97ba>] raid_write_parity_end_io+0x5a/0x80 [btrfs] [ 1929.194318] [<ffffffff812fa603>] bio_endio+0x53/0xa0 [ 1929.204496] [<ffffffff8130401b>] blk_update_request+0x1eb/0x450 [ 1929.216569] [<ffffffff81096e58>] ? 
trigger_load_balance+0x78/0x500 [ 1929.229176] [<ffffffff8144c74d>] scsi_end_request+0x3d/0x1f0 [ 1929.240740] [<ffffffff8144ccac>] scsi_io_completion+0xac/0x5b0 [ 1929.252654] [<ffffffff81441c50>] scsi_finish_command+0xf0/0x150 [ 1929.264725] [<ffffffff8144d317>] scsi_softirq_done+0x147/0x170 [ 1929.276635] [<ffffffff8130ace6>] blk_done_softirq+0x86/0xa0 [ 1929.288014] [<ffffffff8105d92e>] __do_softirq+0xde/0x600 [ 1929.298885] [<ffffffff8105df6d>] irq_exit+0xbd/0xd0 (...) Fix this by using a reference count on the scrub context structure instead of locking the scrub_lock mutex. Signed-off-by: Filipe Manana <fdmanana@suse.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs')
-rw-r--r--  fs/btrfs/scrub.c  39
1 file changed, 23 insertions, 16 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 6d6e155c8c8b..db21f17df996 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -193,6 +193,15 @@ struct scrub_ctx {
193 */ 193 */
194 struct btrfs_scrub_progress stat; 194 struct btrfs_scrub_progress stat;
195 spinlock_t stat_lock; 195 spinlock_t stat_lock;
196
197 /*
198 * Use a ref counter to avoid use-after-free issues. Scrub workers
199 * decrement bios_in_flight and workers_pending and then do a wakeup
200 * on the list_wait wait queue. We must ensure the main scrub task
201 * doesn't free the scrub context before or while the workers are
202 * doing the wakeup() call.
203 */
204 atomic_t refs;
196}; 205};
197 206
198struct scrub_fixup_nodatasum { 207struct scrub_fixup_nodatasum {
@@ -297,26 +306,20 @@ static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
297static void copy_nocow_pages_worker(struct btrfs_work *work); 306static void copy_nocow_pages_worker(struct btrfs_work *work);
298static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 307static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
299static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info); 308static void scrub_blocked_if_needed(struct btrfs_fs_info *fs_info);
309static void scrub_put_ctx(struct scrub_ctx *sctx);
300 310
301 311
302static void scrub_pending_bio_inc(struct scrub_ctx *sctx) 312static void scrub_pending_bio_inc(struct scrub_ctx *sctx)
303{ 313{
314 atomic_inc(&sctx->refs);
304 atomic_inc(&sctx->bios_in_flight); 315 atomic_inc(&sctx->bios_in_flight);
305} 316}
306 317
307static void scrub_pending_bio_dec(struct scrub_ctx *sctx) 318static void scrub_pending_bio_dec(struct scrub_ctx *sctx)
308{ 319{
309 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
310
311 /*
312 * Hold the scrub_lock while doing the wakeup to ensure the
313 * sctx (and its wait queue list_wait) isn't destroyed/freed
314 * during the wakeup.
315 */
316 mutex_lock(&fs_info->scrub_lock);
317 atomic_dec(&sctx->bios_in_flight); 320 atomic_dec(&sctx->bios_in_flight);
318 wake_up(&sctx->list_wait); 321 wake_up(&sctx->list_wait);
319 mutex_unlock(&fs_info->scrub_lock); 322 scrub_put_ctx(sctx);
320} 323}
321 324
322static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info) 325static void __scrub_blocked_if_needed(struct btrfs_fs_info *fs_info)
@@ -350,6 +353,7 @@ static void scrub_pending_trans_workers_inc(struct scrub_ctx *sctx)
350{ 353{
351 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; 354 struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info;
352 355
356 atomic_inc(&sctx->refs);
353 /* 357 /*
354 * increment scrubs_running to prevent cancel requests from 358 * increment scrubs_running to prevent cancel requests from
355 * completing as long as a worker is running. we must also 359 * completing as long as a worker is running. we must also
@@ -388,15 +392,11 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx)
388 mutex_lock(&fs_info->scrub_lock); 392 mutex_lock(&fs_info->scrub_lock);
389 atomic_dec(&fs_info->scrubs_running); 393 atomic_dec(&fs_info->scrubs_running);
390 atomic_dec(&fs_info->scrubs_paused); 394 atomic_dec(&fs_info->scrubs_paused);
395 mutex_unlock(&fs_info->scrub_lock);
391 atomic_dec(&sctx->workers_pending); 396 atomic_dec(&sctx->workers_pending);
392 wake_up(&fs_info->scrub_pause_wait); 397 wake_up(&fs_info->scrub_pause_wait);
393 /*
394 * Hold the scrub_lock while doing the wakeup to ensure the
395 * sctx (and its wait queue list_wait) isn't destroyed/freed
396 * during the wakeup.
397 */
398 wake_up(&sctx->list_wait); 398 wake_up(&sctx->list_wait);
399 mutex_unlock(&fs_info->scrub_lock); 399 scrub_put_ctx(sctx);
400} 400}
401 401
402static void scrub_free_csums(struct scrub_ctx *sctx) 402static void scrub_free_csums(struct scrub_ctx *sctx)
@@ -442,6 +442,12 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx)
442 kfree(sctx); 442 kfree(sctx);
443} 443}
444 444
445static void scrub_put_ctx(struct scrub_ctx *sctx)
446{
447 if (atomic_dec_and_test(&sctx->refs))
448 scrub_free_ctx(sctx);
449}
450
445static noinline_for_stack 451static noinline_for_stack
446struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) 452struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
447{ 453{
@@ -466,6 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
466 sctx = kzalloc(sizeof(*sctx), GFP_NOFS); 472 sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
467 if (!sctx) 473 if (!sctx)
468 goto nomem; 474 goto nomem;
475 atomic_set(&sctx->refs, 1);
469 sctx->is_dev_replace = is_dev_replace; 476 sctx->is_dev_replace = is_dev_replace;
470 sctx->pages_per_rd_bio = pages_per_rd_bio; 477 sctx->pages_per_rd_bio = pages_per_rd_bio;
471 sctx->curr = -1; 478 sctx->curr = -1;
@@ -3739,7 +3746,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
3739 scrub_workers_put(fs_info); 3746 scrub_workers_put(fs_info);
3740 mutex_unlock(&fs_info->scrub_lock); 3747 mutex_unlock(&fs_info->scrub_lock);
3741 3748
3742 scrub_free_ctx(sctx); 3749 scrub_put_ctx(sctx);
3743 3750
3744 return ret; 3751 return ret;
3745} 3752}