aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/disk-io.c
diff options
context:
space:
mode:
authorLiu Bo <bo.li.liu@oracle.com>2014-08-15 11:36:53 -0400
committerChris Mason <clm@fb.com>2014-08-24 10:17:02 -0400
commit9e0af23764344f7f1b68e4eefbe7dc865018b63d (patch)
treeecea2b1c13165160a5829aec21a91a4b5746b8b8 /fs/btrfs/disk-io.c
parentf6dc45c7a93a011dff6eb9b2ffda59c390c7705a (diff)
Btrfs: fix task hang under heavy compressed write
This has been reported and discussed for a long time, and this hang occurs in both 3.15 and 3.16. Btrfs now migrates to use kernel workqueue, but it introduces this hang problem. Btrfs has a kind of work queued as an ordered way, which means that its ordered_func() must be processed in the way of FIFO, so it usually looks like -- normal_work_helper(arg) work = container_of(arg, struct btrfs_work, normal_work); work->func() <---- (we name it work X) for ordered_work in wq->ordered_list ordered_work->ordered_func() ordered_work->ordered_free() The hang is a rare case, first when we find free space, we get an uncached block group, then we go to read its free space cache inode for free space information, so it will file a readahead request btrfs_readpages() for page that is not in page cache __do_readpage() submit_extent_page() btrfs_submit_bio_hook() btrfs_bio_wq_end_io() submit_bio() end_workqueue_bio() <--(ret by the 1st endio) queue a work(named work Y) for the 2nd also the real endio() So the hang occurs when work Y's work_struct and work X's work_struct happens to share the same address. A bit more explanation, A,B,C -- struct btrfs_work arg -- struct work_struct kthread: worker_thread() pick up a work_struct from @worklist process_one_work(arg) worker->current_work = arg; <-- arg is A->normal_work worker->current_func(arg) normal_work_helper(arg) A = container_of(arg, struct btrfs_work, normal_work); A->func() A->ordered_func() A->ordered_free() <-- A gets freed B->ordered_func() submit_compressed_extents() find_free_extent() load_free_space_inode() ... <-- (the above readhead stack) end_workqueue_bio() btrfs_queue_work(work C) B->ordered_free() As if work A has a high priority in wq->ordered_list and there are more ordered works queued after it, such as B->ordered_func(), its memory could have been freed before normal_work_helper() returns, which means that kernel workqueue code worker_thread() still has worker->current_work pointer to be work A->normal_work's, ie. arg's address. Meanwhile, work C is allocated after work A is freed, work C->normal_work and work A->normal_work are likely to share the same address(I confirmed this with ftrace output, so I'm not just guessing, it's rare though). When another kthread picks up work C->normal_work to process, and finds our kthread is processing it(see find_worker_executing_work()), it'll think work C as a collision and skip then, which ends up nobody processing work C. So the situation is that our kthread is waiting forever on work C. Besides, there're other cases that can lead to deadlock, but the real problem is that all btrfs workqueue shares one work->func, -- normal_work_helper, so this makes each workqueue to have its own helper function, but only a wraper pf normal_work_helper. With this patch, I no long hit the above hang. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r--fs/btrfs/disk-io.c53
1 files changed, 29 insertions, 24 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index c99a414813c1..a1d36e62179c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -39,7 +39,6 @@
39#include "btrfs_inode.h" 39#include "btrfs_inode.h"
40#include "volumes.h" 40#include "volumes.h"
41#include "print-tree.h" 41#include "print-tree.h"
42#include "async-thread.h"
43#include "locking.h" 42#include "locking.h"
44#include "tree-log.h" 43#include "tree-log.h"
45#include "free-space-cache.h" 44#include "free-space-cache.h"
@@ -693,35 +692,41 @@ static void end_workqueue_bio(struct bio *bio, int err)
693{ 692{
694 struct end_io_wq *end_io_wq = bio->bi_private; 693 struct end_io_wq *end_io_wq = bio->bi_private;
695 struct btrfs_fs_info *fs_info; 694 struct btrfs_fs_info *fs_info;
695 struct btrfs_workqueue *wq;
696 btrfs_work_func_t func;
696 697
697 fs_info = end_io_wq->info; 698 fs_info = end_io_wq->info;
698 end_io_wq->error = err; 699 end_io_wq->error = err;
699 btrfs_init_work(&end_io_wq->work, end_workqueue_fn, NULL, NULL);
700 700
701 if (bio->bi_rw & REQ_WRITE) { 701 if (bio->bi_rw & REQ_WRITE) {
702 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) 702 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_METADATA) {
703 btrfs_queue_work(fs_info->endio_meta_write_workers, 703 wq = fs_info->endio_meta_write_workers;
704 &end_io_wq->work); 704 func = btrfs_endio_meta_write_helper;
705 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) 705 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_FREE_SPACE) {
706 btrfs_queue_work(fs_info->endio_freespace_worker, 706 wq = fs_info->endio_freespace_worker;
707 &end_io_wq->work); 707 func = btrfs_freespace_write_helper;
708 else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) 708 } else if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
709 btrfs_queue_work(fs_info->endio_raid56_workers, 709 wq = fs_info->endio_raid56_workers;
710 &end_io_wq->work); 710 func = btrfs_endio_raid56_helper;
711 else 711 } else {
712 btrfs_queue_work(fs_info->endio_write_workers, 712 wq = fs_info->endio_write_workers;
713 &end_io_wq->work); 713 func = btrfs_endio_write_helper;
714 }
714 } else { 715 } else {
715 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) 716 if (end_io_wq->metadata == BTRFS_WQ_ENDIO_RAID56) {
716 btrfs_queue_work(fs_info->endio_raid56_workers, 717 wq = fs_info->endio_raid56_workers;
717 &end_io_wq->work); 718 func = btrfs_endio_raid56_helper;
718 else if (end_io_wq->metadata) 719 } else if (end_io_wq->metadata) {
719 btrfs_queue_work(fs_info->endio_meta_workers, 720 wq = fs_info->endio_meta_workers;
720 &end_io_wq->work); 721 func = btrfs_endio_meta_helper;
721 else 722 } else {
722 btrfs_queue_work(fs_info->endio_workers, 723 wq = fs_info->endio_workers;
723 &end_io_wq->work); 724 func = btrfs_endio_helper;
725 }
724 } 726 }
727
728 btrfs_init_work(&end_io_wq->work, func, end_workqueue_fn, NULL, NULL);
729 btrfs_queue_work(wq, &end_io_wq->work);
725} 730}
726 731
727/* 732/*
@@ -828,7 +833,7 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode,
828 async->submit_bio_start = submit_bio_start; 833 async->submit_bio_start = submit_bio_start;
829 async->submit_bio_done = submit_bio_done; 834 async->submit_bio_done = submit_bio_done;
830 835
831 btrfs_init_work(&async->work, run_one_async_start, 836 btrfs_init_work(&async->work, btrfs_worker_helper, run_one_async_start,
832 run_one_async_done, run_one_async_free); 837 run_one_async_done, run_one_async_free);
833 838
834 async->bio_flags = bio_flags; 839 async->bio_flags = bio_flags;