aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/async-thread.c
diff options
context:
space:
mode:
authorLiu Bo <bo.li.liu@oracle.com>2014-08-15 11:36:53 -0400
committerChris Mason <clm@fb.com>2014-08-24 10:17:02 -0400
commit9e0af23764344f7f1b68e4eefbe7dc865018b63d (patch)
treeecea2b1c13165160a5829aec21a91a4b5746b8b8 /fs/btrfs/async-thread.c
parentf6dc45c7a93a011dff6eb9b2ffda59c390c7705a (diff)
Btrfs: fix task hang under heavy compressed write
This has been reported and discussed for a long time, and this hang occurs in both 3.15 and 3.16. Btrfs now migrates to use kernel workqueue, but it introduces this hang problem. Btrfs has a kind of work queued as an ordered way, which means that its ordered_func() must be processed in the way of FIFO, so it usually looks like -- normal_work_helper(arg) work = container_of(arg, struct btrfs_work, normal_work); work->func() <---- (we name it work X) for ordered_work in wq->ordered_list ordered_work->ordered_func() ordered_work->ordered_free() The hang is a rare case, first when we find free space, we get an uncached block group, then we go to read its free space cache inode for free space information, so it will file a readahead request btrfs_readpages() for page that is not in page cache __do_readpage() submit_extent_page() btrfs_submit_bio_hook() btrfs_bio_wq_end_io() submit_bio() end_workqueue_bio() <--(ret by the 1st endio) queue a work(named work Y) for the 2nd also the real endio() So the hang occurs when work Y's work_struct and work X's work_struct happens to share the same address. A bit more explanation, A,B,C -- struct btrfs_work arg -- struct work_struct kthread: worker_thread() pick up a work_struct from @worklist process_one_work(arg) worker->current_work = arg; <-- arg is A->normal_work worker->current_func(arg) normal_work_helper(arg) A = container_of(arg, struct btrfs_work, normal_work); A->func() A->ordered_func() A->ordered_free() <-- A gets freed B->ordered_func() submit_compressed_extents() find_free_extent() load_free_space_inode() ... <-- (the above readhead stack) end_workqueue_bio() btrfs_queue_work(work C) B->ordered_free() As if work A has a high priority in wq->ordered_list and there are more ordered works queued after it, such as B->ordered_func(), its memory could have been freed before normal_work_helper() returns, which means that kernel workqueue code worker_thread() still has worker->current_work pointer to be work A->normal_work's, ie. arg's address. Meanwhile, work C is allocated after work A is freed, work C->normal_work and work A->normal_work are likely to share the same address(I confirmed this with ftrace output, so I'm not just guessing, it's rare though). When another kthread picks up work C->normal_work to process, and finds our kthread is processing it(see find_worker_executing_work()), it'll think work C as a collision and skip then, which ends up nobody processing work C. So the situation is that our kthread is waiting forever on work C. Besides, there're other cases that can lead to deadlock, but the real problem is that all btrfs workqueue shares one work->func, -- normal_work_helper, so this makes each workqueue to have its own helper function, but only a wraper pf normal_work_helper. With this patch, I no long hit the above hang. Signed-off-by: Liu Bo <bo.li.liu@oracle.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs/btrfs/async-thread.c')
-rw-r--r--fs/btrfs/async-thread.c44
1 files changed, 36 insertions, 8 deletions
diff --git a/fs/btrfs/async-thread.c b/fs/btrfs/async-thread.c
index 5a201d81049c..fbd76ded9a34 100644
--- a/fs/btrfs/async-thread.c
+++ b/fs/btrfs/async-thread.c
@@ -22,7 +22,6 @@
22#include <linux/list.h> 22#include <linux/list.h>
23#include <linux/spinlock.h> 23#include <linux/spinlock.h>
24#include <linux/freezer.h> 24#include <linux/freezer.h>
25#include <linux/workqueue.h>
26#include "async-thread.h" 25#include "async-thread.h"
27#include "ctree.h" 26#include "ctree.h"
28 27
@@ -55,8 +54,39 @@ struct btrfs_workqueue {
55 struct __btrfs_workqueue *high; 54 struct __btrfs_workqueue *high;
56}; 55};
57 56
58static inline struct __btrfs_workqueue 57static void normal_work_helper(struct btrfs_work *work);
59*__btrfs_alloc_workqueue(const char *name, int flags, int max_active, 58
59#define BTRFS_WORK_HELPER(name) \
60void btrfs_##name(struct work_struct *arg) \
61{ \
62 struct btrfs_work *work = container_of(arg, struct btrfs_work, \
63 normal_work); \
64 normal_work_helper(work); \
65}
66
67BTRFS_WORK_HELPER(worker_helper);
68BTRFS_WORK_HELPER(delalloc_helper);
69BTRFS_WORK_HELPER(flush_delalloc_helper);
70BTRFS_WORK_HELPER(cache_helper);
71BTRFS_WORK_HELPER(submit_helper);
72BTRFS_WORK_HELPER(fixup_helper);
73BTRFS_WORK_HELPER(endio_helper);
74BTRFS_WORK_HELPER(endio_meta_helper);
75BTRFS_WORK_HELPER(endio_meta_write_helper);
76BTRFS_WORK_HELPER(endio_raid56_helper);
77BTRFS_WORK_HELPER(rmw_helper);
78BTRFS_WORK_HELPER(endio_write_helper);
79BTRFS_WORK_HELPER(freespace_write_helper);
80BTRFS_WORK_HELPER(delayed_meta_helper);
81BTRFS_WORK_HELPER(readahead_helper);
82BTRFS_WORK_HELPER(qgroup_rescan_helper);
83BTRFS_WORK_HELPER(extent_refs_helper);
84BTRFS_WORK_HELPER(scrub_helper);
85BTRFS_WORK_HELPER(scrubwrc_helper);
86BTRFS_WORK_HELPER(scrubnc_helper);
87
88static struct __btrfs_workqueue *
89__btrfs_alloc_workqueue(const char *name, int flags, int max_active,
60 int thresh) 90 int thresh)
61{ 91{
62 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS); 92 struct __btrfs_workqueue *ret = kzalloc(sizeof(*ret), GFP_NOFS);
@@ -232,13 +262,11 @@ static void run_ordered_work(struct __btrfs_workqueue *wq)
232 spin_unlock_irqrestore(lock, flags); 262 spin_unlock_irqrestore(lock, flags);
233} 263}
234 264
235static void normal_work_helper(struct work_struct *arg) 265static void normal_work_helper(struct btrfs_work *work)
236{ 266{
237 struct btrfs_work *work;
238 struct __btrfs_workqueue *wq; 267 struct __btrfs_workqueue *wq;
239 int need_order = 0; 268 int need_order = 0;
240 269
241 work = container_of(arg, struct btrfs_work, normal_work);
242 /* 270 /*
243 * We should not touch things inside work in the following cases: 271 * We should not touch things inside work in the following cases:
244 * 1) after work->func() if it has no ordered_free 272 * 1) after work->func() if it has no ordered_free
@@ -262,7 +290,7 @@ static void normal_work_helper(struct work_struct *arg)
262 trace_btrfs_all_work_done(work); 290 trace_btrfs_all_work_done(work);
263} 291}
264 292
265void btrfs_init_work(struct btrfs_work *work, 293void btrfs_init_work(struct btrfs_work *work, btrfs_work_func_t uniq_func,
266 btrfs_func_t func, 294 btrfs_func_t func,
267 btrfs_func_t ordered_func, 295 btrfs_func_t ordered_func,
268 btrfs_func_t ordered_free) 296 btrfs_func_t ordered_free)
@@ -270,7 +298,7 @@ void btrfs_init_work(struct btrfs_work *work,
270 work->func = func; 298 work->func = func;
271 work->ordered_func = ordered_func; 299 work->ordered_func = ordered_func;
272 work->ordered_free = ordered_free; 300 work->ordered_free = ordered_free;
273 INIT_WORK(&work->normal_work, normal_work_helper); 301 INIT_WORK(&work->normal_work, uniq_func);
274 INIT_LIST_HEAD(&work->ordered_list); 302 INIT_LIST_HEAD(&work->ordered_list);
275 work->flags = 0; 303 work->flags = 0;
276} 304}