diff options
author:    Tejun Heo <tj@kernel.org>  2013-09-05 12:30:04 -0400
committer: Tejun Heo <tj@kernel.org>  2013-11-22 18:14:02 -0500
commit:    8a2b75384444488fc4f2cbb9f0921b6a0794838f (patch)
tree:      c4b39daad8de264be08beec77621048e9b9fb9ed /kernel/workqueue.c
parent:    91151228065354a050fd0d190aefdd662a0580aa (diff)
workqueue: fix ordered workqueues in NUMA setups
An ordered workqueue implements execution ordering by using single
pool_workqueue with max_active == 1. On a given pool_workqueue, work
items are processed in FIFO order and limiting max_active to 1
enforces the queued work items to be processed one by one.
Unfortunately, 4c16bd327c ("workqueue: implement NUMA affinity for
unbound workqueues") accidentally broke this guarantee by applying
NUMA affinity to ordered workqueues too. On NUMA setups, an ordered
workqueue would end up with separate pool_workqueues for different
nodes. Each pool_workqueue still limits max_active to 1 but multiple
work items may be executed concurrently and out of order depending on
which node they are queued to.
Fix it by using dedicated ordered_wq_attrs[] when creating ordered
workqueues. The new attrs match the unbound ones except that no_numa
is always set thus forcing all NUMA nodes to share the default
pool_workqueue.
While at it, add sanity check in workqueue creation path which
verifies that an ordered workqueue has only the default
pool_workqueue.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Libin <huawei.libin@huawei.com>
Cc: stable@vger.kernel.org
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--  kernel/workqueue.c  24
1 file changed, 22 insertions(+), 2 deletions(-)
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index f8942429268b..bbb5e9832d85 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -305,6 +305,9 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER);
 /* I: attributes used when instantiating standard unbound pools on demand */
 static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS];
 
+/* I: attributes used when instantiating ordered pools on demand */
+static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS];
+
 struct workqueue_struct *system_wq __read_mostly;
 EXPORT_SYMBOL(system_wq);
 struct workqueue_struct *system_highpri_wq __read_mostly;
@@ -4107,7 +4110,7 @@ out_unlock:
 static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 {
 	bool highpri = wq->flags & WQ_HIGHPRI;
-	int cpu;
+	int cpu, ret;
 
 	if (!(wq->flags & WQ_UNBOUND)) {
 		wq->cpu_pwqs = alloc_percpu(struct pool_workqueue);
@@ -4127,6 +4130,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq)
 			mutex_unlock(&wq->mutex);
 		}
 		return 0;
+	} else if (wq->flags & __WQ_ORDERED) {
+		ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]);
+		/* there should only be single pwq for ordering guarantee */
+		WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node ||
+			      wq->pwqs.prev != &wq->dfl_pwq->pwqs_node),
+		     "ordering guarantee broken for workqueue %s\n", wq->name);
+		return ret;
 	} else {
 		return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]);
 	}
@@ -5052,13 +5062,23 @@ static int __init init_workqueues(void)
 		}
 	}
 
-	/* create default unbound wq attrs */
+	/* create default unbound and ordered wq attrs */
 	for (i = 0; i < NR_STD_WORKER_POOLS; i++) {
 		struct workqueue_attrs *attrs;
 
 		BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
 		attrs->nice = std_nice[i];
 		unbound_std_wq_attrs[i] = attrs;
+
+		/*
+		 * An ordered wq should have only one pwq as ordering is
+		 * guaranteed by max_active which is enforced by pwqs.
+		 * Turn off NUMA so that dfl_pwq is used for all nodes.
+		 */
+		BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL)));
+		attrs->nice = std_nice[i];
+		attrs->no_numa = true;
+		ordered_wq_attrs[i] = attrs;
 	}
 
 	system_wq = alloc_workqueue("events", 0, 0);