diff options
| author | Tejun Heo <tj@kernel.org> | 2013-09-05 12:30:04 -0400 |
|---|---|---|
| committer | Tejun Heo <tj@kernel.org> | 2013-11-22 18:14:02 -0500 |
| commit | 8a2b75384444488fc4f2cbb9f0921b6a0794838f (patch) | |
| tree | c4b39daad8de264be08beec77621048e9b9fb9ed | |
| parent | 91151228065354a050fd0d190aefdd662a0580aa (diff) | |
workqueue: fix ordered workqueues in NUMA setups
An ordered workqueue implements execution ordering by using a single
pool_workqueue with max_active == 1. On a given pool_workqueue, work
items are processed in FIFO order and limiting max_active to 1
forces the queued work items to be processed one by one.
Unfortunately, 4c16bd327c ("workqueue: implement NUMA affinity for
unbound workqueues") accidentally broke this guarantee by applying
NUMA affinity to ordered workqueues too. On NUMA setups, an ordered
workqueue would end up with separate pool_workqueues for different
nodes. Each pool_workqueue still limits max_active to 1 but multiple
work items may be executed concurrently and out of order depending on
which node they are queued to.
Fix it by using dedicated ordered_wq_attrs[] when creating ordered
workqueues. The new attrs match the unbound ones except that no_numa
is always set thus forcing all NUMA nodes to share the default
pool_workqueue.
While at it, add a sanity check in the workqueue creation path which
verifies that an ordered workqueue has only the default
pool_workqueue.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reported-by: Libin <huawei.libin@huawei.com>
Cc: stable@vger.kernel.org
Cc: Lai Jiangshan <laijs@cn.fujitsu.com>
| -rw-r--r-- | kernel/workqueue.c | 24 |
1 file changed, 22 insertions, 2 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c index f8942429268b..bbb5e9832d85 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c | |||
| @@ -305,6 +305,9 @@ static DEFINE_HASHTABLE(unbound_pool_hash, UNBOUND_POOL_HASH_ORDER); | |||
| 305 | /* I: attributes used when instantiating standard unbound pools on demand */ | 305 | /* I: attributes used when instantiating standard unbound pools on demand */ |
| 306 | static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; | 306 | static struct workqueue_attrs *unbound_std_wq_attrs[NR_STD_WORKER_POOLS]; |
| 307 | 307 | ||
| 308 | /* I: attributes used when instantiating ordered pools on demand */ | ||
| 309 | static struct workqueue_attrs *ordered_wq_attrs[NR_STD_WORKER_POOLS]; | ||
| 310 | |||
| 308 | struct workqueue_struct *system_wq __read_mostly; | 311 | struct workqueue_struct *system_wq __read_mostly; |
| 309 | EXPORT_SYMBOL(system_wq); | 312 | EXPORT_SYMBOL(system_wq); |
| 310 | struct workqueue_struct *system_highpri_wq __read_mostly; | 313 | struct workqueue_struct *system_highpri_wq __read_mostly; |
| @@ -4107,7 +4110,7 @@ out_unlock: | |||
| 4107 | static int alloc_and_link_pwqs(struct workqueue_struct *wq) | 4110 | static int alloc_and_link_pwqs(struct workqueue_struct *wq) |
| 4108 | { | 4111 | { |
| 4109 | bool highpri = wq->flags & WQ_HIGHPRI; | 4112 | bool highpri = wq->flags & WQ_HIGHPRI; |
| 4110 | int cpu; | 4113 | int cpu, ret; |
| 4111 | 4114 | ||
| 4112 | if (!(wq->flags & WQ_UNBOUND)) { | 4115 | if (!(wq->flags & WQ_UNBOUND)) { |
| 4113 | wq->cpu_pwqs = alloc_percpu(struct pool_workqueue); | 4116 | wq->cpu_pwqs = alloc_percpu(struct pool_workqueue); |
| @@ -4127,6 +4130,13 @@ static int alloc_and_link_pwqs(struct workqueue_struct *wq) | |||
| 4127 | mutex_unlock(&wq->mutex); | 4130 | mutex_unlock(&wq->mutex); |
| 4128 | } | 4131 | } |
| 4129 | return 0; | 4132 | return 0; |
| 4133 | } else if (wq->flags & __WQ_ORDERED) { | ||
| 4134 | ret = apply_workqueue_attrs(wq, ordered_wq_attrs[highpri]); | ||
| 4135 | /* there should only be single pwq for ordering guarantee */ | ||
| 4136 | WARN(!ret && (wq->pwqs.next != &wq->dfl_pwq->pwqs_node || | ||
| 4137 | wq->pwqs.prev != &wq->dfl_pwq->pwqs_node), | ||
| 4138 | "ordering guarantee broken for workqueue %s\n", wq->name); | ||
| 4139 | return ret; | ||
| 4130 | } else { | 4140 | } else { |
| 4131 | return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); | 4141 | return apply_workqueue_attrs(wq, unbound_std_wq_attrs[highpri]); |
| 4132 | } | 4142 | } |
| @@ -5052,13 +5062,23 @@ static int __init init_workqueues(void) | |||
| 5052 | } | 5062 | } |
| 5053 | } | 5063 | } |
| 5054 | 5064 | ||
| 5055 | /* create default unbound wq attrs */ | 5065 | /* create default unbound and ordered wq attrs */ |
| 5056 | for (i = 0; i < NR_STD_WORKER_POOLS; i++) { | 5066 | for (i = 0; i < NR_STD_WORKER_POOLS; i++) { |
| 5057 | struct workqueue_attrs *attrs; | 5067 | struct workqueue_attrs *attrs; |
| 5058 | 5068 | ||
| 5059 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); | 5069 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); |
| 5060 | attrs->nice = std_nice[i]; | 5070 | attrs->nice = std_nice[i]; |
| 5061 | unbound_std_wq_attrs[i] = attrs; | 5071 | unbound_std_wq_attrs[i] = attrs; |
| 5072 | |||
| 5073 | /* | ||
| 5074 | * An ordered wq should have only one pwq as ordering is | ||
| 5075 | * guaranteed by max_active which is enforced by pwqs. | ||
| 5076 | * Turn off NUMA so that dfl_pwq is used for all nodes. | ||
| 5077 | */ | ||
| 5078 | BUG_ON(!(attrs = alloc_workqueue_attrs(GFP_KERNEL))); | ||
| 5079 | attrs->nice = std_nice[i]; | ||
| 5080 | attrs->no_numa = true; | ||
| 5081 | ordered_wq_attrs[i] = attrs; | ||
| 5062 | } | 5082 | } |
| 5063 | 5083 | ||
| 5064 | system_wq = alloc_workqueue("events", 0, 0); | 5084 | system_wq = alloc_workqueue("events", 0, 0); |
