author	Daniel Jordan <daniel.m.jordan@oracle.com>	2019-09-05 21:40:28 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2019-09-13 07:15:40 -0400
commit	bfde23ce200e6d33291d29b9b8b60cc2f30f0805 (patch)
tree	697ddcbc28e4709bb70f82e4905cf06101f1f8c5 /kernel/padata.c
parent	45d153c08bc73c8ced640dc20d8f2b749a6cb0d0 (diff)
padata: unbind parallel jobs from specific CPUs
Padata binds the parallel part of a job to a single CPU and round-robins over all CPUs in the system for each successive job.  Though the serial parts rely on per-CPU queues for correct ordering, they're not necessary for parallel work, and it improves performance to run the job locally on NUMA machines and let the scheduler pick the CPU within a node on a busy system.

So, make the parallel workqueue unbound.

Update the parallel workqueue's cpumask when the instance's parallel cpumask changes.

Now that parallel jobs no longer run on max_active=1 workqueues, two or more parallel works that hash to the same CPU may run simultaneously, finish out of order, and so be serialized out of order.  Prevent this by keeping the works sorted on the reorder list by sequence number and checking that in the reordering logic.

padata_get_next becomes padata_find_next so it can be reused for the end of padata_reorder, where it's used to avoid uselessly queueing work when the next job by sequence number isn't finished yet but a later job that hashed to the same CPU has.

The ENODATA case in padata_find_next no longer makes sense because parallel jobs aren't bound to specific CPUs.  The EINPROGRESS case takes care of the scenario where a parallel job is potentially running on the same CPU as padata_find_next, and with only one error code left, just use NULL instead.

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
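The ordering fix is easiest to see in isolation.  The following is a stand-alone, single-threaded user-space sketch, not kernel code; the struct and function names are invented for illustration and locking is omitted.  It mirrors the two pieces the patch adds: padata_do_serial() inserting completed jobs into the per-CPU reorder list in ascending sequence-number order, and padata_find_next() handing back the head of that list only when it carries the next expected sequence number.

#include <stdio.h>

struct job {
	unsigned int seq_nr;
	struct job *next;
};

/* Insert in ascending seq_nr order, as padata_do_serial() now does with a
 * reverse walk over the per-CPU reorder list. */
static void insert_sorted(struct job **head, struct job *new_job)
{
	struct job **pos = head;

	while (*pos && (*pos)->seq_nr < new_job->seq_nr)
		pos = &(*pos)->next;
	new_job->next = *pos;
	*pos = new_job;
}

/* Hand back the head only if it is the next job in sequence, akin to the
 * padata->seq_nr != pd->processed check in padata_find_next(); otherwise
 * leave it queued and return NULL. */
static struct job *find_next(struct job **head, unsigned int *processed)
{
	struct job *job = *head;

	if (!job || job->seq_nr != *processed)
		return NULL;
	*head = job->next;
	++*processed;
	return job;
}

int main(void)
{
	/* Jobs 2, 1, 3 finish out of order, as can happen when two parallel
	 * works that hashed to the same CPU run concurrently. */
	struct job jobs[] = { { 2, NULL }, { 1, NULL }, { 3, NULL } };
	struct job *head = NULL, *job;
	unsigned int processed = 1;
	int i;

	for (i = 0; i < 3; i++)
		insert_sorted(&head, &jobs[i]);

	while ((job = find_next(&head, &processed)))
		printf("serialize job %u\n", job->seq_nr);	/* 1, 2, 3 */

	return 0;
}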
Diffstat (limited to 'kernel/padata.c')
-rw-r--r--	kernel/padata.c	118
1 file changed, 65 insertions(+), 53 deletions(-)
diff --git a/kernel/padata.c b/kernel/padata.c
index 669f5d53d357..832224dcf2e1 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -46,18 +46,13 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 	return target_cpu;
 }
 
-static int padata_cpu_hash(struct parallel_data *pd)
+static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
 {
-	unsigned int seq_nr;
-	int cpu_index;
-
 	/*
 	 * Hash the sequence numbers to the cpus by taking
 	 * seq_nr mod. number of cpus in use.
 	 */
-
-	seq_nr = atomic_inc_return(&pd->seq_nr);
-	cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
+	int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
 	return padata_index_to_cpu(pd, cpu_index);
 }
@@ -144,7 +139,8 @@ int padata_do_parallel(struct padata_instance *pinst,
 	padata->pd = pd;
 	padata->cb_cpu = *cb_cpu;
 
-	target_cpu = padata_cpu_hash(pd);
+	padata->seq_nr = atomic_inc_return(&pd->seq_nr);
+	target_cpu = padata_cpu_hash(pd, padata->seq_nr);
 	padata->cpu = target_cpu;
 	queue = per_cpu_ptr(pd->pqueue, target_cpu);
 
@@ -152,7 +148,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 	list_add_tail(&padata->list, &queue->parallel.list);
 	spin_unlock(&queue->parallel.lock);
 
-	queue_work_on(target_cpu, pinst->parallel_wq, &queue->work);
+	queue_work(pinst->parallel_wq, &queue->work);
 
 out:
 	rcu_read_unlock_bh();
@@ -162,21 +158,19 @@ out:
 EXPORT_SYMBOL(padata_do_parallel);
 
 /*
- * padata_get_next - Get the next object that needs serialization.
+ * padata_find_next - Find the next object that needs serialization.
  *
  * Return values are:
  *
  * A pointer to the control struct of the next object that needs
  * serialization, if present in one of the percpu reorder queues.
  *
- * -EINPROGRESS, if the next object that needs serialization will
+ * NULL, if the next object that needs serialization will
  *  be parallel processed by another cpu and is not yet present in
  *  the cpu's reorder queue.
- *
- * -ENODATA, if this cpu has to do the parallel processing for
- *           the next object.
  */
-static struct padata_priv *padata_get_next(struct parallel_data *pd)
+static struct padata_priv *padata_find_next(struct parallel_data *pd,
+					    bool remove_object)
 {
 	struct padata_parallel_queue *next_queue;
 	struct padata_priv *padata;
@@ -187,28 +181,30 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 	reorder = &next_queue->reorder;
 
 	spin_lock(&reorder->lock);
-	if (!list_empty(&reorder->list)) {
-		padata = list_entry(reorder->list.next,
-				    struct padata_priv, list);
-
-		list_del_init(&padata->list);
-		atomic_dec(&pd->reorder_objects);
-
-		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
-					    false);
-
+	if (list_empty(&reorder->list)) {
 		spin_unlock(&reorder->lock);
-		goto out;
+		return NULL;
 	}
-	spin_unlock(&reorder->lock);
 
-	if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
-		padata = ERR_PTR(-ENODATA);
-		goto out;
+	padata = list_entry(reorder->list.next, struct padata_priv, list);
+
+	/*
+	 * Checks the rare case where two or more parallel jobs have hashed to
+	 * the same CPU and one of the later ones finishes first.
+	 */
+	if (padata->seq_nr != pd->processed) {
+		spin_unlock(&reorder->lock);
+		return NULL;
+	}
+
+	if (remove_object) {
+		list_del_init(&padata->list);
+		atomic_dec(&pd->reorder_objects);
+		++pd->processed;
+		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
 	}
 
-	padata = ERR_PTR(-EINPROGRESS);
-out:
+	spin_unlock(&reorder->lock);
 	return padata;
 }
 
@@ -234,26 +230,16 @@ static void padata_reorder(struct parallel_data *pd)
 		return;
 
 	while (1) {
-		padata = padata_get_next(pd);
+		padata = padata_find_next(pd, true);
 
 		/*
 		 * If the next object that needs serialization is parallel
 		 * processed by another cpu and is still on it's way to the
 		 * cpu's reorder queue, nothing to do for now.
 		 */
-		if (PTR_ERR(padata) == -EINPROGRESS)
+		if (!padata)
 			break;
 
-		/*
-		 * This cpu has to do the parallel processing of the next
-		 * object. It's waiting in the cpu's parallelization queue,
-		 * so exit immediately.
-		 */
-		if (PTR_ERR(padata) == -ENODATA) {
-			spin_unlock_bh(&pd->lock);
-			return;
-		}
-
 		cb_cpu = padata->cb_cpu;
 		squeue = per_cpu_ptr(pd->squeue, cb_cpu);
 
@@ -277,7 +263,8 @@ static void padata_reorder(struct parallel_data *pd)
 	smp_mb();
 
 	next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
-	if (!list_empty(&next_queue->reorder.list))
+	if (!list_empty(&next_queue->reorder.list) &&
+	    padata_find_next(pd, false))
 		queue_work(pinst->serial_wq, &pd->reorder_work);
 }
 
@@ -332,9 +319,14 @@ void padata_do_serial(struct padata_priv *padata)
 	struct parallel_data *pd = padata->pd;
 	struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
 							   padata->cpu);
+	struct padata_priv *cur;
 
 	spin_lock(&pqueue->reorder.lock);
-	list_add_tail(&padata->list, &pqueue->reorder.list);
+	/* Sort in ascending order of sequence number. */
+	list_for_each_entry_reverse(cur, &pqueue->reorder.list, list)
+		if (cur->seq_nr < padata->seq_nr)
+			break;
+	list_add(&padata->list, &cur->list);
 	atomic_inc(&pd->reorder_objects);
 	spin_unlock(&pqueue->reorder.lock);
 
@@ -353,17 +345,36 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
 				 const struct cpumask *pcpumask,
 				 const struct cpumask *cbcpumask)
 {
-	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
-		return -ENOMEM;
+	struct workqueue_attrs *attrs;
+	int err = -ENOMEM;
 
+	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
+		goto out;
 	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
-	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
-		free_cpumask_var(pd->cpumask.pcpu);
-		return -ENOMEM;
-	}
 
+	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
+		goto free_pcpu_mask;
 	cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
+
+	attrs = alloc_workqueue_attrs();
+	if (!attrs)
+		goto free_cbcpu_mask;
+
+	/* Restrict parallel_wq workers to pd->cpumask.pcpu. */
+	cpumask_copy(attrs->cpumask, pd->cpumask.pcpu);
+	err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs);
+	free_workqueue_attrs(attrs);
+	if (err < 0)
+		goto free_cbcpu_mask;
+
 	return 0;
+
+free_cbcpu_mask:
+	free_cpumask_var(pd->cpumask.cbcpu);
+free_pcpu_mask:
+	free_cpumask_var(pd->cpumask.pcpu);
+out:
+	return err;
 }
 
 static void __padata_list_init(struct padata_list *pd_list)
@@ -429,6 +440,8 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	pd->squeue = alloc_percpu(struct padata_serial_queue);
 	if (!pd->squeue)
 		goto err_free_pqueue;
+
+	pd->pinst = pinst;
 	if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
 		goto err_free_squeue;
 
@@ -437,7 +450,6 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
 	atomic_set(&pd->refcnt, 0);
-	pd->pinst = pinst;
 	spin_lock_init(&pd->lock);
 	pd->cpu = cpumask_first(pd->cpumask.pcpu);
 	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -968,8 +980,8 @@ static struct padata_instance *padata_alloc(const char *name,
 	if (!pinst)
 		goto err;
 
-	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_MEM_RECLAIM |
-					     WQ_CPU_INTENSIVE, 1, name);
+	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
+					     name);
 	if (!pinst->parallel_wq)
 		goto err_free_inst;
 