author	Daniel Jordan <daniel.m.jordan@oracle.com>	2019-09-05 21:40:28 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2019-09-13 07:15:40 -0400
commit	bfde23ce200e6d33291d29b9b8b60cc2f30f0805 (patch)
tree	697ddcbc28e4709bb70f82e4905cf06101f1f8c5 /kernel/padata.c
parent	45d153c08bc73c8ced640dc20d8f2b749a6cb0d0 (diff)
padata: unbind parallel jobs from specific CPUs
Padata binds the parallel part of a job to a single CPU and round-robins over all CPUs in the system for each successive job.  Though the serial parts rely on per-CPU queues for correct ordering, they're not necessary for parallel work, and it improves performance to run the job locally on NUMA machines and let the scheduler pick the CPU within a node on a busy system.

So, make the parallel workqueue unbound.

Update the parallel workqueue's cpumask when the instance's parallel cpumask changes.

Now that parallel jobs no longer run on max_active=1 workqueues, two or more parallel works that hash to the same CPU may run simultaneously, finish out of order, and so be serialized out of order.  Prevent this by keeping the works sorted on the reorder list by sequence number and checking that in the reordering logic.

padata_get_next becomes padata_find_next so it can be reused for the end of padata_reorder, where it's used to avoid uselessly queueing work when the next job by sequence number isn't finished yet but a later job that hashed to the same CPU has.

The ENODATA case in padata_find_next no longer makes sense because parallel jobs aren't bound to specific CPUs.  The EINPROGRESS case takes care of the scenario where a parallel job is potentially running on the same CPU as padata_find_next, and with only one error code left, just use NULL instead.

Signed-off-by: Daniel Jordan <daniel.m.jordan@oracle.com>
Cc: Herbert Xu <herbert@gondor.apana.org.au>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Steffen Klassert <steffen.klassert@secunet.com>
Cc: Tejun Heo <tj@kernel.org>
Cc: linux-crypto@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
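The ordering fix is easiest to see in isolation.  The following is a stand-alone, single-threaded user-space sketch, not kernel code; the struct and function names are invented for illustration and locking is omitted.  It mirrors the two pieces the patch adds: padata_do_serial() inserting completed jobs into the per-CPU reorder list in ascending sequence-number order, and padata_find_next() handing back the head of that list only when it carries the next expected sequence number.

#include <stdio.h>

struct job {
	unsigned int seq_nr;
	struct job *next;
};

/* Insert in ascending seq_nr order, as padata_do_serial() now does with a
 * reverse walk over the per-CPU reorder list. */
static void insert_sorted(struct job **head, struct job *new_job)
{
	struct job **pos = head;

	while (*pos && (*pos)->seq_nr < new_job->seq_nr)
		pos = &(*pos)->next;
	new_job->next = *pos;
	*pos = new_job;
}

/* Hand back the head only if it is the next job in sequence, akin to the
 * padata->seq_nr != pd->processed check in padata_find_next(); otherwise
 * leave it queued and return NULL. */
static struct job *find_next(struct job **head, unsigned int *processed)
{
	struct job *job = *head;

	if (!job || job->seq_nr != *processed)
		return NULL;
	*head = job->next;
	++*processed;
	return job;
}

int main(void)
{
	/* Jobs 2, 1, 3 finish out of order, as can happen when two parallel
	 * works that hashed to the same CPU run concurrently. */
	struct job jobs[] = { { 2, NULL }, { 1, NULL }, { 3, NULL } };
	struct job *head = NULL, *job;
	unsigned int processed = 1;
	int i;

	for (i = 0; i < 3; i++)
		insert_sorted(&head, &jobs[i]);

	while ((job = find_next(&head, &processed)))
		printf("serialize job %u\n", job->seq_nr);	/* 1, 2, 3 */

	return 0;
}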
Diffstat (limited to 'kernel/padata.c')
-rw-r--r--	kernel/padata.c	118
1 file changed, 65 insertions(+), 53 deletions(-)
diff --git a/kernel/padata.c b/kernel/padata.c
index 669f5d53d357..832224dcf2e1 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -46,18 +46,13 @@ static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
 	return target_cpu;
 }
 
-static int padata_cpu_hash(struct parallel_data *pd)
+static int padata_cpu_hash(struct parallel_data *pd, unsigned int seq_nr)
 {
-	unsigned int seq_nr;
-	int cpu_index;
-
 	/*
 	 * Hash the sequence numbers to the cpus by taking
 	 * seq_nr mod. number of cpus in use.
 	 */
-
-	seq_nr = atomic_inc_return(&pd->seq_nr);
-	cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
+	int cpu_index = seq_nr % cpumask_weight(pd->cpumask.pcpu);
 
 	return padata_index_to_cpu(pd, cpu_index);
 }
@@ -144,7 +139,8 @@ int padata_do_parallel(struct padata_instance *pinst,
 	padata->pd = pd;
 	padata->cb_cpu = *cb_cpu;
 
-	target_cpu = padata_cpu_hash(pd);
+	padata->seq_nr = atomic_inc_return(&pd->seq_nr);
+	target_cpu = padata_cpu_hash(pd, padata->seq_nr);
 	padata->cpu = target_cpu;
 	queue = per_cpu_ptr(pd->pqueue, target_cpu);
 
@@ -152,7 +148,7 @@ int padata_do_parallel(struct padata_instance *pinst,
 	list_add_tail(&padata->list, &queue->parallel.list);
 	spin_unlock(&queue->parallel.lock);
 
-	queue_work_on(target_cpu, pinst->parallel_wq, &queue->work);
+	queue_work(pinst->parallel_wq, &queue->work);
 
 out:
 	rcu_read_unlock_bh();
@@ -162,21 +158,19 @@ out:
 EXPORT_SYMBOL(padata_do_parallel);
 
 /*
- * padata_get_next - Get the next object that needs serialization.
+ * padata_find_next - Find the next object that needs serialization.
  *
  * Return values are:
  *
  * A pointer to the control struct of the next object that needs
  * serialization, if present in one of the percpu reorder queues.
  *
- * -EINPROGRESS, if the next object that needs serialization will
+ * NULL, if the next object that needs serialization will
  *  be parallel processed by another cpu and is not yet present in
  *  the cpu's reorder queue.
- *
- * -ENODATA, if this cpu has to do the parallel processing for
- *           the next object.
  */
-static struct padata_priv *padata_get_next(struct parallel_data *pd)
+static struct padata_priv *padata_find_next(struct parallel_data *pd,
+					    bool remove_object)
 {
 	struct padata_parallel_queue *next_queue;
 	struct padata_priv *padata;
@@ -187,28 +181,30 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
 	reorder = &next_queue->reorder;
 
 	spin_lock(&reorder->lock);
-	if (!list_empty(&reorder->list)) {
-		padata = list_entry(reorder->list.next,
-				    struct padata_priv, list);
-
-		list_del_init(&padata->list);
-		atomic_dec(&pd->reorder_objects);
-
-		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
-					    false);
-
+	if (list_empty(&reorder->list)) {
 		spin_unlock(&reorder->lock);
-		goto out;
+		return NULL;
 	}
-	spin_unlock(&reorder->lock);
 
-	if (__this_cpu_read(pd->pqueue->cpu_index) == next_queue->cpu_index) {
-		padata = ERR_PTR(-ENODATA);
-		goto out;
+	padata = list_entry(reorder->list.next, struct padata_priv, list);
+
+	/*
+	 * Checks the rare case where two or more parallel jobs have hashed to
+	 * the same CPU and one of the later ones finishes first.
+	 */
+	if (padata->seq_nr != pd->processed) {
+		spin_unlock(&reorder->lock);
+		return NULL;
+	}
+
+	if (remove_object) {
+		list_del_init(&padata->list);
+		atomic_dec(&pd->reorder_objects);
+		++pd->processed;
+		pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);
 	}
 
-	padata = ERR_PTR(-EINPROGRESS);
-out:
+	spin_unlock(&reorder->lock);
 	return padata;
 }
 
@@ -234,26 +230,16 @@ static void padata_reorder(struct parallel_data *pd)
 		return;
 
 	while (1) {
-		padata = padata_get_next(pd);
+		padata = padata_find_next(pd, true);
 
 		/*
 		 * If the next object that needs serialization is parallel
 		 * processed by another cpu and is still on it's way to the
 		 * cpu's reorder queue, nothing to do for now.
 		 */
-		if (PTR_ERR(padata) == -EINPROGRESS)
+		if (!padata)
 			break;
 
-		/*
-		 * This cpu has to do the parallel processing of the next
-		 * object. It's waiting in the cpu's parallelization queue,
-		 * so exit immediately.
-		 */
-		if (PTR_ERR(padata) == -ENODATA) {
-			spin_unlock_bh(&pd->lock);
-			return;
-		}
-
 		cb_cpu = padata->cb_cpu;
 		squeue = per_cpu_ptr(pd->squeue, cb_cpu);
 
@@ -277,7 +263,8 @@ static void padata_reorder(struct parallel_data *pd)
 	smp_mb();
 
 	next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
-	if (!list_empty(&next_queue->reorder.list))
+	if (!list_empty(&next_queue->reorder.list) &&
+	    padata_find_next(pd, false))
 		queue_work(pinst->serial_wq, &pd->reorder_work);
 }
 
@@ -332,9 +319,14 @@ void padata_do_serial(struct padata_priv *padata)
 	struct parallel_data *pd = padata->pd;
 	struct padata_parallel_queue *pqueue = per_cpu_ptr(pd->pqueue,
 							   padata->cpu);
+	struct padata_priv *cur;
 
 	spin_lock(&pqueue->reorder.lock);
-	list_add_tail(&padata->list, &pqueue->reorder.list);
+	/* Sort in ascending order of sequence number. */
+	list_for_each_entry_reverse(cur, &pqueue->reorder.list, list)
+		if (cur->seq_nr < padata->seq_nr)
+			break;
+	list_add(&padata->list, &cur->list);
 	atomic_inc(&pd->reorder_objects);
 	spin_unlock(&pqueue->reorder.lock);
 
@@ -353,17 +345,36 @@ static int padata_setup_cpumasks(struct parallel_data *pd,
 				 const struct cpumask *pcpumask,
 				 const struct cpumask *cbcpumask)
 {
-	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
-		return -ENOMEM;
+	struct workqueue_attrs *attrs;
+	int err = -ENOMEM;
 
+	if (!alloc_cpumask_var(&pd->cpumask.pcpu, GFP_KERNEL))
+		goto out;
 	cpumask_and(pd->cpumask.pcpu, pcpumask, cpu_online_mask);
-	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL)) {
-		free_cpumask_var(pd->cpumask.pcpu);
-		return -ENOMEM;
-	}
 
+	if (!alloc_cpumask_var(&pd->cpumask.cbcpu, GFP_KERNEL))
+		goto free_pcpu_mask;
 	cpumask_and(pd->cpumask.cbcpu, cbcpumask, cpu_online_mask);
+
+	attrs = alloc_workqueue_attrs();
+	if (!attrs)
+		goto free_cbcpu_mask;
+
+	/* Restrict parallel_wq workers to pd->cpumask.pcpu. */
+	cpumask_copy(attrs->cpumask, pd->cpumask.pcpu);
+	err = apply_workqueue_attrs(pd->pinst->parallel_wq, attrs);
+	free_workqueue_attrs(attrs);
+	if (err < 0)
+		goto free_cbcpu_mask;
+
 	return 0;
+
+free_cbcpu_mask:
+	free_cpumask_var(pd->cpumask.cbcpu);
+free_pcpu_mask:
+	free_cpumask_var(pd->cpumask.pcpu);
+out:
+	return err;
 }
 
 static void __padata_list_init(struct padata_list *pd_list)
@@ -429,6 +440,8 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	pd->squeue = alloc_percpu(struct padata_serial_queue);
 	if (!pd->squeue)
 		goto err_free_pqueue;
+
+	pd->pinst = pinst;
 	if (padata_setup_cpumasks(pd, pcpumask, cbcpumask) < 0)
 		goto err_free_squeue;
 
@@ -437,7 +450,6 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 	atomic_set(&pd->seq_nr, -1);
 	atomic_set(&pd->reorder_objects, 0);
 	atomic_set(&pd->refcnt, 0);
-	pd->pinst = pinst;
 	spin_lock_init(&pd->lock);
 	pd->cpu = cpumask_first(pd->cpumask.pcpu);
 	INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
@@ -968,8 +980,8 @@ static struct padata_instance *padata_alloc(const char *name,
 	if (!pinst)
 		goto err;
 
-	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_MEM_RECLAIM |
-					     WQ_CPU_INTENSIVE, 1, name);
+	pinst->parallel_wq = alloc_workqueue("%s_parallel", WQ_UNBOUND, 0,
+					     name);
 	if (!pinst->parallel_wq)
 		goto err_free_inst;
 