path: root/kernel/padata.c
author	Herbert Xu <herbert@gondor.apana.org.au>	2019-07-18 11:01:46 -0400
committer	Herbert Xu <herbert@gondor.apana.org.au>	2019-07-27 07:08:36 -0400
commit	6fc4dbcf0276279d488c5fbbfabe94734134f4fa (patch)
tree	3b7912a87e273ec7be4b8e2306e678c19556bdcb /kernel/padata.c
parent	97ac82d9135d989c81268cb0a02d336993b6f9cf (diff)
padata: Replace delayed timer with immediate workqueue in padata_reorder
The function padata_reorder will use a timer when it cannot progress while completed jobs are outstanding (pd->reorder_objects > 0). This is suboptimal: if we do end up using the timer, it introduces a gratuitous delay of one second.

In fact we can easily distinguish between whether completed jobs are outstanding and whether we can make progress. All we have to do is look at the next pqueue. This patch does that by replacing pd->processed with pd->cpu so that the next pqueue is more accessible.

A work queue is used instead of the original try_again to avoid hogging the CPU.

Note that we don't bother removing the work queue in padata_flush_queues because the whole premise is broken. You cannot flush async crypto requests, so it makes no sense to even try. A subsequent patch will fix it by replacing it with a ref counting scheme.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
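As an illustration of the core change (a simplified sketch, not literal patch code; all identifiers are taken from the diff below), padata_get_next used to recompute the target reorder queue from a running counter, whereas it now remembers the CPU directly:

        /* Before: derive the next queue from pd->processed on every call. */
        num_cpus   = cpumask_weight(pd->cpumask.pcpu);
        next_index = pd->processed % num_cpus;
        cpu        = padata_index_to_cpu(pd, next_index);
        next_queue = per_cpu_ptr(pd->pqueue, cpu);
        /* ...and on success: pd->processed++; */

        /* After: track the expected CPU in pd->cpu and step it round-robin. */
        next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
        /* ...and on success: */
        pd->cpu = cpumask_next_wrap(pd->cpu, pd->cpumask.pcpu, -1, false);

Since padata_do_parallel distributes objects across pd->cpumask.pcpu round-robin by sequence number, the queue holding the next object in sequence is always the one pd->cpu points at, so "can we make progress?" reduces to a list_empty() check on that queue and the one-second timer becomes unnecessary.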
Diffstat (limited to 'kernel/padata.c')
-rw-r--r--	kernel/padata.c	97
1 file changed, 18 insertions(+), 79 deletions(-)
diff --git a/kernel/padata.c b/kernel/padata.c
index 15a8ad63f4ff..fbafca18597f 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -165,23 +165,12 @@ EXPORT_SYMBOL(padata_do_parallel);
  */
 static struct padata_priv *padata_get_next(struct parallel_data *pd)
 {
-        int cpu, num_cpus;
-        unsigned int next_nr, next_index;
         struct padata_parallel_queue *next_queue;
         struct padata_priv *padata;
         struct padata_list *reorder;
+        int cpu = pd->cpu;
 
-        num_cpus = cpumask_weight(pd->cpumask.pcpu);
-
-        /*
-         * Calculate the percpu reorder queue and the sequence
-         * number of the next object.
-         */
-        next_nr = pd->processed;
-        next_index = next_nr % num_cpus;
-        cpu = padata_index_to_cpu(pd, next_index);
         next_queue = per_cpu_ptr(pd->pqueue, cpu);
-
         reorder = &next_queue->reorder;
 
         spin_lock(&reorder->lock);
@@ -192,7 +181,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
                 list_del_init(&padata->list);
                 atomic_dec(&pd->reorder_objects);
 
-                pd->processed++;
+                pd->cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1,
+                                            false);
 
                 spin_unlock(&reorder->lock);
                 goto out;
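A note on the cpumask_next_wrap() call above: with start == -1 and wrap == false it yields the next CPU set in pd->cpumask.pcpu after cpu, wrapping back to the first set CPU once the end of the mask is reached, so pd->cpu keeps cycling through the parallel cpumask. A hypothetical walk for a pcpu mask containing CPUs 1, 3 and 6 (illustrative only, not part of the patch):

        cpu = cpumask_first(pd->cpumask.pcpu);                      /* 1 */
        cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);  /* 3 */
        cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);  /* 6 */
        cpu = cpumask_next_wrap(cpu, pd->cpumask.pcpu, -1, false);  /* wraps to 1 */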
@@ -215,6 +205,7 @@ static void padata_reorder(struct parallel_data *pd)
         struct padata_priv *padata;
         struct padata_serial_queue *squeue;
         struct padata_instance *pinst = pd->pinst;
+        struct padata_parallel_queue *next_queue;
 
         /*
          * We need to ensure that only one cpu can work on dequeueing of
@@ -246,7 +237,6 @@ static void padata_reorder(struct parallel_data *pd)
                  * so exit immediately.
                  */
                 if (PTR_ERR(padata) == -ENODATA) {
-                        del_timer(&pd->timer);
                         spin_unlock_bh(&pd->lock);
                         return;
                 }
@@ -265,70 +255,29 @@ static void padata_reorder(struct parallel_data *pd)
 
         /*
          * The next object that needs serialization might have arrived to
-         * the reorder queues in the meantime, we will be called again
-         * from the timer function if no one else cares for it.
+         * the reorder queues in the meantime.
          *
-         * Ensure reorder_objects is read after pd->lock is dropped so we see
-         * an increment from another task in padata_do_serial.  Pairs with
+         * Ensure reorder queue is read after pd->lock is dropped so we see
+         * new objects from another task in padata_do_serial.  Pairs with
          * smp_mb__after_atomic in padata_do_serial.
          */
         smp_mb();
-        if (atomic_read(&pd->reorder_objects)
-                        && !(pinst->flags & PADATA_RESET))
-                mod_timer(&pd->timer, jiffies + HZ);
-        else
-                del_timer(&pd->timer);
 
-        return;
+        next_queue = per_cpu_ptr(pd->pqueue, pd->cpu);
+        if (!list_empty(&next_queue->reorder.list))
+                queue_work(pinst->wq, &pd->reorder_work);
 }
 
 static void invoke_padata_reorder(struct work_struct *work)
 {
-        struct padata_parallel_queue *pqueue;
         struct parallel_data *pd;
 
         local_bh_disable();
-        pqueue = container_of(work, struct padata_parallel_queue, reorder_work);
-        pd = pqueue->pd;
+        pd = container_of(work, struct parallel_data, reorder_work);
         padata_reorder(pd);
         local_bh_enable();
 }
 
-static void padata_reorder_timer(struct timer_list *t)
-{
-        struct parallel_data *pd = from_timer(pd, t, timer);
-        unsigned int weight;
-        int target_cpu, cpu;
-
-        cpu = get_cpu();
-
-        /* We don't lock pd here to not interfere with parallel processing
-         * padata_reorder() calls on other CPUs. We just need any CPU out of
-         * the cpumask.pcpu set. It would be nice if it's the right one but
-         * it doesn't matter if we're off to the next one by using an outdated
-         * pd->processed value.
-         */
-        weight = cpumask_weight(pd->cpumask.pcpu);
-        target_cpu = padata_index_to_cpu(pd, pd->processed % weight);
-
-        /* ensure to call the reorder callback on the correct CPU */
-        if (cpu != target_cpu) {
-                struct padata_parallel_queue *pqueue;
-                struct padata_instance *pinst;
-
-                /* The timer function is serialized wrt itself -- no locking
-                 * needed.
-                 */
-                pinst = pd->pinst;
-                pqueue = per_cpu_ptr(pd->pqueue, target_cpu);
-                queue_work_on(target_cpu, pinst->wq, &pqueue->reorder_work);
-        } else {
-                padata_reorder(pd);
-        }
-
-        put_cpu();
-}
-
 static void padata_serial_worker(struct work_struct *serial_work)
 {
         struct padata_serial_queue *squeue;
@@ -376,9 +325,8 @@ void padata_do_serial(struct padata_priv *padata)
 
         cpu = get_cpu();
 
-        /* We need to run on the same CPU padata_do_parallel(.., padata, ..)
-         * was called on -- or, at least, enqueue the padata object into the
-         * correct per-cpu queue.
+        /* We need to enqueue the padata object into the correct
+         * per-cpu queue.
          */
         if (cpu != padata->cpu) {
                 reorder_via_wq = 1;
@@ -388,12 +336,12 @@ void padata_do_serial(struct padata_priv *padata)
         pqueue = per_cpu_ptr(pd->pqueue, cpu);
 
         spin_lock(&pqueue->reorder.lock);
-        atomic_inc(&pd->reorder_objects);
         list_add_tail(&padata->list, &pqueue->reorder.list);
+        atomic_inc(&pd->reorder_objects);
         spin_unlock(&pqueue->reorder.lock);
 
         /*
-         * Ensure the atomic_inc of reorder_objects above is ordered correctly
+         * Ensure the addition to the reorder list is ordered correctly
          * with the trylock of pd->lock in padata_reorder.  Pairs with smp_mb
          * in padata_reorder.
          */
@@ -401,13 +349,7 @@ void padata_do_serial(struct padata_priv *padata)
 
         put_cpu();
 
-        /* If we're running on the wrong CPU, call padata_reorder() via a
-         * kernel worker.
-         */
-        if (reorder_via_wq)
-                queue_work_on(cpu, pd->pinst->wq, &pqueue->reorder_work);
-        else
-                padata_reorder(pd);
+        padata_reorder(pd);
 }
 EXPORT_SYMBOL(padata_do_serial);
 
@@ -463,14 +405,12 @@ static void padata_init_pqueues(struct parallel_data *pd)
                         continue;
                 }
 
-                pqueue->pd = pd;
                 pqueue->cpu_index = cpu_index;
                 cpu_index++;
 
                 __padata_list_init(&pqueue->reorder);
                 __padata_list_init(&pqueue->parallel);
                 INIT_WORK(&pqueue->work, padata_parallel_worker);
-                INIT_WORK(&pqueue->reorder_work, invoke_padata_reorder);
                 atomic_set(&pqueue->num_obj, 0);
         }
 }
@@ -498,12 +438,13 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
 
         padata_init_pqueues(pd);
         padata_init_squeues(pd);
-        timer_setup(&pd->timer, padata_reorder_timer, 0);
         atomic_set(&pd->seq_nr, -1);
         atomic_set(&pd->reorder_objects, 0);
         atomic_set(&pd->refcnt, 0);
         pd->pinst = pinst;
         spin_lock_init(&pd->lock);
+        pd->cpu = cpumask_first(pcpumask);
+        INIT_WORK(&pd->reorder_work, invoke_padata_reorder);
 
         return pd;
 
@@ -538,8 +479,6 @@ static void padata_flush_queues(struct parallel_data *pd)
                 flush_work(&pqueue->work);
         }
 
-        del_timer_sync(&pd->timer);
-
         if (atomic_read(&pd->reorder_objects))
                 padata_reorder(pd);
 