aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/padata.c185
1 files changed, 131 insertions, 54 deletions
diff --git a/kernel/padata.c b/kernel/padata.c
index fd03513c7327..b1c9857f8402 100644
--- a/kernel/padata.c
+++ b/kernel/padata.c
@@ -29,7 +29,7 @@
29#include <linux/rcupdate.h> 29#include <linux/rcupdate.h>
30 30
31#define MAX_SEQ_NR INT_MAX - NR_CPUS 31#define MAX_SEQ_NR INT_MAX - NR_CPUS
32#define MAX_OBJ_NUM 10000 * NR_CPUS 32#define MAX_OBJ_NUM 1000
33 33
34static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index) 34static int padata_index_to_cpu(struct parallel_data *pd, int cpu_index)
35{ 35{
@@ -88,7 +88,7 @@ static void padata_parallel_worker(struct work_struct *work)
88 local_bh_enable(); 88 local_bh_enable();
89} 89}
90 90
91/* 91/**
92 * padata_do_parallel - padata parallelization function 92 * padata_do_parallel - padata parallelization function
93 * 93 *
94 * @pinst: padata instance 94 * @pinst: padata instance
@@ -152,6 +152,23 @@ out:
152} 152}
153EXPORT_SYMBOL(padata_do_parallel); 153EXPORT_SYMBOL(padata_do_parallel);
154 154
155/*
156 * padata_get_next - Get the next object that needs serialization.
157 *
158 * Return values are:
159 *
160 * A pointer to the control struct of the next object that needs
161 * serialization, if present in one of the percpu reorder queues.
162 *
163 * NULL, if all percpu reorder queues are empty.
164 *
165 * -EINPROGRESS, if the next object that needs serialization will
166 * be parallel processed by another cpu and is not yet present in
167 * the cpu's reorder queue.
168 *
169 * -ENODATA, if this cpu has to do the parallel processing for
170 * the next object.
171 */
155static struct padata_priv *padata_get_next(struct parallel_data *pd) 172static struct padata_priv *padata_get_next(struct parallel_data *pd)
156{ 173{
157 int cpu, num_cpus, empty, calc_seq_nr; 174 int cpu, num_cpus, empty, calc_seq_nr;
@@ -173,7 +190,7 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
173 190
174 /* 191 /*
175 * Calculate the seq_nr of the object that should be 192 * Calculate the seq_nr of the object that should be
176 * next in this queue. 193 * next in this reorder queue.
177 */ 194 */
178 overrun = 0; 195 overrun = 0;
179 calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus) 196 calc_seq_nr = (atomic_read(&queue->num_obj) * num_cpus)
@@ -231,7 +248,8 @@ static struct padata_priv *padata_get_next(struct parallel_data *pd)
231 goto out; 248 goto out;
232 } 249 }
233 250
234 if (next_nr % num_cpus == next_queue->cpu_index) { 251 queue = per_cpu_ptr(pd->queue, smp_processor_id());
252 if (queue->cpu_index == next_queue->cpu_index) {
235 padata = ERR_PTR(-ENODATA); 253 padata = ERR_PTR(-ENODATA);
236 goto out; 254 goto out;
237 } 255 }
@@ -247,19 +265,40 @@ static void padata_reorder(struct parallel_data *pd)
247 struct padata_queue *queue; 265 struct padata_queue *queue;
248 struct padata_instance *pinst = pd->pinst; 266 struct padata_instance *pinst = pd->pinst;
249 267
250try_again: 268 /*
269 * We need to ensure that only one cpu can work on dequeueing of
270 * the reorder queue the time. Calculating in which percpu reorder
271 * queue the next object will arrive takes some time. A spinlock
272 * would be highly contended. Also it is not clear in which order
273 * the objects arrive to the reorder queues. So a cpu could wait to
274 * get the lock just to notice that there is nothing to do at the
275 * moment. Therefore we use a trylock and let the holder of the lock
276 * care for all the objects enqueued during the holdtime of the lock.
277 */
251 if (!spin_trylock_bh(&pd->lock)) 278 if (!spin_trylock_bh(&pd->lock))
252 goto out; 279 return;
253 280
254 while (1) { 281 while (1) {
255 padata = padata_get_next(pd); 282 padata = padata_get_next(pd);
256 283
284 /*
285 * All reorder queues are empty, or the next object that needs
286 * serialization is parallel processed by another cpu and is
287 * still on it's way to the cpu's reorder queue, nothing to
288 * do for now.
289 */
257 if (!padata || PTR_ERR(padata) == -EINPROGRESS) 290 if (!padata || PTR_ERR(padata) == -EINPROGRESS)
258 break; 291 break;
259 292
293 /*
294 * This cpu has to do the parallel processing of the next
295 * object. It's waiting in the cpu's parallelization queue,
296 * so exit imediately.
297 */
260 if (PTR_ERR(padata) == -ENODATA) { 298 if (PTR_ERR(padata) == -ENODATA) {
299 del_timer(&pd->timer);
261 spin_unlock_bh(&pd->lock); 300 spin_unlock_bh(&pd->lock);
262 goto out; 301 return;
263 } 302 }
264 303
265 queue = per_cpu_ptr(pd->queue, padata->cb_cpu); 304 queue = per_cpu_ptr(pd->queue, padata->cb_cpu);
@@ -273,13 +312,27 @@ try_again:
273 312
274 spin_unlock_bh(&pd->lock); 313 spin_unlock_bh(&pd->lock);
275 314
276 if (atomic_read(&pd->reorder_objects)) 315 /*
277 goto try_again; 316 * The next object that needs serialization might have arrived to
317 * the reorder queues in the meantime, we will be called again
318 * from the timer function if noone else cares for it.
319 */
320 if (atomic_read(&pd->reorder_objects)
321 && !(pinst->flags & PADATA_RESET))
322 mod_timer(&pd->timer, jiffies + HZ);
323 else
324 del_timer(&pd->timer);
278 325
279out:
280 return; 326 return;
281} 327}
282 328
329static void padata_reorder_timer(unsigned long arg)
330{
331 struct parallel_data *pd = (struct parallel_data *)arg;
332
333 padata_reorder(pd);
334}
335
283static void padata_serial_worker(struct work_struct *work) 336static void padata_serial_worker(struct work_struct *work)
284{ 337{
285 struct padata_queue *queue; 338 struct padata_queue *queue;
@@ -308,7 +361,7 @@ static void padata_serial_worker(struct work_struct *work)
308 local_bh_enable(); 361 local_bh_enable();
309} 362}
310 363
311/* 364/**
312 * padata_do_serial - padata serialization function 365 * padata_do_serial - padata serialization function
313 * 366 *
314 * @padata: object to be serialized. 367 * @padata: object to be serialized.
@@ -338,6 +391,7 @@ void padata_do_serial(struct padata_priv *padata)
338} 391}
339EXPORT_SYMBOL(padata_do_serial); 392EXPORT_SYMBOL(padata_do_serial);
340 393
394/* Allocate and initialize the internal cpumask dependend resources. */
341static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst, 395static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
342 const struct cpumask *cpumask) 396 const struct cpumask *cpumask)
343{ 397{
@@ -358,17 +412,15 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
358 if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL)) 412 if (!alloc_cpumask_var(&pd->cpumask, GFP_KERNEL))
359 goto err_free_queue; 413 goto err_free_queue;
360 414
361 for_each_possible_cpu(cpu) { 415 cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
416
417 for_each_cpu(cpu, pd->cpumask) {
362 queue = per_cpu_ptr(pd->queue, cpu); 418 queue = per_cpu_ptr(pd->queue, cpu);
363 419
364 queue->pd = pd; 420 queue->pd = pd;
365 421
366 if (cpumask_test_cpu(cpu, cpumask) 422 queue->cpu_index = cpu_index;
367 && cpumask_test_cpu(cpu, cpu_active_mask)) { 423 cpu_index++;
368 queue->cpu_index = cpu_index;
369 cpu_index++;
370 } else
371 queue->cpu_index = -1;
372 424
373 INIT_LIST_HEAD(&queue->reorder.list); 425 INIT_LIST_HEAD(&queue->reorder.list);
374 INIT_LIST_HEAD(&queue->parallel.list); 426 INIT_LIST_HEAD(&queue->parallel.list);
@@ -382,11 +434,10 @@ static struct parallel_data *padata_alloc_pd(struct padata_instance *pinst,
382 atomic_set(&queue->num_obj, 0); 434 atomic_set(&queue->num_obj, 0);
383 } 435 }
384 436
385 cpumask_and(pd->cpumask, cpumask, cpu_active_mask);
386
387 num_cpus = cpumask_weight(pd->cpumask); 437 num_cpus = cpumask_weight(pd->cpumask);
388 pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1; 438 pd->max_seq_nr = (MAX_SEQ_NR / num_cpus) * num_cpus - 1;
389 439
440 setup_timer(&pd->timer, padata_reorder_timer, (unsigned long)pd);
390 atomic_set(&pd->seq_nr, -1); 441 atomic_set(&pd->seq_nr, -1);
391 atomic_set(&pd->reorder_objects, 0); 442 atomic_set(&pd->reorder_objects, 0);
392 atomic_set(&pd->refcnt, 0); 443 atomic_set(&pd->refcnt, 0);
@@ -410,6 +461,31 @@ static void padata_free_pd(struct parallel_data *pd)
410 kfree(pd); 461 kfree(pd);
411} 462}
412 463
464/* Flush all objects out of the padata queues. */
465static void padata_flush_queues(struct parallel_data *pd)
466{
467 int cpu;
468 struct padata_queue *queue;
469
470 for_each_cpu(cpu, pd->cpumask) {
471 queue = per_cpu_ptr(pd->queue, cpu);
472 flush_work(&queue->pwork);
473 }
474
475 del_timer_sync(&pd->timer);
476
477 if (atomic_read(&pd->reorder_objects))
478 padata_reorder(pd);
479
480 for_each_cpu(cpu, pd->cpumask) {
481 queue = per_cpu_ptr(pd->queue, cpu);
482 flush_work(&queue->swork);
483 }
484
485 BUG_ON(atomic_read(&pd->refcnt) != 0);
486}
487
488/* Replace the internal control stucture with a new one. */
413static void padata_replace(struct padata_instance *pinst, 489static void padata_replace(struct padata_instance *pinst,
414 struct parallel_data *pd_new) 490 struct parallel_data *pd_new)
415{ 491{
@@ -421,17 +497,13 @@ static void padata_replace(struct padata_instance *pinst,
421 497
422 synchronize_rcu(); 498 synchronize_rcu();
423 499
424 while (atomic_read(&pd_old->refcnt) != 0) 500 padata_flush_queues(pd_old);
425 yield();
426
427 flush_workqueue(pinst->wq);
428
429 padata_free_pd(pd_old); 501 padata_free_pd(pd_old);
430 502
431 pinst->flags &= ~PADATA_RESET; 503 pinst->flags &= ~PADATA_RESET;
432} 504}
433 505
434/* 506/**
435 * padata_set_cpumask - set the cpumask that padata should use 507 * padata_set_cpumask - set the cpumask that padata should use
436 * 508 *
437 * @pinst: padata instance 509 * @pinst: padata instance
@@ -443,10 +515,10 @@ int padata_set_cpumask(struct padata_instance *pinst,
443 struct parallel_data *pd; 515 struct parallel_data *pd;
444 int err = 0; 516 int err = 0;
445 517
446 might_sleep();
447
448 mutex_lock(&pinst->lock); 518 mutex_lock(&pinst->lock);
449 519
520 get_online_cpus();
521
450 pd = padata_alloc_pd(pinst, cpumask); 522 pd = padata_alloc_pd(pinst, cpumask);
451 if (!pd) { 523 if (!pd) {
452 err = -ENOMEM; 524 err = -ENOMEM;
@@ -458,6 +530,8 @@ int padata_set_cpumask(struct padata_instance *pinst,
458 padata_replace(pinst, pd); 530 padata_replace(pinst, pd);
459 531
460out: 532out:
533 put_online_cpus();
534
461 mutex_unlock(&pinst->lock); 535 mutex_unlock(&pinst->lock);
462 536
463 return err; 537 return err;
@@ -479,7 +553,7 @@ static int __padata_add_cpu(struct padata_instance *pinst, int cpu)
479 return 0; 553 return 0;
480} 554}
481 555
482/* 556/**
483 * padata_add_cpu - add a cpu to the padata cpumask 557 * padata_add_cpu - add a cpu to the padata cpumask
484 * 558 *
485 * @pinst: padata instance 559 * @pinst: padata instance
@@ -489,12 +563,12 @@ int padata_add_cpu(struct padata_instance *pinst, int cpu)
489{ 563{
490 int err; 564 int err;
491 565
492 might_sleep();
493
494 mutex_lock(&pinst->lock); 566 mutex_lock(&pinst->lock);
495 567
568 get_online_cpus();
496 cpumask_set_cpu(cpu, pinst->cpumask); 569 cpumask_set_cpu(cpu, pinst->cpumask);
497 err = __padata_add_cpu(pinst, cpu); 570 err = __padata_add_cpu(pinst, cpu);
571 put_online_cpus();
498 572
499 mutex_unlock(&pinst->lock); 573 mutex_unlock(&pinst->lock);
500 574
@@ -517,7 +591,7 @@ static int __padata_remove_cpu(struct padata_instance *pinst, int cpu)
517 return 0; 591 return 0;
518} 592}
519 593
520/* 594/**
521 * padata_remove_cpu - remove a cpu from the padata cpumask 595 * padata_remove_cpu - remove a cpu from the padata cpumask
522 * 596 *
523 * @pinst: padata instance 597 * @pinst: padata instance
@@ -527,12 +601,12 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
527{ 601{
528 int err; 602 int err;
529 603
530 might_sleep();
531
532 mutex_lock(&pinst->lock); 604 mutex_lock(&pinst->lock);
533 605
606 get_online_cpus();
534 cpumask_clear_cpu(cpu, pinst->cpumask); 607 cpumask_clear_cpu(cpu, pinst->cpumask);
535 err = __padata_remove_cpu(pinst, cpu); 608 err = __padata_remove_cpu(pinst, cpu);
609 put_online_cpus();
536 610
537 mutex_unlock(&pinst->lock); 611 mutex_unlock(&pinst->lock);
538 612
@@ -540,38 +614,35 @@ int padata_remove_cpu(struct padata_instance *pinst, int cpu)
540} 614}
541EXPORT_SYMBOL(padata_remove_cpu); 615EXPORT_SYMBOL(padata_remove_cpu);
542 616
543/* 617/**
544 * padata_start - start the parallel processing 618 * padata_start - start the parallel processing
545 * 619 *
546 * @pinst: padata instance to start 620 * @pinst: padata instance to start
547 */ 621 */
548void padata_start(struct padata_instance *pinst) 622void padata_start(struct padata_instance *pinst)
549{ 623{
550 might_sleep();
551
552 mutex_lock(&pinst->lock); 624 mutex_lock(&pinst->lock);
553 pinst->flags |= PADATA_INIT; 625 pinst->flags |= PADATA_INIT;
554 mutex_unlock(&pinst->lock); 626 mutex_unlock(&pinst->lock);
555} 627}
556EXPORT_SYMBOL(padata_start); 628EXPORT_SYMBOL(padata_start);
557 629
558/* 630/**
559 * padata_stop - stop the parallel processing 631 * padata_stop - stop the parallel processing
560 * 632 *
561 * @pinst: padata instance to stop 633 * @pinst: padata instance to stop
562 */ 634 */
563void padata_stop(struct padata_instance *pinst) 635void padata_stop(struct padata_instance *pinst)
564{ 636{
565 might_sleep();
566
567 mutex_lock(&pinst->lock); 637 mutex_lock(&pinst->lock);
568 pinst->flags &= ~PADATA_INIT; 638 pinst->flags &= ~PADATA_INIT;
569 mutex_unlock(&pinst->lock); 639 mutex_unlock(&pinst->lock);
570} 640}
571EXPORT_SYMBOL(padata_stop); 641EXPORT_SYMBOL(padata_stop);
572 642
573static int __cpuinit padata_cpu_callback(struct notifier_block *nfb, 643#ifdef CONFIG_HOTPLUG_CPU
574 unsigned long action, void *hcpu) 644static int padata_cpu_callback(struct notifier_block *nfb,
645 unsigned long action, void *hcpu)
575{ 646{
576 int err; 647 int err;
577 struct padata_instance *pinst; 648 struct padata_instance *pinst;
@@ -621,8 +692,9 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
621 692
622 return NOTIFY_OK; 693 return NOTIFY_OK;
623} 694}
695#endif
624 696
625/* 697/**
626 * padata_alloc - allocate and initialize a padata instance 698 * padata_alloc - allocate and initialize a padata instance
627 * 699 *
628 * @cpumask: cpumask that padata uses for parallelization 700 * @cpumask: cpumask that padata uses for parallelization
@@ -631,7 +703,6 @@ static int __cpuinit padata_cpu_callback(struct notifier_block *nfb,
631struct padata_instance *padata_alloc(const struct cpumask *cpumask, 703struct padata_instance *padata_alloc(const struct cpumask *cpumask,
632 struct workqueue_struct *wq) 704 struct workqueue_struct *wq)
633{ 705{
634 int err;
635 struct padata_instance *pinst; 706 struct padata_instance *pinst;
636 struct parallel_data *pd; 707 struct parallel_data *pd;
637 708
@@ -639,6 +710,8 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
639 if (!pinst) 710 if (!pinst)
640 goto err; 711 goto err;
641 712
713 get_online_cpus();
714
642 pd = padata_alloc_pd(pinst, cpumask); 715 pd = padata_alloc_pd(pinst, cpumask);
643 if (!pd) 716 if (!pd)
644 goto err_free_inst; 717 goto err_free_inst;
@@ -654,31 +727,32 @@ struct padata_instance *padata_alloc(const struct cpumask *cpumask,
654 727
655 pinst->flags = 0; 728 pinst->flags = 0;
656 729
730#ifdef CONFIG_HOTPLUG_CPU
657 pinst->cpu_notifier.notifier_call = padata_cpu_callback; 731 pinst->cpu_notifier.notifier_call = padata_cpu_callback;
658 pinst->cpu_notifier.priority = 0; 732 pinst->cpu_notifier.priority = 0;
659 err = register_hotcpu_notifier(&pinst->cpu_notifier); 733 register_hotcpu_notifier(&pinst->cpu_notifier);
660 if (err) 734#endif
661 goto err_free_cpumask; 735
736 put_online_cpus();
662 737
663 mutex_init(&pinst->lock); 738 mutex_init(&pinst->lock);
664 739
665 return pinst; 740 return pinst;
666 741
667err_free_cpumask:
668 free_cpumask_var(pinst->cpumask);
669err_free_pd: 742err_free_pd:
670 padata_free_pd(pd); 743 padata_free_pd(pd);
671err_free_inst: 744err_free_inst:
672 kfree(pinst); 745 kfree(pinst);
746 put_online_cpus();
673err: 747err:
674 return NULL; 748 return NULL;
675} 749}
676EXPORT_SYMBOL(padata_alloc); 750EXPORT_SYMBOL(padata_alloc);
677 751
678/* 752/**
679 * padata_free - free a padata instance 753 * padata_free - free a padata instance
680 * 754 *
681 * @ padata_inst: padata instance to free 755 * @padata_inst: padata instance to free
682 */ 756 */
683void padata_free(struct padata_instance *pinst) 757void padata_free(struct padata_instance *pinst)
684{ 758{
@@ -686,10 +760,13 @@ void padata_free(struct padata_instance *pinst)
686 760
687 synchronize_rcu(); 761 synchronize_rcu();
688 762
689 while (atomic_read(&pinst->pd->refcnt) != 0) 763#ifdef CONFIG_HOTPLUG_CPU
690 yield();
691
692 unregister_hotcpu_notifier(&pinst->cpu_notifier); 764 unregister_hotcpu_notifier(&pinst->cpu_notifier);
765#endif
766 get_online_cpus();
767 padata_flush_queues(pinst->pd);
768 put_online_cpus();
769
693 padata_free_pd(pinst->pd); 770 padata_free_pd(pinst->pd);
694 free_cpumask_var(pinst->cpumask); 771 free_cpumask_var(pinst->cpumask);
695 kfree(pinst); 772 kfree(pinst);