diff options
Diffstat (limited to 'kernel/rcupreempt.c')
-rw-r--r-- | kernel/rcupreempt.c | 418 |
1 files changed, 371 insertions, 47 deletions
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c index 41d275a81df5..536ce83c55fe 100644 --- a/kernel/rcupreempt.c +++ b/kernel/rcupreempt.c | |||
@@ -46,11 +46,11 @@ | |||
46 | #include <asm/atomic.h> | 46 | #include <asm/atomic.h> |
47 | #include <linux/bitops.h> | 47 | #include <linux/bitops.h> |
48 | #include <linux/module.h> | 48 | #include <linux/module.h> |
49 | #include <linux/kthread.h> | ||
49 | #include <linux/completion.h> | 50 | #include <linux/completion.h> |
50 | #include <linux/moduleparam.h> | 51 | #include <linux/moduleparam.h> |
51 | #include <linux/percpu.h> | 52 | #include <linux/percpu.h> |
52 | #include <linux/notifier.h> | 53 | #include <linux/notifier.h> |
53 | #include <linux/rcupdate.h> | ||
54 | #include <linux/cpu.h> | 54 | #include <linux/cpu.h> |
55 | #include <linux/random.h> | 55 | #include <linux/random.h> |
56 | #include <linux/delay.h> | 56 | #include <linux/delay.h> |
@@ -82,14 +82,18 @@ struct rcu_data { | |||
82 | spinlock_t lock; /* Protect rcu_data fields. */ | 82 | spinlock_t lock; /* Protect rcu_data fields. */ |
83 | long completed; /* Number of last completed batch. */ | 83 | long completed; /* Number of last completed batch. */ |
84 | int waitlistcount; | 84 | int waitlistcount; |
85 | struct tasklet_struct rcu_tasklet; | ||
86 | struct rcu_head *nextlist; | 85 | struct rcu_head *nextlist; |
87 | struct rcu_head **nexttail; | 86 | struct rcu_head **nexttail; |
88 | struct rcu_head *waitlist[GP_STAGES]; | 87 | struct rcu_head *waitlist[GP_STAGES]; |
89 | struct rcu_head **waittail[GP_STAGES]; | 88 | struct rcu_head **waittail[GP_STAGES]; |
90 | struct rcu_head *donelist; | 89 | struct rcu_head *donelist; /* from waitlist & waitschedlist */ |
91 | struct rcu_head **donetail; | 90 | struct rcu_head **donetail; |
92 | long rcu_flipctr[2]; | 91 | long rcu_flipctr[2]; |
92 | struct rcu_head *nextschedlist; | ||
93 | struct rcu_head **nextschedtail; | ||
94 | struct rcu_head *waitschedlist; | ||
95 | struct rcu_head **waitschedtail; | ||
96 | int rcu_sched_sleeping; | ||
93 | #ifdef CONFIG_RCU_TRACE | 97 | #ifdef CONFIG_RCU_TRACE |
94 | struct rcupreempt_trace trace; | 98 | struct rcupreempt_trace trace; |
95 | #endif /* #ifdef CONFIG_RCU_TRACE */ | 99 | #endif /* #ifdef CONFIG_RCU_TRACE */ |
@@ -131,11 +135,24 @@ enum rcu_try_flip_states { | |||
131 | rcu_try_flip_waitmb_state, | 135 | rcu_try_flip_waitmb_state, |
132 | }; | 136 | }; |
133 | 137 | ||
138 | /* | ||
139 | * States for rcu_ctrlblk.rcu_sched_sleep. | ||
140 | */ | ||
141 | |||
142 | enum rcu_sched_sleep_states { | ||
143 | rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */ | ||
144 | rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */ | ||
145 | rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */ | ||
146 | }; | ||
147 | |||
134 | struct rcu_ctrlblk { | 148 | struct rcu_ctrlblk { |
135 | spinlock_t fliplock; /* Protect state-machine transitions. */ | 149 | spinlock_t fliplock; /* Protect state-machine transitions. */ |
136 | long completed; /* Number of last completed batch. */ | 150 | long completed; /* Number of last completed batch. */ |
137 | enum rcu_try_flip_states rcu_try_flip_state; /* The current state of | 151 | enum rcu_try_flip_states rcu_try_flip_state; /* The current state of |
138 | the rcu state machine */ | 152 | the rcu state machine */ |
153 | spinlock_t schedlock; /* Protect rcu_sched sleep state. */ | ||
154 | enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */ | ||
155 | wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */ | ||
139 | }; | 156 | }; |
140 | 157 | ||
141 | static DEFINE_PER_CPU(struct rcu_data, rcu_data); | 158 | static DEFINE_PER_CPU(struct rcu_data, rcu_data); |
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = { | |||
143 | .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), | 160 | .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), |
144 | .completed = 0, | 161 | .completed = 0, |
145 | .rcu_try_flip_state = rcu_try_flip_idle_state, | 162 | .rcu_try_flip_state = rcu_try_flip_idle_state, |
163 | .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock), | ||
164 | .sched_sleep = rcu_sched_not_sleeping, | ||
165 | .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq), | ||
146 | }; | 166 | }; |
147 | 167 | ||
168 | static struct task_struct *rcu_sched_grace_period_task; | ||
148 | 169 | ||
149 | #ifdef CONFIG_RCU_TRACE | 170 | #ifdef CONFIG_RCU_TRACE |
150 | static char *rcu_try_flip_state_names[] = | 171 | static char *rcu_try_flip_state_names[] = |
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag) | |||
207 | */ | 228 | */ |
208 | #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); | 229 | #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); |
209 | 230 | ||
231 | #define RCU_SCHED_BATCH_TIME (HZ / 50) | ||
232 | |||
210 | /* | 233 | /* |
211 | * Return the number of RCU batches processed thus far. Useful | 234 | * Return the number of RCU batches processed thus far. Useful |
212 | * for debug and statistics. | 235 | * for debug and statistics. |
@@ -411,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp) | |||
411 | } | 434 | } |
412 | } | 435 | } |
413 | 436 | ||
414 | #ifdef CONFIG_NO_HZ | 437 | DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = { |
438 | .dynticks = 1, | ||
439 | }; | ||
415 | 440 | ||
416 | DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; | 441 | #ifdef CONFIG_NO_HZ |
417 | static DEFINE_PER_CPU(long, rcu_dyntick_snapshot); | ||
418 | static DEFINE_PER_CPU(int, rcu_update_flag); | 442 | static DEFINE_PER_CPU(int, rcu_update_flag); |
419 | 443 | ||
420 | /** | 444 | /** |
421 | * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. | 445 | * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. |
422 | * | 446 | * |
423 | * If the CPU was idle with dynamic ticks active, this updates the | 447 | * If the CPU was idle with dynamic ticks active, this updates the |
424 | * dynticks_progress_counter to let the RCU handling know that the | 448 | * rcu_dyntick_sched.dynticks to let the RCU handling know that the |
425 | * CPU is active. | 449 | * CPU is active. |
426 | */ | 450 | */ |
427 | void rcu_irq_enter(void) | 451 | void rcu_irq_enter(void) |
428 | { | 452 | { |
429 | int cpu = smp_processor_id(); | 453 | int cpu = smp_processor_id(); |
454 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
430 | 455 | ||
431 | if (per_cpu(rcu_update_flag, cpu)) | 456 | if (per_cpu(rcu_update_flag, cpu)) |
432 | per_cpu(rcu_update_flag, cpu)++; | 457 | per_cpu(rcu_update_flag, cpu)++; |
433 | 458 | ||
434 | /* | 459 | /* |
435 | * Only update if we are coming from a stopped ticks mode | 460 | * Only update if we are coming from a stopped ticks mode |
436 | * (dynticks_progress_counter is even). | 461 | * (rcu_dyntick_sched.dynticks is even). |
437 | */ | 462 | */ |
438 | if (!in_interrupt() && | 463 | if (!in_interrupt() && |
439 | (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { | 464 | (rdssp->dynticks & 0x1) == 0) { |
440 | /* | 465 | /* |
441 | * The following might seem like we could have a race | 466 | * The following might seem like we could have a race |
442 | * with NMI/SMIs. But this really isn't a problem. | 467 | * with NMI/SMIs. But this really isn't a problem. |
@@ -459,12 +484,12 @@ void rcu_irq_enter(void) | |||
459 | * RCU read-side critical sections on this CPU would | 484 | * RCU read-side critical sections on this CPU would |
460 | * have already completed. | 485 | * have already completed. |
461 | */ | 486 | */ |
462 | per_cpu(dynticks_progress_counter, cpu)++; | 487 | rdssp->dynticks++; |
463 | /* | 488 | /* |
464 | * The following memory barrier ensures that any | 489 | * The following memory barrier ensures that any |
465 | * rcu_read_lock() primitives in the irq handler | 490 | * rcu_read_lock() primitives in the irq handler |
466 | * are seen by other CPUs to follow the above | 491 | * are seen by other CPUs to follow the above |
467 | * increment to dynticks_progress_counter. This is | 492 | * increment to rcu_dyntick_sched.dynticks. This is |
468 | * required in order for other CPUs to correctly | 493 | * required in order for other CPUs to correctly |
469 | * determine when it is safe to advance the RCU | 494 | * determine when it is safe to advance the RCU |
470 | * grace-period state machine. | 495 | * grace-period state machine. |
@@ -472,7 +497,7 @@ void rcu_irq_enter(void) | |||
472 | smp_mb(); /* see above block comment. */ | 497 | smp_mb(); /* see above block comment. */ |
473 | /* | 498 | /* |
474 | * Since we can't determine the dynamic tick mode from | 499 | * Since we can't determine the dynamic tick mode from |
475 | * the dynticks_progress_counter after this routine, | 500 | * the rcu_dyntick_sched.dynticks after this routine, |
476 | * we use a second flag to acknowledge that we came | 501 | * we use a second flag to acknowledge that we came |
477 | * from an idle state with ticks stopped. | 502 | * from an idle state with ticks stopped. |
478 | */ | 503 | */ |
@@ -480,7 +505,7 @@ void rcu_irq_enter(void) | |||
480 | /* | 505 | /* |
481 | * If we take an NMI/SMI now, they will also increment | 506 | * If we take an NMI/SMI now, they will also increment |
482 | * the rcu_update_flag, and will not update the | 507 | * the rcu_update_flag, and will not update the |
483 | * dynticks_progress_counter on exit. That is for | 508 | * rcu_dyntick_sched.dynticks on exit. That is for |
484 | * this IRQ to do. | 509 | * this IRQ to do. |
485 | */ | 510 | */ |
486 | } | 511 | } |
@@ -490,12 +515,13 @@ void rcu_irq_enter(void) | |||
490 | * rcu_irq_exit - Called from exiting Hard irq context. | 515 | * rcu_irq_exit - Called from exiting Hard irq context. |
491 | * | 516 | * |
492 | * If the CPU was idle with dynamic ticks active, update the | 517 | * If the CPU was idle with dynamic ticks active, update the |
493 | * dynticks_progress_counter to put let the RCU handling be | 518 | * rcu_dyntick_sched.dynticks to put let the RCU handling be |
494 | * aware that the CPU is going back to idle with no ticks. | 519 | * aware that the CPU is going back to idle with no ticks. |
495 | */ | 520 | */ |
496 | void rcu_irq_exit(void) | 521 | void rcu_irq_exit(void) |
497 | { | 522 | { |
498 | int cpu = smp_processor_id(); | 523 | int cpu = smp_processor_id(); |
524 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
499 | 525 | ||
500 | /* | 526 | /* |
501 | * rcu_update_flag is set if we interrupted the CPU | 527 | * rcu_update_flag is set if we interrupted the CPU |
@@ -503,7 +529,7 @@ void rcu_irq_exit(void) | |||
503 | * Once this occurs, we keep track of interrupt nesting | 529 | * Once this occurs, we keep track of interrupt nesting |
504 | * because a NMI/SMI could also come in, and we still | 530 | * because a NMI/SMI could also come in, and we still |
505 | * only want the IRQ that started the increment of the | 531 | * only want the IRQ that started the increment of the |
506 | * dynticks_progress_counter to be the one that modifies | 532 | * rcu_dyntick_sched.dynticks to be the one that modifies |
507 | * it on exit. | 533 | * it on exit. |
508 | */ | 534 | */ |
509 | if (per_cpu(rcu_update_flag, cpu)) { | 535 | if (per_cpu(rcu_update_flag, cpu)) { |
@@ -515,28 +541,29 @@ void rcu_irq_exit(void) | |||
515 | 541 | ||
516 | /* | 542 | /* |
517 | * If an NMI/SMI happens now we are still | 543 | * If an NMI/SMI happens now we are still |
518 | * protected by the dynticks_progress_counter being odd. | 544 | * protected by the rcu_dyntick_sched.dynticks being odd. |
519 | */ | 545 | */ |
520 | 546 | ||
521 | /* | 547 | /* |
522 | * The following memory barrier ensures that any | 548 | * The following memory barrier ensures that any |
523 | * rcu_read_unlock() primitives in the irq handler | 549 | * rcu_read_unlock() primitives in the irq handler |
524 | * are seen by other CPUs to preceed the following | 550 | * are seen by other CPUs to preceed the following |
525 | * increment to dynticks_progress_counter. This | 551 | * increment to rcu_dyntick_sched.dynticks. This |
526 | * is required in order for other CPUs to determine | 552 | * is required in order for other CPUs to determine |
527 | * when it is safe to advance the RCU grace-period | 553 | * when it is safe to advance the RCU grace-period |
528 | * state machine. | 554 | * state machine. |
529 | */ | 555 | */ |
530 | smp_mb(); /* see above block comment. */ | 556 | smp_mb(); /* see above block comment. */ |
531 | per_cpu(dynticks_progress_counter, cpu)++; | 557 | rdssp->dynticks++; |
532 | WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); | 558 | WARN_ON(rdssp->dynticks & 0x1); |
533 | } | 559 | } |
534 | } | 560 | } |
535 | 561 | ||
536 | static void dyntick_save_progress_counter(int cpu) | 562 | static void dyntick_save_progress_counter(int cpu) |
537 | { | 563 | { |
538 | per_cpu(rcu_dyntick_snapshot, cpu) = | 564 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); |
539 | per_cpu(dynticks_progress_counter, cpu); | 565 | |
566 | rdssp->dynticks_snap = rdssp->dynticks; | ||
540 | } | 567 | } |
541 | 568 | ||
542 | static inline int | 569 | static inline int |
@@ -544,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu) | |||
544 | { | 571 | { |
545 | long curr; | 572 | long curr; |
546 | long snap; | 573 | long snap; |
574 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
547 | 575 | ||
548 | curr = per_cpu(dynticks_progress_counter, cpu); | 576 | curr = rdssp->dynticks; |
549 | snap = per_cpu(rcu_dyntick_snapshot, cpu); | 577 | snap = rdssp->dynticks_snap; |
550 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | 578 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ |
551 | 579 | ||
552 | /* | 580 | /* |
@@ -567,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu) | |||
567 | * that this CPU already acknowledged the counter. | 595 | * that this CPU already acknowledged the counter. |
568 | */ | 596 | */ |
569 | 597 | ||
570 | if ((curr - snap) > 2 || (snap & 0x1) == 0) | 598 | if ((curr - snap) > 2 || (curr & 0x1) == 0) |
571 | return 0; | 599 | return 0; |
572 | 600 | ||
573 | /* We need this CPU to explicitly acknowledge the counter flip. */ | 601 | /* We need this CPU to explicitly acknowledge the counter flip. */ |
@@ -580,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu) | |||
580 | { | 608 | { |
581 | long curr; | 609 | long curr; |
582 | long snap; | 610 | long snap; |
611 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
583 | 612 | ||
584 | curr = per_cpu(dynticks_progress_counter, cpu); | 613 | curr = rdssp->dynticks; |
585 | snap = per_cpu(rcu_dyntick_snapshot, cpu); | 614 | snap = rdssp->dynticks_snap; |
586 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | 615 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ |
587 | 616 | ||
588 | /* | 617 | /* |
@@ -609,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu) | |||
609 | return 1; | 638 | return 1; |
610 | } | 639 | } |
611 | 640 | ||
641 | static void dyntick_save_progress_counter_sched(int cpu) | ||
642 | { | ||
643 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
644 | |||
645 | rdssp->sched_dynticks_snap = rdssp->dynticks; | ||
646 | } | ||
647 | |||
648 | static int rcu_qsctr_inc_needed_dyntick(int cpu) | ||
649 | { | ||
650 | long curr; | ||
651 | long snap; | ||
652 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
653 | |||
654 | curr = rdssp->dynticks; | ||
655 | snap = rdssp->sched_dynticks_snap; | ||
656 | smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ | ||
657 | |||
658 | /* | ||
659 | * If the CPU remained in dynticks mode for the entire time | ||
660 | * and didn't take any interrupts, NMIs, SMIs, or whatever, | ||
661 | * then it cannot be in the middle of an rcu_read_lock(), so | ||
662 | * the next rcu_read_lock() it executes must use the new value | ||
663 | * of the counter. Therefore, this CPU has been in a quiescent | ||
664 | * state the entire time, and we don't need to wait for it. | ||
665 | */ | ||
666 | |||
667 | if ((curr == snap) && ((curr & 0x1) == 0)) | ||
668 | return 0; | ||
669 | |||
670 | /* | ||
671 | * If the CPU passed through or entered a dynticks idle phase with | ||
672 | * no active irq handlers, then, as above, this CPU has already | ||
673 | * passed through a quiescent state. | ||
674 | */ | ||
675 | |||
676 | if ((curr - snap) > 2 || (snap & 0x1) == 0) | ||
677 | return 0; | ||
678 | |||
679 | /* We need this CPU to go through a quiescent state. */ | ||
680 | |||
681 | return 1; | ||
682 | } | ||
683 | |||
612 | #else /* !CONFIG_NO_HZ */ | 684 | #else /* !CONFIG_NO_HZ */ |
613 | 685 | ||
614 | # define dyntick_save_progress_counter(cpu) do { } while (0) | 686 | # define dyntick_save_progress_counter(cpu) do { } while (0) |
615 | # define rcu_try_flip_waitack_needed(cpu) (1) | 687 | # define rcu_try_flip_waitack_needed(cpu) (1) |
616 | # define rcu_try_flip_waitmb_needed(cpu) (1) | 688 | # define rcu_try_flip_waitmb_needed(cpu) (1) |
689 | |||
690 | # define dyntick_save_progress_counter_sched(cpu) do { } while (0) | ||
691 | # define rcu_qsctr_inc_needed_dyntick(cpu) (1) | ||
617 | 692 | ||
618 | #endif /* CONFIG_NO_HZ */ | 693 | #endif /* CONFIG_NO_HZ */ |
619 | 694 | ||
695 | static void save_qsctr_sched(int cpu) | ||
696 | { | ||
697 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
698 | |||
699 | rdssp->sched_qs_snap = rdssp->sched_qs; | ||
700 | } | ||
701 | |||
702 | static inline int rcu_qsctr_inc_needed(int cpu) | ||
703 | { | ||
704 | struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu); | ||
705 | |||
706 | /* | ||
707 | * If there has been a quiescent state, no more need to wait | ||
708 | * on this CPU. | ||
709 | */ | ||
710 | |||
711 | if (rdssp->sched_qs != rdssp->sched_qs_snap) { | ||
712 | smp_mb(); /* force ordering with cpu entering schedule(). */ | ||
713 | return 0; | ||
714 | } | ||
715 | |||
716 | /* We need this CPU to go through a quiescent state. */ | ||
717 | |||
718 | return 1; | ||
719 | } | ||
720 | |||
620 | /* | 721 | /* |
621 | * Get here when RCU is idle. Decide whether we need to | 722 | * Get here when RCU is idle. Decide whether we need to |
622 | * move out of idle state, and return non-zero if so. | 723 | * move out of idle state, and return non-zero if so. |
@@ -819,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user) | |||
819 | unsigned long flags; | 920 | unsigned long flags; |
820 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | 921 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); |
821 | 922 | ||
923 | /* | ||
924 | * If this CPU took its interrupt from user mode or from the | ||
925 | * idle loop, and this is not a nested interrupt, then | ||
926 | * this CPU has to have exited all prior preept-disable | ||
927 | * sections of code. So increment the counter to note this. | ||
928 | * | ||
929 | * The memory barrier is needed to handle the case where | ||
930 | * writes from a preempt-disable section of code get reordered | ||
931 | * into schedule() by this CPU's write buffer. So the memory | ||
932 | * barrier makes sure that the rcu_qsctr_inc() is seen by other | ||
933 | * CPUs to happen after any such write. | ||
934 | */ | ||
935 | |||
936 | if (user || | ||
937 | (idle_cpu(cpu) && !in_softirq() && | ||
938 | hardirq_count() <= (1 << HARDIRQ_SHIFT))) { | ||
939 | smp_mb(); /* Guard against aggressive schedule(). */ | ||
940 | rcu_qsctr_inc(cpu); | ||
941 | } | ||
942 | |||
822 | rcu_check_mb(cpu); | 943 | rcu_check_mb(cpu); |
823 | if (rcu_ctrlblk.completed == rdp->completed) | 944 | if (rcu_ctrlblk.completed == rdp->completed) |
824 | rcu_try_flip(); | 945 | rcu_try_flip(); |
@@ -869,6 +990,8 @@ void rcu_offline_cpu(int cpu) | |||
869 | struct rcu_head *list = NULL; | 990 | struct rcu_head *list = NULL; |
870 | unsigned long flags; | 991 | unsigned long flags; |
871 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); | 992 | struct rcu_data *rdp = RCU_DATA_CPU(cpu); |
993 | struct rcu_head *schedlist = NULL; | ||
994 | struct rcu_head **schedtail = &schedlist; | ||
872 | struct rcu_head **tail = &list; | 995 | struct rcu_head **tail = &list; |
873 | 996 | ||
874 | /* | 997 | /* |
@@ -882,6 +1005,11 @@ void rcu_offline_cpu(int cpu) | |||
882 | rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], | 1005 | rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], |
883 | list, tail); | 1006 | list, tail); |
884 | rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); | 1007 | rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); |
1008 | rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail, | ||
1009 | schedlist, schedtail); | ||
1010 | rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail, | ||
1011 | schedlist, schedtail); | ||
1012 | rdp->rcu_sched_sleeping = 0; | ||
885 | spin_unlock_irqrestore(&rdp->lock, flags); | 1013 | spin_unlock_irqrestore(&rdp->lock, flags); |
886 | rdp->waitlistcount = 0; | 1014 | rdp->waitlistcount = 0; |
887 | 1015 | ||
@@ -916,12 +1044,15 @@ void rcu_offline_cpu(int cpu) | |||
916 | * fix. | 1044 | * fix. |
917 | */ | 1045 | */ |
918 | 1046 | ||
919 | local_irq_save(flags); | 1047 | local_irq_save(flags); /* disable preempt till we know what lock. */ |
920 | rdp = RCU_DATA_ME(); | 1048 | rdp = RCU_DATA_ME(); |
921 | spin_lock(&rdp->lock); | 1049 | spin_lock(&rdp->lock); |
922 | *rdp->nexttail = list; | 1050 | *rdp->nexttail = list; |
923 | if (list) | 1051 | if (list) |
924 | rdp->nexttail = tail; | 1052 | rdp->nexttail = tail; |
1053 | *rdp->nextschedtail = schedlist; | ||
1054 | if (schedlist) | ||
1055 | rdp->nextschedtail = schedtail; | ||
925 | spin_unlock_irqrestore(&rdp->lock, flags); | 1056 | spin_unlock_irqrestore(&rdp->lock, flags); |
926 | } | 1057 | } |
927 | 1058 | ||
@@ -936,10 +1067,25 @@ void rcu_offline_cpu(int cpu) | |||
936 | void __cpuinit rcu_online_cpu(int cpu) | 1067 | void __cpuinit rcu_online_cpu(int cpu) |
937 | { | 1068 | { |
938 | unsigned long flags; | 1069 | unsigned long flags; |
1070 | struct rcu_data *rdp; | ||
939 | 1071 | ||
940 | spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); | 1072 | spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); |
941 | cpu_set(cpu, rcu_cpu_online_map); | 1073 | cpu_set(cpu, rcu_cpu_online_map); |
942 | spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); | 1074 | spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); |
1075 | |||
1076 | /* | ||
1077 | * The rcu_sched grace-period processing might have bypassed | ||
1078 | * this CPU, given that it was not in the rcu_cpu_online_map | ||
1079 | * when the grace-period scan started. This means that the | ||
1080 | * grace-period task might sleep. So make sure that if this | ||
1081 | * should happen, the first callback posted to this CPU will | ||
1082 | * wake up the grace-period task if need be. | ||
1083 | */ | ||
1084 | |||
1085 | rdp = RCU_DATA_CPU(cpu); | ||
1086 | spin_lock_irqsave(&rdp->lock, flags); | ||
1087 | rdp->rcu_sched_sleeping = 1; | ||
1088 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
943 | } | 1089 | } |
944 | 1090 | ||
945 | static void rcu_process_callbacks(struct softirq_action *unused) | 1091 | static void rcu_process_callbacks(struct softirq_action *unused) |
@@ -982,31 +1128,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | |||
982 | *rdp->nexttail = head; | 1128 | *rdp->nexttail = head; |
983 | rdp->nexttail = &head->next; | 1129 | rdp->nexttail = &head->next; |
984 | RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); | 1130 | RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); |
985 | spin_unlock(&rdp->lock); | 1131 | spin_unlock_irqrestore(&rdp->lock, flags); |
986 | local_irq_restore(flags); | ||
987 | } | 1132 | } |
988 | EXPORT_SYMBOL_GPL(call_rcu); | 1133 | EXPORT_SYMBOL_GPL(call_rcu); |
989 | 1134 | ||
1135 | void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu)) | ||
1136 | { | ||
1137 | unsigned long flags; | ||
1138 | struct rcu_data *rdp; | ||
1139 | int wake_gp = 0; | ||
1140 | |||
1141 | head->func = func; | ||
1142 | head->next = NULL; | ||
1143 | local_irq_save(flags); | ||
1144 | rdp = RCU_DATA_ME(); | ||
1145 | spin_lock(&rdp->lock); | ||
1146 | *rdp->nextschedtail = head; | ||
1147 | rdp->nextschedtail = &head->next; | ||
1148 | if (rdp->rcu_sched_sleeping) { | ||
1149 | |||
1150 | /* Grace-period processing might be sleeping... */ | ||
1151 | |||
1152 | rdp->rcu_sched_sleeping = 0; | ||
1153 | wake_gp = 1; | ||
1154 | } | ||
1155 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1156 | if (wake_gp) { | ||
1157 | |||
1158 | /* Wake up grace-period processing, unless someone beat us. */ | ||
1159 | |||
1160 | spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); | ||
1161 | if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping) | ||
1162 | wake_gp = 0; | ||
1163 | rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping; | ||
1164 | spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); | ||
1165 | if (wake_gp) | ||
1166 | wake_up_interruptible(&rcu_ctrlblk.sched_wq); | ||
1167 | } | ||
1168 | } | ||
1169 | EXPORT_SYMBOL_GPL(call_rcu_sched); | ||
1170 | |||
990 | /* | 1171 | /* |
991 | * Wait until all currently running preempt_disable() code segments | 1172 | * Wait until all currently running preempt_disable() code segments |
992 | * (including hardware-irq-disable segments) complete. Note that | 1173 | * (including hardware-irq-disable segments) complete. Note that |
993 | * in -rt this does -not- necessarily result in all currently executing | 1174 | * in -rt this does -not- necessarily result in all currently executing |
994 | * interrupt -handlers- having completed. | 1175 | * interrupt -handlers- having completed. |
995 | */ | 1176 | */ |
996 | void __synchronize_sched(void) | 1177 | synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched) |
1178 | EXPORT_SYMBOL_GPL(__synchronize_sched); | ||
1179 | |||
1180 | /* | ||
1181 | * kthread function that manages call_rcu_sched grace periods. | ||
1182 | */ | ||
1183 | static int rcu_sched_grace_period(void *arg) | ||
997 | { | 1184 | { |
998 | cpumask_t oldmask; | 1185 | int couldsleep; /* might sleep after current pass. */ |
1186 | int couldsleepnext = 0; /* might sleep after next pass. */ | ||
999 | int cpu; | 1187 | int cpu; |
1188 | unsigned long flags; | ||
1189 | struct rcu_data *rdp; | ||
1190 | int ret; | ||
1000 | 1191 | ||
1001 | if (sched_getaffinity(0, &oldmask) < 0) | 1192 | /* |
1002 | oldmask = cpu_possible_map; | 1193 | * Each pass through the following loop handles one |
1003 | for_each_online_cpu(cpu) { | 1194 | * rcu_sched grace period cycle. |
1004 | sched_setaffinity(0, &cpumask_of_cpu(cpu)); | 1195 | */ |
1005 | schedule(); | 1196 | do { |
1006 | } | 1197 | /* Save each CPU's current state. */ |
1007 | sched_setaffinity(0, &oldmask); | 1198 | |
1199 | for_each_online_cpu(cpu) { | ||
1200 | dyntick_save_progress_counter_sched(cpu); | ||
1201 | save_qsctr_sched(cpu); | ||
1202 | } | ||
1203 | |||
1204 | /* | ||
1205 | * Sleep for about an RCU grace-period's worth to | ||
1206 | * allow better batching and to consume less CPU. | ||
1207 | */ | ||
1208 | schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME); | ||
1209 | |||
1210 | /* | ||
1211 | * If there was nothing to do last time, prepare to | ||
1212 | * sleep at the end of the current grace period cycle. | ||
1213 | */ | ||
1214 | couldsleep = couldsleepnext; | ||
1215 | couldsleepnext = 1; | ||
1216 | if (couldsleep) { | ||
1217 | spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); | ||
1218 | rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep; | ||
1219 | spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); | ||
1220 | } | ||
1221 | |||
1222 | /* | ||
1223 | * Wait on each CPU in turn to have either visited | ||
1224 | * a quiescent state or been in dynticks-idle mode. | ||
1225 | */ | ||
1226 | for_each_online_cpu(cpu) { | ||
1227 | while (rcu_qsctr_inc_needed(cpu) && | ||
1228 | rcu_qsctr_inc_needed_dyntick(cpu)) { | ||
1229 | /* resched_cpu(cpu); @@@ */ | ||
1230 | schedule_timeout_interruptible(1); | ||
1231 | } | ||
1232 | } | ||
1233 | |||
1234 | /* Advance callbacks for each CPU. */ | ||
1235 | |||
1236 | for_each_online_cpu(cpu) { | ||
1237 | |||
1238 | rdp = RCU_DATA_CPU(cpu); | ||
1239 | spin_lock_irqsave(&rdp->lock, flags); | ||
1240 | |||
1241 | /* | ||
1242 | * We are running on this CPU irq-disabled, so no | ||
1243 | * CPU can go offline until we re-enable irqs. | ||
1244 | * The current CPU might have already gone | ||
1245 | * offline (between the for_each_offline_cpu and | ||
1246 | * the spin_lock_irqsave), but in that case all its | ||
1247 | * callback lists will be empty, so no harm done. | ||
1248 | * | ||
1249 | * Advance the callbacks! We share normal RCU's | ||
1250 | * donelist, since callbacks are invoked the | ||
1251 | * same way in either case. | ||
1252 | */ | ||
1253 | if (rdp->waitschedlist != NULL) { | ||
1254 | *rdp->donetail = rdp->waitschedlist; | ||
1255 | rdp->donetail = rdp->waitschedtail; | ||
1256 | |||
1257 | /* | ||
1258 | * Next rcu_check_callbacks() will | ||
1259 | * do the required raise_softirq(). | ||
1260 | */ | ||
1261 | } | ||
1262 | if (rdp->nextschedlist != NULL) { | ||
1263 | rdp->waitschedlist = rdp->nextschedlist; | ||
1264 | rdp->waitschedtail = rdp->nextschedtail; | ||
1265 | couldsleep = 0; | ||
1266 | couldsleepnext = 0; | ||
1267 | } else { | ||
1268 | rdp->waitschedlist = NULL; | ||
1269 | rdp->waitschedtail = &rdp->waitschedlist; | ||
1270 | } | ||
1271 | rdp->nextschedlist = NULL; | ||
1272 | rdp->nextschedtail = &rdp->nextschedlist; | ||
1273 | |||
1274 | /* Mark sleep intention. */ | ||
1275 | |||
1276 | rdp->rcu_sched_sleeping = couldsleep; | ||
1277 | |||
1278 | spin_unlock_irqrestore(&rdp->lock, flags); | ||
1279 | } | ||
1280 | |||
1281 | /* If we saw callbacks on the last scan, go deal with them. */ | ||
1282 | |||
1283 | if (!couldsleep) | ||
1284 | continue; | ||
1285 | |||
1286 | /* Attempt to block... */ | ||
1287 | |||
1288 | spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags); | ||
1289 | if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) { | ||
1290 | |||
1291 | /* | ||
1292 | * Someone posted a callback after we scanned. | ||
1293 | * Go take care of it. | ||
1294 | */ | ||
1295 | spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); | ||
1296 | couldsleepnext = 0; | ||
1297 | continue; | ||
1298 | } | ||
1299 | |||
1300 | /* Block until the next person posts a callback. */ | ||
1301 | |||
1302 | rcu_ctrlblk.sched_sleep = rcu_sched_sleeping; | ||
1303 | spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags); | ||
1304 | ret = 0; | ||
1305 | __wait_event_interruptible(rcu_ctrlblk.sched_wq, | ||
1306 | rcu_ctrlblk.sched_sleep != rcu_sched_sleeping, | ||
1307 | ret); | ||
1308 | |||
1309 | /* | ||
1310 | * Signals would prevent us from sleeping, and we cannot | ||
1311 | * do much with them in any case. So flush them. | ||
1312 | */ | ||
1313 | if (ret) | ||
1314 | flush_signals(current); | ||
1315 | couldsleepnext = 0; | ||
1316 | |||
1317 | } while (!kthread_should_stop()); | ||
1318 | |||
1319 | return (0); | ||
1008 | } | 1320 | } |
1009 | EXPORT_SYMBOL_GPL(__synchronize_sched); | ||
1010 | 1321 | ||
1011 | /* | 1322 | /* |
1012 | * Check to see if any future RCU-related work will need to be done | 1323 | * Check to see if any future RCU-related work will need to be done |
@@ -1023,7 +1334,9 @@ int rcu_needs_cpu(int cpu) | |||
1023 | 1334 | ||
1024 | return (rdp->donelist != NULL || | 1335 | return (rdp->donelist != NULL || |
1025 | !!rdp->waitlistcount || | 1336 | !!rdp->waitlistcount || |
1026 | rdp->nextlist != NULL); | 1337 | rdp->nextlist != NULL || |
1338 | rdp->nextschedlist != NULL || | ||
1339 | rdp->waitschedlist != NULL); | ||
1027 | } | 1340 | } |
1028 | 1341 | ||
1029 | int rcu_pending(int cpu) | 1342 | int rcu_pending(int cpu) |
@@ -1034,7 +1347,9 @@ int rcu_pending(int cpu) | |||
1034 | 1347 | ||
1035 | if (rdp->donelist != NULL || | 1348 | if (rdp->donelist != NULL || |
1036 | !!rdp->waitlistcount || | 1349 | !!rdp->waitlistcount || |
1037 | rdp->nextlist != NULL) | 1350 | rdp->nextlist != NULL || |
1351 | rdp->nextschedlist != NULL || | ||
1352 | rdp->waitschedlist != NULL) | ||
1038 | return 1; | 1353 | return 1; |
1039 | 1354 | ||
1040 | /* The RCU core needs an acknowledgement from this CPU. */ | 1355 | /* The RCU core needs an acknowledgement from this CPU. */ |
@@ -1101,6 +1416,11 @@ void __init __rcu_init(void) | |||
1101 | rdp->donetail = &rdp->donelist; | 1416 | rdp->donetail = &rdp->donelist; |
1102 | rdp->rcu_flipctr[0] = 0; | 1417 | rdp->rcu_flipctr[0] = 0; |
1103 | rdp->rcu_flipctr[1] = 0; | 1418 | rdp->rcu_flipctr[1] = 0; |
1419 | rdp->nextschedlist = NULL; | ||
1420 | rdp->nextschedtail = &rdp->nextschedlist; | ||
1421 | rdp->waitschedlist = NULL; | ||
1422 | rdp->waitschedtail = &rdp->waitschedlist; | ||
1423 | rdp->rcu_sched_sleeping = 0; | ||
1104 | } | 1424 | } |
1105 | register_cpu_notifier(&rcu_nb); | 1425 | register_cpu_notifier(&rcu_nb); |
1106 | 1426 | ||
@@ -1123,11 +1443,15 @@ void __init __rcu_init(void) | |||
1123 | } | 1443 | } |
1124 | 1444 | ||
1125 | /* | 1445 | /* |
1126 | * Deprecated, use synchronize_rcu() or synchronize_sched() instead. | 1446 | * Late-boot-time RCU initialization that must wait until after scheduler |
1447 | * has been initialized. | ||
1127 | */ | 1448 | */ |
1128 | void synchronize_kernel(void) | 1449 | void __init rcu_init_sched(void) |
1129 | { | 1450 | { |
1130 | synchronize_rcu(); | 1451 | rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period, |
1452 | NULL, | ||
1453 | "rcu_sched_grace_period"); | ||
1454 | WARN_ON(IS_ERR(rcu_sched_grace_period_task)); | ||
1131 | } | 1455 | } |
1132 | 1456 | ||
1133 | #ifdef CONFIG_RCU_TRACE | 1457 | #ifdef CONFIG_RCU_TRACE |