Diffstat (limited to 'kernel/rcupreempt.c')

 -rw-r--r--  kernel/rcupreempt.c | 414
 1 files changed, 370 insertions, 44 deletions

diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 5e02b7740702..aaa7976bd85f 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -46,6 +46,7 @@
 #include <asm/atomic.h>
 #include <linux/bitops.h>
 #include <linux/module.h>
+#include <linux/kthread.h>
 #include <linux/completion.h>
 #include <linux/moduleparam.h>
 #include <linux/percpu.h>
@@ -87,9 +88,14 @@ struct rcu_data {
 	struct rcu_head **nexttail;
 	struct rcu_head *waitlist[GP_STAGES];
 	struct rcu_head **waittail[GP_STAGES];
-	struct rcu_head *donelist;
+	struct rcu_head *donelist;	/* from waitlist & waitschedlist */
 	struct rcu_head **donetail;
 	long rcu_flipctr[2];
+	struct rcu_head *nextschedlist;
+	struct rcu_head **nextschedtail;
+	struct rcu_head *waitschedlist;
+	struct rcu_head **waitschedtail;
+	int rcu_sched_sleeping;
 #ifdef CONFIG_RCU_TRACE
 	struct rcupreempt_trace trace;
 #endif /* #ifdef CONFIG_RCU_TRACE */
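
Note: the four new *schedlist/*schedtail fields follow the same head/tail-pointer queue idiom as the existing nextlist/waitlist/donelist fields: the tail pointer always addresses the NULL that terminates the list, so callbacks can be appended and whole lists spliced without walking them. A minimal stand-alone sketch of that idiom (illustrative names, not part of the patch):

    /* Head/tail-pointer queue idiom used by the rcu_data lists above. */
    #include <stddef.h>

    struct cb {
            struct cb *next;
    };

    struct cb_list {
            struct cb *head;        /* first element, NULL if empty */
            struct cb **tail;       /* points at the terminating NULL */
    };

    static void cb_list_init(struct cb_list *l)
    {
            l->head = NULL;
            l->tail = &l->head;
    }

    static void cb_list_append(struct cb_list *l, struct cb *e)
    {
            e->next = NULL;
            *l->tail = e;           /* overwrite the terminating NULL */
            l->tail = &e->next;     /* terminating NULL now lives in e */
    }
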
@@ -131,11 +137,24 @@ enum rcu_try_flip_states {
 	rcu_try_flip_waitmb_state,
 };
 
+/*
+ * States for rcu_ctrlblk.rcu_sched_sleep.
+ */
+
+enum rcu_sched_sleep_states {
+	rcu_sched_not_sleeping,	/* Not sleeping, callbacks need GP. */
+	rcu_sched_sleep_prep,	/* Thinking of sleeping, rechecking. */
+	rcu_sched_sleeping,	/* Sleeping, awaken if GP needed. */
+};
+
 struct rcu_ctrlblk {
 	spinlock_t fliplock;	/* Protect state-machine transitions. */
 	long completed;		/* Number of last completed batch. */
 	enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
							the rcu state machine */
+	spinlock_t schedlock;	/* Protect rcu_sched sleep state. */
+	enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
+	wait_queue_head_t sched_wq;	/* Place for rcu_sched to sleep. */
 };
 
 static DEFINE_PER_CPU(struct rcu_data, rcu_data);
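
Note: the three sched_sleep states implement a sleep/wakeup handshake between the rcu_sched grace-period kthread (added further down) and call_rcu_sched(): the kthread announces rcu_sched_sleep_prep, re-checks for newly posted callbacks under schedlock, and only then sleeps in state rcu_sched_sleeping; an enqueuer moves the state back to rcu_sched_not_sleeping and wakes it. A compressed sketch of the enqueuer's half of that handshake, using only the fields declared above (the full version is in call_rcu_sched() below):

    /* Sketch of the wakeup half of the sched_sleep handshake. */
    static void example_wake_rcu_sched_gp(void)
    {
            unsigned long flags;
            int wake_gp = 1;

            spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
            if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
                    wake_gp = 0;    /* kthread is awake or already poked */
            rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
            spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
            if (wake_gp)
                    wake_up_interruptible(&rcu_ctrlblk.sched_wq);
    }
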
@@ -143,8 +162,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
 	.fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
 	.completed = 0,
 	.rcu_try_flip_state = rcu_try_flip_idle_state,
+	.schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
+	.sched_sleep = rcu_sched_not_sleeping,
+	.sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
 };
 
+static struct task_struct *rcu_sched_grace_period_task;
 
 #ifdef CONFIG_RCU_TRACE
 static char *rcu_try_flip_state_names[] =
@@ -207,6 +230,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
  */
 #define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
 
+#define RCU_SCHED_BATCH_TIME (HZ / 50)
+
 /*
  * Return the number of RCU batches processed thus far. Useful
  * for debug and statistics.
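
Note: RCU_SCHED_BATCH_TIME is a delay in jiffies (it is handed to schedule_timeout_interruptible() by the grace-period kthread below), so HZ / 50 works out to roughly 20 ms per grace-period pass regardless of the configured tick rate, for example:

    /* HZ / 50 expressed in jiffies and wall-clock time: */
    /*   HZ == 100  ->  2 jiffies ~= 20 ms */
    /*   HZ == 250  ->  5 jiffies ~= 20 ms */
    /*   HZ == 1000 -> 20 jiffies ~= 20 ms */
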
@@ -411,32 +436,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
 	}
 }
 
-#ifdef CONFIG_NO_HZ
+DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
+	.dynticks = 1,
+};
 
-DEFINE_PER_CPU(long, dynticks_progress_counter) = 1;
-static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
+#ifdef CONFIG_NO_HZ
 static DEFINE_PER_CPU(int, rcu_update_flag);
 
 /**
  * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
  *
  * If the CPU was idle with dynamic ticks active, this updates the
- * dynticks_progress_counter to let the RCU handling know that the
+ * rcu_dyntick_sched.dynticks to let the RCU handling know that the
  * CPU is active.
  */
 void rcu_irq_enter(void)
 {
 	int cpu = smp_processor_id();
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 
 	if (per_cpu(rcu_update_flag, cpu))
 		per_cpu(rcu_update_flag, cpu)++;
 
 	/*
 	 * Only update if we are coming from a stopped ticks mode
-	 * (dynticks_progress_counter is even).
+	 * (rcu_dyntick_sched.dynticks is even).
 	 */
 	if (!in_interrupt() &&
-	    (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) {
+	    (rdssp->dynticks & 0x1) == 0) {
 		/*
 		 * The following might seem like we could have a race
 		 * with NMI/SMIs. But this really isn't a problem.
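
Note: this hunk replaces the individual dynticks_progress_counter/rcu_dyntick_snapshot per-CPU variables with a single per-CPU struct rcu_dyntick_sched, but the struct definition itself lives in the rcupreempt header rather than in this file. A sketch that is consistent with the fields the rest of this patch uses (dynticks, dynticks_snap, sched_dynticks_snap, sched_qs, sched_qs_snap); the field types here are assumed, not taken from this diff:

    /* Assumed shape of the per-CPU dynticks/quiescent-state bookkeeping. */
    struct rcu_dyntick_sched {
            int dynticks;             /* even: dynticks-idle, odd: irq/process */
            int dynticks_snap;        /* snapshot for the counter-flip checks */
            int sched_dynticks_snap;  /* snapshot taken by the rcu_sched kthread */
            int sched_qs;             /* bumped at each sched quiescent state */
            int sched_qs_snap;        /* snapshot of sched_qs for GP detection */
    };
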
@@ -459,12 +486,12 @@ void rcu_irq_enter(void)
 		 * RCU read-side critical sections on this CPU would
 		 * have already completed.
 		 */
-		per_cpu(dynticks_progress_counter, cpu)++;
+		rdssp->dynticks++;
 		/*
 		 * The following memory barrier ensures that any
 		 * rcu_read_lock() primitives in the irq handler
 		 * are seen by other CPUs to follow the above
-		 * increment to dynticks_progress_counter. This is
+		 * increment to rcu_dyntick_sched.dynticks. This is
 		 * required in order for other CPUs to correctly
 		 * determine when it is safe to advance the RCU
 		 * grace-period state machine.
@@ -472,7 +499,7 @@ void rcu_irq_enter(void)
 		smp_mb(); /* see above block comment. */
 		/*
 		 * Since we can't determine the dynamic tick mode from
-		 * the dynticks_progress_counter after this routine,
+		 * the rcu_dyntick_sched.dynticks after this routine,
 		 * we use a second flag to acknowledge that we came
 		 * from an idle state with ticks stopped.
 		 */
@@ -480,7 +507,7 @@ void rcu_irq_enter(void)
 		/*
 		 * If we take an NMI/SMI now, they will also increment
 		 * the rcu_update_flag, and will not update the
-		 * dynticks_progress_counter on exit. That is for
+		 * rcu_dyntick_sched.dynticks on exit. That is for
 		 * this IRQ to do.
 		 */
 	}
@@ -490,12 +517,13 @@ void rcu_irq_enter(void)
  * rcu_irq_exit - Called from exiting Hard irq context.
  *
  * If the CPU was idle with dynamic ticks active, update the
- * dynticks_progress_counter to put let the RCU handling be
+ * rcu_dyntick_sched.dynticks to put let the RCU handling be
  * aware that the CPU is going back to idle with no ticks.
  */
 void rcu_irq_exit(void)
 {
 	int cpu = smp_processor_id();
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 
 	/*
 	 * rcu_update_flag is set if we interrupted the CPU
@@ -503,7 +531,7 @@ void rcu_irq_exit(void)
 	 * Once this occurs, we keep track of interrupt nesting
 	 * because a NMI/SMI could also come in, and we still
 	 * only want the IRQ that started the increment of the
-	 * dynticks_progress_counter to be the one that modifies
+	 * rcu_dyntick_sched.dynticks to be the one that modifies
 	 * it on exit.
 	 */
 	if (per_cpu(rcu_update_flag, cpu)) {
@@ -515,28 +543,29 @@ void rcu_irq_exit(void)
 
 		/*
 		 * If an NMI/SMI happens now we are still
-		 * protected by the dynticks_progress_counter being odd.
+		 * protected by the rcu_dyntick_sched.dynticks being odd.
 		 */
 
 		/*
 		 * The following memory barrier ensures that any
 		 * rcu_read_unlock() primitives in the irq handler
 		 * are seen by other CPUs to preceed the following
-		 * increment to dynticks_progress_counter. This
+		 * increment to rcu_dyntick_sched.dynticks. This
 		 * is required in order for other CPUs to determine
 		 * when it is safe to advance the RCU grace-period
 		 * state machine.
 		 */
 		smp_mb(); /* see above block comment. */
-		per_cpu(dynticks_progress_counter, cpu)++;
-		WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1);
+		rdssp->dynticks++;
+		WARN_ON(rdssp->dynticks & 0x1);
 	}
 }
 
 static void dyntick_save_progress_counter(int cpu)
 {
-	per_cpu(rcu_dyntick_snapshot, cpu) =
-		per_cpu(dynticks_progress_counter, cpu);
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+	rdssp->dynticks_snap = rdssp->dynticks;
 }
 
 static inline int
@@ -544,9 +573,10 @@ rcu_try_flip_waitack_needed(int cpu)
 {
 	long curr;
 	long snap;
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 
-	curr = per_cpu(dynticks_progress_counter, cpu);
-	snap = per_cpu(rcu_dyntick_snapshot, cpu);
+	curr = rdssp->dynticks;
+	snap = rdssp->dynticks_snap;
 	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
 
 	/*
@@ -580,9 +610,10 @@ rcu_try_flip_waitmb_needed(int cpu)
 {
 	long curr;
 	long snap;
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
 
-	curr = per_cpu(dynticks_progress_counter, cpu);
-	snap = per_cpu(rcu_dyntick_snapshot, cpu);
+	curr = rdssp->dynticks;
+	snap = rdssp->dynticks_snap;
 	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
 
 	/*
@@ -609,14 +640,86 @@ rcu_try_flip_waitmb_needed(int cpu)
 	return 1;
 }
 
+static void dyntick_save_progress_counter_sched(int cpu)
+{
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+	rdssp->sched_dynticks_snap = rdssp->dynticks;
+}
+
+static int rcu_qsctr_inc_needed_dyntick(int cpu)
+{
+	long curr;
+	long snap;
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+	curr = rdssp->dynticks;
+	snap = rdssp->sched_dynticks_snap;
+	smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
+
+	/*
+	 * If the CPU remained in dynticks mode for the entire time
+	 * and didn't take any interrupts, NMIs, SMIs, or whatever,
+	 * then it cannot be in the middle of an rcu_read_lock(), so
+	 * the next rcu_read_lock() it executes must use the new value
+	 * of the counter. Therefore, this CPU has been in a quiescent
+	 * state the entire time, and we don't need to wait for it.
+	 */
+
+	if ((curr == snap) && ((curr & 0x1) == 0))
+		return 0;
+
+	/*
+	 * If the CPU passed through or entered a dynticks idle phase with
+	 * no active irq handlers, then, as above, this CPU has already
+	 * passed through a quiescent state.
+	 */
+
+	if ((curr - snap) > 2 || (snap & 0x1) == 0)
+		return 0;
+
+	/* We need this CPU to go through a quiescent state. */
+
+	return 1;
+}
+
 #else /* !CONFIG_NO_HZ */
 
 # define dyntick_save_progress_counter(cpu) do { } while (0)
 # define rcu_try_flip_waitack_needed(cpu) (1)
 # define rcu_try_flip_waitmb_needed(cpu) (1)
+
+# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
+# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
 
 #endif /* CONFIG_NO_HZ */
 
+static void save_qsctr_sched(int cpu)
+{
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+	rdssp->sched_qs_snap = rdssp->sched_qs;
+}
+
+static inline int rcu_qsctr_inc_needed(int cpu)
+{
+	struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
+
+	/*
+	 * If there has been a quiescent state, no more need to wait
+	 * on this CPU.
+	 */
+
+	if (rdssp->sched_qs != rdssp->sched_qs_snap) {
+		smp_mb(); /* force ordering with cpu entering schedule(). */
+		return 0;
+	}
+
+	/* We need this CPU to go through a quiescent state. */
+
+	return 1;
+}
+
 /*
  * Get here when RCU is idle. Decide whether we need to
  * move out of idle state, and return non-zero if so.
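
Note: the even/odd encoding makes the two early-exit tests in rcu_qsctr_inc_needed_dyntick() above easy to check by hand; a few worked (curr, snap) examples, purely illustrative:

    /*
     * curr == 4, snap == 4: counter even and unchanged, so the CPU stayed
     *     in dynticks-idle the whole time -> return 0 (no QS needed).
     * curr == 5, snap == 5: counter odd and unchanged, so the CPU stayed
     *     in irq/process context -> neither test fires, return 1.
     * curr == 7, snap == 4: snap is even, so the CPU was idle when
     *     sampled and has therefore already passed a QS -> return 0.
     * curr == 8, snap == 5: curr - snap > 2, so the CPU went through a
     *     complete dynticks-idle phase in the meantime -> return 0.
     */
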
@@ -819,6 +922,26 @@ void rcu_check_callbacks(int cpu, int user)
 	unsigned long flags;
 	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
 
+	/*
+	 * If this CPU took its interrupt from user mode or from the
+	 * idle loop, and this is not a nested interrupt, then
+	 * this CPU has to have exited all prior preept-disable
+	 * sections of code. So increment the counter to note this.
+	 *
+	 * The memory barrier is needed to handle the case where
+	 * writes from a preempt-disable section of code get reordered
+	 * into schedule() by this CPU's write buffer. So the memory
+	 * barrier makes sure that the rcu_qsctr_inc() is seen by other
+	 * CPUs to happen after any such write.
+	 */
+
+	if (user ||
+	    (idle_cpu(cpu) && !in_softirq() &&
+	     hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+		smp_mb();	/* Guard against aggressive schedule(). */
+		rcu_qsctr_inc(cpu);
+	}
+
 	rcu_check_mb(cpu);
 	if (rcu_ctrlblk.completed == rdp->completed)
 		rcu_try_flip();
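
Note: rcu_qsctr_inc() is not defined in this file. Judging from save_qsctr_sched() and rcu_qsctr_inc_needed() above, it presumably just bumps the per-CPU sched_qs counter, along these lines (an assumption; the real helper lives in the rcupreempt header):

    /* Assumed definition of the quiescent-state counter increment. */
    static inline void rcu_qsctr_inc(int cpu)
    {
            struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);

            rdssp->sched_qs++;
    }
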
@@ -869,6 +992,8 @@ void rcu_offline_cpu(int cpu)
 	struct rcu_head *list = NULL;
 	unsigned long flags;
 	struct rcu_data *rdp = RCU_DATA_CPU(cpu);
+	struct rcu_head *schedlist = NULL;
+	struct rcu_head **schedtail = &schedlist;
 	struct rcu_head **tail = &list;
 
 	/*
@@ -882,6 +1007,11 @@ void rcu_offline_cpu(int cpu)
 		rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
 						list, tail);
 	rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
+	rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
+				schedlist, schedtail);
+	rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
+				schedlist, schedtail);
+	rdp->rcu_sched_sleeping = 0;
 	spin_unlock_irqrestore(&rdp->lock, flags);
 	rdp->waitlistcount = 0;
 
@@ -916,22 +1046,40 @@ void rcu_offline_cpu(int cpu)
 	 * fix.
 	 */
 
-	local_irq_save(flags);
+	local_irq_save(flags);  /* disable preempt till we know what lock. */
 	rdp = RCU_DATA_ME();
 	spin_lock(&rdp->lock);
 	*rdp->nexttail = list;
 	if (list)
 		rdp->nexttail = tail;
+	*rdp->nextschedtail = schedlist;
+	if (schedlist)
+		rdp->nextschedtail = schedtail;
 	spin_unlock_irqrestore(&rdp->lock, flags);
 }
 
 void __devinit rcu_online_cpu(int cpu)
 {
 	unsigned long flags;
+	struct rcu_data *rdp;
 
 	spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
 	cpu_set(cpu, rcu_cpu_online_map);
 	spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
+
+	/*
+	 * The rcu_sched grace-period processing might have bypassed
+	 * this CPU, given that it was not in the rcu_cpu_online_map
+	 * when the grace-period scan started. This means that the
+	 * grace-period task might sleep. So make sure that if this
+	 * should happen, the first callback posted to this CPU will
+	 * wake up the grace-period task if need be.
+	 */
+
+	rdp = RCU_DATA_CPU(cpu);
+	spin_lock_irqsave(&rdp->lock, flags);
+	rdp->rcu_sched_sleeping = 1;
+	spin_unlock_irqrestore(&rdp->lock, flags);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -986,31 +1134,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 	*rdp->nexttail = head;
 	rdp->nexttail = &head->next;
 	RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
-	spin_unlock(&rdp->lock);
-	local_irq_restore(flags);
+	spin_unlock_irqrestore(&rdp->lock, flags);
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
+void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
+{
+	unsigned long flags;
+	struct rcu_data *rdp;
+	int wake_gp = 0;
+
+	head->func = func;
+	head->next = NULL;
+	local_irq_save(flags);
+	rdp = RCU_DATA_ME();
+	spin_lock(&rdp->lock);
+	*rdp->nextschedtail = head;
+	rdp->nextschedtail = &head->next;
+	if (rdp->rcu_sched_sleeping) {
+
+		/* Grace-period processing might be sleeping... */
+
+		rdp->rcu_sched_sleeping = 0;
+		wake_gp = 1;
+	}
+	spin_unlock_irqrestore(&rdp->lock, flags);
+	if (wake_gp) {
+
+		/* Wake up grace-period processing, unless someone beat us. */
+
+		spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
+		if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
+			wake_gp = 0;
+		rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
+		spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
+		if (wake_gp)
+			wake_up_interruptible(&rcu_ctrlblk.sched_wq);
+	}
+}
+EXPORT_SYMBOL_GPL(call_rcu_sched);
+
 /*
  * Wait until all currently running preempt_disable() code segments
  * (including hardware-irq-disable segments) complete. Note that
  * in -rt this does -not- necessarily result in all currently executing
  * interrupt -handlers- having completed.
  */
-void __synchronize_sched(void)
+synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
+EXPORT_SYMBOL_GPL(__synchronize_sched);
+
+/*
+ * kthread function that manages call_rcu_sched grace periods.
+ */
+static int rcu_sched_grace_period(void *arg)
 {
-	cpumask_t oldmask;
+	int couldsleep;		/* might sleep after current pass. */
+	int couldsleepnext = 0;	/* might sleep after next pass. */
 	int cpu;
+	unsigned long flags;
+	struct rcu_data *rdp;
+	int ret;
 
-	if (sched_getaffinity(0, &oldmask) < 0)
-		oldmask = cpu_possible_map;
-	for_each_online_cpu(cpu) {
-		sched_setaffinity(0, &cpumask_of_cpu(cpu));
-		schedule();
-	}
-	sched_setaffinity(0, &oldmask);
+	/*
+	 * Each pass through the following loop handles one
+	 * rcu_sched grace period cycle.
+	 */
+	do {
+		/* Save each CPU's current state. */
+
+		for_each_online_cpu(cpu) {
+			dyntick_save_progress_counter_sched(cpu);
+			save_qsctr_sched(cpu);
+		}
+
+		/*
+		 * Sleep for about an RCU grace-period's worth to
+		 * allow better batching and to consume less CPU.
+		 */
+		schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
+
+		/*
+		 * If there was nothing to do last time, prepare to
+		 * sleep at the end of the current grace period cycle.
+		 */
+		couldsleep = couldsleepnext;
+		couldsleepnext = 1;
+		if (couldsleep) {
+			spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
+			rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
+			spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
+		}
+
+		/*
+		 * Wait on each CPU in turn to have either visited
+		 * a quiescent state or been in dynticks-idle mode.
+		 */
+		for_each_online_cpu(cpu) {
+			while (rcu_qsctr_inc_needed(cpu) &&
+			       rcu_qsctr_inc_needed_dyntick(cpu)) {
+				/* resched_cpu(cpu); @@@ */
+				schedule_timeout_interruptible(1);
+			}
+		}
+
+		/* Advance callbacks for each CPU. */
+
+		for_each_online_cpu(cpu) {
+
+			rdp = RCU_DATA_CPU(cpu);
+			spin_lock_irqsave(&rdp->lock, flags);
+
+			/*
+			 * We are running on this CPU irq-disabled, so no
+			 * CPU can go offline until we re-enable irqs.
+			 * The current CPU might have already gone
+			 * offline (between the for_each_offline_cpu and
+			 * the spin_lock_irqsave), but in that case all its
+			 * callback lists will be empty, so no harm done.
+			 *
+			 * Advance the callbacks! We share normal RCU's
+			 * donelist, since callbacks are invoked the
+			 * same way in either case.
+			 */
+			if (rdp->waitschedlist != NULL) {
+				*rdp->donetail = rdp->waitschedlist;
+				rdp->donetail = rdp->waitschedtail;
+
+				/*
+				 * Next rcu_check_callbacks() will
+				 * do the required raise_softirq().
+				 */
+			}
+			if (rdp->nextschedlist != NULL) {
+				rdp->waitschedlist = rdp->nextschedlist;
+				rdp->waitschedtail = rdp->nextschedtail;
+				couldsleep = 0;
+				couldsleepnext = 0;
+			} else {
+				rdp->waitschedlist = NULL;
+				rdp->waitschedtail = &rdp->waitschedlist;
+			}
+			rdp->nextschedlist = NULL;
+			rdp->nextschedtail = &rdp->nextschedlist;
+
+			/* Mark sleep intention. */
+
+			rdp->rcu_sched_sleeping = couldsleep;
+
+			spin_unlock_irqrestore(&rdp->lock, flags);
+		}
+
+		/* If we saw callbacks on the last scan, go deal with them. */
+
+		if (!couldsleep)
+			continue;
+
+		/* Attempt to block... */
+
+		spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
+		if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
+
+			/*
+			 * Someone posted a callback after we scanned.
+			 * Go take care of it.
+			 */
+			spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
+			couldsleepnext = 0;
+			continue;
+		}
+
+		/* Block until the next person posts a callback. */
+
+		rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
+		spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
+		ret = 0;
+		__wait_event_interruptible(rcu_ctrlblk.sched_wq,
+			rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
+			ret);
+
+		/*
+		 * Signals would prevent us from sleeping, and we cannot
+		 * do much with them in any case. So flush them.
+		 */
+		if (ret)
+			flush_signals(current);
+		couldsleepnext = 0;
+
+	} while (!kthread_should_stop());
+
+	return (0);
 }
-EXPORT_SYMBOL_GPL(__synchronize_sched);
 
 /*
  * Check to see if any future RCU-related work will need to be done
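
Note: __synchronize_sched() is now generated from call_rcu_sched() by the synchronize_rcu_xxx() helper macro, which is defined outside this file (in the rcupdate header) and presumably queues a callback that signals a completion and then waits for it. For callers, the new primitive is used just like call_rcu(): embed an rcu_head in the structure being protected and reclaim it from the callback once every CPU has left the preempt-disabled (or irq-disabled) sections that might still reference it. A hypothetical caller; struct foo, foo_reclaim() and foo_retire() are made up for illustration, only call_rcu_sched() comes from this patch:

    struct foo {
            int data;
            struct rcu_head rcu;
    };

    static void foo_reclaim(struct rcu_head *head)
    {
            struct foo *fp = container_of(head, struct foo, rcu);

            kfree(fp);
    }

    static void foo_retire(struct foo *fp)
    {
            /*
             * fp may be freed only after all preempt_disable() and
             * hardirq-disabled sections that might reference it finish.
             */
            call_rcu_sched(&fp->rcu, foo_reclaim);
    }
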
@@ -1027,7 +1340,9 @@ int rcu_needs_cpu(int cpu)
 
 	return (rdp->donelist != NULL ||
 		!!rdp->waitlistcount ||
-		rdp->nextlist != NULL);
+		rdp->nextlist != NULL ||
+		rdp->nextschedlist != NULL ||
+		rdp->waitschedlist != NULL);
 }
 
 int rcu_pending(int cpu)
@@ -1038,7 +1353,9 @@ int rcu_pending(int cpu)
 
 	if (rdp->donelist != NULL ||
 	    !!rdp->waitlistcount ||
-	    rdp->nextlist != NULL)
+	    rdp->nextlist != NULL ||
+	    rdp->nextschedlist != NULL ||
+	    rdp->waitschedlist != NULL)
 		return 1;
 
 	/* The RCU core needs an acknowledgement from this CPU. */
@@ -1105,6 +1422,11 @@ void __init __rcu_init(void)
 		rdp->donetail = &rdp->donelist;
 		rdp->rcu_flipctr[0] = 0;
 		rdp->rcu_flipctr[1] = 0;
+		rdp->nextschedlist = NULL;
+		rdp->nextschedtail = &rdp->nextschedlist;
+		rdp->waitschedlist = NULL;
+		rdp->waitschedtail = &rdp->waitschedlist;
+		rdp->rcu_sched_sleeping = 0;
 	}
 	register_cpu_notifier(&rcu_nb);
 
@@ -1127,11 +1449,15 @@ void __init __rcu_init(void)
 }
 
 /*
- * Deprecated, use synchronize_rcu() or synchronize_sched() instead.
+ * Late-boot-time RCU initialization that must wait until after scheduler
+ * has been initialized.
  */
-void synchronize_kernel(void)
+void __init rcu_init_sched(void)
 {
-	synchronize_rcu();
+	rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
+						  NULL,
+						  "rcu_sched_grace_period");
+	WARN_ON(IS_ERR(rcu_sched_grace_period_task));
 }
 
 #ifdef CONFIG_RCU_TRACE