aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/rcupreempt.c
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2008-07-15 15:10:12 -0400
committer Ingo Molnar <mingo@elte.hu> 2008-07-15 15:10:12 -0400
commit 6c9fcaf2eec1b9f85226a694230dd957dd7926b3 (patch)
tree f8c824c6c64dc411752c844f116e693760768bcc /kernel/rcupreempt.c
parent b9d2252c1e44fa83a4e65fdc9eb93db6297c55af (diff)
parent 199a952876adbfc2b6c13b8b07adabebf4ff54b2 (diff)
Merge branch 'core/rcu' into core/rcu-for-linus
Diffstat (limited to 'kernel/rcupreempt.c')
-rw-r--r-- kernel/rcupreempt.c 418
1 files changed, 371 insertions, 47 deletions
diff --git a/kernel/rcupreempt.c b/kernel/rcupreempt.c
index 9bf445664457..6f62b77d93c4 100644
--- a/kernel/rcupreempt.c
+++ b/kernel/rcupreempt.c
@@ -46,11 +46,11 @@
46#include <asm/atomic.h> 46#include <asm/atomic.h>
47#include <linux/bitops.h> 47#include <linux/bitops.h>
48#include <linux/module.h> 48#include <linux/module.h>
49#include <linux/kthread.h>
49#include <linux/completion.h> 50#include <linux/completion.h>
50#include <linux/moduleparam.h> 51#include <linux/moduleparam.h>
51#include <linux/percpu.h> 52#include <linux/percpu.h>
52#include <linux/notifier.h> 53#include <linux/notifier.h>
53#include <linux/rcupdate.h>
54#include <linux/cpu.h> 54#include <linux/cpu.h>
55#include <linux/random.h> 55#include <linux/random.h>
56#include <linux/delay.h> 56#include <linux/delay.h>
@@ -82,14 +82,18 @@ struct rcu_data {
82 spinlock_t lock; /* Protect rcu_data fields. */ 82 spinlock_t lock; /* Protect rcu_data fields. */
83 long completed; /* Number of last completed batch. */ 83 long completed; /* Number of last completed batch. */
84 int waitlistcount; 84 int waitlistcount;
85 struct tasklet_struct rcu_tasklet;
86 struct rcu_head *nextlist; 85 struct rcu_head *nextlist;
87 struct rcu_head **nexttail; 86 struct rcu_head **nexttail;
88 struct rcu_head *waitlist[GP_STAGES]; 87 struct rcu_head *waitlist[GP_STAGES];
89 struct rcu_head **waittail[GP_STAGES]; 88 struct rcu_head **waittail[GP_STAGES];
90 struct rcu_head *donelist; 89 struct rcu_head *donelist; /* from waitlist & waitschedlist */
91 struct rcu_head **donetail; 90 struct rcu_head **donetail;
92 long rcu_flipctr[2]; 91 long rcu_flipctr[2];
92 struct rcu_head *nextschedlist;
93 struct rcu_head **nextschedtail;
94 struct rcu_head *waitschedlist;
95 struct rcu_head **waitschedtail;
96 int rcu_sched_sleeping;
93#ifdef CONFIG_RCU_TRACE 97#ifdef CONFIG_RCU_TRACE
94 struct rcupreempt_trace trace; 98 struct rcupreempt_trace trace;
95#endif /* #ifdef CONFIG_RCU_TRACE */ 99#endif /* #ifdef CONFIG_RCU_TRACE */
@@ -131,11 +135,24 @@ enum rcu_try_flip_states {
131 rcu_try_flip_waitmb_state, 135 rcu_try_flip_waitmb_state,
132}; 136};
133 137
138/*
139 * States for rcu_ctrlblk.sched_sleep.
140 */
141
142enum rcu_sched_sleep_states {
143 rcu_sched_not_sleeping, /* Not sleeping, callbacks need GP. */
144 rcu_sched_sleep_prep, /* Thinking of sleeping, rechecking. */
145 rcu_sched_sleeping, /* Sleeping, awaken if GP needed. */
146};
147
134struct rcu_ctrlblk { 148struct rcu_ctrlblk {
135 spinlock_t fliplock; /* Protect state-machine transitions. */ 149 spinlock_t fliplock; /* Protect state-machine transitions. */
136 long completed; /* Number of last completed batch. */ 150 long completed; /* Number of last completed batch. */
137 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of 151 enum rcu_try_flip_states rcu_try_flip_state; /* The current state of
138 the rcu state machine */ 152 the rcu state machine */
153 spinlock_t schedlock; /* Protect rcu_sched sleep state. */
154 enum rcu_sched_sleep_states sched_sleep; /* rcu_sched state. */
155 wait_queue_head_t sched_wq; /* Place for rcu_sched to sleep. */
139}; 156};
140 157
141static DEFINE_PER_CPU(struct rcu_data, rcu_data); 158static DEFINE_PER_CPU(struct rcu_data, rcu_data);
@@ -143,8 +160,12 @@ static struct rcu_ctrlblk rcu_ctrlblk = {
143 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock), 160 .fliplock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.fliplock),
144 .completed = 0, 161 .completed = 0,
145 .rcu_try_flip_state = rcu_try_flip_idle_state, 162 .rcu_try_flip_state = rcu_try_flip_idle_state,
163 .schedlock = __SPIN_LOCK_UNLOCKED(rcu_ctrlblk.schedlock),
164 .sched_sleep = rcu_sched_not_sleeping,
165 .sched_wq = __WAIT_QUEUE_HEAD_INITIALIZER(rcu_ctrlblk.sched_wq),
146}; 166};
147 167
168static struct task_struct *rcu_sched_grace_period_task;
148 169
149#ifdef CONFIG_RCU_TRACE 170#ifdef CONFIG_RCU_TRACE
150static char *rcu_try_flip_state_names[] = 171static char *rcu_try_flip_state_names[] =
@@ -207,6 +228,8 @@ static DEFINE_PER_CPU_SHARED_ALIGNED(enum rcu_mb_flag_values, rcu_mb_flag)
207 */ 228 */
208#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace)); 229#define RCU_TRACE_RDP(f, rdp) RCU_TRACE(f, &((rdp)->trace));
209 230
231#define RCU_SCHED_BATCH_TIME (HZ / 50)
232
210/* 233/*
211 * Return the number of RCU batches processed thus far. Useful 234 * Return the number of RCU batches processed thus far. Useful
212 * for debug and statistics. 235 * for debug and statistics.
@@ -411,32 +434,34 @@ static void __rcu_advance_callbacks(struct rcu_data *rdp)
411 } 434 }
412} 435}
413 436
414#ifdef CONFIG_NO_HZ 437DEFINE_PER_CPU_SHARED_ALIGNED(struct rcu_dyntick_sched, rcu_dyntick_sched) = {
438 .dynticks = 1,
439};
415 440
416DEFINE_PER_CPU(long, dynticks_progress_counter) = 1; 441#ifdef CONFIG_NO_HZ
417static DEFINE_PER_CPU(long, rcu_dyntick_snapshot);
418static DEFINE_PER_CPU(int, rcu_update_flag); 442static DEFINE_PER_CPU(int, rcu_update_flag);
419 443
420/** 444/**
421 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI. 445 * rcu_irq_enter - Called from Hard irq handlers and NMI/SMI.
422 * 446 *
423 * If the CPU was idle with dynamic ticks active, this updates the 447 * If the CPU was idle with dynamic ticks active, this updates the
424 * dynticks_progress_counter to let the RCU handling know that the 448 * rcu_dyntick_sched.dynticks to let the RCU handling know that the
425 * CPU is active. 449 * CPU is active.
426 */ 450 */
427void rcu_irq_enter(void) 451void rcu_irq_enter(void)
428{ 452{
429 int cpu = smp_processor_id(); 453 int cpu = smp_processor_id();
454 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
430 455
431 if (per_cpu(rcu_update_flag, cpu)) 456 if (per_cpu(rcu_update_flag, cpu))
432 per_cpu(rcu_update_flag, cpu)++; 457 per_cpu(rcu_update_flag, cpu)++;
433 458
434 /* 459 /*
435 * Only update if we are coming from a stopped ticks mode 460 * Only update if we are coming from a stopped ticks mode
436 * (dynticks_progress_counter is even). 461 * (rcu_dyntick_sched.dynticks is even).
437 */ 462 */
438 if (!in_interrupt() && 463 if (!in_interrupt() &&
439 (per_cpu(dynticks_progress_counter, cpu) & 0x1) == 0) { 464 (rdssp->dynticks & 0x1) == 0) {
440 /* 465 /*
441 * The following might seem like we could have a race 466 * The following might seem like we could have a race
442 * with NMI/SMIs. But this really isn't a problem. 467 * with NMI/SMIs. But this really isn't a problem.
@@ -459,12 +484,12 @@ void rcu_irq_enter(void)
459 * RCU read-side critical sections on this CPU would 484 * RCU read-side critical sections on this CPU would
460 * have already completed. 485 * have already completed.
461 */ 486 */
462 per_cpu(dynticks_progress_counter, cpu)++; 487 rdssp->dynticks++;
463 /* 488 /*
464 * The following memory barrier ensures that any 489 * The following memory barrier ensures that any
465 * rcu_read_lock() primitives in the irq handler 490 * rcu_read_lock() primitives in the irq handler
466 * are seen by other CPUs to follow the above 491 * are seen by other CPUs to follow the above
467 * increment to dynticks_progress_counter. This is 492 * increment to rcu_dyntick_sched.dynticks. This is
468 * required in order for other CPUs to correctly 493 * required in order for other CPUs to correctly
469 * determine when it is safe to advance the RCU 494 * determine when it is safe to advance the RCU
470 * grace-period state machine. 495 * grace-period state machine.
@@ -472,7 +497,7 @@ void rcu_irq_enter(void)
472 smp_mb(); /* see above block comment. */ 497 smp_mb(); /* see above block comment. */
473 /* 498 /*
474 * Since we can't determine the dynamic tick mode from 499 * Since we can't determine the dynamic tick mode from
475 * the dynticks_progress_counter after this routine, 500 * the rcu_dyntick_sched.dynticks after this routine,
476 * we use a second flag to acknowledge that we came 501 * we use a second flag to acknowledge that we came
477 * from an idle state with ticks stopped. 502 * from an idle state with ticks stopped.
478 */ 503 */
@@ -480,7 +505,7 @@ void rcu_irq_enter(void)
480 /* 505 /*
481 * If we take an NMI/SMI now, they will also increment 506 * If we take an NMI/SMI now, they will also increment
482 * the rcu_update_flag, and will not update the 507 * the rcu_update_flag, and will not update the
483 * dynticks_progress_counter on exit. That is for 508 * rcu_dyntick_sched.dynticks on exit. That is for
484 * this IRQ to do. 509 * this IRQ to do.
485 */ 510 */
486 } 511 }
@@ -490,12 +515,13 @@ void rcu_irq_enter(void)
490 * rcu_irq_exit - Called from exiting Hard irq context. 515 * rcu_irq_exit - Called from exiting Hard irq context.
491 * 516 *
492 * If the CPU was idle with dynamic ticks active, update the 517 * If the CPU was idle with dynamic ticks active, update the
493 * dynticks_progress_counter to let the RCU handling be 518 * rcu_dyntick_sched.dynticks to let the RCU handling be
494 * aware that the CPU is going back to idle with no ticks. 519 * aware that the CPU is going back to idle with no ticks.
495 */ 520 */
496void rcu_irq_exit(void) 521void rcu_irq_exit(void)
497{ 522{
498 int cpu = smp_processor_id(); 523 int cpu = smp_processor_id();
524 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
499 525
500 /* 526 /*
501 * rcu_update_flag is set if we interrupted the CPU 527 * rcu_update_flag is set if we interrupted the CPU
@@ -503,7 +529,7 @@ void rcu_irq_exit(void)
503 * Once this occurs, we keep track of interrupt nesting 529 * Once this occurs, we keep track of interrupt nesting
504 * because a NMI/SMI could also come in, and we still 530 * because a NMI/SMI could also come in, and we still
505 * only want the IRQ that started the increment of the 531 * only want the IRQ that started the increment of the
506 * dynticks_progress_counter to be the one that modifies 532 * rcu_dyntick_sched.dynticks to be the one that modifies
507 * it on exit. 533 * it on exit.
508 */ 534 */
509 if (per_cpu(rcu_update_flag, cpu)) { 535 if (per_cpu(rcu_update_flag, cpu)) {
@@ -515,28 +541,29 @@ void rcu_irq_exit(void)
515 541
516 /* 542 /*
517 * If an NMI/SMI happens now we are still 543 * If an NMI/SMI happens now we are still
518 * protected by the dynticks_progress_counter being odd. 544 * protected by the rcu_dyntick_sched.dynticks being odd.
519 */ 545 */
520 546
521 /* 547 /*
522 * The following memory barrier ensures that any 548 * The following memory barrier ensures that any
523 * rcu_read_unlock() primitives in the irq handler 549 * rcu_read_unlock() primitives in the irq handler
524 * are seen by other CPUs to precede the following 550 * are seen by other CPUs to precede the following
525 * increment to dynticks_progress_counter. This 551 * increment to rcu_dyntick_sched.dynticks. This
526 * is required in order for other CPUs to determine 552 * is required in order for other CPUs to determine
527 * when it is safe to advance the RCU grace-period 553 * when it is safe to advance the RCU grace-period
528 * state machine. 554 * state machine.
529 */ 555 */
530 smp_mb(); /* see above block comment. */ 556 smp_mb(); /* see above block comment. */
531 per_cpu(dynticks_progress_counter, cpu)++; 557 rdssp->dynticks++;
532 WARN_ON(per_cpu(dynticks_progress_counter, cpu) & 0x1); 558 WARN_ON(rdssp->dynticks & 0x1);
533 } 559 }
534} 560}
535 561
536static void dyntick_save_progress_counter(int cpu) 562static void dyntick_save_progress_counter(int cpu)
537{ 563{
538 per_cpu(rcu_dyntick_snapshot, cpu) = 564 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
539 per_cpu(dynticks_progress_counter, cpu); 565
566 rdssp->dynticks_snap = rdssp->dynticks;
540} 567}
541 568
542static inline int 569static inline int
@@ -544,9 +571,10 @@ rcu_try_flip_waitack_needed(int cpu)
544{ 571{
545 long curr; 572 long curr;
546 long snap; 573 long snap;
574 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
547 575
548 curr = per_cpu(dynticks_progress_counter, cpu); 576 curr = rdssp->dynticks;
549 snap = per_cpu(rcu_dyntick_snapshot, cpu); 577 snap = rdssp->dynticks_snap;
550 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 578 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
551 579
552 /* 580 /*
@@ -567,7 +595,7 @@ rcu_try_flip_waitack_needed(int cpu)
567 * that this CPU already acknowledged the counter. 595 * that this CPU already acknowledged the counter.
568 */ 596 */
569 597
570 if ((curr - snap) > 2 || (snap & 0x1) == 0) 598 if ((curr - snap) > 2 || (curr & 0x1) == 0)
571 return 0; 599 return 0;
572 600
573 /* We need this CPU to explicitly acknowledge the counter flip. */ 601 /* We need this CPU to explicitly acknowledge the counter flip. */
@@ -580,9 +608,10 @@ rcu_try_flip_waitmb_needed(int cpu)
580{ 608{
581 long curr; 609 long curr;
582 long snap; 610 long snap;
611 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
583 612
584 curr = per_cpu(dynticks_progress_counter, cpu); 613 curr = rdssp->dynticks;
585 snap = per_cpu(rcu_dyntick_snapshot, cpu); 614 snap = rdssp->dynticks_snap;
586 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */ 615 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
587 616
588 /* 617 /*
@@ -609,14 +638,86 @@ rcu_try_flip_waitmb_needed(int cpu)
609 return 1; 638 return 1;
610} 639}
611 640
641static void dyntick_save_progress_counter_sched(int cpu)
642{
643 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
644
645 rdssp->sched_dynticks_snap = rdssp->dynticks;
646}
647
648static int rcu_qsctr_inc_needed_dyntick(int cpu)
649{
650 long curr;
651 long snap;
652 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
653
654 curr = rdssp->dynticks;
655 snap = rdssp->sched_dynticks_snap;
656 smp_mb(); /* force ordering with cpu entering/leaving dynticks. */
657
658 /*
659 * If the CPU remained in dynticks mode for the entire time
660 * and didn't take any interrupts, NMIs, SMIs, or whatever,
661 * then it cannot be in the middle of an rcu_read_lock(), so
662 * the next rcu_read_lock() it executes must use the new value
663 * of the counter. Therefore, this CPU has been in a quiescent
664 * state the entire time, and we don't need to wait for it.
665 */
666
667 if ((curr == snap) && ((curr & 0x1) == 0))
668 return 0;
669
670 /*
671 * If the CPU passed through or entered a dynticks idle phase with
672 * no active irq handlers, then, as above, this CPU has already
673 * passed through a quiescent state.
674 */
675
676 if ((curr - snap) > 2 || (snap & 0x1) == 0)
677 return 0;
678
679 /* We need this CPU to go through a quiescent state. */
680
681 return 1;
682}
683
612#else /* !CONFIG_NO_HZ */ 684#else /* !CONFIG_NO_HZ */
613 685
614# define dyntick_save_progress_counter(cpu) do { } while (0) 686# define dyntick_save_progress_counter(cpu) do { } while (0)
615# define rcu_try_flip_waitack_needed(cpu) (1) 687# define rcu_try_flip_waitack_needed(cpu) (1)
616# define rcu_try_flip_waitmb_needed(cpu) (1) 688# define rcu_try_flip_waitmb_needed(cpu) (1)
689
690# define dyntick_save_progress_counter_sched(cpu) do { } while (0)
691# define rcu_qsctr_inc_needed_dyntick(cpu) (1)
617 692
618#endif /* CONFIG_NO_HZ */ 693#endif /* CONFIG_NO_HZ */
619 694
695static void save_qsctr_sched(int cpu)
696{
697 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
698
699 rdssp->sched_qs_snap = rdssp->sched_qs;
700}
701
702static inline int rcu_qsctr_inc_needed(int cpu)
703{
704 struct rcu_dyntick_sched *rdssp = &per_cpu(rcu_dyntick_sched, cpu);
705
706 /*
707 * If there has been a quiescent state, no more need to wait
708 * on this CPU.
709 */
710
711 if (rdssp->sched_qs != rdssp->sched_qs_snap) {
712 smp_mb(); /* force ordering with cpu entering schedule(). */
713 return 0;
714 }
715
716 /* We need this CPU to go through a quiescent state. */
717
718 return 1;
719}
720
620/* 721/*
621 * Get here when RCU is idle. Decide whether we need to 722 * Get here when RCU is idle. Decide whether we need to
622 * move out of idle state, and return non-zero if so. 723 * move out of idle state, and return non-zero if so.
@@ -819,6 +920,26 @@ void rcu_check_callbacks(int cpu, int user)
819 unsigned long flags; 920 unsigned long flags;
820 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 921 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
821 922
923 /*
924 * If this CPU took its interrupt from user mode or from the
925 * idle loop, and this is not a nested interrupt, then
926 * this CPU has to have exited all prior preempt-disable
927 * sections of code. So increment the counter to note this.
928 *
929 * The memory barrier is needed to handle the case where
930 * writes from a preempt-disable section of code get reordered
931 * into schedule() by this CPU's write buffer. So the memory
932 * barrier makes sure that the rcu_qsctr_inc() is seen by other
933 * CPUs to happen after any such write.
934 */
935
936 if (user ||
937 (idle_cpu(cpu) && !in_softirq() &&
938 hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
939 smp_mb(); /* Guard against aggressive schedule(). */
940 rcu_qsctr_inc(cpu);
941 }
942
822 rcu_check_mb(cpu); 943 rcu_check_mb(cpu);
823 if (rcu_ctrlblk.completed == rdp->completed) 944 if (rcu_ctrlblk.completed == rdp->completed)
824 rcu_try_flip(); 945 rcu_try_flip();
@@ -869,6 +990,8 @@ void rcu_offline_cpu(int cpu)
869 struct rcu_head *list = NULL; 990 struct rcu_head *list = NULL;
870 unsigned long flags; 991 unsigned long flags;
871 struct rcu_data *rdp = RCU_DATA_CPU(cpu); 992 struct rcu_data *rdp = RCU_DATA_CPU(cpu);
993 struct rcu_head *schedlist = NULL;
994 struct rcu_head **schedtail = &schedlist;
872 struct rcu_head **tail = &list; 995 struct rcu_head **tail = &list;
873 996
874 /* 997 /*
@@ -882,6 +1005,11 @@ void rcu_offline_cpu(int cpu)
882 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i], 1005 rcu_offline_cpu_enqueue(rdp->waitlist[i], rdp->waittail[i],
883 list, tail); 1006 list, tail);
884 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail); 1007 rcu_offline_cpu_enqueue(rdp->nextlist, rdp->nexttail, list, tail);
1008 rcu_offline_cpu_enqueue(rdp->waitschedlist, rdp->waitschedtail,
1009 schedlist, schedtail);
1010 rcu_offline_cpu_enqueue(rdp->nextschedlist, rdp->nextschedtail,
1011 schedlist, schedtail);
1012 rdp->rcu_sched_sleeping = 0;
885 spin_unlock_irqrestore(&rdp->lock, flags); 1013 spin_unlock_irqrestore(&rdp->lock, flags);
886 rdp->waitlistcount = 0; 1014 rdp->waitlistcount = 0;
887 1015
@@ -916,12 +1044,15 @@ void rcu_offline_cpu(int cpu)
916 * fix. 1044 * fix.
917 */ 1045 */
918 1046
919 local_irq_save(flags); 1047 local_irq_save(flags); /* disable preempt till we know what lock. */
920 rdp = RCU_DATA_ME(); 1048 rdp = RCU_DATA_ME();
921 spin_lock(&rdp->lock); 1049 spin_lock(&rdp->lock);
922 *rdp->nexttail = list; 1050 *rdp->nexttail = list;
923 if (list) 1051 if (list)
924 rdp->nexttail = tail; 1052 rdp->nexttail = tail;
1053 *rdp->nextschedtail = schedlist;
1054 if (schedlist)
1055 rdp->nextschedtail = schedtail;
925 spin_unlock_irqrestore(&rdp->lock, flags); 1056 spin_unlock_irqrestore(&rdp->lock, flags);
926} 1057}
927 1058
@@ -936,10 +1067,25 @@ void rcu_offline_cpu(int cpu)
936void __cpuinit rcu_online_cpu(int cpu) 1067void __cpuinit rcu_online_cpu(int cpu)
937{ 1068{
938 unsigned long flags; 1069 unsigned long flags;
1070 struct rcu_data *rdp;
939 1071
940 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags); 1072 spin_lock_irqsave(&rcu_ctrlblk.fliplock, flags);
941 cpu_set(cpu, rcu_cpu_online_map); 1073 cpu_set(cpu, rcu_cpu_online_map);
942 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags); 1074 spin_unlock_irqrestore(&rcu_ctrlblk.fliplock, flags);
1075
1076 /*
1077 * The rcu_sched grace-period processing might have bypassed
1078 * this CPU, given that it was not in the rcu_cpu_online_map
1079 * when the grace-period scan started. This means that the
1080 * grace-period task might sleep. So make sure that if this
1081 * should happen, the first callback posted to this CPU will
1082 * wake up the grace-period task if need be.
1083 */
1084
1085 rdp = RCU_DATA_CPU(cpu);
1086 spin_lock_irqsave(&rdp->lock, flags);
1087 rdp->rcu_sched_sleeping = 1;
1088 spin_unlock_irqrestore(&rdp->lock, flags);
943} 1089}
944 1090
945static void rcu_process_callbacks(struct softirq_action *unused) 1091static void rcu_process_callbacks(struct softirq_action *unused)
@@ -982,31 +1128,196 @@ void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
982 *rdp->nexttail = head; 1128 *rdp->nexttail = head;
983 rdp->nexttail = &head->next; 1129 rdp->nexttail = &head->next;
984 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp); 1130 RCU_TRACE_RDP(rcupreempt_trace_next_add, rdp);
985 spin_unlock(&rdp->lock); 1131 spin_unlock_irqrestore(&rdp->lock, flags);
986 local_irq_restore(flags);
987} 1132}
988EXPORT_SYMBOL_GPL(call_rcu); 1133EXPORT_SYMBOL_GPL(call_rcu);
989 1134
1135void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1136{
1137 unsigned long flags;
1138 struct rcu_data *rdp;
1139 int wake_gp = 0;
1140
1141 head->func = func;
1142 head->next = NULL;
1143 local_irq_save(flags);
1144 rdp = RCU_DATA_ME();
1145 spin_lock(&rdp->lock);
1146 *rdp->nextschedtail = head;
1147 rdp->nextschedtail = &head->next;
1148 if (rdp->rcu_sched_sleeping) {
1149
1150 /* Grace-period processing might be sleeping... */
1151
1152 rdp->rcu_sched_sleeping = 0;
1153 wake_gp = 1;
1154 }
1155 spin_unlock_irqrestore(&rdp->lock, flags);
1156 if (wake_gp) {
1157
1158 /* Wake up grace-period processing, unless someone beat us. */
1159
1160 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1161 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleeping)
1162 wake_gp = 0;
1163 rcu_ctrlblk.sched_sleep = rcu_sched_not_sleeping;
1164 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1165 if (wake_gp)
1166 wake_up_interruptible(&rcu_ctrlblk.sched_wq);
1167 }
1168}
1169EXPORT_SYMBOL_GPL(call_rcu_sched);
1170
990/* 1171/*
991 * Wait until all currently running preempt_disable() code segments 1172 * Wait until all currently running preempt_disable() code segments
992 * (including hardware-irq-disable segments) complete. Note that 1173 * (including hardware-irq-disable segments) complete. Note that
993 * in -rt this does -not- necessarily result in all currently executing 1174 * in -rt this does -not- necessarily result in all currently executing
994 * interrupt -handlers- having completed. 1175 * interrupt -handlers- having completed.
995 */ 1176 */
996void __synchronize_sched(void) 1177synchronize_rcu_xxx(__synchronize_sched, call_rcu_sched)
1178EXPORT_SYMBOL_GPL(__synchronize_sched);
1179
1180/*
1181 * kthread function that manages call_rcu_sched grace periods.
1182 */
1183static int rcu_sched_grace_period(void *arg)
997{ 1184{
998 cpumask_t oldmask; 1185 int couldsleep; /* might sleep after current pass. */
1186 int couldsleepnext = 0; /* might sleep after next pass. */
999 int cpu; 1187 int cpu;
1188 unsigned long flags;
1189 struct rcu_data *rdp;
1190 int ret;
1000 1191
1001 if (sched_getaffinity(0, &oldmask) < 0) 1192 /*
1002 oldmask = cpu_possible_map; 1193 * Each pass through the following loop handles one
1003 for_each_online_cpu(cpu) { 1194 * rcu_sched grace period cycle.
1004 sched_setaffinity(0, &cpumask_of_cpu(cpu)); 1195 */
1005 schedule(); 1196 do {
1006 } 1197 /* Save each CPU's current state. */
1007 sched_setaffinity(0, &oldmask); 1198
1199 for_each_online_cpu(cpu) {
1200 dyntick_save_progress_counter_sched(cpu);
1201 save_qsctr_sched(cpu);
1202 }
1203
1204 /*
1205 * Sleep for about an RCU grace-period's worth to
1206 * allow better batching and to consume less CPU.
1207 */
1208 schedule_timeout_interruptible(RCU_SCHED_BATCH_TIME);
1209
1210 /*
1211 * If there was nothing to do last time, prepare to
1212 * sleep at the end of the current grace period cycle.
1213 */
1214 couldsleep = couldsleepnext;
1215 couldsleepnext = 1;
1216 if (couldsleep) {
1217 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1218 rcu_ctrlblk.sched_sleep = rcu_sched_sleep_prep;
1219 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1220 }
1221
1222 /*
1223 * Wait on each CPU in turn to have either visited
1224 * a quiescent state or been in dynticks-idle mode.
1225 */
1226 for_each_online_cpu(cpu) {
1227 while (rcu_qsctr_inc_needed(cpu) &&
1228 rcu_qsctr_inc_needed_dyntick(cpu)) {
1229 /* resched_cpu(cpu); @@@ */
1230 schedule_timeout_interruptible(1);
1231 }
1232 }
1233
1234 /* Advance callbacks for each CPU. */
1235
1236 for_each_online_cpu(cpu) {
1237
1238 rdp = RCU_DATA_CPU(cpu);
1239 spin_lock_irqsave(&rdp->lock, flags);
1240
1241 /*
1242 * We are running on this CPU irq-disabled, so no
1243 * CPU can go offline until we re-enable irqs.
1244 * The current CPU might have already gone
1245 * offline (between the for_each_online_cpu and
1246 * the spin_lock_irqsave), but in that case all its
1247 * callback lists will be empty, so no harm done.
1248 *
1249 * Advance the callbacks! We share normal RCU's
1250 * donelist, since callbacks are invoked the
1251 * same way in either case.
1252 */
1253 if (rdp->waitschedlist != NULL) {
1254 *rdp->donetail = rdp->waitschedlist;
1255 rdp->donetail = rdp->waitschedtail;
1256
1257 /*
1258 * Next rcu_check_callbacks() will
1259 * do the required raise_softirq().
1260 */
1261 }
1262 if (rdp->nextschedlist != NULL) {
1263 rdp->waitschedlist = rdp->nextschedlist;
1264 rdp->waitschedtail = rdp->nextschedtail;
1265 couldsleep = 0;
1266 couldsleepnext = 0;
1267 } else {
1268 rdp->waitschedlist = NULL;
1269 rdp->waitschedtail = &rdp->waitschedlist;
1270 }
1271 rdp->nextschedlist = NULL;
1272 rdp->nextschedtail = &rdp->nextschedlist;
1273
1274 /* Mark sleep intention. */
1275
1276 rdp->rcu_sched_sleeping = couldsleep;
1277
1278 spin_unlock_irqrestore(&rdp->lock, flags);
1279 }
1280
1281 /* If we saw callbacks on the last scan, go deal with them. */
1282
1283 if (!couldsleep)
1284 continue;
1285
1286 /* Attempt to block... */
1287
1288 spin_lock_irqsave(&rcu_ctrlblk.schedlock, flags);
1289 if (rcu_ctrlblk.sched_sleep != rcu_sched_sleep_prep) {
1290
1291 /*
1292 * Someone posted a callback after we scanned.
1293 * Go take care of it.
1294 */
1295 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1296 couldsleepnext = 0;
1297 continue;
1298 }
1299
1300 /* Block until the next person posts a callback. */
1301
1302 rcu_ctrlblk.sched_sleep = rcu_sched_sleeping;
1303 spin_unlock_irqrestore(&rcu_ctrlblk.schedlock, flags);
1304 ret = 0;
1305 __wait_event_interruptible(rcu_ctrlblk.sched_wq,
1306 rcu_ctrlblk.sched_sleep != rcu_sched_sleeping,
1307 ret);
1308
1309 /*
1310 * Signals would prevent us from sleeping, and we cannot
1311 * do much with them in any case. So flush them.
1312 */
1313 if (ret)
1314 flush_signals(current);
1315 couldsleepnext = 0;
1316
1317 } while (!kthread_should_stop());
1318
1319 return (0);
1008} 1320}
1009EXPORT_SYMBOL_GPL(__synchronize_sched);
1010 1321
1011/* 1322/*
1012 * Check to see if any future RCU-related work will need to be done 1323 * Check to see if any future RCU-related work will need to be done
@@ -1023,7 +1334,9 @@ int rcu_needs_cpu(int cpu)
1023 1334
1024 return (rdp->donelist != NULL || 1335 return (rdp->donelist != NULL ||
1025 !!rdp->waitlistcount || 1336 !!rdp->waitlistcount ||
1026 rdp->nextlist != NULL); 1337 rdp->nextlist != NULL ||
1338 rdp->nextschedlist != NULL ||
1339 rdp->waitschedlist != NULL);
1027} 1340}
1028 1341
1029int rcu_pending(int cpu) 1342int rcu_pending(int cpu)
@@ -1034,7 +1347,9 @@ int rcu_pending(int cpu)
1034 1347
1035 if (rdp->donelist != NULL || 1348 if (rdp->donelist != NULL ||
1036 !!rdp->waitlistcount || 1349 !!rdp->waitlistcount ||
1037 rdp->nextlist != NULL) 1350 rdp->nextlist != NULL ||
1351 rdp->nextschedlist != NULL ||
1352 rdp->waitschedlist != NULL)
1038 return 1; 1353 return 1;
1039 1354
1040 /* The RCU core needs an acknowledgement from this CPU. */ 1355 /* The RCU core needs an acknowledgement from this CPU. */
@@ -1101,6 +1416,11 @@ void __init __rcu_init(void)
1101 rdp->donetail = &rdp->donelist; 1416 rdp->donetail = &rdp->donelist;
1102 rdp->rcu_flipctr[0] = 0; 1417 rdp->rcu_flipctr[0] = 0;
1103 rdp->rcu_flipctr[1] = 0; 1418 rdp->rcu_flipctr[1] = 0;
1419 rdp->nextschedlist = NULL;
1420 rdp->nextschedtail = &rdp->nextschedlist;
1421 rdp->waitschedlist = NULL;
1422 rdp->waitschedtail = &rdp->waitschedlist;
1423 rdp->rcu_sched_sleeping = 0;
1104 } 1424 }
1105 register_cpu_notifier(&rcu_nb); 1425 register_cpu_notifier(&rcu_nb);
1106 1426
@@ -1123,11 +1443,15 @@ void __init __rcu_init(void)
1123} 1443}
1124 1444
1125/* 1445/*
1126 * Deprecated, use synchronize_rcu() or synchronize_sched() instead. 1446 * Late-boot-time RCU initialization that must wait until after scheduler
1447 * has been initialized.
1127 */ 1448 */
1128void synchronize_kernel(void) 1449void __init rcu_init_sched(void)
1129{ 1450{
1130 synchronize_rcu(); 1451 rcu_sched_grace_period_task = kthread_run(rcu_sched_grace_period,
1452 NULL,
1453 "rcu_sched_grace_period");
1454 WARN_ON(IS_ERR(rcu_sched_grace_period_task));
1131} 1455}
1132 1456
1133#ifdef CONFIG_RCU_TRACE 1457#ifdef CONFIG_RCU_TRACE