author     Andrea Bastoni <bastoni@cs.unc.edu>   2010-05-30 19:16:45 -0400
committer  Andrea Bastoni <bastoni@cs.unc.edu>   2010-05-30 19:16:45 -0400
commit     ada47b5fe13d89735805b566185f4885f5a3f750 (patch)
tree       644b88f8a71896307d71438e9b3af49126ffb22b /kernel/rcutree.c
parent     43e98717ad40a4ae64545b5ba047c7b86aa44f4f (diff)
parent     3280f21d43ee541f97f8cda5792150d2dbec20d5 (diff)
Merge branch 'wip-2.6.34' into old-private-master (archived-private-master)
Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--  kernel/rcutree.c  627
1 file changed, 390 insertions, 237 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index f3077c0ab181..3ec8160fc75f 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -51,22 +51,25 @@
51 51
52/* Data structures. */ 52/* Data structures. */
53 53
54static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
55
54#define RCU_STATE_INITIALIZER(name) { \ 56#define RCU_STATE_INITIALIZER(name) { \
55 .level = { &name.node[0] }, \ 57 .level = { &name.node[0] }, \
56 .levelcnt = { \ 58 .levelcnt = { \
57 NUM_RCU_LVL_0, /* root of hierarchy. */ \ 59 NUM_RCU_LVL_0, /* root of hierarchy. */ \
58 NUM_RCU_LVL_1, \ 60 NUM_RCU_LVL_1, \
59 NUM_RCU_LVL_2, \ 61 NUM_RCU_LVL_2, \
60 NUM_RCU_LVL_3, /* == MAX_RCU_LVLS */ \ 62 NUM_RCU_LVL_3, \
63 NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
61 }, \ 64 }, \
62 .signaled = RCU_GP_IDLE, \ 65 .signaled = RCU_GP_IDLE, \
63 .gpnum = -300, \ 66 .gpnum = -300, \
64 .completed = -300, \ 67 .completed = -300, \
65 .onofflock = __SPIN_LOCK_UNLOCKED(&name.onofflock), \ 68 .onofflock = __RAW_SPIN_LOCK_UNLOCKED(&name.onofflock), \
66 .orphan_cbs_list = NULL, \ 69 .orphan_cbs_list = NULL, \
67 .orphan_cbs_tail = &name.orphan_cbs_list, \ 70 .orphan_cbs_tail = &name.orphan_cbs_list, \
68 .orphan_qlen = 0, \ 71 .orphan_qlen = 0, \
69 .fqslock = __SPIN_LOCK_UNLOCKED(&name.fqslock), \ 72 .fqslock = __RAW_SPIN_LOCK_UNLOCKED(&name.fqslock), \
70 .n_force_qs = 0, \ 73 .n_force_qs = 0, \
71 .n_force_qs_ngp = 0, \ 74 .n_force_qs_ngp = 0, \
72} 75}
@@ -77,7 +80,6 @@ DEFINE_PER_CPU(struct rcu_data, rcu_sched_data);
77struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state); 80struct rcu_state rcu_bh_state = RCU_STATE_INITIALIZER(rcu_bh_state);
78DEFINE_PER_CPU(struct rcu_data, rcu_bh_data); 81DEFINE_PER_CPU(struct rcu_data, rcu_bh_data);
79 82
80
81/* 83/*
82 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s 84 * Return true if an RCU grace period is in progress. The ACCESS_ONCE()s
83 * permit this function to be invoked without holding the root rcu_node 85 * permit this function to be invoked without holding the root rcu_node
@@ -98,7 +100,7 @@ void rcu_sched_qs(int cpu)
98 struct rcu_data *rdp; 100 struct rcu_data *rdp;
99 101
100 rdp = &per_cpu(rcu_sched_data, cpu); 102 rdp = &per_cpu(rcu_sched_data, cpu);
101 rdp->passed_quiesc_completed = rdp->completed; 103 rdp->passed_quiesc_completed = rdp->gpnum - 1;
102 barrier(); 104 barrier();
103 rdp->passed_quiesc = 1; 105 rdp->passed_quiesc = 1;
104 rcu_preempt_note_context_switch(cpu); 106 rcu_preempt_note_context_switch(cpu);
@@ -109,7 +111,7 @@ void rcu_bh_qs(int cpu)
109 struct rcu_data *rdp; 111 struct rcu_data *rdp;
110 112
111 rdp = &per_cpu(rcu_bh_data, cpu); 113 rdp = &per_cpu(rcu_bh_data, cpu);
112 rdp->passed_quiesc_completed = rdp->completed; 114 rdp->passed_quiesc_completed = rdp->gpnum - 1;
113 barrier(); 115 barrier();
114 rdp->passed_quiesc = 1; 116 rdp->passed_quiesc = 1;
115} 117}
@@ -151,6 +153,24 @@ long rcu_batches_completed_bh(void)
151EXPORT_SYMBOL_GPL(rcu_batches_completed_bh); 153EXPORT_SYMBOL_GPL(rcu_batches_completed_bh);
152 154
153/* 155/*
156 * Force a quiescent state for RCU BH.
157 */
158void rcu_bh_force_quiescent_state(void)
159{
160 force_quiescent_state(&rcu_bh_state, 0);
161}
162EXPORT_SYMBOL_GPL(rcu_bh_force_quiescent_state);
163
164/*
165 * Force a quiescent state for RCU-sched.
166 */
167void rcu_sched_force_quiescent_state(void)
168{
169 force_quiescent_state(&rcu_sched_state, 0);
170}
171EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
172
173/*
154 * Does the CPU have callbacks ready to be invoked? 174 * Does the CPU have callbacks ready to be invoked?
155 */ 175 */
156static int 176static int
@@ -335,28 +355,9 @@ void rcu_irq_exit(void)
335 set_need_resched(); 355 set_need_resched();
336} 356}
337 357
338/*
339 * Record the specified "completed" value, which is later used to validate
340 * dynticks counter manipulations. Specify "rsp->completed - 1" to
341 * unconditionally invalidate any future dynticks manipulations (which is
342 * useful at the beginning of a grace period).
343 */
344static void dyntick_record_completed(struct rcu_state *rsp, long comp)
345{
346 rsp->dynticks_completed = comp;
347}
348
349#ifdef CONFIG_SMP 358#ifdef CONFIG_SMP
350 359
351/* 360/*
352 * Recall the previously recorded value of the completion for dynticks.
353 */
354static long dyntick_recall_completed(struct rcu_state *rsp)
355{
356 return rsp->dynticks_completed;
357}
358
359/*
360 * Snapshot the specified CPU's dynticks counter so that we can later 361 * Snapshot the specified CPU's dynticks counter so that we can later
361 * credit them with an implicit quiescent state. Return 1 if this CPU 362 * credit them with an implicit quiescent state. Return 1 if this CPU
362 * is in dynticks idle mode, which is an extended quiescent state. 363 * is in dynticks idle mode, which is an extended quiescent state.
@@ -419,24 +420,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
419 420
420#else /* #ifdef CONFIG_NO_HZ */ 421#else /* #ifdef CONFIG_NO_HZ */
421 422
422static void dyntick_record_completed(struct rcu_state *rsp, long comp)
423{
424}
425
426#ifdef CONFIG_SMP 423#ifdef CONFIG_SMP
427 424
428/*
429 * If there are no dynticks, then the only way that a CPU can passively
430 * be in a quiescent state is to be offline. Unlike dynticks idle, which
431 * is a point in time during the prior (already finished) grace period,
432 * an offline CPU is always in a quiescent state, and thus can be
433 * unconditionally applied. So just return the current value of completed.
434 */
435static long dyntick_recall_completed(struct rcu_state *rsp)
436{
437 return rsp->completed;
438}
439
440static int dyntick_save_progress_counter(struct rcu_data *rdp) 425static int dyntick_save_progress_counter(struct rcu_data *rdp)
441{ 426{
442 return 0; 427 return 0;
@@ -468,10 +453,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
468 453
469 /* Only let one CPU complain about others per time interval. */ 454 /* Only let one CPU complain about others per time interval. */
470 455
471 spin_lock_irqsave(&rnp->lock, flags); 456 raw_spin_lock_irqsave(&rnp->lock, flags);
472 delta = jiffies - rsp->jiffies_stall; 457 delta = jiffies - rsp->jiffies_stall;
473 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) { 458 if (delta < RCU_STALL_RAT_DELAY || !rcu_gp_in_progress(rsp)) {
474 spin_unlock_irqrestore(&rnp->lock, flags); 459 raw_spin_unlock_irqrestore(&rnp->lock, flags);
475 return; 460 return;
476 } 461 }
477 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 462 rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
@@ -481,13 +466,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
481 * due to CPU offlining. 466 * due to CPU offlining.
482 */ 467 */
483 rcu_print_task_stall(rnp); 468 rcu_print_task_stall(rnp);
484 spin_unlock_irqrestore(&rnp->lock, flags); 469 raw_spin_unlock_irqrestore(&rnp->lock, flags);
485 470
486 /* OK, time to rat on our buddy... */ 471 /* OK, time to rat on our buddy... */
487 472
488 printk(KERN_ERR "INFO: RCU detected CPU stalls:"); 473 printk(KERN_ERR "INFO: RCU detected CPU stalls:");
489 rcu_for_each_leaf_node(rsp, rnp) { 474 rcu_for_each_leaf_node(rsp, rnp) {
475 raw_spin_lock_irqsave(&rnp->lock, flags);
490 rcu_print_task_stall(rnp); 476 rcu_print_task_stall(rnp);
477 raw_spin_unlock_irqrestore(&rnp->lock, flags);
491 if (rnp->qsmask == 0) 478 if (rnp->qsmask == 0)
492 continue; 479 continue;
493 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++) 480 for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
@@ -498,6 +485,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
498 smp_processor_id(), (long)(jiffies - rsp->gp_start)); 485 smp_processor_id(), (long)(jiffies - rsp->gp_start));
499 trigger_all_cpu_backtrace(); 486 trigger_all_cpu_backtrace();
500 487
488 /* If so configured, complain about tasks blocking the grace period. */
489
490 rcu_print_detail_task_stall(rsp);
491
501 force_quiescent_state(rsp, 0); /* Kick them all. */ 492 force_quiescent_state(rsp, 0); /* Kick them all. */
502} 493}
503 494
@@ -510,11 +501,11 @@ static void print_cpu_stall(struct rcu_state *rsp)
510 smp_processor_id(), jiffies - rsp->gp_start); 501 smp_processor_id(), jiffies - rsp->gp_start);
511 trigger_all_cpu_backtrace(); 502 trigger_all_cpu_backtrace();
512 503
513 spin_lock_irqsave(&rnp->lock, flags); 504 raw_spin_lock_irqsave(&rnp->lock, flags);
514 if ((long)(jiffies - rsp->jiffies_stall) >= 0) 505 if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
515 rsp->jiffies_stall = 506 rsp->jiffies_stall =
516 jiffies + RCU_SECONDS_TILL_STALL_RECHECK; 507 jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
517 spin_unlock_irqrestore(&rnp->lock, flags); 508 raw_spin_unlock_irqrestore(&rnp->lock, flags);
518 509
519 set_need_resched(); /* kick ourselves to get things going. */ 510 set_need_resched(); /* kick ourselves to get things going. */
520} 511}
@@ -553,13 +544,33 @@ static void check_cpu_stall(struct rcu_state *rsp, struct rcu_data *rdp)
553/* 544/*
554 * Update CPU-local rcu_data state to record the newly noticed grace period. 545 * Update CPU-local rcu_data state to record the newly noticed grace period.
555 * This is used both when we started the grace period and when we notice 546 * This is used both when we started the grace period and when we notice
556 * that someone else started the grace period. 547 * that someone else started the grace period. The caller must hold the
548 * ->lock of the leaf rcu_node structure corresponding to the current CPU,
549 * and must have irqs disabled.
557 */ 550 */
551static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
552{
553 if (rdp->gpnum != rnp->gpnum) {
554 rdp->qs_pending = 1;
555 rdp->passed_quiesc = 0;
556 rdp->gpnum = rnp->gpnum;
557 }
558}
559
558static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp) 560static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
559{ 561{
560 rdp->qs_pending = 1; 562 unsigned long flags;
561 rdp->passed_quiesc = 0; 563 struct rcu_node *rnp;
562 rdp->gpnum = rsp->gpnum; 564
565 local_irq_save(flags);
566 rnp = rdp->mynode;
567 if (rdp->gpnum == ACCESS_ONCE(rnp->gpnum) || /* outside lock. */
568 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
569 local_irq_restore(flags);
570 return;
571 }
572 __note_new_gpnum(rsp, rnp, rdp);
573 raw_spin_unlock_irqrestore(&rnp->lock, flags);
563} 574}
564 575
565/* 576/*
@@ -583,31 +594,59 @@ check_for_new_grace_period(struct rcu_state *rsp, struct rcu_data *rdp)
583} 594}
584 595
585/* 596/*
586 * Start a new RCU grace period if warranted, re-initializing the hierarchy 597 * Advance this CPU's callbacks, but only if the current grace period
587 * in preparation for detecting the next grace period. The caller must hold 598 * has ended. This may be called only from the CPU to whom the rdp
588 * the root node's ->lock, which is released before return. Hard irqs must 599 * belongs. In addition, the corresponding leaf rcu_node structure's
589 * be disabled. 600 * ->lock must be held by the caller, with irqs disabled.
590 */ 601 */
591static void 602static void
592rcu_start_gp(struct rcu_state *rsp, unsigned long flags) 603__rcu_process_gp_end(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
593 __releases(rcu_get_root(rsp)->lock)
594{ 604{
595 struct rcu_data *rdp = rsp->rda[smp_processor_id()]; 605 /* Did another grace period end? */
596 struct rcu_node *rnp = rcu_get_root(rsp); 606 if (rdp->completed != rnp->completed) {
607
608 /* Advance callbacks. No harm if list empty. */
609 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
610 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
611 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
612
613 /* Remember that we saw this grace-period completion. */
614 rdp->completed = rnp->completed;
615 }
616}
617
618/*
619 * Advance this CPU's callbacks, but only if the current grace period
620 * has ended. This may be called only from the CPU to whom the rdp
621 * belongs.
622 */
623static void
624rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
625{
626 unsigned long flags;
627 struct rcu_node *rnp;
597 628
598 if (!cpu_needs_another_gp(rsp, rdp)) { 629 local_irq_save(flags);
599 spin_unlock_irqrestore(&rnp->lock, flags); 630 rnp = rdp->mynode;
631 if (rdp->completed == ACCESS_ONCE(rnp->completed) || /* outside lock. */
632 !raw_spin_trylock(&rnp->lock)) { /* irqs already off, so later. */
633 local_irq_restore(flags);
600 return; 634 return;
601 } 635 }
636 __rcu_process_gp_end(rsp, rnp, rdp);
637 raw_spin_unlock_irqrestore(&rnp->lock, flags);
638}
602 639
603 /* Advance to a new grace period and initialize state. */ 640/*
604 rsp->gpnum++; 641 * Do per-CPU grace-period initialization for running CPU. The caller
605 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT); 642 * must hold the lock of the leaf rcu_node structure corresponding to
606 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 643 * this CPU.
607 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 644 */
608 record_gp_stall_check_time(rsp); 645static void
609 dyntick_record_completed(rsp, rsp->completed - 1); 646rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_data *rdp)
610 note_new_gpnum(rsp, rdp); 647{
648 /* Prior grace period ended, so advance callbacks for current CPU. */
649 __rcu_process_gp_end(rsp, rnp, rdp);
611 650
612 /* 651 /*
613 * Because this CPU just now started the new grace period, we know 652 * Because this CPU just now started the new grace period, we know
@@ -623,21 +662,70 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
623 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 662 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
624 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 663 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
625 664
665 /* Set state so that this CPU will detect the next quiescent state. */
666 __note_new_gpnum(rsp, rnp, rdp);
667}
668
669/*
670 * Start a new RCU grace period if warranted, re-initializing the hierarchy
671 * in preparation for detecting the next grace period. The caller must hold
672 * the root node's ->lock, which is released before return. Hard irqs must
673 * be disabled.
674 */
675static void
676rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
677 __releases(rcu_get_root(rsp)->lock)
678{
679 struct rcu_data *rdp = rsp->rda[smp_processor_id()];
680 struct rcu_node *rnp = rcu_get_root(rsp);
681
682 if (!cpu_needs_another_gp(rsp, rdp) || rsp->fqs_active) {
683 if (cpu_needs_another_gp(rsp, rdp))
684 rsp->fqs_need_gp = 1;
685 if (rnp->completed == rsp->completed) {
686 raw_spin_unlock_irqrestore(&rnp->lock, flags);
687 return;
688 }
689 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
690
691 /*
692 * Propagate new ->completed value to rcu_node structures
693 * so that other CPUs don't have to wait until the start
694 * of the next grace period to process their callbacks.
695 */
696 rcu_for_each_node_breadth_first(rsp, rnp) {
697 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
698 rnp->completed = rsp->completed;
699 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
700 }
701 local_irq_restore(flags);
702 return;
703 }
704
705 /* Advance to a new grace period and initialize state. */
706 rsp->gpnum++;
707 WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
708 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
709 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
710 record_gp_stall_check_time(rsp);
711
626 /* Special-case the common single-level case. */ 712 /* Special-case the common single-level case. */
627 if (NUM_RCU_NODES == 1) { 713 if (NUM_RCU_NODES == 1) {
628 rcu_preempt_check_blocked_tasks(rnp); 714 rcu_preempt_check_blocked_tasks(rnp);
629 rnp->qsmask = rnp->qsmaskinit; 715 rnp->qsmask = rnp->qsmaskinit;
630 rnp->gpnum = rsp->gpnum; 716 rnp->gpnum = rsp->gpnum;
717 rnp->completed = rsp->completed;
631 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */ 718 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
632 spin_unlock_irqrestore(&rnp->lock, flags); 719 rcu_start_gp_per_cpu(rsp, rnp, rdp);
720 raw_spin_unlock_irqrestore(&rnp->lock, flags);
633 return; 721 return;
634 } 722 }
635 723
636 spin_unlock(&rnp->lock); /* leave irqs disabled. */ 724 raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
637 725
638 726
639 /* Exclude any concurrent CPU-hotplug operations. */ 727 /* Exclude any concurrent CPU-hotplug operations. */
640 spin_lock(&rsp->onofflock); /* irqs already disabled. */ 728 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
641 729
642 /* 730 /*
643 * Set the quiescent-state-needed bits in all the rcu_node 731 * Set the quiescent-state-needed bits in all the rcu_node
@@ -657,73 +745,50 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
657 * irqs disabled. 745 * irqs disabled.
658 */ 746 */
659 rcu_for_each_node_breadth_first(rsp, rnp) { 747 rcu_for_each_node_breadth_first(rsp, rnp) {
660 spin_lock(&rnp->lock); /* irqs already disabled. */ 748 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
661 rcu_preempt_check_blocked_tasks(rnp); 749 rcu_preempt_check_blocked_tasks(rnp);
662 rnp->qsmask = rnp->qsmaskinit; 750 rnp->qsmask = rnp->qsmaskinit;
663 rnp->gpnum = rsp->gpnum; 751 rnp->gpnum = rsp->gpnum;
664 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 752 rnp->completed = rsp->completed;
753 if (rnp == rdp->mynode)
754 rcu_start_gp_per_cpu(rsp, rnp, rdp);
755 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
665 } 756 }
666 757
667 rnp = rcu_get_root(rsp); 758 rnp = rcu_get_root(rsp);
668 spin_lock(&rnp->lock); /* irqs already disabled. */ 759 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
669 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */ 760 rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
670 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 761 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
671 spin_unlock_irqrestore(&rsp->onofflock, flags); 762 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
672} 763}
673 764
674/* 765/*
675 * Advance this CPU's callbacks, but only if the current grace period 766 * Report a full set of quiescent states to the specified rcu_state
676 * has ended. This may be called only from the CPU to whom the rdp 767 * data structure. This involves cleaning up after the prior grace
677 * belongs. 768 * period and letting rcu_start_gp() start up the next grace period
769 * if one is needed. Note that the caller must hold rnp->lock, as
770 * required by rcu_start_gp(), which will release it.
678 */ 771 */
679static void 772static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
680rcu_process_gp_end(struct rcu_state *rsp, struct rcu_data *rdp)
681{
682 long completed_snap;
683 unsigned long flags;
684
685 local_irq_save(flags);
686 completed_snap = ACCESS_ONCE(rsp->completed); /* outside of lock. */
687
688 /* Did another grace period end? */
689 if (rdp->completed != completed_snap) {
690
691 /* Advance callbacks. No harm if list empty. */
692 rdp->nxttail[RCU_DONE_TAIL] = rdp->nxttail[RCU_WAIT_TAIL];
693 rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_READY_TAIL];
694 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
695
696 /* Remember that we saw this grace-period completion. */
697 rdp->completed = completed_snap;
698 }
699 local_irq_restore(flags);
700}
701
702/*
703 * Clean up after the prior grace period and let rcu_start_gp() start up
704 * the next grace period if one is needed. Note that the caller must
705 * hold rnp->lock, as required by rcu_start_gp(), which will release it.
706 */
707static void cpu_quiet_msk_finish(struct rcu_state *rsp, unsigned long flags)
708 __releases(rcu_get_root(rsp)->lock) 773 __releases(rcu_get_root(rsp)->lock)
709{ 774{
710 WARN_ON_ONCE(!rcu_gp_in_progress(rsp)); 775 WARN_ON_ONCE(!rcu_gp_in_progress(rsp));
711 rsp->completed = rsp->gpnum; 776 rsp->completed = rsp->gpnum;
712 rsp->signaled = RCU_GP_IDLE; 777 rsp->signaled = RCU_GP_IDLE;
713 rcu_process_gp_end(rsp, rsp->rda[smp_processor_id()]);
714 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */ 778 rcu_start_gp(rsp, flags); /* releases root node's rnp->lock. */
715} 779}
716 780
717/* 781/*
718 * Similar to cpu_quiet(), for which it is a helper function. Allows 782 * Similar to rcu_report_qs_rdp(), for which it is a helper function.
719 * a group of CPUs to be quieted at one go, though all the CPUs in the 783 * Allows quiescent states for a group of CPUs to be reported at one go
720 * group must be represented by the same leaf rcu_node structure. 784 * to the specified rcu_node structure, though all the CPUs in the group
721 * That structure's lock must be held upon entry, and it is released 785 * must be represented by the same rcu_node structure (which need not be
722 * before return. 786 * a leaf rcu_node structure, though it often will be). That structure's
787 * lock must be held upon entry, and it is released before return.
723 */ 788 */
724static void 789static void
725cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp, 790rcu_report_qs_rnp(unsigned long mask, struct rcu_state *rsp,
726 unsigned long flags) 791 struct rcu_node *rnp, unsigned long flags)
727 __releases(rnp->lock) 792 __releases(rnp->lock)
728{ 793{
729 struct rcu_node *rnp_c; 794 struct rcu_node *rnp_c;
@@ -733,14 +798,14 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
733 if (!(rnp->qsmask & mask)) { 798 if (!(rnp->qsmask & mask)) {
734 799
735 /* Our bit has already been cleared, so done. */ 800 /* Our bit has already been cleared, so done. */
736 spin_unlock_irqrestore(&rnp->lock, flags); 801 raw_spin_unlock_irqrestore(&rnp->lock, flags);
737 return; 802 return;
738 } 803 }
739 rnp->qsmask &= ~mask; 804 rnp->qsmask &= ~mask;
740 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) { 805 if (rnp->qsmask != 0 || rcu_preempted_readers(rnp)) {
741 806
742 /* Other bits still set at this level, so done. */ 807 /* Other bits still set at this level, so done. */
743 spin_unlock_irqrestore(&rnp->lock, flags); 808 raw_spin_unlock_irqrestore(&rnp->lock, flags);
744 return; 809 return;
745 } 810 }
746 mask = rnp->grpmask; 811 mask = rnp->grpmask;
@@ -750,54 +815,56 @@ cpu_quiet_msk(unsigned long mask, struct rcu_state *rsp, struct rcu_node *rnp,
750 815
751 break; 816 break;
752 } 817 }
753 spin_unlock_irqrestore(&rnp->lock, flags); 818 raw_spin_unlock_irqrestore(&rnp->lock, flags);
754 rnp_c = rnp; 819 rnp_c = rnp;
755 rnp = rnp->parent; 820 rnp = rnp->parent;
756 spin_lock_irqsave(&rnp->lock, flags); 821 raw_spin_lock_irqsave(&rnp->lock, flags);
757 WARN_ON_ONCE(rnp_c->qsmask); 822 WARN_ON_ONCE(rnp_c->qsmask);
758 } 823 }
759 824
760 /* 825 /*
761 * Get here if we are the last CPU to pass through a quiescent 826 * Get here if we are the last CPU to pass through a quiescent
762 * state for this grace period. Invoke cpu_quiet_msk_finish() 827 * state for this grace period. Invoke rcu_report_qs_rsp()
763 * to clean up and start the next grace period if one is needed. 828 * to clean up and start the next grace period if one is needed.
764 */ 829 */
765 cpu_quiet_msk_finish(rsp, flags); /* releases rnp->lock. */ 830 rcu_report_qs_rsp(rsp, flags); /* releases rnp->lock. */
766} 831}
767 832
768/* 833/*
769 * Record a quiescent state for the specified CPU, which must either be 834 * Record a quiescent state for the specified CPU to that CPU's rcu_data
770 * the current CPU. The lastcomp argument is used to make sure we are 835 * structure. This must be either called from the specified CPU, or
771 * still in the grace period of interest. We don't want to end the current 836 * called when the specified CPU is known to be offline (and when it is
772 * grace period based on quiescent states detected in an earlier grace 837 * also known that no other CPU is concurrently trying to help the offline
773 * period! 838 * CPU). The lastcomp argument is used to make sure we are still in the
839 * grace period of interest. We don't want to end the current grace period
840 * based on quiescent states detected in an earlier grace period!
774 */ 841 */
775static void 842static void
776cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp) 843rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
777{ 844{
778 unsigned long flags; 845 unsigned long flags;
779 unsigned long mask; 846 unsigned long mask;
780 struct rcu_node *rnp; 847 struct rcu_node *rnp;
781 848
782 rnp = rdp->mynode; 849 rnp = rdp->mynode;
783 spin_lock_irqsave(&rnp->lock, flags); 850 raw_spin_lock_irqsave(&rnp->lock, flags);
784 if (lastcomp != ACCESS_ONCE(rsp->completed)) { 851 if (lastcomp != rnp->completed) {
785 852
786 /* 853 /*
787 * Someone beat us to it for this grace period, so leave. 854 * Someone beat us to it for this grace period, so leave.
788 * The race with GP start is resolved by the fact that we 855 * The race with GP start is resolved by the fact that we
789 * hold the leaf rcu_node lock, so that the per-CPU bits 856 * hold the leaf rcu_node lock, so that the per-CPU bits
790 * cannot yet be initialized -- so we would simply find our 857 * cannot yet be initialized -- so we would simply find our
791 * CPU's bit already cleared in cpu_quiet_msk() if this race 858 * CPU's bit already cleared in rcu_report_qs_rnp() if this
792 * occurred. 859 * race occurred.
793 */ 860 */
794 rdp->passed_quiesc = 0; /* try again later! */ 861 rdp->passed_quiesc = 0; /* try again later! */
795 spin_unlock_irqrestore(&rnp->lock, flags); 862 raw_spin_unlock_irqrestore(&rnp->lock, flags);
796 return; 863 return;
797 } 864 }
798 mask = rdp->grpmask; 865 mask = rdp->grpmask;
799 if ((rnp->qsmask & mask) == 0) { 866 if ((rnp->qsmask & mask) == 0) {
800 spin_unlock_irqrestore(&rnp->lock, flags); 867 raw_spin_unlock_irqrestore(&rnp->lock, flags);
801 } else { 868 } else {
802 rdp->qs_pending = 0; 869 rdp->qs_pending = 0;
803 870
@@ -807,7 +874,7 @@ cpu_quiet(int cpu, struct rcu_state *rsp, struct rcu_data *rdp, long lastcomp)
807 */ 874 */
808 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL]; 875 rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
809 876
810 cpu_quiet_msk(mask, rsp, rnp, flags); /* releases rnp->lock */ 877 rcu_report_qs_rnp(mask, rsp, rnp, flags); /* rlses rnp->lock */
811 } 878 }
812} 879}
813 880
@@ -838,8 +905,11 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
838 if (!rdp->passed_quiesc) 905 if (!rdp->passed_quiesc)
839 return; 906 return;
840 907
841 /* Tell RCU we are done (but cpu_quiet() will be the judge of that). */ 908 /*
842 cpu_quiet(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed); 909 * Tell RCU we are done (but rcu_report_qs_rdp() will be the
910 * judge of that).
911 */
912 rcu_report_qs_rdp(rdp->cpu, rsp, rdp, rdp->passed_quiesc_completed);
843} 913}
844 914
845#ifdef CONFIG_HOTPLUG_CPU 915#ifdef CONFIG_HOTPLUG_CPU
@@ -858,7 +928,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
858 928
859 if (rdp->nxtlist == NULL) 929 if (rdp->nxtlist == NULL)
860 return; /* irqs disabled, so comparison is stable. */ 930 return; /* irqs disabled, so comparison is stable. */
861 spin_lock(&rsp->onofflock); /* irqs already disabled. */ 931 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
862 *rsp->orphan_cbs_tail = rdp->nxtlist; 932 *rsp->orphan_cbs_tail = rdp->nxtlist;
863 rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL]; 933 rsp->orphan_cbs_tail = rdp->nxttail[RCU_NEXT_TAIL];
864 rdp->nxtlist = NULL; 934 rdp->nxtlist = NULL;
@@ -866,7 +936,7 @@ static void rcu_send_cbs_to_orphanage(struct rcu_state *rsp)
866 rdp->nxttail[i] = &rdp->nxtlist; 936 rdp->nxttail[i] = &rdp->nxtlist;
867 rsp->orphan_qlen += rdp->qlen; 937 rsp->orphan_qlen += rdp->qlen;
868 rdp->qlen = 0; 938 rdp->qlen = 0;
869 spin_unlock(&rsp->onofflock); /* irqs remain disabled. */ 939 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
870} 940}
871 941
872/* 942/*
@@ -877,10 +947,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
877 unsigned long flags; 947 unsigned long flags;
878 struct rcu_data *rdp; 948 struct rcu_data *rdp;
879 949
880 spin_lock_irqsave(&rsp->onofflock, flags); 950 raw_spin_lock_irqsave(&rsp->onofflock, flags);
881 rdp = rsp->rda[smp_processor_id()]; 951 rdp = rsp->rda[smp_processor_id()];
882 if (rsp->orphan_cbs_list == NULL) { 952 if (rsp->orphan_cbs_list == NULL) {
883 spin_unlock_irqrestore(&rsp->onofflock, flags); 953 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
884 return; 954 return;
885 } 955 }
886 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list; 956 *rdp->nxttail[RCU_NEXT_TAIL] = rsp->orphan_cbs_list;
@@ -889,7 +959,7 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
889 rsp->orphan_cbs_list = NULL; 959 rsp->orphan_cbs_list = NULL;
890 rsp->orphan_cbs_tail = &rsp->orphan_cbs_list; 960 rsp->orphan_cbs_tail = &rsp->orphan_cbs_list;
891 rsp->orphan_qlen = 0; 961 rsp->orphan_qlen = 0;
892 spin_unlock_irqrestore(&rsp->onofflock, flags); 962 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
893} 963}
894 964
895/* 965/*
@@ -899,45 +969,47 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
899static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp) 969static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
900{ 970{
901 unsigned long flags; 971 unsigned long flags;
902 long lastcomp;
903 unsigned long mask; 972 unsigned long mask;
973 int need_report = 0;
904 struct rcu_data *rdp = rsp->rda[cpu]; 974 struct rcu_data *rdp = rsp->rda[cpu];
905 struct rcu_node *rnp; 975 struct rcu_node *rnp;
906 976
907 /* Exclude any attempts to start a new grace period. */ 977 /* Exclude any attempts to start a new grace period. */
908 spin_lock_irqsave(&rsp->onofflock, flags); 978 raw_spin_lock_irqsave(&rsp->onofflock, flags);
909 979
910 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */ 980 /* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
911 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */ 981 rnp = rdp->mynode; /* this is the outgoing CPU's rnp. */
912 mask = rdp->grpmask; /* rnp->grplo is constant. */ 982 mask = rdp->grpmask; /* rnp->grplo is constant. */
913 do { 983 do {
914 spin_lock(&rnp->lock); /* irqs already disabled. */ 984 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
915 rnp->qsmaskinit &= ~mask; 985 rnp->qsmaskinit &= ~mask;
916 if (rnp->qsmaskinit != 0) { 986 if (rnp->qsmaskinit != 0) {
917 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 987 if (rnp != rdp->mynode)
988 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
918 break; 989 break;
919 } 990 }
920 991 if (rnp == rdp->mynode)
921 /* 992 need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
922 * If there was a task blocking the current grace period, 993 else
923 * and if all CPUs have checked in, we need to propagate 994 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
924 * the quiescent state up the rcu_node hierarchy. But that
925 * is inconvenient at the moment due to deadlock issues if
926 * this should end the current grace period. So set the
927 * offlined CPU's bit in ->qsmask in order to force the
928 * next force_quiescent_state() invocation to clean up this
929 * mess in a deadlock-free manner.
930 */
931 if (rcu_preempt_offline_tasks(rsp, rnp, rdp) && !rnp->qsmask)
932 rnp->qsmask |= mask;
933
934 mask = rnp->grpmask; 995 mask = rnp->grpmask;
935 spin_unlock(&rnp->lock); /* irqs remain disabled. */
936 rnp = rnp->parent; 996 rnp = rnp->parent;
937 } while (rnp != NULL); 997 } while (rnp != NULL);
938 lastcomp = rsp->completed;
939 998
940 spin_unlock_irqrestore(&rsp->onofflock, flags); 999 /*
1000 * We still hold the leaf rcu_node structure lock here, and
1001 * irqs are still disabled. The reason for this subterfuge is
1002 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
1003 * held leads to deadlock.
1004 */
1005 raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
1006 rnp = rdp->mynode;
1007 if (need_report & RCU_OFL_TASKS_NORM_GP)
1008 rcu_report_unblock_qs_rnp(rnp, flags);
1009 else
1010 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1011 if (need_report & RCU_OFL_TASKS_EXP_GP)
1012 rcu_report_exp_rnp(rsp, rnp);
941 1013
942 rcu_adopt_orphan_cbs(rsp); 1014 rcu_adopt_orphan_cbs(rsp);
943} 1015}
@@ -1094,11 +1166,9 @@ void rcu_check_callbacks(int cpu, int user)
1094/* 1166/*
1095 * Scan the leaf rcu_node structures, processing dyntick state for any that 1167 * Scan the leaf rcu_node structures, processing dyntick state for any that
1096 * have not yet encountered a quiescent state, using the function specified. 1168 * have not yet encountered a quiescent state, using the function specified.
1097 * Returns 1 if the current grace period ends while scanning (possibly 1169 * The caller must have suppressed start of new grace periods.
1098 * because we made it end).
1099 */ 1170 */
1100static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp, 1171static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
1101 int (*f)(struct rcu_data *))
1102{ 1172{
1103 unsigned long bit; 1173 unsigned long bit;
1104 int cpu; 1174 int cpu;
@@ -1108,13 +1178,13 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
1108 1178
1109 rcu_for_each_leaf_node(rsp, rnp) { 1179 rcu_for_each_leaf_node(rsp, rnp) {
1110 mask = 0; 1180 mask = 0;
1111 spin_lock_irqsave(&rnp->lock, flags); 1181 raw_spin_lock_irqsave(&rnp->lock, flags);
1112 if (rsp->completed != lastcomp) { 1182 if (!rcu_gp_in_progress(rsp)) {
1113 spin_unlock_irqrestore(&rnp->lock, flags); 1183 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1114 return 1; 1184 return;
1115 } 1185 }
1116 if (rnp->qsmask == 0) { 1186 if (rnp->qsmask == 0) {
1117 spin_unlock_irqrestore(&rnp->lock, flags); 1187 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1118 continue; 1188 continue;
1119 } 1189 }
1120 cpu = rnp->grplo; 1190 cpu = rnp->grplo;
@@ -1123,15 +1193,14 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
1123 if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu])) 1193 if ((rnp->qsmask & bit) != 0 && f(rsp->rda[cpu]))
1124 mask |= bit; 1194 mask |= bit;
1125 } 1195 }
1126 if (mask != 0 && rsp->completed == lastcomp) { 1196 if (mask != 0) {
1127 1197
1128 /* cpu_quiet_msk() releases rnp->lock. */ 1198 /* rcu_report_qs_rnp() releases rnp->lock. */
1129 cpu_quiet_msk(mask, rsp, rnp, flags); 1199 rcu_report_qs_rnp(mask, rsp, rnp, flags);
1130 continue; 1200 continue;
1131 } 1201 }
1132 spin_unlock_irqrestore(&rnp->lock, flags); 1202 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1133 } 1203 }
1134 return 0;
1135} 1204}
1136 1205
1137/* 1206/*
@@ -1141,31 +1210,26 @@ static int rcu_process_dyntick(struct rcu_state *rsp, long lastcomp,
1141static void force_quiescent_state(struct rcu_state *rsp, int relaxed) 1210static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1142{ 1211{
1143 unsigned long flags; 1212 unsigned long flags;
1144 long lastcomp;
1145 struct rcu_node *rnp = rcu_get_root(rsp); 1213 struct rcu_node *rnp = rcu_get_root(rsp);
1146 u8 signaled;
1147 1214
1148 if (!rcu_gp_in_progress(rsp)) 1215 if (!rcu_gp_in_progress(rsp))
1149 return; /* No grace period in progress, nothing to force. */ 1216 return; /* No grace period in progress, nothing to force. */
1150 if (!spin_trylock_irqsave(&rsp->fqslock, flags)) { 1217 if (!raw_spin_trylock_irqsave(&rsp->fqslock, flags)) {
1151 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */ 1218 rsp->n_force_qs_lh++; /* Inexact, can lose counts. Tough! */
1152 return; /* Someone else is already on the job. */ 1219 return; /* Someone else is already on the job. */
1153 } 1220 }
1154 if (relaxed && 1221 if (relaxed && ULONG_CMP_GE(rsp->jiffies_force_qs, jiffies))
1155 (long)(rsp->jiffies_force_qs - jiffies) >= 0) 1222 goto unlock_fqs_ret; /* no emergency and done recently. */
1156 goto unlock_ret; /* no emergency and done recently. */
1157 rsp->n_force_qs++; 1223 rsp->n_force_qs++;
1158 spin_lock(&rnp->lock); 1224 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1159 lastcomp = rsp->completed;
1160 signaled = rsp->signaled;
1161 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 1225 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1162 if (lastcomp == rsp->gpnum) { 1226 if(!rcu_gp_in_progress(rsp)) {
1163 rsp->n_force_qs_ngp++; 1227 rsp->n_force_qs_ngp++;
1164 spin_unlock(&rnp->lock); 1228 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1165 goto unlock_ret; /* no GP in progress, time updated. */ 1229 goto unlock_fqs_ret; /* no GP in progress, time updated. */
1166 } 1230 }
1167 spin_unlock(&rnp->lock); 1231 rsp->fqs_active = 1;
1168 switch (signaled) { 1232 switch (rsp->signaled) {
1169 case RCU_GP_IDLE: 1233 case RCU_GP_IDLE:
1170 case RCU_GP_INIT: 1234 case RCU_GP_INIT:
1171 1235
@@ -1173,37 +1237,38 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1173 1237
1174 case RCU_SAVE_DYNTICK: 1238 case RCU_SAVE_DYNTICK:
1175 1239
1240 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1176 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK) 1241 if (RCU_SIGNAL_INIT != RCU_SAVE_DYNTICK)
1177 break; /* So gcc recognizes the dead code. */ 1242 break; /* So gcc recognizes the dead code. */
1178 1243
1179 /* Record dyntick-idle state. */ 1244 /* Record dyntick-idle state. */
1180 if (rcu_process_dyntick(rsp, lastcomp, 1245 force_qs_rnp(rsp, dyntick_save_progress_counter);
1181 dyntick_save_progress_counter)) 1246 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1182 goto unlock_ret; 1247 if (rcu_gp_in_progress(rsp))
1183
1184 /* Update state, record completion counter. */
1185 spin_lock(&rnp->lock);
1186 if (lastcomp == rsp->completed &&
1187 rsp->signaled == RCU_SAVE_DYNTICK) {
1188 rsp->signaled = RCU_FORCE_QS; 1248 rsp->signaled = RCU_FORCE_QS;
1189 dyntick_record_completed(rsp, lastcomp);
1190 }
1191 spin_unlock(&rnp->lock);
1192 break; 1249 break;
1193 1250
1194 case RCU_FORCE_QS: 1251 case RCU_FORCE_QS:
1195 1252
1196 /* Check dyntick-idle state, send IPI to laggarts. */ 1253 /* Check dyntick-idle state, send IPI to laggarts. */
1197 if (rcu_process_dyntick(rsp, dyntick_recall_completed(rsp), 1254 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1198 rcu_implicit_dynticks_qs)) 1255 force_qs_rnp(rsp, rcu_implicit_dynticks_qs);
1199 goto unlock_ret;
1200 1256
1201 /* Leave state in case more forcing is required. */ 1257 /* Leave state in case more forcing is required. */
1202 1258
1259 raw_spin_lock(&rnp->lock); /* irqs already disabled */
1203 break; 1260 break;
1204 } 1261 }
1205unlock_ret: 1262 rsp->fqs_active = 0;
1206 spin_unlock_irqrestore(&rsp->fqslock, flags); 1263 if (rsp->fqs_need_gp) {
1264 raw_spin_unlock(&rsp->fqslock); /* irqs remain disabled */
1265 rsp->fqs_need_gp = 0;
1266 rcu_start_gp(rsp, flags); /* releases rnp->lock */
1267 return;
1268 }
1269 raw_spin_unlock(&rnp->lock); /* irqs remain disabled */
1270unlock_fqs_ret:
1271 raw_spin_unlock_irqrestore(&rsp->fqslock, flags);
1207} 1272}
1208 1273
1209#else /* #ifdef CONFIG_SMP */ 1274#else /* #ifdef CONFIG_SMP */
@@ -1231,7 +1296,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1231 * If an RCU GP has gone long enough, go check for dyntick 1296 * If an RCU GP has gone long enough, go check for dyntick
1232 * idle CPUs and, if needed, send resched IPIs. 1297 * idle CPUs and, if needed, send resched IPIs.
1233 */ 1298 */
1234 if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) 1299 if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1235 force_quiescent_state(rsp, 1); 1300 force_quiescent_state(rsp, 1);
1236 1301
1237 /* 1302 /*
@@ -1245,7 +1310,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1245 1310
1246 /* Does this CPU require a not-yet-started grace period? */ 1311 /* Does this CPU require a not-yet-started grace period? */
1247 if (cpu_needs_another_gp(rsp, rdp)) { 1312 if (cpu_needs_another_gp(rsp, rdp)) {
1248 spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags); 1313 raw_spin_lock_irqsave(&rcu_get_root(rsp)->lock, flags);
1249 rcu_start_gp(rsp, flags); /* releases above lock */ 1314 rcu_start_gp(rsp, flags); /* releases above lock */
1250 } 1315 }
1251 1316
@@ -1276,6 +1341,9 @@ static void rcu_process_callbacks(struct softirq_action *unused)
1276 * grace-period manipulations above. 1341 * grace-period manipulations above.
1277 */ 1342 */
1278 smp_mb(); /* See above block comment. */ 1343 smp_mb(); /* See above block comment. */
1344
1345 /* If we are last CPU on way to dyntick-idle mode, accelerate it. */
1346 rcu_needs_cpu_flush();
1279} 1347}
1280 1348
1281static void 1349static void
@@ -1310,7 +1378,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1310 unsigned long nestflag; 1378 unsigned long nestflag;
1311 struct rcu_node *rnp_root = rcu_get_root(rsp); 1379 struct rcu_node *rnp_root = rcu_get_root(rsp);
1312 1380
1313 spin_lock_irqsave(&rnp_root->lock, nestflag); 1381 raw_spin_lock_irqsave(&rnp_root->lock, nestflag);
1314 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ 1382 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
1315 } 1383 }
1316 1384
@@ -1328,7 +1396,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1328 force_quiescent_state(rsp, 0); 1396 force_quiescent_state(rsp, 0);
1329 rdp->n_force_qs_snap = rsp->n_force_qs; 1397 rdp->n_force_qs_snap = rsp->n_force_qs;
1330 rdp->qlen_last_fqs_check = rdp->qlen; 1398 rdp->qlen_last_fqs_check = rdp->qlen;
1331 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) 1399 } else if (ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies))
1332 force_quiescent_state(rsp, 1); 1400 force_quiescent_state(rsp, 1);
1333 local_irq_restore(flags); 1401 local_irq_restore(flags);
1334} 1402}
@@ -1351,6 +1419,68 @@ void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
1351} 1419}
1352EXPORT_SYMBOL_GPL(call_rcu_bh); 1420EXPORT_SYMBOL_GPL(call_rcu_bh);
1353 1421
1422/**
1423 * synchronize_sched - wait until an rcu-sched grace period has elapsed.
1424 *
1425 * Control will return to the caller some time after a full rcu-sched
1426 * grace period has elapsed, in other words after all currently executing
1427 * rcu-sched read-side critical sections have completed. These read-side
1428 * critical sections are delimited by rcu_read_lock_sched() and
1429 * rcu_read_unlock_sched(), and may be nested. Note that preempt_disable(),
1430 * local_irq_disable(), and so on may be used in place of
1431 * rcu_read_lock_sched().
1432 *
1433 * This means that all preempt_disable code sequences, including NMI and
1434 * hardware-interrupt handlers, in progress on entry will have completed
1435 * before this primitive returns. However, this does not guarantee that
1436 * softirq handlers will have completed, since in some kernels, these
1437 * handlers can run in process context, and can block.
1438 *
1439 * This primitive provides the guarantees made by the (now removed)
1440 * synchronize_kernel() API. In contrast, synchronize_rcu() only
1441 * guarantees that rcu_read_lock() sections will have completed.
1442 * In "classic RCU", these two guarantees happen to be one and
1443 * the same, but can differ in realtime RCU implementations.
1444 */
1445void synchronize_sched(void)
1446{
1447 struct rcu_synchronize rcu;
1448
1449 if (rcu_blocking_is_gp())
1450 return;
1451
1452 init_completion(&rcu.completion);
1453 /* Will wake me after RCU finished. */
1454 call_rcu_sched(&rcu.head, wakeme_after_rcu);
1455 /* Wait for it. */
1456 wait_for_completion(&rcu.completion);
1457}
1458EXPORT_SYMBOL_GPL(synchronize_sched);
1459
1460/**
1461 * synchronize_rcu_bh - wait until an rcu_bh grace period has elapsed.
1462 *
1463 * Control will return to the caller some time after a full rcu_bh grace
1464 * period has elapsed, in other words after all currently executing rcu_bh
1465 * read-side critical sections have completed. RCU read-side critical
1466 * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
1467 * and may be nested.
1468 */
1469void synchronize_rcu_bh(void)
1470{
1471 struct rcu_synchronize rcu;
1472
1473 if (rcu_blocking_is_gp())
1474 return;
1475
1476 init_completion(&rcu.completion);
1477 /* Will wake me after RCU finished. */
1478 call_rcu_bh(&rcu.head, wakeme_after_rcu);
1479 /* Wait for it. */
1480 wait_for_completion(&rcu.completion);
1481}
1482EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
1483
1354/* 1484/*
1355 * Check to see if there is any immediate RCU-related work to be done 1485 * Check to see if there is any immediate RCU-related work to be done
1356 * by the current CPU, for the specified type of RCU, returning 1 if so. 1486 * by the current CPU, for the specified type of RCU, returning 1 if so.
@@ -1360,6 +1490,8 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
1360 */ 1490 */
1361static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) 1491static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1362{ 1492{
1493 struct rcu_node *rnp = rdp->mynode;
1494
1363 rdp->n_rcu_pending++; 1495 rdp->n_rcu_pending++;
1364 1496
1365 /* Check for CPU stalls, if enabled. */ 1497 /* Check for CPU stalls, if enabled. */
@@ -1384,20 +1516,20 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1384 } 1516 }
1385 1517
1386 /* Has another RCU grace period completed? */ 1518 /* Has another RCU grace period completed? */
1387 if (ACCESS_ONCE(rsp->completed) != rdp->completed) { /* outside lock */ 1519 if (ACCESS_ONCE(rnp->completed) != rdp->completed) { /* outside lock */
1388 rdp->n_rp_gp_completed++; 1520 rdp->n_rp_gp_completed++;
1389 return 1; 1521 return 1;
1390 } 1522 }
1391 1523
1392 /* Has a new RCU grace period started? */ 1524 /* Has a new RCU grace period started? */
1393 if (ACCESS_ONCE(rsp->gpnum) != rdp->gpnum) { /* outside lock */ 1525 if (ACCESS_ONCE(rnp->gpnum) != rdp->gpnum) { /* outside lock */
1394 rdp->n_rp_gp_started++; 1526 rdp->n_rp_gp_started++;
1395 return 1; 1527 return 1;
1396 } 1528 }
1397 1529
1398 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 1530 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1399 if (rcu_gp_in_progress(rsp) && 1531 if (rcu_gp_in_progress(rsp) &&
1400 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)) { 1532 ULONG_CMP_LT(ACCESS_ONCE(rsp->jiffies_force_qs), jiffies)) {
1401 rdp->n_rp_need_fqs++; 1533 rdp->n_rp_need_fqs++;
1402 return 1; 1534 return 1;
1403 } 1535 }
@@ -1422,10 +1554,9 @@ static int rcu_pending(int cpu)
1422/* 1554/*
1423 * Check to see if any future RCU-related work will need to be done 1555 * Check to see if any future RCU-related work will need to be done
1424 * by the current CPU, even if none need be done immediately, returning 1556 * by the current CPU, even if none need be done immediately, returning
1425 * 1 if so. This function is part of the RCU implementation; it is -not- 1557 * 1 if so.
1426 * an exported member of the RCU API.
1427 */ 1558 */
1428int rcu_needs_cpu(int cpu) 1559static int rcu_needs_cpu_quick_check(int cpu)
1429{ 1560{
1430 /* RCU callbacks either ready or pending? */ 1561 /* RCU callbacks either ready or pending? */
1431 return per_cpu(rcu_sched_data, cpu).nxtlist || 1562 return per_cpu(rcu_sched_data, cpu).nxtlist ||
@@ -1521,7 +1652,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1521 struct rcu_node *rnp = rcu_get_root(rsp); 1652 struct rcu_node *rnp = rcu_get_root(rsp);
1522 1653
1523 /* Set up local state, ensuring consistent view of global state. */ 1654 /* Set up local state, ensuring consistent view of global state. */
1524 spin_lock_irqsave(&rnp->lock, flags); 1655 raw_spin_lock_irqsave(&rnp->lock, flags);
1525 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo); 1656 rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
1526 rdp->nxtlist = NULL; 1657 rdp->nxtlist = NULL;
1527 for (i = 0; i < RCU_NEXT_SIZE; i++) 1658 for (i = 0; i < RCU_NEXT_SIZE; i++)
@@ -1531,7 +1662,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
1531 rdp->dynticks = &per_cpu(rcu_dynticks, cpu); 1662 rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
1532#endif /* #ifdef CONFIG_NO_HZ */ 1663#endif /* #ifdef CONFIG_NO_HZ */
1533 rdp->cpu = cpu; 1664 rdp->cpu = cpu;
1534 spin_unlock_irqrestore(&rnp->lock, flags); 1665 raw_spin_unlock_irqrestore(&rnp->lock, flags);
1535} 1666}
1536 1667
1537/* 1668/*
@@ -1544,25 +1675,20 @@ static void __cpuinit
1544rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) 1675rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
1545{ 1676{
1546 unsigned long flags; 1677 unsigned long flags;
1547 long lastcomp;
1548 unsigned long mask; 1678 unsigned long mask;
1549 struct rcu_data *rdp = rsp->rda[cpu]; 1679 struct rcu_data *rdp = rsp->rda[cpu];
1550 struct rcu_node *rnp = rcu_get_root(rsp); 1680 struct rcu_node *rnp = rcu_get_root(rsp);
1551 1681
1552 /* Set up local state, ensuring consistent view of global state. */ 1682 /* Set up local state, ensuring consistent view of global state. */
1553 spin_lock_irqsave(&rnp->lock, flags); 1683 raw_spin_lock_irqsave(&rnp->lock, flags);
1554 lastcomp = rsp->completed;
1555 rdp->completed = lastcomp;
1556 rdp->gpnum = lastcomp;
1557 rdp->passed_quiesc = 0; /* We could be racing with new GP, */ 1684 rdp->passed_quiesc = 0; /* We could be racing with new GP, */
1558 rdp->qs_pending = 1; /* so set up to respond to current GP. */ 1685 rdp->qs_pending = 1; /* so set up to respond to current GP. */
1559 rdp->beenonline = 1; /* We have now been online. */ 1686 rdp->beenonline = 1; /* We have now been online. */
1560 rdp->preemptable = preemptable; 1687 rdp->preemptable = preemptable;
1561 rdp->passed_quiesc_completed = lastcomp - 1;
1562 rdp->qlen_last_fqs_check = 0; 1688 rdp->qlen_last_fqs_check = 0;
1563 rdp->n_force_qs_snap = rsp->n_force_qs; 1689 rdp->n_force_qs_snap = rsp->n_force_qs;
1564 rdp->blimit = blimit; 1690 rdp->blimit = blimit;
1565 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1691 raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
1566 1692
1567 /* 1693 /*
1568 * A new grace period might start here. If so, we won't be part 1694 * A new grace period might start here. If so, we won't be part
@@ -1570,21 +1696,26 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
1570 */ 1696 */
1571 1697
1572 /* Exclude any attempts to start a new GP on large systems. */ 1698 /* Exclude any attempts to start a new GP on large systems. */
1573 spin_lock(&rsp->onofflock); /* irqs already disabled. */ 1699 raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
1574 1700
1575 /* Add CPU to rcu_node bitmasks. */ 1701 /* Add CPU to rcu_node bitmasks. */
1576 rnp = rdp->mynode; 1702 rnp = rdp->mynode;
1577 mask = rdp->grpmask; 1703 mask = rdp->grpmask;
1578 do { 1704 do {
1579 /* Exclude any attempts to start a new GP on small systems. */ 1705 /* Exclude any attempts to start a new GP on small systems. */
1580 spin_lock(&rnp->lock); /* irqs already disabled. */ 1706 raw_spin_lock(&rnp->lock); /* irqs already disabled. */
1581 rnp->qsmaskinit |= mask; 1707 rnp->qsmaskinit |= mask;
1582 mask = rnp->grpmask; 1708 mask = rnp->grpmask;
1583 spin_unlock(&rnp->lock); /* irqs already disabled. */ 1709 if (rnp == rdp->mynode) {
1710 rdp->gpnum = rnp->completed; /* if GP in progress... */
1711 rdp->completed = rnp->completed;
1712 rdp->passed_quiesc_completed = rnp->completed - 1;
1713 }
1714 raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
1584 rnp = rnp->parent; 1715 rnp = rnp->parent;
1585 } while (rnp != NULL && !(rnp->qsmaskinit & mask)); 1716 } while (rnp != NULL && !(rnp->qsmaskinit & mask));
1586 1717
1587 spin_unlock_irqrestore(&rsp->onofflock, flags); 1718 raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
1588} 1719}
1589 1720
1590static void __cpuinit rcu_online_cpu(int cpu) 1721static void __cpuinit rcu_online_cpu(int cpu)
@@ -1597,8 +1728,8 @@ static void __cpuinit rcu_online_cpu(int cpu)
1597/* 1728/*
1598 * Handle CPU online/offline notification events. 1729 * Handle CPU online/offline notification events.
1599 */ 1730 */
1600int __cpuinit rcu_cpu_notify(struct notifier_block *self, 1731static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
1601 unsigned long action, void *hcpu) 1732 unsigned long action, void *hcpu)
1602{ 1733{
1603 long cpu = (long)hcpu; 1734 long cpu = (long)hcpu;
1604 1735
@@ -1668,11 +1799,17 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
1668 */ 1799 */
1669static void __init rcu_init_one(struct rcu_state *rsp) 1800static void __init rcu_init_one(struct rcu_state *rsp)
1670{ 1801{
1802 static char *buf[] = { "rcu_node_level_0",
1803 "rcu_node_level_1",
1804 "rcu_node_level_2",
1805 "rcu_node_level_3" }; /* Match MAX_RCU_LVLS */
1671 int cpustride = 1; 1806 int cpustride = 1;
1672 int i; 1807 int i;
1673 int j; 1808 int j;
1674 struct rcu_node *rnp; 1809 struct rcu_node *rnp;
1675 1810
1811 BUILD_BUG_ON(MAX_RCU_LVLS > ARRAY_SIZE(buf)); /* Fix buf[] init! */
1812
1676 /* Initialize the level-tracking arrays. */ 1813 /* Initialize the level-tracking arrays. */
1677 1814
1678 for (i = 1; i < NUM_RCU_LVLS; i++) 1815 for (i = 1; i < NUM_RCU_LVLS; i++)
@@ -1685,8 +1822,9 @@ static void __init rcu_init_one(struct rcu_state *rsp)
1685 cpustride *= rsp->levelspread[i]; 1822 cpustride *= rsp->levelspread[i];
1686 rnp = rsp->level[i]; 1823 rnp = rsp->level[i];
1687 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) { 1824 for (j = 0; j < rsp->levelcnt[i]; j++, rnp++) {
1688 if (rnp != rcu_get_root(rsp)) 1825 raw_spin_lock_init(&rnp->lock);
1689 spin_lock_init(&rnp->lock); 1826 lockdep_set_class_and_name(&rnp->lock,
1827 &rcu_node_class[i], buf[i]);
1690 rnp->gpnum = 0; 1828 rnp->gpnum = 0;
1691 rnp->qsmask = 0; 1829 rnp->qsmask = 0;
1692 rnp->qsmaskinit = 0; 1830 rnp->qsmaskinit = 0;
@@ -1707,9 +1845,10 @@ static void __init rcu_init_one(struct rcu_state *rsp)
1707 rnp->level = i; 1845 rnp->level = i;
1708 INIT_LIST_HEAD(&rnp->blocked_tasks[0]); 1846 INIT_LIST_HEAD(&rnp->blocked_tasks[0]);
1709 INIT_LIST_HEAD(&rnp->blocked_tasks[1]); 1847 INIT_LIST_HEAD(&rnp->blocked_tasks[1]);
1848 INIT_LIST_HEAD(&rnp->blocked_tasks[2]);
1849 INIT_LIST_HEAD(&rnp->blocked_tasks[3]);
1710 } 1850 }
1711 } 1851 }
1712 spin_lock_init(&rcu_get_root(rsp)->lock);
1713} 1852}
1714 1853
1715/* 1854/*
@@ -1735,16 +1874,30 @@ do { \
1735 } \ 1874 } \
1736} while (0) 1875} while (0)
1737 1876
1738void __init __rcu_init(void) 1877void __init rcu_init(void)
1739{ 1878{
1879 int cpu;
1880
1740 rcu_bootup_announce(); 1881 rcu_bootup_announce();
1741#ifdef CONFIG_RCU_CPU_STALL_DETECTOR 1882#ifdef CONFIG_RCU_CPU_STALL_DETECTOR
1742 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n"); 1883 printk(KERN_INFO "RCU-based detection of stalled CPUs is enabled.\n");
1743#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */ 1884#endif /* #ifdef CONFIG_RCU_CPU_STALL_DETECTOR */
1885#if NUM_RCU_LVL_4 != 0
1886 printk(KERN_INFO "Experimental four-level hierarchy is enabled.\n");
1887#endif /* #if NUM_RCU_LVL_4 != 0 */
1744 RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data); 1888 RCU_INIT_FLAVOR(&rcu_sched_state, rcu_sched_data);
1745 RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data); 1889 RCU_INIT_FLAVOR(&rcu_bh_state, rcu_bh_data);
1746 __rcu_init_preempt(); 1890 __rcu_init_preempt();
1747 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks); 1891 open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
1892
1893 /*
1894 * We don't need protection against CPU-hotplug here because
1895 * this is called early in boot, before either interrupts
1896 * or the scheduler are operational.
1897 */
1898 cpu_notifier(rcu_cpu_notify, 0);
1899 for_each_online_cpu(cpu)
1900 rcu_cpu_notify(NULL, CPU_UP_PREPARE, (void *)(long)cpu);
1748} 1901}
1749 1902
1750#include "rcutree_plugin.h" 1903#include "rcutree_plugin.h"
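
Several hunks above replace comparisons of the form "(long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0" with ULONG_CMP_LT()/ULONG_CMP_GE(), which express the same modular ordering without casting an unsigned difference to a signed type. The user-space sketch below contrasts a plain unsigned comparison (which is fooled by counter wraparound) with the ULONG_CMP_*() idiom. It is only an illustration: the macro bodies are reproduced here on the assumption that they match the kernel's definitions of this era, and pretend_jiffies/deadline are made-up names standing in for the real jiffies counter and a computed timeout.

#include <limits.h>
#include <stdio.h>

/* Assumed to mirror the kernel's ULONG_CMP_*() macros of this era. */
#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))

int main(void)
{
	/* Hypothetical "now" sitting just below the wraparound point. */
	unsigned long pretend_jiffies = ULONG_MAX - 1;

	/* A deadline five ticks in the future; the addition wraps to 3. */
	unsigned long deadline = pretend_jiffies + 5;

	/*
	 * A plain unsigned compare is fooled by the wrap and claims the
	 * future deadline has already expired (prints 1).
	 */
	printf("naive (deadline <= now):     %d\n",
	       deadline <= pretend_jiffies);

	/*
	 * The ULONG_CMP_*() idiom still orders the two values correctly:
	 * the deadline is neither reached nor in the past (both print 0).
	 */
	printf("ULONG_CMP_GE(now, deadline): %d\n",
	       ULONG_CMP_GE(pretend_jiffies, deadline));
	printf("ULONG_CMP_LT(deadline, now): %d\n",
	       ULONG_CMP_LT(deadline, pretend_jiffies));
	return 0;
}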