Diffstat (limited to 'kernel/rcutree.c')
-rw-r--r--	kernel/rcutree.c | 507
1 file changed, 371 insertions(+), 136 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6c4a6722abfd..1050d6d3922c 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -50,6 +50,8 @@
 #include <linux/wait.h>
 #include <linux/kthread.h>
 #include <linux/prefetch.h>
+#include <linux/delay.h>
+#include <linux/stop_machine.h>
 
 #include "rcutree.h"
 #include <trace/events/rcu.h>
@@ -196,7 +198,7 @@ void rcu_note_context_switch(int cpu)
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-	.dynticks_nesting = DYNTICK_TASK_NESTING,
+	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
 	.dynticks = ATOMIC_INIT(1),
 };
 
@@ -208,8 +210,11 @@ module_param(blimit, int, 0);
 module_param(qhimark, int, 0);
 module_param(qlowmark, int, 0);
 
-int rcu_cpu_stall_suppress __read_mostly;
+int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
+int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
+
 module_param(rcu_cpu_stall_suppress, int, 0644);
+module_param(rcu_cpu_stall_timeout, int, 0644);
 
 static void force_quiescent_state(struct rcu_state *rsp, int relaxed);
 static int rcu_pending(int cpu);
@@ -301,8 +306,6 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 	return &rsp->node[0];
 }
 
-#ifdef CONFIG_SMP
-
 /*
  * If the specified CPU is offline, tell the caller that it is in
  * a quiescent state. Otherwise, whack it with a reschedule IPI.
@@ -317,30 +320,21 @@ static struct rcu_node *rcu_get_root(struct rcu_state *rsp)
 static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 {
 	/*
-	 * If the CPU is offline, it is in a quiescent state. We can
-	 * trust its state not to change because interrupts are disabled.
+	 * If the CPU is offline for more than a jiffy, it is in a quiescent
+	 * state. We can trust its state not to change because interrupts
+	 * are disabled. The reason for the jiffy's worth of slack is to
+	 * handle CPUs initializing on the way up and finding their way
+	 * to the idle loop on the way down.
 	 */
-	if (cpu_is_offline(rdp->cpu)) {
+	if (cpu_is_offline(rdp->cpu) &&
+	    ULONG_CMP_LT(rdp->rsp->gp_start + 2, jiffies)) {
 		trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, "ofl");
 		rdp->offline_fqs++;
 		return 1;
 	}
-
-	/*
-	 * The CPU is online, so send it a reschedule IPI. This forces
-	 * it through the scheduler, and (inefficiently) also handles cases
-	 * where idle loops fail to inform RCU about the CPU being idle.
-	 */
-	if (rdp->cpu != smp_processor_id())
-		smp_send_reschedule(rdp->cpu);
-	else
-		set_need_resched();
-	rdp->resched_ipi++;
 	return 0;
 }
 
-#endif /* #ifdef CONFIG_SMP */
-
 /*
  * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
  *
@@ -366,6 +360,17 @@ static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
 	atomic_inc(&rdtp->dynticks);
 	smp_mb__after_atomic_inc(); /* Force ordering with next sojourn. */
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+
+	/*
+	 * The idle task is not permitted to enter the idle loop while
+	 * in an RCU read-side critical section.
+	 */
+	rcu_lockdep_assert(!lock_is_held(&rcu_lock_map),
+			   "Illegal idle entry in RCU read-side critical section.");
+	rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map),
+			   "Illegal idle entry in RCU-bh read-side critical section.");
+	rcu_lockdep_assert(!lock_is_held(&rcu_sched_lock_map),
+			   "Illegal idle entry in RCU-sched read-side critical section.");
 }
 
 /**
@@ -389,10 +394,15 @@ void rcu_idle_enter(void)
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
-	rdtp->dynticks_nesting = 0;
+	WARN_ON_ONCE((oldval & DYNTICK_TASK_NEST_MASK) == 0);
+	if ((oldval & DYNTICK_TASK_NEST_MASK) == DYNTICK_TASK_NEST_VALUE)
+		rdtp->dynticks_nesting = 0;
+	else
+		rdtp->dynticks_nesting -= DYNTICK_TASK_NEST_VALUE;
 	rcu_idle_enter_common(rdtp, oldval);
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_idle_enter);
 
 /**
  * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
@@ -462,7 +472,7 @@ static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
  * Exit idle mode, in other words, -enter- the mode in which RCU
  * read-side critical sections can occur.
  *
- * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to
+ * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NEST to
  * allow for the possibility of usermode upcalls messing up our count
  * of interrupt nesting level during the busy period that is just
  * now starting.
@@ -476,11 +486,15 @@ void rcu_idle_exit(void)
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
 	oldval = rdtp->dynticks_nesting;
-	WARN_ON_ONCE(oldval != 0);
-	rdtp->dynticks_nesting = DYNTICK_TASK_NESTING;
+	WARN_ON_ONCE(oldval < 0);
+	if (oldval & DYNTICK_TASK_NEST_MASK)
+		rdtp->dynticks_nesting += DYNTICK_TASK_NEST_VALUE;
+	else
+		rdtp->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	rcu_idle_exit_common(rdtp, oldval);
 	local_irq_restore(flags);
 }
+EXPORT_SYMBOL_GPL(rcu_idle_exit);
 
 /**
  * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
@@ -581,6 +595,49 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
+#ifdef CONFIG_HOTPLUG_CPU
+
+/*
+ * Is the current CPU online? Disable preemption to avoid false positives
+ * that could otherwise happen due to the current CPU number being sampled,
+ * this task being preempted, its old CPU being taken offline, resuming
+ * on some other CPU, then determining that its old CPU is now offline.
+ * It is OK to use RCU on an offline processor during initial boot, hence
+ * the check for rcu_scheduler_fully_active. Note also that it is OK
+ * for a CPU coming online to use RCU for one jiffy prior to marking itself
+ * online in the cpu_online_mask. Similarly, it is OK for a CPU going
+ * offline to continue to use RCU for one jiffy after marking itself
+ * offline in the cpu_online_mask. This leniency is necessary given the
+ * non-atomic nature of the online and offline processing, for example,
+ * the fact that a CPU enters the scheduler after completing the CPU_DYING
+ * notifiers.
+ *
+ * This is also why RCU internally marks CPUs online during the
+ * CPU_UP_PREPARE phase and offline during the CPU_DEAD phase.
+ *
+ * Disable checking if in an NMI handler because we cannot safely report
+ * errors from NMI handlers anyway.
+ */
+bool rcu_lockdep_current_cpu_online(void)
+{
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	bool ret;
+
+	if (in_nmi())
+		return 1;
+	preempt_disable();
+	rdp = &__get_cpu_var(rcu_sched_data);
+	rnp = rdp->mynode;
+	ret = (rdp->grpmask & rnp->qsmaskinit) ||
+	      !rcu_scheduler_fully_active;
+	preempt_enable();
+	return ret;
+}
+EXPORT_SYMBOL_GPL(rcu_lockdep_current_cpu_online);
+
+#endif /* #ifdef CONFIG_HOTPLUG_CPU */
+
 #endif /* #ifdef CONFIG_PROVE_RCU */
 
 /**
@@ -595,8 +652,6 @@ int rcu_is_cpu_rrupt_from_idle(void)
 	return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 }
 
-#ifdef CONFIG_SMP
-
 /*
  * Snapshot the specified CPU's dynticks counter so that we can later
  * credit them with an implicit quiescent state. Return 1 if this CPU
@@ -640,12 +695,28 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 	return rcu_implicit_offline_qs(rdp);
 }
 
-#endif /* #ifdef CONFIG_SMP */
+static int jiffies_till_stall_check(void)
+{
+	int till_stall_check = ACCESS_ONCE(rcu_cpu_stall_timeout);
+
+	/*
+	 * Limit check must be consistent with the Kconfig limits
+	 * for CONFIG_RCU_CPU_STALL_TIMEOUT.
+	 */
+	if (till_stall_check < 3) {
+		ACCESS_ONCE(rcu_cpu_stall_timeout) = 3;
+		till_stall_check = 3;
+	} else if (till_stall_check > 300) {
+		ACCESS_ONCE(rcu_cpu_stall_timeout) = 300;
+		till_stall_check = 300;
+	}
+	return till_stall_check * HZ + RCU_STALL_DELAY_DELTA;
+}
 
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
 	rsp->gp_start = jiffies;
-	rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_CHECK;
+	rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
 }
 
 static void print_other_cpu_stall(struct rcu_state *rsp)
@@ -664,13 +735,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
-	rsp->jiffies_stall = jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
-
-	/*
-	 * Now rat on any tasks that got kicked up to the root rcu_node
-	 * due to CPU offlining.
-	 */
-	ndetected = rcu_print_task_stall(rnp);
+	rsp->jiffies_stall = jiffies + 3 * jiffies_till_stall_check() + 3;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	/*
@@ -678,8 +743,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	 * See Documentation/RCU/stallwarn.txt for info on how to debug
 	 * RCU CPU stall warnings.
 	 */
-	printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks: {",
+	printk(KERN_ERR "INFO: %s detected stalls on CPUs/tasks:",
 	       rsp->name);
+	print_cpu_stall_info_begin();
 	rcu_for_each_leaf_node(rsp, rnp) {
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		ndetected += rcu_print_task_stall(rnp);
@@ -688,11 +754,22 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 			continue;
 		for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
 			if (rnp->qsmask & (1UL << cpu)) {
-				printk(" %d", rnp->grplo + cpu);
+				print_cpu_stall_info(rsp, rnp->grplo + cpu);
 				ndetected++;
 			}
 	}
-	printk("} (detected by %d, t=%ld jiffies)\n",
+
+	/*
+	 * Now rat on any tasks that got kicked up to the root rcu_node
+	 * due to CPU offlining.
+	 */
+	rnp = rcu_get_root(rsp);
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	ndetected = rcu_print_task_stall(rnp);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+
+	print_cpu_stall_info_end();
+	printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
 	       smp_processor_id(), (long)(jiffies - rsp->gp_start));
 	if (ndetected == 0)
 		printk(KERN_ERR "INFO: Stall ended before state dump start\n");
@@ -716,15 +793,18 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	 * See Documentation/RCU/stallwarn.txt for info on how to debug
 	 * RCU CPU stall warnings.
 	 */
-	printk(KERN_ERR "INFO: %s detected stall on CPU %d (t=%lu jiffies)\n",
-	       rsp->name, smp_processor_id(), jiffies - rsp->gp_start);
+	printk(KERN_ERR "INFO: %s self-detected stall on CPU", rsp->name);
+	print_cpu_stall_info_begin();
+	print_cpu_stall_info(rsp, smp_processor_id());
+	print_cpu_stall_info_end();
+	printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
 	if (!trigger_all_cpu_backtrace())
 		dump_stack();
 
 	raw_spin_lock_irqsave(&rnp->lock, flags);
 	if (ULONG_CMP_GE(jiffies, rsp->jiffies_stall))
-		rsp->jiffies_stall =
-			jiffies + RCU_SECONDS_TILL_STALL_RECHECK;
+		rsp->jiffies_stall = jiffies +
+				     3 * jiffies_till_stall_check() + 3;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	set_need_resched();  /* kick ourselves to get things going. */
@@ -807,6 +887,7 @@ static void __note_new_gpnum(struct rcu_state *rsp, struct rcu_node *rnp, struct
 			rdp->passed_quiesce = 0;
 		} else
 			rdp->qs_pending = 0;
+		zero_cpu_stall_ticks(rdp);
 	}
 }
 
@@ -943,6 +1024,10 @@ rcu_start_gp_per_cpu(struct rcu_state *rsp, struct rcu_node *rnp, struct rcu_dat
  * in preparation for detecting the next grace period. The caller must hold
  * the root node's ->lock, which is released before return. Hard irqs must
  * be disabled.
+ *
+ * Note that it is legal for a dying CPU (which is marked as offline) to
+ * invoke this function. This can happen when the dying CPU reports its
+ * quiescent state.
  */
 static void
 rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
@@ -980,26 +1065,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
 	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
 	record_gp_stall_check_time(rsp);
-
-	/* Special-case the common single-level case. */
-	if (NUM_RCU_NODES == 1) {
-		rcu_preempt_check_blocked_tasks(rnp);
-		rnp->qsmask = rnp->qsmaskinit;
-		rnp->gpnum = rsp->gpnum;
-		rnp->completed = rsp->completed;
-		rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
-		rcu_start_gp_per_cpu(rsp, rnp, rdp);
-		rcu_preempt_boost_start_gp(rnp);
-		trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
-					    rnp->level, rnp->grplo,
-					    rnp->grphi, rnp->qsmask);
-		raw_spin_unlock_irqrestore(&rnp->lock, flags);
-		return;
-	}
-
 	raw_spin_unlock(&rnp->lock); /* leave irqs disabled. */
 
-
 	/* Exclude any concurrent CPU-hotplug operations. */
 	raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
 
@@ -1245,53 +1312,115 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 
 /*
  * Move a dying CPU's RCU callbacks to online CPU's callback list.
- * Synchronization is not required because this function executes
- * in stop_machine() context.
+ * Also record a quiescent state for this CPU for the current grace period.
+ * Synchronization and interrupt disabling are not required because
+ * this function executes in stop_machine() context. Therefore, cleanup
+ * operations that might block must be done later from the CPU_DEAD
+ * notifier.
+ *
+ * Note that the outgoing CPU's bit has already been cleared in the
+ * cpu_online_mask. This allows us to randomly pick a callback
+ * destination from the bits set in that mask.
  */
-static void rcu_send_cbs_to_online(struct rcu_state *rsp)
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
 	int i;
-	/* current DYING CPU is cleared in the cpu_online_mask */
+	unsigned long mask;
 	int receive_cpu = cpumask_any(cpu_online_mask);
 	struct rcu_data *rdp = this_cpu_ptr(rsp->rda);
 	struct rcu_data *receive_rdp = per_cpu_ptr(rsp->rda, receive_cpu);
+	RCU_TRACE(struct rcu_node *rnp = rdp->mynode); /* For dying CPU. */
+
+	/* First, adjust the counts. */
+	if (rdp->nxtlist != NULL) {
+		receive_rdp->qlen_lazy += rdp->qlen_lazy;
+		receive_rdp->qlen += rdp->qlen;
+		rdp->qlen_lazy = 0;
+		rdp->qlen = 0;
+	}
 
-	if (rdp->nxtlist == NULL)
-		return;  /* irqs disabled, so comparison is stable. */
+	/*
+	 * Next, move ready-to-invoke callbacks to be invoked on some
+	 * other CPU. These will not be required to pass through another
+	 * grace period: They are done, regardless of CPU.
+	 */
+	if (rdp->nxtlist != NULL &&
+	    rdp->nxttail[RCU_DONE_TAIL] != &rdp->nxtlist) {
+		struct rcu_head *oldhead;
+		struct rcu_head **oldtail;
+		struct rcu_head **newtail;
+
+		oldhead = rdp->nxtlist;
+		oldtail = receive_rdp->nxttail[RCU_DONE_TAIL];
+		rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
+		*rdp->nxttail[RCU_DONE_TAIL] = *oldtail;
+		*receive_rdp->nxttail[RCU_DONE_TAIL] = oldhead;
+		newtail = rdp->nxttail[RCU_DONE_TAIL];
+		for (i = RCU_DONE_TAIL; i < RCU_NEXT_SIZE; i++) {
+			if (receive_rdp->nxttail[i] == oldtail)
+				receive_rdp->nxttail[i] = newtail;
+			if (rdp->nxttail[i] == newtail)
+				rdp->nxttail[i] = &rdp->nxtlist;
+		}
+	}
 
-	*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
-	receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
-	receive_rdp->qlen += rdp->qlen;
-	receive_rdp->n_cbs_adopted += rdp->qlen;
-	rdp->n_cbs_orphaned += rdp->qlen;
+	/*
+	 * Finally, put the rest of the callbacks at the end of the list.
+	 * The ones that made it partway through get to start over: We
+	 * cannot assume that grace periods are synchronized across CPUs.
+	 * (We could splice RCU_WAIT_TAIL into RCU_NEXT_READY_TAIL, but
+	 * this does not seem compelling. Not yet, anyway.)
+	 */
+	if (rdp->nxtlist != NULL) {
+		*receive_rdp->nxttail[RCU_NEXT_TAIL] = rdp->nxtlist;
+		receive_rdp->nxttail[RCU_NEXT_TAIL] =
+				rdp->nxttail[RCU_NEXT_TAIL];
+		receive_rdp->n_cbs_adopted += rdp->qlen;
+		rdp->n_cbs_orphaned += rdp->qlen;
+
+		rdp->nxtlist = NULL;
+		for (i = 0; i < RCU_NEXT_SIZE; i++)
+			rdp->nxttail[i] = &rdp->nxtlist;
+	}
 
-	rdp->nxtlist = NULL;
-	for (i = 0; i < RCU_NEXT_SIZE; i++)
-		rdp->nxttail[i] = &rdp->nxtlist;
-	rdp->qlen = 0;
+	/*
+	 * Record a quiescent state for the dying CPU. This is safe
+	 * only because we have already cleared out the callbacks.
+	 * (Otherwise, the RCU core might try to schedule the invocation
+	 * of callbacks on this now-offline CPU, which would be bad.)
+	 */
+	mask = rdp->grpmask; /* rnp->grplo is constant. */
+	trace_rcu_grace_period(rsp->name,
+			       rnp->gpnum + 1 - !!(rnp->qsmask & mask),
+			       "cpuofl");
+	rcu_report_qs_rdp(smp_processor_id(), rsp, rdp, rsp->gpnum);
+	/* Note that rcu_report_qs_rdp() might call trace_rcu_grace_period(). */
 }
 
 /*
- * Remove the outgoing CPU from the bitmasks in the rcu_node hierarchy
- * and move all callbacks from the outgoing CPU to the current one.
+ * The CPU has been completely removed, and some other CPU is reporting
+ * this fact from process context. Do the remainder of the cleanup.
  * There can only be one CPU hotplug operation at a time, so no other
  * CPU can be attempting to update rcu_cpu_kthread_task.
  */
-static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
+static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 	unsigned long flags;
 	unsigned long mask;
 	int need_report = 0;
 	struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
-	struct rcu_node *rnp;
+	struct rcu_node *rnp = rdp->mynode; /* Outgoing CPU's rnp. */
 
+	/* Adjust any no-longer-needed kthreads. */
 	rcu_stop_cpu_kthread(cpu);
+	rcu_node_kthread_setaffinity(rnp, -1);
+
+	/* Remove the dying CPU from the bitmasks in the rcu_node hierarchy. */
 
 	/* Exclude any attempts to start a new grace period. */
 	raw_spin_lock_irqsave(&rsp->onofflock, flags);
 
 	/* Remove the outgoing CPU from the masks in the rcu_node hierarchy. */
-	rnp = rdp->mynode;  /* this is the outgoing CPU's rnp. */
 	mask = rdp->grpmask;	/* rnp->grplo is constant. */
 	do {
 		raw_spin_lock(&rnp->lock);	/* irqs already disabled. */
@@ -1299,20 +1428,11 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 		if (rnp->qsmaskinit != 0) {
 			if (rnp != rdp->mynode)
 				raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
-			else
-				trace_rcu_grace_period(rsp->name,
-						       rnp->gpnum + 1 -
-						       !!(rnp->qsmask & mask),
-						       "cpuofl");
 			break;
 		}
-		if (rnp == rdp->mynode) {
-			trace_rcu_grace_period(rsp->name,
-					       rnp->gpnum + 1 -
-					       !!(rnp->qsmask & mask),
-					       "cpuofl");
+		if (rnp == rdp->mynode)
 			need_report = rcu_preempt_offline_tasks(rsp, rnp, rdp);
-		} else
+		else
 			raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 		mask = rnp->grpmask;
 		rnp = rnp->parent;
@@ -1332,29 +1452,15 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	if (need_report & RCU_OFL_TASKS_EXP_GP)
 		rcu_report_exp_rnp(rsp, rnp, true);
-	rcu_node_kthread_setaffinity(rnp, -1);
-}
-
-/*
- * Remove the specified CPU from the RCU hierarchy and move any pending
- * callbacks that it might have to the current CPU. This code assumes
- * that at least one CPU in the system will remain running at all times.
- * Any attempt to offline -all- CPUs is likely to strand RCU callbacks.
- */
-static void rcu_offline_cpu(int cpu)
-{
-	__rcu_offline_cpu(cpu, &rcu_sched_state);
-	__rcu_offline_cpu(cpu, &rcu_bh_state);
-	rcu_preempt_offline_cpu(cpu);
 }
 
 #else /* #ifdef CONFIG_HOTPLUG_CPU */
 
-static void rcu_send_cbs_to_online(struct rcu_state *rsp)
+static void rcu_cleanup_dying_cpu(struct rcu_state *rsp)
 {
 }
 
-static void rcu_offline_cpu(int cpu)
+static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 {
 }
 
@@ -1368,11 +1474,11 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	unsigned long flags;
 	struct rcu_head *next, *list, **tail;
-	int bl, count;
+	int bl, count, count_lazy;
 
 	/* If no callbacks are ready, just return.*/
 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
-		trace_rcu_batch_start(rsp->name, 0, 0);
+		trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, 0);
 		trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
 				    need_resched(), is_idle_task(current),
 				    rcu_is_callbacks_kthread());
@@ -1384,8 +1490,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	 * races with call_rcu() from interrupt handlers.
 	 */
 	local_irq_save(flags);
+	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	bl = rdp->blimit;
-	trace_rcu_batch_start(rsp->name, rdp->qlen, bl);
+	trace_rcu_batch_start(rsp->name, rdp->qlen_lazy, rdp->qlen, bl);
 	list = rdp->nxtlist;
 	rdp->nxtlist = *rdp->nxttail[RCU_DONE_TAIL];
 	*rdp->nxttail[RCU_DONE_TAIL] = NULL;
@@ -1396,12 +1503,13 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	local_irq_restore(flags);
 
 	/* Invoke callbacks. */
-	count = 0;
+	count = count_lazy = 0;
 	while (list) {
 		next = list->next;
 		prefetch(next);
 		debug_rcu_head_unqueue(list);
-		__rcu_reclaim(rsp->name, list);
+		if (__rcu_reclaim(rsp->name, list))
+			count_lazy++;
 		list = next;
 		/* Stop only if limit reached and CPU has something to do. */
 		if (++count >= bl &&
@@ -1416,6 +1524,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 			    rcu_is_callbacks_kthread());
 
 	/* Update count, and requeue any remaining callbacks. */
+	rdp->qlen_lazy -= count_lazy;
 	rdp->qlen -= count;
 	rdp->n_cbs_invoked += count;
 	if (list != NULL) {
@@ -1458,6 +1567,7 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 void rcu_check_callbacks(int cpu, int user)
 {
 	trace_rcu_utilization("Start scheduler-tick");
+	increment_cpu_stall_ticks();
 	if (user || rcu_is_cpu_rrupt_from_idle()) {
 
 		/*
@@ -1492,8 +1602,6 @@ void rcu_check_callbacks(int cpu, int user)
 	trace_rcu_utilization("End scheduler-tick");
 }
 
-#ifdef CONFIG_SMP
-
 /*
  * Scan the leaf rcu_node structures, processing dyntick state for any that
  * have not yet encountered a quiescent state, using the function specified.
@@ -1616,15 +1724,6 @@ unlock_fqs_ret:
 	trace_rcu_utilization("End fqs");
 }
 
-#else /* #ifdef CONFIG_SMP */
-
-static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
-{
-	set_need_resched();
-}
-
-#endif /* #else #ifdef CONFIG_SMP */
-
 /*
  * This does the RCU core processing work for the specified rcu_state
  * and rcu_data structures. This may be called only from the CPU to
@@ -1702,11 +1801,12 @@ static void invoke_rcu_core(void)
 
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
-	   struct rcu_state *rsp)
+	   struct rcu_state *rsp, bool lazy)
 {
 	unsigned long flags;
 	struct rcu_data *rdp;
 
+	WARN_ON_ONCE((unsigned long)head & 0x3); /* Misaligned rcu_head! */
 	debug_rcu_head_queue(head);
 	head->func = func;
 	head->next = NULL;
@@ -1720,18 +1820,21 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	 * a quiescent state betweentimes.
 	 */
 	local_irq_save(flags);
+	WARN_ON_ONCE(cpu_is_offline(smp_processor_id()));
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
 	*rdp->nxttail[RCU_NEXT_TAIL] = head;
 	rdp->nxttail[RCU_NEXT_TAIL] = &head->next;
 	rdp->qlen++;
+	if (lazy)
+		rdp->qlen_lazy++;
 
 	if (__is_kfree_rcu_offset((unsigned long)func))
 		trace_rcu_kfree_callback(rsp->name, head, (unsigned long)func,
-					 rdp->qlen);
+					 rdp->qlen_lazy, rdp->qlen);
 	else
-		trace_rcu_callback(rsp->name, head, rdp->qlen);
+		trace_rcu_callback(rsp->name, head, rdp->qlen_lazy, rdp->qlen);
 
 	/* If interrupts were disabled, don't dive into RCU core. */
 	if (irqs_disabled_flags(flags)) {
@@ -1778,16 +1881,16 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
  */
 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-	__call_rcu(head, func, &rcu_sched_state);
+	__call_rcu(head, func, &rcu_sched_state, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
 /*
- * Queue an RCU for invocation after a quicker grace period.
+ * Queue an RCU callback for invocation after a quicker grace period.
  */
 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-	__call_rcu(head, func, &rcu_bh_state);
+	__call_rcu(head, func, &rcu_bh_state, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
@@ -1816,6 +1919,10 @@ EXPORT_SYMBOL_GPL(call_rcu_bh);
  */
 void synchronize_sched(void)
 {
+	rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+			   !lock_is_held(&rcu_lock_map) &&
+			   !lock_is_held(&rcu_sched_lock_map),
+			   "Illegal synchronize_sched() in RCU-sched read-side critical section");
 	if (rcu_blocking_is_gp())
 		return;
 	wait_rcu_gp(call_rcu_sched);
@@ -1833,12 +1940,137 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
  */
 void synchronize_rcu_bh(void)
 {
+	rcu_lockdep_assert(!lock_is_held(&rcu_bh_lock_map) &&
+			   !lock_is_held(&rcu_lock_map) &&
+			   !lock_is_held(&rcu_sched_lock_map),
+			   "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
 	if (rcu_blocking_is_gp())
 		return;
 	wait_rcu_gp(call_rcu_bh);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
+static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
+static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
+
+static int synchronize_sched_expedited_cpu_stop(void *data)
+{
+	/*
+	 * There must be a full memory barrier on each affected CPU
+	 * between the time that try_stop_cpus() is called and the
+	 * time that it returns.
+	 *
+	 * In the current initial implementation of cpu_stop, the
+	 * above condition is already met when the control reaches
+	 * this point and the following smp_mb() is not strictly
+	 * necessary. Do smp_mb() anyway for documentation and
+	 * robustness against future implementation changes.
+	 */
+	smp_mb(); /* See above comment block. */
+	return 0;
+}
+
+/**
+ * synchronize_sched_expedited - Brute-force RCU-sched grace period
+ *
+ * Wait for an RCU-sched grace period to elapse, but use a "big hammer"
+ * approach to force the grace period to end quickly. This consumes
+ * significant time on all CPUs and is unfriendly to real-time workloads,
+ * so is thus not recommended for any sort of common-case code. In fact,
+ * if you are using synchronize_sched_expedited() in a loop, please
+ * restructure your code to batch your updates, and then use a single
+ * synchronize_sched() instead.
+ *
+ * Note that it is illegal to call this function while holding any lock
+ * that is acquired by a CPU-hotplug notifier. And yes, it is also illegal
+ * to call this function from a CPU-hotplug notifier. Failing to observe
+ * these restriction will result in deadlock.
+ *
+ * This implementation can be thought of as an application of ticket
+ * locking to RCU, with sync_sched_expedited_started and
+ * sync_sched_expedited_done taking on the roles of the halves
+ * of the ticket-lock word. Each task atomically increments
+ * sync_sched_expedited_started upon entry, snapshotting the old value,
+ * then attempts to stop all the CPUs. If this succeeds, then each
+ * CPU will have executed a context switch, resulting in an RCU-sched
+ * grace period. We are then done, so we use atomic_cmpxchg() to
+ * update sync_sched_expedited_done to match our snapshot -- but
+ * only if someone else has not already advanced past our snapshot.
+ *
+ * On the other hand, if try_stop_cpus() fails, we check the value
+ * of sync_sched_expedited_done. If it has advanced past our
+ * initial snapshot, then someone else must have forced a grace period
+ * some time after we took our snapshot. In this case, our work is
+ * done for us, and we can simply return. Otherwise, we try again,
+ * but keep our initial snapshot for purposes of checking for someone
+ * doing our work for us.
+ *
+ * If we fail too many times in a row, we fall back to synchronize_sched().
+ */
+void synchronize_sched_expedited(void)
+{
+	int firstsnap, s, snap, trycount = 0;
+
+	/* Note that atomic_inc_return() implies full memory barrier. */
+	firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
+	get_online_cpus();
+	WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
+
+	/*
+	 * Each pass through the following loop attempts to force a
+	 * context switch on each CPU.
+	 */
+	while (try_stop_cpus(cpu_online_mask,
+			     synchronize_sched_expedited_cpu_stop,
+			     NULL) == -EAGAIN) {
+		put_online_cpus();
+
+		/* No joy, try again later. Or just synchronize_sched(). */
+		if (trycount++ < 10)
+			udelay(trycount * num_online_cpus());
+		else {
+			synchronize_sched();
+			return;
+		}
+
+		/* Check to see if someone else did our work for us. */
+		s = atomic_read(&sync_sched_expedited_done);
+		if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
+			smp_mb(); /* ensure test happens before caller kfree */
+			return;
+		}
+
+		/*
+		 * Refetching sync_sched_expedited_started allows later
+		 * callers to piggyback on our grace period. We subtract
+		 * 1 to get the same token that the last incrementer got.
+		 * We retry after they started, so our grace period works
+		 * for them, and they started after our first try, so their
+		 * grace period works for us.
+		 */
+		get_online_cpus();
+		snap = atomic_read(&sync_sched_expedited_started);
+		smp_mb(); /* ensure read is before try_stop_cpus(). */
+	}
+
+	/*
+	 * Everyone up to our most recent fetch is covered by our grace
+	 * period. Update the counter, but only if our work is still
+	 * relevant -- which it won't be if someone who started later
+	 * than we did beat us to the punch.
+	 */
+	do {
+		s = atomic_read(&sync_sched_expedited_done);
+		if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
+			smp_mb(); /* ensure test happens before caller kfree */
+			break;
+		}
+	} while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+
+	put_online_cpus();
+}
+EXPORT_SYMBOL_GPL(synchronize_sched_expedited);
+
 /*
  * Check to see if there is any immediate RCU-related work to be done
  * by the current CPU, for the specified type of RCU, returning 1 if so.
@@ -1932,7 +2164,7 @@ static int rcu_cpu_has_callbacks(int cpu)
 	/* RCU callbacks either ready or pending? */
 	return per_cpu(rcu_sched_data, cpu).nxtlist ||
 	       per_cpu(rcu_bh_data, cpu).nxtlist ||
-	       rcu_preempt_needs_cpu(cpu);
+	       rcu_preempt_cpu_has_callbacks(cpu);
 }
 
 static DEFINE_PER_CPU(struct rcu_head, rcu_barrier_head) = {NULL};
@@ -2027,9 +2259,10 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	rdp->nxtlist = NULL;
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
+	rdp->qlen_lazy = 0;
 	rdp->qlen = 0;
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
+	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
@@ -2057,7 +2290,7 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
-	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
+	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
 	rcu_prepare_for_idle_init(cpu);
@@ -2139,16 +2372,18 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		 * touch any data without introducing corruption. We send the
 		 * dying CPU's callbacks to an arbitrarily chosen online CPU.
 		 */
-		rcu_send_cbs_to_online(&rcu_bh_state);
-		rcu_send_cbs_to_online(&rcu_sched_state);
-		rcu_preempt_send_cbs_to_online();
+		rcu_cleanup_dying_cpu(&rcu_bh_state);
+		rcu_cleanup_dying_cpu(&rcu_sched_state);
+		rcu_preempt_cleanup_dying_cpu();
 		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
 	case CPU_UP_CANCELED:
 	case CPU_UP_CANCELED_FROZEN:
-		rcu_offline_cpu(cpu);
+		rcu_cleanup_dead_cpu(cpu, &rcu_bh_state);
+		rcu_cleanup_dead_cpu(cpu, &rcu_sched_state);
+		rcu_preempt_cleanup_dead_cpu(cpu);
 		break;
 	default:
 		break;