Diffstat (limited to 'kernel/rcutree.c')
 kernel/rcutree.c | 347 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 222 insertions(+), 125 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 74df86bd9204..e441b77b614e 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -68,9 +68,9 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 	.level = { &sname##_state.node[0] }, \
 	.call = cr, \
 	.fqs_state = RCU_GP_IDLE, \
-	.gpnum = -300, \
-	.completed = -300, \
-	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.onofflock), \
+	.gpnum = 0UL - 300UL, \
+	.completed = 0UL - 300UL, \
+	.orphan_lock = __RAW_SPIN_LOCK_UNLOCKED(&sname##_state.orphan_lock), \
 	.orphan_nxttail = &sname##_state.orphan_nxtlist, \
 	.orphan_donetail = &sname##_state.orphan_donelist, \
 	.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
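
The grace-period counters are unsigned long, so both spellings of the initializer yield the same value; writing 0UL - 300UL just makes the modular arithmetic explicit instead of leaning on an implicit signed-to-unsigned conversion. A minimal sketch of the equivalence (illustrative only, not part of the patch):

	unsigned long a = -300;		/* implicit conversion to unsigned */
	unsigned long b = 0UL - 300UL;	/* explicit modular arithmetic */
	/* a == b == ULONG_MAX - 299: the counters start a few hundred
	 * grace periods short of the wrap point, so wraparound handling
	 * gets exercised soon after boot. */
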
@@ -207,18 +207,15 @@ EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
 	.dynticks = ATOMIC_INIT(1),
-#if defined(CONFIG_RCU_USER_QS) && !defined(CONFIG_RCU_USER_QS_FORCE)
-	.ignore_user_qs = true,
-#endif
 };
 
-static int blimit = 10; /* Maximum callbacks per rcu_do_batch. */
-static int qhimark = 10000; /* If this many pending, ignore blimit. */
-static int qlowmark = 100; /* Once only this many pending, use blimit. */
+static long blimit = 10; /* Maximum callbacks per rcu_do_batch. */
+static long qhimark = 10000; /* If this many pending, ignore blimit. */
+static long qlowmark = 100; /* Once only this many pending, use blimit. */
 
-module_param(blimit, int, 0444);
-module_param(qhimark, int, 0444);
-module_param(qlowmark, int, 0444);
+module_param(blimit, long, 0444);
+module_param(qhimark, long, 0444);
+module_param(qlowmark, long, 0444);
 
 int rcu_cpu_stall_suppress __read_mostly; /* 1 = suppress stall warnings. */
 int rcu_cpu_stall_timeout __read_mostly = CONFIG_RCU_CPU_STALL_TIMEOUT;
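
Since rcutree.c is built in, these remain boot-time-settable module parameters (read-only at runtime given the 0444 mode); the change only widens their type from int to long. Assuming the usual rcutree. prefix documented for these knobs, command-line usage would look like (values arbitrary):

	rcutree.blimit=20 rcutree.qhimark=20000 rcutree.qlowmark=200
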
@@ -303,7 +300,8 @@ EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
 static int
 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 {
-	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
+	return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
+	       rdp->nxttail[RCU_DONE_TAIL] != NULL;
 }
 
 /*
@@ -312,8 +310,11 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 static int
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-	return *rdp->nxttail[RCU_DONE_TAIL +
-			     ACCESS_ONCE(rsp->completed) != rdp->completed] &&
+	struct rcu_head **ntp;
+
+	ntp = rdp->nxttail[RCU_DONE_TAIL +
+			   (ACCESS_ONCE(rsp->completed) != rdp->completed)];
+	return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
 	       !rcu_gp_in_progress(rsp);
 }
 
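
The index arithmetic above is terse but sound: the tail indices are consecutive small integers, so adding the 0-or-1 result of the comparison picks the "done" segment when this CPU has caught up with rsp->completed and the "wait" segment when it has not. A rough illustration (RCU_DONE_TAIL and RCU_WAIT_TAIL values assumed from rcutree.h):

	#define RCU_DONE_TAIL	0	/* callbacks whose grace period ended */
	#define RCU_WAIT_TAIL	1	/* callbacks waiting on the current GP */

	static int segment_to_check(unsigned long rsp_completed,
				    unsigned long rdp_completed)
	{
		/* 0 -> RCU_DONE_TAIL, 1 -> RCU_WAIT_TAIL */
		return RCU_DONE_TAIL + (rsp_completed != rdp_completed);
	}
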
@@ -416,29 +417,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_enter);
  */
 void rcu_user_enter(void)
 {
-	unsigned long flags;
-	struct rcu_dynticks *rdtp;
-
-	/*
-	 * Some contexts may involve an exception occuring in an irq,
-	 * leading to that nesting:
-	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
-	 * helpers are enough to protect RCU uses inside the exception. So
-	 * just return immediately if we detect we are in an IRQ.
-	 */
-	if (in_interrupt())
-		return;
-
-	WARN_ON_ONCE(!current->mm);
-
-	local_irq_save(flags);
-	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (!rdtp->ignore_user_qs && !rdtp->in_user) {
-		rdtp->in_user = true;
-		rcu_eqs_enter(true);
-	}
-	local_irq_restore(flags);
+	rcu_eqs_enter(1);
 }
 
 /**
@@ -575,27 +554,7 @@ EXPORT_SYMBOL_GPL(rcu_idle_exit);
  */
 void rcu_user_exit(void)
 {
-	unsigned long flags;
-	struct rcu_dynticks *rdtp;
-
-	/*
-	 * Some contexts may involve an exception occuring in an irq,
-	 * leading to that nesting:
-	 * rcu_irq_enter() rcu_user_exit() rcu_user_exit() rcu_irq_exit()
-	 * This would mess up the dyntick_nesting count though. And rcu_irq_*()
-	 * helpers are enough to protect RCU uses inside the exception. So
-	 * just return immediately if we detect we are in an IRQ.
-	 */
-	if (in_interrupt())
-		return;
-
-	local_irq_save(flags);
-	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (rdtp->in_user) {
-		rdtp->in_user = false;
-		rcu_eqs_exit(true);
-	}
-	local_irq_restore(flags);
+	rcu_eqs_exit(1);
 }
 
 /**
@@ -718,21 +677,6 @@ int rcu_is_cpu_idle(void)
 }
 EXPORT_SYMBOL(rcu_is_cpu_idle);
 
-#ifdef CONFIG_RCU_USER_QS
-void rcu_user_hooks_switch(struct task_struct *prev,
-			   struct task_struct *next)
-{
-	struct rcu_dynticks *rdtp;
-
-	/* Interrupts are disabled in context switch */
-	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (!rdtp->ignore_user_qs) {
-		clear_tsk_thread_flag(prev, TIF_NOHZ);
-		set_tsk_thread_flag(next, TIF_NOHZ);
-	}
-}
-#endif /* #ifdef CONFIG_RCU_USER_QS */
-
 #if defined(CONFIG_PROVE_RCU) && defined(CONFIG_HOTPLUG_CPU)
 
 /*
@@ -873,6 +817,29 @@ static void record_gp_stall_check_time(struct rcu_state *rsp)
 	rsp->jiffies_stall = jiffies + jiffies_till_stall_check();
 }
 
+/*
+ * Dump stacks of all tasks running on stalled CPUs.  This is a fallback
+ * for architectures that do not implement trigger_all_cpu_backtrace().
+ * The NMI-triggered stack traces are more accurate because they are
+ * printed by the target CPU.
+ */
+static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
+{
+	int cpu;
+	unsigned long flags;
+	struct rcu_node *rnp;
+
+	rcu_for_each_leaf_node(rsp, rnp) {
+		raw_spin_lock_irqsave(&rnp->lock, flags);
+		if (rnp->qsmask != 0) {
+			for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
+				if (rnp->qsmask & (1UL << cpu))
+					dump_cpu_task(rnp->grplo + cpu);
+		}
+		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	}
+}
+
 static void print_other_cpu_stall(struct rcu_state *rsp)
 {
 	int cpu;
@@ -880,6 +847,7 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	unsigned long flags;
 	int ndetected = 0;
 	struct rcu_node *rnp = rcu_get_root(rsp);
+	long totqlen = 0;
 
 	/* Only let one CPU complain about others per time interval. */
 
@@ -924,12 +892,15 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	print_cpu_stall_info_end();
-	printk(KERN_CONT "(detected by %d, t=%ld jiffies)\n",
-	       smp_processor_id(), (long)(jiffies - rsp->gp_start));
+	for_each_possible_cpu(cpu)
+		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+	pr_cont("(detected by %d, t=%ld jiffies, g=%lu, c=%lu, q=%lu)\n",
+	       smp_processor_id(), (long)(jiffies - rsp->gp_start),
+	       rsp->gpnum, rsp->completed, totqlen);
 	if (ndetected == 0)
 		printk(KERN_ERR "INFO: Stall ended before state dump start\n");
 	else if (!trigger_all_cpu_backtrace())
-		dump_stack();
+		rcu_dump_cpu_stacks(rsp);
 
 	/* Complain about tasks blocking the grace period. */
 
@@ -940,8 +911,10 @@ static void print_other_cpu_stall(struct rcu_state *rsp)
 
 static void print_cpu_stall(struct rcu_state *rsp)
 {
+	int cpu;
 	unsigned long flags;
 	struct rcu_node *rnp = rcu_get_root(rsp);
+	long totqlen = 0;
 
 	/*
 	 * OK, time to rat on ourselves...
@@ -952,7 +925,10 @@ static void print_cpu_stall(struct rcu_state *rsp)
 	print_cpu_stall_info_begin();
 	print_cpu_stall_info(rsp, smp_processor_id());
 	print_cpu_stall_info_end();
-	printk(KERN_CONT " (t=%lu jiffies)\n", jiffies - rsp->gp_start);
+	for_each_possible_cpu(cpu)
+		totqlen += per_cpu_ptr(rsp->rda, cpu)->qlen;
+	pr_cont(" (t=%lu jiffies g=%lu c=%lu q=%lu)\n",
+		jiffies - rsp->gp_start, rsp->gpnum, rsp->completed, totqlen);
 	if (!trigger_all_cpu_backtrace())
 		dump_stack();
 
@@ -1091,6 +1067,7 @@ static void init_callback_list(struct rcu_data *rdp)
 	rdp->nxtlist = NULL;
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
+	init_nocb_callback_list(rdp);
 }
 
 /*
@@ -1404,15 +1381,37 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	    !cpu_needs_another_gp(rsp, rdp)) {
 		/*
 		 * Either we have not yet spawned the grace-period
-		 * task or this CPU does not need another grace period.
+		 * task, this CPU does not need another grace period,
+		 * or a grace period is already in progress.
 		 * Either way, don't start a new grace period.
 		 */
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 		return;
 	}
 
+	/*
+	 * Because there is no grace period in progress right now,
+	 * any callbacks we have up to this point will be satisfied
+	 * by the next grace period.  So promote all callbacks to be
+	 * handled after the end of the next grace period.  If the
+	 * CPU is not yet aware of the end of the previous grace period,
+	 * we need to allow for the callback advancement that will
+	 * occur when it does become aware.  Deadlock prevents us from
+	 * making it aware at this point: We cannot acquire a leaf
+	 * rcu_node ->lock while holding the root rcu_node ->lock.
+	 */
+	rdp->nxttail[RCU_NEXT_READY_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+	if (rdp->completed == rsp->completed)
+		rdp->nxttail[RCU_WAIT_TAIL] = rdp->nxttail[RCU_NEXT_TAIL];
+
 	rsp->gp_flags = RCU_GP_FLAG_INIT;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock(&rnp->lock); /* Interrupts remain disabled. */
+
+	/* Ensure that CPU is aware of completion of last grace period. */
+	rcu_process_gp_end(rsp, rdp);
+	local_irq_restore(flags);
+
+	/* Wake up rcu_gp_kthread() to start the grace period. */
 	wake_up(&rsp->gp_wq);
 }
 
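
The promotion added above is easier to follow with the segmented callback list in mind: ->nxtlist is one linked list, and each ->nxttail[] entry points at the ->next slot that ends a segment, so moving a segment boundary is just a pointer copy. A standalone sketch of the idea (simplified names, not kernel code):

	struct cb { struct cb *next; };

	enum { DONE_TAIL, WAIT_TAIL, NEXT_READY_TAIL, NEXT_TAIL, NR_TAILS };

	struct cblist {
		struct cb *head;
		struct cb **tail[NR_TAILS];	/* each points at a ->next slot */
	};

	/* Promote everything queued so far to be handled after the next
	 * grace period; only tail pointers move, no list nodes are touched. */
	static void promote_for_next_gp(struct cblist *cl, int saw_last_gp_end)
	{
		cl->tail[NEXT_READY_TAIL] = cl->tail[NEXT_TAIL];
		if (saw_last_gp_end)
			cl->tail[WAIT_TAIL] = cl->tail[NEXT_TAIL];
	}
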
@@ -1573,16 +1572,20 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp)
 /*
  * Send the specified CPU's RCU callbacks to the orphanage.  The
  * specified CPU must be offline, and the caller must hold the
- * ->onofflock.
+ * ->orphan_lock.
  */
 static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 			  struct rcu_node *rnp, struct rcu_data *rdp)
 {
+	/* No-CBs CPUs do not have orphanable callbacks. */
+	if (is_nocb_cpu(rdp->cpu))
+		return;
+
 	/*
 	 * Orphan the callbacks.  First adjust the counts.  This is safe
-	 * because ->onofflock excludes _rcu_barrier()'s adoption of
-	 * the callbacks, thus no memory barrier is required.
+	 * because _rcu_barrier() excludes CPU-hotplug operations, so it
+	 * cannot be running now.  Thus no memory barrier is required.
 	 */
 	if (rdp->nxtlist != NULL) {
 		rsp->qlen_lazy += rdp->qlen_lazy;
@@ -1623,13 +1626,17 @@ rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
 
 /*
  * Adopt the RCU callbacks from the specified rcu_state structure's
- * orphanage.  The caller must hold the ->onofflock.
+ * orphanage.  The caller must hold the ->orphan_lock.
  */
 static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
 {
 	int i;
 	struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
+	/* No-CBs CPUs are handled specially. */
+	if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+		return;
+
 	/* Do the accounting first. */
 	rdp->qlen_lazy += rsp->qlen_lazy;
 	rdp->qlen += rsp->qlen;
@@ -1702,7 +1709,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 
 	/* Exclude any attempts to start a new grace period. */
 	mutex_lock(&rsp->onoff_mutex);
-	raw_spin_lock_irqsave(&rsp->onofflock, flags);
+	raw_spin_lock_irqsave(&rsp->orphan_lock, flags);
 
 	/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
 	rcu_send_cbs_to_orphanage(cpu, rsp, rnp, rdp);
@@ -1729,10 +1736,10 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
 	/*
 	 * We still hold the leaf rcu_node structure lock here, and
 	 * irqs are still disabled.  The reason for this subterfuge is
-	 * because invoking rcu_report_unblock_qs_rnp() with ->onofflock
+	 * because invoking rcu_report_unblock_qs_rnp() with ->orphan_lock
 	 * held leads to deadlock.
 	 */
-	raw_spin_unlock(&rsp->onofflock); /* irqs remain disabled. */
+	raw_spin_unlock(&rsp->orphan_lock); /* irqs remain disabled. */
 	rnp = rdp->mynode;
 	if (need_report & RCU_OFL_TASKS_NORM_GP)
 		rcu_report_unblock_qs_rnp(rnp, flags);
@@ -1769,7 +1776,8 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 {
 	unsigned long flags;
 	struct rcu_head *next, *list, **tail;
-	int bl, count, count_lazy, i;
+	long bl, count, count_lazy;
+	int i;
 
 	/* If no callbacks are ready, just return.*/
 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
@@ -2107,9 +2115,15 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
 	}
 }
 
+/*
+ * Helper function for call_rcu() and friends.  The cpu argument will
+ * normally be -1, indicating "currently running CPU".  It may specify
+ * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier()
+ * is expected to specify a CPU.
+ */
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
-	   struct rcu_state *rsp, bool lazy)
+	   struct rcu_state *rsp, int cpu, bool lazy)
 {
 	unsigned long flags;
 	struct rcu_data *rdp;
@@ -2129,9 +2143,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
 	rdp = this_cpu_ptr(rsp->rda);
 
 	/* Add the callback to our list. */
-	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
+	if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
+		int offline;
+
+		if (cpu != -1)
+			rdp = per_cpu_ptr(rsp->rda, cpu);
+		offline = !__call_rcu_nocb(rdp, head, lazy);
+		WARN_ON_ONCE(offline);
 		/* _call_rcu() is illegal on offline CPU; leak the callback. */
-		WARN_ON_ONCE(1);
 		local_irq_restore(flags);
 		return;
 	}
@@ -2160,7 +2179,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
  */
 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-	__call_rcu(head, func, &rcu_sched_state, 0);
+	__call_rcu(head, func, &rcu_sched_state, -1, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
@@ -2169,7 +2188,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
  */
 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-	__call_rcu(head, func, &rcu_bh_state, 0);
+	__call_rcu(head, func, &rcu_bh_state, -1, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
@@ -2205,10 +2224,28 @@ static inline int rcu_blocking_is_gp(void)
  * rcu_read_lock_sched().
  *
  * This means that all preempt_disable code sequences, including NMI and
- * hardware-interrupt handlers, in progress on entry will have completed
- * before this primitive returns.  However, this does not guarantee that
- * softirq handlers will have completed, since in some kernels, these
- * handlers can run in process context, and can block.
+ * non-threaded hardware-interrupt handlers, in progress on entry will
+ * have completed before this primitive returns.  However, this does not
+ * guarantee that softirq handlers will have completed, since in some
+ * kernels, these handlers can run in process context, and can block.
+ *
+ * Note that this guarantee implies further memory-ordering guarantees.
+ * On systems with more than one CPU, when synchronize_sched() returns,
+ * each CPU is guaranteed to have executed a full memory barrier since the
+ * end of its last RCU-sched read-side critical section whose beginning
+ * preceded the call to synchronize_sched().  In addition, each CPU having
+ * an RCU read-side critical section that extends beyond the return from
+ * synchronize_sched() is guaranteed to have executed a full memory barrier
+ * after the beginning of synchronize_sched() and before the beginning of
+ * that RCU read-side critical section.  Note that these guarantees include
+ * CPUs that are offline, idle, or executing in user mode, as well as CPUs
+ * that are executing in the kernel.
+ *
+ * Furthermore, if CPU A invoked synchronize_sched(), which returned
+ * to its caller on CPU B, then both CPU A and CPU B are guaranteed
+ * to have executed a full memory barrier during the execution of
+ * synchronize_sched() -- even if CPU A and CPU B are the same CPU (but
+ * again only if the system has more than one CPU).
  *
  * This primitive provides the guarantees made by the (now removed)
  * synchronize_kernel() API.  In contrast, synchronize_rcu() only
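
A typical updater that relies on the guarantee documented above follows the usual remove/wait/reclaim pattern; a minimal sketch (struct foo and its list are made up, and readers are assumed to traverse the list with preemption disabled or under rcu_read_lock_sched()):

	#include <linux/rculist.h>
	#include <linux/slab.h>

	struct foo {
		int data;
		struct list_head list;
	};

	static void remove_and_free_foo(struct foo *p)
	{
		list_del_rcu(&p->list);	/* unpublish the element */
		synchronize_sched();	/* wait out all pre-existing readers */
		kfree(p);		/* no reader can still see p */
	}
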
@@ -2224,7 +2261,10 @@ void synchronize_sched(void)
 			   "Illegal synchronize_sched() in RCU-sched read-side critical section");
 	if (rcu_blocking_is_gp())
 		return;
-	wait_rcu_gp(call_rcu_sched);
+	if (rcu_expedited)
+		synchronize_sched_expedited();
+	else
+		wait_rcu_gp(call_rcu_sched);
 }
 EXPORT_SYMBOL_GPL(synchronize_sched);
 
@@ -2236,6 +2276,9 @@ EXPORT_SYMBOL_GPL(synchronize_sched);
  * read-side critical sections have completed.  RCU read-side critical
  * sections are delimited by rcu_read_lock_bh() and rcu_read_unlock_bh(),
  * and may be nested.
+ *
+ * See the description of synchronize_sched() for more detailed information
+ * on memory ordering guarantees.
  */
 void synchronize_rcu_bh(void)
 {
@@ -2245,13 +2288,13 @@ void synchronize_rcu_bh(void)
 			   "Illegal synchronize_rcu_bh() in RCU-bh read-side critical section");
 	if (rcu_blocking_is_gp())
 		return;
-	wait_rcu_gp(call_rcu_bh);
+	if (rcu_expedited)
+		synchronize_rcu_bh_expedited();
+	else
+		wait_rcu_gp(call_rcu_bh);
 }
 EXPORT_SYMBOL_GPL(synchronize_rcu_bh);
 
-static atomic_t sync_sched_expedited_started = ATOMIC_INIT(0);
-static atomic_t sync_sched_expedited_done = ATOMIC_INIT(0);
-
 static int synchronize_sched_expedited_cpu_stop(void *data)
 {
 	/*
@@ -2308,10 +2351,32 @@ static int synchronize_sched_expedited_cpu_stop(void *data)
  */
 void synchronize_sched_expedited(void)
 {
-	int firstsnap, s, snap, trycount = 0;
+	long firstsnap, s, snap;
+	int trycount = 0;
+	struct rcu_state *rsp = &rcu_sched_state;
+
+	/*
+	 * If we are in danger of counter wrap, just do synchronize_sched().
+	 * By allowing sync_sched_expedited_started to advance no more than
+	 * ULONG_MAX/8 ahead of sync_sched_expedited_done, we are ensuring
+	 * that more than 3.5 billion CPUs would be required to force a
+	 * counter wrap on a 32-bit system.  Quite a few more CPUs would of
+	 * course be required on a 64-bit system.
+	 */
+	if (ULONG_CMP_GE((ulong)atomic_long_read(&rsp->expedited_start),
+			 (ulong)atomic_long_read(&rsp->expedited_done) +
+			 ULONG_MAX / 8)) {
+		synchronize_sched();
+		atomic_long_inc(&rsp->expedited_wrap);
+		return;
+	}
 
-	/* Note that atomic_inc_return() implies full memory barrier. */
-	firstsnap = snap = atomic_inc_return(&sync_sched_expedited_started);
+	/*
+	 * Take a ticket.  Note that atomic_inc_return() implies a
+	 * full memory barrier.
+	 */
+	snap = atomic_long_inc_return(&rsp->expedited_start);
+	firstsnap = snap;
 	get_online_cpus();
 	WARN_ON_ONCE(cpu_is_offline(raw_smp_processor_id()));
 
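
The wrap check depends on comparing free-running counters modulo the word size: ULONG_CMP_GE() (roughly ULONG_MAX / 2 >= (a) - (b) in include/linux/rcupdate.h) stays correct as long as the two values are within ULONG_MAX/2 of each other, and the ULONG_MAX/8 cap keeps them far inside that window; defeating it would take about 7/8 of 2^32, i.e. roughly 3.75 billion in-flight callers on 32 bits, which is where the "3.5 billion CPUs" figure in the comment comes from. A small userspace demonstration of the comparison (macro body paraphrased):

	#include <limits.h>
	#include <stdio.h>

	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

	int main(void)
	{
		unsigned long done = ULONG_MAX - 5;	/* about to wrap */
		unsigned long snap = done + 10;		/* wrapped past zero */

		/* Plain ">=" is fooled by the wrap; the modular form is not. */
		printf("%d %d\n", snap >= done, ULONG_CMP_GE(snap, done));
		return 0;	/* prints "0 1" */
	}
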
@@ -2323,48 +2388,65 @@ void synchronize_sched_expedited(void)
 			     synchronize_sched_expedited_cpu_stop,
 			     NULL) == -EAGAIN) {
 		put_online_cpus();
+		atomic_long_inc(&rsp->expedited_tryfail);
+
+		/* Check to see if someone else did our work for us. */
+		s = atomic_long_read(&rsp->expedited_done);
+		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
+			/* ensure test happens before caller kfree */
+			smp_mb__before_atomic_inc(); /* ^^^ */
+			atomic_long_inc(&rsp->expedited_workdone1);
+			return;
+		}
 
 		/* No joy, try again later.  Or just synchronize_sched(). */
 		if (trycount++ < 10) {
 			udelay(trycount * num_online_cpus());
 		} else {
-			synchronize_sched();
+			wait_rcu_gp(call_rcu_sched);
+			atomic_long_inc(&rsp->expedited_normal);
 			return;
 		}
 
-		/* Check to see if someone else did our work for us. */
-		s = atomic_read(&sync_sched_expedited_done);
-		if (UINT_CMP_GE((unsigned)s, (unsigned)firstsnap)) {
-			smp_mb(); /* ensure test happens before caller kfree */
+		/* Recheck to see if someone else did our work for us. */
+		s = atomic_long_read(&rsp->expedited_done);
+		if (ULONG_CMP_GE((ulong)s, (ulong)firstsnap)) {
+			/* ensure test happens before caller kfree */
+			smp_mb__before_atomic_inc(); /* ^^^ */
+			atomic_long_inc(&rsp->expedited_workdone2);
 			return;
 		}
 
 		/*
 		 * Refetching sync_sched_expedited_started allows later
-		 * callers to piggyback on our grace period.  We subtract
-		 * 1 to get the same token that the last incrementer got.
-		 * We retry after they started, so our grace period works
-		 * for them, and they started after our first try, so their
-		 * grace period works for us.
+		 * callers to piggyback on our grace period.  We retry
+		 * after they started, so our grace period works for them,
+		 * and they started after our first try, so their grace
+		 * period works for us.
 		 */
 		get_online_cpus();
-		snap = atomic_read(&sync_sched_expedited_started);
+		snap = atomic_long_read(&rsp->expedited_start);
 		smp_mb(); /* ensure read is before try_stop_cpus(). */
 	}
+	atomic_long_inc(&rsp->expedited_stoppedcpus);
 
 	/*
 	 * Everyone up to our most recent fetch is covered by our grace
 	 * period.  Update the counter, but only if our work is still
 	 * relevant -- which it won't be if someone who started later
-	 * than we did beat us to the punch.
+	 * than we did already did their update.
 	 */
 	do {
-		s = atomic_read(&sync_sched_expedited_done);
-		if (UINT_CMP_GE((unsigned)s, (unsigned)snap)) {
-			smp_mb(); /* ensure test happens before caller kfree */
+		atomic_long_inc(&rsp->expedited_done_tries);
+		s = atomic_long_read(&rsp->expedited_done);
+		if (ULONG_CMP_GE((ulong)s, (ulong)snap)) {
+			/* ensure test happens before caller kfree */
+			smp_mb__before_atomic_inc(); /* ^^^ */
+			atomic_long_inc(&rsp->expedited_done_lost);
 			break;
 		}
-	} while (atomic_cmpxchg(&sync_sched_expedited_done, s, snap) != s);
+	} while (atomic_long_cmpxchg(&rsp->expedited_done, s, snap) != s);
+	atomic_long_inc(&rsp->expedited_done_exit);
 
 	put_online_cpus();
 }
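
Stripped of the retries, hotplug exclusion, and event counters, the function is a ticket scheme: ->expedited_start hands out tickets and ->expedited_done records the newest ticket whose grace period has completed, so a caller whose ticket is already covered can return without redoing the work. A condensed userspace model of that flow (names made up, loosely following the checks above):

	#include <limits.h>
	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_ulong start_ctr, done_ctr;

	/* Wrap-safe "a >= b" for free-running counters. */
	static bool ctr_ge(unsigned long a, unsigned long b)
	{
		return ULONG_MAX / 2 >= a - b;
	}

	static void expedited_like(void (*force_grace_period)(void))
	{
		unsigned long snap = atomic_fetch_add(&start_ctr, 1) + 1;
		unsigned long s = atomic_load(&done_ctr);

		if (ctr_ge(s, snap))	/* someone's grace period covers us */
			return;
		force_grace_period();
		/* Publish our ticket unless a later caller already did. */
		while (!ctr_ge(s, snap) &&
		       !atomic_compare_exchange_weak(&done_ctr, &s, snap))
			;
	}
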
@@ -2558,9 +2640,17 @@ static void _rcu_barrier(struct rcu_state *rsp)
 	 * When that callback is invoked, we will know that all of the
 	 * corresponding CPU's preceding callbacks have been invoked.
 	 */
-	for_each_online_cpu(cpu) {
+	for_each_possible_cpu(cpu) {
+		if (!cpu_online(cpu) && !is_nocb_cpu(cpu))
+			continue;
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		if (ACCESS_ONCE(rdp->qlen)) {
+		if (is_nocb_cpu(cpu)) {
+			_rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+					   rsp->n_barrier_done);
+			atomic_inc(&rsp->barrier_cpu_count);
+			__call_rcu(&rdp->barrier_head, rcu_barrier_callback,
+				   rsp, cpu, 0);
+		} else if (ACCESS_ONCE(rdp->qlen)) {
 			_rcu_barrier_trace(rsp, "OnlineQ", cpu,
 					   rsp->n_barrier_done);
 			smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -2634,6 +2724,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 #endif
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
+	rcu_boot_init_nocb_percpu_data(rdp);
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
@@ -2715,6 +2806,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
 	struct rcu_state *rsp;
+	int ret = NOTIFY_OK;
 
 	trace_rcu_utilization("Start CPU hotplug");
 	switch (action) {
@@ -2728,7 +2820,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		rcu_boost_kthread_setaffinity(rnp, -1);
 		break;
 	case CPU_DOWN_PREPARE:
-		rcu_boost_kthread_setaffinity(rnp, cpu);
+		if (nocb_cpu_expendable(cpu))
+			rcu_boost_kthread_setaffinity(rnp, cpu);
+		else
+			ret = NOTIFY_BAD;
 		break;
 	case CPU_DYING:
 	case CPU_DYING_FROZEN:
@@ -2752,7 +2847,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		break;
 	}
 	trace_rcu_utilization("End CPU hotplug");
-	return NOTIFY_OK;
+	return ret;
 }
 
 /*
@@ -2772,6 +2867,7 @@ static int __init rcu_spawn_gp_kthread(void)
 		raw_spin_lock_irqsave(&rnp->lock, flags);
 		rsp->gp_kthread = t;
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
+		rcu_spawn_nocb_kthreads(rsp);
 	}
 	return 0;
 }
@@ -2967,6 +3063,7 @@ void __init rcu_init(void)
 	rcu_init_one(&rcu_sched_state, &rcu_sched_data);
 	rcu_init_one(&rcu_bh_state, &rcu_bh_data);
 	__rcu_init_preempt();
+	rcu_init_nocb();
 	open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
 	/*