author     Paul E. McKenney <paul.mckenney@linaro.org>    2012-08-20 00:35:53 -0400
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2012-11-16 13:05:56 -0500
commit     3fbfbf7a3b66ec424042d909f14ba2ddf4372ea8 (patch)
tree       cc364c320a6e23927ecc154a8ef8021dc7d1a9e8 /kernel
parent     aac1cda34b84a9411d6b8d18c3658f094c834911 (diff)
rcu: Add callback-free CPUs
RCU callback execution can add significant OS jitter and also can degrade both scheduling latency and, in asymmetric multiprocessors, energy efficiency. This commit therefore adds the ability for selected CPUs ("rcu_nocbs=" boot parameter) to have their callbacks offloaded to kthreads. If the "rcu_nocb_poll" boot parameter is also specified, these kthreads will do polling, removing the need for the offloaded CPUs to do wakeups. At least one CPU must be doing normal callback processing: currently CPU 0 cannot be selected as a no-CBs CPU. In addition, attempts to offline the last normal-CBs CPU will fail.

This feature was inspired by Jim Houston's and Joe Korty's JRCU, and this commit includes fixes to problems located by Fengguang Wu's kbuild test robot.

[ paulmck: Added gfp.h include file as suggested by Fengguang Wu. ]

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
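As an illustrative aside (not part of the commit): a kernel built with CONFIG_RCU_NOCB_CPU=y could exercise this feature through the two boot parameters named above. The image name, root device, and CPU list below are hypothetical, chosen only to show the syntax; CPU 0 is omitted because, per the commit message, it cannot be a no-CBs CPU.

    # Hypothetical bootloader entry (GRUB-style); offload callbacks from
    # CPUs 1-3 and let the "rcuo" kthreads poll instead of being woken.
    linux /boot/vmlinuz-3.8 root=/dev/sda1 rcu_nocbs=1-3 rcu_nocb_poll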
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/rcutree.c         63
-rw-r--r--  kernel/rcutree.h         47
-rw-r--r--  kernel/rcutree_plugin.h  397
-rw-r--r--  kernel/rcutree_trace.c   7
4 files changed, 498 insertions(+), 16 deletions(-)
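As a further illustrative aside (not part of the commit): the core queuing pattern the diff below introduces is an append via an atomic exchange of the queue's tail pointer (as __call_rcu_nocb_enqueue() does with xchg()), with a dedicated thread that detaches the whole list and invokes it (as rcu_nocb_kthread() does). The following is an assumption-laden user-space analogue using C11 atomics and pthreads, not kernel code; it omits grace periods, wakeups, and the lazy/non-lazy counts.

    /* Hypothetical user-space sketch of the no-CBs queue pattern (not kernel code). */
    #include <pthread.h>
    #include <stdatomic.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    struct cb {
        struct cb *_Atomic next;
        void (*func)(struct cb *cbp);
    };

    static struct cb *_Atomic cb_head;                      /* like rdp->nocb_head */
    static struct cb *_Atomic *_Atomic cb_tail = &cb_head;  /* like rdp->nocb_tail */

    /* Producer: claim the tail slot by exchange, then publish the element
     * (cf. __call_rcu_nocb_enqueue()). */
    static void enqueue_cb(struct cb *cbp, void (*func)(struct cb *cbp))
    {
        struct cb *_Atomic *old_tail;

        cbp->func = func;
        atomic_store(&cbp->next, NULL);
        old_tail = atomic_exchange(&cb_tail, &cbp->next);
        atomic_store(old_tail, cbp);    /* the kernel code uses ACCESS_ONCE() here */
    }

    /* Consumer: detach the whole list, then invoke every callback on it
     * (cf. rcu_nocb_kthread(), minus the grace-period wait). */
    static void *cb_thread(void *arg)
    {
        struct cb *list, *next;
        struct cb *_Atomic *tail;

        for (;;) {
            list = atomic_load(&cb_head);
            if (!list) {
                usleep(1000);           /* stand-in for wait_event()/polling */
                continue;
            }
            atomic_store(&cb_head, NULL);
            tail = atomic_exchange(&cb_tail, &cb_head);
            /* The real kthread waits for a grace period here. */
            while (list) {
                /* Spin until a concurrent enqueue finishes publishing. */
                do {
                    next = atomic_load(&list->next);
                } while (!next && &list->next != tail);
                list->func(list);
                list = next;
            }
        }
        return NULL;
    }

    static void free_cb(struct cb *cbp)
    {
        printf("invoked callback %p\n", (void *)cbp);
        free(cbp);
    }

    int main(void)
    {
        pthread_t tid;
        int i;

        pthread_create(&tid, NULL, cb_thread, NULL);
        for (i = 0; i < 4; i++)
            enqueue_cb(malloc(sizeof(struct cb)), free_cb);
        sleep(1);                       /* let the consumer drain the queue */
        return 0;
    }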
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5ffadcc3bb26..7733eb56e156 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -303,7 +303,8 @@ EXPORT_SYMBOL_GPL(rcu_sched_force_quiescent_state);
 static int
 cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 {
-        return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL];
+        return &rdp->nxtlist != rdp->nxttail[RCU_DONE_TAIL] &&
+               rdp->nxttail[RCU_DONE_TAIL] != NULL;
 }
 
 /*
@@ -312,8 +313,11 @@ cpu_has_callbacks_ready_to_invoke(struct rcu_data *rdp)
 static int
 cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 {
-        return *rdp->nxttail[RCU_DONE_TAIL +
-                             (ACCESS_ONCE(rsp->completed) != rdp->completed)] &&
+        struct rcu_head **ntp;
+
+        ntp = rdp->nxttail[RCU_DONE_TAIL +
+                           (ACCESS_ONCE(rsp->completed) != rdp->completed)];
+        return rdp->nxttail[RCU_DONE_TAIL] && ntp && *ntp &&
                !rcu_gp_in_progress(rsp);
 }
 
@@ -1123,6 +1127,7 @@ static void init_callback_list(struct rcu_data *rdp)
         rdp->nxtlist = NULL;
         for (i = 0; i < RCU_NEXT_SIZE; i++)
                 rdp->nxttail[i] = &rdp->nxtlist;
+        init_nocb_callback_list(rdp);
 }
 
 /*
@@ -1633,6 +1638,10 @@ static void
 rcu_send_cbs_to_orphanage(int cpu, struct rcu_state *rsp,
                           struct rcu_node *rnp, struct rcu_data *rdp)
 {
+        /* No-CBs CPUs do not have orphanable callbacks. */
+        if (is_nocb_cpu(rdp->cpu))
+                return;
+
         /*
          * Orphan the callbacks.  First adjust the counts.  This is safe
          * because _rcu_barrier() excludes CPU-hotplug operations, so it
@@ -1684,6 +1693,10 @@ static void rcu_adopt_orphan_cbs(struct rcu_state *rsp)
         int i;
         struct rcu_data *rdp = __this_cpu_ptr(rsp->rda);
 
+        /* No-CBs CPUs are handled specially. */
+        if (rcu_nocb_adopt_orphan_cbs(rsp, rdp))
+                return;
+
         /* Do the accounting first. */
         rdp->qlen_lazy += rsp->qlen_lazy;
         rdp->qlen += rsp->qlen;
@@ -2162,9 +2175,15 @@ static void __call_rcu_core(struct rcu_state *rsp, struct rcu_data *rdp,
         }
 }
 
+/*
+ * Helper function for call_rcu() and friends.  The cpu argument will
+ * normally be -1, indicating "currently running CPU".  It may specify
+ * a CPU only if that CPU is a no-CBs CPU.  Currently, only _rcu_barrier()
+ * is expected to specify a CPU.
+ */
 static void
 __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
-           struct rcu_state *rsp, bool lazy)
+           struct rcu_state *rsp, int cpu, bool lazy)
 {
         unsigned long flags;
         struct rcu_data *rdp;
@@ -2184,9 +2203,14 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
         rdp = this_cpu_ptr(rsp->rda);
 
         /* Add the callback to our list. */
-        if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL)) {
+        if (unlikely(rdp->nxttail[RCU_NEXT_TAIL] == NULL) || cpu != -1) {
+                int offline;
+
+                if (cpu != -1)
+                        rdp = per_cpu_ptr(rsp->rda, cpu);
+                offline = !__call_rcu_nocb(rdp, head, lazy);
+                WARN_ON_ONCE(offline);
                 /* _call_rcu() is illegal on offline CPU; leak the callback. */
-                WARN_ON_ONCE(1);
                 local_irq_restore(flags);
                 return;
         }
@@ -2215,7 +2239,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
  */
 void call_rcu_sched(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-        __call_rcu(head, func, &rcu_sched_state, 0);
+        __call_rcu(head, func, &rcu_sched_state, -1, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_sched);
 
@@ -2224,7 +2248,7 @@ EXPORT_SYMBOL_GPL(call_rcu_sched);
  */
 void call_rcu_bh(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-        __call_rcu(head, func, &rcu_bh_state, 0);
+        __call_rcu(head, func, &rcu_bh_state, -1, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu_bh);
 
@@ -2676,9 +2700,17 @@ static void _rcu_barrier(struct rcu_state *rsp)
          * When that callback is invoked, we will know that all of the
          * corresponding CPU's preceding callbacks have been invoked.
          */
-        for_each_online_cpu(cpu) {
+        for_each_possible_cpu(cpu) {
+                if (!cpu_online(cpu) && !is_nocb_cpu(cpu))
+                        continue;
                 rdp = per_cpu_ptr(rsp->rda, cpu);
-                if (ACCESS_ONCE(rdp->qlen)) {
+                if (is_nocb_cpu(cpu)) {
+                        _rcu_barrier_trace(rsp, "OnlineNoCB", cpu,
+                                           rsp->n_barrier_done);
+                        atomic_inc(&rsp->barrier_cpu_count);
+                        __call_rcu(&rdp->barrier_head, rcu_barrier_callback,
+                                   rsp, cpu, 0);
+                } else if (ACCESS_ONCE(rdp->qlen)) {
                         _rcu_barrier_trace(rsp, "OnlineQ", cpu,
                                            rsp->n_barrier_done);
                         smp_call_function_single(cpu, rcu_barrier_func, rsp, 1);
@@ -2752,6 +2784,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 #endif
         rdp->cpu = cpu;
         rdp->rsp = rsp;
+        rcu_boot_init_nocb_percpu_data(rdp);
         raw_spin_unlock_irqrestore(&rnp->lock, flags);
 }
 
@@ -2833,6 +2866,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
         struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
         struct rcu_node *rnp = rdp->mynode;
         struct rcu_state *rsp;
+        int ret = NOTIFY_OK;
 
         trace_rcu_utilization("Start CPU hotplug");
         switch (action) {
@@ -2846,7 +2880,10 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                 rcu_boost_kthread_setaffinity(rnp, -1);
                 break;
         case CPU_DOWN_PREPARE:
-                rcu_boost_kthread_setaffinity(rnp, cpu);
+                if (nocb_cpu_expendable(cpu))
+                        rcu_boost_kthread_setaffinity(rnp, cpu);
+                else
+                        ret = NOTIFY_BAD;
                 break;
         case CPU_DYING:
         case CPU_DYING_FROZEN:
@@ -2870,7 +2907,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                 break;
         }
         trace_rcu_utilization("End CPU hotplug");
-        return NOTIFY_OK;
+        return ret;
 }
 
 /*
@@ -2890,6 +2927,7 @@ static int __init rcu_spawn_gp_kthread(void)
                 raw_spin_lock_irqsave(&rnp->lock, flags);
                 rsp->gp_kthread = t;
                 raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                rcu_spawn_nocb_kthreads(rsp);
         }
         return 0;
 }
@@ -3085,6 +3123,7 @@ void __init rcu_init(void)
         rcu_init_one(&rcu_sched_state, &rcu_sched_data);
         rcu_init_one(&rcu_bh_state, &rcu_bh_data);
         __rcu_init_preempt();
+        rcu_init_nocb();
         open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
         /*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index d274af357210..488f2ec6b663 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -317,6 +317,18 @@ struct rcu_data {
         struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 
+        /* 7) Callback offloading. */
+#ifdef CONFIG_RCU_NOCB_CPU
+        struct rcu_head *nocb_head;     /* CBs waiting for kthread. */
+        struct rcu_head **nocb_tail;
+        atomic_long_t nocb_q_count;     /* # CBs waiting for kthread */
+        atomic_long_t nocb_q_count_lazy; /*  (approximate). */
+        int nocb_p_count;               /* # CBs being invoked by kthread */
+        int nocb_p_count_lazy;          /*  (approximate). */
+        wait_queue_head_t nocb_wq;      /* For nocb kthreads to sleep on. */
+        struct task_struct *nocb_kthread;
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+
         int cpu;
         struct rcu_state *rsp;
 };
@@ -369,6 +381,12 @@ struct rcu_state {
         struct rcu_data __percpu *rda;          /* pointer of percu rcu_data. */
         void (*call)(struct rcu_head *head,     /* call_rcu() flavor. */
                      void (*func)(struct rcu_head *head));
+#ifdef CONFIG_RCU_NOCB_CPU
+        void (*call_remote)(struct rcu_head *head,
+                     void (*func)(struct rcu_head *head));
+                                                /* call_rcu() flavor, but for */
+                                                /*  placing on remote CPU. */
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
         /* The following fields are guarded by the root rcu_node's lock. */
 
@@ -439,6 +457,8 @@ struct rcu_state {
 #define RCU_GP_FLAG_FQS 0x2 /* Need grace-period quiescent-state forcing. */
 
 extern struct list_head rcu_struct_flavors;
+
+/* Sequence through rcu_state structures for each RCU flavor. */
 #define for_each_rcu_flavor(rsp) \
         list_for_each_entry((rsp), &rcu_struct_flavors, flavors)
 
@@ -515,5 +535,32 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
 static void print_cpu_stall_info_end(void);
 static void zero_cpu_stall_ticks(struct rcu_data *rdp);
 static void increment_cpu_stall_ticks(void);
+static bool is_nocb_cpu(int cpu);
+static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
+                            bool lazy);
+static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+                                      struct rcu_data *rdp);
+static bool nocb_cpu_expendable(int cpu);
+static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
+static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
+static void init_nocb_callback_list(struct rcu_data *rdp);
+static void __init rcu_init_nocb(void);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
+
+#ifdef CONFIG_RCU_TRACE
+#ifdef CONFIG_RCU_NOCB_CPU
+/* Sum up queue lengths for tracing. */
+static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
+{
+        *ql = atomic_long_read(&rdp->nocb_q_count) + rdp->nocb_p_count;
+        *qll = atomic_long_read(&rdp->nocb_q_count_lazy) + rdp->nocb_p_count_lazy;
+}
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+static inline void rcu_nocb_q_lengths(struct rcu_data *rdp, long *ql, long *qll)
+{
+        *ql = 0;
+        *qll = 0;
+}
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+#endif /* #ifdef CONFIG_RCU_TRACE */
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 5ce3352505e9..6cdc372de34c 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -25,6 +25,7 @@
  */
 
 #include <linux/delay.h>
+#include <linux/gfp.h>
 #include <linux/oom.h>
 #include <linux/smpboot.h>
 
@@ -36,6 +37,14 @@
 #define RCU_BOOST_PRIO RCU_KTHREAD_PRIO
 #endif
 
+#ifdef CONFIG_RCU_NOCB_CPU
+static cpumask_var_t rcu_nocb_mask; /* CPUs to have callbacks offloaded. */
+static bool have_rcu_nocb_mask;     /* Was rcu_nocb_mask allocated? */
+static bool rcu_nocb_poll;          /* Offload kthread are to poll. */
+module_param(rcu_nocb_poll, bool, 0444);
+static char __initdata nocb_buf[NR_CPUS * 5];
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
+
 /*
  * Check the RCU kernel configuration parameters and print informative
  * messages about anything out of the ordinary.  If you like #ifdef, you
@@ -76,6 +85,18 @@ static void __init rcu_bootup_announce_oddness(void)
                 printk(KERN_INFO "\tExperimental boot-time adjustment of leaf fanout to %d.\n", rcu_fanout_leaf);
         if (nr_cpu_ids != NR_CPUS)
                 printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
+#ifdef CONFIG_RCU_NOCB_CPU
+        if (have_rcu_nocb_mask) {
+                if (cpumask_test_cpu(0, rcu_nocb_mask)) {
+                        cpumask_clear_cpu(0, rcu_nocb_mask);
+                        pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n");
+                }
+                cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
+                pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
+                if (rcu_nocb_poll)
+                        pr_info("\tExperimental polled no-CBs CPUs.\n");
+        }
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 }
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
@@ -642,7 +663,7 @@ static void rcu_preempt_do_callbacks(void)
  */
 void call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu))
 {
-        __call_rcu(head, func, &rcu_preempt_state, 0);
+        __call_rcu(head, func, &rcu_preempt_state, -1, 0);
 }
 EXPORT_SYMBOL_GPL(call_rcu);
 
@@ -656,7 +677,7 @@ EXPORT_SYMBOL_GPL(call_rcu);
 void kfree_call_rcu(struct rcu_head *head,
                     void (*func)(struct rcu_head *rcu))
 {
-        __call_rcu(head, func, &rcu_preempt_state, 1);
+        __call_rcu(head, func, &rcu_preempt_state, -1, 1);
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
@@ -1025,7 +1046,7 @@ static void rcu_preempt_check_callbacks(int cpu)
 void kfree_call_rcu(struct rcu_head *head,
                     void (*func)(struct rcu_head *rcu))
 {
-        __call_rcu(head, func, &rcu_sched_state, 1);
+        __call_rcu(head, func, &rcu_sched_state, -1, 1);
 }
 EXPORT_SYMBOL_GPL(kfree_call_rcu);
 
@@ -2104,3 +2125,373 @@ static void increment_cpu_stall_ticks(void)
 }
 
 #endif /* #else #ifdef CONFIG_RCU_CPU_STALL_INFO */
+
+#ifdef CONFIG_RCU_NOCB_CPU
+
+/*
+ * Offload callback processing from the boot-time-specified set of CPUs
+ * specified by rcu_nocb_mask.  For each CPU in the set, there is a
+ * kthread created that pulls the callbacks from the corresponding CPU,
+ * waits for a grace period to elapse, and invokes the callbacks.
+ * The no-CBs CPUs do a wake_up() on their kthread when they insert
+ * a callback into any empty list, unless the rcu_nocb_poll boot parameter
+ * has been specified, in which case each kthread actively polls its
+ * CPU.  (Which isn't so great for energy efficiency, but which does
+ * reduce RCU's overhead on that CPU.)
+ *
+ * This is intended to be used in conjunction with Frederic Weisbecker's
+ * adaptive-idle work, which would seriously reduce OS jitter on CPUs
+ * running CPU-bound user-mode computations.
+ *
+ * Offloading of callback processing could also in theory be used as
+ * an energy-efficiency measure because CPUs with no RCU callbacks
+ * queued are more aggressive about entering dyntick-idle mode.
+ */
+
+
+/* Parse the boot-time rcu_nocb_mask CPU list from the kernel parameters. */
+static int __init rcu_nocb_setup(char *str)
+{
+        alloc_bootmem_cpumask_var(&rcu_nocb_mask);
+        have_rcu_nocb_mask = true;
+        cpulist_parse(str, rcu_nocb_mask);
+        return 1;
+}
+__setup("rcu_nocbs=", rcu_nocb_setup);
+
+/* Is the specified CPU a no-CPUs CPU? */
+static bool is_nocb_cpu(int cpu)
+{
+        if (have_rcu_nocb_mask)
+                return cpumask_test_cpu(cpu, rcu_nocb_mask);
+        return false;
+}
+
+/*
+ * Enqueue the specified string of rcu_head structures onto the specified
+ * CPU's no-CBs lists.  The CPU is specified by rdp, the head of the
+ * string by rhp, and the tail of the string by rhtp.  The non-lazy/lazy
+ * counts are supplied by rhcount and rhcount_lazy.
+ *
+ * If warranted, also wake up the kthread servicing this CPUs queues.
+ */
+static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
+                                    struct rcu_head *rhp,
+                                    struct rcu_head **rhtp,
+                                    int rhcount, int rhcount_lazy)
+{
+        int len;
+        struct rcu_head **old_rhpp;
+        struct task_struct *t;
+
+        /* Enqueue the callback on the nocb list and update counts. */
+        old_rhpp = xchg(&rdp->nocb_tail, rhtp);
+        ACCESS_ONCE(*old_rhpp) = rhp;
+        atomic_long_add(rhcount, &rdp->nocb_q_count);
+        atomic_long_add(rhcount_lazy, &rdp->nocb_q_count_lazy);
+
+        /* If we are not being polled and there is a kthread, awaken it ... */
+        t = ACCESS_ONCE(rdp->nocb_kthread);
+        if (rcu_nocb_poll | !t)
+                return;
+        len = atomic_long_read(&rdp->nocb_q_count);
+        if (old_rhpp == &rdp->nocb_head) {
+                wake_up(&rdp->nocb_wq); /* ... only if queue was empty ... */
+                rdp->qlen_last_fqs_check = 0;
+        } else if (len > rdp->qlen_last_fqs_check + qhimark) {
+                wake_up_process(t); /* ... or if many callbacks queued. */
+                rdp->qlen_last_fqs_check = LONG_MAX / 2;
+        }
+        return;
+}
+
+/*
+ * This is a helper for __call_rcu(), which invokes this when the normal
+ * callback queue is inoperable.  If this is not a no-CBs CPU, this
+ * function returns failure back to __call_rcu(), which can complain
+ * appropriately.
+ *
+ * Otherwise, this function queues the callback where the corresponding
+ * "rcuo" kthread can find it.
+ */
+static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
+                            bool lazy)
+{
+
+        if (!is_nocb_cpu(rdp->cpu))
+                return 0;
+        __call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
+        return 1;
+}
+
+/*
+ * Adopt orphaned callbacks on a no-CBs CPU, or return 0 if this is
+ * not a no-CBs CPU.
+ */
+static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+                                                     struct rcu_data *rdp)
+{
+        long ql = rsp->qlen;
+        long qll = rsp->qlen_lazy;
+
+        /* If this is not a no-CBs CPU, tell the caller to do it the old way. */
+        if (!is_nocb_cpu(smp_processor_id()))
+                return 0;
+        rsp->qlen = 0;
+        rsp->qlen_lazy = 0;
+
+        /* First, enqueue the donelist, if any.  This preserves CB ordering. */
+        if (rsp->orphan_donelist != NULL) {
+                __call_rcu_nocb_enqueue(rdp, rsp->orphan_donelist,
+                                        rsp->orphan_donetail, ql, qll);
+                ql = qll = 0;
+                rsp->orphan_donelist = NULL;
+                rsp->orphan_donetail = &rsp->orphan_donelist;
+        }
+        if (rsp->orphan_nxtlist != NULL) {
+                __call_rcu_nocb_enqueue(rdp, rsp->orphan_nxtlist,
+                                        rsp->orphan_nxttail, ql, qll);
+                ql = qll = 0;
+                rsp->orphan_nxtlist = NULL;
+                rsp->orphan_nxttail = &rsp->orphan_nxtlist;
+        }
+        return 1;
+}
+
+/*
+ * There must be at least one non-no-CBs CPU in operation at any given
+ * time, because no-CBs CPUs are not capable of initiating grace periods
+ * independently.  This function therefore complains if the specified
+ * CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to
+ * avoid offlining the last such CPU.  (Recursion is a wonderful thing,
+ * but you have to have a base case!)
+ */
+static bool nocb_cpu_expendable(int cpu)
+{
+        cpumask_var_t non_nocb_cpus;
+        int ret;
+
+        /*
+         * If there are no no-CB CPUs or if this CPU is not a no-CB CPU,
+         * then offlining this CPU is harmless.  Let it happen.
+         */
+        if (!have_rcu_nocb_mask || is_nocb_cpu(cpu))
+                return 1;
+
+        /* If no memory, play it safe and keep the CPU around. */
+        if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO))
+                return 0;
+        cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask);
+        cpumask_clear_cpu(cpu, non_nocb_cpus);
+        ret = !cpumask_empty(non_nocb_cpus);
+        free_cpumask_var(non_nocb_cpus);
+        return ret;
+}
+
+/*
+ * Helper structure for remote registry of RCU callbacks.
+ * This is needed for when a no-CBs CPU needs to start a grace period.
+ * If it just invokes call_rcu(), the resulting callback will be queued,
+ * which can result in deadlock.
+ */
+struct rcu_head_remote {
+        struct rcu_head *rhp;
+        call_rcu_func_t *crf;
+        void (*func)(struct rcu_head *rhp);
+};
+
+/*
+ * Register a callback as specified by the rcu_head_remote struct.
+ * This function is intended to be invoked via smp_call_function_single().
+ */
+static void call_rcu_local(void *arg)
+{
+        struct rcu_head_remote *rhrp =
+                container_of(arg, struct rcu_head_remote, rhp);
+
+        rhrp->crf(rhrp->rhp, rhrp->func);
+}
+
+/*
+ * Set up an rcu_head_remote structure and the invoke call_rcu_local()
+ * on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via
+ * smp_call_function_single().
+ */
+static void invoke_crf_remote(struct rcu_head *rhp,
+                              void (*func)(struct rcu_head *rhp),
+                              call_rcu_func_t crf)
+{
+        struct rcu_head_remote rhr;
+
+        rhr.rhp = rhp;
+        rhr.crf = crf;
+        rhr.func = func;
+        smp_call_function_single(0, call_rcu_local, &rhr, 1);
+}
+
+/*
+ * Helper functions to be passed to wait_rcu_gp(), each of which
+ * invokes invoke_crf_remote() to register a callback appropriately.
+ */
+static void __maybe_unused
+call_rcu_preempt_remote(struct rcu_head *rhp,
+                        void (*func)(struct rcu_head *rhp))
+{
+        invoke_crf_remote(rhp, func, call_rcu);
+}
+static void call_rcu_bh_remote(struct rcu_head *rhp,
+                               void (*func)(struct rcu_head *rhp))
+{
+        invoke_crf_remote(rhp, func, call_rcu_bh);
+}
+static void call_rcu_sched_remote(struct rcu_head *rhp,
+                                  void (*func)(struct rcu_head *rhp))
+{
+        invoke_crf_remote(rhp, func, call_rcu_sched);
+}
+
+/*
+ * Per-rcu_data kthread, but only for no-CBs CPUs.  Each kthread invokes
+ * callbacks queued by the corresponding no-CBs CPU.
+ */
+static int rcu_nocb_kthread(void *arg)
+{
+        int c, cl;
+        struct rcu_head *list;
+        struct rcu_head *next;
+        struct rcu_head **tail;
+        struct rcu_data *rdp = arg;
+
+        /* Each pass through this loop invokes one batch of callbacks */
+        for (;;) {
+                /* If not polling, wait for next batch of callbacks. */
+                if (!rcu_nocb_poll)
+                        wait_event(rdp->nocb_wq, rdp->nocb_head);
+                list = ACCESS_ONCE(rdp->nocb_head);
+                if (!list) {
+                        schedule_timeout_interruptible(1);
+                        continue;
+                }
+
+                /*
+                 * Extract queued callbacks, update counts, and wait
+                 * for a grace period to elapse.
+                 */
+                ACCESS_ONCE(rdp->nocb_head) = NULL;
+                tail = xchg(&rdp->nocb_tail, &rdp->nocb_head);
+                c = atomic_long_xchg(&rdp->nocb_q_count, 0);
+                cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
+                ACCESS_ONCE(rdp->nocb_p_count) += c;
+                ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
+                wait_rcu_gp(rdp->rsp->call_remote);
+
+                /* Each pass through the following loop invokes a callback. */
+                trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
+                c = cl = 0;
+                while (list) {
+                        next = list->next;
+                        /* Wait for enqueuing to complete, if needed. */
+                        while (next == NULL && &list->next != tail) {
+                                schedule_timeout_interruptible(1);
+                                next = list->next;
+                        }
+                        debug_rcu_head_unqueue(list);
+                        local_bh_disable();
+                        if (__rcu_reclaim(rdp->rsp->name, list))
+                                cl++;
+                        c++;
+                        local_bh_enable();
+                        list = next;
+                }
+                trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
+                ACCESS_ONCE(rdp->nocb_p_count) -= c;
+                ACCESS_ONCE(rdp->nocb_p_count_lazy) -= cl;
+                rdp->n_cbs_invoked += c;
+        }
+        return 0;
+}
+
+/* Initialize per-rcu_data variables for no-CBs CPUs. */
+static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
+{
+        rdp->nocb_tail = &rdp->nocb_head;
+        init_waitqueue_head(&rdp->nocb_wq);
+}
+
+/* Create a kthread for each RCU flavor for each no-CBs CPU. */
+static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
+{
+        int cpu;
+        struct rcu_data *rdp;
+        struct task_struct *t;
+
+        if (rcu_nocb_mask == NULL)
+                return;
+        for_each_cpu(cpu, rcu_nocb_mask) {
+                rdp = per_cpu_ptr(rsp->rda, cpu);
+                t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu);
+                BUG_ON(IS_ERR(t));
+                ACCESS_ONCE(rdp->nocb_kthread) = t;
+        }
+}
+
+/* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
+static void init_nocb_callback_list(struct rcu_data *rdp)
+{
+        if (rcu_nocb_mask == NULL ||
+            !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
+                return;
+        rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+}
+
+/* Initialize the ->call_remote fields in the rcu_state structures. */
+static void __init rcu_init_nocb(void)
+{
+#ifdef CONFIG_PREEMPT_RCU
+        rcu_preempt_state.call_remote = call_rcu_preempt_remote;
+#endif /* #ifdef CONFIG_PREEMPT_RCU */
+        rcu_bh_state.call_remote = call_rcu_bh_remote;
+        rcu_sched_state.call_remote = call_rcu_sched_remote;
+}
+
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+
+static bool is_nocb_cpu(int cpu)
+{
+        return false;
+}
+
+static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
+                            bool lazy)
+{
+        return 0;
+}
+
+static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
+                                                     struct rcu_data *rdp)
+{
+        return 0;
+}
+
+static bool nocb_cpu_expendable(int cpu)
+{
+        return 1;
+}
+
+static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
+{
+}
+
+static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
+{
+}
+
+static void init_nocb_callback_list(struct rcu_data *rdp)
+{
+}
+
+static void __init rcu_init_nocb(void)
+{
+}
+
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index f9512687a6e5..3189f9aa3e84 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -113,6 +113,8 @@ static char convert_kthread_status(unsigned int kthread_status)
 
 static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
 {
+        long ql, qll;
+
         if (!rdp->beenonline)
                 return;
         seq_printf(m, "%3d%cc=%ld g=%ld pq=%d qp=%d",
@@ -126,8 +128,11 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
                    rdp->dynticks->dynticks_nmi_nesting,
                    rdp->dynticks_fqs);
         seq_printf(m, " of=%lu", rdp->offline_fqs);
+        rcu_nocb_q_lengths(rdp, &ql, &qll);
+        qll += rdp->qlen_lazy;
+        ql += rdp->qlen;
         seq_printf(m, " ql=%ld/%ld qs=%c%c%c%c",
-                   rdp->qlen_lazy, rdp->qlen,
+                   qll, ql,
                    ".N"[rdp->nxttail[RCU_NEXT_READY_TAIL] !=
                         rdp->nxttail[RCU_NEXT_TAIL]],
                    ".R"[rdp->nxttail[RCU_WAIT_TAIL] !=