author    Paul E. McKenney <paul.mckenney@linaro.org>    2013-01-07 16:37:42 -0500
committer Paul E. McKenney <paulmck@linux.vnet.ibm.com>  2013-03-12 14:17:51 -0400
commit    34ed62461ae4970695974afb9a60ac3df0086830 (patch)
tree      d65a582953a66ee089074fd5a57e39460d4ce2c6 /kernel
parent    f6161aa153581da4a3867a2d1a7caf4be19b6ec9 (diff)
rcu: Remove restrictions on no-CBs CPUs
Currently, CPU 0 is constrained to not be a no-CBs CPU, and furthermore at
least one no-CBs CPU must remain online at any given time.  These restrictions
are problematic in some situations, such as cases where all CPUs must run a
real-time workload that needs to be insulated from OS jitter and latencies
due to RCU callback invocation.  This commit therefore provides no-CBs CPUs
a (very crude and energy-inefficient) way to start and to wait for grace
periods independently of the normal RCU callback mechanisms.  This approach
allows any or all of the CPUs to be designated as no-CBs CPUs, and allows any
proper subset of the CPUs (whether no-CBs CPUs or not) to be offlined.

This commit also provides a fix for a locking bug spotted by
Xie ChanglongX <changlongx.xie@intel.com>.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
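Editorial note: the mechanism added below (rcu_nocb_wait_gp() in kernel/rcutree_plugin.h)
amounts to snapshotting the rcu_node ->completed counter under the node lock, picking
->completed + 2 as the target, and then polling with short interruptible sleeps until the
counter catches up.  The following user-space sketch only illustrates that polling pattern
under simplified assumptions; the "completed" counter, gp_thread(), and wait_for_gp() are
hypothetical stand-ins for illustration, not kernel code from this patch.

/*
 * Illustrative user-space analogue (not kernel code): one thread advances a
 * "completed" counter, standing in for rnp->completed, while a waiter picks a
 * target of completed + 2 and polls with short sleeps, roughly as
 * rcu_nocb_wait_gp() does with schedule_timeout_interruptible(2).
 * Build with: gcc -O2 -pthread sketch.c
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_ulong completed;                  /* stand-in for rnp->completed */

static void *gp_thread(void *unused)
{
        /* Pretend a grace period completes every 100 ms. */
        for (int i = 0; i < 5; i++) {
                usleep(100 * 1000);
                atomic_fetch_add(&completed, 1);
        }
        return NULL;
}

static void wait_for_gp(void)
{
        unsigned long c = atomic_load(&completed) + 2; /* target, as in the patch */

        /* Signed-difference test plays the role of ULONG_CMP_GE(). */
        while ((long)(atomic_load(&completed) - c) < 0)
                usleep(2000);                   /* crude, energy-inefficient poll */
        /* The kernel version follows this with smp_mb() so that callback
         * invocation is ordered after the end of the grace period. */
}

int main(void)
{
        pthread_t t;

        pthread_create(&t, NULL, gp_thread, NULL);
        wait_for_gp();
        printf("target grace period reached, completed=%lu\n",
               (unsigned long)atomic_load(&completed));
        pthread_join(t, NULL);
        return 0;
}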
Diffstat (limited to 'kernel')
-rw-r--r--    kernel/rcutree.c           14
-rw-r--r--    kernel/rcutree.h           12
-rw-r--r--    kernel/rcutree_plugin.h    156
3 files changed, 55 insertions, 127 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 5b8ad827fd86..6ad0716e65dc 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -310,6 +310,8 @@ cpu_needs_another_gp(struct rcu_state *rsp, struct rcu_data *rdp)
 
         if (rcu_gp_in_progress(rsp))
                 return 0; /* No, a grace period is already in progress. */
+        if (rcu_nocb_needs_gp(rdp))
+                return 1; /* Yes, a no-CBs CPU needs one. */
         if (!rdp->nxttail[RCU_NEXT_TAIL])
                 return 0; /* No, this is a no-CBs (or offline) CPU. */
         if (*rdp->nxttail[RCU_NEXT_READY_TAIL])
@@ -1035,10 +1037,11 @@ static void init_callback_list(struct rcu_data *rdp)
 {
         int i;
 
+        if (init_nocb_callback_list(rdp))
+                return;
         rdp->nxtlist = NULL;
         for (i = 0; i < RCU_NEXT_SIZE; i++)
                 rdp->nxttail[i] = &rdp->nxtlist;
-        init_nocb_callback_list(rdp);
 }
 
 /*
@@ -2909,7 +2912,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
         struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
         struct rcu_node *rnp = rdp->mynode;
         struct rcu_state *rsp;
-        int ret = NOTIFY_OK;
 
         trace_rcu_utilization("Start CPU hotplug");
         switch (action) {
@@ -2923,10 +2925,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                 rcu_boost_kthread_setaffinity(rnp, -1);
                 break;
         case CPU_DOWN_PREPARE:
-                if (nocb_cpu_expendable(cpu))
-                        rcu_boost_kthread_setaffinity(rnp, cpu);
-                else
-                        ret = NOTIFY_BAD;
+                rcu_boost_kthread_setaffinity(rnp, cpu);
                 break;
         case CPU_DYING:
         case CPU_DYING_FROZEN:
@@ -2950,7 +2949,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
                 break;
         }
         trace_rcu_utilization("End CPU hotplug");
-        return ret;
+        return NOTIFY_OK;
 }
 
 /*
@@ -3170,7 +3169,6 @@ void __init rcu_init(void)
         rcu_init_one(&rcu_sched_state, &rcu_sched_data);
         rcu_init_one(&rcu_bh_state, &rcu_bh_data);
         __rcu_init_preempt();
-        rcu_init_nocb();
         open_softirq(RCU_SOFTIRQ, rcu_process_callbacks);
 
         /*
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index c896b5045d9d..7af39f4aaac4 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -326,6 +326,7 @@ struct rcu_data {
         int nocb_p_count_lazy;             /* (approximate). */
         wait_queue_head_t nocb_wq;         /* For nocb kthreads to sleep on. */
         struct task_struct *nocb_kthread;
+        bool nocb_needs_gp;
 #endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
         int cpu;
@@ -375,12 +376,6 @@ struct rcu_state {
         struct rcu_data __percpu *rda;     /* pointer of percu rcu_data. */
         void (*call)(struct rcu_head *head,        /* call_rcu() flavor. */
                      void (*func)(struct rcu_head *head));
-#ifdef CONFIG_RCU_NOCB_CPU
-        void (*call_remote)(struct rcu_head *head,
-                     void (*func)(struct rcu_head *head));
-                                           /* call_rcu() flavor, but for */
-                                           /*  placing on remote CPU. */
-#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
 
         /* The following fields are guarded by the root rcu_node's lock. */
 
@@ -529,16 +524,15 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu);
 static void print_cpu_stall_info_end(void);
 static void zero_cpu_stall_ticks(struct rcu_data *rdp);
 static void increment_cpu_stall_ticks(void);
+static int rcu_nocb_needs_gp(struct rcu_data *rdp);
 static bool is_nocb_cpu(int cpu);
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
                             bool lazy);
 static bool rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
                                       struct rcu_data *rdp);
-static bool nocb_cpu_expendable(int cpu);
 static void rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp);
 static void rcu_spawn_nocb_kthreads(struct rcu_state *rsp);
-static void init_nocb_callback_list(struct rcu_data *rdp);
-static void __init rcu_init_nocb(void);
+static bool init_nocb_callback_list(struct rcu_data *rdp);
 
 #endif /* #ifndef RCU_TREE_NONCORE */
 
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c1cc7e17ff9d..44f958a88b21 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -86,10 +86,6 @@ static void __init rcu_bootup_announce_oddness(void)
         printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
 #ifdef CONFIG_RCU_NOCB_CPU
         if (have_rcu_nocb_mask) {
-                if (cpumask_test_cpu(0, rcu_nocb_mask)) {
-                        cpumask_clear_cpu(0, rcu_nocb_mask);
-                        pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n");
-                }
                 cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
                 pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
                 if (rcu_nocb_poll)
@@ -2165,6 +2161,14 @@ static int __init parse_rcu_nocb_poll(char *arg)
 }
 early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
 
+/*
+ * Does this CPU needs a grace period due to offloaded callbacks?
+ */
+static int rcu_nocb_needs_gp(struct rcu_data *rdp)
+{
+        return rdp->nocb_needs_gp;
+}
+
 /* Is the specified CPU a no-CPUs CPU? */
 static bool is_nocb_cpu(int cpu)
 {
@@ -2265,95 +2269,39 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 }
 
 /*
- * There must be at least one non-no-CBs CPU in operation at any given
- * time, because no-CBs CPUs are not capable of initiating grace periods
- * independently.  This function therefore complains if the specified
- * CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to
- * avoid offlining the last such CPU.  (Recursion is a wonderful thing,
- * but you have to have a base case!)
+ * If necessary, kick off a new grace period, and either way wait
+ * for a subsequent grace period to complete.
  */
-static bool nocb_cpu_expendable(int cpu)
+static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 {
-        cpumask_var_t non_nocb_cpus;
-        int ret;
+        unsigned long c;
+        unsigned long flags;
+        unsigned long j;
+        struct rcu_node *rnp = rdp->mynode;
+
+        raw_spin_lock_irqsave(&rnp->lock, flags);
+        c = rnp->completed + 2;
+        rdp->nocb_needs_gp = true;
+        raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
         /*
-         * If there are no no-CB CPUs or if this CPU is not a no-CB CPU,
-         * then offlining this CPU is harmless.  Let it happen.
+         * Wait for the grace period.  Do so interruptibly to avoid messing
+         * up the load average.
          */
-        if (!have_rcu_nocb_mask || is_nocb_cpu(cpu))
-                return 1;
-
-        /* If no memory, play it safe and keep the CPU around. */
-        if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO))
-                return 0;
-        cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask);
-        cpumask_clear_cpu(cpu, non_nocb_cpus);
-        ret = !cpumask_empty(non_nocb_cpus);
-        free_cpumask_var(non_nocb_cpus);
-        return ret;
-}
-
-/*
- * Helper structure for remote registry of RCU callbacks.
- * This is needed for when a no-CBs CPU needs to start a grace period.
- * If it just invokes call_rcu(), the resulting callback will be queued,
- * which can result in deadlock.
- */
-struct rcu_head_remote {
-        struct rcu_head *rhp;
-        call_rcu_func_t *crf;
-        void (*func)(struct rcu_head *rhp);
-};
-
-/*
- * Register a callback as specified by the rcu_head_remote struct.
- * This function is intended to be invoked via smp_call_function_single().
- */
-static void call_rcu_local(void *arg)
-{
-        struct rcu_head_remote *rhrp =
-                container_of(arg, struct rcu_head_remote, rhp);
-
-        rhrp->crf(rhrp->rhp, rhrp->func);
-}
-
-/*
- * Set up an rcu_head_remote structure and the invoke call_rcu_local()
- * on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via
- * smp_call_function_single().
- */
-static void invoke_crf_remote(struct rcu_head *rhp,
-                              void (*func)(struct rcu_head *rhp),
-                              call_rcu_func_t crf)
-{
-        struct rcu_head_remote rhr;
-
-        rhr.rhp = rhp;
-        rhr.crf = crf;
-        rhr.func = func;
-        smp_call_function_single(0, call_rcu_local, &rhr, 1);
-}
-
-/*
- * Helper functions to be passed to wait_rcu_gp(), each of which
- * invokes invoke_crf_remote() to register a callback appropriately.
- */
-static void __maybe_unused
-call_rcu_preempt_remote(struct rcu_head *rhp,
-                        void (*func)(struct rcu_head *rhp))
-{
-        invoke_crf_remote(rhp, func, call_rcu);
-}
-static void call_rcu_bh_remote(struct rcu_head *rhp,
-                               void (*func)(struct rcu_head *rhp))
-{
-        invoke_crf_remote(rhp, func, call_rcu_bh);
-}
-static void call_rcu_sched_remote(struct rcu_head *rhp,
-                                  void (*func)(struct rcu_head *rhp))
-{
-        invoke_crf_remote(rhp, func, call_rcu_sched);
+        for (;;) {
+                j = jiffies;
+                schedule_timeout_interruptible(2);
+                raw_spin_lock_irqsave(&rnp->lock, flags);
+                if (ULONG_CMP_GE(rnp->completed, c)) {
+                        rdp->nocb_needs_gp = false;
+                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
+                        break;
+                }
+                if (j == jiffies)
+                        flush_signals(current);
+                raw_spin_unlock_irqrestore(&rnp->lock, flags);
+        }
+        smp_mb(); /* Ensure that CB invocation happens after GP end. */
 }
 
 /*
@@ -2390,7 +2338,7 @@ static int rcu_nocb_kthread(void *arg)
                 cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
                 ACCESS_ONCE(rdp->nocb_p_count) += c;
                 ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
-                wait_rcu_gp(rdp->rsp->call_remote);
+                rcu_nocb_wait_gp(rdp);
 
                 /* Each pass through the following loop invokes a callback. */
                 trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
@@ -2443,26 +2391,22 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 }
 
 /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
-static void init_nocb_callback_list(struct rcu_data *rdp)
+static bool init_nocb_callback_list(struct rcu_data *rdp)
 {
         if (rcu_nocb_mask == NULL ||
             !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
-                return;
+                return false;
         rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+        return true;
 }
 
-/* Initialize the ->call_remote fields in the rcu_state structures. */
-static void __init rcu_init_nocb(void)
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+
+static int rcu_nocb_needs_gp(struct rcu_data *rdp)
 {
-#ifdef CONFIG_PREEMPT_RCU
-        rcu_preempt_state.call_remote = call_rcu_preempt_remote;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-        rcu_bh_state.call_remote = call_rcu_bh_remote;
-        rcu_sched_state.call_remote = call_rcu_sched_remote;
+        return 0;
 }
 
-#else /* #ifdef CONFIG_RCU_NOCB_CPU */
-
 static bool is_nocb_cpu(int cpu)
 {
         return false;
@@ -2480,11 +2424,6 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
         return 0;
 }
 
-static bool nocb_cpu_expendable(int cpu)
-{
-        return 1;
-}
-
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
 }
@@ -2493,12 +2432,9 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 {
 }
 
-static void init_nocb_callback_list(struct rcu_data *rdp)
-{
-}
-
-static void __init rcu_init_nocb(void)
+static bool init_nocb_callback_list(struct rcu_data *rdp)
 {
+        return false;
 }
 
 #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */