author     Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2016-11-08 17:25:21 -0500
committer  Paul E. McKenney <paulmck@linux.vnet.ibm.com>    2017-04-18 14:19:22 -0400
commit     b8c17e6664c461e4aed545a943304c3b32dd309c (patch)
tree       0ff7edc44bb4e2eeea3a0d6a0497ba13b2acc1dc
parent     4495c08e84729385774601b5146d51d9e5849f81 (diff)
rcu: Maintain special bits at bottom of ->dynticks counter
Currently, IPIs are used to force other CPUs to invalidate their TLBs in response to a kernel virtual-memory mapping change. This works, but degrades both battery lifetime (for idle CPUs) and real-time response (for nohz_full CPUs), and in addition results in unnecessary IPIs because CPUs executing in usermode are unaffected by stale kernel mappings. It would be better to have a CPU executing in usermode wait until it enters kernel mode to do the flush, first to avoid interrupting usermode tasks and second to handle multiple flush requests with a single flush in the case of a long-running user task.

This commit therefore reserves a bit at the bottom of the ->dynticks counter, which is checked upon exit from extended quiescent states. If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is invoked, which, if not supplied, is an empty single-pass do-while loop. If this bottom bit is set on -entry- to an extended quiescent state, then a WARN_ON_ONCE() triggers. This bottom bit may be set using a new rcu_eqs_special_set() function, which returns true if the bit was set, or false if the CPU turned out not to be in an extended quiescent state. Please note that this function refuses to set the bit for a non-nohz_full CPU when that CPU is executing in usermode, because usermode execution is tracked by RCU as a dyntick-idle extended quiescent state only for nohz_full CPUs.

Reported-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
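To make the intended usage concrete, here is a minimal caller-side sketch (not part of this commit). Only rcu_eqs_special_set(), rcu_eqs_special_exit(), and smp_call_function_single() come from the kernel; the helper names flush_tlb_ipi_handler() and kernel_tlb_flush_cpu() are hypothetical placeholders for whatever the TLB-flush code would actually use.

#include <linux/smp.h>

/* Hypothetical IPI handler: flush this CPU's TLB of stale kernel mappings. */
static void flush_tlb_ipi_handler(void *unused)
{
        /* arch-specific local kernel-TLB flush would go here */
}

/* Hypothetical caller: flush one remote CPU, avoiding the IPI if possible. */
static void kernel_tlb_flush_cpu(int cpu)
{
        /*
         * If the target CPU is in an RCU extended quiescent state (idle,
         * or nohz_full usermode), set its special bit; the flush is then
         * deferred to rcu_eqs_special_exit() at the next EQS exit and no
         * IPI is needed.
         */
        if (rcu_eqs_special_set(cpu))
                return;

        /* Otherwise the CPU is running in the kernel: fall back to an IPI. */
        smp_call_function_single(cpu, flush_tlb_ipi_handler, NULL, 1);
}

An architecture wiring this up would also define rcu_eqs_special_exit() to its local flush routine before the RCU code falls back to the empty default that the #ifndef block in kernel/rcu/tree.c supplies.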
-rw-r--r--  include/linux/rcutiny.h   5
-rw-r--r--  kernel/rcu/tree.c        77
-rw-r--r--  kernel/rcu/tree.h         1
3 files changed, 67 insertions(+), 16 deletions(-)
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index b452953e21c8..6c9d941e3962 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 	return 0;
 }
 
+static inline bool rcu_eqs_special_set(int cpu)
+{
+	return false;  /* Never flag non-existent other CPUs! */
+}
+
 static inline unsigned long get_state_synchronize_rcu(void)
 {
 	return 0;
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 50fee7689e71..0efad311ded4 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -274,9 +274,19 @@ void rcu_bh_qs(void)
 
 static DEFINE_PER_CPU(int, rcu_sched_qs_mask);
 
+/*
+ * Steal a bit from the bottom of ->dynticks for idle entry/exit
+ * control.  Initially this is for TLB flushing.
+ */
+#define RCU_DYNTICK_CTRL_MASK 0x1
+#define RCU_DYNTICK_CTRL_CTR  (RCU_DYNTICK_CTRL_MASK + 1)
+#ifndef rcu_eqs_special_exit
+#define rcu_eqs_special_exit() do { } while (0)
+#endif
+
 static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 	.dynticks_nesting = DYNTICK_TASK_EXIT_IDLE,
-	.dynticks = ATOMIC_INIT(1),
+	.dynticks = ATOMIC_INIT(RCU_DYNTICK_CTRL_CTR),
 #ifdef CONFIG_NO_HZ_FULL_SYSIDLE
 	.dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE,
 	.dynticks_idle = ATOMIC_INIT(1),
@@ -290,15 +300,20 @@ static DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
 static void rcu_dynticks_eqs_enter(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special;
+	int seq;
 
 	/*
-	 * CPUs seeing atomic_inc_return() must see prior RCU read-side
+	 * CPUs seeing atomic_add_return() must see prior RCU read-side
 	 * critical sections, and we also must force ordering with the
 	 * next idle sojourn.
 	 */
-	special = atomic_inc_return(&rdtp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && special & 0x1);
+	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+	/* Better be in an extended quiescent state! */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     (seq & RCU_DYNTICK_CTRL_CTR));
+	/* Better not have special action (TLB flush) pending! */
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     (seq & RCU_DYNTICK_CTRL_MASK));
 }
 
 /*
@@ -308,15 +323,22 @@ static void rcu_dynticks_eqs_enter(void)
 static void rcu_dynticks_eqs_exit(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special;
+	int seq;
 
 	/*
-	 * CPUs seeing atomic_inc_return() must see prior idle sojourns,
+	 * CPUs seeing atomic_add_return() must see prior idle sojourns,
 	 * and we also must force ordering with the next RCU read-side
 	 * critical section.
 	 */
-	special = atomic_inc_return(&rdtp->dynticks);
-	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) && !(special & 0x1));
+	seq = atomic_add_return(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
+	WARN_ON_ONCE(IS_ENABLED(CONFIG_RCU_EQS_DEBUG) &&
+		     !(seq & RCU_DYNTICK_CTRL_CTR));
+	if (seq & RCU_DYNTICK_CTRL_MASK) {
+		atomic_andnot(RCU_DYNTICK_CTRL_MASK, &rdtp->dynticks);
+		smp_mb__after_atomic(); /* _exit after clearing mask. */
+		/* Prefer duplicate flushes to losing a flush. */
+		rcu_eqs_special_exit();
+	}
 }
 
 /*
@@ -333,9 +355,9 @@ static void rcu_dynticks_eqs_online(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	if (atomic_read(&rdtp->dynticks) & 0x1)
+	if (atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR)
 		return;
-	atomic_add(0x1, &rdtp->dynticks);
+	atomic_add(RCU_DYNTICK_CTRL_CTR, &rdtp->dynticks);
 }
 
 /*
@@ -347,7 +369,7 @@ bool rcu_dynticks_curr_cpu_in_eqs(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
 
-	return !(atomic_read(&rdtp->dynticks) & 0x1);
+	return !(atomic_read(&rdtp->dynticks) & RCU_DYNTICK_CTRL_CTR);
 }
 
 /*
@@ -358,7 +380,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
 {
 	int snap = atomic_add_return(0, &rdtp->dynticks);
 
-	return snap;
+	return snap & ~RCU_DYNTICK_CTRL_MASK;
 }
 
 /*
@@ -367,7 +389,7 @@ int rcu_dynticks_snap(struct rcu_dynticks *rdtp)
  */
 static bool rcu_dynticks_in_eqs(int snap)
 {
-	return !(snap & 0x1);
+	return !(snap & RCU_DYNTICK_CTRL_CTR);
 }
 
 /*
@@ -387,10 +409,33 @@ static bool rcu_dynticks_in_eqs_since(struct rcu_dynticks *rdtp, int snap)
 static void rcu_dynticks_momentary_idle(void)
 {
 	struct rcu_dynticks *rdtp = this_cpu_ptr(&rcu_dynticks);
-	int special = atomic_add_return(2, &rdtp->dynticks);
+	int special = atomic_add_return(2 * RCU_DYNTICK_CTRL_CTR,
+					&rdtp->dynticks);
 
 	/* It is illegal to call this from idle state. */
-	WARN_ON_ONCE(!(special & 0x1));
+	WARN_ON_ONCE(!(special & RCU_DYNTICK_CTRL_CTR));
+}
+
+/*
+ * Set the special (bottom) bit of the specified CPU so that it
+ * will take special action (such as flushing its TLB) on the
+ * next exit from an extended quiescent state.  Returns true if
+ * the bit was successfully set, or false if the CPU was not in
+ * an extended quiescent state.
+ */
+bool rcu_eqs_special_set(int cpu)
+{
+	int old;
+	int new;
+	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+
+	do {
+		old = atomic_read(&rdtp->dynticks);
+		if (old & RCU_DYNTICK_CTRL_CTR)
+			return false;
+		new = old | RCU_DYNTICK_CTRL_MASK;
+	} while (atomic_cmpxchg(&rdtp->dynticks, old, new) != old);
+	return true;
 }
 
 DEFINE_PER_CPU_SHARED_ALIGNED(unsigned long, rcu_qs_ctr);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index ec62a05bfdb3..7468b4de7e0c 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -596,6 +596,7 @@ extern struct rcu_state rcu_preempt_state;
 #endif /* #ifdef CONFIG_PREEMPT_RCU */
 
 int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
+bool rcu_eqs_special_set(int cpu);
 
 #ifdef CONFIG_RCU_BOOST
 DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);