Diffstat (limited to 'kernel/rcutree_plugin.h')

 kernel/rcutree_plugin.h | 466
 1 file changed, 443 insertions(+), 23 deletions(-)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 63098a59216e..130c97b027f2 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -28,7 +28,7 @@
 #include <linux/gfp.h>
 #include <linux/oom.h>
 #include <linux/smpboot.h>
-#include <linux/tick.h>
+#include "time/tick-internal.h"
 
 #define RCU_KTHREAD_PRIO 1
 
@@ -110,9 +110,7 @@ static void __init rcu_bootup_announce_oddness(void)
 
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
-struct rcu_state rcu_preempt_state =
-	RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
-DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
+RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
 static struct rcu_state *rcu_state = &rcu_preempt_state;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
@@ -169,7 +167,7 @@ static void rcu_preempt_qs(int cpu)
 	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
 
 	if (rdp->passed_quiesce == 0)
-		trace_rcu_grace_period("rcu_preempt", rdp->gpnum, "cpuqs");
+		trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs"));
 	rdp->passed_quiesce = 1;
 	current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS;
 }
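
Most of the hunks above and below make the same mechanical change: every string
literal handed to an RCU tracepoint is wrapped in TPS(). TPS() is shorthand for
the tracepoint_string infrastructure; in this series the macro lives on the
rcutree side (kernel/rcutree.c), not in this file. Roughly:

	/*
	 * tracepoint_string() emits the literal into the special
	 * __tracepoint_str section, so trace events record only a pointer
	 * instead of copying the string each time; userspace tooling maps
	 * the pointer back to the text.
	 */
	#define TPS(x)	tracepoint_string(x)
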
@@ -388,7 +386,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 		np = rcu_next_node_entry(t, rnp);
 		list_del_init(&t->rcu_node_entry);
 		t->rcu_blocked_node = NULL;
-		trace_rcu_unlock_preempted_task("rcu_preempt",
+		trace_rcu_unlock_preempted_task(TPS("rcu_preempt"),
 						rnp->gpnum, t->pid);
 		if (&t->rcu_node_entry == rnp->gp_tasks)
 			rnp->gp_tasks = np;
@@ -412,7 +410,7 @@ void rcu_read_unlock_special(struct task_struct *t)
 		 */
 		empty_exp_now = !rcu_preempted_readers_exp(rnp);
 		if (!empty && !rcu_preempt_blocked_readers_cgp(rnp)) {
-			trace_rcu_quiescent_state_report("preempt_rcu",
+			trace_rcu_quiescent_state_report(TPS("preempt_rcu"),
 							 rnp->gpnum,
 							 0, rnp->qsmask,
 							 rnp->level,
@@ -1250,12 +1248,12 @@ static int rcu_boost_kthread(void *arg)
 	int spincnt = 0;
 	int more2boost;
 
-	trace_rcu_utilization("Start boost kthread@init");
+	trace_rcu_utilization(TPS("Start boost kthread@init"));
 	for (;;) {
 		rnp->boost_kthread_status = RCU_KTHREAD_WAITING;
-		trace_rcu_utilization("End boost kthread@rcu_wait");
+		trace_rcu_utilization(TPS("End boost kthread@rcu_wait"));
 		rcu_wait(rnp->boost_tasks || rnp->exp_tasks);
-		trace_rcu_utilization("Start boost kthread@rcu_wait");
+		trace_rcu_utilization(TPS("Start boost kthread@rcu_wait"));
 		rnp->boost_kthread_status = RCU_KTHREAD_RUNNING;
 		more2boost = rcu_boost(rnp);
 		if (more2boost)
@@ -1264,14 +1262,14 @@ static int rcu_boost_kthread(void *arg)
 			spincnt = 0;
 		if (spincnt > 10) {
 			rnp->boost_kthread_status = RCU_KTHREAD_YIELDING;
-			trace_rcu_utilization("End boost kthread@rcu_yield");
+			trace_rcu_utilization(TPS("End boost kthread@rcu_yield"));
 			schedule_timeout_interruptible(2);
-			trace_rcu_utilization("Start boost kthread@rcu_yield");
+			trace_rcu_utilization(TPS("Start boost kthread@rcu_yield"));
 			spincnt = 0;
 		}
 	}
 	/* NOTREACHED */
-	trace_rcu_utilization("End boost kthread@notreached");
+	trace_rcu_utilization(TPS("End boost kthread@notreached"));
 	return 0;
 }
 
@@ -1352,7 +1350,7 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp)
  * already exist. We only create this kthread for preemptible RCU.
  * Returns zero if all is well, a negated errno otherwise.
  */
-static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
+static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 						 struct rcu_node *rnp)
 {
 	int rnp_index = rnp - &rsp->node[0];
@@ -1419,7 +1417,7 @@ static void rcu_cpu_kthread(unsigned int cpu)
 	int spincnt;
 
 	for (spincnt = 0; spincnt < 10; spincnt++) {
-		trace_rcu_utilization("Start CPU kthread@rcu_wait");
+		trace_rcu_utilization(TPS("Start CPU kthread@rcu_wait"));
 		local_bh_disable();
 		*statusp = RCU_KTHREAD_RUNNING;
 		this_cpu_inc(rcu_cpu_kthread_loops);
@@ -1431,15 +1429,15 @@ static void rcu_cpu_kthread(unsigned int cpu)
 		rcu_kthread_do_work();
 		local_bh_enable();
 		if (*workp == 0) {
-			trace_rcu_utilization("End CPU kthread@rcu_wait");
+			trace_rcu_utilization(TPS("End CPU kthread@rcu_wait"));
 			*statusp = RCU_KTHREAD_WAITING;
 			return;
 		}
 	}
 	*statusp = RCU_KTHREAD_YIELDING;
-	trace_rcu_utilization("Start CPU kthread@rcu_yield");
+	trace_rcu_utilization(TPS("Start CPU kthread@rcu_yield"));
 	schedule_timeout_interruptible(2);
-	trace_rcu_utilization("End CPU kthread@rcu_yield");
+	trace_rcu_utilization(TPS("End CPU kthread@rcu_yield"));
 	*statusp = RCU_KTHREAD_WAITING;
 }
 
@@ -1507,7 +1505,7 @@ static int __init rcu_spawn_kthreads(void)
 }
 early_initcall(rcu_spawn_kthreads);
 
-static void __cpuinit rcu_prepare_kthreads(int cpu)
+static void rcu_prepare_kthreads(int cpu)
 {
 	struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu);
 	struct rcu_node *rnp = rdp->mynode;
@@ -1549,7 +1547,7 @@ static int __init rcu_scheduler_really_started(void)
 }
 early_initcall(rcu_scheduler_really_started);
 
-static void __cpuinit rcu_prepare_kthreads(int cpu)
+static void rcu_prepare_kthreads(int cpu)
 {
 }
 
@@ -2202,7 +2200,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 	 * Wait for the grace period. Do so interruptibly to avoid messing
 	 * up the load average.
 	 */
-	trace_rcu_future_gp(rnp, rdp, c, "StartWait");
+	trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
 	for (;;) {
 		wait_event_interruptible(
 			rnp->nocb_gp_wq[c & 0x1],
@@ -2210,9 +2208,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 		if (likely(d))
 			break;
 		flush_signals(current);
-		trace_rcu_future_gp(rnp, rdp, c, "ResumeWait");
+		trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
 	}
-	trace_rcu_future_gp(rnp, rdp, c, "EndWait");
+	trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
 	smp_mb(); /* Ensure that CB invocation happens after GP end. */
 }
 
@@ -2375,3 +2373,425 @@ static void rcu_kick_nohz_cpu(int cpu)
 		smp_send_reschedule(cpu);
 #endif /* #ifdef CONFIG_NO_HZ_FULL */
 }
+
+
+#ifdef CONFIG_NO_HZ_FULL_SYSIDLE
+
+/*
+ * Define RCU flavor that holds sysidle state. This needs to be the
+ * most active flavor of RCU.
+ */
+#ifdef CONFIG_PREEMPT_RCU
+static struct rcu_state *rcu_sysidle_state = &rcu_preempt_state;
+#else /* #ifdef CONFIG_PREEMPT_RCU */
+static struct rcu_state *rcu_sysidle_state = &rcu_sched_state;
+#endif /* #else #ifdef CONFIG_PREEMPT_RCU */
+
+static int full_sysidle_state;		/* Current system-idle state. */
+#define RCU_SYSIDLE_NOT		0	/* Some CPU is not idle. */
+#define RCU_SYSIDLE_SHORT	1	/* All CPUs idle for brief period. */
+#define RCU_SYSIDLE_LONG	2	/* All CPUs idle for long enough. */
+#define RCU_SYSIDLE_FULL	3	/* All CPUs idle, ready for sysidle. */
+#define RCU_SYSIDLE_FULL_NOTED	4	/* Actually entered sysidle state. */
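
The five states above form a one-way ramp that the code below walks; as a
reading aid:

	RCU_SYSIDLE_NOT    --(scan finds all non-timekeeping CPUs idle)-->
	RCU_SYSIDLE_SHORT  --(still idle after rcu_sysidle_delay())------>
	RCU_SYSIDLE_LONG   --(idle for one more full delay period)------->
	RCU_SYSIDLE_FULL   --(timekeeping CPU sees it in rcu_sys_is_idle())-->
	RCU_SYSIDLE_FULL_NOTED

Any non-timekeeping CPU going non-idle at any point drops the state straight
back to RCU_SYSIDLE_NOT, via rcu_sysidle_cancel() or rcu_sysidle_force_exit().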
+
+/*
+ * Invoked to note exit from irq or task transition to idle. Note that
+ * usermode execution does -not- count as idle here! After all, we want
+ * to detect full-system idle states, not RCU quiescent states and grace
+ * periods. The caller must have disabled interrupts.
+ */
+static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
+{
+	unsigned long j;
+
+	/* Adjust nesting, check for fully idle. */
+	if (irq) {
+		rdtp->dynticks_idle_nesting--;
+		WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
+		if (rdtp->dynticks_idle_nesting != 0)
+			return;	/* Still not fully idle. */
+	} else {
+		if ((rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) ==
+		    DYNTICK_TASK_NEST_VALUE) {
+			rdtp->dynticks_idle_nesting = 0;
+		} else {
+			rdtp->dynticks_idle_nesting -= DYNTICK_TASK_NEST_VALUE;
+			WARN_ON_ONCE(rdtp->dynticks_idle_nesting < 0);
+			return;	/* Still not fully idle. */
+		}
+	}
+
+	/* Record start of fully idle period. */
+	j = jiffies;
+	ACCESS_ONCE(rdtp->dynticks_idle_jiffies) = j;
+	smp_mb__before_atomic_inc();
+	atomic_inc(&rdtp->dynticks_idle);
+	smp_mb__after_atomic_inc();
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks_idle) & 0x1);
+}
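
The ->dynticks_idle counter follows the same protocol as RCU's existing
->dynticks counter: an atomic increment bracketed by full memory barriers on
every idle transition, so an even value means "fully idle", an odd value means
"non-idle", and remote CPUs may sample it locklessly. A minimal reader-side
sketch (hypothetical helper, not part of the patch):

	static bool cpu_sampled_idle(struct rcu_dynticks *rdtp)
	{
		/* Even means the CPU was fully idle at the sample point. */
		return !(atomic_read(&rdtp->dynticks_idle) & 0x1);
	}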
+
+/*
+ * Unconditionally force exit from full system-idle state. This is
+ * invoked when a normal CPU exits idle, but must be called separately
+ * for the timekeeping CPU (tick_do_timer_cpu). The reason for this
+ * is that the timekeeping CPU is permitted to take scheduling-clock
+ * interrupts while the system is in system-idle state, and of course
+ * rcu_sysidle_exit() has no way of distinguishing a scheduling-clock
+ * interrupt from any other type of interrupt.
+ */
+void rcu_sysidle_force_exit(void)
+{
+	int oldstate = ACCESS_ONCE(full_sysidle_state);
+	int newoldstate;
+
+	/*
+	 * Each pass through the following loop attempts to exit full
+	 * system-idle state. If contention proves to be a problem,
+	 * a trylock-based contention tree could be used here.
+	 */
+	while (oldstate > RCU_SYSIDLE_SHORT) {
+		newoldstate = cmpxchg(&full_sysidle_state,
+				      oldstate, RCU_SYSIDLE_NOT);
+		if (oldstate == newoldstate &&
+		    oldstate == RCU_SYSIDLE_FULL_NOTED) {
+			rcu_kick_nohz_cpu(tick_do_timer_cpu);
+			return; /* We cleared it, done! */
+		}
+		oldstate = newoldstate;
+	}
+	smp_mb(); /* Order initial oldstate fetch vs. later non-idle work. */
+}
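
cmpxchg() returns the value it actually found, so oldstate == newoldstate
means this caller performed the transition, and the FULL_NOTED test ensures
that only the CPU that tore the system out of the noted state kicks the
timekeeper. A worked two-CPU interleaving, as a reading aid:

	/*
	 * CPU A                              CPU B
	 * oldstate = FULL_NOTED              oldstate = FULL_NOTED
	 * cmpxchg(FULL_NOTED -> NOT) wins    cmpxchg(FULL_NOTED -> NOT)
	 *   -> kicks tick_do_timer_cpu         fails and returns NOT; the
	 *      and returns                     loop exits and the final
	 *                                      smp_mb() orders the fetch
	 *                                      against later non-idle work.
	 */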
+
+/*
+ * Invoked to note entry to irq or task transition from idle. Note that
+ * usermode execution does -not- count as idle here! The caller must
+ * have disabled interrupts.
+ */
+static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
+{
+	/* Adjust nesting, check for already non-idle. */
+	if (irq) {
+		rdtp->dynticks_idle_nesting++;
+		WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
+		if (rdtp->dynticks_idle_nesting != 1)
+			return; /* Already non-idle. */
+	} else {
+		/*
+		 * Allow for irq misnesting. Yes, it really is possible
+		 * to enter an irq handler then never leave it, and maybe
+		 * also vice versa. Handle both possibilities.
+		 */
+		if (rdtp->dynticks_idle_nesting & DYNTICK_TASK_NEST_MASK) {
+			rdtp->dynticks_idle_nesting += DYNTICK_TASK_NEST_VALUE;
+			WARN_ON_ONCE(rdtp->dynticks_idle_nesting <= 0);
+			return; /* Already non-idle. */
+		} else {
+			rdtp->dynticks_idle_nesting = DYNTICK_TASK_EXIT_IDLE;
+		}
+	}
+
+	/* Record end of idle period. */
+	smp_mb__before_atomic_inc();
+	atomic_inc(&rdtp->dynticks_idle);
+	smp_mb__after_atomic_inc();
+	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks_idle) & 0x1));
+
+	/*
+	 * If we are the timekeeping CPU, we are permitted to be non-idle
+	 * during a system-idle state. This must be the case, because
+	 * the timekeeping CPU has to take scheduling-clock interrupts
+	 * during the time that the system is transitioning to full
+	 * system-idle state. This means that the timekeeping CPU must
+	 * invoke rcu_sysidle_force_exit() directly if it does anything
+	 * more than take a scheduling-clock interrupt.
+	 */
+	if (smp_processor_id() == tick_do_timer_cpu)
+		return;
+
+	/* Update system-idle state: We are clearly no longer fully idle! */
+	rcu_sysidle_force_exit();
+}
+
+/*
+ * Check to see if the current CPU is idle. Note that usermode execution
+ * does not count as idle. The caller must have disabled interrupts.
+ */
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+				  unsigned long *maxj)
+{
+	int cur;
+	unsigned long j;
+	struct rcu_dynticks *rdtp = rdp->dynticks;
+
+	/*
+	 * If some other CPU has already reported non-idle, if this is
+	 * not the flavor of RCU that tracks sysidle state, or if this
+	 * is an offline or the timekeeping CPU, nothing to do.
+	 */
+	if (!*isidle || rdp->rsp != rcu_sysidle_state ||
+	    cpu_is_offline(rdp->cpu) || rdp->cpu == tick_do_timer_cpu)
+		return;
+	if (rcu_gp_in_progress(rdp->rsp))
+		WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu);
+
+	/* Pick up current idle and NMI-nesting counter and check. */
+	cur = atomic_read(&rdtp->dynticks_idle);
+	if (cur & 0x1) {
+		*isidle = false; /* We are not idle! */
+		return;
+	}
+	smp_mb(); /* Read counters before timestamps. */
+
+	/* Pick up timestamps. */
+	j = ACCESS_ONCE(rdtp->dynticks_idle_jiffies);
+	/* If this CPU entered idle more recently, update maxj timestamp. */
+	if (ULONG_CMP_LT(*maxj, j))
+		*maxj = j;
+}
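
The timestamp comparison relies on RCU's wraparound-safe jiffies helpers,
which at the time were defined in include/linux/rcupdate.h along these lines
(quoted for reference; verify against the tree):

	/* Wraparound-safe comparisons for unsigned long timestamps. */
	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
	#define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (b) - (a))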
+
+/*
+ * Is this the flavor of RCU that is handling full-system idle?
+ */
+static bool is_sysidle_rcu_state(struct rcu_state *rsp)
+{
+	return rsp == rcu_sysidle_state;
+}
+
+/*
+ * Bind the grace-period kthread for the sysidle flavor of RCU to the
+ * timekeeping CPU.
+ */
+static void rcu_bind_gp_kthread(void)
+{
+	int cpu = ACCESS_ONCE(tick_do_timer_cpu);
+
+	if (cpu < 0 || cpu >= nr_cpu_ids)
+		return;
+	if (raw_smp_processor_id() != cpu)
+		set_cpus_allowed_ptr(current, cpumask_of(cpu));
+}
+
+/*
+ * Return a delay in jiffies based on the number of CPUs, rcu_node
+ * leaf fanout, and jiffies tick rate. The idea is to allow larger
+ * systems more time to transition to full-idle state in order to
+ * avoid the cache thrashing that otherwise occurs on the state variable.
+ * Really small systems (less than a couple of tens of CPUs) should
+ * instead use a single global atomically incremented counter, and later
+ * versions of this will automatically reconfigure themselves accordingly.
+ */
+static unsigned long rcu_sysidle_delay(void)
+{
+	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
+		return 0;
+	return DIV_ROUND_UP(nr_cpu_ids * HZ, rcu_fanout_leaf * 1000);
+}
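
As a worked example, assume HZ=1000, rcu_fanout_leaf=16, and
CONFIG_NO_HZ_FULL_SYSIDLE_SMALL=8 (assumed values, not taken from this patch):
a 4096-CPU system gets DIV_ROUND_UP(4096 * 1000, 16 * 1000) = 256 jiffies,
roughly a quarter second between state advances, while a machine with at most
8 CPUs returns 0 and may advance the state machine on every scan.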
+
+/*
+ * Advance the full-system-idle state. This is invoked when all of
+ * the non-timekeeping CPUs are idle.
+ */
+static void rcu_sysidle(unsigned long j)
+{
+	/* Check the current state. */
+	switch (ACCESS_ONCE(full_sysidle_state)) {
+	case RCU_SYSIDLE_NOT:
+
+		/* First time all are idle, so note a short idle period. */
+		ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_SHORT;
+		break;
+
+	case RCU_SYSIDLE_SHORT:
+
+		/*
+		 * Idle for a bit, time to advance to next state?
+		 * cmpxchg failure means race with non-idle, let them win.
+		 */
+		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
+			(void)cmpxchg(&full_sysidle_state,
+				      RCU_SYSIDLE_SHORT, RCU_SYSIDLE_LONG);
+		break;
+
+	case RCU_SYSIDLE_LONG:
+
+		/*
+		 * Do an additional check pass before advancing to full.
+		 * cmpxchg failure means race with non-idle, let them win.
+		 */
+		if (ULONG_CMP_GE(jiffies, j + rcu_sysidle_delay()))
+			(void)cmpxchg(&full_sysidle_state,
+				      RCU_SYSIDLE_LONG, RCU_SYSIDLE_FULL);
+		break;
+
+	default:
+		break;
+	}
+}
+
+/*
+ * Found a non-idle non-timekeeping CPU, so kick the system-idle state
+ * back to the beginning.
+ */
+static void rcu_sysidle_cancel(void)
+{
+	smp_mb();
+	ACCESS_ONCE(full_sysidle_state) = RCU_SYSIDLE_NOT;
+}
+
+/*
+ * Update the sysidle state based on the results of a force-quiescent-state
+ * scan of the CPUs' dyntick-idle state.
+ */
+static void rcu_sysidle_report(struct rcu_state *rsp, int isidle,
+			       unsigned long maxj, bool gpkt)
+{
+	if (rsp != rcu_sysidle_state)
+		return; /* Wrong flavor, ignore. */
+	if (gpkt && nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL)
+		return; /* Running state machine from timekeeping CPU. */
+	if (isidle)
+		rcu_sysidle(maxj);    /* More idle! */
+	else
+		rcu_sysidle_cancel(); /* Idle is over. */
+}
+
+/*
+ * Wrapper for rcu_sysidle_report() when called from the grace-period
+ * kthread's context.
+ */
+static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
+				  unsigned long maxj)
+{
+	rcu_sysidle_report(rsp, isidle, maxj, true);
+}
+
+/* Callback and function for forcing an RCU grace period. */
+struct rcu_sysidle_head {
+	struct rcu_head rh;
+	int inuse;
+};
+
+static void rcu_sysidle_cb(struct rcu_head *rhp)
+{
+	struct rcu_sysidle_head *rshp;
+
+	/*
+	 * The following memory barrier is needed to replace the
+	 * memory barriers that would normally be in the memory
+	 * allocator.
+	 */
+	smp_mb(); /* grace period precedes setting inuse. */
+
+	rshp = container_of(rhp, struct rcu_sysidle_head, rh);
+	ACCESS_ONCE(rshp->inuse) = 0;
+}
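
The single static rcu_sysidle_head plus its inuse flag lets rcu_sys_is_idle()
below queue at most one grace period at a time with no dynamic allocation;
the xchg() on the flag, paired with the smp_mb() in the callback, stands in
for the ordering that kmalloc()/kfree() would otherwise have supplied. The
pattern in isolation (a sketch; the wrapper name is hypothetical):

	static struct rcu_sysidle_head rsh;	/* statically allocated, reused */

	static void rcu_sysidle_maybe_start_gp(void)
	{
		/* Whoever wins the xchg() owns rsh until the callback runs. */
		if (!rsh.inuse && xchg(&rsh.inuse, 1) == 0)
			call_rcu(&rsh.rh, rcu_sysidle_cb);
		/* rcu_sysidle_cb() clears ->inuse after the grace period. */
	}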
+
+/*
+ * Check to see if the system is fully idle, other than the timekeeping CPU.
+ * The caller must have disabled interrupts.
+ */
+bool rcu_sys_is_idle(void)
+{
+	static struct rcu_sysidle_head rsh;
+	int rss = ACCESS_ONCE(full_sysidle_state);
+
+	if (WARN_ON_ONCE(smp_processor_id() != tick_do_timer_cpu))
+		return false;
+
+	/* Handle small-system case by doing a full scan of CPUs. */
+	if (nr_cpu_ids <= CONFIG_NO_HZ_FULL_SYSIDLE_SMALL) {
+		int oldrss = rss - 1;
+
+		/*
+		 * One pass to advance to each state up to _FULL.
+		 * Give up if any pass fails to advance the state.
+		 */
+		while (rss < RCU_SYSIDLE_FULL && oldrss < rss) {
+			int cpu;
+			bool isidle = true;
+			unsigned long maxj = jiffies - ULONG_MAX / 4;
+			struct rcu_data *rdp;
+
+			/* Scan all the CPUs looking for nonidle CPUs. */
+			for_each_possible_cpu(cpu) {
+				rdp = per_cpu_ptr(rcu_sysidle_state->rda, cpu);
+				rcu_sysidle_check_cpu(rdp, &isidle, &maxj);
+				if (!isidle)
+					break;
+			}
+			rcu_sysidle_report(rcu_sysidle_state,
+					   isidle, maxj, false);
+			oldrss = rss;
+			rss = ACCESS_ONCE(full_sysidle_state);
+		}
+	}
+
+	/* If this is the first observation of an idle period, record it. */
+	if (rss == RCU_SYSIDLE_FULL) {
+		rss = cmpxchg(&full_sysidle_state,
+			      RCU_SYSIDLE_FULL, RCU_SYSIDLE_FULL_NOTED);
+		return rss == RCU_SYSIDLE_FULL;
+	}
+
+	smp_mb(); /* ensure rss load happens before later caller actions. */
+
+	/* If already fully idle, tell the caller (in case of races). */
+	if (rss == RCU_SYSIDLE_FULL_NOTED)
+		return true;
+
+	/*
+	 * If we aren't there yet, and a grace period is not in flight,
+	 * initiate a grace period. Either way, tell the caller that
+	 * we are not there yet. We use an xchg() rather than an assignment
+	 * to make up for the memory barriers that would otherwise be
+	 * provided by the memory allocator.
+	 */
+	if (nr_cpu_ids > CONFIG_NO_HZ_FULL_SYSIDLE_SMALL &&
+	    !rcu_gp_in_progress(rcu_sysidle_state) &&
+	    !rsh.inuse && xchg(&rsh.inuse, 1) == 0)
+		call_rcu(&rsh.rh, rcu_sysidle_cb);
+	return false;
+}
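
For context, a hedged sketch of the intended consumer (not part of this patch;
the helper name is hypothetical): the timekeeping CPU polls rcu_sys_is_idle()
from its NO_HZ_FULL path and may stop its own tick only on a true return; once
it later does anything beyond taking a scheduling-clock interrupt, it must call
rcu_sysidle_force_exit() itself, because rcu_sysidle_exit() deliberately
ignores the timekeeping CPU.

	static void timekeeper_try_stop_tick(void)	/* hypothetical */
	{
		if (rcu_sys_is_idle()) {
			/* All non-timekeeping CPUs are fully idle. */
			/* ... stop the tick, enter deeper idle ... */
		}
	}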
+
+/*
+ * Initialize dynticks sysidle state for CPUs coming online.
+ */
+static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
+{
+	rdtp->dynticks_idle_nesting = DYNTICK_TASK_NEST_VALUE;
+}
+
+#else /* #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */
+
+static void rcu_sysidle_enter(struct rcu_dynticks *rdtp, int irq)
+{
+}
+
+static void rcu_sysidle_exit(struct rcu_dynticks *rdtp, int irq)
+{
+}
+
+static void rcu_sysidle_check_cpu(struct rcu_data *rdp, bool *isidle,
+				  unsigned long *maxj)
+{
+}
+
+static bool is_sysidle_rcu_state(struct rcu_state *rsp)
+{
+	return false;
+}
+
+static void rcu_bind_gp_kthread(void)
+{
+}
+
+static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle,
+				  unsigned long maxj)
+{
+}
+
+static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp)
+{
+}
+
+#endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */