author	Paul E. McKenney <paul.mckenney@linaro.org>	2012-12-28 14:30:36 -0500
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2013-03-26 11:04:51 -0400
commit	c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 (patch)
tree	e06fa6eef015a373849855249752ec525ca8ad4b
parent	b11cc5760a9c48c870ad286e8a6d8fdb998fa58d (diff)
rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks
Because RCU callbacks are now associated with the number of the grace period that they must wait for, CPUs can now advance callbacks corresponding to grace periods that ended while a given CPU was in dyntick-idle mode. This eliminates the need to try forcing the RCU state machine while entering idle, thus reducing the CPU intensiveness of RCU_FAST_NO_HZ, which should increase its energy efficiency.

Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
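A minimal user-space sketch of the idea (hypothetical names and types, not the kernel's data structures): each callback carries the number of the grace period it waits for, so "advancing" callbacks after idle reduces to comparing that number against the most recently completed grace period.

#include <stdio.h>

/* Hypothetical stand-in for the kernel's per-CPU callback lists. */
struct callback {
	unsigned long gp_num;		/* Grace period this callback awaits. */
	void (*func)(void);		/* Invoked once that grace period ends. */
	struct callback *next;
};

static void reclaim(void)
{
	printf("callback invoked\n");
}

/* Detach callbacks whose grace period completed; list is sorted by gp_num. */
static struct callback *advance_callbacks(struct callback **pending,
					  unsigned long completed_gp)
{
	struct callback *ready = NULL, **tail = &ready;

	while (*pending && (*pending)->gp_num <= completed_gp) {
		struct callback *cb = *pending;

		*pending = cb->next;
		cb->next = NULL;
		*tail = cb;
		tail = &cb->next;
	}
	return ready;
}

int main(void)
{
	struct callback cb2 = { .gp_num = 2, .func = reclaim };
	struct callback cb1 = { .gp_num = 1, .func = reclaim, .next = &cb2 };
	struct callback *pending = &cb1, *cb;

	/* CPU wakes from idle to find grace period 1 already completed. */
	for (cb = advance_callbacks(&pending, 1); cb; cb = cb->next)
		cb->func();
	return 0;
}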
-rw-r--r--	Documentation/kernel-parameters.txt	28
-rw-r--r--	include/linux/rcupdate.h	1
-rw-r--r--	init/Kconfig	17
-rw-r--r--	kernel/rcutree.c	28
-rw-r--r--	kernel/rcutree.h	12
-rw-r--r--	kernel/rcutree_plugin.h	374
-rw-r--r--	kernel/rcutree_trace.c	2
7 files changed, 149 insertions(+), 313 deletions(-)
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index a17ba16c8fc8..22303b2e74bc 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -2490,6 +2490,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 			leaf rcu_node structure.  Useful for very large
 			systems.
 
+	rcutree.jiffies_till_first_fqs= [KNL,BOOT]
+			Set delay from grace-period initialization to
+			first attempt to force quiescent states.
+			Units are jiffies, minimum value is zero,
+			and maximum value is HZ.
+
+	rcutree.jiffies_till_next_fqs= [KNL,BOOT]
+			Set delay between subsequent attempts to force
+			quiescent states.  Units are jiffies, minimum
+			value is one, and maximum value is HZ.
+
 	rcutree.qhimark= [KNL,BOOT]
 			Set threshold of queued
 			RCU callbacks over which batch limiting is disabled.
@@ -2504,16 +2515,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
 	rcutree.rcu_cpu_stall_timeout= [KNL,BOOT]
 			Set timeout for RCU CPU stall warning messages.
 
-	rcutree.jiffies_till_first_fqs= [KNL,BOOT]
-			Set delay from grace-period initialization to
-			first attempt to force quiescent states.
-			Units are jiffies, minimum value is zero,
-			and maximum value is HZ.
-
-	rcutree.jiffies_till_next_fqs= [KNL,BOOT]
-			Set delay between subsequent attempts to force
-			quiescent states.  Units are jiffies, minimum
-			value is one, and maximum value is HZ.
+	rcutree.rcu_idle_gp_delay= [KNL,BOOT]
+			Set wakeup interval for idle CPUs that have
+			RCU callbacks (RCU_FAST_NO_HZ=y).
+
+	rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT]
+			Set wakeup interval for idle CPUs that have
+			only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y).
+			Lazy RCU callbacks are those which RCU can
+			prove do nothing more than free memory.
 
 	rcutorture.fqs_duration= [KNL,BOOT]
 			Set duration of force_quiescent_state bursts.
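As a usage sketch (values chosen for illustration, not taken from this patch), the new parameters are passed on the kernel command line like any other rcutree module parameter:

	rcutree.rcu_idle_gp_delay=4 rcutree.rcu_idle_lazy_gp_delay=6000

Here 6000 jiffies would correspond to roughly six seconds on a HZ=1000 system.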
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index b758ce17b309..9ed2c9a4de45 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename,
 #define UINT_CMP_LT(a, b)	(UINT_MAX / 2 < (a) - (b))
 #define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))
 #define ULONG_CMP_LT(a, b)	(ULONG_MAX / 2 < (a) - (b))
+#define ulong2long(a)		(*(long *)(&(a)))
 
 /* Exported common interfaces */
 
diff --git a/init/Kconfig b/init/Kconfig
index 717584064a7e..a3a2304fa6d2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -582,13 +582,16 @@ config RCU_FAST_NO_HZ
 	depends on NO_HZ && SMP
 	default n
 	help
-	  This option causes RCU to attempt to accelerate grace periods in
-	  order to allow CPUs to enter dynticks-idle state more quickly.
-	  On the other hand, this option increases the overhead of the
-	  dynticks-idle checking, thus degrading scheduling latency.
-
-	  Say Y if energy efficiency is critically important, and you don't
-	  care about real-time response.
+	  This option permits CPUs to enter dynticks-idle state even if
+	  they have RCU callbacks queued, and prevents RCU from waking
+	  these CPUs up more than roughly once every four jiffies (by
+	  default, you can adjust this using the rcutree.rcu_idle_gp_delay
+	  parameter), thus improving energy efficiency.  On the other
+	  hand, this option increases the duration of RCU grace periods,
+	  for example, slowing down synchronize_rcu().
+
+	  Say Y if energy efficiency is critically important, and you
+	  don't care about increased grace-period durations.
 
 	  Say N if you are unsure.
 
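As a configuration sketch, the dependencies stated above mean a kernel .config enabling this feature would contain at least the following fragment (illustrative only):

	CONFIG_NO_HZ=y
	CONFIG_SMP=y
	CONFIG_RCU_FAST_NO_HZ=y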
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 2015bce749f9..7b1d7769872a 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu)
 }
 
 /*
- * Check to see if any future RCU-related work will need to be done
- * by the current CPU, even if none need be done immediately, returning
- * 1 if so.
+ * Return true if the specified CPU has any callback.  If all_lazy is
+ * non-NULL, store an indication of whether all callbacks are lazy.
+ * (If there are no callbacks, all of them are deemed to be lazy.)
  */
-static int rcu_cpu_has_callbacks(int cpu)
+static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy)
 {
+	bool al = true;
+	bool hc = false;
+	struct rcu_data *rdp;
 	struct rcu_state *rsp;
 
-	/* RCU callbacks either ready or pending? */
-	for_each_rcu_flavor(rsp)
-		if (per_cpu_ptr(rsp->rda, cpu)->nxtlist)
-			return 1;
-	return 0;
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		if (rdp->qlen != rdp->qlen_lazy)
+			al = false;
+		if (rdp->nxtlist)
+			hc = true;
+	}
+	if (all_lazy)
+		*all_lazy = al;
+	return hc;
 }
 
 /*
@@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE;
 	atomic_set(&rdp->dynticks->dynticks,
 		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
-	rcu_prepare_for_idle_init(cpu);
 	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
 
 	/* Add CPU to rcu_node bitmasks. */
@@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		 */
 		for_each_rcu_flavor(rsp)
 			rcu_cleanup_dying_cpu(rsp);
-		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
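The reworked rcu_cpu_has_callbacks() above packs two answers into one pass over the flavors: the return value reports whether any callbacks exist, and the optional all_lazy out-parameter, when non-NULL, reports whether every callback is lazy. A user-space sketch of this optional-out-parameter convention (hypothetical queue type, not the kernel's rcu_data):

#include <stdbool.h>
#include <stdio.h>

struct queue { int len; int len_lazy; };

/* Return true if any queue has entries; optionally report all-lazy. */
static bool has_entries(const struct queue *q, int n, bool *all_lazy)
{
	bool al = true, he = false;
	int i;

	for (i = 0; i < n; i++) {
		if (q[i].len != q[i].len_lazy)
			al = false;	/* At least one non-lazy entry. */
		if (q[i].len)
			he = true;
	}
	if (all_lazy)		/* Out-parameter is optional. */
		*all_lazy = al;
	return he;
}

int main(void)
{
	struct queue q[2] = { { 3, 3 }, { 1, 0 } };
	bool all_lazy;

	if (has_entries(q, 2, &all_lazy))
		printf("entries present, all lazy: %s\n",
		       all_lazy ? "yes" : "no");	/* Prints "no". */
	if (has_entries(q, 2, NULL))	/* Caller may not care about laziness. */
		printf("still present\n");
	return 0;
}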
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index b6c2335efbdf..96a27f922e92 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -88,18 +88,13 @@ struct rcu_dynticks {
 	int dynticks_nmi_nesting;	/* Track NMI nesting level. */
 	atomic_t dynticks;		/* Even value for idle, else odd. */
 #ifdef CONFIG_RCU_FAST_NO_HZ
-	int dyntick_drain;		/* Prepare-for-idle state variable. */
-	unsigned long dyntick_holdoff;
-					/* No retries for the jiffy of failure. */
-	struct timer_list idle_gp_timer;
-					/* Wake up CPU sleeping with callbacks. */
-	unsigned long idle_gp_timer_expires;
-					/* When to wake up CPU (for repost). */
-	bool idle_first_pass;		/* First pass of attempt to go idle? */
+	bool all_lazy;			/* Are all CPU's CBs lazy? */
 	unsigned long nonlazy_posted;
 					/* # times non-lazy CBs posted to CPU. */
 	unsigned long nonlazy_posted_snap;
 					/* idle-period nonlazy_posted snapshot. */
+	unsigned long last_accelerate;
+					/* Last jiffy CBs were accelerated. */
 	int tick_nohz_enabled_snap;	/* Previously seen value from sysfs. */
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 };
@@ -521,7 +516,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
 						 struct rcu_node *rnp);
 #endif /* #ifdef CONFIG_RCU_BOOST */
 static void __cpuinit rcu_prepare_kthreads(int cpu);
-static void rcu_prepare_for_idle_init(int cpu);
 static void rcu_cleanup_after_idle(int cpu);
 static void rcu_prepare_for_idle(int cpu);
 static void rcu_idle_count_callbacks_posted(void);
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index 28185ad18df3..d318f9f18be5 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -1543,14 +1543,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
 	*delta_jiffies = ULONG_MAX;
-	return rcu_cpu_has_callbacks(cpu);
-}
-
-/*
- * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
- */
-static void rcu_prepare_for_idle_init(int cpu)
-{
+	return rcu_cpu_has_callbacks(cpu, NULL);
 }
 
 /*
@@ -1587,16 +1580,6 @@ static void rcu_idle_count_callbacks_posted(void)
  *
  * The following three proprocessor symbols control this state machine:
  *
- * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
- *	to satisfy RCU.  Beyond this point, it is better to incur a periodic
- *	scheduling-clock interrupt than to loop through the state machine
- *	at full power.
- * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
- *	optional if RCU does not need anything immediately from this
- *	CPU, even if this CPU still has RCU callbacks queued.  The first
- *	times through the state machine are mandatory: we need to give
- *	the state machine a chance to communicate a quiescent state
- *	to the RCU core.
  * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
  *	to sleep in dyntick-idle mode with RCU callbacks pending.  This
  *	is sized to be roughly one RCU grace period.  Those energy-efficiency
@@ -1612,15 +1595,9 @@ static void rcu_idle_count_callbacks_posted(void)
  * adjustment, they can be converted into kernel config parameters, though
  * making the state machine smarter might be a better option.
  */
-#define RCU_IDLE_FLUSHES 5		/* Number of dyntick-idle tries. */
-#define RCU_IDLE_OPT_FLUSHES 3		/* Optional dyntick-idle tries. */
 #define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */
 
-static int rcu_idle_flushes = RCU_IDLE_FLUSHES;
-module_param(rcu_idle_flushes, int, 0644);
-static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES;
-module_param(rcu_idle_opt_flushes, int, 0644);
 static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
 module_param(rcu_idle_gp_delay, int, 0644);
 static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
@@ -1629,178 +1606,97 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644);
 extern int tick_nohz_enabled;
 
 /*
- * Does the specified flavor of RCU have non-lazy callbacks pending on
- * the specified CPU?  Both RCU flavor and CPU are specified by the
- * rcu_data structure.
- */
-static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
-{
-	return rdp->qlen != rdp->qlen_lazy;
-}
-
-#ifdef CONFIG_TREE_PREEMPT_RCU
-
-/*
- * Are there non-lazy RCU-preempt callbacks?  (There cannot be if there
- * is no RCU-preempt in the kernel.)
- */
-static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
-{
-	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-
-	return __rcu_cpu_has_nonlazy_callbacks(rdp);
-}
-
-#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
-{
-	return 0;
-}
-
-#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
-
-/*
- * Does any flavor of RCU have non-lazy callbacks on the specified CPU?
- */
-static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
-{
-	return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
-	       __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
-	       rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
-}
+ * Try to advance callbacks for all flavors of RCU on the current CPU.
+ * Afterwards, if there are any callbacks ready for immediate invocation,
+ * return true.
+ */
+static bool rcu_try_advance_all_cbs(void)
+{
+	bool cbs_ready = false;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
+
+	for_each_rcu_flavor(rsp) {
+		rdp = this_cpu_ptr(rsp->rda);
+		rnp = rdp->mynode;
+
+		/*
+		 * Don't bother checking unless a grace period has
+		 * completed since we last checked and there are
+		 * callbacks not yet ready to invoke.
+		 */
+		if (rdp->completed != rnp->completed &&
+		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
+			rcu_process_gp_end(rsp, rdp);
+
+		if (cpu_has_callbacks_ready_to_invoke(rdp))
+			cbs_ready = true;
+	}
+	return cbs_ready;
+}
 
 /*
- * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
- * callbacks on this CPU, (2) this CPU has not yet attempted to enter
- * dyntick-idle mode, or (3) this CPU is in the process of attempting to
- * enter dyntick-idle mode.  Otherwise, if we have recently tried and failed
- * to enter dyntick-idle mode, we refuse to try to enter it.  After all,
- * it is better to incur scheduling-clock interrupts than to spin
- * continuously for the same time duration!
+ * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
+ * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
+ * caller to set the timeout based on whether or not there are non-lazy
+ * callbacks.
  *
- * The delta_jiffies argument is used to store the time when RCU is
- * going to need the CPU again if it still has callbacks.  The reason
- * for this is that rcu_prepare_for_idle() might need to post a timer,
- * but if so, it will do so after tick_nohz_stop_sched_tick() has set
- * the wakeup time for this CPU.  This means that RCU's timer can be
- * delayed until the wakeup time, which defeats the purpose of posting
- * a timer.
+ * The caller must have disabled interrupts.
  */
-int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
+int rcu_needs_cpu(int cpu, unsigned long *dj)
 {
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
-	/* Flag a new idle sojourn to the idle-entry state machine. */
-	rdtp->idle_first_pass = 1;
+	/* Snapshot to detect later posting of non-lazy callback. */
+	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
+
 	/* If no callbacks, RCU doesn't need the CPU. */
-	if (!rcu_cpu_has_callbacks(cpu)) {
-		*delta_jiffies = ULONG_MAX;
+	if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
+		*dj = ULONG_MAX;
 		return 0;
 	}
-	if (rdtp->dyntick_holdoff == jiffies) {
-		/* RCU recently tried and failed, so don't try again. */
-		*delta_jiffies = 1;
+
+	/* Attempt to advance callbacks. */
+	if (rcu_try_advance_all_cbs()) {
+		/* Some ready to invoke, so initiate later invocation. */
+		invoke_rcu_core();
 		return 1;
 	}
-	/* Set up for the possibility that RCU will post a timer. */
-	if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
-		*delta_jiffies = round_up(rcu_idle_gp_delay + jiffies,
-					  rcu_idle_gp_delay) - jiffies;
+	rdtp->last_accelerate = jiffies;
+
+	/* Request timer delay depending on laziness, and round. */
+	if (!rdtp->all_lazy) {
+		*dj = round_up(rcu_idle_gp_delay + jiffies,
+			       rcu_idle_gp_delay) - jiffies;
 	} else {
-		*delta_jiffies = jiffies + rcu_idle_lazy_gp_delay;
-		*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
+		*dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
 	}
 	return 0;
 }
 
 /*
- * Handler for smp_call_function_single().  The only point of this
- * handler is to wake the CPU up, so the handler does only tracing.
- */
-void rcu_idle_demigrate(void *unused)
-{
-	trace_rcu_prep_idle("Demigrate");
-}
-
-/*
- * Timer handler used to force CPU to start pushing its remaining RCU
- * callbacks in the case where it entered dyntick-idle mode with callbacks
- * pending.  The hander doesn't really need to do anything because the
- * real work is done upon re-entry to idle, or by the next scheduling-clock
- * interrupt should idle not be re-entered.
- *
- * One special case: the timer gets migrated without awakening the CPU
- * on which the timer was scheduled on.  In this case, we must wake up
- * that CPU.  We do so with smp_call_function_single().
- */
-static void rcu_idle_gp_timer_func(unsigned long cpu_in)
-{
-	int cpu = (int)cpu_in;
-
-	trace_rcu_prep_idle("Timer");
-	if (cpu != smp_processor_id())
-		smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0);
-	else
-		WARN_ON_ONCE(1); /* Getting here can hang the system... */
-}
-
-/*
- * Initialize the timer used to pull CPUs out of dyntick-idle mode.
- */
-static void rcu_prepare_for_idle_init(int cpu)
-{
-	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-	rdtp->dyntick_holdoff = jiffies - 1;
-	setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
-	rdtp->idle_gp_timer_expires = jiffies - 1;
-	rdtp->idle_first_pass = 1;
-}
-
-/*
- * Clean up for exit from idle.  Because we are exiting from idle, there
- * is no longer any point to ->idle_gp_timer, so cancel it.  This will
- * do nothing if this timer is not active, so just cancel it unconditionally.
- */
-static void rcu_cleanup_after_idle(int cpu)
-{
-	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-	del_timer(&rdtp->idle_gp_timer);
-	trace_rcu_prep_idle("Cleanup after idle");
-	rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
-}
-
-/*
- * Check to see if any RCU-related work can be done by the current CPU,
- * and if so, schedule a softirq to get it done.  This function is part
- * of the RCU implementation; it is -not- an exported member of the RCU API.
- *
- * The idea is for the current CPU to clear out all work required by the
- * RCU core for the current grace period, so that this CPU can be permitted
- * to enter dyntick-idle mode.  In some cases, it will need to be awakened
- * at the end of the grace period by whatever CPU ends the grace period.
- * This allows CPUs to go dyntick-idle more quickly, and to reduce the
- * number of wakeups by a modest integer factor.
- *
- * Because it is not legal to invoke rcu_process_callbacks() with irqs
- * disabled, we do one pass of force_quiescent_state(), then do a
- * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
- * later.  The ->dyntick_drain field controls the sequencing.
+ * Prepare a CPU for idle from an RCU perspective.  The first major task
+ * is to sense whether nohz mode has been enabled or disabled via sysfs.
+ * The second major task is to check to see if a non-lazy callback has
+ * arrived at a CPU that previously had only lazy callbacks.  The third
+ * major task is to accelerate (that is, assign grace-period numbers to)
+ * any recently arrived callbacks.
  *
  * The caller must have disabled interrupts.
  */
 static void rcu_prepare_for_idle(int cpu)
 {
-	struct timer_list *tp;
+	struct rcu_data *rdp;
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
 	int tne;
 
 	/* Handle nohz enablement switches conservatively. */
 	tne = ACCESS_ONCE(tick_nohz_enabled);
 	if (tne != rdtp->tick_nohz_enabled_snap) {
-		if (rcu_cpu_has_callbacks(cpu))
+		if (rcu_cpu_has_callbacks(cpu, NULL))
 			invoke_rcu_core(); /* force nohz to see update. */
 		rdtp->tick_nohz_enabled_snap = tne;
 		return;
@@ -1808,125 +1704,56 @@ static void rcu_prepare_for_idle(int cpu)
 	if (!tne)
 		return;
 
-	/* Adaptive-tick mode, where usermode execution is idle to RCU. */
-	if (!is_idle_task(current)) {
-		rdtp->dyntick_holdoff = jiffies - 1;
-		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
-			trace_rcu_prep_idle("User dyntick with callbacks");
-			rdtp->idle_gp_timer_expires =
-				round_up(jiffies + rcu_idle_gp_delay,
-					 rcu_idle_gp_delay);
-		} else if (rcu_cpu_has_callbacks(cpu)) {
-			rdtp->idle_gp_timer_expires =
-				round_jiffies(jiffies + rcu_idle_lazy_gp_delay);
-			trace_rcu_prep_idle("User dyntick with lazy callbacks");
-		} else {
-			return;
-		}
-		tp = &rdtp->idle_gp_timer;
-		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+	/* If this is a no-CBs CPU, no callbacks, just return. */
+	if (is_nocb_cpu(cpu))
 		return;
-	}
 
 	/*
-	 * If this is an idle re-entry, for example, due to use of
-	 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
-	 * loop, then don't take any state-machine actions, unless the
-	 * momentary exit from idle queued additional non-lazy callbacks.
-	 * Instead, repost the ->idle_gp_timer if this CPU has callbacks
-	 * pending.
+	 * If a non-lazy callback arrived at a CPU having only lazy
+	 * callbacks, invoke RCU core for the side-effect of recalculating
+	 * idle duration on re-entry to idle.
 	 */
-	if (!rdtp->idle_first_pass &&
-	    (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
-		if (rcu_cpu_has_callbacks(cpu)) {
-			tp = &rdtp->idle_gp_timer;
-			mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
-		}
+	if (rdtp->all_lazy &&
+	    rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
+		invoke_rcu_core();
 		return;
 	}
-	rdtp->idle_first_pass = 0;
-	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
 
 	/*
-	 * If there are no callbacks on this CPU, enter dyntick-idle mode.
-	 * Also reset state to avoid prejudicing later attempts.
+	 * If we have not yet accelerated this jiffy, accelerate all
+	 * callbacks on this CPU.
 	 */
-	if (!rcu_cpu_has_callbacks(cpu)) {
-		rdtp->dyntick_holdoff = jiffies - 1;
-		rdtp->dyntick_drain = 0;
-		trace_rcu_prep_idle("No callbacks");
+	if (rdtp->last_accelerate == jiffies)
 		return;
+	rdtp->last_accelerate = jiffies;
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		if (!*rdp->nxttail[RCU_DONE_TAIL])
+			continue;
+		rnp = rdp->mynode;
+		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		rcu_accelerate_cbs(rsp, rnp, rdp);
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 	}
+}
 
-	/*
-	 * If in holdoff mode, just return.  We will presumably have
-	 * refrained from disabling the scheduling-clock tick.
-	 */
-	if (rdtp->dyntick_holdoff == jiffies) {
-		trace_rcu_prep_idle("In holdoff");
-		return;
-	}
+/*
+ * Clean up for exit from idle.  Attempt to advance callbacks based on
+ * any grace periods that elapsed while the CPU was idle, and if any
+ * callbacks are now ready to invoke, initiate invocation.
+ */
+static void rcu_cleanup_after_idle(int cpu)
+{
+	struct rcu_data *rdp;
+	struct rcu_state *rsp;
 
-	/* Check and update the ->dyntick_drain sequencing. */
-	if (rdtp->dyntick_drain <= 0) {
-		/* First time through, initialize the counter. */
-		rdtp->dyntick_drain = rcu_idle_flushes;
-	} else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes &&
-		   !rcu_pending(cpu) &&
-		   !local_softirq_pending()) {
-		/* Can we go dyntick-idle despite still having callbacks? */
-		rdtp->dyntick_drain = 0;
-		rdtp->dyntick_holdoff = jiffies;
-		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
-			trace_rcu_prep_idle("Dyntick with callbacks");
-			rdtp->idle_gp_timer_expires =
-				round_up(jiffies + rcu_idle_gp_delay,
-					 rcu_idle_gp_delay);
-		} else {
-			rdtp->idle_gp_timer_expires =
-				round_jiffies(jiffies + rcu_idle_lazy_gp_delay);
-			trace_rcu_prep_idle("Dyntick with lazy callbacks");
-		}
-		tp = &rdtp->idle_gp_timer;
-		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
-		rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
-		return; /* Nothing more to do immediately. */
-	} else if (--(rdtp->dyntick_drain) <= 0) {
-		/* We have hit the limit, so time to give up. */
-		rdtp->dyntick_holdoff = jiffies;
-		trace_rcu_prep_idle("Begin holdoff");
-		invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
+	if (is_nocb_cpu(cpu))
 		return;
-	}
-
-	/*
-	 * Do one step of pushing the remaining RCU callbacks through
-	 * the RCU core state machine.
-	 */
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
-		rcu_preempt_qs(cpu);
-		force_quiescent_state(&rcu_preempt_state);
-	}
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
-		rcu_sched_qs(cpu);
-		force_quiescent_state(&rcu_sched_state);
-	}
-	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
-		rcu_bh_qs(cpu);
-		force_quiescent_state(&rcu_bh_state);
-	}
-
-	/*
-	 * If RCU callbacks are still pending, RCU still needs this CPU.
-	 * So try forcing the callbacks through the grace period.
-	 */
-	if (rcu_cpu_has_callbacks(cpu)) {
-		trace_rcu_prep_idle("More callbacks");
-		invoke_rcu_core();
-	} else {
-		trace_rcu_prep_idle("Callbacks drained");
+	rcu_try_advance_all_cbs();
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		if (cpu_has_callbacks_ready_to_invoke(rdp))
+			invoke_rcu_core();
 	}
 }
 
@@ -2034,16 +1861,13 @@ early_initcall(rcu_register_oom_notifier);
 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 {
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-	struct timer_list *tltp = &rdtp->idle_gp_timer;
-	char c;
+	unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
 
-	c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
-	if (timer_pending(tltp))
-		sprintf(cp, "drain=%d %c timer=%lu",
-			rdtp->dyntick_drain, c, tltp->expires - jiffies);
-	else
-		sprintf(cp, "drain=%d %c timer not pending",
-			rdtp->dyntick_drain, c);
+	sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
+		rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
+		ulong2long(nlpd),
+		rdtp->all_lazy ? 'L' : '.',
+		rdtp->tick_nohz_enabled_snap ? '.' : 'D');
 }
 
 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
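The wakeup batching visible in rcu_needs_cpu() above comes from rounding each CPU's requested delay up to a multiple of the interval, so idle CPUs that sleep at nearby times share a single wakeup jiffy. A user-space sketch of the round_up() arithmetic (generic stand-in for the kernel helper, which instead uses a power-of-two mask):

#include <stdio.h>

/* Division-based stand-in for the kernel's round_up() helper. */
#define round_up(x, y)	((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
	unsigned long rcu_idle_gp_delay = 4;	/* Roughly one grace period. */
	unsigned long jiffies;

	/* CPUs going idle at nearby times compute the same wakeup jiffy. */
	for (jiffies = 1000; jiffies < 1005; jiffies++) {
		unsigned long dj = round_up(rcu_idle_gp_delay + jiffies,
					    rcu_idle_gp_delay) - jiffies;

		printf("idle at %lu -> wake at %lu (dj=%lu)\n",
		       jiffies, jiffies + dj, dj);
	}
	return 0;	/* CPUs idling at 1001..1004 all wake at 1008. */
}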
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 0d095dcaa670..49099e81c87b 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -46,8 +46,6 @@
 #define RCU_TREE_NONCORE
 #include "rcutree.h"
 
-#define ulong2long(a) (*(long *)(&(a)))
-
 static int r_open(struct inode *inode, struct file *file,
 					const struct seq_operations *op)
 {