diff options
author | Paul E. McKenney <paul.mckenney@linaro.org> | 2012-12-28 14:30:36 -0500 |
---|---|---|
committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2013-03-26 11:04:51 -0400 |
commit | c0f4dfd4f90f1667d234d21f15153ea09a2eaa66 (patch) | |
tree | e06fa6eef015a373849855249752ec525ca8ad4b | |
parent | b11cc5760a9c48c870ad286e8a6d8fdb998fa58d (diff) |
rcu: Make RCU_FAST_NO_HZ take advantage of numbered callbacks
Because RCU callbacks are now associated with the number of the grace
period that they must wait for, CPUs can now advance callbacks
corresponding to grace periods that ended while a given CPU was in
dyntick-idle mode. This eliminates the need to try forcing the RCU
state machine while entering idle, thus reducing the CPU intensiveness
of RCU_FAST_NO_HZ, which should increase its energy efficiency.
Signed-off-by: Paul E. McKenney <paul.mckenney@linaro.org>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
-rw-r--r-- | Documentation/kernel-parameters.txt | 28 | ||||
-rw-r--r-- | include/linux/rcupdate.h | 1 | ||||
-rw-r--r-- | init/Kconfig | 17 | ||||
-rw-r--r-- | kernel/rcutree.c | 28 | ||||
-rw-r--r-- | kernel/rcutree.h | 12 | ||||
-rw-r--r-- | kernel/rcutree_plugin.h | 374 | ||||
-rw-r--r-- | kernel/rcutree_trace.c | 2 |
7 files changed, 149 insertions, 313 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index a17ba16c8fc8..22303b2e74bc 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt | |||
@@ -2490,6 +2490,17 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2490 | leaf rcu_node structure. Useful for very large | 2490 | leaf rcu_node structure. Useful for very large |
2491 | systems. | 2491 | systems. |
2492 | 2492 | ||
2493 | rcutree.jiffies_till_first_fqs= [KNL,BOOT] | ||
2494 | Set delay from grace-period initialization to | ||
2495 | first attempt to force quiescent states. | ||
2496 | Units are jiffies, minimum value is zero, | ||
2497 | and maximum value is HZ. | ||
2498 | |||
2499 | rcutree.jiffies_till_next_fqs= [KNL,BOOT] | ||
2500 | Set delay between subsequent attempts to force | ||
2501 | quiescent states. Units are jiffies, minimum | ||
2502 | value is one, and maximum value is HZ. | ||
2503 | |||
2493 | rcutree.qhimark= [KNL,BOOT] | 2504 | rcutree.qhimark= [KNL,BOOT] |
2494 | Set threshold of queued | 2505 | Set threshold of queued |
2495 | RCU callbacks over which batch limiting is disabled. | 2506 | RCU callbacks over which batch limiting is disabled. |
@@ -2504,16 +2515,15 @@ bytes respectively. Such letter suffixes can also be entirely omitted. | |||
2504 | rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] | 2515 | rcutree.rcu_cpu_stall_timeout= [KNL,BOOT] |
2505 | Set timeout for RCU CPU stall warning messages. | 2516 | Set timeout for RCU CPU stall warning messages. |
2506 | 2517 | ||
2507 | rcutree.jiffies_till_first_fqs= [KNL,BOOT] | 2518 | rcutree.rcu_idle_gp_delay= [KNL,BOOT] |
2508 | Set delay from grace-period initialization to | 2519 | Set wakeup interval for idle CPUs that have |
2509 | first attempt to force quiescent states. | 2520 | RCU callbacks (RCU_FAST_NO_HZ=y). |
2510 | Units are jiffies, minimum value is zero, | ||
2511 | and maximum value is HZ. | ||
2512 | 2521 | ||
2513 | rcutree.jiffies_till_next_fqs= [KNL,BOOT] | 2522 | rcutree.rcu_idle_lazy_gp_delay= [KNL,BOOT] |
2514 | Set delay between subsequent attempts to force | 2523 | Set wakeup interval for idle CPUs that have |
2515 | quiescent states. Units are jiffies, minimum | 2524 | only "lazy" RCU callbacks (RCU_FAST_NO_HZ=y). |
2516 | value is one, and maximum value is HZ. | 2525 | Lazy RCU callbacks are those which RCU can |
2526 | prove do nothing more than free memory. | ||
2517 | 2527 | ||
2518 | rcutorture.fqs_duration= [KNL,BOOT] | 2528 | rcutorture.fqs_duration= [KNL,BOOT] |
2519 | Set duration of force_quiescent_state bursts. | 2529 | Set duration of force_quiescent_state bursts. |
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index b758ce17b309..9ed2c9a4de45 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h | |||
@@ -80,6 +80,7 @@ extern void do_trace_rcu_torture_read(char *rcutorturename, | |||
80 | #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) | 80 | #define UINT_CMP_LT(a, b) (UINT_MAX / 2 < (a) - (b)) |
81 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) | 81 | #define ULONG_CMP_GE(a, b) (ULONG_MAX / 2 >= (a) - (b)) |
82 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) | 82 | #define ULONG_CMP_LT(a, b) (ULONG_MAX / 2 < (a) - (b)) |
83 | #define ulong2long(a) (*(long *)(&(a))) | ||
83 | 84 | ||
84 | /* Exported common interfaces */ | 85 | /* Exported common interfaces */ |
85 | 86 | ||
diff --git a/init/Kconfig b/init/Kconfig index 717584064a7e..a3a2304fa6d2 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
@@ -582,13 +582,16 @@ config RCU_FAST_NO_HZ | |||
582 | depends on NO_HZ && SMP | 582 | depends on NO_HZ && SMP |
583 | default n | 583 | default n |
584 | help | 584 | help |
585 | This option causes RCU to attempt to accelerate grace periods in | 585 | This option permits CPUs to enter dynticks-idle state even if |
586 | order to allow CPUs to enter dynticks-idle state more quickly. | 586 | they have RCU callbacks queued, and prevents RCU from waking |
587 | On the other hand, this option increases the overhead of the | 587 | these CPUs up more than roughly once every four jiffies (by |
588 | dynticks-idle checking, thus degrading scheduling latency. | 588 | default, you can adjust this using the rcutree.rcu_idle_gp_delay |
589 | 589 | parameter), thus improving energy efficiency. On the other | |
590 | Say Y if energy efficiency is critically important, and you don't | 590 | hand, this option increases the duration of RCU grace periods, |
591 | care about real-time response. | 591 | for example, slowing down synchronize_rcu(). |
592 | |||
593 | Say Y if energy efficiency is critically important, and you | ||
594 | don't care about increased grace-period durations. | ||
592 | 595 | ||
593 | Say N if you are unsure. | 596 | Say N if you are unsure. |
594 | 597 | ||
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 2015bce749f9..7b1d7769872a 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -2640,19 +2640,27 @@ static int rcu_pending(int cpu) | |||
2640 | } | 2640 | } |
2641 | 2641 | ||
2642 | /* | 2642 | /* |
2643 | * Check to see if any future RCU-related work will need to be done | 2643 | * Return true if the specified CPU has any callback. If all_lazy is |
2644 | * by the current CPU, even if none need be done immediately, returning | 2644 | * non-NULL, store an indication of whether all callbacks are lazy. |
2645 | * 1 if so. | 2645 | * (If there are no callbacks, all of them are deemed to be lazy.) |
2646 | */ | 2646 | */ |
2647 | static int rcu_cpu_has_callbacks(int cpu) | 2647 | static int rcu_cpu_has_callbacks(int cpu, bool *all_lazy) |
2648 | { | 2648 | { |
2649 | bool al = true; | ||
2650 | bool hc = false; | ||
2651 | struct rcu_data *rdp; | ||
2649 | struct rcu_state *rsp; | 2652 | struct rcu_state *rsp; |
2650 | 2653 | ||
2651 | /* RCU callbacks either ready or pending? */ | 2654 | for_each_rcu_flavor(rsp) { |
2652 | for_each_rcu_flavor(rsp) | 2655 | rdp = per_cpu_ptr(rsp->rda, cpu); |
2653 | if (per_cpu_ptr(rsp->rda, cpu)->nxtlist) | 2656 | if (rdp->qlen != rdp->qlen_lazy) |
2654 | return 1; | 2657 | al = false; |
2655 | return 0; | 2658 | if (rdp->nxtlist) |
2659 | hc = true; | ||
2660 | } | ||
2661 | if (all_lazy) | ||
2662 | *all_lazy = al; | ||
2663 | return hc; | ||
2656 | } | 2664 | } |
2657 | 2665 | ||
2658 | /* | 2666 | /* |
@@ -2871,7 +2879,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) | |||
2871 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; | 2879 | rdp->dynticks->dynticks_nesting = DYNTICK_TASK_EXIT_IDLE; |
2872 | atomic_set(&rdp->dynticks->dynticks, | 2880 | atomic_set(&rdp->dynticks->dynticks, |
2873 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); | 2881 | (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1); |
2874 | rcu_prepare_for_idle_init(cpu); | ||
2875 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 2882 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
2876 | 2883 | ||
2877 | /* Add CPU to rcu_node bitmasks. */ | 2884 | /* Add CPU to rcu_node bitmasks. */ |
@@ -2945,7 +2952,6 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, | |||
2945 | */ | 2952 | */ |
2946 | for_each_rcu_flavor(rsp) | 2953 | for_each_rcu_flavor(rsp) |
2947 | rcu_cleanup_dying_cpu(rsp); | 2954 | rcu_cleanup_dying_cpu(rsp); |
2948 | rcu_cleanup_after_idle(cpu); | ||
2949 | break; | 2955 | break; |
2950 | case CPU_DEAD: | 2956 | case CPU_DEAD: |
2951 | case CPU_DEAD_FROZEN: | 2957 | case CPU_DEAD_FROZEN: |
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b6c2335efbdf..96a27f922e92 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -88,18 +88,13 @@ struct rcu_dynticks { | |||
88 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ | 88 | int dynticks_nmi_nesting; /* Track NMI nesting level. */ |
89 | atomic_t dynticks; /* Even value for idle, else odd. */ | 89 | atomic_t dynticks; /* Even value for idle, else odd. */ |
90 | #ifdef CONFIG_RCU_FAST_NO_HZ | 90 | #ifdef CONFIG_RCU_FAST_NO_HZ |
91 | int dyntick_drain; /* Prepare-for-idle state variable. */ | 91 | bool all_lazy; /* Are all CPU's CBs lazy? */ |
92 | unsigned long dyntick_holdoff; | ||
93 | /* No retries for the jiffy of failure. */ | ||
94 | struct timer_list idle_gp_timer; | ||
95 | /* Wake up CPU sleeping with callbacks. */ | ||
96 | unsigned long idle_gp_timer_expires; | ||
97 | /* When to wake up CPU (for repost). */ | ||
98 | bool idle_first_pass; /* First pass of attempt to go idle? */ | ||
99 | unsigned long nonlazy_posted; | 92 | unsigned long nonlazy_posted; |
100 | /* # times non-lazy CBs posted to CPU. */ | 93 | /* # times non-lazy CBs posted to CPU. */ |
101 | unsigned long nonlazy_posted_snap; | 94 | unsigned long nonlazy_posted_snap; |
102 | /* idle-period nonlazy_posted snapshot. */ | 95 | /* idle-period nonlazy_posted snapshot. */ |
96 | unsigned long last_accelerate; | ||
97 | /* Last jiffy CBs were accelerated. */ | ||
103 | int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ | 98 | int tick_nohz_enabled_snap; /* Previously seen value from sysfs. */ |
104 | #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ | 99 | #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */ |
105 | }; | 100 | }; |
@@ -521,7 +516,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, | |||
521 | struct rcu_node *rnp); | 516 | struct rcu_node *rnp); |
522 | #endif /* #ifdef CONFIG_RCU_BOOST */ | 517 | #endif /* #ifdef CONFIG_RCU_BOOST */ |
523 | static void __cpuinit rcu_prepare_kthreads(int cpu); | 518 | static void __cpuinit rcu_prepare_kthreads(int cpu); |
524 | static void rcu_prepare_for_idle_init(int cpu); | ||
525 | static void rcu_cleanup_after_idle(int cpu); | 519 | static void rcu_cleanup_after_idle(int cpu); |
526 | static void rcu_prepare_for_idle(int cpu); | 520 | static void rcu_prepare_for_idle(int cpu); |
527 | static void rcu_idle_count_callbacks_posted(void); | 521 | static void rcu_idle_count_callbacks_posted(void); |
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index 28185ad18df3..d318f9f18be5 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h | |||
@@ -1543,14 +1543,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu) | |||
1543 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | 1543 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) |
1544 | { | 1544 | { |
1545 | *delta_jiffies = ULONG_MAX; | 1545 | *delta_jiffies = ULONG_MAX; |
1546 | return rcu_cpu_has_callbacks(cpu); | 1546 | return rcu_cpu_has_callbacks(cpu, NULL); |
1547 | } | ||
1548 | |||
1549 | /* | ||
1550 | * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it. | ||
1551 | */ | ||
1552 | static void rcu_prepare_for_idle_init(int cpu) | ||
1553 | { | ||
1554 | } | 1547 | } |
1555 | 1548 | ||
1556 | /* | 1549 | /* |
@@ -1587,16 +1580,6 @@ static void rcu_idle_count_callbacks_posted(void) | |||
1587 | * | 1580 | * |
1588 | * The following three preprocessor symbols control this state machine: | 1581 | * The following three preprocessor symbols control this state machine: |
1589 | * | 1582 | * |
1590 | * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt | ||
1591 | * to satisfy RCU. Beyond this point, it is better to incur a periodic | ||
1592 | * scheduling-clock interrupt than to loop through the state machine | ||
1593 | * at full power. | ||
1594 | * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are | ||
1595 | * optional if RCU does not need anything immediately from this | ||
1596 | * CPU, even if this CPU still has RCU callbacks queued. The first | ||
1597 | * times through the state machine are mandatory: we need to give | ||
1598 | * the state machine a chance to communicate a quiescent state | ||
1599 | * to the RCU core. | ||
1600 | * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted | 1583 | * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted |
1601 | * to sleep in dyntick-idle mode with RCU callbacks pending. This | 1584 | * to sleep in dyntick-idle mode with RCU callbacks pending. This |
1602 | * is sized to be roughly one RCU grace period. Those energy-efficiency | 1585 | * is sized to be roughly one RCU grace period. Those energy-efficiency |
@@ -1612,15 +1595,9 @@ static void rcu_idle_count_callbacks_posted(void) | |||
1612 | * adjustment, they can be converted into kernel config parameters, though | 1595 | * adjustment, they can be converted into kernel config parameters, though |
1613 | * making the state machine smarter might be a better option. | 1596 | * making the state machine smarter might be a better option. |
1614 | */ | 1597 | */ |
1615 | #define RCU_IDLE_FLUSHES 5 /* Number of dyntick-idle tries. */ | ||
1616 | #define RCU_IDLE_OPT_FLUSHES 3 /* Optional dyntick-idle tries. */ | ||
1617 | #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ | 1598 | #define RCU_IDLE_GP_DELAY 4 /* Roughly one grace period. */ |
1618 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ | 1599 | #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ) /* Roughly six seconds. */ |
1619 | 1600 | ||
1620 | static int rcu_idle_flushes = RCU_IDLE_FLUSHES; | ||
1621 | module_param(rcu_idle_flushes, int, 0644); | ||
1622 | static int rcu_idle_opt_flushes = RCU_IDLE_OPT_FLUSHES; | ||
1623 | module_param(rcu_idle_opt_flushes, int, 0644); | ||
1624 | static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; | 1601 | static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY; |
1625 | module_param(rcu_idle_gp_delay, int, 0644); | 1602 | module_param(rcu_idle_gp_delay, int, 0644); |
1626 | static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; | 1603 | static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY; |
@@ -1629,178 +1606,97 @@ module_param(rcu_idle_lazy_gp_delay, int, 0644); | |||
1629 | extern int tick_nohz_enabled; | 1606 | extern int tick_nohz_enabled; |
1630 | 1607 | ||
1631 | /* | 1608 | /* |
1632 | * Does the specified flavor of RCU have non-lazy callbacks pending on | 1609 | * Try to advance callbacks for all flavors of RCU on the current CPU. |
1633 | * the specified CPU? Both RCU flavor and CPU are specified by the | 1610 | * Afterwards, if there are any callbacks ready for immediate invocation, |
1634 | * rcu_data structure. | 1611 | * return true. |
1635 | */ | ||
1636 | static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp) | ||
1637 | { | ||
1638 | return rdp->qlen != rdp->qlen_lazy; | ||
1639 | } | ||
1640 | |||
1641 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
1642 | |||
1643 | /* | ||
1644 | * Are there non-lazy RCU-preempt callbacks? (There cannot be if there | ||
1645 | * is no RCU-preempt in the kernel.) | ||
1646 | */ | 1612 | */ |
1647 | static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) | 1613 | static bool rcu_try_advance_all_cbs(void) |
1648 | { | 1614 | { |
1649 | struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); | 1615 | bool cbs_ready = false; |
1650 | 1616 | struct rcu_data *rdp; | |
1651 | return __rcu_cpu_has_nonlazy_callbacks(rdp); | 1617 | struct rcu_node *rnp; |
1652 | } | 1618 | struct rcu_state *rsp; |
1653 | |||
1654 | #else /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
1655 | 1619 | ||
1656 | static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu) | 1620 | for_each_rcu_flavor(rsp) { |
1657 | { | 1621 | rdp = this_cpu_ptr(rsp->rda); |
1658 | return 0; | 1622 | rnp = rdp->mynode; |
1659 | } | ||
1660 | 1623 | ||
1661 | #endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */ | 1624 | /* |
1625 | * Don't bother checking unless a grace period has | ||
1626 | * completed since we last checked and there are | ||
1627 | * callbacks not yet ready to invoke. | ||
1628 | */ | ||
1629 | if (rdp->completed != rnp->completed && | ||
1630 | rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL]) | ||
1631 | rcu_process_gp_end(rsp, rdp); | ||
1662 | 1632 | ||
1663 | /* | 1633 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1664 | * Does any flavor of RCU have non-lazy callbacks on the specified CPU? | 1634 | cbs_ready = true; |
1665 | */ | 1635 | } |
1666 | static bool rcu_cpu_has_nonlazy_callbacks(int cpu) | 1636 | return cbs_ready; |
1667 | { | ||
1668 | return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) || | ||
1669 | __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) || | ||
1670 | rcu_preempt_cpu_has_nonlazy_callbacks(cpu); | ||
1671 | } | 1637 | } |
1672 | 1638 | ||
1673 | /* | 1639 | /* |
1674 | * Allow the CPU to enter dyntick-idle mode if either: (1) There are no | 1640 | * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready |
1675 | * callbacks on this CPU, (2) this CPU has not yet attempted to enter | 1641 | * to invoke. If the CPU has callbacks, try to advance them. Tell the |
1676 | * dyntick-idle mode, or (3) this CPU is in the process of attempting to | 1642 | * caller to set the timeout based on whether or not there are non-lazy |
1677 | * enter dyntick-idle mode. Otherwise, if we have recently tried and failed | 1643 | * callbacks. |
1678 | * to enter dyntick-idle mode, we refuse to try to enter it. After all, | ||
1679 | * it is better to incur scheduling-clock interrupts than to spin | ||
1680 | * continuously for the same time duration! | ||
1681 | * | 1644 | * |
1682 | * The delta_jiffies argument is used to store the time when RCU is | 1645 | * The caller must have disabled interrupts. |
1683 | * going to need the CPU again if it still has callbacks. The reason | ||
1684 | * for this is that rcu_prepare_for_idle() might need to post a timer, | ||
1685 | * but if so, it will do so after tick_nohz_stop_sched_tick() has set | ||
1686 | * the wakeup time for this CPU. This means that RCU's timer can be | ||
1687 | * delayed until the wakeup time, which defeats the purpose of posting | ||
1688 | * a timer. | ||
1689 | */ | 1646 | */ |
1690 | int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies) | 1647 | int rcu_needs_cpu(int cpu, unsigned long *dj) |
1691 | { | 1648 | { |
1692 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | 1649 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
1693 | 1650 | ||
1694 | /* Flag a new idle sojourn to the idle-entry state machine. */ | 1651 | /* Snapshot to detect later posting of non-lazy callback. */ |
1695 | rdtp->idle_first_pass = 1; | 1652 | rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; |
1653 | |||
1696 | /* If no callbacks, RCU doesn't need the CPU. */ | 1654 | /* If no callbacks, RCU doesn't need the CPU. */ |
1697 | if (!rcu_cpu_has_callbacks(cpu)) { | 1655 | if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) { |
1698 | *delta_jiffies = ULONG_MAX; | 1656 | *dj = ULONG_MAX; |
1699 | return 0; | 1657 | return 0; |
1700 | } | 1658 | } |
1701 | if (rdtp->dyntick_holdoff == jiffies) { | 1659 | |
1702 | /* RCU recently tried and failed, so don't try again. */ | 1660 | /* Attempt to advance callbacks. */ |
1703 | *delta_jiffies = 1; | 1661 | if (rcu_try_advance_all_cbs()) { |
1662 | /* Some ready to invoke, so initiate later invocation. */ | ||
1663 | invoke_rcu_core(); | ||
1704 | return 1; | 1664 | return 1; |
1705 | } | 1665 | } |
1706 | /* Set up for the possibility that RCU will post a timer. */ | 1666 | rdtp->last_accelerate = jiffies; |
1707 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) { | 1667 | |
1708 | *delta_jiffies = round_up(rcu_idle_gp_delay + jiffies, | 1668 | /* Request timer delay depending on laziness, and round. */ |
1709 | rcu_idle_gp_delay) - jiffies; | 1669 | if (rdtp->all_lazy) { |
1670 | *dj = round_up(rcu_idle_gp_delay + jiffies, | ||
1671 | rcu_idle_gp_delay) - jiffies; | ||
1710 | } else { | 1672 | } else { |
1711 | *delta_jiffies = jiffies + rcu_idle_lazy_gp_delay; | 1673 | *dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies; |
1712 | *delta_jiffies = round_jiffies(*delta_jiffies) - jiffies; | ||
1713 | } | 1674 | } |
1714 | return 0; | 1675 | return 0; |
1715 | } | 1676 | } |
1716 | 1677 | ||
1717 | /* | 1678 | /* |
1718 | * Handler for smp_call_function_single(). The only point of this | 1679 | * Prepare a CPU for idle from an RCU perspective. The first major task |
1719 | * handler is to wake the CPU up, so the handler does only tracing. | 1680 | * is to sense whether nohz mode has been enabled or disabled via sysfs. |
1720 | */ | 1681 | * The second major task is to check to see if a non-lazy callback has |
1721 | void rcu_idle_demigrate(void *unused) | 1682 | * arrived at a CPU that previously had only lazy callbacks. The third |
1722 | { | 1683 | * major task is to accelerate (that is, assign grace-period numbers to) |
1723 | trace_rcu_prep_idle("Demigrate"); | 1684 | * any recently arrived callbacks. |
1724 | } | ||
1725 | |||
1726 | /* | ||
1727 | * Timer handler used to force CPU to start pushing its remaining RCU | ||
1728 | * callbacks in the case where it entered dyntick-idle mode with callbacks | ||
1729 | * pending. The hander doesn't really need to do anything because the | ||
1730 | * real work is done upon re-entry to idle, or by the next scheduling-clock | ||
1731 | * interrupt should idle not be re-entered. | ||
1732 | * | ||
1733 | * One special case: the timer gets migrated without awakening the CPU | ||
1734 | * on which the timer was scheduled on. In this case, we must wake up | ||
1735 | * that CPU. We do so with smp_call_function_single(). | ||
1736 | */ | ||
1737 | static void rcu_idle_gp_timer_func(unsigned long cpu_in) | ||
1738 | { | ||
1739 | int cpu = (int)cpu_in; | ||
1740 | |||
1741 | trace_rcu_prep_idle("Timer"); | ||
1742 | if (cpu != smp_processor_id()) | ||
1743 | smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0); | ||
1744 | else | ||
1745 | WARN_ON_ONCE(1); /* Getting here can hang the system... */ | ||
1746 | } | ||
1747 | |||
1748 | /* | ||
1749 | * Initialize the timer used to pull CPUs out of dyntick-idle mode. | ||
1750 | */ | ||
1751 | static void rcu_prepare_for_idle_init(int cpu) | ||
1752 | { | ||
1753 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
1754 | |||
1755 | rdtp->dyntick_holdoff = jiffies - 1; | ||
1756 | setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu); | ||
1757 | rdtp->idle_gp_timer_expires = jiffies - 1; | ||
1758 | rdtp->idle_first_pass = 1; | ||
1759 | } | ||
1760 | |||
1761 | /* | ||
1762 | * Clean up for exit from idle. Because we are exiting from idle, there | ||
1763 | * is no longer any point to ->idle_gp_timer, so cancel it. This will | ||
1764 | * do nothing if this timer is not active, so just cancel it unconditionally. | ||
1765 | */ | ||
1766 | static void rcu_cleanup_after_idle(int cpu) | ||
1767 | { | ||
1768 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | ||
1769 | |||
1770 | del_timer(&rdtp->idle_gp_timer); | ||
1771 | trace_rcu_prep_idle("Cleanup after idle"); | ||
1772 | rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled); | ||
1773 | } | ||
1774 | |||
1775 | /* | ||
1776 | * Check to see if any RCU-related work can be done by the current CPU, | ||
1777 | * and if so, schedule a softirq to get it done. This function is part | ||
1778 | * of the RCU implementation; it is -not- an exported member of the RCU API. | ||
1779 | * | ||
1780 | * The idea is for the current CPU to clear out all work required by the | ||
1781 | * RCU core for the current grace period, so that this CPU can be permitted | ||
1782 | * to enter dyntick-idle mode. In some cases, it will need to be awakened | ||
1783 | * at the end of the grace period by whatever CPU ends the grace period. | ||
1784 | * This allows CPUs to go dyntick-idle more quickly, and to reduce the | ||
1785 | * number of wakeups by a modest integer factor. | ||
1786 | * | ||
1787 | * Because it is not legal to invoke rcu_process_callbacks() with irqs | ||
1788 | * disabled, we do one pass of force_quiescent_state(), then do a | ||
1789 | * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked | ||
1790 | * later. The ->dyntick_drain field controls the sequencing. | ||
1791 | * | 1685 | * |
1792 | * The caller must have disabled interrupts. | 1686 | * The caller must have disabled interrupts. |
1793 | */ | 1687 | */ |
1794 | static void rcu_prepare_for_idle(int cpu) | 1688 | static void rcu_prepare_for_idle(int cpu) |
1795 | { | 1689 | { |
1796 | struct timer_list *tp; | 1690 | struct rcu_data *rdp; |
1797 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | 1691 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
1692 | struct rcu_node *rnp; | ||
1693 | struct rcu_state *rsp; | ||
1798 | int tne; | 1694 | int tne; |
1799 | 1695 | ||
1800 | /* Handle nohz enablement switches conservatively. */ | 1696 | /* Handle nohz enablement switches conservatively. */ |
1801 | tne = ACCESS_ONCE(tick_nohz_enabled); | 1697 | tne = ACCESS_ONCE(tick_nohz_enabled); |
1802 | if (tne != rdtp->tick_nohz_enabled_snap) { | 1698 | if (tne != rdtp->tick_nohz_enabled_snap) { |
1803 | if (rcu_cpu_has_callbacks(cpu)) | 1699 | if (rcu_cpu_has_callbacks(cpu, NULL)) |
1804 | invoke_rcu_core(); /* force nohz to see update. */ | 1700 | invoke_rcu_core(); /* force nohz to see update. */ |
1805 | rdtp->tick_nohz_enabled_snap = tne; | 1701 | rdtp->tick_nohz_enabled_snap = tne; |
1806 | return; | 1702 | return; |
@@ -1808,125 +1704,56 @@ static void rcu_prepare_for_idle(int cpu) | |||
1808 | if (!tne) | 1704 | if (!tne) |
1809 | return; | 1705 | return; |
1810 | 1706 | ||
1811 | /* Adaptive-tick mode, where usermode execution is idle to RCU. */ | 1707 | /* If this is a no-CBs CPU, no callbacks, just return. */ |
1812 | if (!is_idle_task(current)) { | 1708 | if (is_nocb_cpu(cpu)) |
1813 | rdtp->dyntick_holdoff = jiffies - 1; | ||
1814 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) { | ||
1815 | trace_rcu_prep_idle("User dyntick with callbacks"); | ||
1816 | rdtp->idle_gp_timer_expires = | ||
1817 | round_up(jiffies + rcu_idle_gp_delay, | ||
1818 | rcu_idle_gp_delay); | ||
1819 | } else if (rcu_cpu_has_callbacks(cpu)) { | ||
1820 | rdtp->idle_gp_timer_expires = | ||
1821 | round_jiffies(jiffies + rcu_idle_lazy_gp_delay); | ||
1822 | trace_rcu_prep_idle("User dyntick with lazy callbacks"); | ||
1823 | } else { | ||
1824 | return; | ||
1825 | } | ||
1826 | tp = &rdtp->idle_gp_timer; | ||
1827 | mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); | ||
1828 | return; | 1709 | return; |
1829 | } | ||
1830 | 1710 | ||
1831 | /* | 1711 | /* |
1832 | * If this is an idle re-entry, for example, due to use of | 1712 | * If a non-lazy callback arrived at a CPU having only lazy |
1833 | * RCU_NONIDLE() or the new idle-loop tracing API within the idle | 1713 | * callbacks, invoke RCU core for the side-effect of recalculating |
1834 | * loop, then don't take any state-machine actions, unless the | 1714 | * idle duration on re-entry to idle. |
1835 | * momentary exit from idle queued additional non-lazy callbacks. | ||
1836 | * Instead, repost the ->idle_gp_timer if this CPU has callbacks | ||
1837 | * pending. | ||
1838 | */ | 1715 | */ |
1839 | if (!rdtp->idle_first_pass && | 1716 | if (rdtp->all_lazy && |
1840 | (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) { | 1717 | rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) { |
1841 | if (rcu_cpu_has_callbacks(cpu)) { | 1718 | invoke_rcu_core(); |
1842 | tp = &rdtp->idle_gp_timer; | ||
1843 | mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); | ||
1844 | } | ||
1845 | return; | 1719 | return; |
1846 | } | 1720 | } |
1847 | rdtp->idle_first_pass = 0; | ||
1848 | rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1; | ||
1849 | 1721 | ||
1850 | /* | 1722 | /* |
1851 | * If there are no callbacks on this CPU, enter dyntick-idle mode. | 1723 | * If we have not yet accelerated this jiffy, accelerate all |
1852 | * Also reset state to avoid prejudicing later attempts. | 1724 | * callbacks on this CPU. |
1853 | */ | 1725 | */ |
1854 | if (!rcu_cpu_has_callbacks(cpu)) { | 1726 | if (rdtp->last_accelerate == jiffies) |
1855 | rdtp->dyntick_holdoff = jiffies - 1; | ||
1856 | rdtp->dyntick_drain = 0; | ||
1857 | trace_rcu_prep_idle("No callbacks"); | ||
1858 | return; | 1727 | return; |
1728 | rdtp->last_accelerate = jiffies; | ||
1729 | for_each_rcu_flavor(rsp) { | ||
1730 | rdp = per_cpu_ptr(rsp->rda, cpu); | ||
1731 | if (!*rdp->nxttail[RCU_DONE_TAIL]) | ||
1732 | continue; | ||
1733 | rnp = rdp->mynode; | ||
1734 | raw_spin_lock(&rnp->lock); /* irqs already disabled. */ | ||
1735 | rcu_accelerate_cbs(rsp, rnp, rdp); | ||
1736 | raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */ | ||
1859 | } | 1737 | } |
1738 | } | ||
1860 | 1739 | ||
1861 | /* | 1740 | /* |
1862 | * If in holdoff mode, just return. We will presumably have | 1741 | * Clean up for exit from idle. Attempt to advance callbacks based on |
1863 | * refrained from disabling the scheduling-clock tick. | 1742 | * any grace periods that elapsed while the CPU was idle, and if any |
1864 | */ | 1743 | * callbacks are now ready to invoke, initiate invocation. |
1865 | if (rdtp->dyntick_holdoff == jiffies) { | 1744 | */ |
1866 | trace_rcu_prep_idle("In holdoff"); | 1745 | static void rcu_cleanup_after_idle(int cpu) |
1867 | return; | 1746 | { |
1868 | } | 1747 | struct rcu_data *rdp; |
1748 | struct rcu_state *rsp; | ||
1869 | 1749 | ||
1870 | /* Check and update the ->dyntick_drain sequencing. */ | 1750 | if (is_nocb_cpu(cpu)) |
1871 | if (rdtp->dyntick_drain <= 0) { | ||
1872 | /* First time through, initialize the counter. */ | ||
1873 | rdtp->dyntick_drain = rcu_idle_flushes; | ||
1874 | } else if (rdtp->dyntick_drain <= rcu_idle_opt_flushes && | ||
1875 | !rcu_pending(cpu) && | ||
1876 | !local_softirq_pending()) { | ||
1877 | /* Can we go dyntick-idle despite still having callbacks? */ | ||
1878 | rdtp->dyntick_drain = 0; | ||
1879 | rdtp->dyntick_holdoff = jiffies; | ||
1880 | if (rcu_cpu_has_nonlazy_callbacks(cpu)) { | ||
1881 | trace_rcu_prep_idle("Dyntick with callbacks"); | ||
1882 | rdtp->idle_gp_timer_expires = | ||
1883 | round_up(jiffies + rcu_idle_gp_delay, | ||
1884 | rcu_idle_gp_delay); | ||
1885 | } else { | ||
1886 | rdtp->idle_gp_timer_expires = | ||
1887 | round_jiffies(jiffies + rcu_idle_lazy_gp_delay); | ||
1888 | trace_rcu_prep_idle("Dyntick with lazy callbacks"); | ||
1889 | } | ||
1890 | tp = &rdtp->idle_gp_timer; | ||
1891 | mod_timer_pinned(tp, rdtp->idle_gp_timer_expires); | ||
1892 | rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted; | ||
1893 | return; /* Nothing more to do immediately. */ | ||
1894 | } else if (--(rdtp->dyntick_drain) <= 0) { | ||
1895 | /* We have hit the limit, so time to give up. */ | ||
1896 | rdtp->dyntick_holdoff = jiffies; | ||
1897 | trace_rcu_prep_idle("Begin holdoff"); | ||
1898 | invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */ | ||
1899 | return; | 1751 | return; |
1900 | } | 1752 | rcu_try_advance_all_cbs(); |
1901 | 1753 | for_each_rcu_flavor(rsp) { | |
1902 | /* | 1754 | rdp = per_cpu_ptr(rsp->rda, cpu); |
1903 | * Do one step of pushing the remaining RCU callbacks through | 1755 | if (cpu_has_callbacks_ready_to_invoke(rdp)) |
1904 | * the RCU core state machine. | 1756 | invoke_rcu_core(); |
1905 | */ | ||
1906 | #ifdef CONFIG_TREE_PREEMPT_RCU | ||
1907 | if (per_cpu(rcu_preempt_data, cpu).nxtlist) { | ||
1908 | rcu_preempt_qs(cpu); | ||
1909 | force_quiescent_state(&rcu_preempt_state); | ||
1910 | } | ||
1911 | #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ | ||
1912 | if (per_cpu(rcu_sched_data, cpu).nxtlist) { | ||
1913 | rcu_sched_qs(cpu); | ||
1914 | force_quiescent_state(&rcu_sched_state); | ||
1915 | } | ||
1916 | if (per_cpu(rcu_bh_data, cpu).nxtlist) { | ||
1917 | rcu_bh_qs(cpu); | ||
1918 | force_quiescent_state(&rcu_bh_state); | ||
1919 | } | ||
1920 | |||
1921 | /* | ||
1922 | * If RCU callbacks are still pending, RCU still needs this CPU. | ||
1923 | * So try forcing the callbacks through the grace period. | ||
1924 | */ | ||
1925 | if (rcu_cpu_has_callbacks(cpu)) { | ||
1926 | trace_rcu_prep_idle("More callbacks"); | ||
1927 | invoke_rcu_core(); | ||
1928 | } else { | ||
1929 | trace_rcu_prep_idle("Callbacks drained"); | ||
1930 | } | 1757 | } |
1931 | } | 1758 | } |
1932 | 1759 | ||
@@ -2034,16 +1861,13 @@ early_initcall(rcu_register_oom_notifier); | |||
2034 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) | 1861 | static void print_cpu_stall_fast_no_hz(char *cp, int cpu) |
2035 | { | 1862 | { |
2036 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); | 1863 | struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu); |
2037 | struct timer_list *tltp = &rdtp->idle_gp_timer; | 1864 | unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap; |
2038 | char c; | ||
2039 | 1865 | ||
2040 | c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.'; | 1866 | sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c", |
2041 | if (timer_pending(tltp)) | 1867 | rdtp->last_accelerate & 0xffff, jiffies & 0xffff, |
2042 | sprintf(cp, "drain=%d %c timer=%lu", | 1868 | ulong2long(nlpd), |
2043 | rdtp->dyntick_drain, c, tltp->expires - jiffies); | 1869 | rdtp->all_lazy ? 'L' : '.', |
2044 | else | 1870 | rdtp->tick_nohz_enabled_snap ? '.' : 'D'); |
2045 | sprintf(cp, "drain=%d %c timer not pending", | ||
2046 | rdtp->dyntick_drain, c); | ||
2047 | } | 1871 | } |
2048 | 1872 | ||
2049 | #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ | 1873 | #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */ |
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c index 0d095dcaa670..49099e81c87b 100644 --- a/kernel/rcutree_trace.c +++ b/kernel/rcutree_trace.c | |||
@@ -46,8 +46,6 @@ | |||
46 | #define RCU_TREE_NONCORE | 46 | #define RCU_TREE_NONCORE |
47 | #include "rcutree.h" | 47 | #include "rcutree.h" |
48 | 48 | ||
49 | #define ulong2long(a) (*(long *)(&(a))) | ||
50 | |||
51 | static int r_open(struct inode *inode, struct file *file, | 49 | static int r_open(struct inode *inode, struct file *file, |
52 | const struct seq_operations *op) | 50 | const struct seq_operations *op) |
53 | { | 51 | { |