Diffstat (limited to 'kernel/rcutree_plugin.h')
-rw-r--r--	kernel/rcutree_plugin.h	622
1 file changed, 235 insertions(+), 387 deletions(-)
diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h
index c1cc7e17ff9d..170814dc418f 100644
--- a/kernel/rcutree_plugin.h
+++ b/kernel/rcutree_plugin.h
@@ -28,6 +28,7 @@
 #include <linux/gfp.h>
 #include <linux/oom.h>
 #include <linux/smpboot.h>
+#include <linux/tick.h>
 
 #define RCU_KTHREAD_PRIO 1
 
@@ -85,11 +86,21 @@ static void __init rcu_bootup_announce_oddness(void)
 	if (nr_cpu_ids != NR_CPUS)
 		printk(KERN_INFO "\tRCU restricting CPUs from NR_CPUS=%d to nr_cpu_ids=%d.\n", NR_CPUS, nr_cpu_ids);
 #ifdef CONFIG_RCU_NOCB_CPU
+#ifndef CONFIG_RCU_NOCB_CPU_NONE
+	if (!have_rcu_nocb_mask) {
+		alloc_bootmem_cpumask_var(&rcu_nocb_mask);
+		have_rcu_nocb_mask = true;
+	}
+#ifdef CONFIG_RCU_NOCB_CPU_ZERO
+	pr_info("\tExperimental no-CBs CPU 0\n");
+	cpumask_set_cpu(0, rcu_nocb_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ZERO */
+#ifdef CONFIG_RCU_NOCB_CPU_ALL
+	pr_info("\tExperimental no-CBs for all CPUs\n");
+	cpumask_setall(rcu_nocb_mask);
+#endif /* #ifdef CONFIG_RCU_NOCB_CPU_ALL */
+#endif /* #ifndef CONFIG_RCU_NOCB_CPU_NONE */
 	if (have_rcu_nocb_mask) {
-		if (cpumask_test_cpu(0, rcu_nocb_mask)) {
-			cpumask_clear_cpu(0, rcu_nocb_mask);
-			pr_info("\tCPU 0: illegal no-CBs CPU (cleared).\n");
-		}
 		cpulist_scnprintf(nocb_buf, sizeof(nocb_buf), rcu_nocb_mask);
 		pr_info("\tExperimental no-CBs CPUs: %s.\n", nocb_buf);
 		if (rcu_nocb_poll)
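
For orientation, the #ifdef ladder added above builds the boot-time no-CBs mask from the CONFIG_RCU_NOCB_CPU_NONE/ZERO/ALL choice, on top of whatever a rcu_nocbs= boot parameter already requested. Below is a minimal user-space sketch of that composition, with the kernel cpumask replaced by a plain bitmask; it is illustrative only, and none of it is kernel code.

	#include <stdio.h>

	#define NOCB_CPU_ZERO 1	/* pretend CONFIG_RCU_NOCB_CPU_ZERO=y */
	#define NOCB_CPU_ALL  0	/* pretend CONFIG_RCU_NOCB_CPU_ALL is unset */

	int main(void)
	{
		unsigned long nocb_mask = 0;	/* stands in for rcu_nocb_mask */
		int have_mask = 0;		/* stands in for have_rcu_nocb_mask */
		int nr_cpu_ids = 4;

		/* A rcu_nocbs= boot parameter would already have set these. */
		if (!have_mask) {
			nocb_mask = 0;		/* "allocate" an empty mask */
			have_mask = 1;
		}
		if (NOCB_CPU_ZERO)
			nocb_mask |= 1UL << 0;			/* cpumask_set_cpu(0, ...) */
		if (NOCB_CPU_ALL)
			nocb_mask = (1UL << nr_cpu_ids) - 1;	/* cpumask_setall(...) */

		printf("no-CBs CPUs: %#lx\n", nocb_mask);
		return 0;
	}
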
@@ -101,7 +112,7 @@ static void __init rcu_bootup_announce_oddness(void)
 #ifdef CONFIG_TREE_PREEMPT_RCU
 
 struct rcu_state rcu_preempt_state =
-	RCU_STATE_INITIALIZER(rcu_preempt, call_rcu);
+	RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
 DEFINE_PER_CPU(struct rcu_data, rcu_preempt_data);
 static struct rcu_state *rcu_state = &rcu_preempt_state;
 
@@ -1533,14 +1544,7 @@ static void __cpuinit rcu_prepare_kthreads(int cpu)
 int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
 {
 	*delta_jiffies = ULONG_MAX;
-	return rcu_cpu_has_callbacks(cpu);
-}
-
-/*
- * Because we do not have RCU_FAST_NO_HZ, don't bother initializing for it.
- */
-static void rcu_prepare_for_idle_init(int cpu)
-{
+	return rcu_cpu_has_callbacks(cpu, NULL);
 }
 
 /*
@@ -1577,16 +1581,6 @@ static void rcu_idle_count_callbacks_posted(void)
  *
  * The following three proprocessor symbols control this state machine:
  *
- * RCU_IDLE_FLUSHES gives the maximum number of times that we will attempt
- *	to satisfy RCU.  Beyond this point, it is better to incur a periodic
- *	scheduling-clock interrupt than to loop through the state machine
- *	at full power.
- * RCU_IDLE_OPT_FLUSHES gives the number of RCU_IDLE_FLUSHES that are
- *	optional if RCU does not need anything immediately from this
- *	CPU, even if this CPU still has RCU callbacks queued.  The first
- *	times through the state machine are mandatory: we need to give
- *	the state machine a chance to communicate a quiescent state
- *	to the RCU core.
  * RCU_IDLE_GP_DELAY gives the number of jiffies that a CPU is permitted
  *	to sleep in dyntick-idle mode with RCU callbacks pending.  This
  *	is sized to be roughly one RCU grace period.  Those energy-efficiency
@@ -1602,186 +1596,108 @@ static void rcu_idle_count_callbacks_posted(void)
  * adjustment, they can be converted into kernel config parameters, though
  * making the state machine smarter might be a better option.
  */
-#define RCU_IDLE_FLUSHES 5		/* Number of dyntick-idle tries. */
-#define RCU_IDLE_OPT_FLUSHES 3		/* Optional dyntick-idle tries. */
 #define RCU_IDLE_GP_DELAY 4		/* Roughly one grace period. */
 #define RCU_IDLE_LAZY_GP_DELAY (6 * HZ)	/* Roughly six seconds. */
 
-extern int tick_nohz_enabled;
-
-/*
- * Does the specified flavor of RCU have non-lazy callbacks pending on
- * the specified CPU?  Both RCU flavor and CPU are specified by the
- * rcu_data structure.
- */
-static bool __rcu_cpu_has_nonlazy_callbacks(struct rcu_data *rdp)
-{
-	return rdp->qlen != rdp->qlen_lazy;
-}
+static int rcu_idle_gp_delay = RCU_IDLE_GP_DELAY;
+module_param(rcu_idle_gp_delay, int, 0644);
+static int rcu_idle_lazy_gp_delay = RCU_IDLE_LAZY_GP_DELAY;
+module_param(rcu_idle_lazy_gp_delay, int, 0644);
 
-#ifdef CONFIG_TREE_PREEMPT_RCU
+extern int tick_nohz_enabled;
 
 /*
- * Are there non-lazy RCU-preempt callbacks?  (There cannot be if there
- * is no RCU-preempt in the kernel.)
+ * Try to advance callbacks for all flavors of RCU on the current CPU.
+ * Afterwards, if there are any callbacks ready for immediate invocation,
+ * return true.
  */
-static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
+static bool rcu_try_advance_all_cbs(void)
 {
-	struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu);
-
-	return __rcu_cpu_has_nonlazy_callbacks(rdp);
-}
-
-#else /* #ifdef CONFIG_TREE_PREEMPT_RCU */
+	bool cbs_ready = false;
+	struct rcu_data *rdp;
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
 
-static bool rcu_preempt_cpu_has_nonlazy_callbacks(int cpu)
-{
-	return 0;
-}
+	for_each_rcu_flavor(rsp) {
+		rdp = this_cpu_ptr(rsp->rda);
+		rnp = rdp->mynode;
 
-#endif /* else #ifdef CONFIG_TREE_PREEMPT_RCU */
+		/*
+		 * Don't bother checking unless a grace period has
+		 * completed since we last checked and there are
+		 * callbacks not yet ready to invoke.
+		 */
+		if (rdp->completed != rnp->completed &&
+		    rdp->nxttail[RCU_DONE_TAIL] != rdp->nxttail[RCU_NEXT_TAIL])
+			rcu_process_gp_end(rsp, rdp);
 
-/*
- * Does any flavor of RCU have non-lazy callbacks on the specified CPU?
- */
-static bool rcu_cpu_has_nonlazy_callbacks(int cpu)
-{
-	return __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_sched_data, cpu)) ||
-	       __rcu_cpu_has_nonlazy_callbacks(&per_cpu(rcu_bh_data, cpu)) ||
-	       rcu_preempt_cpu_has_nonlazy_callbacks(cpu);
+		if (cpu_has_callbacks_ready_to_invoke(rdp))
+			cbs_ready = true;
+	}
+	return cbs_ready;
 }
 
 /*
- * Allow the CPU to enter dyntick-idle mode if either: (1) There are no
- * callbacks on this CPU, (2) this CPU has not yet attempted to enter
- * dyntick-idle mode, or (3) this CPU is in the process of attempting to
- * enter dyntick-idle mode.  Otherwise, if we have recently tried and failed
- * to enter dyntick-idle mode, we refuse to try to enter it.  After all,
- * it is better to incur scheduling-clock interrupts than to spin
- * continuously for the same time duration!
+ * Allow the CPU to enter dyntick-idle mode unless it has callbacks ready
+ * to invoke.  If the CPU has callbacks, try to advance them.  Tell the
+ * caller to set the timeout based on whether or not there are non-lazy
+ * callbacks.
  *
- * The delta_jiffies argument is used to store the time when RCU is
- * going to need the CPU again if it still has callbacks.  The reason
- * for this is that rcu_prepare_for_idle() might need to post a timer,
- * but if so, it will do so after tick_nohz_stop_sched_tick() has set
- * the wakeup time for this CPU.  This means that RCU's timer can be
- * delayed until the wakeup time, which defeats the purpose of posting
- * a timer.
+ * The caller must have disabled interrupts.
  */
-int rcu_needs_cpu(int cpu, unsigned long *delta_jiffies)
+int rcu_needs_cpu(int cpu, unsigned long *dj)
 {
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
 
-	/* Flag a new idle sojourn to the idle-entry state machine. */
-	rdtp->idle_first_pass = 1;
+	/* Snapshot to detect later posting of non-lazy callback. */
+	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
+
 	/* If no callbacks, RCU doesn't need the CPU. */
-	if (!rcu_cpu_has_callbacks(cpu)) {
-		*delta_jiffies = ULONG_MAX;
+	if (!rcu_cpu_has_callbacks(cpu, &rdtp->all_lazy)) {
+		*dj = ULONG_MAX;
 		return 0;
 	}
-	if (rdtp->dyntick_holdoff == jiffies) {
-		/* RCU recently tried and failed, so don't try again. */
-		*delta_jiffies = 1;
+
+	/* Attempt to advance callbacks. */
+	if (rcu_try_advance_all_cbs()) {
+		/* Some ready to invoke, so initiate later invocation. */
+		invoke_rcu_core();
 		return 1;
 	}
-	/* Set up for the possibility that RCU will post a timer. */
-	if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
-		*delta_jiffies = round_up(RCU_IDLE_GP_DELAY + jiffies,
-					  RCU_IDLE_GP_DELAY) - jiffies;
+	rdtp->last_accelerate = jiffies;
+
+	/* Request timer delay depending on laziness, and round. */
+	if (rdtp->all_lazy) {
+		*dj = round_up(rcu_idle_gp_delay + jiffies,
+			       rcu_idle_gp_delay) - jiffies;
 	} else {
-		*delta_jiffies = jiffies + RCU_IDLE_LAZY_GP_DELAY;
-		*delta_jiffies = round_jiffies(*delta_jiffies) - jiffies;
+		*dj = round_jiffies(rcu_idle_lazy_gp_delay + jiffies) - jiffies;
 	}
 	return 0;
 }
 
 /*
- * Handler for smp_call_function_single().  The only point of this
- * handler is to wake the CPU up, so the handler does only tracing.
- */
-void rcu_idle_demigrate(void *unused)
-{
-	trace_rcu_prep_idle("Demigrate");
-}
-
-/*
- * Timer handler used to force CPU to start pushing its remaining RCU
- * callbacks in the case where it entered dyntick-idle mode with callbacks
- * pending.  The hander doesn't really need to do anything because the
- * real work is done upon re-entry to idle, or by the next scheduling-clock
- * interrupt should idle not be re-entered.
- *
- * One special case: the timer gets migrated without awakening the CPU
- * on which the timer was scheduled on.  In this case, we must wake up
- * that CPU.  We do so with smp_call_function_single().
- */
-static void rcu_idle_gp_timer_func(unsigned long cpu_in)
-{
-	int cpu = (int)cpu_in;
-
-	trace_rcu_prep_idle("Timer");
-	if (cpu != smp_processor_id())
-		smp_call_function_single(cpu, rcu_idle_demigrate, NULL, 0);
-	else
-		WARN_ON_ONCE(1); /* Getting here can hang the system... */
-}
-
-/*
- * Initialize the timer used to pull CPUs out of dyntick-idle mode.
- */
-static void rcu_prepare_for_idle_init(int cpu)
-{
-	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-	rdtp->dyntick_holdoff = jiffies - 1;
-	setup_timer(&rdtp->idle_gp_timer, rcu_idle_gp_timer_func, cpu);
-	rdtp->idle_gp_timer_expires = jiffies - 1;
-	rdtp->idle_first_pass = 1;
-}
-
-/*
- * Clean up for exit from idle.  Because we are exiting from idle, there
- * is no longer any point to ->idle_gp_timer, so cancel it.  This will
- * do nothing if this timer is not active, so just cancel it unconditionally.
- */
-static void rcu_cleanup_after_idle(int cpu)
-{
-	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-
-	del_timer(&rdtp->idle_gp_timer);
-	trace_rcu_prep_idle("Cleanup after idle");
-	rdtp->tick_nohz_enabled_snap = ACCESS_ONCE(tick_nohz_enabled);
-}
-
-/*
- * Check to see if any RCU-related work can be done by the current CPU,
- * and if so, schedule a softirq to get it done.  This function is part
- * of the RCU implementation; it is -not- an exported member of the RCU API.
- *
- * The idea is for the current CPU to clear out all work required by the
- * RCU core for the current grace period, so that this CPU can be permitted
- * to enter dyntick-idle mode.  In some cases, it will need to be awakened
- * at the end of the grace period by whatever CPU ends the grace period.
- * This allows CPUs to go dyntick-idle more quickly, and to reduce the
- * number of wakeups by a modest integer factor.
- *
- * Because it is not legal to invoke rcu_process_callbacks() with irqs
- * disabled, we do one pass of force_quiescent_state(), then do a
- * invoke_rcu_core() to cause rcu_process_callbacks() to be invoked
- * later.  The ->dyntick_drain field controls the sequencing.
+ * Prepare a CPU for idle from an RCU perspective.  The first major task
+ * is to sense whether nohz mode has been enabled or disabled via sysfs.
+ * The second major task is to check to see if a non-lazy callback has
+ * arrived at a CPU that previously had only lazy callbacks.  The third
+ * major task is to accelerate (that is, assign grace-period numbers to)
+ * any recently arrived callbacks.
  *
  * The caller must have disabled interrupts.
  */
 static void rcu_prepare_for_idle(int cpu)
 {
-	struct timer_list *tp;
+	struct rcu_data *rdp;
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
+	struct rcu_node *rnp;
+	struct rcu_state *rsp;
 	int tne;
 
 	/* Handle nohz enablement switches conservatively. */
 	tne = ACCESS_ONCE(tick_nohz_enabled);
 	if (tne != rdtp->tick_nohz_enabled_snap) {
-		if (rcu_cpu_has_callbacks(cpu))
+		if (rcu_cpu_has_callbacks(cpu, NULL))
 			invoke_rcu_core(); /* force nohz to see update. */
 		rdtp->tick_nohz_enabled_snap = tne;
 		return;
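
An aside on the timeout arithmetic introduced above: the new rcu_needs_cpu() rounds the requested sleep so that CPUs picking the same class of delay wake on the same boundary, batching wakeups as the retained energy-efficiency comment suggests. The following is a minimal user-space sketch of that rounding; ROUND_UP is a local stand-in for the kernel's round_up(), and the lazy case only approximates round_jiffies() by rounding to an HZ boundary, so treat the output as illustrative rather than as kernel behavior.

	#include <stdio.h>

	#define ROUND_UP(x, y)	((((x) + (y) - 1) / (y)) * (y))	/* stand-in for round_up() */
	#define HZ 1000

	int main(void)
	{
		unsigned long jiffies = 100003;			/* pretend current time */
		unsigned long rcu_idle_gp_delay = 4;		/* non-lazy delay, in jiffies */
		unsigned long rcu_idle_lazy_gp_delay = 6 * HZ;	/* lazy delay, roughly 6 s */
		unsigned long dj;

		/* Non-lazy callbacks: wake at the next multiple of rcu_idle_gp_delay. */
		dj = ROUND_UP(rcu_idle_gp_delay + jiffies, rcu_idle_gp_delay) - jiffies;
		printf("non-lazy: sleep %lu jiffies, wake at %lu\n", dj, jiffies + dj);

		/* Lazy callbacks: about six seconds, rounded to a second boundary. */
		dj = ROUND_UP(rcu_idle_lazy_gp_delay + jiffies, HZ) - jiffies;
		printf("lazy:     sleep %lu jiffies, wake at %lu\n", dj, jiffies + dj);
		return 0;
	}

With jiffies = 100003 the non-lazy case sleeps 5 jiffies and wakes at 100008, a multiple of 4, so every CPU choosing the 4-jiffy delay converges on the same wakeup instant.
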
@@ -1789,125 +1705,56 @@ static void rcu_prepare_for_idle(int cpu)
 	if (!tne)
 		return;
 
-	/* Adaptive-tick mode, where usermode execution is idle to RCU. */
-	if (!is_idle_task(current)) {
-		rdtp->dyntick_holdoff = jiffies - 1;
-		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
-			trace_rcu_prep_idle("User dyntick with callbacks");
-			rdtp->idle_gp_timer_expires =
-				round_up(jiffies + RCU_IDLE_GP_DELAY,
-					 RCU_IDLE_GP_DELAY);
-		} else if (rcu_cpu_has_callbacks(cpu)) {
-			rdtp->idle_gp_timer_expires =
-				round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
-			trace_rcu_prep_idle("User dyntick with lazy callbacks");
-		} else {
-			return;
-		}
-		tp = &rdtp->idle_gp_timer;
-		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
+	/* If this is a no-CBs CPU, no callbacks, just return. */
+	if (rcu_is_nocb_cpu(cpu))
 		return;
-	}
 
 	/*
-	 * If this is an idle re-entry, for example, due to use of
-	 * RCU_NONIDLE() or the new idle-loop tracing API within the idle
-	 * loop, then don't take any state-machine actions, unless the
-	 * momentary exit from idle queued additional non-lazy callbacks.
-	 * Instead, repost the ->idle_gp_timer if this CPU has callbacks
-	 * pending.
+	 * If a non-lazy callback arrived at a CPU having only lazy
+	 * callbacks, invoke RCU core for the side-effect of recalculating
+	 * idle duration on re-entry to idle.
 	 */
-	if (!rdtp->idle_first_pass &&
-	    (rdtp->nonlazy_posted == rdtp->nonlazy_posted_snap)) {
-		if (rcu_cpu_has_callbacks(cpu)) {
-			tp = &rdtp->idle_gp_timer;
-			mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
-		}
+	if (rdtp->all_lazy &&
+	    rdtp->nonlazy_posted != rdtp->nonlazy_posted_snap) {
+		invoke_rcu_core();
 		return;
 	}
-	rdtp->idle_first_pass = 0;
-	rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted - 1;
 
 	/*
-	 * If there are no callbacks on this CPU, enter dyntick-idle mode.
-	 * Also reset state to avoid prejudicing later attempts.
+	 * If we have not yet accelerated this jiffy, accelerate all
+	 * callbacks on this CPU.
 	 */
-	if (!rcu_cpu_has_callbacks(cpu)) {
-		rdtp->dyntick_holdoff = jiffies - 1;
-		rdtp->dyntick_drain = 0;
-		trace_rcu_prep_idle("No callbacks");
+	if (rdtp->last_accelerate == jiffies)
 		return;
+	rdtp->last_accelerate = jiffies;
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		if (!*rdp->nxttail[RCU_DONE_TAIL])
+			continue;
+		rnp = rdp->mynode;
+		raw_spin_lock(&rnp->lock); /* irqs already disabled. */
+		rcu_accelerate_cbs(rsp, rnp, rdp);
+		raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
 	}
+}
 
-	/*
-	 * If in holdoff mode, just return.  We will presumably have
-	 * refrained from disabling the scheduling-clock tick.
-	 */
-	if (rdtp->dyntick_holdoff == jiffies) {
-		trace_rcu_prep_idle("In holdoff");
-		return;
-	}
+/*
+ * Clean up for exit from idle.  Attempt to advance callbacks based on
+ * any grace periods that elapsed while the CPU was idle, and if any
+ * callbacks are now ready to invoke, initiate invocation.
+ */
+static void rcu_cleanup_after_idle(int cpu)
+{
+	struct rcu_data *rdp;
+	struct rcu_state *rsp;
 
-	/* Check and update the ->dyntick_drain sequencing. */
-	if (rdtp->dyntick_drain <= 0) {
-		/* First time through, initialize the counter. */
-		rdtp->dyntick_drain = RCU_IDLE_FLUSHES;
-	} else if (rdtp->dyntick_drain <= RCU_IDLE_OPT_FLUSHES &&
-		   !rcu_pending(cpu) &&
-		   !local_softirq_pending()) {
-		/* Can we go dyntick-idle despite still having callbacks? */
-		rdtp->dyntick_drain = 0;
-		rdtp->dyntick_holdoff = jiffies;
-		if (rcu_cpu_has_nonlazy_callbacks(cpu)) {
-			trace_rcu_prep_idle("Dyntick with callbacks");
-			rdtp->idle_gp_timer_expires =
-				round_up(jiffies + RCU_IDLE_GP_DELAY,
-					 RCU_IDLE_GP_DELAY);
-		} else {
-			rdtp->idle_gp_timer_expires =
-				round_jiffies(jiffies + RCU_IDLE_LAZY_GP_DELAY);
-			trace_rcu_prep_idle("Dyntick with lazy callbacks");
-		}
-		tp = &rdtp->idle_gp_timer;
-		mod_timer_pinned(tp, rdtp->idle_gp_timer_expires);
-		rdtp->nonlazy_posted_snap = rdtp->nonlazy_posted;
-		return; /* Nothing more to do immediately. */
-	} else if (--(rdtp->dyntick_drain) <= 0) {
-		/* We have hit the limit, so time to give up. */
-		rdtp->dyntick_holdoff = jiffies;
-		trace_rcu_prep_idle("Begin holdoff");
-		invoke_rcu_core(); /* Force the CPU out of dyntick-idle. */
+	if (rcu_is_nocb_cpu(cpu))
 		return;
-	}
-
-	/*
-	 * Do one step of pushing the remaining RCU callbacks through
-	 * the RCU core state machine.
-	 */
-#ifdef CONFIG_TREE_PREEMPT_RCU
-	if (per_cpu(rcu_preempt_data, cpu).nxtlist) {
-		rcu_preempt_qs(cpu);
-		force_quiescent_state(&rcu_preempt_state);
-	}
-#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */
-	if (per_cpu(rcu_sched_data, cpu).nxtlist) {
-		rcu_sched_qs(cpu);
-		force_quiescent_state(&rcu_sched_state);
-	}
-	if (per_cpu(rcu_bh_data, cpu).nxtlist) {
-		rcu_bh_qs(cpu);
-		force_quiescent_state(&rcu_bh_state);
-	}
-
-	/*
-	 * If RCU callbacks are still pending, RCU still needs this CPU.
-	 * So try forcing the callbacks through the grace period.
-	 */
-	if (rcu_cpu_has_callbacks(cpu)) {
-		trace_rcu_prep_idle("More callbacks");
-		invoke_rcu_core();
-	} else {
-		trace_rcu_prep_idle("Callbacks drained");
+	rcu_try_advance_all_cbs();
+	for_each_rcu_flavor(rsp) {
+		rdp = per_cpu_ptr(rsp->rda, cpu);
+		if (cpu_has_callbacks_ready_to_invoke(rdp))
+			invoke_rcu_core();
 	}
 }
 
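
The ->all_lazy / ->nonlazy_posted_snap test in rcu_prepare_for_idle() above is a counter-snapshot pattern: record how many non-lazy callbacks had been posted when the long, lazy idle timeout was chosen, then compare on idle re-entry to notice that a non-lazy callback has slipped in. A minimal, kernel-free sketch of the pattern follows; the struct and helper names are invented for illustration, and only the field names mirror the patch.

	#include <stdbool.h>
	#include <stdio.h>

	struct idle_state {
		unsigned long nonlazy_posted;		/* bumped for every non-lazy callback */
		unsigned long nonlazy_posted_snap;	/* snapshot taken when timeout chosen */
		bool all_lazy;				/* were all callbacks lazy at that point? */
	};

	/* Roughly what rcu_needs_cpu() records before a long lazy sleep. */
	static void choose_idle_timeout(struct idle_state *s, bool only_lazy_cbs)
	{
		s->nonlazy_posted_snap = s->nonlazy_posted;
		s->all_lazy = only_lazy_cbs;
	}

	/* Roughly the re-check rcu_prepare_for_idle() performs on idle re-entry. */
	static bool need_shorter_timeout(const struct idle_state *s)
	{
		return s->all_lazy && s->nonlazy_posted != s->nonlazy_posted_snap;
	}

	int main(void)
	{
		struct idle_state s = { 0 };

		choose_idle_timeout(&s, true);	/* all lazy: a ~6 s timeout was picked */
		s.nonlazy_posted++;		/* a non-lazy callback arrives meanwhile */
		printf("recalculate idle duration: %s\n",
		       need_shorter_timeout(&s) ? "yes" : "no");
		return 0;
	}
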
@@ -2015,16 +1862,13 @@ early_initcall(rcu_register_oom_notifier);
 static void print_cpu_stall_fast_no_hz(char *cp, int cpu)
 {
 	struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);
-	struct timer_list *tltp = &rdtp->idle_gp_timer;
-	char c;
+	unsigned long nlpd = rdtp->nonlazy_posted - rdtp->nonlazy_posted_snap;
 
-	c = rdtp->dyntick_holdoff == jiffies ? 'H' : '.';
-	if (timer_pending(tltp))
-		sprintf(cp, "drain=%d %c timer=%lu",
-			rdtp->dyntick_drain, c, tltp->expires - jiffies);
-	else
-		sprintf(cp, "drain=%d %c timer not pending",
-			rdtp->dyntick_drain, c);
+	sprintf(cp, "last_accelerate: %04lx/%04lx, nonlazy_posted: %ld, %c%c",
+		rdtp->last_accelerate & 0xffff, jiffies & 0xffff,
+		ulong2long(nlpd),
+		rdtp->all_lazy ? 'L' : '.',
+		rdtp->tick_nohz_enabled_snap ? '.' : 'D');
 }
 
 #else /* #ifdef CONFIG_RCU_FAST_NO_HZ */
@@ -2070,10 +1914,11 @@ static void print_cpu_stall_info(struct rcu_state *rsp, int cpu)
 		ticks_value = rsp->gpnum - rdp->gpnum;
 	}
 	print_cpu_stall_fast_no_hz(fast_no_hz, cpu);
-	printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d %s\n",
+	printk(KERN_ERR "\t%d: (%lu %s) idle=%03x/%llx/%d softirq=%u/%u %s\n",
 	       cpu, ticks_value, ticks_title,
 	       atomic_read(&rdtp->dynticks) & 0xfff,
 	       rdtp->dynticks_nesting, rdtp->dynticks_nmi_nesting,
+	       rdp->softirq_snap, kstat_softirqs_cpu(RCU_SOFTIRQ, cpu),
 	       fast_no_hz);
 }
 
@@ -2087,6 +1932,7 @@ static void print_cpu_stall_info_end(void)
 static void zero_cpu_stall_ticks(struct rcu_data *rdp)
 {
 	rdp->ticks_this_gp = 0;
+	rdp->softirq_snap = kstat_softirqs_cpu(RCU_SOFTIRQ, smp_processor_id());
 }
 
 /* Increment ->ticks_this_gp for all flavors of RCU. */
@@ -2165,8 +2011,49 @@ static int __init parse_rcu_nocb_poll(char *arg)
 }
 early_param("rcu_nocb_poll", parse_rcu_nocb_poll);
 
+/*
+ * Do any no-CBs CPUs need another grace period?
+ *
+ * Interrupts must be disabled.  If the caller does not hold the root
+ * rnp_node structure's ->lock, the results are advisory only.
+ */
+static int rcu_nocb_needs_gp(struct rcu_state *rsp)
+{
+	struct rcu_node *rnp = rcu_get_root(rsp);
+
+	return rnp->need_future_gp[(ACCESS_ONCE(rnp->completed) + 1) & 0x1];
+}
+
+/*
+ * Wake up any no-CBs CPUs' kthreads that were waiting on the just-ended
+ * grace period.
+ */
+static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+	wake_up_all(&rnp->nocb_gp_wq[rnp->completed & 0x1]);
+}
+
+/*
+ * Set the root rcu_node structure's ->need_future_gp field
+ * based on the sum of those of all rcu_node structures.  This does
+ * double-count the root rcu_node structure's requests, but this
+ * is necessary to handle the possibility of a rcu_nocb_kthread()
+ * having awakened during the time that the rcu_node structures
+ * were being updated for the end of the previous grace period.
+ */
+static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
+{
+	rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
+}
+
+static void rcu_init_one_nocb(struct rcu_node *rnp)
+{
+	init_waitqueue_head(&rnp->nocb_gp_wq[0]);
+	init_waitqueue_head(&rnp->nocb_gp_wq[1]);
+}
+
 /* Is the specified CPU a no-CPUs CPU? */
-static bool is_nocb_cpu(int cpu)
+bool rcu_is_nocb_cpu(int cpu)
 {
 	if (have_rcu_nocb_mask)
 		return cpumask_test_cpu(cpu, rcu_nocb_mask);
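
The rcu_nocb_needs_gp()/rcu_nocb_gp_set() pair added above indexes the two-element ->need_future_gp[] array by grace-period parity: requests for the grace period after ->completed land in slot (completed + 1) & 0x1, and the meaning of the two slots alternates as ->completed advances. A small stand-alone illustration of that indexing follows; the struct and the recycle step are my own framing, not the kernel's rcu_node handling.

	#include <stdio.h>

	struct gp_node {
		unsigned long completed;	/* number of the last completed GP */
		int need_future_gp[2];		/* requests, indexed by GP parity */
	};

	static void request_next_gp(struct gp_node *n)
	{
		n->need_future_gp[(n->completed + 1) & 0x1]++;
	}

	static void gp_finished(struct gp_node *n)
	{
		n->completed++;				/* GP n->completed has now ended */
		n->need_future_gp[n->completed & 0x1] = 0; /* its requests are satisfied */
	}

	int main(void)
	{
		struct gp_node n = { .completed = 41 };

		request_next_gp(&n);		/* wants GP 42; lands in slot 42 & 1 */
		printf("requests for GP 42: %d\n", n.need_future_gp[42 & 0x1]);
		gp_finished(&n);		/* GP 42 ends; slot 42 & 1 is recycled */
		request_next_gp(&n);		/* wants GP 43; lands in slot 43 & 1 */
		printf("requests for GP 43: %d\n", n.need_future_gp[43 & 0x1]);
		return 0;
	}
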
@@ -2224,9 +2111,16 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
 			    bool lazy)
 {
 
-	if (!is_nocb_cpu(rdp->cpu))
+	if (!rcu_is_nocb_cpu(rdp->cpu))
 		return 0;
 	__call_rcu_nocb_enqueue(rdp, rhp, &rhp->next, 1, lazy);
+	if (__is_kfree_rcu_offset((unsigned long)rhp->func))
+		trace_rcu_kfree_callback(rdp->rsp->name, rhp,
+					 (unsigned long)rhp->func,
+					 rdp->qlen_lazy, rdp->qlen);
+	else
+		trace_rcu_callback(rdp->rsp->name, rhp,
+				   rdp->qlen_lazy, rdp->qlen);
 	return 1;
 }
 
@@ -2241,7 +2135,7 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 	long qll = rsp->qlen_lazy;
 
 	/* If this is not a no-CBs CPU, tell the caller to do it the old way. */
-	if (!is_nocb_cpu(smp_processor_id()))
+	if (!rcu_is_nocb_cpu(smp_processor_id()))
 		return 0;
 	rsp->qlen = 0;
 	rsp->qlen_lazy = 0;
@@ -2265,95 +2159,36 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 }
 
 /*
- * There must be at least one non-no-CBs CPU in operation at any given
- * time, because no-CBs CPUs are not capable of initiating grace periods
- * independently.  This function therefore complains if the specified
- * CPU is the last non-no-CBs CPU, allowing the CPU-hotplug system to
- * avoid offlining the last such CPU.  (Recursion is a wonderful thing,
- * but you have to have a base case!)
+ * If necessary, kick off a new grace period, and either way wait
+ * for a subsequent grace period to complete.
  */
-static bool nocb_cpu_expendable(int cpu)
+static void rcu_nocb_wait_gp(struct rcu_data *rdp)
 {
-	cpumask_var_t non_nocb_cpus;
-	int ret;
+	unsigned long c;
+	bool d;
+	unsigned long flags;
+	struct rcu_node *rnp = rdp->mynode;
+
+	raw_spin_lock_irqsave(&rnp->lock, flags);
+	c = rcu_start_future_gp(rnp, rdp);
+	raw_spin_unlock_irqrestore(&rnp->lock, flags);
 
 	/*
-	 * If there are no no-CB CPUs or if this CPU is not a no-CB CPU,
-	 * then offlining this CPU is harmless.  Let it happen.
+	 * Wait for the grace period.  Do so interruptibly to avoid messing
+	 * up the load average.
 	 */
-	if (!have_rcu_nocb_mask || is_nocb_cpu(cpu))
-		return 1;
-
-	/* If no memory, play it safe and keep the CPU around. */
-	if (!alloc_cpumask_var(&non_nocb_cpus, GFP_NOIO))
-		return 0;
-	cpumask_andnot(non_nocb_cpus, cpu_online_mask, rcu_nocb_mask);
-	cpumask_clear_cpu(cpu, non_nocb_cpus);
-	ret = !cpumask_empty(non_nocb_cpus);
-	free_cpumask_var(non_nocb_cpus);
-	return ret;
-}
-
-/*
- * Helper structure for remote registry of RCU callbacks.
- * This is needed for when a no-CBs CPU needs to start a grace period.
- * If it just invokes call_rcu(), the resulting callback will be queued,
- * which can result in deadlock.
- */
-struct rcu_head_remote {
-	struct rcu_head *rhp;
-	call_rcu_func_t *crf;
-	void (*func)(struct rcu_head *rhp);
-};
-
-/*
- * Register a callback as specified by the rcu_head_remote struct.
- * This function is intended to be invoked via smp_call_function_single().
- */
-static void call_rcu_local(void *arg)
-{
-	struct rcu_head_remote *rhrp =
-		container_of(arg, struct rcu_head_remote, rhp);
-
-	rhrp->crf(rhrp->rhp, rhrp->func);
-}
-
-/*
- * Set up an rcu_head_remote structure and the invoke call_rcu_local()
- * on CPU 0 (which is guaranteed to be a non-no-CBs CPU) via
- * smp_call_function_single().
- */
-static void invoke_crf_remote(struct rcu_head *rhp,
-			      void (*func)(struct rcu_head *rhp),
-			      call_rcu_func_t crf)
-{
-	struct rcu_head_remote rhr;
-
-	rhr.rhp = rhp;
-	rhr.crf = crf;
-	rhr.func = func;
-	smp_call_function_single(0, call_rcu_local, &rhr, 1);
-}
-
-/*
- * Helper functions to be passed to wait_rcu_gp(), each of which
- * invokes invoke_crf_remote() to register a callback appropriately.
- */
-static void __maybe_unused
-call_rcu_preempt_remote(struct rcu_head *rhp,
-			void (*func)(struct rcu_head *rhp))
-{
-	invoke_crf_remote(rhp, func, call_rcu);
-}
-static void call_rcu_bh_remote(struct rcu_head *rhp,
-			       void (*func)(struct rcu_head *rhp))
-{
-	invoke_crf_remote(rhp, func, call_rcu_bh);
-}
-static void call_rcu_sched_remote(struct rcu_head *rhp,
-				  void (*func)(struct rcu_head *rhp))
-{
-	invoke_crf_remote(rhp, func, call_rcu_sched);
+	trace_rcu_future_gp(rnp, rdp, c, "StartWait");
+	for (;;) {
+		wait_event_interruptible(
+			rnp->nocb_gp_wq[c & 0x1],
+			(d = ULONG_CMP_GE(ACCESS_ONCE(rnp->completed), c)));
+		if (likely(d))
+			break;
+		flush_signals(current);
+		trace_rcu_future_gp(rnp, rdp, c, "ResumeWait");
+	}
+	trace_rcu_future_gp(rnp, rdp, c, "EndWait");
+	smp_mb(); /* Ensure that CB invocation happens after GP end. */
 }
 
 /*
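
The wait condition in rcu_nocb_wait_gp() above compares grace-period numbers with ULONG_CMP_GE() rather than a plain >= so that the test keeps working if ->completed ever wraps around. The macro below is written from memory of the kernel's definition in include/linux/rcupdate.h, and the program is a user-space demonstration only.

	#include <limits.h>
	#include <stdio.h>

	/* Believed to match the kernel's definition; shown here for illustration. */
	#define ULONG_CMP_GE(a, b)	(ULONG_MAX / 2 >= (a) - (b))

	int main(void)
	{
		unsigned long completed = ULONG_MAX - 1;	/* about to wrap */
		unsigned long c = completed + 3;		/* a future GP number, post-wrap */

		/* Plain >= gets the wrapped case wrong; ULONG_CMP_GE() does not. */
		printf("plain >= says GP %lu already reached: %d\n", c, c >= completed ? 0 : 1);
		printf("ULONG_CMP_GE(completed, c)           : %d\n", ULONG_CMP_GE(completed, c));
		printf("ULONG_CMP_GE(completed + 3, c)       : %d\n", ULONG_CMP_GE(completed + 3, c));
		return 0;
	}
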
@@ -2390,7 +2225,7 @@ static int rcu_nocb_kthread(void *arg)
 		cl = atomic_long_xchg(&rdp->nocb_q_count_lazy, 0);
 		ACCESS_ONCE(rdp->nocb_p_count) += c;
 		ACCESS_ONCE(rdp->nocb_p_count_lazy) += cl;
-		wait_rcu_gp(rdp->rsp->call_remote);
+		rcu_nocb_wait_gp(rdp);
 
 		/* Each pass through the following loop invokes a callback. */
 		trace_rcu_batch_start(rdp->rsp->name, cl, c, -1);
@@ -2436,36 +2271,40 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 		return;
 	for_each_cpu(cpu, rcu_nocb_mask) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		t = kthread_run(rcu_nocb_kthread, rdp, "rcuo%d", cpu);
+		t = kthread_run(rcu_nocb_kthread, rdp,
+				"rcuo%c/%d", rsp->abbr, cpu);
 		BUG_ON(IS_ERR(t));
 		ACCESS_ONCE(rdp->nocb_kthread) = t;
 	}
 }
 
 /* Prevent __call_rcu() from enqueuing callbacks on no-CBs CPUs */
-static void init_nocb_callback_list(struct rcu_data *rdp)
+static bool init_nocb_callback_list(struct rcu_data *rdp)
 {
 	if (rcu_nocb_mask == NULL ||
 	    !cpumask_test_cpu(rdp->cpu, rcu_nocb_mask))
-		return;
+		return false;
 	rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+	return true;
 }
 
-/* Initialize the ->call_remote fields in the rcu_state structures. */
-static void __init rcu_init_nocb(void)
+#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+
+static int rcu_nocb_needs_gp(struct rcu_state *rsp)
 {
-#ifdef CONFIG_PREEMPT_RCU
-	rcu_preempt_state.call_remote = call_rcu_preempt_remote;
-#endif /* #ifdef CONFIG_PREEMPT_RCU */
-	rcu_bh_state.call_remote = call_rcu_bh_remote;
-	rcu_sched_state.call_remote = call_rcu_sched_remote;
+	return 0;
 }
 
-#else /* #ifdef CONFIG_RCU_NOCB_CPU */
+static void rcu_nocb_gp_cleanup(struct rcu_state *rsp, struct rcu_node *rnp)
+{
+}
 
-static bool is_nocb_cpu(int cpu)
+static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
+{
+}
+
+static void rcu_init_one_nocb(struct rcu_node *rnp)
 {
-	return false;
 }
 
 static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp,
@@ -2480,11 +2319,6 @@ static bool __maybe_unused rcu_nocb_adopt_orphan_cbs(struct rcu_state *rsp,
 	return 0;
 }
 
-static bool nocb_cpu_expendable(int cpu)
-{
-	return 1;
-}
-
 static void __init rcu_boot_init_nocb_percpu_data(struct rcu_data *rdp)
 {
 }
@@ -2493,12 +2327,26 @@ static void __init rcu_spawn_nocb_kthreads(struct rcu_state *rsp)
 {
 }
 
-static void init_nocb_callback_list(struct rcu_data *rdp)
+static bool init_nocb_callback_list(struct rcu_data *rdp)
 {
+	return false;
 }
 
-static void __init rcu_init_nocb(void)
+#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */
+
+/*
+ * An adaptive-ticks CPU can potentially execute in kernel mode for an
+ * arbitrarily long period of time with the scheduling-clock tick turned
+ * off.  RCU will be paying attention to this CPU because it is in the
+ * kernel, but the CPU cannot be guaranteed to be executing the RCU state
+ * machine because the scheduling-clock tick has been disabled.  Therefore,
+ * if an adaptive-ticks CPU is failing to respond to the current grace
+ * period and has not be idle from an RCU perspective, kick it.
+ */
+static void rcu_kick_nohz_cpu(int cpu)
 {
+#ifdef CONFIG_NO_HZ_FULL
+	if (tick_nohz_full_cpu(cpu))
+		smp_send_reschedule(cpu);
+#endif /* #ifdef CONFIG_NO_HZ_FULL */
 }
-
-#endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */