author	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2012-06-26 20:00:35 -0400
committer	Paul E. McKenney <paulmck@linux.vnet.ibm.com>	2012-09-23 10:41:54 -0400
commit	394f2769aa0dbcf027bae6fb52835e25e05d332e (patch)
tree	78471127d379e8d128d73e976ce33022c195b0c5 /kernel
parent	4605c0143c6d611b3076025ba3a7e04293c01d69 (diff)
rcu: Prevent force_quiescent_state() memory contention
Large systems running RCU_FAST_NO_HZ kernels see extreme memory contention on the rcu_state structure's ->fqslock field. This can be avoided by disabling RCU_FAST_NO_HZ, either at compile time or at boot time (via the nohz kernel boot parameter), but large systems will no doubt become sensitive to energy consumption. This commit therefore uses a combining-tree approach to spread the memory contention across new cache lines in the leaf rcu_node structures. This can be thought of as a tournament lock that has only a try-lock acquisition primitive.

The effect on small systems is minimal, because such systems have an rcu_node "tree" consisting of a single node. In addition, this functionality is not used on fastpaths.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
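The combining-tree technique can be illustrated outside the kernel. What follows is a minimal userspace C sketch of the "tournament try-lock" funnel used in the diff below, under stated assumptions: funnel_node, flag_already_set, and funnel_set_flag are hypothetical names invented for this example, pthread mutexes stand in for the kernel's raw spinlocks, and the real code's per-CPU leaf selection and irq-disabled acquisition of the root rcu_node's ->lock are elided.

/*
 * Sketch of a funnel (tournament) try-lock: callers race up a tree,
 * holding at most one per-node trylock across loop iterations.  All
 * names here are illustrative; this is not the kernel implementation.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stddef.h>

struct funnel_node {
	struct funnel_node *parent;	/* NULL at the root. */
	pthread_mutex_t trylock;	/* Initialize with pthread_mutex_init(). */
};

static atomic_int flag_already_set;	/* Stands in for RCU_GP_FLAG_FQS. */

/*
 * Walk from a leaf toward the root.  Acquire the current node's
 * trylock before releasing the previous node's, so contention is
 * filtered level by level.  A caller that loses a trylock, or that
 * sees the flag already set, drops out: some other caller is already
 * doing the work.  Returns true if this caller reached the root and
 * set the flag itself.
 */
static bool funnel_set_flag(struct funnel_node *leaf)
{
	struct funnel_node *rnp;
	struct funnel_node *rnp_old = NULL;

	for (rnp = leaf; rnp != NULL; rnp = rnp->parent) {
		bool lost = atomic_load(&flag_already_set) ||
			    pthread_mutex_trylock(&rnp->trylock) != 0;
		if (rnp_old != NULL)
			pthread_mutex_unlock(&rnp_old->trylock);
		if (lost)
			return false;	/* Loser: the winner sets the flag. */
		rnp_old = rnp;
	}
	/* Won the tournament; rnp_old is the root and its trylock is held. */
	atomic_store(&flag_already_set, 1);
	pthread_mutex_unlock(&rnp_old->trylock);
	return true;
}

Because every caller wants the same single outcome (the flag set once), losing any trylock on the way up means a caller at or above that node is already doing the job, so blocking is never useful. That is why a try-lock-only tournament suffices, and why contention stays spread across the leaf-level cache lines instead of piling onto one global lock.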
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/rcutree.c	47
-rw-r--r--	kernel/rcutree.h	1
2 files changed, 38 insertions(+), 10 deletions(-)
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 723e2e723074..43d57a17fcc5 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -61,6 +61,7 @@
 /* Data structures. */
 
 static struct lock_class_key rcu_node_class[RCU_NUM_LVLS];
+static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
 
 #define RCU_STATE_INITIALIZER(sname, cr) { \
 	.level = { &sname##_state.node[0] }, \
@@ -1807,16 +1808,35 @@ static void force_qs_rnp(struct rcu_state *rsp, int (*f)(struct rcu_data *))
 static void force_quiescent_state(struct rcu_state *rsp)
 {
 	unsigned long flags;
-	struct rcu_node *rnp = rcu_get_root(rsp);
+	bool ret;
+	struct rcu_node *rnp;
+	struct rcu_node *rnp_old = NULL;
+
+	/* Funnel through hierarchy to reduce memory contention. */
+	rnp = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
+	for (; rnp != NULL; rnp = rnp->parent) {
+		ret = (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) ||
+		      !raw_spin_trylock(&rnp->fqslock);
+		if (rnp_old != NULL)
+			raw_spin_unlock(&rnp_old->fqslock);
+		if (ret) {
+			rsp->n_force_qs_lh++;
+			return;
+		}
+		rnp_old = rnp;
+	}
+	/* rnp_old == rcu_get_root(rsp), rnp == NULL. */
 
-	if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS)
+	/* Reached the root of the rcu_node tree, acquire lock. */
+	raw_spin_lock_irqsave(&rnp_old->lock, flags);
+	raw_spin_unlock(&rnp_old->fqslock);
+	if (ACCESS_ONCE(rsp->gp_flags) & RCU_GP_FLAG_FQS) {
+		rsp->n_force_qs_lh++;
+		raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
 		return;  /* Someone beat us to it. */
-	if (!raw_spin_trylock_irqsave(&rnp->lock, flags)) {
-		rsp->n_force_qs_lh++; /* Inexact, can lose counts.  Tough! */
-		return;
 	}
 	rsp->gp_flags |= RCU_GP_FLAG_FQS;
-	raw_spin_unlock_irqrestore(&rnp->lock, flags);
+	raw_spin_unlock_irqrestore(&rnp_old->lock, flags);
 	wake_up(&rsp->gp_wq);  /* Memory barrier implied by wake_up() path. */
 }
 
@@ -2704,10 +2724,14 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
 static void __init rcu_init_one(struct rcu_state *rsp,
 		struct rcu_data __percpu *rda)
 {
-	static char *buf[] = { "rcu_node_level_0",
-			       "rcu_node_level_1",
-			       "rcu_node_level_2",
-			       "rcu_node_level_3" };  /* Match MAX_RCU_LVLS */
+	static char *buf[] = { "rcu_node_0",
+			       "rcu_node_1",
+			       "rcu_node_2",
+			       "rcu_node_3" };  /* Match MAX_RCU_LVLS */
+	static char *fqs[] = { "rcu_node_fqs_0",
+			       "rcu_node_fqs_1",
+			       "rcu_node_fqs_2",
+			       "rcu_node_fqs_3" };  /* Match MAX_RCU_LVLS */
 	int cpustride = 1;
 	int i;
 	int j;
@@ -2732,6 +2756,9 @@ static void __init rcu_init_one(struct rcu_state *rsp,
 		raw_spin_lock_init(&rnp->lock);
 		lockdep_set_class_and_name(&rnp->lock,
 					   &rcu_node_class[i], buf[i]);
+		raw_spin_lock_init(&rnp->fqslock);
+		lockdep_set_class_and_name(&rnp->fqslock,
+					   &rcu_fqs_class[i], fqs[i]);
 		rnp->gpnum = 0;
 		rnp->qsmask = 0;
 		rnp->qsmaskinit = 0;
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 7fb93cedc76a..8f0293ce1517 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -202,6 +202,7 @@ struct rcu_node {
 					/*  per-CPU kthreads as needed. */
 	unsigned int node_kthread_status;
 					/* State of node_kthread_task for tracing. */
+	raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
 } ____cacheline_internodealigned_in_smp;
 
 /*