aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2009-04-14 00:31:16 -0400
committerIngo Molnar <mingo@elte.hu>2009-04-14 05:31:50 -0400
commitef631b0ca01655d24e9ca7e199262c4a46416a26 (patch)
tree0ef86d5b97c6e2de500e639ebb92da8f2ec8c863
parent27b19565fe4ca5b0e9d2ae98ce4b81ca728bf445 (diff)
rcu: Make hierarchical RCU less IPI-happy
This patch fixes a hierarchical-RCU performance bug located by Anton Blanchard. The problem stems from a misguided attempt to provide a work-around for jiffies-counter failure. This work-around uses a per-CPU n_rcu_pending counter, which is incremented on each call to rcu_pending(), which in turn is called from each scheduling-clock interrupt. Each CPU then treats this counter as a surrogate for the jiffies counter, so that if the jiffies counter fails to advance, the per-CPU n_rcu_pending counter will cause RCU to invoke force_quiescent_state(), which in turn will (among other things) send resched IPIs to CPUs that have thus far failed to pass through an RCU quiescent state. Unfortunately, each CPU resets only its own counter after sending a batch of IPIs. This means that the other CPUs will also (needlessly) send -another- round of IPIs, for a full N-squared set of IPIs in the worst case every three scheduler-clock ticks until the grace period finally ends. It is not reasonable for a given CPU to reset each and every n_rcu_pending for all the other CPUs, so this patch instead simply disables the jiffies-counter "training wheels", thus eliminating the excessive IPIs. Note that the jiffies-counter IPIs do not have this problem due to the fact that the jiffies counter is global, so that the CPU sending the IPIs can easily reset things, thus preventing the other CPUs from sending redundant IPIs. Note also that the n_rcu_pending counter remains, as it will continue to be used for tracing. It may also see use to update the jiffies counter, should an appropriate kick-the-jiffies-counter API appear. Located-by: Anton Blanchard <anton@au1.ibm.com> Tested-by: Anton Blanchard <anton@au1.ibm.com> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: anton@samba.org Cc: akpm@linux-foundation.org Cc: dipankar@in.ibm.com Cc: manfred@colorfullife.com Cc: cl@linux-foundation.org Cc: josht@linux.vnet.ibm.com Cc: schamp@sgi.com Cc: niv@us.ibm.com Cc: dvhltc@us.ibm.com Cc: ego@in.ibm.com Cc: laijs@cn.fujitsu.com Cc: rostedt@goodmis.org Cc: peterz@infradead.org Cc: penberg@cs.helsinki.fi Cc: andi@firstfloor.org Cc: "Paul E. McKenney" <paulmck@linux.vnet.ibm.com> LKML-Reference: <12396834793575-git-send-email-> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--include/linux/rcutree.h3
-rw-r--r--kernel/rcutree.c19
-rw-r--r--kernel/rcutree_trace.c14
3 files changed, 10 insertions, 26 deletions
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 0cdda00f2b2a..58b2aa5312b9 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -161,9 +161,8 @@ struct rcu_data {
161 unsigned long offline_fqs; /* Kicked due to being offline. */ 161 unsigned long offline_fqs; /* Kicked due to being offline. */
162 unsigned long resched_ipi; /* Sent a resched IPI. */ 162 unsigned long resched_ipi; /* Sent a resched IPI. */
163 163
164 /* 5) state to allow this CPU to force_quiescent_state on others */ 164 /* 5) For future __rcu_pending statistics. */
165 long n_rcu_pending; /* rcu_pending() calls since boot. */ 165 long n_rcu_pending; /* rcu_pending() calls since boot. */
166 long n_rcu_pending_force_qs; /* when to force quiescent states. */
167 166
168 int cpu; 167 int cpu;
169}; 168};
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 7f3266922572..d2a372fb0b9b 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -530,8 +530,6 @@ static void note_new_gpnum(struct rcu_state *rsp, struct rcu_data *rdp)
530 rdp->qs_pending = 1; 530 rdp->qs_pending = 1;
531 rdp->passed_quiesc = 0; 531 rdp->passed_quiesc = 0;
532 rdp->gpnum = rsp->gpnum; 532 rdp->gpnum = rsp->gpnum;
533 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
534 RCU_JIFFIES_TILL_FORCE_QS;
535} 533}
536 534
537/* 535/*
@@ -578,8 +576,6 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
578 rsp->gpnum++; 576 rsp->gpnum++;
579 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */ 577 rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
580 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 578 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
581 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
582 RCU_JIFFIES_TILL_FORCE_QS;
583 record_gp_stall_check_time(rsp); 579 record_gp_stall_check_time(rsp);
584 dyntick_record_completed(rsp, rsp->completed - 1); 580 dyntick_record_completed(rsp, rsp->completed - 1);
585 note_new_gpnum(rsp, rdp); 581 note_new_gpnum(rsp, rdp);
@@ -1055,7 +1051,6 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1055{ 1051{
1056 unsigned long flags; 1052 unsigned long flags;
1057 long lastcomp; 1053 long lastcomp;
1058 struct rcu_data *rdp = rsp->rda[smp_processor_id()];
1059 struct rcu_node *rnp = rcu_get_root(rsp); 1054 struct rcu_node *rnp = rcu_get_root(rsp);
1060 u8 signaled; 1055 u8 signaled;
1061 1056
@@ -1066,16 +1061,13 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
1066 return; /* Someone else is already on the job. */ 1061 return; /* Someone else is already on the job. */
1067 } 1062 }
1068 if (relaxed && 1063 if (relaxed &&
1069 (long)(rsp->jiffies_force_qs - jiffies) >= 0 && 1064 (long)(rsp->jiffies_force_qs - jiffies) >= 0)
1070 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) >= 0)
1071 goto unlock_ret; /* no emergency and done recently. */ 1065 goto unlock_ret; /* no emergency and done recently. */
1072 rsp->n_force_qs++; 1066 rsp->n_force_qs++;
1073 spin_lock(&rnp->lock); 1067 spin_lock(&rnp->lock);
1074 lastcomp = rsp->completed; 1068 lastcomp = rsp->completed;
1075 signaled = rsp->signaled; 1069 signaled = rsp->signaled;
1076 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS; 1070 rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
1077 rdp->n_rcu_pending_force_qs = rdp->n_rcu_pending +
1078 RCU_JIFFIES_TILL_FORCE_QS;
1079 if (lastcomp == rsp->gpnum) { 1071 if (lastcomp == rsp->gpnum) {
1080 rsp->n_force_qs_ngp++; 1072 rsp->n_force_qs_ngp++;
1081 spin_unlock(&rnp->lock); 1073 spin_unlock(&rnp->lock);
@@ -1144,8 +1136,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1144 * If an RCU GP has gone long enough, go check for dyntick 1136 * If an RCU GP has gone long enough, go check for dyntick
1145 * idle CPUs and, if needed, send resched IPIs. 1137 * idle CPUs and, if needed, send resched IPIs.
1146 */ 1138 */
1147 if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1139 if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1148 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
1149 force_quiescent_state(rsp, 1); 1140 force_quiescent_state(rsp, 1);
1150 1141
1151 /* 1142 /*
@@ -1230,8 +1221,7 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1230 if (unlikely(++rdp->qlen > qhimark)) { 1221 if (unlikely(++rdp->qlen > qhimark)) {
1231 rdp->blimit = LONG_MAX; 1222 rdp->blimit = LONG_MAX;
1232 force_quiescent_state(rsp, 0); 1223 force_quiescent_state(rsp, 0);
1233 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1224 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1234 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0)
1235 force_quiescent_state(rsp, 1); 1225 force_quiescent_state(rsp, 1);
1236 local_irq_restore(flags); 1226 local_irq_restore(flags);
1237} 1227}
@@ -1290,8 +1280,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp)
1290 1280
1291 /* Has an RCU GP gone long enough to send resched IPIs &c? */ 1281 /* Has an RCU GP gone long enough to send resched IPIs &c? */
1292 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) && 1282 if (ACCESS_ONCE(rsp->completed) != ACCESS_ONCE(rsp->gpnum) &&
1293 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0 || 1283 ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0))
1294 (rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending) < 0))
1295 return 1; 1284 return 1;
1296 1285
1297 /* nothing to do */ 1286 /* nothing to do */
diff --git a/kernel/rcutree_trace.c b/kernel/rcutree_trace.c
index 4ee954f6a8d5..4b1875ba9404 100644
--- a/kernel/rcutree_trace.c
+++ b/kernel/rcutree_trace.c
@@ -49,14 +49,12 @@ static void print_one_rcu_data(struct seq_file *m, struct rcu_data *rdp)
49{ 49{
50 if (!rdp->beenonline) 50 if (!rdp->beenonline)
51 return; 51 return;
52 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d rpfq=%ld rp=%x", 52 seq_printf(m, "%3d%cc=%ld g=%ld pq=%d pqc=%ld qp=%d",
53 rdp->cpu, 53 rdp->cpu,
54 cpu_is_offline(rdp->cpu) ? '!' : ' ', 54 cpu_is_offline(rdp->cpu) ? '!' : ' ',
55 rdp->completed, rdp->gpnum, 55 rdp->completed, rdp->gpnum,
56 rdp->passed_quiesc, rdp->passed_quiesc_completed, 56 rdp->passed_quiesc, rdp->passed_quiesc_completed,
57 rdp->qs_pending, 57 rdp->qs_pending);
58 rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
59 (int)(rdp->n_rcu_pending & 0xffff));
60#ifdef CONFIG_NO_HZ 58#ifdef CONFIG_NO_HZ
61 seq_printf(m, " dt=%d/%d dn=%d df=%lu", 59 seq_printf(m, " dt=%d/%d dn=%d df=%lu",
62 rdp->dynticks->dynticks, 60 rdp->dynticks->dynticks,
@@ -102,14 +100,12 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
102{ 100{
103 if (!rdp->beenonline) 101 if (!rdp->beenonline)
104 return; 102 return;
105 seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d,%ld,%ld", 103 seq_printf(m, "%d,%s,%ld,%ld,%d,%ld,%d",
106 rdp->cpu, 104 rdp->cpu,
107 cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"", 105 cpu_is_offline(rdp->cpu) ? "\"Y\"" : "\"N\"",
108 rdp->completed, rdp->gpnum, 106 rdp->completed, rdp->gpnum,
109 rdp->passed_quiesc, rdp->passed_quiesc_completed, 107 rdp->passed_quiesc, rdp->passed_quiesc_completed,
110 rdp->qs_pending, 108 rdp->qs_pending);
111 rdp->n_rcu_pending_force_qs - rdp->n_rcu_pending,
112 rdp->n_rcu_pending);
113#ifdef CONFIG_NO_HZ 109#ifdef CONFIG_NO_HZ
114 seq_printf(m, ",%d,%d,%d,%lu", 110 seq_printf(m, ",%d,%d,%d,%lu",
115 rdp->dynticks->dynticks, 111 rdp->dynticks->dynticks,
@@ -123,7 +119,7 @@ static void print_one_rcu_data_csv(struct seq_file *m, struct rcu_data *rdp)
123 119
124static int show_rcudata_csv(struct seq_file *m, void *unused) 120static int show_rcudata_csv(struct seq_file *m, void *unused)
125{ 121{
126 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",\"rpfq\",\"rp\","); 122 seq_puts(m, "\"CPU\",\"Online?\",\"c\",\"g\",\"pq\",\"pqc\",\"pq\",");
127#ifdef CONFIG_NO_HZ 123#ifdef CONFIG_NO_HZ
128 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\","); 124 seq_puts(m, "\"dt\",\"dt nesting\",\"dn\",\"df\",");
129#endif /* #ifdef CONFIG_NO_HZ */ 125#endif /* #ifdef CONFIG_NO_HZ */