about summary refs log tree commit diff stats
path: root/kernel
diff options: context, space, mode
authorPaul E. McKenney <paulmck@linux.vnet.ibm.com>2009-10-14 13:15:55 -0400
committerIngo Molnar <mingo@elte.hu>2009-10-15 05:17:16 -0400
commit37c72e56f6b234ea7387ba530434a80abf2658d8 (patch)
tree1c589c3ecdebbe19488359795cde4c55726a63aa /kernel
parent2bc872036e1c5948b5b02942810bbdd8dbdb9812 (diff)
rcu: Prevent RCU IPI storms in presence of high call_rcu() load
As the number of callbacks on a given CPU rises, invoke force_quiescent_state() only every blimit number of callbacks (defaults to 10,000), and even then only if no other CPU has invoked force_quiescent_state() in the meantime. This should fix the performance regression reported by Nick. Reported-by: Nick Piggin <npiggin@suse.de> Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: laijs@cn.fujitsu.com Cc: dipankar@in.ibm.com Cc: mathieu.desnoyers@polymtl.ca Cc: josh@joshtriplett.org Cc: dvhltc@us.ibm.com Cc: niv@us.ibm.com Cc: peterz@infradead.org Cc: rostedt@goodmis.org Cc: Valdis.Kletnieks@vt.edu Cc: dhowells@redhat.com Cc: jens.axboe@oracle.com LKML-Reference: <12555405592133-git-send-email-> Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/rcutree.c29
-rw-r--r--kernel/rcutree.h4
2 files changed, 28 insertions, 5 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 705f02ac7433..ddbf111e9e18 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -958,7 +958,7 @@ static void rcu_offline_cpu(int cpu)
958 * Invoke any RCU callbacks that have made it to the end of their grace 958 * Invoke any RCU callbacks that have made it to the end of their grace
959 * period. Thottle as specified by rdp->blimit. 959 * period. Thottle as specified by rdp->blimit.
960 */ 960 */
961static void rcu_do_batch(struct rcu_data *rdp) 961static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
962{ 962{
963 unsigned long flags; 963 unsigned long flags;
964 struct rcu_head *next, *list, **tail; 964 struct rcu_head *next, *list, **tail;
@@ -1011,6 +1011,13 @@ static void rcu_do_batch(struct rcu_data *rdp)
1011 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) 1011 if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark)
1012 rdp->blimit = blimit; 1012 rdp->blimit = blimit;
1013 1013
1014 /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */
1015 if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) {
1016 rdp->qlen_last_fqs_check = 0;
1017 rdp->n_force_qs_snap = rsp->n_force_qs;
1018 } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark)
1019 rdp->qlen_last_fqs_check = rdp->qlen;
1020
1014 local_irq_restore(flags); 1021 local_irq_restore(flags);
1015 1022
1016 /* Re-raise the RCU softirq if there are callbacks remaining. */ 1023 /* Re-raise the RCU softirq if there are callbacks remaining. */
@@ -1224,7 +1231,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp)
1224 } 1231 }
1225 1232
1226 /* If there are callbacks ready, invoke them. */ 1233 /* If there are callbacks ready, invoke them. */
1227 rcu_do_batch(rdp); 1234 rcu_do_batch(rsp, rdp);
1228} 1235}
1229 1236
1230/* 1237/*
@@ -1288,10 +1295,20 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu),
1288 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ 1295 rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */
1289 } 1296 }
1290 1297
1291 /* Force the grace period if too many callbacks or too long waiting. */ 1298 /*
1292 if (unlikely(++rdp->qlen > qhimark)) { 1299 * Force the grace period if too many callbacks or too long waiting.
1300 * Enforce hysteresis, and don't invoke force_quiescent_state()
1301 * if some other CPU has recently done so. Also, don't bother
1302 * invoking force_quiescent_state() if the newly enqueued callback
1303 * is the only one waiting for a grace period to complete.
1304 */
1305 if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) {
1293 rdp->blimit = LONG_MAX; 1306 rdp->blimit = LONG_MAX;
1294 force_quiescent_state(rsp, 0); 1307 if (rsp->n_force_qs == rdp->n_force_qs_snap &&
1308 *rdp->nxttail[RCU_DONE_TAIL] != head)
1309 force_quiescent_state(rsp, 0);
1310 rdp->n_force_qs_snap = rsp->n_force_qs;
1311 rdp->qlen_last_fqs_check = rdp->qlen;
1295 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) 1312 } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0)
1296 force_quiescent_state(rsp, 1); 1313 force_quiescent_state(rsp, 1);
1297 local_irq_restore(flags); 1314 local_irq_restore(flags);
@@ -1523,6 +1540,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable)
1523 rdp->beenonline = 1; /* We have now been online. */ 1540 rdp->beenonline = 1; /* We have now been online. */
1524 rdp->preemptable = preemptable; 1541 rdp->preemptable = preemptable;
1525 rdp->passed_quiesc_completed = lastcomp - 1; 1542 rdp->passed_quiesc_completed = lastcomp - 1;
1543 rdp->qlen_last_fqs_check = 0;
1544 rdp->n_force_qs_snap = rsp->n_force_qs;
1526 rdp->blimit = blimit; 1545 rdp->blimit = blimit;
1527 spin_unlock(&rnp->lock); /* irqs remain disabled. */ 1546 spin_unlock(&rnp->lock); /* irqs remain disabled. */
1528 1547
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index b40ac5706040..599161f309fb 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -167,6 +167,10 @@ struct rcu_data {
167 struct rcu_head *nxtlist; 167 struct rcu_head *nxtlist;
168 struct rcu_head **nxttail[RCU_NEXT_SIZE]; 168 struct rcu_head **nxttail[RCU_NEXT_SIZE];
169 long qlen; /* # of queued callbacks */ 169 long qlen; /* # of queued callbacks */
170 long qlen_last_fqs_check;
171 /* qlen at last check for QS forcing */
172 unsigned long n_force_qs_snap;
173 /* did other CPU force QS recently? */
170 long blimit; /* Upper limit on a processed batch */ 174 long blimit; /* Upper limit on a processed batch */
171 175
172#ifdef CONFIG_NO_HZ 176#ifdef CONFIG_NO_HZ