diff options
author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2009-10-14 13:15:55 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-10-15 05:17:16 -0400 |
commit | 37c72e56f6b234ea7387ba530434a80abf2658d8 (patch) | |
tree | 1c589c3ecdebbe19488359795cde4c55726a63aa /kernel | |
parent | 2bc872036e1c5948b5b02942810bbdd8dbdb9812 (diff) |
rcu: Prevent RCU IPI storms in presence of high call_rcu() load
As the number of callbacks on a given CPU rises, invoke
force_quiescent_state() only every qhimark number of callbacks
(defaults to 10,000), and even then only if no other CPU has
invoked force_quiescent_state() in the meantime.
This should fix the performance regression reported by Nick.
Reported-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: jens.axboe@oracle.com
LKML-Reference: <12555405592133-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/rcutree.c | 29 | ||||
-rw-r--r-- | kernel/rcutree.h | 4 |
2 files changed, 28 insertions, 5 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 705f02ac7433..ddbf111e9e18 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
@@ -958,7 +958,7 @@ static void rcu_offline_cpu(int cpu) | |||
958 | * Invoke any RCU callbacks that have made it to the end of their grace | 958 | * Invoke any RCU callbacks that have made it to the end of their grace |
959 | * period. Throttle as specified by rdp->blimit. | 959 | * period. Throttle as specified by rdp->blimit. |
960 | */ | 960 | */ |
961 | static void rcu_do_batch(struct rcu_data *rdp) | 961 | static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) |
962 | { | 962 | { |
963 | unsigned long flags; | 963 | unsigned long flags; |
964 | struct rcu_head *next, *list, **tail; | 964 | struct rcu_head *next, *list, **tail; |
@@ -1011,6 +1011,13 @@ static void rcu_do_batch(struct rcu_data *rdp) | |||
1011 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) | 1011 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) |
1012 | rdp->blimit = blimit; | 1012 | rdp->blimit = blimit; |
1013 | 1013 | ||
1014 | /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ | ||
1015 | if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { | ||
1016 | rdp->qlen_last_fqs_check = 0; | ||
1017 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1018 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) | ||
1019 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
1020 | |||
1014 | local_irq_restore(flags); | 1021 | local_irq_restore(flags); |
1015 | 1022 | ||
1016 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1023 | /* Re-raise the RCU softirq if there are callbacks remaining. */ |
@@ -1224,7 +1231,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
1224 | } | 1231 | } |
1225 | 1232 | ||
1226 | /* If there are callbacks ready, invoke them. */ | 1233 | /* If there are callbacks ready, invoke them. */ |
1227 | rcu_do_batch(rdp); | 1234 | rcu_do_batch(rsp, rdp); |
1228 | } | 1235 | } |
1229 | 1236 | ||
1230 | /* | 1237 | /* |
@@ -1288,10 +1295,20 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
1288 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ | 1295 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ |
1289 | } | 1296 | } |
1290 | 1297 | ||
1291 | /* Force the grace period if too many callbacks or too long waiting. */ | 1298 | /* |
1292 | if (unlikely(++rdp->qlen > qhimark)) { | 1299 | * Force the grace period if too many callbacks or too long waiting. |
1300 | * Enforce hysteresis, and don't invoke force_quiescent_state() | ||
1301 | * if some other CPU has recently done so. Also, don't bother | ||
1302 | * invoking force_quiescent_state() if the newly enqueued callback | ||
1303 | * is the only one waiting for a grace period to complete. | ||
1304 | */ | ||
1305 | if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | ||
1293 | rdp->blimit = LONG_MAX; | 1306 | rdp->blimit = LONG_MAX; |
1294 | force_quiescent_state(rsp, 0); | 1307 | if (rsp->n_force_qs == rdp->n_force_qs_snap && |
1308 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
1309 | force_quiescent_state(rsp, 0); | ||
1310 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1311 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
1295 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) | 1312 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) |
1296 | force_quiescent_state(rsp, 1); | 1313 | force_quiescent_state(rsp, 1); |
1297 | local_irq_restore(flags); | 1314 | local_irq_restore(flags); |
@@ -1523,6 +1540,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | |||
1523 | rdp->beenonline = 1; /* We have now been online. */ | 1540 | rdp->beenonline = 1; /* We have now been online. */ |
1524 | rdp->preemptable = preemptable; | 1541 | rdp->preemptable = preemptable; |
1525 | rdp->passed_quiesc_completed = lastcomp - 1; | 1542 | rdp->passed_quiesc_completed = lastcomp - 1; |
1543 | rdp->qlen_last_fqs_check = 0; | ||
1544 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
1526 | rdp->blimit = blimit; | 1545 | rdp->blimit = blimit; |
1527 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1546 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ |
1528 | 1547 | ||
diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b40ac5706040..599161f309fb 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
@@ -167,6 +167,10 @@ struct rcu_data { | |||
167 | struct rcu_head *nxtlist; | 167 | struct rcu_head *nxtlist; |
168 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; | 168 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; |
169 | long qlen; /* # of queued callbacks */ | 169 | long qlen; /* # of queued callbacks */ |
170 | long qlen_last_fqs_check; | ||
171 | /* qlen at last check for QS forcing */ | ||
172 | unsigned long n_force_qs_snap; | ||
173 | /* did other CPU force QS recently? */ | ||
170 | long blimit; /* Upper limit on a processed batch */ | 174 | long blimit; /* Upper limit on a processed batch */ |
171 | 175 | ||
172 | #ifdef CONFIG_NO_HZ | 176 | #ifdef CONFIG_NO_HZ |