diff options
| author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2009-10-14 13:15:55 -0400 | 
|---|---|---|
| committer | Ingo Molnar <mingo@elte.hu> | 2009-10-15 05:17:16 -0400 | 
| commit | 37c72e56f6b234ea7387ba530434a80abf2658d8 (patch) | |
| tree | 1c589c3ecdebbe19488359795cde4c55726a63aa | |
| parent | 2bc872036e1c5948b5b02942810bbdd8dbdb9812 (diff) | |
rcu: Prevent RCU IPI storms in presence of high call_rcu() load
As the number of callbacks on a given CPU rises, invoke
force_quiescent_state() only every blimit number of callbacks
(defaults to 10,000), and even then only if no other CPU has
invoked force_quiescent_state() in the meantime.
This should fix the performance regression reported by Nick.
Reported-by: Nick Piggin <npiggin@suse.de>
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: laijs@cn.fujitsu.com
Cc: dipankar@in.ibm.com
Cc: mathieu.desnoyers@polymtl.ca
Cc: josh@joshtriplett.org
Cc: dvhltc@us.ibm.com
Cc: niv@us.ibm.com
Cc: peterz@infradead.org
Cc: rostedt@goodmis.org
Cc: Valdis.Kletnieks@vt.edu
Cc: dhowells@redhat.com
Cc: jens.axboe@oracle.com
LKML-Reference: <12555405592133-git-send-email-paulmck@linux.vnet.ibm.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
| -rw-r--r-- | kernel/rcutree.c | 29 | ||||
| -rw-r--r-- | kernel/rcutree.h | 4 | 
2 files changed, 28 insertions, 5 deletions
| diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 705f02ac7433..ddbf111e9e18 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c | |||
| @@ -958,7 +958,7 @@ static void rcu_offline_cpu(int cpu) | |||
| 958 | * Invoke any RCU callbacks that have made it to the end of their grace | 958 | * Invoke any RCU callbacks that have made it to the end of their grace | 
| 959 | * period. Thottle as specified by rdp->blimit. | 959 | * period. Thottle as specified by rdp->blimit. | 
| 960 | */ | 960 | */ | 
| 961 | static void rcu_do_batch(struct rcu_data *rdp) | 961 | static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp) | 
| 962 | { | 962 | { | 
| 963 | unsigned long flags; | 963 | unsigned long flags; | 
| 964 | struct rcu_head *next, *list, **tail; | 964 | struct rcu_head *next, *list, **tail; | 
| @@ -1011,6 +1011,13 @@ static void rcu_do_batch(struct rcu_data *rdp) | |||
| 1011 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) | 1011 | if (rdp->blimit == LONG_MAX && rdp->qlen <= qlowmark) | 
| 1012 | rdp->blimit = blimit; | 1012 | rdp->blimit = blimit; | 
| 1013 | 1013 | ||
| 1014 | /* Reset ->qlen_last_fqs_check trigger if enough CBs have drained. */ | ||
| 1015 | if (rdp->qlen == 0 && rdp->qlen_last_fqs_check != 0) { | ||
| 1016 | rdp->qlen_last_fqs_check = 0; | ||
| 1017 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
| 1018 | } else if (rdp->qlen < rdp->qlen_last_fqs_check - qhimark) | ||
| 1019 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
| 1020 | |||
| 1014 | local_irq_restore(flags); | 1021 | local_irq_restore(flags); | 
| 1015 | 1022 | ||
| 1016 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 1023 | /* Re-raise the RCU softirq if there are callbacks remaining. */ | 
| @@ -1224,7 +1231,7 @@ __rcu_process_callbacks(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 1224 | } | 1231 | } | 
| 1225 | 1232 | ||
| 1226 | /* If there are callbacks ready, invoke them. */ | 1233 | /* If there are callbacks ready, invoke them. */ | 
| 1227 | rcu_do_batch(rdp); | 1234 | rcu_do_batch(rsp, rdp); | 
| 1228 | } | 1235 | } | 
| 1229 | 1236 | ||
| 1230 | /* | 1237 | /* | 
| @@ -1288,10 +1295,20 @@ __call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu), | |||
| 1288 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ | 1295 | rcu_start_gp(rsp, nestflag); /* releases rnp_root->lock. */ | 
| 1289 | } | 1296 | } | 
| 1290 | 1297 | ||
| 1291 | /* Force the grace period if too many callbacks or too long waiting. */ | 1298 | /* | 
| 1292 | if (unlikely(++rdp->qlen > qhimark)) { | 1299 | * Force the grace period if too many callbacks or too long waiting. | 
| 1300 | * Enforce hysteresis, and don't invoke force_quiescent_state() | ||
| 1301 | * if some other CPU has recently done so. Also, don't bother | ||
| 1302 | * invoking force_quiescent_state() if the newly enqueued callback | ||
| 1303 | * is the only one waiting for a grace period to complete. | ||
| 1304 | */ | ||
| 1305 | if (unlikely(++rdp->qlen > rdp->qlen_last_fqs_check + qhimark)) { | ||
| 1293 | rdp->blimit = LONG_MAX; | 1306 | rdp->blimit = LONG_MAX; | 
| 1294 | force_quiescent_state(rsp, 0); | 1307 | if (rsp->n_force_qs == rdp->n_force_qs_snap && | 
| 1308 | *rdp->nxttail[RCU_DONE_TAIL] != head) | ||
| 1309 | force_quiescent_state(rsp, 0); | ||
| 1310 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
| 1311 | rdp->qlen_last_fqs_check = rdp->qlen; | ||
| 1295 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) | 1312 | } else if ((long)(ACCESS_ONCE(rsp->jiffies_force_qs) - jiffies) < 0) | 
| 1296 | force_quiescent_state(rsp, 1); | 1313 | force_quiescent_state(rsp, 1); | 
| 1297 | local_irq_restore(flags); | 1314 | local_irq_restore(flags); | 
| @@ -1523,6 +1540,8 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptable) | |||
| 1523 | rdp->beenonline = 1; /* We have now been online. */ | 1540 | rdp->beenonline = 1; /* We have now been online. */ | 
| 1524 | rdp->preemptable = preemptable; | 1541 | rdp->preemptable = preemptable; | 
| 1525 | rdp->passed_quiesc_completed = lastcomp - 1; | 1542 | rdp->passed_quiesc_completed = lastcomp - 1; | 
| 1543 | rdp->qlen_last_fqs_check = 0; | ||
| 1544 | rdp->n_force_qs_snap = rsp->n_force_qs; | ||
| 1526 | rdp->blimit = blimit; | 1545 | rdp->blimit = blimit; | 
| 1527 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 1546 | spin_unlock(&rnp->lock); /* irqs remain disabled. */ | 
| 1528 | 1547 | ||
| diff --git a/kernel/rcutree.h b/kernel/rcutree.h index b40ac5706040..599161f309fb 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h | |||
| @@ -167,6 +167,10 @@ struct rcu_data { | |||
| 167 | struct rcu_head *nxtlist; | 167 | struct rcu_head *nxtlist; | 
| 168 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; | 168 | struct rcu_head **nxttail[RCU_NEXT_SIZE]; | 
| 169 | long qlen; /* # of queued callbacks */ | 169 | long qlen; /* # of queued callbacks */ | 
| 170 | long qlen_last_fqs_check; | ||
| 171 | /* qlen at last check for QS forcing */ | ||
| 172 | unsigned long n_force_qs_snap; | ||
| 173 | /* did other CPU force QS recently? */ | ||
| 170 | long blimit; /* Upper limit on a processed batch */ | 174 | long blimit; /* Upper limit on a processed batch */ | 
| 171 | 175 | ||
| 172 | #ifdef CONFIG_NO_HZ | 176 | #ifdef CONFIG_NO_HZ | 
