diff options
| author | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2016-11-30 14:21:21 -0500 |
|---|---|---|
| committer | Paul E. McKenney <paulmck@linux.vnet.ibm.com> | 2017-01-23 14:44:18 -0500 |
| commit | 3a19b46a5c17b12ef0691df19c676ba3da330a57 (patch) | |
| tree | b9effecf9064beba8b6d55fa5a7c1fd1b56a72c4 /kernel/rcu/tree.c | |
| parent | 02a5c550b2738f2bfea8e1e00aa75944d71c9e18 (diff) | |
rcu: Check cond_resched_rcu_qs() state less often to reduce GP overhead
Commit 4a81e8328d37 ("rcu: Reduce overhead of cond_resched() checks
for RCU") moved quiescent-state generation out of cond_resched()
and commit bde6c3aa9930 ("rcu: Provide cond_resched_rcu_qs() to force
quiescent states in long loops") introduced cond_resched_rcu_qs(), and
commit 5cd37193ce85 ("rcu: Make cond_resched_rcu_qs() apply to normal RCU
flavors") introduced the per-CPU rcu_qs_ctr variable, which is frequently
polled by the RCU core state machine.
This frequent polling can increase grace-period rate, which in turn
increases grace-period overhead, which is visible in some benchmarks
(for example, the "open1" benchmark in Anton Blanchard's "will it scale"
suite). This commit therefore reduces the rate at which rcu_qs_ctr
is polled by moving that polling into the force-quiescent-state (FQS)
machinery, and by further polling it only after the grace period has
been in effect for at least jiffies_till_sched_qs jiffies.
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Josh Triplett <josh@joshtriplett.org>
Diffstat (limited to 'kernel/rcu/tree.c')
| -rw-r--r-- | kernel/rcu/tree.c | 46 |
1 file changed, 34 insertions, 12 deletions
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8b970319c75b..d8245cbd08f9 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c | |||
| @@ -1232,7 +1232,10 @@ static int dyntick_save_progress_counter(struct rcu_data *rdp, | |||
| 1232 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, | 1232 | static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, |
| 1233 | bool *isidle, unsigned long *maxj) | 1233 | bool *isidle, unsigned long *maxj) |
| 1234 | { | 1234 | { |
| 1235 | unsigned long jtsq; | ||
| 1235 | int *rcrmp; | 1236 | int *rcrmp; |
| 1237 | unsigned long rjtsc; | ||
| 1238 | struct rcu_node *rnp; | ||
| 1236 | 1239 | ||
| 1237 | /* | 1240 | /* |
| 1238 | * If the CPU passed through or entered a dynticks idle phase with | 1241 | * If the CPU passed through or entered a dynticks idle phase with |
| @@ -1248,6 +1251,31 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, | |||
| 1248 | return 1; | 1251 | return 1; |
| 1249 | } | 1252 | } |
| 1250 | 1253 | ||
| 1254 | /* Compute and saturate jiffies_till_sched_qs. */ | ||
| 1255 | jtsq = jiffies_till_sched_qs; | ||
| 1256 | rjtsc = rcu_jiffies_till_stall_check(); | ||
| 1257 | if (jtsq > rjtsc / 2) { | ||
| 1258 | WRITE_ONCE(jiffies_till_sched_qs, rjtsc); | ||
| 1259 | jtsq = rjtsc / 2; | ||
| 1260 | } else if (jtsq < 1) { | ||
| 1261 | WRITE_ONCE(jiffies_till_sched_qs, 1); | ||
| 1262 | jtsq = 1; | ||
| 1263 | } | ||
| 1264 | |||
| 1265 | /* | ||
| 1266 | * Has this CPU encountered a cond_resched_rcu_qs() since the | ||
| 1267 | * beginning of the grace period? For this to be the case, | ||
| 1268 | * the CPU has to have noticed the current grace period. This | ||
| 1269 | * might not be the case for nohz_full CPUs looping in the kernel. | ||
| 1270 | */ | ||
| 1271 | rnp = rdp->mynode; | ||
| 1272 | if (time_after(jiffies, rdp->rsp->gp_start + jtsq) && | ||
| 1273 | READ_ONCE(rdp->rcu_qs_ctr_snap) != per_cpu(rcu_qs_ctr, rdp->cpu) && | ||
| 1274 | READ_ONCE(rdp->gpnum) == rnp->gpnum && !rdp->gpwrap) { | ||
| 1275 | trace_rcu_fqs(rdp->rsp->name, rdp->gpnum, rdp->cpu, TPS("rqc")); | ||
| 1276 | return 1; | ||
| 1277 | } | ||
| 1278 | |||
| 1251 | /* | 1279 | /* |
| 1252 | * Check for the CPU being offline, but only if the grace period | 1280 | * Check for the CPU being offline, but only if the grace period |
| 1253 | * is old enough. We don't need to worry about the CPU changing | 1281 | * is old enough. We don't need to worry about the CPU changing |
| @@ -1290,9 +1318,8 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp, | |||
| 1290 | * warning delay. | 1318 | * warning delay. |
| 1291 | */ | 1319 | */ |
| 1292 | rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu); | 1320 | rcrmp = &per_cpu(rcu_sched_qs_mask, rdp->cpu); |
| 1293 | if (ULONG_CMP_GE(jiffies, | 1321 | if (time_after(jiffies, rdp->rsp->gp_start + jtsq) || |
| 1294 | rdp->rsp->gp_start + jiffies_till_sched_qs) || | 1322 | time_after(jiffies, rdp->rsp->jiffies_resched)) { |
| 1295 | ULONG_CMP_GE(jiffies, rdp->rsp->jiffies_resched)) { | ||
| 1296 | if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { | 1323 | if (!(READ_ONCE(*rcrmp) & rdp->rsp->flavor_mask)) { |
| 1297 | WRITE_ONCE(rdp->cond_resched_completed, | 1324 | WRITE_ONCE(rdp->cond_resched_completed, |
| 1298 | READ_ONCE(rdp->mynode->completed)); | 1325 | READ_ONCE(rdp->mynode->completed)); |
| @@ -2550,10 +2577,8 @@ rcu_report_qs_rdp(int cpu, struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 2550 | 2577 | ||
| 2551 | rnp = rdp->mynode; | 2578 | rnp = rdp->mynode; |
| 2552 | raw_spin_lock_irqsave_rcu_node(rnp, flags); | 2579 | raw_spin_lock_irqsave_rcu_node(rnp, flags); |
| 2553 | if ((rdp->cpu_no_qs.b.norm && | 2580 | if (rdp->cpu_no_qs.b.norm || rdp->gpnum != rnp->gpnum || |
| 2554 | rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) || | 2581 | rnp->completed == rnp->gpnum || rdp->gpwrap) { |
| 2555 | rdp->gpnum != rnp->gpnum || rnp->completed == rnp->gpnum || | ||
| 2556 | rdp->gpwrap) { | ||
| 2557 | 2582 | ||
| 2558 | /* | 2583 | /* |
| 2559 | * The grace period in which this quiescent state was | 2584 | * The grace period in which this quiescent state was |
| @@ -2608,8 +2633,7 @@ rcu_check_quiescent_state(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 2608 | * Was there a quiescent state since the beginning of the grace | 2633 | * Was there a quiescent state since the beginning of the grace |
| 2609 | * period? If no, then exit and wait for the next call. | 2634 | * period? If no, then exit and wait for the next call. |
| 2610 | */ | 2635 | */ |
| 2611 | if (rdp->cpu_no_qs.b.norm && | 2636 | if (rdp->cpu_no_qs.b.norm) |
| 2612 | rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) | ||
| 2613 | return; | 2637 | return; |
| 2614 | 2638 | ||
| 2615 | /* | 2639 | /* |
| @@ -3563,9 +3587,7 @@ static int __rcu_pending(struct rcu_state *rsp, struct rcu_data *rdp) | |||
| 3563 | rdp->core_needs_qs && rdp->cpu_no_qs.b.norm && | 3587 | rdp->core_needs_qs && rdp->cpu_no_qs.b.norm && |
| 3564 | rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { | 3588 | rdp->rcu_qs_ctr_snap == __this_cpu_read(rcu_qs_ctr)) { |
| 3565 | rdp->n_rp_core_needs_qs++; | 3589 | rdp->n_rp_core_needs_qs++; |
| 3566 | } else if (rdp->core_needs_qs && | 3590 | } else if (rdp->core_needs_qs && !rdp->cpu_no_qs.b.norm) { |
| 3567 | (!rdp->cpu_no_qs.b.norm || | ||
| 3568 | rdp->rcu_qs_ctr_snap != __this_cpu_read(rcu_qs_ctr))) { | ||
| 3569 | rdp->n_rp_report_qs++; | 3591 | rdp->n_rp_report_qs++; |
| 3570 | return 1; | 3592 | return 1; |
| 3571 | } | 3593 | } |
