diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2009-01-14 06:39:19 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-01-15 06:00:09 -0500 |
commit | e52fb7c097238d34f4d8e2a596f8a3f85b0c0565 (patch) | |
tree | f846443fdf7888583b4b0ff0ddd0be1be4b37df6 /kernel/sched_fair.c | |
parent | 831451ac4e44d3a20b581ce726ef1d1144373f7d (diff) |
sched: prefer wakers
Prefer tasks that wake other tasks to preempt quickly. This improves
performance because more work is available sooner.
The workload that prompted this patch was a kernel build over NFS4 (for some
curious reason that we do not yet understand, we had to revert commit
18de9735300756e3ca9c361ef58409d8561dfe0d to make any progress at all).
Without this patch a make -j8 bzImage (of x86-64 defconfig) would take
3m30-ish; with this patch we're down to 2m50-ish.
psql-sysbench/mysql-sysbench show a slight improvement in peak performance as
well; tbench and vmark did not seem to care.
It is possible to improve upon the build time (to 2m20-ish) but that seriously
destroys other benchmarks (just shows that there's more room for tinkering).
Many thanks to Mike, who put in a lot of effort to benchmark things and proved
a worthy opponent with a competing patch.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 59 |
1 files changed, 53 insertions, 6 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 8e1352c75557..bdf64346b4d1 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -1295,16 +1295,63 @@ out: | |||
1295 | } | 1295 | } |
1296 | #endif /* CONFIG_SMP */ | 1296 | #endif /* CONFIG_SMP */ |
1297 | 1297 | ||
1298 | static unsigned long wakeup_gran(struct sched_entity *se) | 1298 | /* |
1299 | * Adaptive granularity | ||
1300 | * | ||
1301 | * se->avg_wakeup gives the average time a task runs until it does a wakeup, | ||
1302 | * with the limit of wakeup_gran -- when it never does a wakeup. | ||
1303 | * | ||
1304 | * So the smaller avg_wakeup is the faster we want this task to preempt, | ||
1305 | * but we don't want to treat the preemptee unfairly and therefore allow it | ||
1306 | * to run for at least the amount of time we'd like to run. | ||
1307 | * | ||
1308 | * NOTE: we use 2*avg_wakeup to increase the probability of actually doing one | ||
1309 | * | ||
1310 | * NOTE: we use *nr_running to scale with load, this nicely matches the | ||
1311 | * degrading latency on load. | ||
1312 | */ | ||
1313 | static unsigned long | ||
1314 | adaptive_gran(struct sched_entity *curr, struct sched_entity *se) | ||
1315 | { | ||
1316 | u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime; | ||
1317 | u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running; | ||
1318 | u64 gran = 0; | ||
1319 | |||
1320 | if (this_run < expected_wakeup) | ||
1321 | gran = expected_wakeup - this_run; | ||
1322 | |||
1323 | return min_t(s64, gran, sysctl_sched_wakeup_granularity); | ||
1324 | } | ||
1325 | |||
1326 | static unsigned long | ||
1327 | wakeup_gran(struct sched_entity *curr, struct sched_entity *se) | ||
1299 | { | 1328 | { |
1300 | unsigned long gran = sysctl_sched_wakeup_granularity; | 1329 | unsigned long gran = sysctl_sched_wakeup_granularity; |
1301 | 1330 | ||
1331 | if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN)) | ||
1332 | gran = adaptive_gran(curr, se); | ||
1333 | |||
1302 | /* | 1334 | /* |
1303 | * More easily preempt - nice tasks, while not making it harder for | 1335 | * Since its curr running now, convert the gran from real-time |
1304 | * + nice tasks. | 1336 | * to virtual-time in his units. |
1305 | */ | 1337 | */ |
1306 | if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD) | 1338 | if (sched_feat(ASYM_GRAN)) { |
1307 | gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); | 1339 | /* |
1340 | * By using 'se' instead of 'curr' we penalize light tasks, so | ||
1341 | * they get preempted easier. That is, if 'se' < 'curr' then | ||
1342 | * the resulting gran will be larger, therefore penalizing the | ||
1343 | * lighter, if otoh 'se' > 'curr' then the resulting gran will | ||
1344 | * be smaller, again penalizing the lighter task. | ||
1345 | * | ||
1346 | * This is especially important for buddies when the leftmost | ||
1347 | * task is higher priority than the buddy. | ||
1348 | */ | ||
1349 | if (unlikely(se->load.weight != NICE_0_LOAD)) | ||
1350 | gran = calc_delta_fair(gran, se); | ||
1351 | } else { | ||
1352 | if (unlikely(curr->load.weight != NICE_0_LOAD)) | ||
1353 | gran = calc_delta_fair(gran, curr); | ||
1354 | } | ||
1308 | 1355 | ||
1309 | return gran; | 1356 | return gran; |
1310 | } | 1357 | } |
@@ -1331,7 +1378,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) | |||
1331 | if (vdiff <= 0) | 1378 | if (vdiff <= 0) |
1332 | return -1; | 1379 | return -1; |
1333 | 1380 | ||
1334 | gran = wakeup_gran(curr); | 1381 | gran = wakeup_gran(curr, se); |
1335 | if (vdiff > gran) | 1382 | if (vdiff > gran) |
1336 | return 1; | 1383 | return 1; |
1337 | 1384 | ||