aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched_fair.c
diff options
context:
space:
mode:
author: Peter Zijlstra <a.p.zijlstra@chello.nl> 2009-01-14 06:39:19 -0500
committer: Ingo Molnar <mingo@elte.hu> 2009-01-15 06:00:09 -0500
commit: e52fb7c097238d34f4d8e2a596f8a3f85b0c0565 (patch)
tree: f846443fdf7888583b4b0ff0ddd0be1be4b37df6 /kernel/sched_fair.c
parent: 831451ac4e44d3a20b581ce726ef1d1144373f7d (diff)
sched: prefer wakers
Prefer tasks that wake other tasks to preempt quickly. This improves performance because more work is available sooner.

The workload that prompted this patch was a kernel build over NFS4 (for some curious and not understood reason we had to revert commit: 18de9735300756e3ca9c361ef58409d8561dfe0d to make any progress at all).

Without this patch a make -j8 bzImage (of x86-64 defconfig) would take 3m30-ish, with this patch we're down to 2m50-ish.

psql-sysbench/mysql-sysbench show a slight improvement in peak performance as well, tbench and vmark seemed to not care.

It is possible to improve upon the build time (to 2m20-ish) but that seriously destroys other benchmarks (just shows that there's more room for tinkering).

Much thanks to Mike who put in a lot of effort to benchmark things and proved a worthy opponent with a competing patch.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- kernel/sched_fair.c 59
1 files changed, 53 insertions, 6 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 8e1352c75557..bdf64346b4d1 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1295,16 +1295,63 @@ out:
1295} 1295}
1296#endif /* CONFIG_SMP */ 1296#endif /* CONFIG_SMP */
1297 1297
1298static unsigned long wakeup_gran(struct sched_entity *se) 1298/*
1299 * Adaptive granularity
1300 *
1301 * se->avg_wakeup gives the average time a task runs until it does a wakeup,
1302 * with the limit of wakeup_gran -- when it never does a wakeup.
1303 *
1304 * So the smaller avg_wakeup is the faster we want this task to preempt,
1305 * but we don't want to treat the preemptee unfairly and therefore allow it
1306 * to run for at least the amount of time we'd like to run.
1307 *
1308 * NOTE: we use 2*avg_wakeup to increase the probability of actually doing one
1309 *
1310 * NOTE: we use *nr_running to scale with load, this nicely matches the
1311 * degrading latency on load.
1312 */
1313static unsigned long
1314adaptive_gran(struct sched_entity *curr, struct sched_entity *se)
1315{
1316 u64 this_run = curr->sum_exec_runtime - curr->prev_sum_exec_runtime;
1317 u64 expected_wakeup = 2*se->avg_wakeup * cfs_rq_of(se)->nr_running;
1318 u64 gran = 0;
1319
1320 if (this_run < expected_wakeup)
1321 gran = expected_wakeup - this_run;
1322
1323 return min_t(s64, gran, sysctl_sched_wakeup_granularity);
1324}
1325
1326static unsigned long
1327wakeup_gran(struct sched_entity *curr, struct sched_entity *se)
1299{ 1328{
1300 unsigned long gran = sysctl_sched_wakeup_granularity; 1329 unsigned long gran = sysctl_sched_wakeup_granularity;
1301 1330
1331 if (cfs_rq_of(curr)->curr && sched_feat(ADAPTIVE_GRAN))
1332 gran = adaptive_gran(curr, se);
1333
1302 /* 1334 /*
1303 * More easily preempt - nice tasks, while not making it harder for 1335 * Since its curr running now, convert the gran from real-time
1304 * + nice tasks. 1336 * to virtual-time in his units.
1305 */ 1337 */
1306 if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD) 1338 if (sched_feat(ASYM_GRAN)) {
1307 gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); 1339 /*
1340 * By using 'se' instead of 'curr' we penalize light tasks, so
1341 * they get preempted easier. That is, if 'se' < 'curr' then
1342 * the resulting gran will be larger, therefore penalizing the
1343 * lighter, if otoh 'se' > 'curr' then the resulting gran will
1344 * be smaller, again penalizing the lighter task.
1345 *
1346 * This is especially important for buddies when the leftmost
1347 * task is higher priority than the buddy.
1348 */
1349 if (unlikely(se->load.weight != NICE_0_LOAD))
1350 gran = calc_delta_fair(gran, se);
1351 } else {
1352 if (unlikely(curr->load.weight != NICE_0_LOAD))
1353 gran = calc_delta_fair(gran, curr);
1354 }
1308 1355
1309 return gran; 1356 return gran;
1310} 1357}
@@ -1331,7 +1378,7 @@ wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
1331 if (vdiff <= 0) 1378 if (vdiff <= 0)
1332 return -1; 1379 return -1;
1333 1380
1334 gran = wakeup_gran(curr); 1381 gran = wakeup_gran(curr, se);
1335 if (vdiff > gran) 1382 if (vdiff > gran)
1336 return 1; 1383 return 1;
1337 1384