author     Peter Zijlstra <peterz@infradead.org>    2009-09-16 06:31:31 -0400
committer  Ingo Molnar <mingo@elte.hu>              2009-09-17 04:17:25 -0400
commit     ad4b78bbcbab66998b05d422ac6106b645796e54 (patch)
tree       45f3561f4bd6b886948a3b0eea64edab9bab9eda
parent     eb24073bc1fe3e569a855cf38d529fb650c35524 (diff)
sched: Add new wakeup preemption mode: WAKEUP_RUNNING
Create a new wakeup preemption mode: preempt towards tasks that run
shorter on average. Set the next buddy to make sure we actually run
the task we preempted for.
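For reference (not part of the patch): "runs shorter on avg" is tracked
per sched_entity in the new se.avg_running field. put_prev_task() (see
the kernel/sched.c hunk below) feeds it the runtime of the slice that
just ended and pushes an extra zero sample when the task goes to sleep,
so tasks that sleep a lot keep a small avg_running; check_preempt_wakeup()
then preempts when the wakee's avg_running is below that of the current
task. A minimal sketch of the averaging, assuming update_avg() is the
existing 1/8-decay EWMA helper in kernel/sched.c already used for
se.avg_wakeup and se.avg_overlap:

/* Sketch only: assumed shape of the existing update_avg() helper. */
static inline void update_avg(u64 *avg, u64 sample)
{
	s64 diff = sample - *avg;

	/* move 1/8 of the way toward the new sample */
	*avg += diff >> 3;
}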
Test results:
root@twins:~# while :; do :; done &
[1] 6537
root@twins:~# while :; do :; done &
[2] 6538
root@twins:~# while :; do :; done &
[3] 6539
root@twins:~# while :; do :; done &
[4] 6540
root@twins:/home/peter# ./latt -c4 sleep 4
Entries: 48 (clients=4)
Averages:
------------------------------
Max 4750 usec
Avg 497 usec
Stdev 737 usec
root@twins:/home/peter# echo WAKEUP_RUNNING > /debug/sched_features
root@twins:/home/peter# ./latt -c4 sleep 4
Entries: 48 (clients=4)
Averages:
------------------------------
Max 14 usec
Avg 5 usec
Stdev 3 usec
Disabled by default - needs more testing.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Acked-by: Mike Galbraith <efault@gmx.de>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
LKML-Reference: <new-submission>
-rw-r--r--   include/linux/sched.h   |  2
-rw-r--r--   kernel/sched.c          | 17
-rw-r--r--   kernel/sched_debug.c    |  1
-rw-r--r--   kernel/sched_fair.c     | 14
-rw-r--r--   kernel/sched_features.h |  5
5 files changed, 29 insertions, 10 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index b4a39bb2b4a4..8af3d249170e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1113,6 +1113,8 @@ struct sched_entity {
 	u64			start_runtime;
 	u64			avg_wakeup;
 
+	u64			avg_running;
+
 #ifdef CONFIG_SCHEDSTATS
 	u64			wait_start;
 	u64			wait_max;
diff --git a/kernel/sched.c b/kernel/sched.c
index 969dfaef2465..3bb4ea2ee6f0 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2458,6 +2458,7 @@ static void __sched_fork(struct task_struct *p)
 	p->se.avg_overlap		= 0;
 	p->se.start_runtime		= 0;
 	p->se.avg_wakeup		= sysctl_sched_wakeup_granularity;
+	p->se.avg_running		= 0;
 
 #ifdef CONFIG_SCHEDSTATS
 	p->se.wait_start		= 0;
@@ -5310,14 +5311,13 @@ static inline void schedule_debug(struct task_struct *prev)
 #endif
 }
 
-static void put_prev_task(struct rq *rq, struct task_struct *prev)
+static void put_prev_task(struct rq *rq, struct task_struct *p)
 {
-	if (prev->state == TASK_RUNNING) {
-		u64 runtime = prev->se.sum_exec_runtime;
+	u64 runtime = p->se.sum_exec_runtime - p->se.prev_sum_exec_runtime;
 
-		runtime -= prev->se.prev_sum_exec_runtime;
-		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
+	update_avg(&p->se.avg_running, runtime);
 
+	if (p->state == TASK_RUNNING) {
 		/*
 		 * In order to avoid avg_overlap growing stale when we are
 		 * indeed overlapping and hence not getting put to sleep, grow
@@ -5327,9 +5327,12 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
 		 * correlates to the amount of cache footprint a task can
 		 * build up.
 		 */
-		update_avg(&prev->se.avg_overlap, runtime);
+		runtime = min_t(u64, runtime, 2*sysctl_sched_migration_cost);
+		update_avg(&p->se.avg_overlap, runtime);
+	} else {
+		update_avg(&p->se.avg_running, 0);
 	}
-	prev->sched_class->put_prev_task(rq, prev);
+	p->sched_class->put_prev_task(rq, p);
 }
 
 /*
diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
index 5ddbd0891267..efb84409bc43 100644
--- a/kernel/sched_debug.c
+++ b/kernel/sched_debug.c
@@ -395,6 +395,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
 	PN(se.sum_exec_runtime);
 	PN(se.avg_overlap);
 	PN(se.avg_wakeup);
+	PN(se.avg_running);
 
 	nr_switches = p->nvcsw + p->nivcsw;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index c741cd9d38de..3e6f78c66876 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1605,9 +1605,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		return;
 	}
 
-	if (!sched_feat(WAKEUP_PREEMPT))
-		return;
-
 	if ((sched_feat(WAKEUP_SYNC) && sync) ||
 	    (sched_feat(WAKEUP_OVERLAP) &&
 	     (se->avg_overlap < sysctl_sched_migration_cost &&
@@ -1616,6 +1613,17 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 		return;
 	}
 
+	if (sched_feat(WAKEUP_RUNNING)) {
+		if (pse->avg_running < se->avg_running) {
+			set_next_buddy(pse);
+			resched_task(curr);
+			return;
+		}
+	}
+
+	if (!sched_feat(WAKEUP_PREEMPT))
+		return;
+
 	find_matching_se(&se, &pse);
 
 	BUG_ON(!pse);
diff --git a/kernel/sched_features.h b/kernel/sched_features.h
index d5059fd761d9..0d94083582c7 100644
--- a/kernel/sched_features.h
+++ b/kernel/sched_features.h
@@ -54,6 +54,11 @@ SCHED_FEAT(WAKEUP_SYNC, 0)
 SCHED_FEAT(WAKEUP_OVERLAP, 0)
 
 /*
+ * Wakeup preemption towards tasks that run short
+ */
+SCHED_FEAT(WAKEUP_RUNNING, 0)
+
+/*
  * Use the SYNC wakeup hint, pipes and the likes use this to indicate
  * the remote end is likely to consume the data we just wrote, and
  * therefore has cache benefit from being placed on the same cpu, see