author     Peter Zijlstra <a.p.zijlstra@chello.nl>   2008-03-14 16:12:12 -0400
committer  Ingo Molnar <mingo@elte.hu>               2008-03-14 22:02:50 -0400
commit     aa2ac25229cd4d0280f6174c42712744ad61b140 (patch)
tree       e4450de1bb2cd4cd56d6abf64feb862c1d542653
parent     27d117266097101dcf79c4576903cdcdd0eabffc (diff)
sched: fix overload performance: buddy wakeups
Currently we schedule to the leftmost task in the runqueue. When runtimes
are very short because of some server/client ping-pong, especially in
over-saturated workloads, this cycles through all tasks and thrashes the
cache.

Reduce the cache thrashing by keeping dependent tasks together: run newly
woken tasks first. However, by not running the leftmost task first we could
starve tasks, because the wakee can gain unlimited runtime. Therefore we
only run the wakee if it is within a small (wakeup_granularity) window of
the leftmost task. This preserves fairness, but does alternate between the
server and client task groups.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
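For illustration only (not part of the patch): a minimal user-space sketch of
the selection rule described above. The names entity, pick() and the fixed
10 ms WAKEUP_GRAN_NS are made up for this example; the real code below works
on the leftmost sched_entity and cfs_rq->next, and scales
sysctl_sched_wakeup_granularity by the runqueue load via calc_delta_fair().

#include <stdint.h>
#include <stdio.h>

/* Made-up stand-ins for this example only; not the kernel's types. */
struct entity {
	const char *name;
	int64_t vruntime;	/* virtual runtime, in nanoseconds */
};

/*
 * Fixed stand-in for sysctl_sched_wakeup_granularity (assumed 10 ms here);
 * the real code additionally scales it by the runqueue load through
 * calc_delta_fair().
 */
#define WAKEUP_GRAN_NS	(10 * 1000 * 1000LL)

/*
 * Prefer the last-woken task (the "buddy") over the leftmost task, but only
 * while its vruntime has not pulled ahead of the leftmost by more than the
 * wakeup granularity; otherwise fall back to the leftmost so the buddy
 * cannot starve everyone else.
 */
static struct entity *pick(struct entity *leftmost, struct entity *buddy)
{
	int64_t diff;

	if (!buddy)
		return leftmost;

	diff = buddy->vruntime - leftmost->vruntime;
	if (diff < 0 || diff > WAKEUP_GRAN_NS)
		return leftmost;

	return buddy;
}

int main(void)
{
	struct entity server = { "server", 100 * 1000 * 1000LL };
	struct entity client = { "client", 104 * 1000 * 1000LL };

	/* the client was just woken: 4 ms ahead, inside the window -> run it */
	printf("run %s\n", pick(&server, &client)->name);

	client.vruntime += 20 * 1000 * 1000LL;
	/* now 24 ms ahead, outside the window -> run the leftmost server */
	printf("run %s\n", pick(&server, &client)->name);

	return 0;
}

The point of the window check is that the buddy can jump the queue only while
its vruntime lead over the leftmost task stays within the granularity, so no
task can fall arbitrarily far behind.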
-rw-r--r--  kernel/sched.c       |  2
-rw-r--r--  kernel/sched_fair.c  | 26
2 files changed, 27 insertions(+), 1 deletion(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 6b06f23261c0..d1ad69b270ca 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -301,7 +301,7 @@ struct cfs_rq {
 	/* 'curr' points to currently running entity on this cfs_rq.
 	 * It is set to NULL otherwise (i.e when none are currently running).
 	 */
-	struct sched_entity *curr;
+	struct sched_entity *curr, *next;
 
 	unsigned long nr_spread_over;
 
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9d003c9d2a48..31c4a2988b64 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -207,6 +207,9 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 		}
 	}
 
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
+
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
@@ -626,12 +629,32 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
+static struct sched_entity *
+pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	s64 diff, gran;
+
+	if (!cfs_rq->next)
+		return se;
+
+	diff = cfs_rq->next->vruntime - se->vruntime;
+	if (diff < 0)
+		return se;
+
+	gran = calc_delta_fair(sysctl_sched_wakeup_granularity, &cfs_rq->load);
+	if (diff > gran)
+		return se;
+
+	return cfs_rq->next;
+}
+
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *se = NULL;
 
 	if (first_fair(cfs_rq)) {
 		se = __pick_next_entity(cfs_rq);
+		se = pick_next(cfs_rq, se);
 		set_next_entity(cfs_rq, se);
 	}
 
@@ -1070,6 +1093,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		resched_task(curr);
 		return;
 	}
+
+	cfs_rq_of(pse)->next = pse;
+
 	/*
 	 * Batch tasks do not preempt (their preemption is driven by
 	 * the tick):