aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMike Galbraith <efault@gmx.de>2010-12-08 05:05:42 -0500
committerIngo Molnar <mingo@elte.hu>2010-12-08 14:15:06 -0500
commitf26f9aff6aaf67e9a430d16c266f91b13a5bff64 (patch)
tree84e39b539b42655dbae6b80acbe14e10772711fe
parent0f004f5a696a9434b7214d0d3cbd0525ee77d428 (diff)
Sched: fix skip_clock_update optimization
idle_balance() drops/retakes rq->lock, leaving the previous task vulnerable to set_tsk_need_resched(). Clear it after we return from balancing instead, and in setup_thread_stack() as well, so no successfully descheduled or never scheduled task has it set. Need resched confused the skip_clock_update logic, which assumes that the next call to update_rq_clock() will come nearly immediately after being set. Make the optimization robust against the case of waking a sleeper before it successfully deschedules by checking that the current task has not been dequeued before setting the flag, since it is that useless clock update we're trying to save, and clear unconditionally in schedule() proper instead of conditionally in put_prev_task(). Signed-off-by: Mike Galbraith <efault@gmx.de> Reported-by: Bjoern B. Brandenburg <bbb.lst@gmail.com> Tested-by: Yong Zhang <yong.zhang0@gmail.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Cc: stable@kernel.org LKML-Reference: <1291802742.1417.9.camel@marge.simson.net> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r--kernel/fork.c1
-rw-r--r--kernel/sched.c26
2 files changed, 15 insertions, 12 deletions
diff --git a/kernel/fork.c b/kernel/fork.c
index 3b159c5991b7..5447dc7defa9 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -273,6 +273,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
273 273
274 setup_thread_stack(tsk, orig); 274 setup_thread_stack(tsk, orig);
275 clear_user_return_notifier(tsk); 275 clear_user_return_notifier(tsk);
276 clear_tsk_need_resched(tsk);
276 stackend = end_of_stack(tsk); 277 stackend = end_of_stack(tsk);
277 *stackend = STACK_END_MAGIC; /* for overflow detection */ 278 *stackend = STACK_END_MAGIC; /* for overflow detection */
278 279
diff --git a/kernel/sched.c b/kernel/sched.c
index 6b7c26a1a097..da14302a9857 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -641,17 +641,18 @@ static void sched_irq_time_avg_update(struct rq *rq, u64 irq_time);
641 641
642inline void update_rq_clock(struct rq *rq) 642inline void update_rq_clock(struct rq *rq)
643{ 643{
644 if (!rq->skip_clock_update) { 644 int cpu = cpu_of(rq);
645 int cpu = cpu_of(rq); 645 u64 irq_time;
646 u64 irq_time;
647 646
648 rq->clock = sched_clock_cpu(cpu); 647 if (rq->skip_clock_update)
649 irq_time = irq_time_cpu(cpu); 648 return;
650 if (rq->clock - irq_time > rq->clock_task)
651 rq->clock_task = rq->clock - irq_time;
652 649
653 sched_irq_time_avg_update(rq, irq_time); 650 rq->clock = sched_clock_cpu(cpu);
654 } 651 irq_time = irq_time_cpu(cpu);
652 if (rq->clock - irq_time > rq->clock_task)
653 rq->clock_task = rq->clock - irq_time;
654
655 sched_irq_time_avg_update(rq, irq_time);
655} 656}
656 657
657/* 658/*
@@ -2129,7 +2130,7 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
2129 * A queue event has occurred, and we're going to schedule. In 2130 * A queue event has occurred, and we're going to schedule. In
2130 * this case, we can save a useless back to back clock update. 2131 * this case, we can save a useless back to back clock update.
2131 */ 2132 */
2132 if (test_tsk_need_resched(rq->curr)) 2133 if (rq->curr->se.on_rq && test_tsk_need_resched(rq->curr))
2133 rq->skip_clock_update = 1; 2134 rq->skip_clock_update = 1;
2134} 2135}
2135 2136
@@ -3973,7 +3974,6 @@ static void put_prev_task(struct rq *rq, struct task_struct *prev)
3973{ 3974{
3974 if (prev->se.on_rq) 3975 if (prev->se.on_rq)
3975 update_rq_clock(rq); 3976 update_rq_clock(rq);
3976 rq->skip_clock_update = 0;
3977 prev->sched_class->put_prev_task(rq, prev); 3977 prev->sched_class->put_prev_task(rq, prev);
3978} 3978}
3979 3979
@@ -4031,7 +4031,6 @@ need_resched_nonpreemptible:
4031 hrtick_clear(rq); 4031 hrtick_clear(rq);
4032 4032
4033 raw_spin_lock_irq(&rq->lock); 4033 raw_spin_lock_irq(&rq->lock);
4034 clear_tsk_need_resched(prev);
4035 4034
4036 switch_count = &prev->nivcsw; 4035 switch_count = &prev->nivcsw;
4037 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) { 4036 if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {
@@ -4063,6 +4062,8 @@ need_resched_nonpreemptible:
4063 4062
4064 put_prev_task(rq, prev); 4063 put_prev_task(rq, prev);
4065 next = pick_next_task(rq); 4064 next = pick_next_task(rq);
4065 clear_tsk_need_resched(prev);
4066 rq->skip_clock_update = 0;
4066 4067
4067 if (likely(prev != next)) { 4068 if (likely(prev != next)) {
4068 sched_info_switch(prev, next); 4069 sched_info_switch(prev, next);
@@ -4071,6 +4072,7 @@ need_resched_nonpreemptible:
4071 rq->nr_switches++; 4072 rq->nr_switches++;
4072 rq->curr = next; 4073 rq->curr = next;
4073 ++*switch_count; 4074 ++*switch_count;
4075 WARN_ON_ONCE(test_tsk_need_resched(next));
4074 4076
4075 context_switch(rq, prev, next); /* unlocks the rq */ 4077 context_switch(rq, prev, next); /* unlocks the rq */
4076 /* 4078 /*