author     Gregory Haskins <ghaskins@novell.com>    2009-07-29 11:08:47 -0400
committer  Ingo Molnar <mingo@elte.hu>              2009-08-02 08:26:10 -0400
commit     3f029d3c6d62068d59301d90c18dbde8ee402107 (patch)
tree       c9c4e49bc0c8b919a81bf428180b9cffedbef389
parent     c3a2ae3d93c0f10d29c071f599764d00b8de00cb (diff)
sched: Enhance the pre/post scheduling logic
We currently have an explicit "needs_post" vtable method which returns a stack variable for whether we should later run post-schedule. This leads to an awkward exchange of the variable as it bubbles back up out of the context switch. Peter Zijlstra observed that this information could be stored in the run-queue itself instead of handled on the stack.

Therefore, we revert to the method of having context_switch return void, and update an internal rq->post_schedule variable when we require further processing.

In addition, we fix a race condition where we try to access current->sched_class without holding the rq->lock. This is technically racy, as the sched-class could change out from under us. Instead, we reference the per-rq post_schedule variable with the runqueue unlocked, but with preemption disabled to see if we need to reacquire the rq->lock.

Finally, we clean the code up slightly by removing the #ifdef CONFIG_SMP conditionals from the schedule() call, and implement some inline helper functions instead.

This patch passes checkpatch, and rt-migrate.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
LKML-Reference: <20090729150422.17691.55590.stgit@dev.haskins.net>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
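As an illustration of the pattern described above (not part of the patch), the idea can be sketched in plain user-space C: the "will post-schedule work be needed?" decision is recorded in a per-runqueue flag while the lock is held, and the later unlocked path only re-takes the lock when that flag is set. The toy_rq type and toy_* helpers below are hypothetical stand-ins modeled on rq->post_schedule, not kernel APIs.

#include <pthread.h>
#include <stdio.h>

struct toy_rq {
        pthread_mutex_t lock;
        int post_schedule;      /* plays the role of rq->post_schedule */
        int pushable_tasks;     /* stands in for rq->rt.pushable_tasks */
};

/* called with the lock held, as pick_next_task_rt() is in the patch */
static void toy_pick_next(struct toy_rq *rq)
{
        /* decide now, under the lock, whether a push will be needed later */
        rq->post_schedule = (rq->pushable_tasks > 0);
}

/* called with the lock NOT held (preemption disabled in the real kernel) */
static void toy_post_schedule(struct toy_rq *rq)
{
        if (!rq->post_schedule)
                return;         /* common case: no extra lock round-trip */

        pthread_mutex_lock(&rq->lock);
        printf("pushing %d task(s)\n", rq->pushable_tasks);
        rq->post_schedule = 0;
        pthread_mutex_unlock(&rq->lock);
}

int main(void)
{
        struct toy_rq rq = { .post_schedule = 0, .pushable_tasks = 2 };

        pthread_mutex_init(&rq.lock, NULL);

        pthread_mutex_lock(&rq.lock);
        toy_pick_next(&rq);             /* decide under the lock */
        pthread_mutex_unlock(&rq.lock);

        toy_post_schedule(&rq);         /* act on the flag afterwards */
        return 0;
}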
-rw-r--r--   include/linux/sched.h    1
-rw-r--r--   kernel/sched.c          82
-rw-r--r--   kernel/sched_rt.c       31
3 files changed, 61 insertions, 53 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c35bc29d2a9..195d72d5c102 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1047,7 +1047,6 @@ struct sched_class {
                         struct rq *busiest, struct sched_domain *sd,
                         enum cpu_idle_type idle);
         void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
-        int (*needs_post_schedule) (struct rq *this_rq);
         void (*post_schedule) (struct rq *this_rq);
         void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
 
diff --git a/kernel/sched.c b/kernel/sched.c
index a030d4514cdc..613fee54fc89 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -616,6 +616,7 @@ struct rq {
 
         unsigned char idle_at_tick;
         /* For active balancing */
+        int post_schedule;
         int active_balance;
         int push_cpu;
         /* cpu of this runqueue: */
@@ -2839,17 +2840,11 @@ prepare_task_switch(struct rq *rq, struct task_struct *prev,
  * with the lock held can cause deadlocks; see schedule() for
  * details.)
  */
-static int finish_task_switch(struct rq *rq, struct task_struct *prev)
+static void finish_task_switch(struct rq *rq, struct task_struct *prev)
         __releases(rq->lock)
 {
         struct mm_struct *mm = rq->prev_mm;
         long prev_state;
-        int post_schedule = 0;
-
-#ifdef CONFIG_SMP
-        if (current->sched_class->needs_post_schedule)
-                post_schedule = current->sched_class->needs_post_schedule(rq);
-#endif
 
         rq->prev_mm = NULL;
 
@@ -2880,10 +2875,44 @@ static int finish_task_switch(struct rq *rq, struct task_struct *prev)
                 kprobe_flush_task(prev);
                 put_task_struct(prev);
         }
+}
+
+#ifdef CONFIG_SMP
+
+/* assumes rq->lock is held */
+static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
+{
+        if (prev->sched_class->pre_schedule)
+                prev->sched_class->pre_schedule(rq, prev);
+}
+
+/* rq->lock is NOT held, but preemption is disabled */
+static inline void post_schedule(struct rq *rq)
+{
+        if (rq->post_schedule) {
+                unsigned long flags;
+
+                spin_lock_irqsave(&rq->lock, flags);
+                if (rq->curr->sched_class->post_schedule)
+                        rq->curr->sched_class->post_schedule(rq);
+                spin_unlock_irqrestore(&rq->lock, flags);
+
+                rq->post_schedule = 0;
+        }
+}
+
+#else
 
-        return post_schedule;
+static inline void pre_schedule(struct rq *rq, struct task_struct *p)
+{
+}
+
+static inline void post_schedule(struct rq *rq)
+{
 }
 
+#endif
+
 /**
  * schedule_tail - first thing a freshly forked thread must call.
  * @prev: the thread we just switched away from.
@@ -2892,14 +2921,14 @@ asmlinkage void schedule_tail(struct task_struct *prev)
         __releases(rq->lock)
 {
         struct rq *rq = this_rq();
-        int post_schedule;
 
-        post_schedule = finish_task_switch(rq, prev);
+        finish_task_switch(rq, prev);
 
-#ifdef CONFIG_SMP
-        if (post_schedule)
-                current->sched_class->post_schedule(rq);
-#endif
+        /*
+         * FIXME: do we need to worry about rq being invalidated by the
+         * task_switch?
+         */
+        post_schedule(rq);
 
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
         /* In this case, finish_task_switch does not reenable preemption */
@@ -2913,7 +2942,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
  * context_switch - switch to the new MM and the new
  * thread's register state.
  */
-static inline int
+static inline void
 context_switch(struct rq *rq, struct task_struct *prev,
                struct task_struct *next)
 {
@@ -2960,7 +2989,7 @@ context_switch(struct rq *rq, struct task_struct *prev,
          * CPUs since it called schedule(), thus the 'rq' on its stack
          * frame will be invalid.
          */
-        return finish_task_switch(this_rq(), prev);
+        finish_task_switch(this_rq(), prev);
 }
 
 /*
@@ -5371,7 +5400,6 @@ asmlinkage void __sched schedule(void)
 {
         struct task_struct *prev, *next;
         unsigned long *switch_count;
-        int post_schedule = 0;
         struct rq *rq;
         int cpu;
 
@@ -5403,10 +5431,7 @@ need_resched_nonpreemptible:
                 switch_count = &prev->nvcsw;
         }
 
-#ifdef CONFIG_SMP
-        if (prev->sched_class->pre_schedule)
-                prev->sched_class->pre_schedule(rq, prev);
-#endif
+        pre_schedule(rq, prev);
 
         if (unlikely(!rq->nr_running))
                 idle_balance(cpu, rq);
@@ -5422,25 +5447,17 @@ need_resched_nonpreemptible:
                 rq->curr = next;
                 ++*switch_count;
 
-                post_schedule = context_switch(rq, prev, next); /* unlocks the rq */
+                context_switch(rq, prev, next); /* unlocks the rq */
                 /*
                  * the context switch might have flipped the stack from under
                  * us, hence refresh the local variables.
                  */
                 cpu = smp_processor_id();
                 rq = cpu_rq(cpu);
-        } else {
-#ifdef CONFIG_SMP
-                if (current->sched_class->needs_post_schedule)
-                        post_schedule = current->sched_class->needs_post_schedule(rq);
-#endif
+        } else
                 spin_unlock_irq(&rq->lock);
-        }
 
-#ifdef CONFIG_SMP
-        if (post_schedule)
-                current->sched_class->post_schedule(rq);
-#endif
+        post_schedule(rq);
 
         if (unlikely(reacquire_kernel_lock(current) < 0))
                 goto need_resched_nonpreemptible;
@@ -9403,6 +9420,7 @@ void __init sched_init(void)
 #ifdef CONFIG_SMP
                 rq->sd = NULL;
                 rq->rd = NULL;
+                rq->post_schedule = 0;
                 rq->active_balance = 0;
                 rq->next_balance = jiffies;
                 rq->push_cpu = 0;
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 3918e01994e0..a8f89bc3e5eb 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1056,6 +1056,11 @@ static struct task_struct *_pick_next_task_rt(struct rq *rq)
         return p;
 }
 
+static inline int has_pushable_tasks(struct rq *rq)
+{
+        return !plist_head_empty(&rq->rt.pushable_tasks);
+}
+
 static struct task_struct *pick_next_task_rt(struct rq *rq)
 {
         struct task_struct *p = _pick_next_task_rt(rq);
@@ -1064,6 +1069,12 @@ static struct task_struct *pick_next_task_rt(struct rq *rq)
         if (p)
                 dequeue_pushable_task(rq, p);
 
+        /*
+         * We detect this state here so that we can avoid taking the RQ
+         * lock again later if there is no need to push
+         */
+        rq->post_schedule = has_pushable_tasks(rq);
+
         return p;
 }
 
@@ -1262,11 +1273,6 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq)
         return lowest_rq;
 }
 
-static inline int has_pushable_tasks(struct rq *rq)
-{
-        return !plist_head_empty(&rq->rt.pushable_tasks);
-}
-
 static struct task_struct *pick_next_pushable_task(struct rq *rq)
 {
         struct task_struct *p;
@@ -1466,23 +1472,9 @@ static void pre_schedule_rt(struct rq *rq, struct task_struct *prev)
         pull_rt_task(rq);
 }
 
-/*
- * assumes rq->lock is held
- */
-static int needs_post_schedule_rt(struct rq *rq)
-{
-        return has_pushable_tasks(rq);
-}
-
 static void post_schedule_rt(struct rq *rq)
 {
-        /*
-         * This is only called if needs_post_schedule_rt() indicates that
-         * we need to push tasks away
-         */
-        spin_lock_irq(&rq->lock);
         push_rt_tasks(rq);
-        spin_unlock_irq(&rq->lock);
 }
 
 /*
@@ -1758,7 +1750,6 @@ static const struct sched_class rt_sched_class = {
         .rq_online              = rq_online_rt,
         .rq_offline             = rq_offline_rt,
         .pre_schedule           = pre_schedule_rt,
-        .needs_post_schedule    = needs_post_schedule_rt,
         .post_schedule          = post_schedule_rt,
         .task_wake_up           = task_wake_up_rt,
         .switched_from          = switched_from_rt,