author    Peter Zijlstra <a.p.zijlstra@chello.nl>  2009-12-16 12:04:41 -0500
committer Ingo Molnar <mingo@elte.hu>              2009-12-16 13:01:58 -0500
commit    88ec22d3edb72b261f8628226cd543589a6d5e1b (patch)
tree      ca547c82ab6adce0b9e29afb6ceddf50cd417d44
parent    efbbd05a595343a413964ad85a2ad359b7b7efbd (diff)
sched: Remove the cfs_rq dependency from set_task_cpu()
In order to remove the cfs_rq dependency from set_task_cpu(), we need to
ensure the task's vruntime is cfs_rq invariant at all callsites.

The simple approach is to subtract cfs_rq->min_vruntime from se->vruntime
on dequeue, and add cfs_rq->min_vruntime back on enqueue. However, this
has the downside of breaking FAIR_SLEEPERS, since we lose the old vruntime
and only maintain the relative position.

To solve this, observe that we only migrate runnable tasks, and that we do
so using deactivate_task(.sleep=0) and activate_task(.wakeup=0); we can
therefore restrict the min_vruntime invariance to that state.

The only other case is wakeup balancing. Since we want to preserve the old
vruntime there, we cannot make it relative on dequeue; but since we never
migrate inactive tasks, we can do so right before we activate the task
again. This is where we need the new pre-wakeup hook, which must be called
while still holding the old rq->lock. We could fold it into
->select_task_rq(), but since that has multiple callsites and doing so
would obscure the locking requirements, that seems like a fudge.

This leaves the fork() case: simply make sure that ->task_fork() leaves
->vruntime in a relative state.

Together these cover every path on which set_task_cpu() gets called, and
ensure it always sees a relative vruntime.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Mike Galbraith <efault@gmx.de>
LKML-Reference: <20091216170518.191697025@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
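[Editor's note] To make the arithmetic concrete, here is a minimal user-space
sketch of the invariant described above; the toy_rq/toy_se/toy_* names are
illustrative only, not kernel API. A runnable entity's vruntime is made
queue-relative on dequeue and absolute again on enqueue, which preserves its
lag against min_vruntime without set_task_cpu() touching either cfs_rq.

/*
 * Toy user-space model (not kernel code) of the migration invariant:
 * while a runnable task is off a queue, its vruntime is relative, so
 * moving it between queues needs no cfs_rq access.
 */
#include <assert.h>
#include <stdio.h>

struct toy_rq { unsigned long long min_vruntime; };
struct toy_se { unsigned long long vruntime; };

/* dequeue for migration (.sleep = 0): make vruntime queue-relative */
static void toy_dequeue(struct toy_rq *rq, struct toy_se *se)
{
	se->vruntime -= rq->min_vruntime;
}

/* enqueue after migration (.wakeup = 0): make vruntime absolute again */
static void toy_enqueue(struct toy_rq *rq, struct toy_se *se)
{
	se->vruntime += rq->min_vruntime;
}

int main(void)
{
	struct toy_rq old_rq = { .min_vruntime = 1000 };
	struct toy_rq new_rq = { .min_vruntime = 400 };
	struct toy_se se = { .vruntime = 1250 };	/* 250 ahead of old_rq */

	toy_dequeue(&old_rq, &se);	/* se.vruntime == 250, queue-independent */
	toy_enqueue(&new_rq, &se);	/* se.vruntime == 650, still 250 ahead */

	/* same net effect the removed set_task_cpu() adjustment computed */
	assert(se.vruntime - new_rq.min_vruntime == 250);
	printf("lag preserved: %llu\n", se.vruntime - new_rq.min_vruntime);
	return 0;
}

The end result matches the old in-place adjustment in set_task_cpu()
(vruntime -= old min_vruntime - new min_vruntime), but no queue state is
needed at migration time.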
-rw-r--r--  include/linux/sched.h |  2
-rw-r--r--  kernel/sched.c        |  6
-rw-r--r--  kernel/sched_fair.c   | 50
3 files changed, 46 insertions(+), 12 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 2c9fa1ccebff..973b2b89f86d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1116,7 +1116,7 @@ struct sched_class {
 			      struct task_struct *task);
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-	void (*moved_group) (struct task_struct *p);
+	void (*moved_group) (struct task_struct *p, int on_rq);
 #endif
 };
 
diff --git a/kernel/sched.c b/kernel/sched.c
index 6c571bdd5658..f92ce63edfff 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2038,8 +2038,6 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
 void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 {
 	int old_cpu = task_cpu(p);
-	struct cfs_rq *old_cfsrq = task_cfs_rq(p),
-		      *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
 
 #ifdef CONFIG_SCHED_DEBUG
 	/*
@@ -2056,8 +2054,6 @@ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
 		perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
 				      1, 1, NULL, 0);
 	}
-	p->se.vruntime -= old_cfsrq->min_vruntime -
-			  new_cfsrq->min_vruntime;
 
 	__set_task_cpu(p, new_cpu);
 }
@@ -10102,7 +10098,7 @@ void sched_move_task(struct task_struct *tsk)
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	if (tsk->sched_class->moved_group)
-		tsk->sched_class->moved_group(tsk);
+		tsk->sched_class->moved_group(tsk, on_rq);
 #endif
 
 	if (unlikely(running))
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index ec1d2715620c..42ac3c9f66f6 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -510,6 +510,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	curr->sum_exec_runtime += delta_exec;
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+
 	curr->vruntime += delta_exec_weighted;
 	update_min_vruntime(cfs_rq);
 }
@@ -765,16 +766,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 	se->vruntime = vruntime;
 }
 
+#define ENQUEUE_WAKEUP	1
+#define ENQUEUE_MIGRATE	2
+
 static void
-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
 {
 	/*
+	 * Update the normalized vruntime before updating min_vruntime
+	 * through calling update_curr().
+	 */
+	if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+		se->vruntime += cfs_rq->min_vruntime;
+
+	/*
 	 * Update run-time statistics of the 'current'.
 	 */
 	update_curr(cfs_rq);
 	account_entity_enqueue(cfs_rq, se);
 
-	if (wakeup) {
+	if (flags & ENQUEUE_WAKEUP) {
 		place_entity(cfs_rq, se, 0);
 		enqueue_sleeper(cfs_rq, se);
 	}
@@ -828,6 +839,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 	__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
 	update_min_vruntime(cfs_rq);
+
+	/*
+	 * Normalize the entity after updating the min_vruntime because the
+	 * update can refer to the ->curr item and we need to reflect this
+	 * movement in our normalized position.
+	 */
+	if (!sleep)
+		se->vruntime -= cfs_rq->min_vruntime;
 }
 
 /*
@@ -1038,13 +1057,19 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
 	struct sched_entity *se = &p->se;
+	int flags = 0;
+
+	if (wakeup)
+		flags |= ENQUEUE_WAKEUP;
+	if (p->state == TASK_WAKING)
+		flags |= ENQUEUE_MIGRATE;
 
 	for_each_sched_entity(se) {
 		if (se->on_rq)
 			break;
 		cfs_rq = cfs_rq_of(se);
-		enqueue_entity(cfs_rq, se, wakeup);
-		wakeup = 1;
+		enqueue_entity(cfs_rq, se, flags);
+		flags = ENQUEUE_WAKEUP;
 	}
 
 	hrtick_update(rq);
@@ -1120,6 +1145,14 @@ static void yield_task_fair(struct rq *rq)
 
 #ifdef CONFIG_SMP
 
+static void task_waking_fair(struct rq *rq, struct task_struct *p)
+{
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	se->vruntime -= cfs_rq->min_vruntime;
+}
+
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /*
  * effective_load() calculates the load change as seen from the root_task_group
@@ -1978,6 +2011,8 @@ static void task_fork_fair(struct task_struct *p)
 		resched_task(rq->curr);
 	}
 
+	se->vruntime -= cfs_rq->min_vruntime;
+
 	raw_spin_unlock_irqrestore(&rq->lock, flags);
 }
 
@@ -2031,12 +2066,13 @@ static void set_curr_task_fair(struct rq *rq)
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
-static void moved_group_fair(struct task_struct *p)
+static void moved_group_fair(struct task_struct *p, int on_rq)
 {
 	struct cfs_rq *cfs_rq = task_cfs_rq(p);
 
 	update_curr(cfs_rq);
-	place_entity(cfs_rq, &p->se, 1);
+	if (!on_rq)
+		place_entity(cfs_rq, &p->se, 1);
 }
 #endif
 
@@ -2076,6 +2112,8 @@ static const struct sched_class fair_sched_class = {
 	.move_one_task		= move_one_task_fair,
 	.rq_online		= rq_online_fair,
 	.rq_offline		= rq_offline_fair,
+
+	.task_waking		= task_waking_fair,
 #endif
 
 	.set_curr_task		= set_curr_task_fair,
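
[Editor's note] A second sketch, with the same caveats as the first (toy_*
names are illustrative, and the try_to_wake_up() ordering is condensed from
this kernel series rather than quoted), shows why wakeup migration needs both
the pre-wakeup hook and the ENQUEUE_MIGRATE flag: a sleeper keeps its absolute
vruntime, so task_waking_fair() normalizes it under the old rq->lock, and the
enqueue must re-add min_vruntime even though it is also a wakeup.

/*
 * Toy model (not kernel code) of the wakeup-migration path: the task
 * slept with an absolute vruntime, ->task_waking() makes it relative
 * while the old rq->lock is still held, and ENQUEUE_WAKEUP|ENQUEUE_MIGRATE
 * makes the enqueue re-add the new queue's min_vruntime before placement.
 */
#include <stdio.h>

struct toy_rq { unsigned long long min_vruntime; };
struct toy_se { unsigned long long vruntime; };

#define TOY_ENQUEUE_WAKEUP	1
#define TOY_ENQUEUE_MIGRATE	2

/* mirrors task_waking_fair(): runs under the old rq->lock */
static void toy_task_waking(struct toy_rq *old_rq, struct toy_se *se)
{
	se->vruntime -= old_rq->min_vruntime;
}

/* mirrors the enqueue_entity() condition added by this patch */
static void toy_enqueue(struct toy_rq *rq, struct toy_se *se, int flags)
{
	if (!(flags & TOY_ENQUEUE_WAKEUP) || (flags & TOY_ENQUEUE_MIGRATE))
		se->vruntime += rq->min_vruntime;
}

int main(void)
{
	struct toy_rq old_rq = { .min_vruntime = 5000 };
	struct toy_rq new_rq = { .min_vruntime = 700 };
	struct toy_se se = { .vruntime = 5300 };	/* slept; absolute value kept */

	toy_task_waking(&old_rq, &se);	/* now relative: 300 */
	/* set_task_cpu() would run here, needing no cfs_rq state */
	toy_enqueue(&new_rq, &se, TOY_ENQUEUE_WAKEUP | TOY_ENQUEUE_MIGRATE);

	printf("vruntime on new queue: %llu (300 ahead of min %llu)\n",
	       se.vruntime, new_rq.min_vruntime);
	return 0;
}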