aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2008-10-28 12:46:20 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2008-10-28 12:46:20 -0400
commit8ca6215502462f564d7bcae2d8dcc825aa95d743 (patch)
tree1534f8ad77640ab6f6d9471679b6e4c2d11e739c /kernel
parentf8245e91a5121acc435e509aa56cd04d445a74c7 (diff)
parent4078e359c4688541a0093fde0dff35dc7190c4f5 (diff)
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip
* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip: sched: fix documentation reference for sched_min_granularity_ns sched: virtual time buddy preemption sched: re-instate vruntime based wakeup preemption sched: weaken sync hint sched: more accurate min_vruntime accounting sched: fix a find_busiest_group buglet sched: add CONFIG_SMP consistency
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched.c3
-rw-r--r--kernel/sched_fair.c169
-rw-r--r--kernel/sched_idletask.c5
-rw-r--r--kernel/sched_rt.c5
4 files changed, 131 insertions, 51 deletions
diff --git a/kernel/sched.c b/kernel/sched.c
index 6625c3c4b10d..e8819bc6f462 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -386,7 +386,6 @@ struct cfs_rq {
386 386
387 u64 exec_clock; 387 u64 exec_clock;
388 u64 min_vruntime; 388 u64 min_vruntime;
389 u64 pair_start;
390 389
391 struct rb_root tasks_timeline; 390 struct rb_root tasks_timeline;
392 struct rb_node *rb_leftmost; 391 struct rb_node *rb_leftmost;
@@ -3344,7 +3343,7 @@ small_imbalance:
3344 } else 3343 } else
3345 this_load_per_task = cpu_avg_load_per_task(this_cpu); 3344 this_load_per_task = cpu_avg_load_per_task(this_cpu);
3346 3345
3347 if (max_load - this_load + 2*busiest_load_per_task >= 3346 if (max_load - this_load + busiest_load_per_task >=
3348 busiest_load_per_task * imbn) { 3347 busiest_load_per_task * imbn) {
3349 *imbalance = busiest_load_per_task; 3348 *imbalance = busiest_load_per_task;
3350 return busiest; 3349 return busiest;
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b8..ce514afd78ff 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
143 return se->parent; 143 return se->parent;
144} 144}
145 145
146/* return depth at which a sched entity is present in the hierarchy */
147static inline int depth_se(struct sched_entity *se)
148{
149 int depth = 0;
150
151 for_each_sched_entity(se)
152 depth++;
153
154 return depth;
155}
156
157static void
158find_matching_se(struct sched_entity **se, struct sched_entity **pse)
159{
160 int se_depth, pse_depth;
161
162 /*
163 * preemption test can be made between sibling entities who are in the
164 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
165 * both tasks until we find their ancestors who are siblings of common
166 * parent.
167 */
168
169 /* First walk up until both entities are at same depth */
170 se_depth = depth_se(*se);
171 pse_depth = depth_se(*pse);
172
173 while (se_depth > pse_depth) {
174 se_depth--;
175 *se = parent_entity(*se);
176 }
177
178 while (pse_depth > se_depth) {
179 pse_depth--;
180 *pse = parent_entity(*pse);
181 }
182
183 while (!is_same_group(*se, *pse)) {
184 *se = parent_entity(*se);
185 *pse = parent_entity(*pse);
186 }
187}
188
146#else /* CONFIG_FAIR_GROUP_SCHED */ 189#else /* CONFIG_FAIR_GROUP_SCHED */
147 190
148static inline struct rq *rq_of(struct cfs_rq *cfs_rq) 191static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
193 return NULL; 236 return NULL;
194} 237}
195 238
239static inline void
240find_matching_se(struct sched_entity **se, struct sched_entity **pse)
241{
242}
243
196#endif /* CONFIG_FAIR_GROUP_SCHED */ 244#endif /* CONFIG_FAIR_GROUP_SCHED */
197 245
198 246
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
223 return se->vruntime - cfs_rq->min_vruntime; 271 return se->vruntime - cfs_rq->min_vruntime;
224} 272}
225 273
274static void update_min_vruntime(struct cfs_rq *cfs_rq)
275{
276 u64 vruntime = cfs_rq->min_vruntime;
277
278 if (cfs_rq->curr)
279 vruntime = cfs_rq->curr->vruntime;
280
281 if (cfs_rq->rb_leftmost) {
282 struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
283 struct sched_entity,
284 run_node);
285
286 if (vruntime == cfs_rq->min_vruntime)
287 vruntime = se->vruntime;
288 else
289 vruntime = min_vruntime(vruntime, se->vruntime);
290 }
291
292 cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
293}
294
226/* 295/*
227 * Enqueue an entity into the rb-tree: 296 * Enqueue an entity into the rb-tree:
228 */ 297 */
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
256 * Maintain a cache of leftmost tree entries (it is frequently 325 * Maintain a cache of leftmost tree entries (it is frequently
257 * used): 326 * used):
258 */ 327 */
259 if (leftmost) { 328 if (leftmost)
260 cfs_rq->rb_leftmost = &se->run_node; 329 cfs_rq->rb_leftmost = &se->run_node;
261 /*
262 * maintain cfs_rq->min_vruntime to be a monotonic increasing
263 * value tracking the leftmost vruntime in the tree.
264 */
265 cfs_rq->min_vruntime =
266 max_vruntime(cfs_rq->min_vruntime, se->vruntime);
267 }
268 330
269 rb_link_node(&se->run_node, parent, link); 331 rb_link_node(&se->run_node, parent, link);
270 rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); 332 rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,18 +336,9 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
274{ 336{
275 if (cfs_rq->rb_leftmost == &se->run_node) { 337 if (cfs_rq->rb_leftmost == &se->run_node) {
276 struct rb_node *next_node; 338 struct rb_node *next_node;
277 struct sched_entity *next;
278 339
279 next_node = rb_next(&se->run_node); 340 next_node = rb_next(&se->run_node);
280 cfs_rq->rb_leftmost = next_node; 341 cfs_rq->rb_leftmost = next_node;
281
282 if (next_node) {
283 next = rb_entry(next_node,
284 struct sched_entity, run_node);
285 cfs_rq->min_vruntime =
286 max_vruntime(cfs_rq->min_vruntime,
287 next->vruntime);
288 }
289 } 342 }
290 343
291 if (cfs_rq->next == se) 344 if (cfs_rq->next == se)
@@ -424,6 +477,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
424 schedstat_add(cfs_rq, exec_clock, delta_exec); 477 schedstat_add(cfs_rq, exec_clock, delta_exec);
425 delta_exec_weighted = calc_delta_fair(delta_exec, curr); 478 delta_exec_weighted = calc_delta_fair(delta_exec, curr);
426 curr->vruntime += delta_exec_weighted; 479 curr->vruntime += delta_exec_weighted;
480 update_min_vruntime(cfs_rq);
427} 481}
428 482
429static void update_curr(struct cfs_rq *cfs_rq) 483static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +667,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
613static void 667static void
614place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) 668place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
615{ 669{
616 u64 vruntime; 670 u64 vruntime = cfs_rq->min_vruntime;
617
618 if (first_fair(cfs_rq)) {
619 vruntime = min_vruntime(cfs_rq->min_vruntime,
620 __pick_next_entity(cfs_rq)->vruntime);
621 } else
622 vruntime = cfs_rq->min_vruntime;
623 671
624 /* 672 /*
625 * The 'current' period is already promised to the current tasks, 673 * The 'current' period is already promised to the current tasks,
@@ -696,6 +744,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
696 if (se != cfs_rq->curr) 744 if (se != cfs_rq->curr)
697 __dequeue_entity(cfs_rq, se); 745 __dequeue_entity(cfs_rq, se);
698 account_entity_dequeue(cfs_rq, se); 746 account_entity_dequeue(cfs_rq, se);
747 update_min_vruntime(cfs_rq);
699} 748}
700 749
701/* 750/*
@@ -742,16 +791,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
742 se->prev_sum_exec_runtime = se->sum_exec_runtime; 791 se->prev_sum_exec_runtime = se->sum_exec_runtime;
743} 792}
744 793
794static int
795wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
796
745static struct sched_entity * 797static struct sched_entity *
746pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) 798pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
747{ 799{
748 struct rq *rq = rq_of(cfs_rq); 800 if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1)
749 u64 pair_slice = rq->clock - cfs_rq->pair_start;
750
751 if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
752 cfs_rq->pair_start = rq->clock;
753 return se; 801 return se;
754 }
755 802
756 return cfs_rq->next; 803 return cfs_rq->next;
757} 804}
@@ -1122,10 +1169,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
1122 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) 1169 if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
1123 return 0; 1170 return 0;
1124 1171
1125 if (!sync && sched_feat(SYNC_WAKEUPS) && 1172 if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
1126 curr->se.avg_overlap < sysctl_sched_migration_cost && 1173 p->se.avg_overlap > sysctl_sched_migration_cost))
1127 p->se.avg_overlap < sysctl_sched_migration_cost) 1174 sync = 0;
1128 sync = 1;
1129 1175
1130 /* 1176 /*
1131 * If sync wakeup then subtract the (maximum possible) 1177 * If sync wakeup then subtract the (maximum possible)
@@ -1244,13 +1290,42 @@ static unsigned long wakeup_gran(struct sched_entity *se)
1244 * More easily preempt - nice tasks, while not making it harder for 1290 * More easily preempt - nice tasks, while not making it harder for
1245 * + nice tasks. 1291 * + nice tasks.
1246 */ 1292 */
1247 if (sched_feat(ASYM_GRAN)) 1293 if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
1248 gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); 1294 gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
1249 1295
1250 return gran; 1296 return gran;
1251} 1297}
1252 1298
1253/* 1299/*
1300 * Should 'se' preempt 'curr'.
1301 *
1302 * |s1
1303 * |s2
1304 * |s3
1305 * g
1306 * |<--->|c
1307 *
1308 * w(c, s1) = -1
1309 * w(c, s2) = 0
1310 * w(c, s3) = 1
1311 *
1312 */
1313static int
1314wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
1315{
1316 s64 gran, vdiff = curr->vruntime - se->vruntime;
1317
1318 if (vdiff <= 0)
1319 return -1;
1320
1321 gran = wakeup_gran(curr);
1322 if (vdiff > gran)
1323 return 1;
1324
1325 return 0;
1326}
1327
1328/*
1254 * Preempt the current task with a newly woken task if needed: 1329 * Preempt the current task with a newly woken task if needed:
1255 */ 1330 */
1256static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) 1331static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
@@ -1258,7 +1333,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1258 struct task_struct *curr = rq->curr; 1333 struct task_struct *curr = rq->curr;
1259 struct cfs_rq *cfs_rq = task_cfs_rq(curr); 1334 struct cfs_rq *cfs_rq = task_cfs_rq(curr);
1260 struct sched_entity *se = &curr->se, *pse = &p->se; 1335 struct sched_entity *se = &curr->se, *pse = &p->se;
1261 s64 delta_exec;
1262 1336
1263 if (unlikely(rt_prio(p->prio))) { 1337 if (unlikely(rt_prio(p->prio))) {
1264 update_rq_clock(rq); 1338 update_rq_clock(rq);
@@ -1296,9 +1370,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
1296 return; 1370 return;
1297 } 1371 }
1298 1372
1299 delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; 1373 find_matching_se(&se, &pse);
1300 if (delta_exec > wakeup_gran(pse)) 1374
1301 resched_task(curr); 1375 while (se) {
1376 BUG_ON(!pse);
1377
1378 if (wakeup_preempt_entity(se, pse) == 1) {
1379 resched_task(curr);
1380 break;
1381 }
1382
1383 se = parent_entity(se);
1384 pse = parent_entity(pse);
1385 }
1302} 1386}
1303 1387
1304static struct task_struct *pick_next_task_fair(struct rq *rq) 1388static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1594,9 +1678,6 @@ static const struct sched_class fair_sched_class = {
1594 .enqueue_task = enqueue_task_fair, 1678 .enqueue_task = enqueue_task_fair,
1595 .dequeue_task = dequeue_task_fair, 1679 .dequeue_task = dequeue_task_fair,
1596 .yield_task = yield_task_fair, 1680 .yield_task = yield_task_fair,
1597#ifdef CONFIG_SMP
1598 .select_task_rq = select_task_rq_fair,
1599#endif /* CONFIG_SMP */
1600 1681
1601 .check_preempt_curr = check_preempt_wakeup, 1682 .check_preempt_curr = check_preempt_wakeup,
1602 1683
@@ -1604,6 +1685,8 @@ static const struct sched_class fair_sched_class = {
1604 .put_prev_task = put_prev_task_fair, 1685 .put_prev_task = put_prev_task_fair,
1605 1686
1606#ifdef CONFIG_SMP 1687#ifdef CONFIG_SMP
1688 .select_task_rq = select_task_rq_fair,
1689
1607 .load_balance = load_balance_fair, 1690 .load_balance = load_balance_fair,
1608 .move_one_task = move_one_task_fair, 1691 .move_one_task = move_one_task_fair,
1609#endif 1692#endif
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index dec4ccabe2f5..8a21a2e28c13 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = {
105 105
106 /* dequeue is not valid, we print a debug message there: */ 106 /* dequeue is not valid, we print a debug message there: */
107 .dequeue_task = dequeue_task_idle, 107 .dequeue_task = dequeue_task_idle,
108#ifdef CONFIG_SMP
109 .select_task_rq = select_task_rq_idle,
110#endif /* CONFIG_SMP */
111 108
112 .check_preempt_curr = check_preempt_curr_idle, 109 .check_preempt_curr = check_preempt_curr_idle,
113 110
@@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = {
115 .put_prev_task = put_prev_task_idle, 112 .put_prev_task = put_prev_task_idle,
116 113
117#ifdef CONFIG_SMP 114#ifdef CONFIG_SMP
115 .select_task_rq = select_task_rq_idle,
116
118 .load_balance = load_balance_idle, 117 .load_balance = load_balance_idle,
119 .move_one_task = move_one_task_idle, 118 .move_one_task = move_one_task_idle,
120#endif 119#endif
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index b446dc87494f..d9ba9d5f99d6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1504,9 +1504,6 @@ static const struct sched_class rt_sched_class = {
1504 .enqueue_task = enqueue_task_rt, 1504 .enqueue_task = enqueue_task_rt,
1505 .dequeue_task = dequeue_task_rt, 1505 .dequeue_task = dequeue_task_rt,
1506 .yield_task = yield_task_rt, 1506 .yield_task = yield_task_rt,
1507#ifdef CONFIG_SMP
1508 .select_task_rq = select_task_rq_rt,
1509#endif /* CONFIG_SMP */
1510 1507
1511 .check_preempt_curr = check_preempt_curr_rt, 1508 .check_preempt_curr = check_preempt_curr_rt,
1512 1509
@@ -1514,6 +1511,8 @@ static const struct sched_class rt_sched_class = {
1514 .put_prev_task = put_prev_task_rt, 1511 .put_prev_task = put_prev_task_rt,
1515 1512
1516#ifdef CONFIG_SMP 1513#ifdef CONFIG_SMP
1514 .select_task_rq = select_task_rq_rt,
1515
1517 .load_balance = load_balance_rt, 1516 .load_balance = load_balance_rt,
1518 .move_one_task = move_one_task_rt, 1517 .move_one_task = move_one_task_rt,
1519 .set_cpus_allowed = set_cpus_allowed_rt, 1518 .set_cpus_allowed = set_cpus_allowed_rt,