diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-28 12:46:20 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2008-10-28 12:46:20 -0400 |
commit | 8ca6215502462f564d7bcae2d8dcc825aa95d743 (patch) | |
tree | 1534f8ad77640ab6f6d9471679b6e4c2d11e739c /kernel | |
parent | f8245e91a5121acc435e509aa56cd04d445a74c7 (diff) | |
parent | 4078e359c4688541a0093fde0dff35dc7190c4f5 (diff) |
Merge branch 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'sched-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  sched: fix documentation reference for sched_min_granularity_ns
  sched: virtual time buddy preemption
  sched: re-instate vruntime based wakeup preemption
  sched: weaken sync hint
  sched: more accurate min_vruntime accounting
  sched: fix a find_busiest_group buglet
  sched: add CONFIG_SMP consistency

Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 3 | ||||
-rw-r--r-- | kernel/sched_fair.c | 169 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 5 | ||||
-rw-r--r-- | kernel/sched_rt.c | 5 |
4 files changed, 131 insertions, 51 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index 6625c3c4b10d..e8819bc6f462 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -386,7 +386,6 @@ struct cfs_rq { | |||
386 | 386 | ||
387 | u64 exec_clock; | 387 | u64 exec_clock; |
388 | u64 min_vruntime; | 388 | u64 min_vruntime; |
389 | u64 pair_start; | ||
390 | 389 | ||
391 | struct rb_root tasks_timeline; | 390 | struct rb_root tasks_timeline; |
392 | struct rb_node *rb_leftmost; | 391 | struct rb_node *rb_leftmost; |
@@ -3344,7 +3343,7 @@ small_imbalance: | |||
3344 | } else | 3343 | } else |
3345 | this_load_per_task = cpu_avg_load_per_task(this_cpu); | 3344 | this_load_per_task = cpu_avg_load_per_task(this_cpu); |
3346 | 3345 | ||
3347 | if (max_load - this_load + 2*busiest_load_per_task >= | 3346 | if (max_load - this_load + busiest_load_per_task >= |
3348 | busiest_load_per_task * imbn) { | 3347 | busiest_load_per_task * imbn) { |
3349 | *imbalance = busiest_load_per_task; | 3348 | *imbalance = busiest_load_per_task; |
3350 | return busiest; | 3349 | return busiest; |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 9573c33688b8..ce514afd78ff 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) | |||
143 | return se->parent; | 143 | return se->parent; |
144 | } | 144 | } |
145 | 145 | ||
146 | /* return depth at which a sched entity is present in the hierarchy */ | ||
147 | static inline int depth_se(struct sched_entity *se) | ||
148 | { | ||
149 | int depth = 0; | ||
150 | |||
151 | for_each_sched_entity(se) | ||
152 | depth++; | ||
153 | |||
154 | return depth; | ||
155 | } | ||
156 | |||
157 | static void | ||
158 | find_matching_se(struct sched_entity **se, struct sched_entity **pse) | ||
159 | { | ||
160 | int se_depth, pse_depth; | ||
161 | |||
162 | /* | ||
163 | * preemption test can be made between sibling entities who are in the | ||
164 | * same cfs_rq i.e who have a common parent. Walk up the hierarchy of | ||
165 | * both tasks until we find their ancestors who are siblings of common | ||
166 | * parent. | ||
167 | */ | ||
168 | |||
169 | /* First walk up until both entities are at same depth */ | ||
170 | se_depth = depth_se(*se); | ||
171 | pse_depth = depth_se(*pse); | ||
172 | |||
173 | while (se_depth > pse_depth) { | ||
174 | se_depth--; | ||
175 | *se = parent_entity(*se); | ||
176 | } | ||
177 | |||
178 | while (pse_depth > se_depth) { | ||
179 | pse_depth--; | ||
180 | *pse = parent_entity(*pse); | ||
181 | } | ||
182 | |||
183 | while (!is_same_group(*se, *pse)) { | ||
184 | *se = parent_entity(*se); | ||
185 | *pse = parent_entity(*pse); | ||
186 | } | ||
187 | } | ||
188 | |||
146 | #else /* CONFIG_FAIR_GROUP_SCHED */ | 189 | #else /* CONFIG_FAIR_GROUP_SCHED */ |
147 | 190 | ||
148 | static inline struct rq *rq_of(struct cfs_rq *cfs_rq) | 191 | static inline struct rq *rq_of(struct cfs_rq *cfs_rq) |
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) | |||
193 | return NULL; | 236 | return NULL; |
194 | } | 237 | } |
195 | 238 | ||
239 | static inline void | ||
240 | find_matching_se(struct sched_entity **se, struct sched_entity **pse) | ||
241 | { | ||
242 | } | ||
243 | |||
196 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 244 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
197 | 245 | ||
198 | 246 | ||
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
223 | return se->vruntime - cfs_rq->min_vruntime; | 271 | return se->vruntime - cfs_rq->min_vruntime; |
224 | } | 272 | } |
225 | 273 | ||
274 | static void update_min_vruntime(struct cfs_rq *cfs_rq) | ||
275 | { | ||
276 | u64 vruntime = cfs_rq->min_vruntime; | ||
277 | |||
278 | if (cfs_rq->curr) | ||
279 | vruntime = cfs_rq->curr->vruntime; | ||
280 | |||
281 | if (cfs_rq->rb_leftmost) { | ||
282 | struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost, | ||
283 | struct sched_entity, | ||
284 | run_node); | ||
285 | |||
286 | if (vruntime == cfs_rq->min_vruntime) | ||
287 | vruntime = se->vruntime; | ||
288 | else | ||
289 | vruntime = min_vruntime(vruntime, se->vruntime); | ||
290 | } | ||
291 | |||
292 | cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); | ||
293 | } | ||
294 | |||
226 | /* | 295 | /* |
227 | * Enqueue an entity into the rb-tree: | 296 | * Enqueue an entity into the rb-tree: |
228 | */ | 297 | */ |
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
256 | * Maintain a cache of leftmost tree entries (it is frequently | 325 | * Maintain a cache of leftmost tree entries (it is frequently |
257 | * used): | 326 | * used): |
258 | */ | 327 | */ |
259 | if (leftmost) { | 328 | if (leftmost) |
260 | cfs_rq->rb_leftmost = &se->run_node; | 329 | cfs_rq->rb_leftmost = &se->run_node; |
261 | /* | ||
262 | * maintain cfs_rq->min_vruntime to be a monotonic increasing | ||
263 | * value tracking the leftmost vruntime in the tree. | ||
264 | */ | ||
265 | cfs_rq->min_vruntime = | ||
266 | max_vruntime(cfs_rq->min_vruntime, se->vruntime); | ||
267 | } | ||
268 | 330 | ||
269 | rb_link_node(&se->run_node, parent, link); | 331 | rb_link_node(&se->run_node, parent, link); |
270 | rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); | 332 | rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); |
@@ -274,18 +336,9 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
274 | { | 336 | { |
275 | if (cfs_rq->rb_leftmost == &se->run_node) { | 337 | if (cfs_rq->rb_leftmost == &se->run_node) { |
276 | struct rb_node *next_node; | 338 | struct rb_node *next_node; |
277 | struct sched_entity *next; | ||
278 | 339 | ||
279 | next_node = rb_next(&se->run_node); | 340 | next_node = rb_next(&se->run_node); |
280 | cfs_rq->rb_leftmost = next_node; | 341 | cfs_rq->rb_leftmost = next_node; |
281 | |||
282 | if (next_node) { | ||
283 | next = rb_entry(next_node, | ||
284 | struct sched_entity, run_node); | ||
285 | cfs_rq->min_vruntime = | ||
286 | max_vruntime(cfs_rq->min_vruntime, | ||
287 | next->vruntime); | ||
288 | } | ||
289 | } | 342 | } |
290 | 343 | ||
291 | if (cfs_rq->next == se) | 344 | if (cfs_rq->next == se) |
@@ -424,6 +477,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
424 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 477 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
425 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); | 478 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); |
426 | curr->vruntime += delta_exec_weighted; | 479 | curr->vruntime += delta_exec_weighted; |
480 | update_min_vruntime(cfs_rq); | ||
427 | } | 481 | } |
428 | 482 | ||
429 | static void update_curr(struct cfs_rq *cfs_rq) | 483 | static void update_curr(struct cfs_rq *cfs_rq) |
@@ -613,13 +667,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
613 | static void | 667 | static void |
614 | place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | 668 | place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) |
615 | { | 669 | { |
616 | u64 vruntime; | 670 | u64 vruntime = cfs_rq->min_vruntime; |
617 | |||
618 | if (first_fair(cfs_rq)) { | ||
619 | vruntime = min_vruntime(cfs_rq->min_vruntime, | ||
620 | __pick_next_entity(cfs_rq)->vruntime); | ||
621 | } else | ||
622 | vruntime = cfs_rq->min_vruntime; | ||
623 | 671 | ||
624 | /* | 672 | /* |
625 | * The 'current' period is already promised to the current tasks, | 673 | * The 'current' period is already promised to the current tasks, |
@@ -696,6 +744,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
696 | if (se != cfs_rq->curr) | 744 | if (se != cfs_rq->curr) |
697 | __dequeue_entity(cfs_rq, se); | 745 | __dequeue_entity(cfs_rq, se); |
698 | account_entity_dequeue(cfs_rq, se); | 746 | account_entity_dequeue(cfs_rq, se); |
747 | update_min_vruntime(cfs_rq); | ||
699 | } | 748 | } |
700 | 749 | ||
701 | /* | 750 | /* |
@@ -742,16 +791,14 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
742 | se->prev_sum_exec_runtime = se->sum_exec_runtime; | 791 | se->prev_sum_exec_runtime = se->sum_exec_runtime; |
743 | } | 792 | } |
744 | 793 | ||
794 | static int | ||
795 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); | ||
796 | |||
745 | static struct sched_entity * | 797 | static struct sched_entity * |
746 | pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) | 798 | pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) |
747 | { | 799 | { |
748 | struct rq *rq = rq_of(cfs_rq); | 800 | if (!cfs_rq->next || wakeup_preempt_entity(cfs_rq->next, se) == 1) |
749 | u64 pair_slice = rq->clock - cfs_rq->pair_start; | ||
750 | |||
751 | if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) { | ||
752 | cfs_rq->pair_start = rq->clock; | ||
753 | return se; | 801 | return se; |
754 | } | ||
755 | 802 | ||
756 | return cfs_rq->next; | 803 | return cfs_rq->next; |
757 | } | 804 | } |
@@ -1122,10 +1169,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
1122 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) | 1169 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) |
1123 | return 0; | 1170 | return 0; |
1124 | 1171 | ||
1125 | if (!sync && sched_feat(SYNC_WAKEUPS) && | 1172 | if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || |
1126 | curr->se.avg_overlap < sysctl_sched_migration_cost && | 1173 | p->se.avg_overlap > sysctl_sched_migration_cost)) |
1127 | p->se.avg_overlap < sysctl_sched_migration_cost) | 1174 | sync = 0; |
1128 | sync = 1; | ||
1129 | 1175 | ||
1130 | /* | 1176 | /* |
1131 | * If sync wakeup then subtract the (maximum possible) | 1177 | * If sync wakeup then subtract the (maximum possible) |
@@ -1244,13 +1290,42 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
1244 | * More easily preempt - nice tasks, while not making it harder for | 1290 | * More easily preempt - nice tasks, while not making it harder for |
1245 | * + nice tasks. | 1291 | * + nice tasks. |
1246 | */ | 1292 | */ |
1247 | if (sched_feat(ASYM_GRAN)) | 1293 | if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD) |
1248 | gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); | 1294 | gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); |
1249 | 1295 | ||
1250 | return gran; | 1296 | return gran; |
1251 | } | 1297 | } |
1252 | 1298 | ||
1253 | /* | 1299 | /* |
1300 | * Should 'se' preempt 'curr'. | ||
1301 | * | ||
1302 | * |s1 | ||
1303 | * |s2 | ||
1304 | * |s3 | ||
1305 | * g | ||
1306 | * |<--->|c | ||
1307 | * | ||
1308 | * w(c, s1) = -1 | ||
1309 | * w(c, s2) = 0 | ||
1310 | * w(c, s3) = 1 | ||
1311 | * | ||
1312 | */ | ||
1313 | static int | ||
1314 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) | ||
1315 | { | ||
1316 | s64 gran, vdiff = curr->vruntime - se->vruntime; | ||
1317 | |||
1318 | if (vdiff <= 0) | ||
1319 | return -1; | ||
1320 | |||
1321 | gran = wakeup_gran(curr); | ||
1322 | if (vdiff > gran) | ||
1323 | return 1; | ||
1324 | |||
1325 | return 0; | ||
1326 | } | ||
1327 | |||
1328 | /* | ||
1254 | * Preempt the current task with a newly woken task if needed: | 1329 | * Preempt the current task with a newly woken task if needed: |
1255 | */ | 1330 | */ |
1256 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | 1331 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) |
@@ -1258,7 +1333,6 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
1258 | struct task_struct *curr = rq->curr; | 1333 | struct task_struct *curr = rq->curr; |
1259 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | 1334 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); |
1260 | struct sched_entity *se = &curr->se, *pse = &p->se; | 1335 | struct sched_entity *se = &curr->se, *pse = &p->se; |
1261 | s64 delta_exec; | ||
1262 | 1336 | ||
1263 | if (unlikely(rt_prio(p->prio))) { | 1337 | if (unlikely(rt_prio(p->prio))) { |
1264 | update_rq_clock(rq); | 1338 | update_rq_clock(rq); |
@@ -1296,9 +1370,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
1296 | return; | 1370 | return; |
1297 | } | 1371 | } |
1298 | 1372 | ||
1299 | delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; | 1373 | find_matching_se(&se, &pse); |
1300 | if (delta_exec > wakeup_gran(pse)) | 1374 | |
1301 | resched_task(curr); | 1375 | while (se) { |
1376 | BUG_ON(!pse); | ||
1377 | |||
1378 | if (wakeup_preempt_entity(se, pse) == 1) { | ||
1379 | resched_task(curr); | ||
1380 | break; | ||
1381 | } | ||
1382 | |||
1383 | se = parent_entity(se); | ||
1384 | pse = parent_entity(pse); | ||
1385 | } | ||
1302 | } | 1386 | } |
1303 | 1387 | ||
1304 | static struct task_struct *pick_next_task_fair(struct rq *rq) | 1388 | static struct task_struct *pick_next_task_fair(struct rq *rq) |
@@ -1594,9 +1678,6 @@ static const struct sched_class fair_sched_class = { | |||
1594 | .enqueue_task = enqueue_task_fair, | 1678 | .enqueue_task = enqueue_task_fair, |
1595 | .dequeue_task = dequeue_task_fair, | 1679 | .dequeue_task = dequeue_task_fair, |
1596 | .yield_task = yield_task_fair, | 1680 | .yield_task = yield_task_fair, |
1597 | #ifdef CONFIG_SMP | ||
1598 | .select_task_rq = select_task_rq_fair, | ||
1599 | #endif /* CONFIG_SMP */ | ||
1600 | 1681 | ||
1601 | .check_preempt_curr = check_preempt_wakeup, | 1682 | .check_preempt_curr = check_preempt_wakeup, |
1602 | 1683 | ||
@@ -1604,6 +1685,8 @@ static const struct sched_class fair_sched_class = { | |||
1604 | .put_prev_task = put_prev_task_fair, | 1685 | .put_prev_task = put_prev_task_fair, |
1605 | 1686 | ||
1606 | #ifdef CONFIG_SMP | 1687 | #ifdef CONFIG_SMP |
1688 | .select_task_rq = select_task_rq_fair, | ||
1689 | |||
1607 | .load_balance = load_balance_fair, | 1690 | .load_balance = load_balance_fair, |
1608 | .move_one_task = move_one_task_fair, | 1691 | .move_one_task = move_one_task_fair, |
1609 | #endif | 1692 | #endif |
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index dec4ccabe2f5..8a21a2e28c13 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -105,9 +105,6 @@ static const struct sched_class idle_sched_class = { | |||
105 | 105 | ||
106 | /* dequeue is not valid, we print a debug message there: */ | 106 | /* dequeue is not valid, we print a debug message there: */ |
107 | .dequeue_task = dequeue_task_idle, | 107 | .dequeue_task = dequeue_task_idle, |
108 | #ifdef CONFIG_SMP | ||
109 | .select_task_rq = select_task_rq_idle, | ||
110 | #endif /* CONFIG_SMP */ | ||
111 | 108 | ||
112 | .check_preempt_curr = check_preempt_curr_idle, | 109 | .check_preempt_curr = check_preempt_curr_idle, |
113 | 110 | ||
@@ -115,6 +112,8 @@ static const struct sched_class idle_sched_class = { | |||
115 | .put_prev_task = put_prev_task_idle, | 112 | .put_prev_task = put_prev_task_idle, |
116 | 113 | ||
117 | #ifdef CONFIG_SMP | 114 | #ifdef CONFIG_SMP |
115 | .select_task_rq = select_task_rq_idle, | ||
116 | |||
118 | .load_balance = load_balance_idle, | 117 | .load_balance = load_balance_idle, |
119 | .move_one_task = move_one_task_idle, | 118 | .move_one_task = move_one_task_idle, |
120 | #endif | 119 | #endif |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index b446dc87494f..d9ba9d5f99d6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -1504,9 +1504,6 @@ static const struct sched_class rt_sched_class = { | |||
1504 | .enqueue_task = enqueue_task_rt, | 1504 | .enqueue_task = enqueue_task_rt, |
1505 | .dequeue_task = dequeue_task_rt, | 1505 | .dequeue_task = dequeue_task_rt, |
1506 | .yield_task = yield_task_rt, | 1506 | .yield_task = yield_task_rt, |
1507 | #ifdef CONFIG_SMP | ||
1508 | .select_task_rq = select_task_rq_rt, | ||
1509 | #endif /* CONFIG_SMP */ | ||
1510 | 1507 | ||
1511 | .check_preempt_curr = check_preempt_curr_rt, | 1508 | .check_preempt_curr = check_preempt_curr_rt, |
1512 | 1509 | ||
@@ -1514,6 +1511,8 @@ static const struct sched_class rt_sched_class = { | |||
1514 | .put_prev_task = put_prev_task_rt, | 1511 | .put_prev_task = put_prev_task_rt, |
1515 | 1512 | ||
1516 | #ifdef CONFIG_SMP | 1513 | #ifdef CONFIG_SMP |
1514 | .select_task_rq = select_task_rq_rt, | ||
1515 | |||
1517 | .load_balance = load_balance_rt, | 1516 | .load_balance = load_balance_rt, |
1518 | .move_one_task = move_one_task_rt, | 1517 | .move_one_task = move_one_task_rt, |
1519 | .set_cpus_allowed = set_cpus_allowed_rt, | 1518 | .set_cpus_allowed = set_cpus_allowed_rt, |