Diffstat (limited to 'kernel/sched_fair.c')
 kernel/sched_fair.c | 248 ++++++++++++++++++++++++++++++-----------
 1 file changed, 180 insertions(+), 68 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b8..98345e45b059 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+	int depth = 0;
+
+	for_each_sched_entity(se)
+		depth++;
+
+	return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+	int se_depth, pse_depth;
+
+	/*
+	 * preemption test can be made between sibling entities who are in the
+	 * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+	 * both tasks until we find their ancestors who are siblings of common
+	 * parent.
+	 */
+
+	/* First walk up until both entities are at same depth */
+	se_depth = depth_se(*se);
+	pse_depth = depth_se(*pse);
+
+	while (se_depth > pse_depth) {
+		se_depth--;
+		*se = parent_entity(*se);
+	}
+
+	while (pse_depth > se_depth) {
+		pse_depth--;
+		*pse = parent_entity(*pse);
+	}
+
+	while (!is_same_group(*se, *pse)) {
+		*se = parent_entity(*se);
+		*pse = parent_entity(*pse);
+	}
+}
+
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
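
[Note] find_matching_se() walks two entities up their parent chains until they sit in the same cfs_rq, the only place their vruntimes are directly comparable. Below is a minimal user-space sketch of the same depth-matching walk, assuming a toy parent-pointer node in place of sched_entity; the node type, names, and main() harness are illustrative, and sibling-ness is approximated by comparing parents where the kernel's is_same_group() compares cfs_rq pointers:

```c
#include <stdio.h>

/* Toy stand-in for sched_entity: only the parent linkage matters here. */
struct node {
	const char *name;
	struct node *parent;	/* NULL at the root, like se->parent */
};

static int depth_of(struct node *n)
{
	int depth = 0;

	for (; n; n = n->parent)	/* mirrors for_each_sched_entity() */
		depth++;
	return depth;
}

/* Walk both nodes up until they are siblings under a common parent. */
static void find_matching(struct node **a, struct node **b)
{
	int da = depth_of(*a), db = depth_of(*b);

	while (da > db) { da--; *a = (*a)->parent; }
	while (db > da) { db--; *b = (*b)->parent; }

	/* both NULL parents count as "same group" at the root level */
	while ((*a)->parent != (*b)->parent) {
		*a = (*a)->parent;
		*b = (*b)->parent;
	}
}

int main(void)
{
	/* root -> {g1, g2}, g1 -> t1, g2 -> g3 -> t2 */
	struct node g1 = { "g1", NULL }, g2 = { "g2", NULL };
	struct node g3 = { "g3", &g2 }, t1 = { "t1", &g1 }, t2 = { "t2", &g3 };
	struct node *a = &t1, *b = &t2;

	find_matching(&a, &b);
	printf("%s vs %s\n", a->name, b->name);	/* prints: g1 vs g2 */
	return 0;
}
```

Running it prints "g1 vs g2": both tasks were lifted to the first level where they share a parent, which is where the preemption test can be made.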
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return NULL;
 }
 
+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
 #endif /* CONFIG_FAIR_GROUP_SCHED */
 
 
@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	return se->vruntime - cfs_rq->min_vruntime;
 }
 
+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+	u64 vruntime = cfs_rq->min_vruntime;
+
+	if (cfs_rq->curr)
+		vruntime = cfs_rq->curr->vruntime;
+
+	if (cfs_rq->rb_leftmost) {
+		struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
+						   struct sched_entity,
+						   run_node);
+
+		if (vruntime == cfs_rq->min_vruntime)
+			vruntime = se->vruntime;
+		else
+			vruntime = min_vruntime(vruntime, se->vruntime);
+	}
+
+	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
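
[Note] The new update_min_vruntime() leans on the min_vruntime()/max_vruntime() helpers defined earlier in sched_fair.c, which compare u64 clocks through a signed difference so they stay correct across counter wraparound. A compilable sketch of the resulting invariant, with the helper bodies mirroring the kernel's and a toy cfs_rq (pointers stand in for curr and the rbtree leftmost; the harness and values are illustrative):

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;
typedef int64_t s64;

/* Wraparound-safe comparisons, shaped like the sched_fair.c helpers. */
static u64 max_vruntime(u64 min_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - min_vruntime);
	if (delta > 0)
		min_vruntime = vruntime;
	return min_vruntime;
}

static u64 min_vruntime(u64 min_vruntime, u64 vruntime)
{
	s64 delta = (s64)(vruntime - min_vruntime);
	if (delta < 0)
		min_vruntime = vruntime;
	return min_vruntime;
}

struct toy_cfs_rq {
	u64 min_vruntime;
	const u64 *curr;	/* vruntime of the running entity, or NULL */
	const u64 *leftmost;	/* vruntime of the leftmost queued entity, or NULL */
};

/*
 * Same shape as the new update_min_vruntime(): move toward the smaller
 * of curr and leftmost, but never let min_vruntime go backwards.
 */
static void update_min(struct toy_cfs_rq *cfs_rq)
{
	u64 vruntime = cfs_rq->min_vruntime;

	if (cfs_rq->curr)
		vruntime = *cfs_rq->curr;

	if (cfs_rq->leftmost) {
		if (vruntime == cfs_rq->min_vruntime)
			vruntime = *cfs_rq->leftmost;
		else
			vruntime = min_vruntime(vruntime, *cfs_rq->leftmost);
	}

	cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
}

int main(void)
{
	u64 curr = 105, left = 120;
	struct toy_cfs_rq rq = { 100, &curr, &left };

	update_min(&rq);
	printf("%llu\n", (unsigned long long)rq.min_vruntime); /* 105 */

	curr = 90;	/* curr behind min_vruntime: clamp, don't go back */
	update_min(&rq);
	printf("%llu\n", (unsigned long long)rq.min_vruntime); /* still 105 */
	return 0;
}
```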
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Maintain a cache of leftmost tree entries (it is frequently
 	 * used):
 	 */
-	if (leftmost) {
+	if (leftmost)
 		cfs_rq->rb_leftmost = &se->run_node;
-		/*
-		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-		 * value tracking the leftmost vruntime in the tree.
-		 */
-		cfs_rq->min_vruntime =
-			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
-	}
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
 	if (cfs_rq->rb_leftmost == &se->run_node) {
 		struct rb_node *next_node;
-		struct sched_entity *next;
 
 		next_node = rb_next(&se->run_node);
 		cfs_rq->rb_leftmost = next_node;
-
-		if (next_node) {
-			next = rb_entry(next_node,
-					struct sched_entity, run_node);
-			cfs_rq->min_vruntime =
-				max_vruntime(cfs_rq->min_vruntime,
-					     next->vruntime);
-		}
 	}
 
-	if (cfs_rq->next == se)
-		cfs_rq->next = NULL;
-
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
 
-static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
-{
-	return cfs_rq->rb_leftmost;
-}
-
 static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
+	struct rb_node *left = cfs_rq->rb_leftmost;
+
+	if (!left)
+		return NULL;
+
+	return rb_entry(left, struct sched_entity, run_node);
 }
 
-static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
 	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
 
@@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	schedstat_add(cfs_rq, exec_clock, delta_exec);
 	delta_exec_weighted = calc_delta_fair(delta_exec, curr);
 	curr->vruntime += delta_exec_weighted;
+	update_min_vruntime(cfs_rq);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
-	u64 vruntime;
-
-	if (first_fair(cfs_rq)) {
-		vruntime = min_vruntime(cfs_rq->min_vruntime,
-				__pick_next_entity(cfs_rq)->vruntime);
-	} else
-		vruntime = cfs_rq->min_vruntime;
+	u64 vruntime = cfs_rq->min_vruntime;
 
 	/*
 	 * The 'current' period is already promised to the current tasks,
@@ -671,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
 		__enqueue_entity(cfs_rq, se);
 }
 
+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	if (cfs_rq->last == se)
+		cfs_rq->last = NULL;
+
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -693,9 +747,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 #endif
 	}
 
+	clear_buddies(cfs_rq, se);
+
 	if (se != cfs_rq->curr)
 		__dequeue_entity(cfs_rq, se);
 	account_entity_dequeue(cfs_rq, se);
+	update_min_vruntime(cfs_rq);
 }
 
 /*
@@ -742,29 +799,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
-static struct sched_entity *
-pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-	struct rq *rq = rq_of(cfs_rq);
-	u64 pair_slice = rq->clock - cfs_rq->pair_start;
-
-	if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
-		cfs_rq->pair_start = rq->clock;
-		return se;
-	}
-
-	return cfs_rq->next;
-}
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);
 
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
-	struct sched_entity *se = NULL;
+	struct sched_entity *se = __pick_next_entity(cfs_rq);
 
-	if (first_fair(cfs_rq)) {
-		se = __pick_next_entity(cfs_rq);
-		se = pick_next(cfs_rq, se);
-		set_next_entity(cfs_rq, se);
-	}
+	if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
+		return cfs_rq->next;
+
+	if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
+		return cfs_rq->last;
 
 	return se;
 }
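
[Note] The rewritten pick_next_entity() consults the buddies through wakeup_preempt_entity(): a result below 1 means the buddy's vruntime is at most one wakeup granularity to the right of the leftmost entity, so running the buddy costs little fairness. A standalone sketch of that selection order over plain vruntime values (the GRAN constant and the -1 "no buddy" sentinel are illustrative, not kernel values):

```c
#include <stdio.h>
#include <stdint.h>

typedef int64_t s64;

/* One wakeup granularity, in the same (illustrative) units as vruntime. */
static const s64 GRAN = 4;

/*
 * Same three-way result as wakeup_preempt_entity(): -1 if 'curr' is not
 * ahead of 'se', 1 if it is ahead by more than a granularity, else 0.
 */
static int preempt_cmp(s64 curr_vruntime, s64 se_vruntime)
{
	s64 vdiff = curr_vruntime - se_vruntime;

	if (vdiff <= 0)
		return -1;
	if (vdiff > GRAN)
		return 1;
	return 0;
}

/*
 * Buddy-aware pick, shaped like the new pick_next_entity(): prefer the
 * next (just-woken) buddy, then the last (preempted) buddy, but only if
 * the buddy is within one granularity of the leftmost entity.
 */
static s64 pick_next(s64 leftmost, s64 next, s64 last)
{
	if (next >= 0 && preempt_cmp(next, leftmost) < 1)
		return next;
	if (last >= 0 && preempt_cmp(last, leftmost) < 1)
		return last;
	return leftmost;
}

int main(void)
{
	printf("%lld\n", (long long)pick_next(10, 12, -1)); /* 12: buddy close enough */
	printf("%lld\n", (long long)pick_next(10, 20, -1)); /* 10: buddy too far right */
	printf("%lld\n", (long long)pick_next(10, -1, 13)); /* 13: fall back to last */
	return 0;
}
```

The point of the "< 1" test is that both -1 (buddy actually left of the leftmost) and 0 (within the window) let the buddy run; only a clear fairness violation falls back to the leftmost entity.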
@@ -936,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
 	if (unlikely(cfs_rq->nr_running == 1))
 		return;
 
+	clear_buddies(cfs_rq, se);
+
 	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
 		update_rq_clock(rq);
 		/*
@@ -1122,10 +1170,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
 	if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
 		return 0;
 
-	if (!sync && sched_feat(SYNC_WAKEUPS) &&
-	    curr->se.avg_overlap < sysctl_sched_migration_cost &&
-	    p->se.avg_overlap < sysctl_sched_migration_cost)
-		sync = 1;
+	if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+		     p->se.avg_overlap > sysctl_sched_migration_cost))
+		sync = 0;
 
 	/*
 	 * If sync wakeup then subtract the (maximum possible)
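
[Note] This hunk inverts the old heuristic: instead of promoting a non-sync wakeup to sync when both tasks show short avg_overlap, it trusts the caller's sync hint and demotes it when either task historically overlaps the waker for too long. A small sketch of the new predicate, assuming an illustrative threshold where the kernel uses the real sysctl_sched_migration_cost knob:

```c
#include <stdio.h>

#define MIGRATION_COST 500000	/* illustrative stand-in for sysctl_sched_migration_cost */

/* New rule: keep 'sync' only if both tasks historically overlap briefly. */
static int effective_sync(int sync, long curr_overlap, long wakee_overlap)
{
	if (sync && (curr_overlap > MIGRATION_COST ||
		     wakee_overlap > MIGRATION_COST))
		sync = 0;
	return sync;
}

int main(void)
{
	printf("%d\n", effective_sync(1, 1000, 1000));    /* 1: stays sync */
	printf("%d\n", effective_sync(1, 1000000, 1000)); /* 0: demoted */
	printf("%d\n", effective_sync(0, 1000, 1000));    /* 0: never promoted */
	return 0;
}
```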
@@ -1244,33 +1291,88 @@ static unsigned long wakeup_gran(struct sched_entity *se)
 	 * More easily preempt - nice tasks, while not making it harder for
 	 * + nice tasks.
 	 */
-	if (sched_feat(ASYM_GRAN))
-		gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
+	if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+		gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);
 
 	return gran;
 }
 
 /*
+ * Should 'se' preempt 'curr'.
+ *
+ *             |s1
+ *        |s2
+ *   |s3
+ *         g
+ *      |<--->|c
+ *
+ *  w(c, s1) = -1
+ *  w(c, s2) =  0
+ *  w(c, s3) =  1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+	s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+	if (vdiff <= 0)
+		return -1;
+
+	gran = wakeup_gran(curr);
+	if (vdiff > gran)
+		return 1;
+
+	return 0;
+}
+
+static void set_last_buddy(struct sched_entity *se)
+{
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->last = se;
+}
+
+static void set_next_buddy(struct sched_entity *se)
+{
+	for_each_sched_entity(se)
+		cfs_rq_of(se)->next = se;
+}
+
+/*
  * Preempt the current task with a newly woken task if needed:
  */
 static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 {
 	struct task_struct *curr = rq->curr;
-	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	struct sched_entity *se = &curr->se, *pse = &p->se;
-	s64 delta_exec;
 
 	if (unlikely(rt_prio(p->prio))) {
+		struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+
 		update_rq_clock(rq);
 		update_curr(cfs_rq);
 		resched_task(curr);
 		return;
 	}
 
+	if (unlikely(p->sched_class != &fair_sched_class))
+		return;
+
 	if (unlikely(se == pse))
 		return;
 
-	cfs_rq_of(pse)->next = pse;
+	/*
+	 * Only set the backward buddy when the current task is still on the
+	 * rq. This can happen when a wakeup gets interleaved with schedule on
+	 * the ->pre_schedule() or idle_balance() point, either of which can
+	 * drop the rq lock.
+	 *
+	 * Also, during early boot the idle thread is in the fair class, for
+	 * obvious reasons its a bad idea to schedule back to the idle thread.
+	 */
+	if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
+		set_last_buddy(se);
+	set_next_buddy(pse);
 
 	/*
 	 * We can come here with TIF_NEED_RESCHED already set from new task
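
[Note] In the new wakeup_gran(), with ASYM_GRAN enabled the wall-clock granularity is converted to the entity's virtual time only for entities heavier than NICE_0_LOAD: heavier (negative-nice) entities get a smaller window and are easier to preempt, while lighter (positive-nice) entities keep the unscaled window instead of an inflated one. A numeric sketch of that scaling, assuming the kernel's nice-0 weight of 1024 and its weight-table values for nice -5 (3121) and nice +5 (335); the 5 ms granularity is an illustrative sysctl value:

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

#define NICE_0_LOAD	1024		/* kernel's nice-0 load weight */
#define WAKEUP_GRAN	5000000ULL	/* 5 ms, illustrative sysctl value */

/* Simplified calc_delta_fair(): delta scaled by NICE_0_LOAD / weight. */
static u64 scale_to_vtime(u64 delta, unsigned long weight)
{
	return delta * NICE_0_LOAD / weight;
}

/* Shape of the new wakeup_gran() with ASYM_GRAN enabled. */
static u64 wakeup_gran(unsigned long weight)
{
	u64 gran = WAKEUP_GRAN;

	if (weight > NICE_0_LOAD)	/* only shrink for heavier entities */
		gran = scale_to_vtime(gran, weight);

	return gran;
}

int main(void)
{
	printf("nice  0: %llu\n", (unsigned long long)wakeup_gran(1024)); /* 5000000 */
	printf("nice -5: %llu\n", (unsigned long long)wakeup_gran(3121)); /* ~1640499 */
	printf("nice +5: %llu\n", (unsigned long long)wakeup_gran(335));  /* 5000000 */
	return 0;
}
```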
@@ -1296,9 +1398,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 		return;
 	}
 
-	delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
-	if (delta_exec > wakeup_gran(pse))
-		resched_task(curr);
+	find_matching_se(&se, &pse);
+
+	while (se) {
+		BUG_ON(!pse);
+
+		if (wakeup_preempt_entity(se, pse) == 1) {
+			resched_task(curr);
+			break;
+		}
+
+		se = parent_entity(se);
+		pse = parent_entity(pse);
+	}
 }
 
 static struct task_struct *pick_next_task_fair(struct rq *rq)
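
[Note] The tail of check_preempt_wakeup() now decides preemption at a level where both entities share a runqueue, because raw task vruntimes from different cfs_rqs are offsets against different min_vruntime baselines and cannot be compared directly. A sketch of the loop's control flow once the entities have been matched (toy types and values are illustrative; only a result of 1, "ahead by more than a granularity", triggers the resched):

```c
#include <stdio.h>
#include <stdint.h>

typedef int64_t s64;

struct ent {
	s64 vruntime;
	struct ent *parent;	/* NULL at the root level */
};

static const s64 GRAN = 4;	/* illustrative wakeup granularity */

static int wakeup_preempt(struct ent *curr, struct ent *se)
{
	s64 vdiff = curr->vruntime - se->vruntime;

	if (vdiff <= 0)
		return -1;
	return vdiff > GRAN ? 1 : 0;
}

int main(void)
{
	/* group entities already lifted to a common cfs_rq by the matching walk */
	struct ent ga = { 100, NULL };	/* current task's group */
	struct ent gb = { 90, NULL };	/* woken task's group */
	struct ent *se = &ga, *pse = &gb;

	/* the tail of the new check_preempt_wakeup() */
	while (se) {
		if (wakeup_preempt(se, pse) == 1) {
			printf("resched current task\n");
			break;
		}
		se = se->parent;
		pse = pse->parent;
	}
	return 0;
}
```

Here the current group leads the woken group by 10 > GRAN, so the current task is rescheduled even though the tasks' own vruntimes live in different queues.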
@@ -1312,6 +1424,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)
 
 	do {
 		se = pick_next_entity(cfs_rq);
+		set_next_entity(cfs_rq, se);
 		cfs_rq = group_cfs_rq(se);
 	} while (cfs_rq);
 
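
[Note] pick_next_entity() no longer calls set_next_entity() itself (see the earlier hunk), so the descent loop above applies that side effect at each level of the group hierarchy before dropping into the winner's child runqueue. A toy sketch of this separation of selection from state change, with illustrative types and names standing in for cfs_rq and sched_entity:

```c
#include <stdio.h>

struct toy_entity;

/* Toy runqueue: each level picks a winner, which may own a child queue. */
struct toy_rq {
	struct toy_entity *leftmost;	/* pretend rbtree leftmost */
	struct toy_entity *curr;	/* set_next_entity() target */
};

struct toy_entity {
	const char *name;
	struct toy_rq *my_q;	/* non-NULL for group entities */
};

static struct toy_entity *pick_next_entity(struct toy_rq *rq)
{
	return rq->leftmost;	/* selection only, no side effects */
}

static void set_next_entity(struct toy_rq *rq, struct toy_entity *se)
{
	rq->curr = se;		/* the caller applies the side effect */
}

int main(void)
{
	struct toy_rq leaf = { 0 }, root = { 0 };
	struct toy_entity task = { "task", NULL };
	struct toy_entity group = { "group", &leaf };
	struct toy_rq *rq = &root;
	struct toy_entity *se;

	root.leftmost = &group;
	leaf.leftmost = &task;

	/* the reworked pick_next_task_fair() loop */
	do {
		se = pick_next_entity(rq);
		set_next_entity(rq, se);	/* hoisted out of the picker */
		rq = se->my_q;
	} while (rq);

	printf("picked %s\n", se->name);	/* picked task */
	return 0;
}
```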
@@ -1594,9 +1707,6 @@ static const struct sched_class fair_sched_class = {
 	.enqueue_task		= enqueue_task_fair,
 	.dequeue_task		= dequeue_task_fair,
 	.yield_task		= yield_task_fair,
-#ifdef CONFIG_SMP
-	.select_task_rq		= select_task_rq_fair,
-#endif /* CONFIG_SMP */
 
 	.check_preempt_curr	= check_preempt_wakeup,
 
@@ -1604,6 +1714,8 @@ static const struct sched_class fair_sched_class = {
 	.put_prev_task		= put_prev_task_fair,
 
 #ifdef CONFIG_SMP
+	.select_task_rq		= select_task_rq_fair,
+
 	.load_balance		= load_balance_fair,
 	.move_one_task		= move_one_task_fair,
 #endif