Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--   kernel/sched_fair.c   248
1 file changed, 180 insertions(+), 68 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b8..98345e45b059 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
         return se->parent;
 }

+/* return depth at which a sched entity is present in the hierarchy */
+static inline int depth_se(struct sched_entity *se)
+{
+        int depth = 0;
+
+        for_each_sched_entity(se)
+                depth++;
+
+        return depth;
+}
+
+static void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+        int se_depth, pse_depth;
+
+        /*
+         * preemption test can be made between sibling entities who are in the
+         * same cfs_rq i.e who have a common parent. Walk up the hierarchy of
+         * both tasks until we find their ancestors who are siblings of common
+         * parent.
+         */
+
+        /* First walk up until both entities are at same depth */
+        se_depth = depth_se(*se);
+        pse_depth = depth_se(*pse);
+
+        while (se_depth > pse_depth) {
+                se_depth--;
+                *se = parent_entity(*se);
+        }
+
+        while (pse_depth > se_depth) {
+                pse_depth--;
+                *pse = parent_entity(*pse);
+        }
+
+        while (!is_same_group(*se, *pse)) {
+                *se = parent_entity(*se);
+                *pse = parent_entity(*pse);
+        }
+}
+
 #else   /* CONFIG_FAIR_GROUP_SCHED */

 static inline struct rq *rq_of(struct cfs_rq *cfs_rq)
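The find_matching_se() walk added above is the classic lowest-common-ancestor scheme for trees that only carry parent pointers: equalize depths first, then climb in lockstep. Below is a standalone sketch of the same idea, not part of the patch; struct node, find_matching() and the names in it are invented, and is_same_group() is approximated by comparing parents, which assumes both nodes live in one tree, as two entities on the same runqueue always do:

#include <stdio.h>

struct node {
        struct node *parent;            /* NULL for the root */
        const char *name;
};

static int depth(const struct node *n)
{
        int d = 0;

        for (; n; n = n->parent)
                d++;
        return d;
}

static void find_matching(struct node **a, struct node **b)
{
        int da = depth(*a), db = depth(*b);

        /* phase 1: bring both nodes to the same depth */
        while (da > db) {
                da--;
                *a = (*a)->parent;
        }
        while (db > da) {
                db--;
                *b = (*b)->parent;
        }

        /* phase 2: climb in lockstep until the two are siblings */
        while ((*a)->parent != (*b)->parent) {
                *a = (*a)->parent;
                *b = (*b)->parent;
        }
}

int main(void)
{
        struct node root = { NULL, "root" };
        struct node g1 = { &root, "g1" }, g2 = { &root, "g2" };
        struct node t1 = { &g1, "t1" };
        struct node *a = &t1, *b = &g2;

        find_matching(&a, &b);
        printf("%s vs %s\n", a->name, b->name);         /* prints "g1 vs g2" */
        return 0;
}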
@@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
         return NULL;
 }

+static inline void
+find_matching_se(struct sched_entity **se, struct sched_entity **pse)
+{
+}
+
 #endif  /* CONFIG_FAIR_GROUP_SCHED */


@@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se)
         return se->vruntime - cfs_rq->min_vruntime;
 }

+static void update_min_vruntime(struct cfs_rq *cfs_rq)
+{
+        u64 vruntime = cfs_rq->min_vruntime;
+
+        if (cfs_rq->curr)
+                vruntime = cfs_rq->curr->vruntime;
+
+        if (cfs_rq->rb_leftmost) {
+                struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost,
+                                                   struct sched_entity,
+                                                   run_node);
+
+                if (vruntime == cfs_rq->min_vruntime)
+                        vruntime = se->vruntime;
+                else
+                        vruntime = min_vruntime(vruntime, se->vruntime);
+        }
+
+        cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime);
+}
+
 /*
  * Enqueue an entity into the rb-tree:
  */
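update_min_vruntime() turns cfs_rq->min_vruntime into a monotonic clock: it may only advance, toward the smaller of the current task's vruntime and the leftmost queued vruntime. A toy model of that invariant with plain integers follows; it is not kernel code and ignores the wraparound-safe comparisons the kernel's min_vruntime()/max_vruntime() helpers perform:

#include <assert.h>
#include <stdint.h>

/* curr/left are NULL when there is no running/queued entity */
static uint64_t update_min(uint64_t min_vr, const uint64_t *curr,
                           const uint64_t *left)
{
        uint64_t v = min_vr;

        if (curr)
                v = *curr;

        if (left)
                v = curr ? (*left < v ? *left : v) : *left;

        /* clamp: min_vruntime may advance but never go backwards */
        return v > min_vr ? v : min_vr;
}

int main(void)
{
        uint64_t c = 100, l = 90;

        assert(update_min(80, &c, &l) == 90);   /* bounded by the leftmost */
        assert(update_min(95, &c, &l) == 95);   /* never moves backwards */
        assert(update_min(80, &c, NULL) == 100);/* only curr is running */
        return 0;
}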
@@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
          * Maintain a cache of leftmost tree entries (it is frequently
          * used):
          */
-        if (leftmost) {
+        if (leftmost)
                 cfs_rq->rb_leftmost = &se->run_node;
-                /*
-                 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-                 * value tracking the leftmost vruntime in the tree.
-                 */
-                cfs_rq->min_vruntime =
-                        max_vruntime(cfs_rq->min_vruntime, se->vruntime);
-        }

         rb_link_node(&se->run_node, parent, link);
         rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
         if (cfs_rq->rb_leftmost == &se->run_node) {
                 struct rb_node *next_node;
-                struct sched_entity *next;

                 next_node = rb_next(&se->run_node);
                 cfs_rq->rb_leftmost = next_node;
-
-                if (next_node) {
-                        next = rb_entry(next_node,
-                                        struct sched_entity, run_node);
-                        cfs_rq->min_vruntime =
-                                max_vruntime(cfs_rq->min_vruntime,
-                                             next->vruntime);
-                }
         }

-        if (cfs_rq->next == se)
-                cfs_rq->next = NULL;
-
         rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }

-static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq)
-{
-        return cfs_rq->rb_leftmost;
-}
-
 static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 {
-        return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node);
+        struct rb_node *left = cfs_rq->rb_leftmost;
+
+        if (!left)
+                return NULL;
+
+        return rb_entry(left, struct sched_entity, run_node);
 }

-static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
+static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
         struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);

@@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
         schedstat_add(cfs_rq, exec_clock, delta_exec);
         delta_exec_weighted = calc_delta_fair(delta_exec, curr);
         curr->vruntime += delta_exec_weighted;
+        update_min_vruntime(cfs_rq);
 }

 static void update_curr(struct cfs_rq *cfs_rq)
@@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se)
 static void
 place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
-        u64 vruntime;
-
-        if (first_fair(cfs_rq)) {
-                vruntime = min_vruntime(cfs_rq->min_vruntime,
-                                __pick_next_entity(cfs_rq)->vruntime);
-        } else
-                vruntime = cfs_rq->min_vruntime;
+        u64 vruntime = cfs_rq->min_vruntime;

         /*
          * The 'current' period is already promised to the current tasks,
@@ -671,6 +716,15 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
                 __enqueue_entity(cfs_rq, se);
 }

+static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+        if (cfs_rq->last == se)
+                cfs_rq->last = NULL;
+
+        if (cfs_rq->next == se)
+                cfs_rq->next = NULL;
+}
+
 static void
 dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 {
@@ -693,9 +747,12 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
 #endif
         }

+        clear_buddies(cfs_rq, se);
+
         if (se != cfs_rq->curr)
                 __dequeue_entity(cfs_rq, se);
         account_entity_dequeue(cfs_rq, se);
+        update_min_vruntime(cfs_rq);
 }

 /*
@@ -742,29 +799,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
         se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }

-static struct sched_entity *
-pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
-{
-        struct rq *rq = rq_of(cfs_rq);
-        u64 pair_slice = rq->clock - cfs_rq->pair_start;
-
-        if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) {
-                cfs_rq->pair_start = rq->clock;
-                return se;
-        }
-
-        return cfs_rq->next;
-}
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se);

 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
-        struct sched_entity *se = NULL;
+        struct sched_entity *se = __pick_next_entity(cfs_rq);

-        if (first_fair(cfs_rq)) {
-                se = __pick_next_entity(cfs_rq);
-                se = pick_next(cfs_rq, se);
-                set_next_entity(cfs_rq, se);
-        }
+        if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1)
+                return cfs_rq->next;
+
+        if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1)
+                return cfs_rq->last;

         return se;
 }
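Note the policy change: the old pick_next() handed out the "next" buddy on a time basis (pair_slice against sysctl_sched_min_granularity), while the new code gates buddies on fairness, taking one only while the leftmost entity would not be entitled to preempt it (wakeup_preempt_entity() < 1). A minimal decision-table sketch of the new selection order; the names are invented and this is not kernel code:

enum pick { PICK_NEXT, PICK_LAST, PICK_LEFTMOST };

struct buddies {
        int have_next, next_fair;       /* next buddy set / leftmost may not preempt it */
        int have_last, last_fair;       /* last buddy set / leftmost may not preempt it */
};

static enum pick pick_policy(const struct buddies *b)
{
        if (b->have_next && b->next_fair)
                return PICK_NEXT;       /* prefer the freshly woken task */
        if (b->have_last && b->last_fair)
                return PICK_LAST;       /* then the task that did the waking */
        return PICK_LEFTMOST;           /* otherwise strict fairness */
}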
@@ -936,6 +982,8 @@ static void yield_task_fair(struct rq *rq)
         if (unlikely(cfs_rq->nr_running == 1))
                 return;

+        clear_buddies(cfs_rq, se);
+
         if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
                 update_rq_clock(rq);
                 /*
@@ -1122,10 +1170,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq,
         if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS))
                 return 0;

-        if (!sync && sched_feat(SYNC_WAKEUPS) &&
-            curr->se.avg_overlap < sysctl_sched_migration_cost &&
-            p->se.avg_overlap < sysctl_sched_migration_cost)
-                sync = 1;
+        if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost ||
+                        p->se.avg_overlap > sysctl_sched_migration_cost))
+                sync = 0;

         /*
          * If sync wakeup then subtract the (maximum possible)
@@ -1244,33 +1291,88 @@ static unsigned long wakeup_gran(struct sched_entity *se)
          * More easily preempt - nice tasks, while not making it harder for
          * + nice tasks.
          */
-        if (sched_feat(ASYM_GRAN))
-                gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load);
+        if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD)
+                gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se);

         return gran;
 }

 /*
+ * Should 'se' preempt 'curr'.
+ *
+ *             |s1
+ *        |s2
+ *   |s3
+ *         g
+ *      |<--->|c
+ *
+ *  w(c, s1) = -1
+ *  w(c, s2) =  0
+ *  w(c, s3) =  1
+ *
+ */
+static int
+wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se)
+{
+        s64 gran, vdiff = curr->vruntime - se->vruntime;
+
+        if (vdiff <= 0)
+                return -1;
+
+        gran = wakeup_gran(curr);
+        if (vdiff > gran)
+                return 1;
+
+        return 0;
+}
+
+static void set_last_buddy(struct sched_entity *se)
+{
+        for_each_sched_entity(se)
+                cfs_rq_of(se)->last = se;
+}
+
+static void set_next_buddy(struct sched_entity *se)
+{
+        for_each_sched_entity(se)
+                cfs_rq_of(se)->next = se;
+}
+
+/*
  * Preempt the current task with a newly woken task if needed:
  */
 static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
 {
         struct task_struct *curr = rq->curr;
-        struct cfs_rq *cfs_rq = task_cfs_rq(curr);
         struct sched_entity *se = &curr->se, *pse = &p->se;
-        s64 delta_exec;

         if (unlikely(rt_prio(p->prio))) {
+                struct cfs_rq *cfs_rq = task_cfs_rq(curr);
+
                 update_rq_clock(rq);
                 update_curr(cfs_rq);
                 resched_task(curr);
                 return;
         }

+        if (unlikely(p->sched_class != &fair_sched_class))
+                return;
+
         if (unlikely(se == pse))
                 return;

-        cfs_rq_of(pse)->next = pse;
+        /*
+         * Only set the backward buddy when the current task is still on the
+         * rq. This can happen when a wakeup gets interleaved with schedule on
+         * the ->pre_schedule() or idle_balance() point, either of which can
+         * drop the rq lock.
+         *
+         * Also, during early boot the idle thread is in the fair class, for
+         * obvious reasons its a bad idea to schedule back to the idle thread.
+         */
+        if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle))
+                set_last_buddy(se);
+        set_next_buddy(pse);

         /*
          * We can come here with TIF_NEED_RESCHED already set from new task
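Reading the w(c, s) diagram above with concrete numbers: vruntime grows to the right, curr sits at c, and the preemption window of width gran ends at c. An entity at s1, past c, must not preempt (-1); s2, inside the window, is left alone (0); s3, more than gran behind, preempts (1). A standalone check in arbitrary units; this is not kernel code, and gran is fixed here rather than scaled by task weight as wakeup_gran() does:

#include <assert.h>
#include <stdint.h>

static int wakeup_preempt(int64_t curr_vr, int64_t se_vr, int64_t gran)
{
        int64_t vdiff = curr_vr - se_vr;

        if (vdiff <= 0)
                return -1;              /* se is not behind curr at all */
        return vdiff > gran ? 1 : 0;    /* 1: preempt, 0: within gran */
}

int main(void)
{
        /* curr at vruntime 12, gran = 6, as in the s1/s2/s3 picture */
        assert(wakeup_preempt(12, 13, 6) == -1);        /* s1 */
        assert(wakeup_preempt(12,  8, 6) ==  0);        /* s2 */
        assert(wakeup_preempt(12,  3, 6) ==  1);        /* s3 */
        return 0;
}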
@@ -1296,9 +1398,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync)
                 return;
         }

-        delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime;
-        if (delta_exec > wakeup_gran(pse))
-                resched_task(curr);
+        find_matching_se(&se, &pse);
+
+        while (se) {
+                BUG_ON(!pse);
+
+                if (wakeup_preempt_entity(se, pse) == 1) {
+                        resched_task(curr);
+                        break;
+                }
+
+                se = parent_entity(se);
+                pse = parent_entity(pse);
+        }
 }

 static struct task_struct *pick_next_task_fair(struct rq *rq)
@@ -1312,6 +1424,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq)

         do {
                 se = pick_next_entity(cfs_rq);
+                set_next_entity(cfs_rq, se);
                 cfs_rq = group_cfs_rq(se);
         } while (cfs_rq);

@@ -1594,9 +1707,6 @@ static const struct sched_class fair_sched_class = {
         .enqueue_task           = enqueue_task_fair,
         .dequeue_task           = dequeue_task_fair,
         .yield_task             = yield_task_fair,
-#ifdef CONFIG_SMP
-        .select_task_rq         = select_task_rq_fair,
-#endif /* CONFIG_SMP */

         .check_preempt_curr     = check_preempt_wakeup,

@@ -1604,6 +1714,8 @@ static const struct sched_class fair_sched_class = {
         .put_prev_task          = put_prev_task_fair,

 #ifdef CONFIG_SMP
+        .select_task_rq         = select_task_rq_fair,
+
         .load_balance           = load_balance_fair,
         .move_one_task          = move_one_task_fair,
 #endif