Diffstat (limited to 'kernel/sched_fair.c')
| -rw-r--r-- | kernel/sched_fair.c | 241 |
1 file changed, 173 insertions(+), 68 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9573c33688b8..51aa3e102acb 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
| @@ -143,6 +143,49 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) | |||
| 143 | return se->parent; | 143 | return se->parent; |
| 144 | } | 144 | } |
| 145 | 145 | ||
| 146 | /* return depth at which a sched entity is present in the hierarchy */ | ||
| 147 | static inline int depth_se(struct sched_entity *se) | ||
| 148 | { | ||
| 149 | int depth = 0; | ||
| 150 | |||
| 151 | for_each_sched_entity(se) | ||
| 152 | depth++; | ||
| 153 | |||
| 154 | return depth; | ||
| 155 | } | ||
| 156 | |||
| 157 | static void | ||
| 158 | find_matching_se(struct sched_entity **se, struct sched_entity **pse) | ||
| 159 | { | ||
| 160 | int se_depth, pse_depth; | ||
| 161 | |||
| 162 | /* | ||
| 163 | * preemption test can be made between sibling entities who are in the | ||
| 164 | * same cfs_rq i.e who have a common parent. Walk up the hierarchy of | ||
| 165 | * both tasks until we find their ancestors who are siblings of common | ||
| 166 | * parent. | ||
| 167 | */ | ||
| 168 | |||
| 169 | /* First walk up until both entities are at same depth */ | ||
| 170 | se_depth = depth_se(*se); | ||
| 171 | pse_depth = depth_se(*pse); | ||
| 172 | |||
| 173 | while (se_depth > pse_depth) { | ||
| 174 | se_depth--; | ||
| 175 | *se = parent_entity(*se); | ||
| 176 | } | ||
| 177 | |||
| 178 | while (pse_depth > se_depth) { | ||
| 179 | pse_depth--; | ||
| 180 | *pse = parent_entity(*pse); | ||
| 181 | } | ||
| 182 | |||
| 183 | while (!is_same_group(*se, *pse)) { | ||
| 184 | *se = parent_entity(*se); | ||
| 185 | *pse = parent_entity(*pse); | ||
| 186 | } | ||
| 187 | } | ||
| 188 | |||
| 146 | #else /* CONFIG_FAIR_GROUP_SCHED */ | 189 | #else /* CONFIG_FAIR_GROUP_SCHED */ |
| 147 | 190 | ||
| 148 | static inline struct rq *rq_of(struct cfs_rq *cfs_rq) | 191 | static inline struct rq *rq_of(struct cfs_rq *cfs_rq) |
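The walk in the new find_matching_se() is the usual "equalize depths, then climb in lockstep" way of reducing two entities to siblings under a common parent. Below is a minimal user-space sketch of that pattern, not part of the patch: `struct node`, `depth()` and `find_matching()` are hypothetical stand-ins for `sched_entity`, `depth_se()` and `find_matching_se()`, and parent-pointer equality stands in for `is_same_group()`. Like the scheduler case (both tasks sit on the same runqueue), it assumes both entities belong to the same hierarchy.

```c
#include <assert.h>
#include <stddef.h>

struct node {
	struct node *parent;		/* NULL at the root, like a top-level se */
};

/* analogue of depth_se(): count the node and all of its ancestors */
static int depth(struct node *n)
{
	int d = 0;

	for (; n; n = n->parent)
		d++;
	return d;
}

/* analogue of find_matching_se(): walk both up until they are siblings */
static void find_matching(struct node **a, struct node **b)
{
	int da = depth(*a), db = depth(*b);

	while (da > db) {
		da--;
		*a = (*a)->parent;
	}
	while (db > da) {
		db--;
		*b = (*b)->parent;
	}
	/* same depth now; climb together until a common parent is reached */
	while ((*a)->parent != (*b)->parent) {
		*a = (*a)->parent;
		*b = (*b)->parent;
	}
}

int main(void)
{
	struct node root = { NULL };
	struct node a1 = { &root }, a2 = { &a1 };	/* root -> a1 -> a2 */
	struct node b1 = { &root };			/* root -> b1       */
	struct node *a = &a2, *b = &b1;

	find_matching(&a, &b);
	assert(a == &a1 && b == &b1);	/* siblings directly under root */
	return 0;
}
```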
| @@ -193,6 +236,11 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se) | |||
| 193 | return NULL; | 236 | return NULL; |
| 194 | } | 237 | } |
| 195 | 238 | ||
| 239 | static inline void | ||
| 240 | find_matching_se(struct sched_entity **se, struct sched_entity **pse) | ||
| 241 | { | ||
| 242 | } | ||
| 243 | |||
| 196 | #endif /* CONFIG_FAIR_GROUP_SCHED */ | 244 | #endif /* CONFIG_FAIR_GROUP_SCHED */ |
| 197 | 245 | ||
| 198 | 246 | ||
| @@ -223,6 +271,27 @@ static inline s64 entity_key(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 223 | return se->vruntime - cfs_rq->min_vruntime; | 271 | return se->vruntime - cfs_rq->min_vruntime; |
| 224 | } | 272 | } |
| 225 | 273 | ||
| 274 | static void update_min_vruntime(struct cfs_rq *cfs_rq) | ||
| 275 | { | ||
| 276 | u64 vruntime = cfs_rq->min_vruntime; | ||
| 277 | |||
| 278 | if (cfs_rq->curr) | ||
| 279 | vruntime = cfs_rq->curr->vruntime; | ||
| 280 | |||
| 281 | if (cfs_rq->rb_leftmost) { | ||
| 282 | struct sched_entity *se = rb_entry(cfs_rq->rb_leftmost, | ||
| 283 | struct sched_entity, | ||
| 284 | run_node); | ||
| 285 | |||
| 286 | if (vruntime == cfs_rq->min_vruntime) | ||
| 287 | vruntime = se->vruntime; | ||
| 288 | else | ||
| 289 | vruntime = min_vruntime(vruntime, se->vruntime); | ||
| 290 | } | ||
| 291 | |||
| 292 | cfs_rq->min_vruntime = max_vruntime(cfs_rq->min_vruntime, vruntime); | ||
| 293 | } | ||
| 294 | |||
| 226 | /* | 295 | /* |
| 227 | * Enqueue an entity into the rb-tree: | 296 | * Enqueue an entity into the rb-tree: |
| 228 | */ | 297 | */ |
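update_min_vruntime() ratchets cfs_rq->min_vruntime forward using the min_vruntime()/max_vruntime() helpers defined earlier in sched_fair.c, which compare via a signed difference so that u64 wraparound is handled. The user-space sketch below illustrates that ratchet; the helper bodies follow what the kernel helpers essentially do, but treat them as a sketch rather than a copy, and the values in main() are made up.

```c
#include <assert.h>
#include <stdint.h>

/* like the kernel's max_vruntime(): the signed delta copes with u64 wrap */
static uint64_t max_vruntime(uint64_t max_vruntime, uint64_t vruntime)
{
	int64_t delta = (int64_t)(vruntime - max_vruntime);

	if (delta > 0)
		max_vruntime = vruntime;
	return max_vruntime;
}

/* like the kernel's min_vruntime(): used for the leftmost-entity compare */
static uint64_t min_vruntime(uint64_t min_vruntime, uint64_t vruntime)
{
	int64_t delta = (int64_t)(vruntime - min_vruntime);

	if (delta < 0)
		min_vruntime = vruntime;
	return min_vruntime;
}

int main(void)
{
	uint64_t mv = 1000;	/* stand-in for cfs_rq->min_vruntime */

	/* the ratchet only ever moves forward ... */
	mv = max_vruntime(mv, 900);
	assert(mv == 1000);
	mv = max_vruntime(mv, 1500);
	assert(mv == 1500);

	/* ... and a wrapped-around value still counts as "later" */
	assert(max_vruntime(UINT64_MAX - 5, 10) == 10);

	/* picking the smaller of curr and the leftmost entity */
	assert(min_vruntime(1500, 1200) == 1200);
	return 0;
}
```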
| @@ -256,15 +325,8 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 256 | * Maintain a cache of leftmost tree entries (it is frequently | 325 | * Maintain a cache of leftmost tree entries (it is frequently |
| 257 | * used): | 326 | * used): |
| 258 | */ | 327 | */ |
| 259 | if (leftmost) { | 328 | if (leftmost) |
| 260 | cfs_rq->rb_leftmost = &se->run_node; | 329 | cfs_rq->rb_leftmost = &se->run_node; |
| 261 | /* | ||
| 262 | * maintain cfs_rq->min_vruntime to be a monotonic increasing | ||
| 263 | * value tracking the leftmost vruntime in the tree. | ||
| 264 | */ | ||
| 265 | cfs_rq->min_vruntime = | ||
| 266 | max_vruntime(cfs_rq->min_vruntime, se->vruntime); | ||
| 267 | } | ||
| 268 | 330 | ||
| 269 | rb_link_node(&se->run_node, parent, link); | 331 | rb_link_node(&se->run_node, parent, link); |
| 270 | rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); | 332 | rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline); |
| @@ -274,37 +336,25 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 274 | { | 336 | { |
| 275 | if (cfs_rq->rb_leftmost == &se->run_node) { | 337 | if (cfs_rq->rb_leftmost == &se->run_node) { |
| 276 | struct rb_node *next_node; | 338 | struct rb_node *next_node; |
| 277 | struct sched_entity *next; | ||
| 278 | 339 | ||
| 279 | next_node = rb_next(&se->run_node); | 340 | next_node = rb_next(&se->run_node); |
| 280 | cfs_rq->rb_leftmost = next_node; | 341 | cfs_rq->rb_leftmost = next_node; |
| 281 | |||
| 282 | if (next_node) { | ||
| 283 | next = rb_entry(next_node, | ||
| 284 | struct sched_entity, run_node); | ||
| 285 | cfs_rq->min_vruntime = | ||
| 286 | max_vruntime(cfs_rq->min_vruntime, | ||
| 287 | next->vruntime); | ||
| 288 | } | ||
| 289 | } | 342 | } |
| 290 | 343 | ||
| 291 | if (cfs_rq->next == se) | ||
| 292 | cfs_rq->next = NULL; | ||
| 293 | |||
| 294 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); | 344 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); |
| 295 | } | 345 | } |
| 296 | 346 | ||
| 297 | static inline struct rb_node *first_fair(struct cfs_rq *cfs_rq) | ||
| 298 | { | ||
| 299 | return cfs_rq->rb_leftmost; | ||
| 300 | } | ||
| 301 | |||
| 302 | static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | 347 | static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) |
| 303 | { | 348 | { |
| 304 | return rb_entry(first_fair(cfs_rq), struct sched_entity, run_node); | 349 | struct rb_node *left = cfs_rq->rb_leftmost; |
| 350 | |||
| 351 | if (!left) | ||
| 352 | return NULL; | ||
| 353 | |||
| 354 | return rb_entry(left, struct sched_entity, run_node); | ||
| 305 | } | 355 | } |
| 306 | 356 | ||
| 307 | static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | 357 | static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
| 308 | { | 358 | { |
| 309 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); | 359 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); |
| 310 | 360 | ||
| @@ -424,6 +474,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr, | |||
| 424 | schedstat_add(cfs_rq, exec_clock, delta_exec); | 474 | schedstat_add(cfs_rq, exec_clock, delta_exec); |
| 425 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); | 475 | delta_exec_weighted = calc_delta_fair(delta_exec, curr); |
| 426 | curr->vruntime += delta_exec_weighted; | 476 | curr->vruntime += delta_exec_weighted; |
| 477 | update_min_vruntime(cfs_rq); | ||
| 427 | } | 478 | } |
| 428 | 479 | ||
| 429 | static void update_curr(struct cfs_rq *cfs_rq) | 480 | static void update_curr(struct cfs_rq *cfs_rq) |
| @@ -613,13 +664,7 @@ static void check_spread(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 613 | static void | 664 | static void |
| 614 | place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) | 665 | place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial) |
| 615 | { | 666 | { |
| 616 | u64 vruntime; | 667 | u64 vruntime = cfs_rq->min_vruntime; |
| 617 | |||
| 618 | if (first_fair(cfs_rq)) { | ||
| 619 | vruntime = min_vruntime(cfs_rq->min_vruntime, | ||
| 620 | __pick_next_entity(cfs_rq)->vruntime); | ||
| 621 | } else | ||
| 622 | vruntime = cfs_rq->min_vruntime; | ||
| 623 | 668 | ||
| 624 | /* | 669 | /* |
| 625 | * The 'current' period is already promised to the current tasks, | 670 | * The 'current' period is already promised to the current tasks, |
| @@ -693,9 +738,16 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep) | |||
| 693 | #endif | 738 | #endif |
| 694 | } | 739 | } |
| 695 | 740 | ||
| 741 | if (cfs_rq->last == se) | ||
| 742 | cfs_rq->last = NULL; | ||
| 743 | |||
| 744 | if (cfs_rq->next == se) | ||
| 745 | cfs_rq->next = NULL; | ||
| 746 | |||
| 696 | if (se != cfs_rq->curr) | 747 | if (se != cfs_rq->curr) |
| 697 | __dequeue_entity(cfs_rq, se); | 748 | __dequeue_entity(cfs_rq, se); |
| 698 | account_entity_dequeue(cfs_rq, se); | 749 | account_entity_dequeue(cfs_rq, se); |
| 750 | update_min_vruntime(cfs_rq); | ||
| 699 | } | 751 | } |
| 700 | 752 | ||
| 701 | /* | 753 | /* |
| @@ -742,29 +794,18 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
| 742 | se->prev_sum_exec_runtime = se->sum_exec_runtime; | 794 | se->prev_sum_exec_runtime = se->sum_exec_runtime; |
| 743 | } | 795 | } |
| 744 | 796 | ||
| 745 | static struct sched_entity * | 797 | static int |
| 746 | pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se) | 798 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); |
| 747 | { | ||
| 748 | struct rq *rq = rq_of(cfs_rq); | ||
| 749 | u64 pair_slice = rq->clock - cfs_rq->pair_start; | ||
| 750 | |||
| 751 | if (!cfs_rq->next || pair_slice > sysctl_sched_min_granularity) { | ||
| 752 | cfs_rq->pair_start = rq->clock; | ||
| 753 | return se; | ||
| 754 | } | ||
| 755 | |||
| 756 | return cfs_rq->next; | ||
| 757 | } | ||
| 758 | 799 | ||
| 759 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | 800 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) |
| 760 | { | 801 | { |
| 761 | struct sched_entity *se = NULL; | 802 | struct sched_entity *se = __pick_next_entity(cfs_rq); |
| 762 | 803 | ||
| 763 | if (first_fair(cfs_rq)) { | 804 | if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, se) < 1) |
| 764 | se = __pick_next_entity(cfs_rq); | 805 | return cfs_rq->next; |
| 765 | se = pick_next(cfs_rq, se); | 806 | |
| 766 | set_next_entity(cfs_rq, se); | 807 | if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, se) < 1) |
| 767 | } | 808 | return cfs_rq->last; |
| 768 | 809 | ||
| 769 | return se; | 810 | return se; |
| 770 | } | 811 | } |
| @@ -1122,10 +1163,9 @@ wake_affine(struct sched_domain *this_sd, struct rq *this_rq, | |||
| 1122 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) | 1163 | if (!(this_sd->flags & SD_WAKE_AFFINE) || !sched_feat(AFFINE_WAKEUPS)) |
| 1123 | return 0; | 1164 | return 0; |
| 1124 | 1165 | ||
| 1125 | if (!sync && sched_feat(SYNC_WAKEUPS) && | 1166 | if (sync && (curr->se.avg_overlap > sysctl_sched_migration_cost || |
| 1126 | curr->se.avg_overlap < sysctl_sched_migration_cost && | 1167 | p->se.avg_overlap > sysctl_sched_migration_cost)) |
| 1127 | p->se.avg_overlap < sysctl_sched_migration_cost) | 1168 | sync = 0; |
| 1128 | sync = 1; | ||
| 1129 | 1169 | ||
| 1130 | /* | 1170 | /* |
| 1131 | * If sync wakeup then subtract the (maximum possible) | 1171 | * If sync wakeup then subtract the (maximum possible) |
| @@ -1244,33 +1284,88 @@ static unsigned long wakeup_gran(struct sched_entity *se) | |||
| 1244 | * More easily preempt - nice tasks, while not making it harder for | 1284 | * More easily preempt - nice tasks, while not making it harder for |
| 1245 | * + nice tasks. | 1285 | * + nice tasks. |
| 1246 | */ | 1286 | */ |
| 1247 | if (sched_feat(ASYM_GRAN)) | 1287 | if (!sched_feat(ASYM_GRAN) || se->load.weight > NICE_0_LOAD) |
| 1248 | gran = calc_delta_mine(gran, NICE_0_LOAD, &se->load); | 1288 | gran = calc_delta_fair(sysctl_sched_wakeup_granularity, se); |
| 1249 | 1289 | ||
| 1250 | return gran; | 1290 | return gran; |
| 1251 | } | 1291 | } |
| 1252 | 1292 | ||
| 1253 | /* | 1293 | /* |
| 1294 | * Should 'se' preempt 'curr'. | ||
| 1295 | * | ||
| 1296 | * |s1 | ||
| 1297 | * |s2 | ||
| 1298 | * |s3 | ||
| 1299 | * g | ||
| 1300 | * |<--->|c | ||
| 1301 | * | ||
| 1302 | * w(c, s1) = -1 | ||
| 1303 | * w(c, s2) = 0 | ||
| 1304 | * w(c, s3) = 1 | ||
| 1305 | * | ||
| 1306 | */ | ||
| 1307 | static int | ||
| 1308 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se) | ||
| 1309 | { | ||
| 1310 | s64 gran, vdiff = curr->vruntime - se->vruntime; | ||
| 1311 | |||
| 1312 | if (vdiff <= 0) | ||
| 1313 | return -1; | ||
| 1314 | |||
| 1315 | gran = wakeup_gran(curr); | ||
| 1316 | if (vdiff > gran) | ||
| 1317 | return 1; | ||
| 1318 | |||
| 1319 | return 0; | ||
| 1320 | } | ||
| 1321 | |||
| 1322 | static void set_last_buddy(struct sched_entity *se) | ||
| 1323 | { | ||
| 1324 | for_each_sched_entity(se) | ||
| 1325 | cfs_rq_of(se)->last = se; | ||
| 1326 | } | ||
| 1327 | |||
| 1328 | static void set_next_buddy(struct sched_entity *se) | ||
| 1329 | { | ||
| 1330 | for_each_sched_entity(se) | ||
| 1331 | cfs_rq_of(se)->next = se; | ||
| 1332 | } | ||
| 1333 | |||
| 1334 | /* | ||
| 1254 | * Preempt the current task with a newly woken task if needed: | 1335 | * Preempt the current task with a newly woken task if needed: |
| 1255 | */ | 1336 | */ |
| 1256 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | 1337 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) |
| 1257 | { | 1338 | { |
| 1258 | struct task_struct *curr = rq->curr; | 1339 | struct task_struct *curr = rq->curr; |
| 1259 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
| 1260 | struct sched_entity *se = &curr->se, *pse = &p->se; | 1340 | struct sched_entity *se = &curr->se, *pse = &p->se; |
| 1261 | s64 delta_exec; | ||
| 1262 | 1341 | ||
| 1263 | if (unlikely(rt_prio(p->prio))) { | 1342 | if (unlikely(rt_prio(p->prio))) { |
| 1343 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
| 1344 | |||
| 1264 | update_rq_clock(rq); | 1345 | update_rq_clock(rq); |
| 1265 | update_curr(cfs_rq); | 1346 | update_curr(cfs_rq); |
| 1266 | resched_task(curr); | 1347 | resched_task(curr); |
| 1267 | return; | 1348 | return; |
| 1268 | } | 1349 | } |
| 1269 | 1350 | ||
| 1351 | if (unlikely(p->sched_class != &fair_sched_class)) | ||
| 1352 | return; | ||
| 1353 | |||
| 1270 | if (unlikely(se == pse)) | 1354 | if (unlikely(se == pse)) |
| 1271 | return; | 1355 | return; |
| 1272 | 1356 | ||
| 1273 | cfs_rq_of(pse)->next = pse; | 1357 | /* |
| 1358 | * Only set the backward buddy when the current task is still on the | ||
| 1359 | * rq. This can happen when a wakeup gets interleaved with schedule on | ||
| 1360 | * the ->pre_schedule() or idle_balance() point, either of which can | ||
| 1361 | * drop the rq lock. | ||
| 1362 | * | ||
| 1363 | * Also, during early boot the idle thread is in the fair class, for | ||
| 1364 | * obvious reasons its a bad idea to schedule back to the idle thread. | ||
| 1365 | */ | ||
| 1366 | if (sched_feat(LAST_BUDDY) && likely(se->on_rq && curr != rq->idle)) | ||
| 1367 | set_last_buddy(se); | ||
| 1368 | set_next_buddy(pse); | ||
| 1274 | 1369 | ||
| 1275 | /* | 1370 | /* |
| 1276 | * We can come here with TIF_NEED_RESCHED already set from new task | 1371 | * We can come here with TIF_NEED_RESCHED already set from new task |
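The new wakeup_preempt_entity() classifies a candidate against the running entity as the w(c, s) values in its comment describe: -1 when the candidate's vruntime is not smaller than curr's, 0 when it is smaller but within the wakeup granularity, 1 when it is smaller by more than the granularity. pick_next_entity() uses the `< 1` result to run a buddy only while that costs less than one granularity of fairness, and check_preempt_wakeup() (next hunk) reschedules only on a full 1. A user-space sketch of the classification, with a fixed `WAKEUP_GRAN` standing in for wakeup_gran() and made-up vruntimes:

```c
#include <assert.h>
#include <stdint.h>

#define WAKEUP_GRAN	4000000LL	/* stand-in for wakeup_gran(curr), in ns */

/*
 * -1: se's vruntime is at or beyond curr's    -> no claim at all
 *  0: se is ahead, but within the granularity -> acceptable as a buddy
 *  1: se is ahead by more than the granularity -> preempt curr
 */
static int wakeup_preempt(uint64_t curr_vruntime, uint64_t se_vruntime)
{
	int64_t vdiff = (int64_t)(curr_vruntime - se_vruntime);

	if (vdiff <= 0)
		return -1;
	if (vdiff > WAKEUP_GRAN)
		return 1;
	return 0;
}

int main(void)
{
	/* curr at 10ms of weighted runtime; candidates mirror s1, s2, s3 */
	assert(wakeup_preempt(10000000, 12000000) == -1);	/* s1: vruntime >= curr's     */
	assert(wakeup_preempt(10000000,  8000000) ==  0);	/* s2: smaller, within gran   */
	assert(wakeup_preempt(10000000,  1000000) ==  1);	/* s3: smaller by more than gran */
	return 0;
}
```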
| @@ -1296,9 +1391,19 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int sync) | |||
| 1296 | return; | 1391 | return; |
| 1297 | } | 1392 | } |
| 1298 | 1393 | ||
| 1299 | delta_exec = se->sum_exec_runtime - se->prev_sum_exec_runtime; | 1394 | find_matching_se(&se, &pse); |
| 1300 | if (delta_exec > wakeup_gran(pse)) | 1395 | |
| 1301 | resched_task(curr); | 1396 | while (se) { |
| 1397 | BUG_ON(!pse); | ||
| 1398 | |||
| 1399 | if (wakeup_preempt_entity(se, pse) == 1) { | ||
| 1400 | resched_task(curr); | ||
| 1401 | break; | ||
| 1402 | } | ||
| 1403 | |||
| 1404 | se = parent_entity(se); | ||
| 1405 | pse = parent_entity(pse); | ||
| 1406 | } | ||
| 1302 | } | 1407 | } |
| 1303 | 1408 | ||
| 1304 | static struct task_struct *pick_next_task_fair(struct rq *rq) | 1409 | static struct task_struct *pick_next_task_fair(struct rq *rq) |
| @@ -1312,6 +1417,7 @@ static struct task_struct *pick_next_task_fair(struct rq *rq) | |||
| 1312 | 1417 | ||
| 1313 | do { | 1418 | do { |
| 1314 | se = pick_next_entity(cfs_rq); | 1419 | se = pick_next_entity(cfs_rq); |
| 1420 | set_next_entity(cfs_rq, se); | ||
| 1315 | cfs_rq = group_cfs_rq(se); | 1421 | cfs_rq = group_cfs_rq(se); |
| 1316 | } while (cfs_rq); | 1422 | } while (cfs_rq); |
| 1317 | 1423 | ||
| @@ -1594,9 +1700,6 @@ static const struct sched_class fair_sched_class = { | |||
| 1594 | .enqueue_task = enqueue_task_fair, | 1700 | .enqueue_task = enqueue_task_fair, |
| 1595 | .dequeue_task = dequeue_task_fair, | 1701 | .dequeue_task = dequeue_task_fair, |
| 1596 | .yield_task = yield_task_fair, | 1702 | .yield_task = yield_task_fair, |
| 1597 | #ifdef CONFIG_SMP | ||
| 1598 | .select_task_rq = select_task_rq_fair, | ||
| 1599 | #endif /* CONFIG_SMP */ | ||
| 1600 | 1703 | ||
| 1601 | .check_preempt_curr = check_preempt_wakeup, | 1704 | .check_preempt_curr = check_preempt_wakeup, |
| 1602 | 1705 | ||
| @@ -1604,6 +1707,8 @@ static const struct sched_class fair_sched_class = { | |||
| 1604 | .put_prev_task = put_prev_task_fair, | 1707 | .put_prev_task = put_prev_task_fair, |
| 1605 | 1708 | ||
| 1606 | #ifdef CONFIG_SMP | 1709 | #ifdef CONFIG_SMP |
| 1710 | .select_task_rq = select_task_rq_fair, | ||
| 1711 | |||
| 1607 | .load_balance = load_balance_fair, | 1712 | .load_balance = load_balance_fair, |
| 1608 | .move_one_task = move_one_task_fair, | 1713 | .move_one_task = move_one_task_fair, |
| 1609 | #endif | 1714 | #endif |
