diff options
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 397 |
1 files changed, 230 insertions, 167 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 0c26e2df450..3f7ec9e27ee 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -69,14 +69,6 @@ static unsigned int sched_nr_latency = 8; | |||
69 | unsigned int sysctl_sched_child_runs_first __read_mostly; | 69 | unsigned int sysctl_sched_child_runs_first __read_mostly; |
70 | 70 | ||
71 | /* | 71 | /* |
72 | * sys_sched_yield() compat mode | ||
73 | * | ||
74 | * This option switches the aggressive yield implementation of the | ||
75 | * old scheduler back on. | ||
76 | */ | ||
77 | unsigned int __read_mostly sysctl_sched_compat_yield; | ||
78 | |||
79 | /* | ||
80 | * SCHED_OTHER wake-up granularity. | 72 | * SCHED_OTHER wake-up granularity. |
81 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) | 73 | * (default: 1 msec * (1 + ilog(ncpus)), units: nanoseconds) |
82 | * | 74 | * |
@@ -419,7 +411,7 @@ static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
419 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); | 411 | rb_erase(&se->run_node, &cfs_rq->tasks_timeline); |
420 | } | 412 | } |
421 | 413 | ||
422 | static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | 414 | static struct sched_entity *__pick_first_entity(struct cfs_rq *cfs_rq) |
423 | { | 415 | { |
424 | struct rb_node *left = cfs_rq->rb_leftmost; | 416 | struct rb_node *left = cfs_rq->rb_leftmost; |
425 | 417 | ||
@@ -429,6 +421,17 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq) | |||
429 | return rb_entry(left, struct sched_entity, run_node); | 421 | return rb_entry(left, struct sched_entity, run_node); |
430 | } | 422 | } |
431 | 423 | ||
424 | static struct sched_entity *__pick_next_entity(struct sched_entity *se) | ||
425 | { | ||
426 | struct rb_node *next = rb_next(&se->run_node); | ||
427 | |||
428 | if (!next) | ||
429 | return NULL; | ||
430 | |||
431 | return rb_entry(next, struct sched_entity, run_node); | ||
432 | } | ||
433 | |||
434 | #ifdef CONFIG_SCHED_DEBUG | ||
432 | static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | 435 | static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) |
433 | { | 436 | { |
434 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); | 437 | struct rb_node *last = rb_last(&cfs_rq->tasks_timeline); |
@@ -443,7 +446,6 @@ static struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) | |||
443 | * Scheduling class statistics methods: | 446 | * Scheduling class statistics methods: |
444 | */ | 447 | */ |
445 | 448 | ||
446 | #ifdef CONFIG_SCHED_DEBUG | ||
447 | int sched_proc_update_handler(struct ctl_table *table, int write, | 449 | int sched_proc_update_handler(struct ctl_table *table, int write, |
448 | void __user *buffer, size_t *lenp, | 450 | void __user *buffer, size_t *lenp, |
449 | loff_t *ppos) | 451 | loff_t *ppos) |
@@ -540,7 +542,7 @@ static u64 sched_vslice(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
540 | } | 542 | } |
541 | 543 | ||
542 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); | 544 | static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update); |
543 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta); | 545 | static void update_cfs_shares(struct cfs_rq *cfs_rq); |
544 | 546 | ||
545 | /* | 547 | /* |
546 | * Update the current task's runtime statistics. Skip current tasks that | 548 | * Update the current task's runtime statistics. Skip current tasks that |
@@ -733,6 +735,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
733 | now - cfs_rq->load_last > 4 * period) { | 735 | now - cfs_rq->load_last > 4 * period) { |
734 | cfs_rq->load_period = 0; | 736 | cfs_rq->load_period = 0; |
735 | cfs_rq->load_avg = 0; | 737 | cfs_rq->load_avg = 0; |
738 | delta = period - 1; | ||
736 | } | 739 | } |
737 | 740 | ||
738 | cfs_rq->load_stamp = now; | 741 | cfs_rq->load_stamp = now; |
@@ -763,16 +766,15 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
763 | list_del_leaf_cfs_rq(cfs_rq); | 766 | list_del_leaf_cfs_rq(cfs_rq); |
764 | } | 767 | } |
765 | 768 | ||
766 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | 769 | static long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) |
767 | long weight_delta) | ||
768 | { | 770 | { |
769 | long load_weight, load, shares; | 771 | long load_weight, load, shares; |
770 | 772 | ||
771 | load = cfs_rq->load.weight + weight_delta; | 773 | load = cfs_rq->load.weight; |
772 | 774 | ||
773 | load_weight = atomic_read(&tg->load_weight); | 775 | load_weight = atomic_read(&tg->load_weight); |
774 | load_weight -= cfs_rq->load_contribution; | ||
775 | load_weight += load; | 776 | load_weight += load; |
777 | load_weight -= cfs_rq->load_contribution; | ||
776 | 778 | ||
777 | shares = (tg->shares * load); | 779 | shares = (tg->shares * load); |
778 | if (load_weight) | 780 | if (load_weight) |
@@ -790,7 +792,7 @@ static void update_entity_shares_tick(struct cfs_rq *cfs_rq) | |||
790 | { | 792 | { |
791 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { | 793 | if (cfs_rq->load_unacc_exec_time > sysctl_sched_shares_window) { |
792 | update_cfs_load(cfs_rq, 0); | 794 | update_cfs_load(cfs_rq, 0); |
793 | update_cfs_shares(cfs_rq, 0); | 795 | update_cfs_shares(cfs_rq); |
794 | } | 796 | } |
795 | } | 797 | } |
796 | # else /* CONFIG_SMP */ | 798 | # else /* CONFIG_SMP */ |
@@ -798,8 +800,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
798 | { | 800 | { |
799 | } | 801 | } |
800 | 802 | ||
801 | static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg, | 803 | static inline long calc_cfs_shares(struct cfs_rq *cfs_rq, struct task_group *tg) |
802 | long weight_delta) | ||
803 | { | 804 | { |
804 | return tg->shares; | 805 | return tg->shares; |
805 | } | 806 | } |
@@ -824,7 +825,7 @@ static void reweight_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, | |||
824 | account_entity_enqueue(cfs_rq, se); | 825 | account_entity_enqueue(cfs_rq, se); |
825 | } | 826 | } |
826 | 827 | ||
827 | static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | 828 | static void update_cfs_shares(struct cfs_rq *cfs_rq) |
828 | { | 829 | { |
829 | struct task_group *tg; | 830 | struct task_group *tg; |
830 | struct sched_entity *se; | 831 | struct sched_entity *se; |
@@ -838,7 +839,7 @@ static void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | |||
838 | if (likely(se->load.weight == tg->shares)) | 839 | if (likely(se->load.weight == tg->shares)) |
839 | return; | 840 | return; |
840 | #endif | 841 | #endif |
841 | shares = calc_cfs_shares(cfs_rq, tg, weight_delta); | 842 | shares = calc_cfs_shares(cfs_rq, tg); |
842 | 843 | ||
843 | reweight_entity(cfs_rq_of(se), se, shares); | 844 | reweight_entity(cfs_rq_of(se), se, shares); |
844 | } | 845 | } |
@@ -847,7 +848,7 @@ static void update_cfs_load(struct cfs_rq *cfs_rq, int global_update) | |||
847 | { | 848 | { |
848 | } | 849 | } |
849 | 850 | ||
850 | static inline void update_cfs_shares(struct cfs_rq *cfs_rq, long weight_delta) | 851 | static inline void update_cfs_shares(struct cfs_rq *cfs_rq) |
851 | { | 852 | { |
852 | } | 853 | } |
853 | 854 | ||
@@ -978,8 +979,8 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
978 | */ | 979 | */ |
979 | update_curr(cfs_rq); | 980 | update_curr(cfs_rq); |
980 | update_cfs_load(cfs_rq, 0); | 981 | update_cfs_load(cfs_rq, 0); |
981 | update_cfs_shares(cfs_rq, se->load.weight); | ||
982 | account_entity_enqueue(cfs_rq, se); | 982 | account_entity_enqueue(cfs_rq, se); |
983 | update_cfs_shares(cfs_rq); | ||
983 | 984 | ||
984 | if (flags & ENQUEUE_WAKEUP) { | 985 | if (flags & ENQUEUE_WAKEUP) { |
985 | place_entity(cfs_rq, se, 0); | 986 | place_entity(cfs_rq, se, 0); |
@@ -996,19 +997,49 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
996 | list_add_leaf_cfs_rq(cfs_rq); | 997 | list_add_leaf_cfs_rq(cfs_rq); |
997 | } | 998 | } |
998 | 999 | ||
999 | static void __clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 1000 | static void __clear_buddies_last(struct sched_entity *se) |
1001 | { | ||
1002 | for_each_sched_entity(se) { | ||
1003 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1004 | if (cfs_rq->last == se) | ||
1005 | cfs_rq->last = NULL; | ||
1006 | else | ||
1007 | break; | ||
1008 | } | ||
1009 | } | ||
1010 | |||
1011 | static void __clear_buddies_next(struct sched_entity *se) | ||
1000 | { | 1012 | { |
1001 | if (!se || cfs_rq->last == se) | 1013 | for_each_sched_entity(se) { |
1002 | cfs_rq->last = NULL; | 1014 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1015 | if (cfs_rq->next == se) | ||
1016 | cfs_rq->next = NULL; | ||
1017 | else | ||
1018 | break; | ||
1019 | } | ||
1020 | } | ||
1003 | 1021 | ||
1004 | if (!se || cfs_rq->next == se) | 1022 | static void __clear_buddies_skip(struct sched_entity *se) |
1005 | cfs_rq->next = NULL; | 1023 | { |
1024 | for_each_sched_entity(se) { | ||
1025 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
1026 | if (cfs_rq->skip == se) | ||
1027 | cfs_rq->skip = NULL; | ||
1028 | else | ||
1029 | break; | ||
1030 | } | ||
1006 | } | 1031 | } |
1007 | 1032 | ||
1008 | static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) | 1033 | static void clear_buddies(struct cfs_rq *cfs_rq, struct sched_entity *se) |
1009 | { | 1034 | { |
1010 | for_each_sched_entity(se) | 1035 | if (cfs_rq->last == se) |
1011 | __clear_buddies(cfs_rq_of(se), se); | 1036 | __clear_buddies_last(se); |
1037 | |||
1038 | if (cfs_rq->next == se) | ||
1039 | __clear_buddies_next(se); | ||
1040 | |||
1041 | if (cfs_rq->skip == se) | ||
1042 | __clear_buddies_skip(se); | ||
1012 | } | 1043 | } |
1013 | 1044 | ||
1014 | static void | 1045 | static void |
@@ -1041,7 +1072,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags) | |||
1041 | update_cfs_load(cfs_rq, 0); | 1072 | update_cfs_load(cfs_rq, 0); |
1042 | account_entity_dequeue(cfs_rq, se); | 1073 | account_entity_dequeue(cfs_rq, se); |
1043 | update_min_vruntime(cfs_rq); | 1074 | update_min_vruntime(cfs_rq); |
1044 | update_cfs_shares(cfs_rq, 0); | 1075 | update_cfs_shares(cfs_rq); |
1045 | 1076 | ||
1046 | /* | 1077 | /* |
1047 | * Normalize the entity after updating the min_vruntime because the | 1078 | * Normalize the entity after updating the min_vruntime because the |
@@ -1084,7 +1115,7 @@ check_preempt_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr) | |||
1084 | return; | 1115 | return; |
1085 | 1116 | ||
1086 | if (cfs_rq->nr_running > 1) { | 1117 | if (cfs_rq->nr_running > 1) { |
1087 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1118 | struct sched_entity *se = __pick_first_entity(cfs_rq); |
1088 | s64 delta = curr->vruntime - se->vruntime; | 1119 | s64 delta = curr->vruntime - se->vruntime; |
1089 | 1120 | ||
1090 | if (delta < 0) | 1121 | if (delta < 0) |
@@ -1128,13 +1159,27 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se) | |||
1128 | static int | 1159 | static int |
1129 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); | 1160 | wakeup_preempt_entity(struct sched_entity *curr, struct sched_entity *se); |
1130 | 1161 | ||
1162 | /* | ||
1163 | * Pick the next process, keeping these things in mind, in this order: | ||
1164 | * 1) keep things fair between processes/task groups | ||
1165 | * 2) pick the "next" process, since someone really wants that to run | ||
1166 | * 3) pick the "last" process, for cache locality | ||
1167 | * 4) do not run the "skip" process, if something else is available | ||
1168 | */ | ||
1131 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | 1169 | static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) |
1132 | { | 1170 | { |
1133 | struct sched_entity *se = __pick_next_entity(cfs_rq); | 1171 | struct sched_entity *se = __pick_first_entity(cfs_rq); |
1134 | struct sched_entity *left = se; | 1172 | struct sched_entity *left = se; |
1135 | 1173 | ||
1136 | if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) | 1174 | /* |
1137 | se = cfs_rq->next; | 1175 | * Avoid running the skip buddy, if running something else can |
1176 | * be done without getting too unfair. | ||
1177 | */ | ||
1178 | if (cfs_rq->skip == se) { | ||
1179 | struct sched_entity *second = __pick_next_entity(se); | ||
1180 | if (second && wakeup_preempt_entity(second, left) < 1) | ||
1181 | se = second; | ||
1182 | } | ||
1138 | 1183 | ||
1139 | /* | 1184 | /* |
1140 | * Prefer last buddy, try to return the CPU to a preempted task. | 1185 | * Prefer last buddy, try to return the CPU to a preempted task. |
@@ -1142,6 +1187,12 @@ static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq) | |||
1142 | if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) | 1187 | if (cfs_rq->last && wakeup_preempt_entity(cfs_rq->last, left) < 1) |
1143 | se = cfs_rq->last; | 1188 | se = cfs_rq->last; |
1144 | 1189 | ||
1190 | /* | ||
1191 | * Someone really wants this to run. If it's not unfair, run it. | ||
1192 | */ | ||
1193 | if (cfs_rq->next && wakeup_preempt_entity(cfs_rq->next, left) < 1) | ||
1194 | se = cfs_rq->next; | ||
1195 | |||
1145 | clear_buddies(cfs_rq, se); | 1196 | clear_buddies(cfs_rq, se); |
1146 | 1197 | ||
1147 | return se; | 1198 | return se; |
@@ -1282,7 +1333,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1282 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1333 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1283 | 1334 | ||
1284 | update_cfs_load(cfs_rq, 0); | 1335 | update_cfs_load(cfs_rq, 0); |
1285 | update_cfs_shares(cfs_rq, 0); | 1336 | update_cfs_shares(cfs_rq); |
1286 | } | 1337 | } |
1287 | 1338 | ||
1288 | hrtick_update(rq); | 1339 | hrtick_update(rq); |
@@ -1312,58 +1363,12 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags) | |||
1312 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | 1363 | struct cfs_rq *cfs_rq = cfs_rq_of(se); |
1313 | 1364 | ||
1314 | update_cfs_load(cfs_rq, 0); | 1365 | update_cfs_load(cfs_rq, 0); |
1315 | update_cfs_shares(cfs_rq, 0); | 1366 | update_cfs_shares(cfs_rq); |
1316 | } | 1367 | } |
1317 | 1368 | ||
1318 | hrtick_update(rq); | 1369 | hrtick_update(rq); |
1319 | } | 1370 | } |
1320 | 1371 | ||
1321 | /* | ||
1322 | * sched_yield() support is very simple - we dequeue and enqueue. | ||
1323 | * | ||
1324 | * If compat_yield is turned on then we requeue to the end of the tree. | ||
1325 | */ | ||
1326 | static void yield_task_fair(struct rq *rq) | ||
1327 | { | ||
1328 | struct task_struct *curr = rq->curr; | ||
1329 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
1330 | struct sched_entity *rightmost, *se = &curr->se; | ||
1331 | |||
1332 | /* | ||
1333 | * Are we the only task in the tree? | ||
1334 | */ | ||
1335 | if (unlikely(cfs_rq->nr_running == 1)) | ||
1336 | return; | ||
1337 | |||
1338 | clear_buddies(cfs_rq, se); | ||
1339 | |||
1340 | if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) { | ||
1341 | update_rq_clock(rq); | ||
1342 | /* | ||
1343 | * Update run-time statistics of the 'current'. | ||
1344 | */ | ||
1345 | update_curr(cfs_rq); | ||
1346 | |||
1347 | return; | ||
1348 | } | ||
1349 | /* | ||
1350 | * Find the rightmost entry in the rbtree: | ||
1351 | */ | ||
1352 | rightmost = __pick_last_entity(cfs_rq); | ||
1353 | /* | ||
1354 | * Already in the rightmost position? | ||
1355 | */ | ||
1356 | if (unlikely(!rightmost || entity_before(rightmost, se))) | ||
1357 | return; | ||
1358 | |||
1359 | /* | ||
1360 | * Minimally necessary key value to be last in the tree: | ||
1361 | * Upon rescheduling, sched_class::put_prev_task() will place | ||
1362 | * 'current' within the tree based on its new key value. | ||
1363 | */ | ||
1364 | se->vruntime = rightmost->vruntime + 1; | ||
1365 | } | ||
1366 | |||
1367 | #ifdef CONFIG_SMP | 1372 | #ifdef CONFIG_SMP |
1368 | 1373 | ||
1369 | static void task_waking_fair(struct rq *rq, struct task_struct *p) | 1374 | static void task_waking_fair(struct rq *rq, struct task_struct *p) |
@@ -1834,6 +1839,14 @@ static void set_next_buddy(struct sched_entity *se) | |||
1834 | } | 1839 | } |
1835 | } | 1840 | } |
1836 | 1841 | ||
1842 | static void set_skip_buddy(struct sched_entity *se) | ||
1843 | { | ||
1844 | if (likely(task_of(se)->policy != SCHED_IDLE)) { | ||
1845 | for_each_sched_entity(se) | ||
1846 | cfs_rq_of(se)->skip = se; | ||
1847 | } | ||
1848 | } | ||
1849 | |||
1837 | /* | 1850 | /* |
1838 | * Preempt the current task with a newly woken task if needed: | 1851 | * Preempt the current task with a newly woken task if needed: |
1839 | */ | 1852 | */ |
@@ -1857,16 +1870,18 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_ | |||
1857 | if (test_tsk_need_resched(curr)) | 1870 | if (test_tsk_need_resched(curr)) |
1858 | return; | 1871 | return; |
1859 | 1872 | ||
1873 | /* Idle tasks are by definition preempted by non-idle tasks. */ | ||
1874 | if (unlikely(curr->policy == SCHED_IDLE) && | ||
1875 | likely(p->policy != SCHED_IDLE)) | ||
1876 | goto preempt; | ||
1877 | |||
1860 | /* | 1878 | /* |
1861 | * Batch and idle tasks do not preempt (their preemption is driven by | 1879 | * Batch and idle tasks do not preempt non-idle tasks (their preemption |
1862 | * the tick): | 1880 | * is driven by the tick): |
1863 | */ | 1881 | */ |
1864 | if (unlikely(p->policy != SCHED_NORMAL)) | 1882 | if (unlikely(p->policy != SCHED_NORMAL)) |
1865 | return; | 1883 | return; |
1866 | 1884 | ||
1867 | /* Idle tasks are by definition preempted by everybody. */ | ||
1868 | if (unlikely(curr->policy == SCHED_IDLE)) | ||
1869 | goto preempt; | ||
1870 | 1885 | ||
1871 | if (!sched_feat(WAKEUP_PREEMPT)) | 1886 | if (!sched_feat(WAKEUP_PREEMPT)) |
1872 | return; | 1887 | return; |
@@ -1932,6 +1947,51 @@ static void put_prev_task_fair(struct rq *rq, struct task_struct *prev) | |||
1932 | } | 1947 | } |
1933 | } | 1948 | } |
1934 | 1949 | ||
1950 | /* | ||
1951 | * sched_yield() is very simple | ||
1952 | * | ||
1953 | * The magic of dealing with the ->skip buddy is in pick_next_entity. | ||
1954 | */ | ||
1955 | static void yield_task_fair(struct rq *rq) | ||
1956 | { | ||
1957 | struct task_struct *curr = rq->curr; | ||
1958 | struct cfs_rq *cfs_rq = task_cfs_rq(curr); | ||
1959 | struct sched_entity *se = &curr->se; | ||
1960 | |||
1961 | /* | ||
1962 | * Are we the only task in the tree? | ||
1963 | */ | ||
1964 | if (unlikely(rq->nr_running == 1)) | ||
1965 | return; | ||
1966 | |||
1967 | clear_buddies(cfs_rq, se); | ||
1968 | |||
1969 | if (curr->policy != SCHED_BATCH) { | ||
1970 | update_rq_clock(rq); | ||
1971 | /* | ||
1972 | * Update run-time statistics of the 'current'. | ||
1973 | */ | ||
1974 | update_curr(cfs_rq); | ||
1975 | } | ||
1976 | |||
1977 | set_skip_buddy(se); | ||
1978 | } | ||
1979 | |||
1980 | static bool yield_to_task_fair(struct rq *rq, struct task_struct *p, bool preempt) | ||
1981 | { | ||
1982 | struct sched_entity *se = &p->se; | ||
1983 | |||
1984 | if (!se->on_rq) | ||
1985 | return false; | ||
1986 | |||
1987 | /* Tell the scheduler that we'd really like se to run next. */ | ||
1988 | set_next_buddy(se); | ||
1989 | |||
1990 | yield_task_fair(rq); | ||
1991 | |||
1992 | return true; | ||
1993 | } | ||
1994 | |||
1935 | #ifdef CONFIG_SMP | 1995 | #ifdef CONFIG_SMP |
1936 | /************************************************** | 1996 | /************************************************** |
1937 | * Fair scheduling class load-balancing methods: | 1997 | * Fair scheduling class load-balancing methods: |
@@ -2123,7 +2183,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu) | |||
2123 | * We need to update shares after updating tg->load_weight in | 2183 | * We need to update shares after updating tg->load_weight in |
2124 | * order to adjust the weight of groups with long running tasks. | 2184 | * order to adjust the weight of groups with long running tasks. |
2125 | */ | 2185 | */ |
2126 | update_cfs_shares(cfs_rq, 0); | 2186 | update_cfs_shares(cfs_rq); |
2127 | 2187 | ||
2128 | raw_spin_unlock_irqrestore(&rq->lock, flags); | 2188 | raw_spin_unlock_irqrestore(&rq->lock, flags); |
2129 | 2189 | ||
@@ -2610,7 +2670,6 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
2610 | * @this_cpu: Cpu for which load balance is currently performed. | 2670 | * @this_cpu: Cpu for which load balance is currently performed. |
2611 | * @idle: Idle status of this_cpu | 2671 | * @idle: Idle status of this_cpu |
2612 | * @load_idx: Load index of sched_domain of this_cpu for load calc. | 2672 | * @load_idx: Load index of sched_domain of this_cpu for load calc. |
2613 | * @sd_idle: Idle status of the sched_domain containing group. | ||
2614 | * @local_group: Does group contain this_cpu. | 2673 | * @local_group: Does group contain this_cpu. |
2615 | * @cpus: Set of cpus considered for load balancing. | 2674 | * @cpus: Set of cpus considered for load balancing. |
2616 | * @balance: Should we balance. | 2675 | * @balance: Should we balance. |
@@ -2618,7 +2677,7 @@ fix_small_capacity(struct sched_domain *sd, struct sched_group *group) | |||
2618 | */ | 2677 | */ |
2619 | static inline void update_sg_lb_stats(struct sched_domain *sd, | 2678 | static inline void update_sg_lb_stats(struct sched_domain *sd, |
2620 | struct sched_group *group, int this_cpu, | 2679 | struct sched_group *group, int this_cpu, |
2621 | enum cpu_idle_type idle, int load_idx, int *sd_idle, | 2680 | enum cpu_idle_type idle, int load_idx, |
2622 | int local_group, const struct cpumask *cpus, | 2681 | int local_group, const struct cpumask *cpus, |
2623 | int *balance, struct sg_lb_stats *sgs) | 2682 | int *balance, struct sg_lb_stats *sgs) |
2624 | { | 2683 | { |
@@ -2638,9 +2697,6 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2638 | for_each_cpu_and(i, sched_group_cpus(group), cpus) { | 2697 | for_each_cpu_and(i, sched_group_cpus(group), cpus) { |
2639 | struct rq *rq = cpu_rq(i); | 2698 | struct rq *rq = cpu_rq(i); |
2640 | 2699 | ||
2641 | if (*sd_idle && rq->nr_running) | ||
2642 | *sd_idle = 0; | ||
2643 | |||
2644 | /* Bias balancing toward cpus of our domain */ | 2700 | /* Bias balancing toward cpus of our domain */ |
2645 | if (local_group) { | 2701 | if (local_group) { |
2646 | if (idle_cpu(i) && !first_idle_cpu) { | 2702 | if (idle_cpu(i) && !first_idle_cpu) { |
@@ -2685,7 +2741,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2685 | 2741 | ||
2686 | /* | 2742 | /* |
2687 | * Consider the group unbalanced when the imbalance is larger | 2743 | * Consider the group unbalanced when the imbalance is larger |
2688 | * than the average weight of two tasks. | 2744 | * than the average weight of a task. |
2689 | * | 2745 | * |
2690 | * APZ: with cgroup the avg task weight can vary wildly and | 2746 | * APZ: with cgroup the avg task weight can vary wildly and |
2691 | * might not be a suitable number - should we keep a | 2747 | * might not be a suitable number - should we keep a |
@@ -2695,7 +2751,7 @@ static inline void update_sg_lb_stats(struct sched_domain *sd, | |||
2695 | if (sgs->sum_nr_running) | 2751 | if (sgs->sum_nr_running) |
2696 | avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; | 2752 | avg_load_per_task = sgs->sum_weighted_load / sgs->sum_nr_running; |
2697 | 2753 | ||
2698 | if ((max_cpu_load - min_cpu_load) > 2*avg_load_per_task && max_nr_running > 1) | 2754 | if ((max_cpu_load - min_cpu_load) >= avg_load_per_task && max_nr_running > 1) |
2699 | sgs->group_imb = 1; | 2755 | sgs->group_imb = 1; |
2700 | 2756 | ||
2701 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); | 2757 | sgs->group_capacity = DIV_ROUND_CLOSEST(group->cpu_power, SCHED_LOAD_SCALE); |
@@ -2755,15 +2811,13 @@ static bool update_sd_pick_busiest(struct sched_domain *sd, | |||
2755 | * @sd: sched_domain whose statistics are to be updated. | 2811 | * @sd: sched_domain whose statistics are to be updated. |
2756 | * @this_cpu: Cpu for which load balance is currently performed. | 2812 | * @this_cpu: Cpu for which load balance is currently performed. |
2757 | * @idle: Idle status of this_cpu | 2813 | * @idle: Idle status of this_cpu |
2758 | * @sd_idle: Idle status of the sched_domain containing sg. | ||
2759 | * @cpus: Set of cpus considered for load balancing. | 2814 | * @cpus: Set of cpus considered for load balancing. |
2760 | * @balance: Should we balance. | 2815 | * @balance: Should we balance. |
2761 | * @sds: variable to hold the statistics for this sched_domain. | 2816 | * @sds: variable to hold the statistics for this sched_domain. |
2762 | */ | 2817 | */ |
2763 | static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | 2818 | static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, |
2764 | enum cpu_idle_type idle, int *sd_idle, | 2819 | enum cpu_idle_type idle, const struct cpumask *cpus, |
2765 | const struct cpumask *cpus, int *balance, | 2820 | int *balance, struct sd_lb_stats *sds) |
2766 | struct sd_lb_stats *sds) | ||
2767 | { | 2821 | { |
2768 | struct sched_domain *child = sd->child; | 2822 | struct sched_domain *child = sd->child; |
2769 | struct sched_group *sg = sd->groups; | 2823 | struct sched_group *sg = sd->groups; |
@@ -2781,7 +2835,7 @@ static inline void update_sd_lb_stats(struct sched_domain *sd, int this_cpu, | |||
2781 | 2835 | ||
2782 | local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg)); | 2836 | local_group = cpumask_test_cpu(this_cpu, sched_group_cpus(sg)); |
2783 | memset(&sgs, 0, sizeof(sgs)); | 2837 | memset(&sgs, 0, sizeof(sgs)); |
2784 | update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, sd_idle, | 2838 | update_sg_lb_stats(sd, sg, this_cpu, idle, load_idx, |
2785 | local_group, cpus, balance, &sgs); | 2839 | local_group, cpus, balance, &sgs); |
2786 | 2840 | ||
2787 | if (local_group && !(*balance)) | 2841 | if (local_group && !(*balance)) |
@@ -3033,7 +3087,6 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
3033 | * @imbalance: Variable which stores amount of weighted load which should | 3087 | * @imbalance: Variable which stores amount of weighted load which should |
3034 | * be moved to restore balance/put a group to idle. | 3088 | * be moved to restore balance/put a group to idle. |
3035 | * @idle: The idle status of this_cpu. | 3089 | * @idle: The idle status of this_cpu. |
3036 | * @sd_idle: The idleness of sd | ||
3037 | * @cpus: The set of CPUs under consideration for load-balancing. | 3090 | * @cpus: The set of CPUs under consideration for load-balancing. |
3038 | * @balance: Pointer to a variable indicating if this_cpu | 3091 | * @balance: Pointer to a variable indicating if this_cpu |
3039 | * is the appropriate cpu to perform load balancing at this_level. | 3092 | * is the appropriate cpu to perform load balancing at this_level. |
@@ -3046,7 +3099,7 @@ static inline void calculate_imbalance(struct sd_lb_stats *sds, int this_cpu, | |||
3046 | static struct sched_group * | 3099 | static struct sched_group * |
3047 | find_busiest_group(struct sched_domain *sd, int this_cpu, | 3100 | find_busiest_group(struct sched_domain *sd, int this_cpu, |
3048 | unsigned long *imbalance, enum cpu_idle_type idle, | 3101 | unsigned long *imbalance, enum cpu_idle_type idle, |
3049 | int *sd_idle, const struct cpumask *cpus, int *balance) | 3102 | const struct cpumask *cpus, int *balance) |
3050 | { | 3103 | { |
3051 | struct sd_lb_stats sds; | 3104 | struct sd_lb_stats sds; |
3052 | 3105 | ||
@@ -3056,22 +3109,11 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3056 | * Compute the various statistics relevant for load balancing at | 3109 | * Compute the various statistics relevant for load balancing at |
3057 | * this level. | 3110 | * this level. |
3058 | */ | 3111 | */ |
3059 | update_sd_lb_stats(sd, this_cpu, idle, sd_idle, cpus, | 3112 | update_sd_lb_stats(sd, this_cpu, idle, cpus, balance, &sds); |
3060 | balance, &sds); | 3113 | |
3061 | 3114 | /* | |
3062 | /* Cases where imbalance does not exist from POV of this_cpu */ | 3115 | * this_cpu is not the appropriate cpu to perform load balancing at |
3063 | /* 1) this_cpu is not the appropriate cpu to perform load balancing | 3116 | * this level. |
3064 | * at this level. | ||
3065 | * 2) There is no busy sibling group to pull from. | ||
3066 | * 3) This group is the busiest group. | ||
3067 | * 4) This group is more busy than the avg busyness at this | ||
3068 | * sched_domain. | ||
3069 | * 5) The imbalance is within the specified limit. | ||
3070 | * | ||
3071 | * Note: when doing newidle balance, if the local group has excess | ||
3072 | * capacity (i.e. nr_running < group_capacity) and the busiest group | ||
3073 | * does not have any capacity, we force a load balance to pull tasks | ||
3074 | * to the local group. In this case, we skip past checks 3, 4 and 5. | ||
3075 | */ | 3117 | */ |
3076 | if (!(*balance)) | 3118 | if (!(*balance)) |
3077 | goto ret; | 3119 | goto ret; |
@@ -3080,41 +3122,55 @@ find_busiest_group(struct sched_domain *sd, int this_cpu, | |||
3080 | check_asym_packing(sd, &sds, this_cpu, imbalance)) | 3122 | check_asym_packing(sd, &sds, this_cpu, imbalance)) |
3081 | return sds.busiest; | 3123 | return sds.busiest; |
3082 | 3124 | ||
3125 | /* There is no busy sibling group to pull tasks from */ | ||
3083 | if (!sds.busiest || sds.busiest_nr_running == 0) | 3126 | if (!sds.busiest || sds.busiest_nr_running == 0) |
3084 | goto out_balanced; | 3127 | goto out_balanced; |
3085 | 3128 | ||
3086 | /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ | 3129 | /* |
3130 | * If the busiest group is imbalanced the below checks don't | ||
3131 | * work because they assume all things are equal, which typically | ||
3132 | * isn't true due to cpus_allowed constraints and the like. | ||
3133 | */ | ||
3134 | if (sds.group_imb) | ||
3135 | goto force_balance; | ||
3136 | |||
3137 | /* SD_BALANCE_NEWIDLE trumps SMP nice when underutilized */ | ||
3087 | if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && | 3138 | if (idle == CPU_NEWLY_IDLE && sds.this_has_capacity && |
3088 | !sds.busiest_has_capacity) | 3139 | !sds.busiest_has_capacity) |
3089 | goto force_balance; | 3140 | goto force_balance; |
3090 | 3141 | ||
3142 | /* | ||
3143 | * If the local group is more busy than the selected busiest group | ||
3144 | * don't try and pull any tasks. | ||
3145 | */ | ||
3091 | if (sds.this_load >= sds.max_load) | 3146 | if (sds.this_load >= sds.max_load) |
3092 | goto out_balanced; | 3147 | goto out_balanced; |
3093 | 3148 | ||
3149 | /* | ||
3150 | * Don't pull any tasks if this group is already above the domain | ||
3151 | * average load. | ||
3152 | */ | ||
3094 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; | 3153 | sds.avg_load = (SCHED_LOAD_SCALE * sds.total_load) / sds.total_pwr; |
3095 | |||
3096 | if (sds.this_load >= sds.avg_load) | 3154 | if (sds.this_load >= sds.avg_load) |
3097 | goto out_balanced; | 3155 | goto out_balanced; |
3098 | 3156 | ||
3099 | /* | 3157 | if (idle == CPU_IDLE) { |
3100 | * In the CPU_NEWLY_IDLE, use imbalance_pct to be conservative. | ||
3101 | * And to check for busy balance use !idle_cpu instead of | ||
3102 | * CPU_NOT_IDLE. This is because HT siblings will use CPU_NOT_IDLE | ||
3103 | * even when they are idle. | ||
3104 | */ | ||
3105 | if (idle == CPU_NEWLY_IDLE || !idle_cpu(this_cpu)) { | ||
3106 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | ||
3107 | goto out_balanced; | ||
3108 | } else { | ||
3109 | /* | 3158 | /* |
3110 | * This cpu is idle. If the busiest group load doesn't | 3159 | * This cpu is idle. If the busiest group load doesn't |
3111 | * have more tasks than the number of available cpu's and | 3160 | * have more tasks than the number of available cpu's and |
3112 | * there is no imbalance between this and busiest group | 3161 | * there is no imbalance between this and busiest group |
3113 | * wrt to idle cpu's, it is balanced. | 3162 | * wrt to idle cpu's, it is balanced. |
3114 | */ | 3163 | */ |
3115 | if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && | 3164 | if ((sds.this_idle_cpus <= sds.busiest_idle_cpus + 1) && |
3116 | sds.busiest_nr_running <= sds.busiest_group_weight) | 3165 | sds.busiest_nr_running <= sds.busiest_group_weight) |
3117 | goto out_balanced; | 3166 | goto out_balanced; |
3167 | } else { | ||
3168 | /* | ||
3169 | * In the CPU_NEWLY_IDLE, CPU_NOT_IDLE cases, use | ||
3170 | * imbalance_pct to be conservative. | ||
3171 | */ | ||
3172 | if (100 * sds.max_load <= sd->imbalance_pct * sds.this_load) | ||
3173 | goto out_balanced; | ||
3118 | } | 3174 | } |
3119 | 3175 | ||
3120 | force_balance: | 3176 | force_balance: |
@@ -3193,7 +3249,7 @@ find_busiest_queue(struct sched_domain *sd, struct sched_group *group, | |||
3193 | /* Working cpumask for load_balance and load_balance_newidle. */ | 3249 | /* Working cpumask for load_balance and load_balance_newidle. */ |
3194 | static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); | 3250 | static DEFINE_PER_CPU(cpumask_var_t, load_balance_tmpmask); |
3195 | 3251 | ||
3196 | static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle, | 3252 | static int need_active_balance(struct sched_domain *sd, int idle, |
3197 | int busiest_cpu, int this_cpu) | 3253 | int busiest_cpu, int this_cpu) |
3198 | { | 3254 | { |
3199 | if (idle == CPU_NEWLY_IDLE) { | 3255 | if (idle == CPU_NEWLY_IDLE) { |
@@ -3225,10 +3281,6 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle, | |||
3225 | * move_tasks() will succeed. ld_moved will be true and this | 3281 | * move_tasks() will succeed. ld_moved will be true and this |
3226 | * active balance code will not be triggered. | 3282 | * active balance code will not be triggered. |
3227 | */ | 3283 | */ |
3228 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | ||
3229 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
3230 | return 0; | ||
3231 | |||
3232 | if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) | 3284 | if (sched_mc_power_savings < POWERSAVINGS_BALANCE_WAKEUP) |
3233 | return 0; | 3285 | return 0; |
3234 | } | 3286 | } |
@@ -3246,7 +3298,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3246 | struct sched_domain *sd, enum cpu_idle_type idle, | 3298 | struct sched_domain *sd, enum cpu_idle_type idle, |
3247 | int *balance) | 3299 | int *balance) |
3248 | { | 3300 | { |
3249 | int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0; | 3301 | int ld_moved, all_pinned = 0, active_balance = 0; |
3250 | struct sched_group *group; | 3302 | struct sched_group *group; |
3251 | unsigned long imbalance; | 3303 | unsigned long imbalance; |
3252 | struct rq *busiest; | 3304 | struct rq *busiest; |
@@ -3255,20 +3307,10 @@ static int load_balance(int this_cpu, struct rq *this_rq, | |||
3255 | 3307 | ||
3256 | cpumask_copy(cpus, cpu_active_mask); | 3308 | cpumask_copy(cpus, cpu_active_mask); |
3257 | 3309 | ||
3258 | /* | ||
3259 | * When power savings policy is enabled for the parent domain, idle | ||
3260 | * sibling can pick up load irrespective of busy siblings. In this case, | ||
3261 | * let the state of idle sibling percolate up as CPU_IDLE, instead of | ||
3262 | * portraying it as CPU_NOT_IDLE. | ||
3263 | */ | ||
3264 | if (idle != CPU_NOT_IDLE && sd->flags & SD_SHARE_CPUPOWER && | ||
3265 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
3266 | sd_idle = 1; | ||
3267 | |||
3268 | schedstat_inc(sd, lb_count[idle]); | 3310 | schedstat_inc(sd, lb_count[idle]); |
3269 | 3311 | ||
3270 | redo: | 3312 | redo: |
3271 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, &sd_idle, | 3313 | group = find_busiest_group(sd, this_cpu, &imbalance, idle, |
3272 | cpus, balance); | 3314 | cpus, balance); |
3273 | 3315 | ||
3274 | if (*balance == 0) | 3316 | if (*balance == 0) |
@@ -3330,8 +3372,7 @@ redo: | |||
3330 | if (idle != CPU_NEWLY_IDLE) | 3372 | if (idle != CPU_NEWLY_IDLE) |
3331 | sd->nr_balance_failed++; | 3373 | sd->nr_balance_failed++; |
3332 | 3374 | ||
3333 | if (need_active_balance(sd, sd_idle, idle, cpu_of(busiest), | 3375 | if (need_active_balance(sd, idle, cpu_of(busiest), this_cpu)) { |
3334 | this_cpu)) { | ||
3335 | raw_spin_lock_irqsave(&busiest->lock, flags); | 3376 | raw_spin_lock_irqsave(&busiest->lock, flags); |
3336 | 3377 | ||
3337 | /* don't kick the active_load_balance_cpu_stop, | 3378 | /* don't kick the active_load_balance_cpu_stop, |
@@ -3386,10 +3427,6 @@ redo: | |||
3386 | sd->balance_interval *= 2; | 3427 | sd->balance_interval *= 2; |
3387 | } | 3428 | } |
3388 | 3429 | ||
3389 | if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER && | ||
3390 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
3391 | ld_moved = -1; | ||
3392 | |||
3393 | goto out; | 3430 | goto out; |
3394 | 3431 | ||
3395 | out_balanced: | 3432 | out_balanced: |
@@ -3403,11 +3440,7 @@ out_one_pinned: | |||
3403 | (sd->balance_interval < sd->max_interval)) | 3440 | (sd->balance_interval < sd->max_interval)) |
3404 | sd->balance_interval *= 2; | 3441 | sd->balance_interval *= 2; |
3405 | 3442 | ||
3406 | if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER && | 3443 | ld_moved = 0; |
3407 | !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE)) | ||
3408 | ld_moved = -1; | ||
3409 | else | ||
3410 | ld_moved = 0; | ||
3411 | out: | 3444 | out: |
3412 | return ld_moved; | 3445 | return ld_moved; |
3413 | } | 3446 | } |
@@ -3831,8 +3864,7 @@ static void rebalance_domains(int cpu, enum cpu_idle_type idle) | |||
3831 | if (load_balance(cpu, rq, sd, idle, &balance)) { | 3864 | if (load_balance(cpu, rq, sd, idle, &balance)) { |
3832 | /* | 3865 | /* |
3833 | * We've pulled tasks over so either we're no | 3866 | * We've pulled tasks over so either we're no |
3834 | * longer idle, or one of our SMT siblings is | 3867 | * longer idle. |
3835 | * not idle. | ||
3836 | */ | 3868 | */ |
3837 | idle = CPU_NOT_IDLE; | 3869 | idle = CPU_NOT_IDLE; |
3838 | } | 3870 | } |
@@ -4079,33 +4111,62 @@ static void task_fork_fair(struct task_struct *p) | |||
4079 | * Priority of the task has changed. Check to see if we preempt | 4111 | * Priority of the task has changed. Check to see if we preempt |
4080 | * the current task. | 4112 | * the current task. |
4081 | */ | 4113 | */ |
4082 | static void prio_changed_fair(struct rq *rq, struct task_struct *p, | 4114 | static void |
4083 | int oldprio, int running) | 4115 | prio_changed_fair(struct rq *rq, struct task_struct *p, int oldprio) |
4084 | { | 4116 | { |
4117 | if (!p->se.on_rq) | ||
4118 | return; | ||
4119 | |||
4085 | /* | 4120 | /* |
4086 | * Reschedule if we are currently running on this runqueue and | 4121 | * Reschedule if we are currently running on this runqueue and |
4087 | * our priority decreased, or if we are not currently running on | 4122 | * our priority decreased, or if we are not currently running on |
4088 | * this runqueue and our priority is higher than the current's | 4123 | * this runqueue and our priority is higher than the current's |
4089 | */ | 4124 | */ |
4090 | if (running) { | 4125 | if (rq->curr == p) { |
4091 | if (p->prio > oldprio) | 4126 | if (p->prio > oldprio) |
4092 | resched_task(rq->curr); | 4127 | resched_task(rq->curr); |
4093 | } else | 4128 | } else |
4094 | check_preempt_curr(rq, p, 0); | 4129 | check_preempt_curr(rq, p, 0); |
4095 | } | 4130 | } |
4096 | 4131 | ||
4132 | static void switched_from_fair(struct rq *rq, struct task_struct *p) | ||
4133 | { | ||
4134 | struct sched_entity *se = &p->se; | ||
4135 | struct cfs_rq *cfs_rq = cfs_rq_of(se); | ||
4136 | |||
4137 | /* | ||
4138 | * Ensure the task's vruntime is normalized, so that when it's | ||
4139 | * switched back to the fair class the enqueue_entity(.flags=0) will | ||
4140 | * do the right thing. | ||
4141 | * | ||
4142 | * If it was on_rq, then the dequeue_entity(.flags=0) will already | ||
4143 | * have normalized the vruntime, if it was !on_rq, then only when | ||
4144 | * the task is sleeping will it still have non-normalized vruntime. | ||
4145 | */ | ||
4146 | if (!se->on_rq && p->state != TASK_RUNNING) { | ||
4147 | /* | ||
4148 | * Fix up our vruntime so that the current sleep doesn't | ||
4149 | * cause 'unlimited' sleep bonus. | ||
4150 | */ | ||
4151 | place_entity(cfs_rq, se, 0); | ||
4152 | se->vruntime -= cfs_rq->min_vruntime; | ||
4153 | } | ||
4154 | } | ||
4155 | |||
4097 | /* | 4156 | /* |
4098 | * We switched to the sched_fair class. | 4157 | * We switched to the sched_fair class. |
4099 | */ | 4158 | */ |
4100 | static void switched_to_fair(struct rq *rq, struct task_struct *p, | 4159 | static void switched_to_fair(struct rq *rq, struct task_struct *p) |
4101 | int running) | ||
4102 | { | 4160 | { |
4161 | if (!p->se.on_rq) | ||
4162 | return; | ||
4163 | |||
4103 | /* | 4164 | /* |
4104 | * We were most likely switched from sched_rt, so | 4165 | * We were most likely switched from sched_rt, so |
4105 | * kick off the schedule if running, otherwise just see | 4166 | * kick off the schedule if running, otherwise just see |
4106 | * if we can still preempt the current task. | 4167 | * if we can still preempt the current task. |
4107 | */ | 4168 | */ |
4108 | if (running) | 4169 | if (rq->curr == p) |
4109 | resched_task(rq->curr); | 4170 | resched_task(rq->curr); |
4110 | else | 4171 | else |
4111 | check_preempt_curr(rq, p, 0); | 4172 | check_preempt_curr(rq, p, 0); |
@@ -4171,6 +4232,7 @@ static const struct sched_class fair_sched_class = { | |||
4171 | .enqueue_task = enqueue_task_fair, | 4232 | .enqueue_task = enqueue_task_fair, |
4172 | .dequeue_task = dequeue_task_fair, | 4233 | .dequeue_task = dequeue_task_fair, |
4173 | .yield_task = yield_task_fair, | 4234 | .yield_task = yield_task_fair, |
4235 | .yield_to_task = yield_to_task_fair, | ||
4174 | 4236 | ||
4175 | .check_preempt_curr = check_preempt_wakeup, | 4237 | .check_preempt_curr = check_preempt_wakeup, |
4176 | 4238 | ||
@@ -4191,6 +4253,7 @@ static const struct sched_class fair_sched_class = { | |||
4191 | .task_fork = task_fork_fair, | 4253 | .task_fork = task_fork_fair, |
4192 | 4254 | ||
4193 | .prio_changed = prio_changed_fair, | 4255 | .prio_changed = prio_changed_fair, |
4256 | .switched_from = switched_from_fair, | ||
4194 | .switched_to = switched_to_fair, | 4257 | .switched_to = switched_to_fair, |
4195 | 4258 | ||
4196 | .get_rr_interval = get_rr_interval_fair, | 4259 | .get_rr_interval = get_rr_interval_fair, |