Diffstat (limited to 'kernel/sched_fair.c')
 -rw-r--r--	kernel/sched_fair.c	228
 1 files changed, 113 insertions, 115 deletions

diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6c091d6e159d..f2cc59080efa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -175,8 +175,15 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Maintain a cache of leftmost tree entries (it is frequently
 	 * used):
 	 */
-	if (leftmost)
+	if (leftmost) {
 		cfs_rq->rb_leftmost = &se->run_node;
+		/*
+		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
+		 * value tracking the leftmost vruntime in the tree.
+		 */
+		cfs_rq->min_vruntime =
+			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
+	}
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
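
The min_vruntime update in the hunk above depends on a wraparound-safe maximum. As a point of reference, a max_vruntime() helper of the kind used here is typically written with a signed-delta comparison so that u64 overflow of vruntime values is handled correctly; a minimal sketch, for illustration only and not part of this patch:

	static inline u64 max_vruntime(u64 min_vruntime, u64 vruntime)
	{
		s64 delta = (s64)(vruntime - min_vruntime);

		/* only ever move min_vruntime forward */
		if (delta > 0)
			min_vruntime = vruntime;

		return min_vruntime;
	}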
@@ -184,8 +191,24 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (cfs_rq->rb_leftmost == &se->run_node)
-		cfs_rq->rb_leftmost = rb_next(&se->run_node);
+	if (cfs_rq->rb_leftmost == &se->run_node) {
+		struct rb_node *next_node;
+		struct sched_entity *next;
+
+		next_node = rb_next(&se->run_node);
+		cfs_rq->rb_leftmost = next_node;
+
+		if (next_node) {
+			next = rb_entry(next_node,
+					struct sched_entity, run_node);
+			cfs_rq->min_vruntime =
+				max_vruntime(cfs_rq->min_vruntime,
+					     next->vruntime);
+		}
+	}
+
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
 
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
@@ -202,17 +225,12 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 
 static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
-	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
-	struct sched_entity *se = NULL;
-	struct rb_node *parent;
+	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
 
-	while (*link) {
-		parent = *link;
-		se = rb_entry(parent, struct sched_entity, run_node);
-		link = &parent->rb_right;
-	}
+	if (!last)
+		return NULL;
 
-	return se;
+	return rb_entry(last, struct sched_entity, run_node);
 }
 
 /**************************************************************
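
For reference, rb_last() simply follows rb_right pointers down to the rightmost node, which is exactly what the removed open-coded loop did; a sketch of that behaviour (illustrative only, the real helper lives in lib/rbtree.c):

	/* sketch: rightmost node of an rbtree, or NULL if it is empty */
	static struct rb_node *rb_last_sketch(const struct rb_root *root)
	{
		struct rb_node *n = root->rb_node;

		if (!n)
			return NULL;
		while (n->rb_right)
			n = n->rb_right;
		return n;
	}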
@@ -265,12 +283,8 @@ static u64 __sched_period(unsigned long nr_running)
  */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u64 slice = __sched_period(cfs_rq->nr_running);
-
-	slice *= se->load.weight;
-	do_div(slice, cfs_rq->load.weight);
-
-	return slice;
+	return calc_delta_mine(__sched_period(cfs_rq->nr_running),
+			       se->load.weight, &cfs_rq->load);
 }
 
 /*
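
The rewrite of sched_slice() does not change the arithmetic, only how it is carried out: calc_delta_mine() avoids the 64-bit division by using a precomputed inverse weight, but the value it is asked to produce is the same weighted share of the period that the removed code computed directly. A plain-arithmetic equivalent, shown only to make the intent explicit:

	/* plain-arithmetic form of what the new sched_slice() requests */
	static u64 sched_slice_plain(struct cfs_rq *cfs_rq, struct sched_entity *se)
	{
		u64 slice = __sched_period(cfs_rq->nr_running);

		slice *= se->load.weight;
		do_div(slice, cfs_rq->load.weight);	/* period * se_weight / rq_weight */

		return slice;
	}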
@@ -308,7 +322,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
 	unsigned long delta_exec_weighted;
-	u64 vruntime;
 
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
@@ -320,19 +333,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 							&curr->load);
 	}
 	curr->vruntime += delta_exec_weighted;
-
-	/*
-	 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-	 * value tracking the leftmost vruntime in the tree.
-	 */
-	if (first_fair(cfs_rq)) {
-		vruntime = min_vruntime(curr->vruntime,
-				__pick_next_entity(cfs_rq)->vruntime);
-	} else
-		vruntime = curr->vruntime;
-
-	cfs_rq->min_vruntime =
-		max_vruntime(cfs_rq->min_vruntime, vruntime);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -498,7 +498,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
 	u64 vruntime;
 
-	vruntime = cfs_rq->min_vruntime;
+	if (first_fair(cfs_rq)) {
+		vruntime = min_vruntime(cfs_rq->min_vruntime,
+				__pick_next_entity(cfs_rq)->vruntime);
+	} else
+		vruntime = cfs_rq->min_vruntime;
 
 	if (sched_feat(TREE_AVG)) {
 		struct sched_entity *last = __pick_last_entity(cfs_rq);
@@ -520,8 +524,10 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS))
-			vruntime -= sysctl_sched_latency;
+		if (sched_feat(NEW_FAIR_SLEEPERS)) {
+			vruntime -= calc_delta_fair(sysctl_sched_latency,
+						    &cfs_rq->load);
+		}
 
 		/* ensure we never gain time by being placed backwards. */
 		vruntime = max_vruntime(se->vruntime, vruntime);
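
The sleeper credit is now scaled by the runqueue load rather than being a flat sysctl_sched_latency. Assuming calc_delta_fair(delta, lw) scales delta by NICE_0_LOAD / lw->weight (as it does for delta_exec in __update_curr()), the credit shrinks as the runqueue gets busier; a rough worked example with illustrative values:

	/*
	 * Illustrative values only (NICE_0_LOAD = 1024, latency = 20ms):
	 *   cfs_rq->load.weight == 1024 (one nice-0 task):
	 *       credit = 20ms * 1024 / 1024 = 20ms
	 *   cfs_rq->load.weight == 4096 (four nice-0 tasks):
	 *       credit = 20ms * 1024 / 4096 =  5ms
	 */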
@@ -621,12 +627,32 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
+static struct sched_entity *
+pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	s64 diff, gran;
+
+	if (!cfs_rq->next)
+		return se;
+
+	diff = cfs_rq->next->vruntime - se->vruntime;
+	if (diff < 0)
+		return se;
+
+	gran = calc_delta_fair(sysctl_sched_wakeup_granularity, &cfs_rq->load);
+	if (diff > gran)
+		return se;
+
+	return cfs_rq->next;
+}
+
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *se = NULL;
 
 	if (first_fair(cfs_rq)) {
 		se = __pick_next_entity(cfs_rq);
+		se = pick_next(cfs_rq, se);
 		set_next_entity(cfs_rq, se);
 	}
 
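
pick_next() is the consumer of cfs_rq->next, which a later hunk sets in check_preempt_wakeup(): the freshly woken entity is preferred over the leftmost one only while it trails by at most one load-scaled wakeup granularity. A worked example with illustrative numbers (vruntime is in nanoseconds):

	/*
	 * leftmost se->vruntime  = 1000000
	 * cfs_rq->next->vruntime = 1004000   -> diff =  4000
	 * gran (scaled wakeup granularity)   ->         10000
	 *
	 * diff <= gran, so the wakee (cfs_rq->next) is returned;
	 * had diff been 15000, the leftmost entity would win instead.
	 */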
@@ -732,8 +758,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
-#define GROUP_IMBALANCE_PCT 20
-
 #else /* CONFIG_FAIR_GROUP_SCHED */
 
 #define for_each_sched_entity(se) \
@@ -824,26 +848,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p)
 static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se,
-			    *topse = NULL;	/* Highest schedulable entity */
-	int incload = 1;
+	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
-		topse = se;
-		if (se->on_rq) {
-			incload = 0;
+		if (se->on_rq)
 			break;
-		}
 		cfs_rq = cfs_rq_of(se);
 		enqueue_entity(cfs_rq, se, wakeup);
 		wakeup = 1;
 	}
-	/* Increment cpu load if we just enqueued the first task of a group on
-	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-	 * at the highest grouping level.
-	 */
-	if (incload)
-		inc_cpu_load(rq, topse->load.weight);
 
 	hrtick_start_fair(rq, rq->curr);
 }
@@ -856,28 +869,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se,
-			    *topse = NULL;	/* Highest schedulable entity */
-	int decload = 1;
+	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
-		topse = se;
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, sleep);
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight) {
-			if (parent_entity(se))
-				decload = 0;
+		if (cfs_rq->load.weight)
 			break;
-		}
 		sleep = 1;
 	}
-	/* Decrement cpu load if we just dequeued the last task of a group on
-	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-	 * at the highest grouping level.
-	 */
-	if (decload)
-		dec_cpu_load(rq, topse->load.weight);
 
 	hrtick_start_fair(rq, rq->curr);
 }
@@ -1090,6 +1091,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		resched_task(curr);
 		return;
 	}
+
+	cfs_rq_of(pse)->next = pse;
+
 	/*
 	 * Batch tasks do not preempt (their preemption is driven by
 	 * the tick):
@@ -1191,6 +1195,25 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+{
+	struct sched_entity *curr;
+	struct task_struct *p;
+
+	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+		return MAX_PRIO;
+
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
+
+	p = task_of(curr);
+
+	return p->prio;
+}
+#endif
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -1200,45 +1223,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
 	struct rq_iterator cfs_rq_iterator;
-	unsigned long load_moved;
 
 	cfs_rq_iterator.start = load_balance_start_fair;
 	cfs_rq_iterator.next = load_balance_next_fair;
 
 	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu];
-		unsigned long maxload, task_load, group_weight;
-		unsigned long thisload, per_task_load;
-		struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
+		struct cfs_rq *this_cfs_rq;
+		long imbalance;
+		unsigned long maxload;
 
-		task_load = busy_cfs_rq->load.weight;
-		group_weight = se->load.weight;
+		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
 
-		/*
-		 * 'group_weight' is contributed by tasks of total weight
-		 * 'task_load'. To move 'rem_load_move' worth of weight only,
-		 * we need to move a maximum task load of:
-		 *
-		 * maxload = (remload / group_weight) * task_load;
-		 */
-		maxload = (rem_load_move * task_load) / group_weight;
-
-		if (!maxload || !task_load)
+		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+		if (imbalance <= 0)
 			continue;
 
-		per_task_load = task_load / busy_cfs_rq->nr_running;
-		/*
-		 * balance_tasks will try to forcibly move atleast one task if
-		 * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
-		 * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load.
-		 */
-		if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
-			continue;
+		/* Don't pull more than imbalance/2 */
+		imbalance /= 2;
+		maxload = min(rem_load_move, imbalance);
 
-		/* Disable priority-based load balance */
-		*this_best_prio = 0;
-		thisload = this_cfs_rq->load.weight;
+		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
 #else
 # define maxload rem_load_move
 #endif
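
Under CONFIG_FAIR_GROUP_SCHED the per-group pull budget is now derived purely from the weight gap between the busiest and local group runqueues. A worked example with illustrative weights:

	/*
	 * busy_cfs_rq->load.weight = 3072, this_cfs_rq->load.weight = 1024
	 *   imbalance = 3072 - 1024 = 2048
	 *   imbalance /= 2          -> 1024   (pull at most half the gap)
	 *   maxload   = min(rem_load_move, 1024)
	 *
	 * If the local group runqueue were already the heavier one
	 * (imbalance <= 0), the group would be skipped entirely.
	 */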
@@ -1247,33 +1253,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		 * load_balance_[start|next]_fair iterators
 		 */
 		cfs_rq_iterator.arg = busy_cfs_rq;
-		load_moved = balance_tasks(this_rq, this_cpu, busiest,
-				maxload, sd, idle, all_pinned,
-				this_best_prio,
-				&cfs_rq_iterator);
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+					       maxload, sd, idle, all_pinned,
+					       this_best_prio,
+					       &cfs_rq_iterator);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-		/*
-		 * load_moved holds the task load that was moved. The
-		 * effective (group) weight moved would be:
-		 * load_moved_eff = load_moved/task_load * group_weight;
-		 */
-		load_moved = (group_weight * load_moved) / task_load;
-
-		/* Adjust shares on both cpus to reflect load_moved */
-		group_weight -= load_moved;
-		set_se_shares(se, group_weight);
-
-		se = busy_cfs_rq->tg->se[this_cpu];
-		if (!thisload)
-			group_weight = load_moved;
-		else
-			group_weight = se->load.weight + load_moved;
-		set_se_shares(se, group_weight);
-#endif
-
-		rem_load_move -= load_moved;
-
 		if (rem_load_move <= 0)
 			break;
 	}
@@ -1403,6 +1387,16 @@ static void set_curr_task_fair(struct rq *rq)
 		set_next_entity(cfs_rq_of(se), se);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void moved_group_fair(struct task_struct *p)
+{
+	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+	update_curr(cfs_rq);
+	place_entity(cfs_rq, &p->se, 1);
+}
+#endif
+
 /*
  * All the scheduling class methods:
  */
@@ -1431,6 +1425,10 @@ static const struct sched_class fair_sched_class = {
 
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	.moved_group		= moved_group_fair,
+#endif
 };
 
 #ifdef CONFIG_SCHED_DEBUG