Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c  228
1 file changed, 113 insertions(+), 115 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 6c091d6e159d..f2cc59080efa 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -175,8 +175,15 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	 * Maintain a cache of leftmost tree entries (it is frequently
 	 * used):
 	 */
-	if (leftmost)
+	if (leftmost) {
 		cfs_rq->rb_leftmost = &se->run_node;
+		/*
+		 * maintain cfs_rq->min_vruntime to be a monotonic increasing
+		 * value tracking the leftmost vruntime in the tree.
+		 */
+		cfs_rq->min_vruntime =
+			max_vruntime(cfs_rq->min_vruntime, se->vruntime);
+	}
 
 	rb_link_node(&se->run_node, parent, link);
 	rb_insert_color(&se->run_node, &cfs_rq->tasks_timeline);
@@ -184,8 +191,24 @@ static void __enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 
 static void __dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	if (cfs_rq->rb_leftmost == &se->run_node)
-		cfs_rq->rb_leftmost = rb_next(&se->run_node);
+	if (cfs_rq->rb_leftmost == &se->run_node) {
+		struct rb_node *next_node;
+		struct sched_entity *next;
+
+		next_node = rb_next(&se->run_node);
+		cfs_rq->rb_leftmost = next_node;
+
+		if (next_node) {
+			next = rb_entry(next_node,
+					struct sched_entity, run_node);
+			cfs_rq->min_vruntime =
+				max_vruntime(cfs_rq->min_vruntime,
+					     next->vruntime);
+		}
+	}
+
+	if (cfs_rq->next == se)
+		cfs_rq->next = NULL;
 
 	rb_erase(&se->run_node, &cfs_rq->tasks_timeline);
 }
@@ -202,17 +225,12 @@ static struct sched_entity *__pick_next_entity(struct cfs_rq *cfs_rq)
 
 static inline struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
 {
-	struct rb_node **link = &cfs_rq->tasks_timeline.rb_node;
-	struct sched_entity *se = NULL;
-	struct rb_node *parent;
+	struct rb_node *last = rb_last(&cfs_rq->tasks_timeline);
 
-	while (*link) {
-		parent = *link;
-		se = rb_entry(parent, struct sched_entity, run_node);
-		link = &parent->rb_right;
-	}
+	if (!last)
+		return NULL;
 
-	return se;
+	return rb_entry(last, struct sched_entity, run_node);
 }
 
 /**************************************************************
@@ -265,12 +283,8 @@ static u64 __sched_period(unsigned long nr_running)
 	 */
 static u64 sched_slice(struct cfs_rq *cfs_rq, struct sched_entity *se)
 {
-	u64 slice = __sched_period(cfs_rq->nr_running);
-
-	slice *= se->load.weight;
-	do_div(slice, cfs_rq->load.weight);
-
-	return slice;
+	return calc_delta_mine(__sched_period(cfs_rq->nr_running),
+			       se->load.weight, &cfs_rq->load);
 }
 
 /*
@@ -308,7 +322,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 	      unsigned long delta_exec)
 {
 	unsigned long delta_exec_weighted;
-	u64 vruntime;
 
 	schedstat_set(curr->exec_max, max((u64)delta_exec, curr->exec_max));
 
@@ -320,19 +333,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
 							&curr->load);
 	}
 	curr->vruntime += delta_exec_weighted;
-
-	/*
-	 * maintain cfs_rq->min_vruntime to be a monotonic increasing
-	 * value tracking the leftmost vruntime in the tree.
-	 */
-	if (first_fair(cfs_rq)) {
-		vruntime = min_vruntime(curr->vruntime,
-				__pick_next_entity(cfs_rq)->vruntime);
-	} else
-		vruntime = curr->vruntime;
-
-	cfs_rq->min_vruntime =
-		max_vruntime(cfs_rq->min_vruntime, vruntime);
 }
 
 static void update_curr(struct cfs_rq *cfs_rq)
@@ -498,7 +498,11 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 {
 	u64 vruntime;
 
-	vruntime = cfs_rq->min_vruntime;
+	if (first_fair(cfs_rq)) {
+		vruntime = min_vruntime(cfs_rq->min_vruntime,
+				__pick_next_entity(cfs_rq)->vruntime);
+	} else
+		vruntime = cfs_rq->min_vruntime;
 
 	if (sched_feat(TREE_AVG)) {
 		struct sched_entity *last = __pick_last_entity(cfs_rq);
@@ -520,8 +524,10 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
 
 	if (!initial) {
 		/* sleeps upto a single latency don't count. */
-		if (sched_feat(NEW_FAIR_SLEEPERS))
-			vruntime -= sysctl_sched_latency;
+		if (sched_feat(NEW_FAIR_SLEEPERS)) {
+			vruntime -= calc_delta_fair(sysctl_sched_latency,
+						    &cfs_rq->load);
+		}
 
 		/* ensure we never gain time by being placed backwards. */
 		vruntime = max_vruntime(se->vruntime, vruntime);
@@ -621,12 +627,32 @@ set_next_entity(struct cfs_rq *cfs_rq, struct sched_entity *se)
 	se->prev_sum_exec_runtime = se->sum_exec_runtime;
 }
 
+static struct sched_entity *
+pick_next(struct cfs_rq *cfs_rq, struct sched_entity *se)
+{
+	s64 diff, gran;
+
+	if (!cfs_rq->next)
+		return se;
+
+	diff = cfs_rq->next->vruntime - se->vruntime;
+	if (diff < 0)
+		return se;
+
+	gran = calc_delta_fair(sysctl_sched_wakeup_granularity, &cfs_rq->load);
+	if (diff > gran)
+		return se;
+
+	return cfs_rq->next;
+}
+
 static struct sched_entity *pick_next_entity(struct cfs_rq *cfs_rq)
 {
 	struct sched_entity *se = NULL;
 
 	if (first_fair(cfs_rq)) {
 		se = __pick_next_entity(cfs_rq);
+		se = pick_next(cfs_rq, se);
 		set_next_entity(cfs_rq, se);
 	}
 
@@ -732,8 +758,6 @@ static inline struct sched_entity *parent_entity(struct sched_entity *se)
 	return se->parent;
 }
 
-#define GROUP_IMBALANCE_PCT	20
-
 #else	/* CONFIG_FAIR_GROUP_SCHED */
 
 #define for_each_sched_entity(se) \
@@ -824,26 +848,15 @@ hrtick_start_fair(struct rq *rq, struct task_struct *p)
 static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se,
-			    *topse = NULL;	/* Highest schedulable entity */
-	int incload = 1;
+	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
-		topse = se;
-		if (se->on_rq) {
-			incload = 0;
+		if (se->on_rq)
 			break;
-		}
 		cfs_rq = cfs_rq_of(se);
 		enqueue_entity(cfs_rq, se, wakeup);
 		wakeup = 1;
 	}
-	/* Increment cpu load if we just enqueued the first task of a group on
-	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-	 * at the highest grouping level.
-	 */
-	if (incload)
-		inc_cpu_load(rq, topse->load.weight);
 
 	hrtick_start_fair(rq, rq->curr);
 }
@@ -856,28 +869,16 @@ static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
 static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int sleep)
 {
 	struct cfs_rq *cfs_rq;
-	struct sched_entity *se = &p->se,
-			    *topse = NULL;	/* Highest schedulable entity */
-	int decload = 1;
+	struct sched_entity *se = &p->se;
 
 	for_each_sched_entity(se) {
-		topse = se;
 		cfs_rq = cfs_rq_of(se);
 		dequeue_entity(cfs_rq, se, sleep);
 		/* Don't dequeue parent if it has other entities besides us */
-		if (cfs_rq->load.weight) {
-			if (parent_entity(se))
-				decload = 0;
+		if (cfs_rq->load.weight)
 			break;
-		}
 		sleep = 1;
 	}
-	/* Decrement cpu load if we just dequeued the last task of a group on
-	 * 'rq->cpu'. 'topse' represents the group to which task 'p' belongs
-	 * at the highest grouping level.
-	 */
-	if (decload)
-		dec_cpu_load(rq, topse->load.weight);
 
 	hrtick_start_fair(rq, rq->curr);
 }
@@ -1090,6 +1091,9 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
 		resched_task(curr);
 		return;
 	}
+
+	cfs_rq_of(pse)->next = pse;
+
 	/*
 	 * Batch tasks do not preempt (their preemption is driven by
 	 * the tick):
@@ -1191,6 +1195,25 @@ static struct task_struct *load_balance_next_fair(void *arg)
 	return __load_balance_iterator(cfs_rq, cfs_rq->rb_load_balance_curr);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
+{
+	struct sched_entity *curr;
+	struct task_struct *p;
+
+	if (!cfs_rq->nr_running || !first_fair(cfs_rq))
+		return MAX_PRIO;
+
+	curr = cfs_rq->curr;
+	if (!curr)
+		curr = __pick_next_entity(cfs_rq);
+
+	p = task_of(curr);
+
+	return p->prio;
+}
+#endif
+
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_load_move,
@@ -1200,45 +1223,28 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	struct cfs_rq *busy_cfs_rq;
 	long rem_load_move = max_load_move;
 	struct rq_iterator cfs_rq_iterator;
-	unsigned long load_moved;
 
 	cfs_rq_iterator.start = load_balance_start_fair;
 	cfs_rq_iterator.next = load_balance_next_fair;
 
 	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
 #ifdef CONFIG_FAIR_GROUP_SCHED
-		struct cfs_rq *this_cfs_rq = busy_cfs_rq->tg->cfs_rq[this_cpu];
-		unsigned long maxload, task_load, group_weight;
-		unsigned long thisload, per_task_load;
-		struct sched_entity *se = busy_cfs_rq->tg->se[busiest->cpu];
+		struct cfs_rq *this_cfs_rq;
+		long imbalance;
+		unsigned long maxload;
 
-		task_load = busy_cfs_rq->load.weight;
-		group_weight = se->load.weight;
+		this_cfs_rq = cpu_cfs_rq(busy_cfs_rq, this_cpu);
 
-		/*
-		 * 'group_weight' is contributed by tasks of total weight
-		 * 'task_load'. To move 'rem_load_move' worth of weight only,
-		 * we need to move a maximum task load of:
-		 *
-		 * maxload = (remload / group_weight) * task_load;
-		 */
-		maxload = (rem_load_move * task_load) / group_weight;
-
-		if (!maxload || !task_load)
+		imbalance = busy_cfs_rq->load.weight - this_cfs_rq->load.weight;
+		/* Don't pull if this_cfs_rq has more load than busy_cfs_rq */
+		if (imbalance <= 0)
 			continue;
 
-		per_task_load = task_load / busy_cfs_rq->nr_running;
-		/*
-		 * balance_tasks will try to forcibly move atleast one task if
-		 * possible (because of SCHED_LOAD_SCALE_FUZZ). Avoid that if
-		 * maxload is less than GROUP_IMBALANCE_FUZZ% the per_task_load.
-		 */
-		if (100 * maxload < GROUP_IMBALANCE_PCT * per_task_load)
-			continue;
+		/* Don't pull more than imbalance/2 */
+		imbalance /= 2;
+		maxload = min(rem_load_move, imbalance);
 
-		/* Disable priority-based load balance */
-		*this_best_prio = 0;
-		thisload = this_cfs_rq->load.weight;
+		*this_best_prio = cfs_rq_best_prio(this_cfs_rq);
 #else
 # define maxload rem_load_move
 #endif
@@ -1247,33 +1253,11 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		 * load_balance_[start|next]_fair iterators
 		 */
 		cfs_rq_iterator.arg = busy_cfs_rq;
-		load_moved = balance_tasks(this_rq, this_cpu, busiest,
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
 					       maxload, sd, idle, all_pinned,
 					       this_best_prio,
 					       &cfs_rq_iterator);
 
-#ifdef CONFIG_FAIR_GROUP_SCHED
-		/*
-		 * load_moved holds the task load that was moved. The
-		 * effective (group) weight moved would be:
-		 * load_moved_eff = load_moved/task_load * group_weight;
-		 */
-		load_moved = (group_weight * load_moved) / task_load;
-
-		/* Adjust shares on both cpus to reflect load_moved */
-		group_weight -= load_moved;
-		set_se_shares(se, group_weight);
-
-		se = busy_cfs_rq->tg->se[this_cpu];
-		if (!thisload)
-			group_weight = load_moved;
-		else
-			group_weight = se->load.weight + load_moved;
-		set_se_shares(se, group_weight);
-#endif
-
-		rem_load_move -= load_moved;
-
 		if (rem_load_move <= 0)
 			break;
 	}
@@ -1403,6 +1387,16 @@ static void set_curr_task_fair(struct rq *rq)
 		set_next_entity(cfs_rq_of(se), se);
 }
 
+#ifdef CONFIG_FAIR_GROUP_SCHED
+static void moved_group_fair(struct task_struct *p)
+{
+	struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+	update_curr(cfs_rq);
+	place_entity(cfs_rq, &p->se, 1);
+}
+#endif
+
 /*
  * All the scheduling class methods:
  */
@@ -1431,6 +1425,10 @@ static const struct sched_class fair_sched_class = {
 
 	.prio_changed		= prio_changed_fair,
 	.switched_to		= switched_to_fair,
+
+#ifdef CONFIG_FAIR_GROUP_SCHED
+	.moved_group		= moved_group_fair,
+#endif
 };
 
 #ifdef CONFIG_SCHED_DEBUG