aboutsummaryrefslogtreecommitdiffstats
path: root/kernel
diff options
context:
space:
mode:
authorPaul Turner <pjt@google.com>2012-10-04 07:18:30 -0400
committerIngo Molnar <mingo@kernel.org>2012-10-24 04:27:22 -0400
commit9ee474f55664ff63111c843099d365e7ecffb56f (patch)
tree745a678b0d3cd72ba42b67d0b6ac6c3872b14229 /kernel
parent2dac754e10a5d41d94d2d2365c0345d4f215a266 (diff)
sched: Maintain the load contribution of blocked entities
We are currently maintaining: runnable_load(cfs_rq) = \Sum task_load(t) For all running children t of cfs_rq. While this can be naturally updated for tasks in a runnable state (as they are scheduled); this does not account for the load contributed by blocked task entities. This can be solved by introducing a separate accounting for blocked load: blocked_load(cfs_rq) = \Sum runnable(b) * weight(b) Obviously we do not want to iterate over all blocked entities to account for their decay, we instead observe that: runnable_load(t) = \Sum p_i*y^i and that to account for an additional idle period we only need to compute: y*runnable_load(t). This means that we can compute all blocked entities at once by evaluating: blocked_load(cfs_rq)` = y * blocked_load(cfs_rq) Finally we maintain a decay counter so that when a sleeping entity re-awakens we can determine how much of its load should be removed from the blocked sum. Signed-off-by: Paul Turner <pjt@google.com> Reviewed-by: Ben Segall <bsegall@google.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/20120823141506.585389902@google.com Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/sched/core.c1
-rw-r--r--kernel/sched/debug.c3
-rw-r--r--kernel/sched/fair.c128
-rw-r--r--kernel/sched/sched.h4
4 files changed, 121 insertions, 15 deletions
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd9d0859350a..00898f1fb69e 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1528,7 +1528,6 @@ static void __sched_fork(struct task_struct *p)
1528 p->se.avg.runnable_avg_period = 0; 1528 p->se.avg.runnable_avg_period = 0;
1529 p->se.avg.runnable_avg_sum = 0; 1529 p->se.avg.runnable_avg_sum = 0;
1530#endif 1530#endif
1531
1532#ifdef CONFIG_SCHEDSTATS 1531#ifdef CONFIG_SCHEDSTATS
1533 memset(&p->se.statistics, 0, sizeof(p->se.statistics)); 1532 memset(&p->se.statistics, 0, sizeof(p->se.statistics));
1534#endif 1533#endif
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index c953a89f94aa..2d2e2b3c1bef 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -95,6 +95,7 @@ static void print_cfs_group_stats(struct seq_file *m, int cpu, struct task_group
95 P(se->avg.runnable_avg_sum); 95 P(se->avg.runnable_avg_sum);
96 P(se->avg.runnable_avg_period); 96 P(se->avg.runnable_avg_period);
97 P(se->avg.load_avg_contrib); 97 P(se->avg.load_avg_contrib);
98 P(se->avg.decay_count);
98#endif 99#endif
99#undef PN 100#undef PN
100#undef P 101#undef P
@@ -227,6 +228,8 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
227 atomic_read(&cfs_rq->tg->load_weight)); 228 atomic_read(&cfs_rq->tg->load_weight));
228 SEQ_printf(m, " .%-30s: %lld\n", "runnable_load_avg", 229 SEQ_printf(m, " .%-30s: %lld\n", "runnable_load_avg",
229 cfs_rq->runnable_load_avg); 230 cfs_rq->runnable_load_avg);
231 SEQ_printf(m, " .%-30s: %lld\n", "blocked_load_avg",
232 cfs_rq->blocked_load_avg);
230#endif 233#endif
231 234
232 print_cfs_group_stats(m, cpu, cfs_rq->tg); 235 print_cfs_group_stats(m, cpu, cfs_rq->tg);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 77af759e5675..83194175e841 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -259,6 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
259 return grp->my_q; 259 return grp->my_q;
260} 260}
261 261
262static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq);
263
262static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq) 264static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
263{ 265{
264 if (!cfs_rq->on_list) { 266 if (!cfs_rq->on_list) {
@@ -278,6 +280,8 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
278 } 280 }
279 281
280 cfs_rq->on_list = 1; 282 cfs_rq->on_list = 1;
283 /* We should have no load, but we need to update last_decay. */
284 update_cfs_rq_blocked_load(cfs_rq);
281 } 285 }
282} 286}
283 287
@@ -1081,6 +1085,20 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
1081 return decayed; 1085 return decayed;
1082} 1086}
1083 1087
1088/* Synchronize an entity's decay with its parenting cfs_rq.*/
1089static inline void __synchronize_entity_decay(struct sched_entity *se)
1090{
1091 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1092 u64 decays = atomic64_read(&cfs_rq->decay_counter);
1093
1094 decays -= se->avg.decay_count;
1095 if (!decays)
1096 return;
1097
1098 se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
1099 se->avg.decay_count = 0;
1100}
1101
1084/* Compute the current contribution to load_avg by se, return any delta */ 1102/* Compute the current contribution to load_avg by se, return any delta */
1085static long __update_entity_load_avg_contrib(struct sched_entity *se) 1103static long __update_entity_load_avg_contrib(struct sched_entity *se)
1086{ 1104{
@@ -1096,8 +1114,18 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
1096 return se->avg.load_avg_contrib - old_contrib; 1114 return se->avg.load_avg_contrib - old_contrib;
1097} 1115}
1098 1116
1117static inline void subtract_blocked_load_contrib(struct cfs_rq *cfs_rq,
1118 long load_contrib)
1119{
1120 if (likely(load_contrib < cfs_rq->blocked_load_avg))
1121 cfs_rq->blocked_load_avg -= load_contrib;
1122 else
1123 cfs_rq->blocked_load_avg = 0;
1124}
1125
1099/* Update a sched_entity's runnable average */ 1126/* Update a sched_entity's runnable average */
1100static inline void update_entity_load_avg(struct sched_entity *se) 1127static inline void update_entity_load_avg(struct sched_entity *se,
1128 int update_cfs_rq)
1101{ 1129{
1102 struct cfs_rq *cfs_rq = cfs_rq_of(se); 1130 struct cfs_rq *cfs_rq = cfs_rq_of(se);
1103 long contrib_delta; 1131 long contrib_delta;
@@ -1107,8 +1135,34 @@ static inline void update_entity_load_avg(struct sched_entity *se)
1107 return; 1135 return;
1108 1136
1109 contrib_delta = __update_entity_load_avg_contrib(se); 1137 contrib_delta = __update_entity_load_avg_contrib(se);
1138
1139 if (!update_cfs_rq)
1140 return;
1141
1110 if (se->on_rq) 1142 if (se->on_rq)
1111 cfs_rq->runnable_load_avg += contrib_delta; 1143 cfs_rq->runnable_load_avg += contrib_delta;
1144 else
1145 subtract_blocked_load_contrib(cfs_rq, -contrib_delta);
1146}
1147
1148/*
1149 * Decay the load contributed by all blocked children and account this so that
1150 * their contribution may appropriately discounted when they wake up.
1151 */
1152static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq)
1153{
1154 u64 now = rq_of(cfs_rq)->clock_task >> 20;
1155 u64 decays;
1156
1157 decays = now - cfs_rq->last_decay;
1158 if (!decays)
1159 return;
1160
1161 cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
1162 decays);
1163 atomic64_add(decays, &cfs_rq->decay_counter);
1164
1165 cfs_rq->last_decay = now;
1112} 1166}
1113 1167
1114static inline void update_rq_runnable_avg(struct rq *rq, int runnable) 1168static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
@@ -1118,26 +1172,53 @@ static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
1118 1172
1119/* Add the load generated by se into cfs_rq's child load-average */ 1173/* Add the load generated by se into cfs_rq's child load-average */
1120static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, 1174static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
1121 struct sched_entity *se) 1175 struct sched_entity *se,
1176 int wakeup)
1122{ 1177{
1123 update_entity_load_avg(se); 1178 /* we track migrations using entity decay_count == 0 */
1179 if (unlikely(!se->avg.decay_count)) {
1180 se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task;
1181 wakeup = 0;
1182 } else {
1183 __synchronize_entity_decay(se);
1184 }
1185
1186 if (wakeup)
1187 subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
1188
1189 update_entity_load_avg(se, 0);
1124 cfs_rq->runnable_load_avg += se->avg.load_avg_contrib; 1190 cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
1191 update_cfs_rq_blocked_load(cfs_rq);
1125} 1192}
1126 1193
1127/* Remove se's load from this cfs_rq child load-average */ 1194/*
1195 * Remove se's load from this cfs_rq child load-average, if the entity is
1196 * transitioning to a blocked state we track its projected decay using
1197 * blocked_load_avg.
1198 */
1128static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, 1199static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
1129 struct sched_entity *se) 1200 struct sched_entity *se,
1201 int sleep)
1130{ 1202{
1131 update_entity_load_avg(se); 1203 update_entity_load_avg(se, 1);
1204
1132 cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib; 1205 cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
1206 if (sleep) {
1207 cfs_rq->blocked_load_avg += se->avg.load_avg_contrib;
1208 se->avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
1209 } /* migrations, e.g. sleep=0 leave decay_count == 0 */
1133} 1210}
1134#else 1211#else
1135static inline void update_entity_load_avg(struct sched_entity *se) {} 1212static inline void update_entity_load_avg(struct sched_entity *se,
1213 int update_cfs_rq) {}
1136static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {} 1214static inline void update_rq_runnable_avg(struct rq *rq, int runnable) {}
1137static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq, 1215static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
1138 struct sched_entity *se) {} 1216 struct sched_entity *se,
1217 int wakeup) {}
1139static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq, 1218static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
1140 struct sched_entity *se) {} 1219 struct sched_entity *se,
1220 int sleep) {}
1221static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) {}
1141#endif 1222#endif
1142 1223
1143static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) 1224static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1266,7 +1347,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1266 */ 1347 */
1267 update_curr(cfs_rq); 1348 update_curr(cfs_rq);
1268 update_cfs_load(cfs_rq, 0); 1349 update_cfs_load(cfs_rq, 0);
1269 enqueue_entity_load_avg(cfs_rq, se); 1350 enqueue_entity_load_avg(cfs_rq, se, flags & ENQUEUE_WAKEUP);
1270 account_entity_enqueue(cfs_rq, se); 1351 account_entity_enqueue(cfs_rq, se);
1271 update_cfs_shares(cfs_rq); 1352 update_cfs_shares(cfs_rq);
1272 1353
@@ -1341,7 +1422,7 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
1341 * Update run-time statistics of the 'current'. 1422 * Update run-time statistics of the 'current'.
1342 */ 1423 */
1343 update_curr(cfs_rq); 1424 update_curr(cfs_rq);
1344 dequeue_entity_load_avg(cfs_rq, se); 1425 dequeue_entity_load_avg(cfs_rq, se, flags & DEQUEUE_SLEEP);
1345 1426
1346 update_stats_dequeue(cfs_rq, se); 1427 update_stats_dequeue(cfs_rq, se);
1347 if (flags & DEQUEUE_SLEEP) { 1428 if (flags & DEQUEUE_SLEEP) {
@@ -1512,7 +1593,7 @@ static void put_prev_entity(struct cfs_rq *cfs_rq, struct sched_entity *prev)
1512 /* Put 'current' back into the tree. */ 1593 /* Put 'current' back into the tree. */
1513 __enqueue_entity(cfs_rq, prev); 1594 __enqueue_entity(cfs_rq, prev);
1514 /* in !on_rq case, update occurred at dequeue */ 1595 /* in !on_rq case, update occurred at dequeue */
1515 update_entity_load_avg(prev); 1596 update_entity_load_avg(prev, 1);
1516 } 1597 }
1517 cfs_rq->curr = NULL; 1598 cfs_rq->curr = NULL;
1518} 1599}
@@ -1528,7 +1609,8 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
1528 /* 1609 /*
1529 * Ensure that runnable average is periodically updated. 1610 * Ensure that runnable average is periodically updated.
1530 */ 1611 */
1531 update_entity_load_avg(curr); 1612 update_entity_load_avg(curr, 1);
1613 update_cfs_rq_blocked_load(cfs_rq);
1532 1614
1533 /* 1615 /*
1534 * Update share accounting for long-running entities. 1616 * Update share accounting for long-running entities.
@@ -2387,6 +2469,7 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
2387 2469
2388 update_cfs_load(cfs_rq, 0); 2470 update_cfs_load(cfs_rq, 0);
2389 update_cfs_shares(cfs_rq); 2471 update_cfs_shares(cfs_rq);
2472 update_entity_load_avg(se, 1);
2390 } 2473 }
2391 2474
2392 if (!se) { 2475 if (!se) {
@@ -2448,6 +2531,7 @@ static void dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
2448 2531
2449 update_cfs_load(cfs_rq, 0); 2532 update_cfs_load(cfs_rq, 0);
2450 update_cfs_shares(cfs_rq); 2533 update_cfs_shares(cfs_rq);
2534 update_entity_load_avg(se, 1);
2451 } 2535 }
2452 2536
2453 if (!se) { 2537 if (!se) {
@@ -3498,6 +3582,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
3498 3582
3499 update_rq_clock(rq); 3583 update_rq_clock(rq);
3500 update_cfs_load(cfs_rq, 1); 3584 update_cfs_load(cfs_rq, 1);
3585 update_cfs_rq_blocked_load(cfs_rq);
3501 3586
3502 /* 3587 /*
3503 * We need to update shares after updating tg->load_weight in 3588 * We need to update shares after updating tg->load_weight in
@@ -5232,6 +5317,20 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)
5232 place_entity(cfs_rq, se, 0); 5317 place_entity(cfs_rq, se, 0);
5233 se->vruntime -= cfs_rq->min_vruntime; 5318 se->vruntime -= cfs_rq->min_vruntime;
5234 } 5319 }
5320
5321#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
5322 /*
5323 * Remove our load from contribution when we leave sched_fair
5324 * and ensure we don't carry in an old decay_count if we
5325 * switch back.
5326 */
5327 if (p->se.avg.decay_count) {
5328 struct cfs_rq *cfs_rq = cfs_rq_of(&p->se);
5329 __synchronize_entity_decay(&p->se);
5330 subtract_blocked_load_contrib(cfs_rq,
5331 p->se.avg.load_avg_contrib);
5332 }
5333#endif
5235} 5334}
5236 5335
5237/* 5336/*
@@ -5278,6 +5377,9 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
5278#ifndef CONFIG_64BIT 5377#ifndef CONFIG_64BIT
5279 cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime; 5378 cfs_rq->min_vruntime_copy = cfs_rq->min_vruntime;
5280#endif 5379#endif
5380#if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
5381 atomic64_set(&cfs_rq->decay_counter, 1);
5382#endif
5281} 5383}
5282 5384
5283#ifdef CONFIG_FAIR_GROUP_SCHED 5385#ifdef CONFIG_FAIR_GROUP_SCHED
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index e6539736af58..664ff39195f7 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -229,7 +229,9 @@ struct cfs_rq {
229 * This allows for the description of both thread and group usage (in 229 * This allows for the description of both thread and group usage (in
230 * the FAIR_GROUP_SCHED case). 230 * the FAIR_GROUP_SCHED case).
231 */ 231 */
232 u64 runnable_load_avg; 232 u64 runnable_load_avg, blocked_load_avg;
233 atomic64_t decay_counter;
234 u64 last_decay;
233#endif 235#endif
234#ifdef CONFIG_FAIR_GROUP_SCHED 236#ifdef CONFIG_FAIR_GROUP_SCHED
235 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */ 237 struct rq *rq; /* cpu runqueue to which this cfs_rq is attached */