-rw-r--r--  kernel/sched/fair.c  | 100
-rw-r--r--  kernel/sched/sched.h |   2
2 files changed, 81 insertions(+), 21 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5e602e6ba0c3..74dc29ba1ad1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -259,7 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 	return grp->my_q;
 }
 
-static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq);
+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq,
+				       int force_update);
 
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
@@ -281,7 +282,7 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 
 		cfs_rq->on_list = 1;
 		/* We should have no load, but we need to update last_decay. */
-		update_cfs_rq_blocked_load(cfs_rq);
+		update_cfs_rq_blocked_load(cfs_rq, 0);
 	}
 }
 
@@ -1086,17 +1087,19 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 }
 
 /* Synchronize an entity's decay with its parenting cfs_rq.*/
-static inline void __synchronize_entity_decay(struct sched_entity *se)
+static inline u64 __synchronize_entity_decay(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 decays = atomic64_read(&cfs_rq->decay_counter);
 
 	decays -= se->avg.decay_count;
 	if (!decays)
-		return;
+		return 0;
 
 	se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
 	se->avg.decay_count = 0;
+
+	return decays;
 }
 
 /* Compute the current contribution to load_avg by se, return any delta */
@@ -1149,20 +1152,26 @@ static inline void update_entity_load_avg(struct sched_entity *se,
  * Decay the load contributed by all blocked children and account this so that
  * their contribution may appropriately discounted when they wake up.
  */
-static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq)
+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
 {
 	u64 now = rq_of(cfs_rq)->clock_task >> 20;
 	u64 decays;
 
 	decays = now - cfs_rq->last_decay;
-	if (!decays)
+	if (!decays && !force_update)
 		return;
 
-	cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
-					      decays);
-	atomic64_add(decays, &cfs_rq->decay_counter);
+	if (atomic64_read(&cfs_rq->removed_load)) {
+		u64 removed_load = atomic64_xchg(&cfs_rq->removed_load, 0);
+		subtract_blocked_load_contrib(cfs_rq, removed_load);
+	}
 
-	cfs_rq->last_decay = now;
+	if (decays) {
+		cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
+						      decays);
+		atomic64_add(decays, &cfs_rq->decay_counter);
+		cfs_rq->last_decay = now;
+	}
 }
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
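The hunk above turns the blocked-load update into the single drain point for load that other CPUs have pulled away: remote CPUs only ever add to cfs_rq->removed_load, and the owning CPU claims the whole accumulated amount with an atomic exchange the next time this function runs under its own rq->lock. A minimal user-space sketch of that hand-off pattern, using illustrative names and plain C11 atomics rather than the kernel's atomic64_t helpers:

/*
 * Toy model of the removed_load accumulator: remote CPUs add, the owner
 * drains with an exchange-to-zero and subtracts under its own lock.
 * Names and the saturating subtraction are illustrative only.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct fake_cfs_rq {
        uint64_t blocked_load_avg;      /* only touched by the owner */
        _Atomic uint64_t removed_load;  /* touched by remote CPUs */
};

/* Remote side: a blocked task is migrated away, taking its contribution. */
static void remote_remove(struct fake_cfs_rq *rq, uint64_t contrib)
{
        atomic_fetch_add(&rq->removed_load, contrib);
}

/* Owner side: drain the accumulator and subtract it, clamping at zero. */
static void owner_update_blocked(struct fake_cfs_rq *rq)
{
        uint64_t removed = atomic_exchange(&rq->removed_load, 0);

        if (removed > rq->blocked_load_avg)
                rq->blocked_load_avg = 0;
        else
                rq->blocked_load_avg -= removed;
}

int main(void)
{
        struct fake_cfs_rq rq = { .blocked_load_avg = 3000 };

        remote_remove(&rq, 1024);       /* task pulled to another CPU */
        remote_remove(&rq, 512);        /* and another one */
        owner_update_blocked(&rq);      /* next update under rq->lock */
        printf("blocked_load_avg = %llu\n",
               (unsigned long long)rq.blocked_load_avg);        /* 1464 */
        return 0;
}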
@@ -1175,20 +1184,42 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
 					   struct sched_entity *se,
 					   int wakeup)
 {
-	/* we track migrations using entity decay_count == 0 */
-	if (unlikely(!se->avg.decay_count)) {
+	/*
+	 * We track migrations using entity decay_count <= 0, on a wake-up
+	 * migration we use a negative decay count to track the remote decays
+	 * accumulated while sleeping.
+	 */
+	if (unlikely(se->avg.decay_count <= 0)) {
 		se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task;
+		if (se->avg.decay_count) {
+			/*
+			 * In a wake-up migration we have to approximate the
+			 * time sleeping. This is because we can't synchronize
+			 * clock_task between the two cpus, and it is not
+			 * guaranteed to be read-safe. Instead, we can
+			 * approximate this using our carried decays, which are
+			 * explicitly atomically readable.
+			 */
+			se->avg.last_runnable_update -= (-se->avg.decay_count)
+							<< 20;
+			update_entity_load_avg(se, 0);
+			/* Indicate that we're now synchronized and on-rq */
+			se->avg.decay_count = 0;
+		}
 		wakeup = 0;
 	} else {
 		__synchronize_entity_decay(se);
 	}
 
-	if (wakeup)
+	/* migrated tasks did not contribute to our blocked load */
+	if (wakeup) {
 		subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
+		update_entity_load_avg(se, 0);
+	}
 
-	update_entity_load_avg(se, 0);
 	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
-	update_cfs_rq_blocked_load(cfs_rq);
+	/* we force update consideration on load-balancer moves */
+	update_cfs_rq_blocked_load(cfs_rq, !wakeup);
 }
 
 /*
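The back-dating above works because decay_count and the cfs_rq decay_counter are kept in the same units as the shifted clock, clock_task >> 20, i.e. periods of 2^20 ns (about 1.05 ms); re-expanding a carried count with << 20 therefore approximates the nanoseconds spent sleeping. A stand-alone sketch of the arithmetic, assuming the load-tracking series' 32-period half-life (the real decay_load() uses a precomputed fixed-point table, not a plain shift):

/*
 * Back-of-envelope model of the wake-up migration approximation:
 * convert carried decays back to ns, and halve the contribution once per
 * 32 periods.  Treat this as an illustration, not the kernel's math.
 */
#include <stdint.h>
#include <stdio.h>

#define PERIOD_SHIFT    20      /* one period ~= 2^20 ns ~= 1.05 ms */
#define HALF_LIFE       32      /* contribution halves every 32 periods */

/* crude stand-in for decay_load(): whole half-lives only, no fixed point */
static uint64_t approx_decay(uint64_t contrib, uint64_t periods)
{
        return contrib >> (periods / HALF_LIFE);
}

int main(void)
{
        int64_t decay_count = -96;      /* carried in from the old CPU */
        uint64_t contrib = 2048;

        uint64_t slept_ns = (uint64_t)(-decay_count) << PERIOD_SHIFT;
        uint64_t decayed  = approx_decay(contrib, (uint64_t)(-decay_count));

        printf("approx sleep %llu ns, contrib %llu -> %llu\n",
               (unsigned long long)slept_ns,
               (unsigned long long)contrib,
               (unsigned long long)decayed);    /* ~100663296 ns, 2048 -> 256 */
        return 0;
}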
@@ -1201,6 +1232,8 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
 					   int sleep)
 {
 	update_entity_load_avg(se, 1);
+	/* we force update consideration on load-balancer moves */
+	update_cfs_rq_blocked_load(cfs_rq, !sleep);
 
 	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
 	if (sleep) {
@@ -1218,7 +1251,8 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
 static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
 					   struct sched_entity *se,
 					   int sleep) {}
-static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) {}
+static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq,
+					      int force_update) {}
 #endif
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1610,7 +1644,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 * Ensure that runnable average is periodically updated.
 	 */
 	update_entity_load_avg(curr, 1);
-	update_cfs_rq_blocked_load(cfs_rq);
+	update_cfs_rq_blocked_load(cfs_rq, 1);
 
 	/*
 	 * Update share accounting for long-running entities.
@@ -3057,6 +3091,19 @@ unlock:
 static void
 migrate_task_rq_fair(struct task_struct *p, int next_cpu)
 {
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	/*
+	 * Load tracking: accumulate removed load so that it can be processed
+	 * when we next update owning cfs_rq under rq->lock. Tasks contribute
+	 * to blocked load iff they have a positive decay-count. It can never
+	 * be negative here since on-rq tasks have decay-count == 0.
+	 */
+	if (se->avg.decay_count) {
+		se->avg.decay_count = -__synchronize_entity_decay(se);
+		atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load);
+	}
 }
 #endif /* CONFIG_SMP */
 
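migrate_task_rq_fair() above is the sending side of the protocol: it may run without holding the destination's rq->lock, so it folds the entity's pending decay into load_avg_contrib, records how many decays it consumed as a negative decay_count for the destination's enqueue path to find, and parks the contribution in removed_load for the source cfs_rq to subtract later. A condensed source-side sketch with illustrative structs, plain integers instead of atomic64_t, and a crude stand-in for decay_load():

/*
 * Source-side toy model: synchronize decay, flip decay_count negative,
 * park the contribution in removed_load for the old cfs_rq.
 */
#include <stdint.h>
#include <stdio.h>

struct toy_cfs_rq {
        uint64_t decay_counter;         /* ~1ms periods elapsed here */
        uint64_t removed_load;          /* load pulled away by migrations */
};

struct toy_se {
        int64_t  decay_count;           /* snapshot of decay_counter at sleep */
        uint64_t load_avg_contrib;
};

static void toy_migrate(struct toy_se *se, struct toy_cfs_rq *src)
{
        uint64_t decays = src->decay_counter - (uint64_t)se->decay_count;

        /* fold the pending decay into the contribution we take with us */
        se->load_avg_contrib >>= decays / 32;   /* stand-in for decay_load() */
        /* negative count: tells the destination how long we already decayed */
        se->decay_count = -(int64_t)decays;
        /* let the old cfs_rq subtract us the next time it updates */
        src->removed_load += se->load_avg_contrib;
}

int main(void)
{
        struct toy_cfs_rq src = { .decay_counter = 200 };
        struct toy_se se = { .decay_count = 136, .load_avg_contrib = 2048 };

        toy_migrate(&se, &src);
        printf("contrib %llu, decay_count %lld, removed_load %llu\n",
               (unsigned long long)se.load_avg_contrib,
               (long long)se.decay_count,
               (unsigned long long)src.removed_load);
        /* expected: contrib 512, decay_count -64, removed_load 512 */
        return 0;
}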
@@ -3593,7 +3640,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
 
 	update_rq_clock(rq);
 	update_cfs_load(cfs_rq, 1);
-	update_cfs_rq_blocked_load(cfs_rq);
+	update_cfs_rq_blocked_load(cfs_rq, 1);
 
 	/*
 	 * We need to update shares after updating tg->load_weight in
@@ -5390,12 +5437,14 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 #endif
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 	atomic64_set(&cfs_rq->decay_counter, 1);
+	atomic64_set(&cfs_rq->removed_load, 0);
 #endif
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void task_move_group_fair(struct task_struct *p, int on_rq)
 {
+	struct cfs_rq *cfs_rq;
 	/*
 	 * If the task was not on the rq at the time of this cgroup movement
 	 * it must have been asleep, sleeping tasks keep their ->vruntime
@@ -5427,8 +5476,19 @@ static void task_move_group_fair(struct task_struct *p, int on_rq)
 	if (!on_rq)
 		p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
 	set_task_rq(p, task_cpu(p));
-	if (!on_rq)
-		p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
+	if (!on_rq) {
+		cfs_rq = cfs_rq_of(&p->se);
+		p->se.vruntime += cfs_rq->min_vruntime;
+#ifdef CONFIG_SMP
+		/*
+		 * migrate_task_rq_fair() will have removed our previous
+		 * contribution, but we must synchronize for ongoing future
+		 * decay.
+		 */
+		p->se.avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
+		cfs_rq->blocked_load_avg += p->se.avg.load_avg_contrib;
+#endif
+	}
 }
 
 void free_fair_sched_group(struct task_group *tg)
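In the cgroup-move hunk above, adopting the destination's decay_counter as the entity's decay_count means the next __synchronize_entity_decay() finds nothing pending, so the re-added blocked contribution only starts decaying as new periods elapse on the new cfs_rq. A small sketch of that bookkeeping, with illustrative names and simplified decay:

/*
 * Toy model: once decay_count matches the destination's counter, the next
 * synchronization is a no-op until real time passes on the new cfs_rq.
 */
#include <stdint.h>
#include <stdio.h>

struct fake_cfs_rq { uint64_t decay_counter; };
struct fake_se { uint64_t decay_count; uint64_t load_avg_contrib; };

static uint64_t sync_decay(struct fake_se *se, struct fake_cfs_rq *rq)
{
        uint64_t pending = rq->decay_counter - se->decay_count;

        if (pending)
                se->load_avg_contrib >>= pending / 32;  /* crude decay_load() */
        se->decay_count = rq->decay_counter;
        return pending;
}

int main(void)
{
        struct fake_cfs_rq dst = { .decay_counter = 500 };
        struct fake_se se = { .load_avg_contrib = 1024 };

        /* cgroup move of a sleeping task: adopt the destination's counter */
        se.decay_count = dst.decay_counter;

        printf("pending decays right after move: %llu\n",
               (unsigned long long)sync_decay(&se, &dst));      /* 0 */

        dst.decay_counter += 64;                                /* ~64ms later */
        printf("pending decays after time passes: %llu\n",
               (unsigned long long)sync_decay(&se, &dst));      /* 64 */
        return 0;
}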
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 664ff39195f7..30236ab4edc0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -230,7 +230,7 @@ struct cfs_rq {
 	 * the FAIR_GROUP_SCHED case).
 	 */
 	u64 runnable_load_avg, blocked_load_avg;
-	atomic64_t decay_counter;
+	atomic64_t decay_counter, removed_load;
 	u64 last_decay;
 #endif
 #ifdef CONFIG_FAIR_GROUP_SCHED