-rw-r--r--	kernel/sched/fair.c	| 100
-rw-r--r--	kernel/sched/sched.h	|   2
2 files changed, 81 insertions(+), 21 deletions(-)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 5e602e6ba0c3..74dc29ba1ad1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -259,7 +259,8 @@ static inline struct cfs_rq *group_cfs_rq(struct sched_entity *grp)
 	return grp->my_q;
 }
 
-static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq);
+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq,
+				       int force_update);
 
 static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 {
@@ -281,7 +282,7 @@ static inline void list_add_leaf_cfs_rq(struct cfs_rq *cfs_rq)
 
 		cfs_rq->on_list = 1;
 		/* We should have no load, but we need to update last_decay. */
-		update_cfs_rq_blocked_load(cfs_rq);
+		update_cfs_rq_blocked_load(cfs_rq, 0);
 	}
 }
 
@@ -1086,17 +1087,19 @@ static __always_inline int __update_entity_runnable_avg(u64 now,
 }
 
 /* Synchronize an entity's decay with its parenting cfs_rq.*/
-static inline void __synchronize_entity_decay(struct sched_entity *se)
+static inline u64 __synchronize_entity_decay(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq = cfs_rq_of(se);
 	u64 decays = atomic64_read(&cfs_rq->decay_counter);
 
 	decays -= se->avg.decay_count;
 	if (!decays)
-		return;
+		return 0;
 
 	se->avg.load_avg_contrib = decay_load(se->avg.load_avg_contrib, decays);
 	se->avg.decay_count = 0;
+
+	return decays;
 }
 
 /* Compute the current contribution to load_avg by se, return any delta */
@@ -1149,20 +1152,26 @@ static inline void update_entity_load_avg(struct sched_entity *se,
  * Decay the load contributed by all blocked children and account this so that
  * their contribution may appropriately discounted when they wake up.
  */
-static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq)
+static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
 {
 	u64 now = rq_of(cfs_rq)->clock_task >> 20;
 	u64 decays;
 
 	decays = now - cfs_rq->last_decay;
-	if (!decays)
+	if (!decays && !force_update)
 		return;
 
-	cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
-					      decays);
-	atomic64_add(decays, &cfs_rq->decay_counter);
+	if (atomic64_read(&cfs_rq->removed_load)) {
+		u64 removed_load = atomic64_xchg(&cfs_rq->removed_load, 0);
+		subtract_blocked_load_contrib(cfs_rq, removed_load);
+	}
 
-	cfs_rq->last_decay = now;
+	if (decays) {
+		cfs_rq->blocked_load_avg = decay_load(cfs_rq->blocked_load_avg,
+						      decays);
+		atomic64_add(decays, &cfs_rq->decay_counter);
+		cfs_rq->last_decay = now;
+	}
 }
 
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
@@ -1175,20 +1184,42 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
 						  struct sched_entity *se,
 						  int wakeup)
 {
-	/* we track migrations using entity decay_count == 0 */
-	if (unlikely(!se->avg.decay_count)) {
+	/*
+	 * We track migrations using entity decay_count <= 0, on a wake-up
+	 * migration we use a negative decay count to track the remote decays
+	 * accumulated while sleeping.
+	 */
+	if (unlikely(se->avg.decay_count <= 0)) {
 		se->avg.last_runnable_update = rq_of(cfs_rq)->clock_task;
+		if (se->avg.decay_count) {
+			/*
+			 * In a wake-up migration we have to approximate the
+			 * time sleeping. This is because we can't synchronize
+			 * clock_task between the two cpus, and it is not
+			 * guaranteed to be read-safe. Instead, we can
+			 * approximate this using our carried decays, which are
+			 * explicitly atomically readable.
+			 */
+			se->avg.last_runnable_update -= (-se->avg.decay_count)
+							<< 20;
+			update_entity_load_avg(se, 0);
+			/* Indicate that we're now synchronized and on-rq */
+			se->avg.decay_count = 0;
+		}
 		wakeup = 0;
 	} else {
 		__synchronize_entity_decay(se);
 	}
 
-	if (wakeup)
+	/* migrated tasks did not contribute to our blocked load */
+	if (wakeup) {
 		subtract_blocked_load_contrib(cfs_rq, se->avg.load_avg_contrib);
+		update_entity_load_avg(se, 0);
+	}
 
-	update_entity_load_avg(se, 0);
 	cfs_rq->runnable_load_avg += se->avg.load_avg_contrib;
-	update_cfs_rq_blocked_load(cfs_rq);
+	/* we force update consideration on load-balancer moves */
+	update_cfs_rq_blocked_load(cfs_rq, !wakeup);
 }
 
 /*
@@ -1201,6 +1232,8 @@ static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
 					   int sleep)
 {
 	update_entity_load_avg(se, 1);
+	/* we force update consideration on load-balancer moves */
+	update_cfs_rq_blocked_load(cfs_rq, !sleep);
 
 	cfs_rq->runnable_load_avg -= se->avg.load_avg_contrib;
 	if (sleep) {
@@ -1218,7 +1251,8 @@ static inline void enqueue_entity_load_avg(struct cfs_rq *cfs_rq,
 static inline void dequeue_entity_load_avg(struct cfs_rq *cfs_rq,
 					   struct sched_entity *se,
 					   int sleep) {}
-static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq) {}
+static inline void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq,
+					      int force_update) {}
 #endif
 
 static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se)
@@ -1610,7 +1644,7 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
 	 * Ensure that runnable average is periodically updated.
 	 */
 	update_entity_load_avg(curr, 1);
-	update_cfs_rq_blocked_load(cfs_rq);
+	update_cfs_rq_blocked_load(cfs_rq, 1);
 
 	/*
 	 * Update share accounting for long-running entities.
@@ -3057,6 +3091,19 @@ unlock:
 static void
 migrate_task_rq_fair(struct task_struct *p, int next_cpu)
 {
+	struct sched_entity *se = &p->se;
+	struct cfs_rq *cfs_rq = cfs_rq_of(se);
+
+	/*
+	 * Load tracking: accumulate removed load so that it can be processed
+	 * when we next update owning cfs_rq under rq->lock. Tasks contribute
+	 * to blocked load iff they have a positive decay-count. It can never
+	 * be negative here since on-rq tasks have decay-count == 0.
+	 */
+	if (se->avg.decay_count) {
+		se->avg.decay_count = -__synchronize_entity_decay(se);
+		atomic64_add(se->avg.load_avg_contrib, &cfs_rq->removed_load);
+	}
 }
 #endif /* CONFIG_SMP */
 
@@ -3593,7 +3640,7 @@ static int update_shares_cpu(struct task_group *tg, int cpu)
 
 	update_rq_clock(rq);
 	update_cfs_load(cfs_rq, 1);
-	update_cfs_rq_blocked_load(cfs_rq);
+	update_cfs_rq_blocked_load(cfs_rq, 1);
 
 	/*
 	 * We need to update shares after updating tg->load_weight in
@@ -5390,12 +5437,14 @@ void init_cfs_rq(struct cfs_rq *cfs_rq)
 #endif
 #if defined(CONFIG_FAIR_GROUP_SCHED) && defined(CONFIG_SMP)
 	atomic64_set(&cfs_rq->decay_counter, 1);
+	atomic64_set(&cfs_rq->removed_load, 0);
 #endif
 }
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static void task_move_group_fair(struct task_struct *p, int on_rq)
 {
+	struct cfs_rq *cfs_rq;
 	/*
 	 * If the task was not on the rq at the time of this cgroup movement
 	 * it must have been asleep, sleeping tasks keep their ->vruntime
@@ -5427,8 +5476,19 @@ static void task_move_group_fair(struct task_struct *p, int on_rq)
 	if (!on_rq)
 		p->se.vruntime -= cfs_rq_of(&p->se)->min_vruntime;
 	set_task_rq(p, task_cpu(p));
-	if (!on_rq)
-		p->se.vruntime += cfs_rq_of(&p->se)->min_vruntime;
+	if (!on_rq) {
+		cfs_rq = cfs_rq_of(&p->se);
+		p->se.vruntime += cfs_rq->min_vruntime;
+#ifdef CONFIG_SMP
+		/*
+		 * migrate_task_rq_fair() will have removed our previous
+		 * contribution, but we must synchronize for ongoing future
+		 * decay.
+		 */
+		p->se.avg.decay_count = atomic64_read(&cfs_rq->decay_counter);
+		cfs_rq->blocked_load_avg += p->se.avg.load_avg_contrib;
+#endif
+	}
 }
 
 void free_fair_sched_group(struct task_group *tg)
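
Note (not part of the patch): the fair.c hunks above hinge on a sign convention for se->avg.decay_count -- zero while on-rq, positive while blocked on the local cfs_rq, and negative after a wake-up migration, where the magnitude is the number of decay periods (2^20 ns each, matching the clock_task >> 20 scaling) accrued remotely while the task slept. The userspace sketch below models only that bookkeeping; the struct names, plain C11 atomics, and example numbers are assumptions made so it compiles and runs outside the kernel.

/*
 * Illustration only, not kernel code: a minimal model of the decay_count
 * handling in migrate_task_rq_fair() and enqueue_entity_load_avg() above.
 * All names below are hypothetical.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct cfs_rq_model {
	atomic_uint_least64_t decay_counter;	/* total decay periods seen */
};

struct se_avg_model {
	int64_t decay_count;		/* 0: on-rq, >0: blocked here, <0: migrated */
	uint64_t last_runnable_update;	/* ns */
};

/* Leaving the old CPU while blocked: carry the decays we still owe. */
static void model_migrate(struct se_avg_model *se, struct cfs_rq_model *old_rq)
{
	uint64_t decays = atomic_load(&old_rq->decay_counter) -
			  (uint64_t)se->decay_count;

	se->decay_count = -(int64_t)decays;	/* negative == remote decays owed */
}

/* Arriving on the new CPU: approximate the time slept from the owed decays. */
static void model_enqueue(struct se_avg_model *se, uint64_t new_clock_task_ns)
{
	se->last_runnable_update = new_clock_task_ns;
	if (se->decay_count < 0)
		/* one decay period is 2^20 ns, the clock_task >> 20 unit */
		se->last_runnable_update -= (uint64_t)(-se->decay_count) << 20;
	se->decay_count = 0;			/* synchronized and on-rq again */
}

int main(void)
{
	struct cfs_rq_model old_rq;
	struct se_avg_model se = { .decay_count = 100, .last_runnable_update = 0 };

	atomic_init(&old_rq.decay_counter, 107);	/* 7 periods passed while asleep */

	model_migrate(&se, &old_rq);
	printf("carried decay_count: %lld\n", (long long)se.decay_count);

	model_enqueue(&se, 5000000000ULL);
	printf("back-dated last_runnable_update: %llu ns\n",
	       (unsigned long long)se.last_runnable_update);
	return 0;
}

For example, a task that blocked at decay period 100 and wakes on another CPU at period 107 carries decay_count == -7, so the new CPU back-dates last_runnable_update by 7 << 20 ns before recomputing its load average.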
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 664ff39195f7..30236ab4edc0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -230,7 +230,7 @@ struct cfs_rq {
 	 * the FAIR_GROUP_SCHED case).
 	 */
 	u64 runnable_load_avg, blocked_load_avg;
-	atomic64_t decay_counter;
+	atomic64_t decay_counter, removed_load;
 	u64 last_decay;
 #endif
 #ifdef CONFIG_FAIR_GROUP_SCHED
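
Note (not part of the patch): removed_load, added to struct cfs_rq above, acts as a lock-free mailbox. A CPU migrating a blocked task away cannot take the owning runqueue's lock, so migrate_task_rq_fair() publishes the departing load_avg_contrib with atomic64_add(); the owner drains the total with atomic64_xchg() the next time update_cfs_rq_blocked_load() runs under rq->lock. The sketch below models that handoff with C11 atomics in userspace; the names and the clamped subtraction standing in for subtract_blocked_load_contrib() are assumptions.

/*
 * Illustration only, not kernel code: the removed_load handoff modelled in
 * userspace. All names below are hypothetical.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

struct blocked_load_model {
	uint64_t blocked_load_avg;		/* only the "owner" touches this */
	atomic_uint_least64_t removed_load;	/* remote CPUs add here lock-free */
};

/* Remote side: a blocked task migrates away (cf. migrate_task_rq_fair()). */
static void remote_remove(struct blocked_load_model *rq, uint64_t contrib)
{
	atomic_fetch_add(&rq->removed_load, contrib);
}

/* Owner side: drain pending removals (cf. update_cfs_rq_blocked_load()). */
static void owner_update(struct blocked_load_model *rq)
{
	if (atomic_load(&rq->removed_load)) {
		uint64_t removed = atomic_exchange(&rq->removed_load, 0);

		/* clamp at zero so the model cannot underflow */
		if (removed > rq->blocked_load_avg)
			removed = rq->blocked_load_avg;
		rq->blocked_load_avg -= removed;
	}
}

int main(void)
{
	struct blocked_load_model rq = { .blocked_load_avg = 3072 };

	atomic_init(&rq.removed_load, 0);
	remote_remove(&rq, 1024);	/* two blocked tasks leave this cfs_rq */
	remote_remove(&rq, 512);
	owner_update(&rq);		/* next update under "rq->lock" drains both */
	printf("blocked_load_avg: %llu\n",
	       (unsigned long long)rq.blocked_load_avg);	/* 1536 */
	return 0;
}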