author		Paul Turner <pjt@google.com>	2012-10-04 07:18:31 -0400
committer	Ingo Molnar <mingo@kernel.org>	2012-10-24 04:27:26 -0400
commit		bb17f65571e97a7ec0297571fb1154fbd107ad00 (patch)
tree		d7574f5a57abe38112cbac832a29354ee1e4fa64 /kernel/sched
parent		8165e145ceb62fc338e099c9b12b3239c83d2f8e (diff)
sched: Normalize tg load contributions against runnable time
Entities of equal weight should receive equitable distribution of cpu time.
This is challenging in the case of a task_group's shares as execution may be
occurring on multiple cpus simultaneously.

To handle this we divide up the shares into weights proportionate with the
load on each cfs_rq. This does not, however, account for the fact that the
sum of the parts may be less than one cpu, so we need to normalize:

	load(tg) = min(runnable_avg(tg), 1) * tg->shares

where runnable_avg is the aggregate time in which the task_group had
runnable children.

Signed-off-by: Paul Turner <pjt@google.com>
Reviewed-by: Ben Segall <bsegall@google.com>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20120823141506.930124292@google.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
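As a rough illustration of the normalization above (a sketch only, not part of the patch: the helper name tg_normalized_load is made up, and it assumes the scheduler's usual fixed point where NICE_0_LOAD == 1024 stands for one full cpu):

	#define NICE_0_SHIFT	10
	#define NICE_0_LOAD	(1UL << NICE_0_SHIFT)	/* 1.0 cpu in fixed point */

	/* load(tg) = min(runnable_avg(tg), 1) * tg->shares */
	static unsigned long tg_normalized_load(unsigned long runnable_avg,
						unsigned long shares)
	{
		if (runnable_avg > NICE_0_LOAD)		/* group keeps >= 1 cpu busy */
			runnable_avg = NICE_0_LOAD;	/* clamp to 1.0 */
		return (shares * runnable_avg) >> NICE_0_SHIFT;
	}

For example, a group with shares of 1024 that is runnable 25% of the time (runnable_avg of roughly 256) contributes about 256 of load, the same as a single 25%-runnable task of equal weight, while a group that keeps at least one cpu busy contributes its full shares.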
Diffstat (limited to 'kernel/sched')
-rw-r--r--	kernel/sched/debug.c	4
-rw-r--r--	kernel/sched/fair.c	56
-rw-r--r--	kernel/sched/sched.h	2
3 files changed, 62 insertions, 0 deletions
diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c
index 290892361a0..71b0ea325e9 100644
--- a/kernel/sched/debug.c
+++ b/kernel/sched/debug.c
@@ -234,6 +234,10 @@ void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
 			atomic64_read(&cfs_rq->tg->load_avg));
 	SEQ_printf(m, " .%-30s: %lld\n", "tg_load_contrib",
 			cfs_rq->tg_load_contrib);
+	SEQ_printf(m, " .%-30s: %d\n", "tg_runnable_contrib",
+			cfs_rq->tg_runnable_contrib);
+	SEQ_printf(m, " .%-30s: %d\n", "tg->runnable_avg",
+			atomic_read(&cfs_rq->tg->runnable_avg));
 #endif
 
 	print_cfs_group_stats(m, cpu, cfs_rq->tg);
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index e20cb2693ef..9e49722da03 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -1118,19 +1118,73 @@ static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
 	}
 }
 
+/*
+ * Aggregate cfs_rq runnable averages into an equivalent task_group
+ * representation for computing load contributions.
+ */
+static inline void __update_tg_runnable_avg(struct sched_avg *sa,
+						  struct cfs_rq *cfs_rq)
+{
+	struct task_group *tg = cfs_rq->tg;
+	long contrib;
+
+	/* The fraction of a cpu used by this cfs_rq */
+	contrib = div_u64(sa->runnable_avg_sum << NICE_0_SHIFT,
+			  sa->runnable_avg_period + 1);
+	contrib -= cfs_rq->tg_runnable_contrib;
+
+	if (abs(contrib) > cfs_rq->tg_runnable_contrib / 64) {
+		atomic_add(contrib, &tg->runnable_avg);
+		cfs_rq->tg_runnable_contrib += contrib;
+	}
+}
+
 static inline void __update_group_entity_contrib(struct sched_entity *se)
 {
 	struct cfs_rq *cfs_rq = group_cfs_rq(se);
 	struct task_group *tg = cfs_rq->tg;
+	int runnable_avg;
+
 	u64 contrib;
 
 	contrib = cfs_rq->tg_load_contrib * tg->shares;
 	se->avg.load_avg_contrib = div64_u64(contrib,
 				     atomic64_read(&tg->load_avg) + 1);
+
+	/*
+	 * For group entities we need to compute a correction term in the case
+	 * that they are consuming <1 cpu so that we would contribute the same
+	 * load as a task of equal weight.
+	 *
+	 * Explicitly co-ordinating this measurement would be expensive, but
+	 * fortunately the sum of each cpu's contribution forms a usable
+	 * lower-bound on the true value.
+	 *
+	 * Consider the aggregate of two contributions.  Either they are
+	 * disjoint (and the sum represents the true value) or they overlap
+	 * and we are understating by the aggregate of their overlap.
+	 *
+	 * Extending this to N cpus, for a given overlap the maximum amount we
+	 * understate is then n_i(n_i+1)/2 * w_i, where n_i is the number of
+	 * cpus that overlap for this interval and w_i is the interval width.
+	 *
+	 * On a small machine the first term is well-bounded, which bounds the
+	 * total error since w_i is a subset of the period.  On a larger
+	 * machine the first term can be larger, but if w_i is of consequential
+	 * size we are guaranteed to see n_i*w_i quickly converge to our upper
+	 * bound of 1 cpu.
+	 */
+	runnable_avg = atomic_read(&tg->runnable_avg);
+	if (runnable_avg < NICE_0_LOAD) {
+		se->avg.load_avg_contrib *= runnable_avg;
+		se->avg.load_avg_contrib >>= NICE_0_SHIFT;
+	}
 }
 #else
 static inline void __update_cfs_rq_tg_load_contrib(struct cfs_rq *cfs_rq,
 						 int force_update) {}
+static inline void __update_tg_runnable_avg(struct sched_avg *sa,
+						  struct cfs_rq *cfs_rq) {}
 static inline void __update_group_entity_contrib(struct sched_entity *se) {}
 #endif
 
@@ -1152,6 +1206,7 @@ static long __update_entity_load_avg_contrib(struct sched_entity *se)
 	if (entity_is_task(se)) {
 		__update_task_entity_contrib(se);
 	} else {
+		__update_tg_runnable_avg(&se->avg, group_cfs_rq(se));
 		__update_group_entity_contrib(se);
 	}
 
@@ -1220,6 +1275,7 @@ static void update_cfs_rq_blocked_load(struct cfs_rq *cfs_rq, int force_update)
 static inline void update_rq_runnable_avg(struct rq *rq, int runnable)
 {
 	__update_entity_runnable_avg(rq->clock_task, &rq->avg, runnable);
+	__update_tg_runnable_avg(&rq->avg, &rq->cfs);
 }
 
 /* Add the load generated by se into cfs_rq's child load-average */
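To make the arithmetic above concrete, here is a small standalone sketch of the two steps added in fair.c: publishing a cfs_rq's fraction of a cpu into tg->runnable_avg only once it has drifted by more than ~1/64, and scaling the group entity's contribution down when the group as a whole uses less than one cpu. This is illustrative userspace C with made-up values, not kernel code, and it assumes the NICE_0_SHIFT == 10 fixed point used by the patch:

	#include <stdio.h>
	#include <stdlib.h>
	#include <stdint.h>

	#define NICE_0_SHIFT	10
	#define NICE_0_LOAD	(1L << NICE_0_SHIFT)

	int main(void)
	{
		/* A cfs_rq that was runnable for ~25% of its averaging window. */
		uint64_t runnable_avg_sum    = 12000;
		uint64_t runnable_avg_period = 48000;

		/* Fraction of a cpu in fixed point: ~0.25 * 1024 = 255. */
		long contrib = (long)((runnable_avg_sum << NICE_0_SHIFT) /
				      (runnable_avg_period + 1));

		long tg_runnable_contrib = 240;		/* last value published */
		long delta = contrib - tg_runnable_contrib;

		/* Only touch the shared counter once the drift exceeds ~1/64. */
		if (labs(delta) > tg_runnable_contrib / 64)
			printf("post delta %ld to tg->runnable_avg\n", delta);

		/* If the whole group uses <1 cpu, scale its contribution down. */
		long runnable_avg = 255;	/* pretend this is the tg-wide sum */
		long load_avg_contrib = 1024;	/* contribution before correction */
		if (runnable_avg < NICE_0_LOAD)
			load_avg_contrib = (load_avg_contrib * runnable_avg)
						>> NICE_0_SHIFT;
		printf("corrected contribution: %ld\n", load_avg_contrib);
		return 0;
	}

The 1/64 threshold limits how often a cfs_rq must update the shared tg->runnable_avg counter, keeping cross-cpu traffic on that cache line low while the cached tg_runnable_contrib stays within roughly 1.5% of the true value.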
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 924a9909488..134928dc6f0 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -113,6 +113,7 @@ struct task_group {
 
 	atomic_t load_weight;
 	atomic64_t load_avg;
+	atomic_t runnable_avg;
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -234,6 +235,7 @@ struct cfs_rq {
 	atomic64_t decay_counter, removed_load;
 	u64 last_decay;
 #ifdef CONFIG_FAIR_GROUP_SCHED
+	u32 tg_runnable_contrib;
 	u64 tg_load_contrib;
 #endif
 #endif