author    Peter Zijlstra <a.p.zijlstra@chello.nl>	2008-04-19 13:44:58 -0400
committer Ingo Molnar <mingo@elte.hu>	2008-04-19 13:44:58 -0400
commit    ac086bc22997a2be24fc40fc8d46522fe7e03d11 (patch)
tree      7a484ba13acbdf0fa98c896ce58e807b4b5b1af9 /kernel
parent    d0b27fa77854b149ad4af08b0fe47fe712a47ade (diff)
sched: rt-group: smp balancing
Currently the rt group scheduling enforces a per-cpu runtime limit, but the rt load balancer makes no guarantee of an equal spread of real-time tasks across cpus; it only guarantees that, at any one time, the highest-priority tasks run. Solve this by making the runtime limit a global property: once the local limit runs out, borrow excess runtime from the other cpus.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
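To make the borrowing scheme concrete, here is a minimal, illustrative user-space sketch of the idea, not the kernel implementation (that is balance_runtime() in the kernel/sched_rt.c hunk below). All names in the sketch (cpu_budget, borrow_runtime, RT_PERIOD, NR_CPUS) are invented for the example, and the per-cpu budget locking done in the kernel via rt_runtime_lock is deliberately omitted.

/*
 * Sketch: when one cpu's local rt_runtime is exhausted, pull a 1/nr share
 * of every other cpu's unused budget, never letting the local budget grow
 * beyond the period.  Illustrative only -- not kernel code.
 */
#include <stdio.h>
#include <stdint.h>

#define NR_CPUS   4
#define RT_PERIOD 1000000ULL		/* rt period, in ns */

struct cpu_budget {
	uint64_t rt_runtime;		/* local budget for this period */
	uint64_t rt_time;		/* runtime already consumed */
};

/* Borrow spare runtime for cpus[dst] from the other cpus; returns 1 if any moved. */
static int borrow_runtime(struct cpu_budget *cpus, int nr, int dst)
{
	int i, more = 0;

	for (i = 0; i < nr; i++) {
		int64_t diff;

		if (i == dst)
			continue;

		/* how much of cpu i's budget is still unused */
		diff = (int64_t)cpus[i].rt_runtime - (int64_t)cpus[i].rt_time;
		if (diff <= 0)
			continue;

		/* only take a 1/nr share of the surplus from each cpu */
		diff /= nr;

		/* never grow the local budget beyond the period */
		if (cpus[dst].rt_runtime + diff > RT_PERIOD)
			diff = RT_PERIOD - cpus[dst].rt_runtime;

		cpus[i].rt_runtime -= diff;
		cpus[dst].rt_runtime += diff;
		more = 1;

		if (cpus[dst].rt_runtime == RT_PERIOD)
			break;
	}
	return more;
}

int main(void)
{
	struct cpu_budget cpus[NR_CPUS] = {
		{ 500000, 500000 },	/* cpu 0: local limit exhausted */
		{ 500000, 100000 },	/* cpus 1-3 still have spare runtime */
		{ 500000, 200000 },
		{ 500000,      0 },
	};

	if (borrow_runtime(cpus, NR_CPUS, 0))
		printf("cpu0 budget after borrowing: %llu ns\n",
		       (unsigned long long)cpus[0].rt_runtime);
	return 0;
}

With the sample budgets in main(), cpu 0 ends the pass with 800000 ns of the 1000000 ns period, having taken a 1/4 share of each other cpu's surplus.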
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched.c     | 40
-rw-r--r--  kernel/sched_rt.c  | 88
2 files changed, 122 insertions(+), 6 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index bb20323f7d09..313cd4f057cf 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -164,6 +164,7 @@ struct rt_prio_array {
 struct rt_bandwidth {
 	ktime_t		rt_period;
 	u64		rt_runtime;
+	spinlock_t	rt_runtime_lock;
 	struct hrtimer	rt_period_timer;
 };
 
@@ -198,6 +199,8 @@ void init_rt_bandwidth(struct rt_bandwidth *rt_b, u64 period, u64 runtime)
 	rt_b->rt_period = ns_to_ktime(period);
 	rt_b->rt_runtime = runtime;
 
+	spin_lock_init(&rt_b->rt_runtime_lock);
+
 	hrtimer_init(&rt_b->rt_period_timer,
 			CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 	rt_b->rt_period_timer.function = sched_rt_period_timer;
@@ -414,6 +417,8 @@ struct rt_rq {
 #endif
 	int rt_throttled;
 	u64 rt_time;
+	u64 rt_runtime;
+	spinlock_t rt_runtime_lock;
 
 #ifdef CONFIG_RT_GROUP_SCHED
 	unsigned long rt_nr_boosted;
@@ -7299,6 +7304,8 @@ static void init_rt_rq(struct rt_rq *rt_rq, struct rq *rq)
 
 	rt_rq->rt_time = 0;
 	rt_rq->rt_throttled = 0;
+	rt_rq->rt_runtime = 0;
+	spin_lock_init(&rt_rq->rt_runtime_lock);
 
 #ifdef CONFIG_RT_GROUP_SCHED
 	rt_rq->rt_nr_boosted = 0;
@@ -7335,6 +7342,7 @@ static void init_tg_rt_entry(struct rq *rq, struct task_group *tg,
 	init_rt_rq(rt_rq, rq);
 	rt_rq->tg = tg;
 	rt_rq->rt_se = rt_se;
+	rt_rq->rt_runtime = tg->rt_bandwidth.rt_runtime;
 	if (add)
 		list_add(&rt_rq->leaf_rt_rq_list, &rq->leaf_rt_rq_list);
 
@@ -7391,6 +7399,8 @@ void __init sched_init(void)
 		init_tg_rt_entry(rq, &init_task_group,
 				&per_cpu(init_rt_rq, i),
 				&per_cpu(init_sched_rt_entity, i), i, 1);
+#else
+		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
 #endif
 
 		for (j = 0; j < CPU_LOAD_IDX_MAX; j++)
@@ -7974,11 +7984,11 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
 static int tg_set_bandwidth(struct task_group *tg,
 		u64 rt_period, u64 rt_runtime)
 {
-	int err = 0;
+	int i, err = 0;
 
 	mutex_lock(&rt_constraints_mutex);
 	read_lock(&tasklist_lock);
-	if (rt_runtime_us == 0 && tg_has_rt_tasks(tg)) {
+	if (rt_runtime == 0 && tg_has_rt_tasks(tg)) {
 		err = -EBUSY;
 		goto unlock;
 	}
@@ -7986,8 +7996,19 @@ static int tg_set_bandwidth(struct task_group *tg,
 		err = -EINVAL;
 		goto unlock;
 	}
+
+	spin_lock_irq(&tg->rt_bandwidth.rt_runtime_lock);
 	tg->rt_bandwidth.rt_period = ns_to_ktime(rt_period);
 	tg->rt_bandwidth.rt_runtime = rt_runtime;
+
+	for_each_possible_cpu(i) {
+		struct rt_rq *rt_rq = tg->rt_rq[i];
+
+		spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = rt_runtime;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+	}
+	spin_unlock_irq(&tg->rt_bandwidth.rt_runtime_lock);
  unlock:
 	read_unlock(&tasklist_lock);
 	mutex_unlock(&rt_constraints_mutex);
@@ -8052,6 +8073,19 @@ static int sched_rt_global_constraints(void)
 #else
 static int sched_rt_global_constraints(void)
 {
+	unsigned long flags;
+	int i;
+
+	spin_lock_irqsave(&def_rt_bandwidth.rt_runtime_lock, flags);
+	for_each_possible_cpu(i) {
+		struct rt_rq *rt_rq = &cpu_rq(i)->rt;
+
+		spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = global_rt_runtime();
+		spin_unlock(&rt_rq->rt_runtime_lock);
+	}
+	spin_unlock_irqrestore(&def_rt_bandwidth.rt_runtime_lock, flags);
+
 	return 0;
 }
 #endif
@@ -8168,7 +8202,7 @@ static u64 cpu_shares_read_uint(struct cgroup *cgrp, struct cftype *cft)
 #endif
 
 #ifdef CONFIG_RT_GROUP_SCHED
-static int cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
+static ssize_t cpu_rt_runtime_write(struct cgroup *cgrp, struct cftype *cft,
 				struct file *file,
 				const char __user *userbuf,
 				size_t nbytes, loff_t *unused_ppos)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 8bc176136666..6928ded24da1 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -62,7 +62,12 @@ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 	if (!rt_rq->tg)
 		return RUNTIME_INF;
 
-	return rt_rq->tg->rt_bandwidth.rt_runtime;
+	return rt_rq->rt_runtime;
+}
+
+static inline u64 sched_rt_period(struct rt_rq *rt_rq)
+{
+	return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period);
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -145,11 +150,21 @@ struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
 	return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu];
 }
 
+static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
+{
+	return &rt_rq->tg->rt_bandwidth;
+}
+
 #else
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
-	return def_rt_bandwidth.rt_runtime;
+	return rt_rq->rt_runtime;
+}
+
+static inline u64 sched_rt_period(struct rt_rq *rt_rq)
+{
+	return ktime_to_ns(def_rt_bandwidth.rt_period);
 }
 
 #define for_each_leaf_rt_rq(rt_rq, rq) \
@@ -200,6 +215,11 @@ struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu)
 	return &cpu_rq(cpu)->rt;
 }
 
+static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
+{
+	return &def_rt_bandwidth;
+}
+
 #endif
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
@@ -218,8 +238,10 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 
 		spin_lock(&rq->lock);
 		if (rt_rq->rt_time) {
-			u64 runtime = rt_b->rt_runtime;
+			u64 runtime;
 
+			spin_lock(&rt_rq->rt_runtime_lock);
+			runtime = rt_rq->rt_runtime;
 			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
 			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
 				rt_rq->rt_throttled = 0;
@@ -227,6 +249,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			}
 			if (rt_rq->rt_time || rt_rq->rt_nr_running)
 				idle = 0;
+			spin_unlock(&rt_rq->rt_runtime_lock);
 		}
 
 		if (enqueue)
@@ -237,6 +260,47 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	return idle;
 }
 
+#ifdef CONFIG_SMP
+static int balance_runtime(struct rt_rq *rt_rq)
+{
+	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	int i, weight, more = 0;
+	u64 rt_period;
+
+	weight = cpus_weight(rd->span);
+
+	spin_lock(&rt_b->rt_runtime_lock);
+	rt_period = ktime_to_ns(rt_b->rt_period);
+	for_each_cpu_mask(i, rd->span) {
+		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+		s64 diff;
+
+		if (iter == rt_rq)
+			continue;
+
+		spin_lock(&iter->rt_runtime_lock);
+		diff = iter->rt_runtime - iter->rt_time;
+		if (diff > 0) {
+			do_div(diff, weight);
+			if (rt_rq->rt_runtime + diff > rt_period)
+				diff = rt_period - rt_rq->rt_runtime;
+			iter->rt_runtime -= diff;
+			rt_rq->rt_runtime += diff;
+			more = 1;
+			if (rt_rq->rt_runtime == rt_period) {
+				spin_unlock(&iter->rt_runtime_lock);
+				break;
+			}
+		}
+		spin_unlock(&iter->rt_runtime_lock);
+	}
+	spin_unlock(&rt_b->rt_runtime_lock);
+
+	return more;
+}
+#endif
+
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -259,6 +323,22 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (rt_rq->rt_throttled)
 		return rt_rq_throttled(rt_rq);
 
+	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
+		return 0;
+
+#ifdef CONFIG_SMP
+	if (rt_rq->rt_time > runtime) {
+		int more;
+
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		more = balance_runtime(rt_rq);
+		spin_lock(&rt_rq->rt_runtime_lock);
+
+		if (more)
+			runtime = sched_rt_runtime(rt_rq);
+	}
+#endif
+
 	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;
 		if (rt_rq_throttled(rt_rq)) {
@@ -294,9 +374,11 @@ static void update_curr_rt(struct rq *rq)
 	curr->se.exec_start = rq->clock;
 	cpuacct_charge(curr, delta_exec);
 
+	spin_lock(&rt_rq->rt_runtime_lock);
 	rt_rq->rt_time += delta_exec;
 	if (sched_rt_runtime_exceeded(rt_rq))
 		resched_task(curr);
+	spin_unlock(&rt_rq->rt_runtime_lock);
 }
 
 static inline