diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-04-19 13:44:58 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-04-19 13:44:58 -0400 |
commit | ac086bc22997a2be24fc40fc8d46522fe7e03d11 (patch) | |
tree | 7a484ba13acbdf0fa98c896ce58e807b4b5b1af9 /kernel/sched_rt.c | |
parent | d0b27fa77854b149ad4af08b0fe47fe712a47ade (diff) |
sched: rt-group: smp balancing
Currently the rt group scheduling does a per-cpu runtime limit; however,
the rt load balancer makes no guarantees about an equal spread of real-
time tasks, just that at any one time, the highest priority tasks run.
Solve this by making the runtime limit a global property, borrowing
excess runtime from the other cpus once the local limit runs out.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r-- | kernel/sched_rt.c | 88 |
1 file changed, 85 insertions, 3 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index 8bc176136666..6928ded24da1 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -62,7 +62,12 @@ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | |||
62 | if (!rt_rq->tg) | 62 | if (!rt_rq->tg) |
63 | return RUNTIME_INF; | 63 | return RUNTIME_INF; |
64 | 64 | ||
65 | return rt_rq->tg->rt_bandwidth.rt_runtime; | 65 | return rt_rq->rt_runtime; |
66 | } | ||
67 | |||
68 | static inline u64 sched_rt_period(struct rt_rq *rt_rq) | ||
69 | { | ||
70 | return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); | ||
66 | } | 71 | } |
67 | 72 | ||
68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 73 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -145,11 +150,21 @@ struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) | |||
145 | return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; | 150 | return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; |
146 | } | 151 | } |
147 | 152 | ||
153 | static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | ||
154 | { | ||
155 | return &rt_rq->tg->rt_bandwidth; | ||
156 | } | ||
157 | |||
148 | #else | 158 | #else |
149 | 159 | ||
150 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | 160 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
151 | { | 161 | { |
152 | return def_rt_bandwidth.rt_runtime; | 162 | return rt_rq->rt_runtime; |
163 | } | ||
164 | |||
165 | static inline u64 sched_rt_period(struct rt_rq *rt_rq) | ||
166 | { | ||
167 | return ktime_to_ns(def_rt_bandwidth.rt_period); | ||
153 | } | 168 | } |
154 | 169 | ||
155 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 170 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -200,6 +215,11 @@ struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) | |||
200 | return &cpu_rq(cpu)->rt; | 215 | return &cpu_rq(cpu)->rt; |
201 | } | 216 | } |
202 | 217 | ||
218 | static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | ||
219 | { | ||
220 | return &def_rt_bandwidth; | ||
221 | } | ||
222 | |||
203 | #endif | 223 | #endif |
204 | 224 | ||
205 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | 225 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) |
@@ -218,8 +238,10 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
218 | 238 | ||
219 | spin_lock(&rq->lock); | 239 | spin_lock(&rq->lock); |
220 | if (rt_rq->rt_time) { | 240 | if (rt_rq->rt_time) { |
221 | u64 runtime = rt_b->rt_runtime; | 241 | u64 runtime; |
222 | 242 | ||
243 | spin_lock(&rt_rq->rt_runtime_lock); | ||
244 | runtime = rt_rq->rt_runtime; | ||
223 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | 245 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); |
224 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | 246 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { |
225 | rt_rq->rt_throttled = 0; | 247 | rt_rq->rt_throttled = 0; |
@@ -227,6 +249,7 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
227 | } | 249 | } |
228 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | 250 | if (rt_rq->rt_time || rt_rq->rt_nr_running) |
229 | idle = 0; | 251 | idle = 0; |
252 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
230 | } | 253 | } |
231 | 254 | ||
232 | if (enqueue) | 255 | if (enqueue) |
@@ -237,6 +260,47 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
237 | return idle; | 260 | return idle; |
238 | } | 261 | } |
239 | 262 | ||
263 | #ifdef CONFIG_SMP | ||
264 | static int balance_runtime(struct rt_rq *rt_rq) | ||
265 | { | ||
266 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
267 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; | ||
268 | int i, weight, more = 0; | ||
269 | u64 rt_period; | ||
270 | |||
271 | weight = cpus_weight(rd->span); | ||
272 | |||
273 | spin_lock(&rt_b->rt_runtime_lock); | ||
274 | rt_period = ktime_to_ns(rt_b->rt_period); | ||
275 | for_each_cpu_mask(i, rd->span) { | ||
276 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | ||
277 | s64 diff; | ||
278 | |||
279 | if (iter == rt_rq) | ||
280 | continue; | ||
281 | |||
282 | spin_lock(&iter->rt_runtime_lock); | ||
283 | diff = iter->rt_runtime - iter->rt_time; | ||
284 | if (diff > 0) { | ||
285 | do_div(diff, weight); | ||
286 | if (rt_rq->rt_runtime + diff > rt_period) | ||
287 | diff = rt_period - rt_rq->rt_runtime; | ||
288 | iter->rt_runtime -= diff; | ||
289 | rt_rq->rt_runtime += diff; | ||
290 | more = 1; | ||
291 | if (rt_rq->rt_runtime == rt_period) { | ||
292 | spin_unlock(&iter->rt_runtime_lock); | ||
293 | break; | ||
294 | } | ||
295 | } | ||
296 | spin_unlock(&iter->rt_runtime_lock); | ||
297 | } | ||
298 | spin_unlock(&rt_b->rt_runtime_lock); | ||
299 | |||
300 | return more; | ||
301 | } | ||
302 | #endif | ||
303 | |||
240 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 304 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
241 | { | 305 | { |
242 | #ifdef CONFIG_RT_GROUP_SCHED | 306 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -259,6 +323,22 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
259 | if (rt_rq->rt_throttled) | 323 | if (rt_rq->rt_throttled) |
260 | return rt_rq_throttled(rt_rq); | 324 | return rt_rq_throttled(rt_rq); |
261 | 325 | ||
326 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) | ||
327 | return 0; | ||
328 | |||
329 | #ifdef CONFIG_SMP | ||
330 | if (rt_rq->rt_time > runtime) { | ||
331 | int more; | ||
332 | |||
333 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
334 | more = balance_runtime(rt_rq); | ||
335 | spin_lock(&rt_rq->rt_runtime_lock); | ||
336 | |||
337 | if (more) | ||
338 | runtime = sched_rt_runtime(rt_rq); | ||
339 | } | ||
340 | #endif | ||
341 | |||
262 | if (rt_rq->rt_time > runtime) { | 342 | if (rt_rq->rt_time > runtime) { |
263 | rt_rq->rt_throttled = 1; | 343 | rt_rq->rt_throttled = 1; |
264 | if (rt_rq_throttled(rt_rq)) { | 344 | if (rt_rq_throttled(rt_rq)) { |
@@ -294,9 +374,11 @@ static void update_curr_rt(struct rq *rq) | |||
294 | curr->se.exec_start = rq->clock; | 374 | curr->se.exec_start = rq->clock; |
295 | cpuacct_charge(curr, delta_exec); | 375 | cpuacct_charge(curr, delta_exec); |
296 | 376 | ||
377 | spin_lock(&rt_rq->rt_runtime_lock); | ||
297 | rt_rq->rt_time += delta_exec; | 378 | rt_rq->rt_time += delta_exec; |
298 | if (sched_rt_runtime_exceeded(rt_rq)) | 379 | if (sched_rt_runtime_exceeded(rt_rq)) |
299 | resched_task(curr); | 380 | resched_task(curr); |
381 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
300 | } | 382 | } |
301 | 383 | ||
302 | static inline | 384 | static inline |