diff options
Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r-- | kernel/sched_rt.c | 227 |
1 files changed, 176 insertions, 51 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0a6d2e516420..c2730a5a4f05 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -62,7 +62,12 @@ static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | |||
62 | if (!rt_rq->tg) | 62 | if (!rt_rq->tg) |
63 | return RUNTIME_INF; | 63 | return RUNTIME_INF; |
64 | 64 | ||
65 | return rt_rq->tg->rt_runtime; | 65 | return rt_rq->rt_runtime; |
66 | } | ||
67 | |||
68 | static inline u64 sched_rt_period(struct rt_rq *rt_rq) | ||
69 | { | ||
70 | return ktime_to_ns(rt_rq->tg->rt_bandwidth.rt_period); | ||
66 | } | 71 | } |
67 | 72 | ||
68 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 73 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -127,14 +132,39 @@ static int rt_se_boosted(struct sched_rt_entity *rt_se) | |||
127 | return p->prio != p->normal_prio; | 132 | return p->prio != p->normal_prio; |
128 | } | 133 | } |
129 | 134 | ||
135 | #ifdef CONFIG_SMP | ||
136 | static inline cpumask_t sched_rt_period_mask(void) | ||
137 | { | ||
138 | return cpu_rq(smp_processor_id())->rd->span; | ||
139 | } | ||
140 | #else | ||
141 | static inline cpumask_t sched_rt_period_mask(void) | ||
142 | { | ||
143 | return cpu_online_map; | ||
144 | } | ||
145 | #endif | ||
146 | |||
147 | static inline | ||
148 | struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) | ||
149 | { | ||
150 | return container_of(rt_b, struct task_group, rt_bandwidth)->rt_rq[cpu]; | ||
151 | } | ||
152 | |||
153 | static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | ||
154 | { | ||
155 | return &rt_rq->tg->rt_bandwidth; | ||
156 | } | ||
157 | |||
130 | #else | 158 | #else |
131 | 159 | ||
132 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | 160 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
133 | { | 161 | { |
134 | if (sysctl_sched_rt_runtime == -1) | 162 | return rt_rq->rt_runtime; |
135 | return RUNTIME_INF; | 163 | } |
136 | 164 | ||
137 | return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC; | 165 | static inline u64 sched_rt_period(struct rt_rq *rt_rq) |
166 | { | ||
167 | return ktime_to_ns(def_rt_bandwidth.rt_period); | ||
138 | } | 168 | } |
139 | 169 | ||
140 | #define for_each_leaf_rt_rq(rt_rq, rq) \ | 170 | #define for_each_leaf_rt_rq(rt_rq, rq) \ |
@@ -173,6 +203,102 @@ static inline int rt_rq_throttled(struct rt_rq *rt_rq) | |||
173 | { | 203 | { |
174 | return rt_rq->rt_throttled; | 204 | return rt_rq->rt_throttled; |
175 | } | 205 | } |
206 | |||
207 | static inline cpumask_t sched_rt_period_mask(void) | ||
208 | { | ||
209 | return cpu_online_map; | ||
210 | } | ||
211 | |||
212 | static inline | ||
213 | struct rt_rq *sched_rt_period_rt_rq(struct rt_bandwidth *rt_b, int cpu) | ||
214 | { | ||
215 | return &cpu_rq(cpu)->rt; | ||
216 | } | ||
217 | |||
218 | static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | ||
219 | { | ||
220 | return &def_rt_bandwidth; | ||
221 | } | ||
222 | |||
223 | #endif | ||
224 | |||
225 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | ||
226 | { | ||
227 | int i, idle = 1; | ||
228 | cpumask_t span; | ||
229 | |||
230 | if (rt_b->rt_runtime == RUNTIME_INF) | ||
231 | return 1; | ||
232 | |||
233 | span = sched_rt_period_mask(); | ||
234 | for_each_cpu_mask(i, span) { | ||
235 | int enqueue = 0; | ||
236 | struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i); | ||
237 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
238 | |||
239 | spin_lock(&rq->lock); | ||
240 | if (rt_rq->rt_time) { | ||
241 | u64 runtime; | ||
242 | |||
243 | spin_lock(&rt_rq->rt_runtime_lock); | ||
244 | runtime = rt_rq->rt_runtime; | ||
245 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | ||
246 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | ||
247 | rt_rq->rt_throttled = 0; | ||
248 | enqueue = 1; | ||
249 | } | ||
250 | if (rt_rq->rt_time || rt_rq->rt_nr_running) | ||
251 | idle = 0; | ||
252 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
253 | } | ||
254 | |||
255 | if (enqueue) | ||
256 | sched_rt_rq_enqueue(rt_rq); | ||
257 | spin_unlock(&rq->lock); | ||
258 | } | ||
259 | |||
260 | return idle; | ||
261 | } | ||
262 | |||
263 | #ifdef CONFIG_SMP | ||
264 | static int balance_runtime(struct rt_rq *rt_rq) | ||
265 | { | ||
266 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
267 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; | ||
268 | int i, weight, more = 0; | ||
269 | u64 rt_period; | ||
270 | |||
271 | weight = cpus_weight(rd->span); | ||
272 | |||
273 | spin_lock(&rt_b->rt_runtime_lock); | ||
274 | rt_period = ktime_to_ns(rt_b->rt_period); | ||
275 | for_each_cpu_mask(i, rd->span) { | ||
276 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | ||
277 | s64 diff; | ||
278 | |||
279 | if (iter == rt_rq) | ||
280 | continue; | ||
281 | |||
282 | spin_lock(&iter->rt_runtime_lock); | ||
283 | diff = iter->rt_runtime - iter->rt_time; | ||
284 | if (diff > 0) { | ||
285 | do_div(diff, weight); | ||
286 | if (rt_rq->rt_runtime + diff > rt_period) | ||
287 | diff = rt_period - rt_rq->rt_runtime; | ||
288 | iter->rt_runtime -= diff; | ||
289 | rt_rq->rt_runtime += diff; | ||
290 | more = 1; | ||
291 | if (rt_rq->rt_runtime == rt_period) { | ||
292 | spin_unlock(&iter->rt_runtime_lock); | ||
293 | break; | ||
294 | } | ||
295 | } | ||
296 | spin_unlock(&iter->rt_runtime_lock); | ||
297 | } | ||
298 | spin_unlock(&rt_b->rt_runtime_lock); | ||
299 | |||
300 | return more; | ||
301 | } | ||
176 | #endif | 302 | #endif |
177 | 303 | ||
178 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 304 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
@@ -197,12 +323,24 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
197 | if (rt_rq->rt_throttled) | 323 | if (rt_rq->rt_throttled) |
198 | return rt_rq_throttled(rt_rq); | 324 | return rt_rq_throttled(rt_rq); |
199 | 325 | ||
326 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) | ||
327 | return 0; | ||
328 | |||
329 | #ifdef CONFIG_SMP | ||
200 | if (rt_rq->rt_time > runtime) { | 330 | if (rt_rq->rt_time > runtime) { |
201 | struct rq *rq = rq_of_rt_rq(rt_rq); | 331 | int more; |
202 | 332 | ||
203 | rq->rt_throttled = 1; | 333 | spin_unlock(&rt_rq->rt_runtime_lock); |
204 | rt_rq->rt_throttled = 1; | 334 | more = balance_runtime(rt_rq); |
335 | spin_lock(&rt_rq->rt_runtime_lock); | ||
205 | 336 | ||
337 | if (more) | ||
338 | runtime = sched_rt_runtime(rt_rq); | ||
339 | } | ||
340 | #endif | ||
341 | |||
342 | if (rt_rq->rt_time > runtime) { | ||
343 | rt_rq->rt_throttled = 1; | ||
206 | if (rt_rq_throttled(rt_rq)) { | 344 | if (rt_rq_throttled(rt_rq)) { |
207 | sched_rt_rq_dequeue(rt_rq); | 345 | sched_rt_rq_dequeue(rt_rq); |
208 | return 1; | 346 | return 1; |
@@ -212,29 +350,6 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
212 | return 0; | 350 | return 0; |
213 | } | 351 | } |
214 | 352 | ||
215 | static void update_sched_rt_period(struct rq *rq) | ||
216 | { | ||
217 | struct rt_rq *rt_rq; | ||
218 | u64 period; | ||
219 | |||
220 | while (rq->clock > rq->rt_period_expire) { | ||
221 | period = (u64)sysctl_sched_rt_period * NSEC_PER_USEC; | ||
222 | rq->rt_period_expire += period; | ||
223 | |||
224 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
225 | u64 runtime = sched_rt_runtime(rt_rq); | ||
226 | |||
227 | rt_rq->rt_time -= min(rt_rq->rt_time, runtime); | ||
228 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | ||
229 | rt_rq->rt_throttled = 0; | ||
230 | sched_rt_rq_enqueue(rt_rq); | ||
231 | } | ||
232 | } | ||
233 | |||
234 | rq->rt_throttled = 0; | ||
235 | } | ||
236 | } | ||
237 | |||
238 | /* | 353 | /* |
239 | * Update the current task's runtime statistics. Skip current tasks that | 354 | * Update the current task's runtime statistics. Skip current tasks that |
240 | * are not in our scheduling class. | 355 | * are not in our scheduling class. |
@@ -259,9 +374,15 @@ static void update_curr_rt(struct rq *rq) | |||
259 | curr->se.exec_start = rq->clock; | 374 | curr->se.exec_start = rq->clock; |
260 | cpuacct_charge(curr, delta_exec); | 375 | cpuacct_charge(curr, delta_exec); |
261 | 376 | ||
262 | rt_rq->rt_time += delta_exec; | 377 | for_each_sched_rt_entity(rt_se) { |
263 | if (sched_rt_runtime_exceeded(rt_rq)) | 378 | rt_rq = rt_rq_of_se(rt_se); |
264 | resched_task(curr); | 379 | |
380 | spin_lock(&rt_rq->rt_runtime_lock); | ||
381 | rt_rq->rt_time += delta_exec; | ||
382 | if (sched_rt_runtime_exceeded(rt_rq)) | ||
383 | resched_task(curr); | ||
384 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
385 | } | ||
265 | } | 386 | } |
266 | 387 | ||
267 | static inline | 388 | static inline |
@@ -284,6 +405,11 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
284 | #ifdef CONFIG_RT_GROUP_SCHED | 405 | #ifdef CONFIG_RT_GROUP_SCHED |
285 | if (rt_se_boosted(rt_se)) | 406 | if (rt_se_boosted(rt_se)) |
286 | rt_rq->rt_nr_boosted++; | 407 | rt_rq->rt_nr_boosted++; |
408 | |||
409 | if (rt_rq->tg) | ||
410 | start_rt_bandwidth(&rt_rq->tg->rt_bandwidth); | ||
411 | #else | ||
412 | start_rt_bandwidth(&def_rt_bandwidth); | ||
287 | #endif | 413 | #endif |
288 | } | 414 | } |
289 | 415 | ||
@@ -353,27 +479,21 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se) | |||
353 | /* | 479 | /* |
354 | * Because the prio of an upper entry depends on the lower | 480 | * Because the prio of an upper entry depends on the lower |
355 | * entries, we must remove entries top - down. | 481 | * entries, we must remove entries top - down. |
356 | * | ||
357 | * XXX: O(1/2 h^2) because we can only walk up, not down the chain. | ||
358 | * doesn't matter much for now, as h=2 for GROUP_SCHED. | ||
359 | */ | 482 | */ |
360 | static void dequeue_rt_stack(struct task_struct *p) | 483 | static void dequeue_rt_stack(struct task_struct *p) |
361 | { | 484 | { |
362 | struct sched_rt_entity *rt_se, *top_se; | 485 | struct sched_rt_entity *rt_se, *back = NULL; |
363 | 486 | ||
364 | /* | 487 | rt_se = &p->rt; |
365 | * dequeue all, top - down. | 488 | for_each_sched_rt_entity(rt_se) { |
366 | */ | 489 | rt_se->back = back; |
367 | do { | 490 | back = rt_se; |
368 | rt_se = &p->rt; | 491 | } |
369 | top_se = NULL; | 492 | |
370 | for_each_sched_rt_entity(rt_se) { | 493 | for (rt_se = back; rt_se; rt_se = rt_se->back) { |
371 | if (on_rt_rq(rt_se)) | 494 | if (on_rt_rq(rt_se)) |
372 | top_se = rt_se; | 495 | dequeue_rt_entity(rt_se); |
373 | } | 496 | } |
374 | if (top_se) | ||
375 | dequeue_rt_entity(top_se); | ||
376 | } while (top_se); | ||
377 | } | 497 | } |
378 | 498 | ||
379 | /* | 499 | /* |
@@ -393,6 +513,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | |||
393 | */ | 513 | */ |
394 | for_each_sched_rt_entity(rt_se) | 514 | for_each_sched_rt_entity(rt_se) |
395 | enqueue_rt_entity(rt_se); | 515 | enqueue_rt_entity(rt_se); |
516 | |||
517 | inc_cpu_load(rq, p->se.load.weight); | ||
396 | } | 518 | } |
397 | 519 | ||
398 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 520 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
@@ -412,6 +534,8 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | |||
412 | if (rt_rq && rt_rq->rt_nr_running) | 534 | if (rt_rq && rt_rq->rt_nr_running) |
413 | enqueue_rt_entity(rt_se); | 535 | enqueue_rt_entity(rt_se); |
414 | } | 536 | } |
537 | |||
538 | dec_cpu_load(rq, p->se.load.weight); | ||
415 | } | 539 | } |
416 | 540 | ||
417 | /* | 541 | /* |
@@ -1001,7 +1125,8 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
1001 | return 0; | 1125 | return 0; |
1002 | } | 1126 | } |
1003 | 1127 | ||
1004 | static void set_cpus_allowed_rt(struct task_struct *p, cpumask_t *new_mask) | 1128 | static void set_cpus_allowed_rt(struct task_struct *p, |
1129 | const cpumask_t *new_mask) | ||
1005 | { | 1130 | { |
1006 | int weight = cpus_weight(*new_mask); | 1131 | int weight = cpus_weight(*new_mask); |
1007 | 1132 | ||