Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--  kernel/sched_rt.c  457
1 file changed, 295 insertions(+), 162 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 3432d573205d..47ceac9e8552 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq)
 
 static inline void rt_set_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	cpu_set(rq->cpu, rq->rd->rto_mask);
 	/*
 	 * Make sure the mask is visible before we set
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq)
 
 static inline void rt_clear_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	/* the order here really doesn't matter */
 	atomic_dec(&rq->rd->rto_count);
 	cpu_clear(rq->cpu, rq->rd->rto_mask);
@@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 	return &rt_rq->tg->rt_bandwidth;
 }
 
-#else
+#else /* !CONFIG_RT_GROUP_SCHED */
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
@@ -220,48 +226,10 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 	return &def_rt_bandwidth;
 }
 
-#endif
-
-static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
-{
-	int i, idle = 1;
-	cpumask_t span;
-
-	if (rt_b->rt_runtime == RUNTIME_INF)
-		return 1;
-
-	span = sched_rt_period_mask();
-	for_each_cpu_mask(i, span) {
-		int enqueue = 0;
-		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
-		struct rq *rq = rq_of_rt_rq(rt_rq);
-
-		spin_lock(&rq->lock);
-		if (rt_rq->rt_time) {
-			u64 runtime;
-
-			spin_lock(&rt_rq->rt_runtime_lock);
-			runtime = rt_rq->rt_runtime;
-			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
-			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
-				rt_rq->rt_throttled = 0;
-				enqueue = 1;
-			}
-			if (rt_rq->rt_time || rt_rq->rt_nr_running)
-				idle = 0;
-			spin_unlock(&rt_rq->rt_runtime_lock);
-		}
-
-		if (enqueue)
-			sched_rt_rq_enqueue(rt_rq);
-		spin_unlock(&rq->lock);
-	}
-
-	return idle;
-}
+#endif /* CONFIG_RT_GROUP_SCHED */
 
 #ifdef CONFIG_SMP
-static int balance_runtime(struct rt_rq *rt_rq)
+static int do_balance_runtime(struct rt_rq *rt_rq)
 {
 	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
 	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
@@ -280,6 +248,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
 			continue;
 
 		spin_lock(&iter->rt_runtime_lock);
+		if (iter->rt_runtime == RUNTIME_INF)
+			goto next;
+
 		diff = iter->rt_runtime - iter->rt_time;
 		if (diff > 0) {
 			do_div(diff, weight);
@@ -293,13 +264,163 @@ static int balance_runtime(struct rt_rq *rt_rq)
 				break;
 			}
 		}
+next:
 		spin_unlock(&iter->rt_runtime_lock);
 	}
 	spin_unlock(&rt_b->rt_runtime_lock);
 
 	return more;
 }
-#endif
+
+static void __disable_runtime(struct rq *rq)
+{
+	struct root_domain *rd = rq->rd;
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+		s64 want;
+		int i;
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		if (rt_rq->rt_runtime == RUNTIME_INF ||
+				rt_rq->rt_runtime == rt_b->rt_runtime)
+			goto balanced;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+
+		want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+		for_each_cpu_mask(i, rd->span) {
+			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+			s64 diff;
+
+			if (iter == rt_rq)
+				continue;
+
+			spin_lock(&iter->rt_runtime_lock);
+			if (want > 0) {
+				diff = min_t(s64, iter->rt_runtime, want);
+				iter->rt_runtime -= diff;
+				want -= diff;
+			} else {
+				iter->rt_runtime -= want;
+				want -= want;
+			}
+			spin_unlock(&iter->rt_runtime_lock);
+
+			if (!want)
+				break;
+		}
+
+		spin_lock(&rt_rq->rt_runtime_lock);
+		BUG_ON(want);
+balanced:
+		rt_rq->rt_runtime = RUNTIME_INF;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void disable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__disable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = rt_b->rt_runtime;
+		rt_rq->rt_time = 0;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void enable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__enable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static int balance_runtime(struct rt_rq *rt_rq)
+{
+	int more = 0;
+
+	if (rt_rq->rt_time > rt_rq->rt_runtime) {
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		more = do_balance_runtime(rt_rq);
+		spin_lock(&rt_rq->rt_runtime_lock);
+	}
+
+	return more;
+}
+#else /* !CONFIG_SMP */
+static inline int balance_runtime(struct rt_rq *rt_rq)
+{
+	return 0;
+}
+#endif /* CONFIG_SMP */
+
+static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
+{
+	int i, idle = 1;
+	cpumask_t span;
+
+	if (rt_b->rt_runtime == RUNTIME_INF)
+		return 1;
+
+	span = sched_rt_period_mask();
+	for_each_cpu_mask(i, span) {
+		int enqueue = 0;
+		struct rt_rq *rt_rq = sched_rt_period_rt_rq(rt_b, i);
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
+		spin_lock(&rq->lock);
+		if (rt_rq->rt_time) {
+			u64 runtime;
+
+			spin_lock(&rt_rq->rt_runtime_lock);
+			if (rt_rq->rt_throttled)
+				balance_runtime(rt_rq);
+			runtime = rt_rq->rt_runtime;
+			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
+			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
+				rt_rq->rt_throttled = 0;
+				enqueue = 1;
+			}
+			if (rt_rq->rt_time || rt_rq->rt_nr_running)
+				idle = 0;
+			spin_unlock(&rt_rq->rt_runtime_lock);
+		} else if (rt_rq->rt_nr_running)
+			idle = 0;
+
+		if (enqueue)
+			sched_rt_rq_enqueue(rt_rq);
+		spin_unlock(&rq->lock);
+	}
+
+	return idle;
+}
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
@@ -326,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
 		return 0;
 
-#ifdef CONFIG_SMP
-	if (rt_rq->rt_time > runtime) {
-		int more;
-
-		spin_unlock(&rt_rq->rt_runtime_lock);
-		more = balance_runtime(rt_rq);
-		spin_lock(&rt_rq->rt_runtime_lock);
-
-		if (more)
-			runtime = sched_rt_runtime(rt_rq);
-	}
-#endif
+	balance_runtime(rt_rq);
+	runtime = sched_rt_runtime(rt_rq);
+	if (runtime == RUNTIME_INF)
+		return 0;
 
 	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;
@@ -391,12 +504,21 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+	if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
 		rt_rq->highest_prio = rt_se_prio(rt_se);
+#ifdef CONFIG_SMP
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_se_prio(rt_se));
+#endif
+	}
 #endif
 #ifdef CONFIG_SMP
 	if (rt_se->nr_cpus_allowed > 1) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+
 		rq->rt.rt_nr_migratory++;
 	}
 
@@ -416,6 +538,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static inline
 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_SMP
+	int highest_prio = rt_rq->highest_prio;
+#endif
+
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running--;
@@ -439,6 +565,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 		rq->rt.rt_nr_migratory--;
 	}
 
+	if (rt_rq->highest_prio != highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_rq->highest_prio);
+	}
+
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -449,22 +583,33 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 #endif
 }
 
-static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
+static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 {
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
-	if (group_rq && rt_rq_throttled(group_rq))
+	/*
+	 * Don't enqueue the group if its throttled, or when empty.
+	 * The latter is a consequence of the former when a child group
+	 * get throttled and the current group doesn't have any other
+	 * active members.
+	 */
+	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;
 
-	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+	if (rt_se->nr_cpus_allowed == 1)
+		list_add(&rt_se->run_list, queue);
+	else
+		list_add_tail(&rt_se->run_list, queue);
+
 	__set_bit(rt_se_prio(rt_se), array->bitmap);
 
 	inc_rt_tasks(rt_se, rt_rq);
 }
 
-static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+static void __dequeue_rt_entity(struct sched_rt_entity *rt_se)
 {
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
@@ -480,11 +625,10 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
  * Because the prio of an upper entry depends on the lower
  * entries, we must remove entries top - down.
  */
-static void dequeue_rt_stack(struct task_struct *p)
+static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
 {
-	struct sched_rt_entity *rt_se, *back = NULL;
+	struct sched_rt_entity *back = NULL;
 
-	rt_se = &p->rt;
 	for_each_sched_rt_entity(rt_se) {
 		rt_se->back = back;
 		back = rt_se;
@@ -492,7 +636,26 @@ static void dequeue_rt_stack(struct task_struct *p)
 
 	for (rt_se = back; rt_se; rt_se = rt_se->back) {
 		if (on_rt_rq(rt_se))
-			dequeue_rt_entity(rt_se);
+			__dequeue_rt_entity(rt_se);
+	}
+}
+
+static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
+{
+	dequeue_rt_stack(rt_se);
+	for_each_sched_rt_entity(rt_se)
+		__enqueue_rt_entity(rt_se);
+}
+
+static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+{
+	dequeue_rt_stack(rt_se);
+
+	for_each_sched_rt_entity(rt_se) {
+		struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+		if (rt_rq && rt_rq->rt_nr_running)
+			__enqueue_rt_entity(rt_se);
 	}
 }
 
@@ -506,32 +669,19 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 	if (wakeup)
 		rt_se->timeout = 0;
 
-	dequeue_rt_stack(p);
+	enqueue_rt_entity(rt_se);
 
-	/*
-	 * enqueue everybody, bottom - up.
-	 */
-	for_each_sched_rt_entity(rt_se)
-		enqueue_rt_entity(rt_se);
+	inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
-	struct rt_rq *rt_rq;
 
 	update_curr_rt(rq);
+	dequeue_rt_entity(rt_se);
 
-	dequeue_rt_stack(p);
-
-	/*
-	 * re-enqueue all non-empty rt_rq entities.
-	 */
-	for_each_sched_rt_entity(rt_se) {
-		rt_rq = group_rt_rq(rt_se);
-		if (rt_rq && rt_rq->rt_nr_running)
-			enqueue_rt_entity(rt_se);
-	}
+	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
@@ -543,7 +693,11 @@ void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
 {
 	struct rt_prio_array *array = &rt_rq->active;
 
-	list_move_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+	if (on_rt_rq(rt_se)) {
+		list_del_init(&rt_se->run_list);
+		list_add_tail(&rt_se->run_list,
+			      array->queue + rt_se_prio(rt_se));
+	}
 }
 
 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
@@ -606,8 +760,37 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 {
-	if (p->prio < rq->curr->prio)
+	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * If:
+	 *
+	 * - the newly woken task is of equal priority to the current task
+	 * - the newly woken task is non-migratable while current is migratable
+	 * - current will be preempted on the next reschedule
+	 *
+	 * we should check to see if current can readily move to a different
+	 * cpu. If so, we will reschedule to allow the push logic to try
+	 * to move current somewhere else, making room for our non-migratable
+	 * task.
+	 */
+	if((p->prio == rq->curr->prio)
+	   && p->rt.nr_cpus_allowed == 1
+	   && rq->curr->rt.nr_cpus_allowed != 1) {
+		cpumask_t mask;
+
+		if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+			/*
+			 * There appears to be other cpus that can accept
+			 * current, so lets reschedule to try and push it away
+			 */
+			resched_task(rq->curr);
+	}
+#endif
 }
 
 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -710,73 +893,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 
 static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
 
-static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
-{
-	int lowest_prio = -1;
-	int lowest_cpu = -1;
-	int count = 0;
-	int cpu;
-
-	cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
-
-	/*
-	 * Scan each rq for the lowest prio.
-	 */
-	for_each_cpu_mask(cpu, *lowest_mask) {
-		struct rq *rq = cpu_rq(cpu);
-
-		/* We look for lowest RT prio or non-rt CPU */
-		if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-			/*
-			 * if we already found a low RT queue
-			 * and now we found this non-rt queue
-			 * clear the mask and set our bit.
-			 * Otherwise just return the queue as is
-			 * and the count==1 will cause the algorithm
-			 * to use the first bit found.
-			 */
-			if (lowest_cpu != -1) {
-				cpus_clear(*lowest_mask);
-				cpu_set(rq->cpu, *lowest_mask);
-			}
-			return 1;
-		}
-
-		/* no locking for now */
-		if ((rq->rt.highest_prio > task->prio)
-		    && (rq->rt.highest_prio >= lowest_prio)) {
-			if (rq->rt.highest_prio > lowest_prio) {
-				/* new low - clear old data */
-				lowest_prio = rq->rt.highest_prio;
-				lowest_cpu = cpu;
-				count = 0;
-			}
-			count++;
-		} else
-			cpu_clear(cpu, *lowest_mask);
-	}
-
-	/*
-	 * Clear out all the set bits that represent
-	 * runqueues that were of higher prio than
-	 * the lowest_prio.
-	 */
-	if (lowest_cpu > 0) {
-		/*
-		 * Perhaps we could add another cpumask op to
-		 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
-		 * Then that could be optimized to use memset and such.
-		 */
-		for_each_cpu_mask(cpu, *lowest_mask) {
-			if (cpu >= lowest_cpu)
-				break;
-			cpu_clear(cpu, *lowest_mask);
-		}
-	}
-
-	return count;
-}
-
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
 	int first;
@@ -798,17 +914,12 @@ static int find_lowest_rq(struct task_struct *task)
 	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu      = task_cpu(task);
-	int count    = find_lowest_cpus(task, lowest_mask);
 
-	if (!count)
-		return -1; /* No targets found */
+	if (task->rt.nr_cpus_allowed == 1)
+		return -1; /* No other targets possible */
 
-	/*
-	 * There is no sense in performing an optimal search if only one
-	 * target is found.
-	 */
-	if (count == 1)
-		return first_cpu(*lowest_mask);
+	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
+		return -1; /* No targets found */
 
 	/*
 	 * At this point we have built a mask of cpus representing the
@@ -1153,17 +1264,25 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 }
 
 /* Assumes rq->lock is held */
-static void join_domain_rt(struct rq *rq)
+static void rq_online_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_set_overload(rq);
+
+	__enable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 
 /* Assumes rq->lock is held */
-static void leave_domain_rt(struct rq *rq)
+static void rq_offline_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_clear_overload(rq);
+
+	__disable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }
 
 /*
@@ -1326,8 +1445,8 @@ static const struct sched_class rt_sched_class = {
 	.load_balance		= load_balance_rt,
 	.move_one_task		= move_one_task_rt,
 	.set_cpus_allowed	= set_cpus_allowed_rt,
-	.join_domain		= join_domain_rt,
-	.leave_domain		= leave_domain_rt,
+	.rq_online		= rq_online_rt,
+	.rq_offline		= rq_offline_rt,
 	.pre_schedule		= pre_schedule_rt,
 	.post_schedule		= post_schedule_rt,
 	.task_wake_up		= task_wake_up_rt,
@@ -1340,3 +1459,17 @@ static const struct sched_class rt_sched_class = {
 	.prio_changed		= prio_changed_rt,
 	.switched_to		= switched_to_rt,
 };
+
+#ifdef CONFIG_SCHED_DEBUG
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+
+static void print_rt_stats(struct seq_file *m, int cpu)
+{
+	struct rt_rq *rt_rq;
+
+	rcu_read_lock();
+	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+		print_rt_rq(m, cpu, rt_rq);
+	rcu_read_unlock();
+}
+#endif /* CONFIG_SCHED_DEBUG */