Diffstat (limited to 'kernel/sched_rt.c')
 kernel/sched_rt.c | 436 ++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 290 insertions(+), 146 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0f3c19197fa4..f85a76363eee 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq)
 
 static inline void rt_set_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	cpu_set(rq->cpu, rq->rd->rto_mask);
 	/*
 	 * Make sure the mask is visible before we set
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq)
 
 static inline void rt_clear_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	/* the order here really doesn't matter */
 	atomic_dec(&rq->rd->rto_count);
 	cpu_clear(rq->cpu, rq->rd->rto_mask);
@@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 	return &rt_rq->tg->rt_bandwidth;
 }
 
-#else
+#else /* !CONFIG_RT_GROUP_SCHED */
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
@@ -220,7 +226,160 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 	return &def_rt_bandwidth;
 }
 
-#endif
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+#ifdef CONFIG_SMP
+static int do_balance_runtime(struct rt_rq *rt_rq)
+{
+	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	int i, weight, more = 0;
+	u64 rt_period;
+
+	weight = cpus_weight(rd->span);
+
+	spin_lock(&rt_b->rt_runtime_lock);
+	rt_period = ktime_to_ns(rt_b->rt_period);
+	for_each_cpu_mask_nr(i, rd->span) {
+		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+		s64 diff;
+
+		if (iter == rt_rq)
+			continue;
+
+		spin_lock(&iter->rt_runtime_lock);
+		if (iter->rt_runtime == RUNTIME_INF)
+			goto next;
+
+		diff = iter->rt_runtime - iter->rt_time;
+		if (diff > 0) {
+			do_div(diff, weight);
+			if (rt_rq->rt_runtime + diff > rt_period)
+				diff = rt_period - rt_rq->rt_runtime;
+			iter->rt_runtime -= diff;
+			rt_rq->rt_runtime += diff;
+			more = 1;
+			if (rt_rq->rt_runtime == rt_period) {
+				spin_unlock(&iter->rt_runtime_lock);
+				break;
+			}
+		}
+next:
+		spin_unlock(&iter->rt_runtime_lock);
+	}
+	spin_unlock(&rt_b->rt_runtime_lock);
+
+	return more;
+}
+
+static void __disable_runtime(struct rq *rq)
+{
+	struct root_domain *rd = rq->rd;
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+		s64 want;
+		int i;
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		if (rt_rq->rt_runtime == RUNTIME_INF ||
+		    rt_rq->rt_runtime == rt_b->rt_runtime)
+			goto balanced;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+
+		want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+		for_each_cpu_mask(i, rd->span) {
+			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+			s64 diff;
+
+			if (iter == rt_rq)
+				continue;
+
+			spin_lock(&iter->rt_runtime_lock);
+			if (want > 0) {
+				diff = min_t(s64, iter->rt_runtime, want);
+				iter->rt_runtime -= diff;
+				want -= diff;
+			} else {
+				iter->rt_runtime -= want;
+				want -= want;
+			}
+			spin_unlock(&iter->rt_runtime_lock);
+
+			if (!want)
+				break;
+		}
+
+		spin_lock(&rt_rq->rt_runtime_lock);
+		BUG_ON(want);
+balanced:
+		rt_rq->rt_runtime = RUNTIME_INF;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void disable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__disable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = rt_b->rt_runtime;
+		rt_rq->rt_time = 0;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void enable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__enable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static int balance_runtime(struct rt_rq *rt_rq)
+{
+	int more = 0;
+
+	if (rt_rq->rt_time > rt_rq->rt_runtime) {
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		more = do_balance_runtime(rt_rq);
+		spin_lock(&rt_rq->rt_runtime_lock);
+	}
+
+	return more;
+}
+#else /* !CONFIG_SMP */
+static inline int balance_runtime(struct rt_rq *rt_rq)
+{
+	return 0;
+}
+#endif /* CONFIG_SMP */
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {
@@ -241,6 +400,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			u64 runtime;
 
 			spin_lock(&rt_rq->rt_runtime_lock);
+			if (rt_rq->rt_throttled)
+				balance_runtime(rt_rq);
 			runtime = rt_rq->rt_runtime;
 			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
 			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
@@ -261,47 +422,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	return idle;
 }
 
-#ifdef CONFIG_SMP
-static int balance_runtime(struct rt_rq *rt_rq)
-{
-	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
-	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
-	int i, weight, more = 0;
-	u64 rt_period;
-
-	weight = cpus_weight(rd->span);
-
-	spin_lock(&rt_b->rt_runtime_lock);
-	rt_period = ktime_to_ns(rt_b->rt_period);
-	for_each_cpu_mask(i, rd->span) {
-		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
-		s64 diff;
-
-		if (iter == rt_rq)
-			continue;
-
-		spin_lock(&iter->rt_runtime_lock);
-		diff = iter->rt_runtime - iter->rt_time;
-		if (diff > 0) {
-			do_div(diff, weight);
-			if (rt_rq->rt_runtime + diff > rt_period)
-				diff = rt_period - rt_rq->rt_runtime;
-			iter->rt_runtime -= diff;
-			rt_rq->rt_runtime += diff;
-			more = 1;
-			if (rt_rq->rt_runtime == rt_period) {
-				spin_unlock(&iter->rt_runtime_lock);
-				break;
-			}
-		}
-		spin_unlock(&iter->rt_runtime_lock);
-	}
-	spin_unlock(&rt_b->rt_runtime_lock);
-
-	return more;
-}
-#endif
-
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -327,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
 		return 0;
 
-#ifdef CONFIG_SMP
-	if (rt_rq->rt_time > runtime) {
-		int more;
-
-		spin_unlock(&rt_rq->rt_runtime_lock);
-		more = balance_runtime(rt_rq);
-		spin_lock(&rt_rq->rt_runtime_lock);
-
-		if (more)
-			runtime = sched_rt_runtime(rt_rq);
-	}
-#endif
+	balance_runtime(rt_rq);
+	runtime = sched_rt_runtime(rt_rq);
+	if (runtime == RUNTIME_INF)
+		return 0;
 
 	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;
@@ -392,12 +504,23 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+	if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+#ifdef CONFIG_SMP
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+#endif
+
 		rt_rq->highest_prio = rt_se_prio(rt_se);
+#ifdef CONFIG_SMP
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_se_prio(rt_se));
+#endif
+	}
 #endif
 #ifdef CONFIG_SMP
 	if (rt_se->nr_cpus_allowed > 1) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+
 		rq->rt.rt_nr_migratory++;
 	}
 
@@ -417,6 +540,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static inline
 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_SMP
+	int highest_prio = rt_rq->highest_prio;
+#endif
+
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running--;
@@ -440,6 +567,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 		rq->rt.rt_nr_migratory--;
 	}
 
+	if (rt_rq->highest_prio != highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_rq->highest_prio);
+	}
+
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -455,6 +590,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
 	/*
 	 * Don't enqueue the group if its throttled, or when empty.
@@ -465,7 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;
 
-	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+	list_add_tail(&rt_se->run_list, queue);
 	__set_bit(rt_se_prio(rt_se), array->bitmap);
 
 	inc_rt_tasks(rt_se, rt_rq);
@@ -532,6 +668,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 	rt_se->timeout = 0;
 
 	enqueue_rt_entity(rt_se);
+
+	inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -540,36 +678,42 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se);
+
+	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static
-void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+static void
+requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
 {
-	struct rt_prio_array *array = &rt_rq->active;
-	struct list_head *queue = array->queue + rt_se_prio(rt_se);
+	if (on_rt_rq(rt_se)) {
+		struct rt_prio_array *array = &rt_rq->active;
+		struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
-	if (on_rt_rq(rt_se))
-		list_move_tail(&rt_se->run_list, queue);
+		if (head)
+			list_move(&rt_se->run_list, queue);
+		else
+			list_move_tail(&rt_se->run_list, queue);
+	}
 }
 
-static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 	struct rt_rq *rt_rq;
 
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
-		requeue_rt_entity(rt_rq, rt_se);
+		requeue_rt_entity(rt_rq, rt_se, head);
 	}
 }
 
 static void yield_task_rt(struct rq *rq)
 {
-	requeue_task_rt(rq, rq->curr);
+	requeue_task_rt(rq, rq->curr, 0);
 }
 
 #ifdef CONFIG_SMP
@@ -609,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
 	 */
 	return task_cpu(p);
 }
+
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+{
+	cpumask_t mask;
+
+	if (rq->curr->rt.nr_cpus_allowed == 1)
+		return;
+
+	if (p->rt.nr_cpus_allowed != 1
+	    && cpupri_find(&rq->rd->cpupri, p, &mask))
+		return;
+
+	if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+		return;
+
+	/*
+	 * There appears to be other cpus that can accept
+	 * current and none to run 'p', so lets reschedule
+	 * to try and push current away:
+	 */
+	requeue_task_rt(rq, p, 1);
+	resched_task(rq->curr);
+}
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -616,8 +784,27 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 {
-	if (p->prio < rq->curr->prio)
+	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * If:
+	 *
+	 * - the newly woken task is of equal priority to the current task
+	 * - the newly woken task is non-migratable while current is migratable
+	 * - current will be preempted on the next reschedule
+	 *
+	 * we should check to see if current can readily move to a different
+	 * cpu. If so, we will reschedule to allow the push logic to try
+	 * to move current somewhere else, making room for our non-migratable
+	 * task.
+	 */
+	if (p->prio == rq->curr->prio && !need_resched())
+		check_preempt_equal_prio(rq, p);
+#endif
 }
 
 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -720,73 +907,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 
 static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
 
-static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
-{
-	int lowest_prio = -1;
-	int lowest_cpu = -1;
-	int count = 0;
-	int cpu;
-
-	cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
-
-	/*
-	 * Scan each rq for the lowest prio.
-	 */
-	for_each_cpu_mask(cpu, *lowest_mask) {
-		struct rq *rq = cpu_rq(cpu);
-
-		/* We look for lowest RT prio or non-rt CPU */
-		if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-			/*
-			 * if we already found a low RT queue
-			 * and now we found this non-rt queue
-			 * clear the mask and set our bit.
-			 * Otherwise just return the queue as is
-			 * and the count==1 will cause the algorithm
-			 * to use the first bit found.
-			 */
-			if (lowest_cpu != -1) {
-				cpus_clear(*lowest_mask);
-				cpu_set(rq->cpu, *lowest_mask);
-			}
-			return 1;
-		}
-
-		/* no locking for now */
-		if ((rq->rt.highest_prio > task->prio)
-		    && (rq->rt.highest_prio >= lowest_prio)) {
-			if (rq->rt.highest_prio > lowest_prio) {
-				/* new low - clear old data */
-				lowest_prio = rq->rt.highest_prio;
-				lowest_cpu = cpu;
-				count = 0;
-			}
-			count++;
-		} else
-			cpu_clear(cpu, *lowest_mask);
-	}
-
-	/*
-	 * Clear out all the set bits that represent
-	 * runqueues that were of higher prio than
-	 * the lowest_prio.
-	 */
-	if (lowest_cpu > 0) {
-		/*
-		 * Perhaps we could add another cpumask op to
-		 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
-		 * Then that could be optimized to use memset and such.
-		 */
-		for_each_cpu_mask(cpu, *lowest_mask) {
-			if (cpu >= lowest_cpu)
-				break;
-			cpu_clear(cpu, *lowest_mask);
-		}
-	}
-
-	return count;
-}
-
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
 	int first;
@@ -808,17 +928,19 @@ static int find_lowest_rq(struct task_struct *task)
 	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
-	int count = find_lowest_cpus(task, lowest_mask);
 
-	if (!count)
+	if (task->rt.nr_cpus_allowed == 1)
+		return -1; /* No other targets possible */
+
+	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return -1; /* No targets found */
 
 	/*
-	 * There is no sense in performing an optimal search if only one
-	 * target is found.
+	 * Only consider CPUs that are usable for migration.
+	 * I guess we might want to change cpupri_find() to ignore those
+	 * in the first place.
 	 */
-	if (count == 1)
-		return first_cpu(*lowest_mask);
+	cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
 
 	/*
 	 * At this point we have built a mask of cpus representing the
@@ -1006,7 +1128,7 @@ static int pull_rt_task(struct rq *this_rq)
 
 	next = pick_next_task_rt(this_rq);
 
-	for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
+	for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
 		if (this_cpu == cpu)
 			continue;
 
@@ -1163,17 +1285,25 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 }
 
 /* Assumes rq->lock is held */
-static void join_domain_rt(struct rq *rq)
+static void rq_online_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_set_overload(rq);
+
+	__enable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 
 /* Assumes rq->lock is held */
-static void leave_domain_rt(struct rq *rq)
+static void rq_offline_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_clear_overload(rq);
+
+	__disable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }
 
 /*
@@ -1306,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	 * on the queue:
 	 */
 	if (p->rt.run_list.prev != p->rt.run_list.next) {
-		requeue_task_rt(rq, p);
+		requeue_task_rt(rq, p, 0);
 		set_tsk_need_resched(p);
 	}
 }
@@ -1336,8 +1466,8 @@ static const struct sched_class rt_sched_class = {
 	.load_balance		= load_balance_rt,
 	.move_one_task		= move_one_task_rt,
 	.set_cpus_allowed	= set_cpus_allowed_rt,
-	.join_domain		= join_domain_rt,
-	.leave_domain		= leave_domain_rt,
+	.rq_online		= rq_online_rt,
+	.rq_offline		= rq_offline_rt,
 	.pre_schedule		= pre_schedule_rt,
 	.post_schedule		= post_schedule_rt,
 	.task_wake_up		= task_wake_up_rt,
@@ -1350,3 +1480,17 @@ static const struct sched_class rt_sched_class = {
 	.prio_changed		= prio_changed_rt,
 	.switched_to		= switched_to_rt,
 };
+
+#ifdef CONFIG_SCHED_DEBUG
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+
+static void print_rt_stats(struct seq_file *m, int cpu)
+{
+	struct rt_rq *rt_rq;
+
+	rcu_read_lock();
+	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+		print_rt_rq(m, cpu, rt_rq);
+	rcu_read_unlock();
+}
+#endif /* CONFIG_SCHED_DEBUG */