Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--	kernel/sched_rt.c	275
1 file changed, 185 insertions(+), 90 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1dad5bbb59b6..fee5fa7c72db 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq)
 
 static inline void rt_set_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	cpu_set(rq->cpu, rq->rd->rto_mask);
 	/*
 	 * Make sure the mask is visible before we set
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq)
 
 static inline void rt_clear_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	/* the order here really doesn't matter */
 	atomic_dec(&rq->rd->rto_count);
 	cpu_clear(rq->cpu, rq->rd->rto_mask);
@@ -280,6 +286,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
 			continue;
 
 		spin_lock(&iter->rt_runtime_lock);
+		if (iter->rt_runtime == RUNTIME_INF)
+			goto next;
+
 		diff = iter->rt_runtime - iter->rt_time;
 		if (diff > 0) {
 			do_div(diff, weight);
@@ -293,12 +302,105 @@ static int balance_runtime(struct rt_rq *rt_rq)
 				break;
 			}
 		}
+next:
 		spin_unlock(&iter->rt_runtime_lock);
 	}
 	spin_unlock(&rt_b->rt_runtime_lock);
 
 	return more;
 }
+
+static void __disable_runtime(struct rq *rq)
+{
+	struct root_domain *rd = rq->rd;
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+		s64 want;
+		int i;
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		if (rt_rq->rt_runtime == RUNTIME_INF ||
+		    rt_rq->rt_runtime == rt_b->rt_runtime)
+			goto balanced;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+
+		want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+		for_each_cpu_mask(i, rd->span) {
+			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+			s64 diff;
+
+			if (iter == rt_rq)
+				continue;
+
+			spin_lock(&iter->rt_runtime_lock);
+			if (want > 0) {
+				diff = min_t(s64, iter->rt_runtime, want);
+				iter->rt_runtime -= diff;
+				want -= diff;
+			} else {
+				iter->rt_runtime -= want;
+				want -= want;
+			}
+			spin_unlock(&iter->rt_runtime_lock);
+
+			if (!want)
+				break;
+		}
+
+		spin_lock(&rt_rq->rt_runtime_lock);
+		BUG_ON(want);
+balanced:
+		rt_rq->rt_runtime = RUNTIME_INF;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void disable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__disable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+	struct root_domain *rd = rq->rd;
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = rt_b->rt_runtime;
+		rt_rq->rt_time = 0;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void enable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__enable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
 #endif
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -328,14 +430,13 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
 #ifdef CONFIG_SMP
 	if (rt_rq->rt_time > runtime) {
-		int more;
-
 		spin_unlock(&rt_rq->rt_runtime_lock);
-		more = balance_runtime(rt_rq);
+		balance_runtime(rt_rq);
 		spin_lock(&rt_rq->rt_runtime_lock);
 
-		if (more)
-			runtime = sched_rt_runtime(rt_rq);
+		runtime = sched_rt_runtime(rt_rq);
+		if (runtime == RUNTIME_INF)
+			return 0;
 	}
 #endif
 
@@ -391,12 +492,21 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+	if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
 		rt_rq->highest_prio = rt_se_prio(rt_se);
+#ifdef CONFIG_SMP
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_se_prio(rt_se));
+#endif
+	}
 #endif
 #ifdef CONFIG_SMP
 	if (rt_se->nr_cpus_allowed > 1) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+
 		rq->rt.rt_nr_migratory++;
 	}
 
@@ -416,6 +526,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static inline
 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_SMP
+	int highest_prio = rt_rq->highest_prio;
+#endif
+
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running--;
@@ -439,6 +553,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 		rq->rt.rt_nr_migratory--;
 	}
 
+	if (rt_rq->highest_prio != highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_rq->highest_prio);
+	}
+
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -454,6 +576,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
 	/*
 	 * Don't enqueue the group if its throttled, or when empty.
@@ -464,7 +587,11 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;
 
-	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+	if (rt_se->nr_cpus_allowed == 1)
+		list_add(&rt_se->run_list, queue);
+	else
+		list_add_tail(&rt_se->run_list, queue);
+
 	__set_bit(rt_se_prio(rt_se), array->bitmap);
 
 	inc_rt_tasks(rt_se, rt_rq);
@@ -551,8 +678,11 @@ void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
 	struct rt_prio_array *array = &rt_rq->active;
 	struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
-	if (on_rt_rq(rt_se))
-		list_move_tail(&rt_se->run_list, queue);
+	if (on_rt_rq(rt_se)) {
+		list_del_init(&rt_se->run_list);
+		list_add_tail(&rt_se->run_list,
+			      array->queue + rt_se_prio(rt_se));
+	}
 }
 
 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
@@ -615,8 +745,37 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 {
-	if (p->prio < rq->curr->prio)
+	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * If:
+	 *
+	 * - the newly woken task is of equal priority to the current task
+	 * - the newly woken task is non-migratable while current is migratable
+	 * - current will be preempted on the next reschedule
+	 *
+	 * we should check to see if current can readily move to a different
+	 * cpu. If so, we will reschedule to allow the push logic to try
+	 * to move current somewhere else, making room for our non-migratable
+	 * task.
+	 */
+	if((p->prio == rq->curr->prio)
+	    && p->rt.nr_cpus_allowed == 1
+	    && rq->curr->rt.nr_cpus_allowed != 1) {
+		cpumask_t mask;
+
+		if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+			/*
+			 * There appears to be other cpus that can accept
+			 * current, so lets reschedule to try and push it away
+			 */
+			resched_task(rq->curr);
+	}
+#endif
 }
 
 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -719,73 +878,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 
 static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
 
-static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
-{
-	int lowest_prio = -1;
-	int lowest_cpu = -1;
-	int count = 0;
-	int cpu;
-
-	cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
-
-	/*
-	 * Scan each rq for the lowest prio.
-	 */
-	for_each_cpu_mask(cpu, *lowest_mask) {
-		struct rq *rq = cpu_rq(cpu);
-
-		/* We look for lowest RT prio or non-rt CPU */
-		if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-			/*
-			 * if we already found a low RT queue
-			 * and now we found this non-rt queue
-			 * clear the mask and set our bit.
-			 * Otherwise just return the queue as is
-			 * and the count==1 will cause the algorithm
-			 * to use the first bit found.
-			 */
-			if (lowest_cpu != -1) {
-				cpus_clear(*lowest_mask);
-				cpu_set(rq->cpu, *lowest_mask);
-			}
-			return 1;
-		}
-
-		/* no locking for now */
-		if ((rq->rt.highest_prio > task->prio)
-		    && (rq->rt.highest_prio >= lowest_prio)) {
-			if (rq->rt.highest_prio > lowest_prio) {
-				/* new low - clear old data */
-				lowest_prio = rq->rt.highest_prio;
-				lowest_cpu = cpu;
-				count = 0;
-			}
-			count++;
-		} else
-			cpu_clear(cpu, *lowest_mask);
-	}
-
-	/*
-	 * Clear out all the set bits that represent
-	 * runqueues that were of higher prio than
-	 * the lowest_prio.
-	 */
-	if (lowest_cpu > 0) {
-		/*
-		 * Perhaps we could add another cpumask op to
-		 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
-		 * Then that could be optimized to use memset and such.
-		 */
-		for_each_cpu_mask(cpu, *lowest_mask) {
-			if (cpu >= lowest_cpu)
-				break;
-			cpu_clear(cpu, *lowest_mask);
-		}
-	}
-
-	return count;
-}
-
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
 	int first;
@@ -807,17 +899,12 @@ static int find_lowest_rq(struct task_struct *task)
 	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
-	int count = find_lowest_cpus(task, lowest_mask);
 
-	if (!count)
-		return -1; /* No targets found */
+	if (task->rt.nr_cpus_allowed == 1)
+		return -1; /* No other targets possible */
 
-	/*
-	 * There is no sense in performing an optimal search if only one
-	 * target is found.
-	 */
-	if (count == 1)
-		return first_cpu(*lowest_mask);
+	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
+		return -1; /* No targets found */
 
 	/*
 	 * At this point we have built a mask of cpus representing the
@@ -1162,17 +1249,25 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 }
 
 /* Assumes rq->lock is held */
-static void join_domain_rt(struct rq *rq)
+static void rq_online_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_set_overload(rq);
+
+	__enable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 
 /* Assumes rq->lock is held */
-static void leave_domain_rt(struct rq *rq)
+static void rq_offline_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_clear_overload(rq);
+
+	__disable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }
 
 /*
@@ -1335,8 +1430,8 @@ static const struct sched_class rt_sched_class = {
 	.load_balance = load_balance_rt,
 	.move_one_task = move_one_task_rt,
 	.set_cpus_allowed = set_cpus_allowed_rt,
-	.join_domain = join_domain_rt,
-	.leave_domain = leave_domain_rt,
+	.rq_online = rq_online_rt,
+	.rq_offline = rq_offline_rt,
 	.pre_schedule = pre_schedule_rt,
 	.post_schedule = post_schedule_rt,
 	.task_wake_up = task_wake_up_rt,