Diffstat (limited to 'kernel/sched_rt.c')
 -rw-r--r--  kernel/sched_rt.c  | 275
 1 files changed, 185 insertions, 90 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 1dad5bbb59b6..fee5fa7c72db 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq)
 
 static inline void rt_set_overload(struct rq *rq)
 {
+        if (!rq->online)
+                return;
+
         cpu_set(rq->cpu, rq->rd->rto_mask);
         /*
          * Make sure the mask is visible before we set
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq)
 
 static inline void rt_clear_overload(struct rq *rq)
 {
+        if (!rq->online)
+                return;
+
         /* the order here really doesn't matter */
         atomic_dec(&rq->rd->rto_count);
         cpu_clear(rq->cpu, rq->rd->rto_mask);
@@ -280,6 +286,9 @@ static int balance_runtime(struct rt_rq *rt_rq)
                         continue;
 
                 spin_lock(&iter->rt_runtime_lock);
+                if (iter->rt_runtime == RUNTIME_INF)
+                        goto next;
+
                 diff = iter->rt_runtime - iter->rt_time;
                 if (diff > 0) {
                         do_div(diff, weight);
@@ -293,12 +302,105 @@ static int balance_runtime(struct rt_rq *rt_rq)
                                 break;
                         }
                 }
+next:
                 spin_unlock(&iter->rt_runtime_lock);
         }
         spin_unlock(&rt_b->rt_runtime_lock);
 
         return more;
 }
+
+static void __disable_runtime(struct rq *rq)
+{
+        struct root_domain *rd = rq->rd;
+        struct rt_rq *rt_rq;
+
+        if (unlikely(!scheduler_running))
+                return;
+
+        for_each_leaf_rt_rq(rt_rq, rq) {
+                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+                s64 want;
+                int i;
+
+                spin_lock(&rt_b->rt_runtime_lock);
+                spin_lock(&rt_rq->rt_runtime_lock);
+                if (rt_rq->rt_runtime == RUNTIME_INF ||
+                                rt_rq->rt_runtime == rt_b->rt_runtime)
+                        goto balanced;
+                spin_unlock(&rt_rq->rt_runtime_lock);
+
+                want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+                for_each_cpu_mask(i, rd->span) {
+                        struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+                        s64 diff;
+
+                        if (iter == rt_rq)
+                                continue;
+
+                        spin_lock(&iter->rt_runtime_lock);
+                        if (want > 0) {
+                                diff = min_t(s64, iter->rt_runtime, want);
+                                iter->rt_runtime -= diff;
+                                want -= diff;
+                        } else {
+                                iter->rt_runtime -= want;
+                                want -= want;
+                        }
+                        spin_unlock(&iter->rt_runtime_lock);
+
+                        if (!want)
+                                break;
+                }
+
+                spin_lock(&rt_rq->rt_runtime_lock);
+                BUG_ON(want);
+balanced:
+                rt_rq->rt_runtime = RUNTIME_INF;
+                spin_unlock(&rt_rq->rt_runtime_lock);
+                spin_unlock(&rt_b->rt_runtime_lock);
+        }
+}
+
+static void disable_runtime(struct rq *rq)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&rq->lock, flags);
+        __disable_runtime(rq);
+        spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+        struct root_domain *rd = rq->rd;
+        struct rt_rq *rt_rq;
+
+        if (unlikely(!scheduler_running))
+                return;
+
+        for_each_leaf_rt_rq(rt_rq, rq) {
+                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+                spin_lock(&rt_b->rt_runtime_lock);
+                spin_lock(&rt_rq->rt_runtime_lock);
+                rt_rq->rt_runtime = rt_b->rt_runtime;
+                rt_rq->rt_time = 0;
+                spin_unlock(&rt_rq->rt_runtime_lock);
+                spin_unlock(&rt_b->rt_runtime_lock);
+        }
+}
+
+static void enable_runtime(struct rq *rq)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&rq->lock, flags);
+        __enable_runtime(rq);
+        spin_unlock_irqrestore(&rq->lock, flags);
+}
+
 #endif
 
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
@@ -328,14 +430,13 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 
 #ifdef CONFIG_SMP
         if (rt_rq->rt_time > runtime) {
-                int more;
-
                 spin_unlock(&rt_rq->rt_runtime_lock);
-                more = balance_runtime(rt_rq);
+                balance_runtime(rt_rq);
                 spin_lock(&rt_rq->rt_runtime_lock);
 
-                if (more)
-                        runtime = sched_rt_runtime(rt_rq);
+                runtime = sched_rt_runtime(rt_rq);
+                if (runtime == RUNTIME_INF)
+                        return 0;
         }
 #endif
 
@@ -391,12 +492,21 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
         WARN_ON(!rt_prio(rt_se_prio(rt_se)));
         rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-        if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+        if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+                struct rq *rq = rq_of_rt_rq(rt_rq);
+
                 rt_rq->highest_prio = rt_se_prio(rt_se);
+#ifdef CONFIG_SMP
+                if (rq->online)
+                        cpupri_set(&rq->rd->cpupri, rq->cpu,
+                                   rt_se_prio(rt_se));
+#endif
+        }
 #endif
 #ifdef CONFIG_SMP
         if (rt_se->nr_cpus_allowed > 1) {
                 struct rq *rq = rq_of_rt_rq(rt_rq);
+
                 rq->rt.rt_nr_migratory++;
         }
 
@@ -416,6 +526,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static inline
 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_SMP
+        int highest_prio = rt_rq->highest_prio;
+#endif
+
         WARN_ON(!rt_prio(rt_se_prio(rt_se)));
         WARN_ON(!rt_rq->rt_nr_running);
         rt_rq->rt_nr_running--;
@@ -439,6 +553,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
                 rq->rt.rt_nr_migratory--;
         }
 
+        if (rt_rq->highest_prio != highest_prio) {
+                struct rq *rq = rq_of_rt_rq(rt_rq);
+
+                if (rq->online)
+                        cpupri_set(&rq->rd->cpupri, rq->cpu,
+                                   rt_rq->highest_prio);
+        }
+
         update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -454,6 +576,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
         struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
         struct rt_prio_array *array = &rt_rq->active;
         struct rt_rq *group_rq = group_rt_rq(rt_se);
+        struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
         /*
          * Don't enqueue the group if its throttled, or when empty.
@@ -464,7 +587,11 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
         if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                 return;
 
-        list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+        if (rt_se->nr_cpus_allowed == 1)
+                list_add(&rt_se->run_list, queue);
+        else
+                list_add_tail(&rt_se->run_list, queue);
+
         __set_bit(rt_se_prio(rt_se), array->bitmap);
 
         inc_rt_tasks(rt_se, rt_rq);
@@ -551,8 +678,11 @@ void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
         struct rt_prio_array *array = &rt_rq->active;
         struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
-        if (on_rt_rq(rt_se))
-                list_move_tail(&rt_se->run_list, queue);
+        if (on_rt_rq(rt_se)) {
+                list_del_init(&rt_se->run_list);
+                list_add_tail(&rt_se->run_list,
+                              array->queue + rt_se_prio(rt_se));
+        }
 }
 
 static void requeue_task_rt(struct rq *rq, struct task_struct *p)
@@ -615,8 +745,37 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 {
-        if (p->prio < rq->curr->prio)
+        if (p->prio < rq->curr->prio) {
                 resched_task(rq->curr);
+                return;
+        }
+
+#ifdef CONFIG_SMP
+        /*
+         * If:
+         *
+         * - the newly woken task is of equal priority to the current task
+         * - the newly woken task is non-migratable while current is migratable
+         * - current will be preempted on the next reschedule
+         *
+         * we should check to see if current can readily move to a different
+         * cpu. If so, we will reschedule to allow the push logic to try
+         * to move current somewhere else, making room for our non-migratable
+         * task.
+         */
+        if((p->prio == rq->curr->prio)
+           && p->rt.nr_cpus_allowed == 1
+           && rq->curr->rt.nr_cpus_allowed != 1) {
+                cpumask_t mask;
+
+                if (cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+                        /*
+                         * There appears to be other cpus that can accept
+                         * current, so lets reschedule to try and push it away
+                         */
+                        resched_task(rq->curr);
+        }
+#endif
 }
 
 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -719,73 +878,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 
 static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
 
-static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
-{
-        int lowest_prio = -1;
-        int lowest_cpu = -1;
-        int count = 0;
-        int cpu;
-
-        cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
-
-        /*
-         * Scan each rq for the lowest prio.
-         */
-        for_each_cpu_mask(cpu, *lowest_mask) {
-                struct rq *rq = cpu_rq(cpu);
-
-                /* We look for lowest RT prio or non-rt CPU */
-                if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-                        /*
-                         * if we already found a low RT queue
-                         * and now we found this non-rt queue
-                         * clear the mask and set our bit.
-                         * Otherwise just return the queue as is
-                         * and the count==1 will cause the algorithm
-                         * to use the first bit found.
-                         */
-                        if (lowest_cpu != -1) {
-                                cpus_clear(*lowest_mask);
-                                cpu_set(rq->cpu, *lowest_mask);
-                        }
-                        return 1;
-                }
-
-                /* no locking for now */
-                if ((rq->rt.highest_prio > task->prio)
-                    && (rq->rt.highest_prio >= lowest_prio)) {
-                        if (rq->rt.highest_prio > lowest_prio) {
-                                /* new low - clear old data */
-                                lowest_prio = rq->rt.highest_prio;
-                                lowest_cpu = cpu;
-                                count = 0;
-                        }
-                        count++;
-                } else
-                        cpu_clear(cpu, *lowest_mask);
-        }
-
-        /*
-         * Clear out all the set bits that represent
-         * runqueues that were of higher prio than
-         * the lowest_prio.
-         */
-        if (lowest_cpu > 0) {
-                /*
-                 * Perhaps we could add another cpumask op to
-                 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
-                 * Then that could be optimized to use memset and such.
-                 */
-                for_each_cpu_mask(cpu, *lowest_mask) {
-                        if (cpu >= lowest_cpu)
-                                break;
-                        cpu_clear(cpu, *lowest_mask);
-                }
-        }
-
-        return count;
-}
-
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
         int first;
@@ -807,17 +899,12 @@ static int find_lowest_rq(struct task_struct *task)
         cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
         int this_cpu = smp_processor_id();
         int cpu = task_cpu(task);
-        int count = find_lowest_cpus(task, lowest_mask);
 
-        if (!count)
-                return -1; /* No targets found */
+        if (task->rt.nr_cpus_allowed == 1)
+                return -1; /* No other targets possible */
 
-        /*
-         * There is no sense in performing an optimal search if only one
-         * target is found.
-         */
-        if (count == 1)
-                return first_cpu(*lowest_mask);
+        if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
+                return -1; /* No targets found */
 
         /*
          * At this point we have built a mask of cpus representing the
@@ -1162,17 +1249,25 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 }
 
 /* Assumes rq->lock is held */
-static void join_domain_rt(struct rq *rq)
+static void rq_online_rt(struct rq *rq)
 {
         if (rq->rt.overloaded)
                 rt_set_overload(rq);
+
+        __enable_runtime(rq);
+
+        cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 
 /* Assumes rq->lock is held */
-static void leave_domain_rt(struct rq *rq)
+static void rq_offline_rt(struct rq *rq)
 {
         if (rq->rt.overloaded)
                 rt_clear_overload(rq);
+
+        __disable_runtime(rq);
+
+        cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }
 
 /*
@@ -1335,8 +1430,8 @@ static const struct sched_class rt_sched_class = {
         .load_balance = load_balance_rt,
         .move_one_task = move_one_task_rt,
         .set_cpus_allowed = set_cpus_allowed_rt,
-        .join_domain = join_domain_rt,
-        .leave_domain = leave_domain_rt,
+        .rq_online = rq_online_rt,
+        .rq_offline = rq_offline_rt,
         .pre_schedule = pre_schedule_rt,
         .post_schedule = post_schedule_rt,
         .task_wake_up = task_wake_up_rt,
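
Editorial note, not part of the patch: the largest block added above, __disable_runtime(), reclaims the bandwidth a runqueue has lent to its siblings before that CPU leaves the root domain. A minimal user-space sketch of just that redistribution step is below; the names (fake_rt_rq, reclaim_runtime, NR_RQS) and the sample figures are invented for the illustration, and the real code's locking, root-domain span iteration and RUNTIME_INF handling are left out.

/*
 * Standalone sketch of the reclaim loop in __disable_runtime(): a runqueue
 * wants its bandwidth back up to the group default, so it pulls "want"
 * units out of whatever the other runqueues currently hold.
 */
#include <stdio.h>

struct fake_rt_rq {
        long long rt_runtime;   /* runtime currently held by this rq */
};

#define NR_RQS 4

/* Pull back up to "want" units of runtime from the other runqueues. */
static long long reclaim_runtime(struct fake_rt_rq *rqs, int target,
                                 long long default_runtime)
{
        long long want = default_runtime - rqs[target].rt_runtime;
        int i;

        for (i = 0; i < NR_RQS && want > 0; i++) {
                long long diff;

                if (i == target)
                        continue;

                /* take at most what this runqueue still holds */
                diff = rqs[i].rt_runtime < want ? rqs[i].rt_runtime : want;
                rqs[i].rt_runtime -= diff;
                want -= diff;
        }

        return want;    /* non-zero means the bandwidth could not be found */
}

int main(void)
{
        /* rq 0 has lent most of its 950000us default to rqs 1 and 2 */
        struct fake_rt_rq rqs[NR_RQS] = {
                { 100000 }, { 1400000 }, { 1300000 }, { 950000 },
        };
        long long left = reclaim_runtime(rqs, 0, 950000);
        int i;

        for (i = 0; i < NR_RQS; i++)
                printf("rq%d holds %lld\n", i, rqs[i].rt_runtime);
        printf("unreclaimed: %lld\n", left);    /* 0 on success */
        return 0;
}

In the patch itself the reclaimed share is not handed back to the departing runqueue: once the siblings no longer hold it, rt_rq->rt_runtime is simply set to RUNTIME_INF at the balanced: label.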