Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--  kernel/sched_rt.c | 436
1 file changed, 290 insertions, 146 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0f3c19197fa4..f85a76363eee 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq)
 
 static inline void rt_set_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	cpu_set(rq->cpu, rq->rd->rto_mask);
 	/*
 	 * Make sure the mask is visible before we set
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq)
 
 static inline void rt_clear_overload(struct rq *rq)
 {
+	if (!rq->online)
+		return;
+
 	/* the order here really doesn't matter */
 	atomic_dec(&rq->rd->rto_count);
 	cpu_clear(rq->cpu, rq->rd->rto_mask);
@@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 	return &rt_rq->tg->rt_bandwidth;
 }
 
-#else
+#else /* !CONFIG_RT_GROUP_SCHED */
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
@@ -220,7 +226,160 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
 	return &def_rt_bandwidth;
 }
 
-#endif
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+#ifdef CONFIG_SMP
+static int do_balance_runtime(struct rt_rq *rt_rq)
+{
+	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+	int i, weight, more = 0;
+	u64 rt_period;
+
+	weight = cpus_weight(rd->span);
+
+	spin_lock(&rt_b->rt_runtime_lock);
+	rt_period = ktime_to_ns(rt_b->rt_period);
+	for_each_cpu_mask_nr(i, rd->span) {
+		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+		s64 diff;
+
+		if (iter == rt_rq)
+			continue;
+
+		spin_lock(&iter->rt_runtime_lock);
+		if (iter->rt_runtime == RUNTIME_INF)
+			goto next;
+
+		diff = iter->rt_runtime - iter->rt_time;
+		if (diff > 0) {
+			do_div(diff, weight);
+			if (rt_rq->rt_runtime + diff > rt_period)
+				diff = rt_period - rt_rq->rt_runtime;
+			iter->rt_runtime -= diff;
+			rt_rq->rt_runtime += diff;
+			more = 1;
+			if (rt_rq->rt_runtime == rt_period) {
+				spin_unlock(&iter->rt_runtime_lock);
+				break;
+			}
+		}
+next:
+		spin_unlock(&iter->rt_runtime_lock);
+	}
+	spin_unlock(&rt_b->rt_runtime_lock);
+
+	return more;
+}
+
+static void __disable_runtime(struct rq *rq)
+{
+	struct root_domain *rd = rq->rd;
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+		s64 want;
+		int i;
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		if (rt_rq->rt_runtime == RUNTIME_INF ||
+				rt_rq->rt_runtime == rt_b->rt_runtime)
+			goto balanced;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+
+		want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+		for_each_cpu_mask(i, rd->span) {
+			struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+			s64 diff;
+
+			if (iter == rt_rq)
+				continue;
+
+			spin_lock(&iter->rt_runtime_lock);
+			if (want > 0) {
+				diff = min_t(s64, iter->rt_runtime, want);
+				iter->rt_runtime -= diff;
+				want -= diff;
+			} else {
+				iter->rt_runtime -= want;
+				want -= want;
+			}
+			spin_unlock(&iter->rt_runtime_lock);
+
+			if (!want)
+				break;
+		}
+
+		spin_lock(&rt_rq->rt_runtime_lock);
+		BUG_ON(want);
+balanced:
+		rt_rq->rt_runtime = RUNTIME_INF;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void disable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__disable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+	struct rt_rq *rt_rq;
+
+	if (unlikely(!scheduler_running))
+		return;
+
+	for_each_leaf_rt_rq(rt_rq, rq) {
+		struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+		spin_lock(&rt_b->rt_runtime_lock);
+		spin_lock(&rt_rq->rt_runtime_lock);
+		rt_rq->rt_runtime = rt_b->rt_runtime;
+		rt_rq->rt_time = 0;
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		spin_unlock(&rt_b->rt_runtime_lock);
+	}
+}
+
+static void enable_runtime(struct rq *rq)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&rq->lock, flags);
+	__enable_runtime(rq);
+	spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static int balance_runtime(struct rt_rq *rt_rq)
+{
+	int more = 0;
+
+	if (rt_rq->rt_time > rt_rq->rt_runtime) {
+		spin_unlock(&rt_rq->rt_runtime_lock);
+		more = do_balance_runtime(rt_rq);
+		spin_lock(&rt_rq->rt_runtime_lock);
+	}
+
+	return more;
+}
+#else /* !CONFIG_SMP */
+static inline int balance_runtime(struct rt_rq *rt_rq)
+{
+	return 0;
+}
+#endif /* CONFIG_SMP */
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {
@@ -241,6 +400,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 			u64 runtime;
 
 			spin_lock(&rt_rq->rt_runtime_lock);
+			if (rt_rq->rt_throttled)
+				balance_runtime(rt_rq);
 			runtime = rt_rq->rt_runtime;
 			rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
 			if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
@@ -261,47 +422,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 	return idle;
 }
 
-#ifdef CONFIG_SMP
-static int balance_runtime(struct rt_rq *rt_rq)
-{
-	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
-	struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
-	int i, weight, more = 0;
-	u64 rt_period;
-
-	weight = cpus_weight(rd->span);
-
-	spin_lock(&rt_b->rt_runtime_lock);
-	rt_period = ktime_to_ns(rt_b->rt_period);
-	for_each_cpu_mask(i, rd->span) {
-		struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
-		s64 diff;
-
-		if (iter == rt_rq)
-			continue;
-
-		spin_lock(&iter->rt_runtime_lock);
-		diff = iter->rt_runtime - iter->rt_time;
-		if (diff > 0) {
-			do_div(diff, weight);
-			if (rt_rq->rt_runtime + diff > rt_period)
-				diff = rt_period - rt_rq->rt_runtime;
-			iter->rt_runtime -= diff;
-			rt_rq->rt_runtime += diff;
-			more = 1;
-			if (rt_rq->rt_runtime == rt_period) {
-				spin_unlock(&iter->rt_runtime_lock);
-				break;
-			}
-		}
-		spin_unlock(&iter->rt_runtime_lock);
-	}
-	spin_unlock(&rt_b->rt_runtime_lock);
-
-	return more;
-}
-#endif
-
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -327,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
 	if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
 		return 0;
 
-#ifdef CONFIG_SMP
-	if (rt_rq->rt_time > runtime) {
-		int more;
-
-		spin_unlock(&rt_rq->rt_runtime_lock);
-		more = balance_runtime(rt_rq);
-		spin_lock(&rt_rq->rt_runtime_lock);
-
-		if (more)
-			runtime = sched_rt_runtime(rt_rq);
-	}
-#endif
+	balance_runtime(rt_rq);
+	runtime = sched_rt_runtime(rt_rq);
+	if (runtime == RUNTIME_INF)
+		return 0;
 
 	if (rt_rq->rt_time > runtime) {
 		rt_rq->rt_throttled = 1;
@@ -392,12 +504,23 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-	if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+	if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+#ifdef CONFIG_SMP
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+#endif
+
 		rt_rq->highest_prio = rt_se_prio(rt_se);
+#ifdef CONFIG_SMP
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_se_prio(rt_se));
+#endif
+	}
 #endif
 #ifdef CONFIG_SMP
 	if (rt_se->nr_cpus_allowed > 1) {
 		struct rq *rq = rq_of_rt_rq(rt_rq);
+
 		rq->rt.rt_nr_migratory++;
 	}
 
@@ -417,6 +540,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static inline
 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_SMP
+	int highest_prio = rt_rq->highest_prio;
+#endif
+
 	WARN_ON(!rt_prio(rt_se_prio(rt_se)));
 	WARN_ON(!rt_rq->rt_nr_running);
 	rt_rq->rt_nr_running--;
@@ -440,6 +567,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 		rq->rt.rt_nr_migratory--;
 	}
 
+	if (rt_rq->highest_prio != highest_prio) {
+		struct rq *rq = rq_of_rt_rq(rt_rq);
+
+		if (rq->online)
+			cpupri_set(&rq->rd->cpupri, rq->cpu,
+				   rt_rq->highest_prio);
+	}
+
 	update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -455,6 +590,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
 	struct rt_prio_array *array = &rt_rq->active;
 	struct rt_rq *group_rq = group_rt_rq(rt_se);
+	struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
 	/*
 	 * Don't enqueue the group if its throttled, or when empty.
@@ -465,7 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
 	if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
 		return;
 
-	list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+	list_add_tail(&rt_se->run_list, queue);
 	__set_bit(rt_se_prio(rt_se), array->bitmap);
 
 	inc_rt_tasks(rt_se, rt_rq);
@@ -532,6 +668,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
 		rt_se->timeout = 0;
 
 	enqueue_rt_entity(rt_se);
+
+	inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -540,36 +678,42 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 
 	update_curr_rt(rq);
 	dequeue_rt_entity(rt_se);
+
+	dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static
-void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+static void
+requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
 {
-	struct rt_prio_array *array = &rt_rq->active;
-	struct list_head *queue = array->queue + rt_se_prio(rt_se);
+	if (on_rt_rq(rt_se)) {
+		struct rt_prio_array *array = &rt_rq->active;
+		struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
-	if (on_rt_rq(rt_se))
-		list_move_tail(&rt_se->run_list, queue);
+		if (head)
+			list_move(&rt_se->run_list, queue);
+		else
+			list_move_tail(&rt_se->run_list, queue);
+	}
 }
 
-static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
 {
 	struct sched_rt_entity *rt_se = &p->rt;
 	struct rt_rq *rt_rq;
 
 	for_each_sched_rt_entity(rt_se) {
 		rt_rq = rt_rq_of_se(rt_se);
-		requeue_rt_entity(rt_rq, rt_se);
+		requeue_rt_entity(rt_rq, rt_se, head);
 	}
 }
 
 static void yield_task_rt(struct rq *rq)
 {
-	requeue_task_rt(rq, rq->curr);
+	requeue_task_rt(rq, rq->curr, 0);
 }
 
 #ifdef CONFIG_SMP
@@ -609,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
 	 */
 	return task_cpu(p);
 }
+
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+{
+	cpumask_t mask;
+
+	if (rq->curr->rt.nr_cpus_allowed == 1)
+		return;
+
+	if (p->rt.nr_cpus_allowed != 1
+	    && cpupri_find(&rq->rd->cpupri, p, &mask))
+		return;
+
+	if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+		return;
+
+	/*
+	 * There appears to be other cpus that can accept
+	 * current and none to run 'p', so lets reschedule
+	 * to try and push current away:
+	 */
+	requeue_task_rt(rq, p, 1);
+	resched_task(rq->curr);
+}
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -616,8 +784,27 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 {
-	if (p->prio < rq->curr->prio)
+	if (p->prio < rq->curr->prio) {
 		resched_task(rq->curr);
+		return;
+	}
+
+#ifdef CONFIG_SMP
+	/*
+	 * If:
+	 *
+	 * - the newly woken task is of equal priority to the current task
+	 * - the newly woken task is non-migratable while current is migratable
+	 * - current will be preempted on the next reschedule
+	 *
+	 * we should check to see if current can readily move to a different
+	 * cpu.  If so, we will reschedule to allow the push logic to try
+	 * to move current somewhere else, making room for our non-migratable
+	 * task.
+	 */
+	if (p->prio == rq->curr->prio && !need_resched())
+		check_preempt_equal_prio(rq, p);
+#endif
 }
 
 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -720,73 +907,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 
 static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
 
-static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
-{
-	int lowest_prio = -1;
-	int lowest_cpu = -1;
-	int count = 0;
-	int cpu;
-
-	cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
-
-	/*
-	 * Scan each rq for the lowest prio.
-	 */
-	for_each_cpu_mask(cpu, *lowest_mask) {
-		struct rq *rq = cpu_rq(cpu);
-
-		/* We look for lowest RT prio or non-rt CPU */
-		if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-			/*
-			 * if we already found a low RT queue
-			 * and now we found this non-rt queue
-			 * clear the mask and set our bit.
-			 * Otherwise just return the queue as is
-			 * and the count==1 will cause the algorithm
-			 * to use the first bit found.
-			 */
-			if (lowest_cpu != -1) {
-				cpus_clear(*lowest_mask);
-				cpu_set(rq->cpu, *lowest_mask);
-			}
-			return 1;
-		}
-
-		/* no locking for now */
-		if ((rq->rt.highest_prio > task->prio)
-		    && (rq->rt.highest_prio >= lowest_prio)) {
-			if (rq->rt.highest_prio > lowest_prio) {
-				/* new low - clear old data */
-				lowest_prio = rq->rt.highest_prio;
-				lowest_cpu = cpu;
-				count = 0;
-			}
-			count++;
-		} else
-			cpu_clear(cpu, *lowest_mask);
-	}
-
-	/*
-	 * Clear out all the set bits that represent
-	 * runqueues that were of higher prio than
-	 * the lowest_prio.
-	 */
-	if (lowest_cpu > 0) {
-		/*
-		 * Perhaps we could add another cpumask op to
-		 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
-		 * Then that could be optimized to use memset and such.
-		 */
-		for_each_cpu_mask(cpu, *lowest_mask) {
-			if (cpu >= lowest_cpu)
-				break;
-			cpu_clear(cpu, *lowest_mask);
-		}
-	}
-
-	return count;
-}
-
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
 	int first;
@@ -808,17 +928,19 @@ static int find_lowest_rq(struct task_struct *task)
 	cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
 	int this_cpu = smp_processor_id();
 	int cpu = task_cpu(task);
-	int count = find_lowest_cpus(task, lowest_mask);
 
-	if (!count)
+	if (task->rt.nr_cpus_allowed == 1)
+		return -1; /* No other targets possible */
+
+	if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
 		return -1; /* No targets found */
 
 	/*
-	 * There is no sense in performing an optimal search if only one
-	 * target is found.
+	 * Only consider CPUs that are usable for migration.
+	 * I guess we might want to change cpupri_find() to ignore those
+	 * in the first place.
 	 */
-	if (count == 1)
-		return first_cpu(*lowest_mask);
+	cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
 
 	/*
 	 * At this point we have built a mask of cpus representing the
@@ -1006,7 +1128,7 @@ static int pull_rt_task(struct rq *this_rq)
 
 	next = pick_next_task_rt(this_rq);
 
-	for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
+	for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
 		if (this_cpu == cpu)
 			continue;
 
@@ -1163,17 +1285,25 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 }
 
 /* Assumes rq->lock is held */
-static void join_domain_rt(struct rq *rq)
+static void rq_online_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_set_overload(rq);
+
+	__enable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 
 /* Assumes rq->lock is held */
-static void leave_domain_rt(struct rq *rq)
+static void rq_offline_rt(struct rq *rq)
 {
 	if (rq->rt.overloaded)
 		rt_clear_overload(rq);
+
+	__disable_runtime(rq);
+
+	cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }
 
 /*
@@ -1306,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
 	 * on the queue:
 	 */
 	if (p->rt.run_list.prev != p->rt.run_list.next) {
-		requeue_task_rt(rq, p);
+		requeue_task_rt(rq, p, 0);
 		set_tsk_need_resched(p);
 	}
 }
@@ -1336,8 +1466,8 @@ static const struct sched_class rt_sched_class = {
 	.load_balance = load_balance_rt,
 	.move_one_task = move_one_task_rt,
 	.set_cpus_allowed = set_cpus_allowed_rt,
-	.join_domain = join_domain_rt,
-	.leave_domain = leave_domain_rt,
+	.rq_online = rq_online_rt,
+	.rq_offline = rq_offline_rt,
 	.pre_schedule = pre_schedule_rt,
 	.post_schedule = post_schedule_rt,
 	.task_wake_up = task_wake_up_rt,
@@ -1350,3 +1480,17 @@ static const struct sched_class rt_sched_class = {
 	.prio_changed = prio_changed_rt,
 	.switched_to = switched_to_rt,
 };
+
+#ifdef CONFIG_SCHED_DEBUG
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+
+static void print_rt_stats(struct seq_file *m, int cpu)
+{
+	struct rt_rq *rt_rq;
+
+	rcu_read_lock();
+	for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+		print_rt_rq(m, cpu, rt_rq);
+	rcu_read_unlock();
+}
+#endif /* CONFIG_SCHED_DEBUG */