Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r-- | kernel/sched_rt.c | 515 |
1 files changed, 354 insertions, 161 deletions
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0f3c19197fa4..cdf5740ab03e 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq) | |||
12 | 12 | ||
13 | static inline void rt_set_overload(struct rq *rq) | 13 | static inline void rt_set_overload(struct rq *rq) |
14 | { | 14 | { |
15 | if (!rq->online) | ||
16 | return; | ||
17 | |||
15 | cpu_set(rq->cpu, rq->rd->rto_mask); | 18 | cpu_set(rq->cpu, rq->rd->rto_mask); |
16 | /* | 19 | /* |
17 | * Make sure the mask is visible before we set | 20 | * Make sure the mask is visible before we set |
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq) | |||
26 | 29 | ||
27 | static inline void rt_clear_overload(struct rq *rq) | 30 | static inline void rt_clear_overload(struct rq *rq) |
28 | { | 31 | { |
32 | if (!rq->online) | ||
33 | return; | ||
34 | |||
29 | /* the order here really doesn't matter */ | 35 | /* the order here really doesn't matter */ |
30 | atomic_dec(&rq->rd->rto_count); | 36 | atomic_dec(&rq->rd->rto_count); |
31 | cpu_clear(rq->cpu, rq->rd->rto_mask); | 37 | cpu_clear(rq->cpu, rq->rd->rto_mask); |
@@ -96,12 +102,12 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se); | |||
96 | 102 | ||
97 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | 103 | static void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
98 | { | 104 | { |
105 | struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; | ||
99 | struct sched_rt_entity *rt_se = rt_rq->rt_se; | 106 | struct sched_rt_entity *rt_se = rt_rq->rt_se; |
100 | 107 | ||
101 | if (rt_se && !on_rt_rq(rt_se) && rt_rq->rt_nr_running) { | 108 | if (rt_rq->rt_nr_running) { |
102 | struct task_struct *curr = rq_of_rt_rq(rt_rq)->curr; | 109 | if (rt_se && !on_rt_rq(rt_se)) |
103 | 110 | enqueue_rt_entity(rt_se); | |
104 | enqueue_rt_entity(rt_se); | ||
105 | if (rt_rq->highest_prio < curr->prio) | 111 | if (rt_rq->highest_prio < curr->prio) |
106 | resched_task(curr); | 112 | resched_task(curr); |
107 | } | 113 | } |
@@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | |||
155 | return &rt_rq->tg->rt_bandwidth; | 161 | return &rt_rq->tg->rt_bandwidth; |
156 | } | 162 | } |
157 | 163 | ||
158 | #else | 164 | #else /* !CONFIG_RT_GROUP_SCHED */ |
159 | 165 | ||
160 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) | 166 | static inline u64 sched_rt_runtime(struct rt_rq *rt_rq) |
161 | { | 167 | { |
@@ -193,6 +199,8 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se) | |||
193 | 199 | ||
194 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) | 200 | static inline void sched_rt_rq_enqueue(struct rt_rq *rt_rq) |
195 | { | 201 | { |
202 | if (rt_rq->rt_nr_running) | ||
203 | resched_task(rq_of_rt_rq(rt_rq)->curr); | ||
196 | } | 204 | } |
197 | 205 | ||
198 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) | 206 | static inline void sched_rt_rq_dequeue(struct rt_rq *rt_rq) |
@@ -220,14 +228,210 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq) | |||
220 | return &def_rt_bandwidth; | 228 | return &def_rt_bandwidth; |
221 | } | 229 | } |
222 | 230 | ||
223 | #endif | 231 | #endif /* CONFIG_RT_GROUP_SCHED */ |
232 | |||
233 | #ifdef CONFIG_SMP | ||
234 | /* | ||
235 | * We ran out of runtime; see if we can borrow some from our neighbours. | ||
236 | */ | ||
237 | static int do_balance_runtime(struct rt_rq *rt_rq) | ||
238 | { | ||
239 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
240 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; | ||
241 | int i, weight, more = 0; | ||
242 | u64 rt_period; | ||
243 | |||
244 | weight = cpus_weight(rd->span); | ||
245 | |||
246 | spin_lock(&rt_b->rt_runtime_lock); | ||
247 | rt_period = ktime_to_ns(rt_b->rt_period); | ||
248 | for_each_cpu_mask_nr(i, rd->span) { | ||
249 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | ||
250 | s64 diff; | ||
251 | |||
252 | if (iter == rt_rq) | ||
253 | continue; | ||
254 | |||
255 | spin_lock(&iter->rt_runtime_lock); | ||
256 | /* | ||
257 | * Either all rqs have inf runtime and there's nothing to steal | ||
258 | * or __disable_runtime() below sets a specific rq to inf to | ||
259 | * indicate it's been disabled and disallow stealing. | ||
260 | */ | ||
261 | if (iter->rt_runtime == RUNTIME_INF) | ||
262 | goto next; | ||
263 | |||
264 | /* | ||
265 | * From runqueues with spare time, take 1/n part of their | ||
266 | * spare time, but no more than our period. | ||
267 | */ | ||
268 | diff = iter->rt_runtime - iter->rt_time; | ||
269 | if (diff > 0) { | ||
270 | diff = div_u64((u64)diff, weight); | ||
271 | if (rt_rq->rt_runtime + diff > rt_period) | ||
272 | diff = rt_period - rt_rq->rt_runtime; | ||
273 | iter->rt_runtime -= diff; | ||
274 | rt_rq->rt_runtime += diff; | ||
275 | more = 1; | ||
276 | if (rt_rq->rt_runtime == rt_period) { | ||
277 | spin_unlock(&iter->rt_runtime_lock); | ||
278 | break; | ||
279 | } | ||
280 | } | ||
281 | next: | ||
282 | spin_unlock(&iter->rt_runtime_lock); | ||
283 | } | ||
284 | spin_unlock(&rt_b->rt_runtime_lock); | ||
285 | |||
286 | return more; | ||
287 | } | ||
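
For illustration, here is a minimal userspace model of the 1/n borrowing arithmetic performed by do_balance_runtime() above. This is not kernel code: the struct, the function names and the numbers are invented, there is no locking, and div_u64()/ktime handling is replaced by plain 64-bit arithmetic.

#include <stdio.h>
#include <stdint.h>

struct toy_rt_rq {			/* hypothetical stand-in for struct rt_rq */
	int64_t rt_runtime;		/* budget for this period (ns) */
	int64_t rt_time;		/* budget already consumed (ns) */
};

/*
 * Borrow at most 1/weight of each neighbour's spare time, never letting the
 * borrower exceed one full period. Returns 1 if any runtime moved.
 */
static int toy_balance(struct toy_rt_rq *me, struct toy_rt_rq *rqs,
		       int weight, int64_t rt_period)
{
	int more = 0;

	for (int i = 0; i < weight; i++) {
		struct toy_rt_rq *iter = &rqs[i];
		int64_t diff;

		if (iter == me)
			continue;

		diff = iter->rt_runtime - iter->rt_time;
		if (diff <= 0)
			continue;

		diff /= weight;					/* take a 1/n share */
		if (me->rt_runtime + diff > rt_period)
			diff = rt_period - me->rt_runtime;	/* cap at the period */
		iter->rt_runtime -= diff;
		me->rt_runtime += diff;
		more = 1;
		if (me->rt_runtime == rt_period)
			break;
	}
	return more;
}

int main(void)
{
	struct toy_rt_rq rqs[4] = {
		{ .rt_runtime = 950000000, .rt_time = 960000000 },	/* over budget */
		{ .rt_runtime = 950000000, .rt_time = 100000000 },
		{ .rt_runtime = 950000000, .rt_time = 500000000 },
		{ .rt_runtime = 950000000, .rt_time = 900000000 },
	};

	toy_balance(&rqs[0], rqs, 4, 1000000000LL);
	printf("borrower now has %lld ns of runtime\n",
	       (long long)rqs[0].rt_runtime);
	return 0;
}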
288 | |||
289 | /* | ||
290 | * Ensure this RQ takes back all the runtime it lent to its neighbours. | ||
291 | */ | ||
292 | static void __disable_runtime(struct rq *rq) | ||
293 | { | ||
294 | struct root_domain *rd = rq->rd; | ||
295 | struct rt_rq *rt_rq; | ||
296 | |||
297 | if (unlikely(!scheduler_running)) | ||
298 | return; | ||
299 | |||
300 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
301 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
302 | s64 want; | ||
303 | int i; | ||
304 | |||
305 | spin_lock(&rt_b->rt_runtime_lock); | ||
306 | spin_lock(&rt_rq->rt_runtime_lock); | ||
307 | /* | ||
308 | * Either we're all inf and nobody needs to borrow, or we're | ||
309 | * already disabled and thus have nothing to do, or we have | ||
310 | * exactly the right amount of runtime to take out. | ||
311 | */ | ||
312 | if (rt_rq->rt_runtime == RUNTIME_INF || | ||
313 | rt_rq->rt_runtime == rt_b->rt_runtime) | ||
314 | goto balanced; | ||
315 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
316 | |||
317 | /* | ||
318 | * Calculate the difference between what we started out with | ||
319 | * and what we currently have; that's the amount of runtime | ||
320 | * we lent and now have to reclaim. | ||
321 | */ | ||
322 | want = rt_b->rt_runtime - rt_rq->rt_runtime; | ||
323 | |||
324 | /* | ||
325 | * Greedy reclaim, take back as much as we can. | ||
326 | */ | ||
327 | for_each_cpu_mask(i, rd->span) { | ||
328 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | ||
329 | s64 diff; | ||
330 | |||
331 | /* | ||
332 | * Can't reclaim from ourselves or disabled runqueues. | ||
333 | */ | ||
334 | if (iter == rt_rq || iter->rt_runtime == RUNTIME_INF) | ||
335 | continue; | ||
336 | |||
337 | spin_lock(&iter->rt_runtime_lock); | ||
338 | if (want > 0) { | ||
339 | diff = min_t(s64, iter->rt_runtime, want); | ||
340 | iter->rt_runtime -= diff; | ||
341 | want -= diff; | ||
342 | } else { | ||
343 | iter->rt_runtime -= want; | ||
344 | want -= want; | ||
345 | } | ||
346 | spin_unlock(&iter->rt_runtime_lock); | ||
347 | |||
348 | if (!want) | ||
349 | break; | ||
350 | } | ||
351 | |||
352 | spin_lock(&rt_rq->rt_runtime_lock); | ||
353 | /* | ||
354 | * We cannot be left wanting - that would mean some runtime | ||
355 | * leaked out of the system. | ||
356 | */ | ||
357 | BUG_ON(want); | ||
358 | balanced: | ||
359 | /* | ||
360 | * Disable all the borrow logic by pretending we have inf | ||
361 | * runtime - in which case borrowing doesn't make sense. | ||
362 | */ | ||
363 | rt_rq->rt_runtime = RUNTIME_INF; | ||
364 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
365 | spin_unlock(&rt_b->rt_runtime_lock); | ||
366 | } | ||
367 | } | ||
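
A small self-contained sketch of the reclaim bookkeeping in __disable_runtime() above, illustrative only (invented names, no locking): the amount to take back is the gap between the configured budget and the current budget, and the greedy walk over the neighbours must drive it to zero, mirroring the BUG_ON(want).

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

static int64_t min64(int64_t a, int64_t b)
{
	return a < b ? a : b;
}

/* Reclaim 'configured - current_budget' ns back from the neighbours' budgets. */
static void toy_reclaim(int64_t configured, int64_t current_budget,
			int64_t *neighbour_budget, int n)
{
	int64_t want = configured - current_budget;	/* total we lent out */

	for (int i = 0; i < n && want > 0; i++) {
		int64_t diff = min64(neighbour_budget[i], want);

		neighbour_budget[i] -= diff;
		want -= diff;
	}
	assert(want == 0);	/* mirrors BUG_ON(want): no runtime may leak */
}

int main(void)
{
	/* We started with 950ms but lent 300ms away; reclaim is greedy, so it
	 * simply takes budget back from whichever peers still have some. */
	int64_t peers[] = { 1050000000, 1150000000, 950000000 };

	toy_reclaim(950000000, 650000000, peers, 3);
	printf("reclaimed; first peer is back to %lld ns\n", (long long)peers[0]);
	return 0;
}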
368 | |||
369 | static void disable_runtime(struct rq *rq) | ||
370 | { | ||
371 | unsigned long flags; | ||
372 | |||
373 | spin_lock_irqsave(&rq->lock, flags); | ||
374 | __disable_runtime(rq); | ||
375 | spin_unlock_irqrestore(&rq->lock, flags); | ||
376 | } | ||
377 | |||
378 | static void __enable_runtime(struct rq *rq) | ||
379 | { | ||
380 | struct rt_rq *rt_rq; | ||
381 | |||
382 | if (unlikely(!scheduler_running)) | ||
383 | return; | ||
384 | |||
385 | /* | ||
386 | * Reset each runqueue's bandwidth settings | ||
387 | */ | ||
388 | for_each_leaf_rt_rq(rt_rq, rq) { | ||
389 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
390 | |||
391 | spin_lock(&rt_b->rt_runtime_lock); | ||
392 | spin_lock(&rt_rq->rt_runtime_lock); | ||
393 | rt_rq->rt_runtime = rt_b->rt_runtime; | ||
394 | rt_rq->rt_time = 0; | ||
395 | rt_rq->rt_throttled = 0; | ||
396 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
397 | spin_unlock(&rt_b->rt_runtime_lock); | ||
398 | } | ||
399 | } | ||
400 | |||
401 | static void enable_runtime(struct rq *rq) | ||
402 | { | ||
403 | unsigned long flags; | ||
404 | |||
405 | spin_lock_irqsave(&rq->lock, flags); | ||
406 | __enable_runtime(rq); | ||
407 | spin_unlock_irqrestore(&rq->lock, flags); | ||
408 | } | ||
409 | |||
410 | static int balance_runtime(struct rt_rq *rt_rq) | ||
411 | { | ||
412 | int more = 0; | ||
413 | |||
414 | if (rt_rq->rt_time > rt_rq->rt_runtime) { | ||
415 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
416 | more = do_balance_runtime(rt_rq); | ||
417 | spin_lock(&rt_rq->rt_runtime_lock); | ||
418 | } | ||
419 | |||
420 | return more; | ||
421 | } | ||
422 | #else /* !CONFIG_SMP */ | ||
423 | static inline int balance_runtime(struct rt_rq *rt_rq) | ||
424 | { | ||
425 | return 0; | ||
426 | } | ||
427 | #endif /* CONFIG_SMP */ | ||
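
One detail worth noting in balance_runtime() above: the caller holds rt_rq->rt_runtime_lock, while do_balance_runtime() takes rt_b->rt_runtime_lock before any per-rq lock, so the per-rq lock is dropped and retaken around the call, presumably to keep the rt_b before rt_rq lock ordering consistent with __disable_runtime(). A tiny pthread sketch of that drop-and-retake pattern (illustrative only, not kernel code):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t outer_lock = PTHREAD_MUTEX_INITIALIZER;	/* like rt_b->rt_runtime_lock */
static pthread_mutex_t inner_lock = PTHREAD_MUTEX_INITIALIZER;	/* like rt_rq->rt_runtime_lock */

/* The pair must always be taken in the order outer -> inner. */
static void do_rebalance(void)
{
	pthread_mutex_lock(&outer_lock);
	pthread_mutex_lock(&inner_lock);
	/* ... move budget between runqueues ... */
	pthread_mutex_unlock(&inner_lock);
	pthread_mutex_unlock(&outer_lock);
}

/* Caller already holds inner_lock, so drop it before taking outer_lock. */
static void balance_with_inner_held(void)
{
	pthread_mutex_unlock(&inner_lock);	/* avoid inner -> outer inversion */
	do_rebalance();
	pthread_mutex_lock(&inner_lock);	/* caller expects it held on return */
}

int main(void)
{
	pthread_mutex_lock(&inner_lock);
	balance_with_inner_held();
	pthread_mutex_unlock(&inner_lock);
	printf("done\n");
	return 0;
}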
224 | 428 | ||
225 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | 429 | static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) |
226 | { | 430 | { |
227 | int i, idle = 1; | 431 | int i, idle = 1; |
228 | cpumask_t span; | 432 | cpumask_t span; |
229 | 433 | ||
230 | if (rt_b->rt_runtime == RUNTIME_INF) | 434 | if (!rt_bandwidth_enabled() || rt_b->rt_runtime == RUNTIME_INF) |
231 | return 1; | 435 | return 1; |
232 | 436 | ||
233 | span = sched_rt_period_mask(); | 437 | span = sched_rt_period_mask(); |
@@ -241,6 +445,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
241 | u64 runtime; | 445 | u64 runtime; |
242 | 446 | ||
243 | spin_lock(&rt_rq->rt_runtime_lock); | 447 | spin_lock(&rt_rq->rt_runtime_lock); |
448 | if (rt_rq->rt_throttled) | ||
449 | balance_runtime(rt_rq); | ||
244 | runtime = rt_rq->rt_runtime; | 450 | runtime = rt_rq->rt_runtime; |
245 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); | 451 | rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime); |
246 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { | 452 | if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) { |
@@ -261,47 +467,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun) | |||
261 | return idle; | 467 | return idle; |
262 | } | 468 | } |
263 | 469 | ||
264 | #ifdef CONFIG_SMP | ||
265 | static int balance_runtime(struct rt_rq *rt_rq) | ||
266 | { | ||
267 | struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); | ||
268 | struct root_domain *rd = cpu_rq(smp_processor_id())->rd; | ||
269 | int i, weight, more = 0; | ||
270 | u64 rt_period; | ||
271 | |||
272 | weight = cpus_weight(rd->span); | ||
273 | |||
274 | spin_lock(&rt_b->rt_runtime_lock); | ||
275 | rt_period = ktime_to_ns(rt_b->rt_period); | ||
276 | for_each_cpu_mask(i, rd->span) { | ||
277 | struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i); | ||
278 | s64 diff; | ||
279 | |||
280 | if (iter == rt_rq) | ||
281 | continue; | ||
282 | |||
283 | spin_lock(&iter->rt_runtime_lock); | ||
284 | diff = iter->rt_runtime - iter->rt_time; | ||
285 | if (diff > 0) { | ||
286 | do_div(diff, weight); | ||
287 | if (rt_rq->rt_runtime + diff > rt_period) | ||
288 | diff = rt_period - rt_rq->rt_runtime; | ||
289 | iter->rt_runtime -= diff; | ||
290 | rt_rq->rt_runtime += diff; | ||
291 | more = 1; | ||
292 | if (rt_rq->rt_runtime == rt_period) { | ||
293 | spin_unlock(&iter->rt_runtime_lock); | ||
294 | break; | ||
295 | } | ||
296 | } | ||
297 | spin_unlock(&iter->rt_runtime_lock); | ||
298 | } | ||
299 | spin_unlock(&rt_b->rt_runtime_lock); | ||
300 | |||
301 | return more; | ||
302 | } | ||
303 | #endif | ||
304 | |||
305 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) | 470 | static inline int rt_se_prio(struct sched_rt_entity *rt_se) |
306 | { | 471 | { |
307 | #ifdef CONFIG_RT_GROUP_SCHED | 472 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -318,27 +483,16 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq) | |||
318 | { | 483 | { |
319 | u64 runtime = sched_rt_runtime(rt_rq); | 484 | u64 runtime = sched_rt_runtime(rt_rq); |
320 | 485 | ||
321 | if (runtime == RUNTIME_INF) | ||
322 | return 0; | ||
323 | |||
324 | if (rt_rq->rt_throttled) | 486 | if (rt_rq->rt_throttled) |
325 | return rt_rq_throttled(rt_rq); | 487 | return rt_rq_throttled(rt_rq); |
326 | 488 | ||
327 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) | 489 | if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq)) |
328 | return 0; | 490 | return 0; |
329 | 491 | ||
330 | #ifdef CONFIG_SMP | 492 | balance_runtime(rt_rq); |
331 | if (rt_rq->rt_time > runtime) { | 493 | runtime = sched_rt_runtime(rt_rq); |
332 | int more; | 494 | if (runtime == RUNTIME_INF) |
333 | 495 | return 0; | |
334 | spin_unlock(&rt_rq->rt_runtime_lock); | ||
335 | more = balance_runtime(rt_rq); | ||
336 | spin_lock(&rt_rq->rt_runtime_lock); | ||
337 | |||
338 | if (more) | ||
339 | runtime = sched_rt_runtime(rt_rq); | ||
340 | } | ||
341 | #endif | ||
342 | 496 | ||
343 | if (rt_rq->rt_time > runtime) { | 497 | if (rt_rq->rt_time > runtime) { |
344 | rt_rq->rt_throttled = 1; | 498 | rt_rq->rt_throttled = 1; |
@@ -375,13 +529,18 @@ static void update_curr_rt(struct rq *rq) | |||
375 | curr->se.exec_start = rq->clock; | 529 | curr->se.exec_start = rq->clock; |
376 | cpuacct_charge(curr, delta_exec); | 530 | cpuacct_charge(curr, delta_exec); |
377 | 531 | ||
532 | if (!rt_bandwidth_enabled()) | ||
533 | return; | ||
534 | |||
378 | for_each_sched_rt_entity(rt_se) { | 535 | for_each_sched_rt_entity(rt_se) { |
379 | rt_rq = rt_rq_of_se(rt_se); | 536 | rt_rq = rt_rq_of_se(rt_se); |
380 | 537 | ||
381 | spin_lock(&rt_rq->rt_runtime_lock); | 538 | spin_lock(&rt_rq->rt_runtime_lock); |
382 | rt_rq->rt_time += delta_exec; | 539 | if (sched_rt_runtime(rt_rq) != RUNTIME_INF) { |
383 | if (sched_rt_runtime_exceeded(rt_rq)) | 540 | rt_rq->rt_time += delta_exec; |
384 | resched_task(curr); | 541 | if (sched_rt_runtime_exceeded(rt_rq)) |
542 | resched_task(curr); | ||
543 | } | ||
385 | spin_unlock(&rt_rq->rt_runtime_lock); | 544 | spin_unlock(&rt_rq->rt_runtime_lock); |
386 | } | 545 | } |
387 | } | 546 | } |
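
As a rough model of the accounting path in update_curr_rt() above (purely illustrative, no kernel APIs): execution time is charged to a group only while bandwidth control is enabled and the group's runtime is finite, and a group that exceeds its budget is throttled and triggers a reschedule.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_RUNTIME_INF	INT64_MAX	/* stand-in for RUNTIME_INF */

struct toy_rt_rq {
	int64_t rt_runtime;	/* budget (ns), or TOY_RUNTIME_INF */
	int64_t rt_time;	/* consumed (ns) */
	bool	rt_throttled;
};

/* Charge delta_exec to one group; returns true if the caller should resched. */
static bool toy_update_curr_rt(struct toy_rt_rq *rt_rq, int64_t delta_exec,
			       bool bandwidth_enabled)
{
	if (!bandwidth_enabled || rt_rq->rt_runtime == TOY_RUNTIME_INF)
		return false;			/* accounting is a no-op */

	rt_rq->rt_time += delta_exec;
	if (!rt_rq->rt_throttled && rt_rq->rt_time > rt_rq->rt_runtime) {
		rt_rq->rt_throttled = true;	/* out of budget: throttle */
		return true;			/* caller would resched_task(curr) */
	}
	return false;
}

int main(void)
{
	struct toy_rt_rq rq = { .rt_runtime = 950000000, .rt_time = 900000000 };

	printf("resched needed: %d\n", toy_update_curr_rt(&rq, 100000000, true));
	return 0;
}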
@@ -392,12 +551,23 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
392 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 551 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
393 | rt_rq->rt_nr_running++; | 552 | rt_rq->rt_nr_running++; |
394 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED | 553 | #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED |
395 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) | 554 | if (rt_se_prio(rt_se) < rt_rq->highest_prio) { |
555 | #ifdef CONFIG_SMP | ||
556 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
557 | #endif | ||
558 | |||
396 | rt_rq->highest_prio = rt_se_prio(rt_se); | 559 | rt_rq->highest_prio = rt_se_prio(rt_se); |
560 | #ifdef CONFIG_SMP | ||
561 | if (rq->online) | ||
562 | cpupri_set(&rq->rd->cpupri, rq->cpu, | ||
563 | rt_se_prio(rt_se)); | ||
564 | #endif | ||
565 | } | ||
397 | #endif | 566 | #endif |
398 | #ifdef CONFIG_SMP | 567 | #ifdef CONFIG_SMP |
399 | if (rt_se->nr_cpus_allowed > 1) { | 568 | if (rt_se->nr_cpus_allowed > 1) { |
400 | struct rq *rq = rq_of_rt_rq(rt_rq); | 569 | struct rq *rq = rq_of_rt_rq(rt_rq); |
570 | |||
401 | rq->rt.rt_nr_migratory++; | 571 | rq->rt.rt_nr_migratory++; |
402 | } | 572 | } |
403 | 573 | ||
@@ -417,6 +587,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
417 | static inline | 587 | static inline |
418 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | 588 | void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) |
419 | { | 589 | { |
590 | #ifdef CONFIG_SMP | ||
591 | int highest_prio = rt_rq->highest_prio; | ||
592 | #endif | ||
593 | |||
420 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); | 594 | WARN_ON(!rt_prio(rt_se_prio(rt_se))); |
421 | WARN_ON(!rt_rq->rt_nr_running); | 595 | WARN_ON(!rt_rq->rt_nr_running); |
422 | rt_rq->rt_nr_running--; | 596 | rt_rq->rt_nr_running--; |
@@ -440,6 +614,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq) | |||
440 | rq->rt.rt_nr_migratory--; | 614 | rq->rt.rt_nr_migratory--; |
441 | } | 615 | } |
442 | 616 | ||
617 | if (rt_rq->highest_prio != highest_prio) { | ||
618 | struct rq *rq = rq_of_rt_rq(rt_rq); | ||
619 | |||
620 | if (rq->online) | ||
621 | cpupri_set(&rq->rd->cpupri, rq->cpu, | ||
622 | rt_rq->highest_prio); | ||
623 | } | ||
624 | |||
443 | update_rt_migration(rq_of_rt_rq(rt_rq)); | 625 | update_rt_migration(rq_of_rt_rq(rt_rq)); |
444 | #endif /* CONFIG_SMP */ | 626 | #endif /* CONFIG_SMP */ |
445 | #ifdef CONFIG_RT_GROUP_SCHED | 627 | #ifdef CONFIG_RT_GROUP_SCHED |
@@ -455,6 +637,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
455 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); | 637 | struct rt_rq *rt_rq = rt_rq_of_se(rt_se); |
456 | struct rt_prio_array *array = &rt_rq->active; | 638 | struct rt_prio_array *array = &rt_rq->active; |
457 | struct rt_rq *group_rq = group_rt_rq(rt_se); | 639 | struct rt_rq *group_rq = group_rt_rq(rt_se); |
640 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | ||
458 | 641 | ||
459 | /* | 642 | /* |
460 | * Don't enqueue the group if its throttled, or when empty. | 643 | * Don't enqueue the group if its throttled, or when empty. |
@@ -465,7 +648,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se) | |||
465 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) | 648 | if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running)) |
466 | return; | 649 | return; |
467 | 650 | ||
468 | list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se)); | 651 | list_add_tail(&rt_se->run_list, queue); |
469 | __set_bit(rt_se_prio(rt_se), array->bitmap); | 652 | __set_bit(rt_se_prio(rt_se), array->bitmap); |
470 | 653 | ||
471 | inc_rt_tasks(rt_se, rt_rq); | 654 | inc_rt_tasks(rt_se, rt_rq); |
@@ -532,6 +715,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup) | |||
532 | rt_se->timeout = 0; | 715 | rt_se->timeout = 0; |
533 | 716 | ||
534 | enqueue_rt_entity(rt_se); | 717 | enqueue_rt_entity(rt_se); |
718 | |||
719 | inc_cpu_load(rq, p->se.load.weight); | ||
535 | } | 720 | } |
536 | 721 | ||
537 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | 722 | static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) |
@@ -540,36 +725,42 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep) | |||
540 | 725 | ||
541 | update_curr_rt(rq); | 726 | update_curr_rt(rq); |
542 | dequeue_rt_entity(rt_se); | 727 | dequeue_rt_entity(rt_se); |
728 | |||
729 | dec_cpu_load(rq, p->se.load.weight); | ||
543 | } | 730 | } |
544 | 731 | ||
545 | /* | 732 | /* |
546 | * Put task to the end of the run list without the overhead of dequeue | 733 | * Put task to the end of the run list without the overhead of dequeue |
547 | * followed by enqueue. | 734 | * followed by enqueue. |
548 | */ | 735 | */ |
549 | static | 736 | static void |
550 | void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se) | 737 | requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head) |
551 | { | 738 | { |
552 | struct rt_prio_array *array = &rt_rq->active; | 739 | if (on_rt_rq(rt_se)) { |
553 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | 740 | struct rt_prio_array *array = &rt_rq->active; |
741 | struct list_head *queue = array->queue + rt_se_prio(rt_se); | ||
554 | 742 | ||
555 | if (on_rt_rq(rt_se)) | 743 | if (head) |
556 | list_move_tail(&rt_se->run_list, queue); | 744 | list_move(&rt_se->run_list, queue); |
745 | else | ||
746 | list_move_tail(&rt_se->run_list, queue); | ||
747 | } | ||
557 | } | 748 | } |
558 | 749 | ||
559 | static void requeue_task_rt(struct rq *rq, struct task_struct *p) | 750 | static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head) |
560 | { | 751 | { |
561 | struct sched_rt_entity *rt_se = &p->rt; | 752 | struct sched_rt_entity *rt_se = &p->rt; |
562 | struct rt_rq *rt_rq; | 753 | struct rt_rq *rt_rq; |
563 | 754 | ||
564 | for_each_sched_rt_entity(rt_se) { | 755 | for_each_sched_rt_entity(rt_se) { |
565 | rt_rq = rt_rq_of_se(rt_se); | 756 | rt_rq = rt_rq_of_se(rt_se); |
566 | requeue_rt_entity(rt_rq, rt_se); | 757 | requeue_rt_entity(rt_rq, rt_se, head); |
567 | } | 758 | } |
568 | } | 759 | } |
569 | 760 | ||
570 | static void yield_task_rt(struct rq *rq) | 761 | static void yield_task_rt(struct rq *rq) |
571 | { | 762 | { |
572 | requeue_task_rt(rq, rq->curr); | 763 | requeue_task_rt(rq, rq->curr, 0); |
573 | } | 764 | } |
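
A toy model of the new head/tail distinction in requeue_rt_entity(), not the kernel's list_move()/list_move_tail() implementation (the names and the array-based queue are invented): head puts the entity first among tasks of its priority so it runs next, while the tail variant is the classic yield behaviour used by yield_task_rt().

#include <stdio.h>
#include <string.h>

#define MAX 8

/* Toy FIFO of task names at one RT priority level. */
struct toy_queue {
	const char *task[MAX];
	int n;
};

static void remove_at(struct toy_queue *q, int idx)
{
	memmove(&q->task[idx], &q->task[idx + 1],
		(q->n - idx - 1) * sizeof(q->task[0]));
	q->n--;
}

/* Model of requeue_rt_entity(..., head): head puts the task first in its
 * priority list (it runs next), !head puts it last (like yield_task_rt()). */
static void toy_requeue(struct toy_queue *q, const char *name, int head)
{
	int idx;

	for (idx = 0; idx < q->n && strcmp(q->task[idx], name); idx++)
		;
	if (idx == q->n)
		return;			/* not queued, nothing to do */
	remove_at(q, idx);
	if (head) {
		memmove(&q->task[1], &q->task[0], q->n * sizeof(q->task[0]));
		q->task[0] = name;
	} else {
		q->task[q->n] = name;
	}
	q->n++;
}

int main(void)
{
	struct toy_queue q = { .task = { "A", "B", "C" }, .n = 3 };

	toy_requeue(&q, "A", 0);	/* yield: A goes to the tail -> B C A */
	toy_requeue(&q, "C", 1);	/* head requeue: C runs next -> C B A */
	for (int i = 0; i < q.n; i++)
		printf("%s ", q.task[i]);
	printf("\n");
	return 0;
}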
574 | 765 | ||
575 | #ifdef CONFIG_SMP | 766 | #ifdef CONFIG_SMP |
@@ -609,15 +800,58 @@ static int select_task_rq_rt(struct task_struct *p, int sync) | |||
609 | */ | 800 | */ |
610 | return task_cpu(p); | 801 | return task_cpu(p); |
611 | } | 802 | } |
803 | |||
804 | static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p) | ||
805 | { | ||
806 | cpumask_t mask; | ||
807 | |||
808 | if (rq->curr->rt.nr_cpus_allowed == 1) | ||
809 | return; | ||
810 | |||
811 | if (p->rt.nr_cpus_allowed != 1 | ||
812 | && cpupri_find(&rq->rd->cpupri, p, &mask)) | ||
813 | return; | ||
814 | |||
815 | if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask)) | ||
816 | return; | ||
817 | |||
818 | /* | ||
819 | * There appear to be other cpus that can accept | ||
820 | * current and none to run 'p', so let's reschedule | ||
821 | * to try and push current away: | ||
822 | */ | ||
823 | requeue_task_rt(rq, p, 1); | ||
824 | resched_task(rq->curr); | ||
825 | } | ||
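
The decision made by check_preempt_equal_prio() above can be summarised as a pure predicate. The sketch below is illustrative only; the two cpupri_find() calls are replaced by hypothetical booleans saying whether a lower-priority CPU exists for the task in question.

#include <stdbool.h>
#include <stdio.h>

/*
 * Toy version of the check_preempt_equal_prio() decision: push current away
 * only when it is migratable and has somewhere to go, while the newly woken
 * task 'p' is effectively pinned to this CPU.
 */
static bool toy_should_push_current(int curr_cpus_allowed, bool curr_has_other_cpu,
				    int p_cpus_allowed, bool p_has_other_cpu)
{
	if (curr_cpus_allowed == 1)
		return false;		/* current is pinned: nothing to push */
	if (p_cpus_allowed != 1 && p_has_other_cpu)
		return false;		/* p can simply run somewhere else */
	if (!curr_has_other_cpu)
		return false;		/* no lower-priority CPU accepts current */
	return true;			/* requeue p at the head and resched current */
}

int main(void)
{
	/* current can migrate and has a target; p is pinned to this CPU */
	printf("%d\n", toy_should_push_current(4, true, 1, false));
	return 0;
}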
826 | |||
612 | #endif /* CONFIG_SMP */ | 827 | #endif /* CONFIG_SMP */ |
613 | 828 | ||
614 | /* | 829 | /* |
615 | * Preempt the current task with a newly woken task if needed: | 830 | * Preempt the current task with a newly woken task if needed: |
616 | */ | 831 | */ |
617 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p) | 832 | static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int sync) |
618 | { | 833 | { |
619 | if (p->prio < rq->curr->prio) | 834 | if (p->prio < rq->curr->prio) { |
620 | resched_task(rq->curr); | 835 | resched_task(rq->curr); |
836 | return; | ||
837 | } | ||
838 | |||
839 | #ifdef CONFIG_SMP | ||
840 | /* | ||
841 | * If: | ||
842 | * | ||
843 | * - the newly woken task is of equal priority to the current task | ||
844 | * - the newly woken task is non-migratable while current is migratable | ||
845 | * - current will be preempted on the next reschedule | ||
846 | * | ||
847 | * we should check to see if current can readily move to a different | ||
848 | * cpu. If so, we will reschedule to allow the push logic to try | ||
849 | * to move current somewhere else, making room for our non-migratable | ||
850 | * task. | ||
851 | */ | ||
852 | if (p->prio == rq->curr->prio && !need_resched()) | ||
853 | check_preempt_equal_prio(rq, p); | ||
854 | #endif | ||
621 | } | 855 | } |
622 | 856 | ||
623 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, | 857 | static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq, |
@@ -674,6 +908,8 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p) | |||
674 | #define RT_MAX_TRIES 3 | 908 | #define RT_MAX_TRIES 3 |
675 | 909 | ||
676 | static int double_lock_balance(struct rq *this_rq, struct rq *busiest); | 910 | static int double_lock_balance(struct rq *this_rq, struct rq *busiest); |
911 | static void double_unlock_balance(struct rq *this_rq, struct rq *busiest); | ||
912 | |||
677 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); | 913 | static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep); |
678 | 914 | ||
679 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) | 915 | static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu) |
@@ -720,73 +956,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu) | |||
720 | 956 | ||
721 | static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); | 957 | static DEFINE_PER_CPU(cpumask_t, local_cpu_mask); |
722 | 958 | ||
723 | static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask) | ||
724 | { | ||
725 | int lowest_prio = -1; | ||
726 | int lowest_cpu = -1; | ||
727 | int count = 0; | ||
728 | int cpu; | ||
729 | |||
730 | cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed); | ||
731 | |||
732 | /* | ||
733 | * Scan each rq for the lowest prio. | ||
734 | */ | ||
735 | for_each_cpu_mask(cpu, *lowest_mask) { | ||
736 | struct rq *rq = cpu_rq(cpu); | ||
737 | |||
738 | /* We look for lowest RT prio or non-rt CPU */ | ||
739 | if (rq->rt.highest_prio >= MAX_RT_PRIO) { | ||
740 | /* | ||
741 | * if we already found a low RT queue | ||
742 | * and now we found this non-rt queue | ||
743 | * clear the mask and set our bit. | ||
744 | * Otherwise just return the queue as is | ||
745 | * and the count==1 will cause the algorithm | ||
746 | * to use the first bit found. | ||
747 | */ | ||
748 | if (lowest_cpu != -1) { | ||
749 | cpus_clear(*lowest_mask); | ||
750 | cpu_set(rq->cpu, *lowest_mask); | ||
751 | } | ||
752 | return 1; | ||
753 | } | ||
754 | |||
755 | /* no locking for now */ | ||
756 | if ((rq->rt.highest_prio > task->prio) | ||
757 | && (rq->rt.highest_prio >= lowest_prio)) { | ||
758 | if (rq->rt.highest_prio > lowest_prio) { | ||
759 | /* new low - clear old data */ | ||
760 | lowest_prio = rq->rt.highest_prio; | ||
761 | lowest_cpu = cpu; | ||
762 | count = 0; | ||
763 | } | ||
764 | count++; | ||
765 | } else | ||
766 | cpu_clear(cpu, *lowest_mask); | ||
767 | } | ||
768 | |||
769 | /* | ||
770 | * Clear out all the set bits that represent | ||
771 | * runqueues that were of higher prio than | ||
772 | * the lowest_prio. | ||
773 | */ | ||
774 | if (lowest_cpu > 0) { | ||
775 | /* | ||
776 | * Perhaps we could add another cpumask op to | ||
777 | * zero out bits. Like cpu_zero_bits(cpumask, nrbits); | ||
778 | * Then that could be optimized to use memset and such. | ||
779 | */ | ||
780 | for_each_cpu_mask(cpu, *lowest_mask) { | ||
781 | if (cpu >= lowest_cpu) | ||
782 | break; | ||
783 | cpu_clear(cpu, *lowest_mask); | ||
784 | } | ||
785 | } | ||
786 | |||
787 | return count; | ||
788 | } | ||
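
The linear scan above is removed in favour of cpupri_find(), which find_lowest_rq() calls further down. The cpupri helpers themselves are not part of this file, so the following is only a rough, hypothetical model of the idea: keep one CPU mask per priority level and scan from the least important level upward until a level below the task's own priority intersects its affinity mask. The priority numbering here is simplified (larger means more important), unlike the kernel's actual mapping.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_NR_PRI	102	/* roughly: idle, normal, and the RT levels */

/* pri_to_cpu[pri] is a bitmask of CPUs whose most important runnable task
 * sits at level 'pri'; a lower index means less important work runs there. */
struct toy_cpupri {
	uint32_t pri_to_cpu[TOY_NR_PRI];
};

/* Return true and fill *lowest_mask if some allowed CPU runs at a level
 * strictly below task_pri. */
static bool toy_cpupri_find(const struct toy_cpupri *cp, int task_pri,
			    uint32_t cpus_allowed, uint32_t *lowest_mask)
{
	for (int pri = 0; pri < task_pri; pri++) {
		uint32_t match = cp->pri_to_cpu[pri] & cpus_allowed;

		if (match) {
			*lowest_mask = match;
			return true;
		}
	}
	return false;
}

int main(void)
{
	struct toy_cpupri cp = {{ 0 }};
	uint32_t mask;

	cp.pri_to_cpu[1] = 0x0c;	/* CPUs 2 and 3 run nothing important */
	if (toy_cpupri_find(&cp, 50, 0x0f, &mask))
		printf("candidate CPUs: 0x%x\n", mask);
	return 0;
}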
789 | |||
790 | static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) | 959 | static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask) |
791 | { | 960 | { |
792 | int first; | 961 | int first; |
@@ -808,17 +977,19 @@ static int find_lowest_rq(struct task_struct *task) | |||
808 | cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); | 977 | cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask); |
809 | int this_cpu = smp_processor_id(); | 978 | int this_cpu = smp_processor_id(); |
810 | int cpu = task_cpu(task); | 979 | int cpu = task_cpu(task); |
811 | int count = find_lowest_cpus(task, lowest_mask); | ||
812 | 980 | ||
813 | if (!count) | 981 | if (task->rt.nr_cpus_allowed == 1) |
982 | return -1; /* No other targets possible */ | ||
983 | |||
984 | if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask)) | ||
814 | return -1; /* No targets found */ | 985 | return -1; /* No targets found */ |
815 | 986 | ||
816 | /* | 987 | /* |
817 | * There is no sense in performing an optimal search if only one | 988 | * Only consider CPUs that are usable for migration. |
818 | * target is found. | 989 | * I guess we might want to change cpupri_find() to ignore those |
990 | * in the first place. | ||
819 | */ | 991 | */ |
820 | if (count == 1) | 992 | cpus_and(*lowest_mask, *lowest_mask, cpu_active_map); |
821 | return first_cpu(*lowest_mask); | ||
822 | 993 | ||
823 | /* | 994 | /* |
824 | * At this point we have built a mask of cpus representing the | 995 | * At this point we have built a mask of cpus representing the |
@@ -900,7 +1071,7 @@ static struct rq *find_lock_lowest_rq(struct task_struct *task, struct rq *rq) | |||
900 | break; | 1071 | break; |
901 | 1072 | ||
902 | /* try again */ | 1073 | /* try again */ |
903 | spin_unlock(&lowest_rq->lock); | 1074 | double_unlock_balance(rq, lowest_rq); |
904 | lowest_rq = NULL; | 1075 | lowest_rq = NULL; |
905 | } | 1076 | } |
906 | 1077 | ||
@@ -969,7 +1140,7 @@ static int push_rt_task(struct rq *rq) | |||
969 | 1140 | ||
970 | resched_task(lowest_rq->curr); | 1141 | resched_task(lowest_rq->curr); |
971 | 1142 | ||
972 | spin_unlock(&lowest_rq->lock); | 1143 | double_unlock_balance(rq, lowest_rq); |
973 | 1144 | ||
974 | ret = 1; | 1145 | ret = 1; |
975 | out: | 1146 | out: |
@@ -1006,7 +1177,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
1006 | 1177 | ||
1007 | next = pick_next_task_rt(this_rq); | 1178 | next = pick_next_task_rt(this_rq); |
1008 | 1179 | ||
1009 | for_each_cpu_mask(cpu, this_rq->rd->rto_mask) { | 1180 | for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) { |
1010 | if (this_cpu == cpu) | 1181 | if (this_cpu == cpu) |
1011 | continue; | 1182 | continue; |
1012 | 1183 | ||
@@ -1075,7 +1246,7 @@ static int pull_rt_task(struct rq *this_rq) | |||
1075 | 1246 | ||
1076 | } | 1247 | } |
1077 | skip: | 1248 | skip: |
1078 | spin_unlock(&src_rq->lock); | 1249 | double_unlock_balance(this_rq, src_rq); |
1079 | } | 1250 | } |
1080 | 1251 | ||
1081 | return ret; | 1252 | return ret; |
@@ -1163,17 +1334,25 @@ static void set_cpus_allowed_rt(struct task_struct *p, | |||
1163 | } | 1334 | } |
1164 | 1335 | ||
1165 | /* Assumes rq->lock is held */ | 1336 | /* Assumes rq->lock is held */ |
1166 | static void join_domain_rt(struct rq *rq) | 1337 | static void rq_online_rt(struct rq *rq) |
1167 | { | 1338 | { |
1168 | if (rq->rt.overloaded) | 1339 | if (rq->rt.overloaded) |
1169 | rt_set_overload(rq); | 1340 | rt_set_overload(rq); |
1341 | |||
1342 | __enable_runtime(rq); | ||
1343 | |||
1344 | cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio); | ||
1170 | } | 1345 | } |
1171 | 1346 | ||
1172 | /* Assumes rq->lock is held */ | 1347 | /* Assumes rq->lock is held */ |
1173 | static void leave_domain_rt(struct rq *rq) | 1348 | static void rq_offline_rt(struct rq *rq) |
1174 | { | 1349 | { |
1175 | if (rq->rt.overloaded) | 1350 | if (rq->rt.overloaded) |
1176 | rt_clear_overload(rq); | 1351 | rt_clear_overload(rq); |
1352 | |||
1353 | __disable_runtime(rq); | ||
1354 | |||
1355 | cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID); | ||
1177 | } | 1356 | } |
1178 | 1357 | ||
1179 | /* | 1358 | /* |
@@ -1306,7 +1485,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) | |||
1306 | * on the queue: | 1485 | * on the queue: |
1307 | */ | 1486 | */ |
1308 | if (p->rt.run_list.prev != p->rt.run_list.next) { | 1487 | if (p->rt.run_list.prev != p->rt.run_list.next) { |
1309 | requeue_task_rt(rq, p); | 1488 | requeue_task_rt(rq, p, 0); |
1310 | set_tsk_need_resched(p); | 1489 | set_tsk_need_resched(p); |
1311 | } | 1490 | } |
1312 | } | 1491 | } |
@@ -1336,8 +1515,8 @@ static const struct sched_class rt_sched_class = { | |||
1336 | .load_balance = load_balance_rt, | 1515 | .load_balance = load_balance_rt, |
1337 | .move_one_task = move_one_task_rt, | 1516 | .move_one_task = move_one_task_rt, |
1338 | .set_cpus_allowed = set_cpus_allowed_rt, | 1517 | .set_cpus_allowed = set_cpus_allowed_rt, |
1339 | .join_domain = join_domain_rt, | 1518 | .rq_online = rq_online_rt, |
1340 | .leave_domain = leave_domain_rt, | 1519 | .rq_offline = rq_offline_rt, |
1341 | .pre_schedule = pre_schedule_rt, | 1520 | .pre_schedule = pre_schedule_rt, |
1342 | .post_schedule = post_schedule_rt, | 1521 | .post_schedule = post_schedule_rt, |
1343 | .task_wake_up = task_wake_up_rt, | 1522 | .task_wake_up = task_wake_up_rt, |
@@ -1350,3 +1529,17 @@ static const struct sched_class rt_sched_class = { | |||
1350 | .prio_changed = prio_changed_rt, | 1529 | .prio_changed = prio_changed_rt, |
1351 | .switched_to = switched_to_rt, | 1530 | .switched_to = switched_to_rt, |
1352 | }; | 1531 | }; |
1532 | |||
1533 | #ifdef CONFIG_SCHED_DEBUG | ||
1534 | extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); | ||
1535 | |||
1536 | static void print_rt_stats(struct seq_file *m, int cpu) | ||
1537 | { | ||
1538 | struct rt_rq *rt_rq; | ||
1539 | |||
1540 | rcu_read_lock(); | ||
1541 | for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu)) | ||
1542 | print_rt_rq(m, cpu, rt_rq); | ||
1543 | rcu_read_unlock(); | ||
1544 | } | ||
1545 | #endif /* CONFIG_SCHED_DEBUG */ | ||