Diffstat (limited to 'kernel/sched_rt.c')
-rw-r--r--  kernel/sched_rt.c | 436
1 file changed, 290 insertions(+), 146 deletions(-)
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 0f3c19197fa4..f85a76363eee 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -12,6 +12,9 @@ static inline int rt_overloaded(struct rq *rq)
 
 static inline void rt_set_overload(struct rq *rq)
 {
+        if (!rq->online)
+                return;
+
         cpu_set(rq->cpu, rq->rd->rto_mask);
         /*
          * Make sure the mask is visible before we set
@@ -26,6 +29,9 @@ static inline void rt_set_overload(struct rq *rq)
 
 static inline void rt_clear_overload(struct rq *rq)
 {
+        if (!rq->online)
+                return;
+
         /* the order here really doesn't matter */
         atomic_dec(&rq->rd->rto_count);
         cpu_clear(rq->cpu, rq->rd->rto_mask);
@@ -155,7 +161,7 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
         return &rt_rq->tg->rt_bandwidth;
 }
 
-#else
+#else /* !CONFIG_RT_GROUP_SCHED */
 
 static inline u64 sched_rt_runtime(struct rt_rq *rt_rq)
 {
@@ -220,7 +226,160 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)
         return &def_rt_bandwidth;
 }
 
-#endif
+#endif /* CONFIG_RT_GROUP_SCHED */
+
+#ifdef CONFIG_SMP
+static int do_balance_runtime(struct rt_rq *rt_rq)
+{
+        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+        struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
+        int i, weight, more = 0;
+        u64 rt_period;
+
+        weight = cpus_weight(rd->span);
+
+        spin_lock(&rt_b->rt_runtime_lock);
+        rt_period = ktime_to_ns(rt_b->rt_period);
+        for_each_cpu_mask_nr(i, rd->span) {
+                struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+                s64 diff;
+
+                if (iter == rt_rq)
+                        continue;
+
+                spin_lock(&iter->rt_runtime_lock);
+                if (iter->rt_runtime == RUNTIME_INF)
+                        goto next;
+
+                diff = iter->rt_runtime - iter->rt_time;
+                if (diff > 0) {
+                        do_div(diff, weight);
+                        if (rt_rq->rt_runtime + diff > rt_period)
+                                diff = rt_period - rt_rq->rt_runtime;
+                        iter->rt_runtime -= diff;
+                        rt_rq->rt_runtime += diff;
+                        more = 1;
+                        if (rt_rq->rt_runtime == rt_period) {
+                                spin_unlock(&iter->rt_runtime_lock);
+                                break;
+                        }
+                }
+next:
+                spin_unlock(&iter->rt_runtime_lock);
+        }
+        spin_unlock(&rt_b->rt_runtime_lock);
+
+        return more;
+}
+
+static void __disable_runtime(struct rq *rq)
+{
+        struct root_domain *rd = rq->rd;
+        struct rt_rq *rt_rq;
+
+        if (unlikely(!scheduler_running))
+                return;
+
+        for_each_leaf_rt_rq(rt_rq, rq) {
+                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+                s64 want;
+                int i;
+
+                spin_lock(&rt_b->rt_runtime_lock);
+                spin_lock(&rt_rq->rt_runtime_lock);
+                if (rt_rq->rt_runtime == RUNTIME_INF ||
+                    rt_rq->rt_runtime == rt_b->rt_runtime)
+                        goto balanced;
+                spin_unlock(&rt_rq->rt_runtime_lock);
+
+                want = rt_b->rt_runtime - rt_rq->rt_runtime;
+
+                for_each_cpu_mask(i, rd->span) {
+                        struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
+                        s64 diff;
+
+                        if (iter == rt_rq)
+                                continue;
+
+                        spin_lock(&iter->rt_runtime_lock);
+                        if (want > 0) {
+                                diff = min_t(s64, iter->rt_runtime, want);
+                                iter->rt_runtime -= diff;
+                                want -= diff;
+                        } else {
+                                iter->rt_runtime -= want;
+                                want -= want;
+                        }
+                        spin_unlock(&iter->rt_runtime_lock);
+
+                        if (!want)
+                                break;
+                }
+
+                spin_lock(&rt_rq->rt_runtime_lock);
+                BUG_ON(want);
+balanced:
+                rt_rq->rt_runtime = RUNTIME_INF;
+                spin_unlock(&rt_rq->rt_runtime_lock);
+                spin_unlock(&rt_b->rt_runtime_lock);
+        }
+}
+
+static void disable_runtime(struct rq *rq)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&rq->lock, flags);
+        __disable_runtime(rq);
+        spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static void __enable_runtime(struct rq *rq)
+{
+        struct rt_rq *rt_rq;
+
+        if (unlikely(!scheduler_running))
+                return;
+
+        for_each_leaf_rt_rq(rt_rq, rq) {
+                struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
+
+                spin_lock(&rt_b->rt_runtime_lock);
+                spin_lock(&rt_rq->rt_runtime_lock);
+                rt_rq->rt_runtime = rt_b->rt_runtime;
+                rt_rq->rt_time = 0;
+                spin_unlock(&rt_rq->rt_runtime_lock);
+                spin_unlock(&rt_b->rt_runtime_lock);
+        }
+}
+
+static void enable_runtime(struct rq *rq)
+{
+        unsigned long flags;
+
+        spin_lock_irqsave(&rq->lock, flags);
+        __enable_runtime(rq);
+        spin_unlock_irqrestore(&rq->lock, flags);
+}
+
+static int balance_runtime(struct rt_rq *rt_rq)
+{
+        int more = 0;
+
+        if (rt_rq->rt_time > rt_rq->rt_runtime) {
+                spin_unlock(&rt_rq->rt_runtime_lock);
+                more = do_balance_runtime(rt_rq);
+                spin_lock(&rt_rq->rt_runtime_lock);
+        }
+
+        return more;
+}
+#else /* !CONFIG_SMP */
+static inline int balance_runtime(struct rt_rq *rt_rq)
+{
+        return 0;
+}
+#endif /* CONFIG_SMP */
 
 static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
 {
@@ -241,6 +400,8 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
                 u64 runtime;
 
                 spin_lock(&rt_rq->rt_runtime_lock);
+                if (rt_rq->rt_throttled)
+                        balance_runtime(rt_rq);
                 runtime = rt_rq->rt_runtime;
                 rt_rq->rt_time -= min(rt_rq->rt_time, overrun*runtime);
                 if (rt_rq->rt_throttled && rt_rq->rt_time < runtime) {
@@ -261,47 +422,6 @@ static int do_sched_rt_period_timer(struct rt_bandwidth *rt_b, int overrun)
         return idle;
 }
 
-#ifdef CONFIG_SMP
-static int balance_runtime(struct rt_rq *rt_rq)
-{
-        struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq);
-        struct root_domain *rd = cpu_rq(smp_processor_id())->rd;
-        int i, weight, more = 0;
-        u64 rt_period;
-
-        weight = cpus_weight(rd->span);
-
-        spin_lock(&rt_b->rt_runtime_lock);
-        rt_period = ktime_to_ns(rt_b->rt_period);
-        for_each_cpu_mask(i, rd->span) {
-                struct rt_rq *iter = sched_rt_period_rt_rq(rt_b, i);
-                s64 diff;
-
-                if (iter == rt_rq)
-                        continue;
-
-                spin_lock(&iter->rt_runtime_lock);
-                diff = iter->rt_runtime - iter->rt_time;
-                if (diff > 0) {
-                        do_div(diff, weight);
-                        if (rt_rq->rt_runtime + diff > rt_period)
-                                diff = rt_period - rt_rq->rt_runtime;
-                        iter->rt_runtime -= diff;
-                        rt_rq->rt_runtime += diff;
-                        more = 1;
-                        if (rt_rq->rt_runtime == rt_period) {
-                                spin_unlock(&iter->rt_runtime_lock);
-                                break;
-                        }
-                }
-                spin_unlock(&iter->rt_runtime_lock);
-        }
-        spin_unlock(&rt_b->rt_runtime_lock);
-
-        return more;
-}
-#endif
-
 static inline int rt_se_prio(struct sched_rt_entity *rt_se)
 {
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -327,18 +447,10 @@ static int sched_rt_runtime_exceeded(struct rt_rq *rt_rq)
         if (sched_rt_runtime(rt_rq) >= sched_rt_period(rt_rq))
                 return 0;
 
-#ifdef CONFIG_SMP
-        if (rt_rq->rt_time > runtime) {
-                int more;
-
-                spin_unlock(&rt_rq->rt_runtime_lock);
-                more = balance_runtime(rt_rq);
-                spin_lock(&rt_rq->rt_runtime_lock);
-
-                if (more)
-                        runtime = sched_rt_runtime(rt_rq);
-        }
-#endif
+        balance_runtime(rt_rq);
+        runtime = sched_rt_runtime(rt_rq);
+        if (runtime == RUNTIME_INF)
+                return 0;
 
         if (rt_rq->rt_time > runtime) {
                 rt_rq->rt_throttled = 1;
@@ -392,12 +504,23 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
         WARN_ON(!rt_prio(rt_se_prio(rt_se)));
         rt_rq->rt_nr_running++;
 #if defined CONFIG_SMP || defined CONFIG_RT_GROUP_SCHED
-        if (rt_se_prio(rt_se) < rt_rq->highest_prio)
+        if (rt_se_prio(rt_se) < rt_rq->highest_prio) {
+#ifdef CONFIG_SMP
+                struct rq *rq = rq_of_rt_rq(rt_rq);
+#endif
+
                 rt_rq->highest_prio = rt_se_prio(rt_se);
+#ifdef CONFIG_SMP
+                if (rq->online)
+                        cpupri_set(&rq->rd->cpupri, rq->cpu,
+                                   rt_se_prio(rt_se));
+#endif
+        }
 #endif
 #ifdef CONFIG_SMP
         if (rt_se->nr_cpus_allowed > 1) {
                 struct rq *rq = rq_of_rt_rq(rt_rq);
+
                 rq->rt.rt_nr_migratory++;
         }
 
@@ -417,6 +540,10 @@ void inc_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 static inline
 void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
 {
+#ifdef CONFIG_SMP
+        int highest_prio = rt_rq->highest_prio;
+#endif
+
         WARN_ON(!rt_prio(rt_se_prio(rt_se)));
         WARN_ON(!rt_rq->rt_nr_running);
         rt_rq->rt_nr_running--;
@@ -440,6 +567,14 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
                 rq->rt.rt_nr_migratory--;
         }
 
+        if (rt_rq->highest_prio != highest_prio) {
+                struct rq *rq = rq_of_rt_rq(rt_rq);
+
+                if (rq->online)
+                        cpupri_set(&rq->rd->cpupri, rq->cpu,
+                                   rt_rq->highest_prio);
+        }
+
         update_rt_migration(rq_of_rt_rq(rt_rq));
 #endif /* CONFIG_SMP */
 #ifdef CONFIG_RT_GROUP_SCHED
@@ -455,6 +590,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
         struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
         struct rt_prio_array *array = &rt_rq->active;
         struct rt_rq *group_rq = group_rt_rq(rt_se);
+        struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
         /*
          * Don't enqueue the group if its throttled, or when empty.
@@ -465,7 +601,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
         if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
                 return;
 
-        list_add_tail(&rt_se->run_list, array->queue + rt_se_prio(rt_se));
+        list_add_tail(&rt_se->run_list, queue);
         __set_bit(rt_se_prio(rt_se), array->bitmap);
 
         inc_rt_tasks(rt_se, rt_rq);
@@ -532,6 +668,8 @@ static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
                 rt_se->timeout = 0;
 
         enqueue_rt_entity(rt_se);
+
+        inc_cpu_load(rq, p->se.load.weight);
 }
 
 static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
@@ -540,36 +678,42 @@ static void dequeue_task_rt(struct rq *rq, struct task_struct *p, int sleep)
 
         update_curr_rt(rq);
         dequeue_rt_entity(rt_se);
+
+        dec_cpu_load(rq, p->se.load.weight);
 }
 
 /*
  * Put task to the end of the run list without the overhead of dequeue
  * followed by enqueue.
  */
-static
-void requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se)
+static void
+requeue_rt_entity(struct rt_rq *rt_rq, struct sched_rt_entity *rt_se, int head)
 {
-        struct rt_prio_array *array = &rt_rq->active;
-        struct list_head *queue = array->queue + rt_se_prio(rt_se);
+        if (on_rt_rq(rt_se)) {
+                struct rt_prio_array *array = &rt_rq->active;
+                struct list_head *queue = array->queue + rt_se_prio(rt_se);
 
-        if (on_rt_rq(rt_se))
-                list_move_tail(&rt_se->run_list, queue);
+                if (head)
+                        list_move(&rt_se->run_list, queue);
+                else
+                        list_move_tail(&rt_se->run_list, queue);
+        }
 }
 
-static void requeue_task_rt(struct rq *rq, struct task_struct *p)
+static void requeue_task_rt(struct rq *rq, struct task_struct *p, int head)
 {
         struct sched_rt_entity *rt_se = &p->rt;
         struct rt_rq *rt_rq;
 
         for_each_sched_rt_entity(rt_se) {
                 rt_rq = rt_rq_of_se(rt_se);
-                requeue_rt_entity(rt_rq, rt_se);
+                requeue_rt_entity(rt_rq, rt_se, head);
         }
 }
 
 static void yield_task_rt(struct rq *rq)
 {
-        requeue_task_rt(rq, rq->curr);
+        requeue_task_rt(rq, rq->curr, 0);
 }
 
 #ifdef CONFIG_SMP
@@ -609,6 +753,30 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
          */
         return task_cpu(p);
 }
+
+static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
+{
+        cpumask_t mask;
+
+        if (rq->curr->rt.nr_cpus_allowed == 1)
+                return;
+
+        if (p->rt.nr_cpus_allowed != 1
+            && cpupri_find(&rq->rd->cpupri, p, &mask))
+                return;
+
+        if (!cpupri_find(&rq->rd->cpupri, rq->curr, &mask))
+                return;
+
+        /*
+         * There appears to be other cpus that can accept
+         * current and none to run 'p', so lets reschedule
+         * to try and push current away:
+         */
+        requeue_task_rt(rq, p, 1);
+        resched_task(rq->curr);
+}
+
 #endif /* CONFIG_SMP */
 
 /*
@@ -616,8 +784,27 @@ static int select_task_rq_rt(struct task_struct *p, int sync)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p)
 {
-        if (p->prio < rq->curr->prio)
+        if (p->prio < rq->curr->prio) {
                 resched_task(rq->curr);
+                return;
+        }
+
+#ifdef CONFIG_SMP
+        /*
+         * If:
+         *
+         * - the newly woken task is of equal priority to the current task
+         * - the newly woken task is non-migratable while current is migratable
+         * - current will be preempted on the next reschedule
+         *
+         * we should check to see if current can readily move to a different
+         * cpu. If so, we will reschedule to allow the push logic to try
+         * to move current somewhere else, making room for our non-migratable
+         * task.
+         */
+        if (p->prio == rq->curr->prio && !need_resched())
+                check_preempt_equal_prio(rq, p);
+#endif
 }
 
 static struct sched_rt_entity *pick_next_rt_entity(struct rq *rq,
@@ -720,73 +907,6 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq, int cpu)
 
 static DEFINE_PER_CPU(cpumask_t, local_cpu_mask);
 
-static int find_lowest_cpus(struct task_struct *task, cpumask_t *lowest_mask)
-{
-        int lowest_prio = -1;
-        int lowest_cpu = -1;
-        int count = 0;
-        int cpu;
-
-        cpus_and(*lowest_mask, task_rq(task)->rd->online, task->cpus_allowed);
-
-        /*
-         * Scan each rq for the lowest prio.
-         */
-        for_each_cpu_mask(cpu, *lowest_mask) {
-                struct rq *rq = cpu_rq(cpu);
-
-                /* We look for lowest RT prio or non-rt CPU */
-                if (rq->rt.highest_prio >= MAX_RT_PRIO) {
-                        /*
-                         * if we already found a low RT queue
-                         * and now we found this non-rt queue
-                         * clear the mask and set our bit.
-                         * Otherwise just return the queue as is
-                         * and the count==1 will cause the algorithm
-                         * to use the first bit found.
-                         */
-                        if (lowest_cpu != -1) {
-                                cpus_clear(*lowest_mask);
-                                cpu_set(rq->cpu, *lowest_mask);
-                        }
-                        return 1;
-                }
-
-                /* no locking for now */
-                if ((rq->rt.highest_prio > task->prio)
-                    && (rq->rt.highest_prio >= lowest_prio)) {
-                        if (rq->rt.highest_prio > lowest_prio) {
-                                /* new low - clear old data */
-                                lowest_prio = rq->rt.highest_prio;
-                                lowest_cpu = cpu;
-                                count = 0;
-                        }
-                        count++;
-                } else
-                        cpu_clear(cpu, *lowest_mask);
-        }
-
-        /*
-         * Clear out all the set bits that represent
-         * runqueues that were of higher prio than
-         * the lowest_prio.
-         */
-        if (lowest_cpu > 0) {
-                /*
-                 * Perhaps we could add another cpumask op to
-                 * zero out bits. Like cpu_zero_bits(cpumask, nrbits);
-                 * Then that could be optimized to use memset and such.
-                 */
-                for_each_cpu_mask(cpu, *lowest_mask) {
-                        if (cpu >= lowest_cpu)
-                                break;
-                        cpu_clear(cpu, *lowest_mask);
-                }
-        }
-
-        return count;
-}
-
 static inline int pick_optimal_cpu(int this_cpu, cpumask_t *mask)
 {
         int first;
@@ -808,17 +928,19 @@ static int find_lowest_rq(struct task_struct *task)
         cpumask_t *lowest_mask = &__get_cpu_var(local_cpu_mask);
         int this_cpu = smp_processor_id();
         int cpu = task_cpu(task);
-        int count = find_lowest_cpus(task, lowest_mask);
 
-        if (!count)
+        if (task->rt.nr_cpus_allowed == 1)
+                return -1; /* No other targets possible */
+
+        if (!cpupri_find(&task_rq(task)->rd->cpupri, task, lowest_mask))
                 return -1; /* No targets found */
 
         /*
-         * There is no sense in performing an optimal search if only one
-         * target is found.
+         * Only consider CPUs that are usable for migration.
+         * I guess we might want to change cpupri_find() to ignore those
+         * in the first place.
          */
-        if (count == 1)
-                return first_cpu(*lowest_mask);
+        cpus_and(*lowest_mask, *lowest_mask, cpu_active_map);
 
         /*
          * At this point we have built a mask of cpus representing the
@@ -1006,7 +1128,7 @@ static int pull_rt_task(struct rq *this_rq)
 
         next = pick_next_task_rt(this_rq);
 
-        for_each_cpu_mask(cpu, this_rq->rd->rto_mask) {
+        for_each_cpu_mask_nr(cpu, this_rq->rd->rto_mask) {
                 if (this_cpu == cpu)
                         continue;
 
@@ -1163,17 +1285,25 @@ static void set_cpus_allowed_rt(struct task_struct *p,
 }
 
 /* Assumes rq->lock is held */
-static void join_domain_rt(struct rq *rq)
+static void rq_online_rt(struct rq *rq)
 {
         if (rq->rt.overloaded)
                 rt_set_overload(rq);
+
+        __enable_runtime(rq);
+
+        cpupri_set(&rq->rd->cpupri, rq->cpu, rq->rt.highest_prio);
 }
 
 /* Assumes rq->lock is held */
-static void leave_domain_rt(struct rq *rq)
+static void rq_offline_rt(struct rq *rq)
 {
         if (rq->rt.overloaded)
                 rt_clear_overload(rq);
+
+        __disable_runtime(rq);
+
+        cpupri_set(&rq->rd->cpupri, rq->cpu, CPUPRI_INVALID);
 }
 
 /*
@@ -1306,7 +1436,7 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued)
          * on the queue:
          */
         if (p->rt.run_list.prev != p->rt.run_list.next) {
-                requeue_task_rt(rq, p);
+                requeue_task_rt(rq, p, 0);
                 set_tsk_need_resched(p);
         }
 }
@@ -1336,8 +1466,8 @@ static const struct sched_class rt_sched_class = {
         .load_balance = load_balance_rt,
         .move_one_task = move_one_task_rt,
         .set_cpus_allowed = set_cpus_allowed_rt,
-        .join_domain = join_domain_rt,
-        .leave_domain = leave_domain_rt,
+        .rq_online = rq_online_rt,
+        .rq_offline = rq_offline_rt,
         .pre_schedule = pre_schedule_rt,
         .post_schedule = post_schedule_rt,
         .task_wake_up = task_wake_up_rt,
@@ -1350,3 +1480,17 @@ static const struct sched_class rt_sched_class = {
         .prio_changed = prio_changed_rt,
         .switched_to = switched_to_rt,
 };
+
+#ifdef CONFIG_SCHED_DEBUG
+extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq);
+
+static void print_rt_stats(struct seq_file *m, int cpu)
+{
+        struct rt_rq *rt_rq;
+
+        rcu_read_lock();
+        for_each_leaf_rt_rq(rt_rq, cpu_rq(cpu))
+                print_rt_rq(m, cpu, rt_rq);
+        rcu_read_unlock();
+}
+#endif /* CONFIG_SCHED_DEBUG */
