 kernel/sched.c    |   2
 kernel/sched_rt.c | 187
 2 files changed, 178 insertions(+), 11 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 97cab609fc..c917971079 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3721,6 +3721,8 @@ need_resched_nonpreemptible:
 		switch_count = &prev->nvcsw;
 	}
 
+	schedule_balance_rt(rq, prev);
+
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
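The new hook runs in schedule() after the previous task has been accounted for but before idle_balance(), so a runqueue whose priority is about to drop can try to pull a higher-priority RT task before it falls back to a lower-priority task or to idle balancing. Below is a minimal user-space sketch of that ordering and of the condition schedule_balance_rt() checks later in this patch; the types and the schedule_slow_path() helper are stand-ins, not kernel code.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in types; lower prio value = higher priority, as in the kernel. */
struct rq { int nr_running; int rt_highest_prio; };
struct task { bool is_rt; int prio; };

static void pull_rt_task(struct rq *rq) { (void)rq; printf("pull RT tasks\n"); }
static void idle_balance(struct rq *rq) { (void)rq; printf("idle balance\n"); }

static void schedule_balance_rt(struct rq *rq, struct task *prev)
{
        /* Pull only if an RT task is leaving and the runqueue's remaining
         * top priority is numerically higher (i.e. lower priority). */
        if (prev->is_rt && rq->rt_highest_prio > prev->prio)
                pull_rt_task(rq);
}

static void schedule_slow_path(struct rq *rq, struct task *prev)
{
        schedule_balance_rt(rq, prev);  /* new hook: runs first */
        if (!rq->nr_running)
                idle_balance(rq);       /* existing idle-balance step */
}

int main(void)
{
        struct rq rq = { .nr_running = 0, .rt_highest_prio = 90 };
        struct task prev = { .is_rt = true, .prio = 10 };

        schedule_slow_path(&rq, &prev);
        return 0;
}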
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 547f858b07..bacb32039e 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -179,8 +179,17 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
+static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
+{
+	if (!task_running(rq, p) &&
+	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+		return 1;
+	return 0;
+}
+
 /* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
+						      int cpu)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
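pick_rt_task() is the pullability test used by the rewritten search: a candidate must not be currently running on its runqueue, and if a target CPU is given it must be in the task's affinity mask; a negative cpu (the push path passes -1 further down) skips the affinity check. A user-space sketch of the same predicate, with a plain bitmask standing in for cpus_allowed, not the kernel types:

#include <stdio.h>

/* User-space stand-ins: cpus_allowed is a plain bitmask and "running" is a
 * flag on the task itself. */
struct task { int pid; int running; unsigned long cpus_allowed; };

static int pick_rt_task(const struct task *p, int cpu)
{
        return !p->running &&
               (cpu < 0 || (p->cpus_allowed & (1UL << cpu)));
}

int main(void)
{
        struct task p = { .pid = 42, .running = 0, .cpus_allowed = 1UL << 2 };

        printf("push path (cpu = -1): %d\n", pick_rt_task(&p, -1)); /* 1 */
        printf("pull to cpu 0       : %d\n", pick_rt_task(&p, 0));  /* 0 */
        printf("pull to cpu 2       : %d\n", pick_rt_task(&p, 2));  /* 1 */
        return 0;
}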
@@ -199,26 +208,36 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
 	}
 
 	queue = array->queue + idx;
+	BUG_ON(list_empty(queue));
+
 	next = list_entry(queue->next, struct task_struct, run_list);
-	if (unlikely(next != rq->curr))
-		return next;
+	if (unlikely(pick_rt_task(rq, next, cpu)))
+		goto out;
 
 	if (queue->next->next != queue) {
 		/* same prio task */
 		next = list_entry(queue->next->next, struct task_struct, run_list);
-		return next;
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
 	}
 
+ retry:
 	/* slower, but more flexible */
 	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-	if (unlikely(idx >= MAX_RT_PRIO)) {
-		WARN_ON(1); /* rt_nr_running was 2 and above! */
+	if (unlikely(idx >= MAX_RT_PRIO))
 		return NULL;
-	}
 
 	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
+	BUG_ON(list_empty(queue));
+
+	list_for_each_entry(next, queue, run_list) {
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
+	}
+
+	goto retry;
 
+ out:
 	return next;
 }
 
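The rewritten pick_next_highest_task_rt() first tries the head (and, on a priority tie, the second entry) of the highest non-empty queue, then falls back to walking each lower-priority queue in bitmap order until pick_rt_task() accepts a candidate; if the bitmap runs out it returns NULL instead of warning. A compressed user-space model of that fallback walk follows; fixed-size arrays stand in for the priority lists and pullable stands in for pick_rt_task(), so this is only a sketch of the control flow.

#include <stdio.h>
#include <stddef.h>

#define MAX_PRIO 8      /* stands in for MAX_RT_PRIO */
#define PER_PRIO 4

struct task { int pid; int pullable; };

static struct task *pick_next_highest(struct task *queue[MAX_PRIO][PER_PRIO],
                                      int start_idx)
{
        for (int idx = start_idx; idx < MAX_PRIO; idx++) {      /* the "retry" walk */
                for (int i = 0; i < PER_PRIO && queue[idx][i]; i++) {
                        if (queue[idx][i]->pullable)
                                return queue[idx][i];           /* like "goto out" */
                }
        }
        return NULL;    /* ran past MAX_PRIO: nothing pullable */
}

int main(void)
{
        struct task a = { 1, 0 }, b = { 2, 1 };
        struct task *queue[MAX_PRIO][PER_PRIO] = { { NULL } };

        queue[0][0] = &a;       /* highest priority, but not pullable */
        queue[3][0] = &b;       /* lower priority, pullable */

        struct task *p = pick_next_highest(queue, 0);
        printf("picked pid %d\n", p ? p->pid : -1);     /* picked pid 2 */
        return 0;
}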
@@ -305,13 +324,15 @@ static int push_rt_task(struct rq *this_rq)
 
 	assert_spin_locked(&this_rq->lock);
 
-	next_task = pick_next_highest_task_rt(this_rq);
+	next_task = pick_next_highest_task_rt(this_rq, -1);
 	if (!next_task)
 		return 0;
 
  retry:
-	if (unlikely(next_task == this_rq->curr))
+	if (unlikely(next_task == this_rq->curr)) {
+		WARN_ON(1);
 		return 0;
+	}
 
 	/*
 	 * It's possible that the next_task slipped in of
@@ -335,7 +356,7 @@ static int push_rt_task(struct rq *this_rq)
 	 * so it is possible that next_task has changed.
 	 * If it has, then try again.
 	 */
-	task = pick_next_highest_task_rt(this_rq);
+	task = pick_next_highest_task_rt(this_rq, -1);
 	if (unlikely(task != next_task) && task && paranoid--) {
 		put_task_struct(next_task);
 		next_task = task;
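On the push side the selection now passes -1, meaning "any CPU may receive this task", and the candidate-equals-current case becomes a warned-but-tolerated early return. The surrounding retry logic re-picks the candidate after double_lock_balance() may have dropped the lock, bounded by a small "paranoid" counter. A minimal user-space sketch of that bounded re-validation pattern; pick_candidate() is a hypothetical stand-in for pick_next_highest_task_rt(rq, -1):

#include <stdio.h>

/* Stand-in for "pick the best candidate right now"; in the kernel the result
 * can change whenever the runqueue lock has been dropped. */
static int pick_candidate(int attempt)
{
        /* Pretend the candidate changes once and then stabilizes. */
        return attempt == 0 ? 100 : 200;
}

int main(void)
{
        int paranoid = 3;               /* bound on re-validation retries */
        int attempt = 0;
        int candidate = pick_candidate(attempt++);

        for (;;) {
                /* ... lock may be dropped here, candidate may be stale ... */
                int now = pick_candidate(attempt++);
                if (now != candidate && paranoid--) {
                        candidate = now;        /* retry with the fresh pick */
                        continue;
                }
                break;
        }
        printf("pushing candidate %d\n", candidate);
        return 0;
}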
@@ -378,6 +399,149 @@ static void push_rt_tasks(struct rq *rq)
 		;
 }
 
+static int pull_rt_task(struct rq *this_rq)
+{
+	struct task_struct *next;
+	struct task_struct *p;
+	struct rq *src_rq;
+	cpumask_t *rto_cpumask;
+	int this_cpu = this_rq->cpu;
+	int cpu;
+	int ret = 0;
+
+	assert_spin_locked(&this_rq->lock);
+
+	/*
+	 * If cpusets are used, and we have overlapping
+	 * run queue cpusets, then this algorithm may not catch all.
+	 * This is just the price you pay on trying to keep
+	 * dirtying caches down on large SMP machines.
+	 */
+	if (likely(!rt_overloaded()))
+		return 0;
+
+	next = pick_next_task_rt(this_rq);
+
+	rto_cpumask = rt_overload();
+
+	for_each_cpu_mask(cpu, *rto_cpumask) {
+		if (this_cpu == cpu)
+			continue;
+
+		src_rq = cpu_rq(cpu);
+		if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
+			/*
+			 * It is possible that overlapping cpusets
+			 * will miss clearing a non overloaded runqueue.
+			 * Clear it now.
+			 */
+			if (double_lock_balance(this_rq, src_rq)) {
+				/* unlocked our runqueue lock */
+				struct task_struct *old_next = next;
+				next = pick_next_task_rt(this_rq);
+				if (next != old_next)
+					ret = 1;
+			}
+			if (likely(src_rq->rt.rt_nr_running <= 1))
+				/*
+				 * Small chance that this_rq->curr changed
+				 * but it's really harmless here.
+				 */
+				rt_clear_overload(this_rq);
+			else
+				/*
+				 * Heh, the src_rq is now overloaded, since
+				 * we already have the src_rq lock, go straight
+				 * to pulling tasks from it.
+				 */
+				goto try_pulling;
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+		/*
+		 * We can potentially drop this_rq's lock in
+		 * double_lock_balance, and another CPU could
+		 * steal our next task - hence we must cause
+		 * the caller to recalculate the next task
+		 * in that case:
+		 */
+		if (double_lock_balance(this_rq, src_rq)) {
+			struct task_struct *old_next = next;
+			next = pick_next_task_rt(this_rq);
+			if (next != old_next)
+				ret = 1;
+		}
+
+		/*
+		 * Are there still pullable RT tasks?
+		 */
+		if (src_rq->rt.rt_nr_running <= 1) {
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+ try_pulling:
+		p = pick_next_highest_task_rt(src_rq, this_cpu);
+
+		/*
+		 * Do we have an RT task that preempts
+		 * the to-be-scheduled task?
+		 */
+		if (p && (!next || (p->prio < next->prio))) {
+			WARN_ON(p == src_rq->curr);
+			WARN_ON(!p->se.on_rq);
+
+			/*
+			 * There's a chance that p is higher in priority
+			 * than what's currently running on its cpu.
+			 * This is just that p is waking up and hasn't
+			 * had a chance to schedule. We only pull
+			 * p if it is lower in priority than the
+			 * current task on the run queue or
+			 * this_rq next task is lower in prio than
+			 * the current task on that rq.
+			 */
+			if (p->prio < src_rq->curr->prio ||
+			    (next && next->prio < src_rq->curr->prio))
+				goto bail;
+
+			ret = 1;
+
+			deactivate_task(src_rq, p, 0);
+			set_task_cpu(p, this_cpu);
+			activate_task(this_rq, p, 0);
+			/*
+			 * We continue with the search, just in
+			 * case there's an even higher prio task
+			 * in another runqueue. (low likelihood
+			 * but possible)
+			 */
+
+			/*
+			 * Update next so that we won't pick a task
+			 * on another cpu with a priority lower (or equal)
+			 * than the one we just picked.
+			 */
+			next = p;
+
+		}
+ bail:
+		spin_unlock(&src_rq->lock);
+	}
+
+	return ret;
+}
+
+static void schedule_balance_rt(struct rq *rq,
+				struct task_struct *prev)
+{
+	/* Try to pull RT tasks here if we lower this rq's prio */
+	if (unlikely(rt_task(prev)) &&
+	    rq->rt.highest_prio > prev->prio)
+		pull_rt_task(rq);
+}
+
 static void schedule_tail_balance_rt(struct rq *rq)
 {
 	/*
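pull_rt_task() only runs when rt_overload() reports other runqueues with more than one runnable RT task; for each such source it double-locks the pair (re-evaluating next if this runqueue's lock was dropped), asks pick_next_highest_task_rt(src_rq, this_cpu) for an affine candidate, and migrates at most one task per source, but only if that task would beat what this runqueue is about to run and is not itself about to preempt on its own CPU. schedule_balance_rt() arms all of this from schedule() when an RT prev task leaves a runqueue whose remaining top priority is lower. A simplified user-space model of the per-source pull decision follows; the names are stand-ins, and the locking, re-pick, and the extra next-versus-src_curr check in the patch are deliberately omitted.

#include <stdio.h>
#include <stddef.h>

/* Lower prio value = higher priority, as in the kernel. */
struct task { const char *name; int prio; };

/*
 * Decide whether to pull candidate p from a source runqueue whose current
 * task is src_curr, given that this runqueue would otherwise run "next"
 * (or nothing, if next is NULL). Returns the task to pull, or NULL.
 */
static struct task *consider_pull(struct task *p, struct task *next,
                                  struct task *src_curr)
{
        if (!p)
                return NULL;
        if (next && p->prio >= next->prio)
                return NULL;    /* would not raise this runqueue's priority */
        if (p->prio < src_curr->prio)
                return NULL;    /* p is waking and will preempt on its own CPU */
        return p;
}

int main(void)
{
        struct task src_curr = { "src-curr",  10 };
        struct task cand     = { "candidate", 20 };
        struct task next     = { "our-next",  30 };

        struct task *p = consider_pull(&cand, &next, &src_curr);
        printf("pull: %s\n", p ? p->name : "(nothing)");       /* pull: candidate */
        return 0;
}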
@@ -500,6 +664,7 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 }
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq) do { } while (0)
+# define schedule_balance_rt(rq, prev) do { } while (0)
 #endif /* CONFIG_SMP */
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
