author	Steven Rostedt <srostedt@redhat.com>	2008-01-25 15:08:07 -0500
committer	Ingo Molnar <mingo@elte.hu>	2008-01-25 15:08:07 -0500
commit	f65eda4f789168ba5ff3fa75546c29efeed19f58 (patch)
tree	235e6daad2bc37b22cc5b21907608c79f944f036 /kernel
parent	4fd29176b7cd24909f8ceba2105cb3ae2857b90c (diff)
sched: pull RT tasks from overloaded runqueues
This patch adds the algorithm to pull tasks from RT overloaded runqueues. When a pull is initiated, all overloaded runqueues are examined for an RT task that is higher in priority than the highest-priority task queued on the target runqueue. If such a task is found, it is pulled to the target runqueue.

Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
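For orientation, the core of the pull decision can be sketched roughly as follows. This is an illustrative reduction, not the patch itself: locking, the double_lock_balance() retry path, the overload-clearing branch, and the check against the source runqueue's current task are omitted, and the function name pull_rt_task_sketch is hypothetical. The complete logic is in the kernel/sched_rt.c hunk below.

/* Illustrative sketch only -- simplified from pull_rt_task() in the patch below. */
static int pull_rt_task_sketch(struct rq *this_rq)
{
	struct task_struct *next = pick_next_task_rt(this_rq);
	struct task_struct *p;
	int cpu, ret = 0;

	/* walk every runqueue currently marked RT-overloaded */
	for_each_cpu_mask(cpu, *rt_overload()) {
		struct rq *src_rq = cpu_rq(cpu);

		if (cpu == this_rq->cpu || src_rq->rt.rt_nr_running <= 1)
			continue;

		/* highest-prio queued RT task on src_rq that may run on this CPU */
		p = pick_next_highest_task_rt(src_rq, this_rq->cpu);

		/* pull it only if it beats what this_rq would run next */
		if (p && (!next || p->prio < next->prio)) {
			deactivate_task(src_rq, p, 0);
			set_task_cpu(p, this_rq->cpu);
			activate_task(this_rq, p, 0);
			next = p;	/* keep searching for an even higher prio task */
			ret = 1;
		}
	}
	return ret;
}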
Diffstat (limited to 'kernel')
-rw-r--r--	kernel/sched.c	2
-rw-r--r--	kernel/sched_rt.c	187
2 files changed, 178 insertions(+), 11 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index 97cab609fc31..c91797107913 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -3721,6 +3721,8 @@ need_resched_nonpreemptible:
 		switch_count = &prev->nvcsw;
 	}
 
+	schedule_balance_rt(rq, prev);
+
 	if (unlikely(!rq->nr_running))
 		idle_balance(cpu, rq);
 
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 547f858b0752..bacb32039e95 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -179,8 +179,17 @@ static void put_prev_task_rt(struct rq *rq, struct task_struct *p)
 static int double_lock_balance(struct rq *this_rq, struct rq *busiest);
 static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep);
 
+static int pick_rt_task(struct rq *rq, struct task_struct *p, int cpu)
+{
+	if (!task_running(rq, p) &&
+	    (cpu < 0 || cpu_isset(cpu, p->cpus_allowed)))
+		return 1;
+	return 0;
+}
+
 /* Return the second highest RT task, NULL otherwise */
-static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
+static struct task_struct *pick_next_highest_task_rt(struct rq *rq,
+						     int cpu)
 {
 	struct rt_prio_array *array = &rq->rt.active;
 	struct task_struct *next;
@@ -199,26 +208,36 @@ static struct task_struct *pick_next_highest_task_rt(struct rq *rq)
 	}
 
 	queue = array->queue + idx;
+	BUG_ON(list_empty(queue));
+
 	next = list_entry(queue->next, struct task_struct, run_list);
-	if (unlikely(next != rq->curr))
-		return next;
+	if (unlikely(pick_rt_task(rq, next, cpu)))
+		goto out;
 
 	if (queue->next->next != queue) {
 		/* same prio task */
 		next = list_entry(queue->next->next, struct task_struct, run_list);
-		return next;
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
 	}
 
+ retry:
 	/* slower, but more flexible */
 	idx = find_next_bit(array->bitmap, MAX_RT_PRIO, idx+1);
-	if (unlikely(idx >= MAX_RT_PRIO)) {
-		WARN_ON(1); /* rt_nr_running was 2 and above! */
+	if (unlikely(idx >= MAX_RT_PRIO))
 		return NULL;
-	}
 
 	queue = array->queue + idx;
-	next = list_entry(queue->next, struct task_struct, run_list);
+	BUG_ON(list_empty(queue));
+
+	list_for_each_entry(next, queue, run_list) {
+		if (pick_rt_task(rq, next, cpu))
+			goto out;
+	}
+
+	goto retry;
 
+ out:
 	return next;
 }
 
@@ -305,13 +324,15 @@ static int push_rt_task(struct rq *this_rq)
 
 	assert_spin_locked(&this_rq->lock);
 
-	next_task = pick_next_highest_task_rt(this_rq);
+	next_task = pick_next_highest_task_rt(this_rq, -1);
 	if (!next_task)
 		return 0;
 
  retry:
-	if (unlikely(next_task == this_rq->curr))
+	if (unlikely(next_task == this_rq->curr)) {
+		WARN_ON(1);
 		return 0;
+	}
 
 	/*
 	 * It's possible that the next_task slipped in of
@@ -335,7 +356,7 @@ static int push_rt_task(struct rq *this_rq)
 	 * so it is possible that next_task has changed.
 	 * If it has, then try again.
 	 */
-	task = pick_next_highest_task_rt(this_rq);
+	task = pick_next_highest_task_rt(this_rq, -1);
 	if (unlikely(task != next_task) && task && paranoid--) {
 		put_task_struct(next_task);
 		next_task = task;
@@ -378,6 +399,149 @@ static void push_rt_tasks(struct rq *rq)
 		;
 }
 
+static int pull_rt_task(struct rq *this_rq)
+{
+	struct task_struct *next;
+	struct task_struct *p;
+	struct rq *src_rq;
+	cpumask_t *rto_cpumask;
+	int this_cpu = this_rq->cpu;
+	int cpu;
+	int ret = 0;
+
+	assert_spin_locked(&this_rq->lock);
+
+	/*
+	 * If cpusets are used, and we have overlapping
+	 * run queue cpusets, then this algorithm may not catch all.
+	 * This is just the price you pay on trying to keep
+	 * dirtying caches down on large SMP machines.
+	 */
+	if (likely(!rt_overloaded()))
+		return 0;
+
+	next = pick_next_task_rt(this_rq);
+
+	rto_cpumask = rt_overload();
+
+	for_each_cpu_mask(cpu, *rto_cpumask) {
+		if (this_cpu == cpu)
+			continue;
+
+		src_rq = cpu_rq(cpu);
+		if (unlikely(src_rq->rt.rt_nr_running <= 1)) {
+			/*
+			 * It is possible that overlapping cpusets
+			 * will miss clearing a non overloaded runqueue.
+			 * Clear it now.
+			 */
+			if (double_lock_balance(this_rq, src_rq)) {
+				/* unlocked our runqueue lock */
+				struct task_struct *old_next = next;
+				next = pick_next_task_rt(this_rq);
+				if (next != old_next)
+					ret = 1;
+			}
+			if (likely(src_rq->rt.rt_nr_running <= 1))
+				/*
+				 * Small chance that this_rq->curr changed
+				 * but it's really harmless here.
+				 */
+				rt_clear_overload(this_rq);
+			else
+				/*
+				 * Heh, the src_rq is now overloaded, since
+				 * we already have the src_rq lock, go straight
+				 * to pulling tasks from it.
+				 */
+				goto try_pulling;
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+		/*
+		 * We can potentially drop this_rq's lock in
+		 * double_lock_balance, and another CPU could
+		 * steal our next task - hence we must cause
+		 * the caller to recalculate the next task
+		 * in that case:
+		 */
+		if (double_lock_balance(this_rq, src_rq)) {
+			struct task_struct *old_next = next;
+			next = pick_next_task_rt(this_rq);
+			if (next != old_next)
+				ret = 1;
+		}
+
+		/*
+		 * Are there still pullable RT tasks?
+		 */
+		if (src_rq->rt.rt_nr_running <= 1) {
+			spin_unlock(&src_rq->lock);
+			continue;
+		}
+
+ try_pulling:
+		p = pick_next_highest_task_rt(src_rq, this_cpu);
+
+		/*
+		 * Do we have an RT task that preempts
+		 * the to-be-scheduled task?
+		 */
+		if (p && (!next || (p->prio < next->prio))) {
+			WARN_ON(p == src_rq->curr);
+			WARN_ON(!p->se.on_rq);
+
+			/*
+			 * There's a chance that p is higher in priority
+			 * than what's currently running on its cpu.
+			 * This is just that p is wakeing up and hasn't
+			 * had a chance to schedule. We only pull
+			 * p if it is lower in priority than the
+			 * current task on the run queue or
+			 * this_rq next task is lower in prio than
+			 * the current task on that rq.
+			 */
+			if (p->prio < src_rq->curr->prio ||
+			    (next && next->prio < src_rq->curr->prio))
+				goto bail;
+
+			ret = 1;
+
+			deactivate_task(src_rq, p, 0);
+			set_task_cpu(p, this_cpu);
+			activate_task(this_rq, p, 0);
+			/*
+			 * We continue with the search, just in
+			 * case there's an even higher prio task
+			 * in another runqueue. (low likelyhood
+			 * but possible)
+			 */
+
+			/*
+			 * Update next so that we won't pick a task
+			 * on another cpu with a priority lower (or equal)
+			 * than the one we just picked.
+			 */
+			next = p;
+
+		}
+ bail:
+		spin_unlock(&src_rq->lock);
+	}
+
+	return ret;
+}
+
+static void schedule_balance_rt(struct rq *rq,
+				struct task_struct *prev)
+{
+	/* Try to pull RT tasks here if we lower this rq's prio */
+	if (unlikely(rt_task(prev)) &&
+	    rq->rt.highest_prio > prev->prio)
+		pull_rt_task(rq);
+}
+
 static void schedule_tail_balance_rt(struct rq *rq)
 {
 	/*
@@ -500,6 +664,7 @@ move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 }
 #else /* CONFIG_SMP */
 # define schedule_tail_balance_rt(rq)	do { } while (0)
+# define schedule_balance_rt(rq, prev)	do { } while (0)
 #endif /* CONFIG_SMP */
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)