aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched_fair.c
diff options
context:
space:
mode:
author: Gregory Haskins <ghaskins@novell.com> 2008-01-25 15:08:09 -0500
committer: Ingo Molnar <mingo@elte.hu> 2008-01-25 15:08:09 -0500
commit: e7693a362ec84bb5b6fd441d8a8b4b9d568a7a0c (patch)
tree: 078940540641a59aaf199695bfc6de3f062a987b /kernel/sched_fair.c
parent: 697f0a487f294e634a342764472b79375bb3158a (diff)
sched: de-SCHED_OTHER-ize the RT path
The current wake-up code path tries to determine if it can optimize the wake-up to "this_cpu" by computing load calculations. The problem is that these calculations are only relevant to SCHED_OTHER tasks where load is king. For RT tasks, priority is king. So the load calculation is completely wasted bandwidth.

Therefore, we create a new sched_class interface to help with pre-wakeup routing decisions and move the load calculation as a function of CFS task's class.

Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- kernel/sched_fair.c 148
1 files changed, 148 insertions, 0 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 5c208e090ae4..f881fc5e035c 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -861,6 +861,151 @@ static void yield_task_fair(struct rq *rq)
861} 861}
862 862
863/* 863/*
864 * wake_idle() will wake a task on an idle cpu if task->cpu is
865 * not idle and an idle cpu is available. The span of cpus to
866 * search starts with cpus closest then further out as needed,
867 * so we always favor a closer, idle cpu.
868 *
869 * Returns the CPU we should wake onto.
870 */
#if defined(ARCH_HAS_SCHED_WAKE_IDLE)
static int wake_idle(int cpu, struct task_struct *p)
{
	struct sched_domain *domain;
	cpumask_t allowed;
	int candidate;

	/*
	 * An idle @cpu is already the best place to run the task, so there
	 * is nothing to search for.
	 */
	if (idle_cpu(cpu))
		return cpu;

	/*
	 * Also stay on @cpu when it already has more than one task: its
	 * siblings are (in most cases) busy too, since they have not pulled
	 * the extra load away, so inspecting their runqueues would only
	 * cost checks and cache misses.
	 */
	if (cpu_rq(cpu)->nr_running > 1)
		return cpu;

	for_each_domain(cpu, domain) {
		/* Stop at the first domain that is not flagged SD_WAKE_IDLE. */
		if (!(domain->flags & SD_WAKE_IDLE))
			break;

		/* Only CPUs in this domain that the task may run on qualify. */
		cpus_and(allowed, domain->span, p->cpus_allowed);
		for_each_cpu_mask(candidate, allowed) {
			if (!idle_cpu(candidate))
				continue;

			/* Count the wakeup only if it moves the task. */
			if (candidate != task_cpu(p)) {
				schedstat_inc(p, se.nr_wakeups_idle);
			}
			return candidate;
		}
	}

	/* No idle CPU found: keep the caller's choice. */
	return cpu;
}
#else
/* Without ARCH_HAS_SCHED_WAKE_IDLE no search is done; @cpu is returned as is. */
static inline int wake_idle(int cpu, struct task_struct *p)
{
	return cpu;
}
#endif
914
#ifdef CONFIG_SMP
/*
 * Pick the CPU a waking task @p should be placed on.
 *
 * The choice is between the CPU the task is currently assigned to
 * (task_cpu(p)) and the CPU executing the wakeup.  The waking CPU is
 * preferred when an affine wakeup or passive balancing is justified by
 * the load figures; otherwise the task stays where it was.  Either way
 * the result is passed through wake_idle() before being returned.
 *
 * @sync: non-zero for a synchronous wakeup; the current task's weight is
 *        then discounted from the waking CPU's load.
 */
static int select_task_rq_fair(struct task_struct *p, int sync)
{
	int prev_cpu = task_cpu(p);
	struct rq *prev_rq = task_rq(p);
	int this_cpu = smp_processor_id();
	struct sched_domain *iter, *shared_sd = NULL;
	unsigned long prev_load, this_load;
	unsigned int threshold;
	int idx;

	/*
	 * Find the first domain of this_cpu (in for_each_domain order)
	 * whose span also contains the task's CPU.
	 */
	for_each_domain(this_cpu, iter) {
		if (!cpu_isset(prev_cpu, iter->span))
			continue;
		shared_sd = iter;
		break;
	}

	/* The task is not allowed on the waking CPU: leave it in place. */
	if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed)))
		return wake_idle(prev_cpu, p);

	/* No common domain, so no affine/passive balancing to consider. */
	if (!shared_sd)
		return wake_idle(prev_cpu, p);

	/*
	 * Check for affine wakeup and passive balancing possibilities.
	 * The threshold is half of the domain's imbalance_pct excess.
	 */
	idx = shared_sd->wake_idx;
	threshold = 100 + (shared_sd->imbalance_pct - 100) / 2;

	prev_load = source_load(prev_cpu, idx);
	this_load = target_load(this_cpu, idx);

	if (shared_sd->flags & SD_WAKE_AFFINE) {
		unsigned long tl = this_load;
		unsigned long tl_per_task;

		/* Attract cache-cold tasks on sync wakeups. */
		if (sync && !task_hot(p, prev_rq->clock, shared_sd))
			return wake_idle(this_cpu, p);

		schedstat_inc(p, se.nr_wakeups_affine_attempts);
		tl_per_task = cpu_avg_load_per_task(this_cpu);

		/*
		 * On a sync wakeup, subtract the (maximum possible) effect
		 * of the currently running task from this CPU's load.
		 */
		if (sync)
			tl -= current->se.load.weight;

		/*
		 * Pull the task here when this CPU is no busier than the
		 * task's CPU and the combined load is small, or when adding
		 * the task would not create a bad imbalance.
		 */
		if ((tl <= prev_load &&
		     tl + target_load(prev_cpu, idx) <= tl_per_task) ||
		    100 * (tl + p->se.load.weight) <= threshold * prev_load) {
			schedstat_inc(shared_sd, ttwu_move_affine);
			schedstat_inc(p, se.nr_wakeups_affine);
			return wake_idle(this_cpu, p);
		}
	}

	/*
	 * Start passive balancing when half the imbalance_pct limit is
	 * reached.
	 */
	if ((shared_sd->flags & SD_WAKE_BALANCE) &&
	    threshold * this_load <= 100 * prev_load) {
		schedstat_inc(shared_sd, ttwu_move_balance);
		schedstat_inc(p, se.nr_wakeups_passive);
		return wake_idle(this_cpu, p);
	}

	/* Could not wake to this_cpu; wake to the task's own CPU instead. */
	return wake_idle(prev_cpu, p);
}
#endif /* CONFIG_SMP */
1006
1007
1008/*
864 * Preempt the current task with a newly woken task if needed: 1009 * Preempt the current task with a newly woken task if needed:
865 */ 1010 */
866static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) 1011static void check_preempt_wakeup(struct rq *rq, struct task_struct *p)
@@ -1153,6 +1298,9 @@ static const struct sched_class fair_sched_class = {
1153 .enqueue_task = enqueue_task_fair, 1298 .enqueue_task = enqueue_task_fair,
1154 .dequeue_task = dequeue_task_fair, 1299 .dequeue_task = dequeue_task_fair,
1155 .yield_task = yield_task_fair, 1300 .yield_task = yield_task_fair,
1301#ifdef CONFIG_SMP
1302 .select_task_rq = select_task_rq_fair,
1303#endif /* CONFIG_SMP */
1156 1304
1157 .check_preempt_curr = check_preempt_wakeup, 1305 .check_preempt_curr = check_preempt_wakeup,
1158 1306