diff options
author | Gregory Haskins <ghaskins@novell.com> | 2008-01-25 15:08:09 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-25 15:08:09 -0500 |
commit | e7693a362ec84bb5b6fd441d8a8b4b9d568a7a0c (patch) | |
tree | 078940540641a59aaf199695bfc6de3f062a987b /kernel/sched_fair.c | |
parent | 697f0a487f294e634a342764472b79375bb3158a (diff) |
sched: de-SCHED_OTHER-ize the RT path
The current wake-up code path tries to determine if it can optimize the
wake-up to "this_cpu" by computing load calculations. The problem is that
these calculations are only relevant to SCHED_OTHER tasks where load is king.
For RT tasks, priority is king. So the load calculation is completely wasted
bandwidth.
Therefore, we create a new sched_class interface to help with
pre-wakeup routing decisions, and move the load calculation into the
CFS scheduling class so that it is only performed for CFS tasks.
Signed-off-by: Gregory Haskins <ghaskins@novell.com>
Signed-off-by: Steven Rostedt <srostedt@redhat.com>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 148 |
1 files changed, 148 insertions, 0 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5c208e090ae4..f881fc5e035c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -861,6 +861,151 @@ static void yield_task_fair(struct rq *rq) | |||
861 | } | 861 | } |
862 | 862 | ||
863 | /* | 863 | /* |
864 | * wake_idle() will wake a task on an idle cpu if task->cpu is | ||
865 | * not idle and an idle cpu is available. The span of cpus to | ||
866 | * search starts with cpus closest then further out as needed, | ||
867 | * so we always favor a closer, idle cpu. | ||
868 | * | ||
869 | * Returns the CPU we should wake onto. | ||
870 | */ | ||
871 | #if defined(ARCH_HAS_SCHED_WAKE_IDLE) | ||
872 | static int wake_idle(int cpu, struct task_struct *p) | ||
873 | { | ||
874 | cpumask_t tmp; | ||
875 | struct sched_domain *sd; | ||
876 | int i; | ||
877 | |||
878 | /* | ||
879 | * If it is idle, then it is the best cpu to run this task. | ||
880 | * | ||
881 | * This cpu is also the best, if it has more than one task already. | ||
882 | * Siblings must also be busy (in most cases) as they didn't already | |
883 | * pick up the extra load from this cpu and hence we need not check | |
884 | * sibling runqueue info. This will avoid the checks and cache miss | |
885 | * penalties associated with that. | |
886 | */ | ||
887 | if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1) | ||
888 | return cpu; | ||
889 | |||
890 | for_each_domain(cpu, sd) { | ||
891 | if (sd->flags & SD_WAKE_IDLE) { | ||
892 | cpus_and(tmp, sd->span, p->cpus_allowed); | ||
893 | for_each_cpu_mask(i, tmp) { | ||
894 | if (idle_cpu(i)) { | ||
895 | if (i != task_cpu(p)) { | ||
896 | schedstat_inc(p, | ||
897 | se.nr_wakeups_idle); | ||
898 | } | ||
899 | return i; | ||
900 | } | ||
901 | } | ||
902 | } else { | ||
903 | break; | ||
904 | } | ||
905 | } | ||
906 | return cpu; | ||
907 | } | ||
908 | #else | ||
909 | static inline int wake_idle(int cpu, struct task_struct *p) | ||
910 | { | ||
911 | return cpu; | ||
912 | } | ||
913 | #endif | ||
914 | |||
915 | #ifdef CONFIG_SMP | ||
916 | static int select_task_rq_fair(struct task_struct *p, int sync) | ||
917 | { | ||
918 | int cpu, this_cpu; | ||
919 | struct rq *rq; | ||
920 | struct sched_domain *sd, *this_sd = NULL; | ||
921 | int new_cpu; | ||
922 | |||
923 | cpu = task_cpu(p); | ||
924 | rq = task_rq(p); | ||
925 | this_cpu = smp_processor_id(); | ||
926 | new_cpu = cpu; | ||
927 | |||
928 | for_each_domain(this_cpu, sd) { | ||
929 | if (cpu_isset(cpu, sd->span)) { | ||
930 | this_sd = sd; | ||
931 | break; | ||
932 | } | ||
933 | } | ||
934 | |||
935 | if (unlikely(!cpu_isset(this_cpu, p->cpus_allowed))) | ||
936 | goto out_set_cpu; | ||
937 | |||
938 | /* | ||
939 | * Check for affine wakeup and passive balancing possibilities. | ||
940 | */ | ||
941 | if (this_sd) { | ||
942 | int idx = this_sd->wake_idx; | ||
943 | unsigned int imbalance; | ||
944 | unsigned long load, this_load; | ||
945 | |||
946 | imbalance = 100 + (this_sd->imbalance_pct - 100) / 2; | ||
947 | |||
948 | load = source_load(cpu, idx); | ||
949 | this_load = target_load(this_cpu, idx); | ||
950 | |||
951 | new_cpu = this_cpu; /* Wake to this CPU if we can */ | ||
952 | |||
953 | if (this_sd->flags & SD_WAKE_AFFINE) { | ||
954 | unsigned long tl = this_load; | ||
955 | unsigned long tl_per_task; | ||
956 | |||
957 | /* | ||
958 | * Attract cache-cold tasks on sync wakeups: | ||
959 | */ | ||
960 | if (sync && !task_hot(p, rq->clock, this_sd)) | ||
961 | goto out_set_cpu; | ||
962 | |||
963 | schedstat_inc(p, se.nr_wakeups_affine_attempts); | ||
964 | tl_per_task = cpu_avg_load_per_task(this_cpu); | ||
965 | |||
966 | /* | ||
967 | * If sync wakeup then subtract the (maximum possible) | ||
968 | * effect of the currently running task from the load | ||
969 | * of the current CPU: | ||
970 | */ | ||
971 | if (sync) | ||
972 | tl -= current->se.load.weight; | ||
973 | |||
974 | if ((tl <= load && | ||
975 | tl + target_load(cpu, idx) <= tl_per_task) || | ||
976 | 100*(tl + p->se.load.weight) <= imbalance*load) { | ||
977 | /* | ||
978 | * This domain has SD_WAKE_AFFINE and | ||
979 | * p is cache cold in this domain, and | ||
980 | * there is no bad imbalance. | ||
981 | */ | ||
982 | schedstat_inc(this_sd, ttwu_move_affine); | ||
983 | schedstat_inc(p, se.nr_wakeups_affine); | ||
984 | goto out_set_cpu; | ||
985 | } | ||
986 | } | ||
987 | |||
988 | /* | ||
989 | * Start passive balancing when half the imbalance_pct | ||
990 | * limit is reached. | ||
991 | */ | ||
992 | if (this_sd->flags & SD_WAKE_BALANCE) { | ||
993 | if (imbalance*this_load <= 100*load) { | ||
994 | schedstat_inc(this_sd, ttwu_move_balance); | ||
995 | schedstat_inc(p, se.nr_wakeups_passive); | ||
996 | goto out_set_cpu; | ||
997 | } | ||
998 | } | ||
999 | } | ||
1000 | |||
1001 | new_cpu = cpu; /* Could not wake to this_cpu. Wake to cpu instead */ | ||
1002 | out_set_cpu: | ||
1003 | return wake_idle(new_cpu, p); | ||
1004 | } | ||
1005 | #endif /* CONFIG_SMP */ | ||
1006 | |||
1007 | |||
1008 | /* | ||
864 | * Preempt the current task with a newly woken task if needed: | 1009 | * Preempt the current task with a newly woken task if needed: |
865 | */ | 1010 | */ |
866 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) | 1011 | static void check_preempt_wakeup(struct rq *rq, struct task_struct *p) |
@@ -1153,6 +1298,9 @@ static const struct sched_class fair_sched_class = { | |||
1153 | .enqueue_task = enqueue_task_fair, | 1298 | .enqueue_task = enqueue_task_fair, |
1154 | .dequeue_task = dequeue_task_fair, | 1299 | .dequeue_task = dequeue_task_fair, |
1155 | .yield_task = yield_task_fair, | 1300 | .yield_task = yield_task_fair, |
1301 | #ifdef CONFIG_SMP | ||
1302 | .select_task_rq = select_task_rq_fair, | ||
1303 | #endif /* CONFIG_SMP */ | ||
1156 | 1304 | ||
1157 | .check_preempt_curr = check_preempt_wakeup, | 1305 | .check_preempt_curr = check_preempt_wakeup, |
1158 | 1306 | ||