diff options
author | Peter Williams <pwil3058@bigpond.net.au> | 2007-10-24 12:23:51 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2007-10-24 12:23:51 -0400 |
commit | e1d1484f72127a5580d37c379f6a5b2c2786434c (patch) | |
tree | e3e6529c5b9079f35b2c60bbd346a3c51c2b2c13 /kernel | |
parent | a0f846aa76c3e03d54c1700a87cab3a46ccd71e2 (diff) |
sched: reduce balance-tasks overhead
At the moment, balance_tasks() provides low level functionality for both
move_tasks() and move_one_task() (indirectly) via the load_balance()
function (in the sched_class interface) which also provides dual
functionality. This dual functionality complicates the interfaces and
internal mechanisms and increases the run time overhead of operations
that are called with two run queue locks held.
This patch addresses this issue and reduces the overhead of these
operations.
Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 99 | ||||
-rw-r--r-- | kernel/sched_fair.c | 44 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 14 | ||||
-rw-r--r-- | kernel/sched_rt.c | 28 |
4 files changed, 130 insertions, 55 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index cc9cd5b710a6..8607795fad69 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -838,11 +838,35 @@ struct rq_iterator { | |||
838 | struct task_struct *(*next)(void *); | 838 | struct task_struct *(*next)(void *); |
839 | }; | 839 | }; |
840 | 840 | ||
841 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | 841 | #ifdef CONFIG_SMP |
842 | unsigned long max_nr_move, unsigned long max_load_move, | 842 | static unsigned long |
843 | struct sched_domain *sd, enum cpu_idle_type idle, | 843 | balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, |
844 | int *all_pinned, unsigned long *load_moved, | 844 | unsigned long max_load_move, struct sched_domain *sd, |
845 | int *this_best_prio, struct rq_iterator *iterator); | 845 | enum cpu_idle_type idle, int *all_pinned, |
846 | int *this_best_prio, struct rq_iterator *iterator); | ||
847 | |||
848 | static int | ||
849 | iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
850 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
851 | struct rq_iterator *iterator); | ||
852 | #else | ||
853 | static inline unsigned long | ||
854 | balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
855 | unsigned long max_load_move, struct sched_domain *sd, | ||
856 | enum cpu_idle_type idle, int *all_pinned, | ||
857 | int *this_best_prio, struct rq_iterator *iterator) | ||
858 | { | ||
859 | return 0; | ||
860 | } | ||
861 | |||
862 | static inline int | ||
863 | iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
864 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
865 | struct rq_iterator *iterator) | ||
866 | { | ||
867 | return 0; | ||
868 | } | ||
869 | #endif | ||
846 | 870 | ||
847 | #include "sched_stats.h" | 871 | #include "sched_stats.h" |
848 | #include "sched_idletask.c" | 872 | #include "sched_idletask.c" |
@@ -2224,17 +2248,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
2224 | return 1; | 2248 | return 1; |
2225 | } | 2249 | } |
2226 | 2250 | ||
2227 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | 2251 | static unsigned long |
2228 | unsigned long max_nr_move, unsigned long max_load_move, | 2252 | balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, |
2229 | struct sched_domain *sd, enum cpu_idle_type idle, | 2253 | unsigned long max_load_move, struct sched_domain *sd, |
2230 | int *all_pinned, unsigned long *load_moved, | 2254 | enum cpu_idle_type idle, int *all_pinned, |
2231 | int *this_best_prio, struct rq_iterator *iterator) | 2255 | int *this_best_prio, struct rq_iterator *iterator) |
2232 | { | 2256 | { |
2233 | int pulled = 0, pinned = 0, skip_for_load; | 2257 | int pulled = 0, pinned = 0, skip_for_load; |
2234 | struct task_struct *p; | 2258 | struct task_struct *p; |
2235 | long rem_load_move = max_load_move; | 2259 | long rem_load_move = max_load_move; |
2236 | 2260 | ||
2237 | if (max_nr_move == 0 || max_load_move == 0) | 2261 | if (max_load_move == 0) |
2238 | goto out; | 2262 | goto out; |
2239 | 2263 | ||
2240 | pinned = 1; | 2264 | pinned = 1; |
@@ -2267,7 +2291,7 @@ next: | |||
2267 | * We only want to steal up to the prescribed number of tasks | 2291 | * We only want to steal up to the prescribed number of tasks |
2268 | * and the prescribed amount of weighted load. | 2292 | * and the prescribed amount of weighted load. |
2269 | */ | 2293 | */ |
2270 | if (pulled < max_nr_move && rem_load_move > 0) { | 2294 | if (rem_load_move > 0) { |
2271 | if (p->prio < *this_best_prio) | 2295 | if (p->prio < *this_best_prio) |
2272 | *this_best_prio = p->prio; | 2296 | *this_best_prio = p->prio; |
2273 | p = iterator->next(iterator->arg); | 2297 | p = iterator->next(iterator->arg); |
@@ -2275,7 +2299,7 @@ next: | |||
2275 | } | 2299 | } |
2276 | out: | 2300 | out: |
2277 | /* | 2301 | /* |
2278 | * Right now, this is the only place pull_task() is called, | 2302 | * Right now, this is one of only two places pull_task() is called, |
2279 | * so we can safely collect pull_task() stats here rather than | 2303 | * so we can safely collect pull_task() stats here rather than |
2280 | * inside pull_task(). | 2304 | * inside pull_task(). |
2281 | */ | 2305 | */ |
@@ -2283,8 +2307,8 @@ out: | |||
2283 | 2307 | ||
2284 | if (all_pinned) | 2308 | if (all_pinned) |
2285 | *all_pinned = pinned; | 2309 | *all_pinned = pinned; |
2286 | *load_moved = max_load_move - rem_load_move; | 2310 | |
2287 | return pulled; | 2311 | return max_load_move - rem_load_move; |
2288 | } | 2312 | } |
2289 | 2313 | ||
2290 | /* | 2314 | /* |
@@ -2306,7 +2330,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2306 | do { | 2330 | do { |
2307 | total_load_moved += | 2331 | total_load_moved += |
2308 | class->load_balance(this_rq, this_cpu, busiest, | 2332 | class->load_balance(this_rq, this_cpu, busiest, |
2309 | ULONG_MAX, max_load_move - total_load_moved, | 2333 | max_load_move - total_load_moved, |
2310 | sd, idle, all_pinned, &this_best_prio); | 2334 | sd, idle, all_pinned, &this_best_prio); |
2311 | class = class->next; | 2335 | class = class->next; |
2312 | } while (class && max_load_move > total_load_moved); | 2336 | } while (class && max_load_move > total_load_moved); |
@@ -2314,6 +2338,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2314 | return total_load_moved > 0; | 2338 | return total_load_moved > 0; |
2315 | } | 2339 | } |
2316 | 2340 | ||
2341 | static int | ||
2342 | iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
2343 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
2344 | struct rq_iterator *iterator) | ||
2345 | { | ||
2346 | struct task_struct *p = iterator->start(iterator->arg); | ||
2347 | int pinned = 0; | ||
2348 | |||
2349 | while (p) { | ||
2350 | if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { | ||
2351 | pull_task(busiest, p, this_rq, this_cpu); | ||
2352 | /* | ||
2353 | * Right now, this is only the second place pull_task() | ||
2354 | * is called, so we can safely collect pull_task() | ||
2355 | * stats here rather than inside pull_task(). | ||
2356 | */ | ||
2357 | schedstat_inc(sd, lb_gained[idle]); | ||
2358 | |||
2359 | return 1; | ||
2360 | } | ||
2361 | p = iterator->next(iterator->arg); | ||
2362 | } | ||
2363 | |||
2364 | return 0; | ||
2365 | } | ||
2366 | |||
2317 | /* | 2367 | /* |
2318 | * move_one_task tries to move exactly one task from busiest to this_rq, as | 2368 | * move_one_task tries to move exactly one task from busiest to this_rq, as |
2319 | * part of active balancing operations within "domain". | 2369 | * part of active balancing operations within "domain". |
@@ -2325,12 +2375,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2325 | struct sched_domain *sd, enum cpu_idle_type idle) | 2375 | struct sched_domain *sd, enum cpu_idle_type idle) |
2326 | { | 2376 | { |
2327 | const struct sched_class *class; | 2377 | const struct sched_class *class; |
2328 | int this_best_prio = MAX_PRIO; | ||
2329 | 2378 | ||
2330 | for (class = sched_class_highest; class; class = class->next) | 2379 | for (class = sched_class_highest; class; class = class->next) |
2331 | if (class->load_balance(this_rq, this_cpu, busiest, | 2380 | if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle)) |
2332 | 1, ULONG_MAX, sd, idle, NULL, | ||
2333 | &this_best_prio)) | ||
2334 | return 1; | 2381 | return 1; |
2335 | 2382 | ||
2336 | return 0; | 2383 | return 0; |
@@ -3267,18 +3314,6 @@ static inline void idle_balance(int cpu, struct rq *rq) | |||
3267 | { | 3314 | { |
3268 | } | 3315 | } |
3269 | 3316 | ||
3270 | /* Avoid "used but not defined" warning on UP */ | ||
3271 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
3272 | unsigned long max_nr_move, unsigned long max_load_move, | ||
3273 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
3274 | int *all_pinned, unsigned long *load_moved, | ||
3275 | int *this_best_prio, struct rq_iterator *iterator) | ||
3276 | { | ||
3277 | *load_moved = 0; | ||
3278 | |||
3279 | return 0; | ||
3280 | } | ||
3281 | |||
3282 | #endif | 3317 | #endif |
3283 | 3318 | ||
3284 | DEFINE_PER_CPU(struct kernel_stat, kstat); | 3319 | DEFINE_PER_CPU(struct kernel_stat, kstat); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 166ed6db600b..a90d0457d603 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -936,12 +936,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) | |||
936 | 936 | ||
937 | static unsigned long | 937 | static unsigned long |
938 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 938 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
939 | unsigned long max_nr_move, unsigned long max_load_move, | 939 | unsigned long max_load_move, |
940 | struct sched_domain *sd, enum cpu_idle_type idle, | 940 | struct sched_domain *sd, enum cpu_idle_type idle, |
941 | int *all_pinned, int *this_best_prio) | 941 | int *all_pinned, int *this_best_prio) |
942 | { | 942 | { |
943 | struct cfs_rq *busy_cfs_rq; | 943 | struct cfs_rq *busy_cfs_rq; |
944 | unsigned long load_moved, total_nr_moved = 0, nr_moved; | ||
945 | long rem_load_move = max_load_move; | 944 | long rem_load_move = max_load_move; |
946 | struct rq_iterator cfs_rq_iterator; | 945 | struct rq_iterator cfs_rq_iterator; |
947 | 946 | ||
@@ -969,25 +968,47 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
969 | #else | 968 | #else |
970 | # define maxload rem_load_move | 969 | # define maxload rem_load_move |
971 | #endif | 970 | #endif |
972 | /* pass busy_cfs_rq argument into | 971 | /* |
972 | * pass busy_cfs_rq argument into | ||
973 | * load_balance_[start|next]_fair iterators | 973 | * load_balance_[start|next]_fair iterators |
974 | */ | 974 | */ |
975 | cfs_rq_iterator.arg = busy_cfs_rq; | 975 | cfs_rq_iterator.arg = busy_cfs_rq; |
976 | nr_moved = balance_tasks(this_rq, this_cpu, busiest, | 976 | rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, |
977 | max_nr_move, maxload, sd, idle, all_pinned, | 977 | maxload, sd, idle, all_pinned, |
978 | &load_moved, this_best_prio, &cfs_rq_iterator); | 978 | this_best_prio, |
979 | 979 | &cfs_rq_iterator); | |
980 | total_nr_moved += nr_moved; | ||
981 | max_nr_move -= nr_moved; | ||
982 | rem_load_move -= load_moved; | ||
983 | 980 | ||
984 | if (max_nr_move <= 0 || rem_load_move <= 0) | 981 | if (rem_load_move <= 0) |
985 | break; | 982 | break; |
986 | } | 983 | } |
987 | 984 | ||
988 | return max_load_move - rem_load_move; | 985 | return max_load_move - rem_load_move; |
989 | } | 986 | } |
990 | 987 | ||
988 | static int | ||
989 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
990 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
991 | { | ||
992 | struct cfs_rq *busy_cfs_rq; | ||
993 | struct rq_iterator cfs_rq_iterator; | ||
994 | |||
995 | cfs_rq_iterator.start = load_balance_start_fair; | ||
996 | cfs_rq_iterator.next = load_balance_next_fair; | ||
997 | |||
998 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { | ||
999 | /* | ||
1000 | * pass busy_cfs_rq argument into | ||
1001 | * load_balance_[start|next]_fair iterators | ||
1002 | */ | ||
1003 | cfs_rq_iterator.arg = busy_cfs_rq; | ||
1004 | if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle, | ||
1005 | &cfs_rq_iterator)) | ||
1006 | return 1; | ||
1007 | } | ||
1008 | |||
1009 | return 0; | ||
1010 | } | ||
1011 | |||
991 | /* | 1012 | /* |
992 | * scheduler tick hitting a task of our scheduling class: | 1013 | * scheduler tick hitting a task of our scheduling class: |
993 | */ | 1014 | */ |
@@ -1064,6 +1085,7 @@ static const struct sched_class fair_sched_class = { | |||
1064 | .put_prev_task = put_prev_task_fair, | 1085 | .put_prev_task = put_prev_task_fair, |
1065 | 1086 | ||
1066 | .load_balance = load_balance_fair, | 1087 | .load_balance = load_balance_fair, |
1088 | .move_one_task = move_one_task_fair, | ||
1067 | 1089 | ||
1068 | .set_curr_task = set_curr_task_fair, | 1090 | .set_curr_task = set_curr_task_fair, |
1069 | .task_tick = task_tick_fair, | 1091 | .task_tick = task_tick_fair, |
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 6e2ead41516e..586b06ca30aa 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -39,9 +39,16 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) | |||
39 | 39 | ||
40 | static unsigned long | 40 | static unsigned long |
41 | load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, | 41 | load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, |
42 | unsigned long max_nr_move, unsigned long max_load_move, | 42 | unsigned long max_load_move, |
43 | struct sched_domain *sd, enum cpu_idle_type idle, | 43 | struct sched_domain *sd, enum cpu_idle_type idle, |
44 | int *all_pinned, int *this_best_prio) | 44 | int *all_pinned, int *this_best_prio) |
45 | { | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static int | ||
50 | move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
51 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
45 | { | 52 | { |
46 | return 0; | 53 | return 0; |
47 | } | 54 | } |
@@ -70,6 +77,7 @@ const struct sched_class idle_sched_class = { | |||
70 | .put_prev_task = put_prev_task_idle, | 77 | .put_prev_task = put_prev_task_idle, |
71 | 78 | ||
72 | .load_balance = load_balance_idle, | 79 | .load_balance = load_balance_idle, |
80 | .move_one_task = move_one_task_idle, | ||
73 | 81 | ||
74 | .set_curr_task = set_curr_task_idle, | 82 | .set_curr_task = set_curr_task_idle, |
75 | .task_tick = task_tick_idle, | 83 | .task_tick = task_tick_idle, |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d0097a0634e5..e9395b7119e6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -172,13 +172,11 @@ static struct task_struct *load_balance_next_rt(void *arg) | |||
172 | 172 | ||
173 | static unsigned long | 173 | static unsigned long |
174 | load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | 174 | load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, |
175 | unsigned long max_nr_move, unsigned long max_load_move, | 175 | unsigned long max_load_move, |
176 | struct sched_domain *sd, enum cpu_idle_type idle, | 176 | struct sched_domain *sd, enum cpu_idle_type idle, |
177 | int *all_pinned, int *this_best_prio) | 177 | int *all_pinned, int *this_best_prio) |
178 | { | 178 | { |
179 | int nr_moved; | ||
180 | struct rq_iterator rt_rq_iterator; | 179 | struct rq_iterator rt_rq_iterator; |
181 | unsigned long load_moved; | ||
182 | 180 | ||
183 | rt_rq_iterator.start = load_balance_start_rt; | 181 | rt_rq_iterator.start = load_balance_start_rt; |
184 | rt_rq_iterator.next = load_balance_next_rt; | 182 | rt_rq_iterator.next = load_balance_next_rt; |
@@ -187,11 +185,22 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
187 | */ | 185 | */ |
188 | rt_rq_iterator.arg = busiest; | 186 | rt_rq_iterator.arg = busiest; |
189 | 187 | ||
190 | nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, | 188 | return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd, |
191 | max_load_move, sd, idle, all_pinned, &load_moved, | 189 | idle, all_pinned, this_best_prio, &rt_rq_iterator); |
192 | this_best_prio, &rt_rq_iterator); | 190 | } |
191 | |||
192 | static int | ||
193 | move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
194 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
195 | { | ||
196 | struct rq_iterator rt_rq_iterator; | ||
197 | |||
198 | rt_rq_iterator.start = load_balance_start_rt; | ||
199 | rt_rq_iterator.next = load_balance_next_rt; | ||
200 | rt_rq_iterator.arg = busiest; | ||
193 | 201 | ||
194 | return load_moved; | 202 | return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle, |
203 | &rt_rq_iterator); | ||
195 | } | 204 | } |
196 | 205 | ||
197 | static void task_tick_rt(struct rq *rq, struct task_struct *p) | 206 | static void task_tick_rt(struct rq *rq, struct task_struct *p) |
@@ -237,6 +246,7 @@ const struct sched_class rt_sched_class = { | |||
237 | .put_prev_task = put_prev_task_rt, | 246 | .put_prev_task = put_prev_task_rt, |
238 | 247 | ||
239 | .load_balance = load_balance_rt, | 248 | .load_balance = load_balance_rt, |
249 | .move_one_task = move_one_task_rt, | ||
240 | 250 | ||
241 | .set_curr_task = set_curr_task_rt, | 251 | .set_curr_task = set_curr_task_rt, |
242 | .task_tick = task_tick_rt, | 252 | .task_tick = task_tick_rt, |