author    Peter Williams <pwil3058@bigpond.net.au>    2007-10-24 12:23:51 -0400
committer Ingo Molnar <mingo@elte.hu>                 2007-10-24 12:23:51 -0400
commit    e1d1484f72127a5580d37c379f6a5b2c2786434c (patch)
tree      e3e6529c5b9079f35b2c60bbd346a3c51c2b2c13
parent    a0f846aa76c3e03d54c1700a87cab3a46ccd71e2 (diff)
sched: reduce balance-tasks overhead
At the moment, balance_tasks() provides low level functionality for both
move_tasks() and move_one_task() (indirectly) via the load_balance()
function (in the sched_class interface), which also provides dual
functionality. This dual functionality complicates the interfaces and
internal mechanisms and adds to the run time overhead of operations that
are called with two run queue locks held.

This patch addresses this issue and reduces the overhead of these
operations.

Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
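To make the interface split concrete, below is a minimal, self-contained C
sketch of the resulting shape of the sched_class hooks. This is not the
kernel code: struct rq is left opaque, the demo stubs are invented for
illustration, and the signatures drop the this_cpu, sd, idle, all_pinned
and this_best_prio parameters that the real hooks carry.

        #include <stdio.h>

        struct rq;      /* opaque here; stands in for the kernel's run queue */

        struct sched_class {
                const struct sched_class *next;
                /* bulk hook: returns the amount of weighted load actually moved */
                unsigned long (*load_balance)(struct rq *this_rq, struct rq *busiest,
                                              unsigned long max_load_move);
                /* single-task hook: returns 1 if a task was pulled, 0 otherwise */
                int (*move_one_task)(struct rq *this_rq, struct rq *busiest);
        };

        /* mirrors move_tasks(): walk the classes until enough load has moved */
        static int move_tasks(const struct sched_class *class, struct rq *this_rq,
                              struct rq *busiest, unsigned long max_load_move)
        {
                unsigned long total_load_moved = 0;

                do {
                        total_load_moved += class->load_balance(this_rq, busiest,
                                                max_load_move - total_load_moved);
                        class = class->next;
                } while (class && max_load_move > total_load_moved);

                return total_load_moved > 0;
        }

        /* mirrors move_one_task(): no task-count limit or prio bookkeeping needed */
        static int move_one_task(const struct sched_class *highest,
                                 struct rq *this_rq, struct rq *busiest)
        {
                const struct sched_class *class;

                for (class = highest; class; class = class->next)
                        if (class->move_one_task(this_rq, busiest))
                                return 1;

                return 0;
        }

        /* demo stubs: a single class that "moves" at most 128 units per call */
        static unsigned long demo_load_balance(struct rq *this_rq, struct rq *busiest,
                                               unsigned long max_load_move)
        {
                (void)this_rq; (void)busiest;
                return max_load_move < 128 ? max_load_move : 128;
        }

        static int demo_move_one_task(struct rq *this_rq, struct rq *busiest)
        {
                (void)this_rq; (void)busiest;
                return 1;
        }

        int main(void)
        {
                const struct sched_class demo = {
                        .next = NULL,
                        .load_balance = demo_load_balance,
                        .move_one_task = demo_move_one_task,
                };

                printf("moved any load: %d\n", move_tasks(&demo, NULL, NULL, 512));
                printf("moved one task: %d\n", move_one_task(&demo, NULL, NULL));
                return 0;
        }

The point of the split is visible in the two dispatch loops: the single-task
path no longer passes sentinel arguments (max_nr_move = 1, max_load_move =
ULONG_MAX) through the bulk interface, so no task-count check has to be
evaluated on every iteration while both run queue locks are held.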
-rw-r--r--    include/linux/sched.h        7
-rw-r--r--    kernel/sched.c              99
-rw-r--r--    kernel/sched_fair.c         44
-rw-r--r--    kernel/sched_idletask.c     14
-rw-r--r--    kernel/sched_rt.c           28
5 files changed, 135 insertions, 57 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 52288a647692..639241f4f3d1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -829,11 +829,14 @@ struct sched_class {
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p);
 
 	unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
-			struct rq *busiest,
-			unsigned long max_nr_move, unsigned long max_load_move,
+			struct rq *busiest, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *all_pinned, int *this_best_prio);
 
+	int (*move_one_task) (struct rq *this_rq, int this_cpu,
+			      struct rq *busiest, struct sched_domain *sd,
+			      enum cpu_idle_type idle);
+
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
 	void (*task_new) (struct rq *rq, struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c
index cc9cd5b710a6..8607795fad69 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -838,11 +838,35 @@ struct rq_iterator {
 	struct task_struct *(*next)(void *);
 };
 
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
-		      struct sched_domain *sd, enum cpu_idle_type idle,
-		      int *all_pinned, unsigned long *load_moved,
-		      int *this_best_prio, struct rq_iterator *iterator);
+#ifdef CONFIG_SMP
+static unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+	      unsigned long max_load_move, struct sched_domain *sd,
+	      enum cpu_idle_type idle, int *all_pinned,
+	      int *this_best_prio, struct rq_iterator *iterator);
+
+static int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle,
+		   struct rq_iterator *iterator);
+#else
+static inline unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+	      unsigned long max_load_move, struct sched_domain *sd,
+	      enum cpu_idle_type idle, int *all_pinned,
+	      int *this_best_prio, struct rq_iterator *iterator)
+{
+	return 0;
+}
+
+static inline int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle,
+		   struct rq_iterator *iterator)
+{
+	return 0;
+}
+#endif
 
 #include "sched_stats.h"
 #include "sched_idletask.c"
@@ -2224,17 +2248,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu,
 	return 1;
 }
 
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
-		      struct sched_domain *sd, enum cpu_idle_type idle,
-		      int *all_pinned, unsigned long *load_moved,
-		      int *this_best_prio, struct rq_iterator *iterator)
+static unsigned long
+balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
+	      unsigned long max_load_move, struct sched_domain *sd,
+	      enum cpu_idle_type idle, int *all_pinned,
+	      int *this_best_prio, struct rq_iterator *iterator)
 {
 	int pulled = 0, pinned = 0, skip_for_load;
 	struct task_struct *p;
 	long rem_load_move = max_load_move;
 
-	if (max_nr_move == 0 || max_load_move == 0)
+	if (max_load_move == 0)
 		goto out;
 
 	pinned = 1;
@@ -2267,7 +2291,7 @@ next:
 	 * We only want to steal up to the prescribed number of tasks
 	 * and the prescribed amount of weighted load.
 	 */
-	if (pulled < max_nr_move && rem_load_move > 0) {
+	if (rem_load_move > 0) {
 		if (p->prio < *this_best_prio)
 			*this_best_prio = p->prio;
 		p = iterator->next(iterator->arg);
@@ -2275,7 +2299,7 @@ next:
 	}
 out:
 	/*
-	 * Right now, this is the only place pull_task() is called,
+	 * Right now, this is one of only two places pull_task() is called,
 	 * so we can safely collect pull_task() stats here rather than
 	 * inside pull_task().
 	 */
@@ -2283,8 +2307,8 @@ out:
 
 	if (all_pinned)
 		*all_pinned = pinned;
-	*load_moved = max_load_move - rem_load_move;
-	return pulled;
+
+	return max_load_move - rem_load_move;
 }
 
 /*
@@ -2306,7 +2330,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	do {
 		total_load_moved +=
 			class->load_balance(this_rq, this_cpu, busiest,
-				ULONG_MAX, max_load_move - total_load_moved,
+				max_load_move - total_load_moved,
 				sd, idle, all_pinned, &this_best_prio);
 		class = class->next;
 	} while (class && max_load_move > total_load_moved);
@@ -2314,6 +2338,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	return total_load_moved > 0;
 }
 
+static int
+iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle,
+		   struct rq_iterator *iterator)
+{
+	struct task_struct *p = iterator->start(iterator->arg);
+	int pinned = 0;
+
+	while (p) {
+		if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) {
+			pull_task(busiest, p, this_rq, this_cpu);
+			/*
+			 * Right now, this is only the second place pull_task()
+			 * is called, so we can safely collect pull_task()
+			 * stats here rather than inside pull_task().
+			 */
+			schedstat_inc(sd, lb_gained[idle]);
+
+			return 1;
+		}
+		p = iterator->next(iterator->arg);
+	}
+
+	return 0;
+}
+
 /*
  * move_one_task tries to move exactly one task from busiest to this_rq, as
  * part of active balancing operations within "domain".
@@ -2325,12 +2375,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			 struct sched_domain *sd, enum cpu_idle_type idle)
 {
 	const struct sched_class *class;
-	int this_best_prio = MAX_PRIO;
 
 	for (class = sched_class_highest; class; class = class->next)
-		if (class->load_balance(this_rq, this_cpu, busiest,
-					1, ULONG_MAX, sd, idle, NULL,
-					&this_best_prio))
+		if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle))
 			return 1;
 
 	return 0;
@@ -3267,18 +3314,6 @@ static inline void idle_balance(int cpu, struct rq *rq)
 {
 }
 
-/* Avoid "used but not defined" warning on UP */
-static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
-		      struct sched_domain *sd, enum cpu_idle_type idle,
-		      int *all_pinned, unsigned long *load_moved,
-		      int *this_best_prio, struct rq_iterator *iterator)
-{
-	*load_moved = 0;
-
-	return 0;
-}
-
 #endif
 
 DEFINE_PER_CPU(struct kernel_stat, kstat);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 166ed6db600b..a90d0457d603 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -936,12 +936,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 
 static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		  unsigned long max_nr_move, unsigned long max_load_move,
+		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
 		  int *all_pinned, int *this_best_prio)
 {
 	struct cfs_rq *busy_cfs_rq;
-	unsigned long load_moved, total_nr_moved = 0, nr_moved;
 	long rem_load_move = max_load_move;
 	struct rq_iterator cfs_rq_iterator;
 
@@ -969,25 +968,47 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 #else
 # define maxload rem_load_move
 #endif
-		/* pass busy_cfs_rq argument into
+		/*
+		 * pass busy_cfs_rq argument into
 		 * load_balance_[start|next]_fair iterators
 		 */
 		cfs_rq_iterator.arg = busy_cfs_rq;
-		nr_moved = balance_tasks(this_rq, this_cpu, busiest,
-				max_nr_move, maxload, sd, idle, all_pinned,
-				&load_moved, this_best_prio, &cfs_rq_iterator);
-
-		total_nr_moved += nr_moved;
-		max_nr_move -= nr_moved;
-		rem_load_move -= load_moved;
+		rem_load_move -= balance_tasks(this_rq, this_cpu, busiest,
+					       maxload, sd, idle, all_pinned,
+					       this_best_prio,
+					       &cfs_rq_iterator);
 
-		if (max_nr_move <= 0 || rem_load_move <= 0)
+		if (rem_load_move <= 0)
 			break;
 	}
 
 	return max_load_move - rem_load_move;
 }
 
+static int
+move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	struct cfs_rq *busy_cfs_rq;
+	struct rq_iterator cfs_rq_iterator;
+
+	cfs_rq_iterator.start = load_balance_start_fair;
+	cfs_rq_iterator.next = load_balance_next_fair;
+
+	for_each_leaf_cfs_rq(busiest, busy_cfs_rq) {
+		/*
+		 * pass busy_cfs_rq argument into
+		 * load_balance_[start|next]_fair iterators
+		 */
+		cfs_rq_iterator.arg = busy_cfs_rq;
+		if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
+				       &cfs_rq_iterator))
+			return 1;
+	}
+
+	return 0;
+}
+
 /*
  * scheduler tick hitting a task of our scheduling class:
  */
@@ -1064,6 +1085,7 @@ static const struct sched_class fair_sched_class = {
 	.put_prev_task = put_prev_task_fair,
 
 	.load_balance = load_balance_fair,
+	.move_one_task = move_one_task_fair,
 
 	.set_curr_task = set_curr_task_fair,
 	.task_tick = task_tick_fair,
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 6e2ead41516e..586b06ca30aa 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -39,9 +39,16 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev)
 
 static unsigned long
 load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		  unsigned long max_nr_move, unsigned long max_load_move,
+		  unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
 		  int *all_pinned, int *this_best_prio)
+{
+	return 0;
+}
+
+static int
+move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		   struct sched_domain *sd, enum cpu_idle_type idle)
 {
 	return 0;
 }
@@ -70,6 +77,7 @@ const struct sched_class idle_sched_class = {
 	.put_prev_task = put_prev_task_idle,
 
 	.load_balance = load_balance_idle,
+	.move_one_task = move_one_task_idle,
 
 	.set_curr_task = set_curr_task_idle,
 	.task_tick = task_tick_idle,
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d0097a0634e5..e9395b7119e6 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -172,13 +172,11 @@ static struct task_struct *load_balance_next_rt(void *arg)
 
 static unsigned long
 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		unsigned long max_nr_move, unsigned long max_load_move,
+		unsigned long max_load_move,
 		struct sched_domain *sd, enum cpu_idle_type idle,
 		int *all_pinned, int *this_best_prio)
 {
-	int nr_moved;
 	struct rq_iterator rt_rq_iterator;
-	unsigned long load_moved;
 
 	rt_rq_iterator.start = load_balance_start_rt;
 	rt_rq_iterator.next = load_balance_next_rt;
@@ -187,11 +185,22 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	 */
 	rt_rq_iterator.arg = busiest;
 
-	nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
-			max_load_move, sd, idle, all_pinned, &load_moved,
-			this_best_prio, &rt_rq_iterator);
+	return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd,
+			     idle, all_pinned, this_best_prio, &rt_rq_iterator);
+}
+
+static int
+move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
+		 struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	struct rq_iterator rt_rq_iterator;
+
+	rt_rq_iterator.start = load_balance_start_rt;
+	rt_rq_iterator.next = load_balance_next_rt;
+	rt_rq_iterator.arg = busiest;
 
-	return load_moved;
+	return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle,
+				  &rt_rq_iterator);
 }
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)
@@ -237,6 +246,7 @@ const struct sched_class rt_sched_class = {
 	.put_prev_task = put_prev_task_rt,
 
 	.load_balance = load_balance_rt,
+	.move_one_task = move_one_task_rt,
 
 	.set_curr_task = set_curr_task_rt,
 	.task_tick = task_tick_rt,