diff options
author | Peter Williams <pwil3058@bigpond.net.au> | 2007-10-24 12:23:51 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2007-10-24 12:23:51 -0400 |
commit | e1d1484f72127a5580d37c379f6a5b2c2786434c (patch) | |
tree | e3e6529c5b9079f35b2c60bbd346a3c51c2b2c13 /kernel | |
parent | a0f846aa76c3e03d54c1700a87cab3a46ccd71e2 (diff) |
sched: reduce balance-tasks overhead
At the moment, balance_tasks() provides low level functionality for both
move_tasks() and move_one_task() (indirectly) via the load_balance()
function (in the sched_class interface) which also provides dual
functionality. This dual functionality complicates the interfaces and
internal mechanisms and increases the run time overhead of operations
that are called with two run queue locks held.
This patch addresses this issue and reduces the overhead of these
operations.
Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/sched.c | 99 | ||||
-rw-r--r-- | kernel/sched_fair.c | 44 | ||||
-rw-r--r-- | kernel/sched_idletask.c | 14 | ||||
-rw-r--r-- | kernel/sched_rt.c | 28 |
4 files changed, 130 insertions, 55 deletions
diff --git a/kernel/sched.c b/kernel/sched.c index cc9cd5b710a6..8607795fad69 100644 --- a/kernel/sched.c +++ b/kernel/sched.c | |||
@@ -838,11 +838,35 @@ struct rq_iterator { | |||
838 | struct task_struct *(*next)(void *); | 838 | struct task_struct *(*next)(void *); |
839 | }; | 839 | }; |
840 | 840 | ||
841 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | 841 | #ifdef CONFIG_SMP |
842 | unsigned long max_nr_move, unsigned long max_load_move, | 842 | static unsigned long |
843 | struct sched_domain *sd, enum cpu_idle_type idle, | 843 | balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, |
844 | int *all_pinned, unsigned long *load_moved, | 844 | unsigned long max_load_move, struct sched_domain *sd, |
845 | int *this_best_prio, struct rq_iterator *iterator); | 845 | enum cpu_idle_type idle, int *all_pinned, |
846 | int *this_best_prio, struct rq_iterator *iterator); | ||
847 | |||
848 | static int | ||
849 | iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
850 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
851 | struct rq_iterator *iterator); | ||
852 | #else | ||
853 | static inline unsigned long | ||
854 | balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
855 | unsigned long max_load_move, struct sched_domain *sd, | ||
856 | enum cpu_idle_type idle, int *all_pinned, | ||
857 | int *this_best_prio, struct rq_iterator *iterator) | ||
858 | { | ||
859 | return 0; | ||
860 | } | ||
861 | |||
862 | static inline int | ||
863 | iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
864 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
865 | struct rq_iterator *iterator) | ||
866 | { | ||
867 | return 0; | ||
868 | } | ||
869 | #endif | ||
846 | 870 | ||
847 | #include "sched_stats.h" | 871 | #include "sched_stats.h" |
848 | #include "sched_idletask.c" | 872 | #include "sched_idletask.c" |
@@ -2224,17 +2248,17 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, | |||
2224 | return 1; | 2248 | return 1; |
2225 | } | 2249 | } |
2226 | 2250 | ||
2227 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | 2251 | static unsigned long |
2228 | unsigned long max_nr_move, unsigned long max_load_move, | 2252 | balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, |
2229 | struct sched_domain *sd, enum cpu_idle_type idle, | 2253 | unsigned long max_load_move, struct sched_domain *sd, |
2230 | int *all_pinned, unsigned long *load_moved, | 2254 | enum cpu_idle_type idle, int *all_pinned, |
2231 | int *this_best_prio, struct rq_iterator *iterator) | 2255 | int *this_best_prio, struct rq_iterator *iterator) |
2232 | { | 2256 | { |
2233 | int pulled = 0, pinned = 0, skip_for_load; | 2257 | int pulled = 0, pinned = 0, skip_for_load; |
2234 | struct task_struct *p; | 2258 | struct task_struct *p; |
2235 | long rem_load_move = max_load_move; | 2259 | long rem_load_move = max_load_move; |
2236 | 2260 | ||
2237 | if (max_nr_move == 0 || max_load_move == 0) | 2261 | if (max_load_move == 0) |
2238 | goto out; | 2262 | goto out; |
2239 | 2263 | ||
2240 | pinned = 1; | 2264 | pinned = 1; |
@@ -2267,7 +2291,7 @@ next: | |||
2267 | * We only want to steal up to the prescribed number of tasks | 2291 | * We only want to steal up to the prescribed number of tasks |
2268 | * and the prescribed amount of weighted load. | 2292 | * and the prescribed amount of weighted load. |
2269 | */ | 2293 | */ |
2270 | if (pulled < max_nr_move && rem_load_move > 0) { | 2294 | if (rem_load_move > 0) { |
2271 | if (p->prio < *this_best_prio) | 2295 | if (p->prio < *this_best_prio) |
2272 | *this_best_prio = p->prio; | 2296 | *this_best_prio = p->prio; |
2273 | p = iterator->next(iterator->arg); | 2297 | p = iterator->next(iterator->arg); |
@@ -2275,7 +2299,7 @@ next: | |||
2275 | } | 2299 | } |
2276 | out: | 2300 | out: |
2277 | /* | 2301 | /* |
2278 | * Right now, this is the only place pull_task() is called, | 2302 | * Right now, this is one of only two places pull_task() is called, |
2279 | * so we can safely collect pull_task() stats here rather than | 2303 | * so we can safely collect pull_task() stats here rather than |
2280 | * inside pull_task(). | 2304 | * inside pull_task(). |
2281 | */ | 2305 | */ |
@@ -2283,8 +2307,8 @@ out: | |||
2283 | 2307 | ||
2284 | if (all_pinned) | 2308 | if (all_pinned) |
2285 | *all_pinned = pinned; | 2309 | *all_pinned = pinned; |
2286 | *load_moved = max_load_move - rem_load_move; | 2310 | |
2287 | return pulled; | 2311 | return max_load_move - rem_load_move; |
2288 | } | 2312 | } |
2289 | 2313 | ||
2290 | /* | 2314 | /* |
@@ -2306,7 +2330,7 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2306 | do { | 2330 | do { |
2307 | total_load_moved += | 2331 | total_load_moved += |
2308 | class->load_balance(this_rq, this_cpu, busiest, | 2332 | class->load_balance(this_rq, this_cpu, busiest, |
2309 | ULONG_MAX, max_load_move - total_load_moved, | 2333 | max_load_move - total_load_moved, |
2310 | sd, idle, all_pinned, &this_best_prio); | 2334 | sd, idle, all_pinned, &this_best_prio); |
2311 | class = class->next; | 2335 | class = class->next; |
2312 | } while (class && max_load_move > total_load_moved); | 2336 | } while (class && max_load_move > total_load_moved); |
@@ -2314,6 +2338,32 @@ static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2314 | return total_load_moved > 0; | 2338 | return total_load_moved > 0; |
2315 | } | 2339 | } |
2316 | 2340 | ||
2341 | static int | ||
2342 | iter_move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
2343 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
2344 | struct rq_iterator *iterator) | ||
2345 | { | ||
2346 | struct task_struct *p = iterator->start(iterator->arg); | ||
2347 | int pinned = 0; | ||
2348 | |||
2349 | while (p) { | ||
2350 | if (can_migrate_task(p, busiest, this_cpu, sd, idle, &pinned)) { | ||
2351 | pull_task(busiest, p, this_rq, this_cpu); | ||
2352 | /* | ||
2353 | * Right now, this is only the second place pull_task() | ||
2354 | * is called, so we can safely collect pull_task() | ||
2355 | * stats here rather than inside pull_task(). | ||
2356 | */ | ||
2357 | schedstat_inc(sd, lb_gained[idle]); | ||
2358 | |||
2359 | return 1; | ||
2360 | } | ||
2361 | p = iterator->next(iterator->arg); | ||
2362 | } | ||
2363 | |||
2364 | return 0; | ||
2365 | } | ||
2366 | |||
2317 | /* | 2367 | /* |
2318 | * move_one_task tries to move exactly one task from busiest to this_rq, as | 2368 | * move_one_task tries to move exactly one task from busiest to this_rq, as |
2319 | * part of active balancing operations within "domain". | 2369 | * part of active balancing operations within "domain". |
@@ -2325,12 +2375,9 @@ static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
2325 | struct sched_domain *sd, enum cpu_idle_type idle) | 2375 | struct sched_domain *sd, enum cpu_idle_type idle) |
2326 | { | 2376 | { |
2327 | const struct sched_class *class; | 2377 | const struct sched_class *class; |
2328 | int this_best_prio = MAX_PRIO; | ||
2329 | 2378 | ||
2330 | for (class = sched_class_highest; class; class = class->next) | 2379 | for (class = sched_class_highest; class; class = class->next) |
2331 | if (class->load_balance(this_rq, this_cpu, busiest, | 2380 | if (class->move_one_task(this_rq, this_cpu, busiest, sd, idle)) |
2332 | 1, ULONG_MAX, sd, idle, NULL, | ||
2333 | &this_best_prio)) | ||
2334 | return 1; | 2381 | return 1; |
2335 | 2382 | ||
2336 | return 0; | 2383 | return 0; |
@@ -3267,18 +3314,6 @@ static inline void idle_balance(int cpu, struct rq *rq) | |||
3267 | { | 3314 | { |
3268 | } | 3315 | } |
3269 | 3316 | ||
3270 | /* Avoid "used but not defined" warning on UP */ | ||
3271 | static int balance_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
3272 | unsigned long max_nr_move, unsigned long max_load_move, | ||
3273 | struct sched_domain *sd, enum cpu_idle_type idle, | ||
3274 | int *all_pinned, unsigned long *load_moved, | ||
3275 | int *this_best_prio, struct rq_iterator *iterator) | ||
3276 | { | ||
3277 | *load_moved = 0; | ||
3278 | |||
3279 | return 0; | ||
3280 | } | ||
3281 | |||
3282 | #endif | 3317 | #endif |
3283 | 3318 | ||
3284 | DEFINE_PER_CPU(struct kernel_stat, kstat); | 3319 | DEFINE_PER_CPU(struct kernel_stat, kstat); |
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 166ed6db600b..a90d0457d603 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -936,12 +936,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq) | |||
936 | 936 | ||
937 | static unsigned long | 937 | static unsigned long |
938 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | 938 | load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, |
939 | unsigned long max_nr_move, unsigned long max_load_move, | 939 | unsigned long max_load_move, |
940 | struct sched_domain *sd, enum cpu_idle_type idle, | 940 | struct sched_domain *sd, enum cpu_idle_type idle, |
941 | int *all_pinned, int *this_best_prio) | 941 | int *all_pinned, int *this_best_prio) |
942 | { | 942 | { |
943 | struct cfs_rq *busy_cfs_rq; | 943 | struct cfs_rq *busy_cfs_rq; |
944 | unsigned long load_moved, total_nr_moved = 0, nr_moved; | ||
945 | long rem_load_move = max_load_move; | 944 | long rem_load_move = max_load_move; |
946 | struct rq_iterator cfs_rq_iterator; | 945 | struct rq_iterator cfs_rq_iterator; |
947 | 946 | ||
@@ -969,25 +968,47 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
969 | #else | 968 | #else |
970 | # define maxload rem_load_move | 969 | # define maxload rem_load_move |
971 | #endif | 970 | #endif |
972 | /* pass busy_cfs_rq argument into | 971 | /* |
972 | * pass busy_cfs_rq argument into | ||
973 | * load_balance_[start|next]_fair iterators | 973 | * load_balance_[start|next]_fair iterators |
974 | */ | 974 | */ |
975 | cfs_rq_iterator.arg = busy_cfs_rq; | 975 | cfs_rq_iterator.arg = busy_cfs_rq; |
976 | nr_moved = balance_tasks(this_rq, this_cpu, busiest, | 976 | rem_load_move -= balance_tasks(this_rq, this_cpu, busiest, |
977 | max_nr_move, maxload, sd, idle, all_pinned, | 977 | maxload, sd, idle, all_pinned, |
978 | &load_moved, this_best_prio, &cfs_rq_iterator); | 978 | this_best_prio, |
979 | 979 | &cfs_rq_iterator); | |
980 | total_nr_moved += nr_moved; | ||
981 | max_nr_move -= nr_moved; | ||
982 | rem_load_move -= load_moved; | ||
983 | 980 | ||
984 | if (max_nr_move <= 0 || rem_load_move <= 0) | 981 | if (rem_load_move <= 0) |
985 | break; | 982 | break; |
986 | } | 983 | } |
987 | 984 | ||
988 | return max_load_move - rem_load_move; | 985 | return max_load_move - rem_load_move; |
989 | } | 986 | } |
990 | 987 | ||
988 | static int | ||
989 | move_one_task_fair(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
990 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
991 | { | ||
992 | struct cfs_rq *busy_cfs_rq; | ||
993 | struct rq_iterator cfs_rq_iterator; | ||
994 | |||
995 | cfs_rq_iterator.start = load_balance_start_fair; | ||
996 | cfs_rq_iterator.next = load_balance_next_fair; | ||
997 | |||
998 | for_each_leaf_cfs_rq(busiest, busy_cfs_rq) { | ||
999 | /* | ||
1000 | * pass busy_cfs_rq argument into | ||
1001 | * load_balance_[start|next]_fair iterators | ||
1002 | */ | ||
1003 | cfs_rq_iterator.arg = busy_cfs_rq; | ||
1004 | if (iter_move_one_task(this_rq, this_cpu, busiest, sd, idle, | ||
1005 | &cfs_rq_iterator)) | ||
1006 | return 1; | ||
1007 | } | ||
1008 | |||
1009 | return 0; | ||
1010 | } | ||
1011 | |||
991 | /* | 1012 | /* |
992 | * scheduler tick hitting a task of our scheduling class: | 1013 | * scheduler tick hitting a task of our scheduling class: |
993 | */ | 1014 | */ |
@@ -1064,6 +1085,7 @@ static const struct sched_class fair_sched_class = { | |||
1064 | .put_prev_task = put_prev_task_fair, | 1085 | .put_prev_task = put_prev_task_fair, |
1065 | 1086 | ||
1066 | .load_balance = load_balance_fair, | 1087 | .load_balance = load_balance_fair, |
1088 | .move_one_task = move_one_task_fair, | ||
1067 | 1089 | ||
1068 | .set_curr_task = set_curr_task_fair, | 1090 | .set_curr_task = set_curr_task_fair, |
1069 | .task_tick = task_tick_fair, | 1091 | .task_tick = task_tick_fair, |
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c index 6e2ead41516e..586b06ca30aa 100644 --- a/kernel/sched_idletask.c +++ b/kernel/sched_idletask.c | |||
@@ -39,9 +39,16 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev) | |||
39 | 39 | ||
40 | static unsigned long | 40 | static unsigned long |
41 | load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, | 41 | load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, |
42 | unsigned long max_nr_move, unsigned long max_load_move, | 42 | unsigned long max_load_move, |
43 | struct sched_domain *sd, enum cpu_idle_type idle, | 43 | struct sched_domain *sd, enum cpu_idle_type idle, |
44 | int *all_pinned, int *this_best_prio) | 44 | int *all_pinned, int *this_best_prio) |
45 | { | ||
46 | return 0; | ||
47 | } | ||
48 | |||
49 | static int | ||
50 | move_one_task_idle(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
51 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
45 | { | 52 | { |
46 | return 0; | 53 | return 0; |
47 | } | 54 | } |
@@ -70,6 +77,7 @@ const struct sched_class idle_sched_class = { | |||
70 | .put_prev_task = put_prev_task_idle, | 77 | .put_prev_task = put_prev_task_idle, |
71 | 78 | ||
72 | .load_balance = load_balance_idle, | 79 | .load_balance = load_balance_idle, |
80 | .move_one_task = move_one_task_idle, | ||
73 | 81 | ||
74 | .set_curr_task = set_curr_task_idle, | 82 | .set_curr_task = set_curr_task_idle, |
75 | .task_tick = task_tick_idle, | 83 | .task_tick = task_tick_idle, |
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index d0097a0634e5..e9395b7119e6 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c | |||
@@ -172,13 +172,11 @@ static struct task_struct *load_balance_next_rt(void *arg) | |||
172 | 172 | ||
173 | static unsigned long | 173 | static unsigned long |
174 | load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | 174 | load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, |
175 | unsigned long max_nr_move, unsigned long max_load_move, | 175 | unsigned long max_load_move, |
176 | struct sched_domain *sd, enum cpu_idle_type idle, | 176 | struct sched_domain *sd, enum cpu_idle_type idle, |
177 | int *all_pinned, int *this_best_prio) | 177 | int *all_pinned, int *this_best_prio) |
178 | { | 178 | { |
179 | int nr_moved; | ||
180 | struct rq_iterator rt_rq_iterator; | 179 | struct rq_iterator rt_rq_iterator; |
181 | unsigned long load_moved; | ||
182 | 180 | ||
183 | rt_rq_iterator.start = load_balance_start_rt; | 181 | rt_rq_iterator.start = load_balance_start_rt; |
184 | rt_rq_iterator.next = load_balance_next_rt; | 182 | rt_rq_iterator.next = load_balance_next_rt; |
@@ -187,11 +185,22 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | |||
187 | */ | 185 | */ |
188 | rt_rq_iterator.arg = busiest; | 186 | rt_rq_iterator.arg = busiest; |
189 | 187 | ||
190 | nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move, | 188 | return balance_tasks(this_rq, this_cpu, busiest, max_load_move, sd, |
191 | max_load_move, sd, idle, all_pinned, &load_moved, | 189 | idle, all_pinned, this_best_prio, &rt_rq_iterator); |
192 | this_best_prio, &rt_rq_iterator); | 190 | } |
191 | |||
192 | static int | ||
193 | move_one_task_rt(struct rq *this_rq, int this_cpu, struct rq *busiest, | ||
194 | struct sched_domain *sd, enum cpu_idle_type idle) | ||
195 | { | ||
196 | struct rq_iterator rt_rq_iterator; | ||
197 | |||
198 | rt_rq_iterator.start = load_balance_start_rt; | ||
199 | rt_rq_iterator.next = load_balance_next_rt; | ||
200 | rt_rq_iterator.arg = busiest; | ||
193 | 201 | ||
194 | return load_moved; | 202 | return iter_move_one_task(this_rq, this_cpu, busiest, sd, idle, |
203 | &rt_rq_iterator); | ||
195 | } | 204 | } |
196 | 205 | ||
197 | static void task_tick_rt(struct rq *rq, struct task_struct *p) | 206 | static void task_tick_rt(struct rq *rq, struct task_struct *p) |
@@ -237,6 +246,7 @@ const struct sched_class rt_sched_class = { | |||
237 | .put_prev_task = put_prev_task_rt, | 246 | .put_prev_task = put_prev_task_rt, |
238 | 247 | ||
239 | .load_balance = load_balance_rt, | 248 | .load_balance = load_balance_rt, |
249 | .move_one_task = move_one_task_rt, | ||
240 | 250 | ||
241 | .set_curr_task = set_curr_task_rt, | 251 | .set_curr_task = set_curr_task_rt, |
242 | .task_tick = task_tick_rt, | 252 | .task_tick = task_tick_rt, |