author    Peter Williams <pwil3058@bigpond.net.au>    2007-08-09 05:16:46 -0400
committer Ingo Molnar <mingo@elte.hu>                 2007-08-09 05:16:46 -0400
commit    4301065920b0cbde3986519582347e883b166f3e (patch)
tree      415b8e3a2796709673015e100a3b5b28e556104a
parent    f1a438d813d416fa9f4be4e6dbd10b54c5938d89 (diff)
sched: simplify move_tasks()
The move_tasks() function is currently multiplexed with two distinct
capabilities:

1. attempt to move a specified amount of weighted load from one run queue
   to another; and
2. attempt to move a specified number of tasks from one run queue to
   another.

The first of these capabilities is used in two places, load_balance() and
load_balance_newidle(), and in both cases the return value of move_tasks()
is used purely to decide whether tasks/load were moved; no notice is taken
of the actual number of tasks moved.  The second capability is used in
exactly one place, active_load_balance(), to attempt to move exactly one
task and, as before, the return value is only used as an indicator of
success or failure.

This multiplexing of move_tasks() was introduced, by me, as part of the
smpnice patches and was motivated by the fact that the alternative (one
function to move a specified load and one to move a single task) would
have led to two functions of roughly the same complexity as the old
move_tasks() (or the new balance_tasks()).  However, the modular design of
the new CFS scheduler allows a simpler solution to be adopted, and this
patch implements it by:

1. adding a new function, move_one_task(), to be used by
   active_load_balance(); and
2. making move_tasks() a single-purpose function that tries to move a
   specified weighted load and returns 1 for success and 0 for failure.

One consequence of these changes is that neither move_one_task() nor the
new move_tasks() cares how many tasks sched_class.load_balance() moves.
This allows its interface to be simplified: it now returns the amount of
load moved as its result, and the load_moved pointer is removed from its
argument list.  That in turn simplifies the new move_tasks() and slightly
reduces the amount of work done in each of sched_class.load_balance()'s
implementations.

Further simplification, e.g. changes to balance_tasks(), is possible but
(slightly) complicated by the special needs of load_balance_fair(), so
I've left that to a later patch (if this one gets accepted).

NB: since move_tasks() gets called with two run queue locks held, even
small reductions in overhead are worthwhile.

[ mingo@elte.hu: this change also reduces code size nicely: ]

   text    data     bss     dec     hex filename
  39216    3618      24   42858    a76a sched.o.before
  39173    3618      24   42815    a73f sched.o.after

Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
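For reference, below is a minimal, self-contained user-space sketch of the new calling pattern.  struct rq, struct sched_class, the toy class and its "chunks of 10" load_balance() hook are invented stand-ins for illustration only; nothing here is kernel code, and only the control flow of move_tasks() and move_one_task() mirrors the kernel/sched.c hunk in the patch itself.

#include <limits.h>
#include <stdio.h>

/* Invented stand-ins: a runqueue is just its weighted load. */
struct rq { unsigned long weighted_load; };

struct sched_class {
	/* After this patch the hook reports the weighted load it moved. */
	unsigned long (*load_balance)(struct rq *this_rq, struct rq *busiest,
				      unsigned long max_nr_move,
				      unsigned long max_load_move);
	const struct sched_class *next;
};

/* Toy class: moves load between runqueues in fixed chunks of 10. */
static unsigned long toy_load_balance(struct rq *this_rq, struct rq *busiest,
				      unsigned long max_nr_move,
				      unsigned long max_load_move)
{
	unsigned long moved = 0;

	while (max_nr_move-- && moved + 10 <= max_load_move &&
	       busiest->weighted_load >= 10) {
		busiest->weighted_load -= 10;
		this_rq->weighted_load += 10;
		moved += 10;
	}
	return moved;
}

static const struct sched_class toy_class = { toy_load_balance, NULL };
#define sched_class_highest (&toy_class)

/* Single-purpose move_tasks(): move up to max_load_move weighted load,
 * walking the class list; returns 1 for success, 0 for failure. */
static int move_tasks(struct rq *this_rq, struct rq *busiest,
		      unsigned long max_load_move)
{
	const struct sched_class *class = sched_class_highest;
	unsigned long total_load_moved = 0;

	do {
		total_load_moved += class->load_balance(this_rq, busiest,
				ULONG_MAX,	/* no cap on task count */
				max_load_move - total_load_moved);
		class = class->next;
	} while (class && max_load_move > total_load_moved);

	return total_load_moved > 0;
}

/* move_one_task(): the first class able to move a single task wins. */
static int move_one_task(struct rq *this_rq, struct rq *busiest)
{
	const struct sched_class *class;

	for (class = sched_class_highest; class; class = class->next)
		if (class->load_balance(this_rq, busiest, 1, ULONG_MAX))
			return 1;
	return 0;
}

int main(void)
{
	struct rq this_rq = { 0 }, busiest = { 100 };

	printf("move_tasks(35)  -> %d\n", move_tasks(&this_rq, &busiest, 35));
	printf("move_one_task() -> %d\n", move_one_task(&this_rq, &busiest));
	printf("this_rq=%lu busiest=%lu\n",
	       this_rq.weighted_load, busiest.weighted_load);
	return 0;
}

The callers only ever look at the 0/1 result, which is exactly why the real move_tasks() no longer needs a task count or a load_moved out-parameter.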
-rw-r--r--   include/linux/sched.h      4
-rw-r--r--   kernel/sched.c            82
-rw-r--r--   kernel/sched_fair.c        8
-rw-r--r--   kernel/sched_idletask.c    4
-rw-r--r--   kernel/sched_rt.c          9
5 files changed, 58 insertions, 49 deletions
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 17249fae5014..24bce423f10d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -866,11 +866,11 @@ struct sched_class {
 	struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
 
-	int (*load_balance) (struct rq *this_rq, int this_cpu,
+	unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
 			struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, unsigned long *total_load_moved);
+			int *all_pinned);
 
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c
index 4680f52974e3..42029634ef5a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2231,32 +2231,49 @@ out:
 }
 
 /*
- * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
- * load from busiest to this_rq, as part of a balancing operation within
- * "domain". Returns the number of tasks moved.
+ * move_tasks tries to move up to max_load_move weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd".
+ * Returns 1 if successful and 0 otherwise.
  *
  * Called with both runqueues locked.
  */
 static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
+		      unsigned long max_load_move,
 		      struct sched_domain *sd, enum cpu_idle_type idle,
 		      int *all_pinned)
 {
 	struct sched_class *class = sched_class_highest;
-	unsigned long load_moved, total_nr_moved = 0, nr_moved;
-	long rem_load_move = max_load_move;
+	unsigned long total_load_moved = 0;
 
 	do {
-		nr_moved = class->load_balance(this_rq, this_cpu, busiest,
-				max_nr_move, (unsigned long)rem_load_move,
-				sd, idle, all_pinned, &load_moved);
-		total_nr_moved += nr_moved;
-		max_nr_move -= nr_moved;
-		rem_load_move -= load_moved;
+		total_load_moved +=
+			class->load_balance(this_rq, this_cpu, busiest,
+				ULONG_MAX, max_load_move - total_load_moved,
+				sd, idle, all_pinned);
 		class = class->next;
-	} while (class && max_nr_move && rem_load_move > 0);
+	} while (class && max_load_move > total_load_moved);
 
-	return total_nr_moved;
+	return total_load_moved > 0;
+}
+
+/*
+ * move_one_task tries to move exactly one task from busiest to this_rq, as
+ * part of active balancing operations within "domain".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+			 struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	struct sched_class *class;
+
+	for (class = sched_class_highest; class; class = class->next)
+		if (class->load_balance(this_rq, this_cpu, busiest,
+					1, ULONG_MAX, sd, idle, NULL))
+			return 1;
+
+	return 0;
 }
 
 /*
@@ -2588,11 +2605,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 #define MAX_PINNED_INTERVAL	512
 
-static inline unsigned long minus_1_or_zero(unsigned long n)
-{
-	return n > 0 ? n - 1 : 0;
-}
-
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
@@ -2601,7 +2613,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *balance)
 {
-	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
@@ -2642,18 +2654,17 @@ redo:
 
 	schedstat_add(sd, lb_imbalance[idle], imbalance);
 
-	nr_moved = 0;
+	ld_moved = 0;
 	if (busiest->nr_running > 1) {
 		/*
 		 * Attempt to move tasks. If find_busiest_group has found
 		 * an imbalance but busiest->nr_running <= 1, the group is
-		 * still unbalanced. nr_moved simply stays zero, so it is
+		 * still unbalanced. ld_moved simply stays zero, so it is
 		 * correctly treated as an imbalance.
 		 */
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
-		nr_moved = move_tasks(this_rq, this_cpu, busiest,
-				      minus_1_or_zero(busiest->nr_running),
+		ld_moved = move_tasks(this_rq, this_cpu, busiest,
 				      imbalance, sd, idle, &all_pinned);
 		double_rq_unlock(this_rq, busiest);
 		local_irq_restore(flags);
@@ -2661,7 +2672,7 @@ redo:
 		/*
 		 * some other cpu did the load balance for us.
 		 */
-		if (nr_moved && this_cpu != smp_processor_id())
+		if (ld_moved && this_cpu != smp_processor_id())
 			resched_cpu(this_cpu);
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
@@ -2673,7 +2684,7 @@ redo:
 		}
 	}
 
-	if (!nr_moved) {
+	if (!ld_moved) {
 		schedstat_inc(sd, lb_failed[idle]);
 		sd->nr_balance_failed++;
 
@@ -2722,10 +2733,10 @@ redo:
 			sd->balance_interval *= 2;
 	}
 
-	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+	if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
-	return nr_moved;
+	return ld_moved;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[idle]);
@@ -2757,7 +2768,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	struct sched_group *group;
 	struct rq *busiest = NULL;
 	unsigned long imbalance;
-	int nr_moved = 0;
+	int ld_moved = 0;
 	int sd_idle = 0;
 	int all_pinned = 0;
 	cpumask_t cpus = CPU_MASK_ALL;
@@ -2792,12 +2803,11 @@ redo:
 
 	schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
 
-	nr_moved = 0;
+	ld_moved = 0;
 	if (busiest->nr_running > 1) {
 		/* Attempt to move tasks */
 		double_lock_balance(this_rq, busiest);
-		nr_moved = move_tasks(this_rq, this_cpu, busiest,
-					minus_1_or_zero(busiest->nr_running),
+		ld_moved = move_tasks(this_rq, this_cpu, busiest,
 					imbalance, sd, CPU_NEWLY_IDLE,
 					&all_pinned);
 		spin_unlock(&busiest->lock);
@@ -2809,7 +2819,7 @@ redo:
 		}
 	}
 
-	if (!nr_moved) {
+	if (!ld_moved) {
 		schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
 		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 		    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
@@ -2817,7 +2827,7 @@ redo:
 	} else
 		sd->nr_balance_failed = 0;
 
-	return nr_moved;
+	return ld_moved;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
@@ -2905,8 +2915,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 	if (likely(sd)) {
 		schedstat_inc(sd, alb_cnt);
 
-		if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
-			       ULONG_MAX, sd, CPU_IDLE, NULL))
+		if (move_one_task(target_rq, target_cpu, busiest_rq,
+				  sd, CPU_IDLE))
 			schedstat_inc(sd, alb_pushed);
 		else
 			schedstat_inc(sd, alb_failed);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9f401588d509..7307a37cf26f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -944,11 +944,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 	return p->prio;
 }
 
-static int
+static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, unsigned long *total_load_moved)
+			int *all_pinned)
 {
 	struct cfs_rq *busy_cfs_rq;
 	unsigned long load_moved, total_nr_moved = 0, nr_moved;
@@ -1006,9 +1006,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			break;
 	}
 
-	*total_load_moved = max_load_move - rem_load_move;
-
-	return total_nr_moved;
+	return max_load_move - rem_load_move;
 }
 
 /*
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 41841e741c4a..1d8d9e13d950 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -37,11 +37,11 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
 {
 }
 
-static int
+static unsigned long
 load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, unsigned long *total_load_moved)
+			int *all_pinned)
 {
 	return 0;
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 002fcf8d3f64..2b0626a43cb8 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -172,15 +172,16 @@ static struct task_struct *load_balance_next_rt(void *arg)
 	return p;
 }
 
-static int
+static unsigned long
 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, unsigned long *load_moved)
+			int *all_pinned)
 {
 	int this_best_prio, best_prio, best_prio_seen = 0;
 	int nr_moved;
 	struct rq_iterator rt_rq_iterator;
+	unsigned long load_moved;
 
 	best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
 	this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
@@ -203,11 +204,11 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	rt_rq_iterator.arg = busiest;
 
 	nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
-			max_load_move, sd, idle, all_pinned, load_moved,
+			max_load_move, sd, idle, all_pinned, &load_moved,
 			this_best_prio, best_prio, best_prio_seen,
 			&rt_rq_iterator);
 
-	return nr_moved;
+	return load_moved;
 }
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)