commit 4301065920b0cbde3986519582347e883b166f3e
tree 415b8e3a2796709673015e100a3b5b28e556104a
parent f1a438d813d416fa9f4be4e6dbd10b54c5938d89
author Peter Williams <pwil3058@bigpond.net.au> 2007-08-09 05:16:46 -0400
committer Ingo Molnar <mingo@elte.hu> 2007-08-09 05:16:46 -0400
sched: simplify move_tasks()
The move_tasks() function is currently multiplexed with two distinct
capabilities:
1. attempt to move a specified amount of weighted load from one run
queue to another; and
2. attempt to move a specified number of tasks from one run queue to
another.
The first of these capabilities is used in two places, load_balance()
and load_balance_newidle(), and in both cases the return value of
move_tasks() is used purely to decide whether any tasks/load were
moved; the actual number of tasks moved is never consulted.
The second capability is used in exactly one place,
active_load_balance(), to attempt to move exactly one task and, as
before, the return value is only used as an indicator of success or failure.
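Both pre-patch call patterns are visible in the hunks below; they are
condensed here for illustration, with the locking and surrounding logic
omitted:

        /* capability 1, in load_balance()/load_balance_newidle(): move up
         * to "imbalance" weighted load; the task cap is set to "all but
         * one", i.e. effectively disabled: */
        nr_moved = move_tasks(this_rq, this_cpu, busiest,
                        minus_1_or_zero(busiest->nr_running),
                        imbalance, sd, idle, &all_pinned);

        /* capability 2, in active_load_balance(): move exactly one task;
         * the load cap is set to ULONG_MAX, i.e. effectively disabled: */
        if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
                       ULONG_MAX, sd, CPU_IDLE, NULL))
                schedstat_inc(sd, alb_pushed);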
This multiplexing of move_tasks() was introduced, by me, as part of the
smpnice patches and was motivated by the fact that the alternative (one
function to move a specified load and one to move a single task) would
have led to two functions of roughly the same complexity as the old
move_tasks() (or the new balance_tasks()). However, the modular design
of the new CFS scheduler allows a simpler solution, which this patch
adopts by:
1. adding a new function, move_one_task(), to be used by
active_load_balance(); and
2. making move_tasks() a single purpose function that tries to move a
specified weighted load and returns 1 for success and 0 for failure.
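The resulting pair of entry points, condensed from the kernel/sched.c
hunk below:

        /* try to move up to max_load_move weighted load;
         * returns 1 if any load was moved, 0 otherwise */
        static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
                              unsigned long max_load_move,
                              struct sched_domain *sd, enum cpu_idle_type idle,
                              int *all_pinned);

        /* try to move exactly one task, trying each scheduling class in
         * priority order until one succeeds; returns 1 on success, 0 otherwise */
        static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
                                 struct sched_domain *sd, enum cpu_idle_type idle);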
One consequence of these changes is that neither move_one_task() nor
the new move_tasks() cares how many tasks sched_class.load_balance()
moves, which allows that interface to be simplified: the amount of load
moved becomes the return value, and the load_moved pointer is removed
from the argument list. This simplifies the new move_tasks() and
slightly reduces the amount of work done in each of
sched_class.load_balance()'s implementations.
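Concretely, sched_class.load_balance() stops reporting the moved load
through an out-parameter and returns it instead (from the
include/linux/sched.h hunk below):

        /* before: returns the number of tasks moved, reports load via pointer */
        int (*load_balance) (struct rq *this_rq, int this_cpu,
                        struct rq *busiest,
                        unsigned long max_nr_move, unsigned long max_load_move,
                        struct sched_domain *sd, enum cpu_idle_type idle,
                        int *all_pinned, unsigned long *total_load_moved);

        /* after: returns the amount of weighted load moved */
        unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
                        struct rq *busiest,
                        unsigned long max_nr_move, unsigned long max_load_move,
                        struct sched_domain *sd, enum cpu_idle_type idle,
                        int *all_pinned);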
Further simplification, e.g. changes to balance_tasks(), is possible
but (slightly) complicated by the special needs of load_balance_fair(),
so I've left that to a later patch (if this one gets accepted).
NB: since move_tasks() is called with two run-queue locks held, even
small reductions in overhead are worthwhile.
[ mingo@elte.hu ]
this change also reduces code size nicely:
   text    data     bss     dec     hex filename
  39216    3618      24   42858    a76a sched.o.before
  39173    3618      24   42815    a73f sched.o.after
Signed-off-by: Peter Williams <pwil3058@bigpond.net.au>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
 include/linux/sched.h   |  4
 kernel/sched.c          | 82
 kernel/sched_fair.c     |  8
 kernel/sched_idletask.c |  4
 kernel/sched_rt.c       |  9
 5 files changed, 58 insertions(+), 49 deletions(-)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 17249fae5014..24bce423f10d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -866,11 +866,11 @@ struct sched_class {
 	struct task_struct * (*pick_next_task) (struct rq *rq, u64 now);
 	void (*put_prev_task) (struct rq *rq, struct task_struct *p, u64 now);
 
-	int (*load_balance) (struct rq *this_rq, int this_cpu,
+	unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
 			struct rq *busiest,
 			unsigned long max_nr_move, unsigned long max_load_move,
 			struct sched_domain *sd, enum cpu_idle_type idle,
-			int *all_pinned, unsigned long *total_load_moved);
+			int *all_pinned);
 
 	void (*set_curr_task) (struct rq *rq);
 	void (*task_tick) (struct rq *rq, struct task_struct *p);
diff --git a/kernel/sched.c b/kernel/sched.c
index 4680f52974e3..42029634ef5a 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2231,32 +2231,49 @@ out:
 }
 
 /*
- * move_tasks tries to move up to max_nr_move tasks and max_load_move weighted
- * load from busiest to this_rq, as part of a balancing operation within
- * "domain". Returns the number of tasks moved.
+ * move_tasks tries to move up to max_load_move weighted load from busiest to
+ * this_rq, as part of a balancing operation within domain "sd".
+ * Returns 1 if successful and 0 otherwise.
  *
  * Called with both runqueues locked.
  */
 static int move_tasks(struct rq *this_rq, int this_cpu, struct rq *busiest,
-		      unsigned long max_nr_move, unsigned long max_load_move,
+		      unsigned long max_load_move,
 		      struct sched_domain *sd, enum cpu_idle_type idle,
 		      int *all_pinned)
 {
 	struct sched_class *class = sched_class_highest;
-	unsigned long load_moved, total_nr_moved = 0, nr_moved;
-	long rem_load_move = max_load_move;
+	unsigned long total_load_moved = 0;
 
 	do {
-		nr_moved = class->load_balance(this_rq, this_cpu, busiest,
-				max_nr_move, (unsigned long)rem_load_move,
-				sd, idle, all_pinned, &load_moved);
-		total_nr_moved += nr_moved;
-		max_nr_move -= nr_moved;
-		rem_load_move -= load_moved;
+		total_load_moved +=
+			class->load_balance(this_rq, this_cpu, busiest,
+				ULONG_MAX, max_load_move - total_load_moved,
+				sd, idle, all_pinned);
 		class = class->next;
-	} while (class && max_nr_move && rem_load_move > 0);
+	} while (class && max_load_move > total_load_moved);
 
-	return total_nr_moved;
+	return total_load_moved > 0;
+}
+
+/*
+ * move_one_task tries to move exactly one task from busiest to this_rq, as
+ * part of active balancing operations within "domain".
+ * Returns 1 if successful and 0 otherwise.
+ *
+ * Called with both runqueues locked.
+ */
+static int move_one_task(struct rq *this_rq, int this_cpu, struct rq *busiest,
+			 struct sched_domain *sd, enum cpu_idle_type idle)
+{
+	struct sched_class *class;
+
+	for (class = sched_class_highest; class; class = class->next)
+		if (class->load_balance(this_rq, this_cpu, busiest,
+					1, ULONG_MAX, sd, idle, NULL))
+			return 1;
+
+	return 0;
 }
 
 /*
@@ -2588,11 +2605,6 @@ find_busiest_queue(struct sched_group *group, enum cpu_idle_type idle,
  */
 #define MAX_PINNED_INTERVAL	512
 
-static inline unsigned long minus_1_or_zero(unsigned long n)
-{
-	return n > 0 ? n - 1 : 0;
-}
-
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
@@ -2601,7 +2613,7 @@ static int load_balance(int this_cpu, struct rq *this_rq,
 			struct sched_domain *sd, enum cpu_idle_type idle,
 			int *balance)
 {
-	int nr_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
+	int ld_moved, all_pinned = 0, active_balance = 0, sd_idle = 0;
 	struct sched_group *group;
 	unsigned long imbalance;
 	struct rq *busiest;
@@ -2642,18 +2654,17 @@ redo:
 
 	schedstat_add(sd, lb_imbalance[idle], imbalance);
 
-	nr_moved = 0;
+	ld_moved = 0;
 	if (busiest->nr_running > 1) {
 		/*
 		 * Attempt to move tasks. If find_busiest_group has found
 		 * an imbalance but busiest->nr_running <= 1, the group is
-		 * still unbalanced. nr_moved simply stays zero, so it is
+		 * still unbalanced. ld_moved simply stays zero, so it is
 		 * correctly treated as an imbalance.
 		 */
 		local_irq_save(flags);
 		double_rq_lock(this_rq, busiest);
-		nr_moved = move_tasks(this_rq, this_cpu, busiest,
-				minus_1_or_zero(busiest->nr_running),
+		ld_moved = move_tasks(this_rq, this_cpu, busiest,
 				imbalance, sd, idle, &all_pinned);
 		double_rq_unlock(this_rq, busiest);
 		local_irq_restore(flags);
@@ -2661,7 +2672,7 @@ redo:
 		/*
 		 * some other cpu did the load balance for us.
 		 */
-		if (nr_moved && this_cpu != smp_processor_id())
+		if (ld_moved && this_cpu != smp_processor_id())
 			resched_cpu(this_cpu);
 
 		/* All tasks on this runqueue were pinned by CPU affinity */
@@ -2673,7 +2684,7 @@ redo:
 		}
 	}
 
-	if (!nr_moved) {
+	if (!ld_moved) {
 		schedstat_inc(sd, lb_failed[idle]);
 		sd->nr_balance_failed++;
 
@@ -2722,10 +2733,10 @@ redo:
 			sd->balance_interval *= 2;
 	}
 
-	if (!nr_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
+	if (!ld_moved && !sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 	    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
 		return -1;
-	return nr_moved;
+	return ld_moved;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[idle]);
@@ -2757,7 +2768,7 @@ load_balance_newidle(int this_cpu, struct rq *this_rq, struct sched_domain *sd)
 	struct sched_group *group;
 	struct rq *busiest = NULL;
 	unsigned long imbalance;
-	int nr_moved = 0;
+	int ld_moved = 0;
 	int sd_idle = 0;
 	int all_pinned = 0;
 	cpumask_t cpus = CPU_MASK_ALL;
@@ -2792,12 +2803,11 @@ redo:
 
 	schedstat_add(sd, lb_imbalance[CPU_NEWLY_IDLE], imbalance);
 
-	nr_moved = 0;
+	ld_moved = 0;
 	if (busiest->nr_running > 1) {
 		/* Attempt to move tasks */
 		double_lock_balance(this_rq, busiest);
-		nr_moved = move_tasks(this_rq, this_cpu, busiest,
-					minus_1_or_zero(busiest->nr_running),
+		ld_moved = move_tasks(this_rq, this_cpu, busiest,
 					imbalance, sd, CPU_NEWLY_IDLE,
 					&all_pinned);
 		spin_unlock(&busiest->lock);
@@ -2809,7 +2819,7 @@ redo:
 		}
 	}
 
-	if (!nr_moved) {
+	if (!ld_moved) {
 		schedstat_inc(sd, lb_failed[CPU_NEWLY_IDLE]);
 		if (!sd_idle && sd->flags & SD_SHARE_CPUPOWER &&
 		    !test_sd_parent(sd, SD_POWERSAVINGS_BALANCE))
@@ -2817,7 +2827,7 @@ redo:
 	} else
 		sd->nr_balance_failed = 0;
 
-	return nr_moved;
+	return ld_moved;
 
 out_balanced:
 	schedstat_inc(sd, lb_balanced[CPU_NEWLY_IDLE]);
@@ -2905,8 +2915,8 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 	if (likely(sd)) {
 		schedstat_inc(sd, alb_cnt);
 
-		if (move_tasks(target_rq, target_cpu, busiest_rq, 1,
-			       ULONG_MAX, sd, CPU_IDLE, NULL))
+		if (move_one_task(target_rq, target_cpu, busiest_rq,
+				  sd, CPU_IDLE))
 			schedstat_inc(sd, alb_pushed);
 		else
 			schedstat_inc(sd, alb_failed);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 9f401588d509..7307a37cf26f 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -944,11 +944,11 @@ static int cfs_rq_best_prio(struct cfs_rq *cfs_rq)
 	return p->prio;
 }
 
-static int
+static unsigned long
 load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_nr_move, unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, unsigned long *total_load_moved)
+		  int *all_pinned)
 {
 	struct cfs_rq *busy_cfs_rq;
 	unsigned long load_moved, total_nr_moved = 0, nr_moved;
@@ -1006,9 +1006,7 @@ load_balance_fair(struct rq *this_rq, int this_cpu, struct rq *busiest,
 			break;
 	}
 
-	*total_load_moved = max_load_move - rem_load_move;
-
-	return total_nr_moved;
+	return max_load_move - rem_load_move;
 }
 
 /*
diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
index 41841e741c4a..1d8d9e13d950 100644
--- a/kernel/sched_idletask.c
+++ b/kernel/sched_idletask.c
@@ -37,11 +37,11 @@ static void put_prev_task_idle(struct rq *rq, struct task_struct *prev, u64 now)
 {
 }
 
-static int
+static unsigned long
 load_balance_idle(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		  unsigned long max_nr_move, unsigned long max_load_move,
 		  struct sched_domain *sd, enum cpu_idle_type idle,
-		  int *all_pinned, unsigned long *total_load_moved)
+		  int *all_pinned)
 {
 	return 0;
 }
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index 002fcf8d3f64..2b0626a43cb8 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -172,15 +172,16 @@ static struct task_struct *load_balance_next_rt(void *arg)
 	return p;
 }
 
-static int
+static unsigned long
 load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 		unsigned long max_nr_move, unsigned long max_load_move,
 		struct sched_domain *sd, enum cpu_idle_type idle,
-		int *all_pinned, unsigned long *load_moved)
+		int *all_pinned)
 {
 	int this_best_prio, best_prio, best_prio_seen = 0;
 	int nr_moved;
 	struct rq_iterator rt_rq_iterator;
+	unsigned long load_moved;
 
 	best_prio = sched_find_first_bit(busiest->rt.active.bitmap);
 	this_best_prio = sched_find_first_bit(this_rq->rt.active.bitmap);
@@ -203,11 +204,11 @@ load_balance_rt(struct rq *this_rq, int this_cpu, struct rq *busiest,
 	rt_rq_iterator.arg = busiest;
 
 	nr_moved = balance_tasks(this_rq, this_cpu, busiest, max_nr_move,
-			max_load_move, sd, idle, all_pinned, load_moved,
+			max_load_move, sd, idle, all_pinned, &load_moved,
 			this_best_prio, best_prio, best_prio_seen,
 			&rt_rq_iterator);
 
-	return nr_moved;
+	return load_moved;
 }
 
 static void task_tick_rt(struct rq *rq, struct task_struct *p)