diff options
author | Tejun Heo <tj@kernel.org> | 2010-05-06 12:49:21 -0400 |
---|---|---|
committer | Tejun Heo <tj@kernel.org> | 2010-05-06 12:49:21 -0400 |
commit | 969c79215a35b06e5e3efe69b9412f858df7856c (patch) | |
tree | 4256378687c8bc2011ec35a3e28bc3b6473e912c /kernel/sched_fair.c | |
parent | 3fc1f1e27a5b807791d72e5d992aa33b668a6626 (diff) |
sched: replace migration_thread with cpu_stop
Currently migration_thread is serving three purposes - migration
pusher, context to execute active_load_balance() and forced context
switcher for expedited RCU synchronize_sched. All three roles are
hardcoded into migration_thread() and determining which job is
scheduled is slightly messy.
This patch kills migration_thread and replaces all three uses with
cpu_stop. The three different roles of migration_thread() are
splitted into three separate cpu_stop callbacks -
migration_cpu_stop(), active_load_balance_cpu_stop() and
synchronize_sched_expedited_cpu_stop() - and each use case now simply
asks cpu_stop to execute the callback as necessary.
synchronize_sched_expedited() was implemented with private
preallocated resources and custom multi-cpu queueing and waiting
logic, both of which are provided by cpu_stop.
synchronize_sched_expedited_count is made atomic and all other shared
resources along with the mutex are dropped.
synchronize_sched_expedited() also implemented a check to detect cases
where not all the callback got executed on their assigned cpus and
fall back to synchronize_sched(). If called with cpu hotplug blocked,
cpu_stop already guarantees that and the condition cannot happen;
otherwise, stop_machine() would break. However, this patch preserves
the paranoid check using a cpumask to record on which cpus the stopper
ran so that it can serve as a bisection point if something actually
goes wrong theree.
Because the internal execution state is no longer visible,
rcu_expedited_torture_stats() is removed.
This patch also renames cpu_stop threads to from "stopper/%d" to
"migration/%d". The names of these threads ultimately don't matter
and there's no reason to make unnecessary userland visible changes.
With this patch applied, stop_machine() and sched now share the same
resources. stop_machine() is faster without wasting any resources and
sched migration users are much cleaner.
Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Josh Triplett <josh@freedesktop.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r-- | kernel/sched_fair.c | 48 |
1 files changed, 34 insertions, 14 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index cbd8b8a296d..217e4a9393e 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c | |||
@@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle) | |||
2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); | 2798 | return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); |
2799 | } | 2799 | } |
2800 | 2800 | ||
2801 | static int active_load_balance_cpu_stop(void *data); | ||
2802 | |||
2801 | /* | 2803 | /* |
2802 | * Check this_cpu to ensure it is balanced within domain. Attempt to move | 2804 | * Check this_cpu to ensure it is balanced within domain. Attempt to move |
2803 | * tasks if there is an imbalance. | 2805 | * tasks if there is an imbalance. |
@@ -2887,8 +2889,9 @@ redo: | |||
2887 | if (need_active_balance(sd, sd_idle, idle)) { | 2889 | if (need_active_balance(sd, sd_idle, idle)) { |
2888 | raw_spin_lock_irqsave(&busiest->lock, flags); | 2890 | raw_spin_lock_irqsave(&busiest->lock, flags); |
2889 | 2891 | ||
2890 | /* don't kick the migration_thread, if the curr | 2892 | /* don't kick the active_load_balance_cpu_stop, |
2891 | * task on busiest cpu can't be moved to this_cpu | 2893 | * if the curr task on busiest cpu can't be |
2894 | * moved to this_cpu | ||
2892 | */ | 2895 | */ |
2893 | if (!cpumask_test_cpu(this_cpu, | 2896 | if (!cpumask_test_cpu(this_cpu, |
2894 | &busiest->curr->cpus_allowed)) { | 2897 | &busiest->curr->cpus_allowed)) { |
@@ -2898,14 +2901,22 @@ redo: | |||
2898 | goto out_one_pinned; | 2901 | goto out_one_pinned; |
2899 | } | 2902 | } |
2900 | 2903 | ||
2904 | /* | ||
2905 | * ->active_balance synchronizes accesses to | ||
2906 | * ->active_balance_work. Once set, it's cleared | ||
2907 | * only after active load balance is finished. | ||
2908 | */ | ||
2901 | if (!busiest->active_balance) { | 2909 | if (!busiest->active_balance) { |
2902 | busiest->active_balance = 1; | 2910 | busiest->active_balance = 1; |
2903 | busiest->push_cpu = this_cpu; | 2911 | busiest->push_cpu = this_cpu; |
2904 | active_balance = 1; | 2912 | active_balance = 1; |
2905 | } | 2913 | } |
2906 | raw_spin_unlock_irqrestore(&busiest->lock, flags); | 2914 | raw_spin_unlock_irqrestore(&busiest->lock, flags); |
2915 | |||
2907 | if (active_balance) | 2916 | if (active_balance) |
2908 | wake_up_process(busiest->migration_thread); | 2917 | stop_one_cpu_nowait(cpu_of(busiest), |
2918 | active_load_balance_cpu_stop, busiest, | ||
2919 | &busiest->active_balance_work); | ||
2909 | 2920 | ||
2910 | /* | 2921 | /* |
2911 | * We've kicked active balancing, reset the failure | 2922 | * We've kicked active balancing, reset the failure |
@@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq) | |||
3012 | } | 3023 | } |
3013 | 3024 | ||
3014 | /* | 3025 | /* |
3015 | * active_load_balance is run by migration threads. It pushes running tasks | 3026 | * active_load_balance_cpu_stop is run by cpu stopper. It pushes |
3016 | * off the busiest CPU onto idle CPUs. It requires at least 1 task to be | 3027 | * running tasks off the busiest CPU onto idle CPUs. It requires at |
3017 | * running on each physical CPU where possible, and avoids physical / | 3028 | * least 1 task to be running on each physical CPU where possible, and |
3018 | * logical imbalances. | 3029 | * avoids physical / logical imbalances. |
3019 | * | ||
3020 | * Called with busiest_rq locked. | ||
3021 | */ | 3030 | */ |
3022 | static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | 3031 | static int active_load_balance_cpu_stop(void *data) |
3023 | { | 3032 | { |
3033 | struct rq *busiest_rq = data; | ||
3034 | int busiest_cpu = cpu_of(busiest_rq); | ||
3024 | int target_cpu = busiest_rq->push_cpu; | 3035 | int target_cpu = busiest_rq->push_cpu; |
3036 | struct rq *target_rq = cpu_rq(target_cpu); | ||
3025 | struct sched_domain *sd; | 3037 | struct sched_domain *sd; |
3026 | struct rq *target_rq; | 3038 | |
3039 | raw_spin_lock_irq(&busiest_rq->lock); | ||
3040 | |||
3041 | /* make sure the requested cpu hasn't gone down in the meantime */ | ||
3042 | if (unlikely(busiest_cpu != smp_processor_id() || | ||
3043 | !busiest_rq->active_balance)) | ||
3044 | goto out_unlock; | ||
3027 | 3045 | ||
3028 | /* Is there any task to move? */ | 3046 | /* Is there any task to move? */ |
3029 | if (busiest_rq->nr_running <= 1) | 3047 | if (busiest_rq->nr_running <= 1) |
3030 | return; | 3048 | goto out_unlock; |
3031 | |||
3032 | target_rq = cpu_rq(target_cpu); | ||
3033 | 3049 | ||
3034 | /* | 3050 | /* |
3035 | * This condition is "impossible", if it occurs | 3051 | * This condition is "impossible", if it occurs |
@@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) | |||
3058 | schedstat_inc(sd, alb_failed); | 3074 | schedstat_inc(sd, alb_failed); |
3059 | } | 3075 | } |
3060 | double_unlock_balance(busiest_rq, target_rq); | 3076 | double_unlock_balance(busiest_rq, target_rq); |
3077 | out_unlock: | ||
3078 | busiest_rq->active_balance = 0; | ||
3079 | raw_spin_unlock_irq(&busiest_rq->lock); | ||
3080 | return 0; | ||
3061 | } | 3081 | } |
3062 | 3082 | ||
3063 | #ifdef CONFIG_NO_HZ | 3083 | #ifdef CONFIG_NO_HZ |