aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/sched_fair.c
diff options
context:
space:
mode:
authorTejun Heo <tj@kernel.org>2010-05-06 12:49:21 -0400
committerTejun Heo <tj@kernel.org>2010-05-06 12:49:21 -0400
commit969c79215a35b06e5e3efe69b9412f858df7856c (patch)
tree4256378687c8bc2011ec35a3e28bc3b6473e912c /kernel/sched_fair.c
parent3fc1f1e27a5b807791d72e5d992aa33b668a6626 (diff)
sched: replace migration_thread with cpu_stop
Currently migration_thread is serving three purposes - migration pusher, context to execute active_load_balance() and forced context switcher for expedited RCU synchronize_sched. All three roles are hardcoded into migration_thread() and determining which job is scheduled is slightly messy. This patch kills migration_thread and replaces all three uses with cpu_stop. The three different roles of migration_thread() are splitted into three separate cpu_stop callbacks - migration_cpu_stop(), active_load_balance_cpu_stop() and synchronize_sched_expedited_cpu_stop() - and each use case now simply asks cpu_stop to execute the callback as necessary. synchronize_sched_expedited() was implemented with private preallocated resources and custom multi-cpu queueing and waiting logic, both of which are provided by cpu_stop. synchronize_sched_expedited_count is made atomic and all other shared resources along with the mutex are dropped. synchronize_sched_expedited() also implemented a check to detect cases where not all the callback got executed on their assigned cpus and fall back to synchronize_sched(). If called with cpu hotplug blocked, cpu_stop already guarantees that and the condition cannot happen; otherwise, stop_machine() would break. However, this patch preserves the paranoid check using a cpumask to record on which cpus the stopper ran so that it can serve as a bisection point if something actually goes wrong theree. Because the internal execution state is no longer visible, rcu_expedited_torture_stats() is removed. This patch also renames cpu_stop threads to from "stopper/%d" to "migration/%d". The names of these threads ultimately don't matter and there's no reason to make unnecessary userland visible changes. With this patch applied, stop_machine() and sched now share the same resources. stop_machine() is faster without wasting any resources and sched migration users are much cleaner. Signed-off-by: Tejun Heo <tj@kernel.org> Acked-by: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Dipankar Sarma <dipankar@in.ibm.com> Cc: Josh Triplett <josh@freedesktop.org> Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Oleg Nesterov <oleg@redhat.com> Cc: Dimitri Sivanich <sivanich@sgi.com>
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--kernel/sched_fair.c48
1 files changed, 34 insertions, 14 deletions
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cbd8b8a296d..217e4a9393e 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
2798 return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2); 2798 return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
2799} 2799}
2800 2800
2801static int active_load_balance_cpu_stop(void *data);
2802
2801/* 2803/*
2802 * Check this_cpu to ensure it is balanced within domain. Attempt to move 2804 * Check this_cpu to ensure it is balanced within domain. Attempt to move
2803 * tasks if there is an imbalance. 2805 * tasks if there is an imbalance.
@@ -2887,8 +2889,9 @@ redo:
2887 if (need_active_balance(sd, sd_idle, idle)) { 2889 if (need_active_balance(sd, sd_idle, idle)) {
2888 raw_spin_lock_irqsave(&busiest->lock, flags); 2890 raw_spin_lock_irqsave(&busiest->lock, flags);
2889 2891
2890 /* don't kick the migration_thread, if the curr 2892 /* don't kick the active_load_balance_cpu_stop,
2891 * task on busiest cpu can't be moved to this_cpu 2893 * if the curr task on busiest cpu can't be
2894 * moved to this_cpu
2892 */ 2895 */
2893 if (!cpumask_test_cpu(this_cpu, 2896 if (!cpumask_test_cpu(this_cpu,
2894 &busiest->curr->cpus_allowed)) { 2897 &busiest->curr->cpus_allowed)) {
@@ -2898,14 +2901,22 @@ redo:
2898 goto out_one_pinned; 2901 goto out_one_pinned;
2899 } 2902 }
2900 2903
2904 /*
2905 * ->active_balance synchronizes accesses to
2906 * ->active_balance_work. Once set, it's cleared
2907 * only after active load balance is finished.
2908 */
2901 if (!busiest->active_balance) { 2909 if (!busiest->active_balance) {
2902 busiest->active_balance = 1; 2910 busiest->active_balance = 1;
2903 busiest->push_cpu = this_cpu; 2911 busiest->push_cpu = this_cpu;
2904 active_balance = 1; 2912 active_balance = 1;
2905 } 2913 }
2906 raw_spin_unlock_irqrestore(&busiest->lock, flags); 2914 raw_spin_unlock_irqrestore(&busiest->lock, flags);
2915
2907 if (active_balance) 2916 if (active_balance)
2908 wake_up_process(busiest->migration_thread); 2917 stop_one_cpu_nowait(cpu_of(busiest),
2918 active_load_balance_cpu_stop, busiest,
2919 &busiest->active_balance_work);
2909 2920
2910 /* 2921 /*
2911 * We've kicked active balancing, reset the failure 2922 * We've kicked active balancing, reset the failure
@@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
3012} 3023}
3013 3024
3014/* 3025/*
3015 * active_load_balance is run by migration threads. It pushes running tasks 3026 * active_load_balance_cpu_stop is run by cpu stopper. It pushes
3016 * off the busiest CPU onto idle CPUs. It requires at least 1 task to be 3027 * running tasks off the busiest CPU onto idle CPUs. It requires at
3017 * running on each physical CPU where possible, and avoids physical / 3028 * least 1 task to be running on each physical CPU where possible, and
3018 * logical imbalances. 3029 * avoids physical / logical imbalances.
3019 *
3020 * Called with busiest_rq locked.
3021 */ 3030 */
3022static void active_load_balance(struct rq *busiest_rq, int busiest_cpu) 3031static int active_load_balance_cpu_stop(void *data)
3023{ 3032{
3033 struct rq *busiest_rq = data;
3034 int busiest_cpu = cpu_of(busiest_rq);
3024 int target_cpu = busiest_rq->push_cpu; 3035 int target_cpu = busiest_rq->push_cpu;
3036 struct rq *target_rq = cpu_rq(target_cpu);
3025 struct sched_domain *sd; 3037 struct sched_domain *sd;
3026 struct rq *target_rq; 3038
3039 raw_spin_lock_irq(&busiest_rq->lock);
3040
3041 /* make sure the requested cpu hasn't gone down in the meantime */
3042 if (unlikely(busiest_cpu != smp_processor_id() ||
3043 !busiest_rq->active_balance))
3044 goto out_unlock;
3027 3045
3028 /* Is there any task to move? */ 3046 /* Is there any task to move? */
3029 if (busiest_rq->nr_running <= 1) 3047 if (busiest_rq->nr_running <= 1)
3030 return; 3048 goto out_unlock;
3031
3032 target_rq = cpu_rq(target_cpu);
3033 3049
3034 /* 3050 /*
3035 * This condition is "impossible", if it occurs 3051 * This condition is "impossible", if it occurs
@@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
3058 schedstat_inc(sd, alb_failed); 3074 schedstat_inc(sd, alb_failed);
3059 } 3075 }
3060 double_unlock_balance(busiest_rq, target_rq); 3076 double_unlock_balance(busiest_rq, target_rq);
3077out_unlock:
3078 busiest_rq->active_balance = 0;
3079 raw_spin_unlock_irq(&busiest_rq->lock);
3080 return 0;
3061} 3081}
3062 3082
3063#ifdef CONFIG_NO_HZ 3083#ifdef CONFIG_NO_HZ