author    Tejun Heo <tj@kernel.org>  2010-05-06 12:49:21 -0400
committer Tejun Heo <tj@kernel.org>  2010-05-06 12:49:21 -0400
commit    969c79215a35b06e5e3efe69b9412f858df7856c
tree      4256378687c8bc2011ec35a3e28bc3b6473e912c /kernel/sched_fair.c
parent    3fc1f1e27a5b807791d72e5d992aa33b668a6626
sched: replace migration_thread with cpu_stop
Currently migration_thread is serving three purposes - migration pusher, context to execute active_load_balance() and forced context switcher for expedited RCU synchronize_sched. All three roles are hardcoded into migration_thread() and determining which job is scheduled is slightly messy.

This patch kills migration_thread and replaces all three uses with cpu_stop. The three different roles of migration_thread() are split into three separate cpu_stop callbacks - migration_cpu_stop(), active_load_balance_cpu_stop() and synchronize_sched_expedited_cpu_stop() - and each use case now simply asks cpu_stop to execute the callback as necessary.

synchronize_sched_expedited() was implemented with private preallocated resources and custom multi-cpu queueing and waiting logic, both of which are provided by cpu_stop. synchronize_sched_expedited_count is made atomic and all other shared resources along with the mutex are dropped.

synchronize_sched_expedited() also implemented a check to detect cases where not all the callbacks got executed on their assigned cpus, falling back to synchronize_sched(). If called with cpu hotplug blocked, cpu_stop already guarantees that and the condition cannot happen; otherwise, stop_machine() would break. However, this patch preserves the paranoid check using a cpumask to record on which cpus the stopper ran, so that it can serve as a bisection point if something actually goes wrong there.

Because the internal execution state is no longer visible, rcu_expedited_torture_stats() is removed.

This patch also renames cpu_stop threads from "stopper/%d" to "migration/%d". The names of these threads ultimately don't matter and there's no reason to make unnecessary userland-visible changes.

With this patch applied, stop_machine() and sched now share the same resources. stop_machine() is faster without wasting any resources and sched migration users are much cleaner.

Signed-off-by: Tejun Heo <tj@kernel.org>
Acked-by: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Dipankar Sarma <dipankar@in.ibm.com>
Cc: Josh Triplett <josh@freedesktop.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Dimitri Sivanich <sivanich@sgi.com>
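[Editor's note] For readers who haven't used the cpu_stop interface the message refers to, the sketch below shows the calling pattern in the abstract: a callback of type cpu_stop_fn_t is handed to the per-cpu stopper either synchronously with stop_one_cpu() or asynchronously with stop_one_cpu_nowait(), the form the scheduler uses in the hunks further down. The callback, work item and wrapper function here are illustrative placeholders, not code from the patch.

#include <linux/stop_machine.h>	/* cpu_stop_fn_t, struct cpu_stop_work */

/* Illustrative callback: the stopper runs it on the target cpu with
 * preemption disabled, just like the three callbacks this patch adds. */
static int example_cpu_stop(void *arg)
{
	return 0;	/* return value is reported to synchronous callers */
}

/* Must stay valid until the callback has run; this is why the patch
 * embeds a struct cpu_stop_work (->active_balance_work) in struct rq. */
static struct cpu_stop_work example_work;

static void example_usage(unsigned int cpu)
{
	/* synchronous: sleeps until example_cpu_stop() has run on 'cpu' */
	stop_one_cpu(cpu, example_cpu_stop, NULL);

	/* asynchronous: queue the work and return immediately; this is
	 * the form used for active load balancing in the diff below */
	stop_one_cpu_nowait(cpu, example_cpu_stop, NULL, &example_work);
}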
Diffstat (limited to 'kernel/sched_fair.c')
-rw-r--r--  kernel/sched_fair.c | 48
1 file changed, 34 insertions(+), 14 deletions(-)
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index cbd8b8a296d1..217e4a9393e4 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -2798,6 +2798,8 @@ static int need_active_balance(struct sched_domain *sd, int sd_idle, int idle)
 	return unlikely(sd->nr_balance_failed > sd->cache_nice_tries+2);
 }
 
+static int active_load_balance_cpu_stop(void *data);
+
 /*
  * Check this_cpu to ensure it is balanced within domain. Attempt to move
  * tasks if there is an imbalance.
@@ -2887,8 +2889,9 @@ redo:
 	if (need_active_balance(sd, sd_idle, idle)) {
 		raw_spin_lock_irqsave(&busiest->lock, flags);
 
-		/* don't kick the migration_thread, if the curr
-		 * task on busiest cpu can't be moved to this_cpu
+		/* don't kick the active_load_balance_cpu_stop,
+		 * if the curr task on busiest cpu can't be
+		 * moved to this_cpu
 		 */
 		if (!cpumask_test_cpu(this_cpu,
 				      &busiest->curr->cpus_allowed)) {
@@ -2898,14 +2901,22 @@ redo:
 			goto out_one_pinned;
 		}
 
+		/*
+		 * ->active_balance synchronizes accesses to
+		 * ->active_balance_work. Once set, it's cleared
+		 * only after active load balance is finished.
+		 */
 		if (!busiest->active_balance) {
 			busiest->active_balance = 1;
 			busiest->push_cpu = this_cpu;
 			active_balance = 1;
 		}
 		raw_spin_unlock_irqrestore(&busiest->lock, flags);
+
 		if (active_balance)
-			wake_up_process(busiest->migration_thread);
+			stop_one_cpu_nowait(cpu_of(busiest),
+				active_load_balance_cpu_stop, busiest,
+				&busiest->active_balance_work);
 
 		/*
 		 * We've kicked active balancing, reset the failure
@@ -3012,24 +3023,29 @@ static void idle_balance(int this_cpu, struct rq *this_rq)
 }
 
 /*
- * active_load_balance is run by migration threads. It pushes running tasks
- * off the busiest CPU onto idle CPUs. It requires at least 1 task to be
- * running on each physical CPU where possible, and avoids physical /
- * logical imbalances.
- *
- * Called with busiest_rq locked.
+ * active_load_balance_cpu_stop is run by cpu stopper. It pushes
+ * running tasks off the busiest CPU onto idle CPUs. It requires at
+ * least 1 task to be running on each physical CPU where possible, and
+ * avoids physical / logical imbalances.
  */
-static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
+static int active_load_balance_cpu_stop(void *data)
 {
+	struct rq *busiest_rq = data;
+	int busiest_cpu = cpu_of(busiest_rq);
 	int target_cpu = busiest_rq->push_cpu;
+	struct rq *target_rq = cpu_rq(target_cpu);
 	struct sched_domain *sd;
-	struct rq *target_rq;
+
+	raw_spin_lock_irq(&busiest_rq->lock);
+
+	/* make sure the requested cpu hasn't gone down in the meantime */
+	if (unlikely(busiest_cpu != smp_processor_id() ||
+		     !busiest_rq->active_balance))
+		goto out_unlock;
 
 	/* Is there any task to move? */
 	if (busiest_rq->nr_running <= 1)
-		return;
-
-	target_rq = cpu_rq(target_cpu);
+		goto out_unlock;
 
 	/*
 	 * This condition is "impossible", if it occurs
@@ -3058,6 +3074,10 @@ static void active_load_balance(struct rq *busiest_rq, int busiest_cpu)
 		schedstat_inc(sd, alb_failed);
 	}
 	double_unlock_balance(busiest_rq, target_rq);
+out_unlock:
+	busiest_rq->active_balance = 0;
+	raw_spin_unlock_irq(&busiest_rq->lock);
+	return 0;
 }
 
 #ifdef CONFIG_NO_HZ
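[Editor's note] The synchronize_sched_expedited() side of the conversion described in the commit message lives in kernel/sched.c and is outside this diffstat. Purely as a simplified illustration of the approach the message describes - not the kernel/sched.c implementation - the multi-cpu path with the "paranoid" cpumask check could be shaped like this; names other than synchronize_sched_expedited_cpu_stop(), try_stop_cpus() and synchronize_sched() are placeholders, and serialization of concurrent callers is omitted.

#include <linux/stop_machine.h>	/* try_stop_cpus() */
#include <linux/cpumask.h>
#include <linux/rcupdate.h>	/* synchronize_sched() */
#include <linux/smp.h>		/* smp_processor_id() */

/* records on which cpus the stopper callback actually ran */
static struct cpumask sketch_cpus_stopped;

static int synchronize_sched_expedited_cpu_stop(void *unused)
{
	cpumask_set_cpu(smp_processor_id(), &sketch_cpus_stopped);
	return 0;
}

static void synchronize_sched_expedited_sketch(void)
{
	cpumask_clear(&sketch_cpus_stopped);

	/* try_stop_cpus() queues the callback on every cpu in the mask
	 * and waits for completion; it returns -EAGAIN while the stop
	 * machinery is busy, in which case the caller retries. */
	while (try_stop_cpus(cpu_online_mask,
			     synchronize_sched_expedited_cpu_stop, NULL))
		cpu_relax();

	/* paranoid fallback described in the commit message */
	if (!cpumask_equal(&sketch_cpus_stopped, cpu_online_mask))
		synchronize_sched();
}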