aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Oleg Nesterov <oleg@redhat.com> 2010-03-15 05:10:27 -0400
committer Ingo Molnar <mingo@elte.hu> 2010-04-02 14:12:03 -0400
commit 9084bb8246ea935b98320554229e2f371f7f52fa (patch)
tree 8478d18125e3b4a7e0a31d702647dee1830d23ef
parent 6a1bdc1b577ebcb65f6603c57f8347309bc4ab13 (diff)
sched: Make select_fallback_rq() cpuset friendly
Introduce the cpuset_cpus_allowed_fallback() helper to fix the cpuset problems with select_fallback_rq(). It can be called from any context and can't use any cpuset locks including task_lock(). It is called when the task doesn't have online cpus in ->cpus_allowed but ttwu/etc must be able to find a suitable cpu. I am not proud of this patch. Everything which needs such a fat comment can't be good even if correct. But I'd prefer to not change the locking rules in the code I hardly understand, and in any case I believe this simple change makes the code much more correct compared to the deadlocks we currently have. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl> LKML-Reference: <20100315091027.GA9155@redhat.com> Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- include/linux/cpuset.h 7
-rw-r--r-- kernel/cpuset.c 42
-rw-r--r-- kernel/sched.c 4
3 files changed, 50 insertions, 3 deletions
diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
index eeaaee746bee..a73454aec333 100644
--- a/include/linux/cpuset.h
+++ b/include/linux/cpuset.h
@@ -21,6 +21,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
21extern int cpuset_init(void); 21extern int cpuset_init(void);
22extern void cpuset_init_smp(void); 22extern void cpuset_init_smp(void);
23extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); 23extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
24extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
24extern nodemask_t cpuset_mems_allowed(struct task_struct *p); 25extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
25#define cpuset_current_mems_allowed (current->mems_allowed) 26#define cpuset_current_mems_allowed (current->mems_allowed)
26void cpuset_init_current_mems_allowed(void); 27void cpuset_init_current_mems_allowed(void);
@@ -101,6 +102,12 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
101 cpumask_copy(mask, cpu_possible_mask); 102 cpumask_copy(mask, cpu_possible_mask);
102} 103}
103 104
/*
 * Fallback CPU selection for a task: overwrites p->cpus_allowed with
 * cpu_possible_mask and returns a CPU picked from cpu_active_mask.
 *
 * NOTE(review): presumably this is the variant used when cpusets are
 * compiled out -- the enclosing #ifdef is not visible in this hunk; confirm.
 */
105static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
106{
107 cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
108 return cpumask_any(cpu_active_mask);
109}
110
104static inline nodemask_t cpuset_mems_allowed(struct task_struct *p) 111static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
105{ 112{
106 return node_possible_map; 113 return node_possible_map;
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index 9a747f56d58c..9a50c5f6e727 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -2188,6 +2188,48 @@ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
2188 mutex_unlock(&callback_mutex); 2188 mutex_unlock(&callback_mutex);
2189} 2189}
2190 2190
/*
 * cpuset_cpus_allowed_fallback - pick a fallback CPU for @tsk.
 *
 * Overwrites tsk->cpus_allowed with the cpus_allowed mask of the task's
 * cpuset (when task_cs() returns one) and returns a CPU that is both in
 * that mask and in cpu_active_mask.  If no such CPU exists,
 * tsk->cpus_allowed is reset to cpu_possible_mask and a CPU from
 * cpu_active_mask is returned instead.
 *
 * The only lock taken here is rcu_read_lock(); no cpuset mutex or
 * task_lock() is used, so that the function can be called from any
 * context.
 */
2191int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
2192{
2193 const struct cpuset *cs;
2194 int cpu;
2195
	/* cs is only dereferenced inside the RCU read-side section. */
2196 rcu_read_lock();
2197 cs = task_cs(tsk);
2198 if (cs)
2199 cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
2200 rcu_read_unlock();
2201
2202 /*
2203 * We own tsk->cpus_allowed, nobody can change it under us.
2204 *
2205 * But we used cs && cs->cpus_allowed locklessly and thus can
2206 * race with cgroup_attach_task() or update_cpumask() and get
2207 * the wrong tsk->cpus_allowed. However, both cases imply the
2208 * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
2209 * which takes task_rq_lock().
2210 *
2211 * If we are called after it dropped the lock we must see all
2212 * changes in task_cs()->cpus_allowed. Otherwise we can temporarily
2213 * set any mask even if it is not right from task_cs()'s point of
2214 * view; the pending set_cpus_allowed_ptr() will fix things.
2215 */
2216
2217 cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
2218 if (cpu >= nr_cpu_ids) {
2219 /*
2220 * Either tsk->cpus_allowed is wrong (see above) or it
2221 * is actually empty. The latter case is only possible
2222 * if we are racing with remove_tasks_in_empty_cpuset().
2223 * Like above we can temporarily set any mask and rely on
2224 * set_cpus_allowed_ptr() as the synchronization point.
2225 */
2226 cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
2227 cpu = cpumask_any(cpu_active_mask);
2228 }
2229
2230 return cpu;
2231}
2232
2191void cpuset_init_current_mems_allowed(void) 2233void cpuset_init_current_mems_allowed(void)
2192{ 2234{
2193 nodes_setall(current->mems_allowed); 2235 nodes_setall(current->mems_allowed);
diff --git a/kernel/sched.c b/kernel/sched.c
index 11119deffa48..9a38c7a24ed7 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -2300,9 +2300,7 @@ static int select_fallback_rq(int cpu, struct task_struct *p)
2300 2300
2301 /* No more Mr. Nice Guy. */ 2301 /* No more Mr. Nice Guy. */
2302 if (unlikely(dest_cpu >= nr_cpu_ids)) { 2302 if (unlikely(dest_cpu >= nr_cpu_ids)) {
2303 cpumask_copy(&p->cpus_allowed, cpu_possible_mask); 2303 dest_cpu = cpuset_cpus_allowed_fallback(p);
2304 dest_cpu = cpumask_any(cpu_active_mask);
2305
2306 /* 2304 /*
2307 * Don't tell them about moving exiting tasks or 2305 * Don't tell them about moving exiting tasks or
2308 * kernel threads (both mm NULL), since they never 2306 * kernel threads (both mm NULL), since they never