author	Suresh Siddha <suresh.b.siddha@intel.com>	2011-06-23 14:19:26 -0400
committer	H. Peter Anvin <hpa@linux.intel.com>	2011-06-27 17:00:46 -0400
commit	6d3321e8e2b3bf6a5892e2ef673c7bf536e3f904 (patch)
tree	4c911f90934bd755ac12f11cc2dc9070e1aead80
parent	4699d4423c07a1db35ea9453eac3a07e818338f9 (diff)
x86, mtrr: lock stop machine during MTRR rendezvous sequence
An MTRR rendezvous sequence using stop_one_cpu_nowait() can potentially happen in parallel with another system-wide rendezvous using stop_machine(). This can lead to deadlock: the order in which the works are queued can differ from one cpu to another, so some cpus end up running the first rendezvous handler while others run the second, with each set waiting for the other set to join its system-wide rendezvous.

The MTRR rendezvous sequence is not implemented using stop_machine() because it gets called both from process context and from the cpu online path (where the cpu has not yet come online and interrupts are disabled, etc.), while stop_machine() works only with online cpus.

For now, take the stop_machine mutex in the MTRR rendezvous sequence that gets called from an online cpu (here we are in process context and can sleep while taking the mutex). The MTRR rendezvous that gets triggered during cpu online doesn't need to take this lock, as stop_machine() already ensures that no cpu hotplug is going on in parallel by doing get_online_cpus().

TBD: Pursue a cleaner solution of extending the stop_machine() infrastructure to handle the case where the calling cpu is not yet online, and use that for the MTRR rendezvous sequence.

Fixes: https://bugzilla.novell.com/show_bug.cgi?id=672008
Reported-by: Vadim Kotelnikov <vadimuzzz@inbox.ru>
Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com>
Link: http://lkml.kernel.org/r/20110623182056.807230326@sbsiddha-MOBL3.sc.intel.com
Cc: stable@kernel.org # 2.6.35+, backport a week or two after this gets more testing in mainline
Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
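To make the deadlock scenario concrete, here is a minimal userspace sketch (not kernel code: pthreads stand in for the per-cpu stopper threads, and the two work queues are pre-loaded with the racy ordering described above, so the hang is deterministic). Compiled with gcc -pthread, it hangs by design: worker0 enters rendezvous A while worker1 enters rendezvous B, and each spins forever waiting for the other to join.

#include <pthread.h>
#include <stdio.h>
#include <stdatomic.h>

#define NR_WORKERS 2

/* One rendezvous: every worker must arrive before any may leave. */
struct rendezvous {
	const char *name;
	atomic_int arrived;
};

static struct rendezvous A = { "A", 0 };
static struct rendezvous B = { "B", 0 };

/* Per-worker FIFO of queued handlers (cf. cpu_stop_queue_work()). */
static struct rendezvous *queue[NR_WORKERS][2];

static void run_handler(int worker, struct rendezvous *r)
{
	atomic_fetch_add(&r->arrived, 1);
	fprintf(stderr, "worker%d entered %s, waiting for all %d\n",
		worker, r->name, NR_WORKERS);
	while (atomic_load(&r->arrived) < NR_WORKERS)
		;	/* spin, like the rendezvous handlers do */
	fprintf(stderr, "worker%d left %s\n", worker, r->name);
}

static void *worker_fn(void *arg)
{
	int w = (int)(long)arg;

	/* Drain this worker's queue in FIFO order. */
	run_handler(w, queue[w][0]);
	run_handler(w, queue[w][1]);
	return NULL;
}

int main(void)
{
	pthread_t t[NR_WORKERS];

	/*
	 * The race the patch closes: rendezvous A got queued first on
	 * worker0 but second on worker1, and vice versa for B.
	 */
	queue[0][0] = &A; queue[0][1] = &B;
	queue[1][0] = &B; queue[1][1] = &A;

	for (long w = 0; w < NR_WORKERS; w++)
		pthread_create(&t[w], NULL, worker_fn, (void *)w);
	for (int w = 0; w < NR_WORKERS; w++)
		pthread_join(t[w], NULL);	/* never returns: deadlock */

	puts("unreachable");
	return 0;
}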
-rw-r--r--	arch/x86/kernel/cpu/mtrr/main.c	23
-rw-r--r--	include/linux/stop_machine.h	2
-rw-r--r--	kernel/stop_machine.c	2
3 files changed, 26 insertions, 1 deletion
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 929739a653d1..3d17bc7f06e6 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -248,6 +248,25 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
 	unsigned long flags;
 	int cpu;
 
+#ifdef CONFIG_SMP
+	/*
+	 * If this cpu is not yet active, we are in the cpu online path. There
+	 * can be no stop_machine() in parallel, as stop machine ensures this
+	 * by using get_online_cpus(). We can skip taking the stop_cpus_mutex,
+	 * as we don't need it and also we can't afford to block while waiting
+	 * for the mutex.
+	 *
+	 * If this cpu is active, we need to prevent stop_machine() happening
+	 * in parallel by taking the stop cpus mutex.
+	 *
+	 * Also, this is called in the context of cpu online path or in the
+	 * context where cpu hotplug is prevented. So checking the active status
+	 * of the raw_smp_processor_id() is safe.
+	 */
+	if (cpu_active(raw_smp_processor_id()))
+		mutex_lock(&stop_cpus_mutex);
+#endif
+
 	preempt_disable();
 
 	data.smp_reg = reg;
@@ -330,6 +349,10 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
 
 	local_irq_restore(flags);
 	preempt_enable();
+#ifdef CONFIG_SMP
+	if (cpu_active(raw_smp_processor_id()))
+		mutex_unlock(&stop_cpus_mutex);
+#endif
 }
 
 /**
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 092dc9b1ce7d..14d3524d1274 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -27,6 +27,8 @@ struct cpu_stop_work {
 	struct cpu_stop_done	*done;
 };
 
+extern struct mutex stop_cpus_mutex;
+
 int stop_one_cpu(unsigned int cpu, cpu_stop_fn_t fn, void *arg);
 void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 			 struct cpu_stop_work *work_buf);
diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c
index e3516b29076c..0cae1cc323dc 100644
--- a/kernel/stop_machine.c
+++ b/kernel/stop_machine.c
@@ -132,8 +132,8 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg,
 	cpu_stop_queue_work(&per_cpu(cpu_stopper, cpu), work_buf);
 }
 
+DEFINE_MUTEX(stop_cpus_mutex);
 /* static data for stop_cpus */
-static DEFINE_MUTEX(stop_cpus_mutex);
 static DEFINE_PER_CPU(struct cpu_stop_work, stop_cpus_work);
 
 int __stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
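For context on why holding stop_cpus_mutex in set_mtrr() is sufficient: in the stop_machine implementation of this era, every multi-cpu rendezvous issued through stop_cpus() (which stop_machine() funnels through) serializes on the same mutex, roughly as in the sketch below (paraphrased from kernel/stop_machine.c around this commit; not part of this patch).

/*
 * Paraphrased sketch, not part of the patch: stop_machine() reaches its
 * rendezvous via stop_cpus(), so anyone holding stop_cpus_mutex knows no
 * system-wide rendezvous can be queued in parallel.
 */
int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg)
{
	int ret;

	/* static works are used, process one request at a time */
	mutex_lock(&stop_cpus_mutex);
	ret = __stop_cpus(cpumask, fn, arg);
	mutex_unlock(&stop_cpus_mutex);
	return ret;
}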