aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorSuresh Siddha <suresh.b.siddha@intel.com>2010-07-30 14:46:42 -0400
committerH. Peter Anvin <hpa@linux.intel.com>2010-07-30 18:59:49 -0400
commit68f202e4e87cfab4439568bf397fcc5c7cf8d729 (patch)
treef8a400b1147ab80db505f1d54819a5e9f294c57e /arch
parent6aa033d7efb85830535bb83cf6713d6025ae6e59 (diff)
x86, mtrr: Use stop machine context to rendezvous all the cpu's
Use the stop machine context rather than IPI's to rendezvous all the cpus for MTRR initialization that happens during cpu bringup or for MTRR modifications during runtime. This avoids deadlock scenario (reported by Prarit) like: cpu A holds a read_lock (tasklist_lock for example) with irqs enabled cpu B waits for the same lock with irqs disabled using write_lock_irq cpu C doing set_mtrr() (during AP bringup for example), which will try to rendezvous all the cpus using IPI's This will result in C and A come to the rendezvous point and waiting for B. B is stuck forever waiting for the lock and thus not reaching the rendezvous point. Using stop cpu (run in the process context of per cpu based keventd) to do this rendezvous, avoids this deadlock scenario. Also make sure all the cpu's are in the rendezvous handler before we proceed with the local_irq_save() on each cpu. This lock step disabling irqs on all the cpus will avoid other deadlock scenarios (for example involving with the blocking smp_call_function's etc). [ This problem is very old. Marking -stable only for 2.6.35 as the stop_one_cpu_nowait() API is present only in 2.6.35. Any older kernel interested in this fix need to do some more work in backporting this patch. ] Reported-by: Prarit Bhargava <prarit@redhat.com> Signed-off-by: Suresh Siddha <suresh.b.siddha@intel.com> LKML-Reference: <1280515602.2682.10.camel@sbsiddha-MOBL3.sc.intel.com> Acked-by: Prarit Bhargava <prarit@redhat.com> Cc: stable@kernel.org [2.6.35] Signed-off-by: H. Peter Anvin <hpa@linux.intel.com>
Diffstat (limited to 'arch')
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c56
-rw-r--r--arch/x86/kernel/smpboot.c7
2 files changed, 50 insertions, 13 deletions
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 79556bd9b602..01c0f3ee6cc3 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -35,6 +35,7 @@
35 35
36#include <linux/types.h> /* FIXME: kvm_para.h needs this */ 36#include <linux/types.h> /* FIXME: kvm_para.h needs this */
37 37
38#include <linux/stop_machine.h>
38#include <linux/kvm_para.h> 39#include <linux/kvm_para.h>
39#include <linux/uaccess.h> 40#include <linux/uaccess.h>
40#include <linux/module.h> 41#include <linux/module.h>
@@ -143,22 +144,28 @@ struct set_mtrr_data {
143 mtrr_type smp_type; 144 mtrr_type smp_type;
144}; 145};
145 146
147static DEFINE_PER_CPU(struct cpu_stop_work, mtrr_work);
148
146/** 149/**
147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs. 150 * mtrr_work_handler - Synchronisation handler. Executed by "other" CPUs.
148 * @info: pointer to mtrr configuration data 151 * @info: pointer to mtrr configuration data
149 * 152 *
150 * Returns nothing. 153 * Returns nothing.
151 */ 154 */
152static void ipi_handler(void *info) 155static int mtrr_work_handler(void *info)
153{ 156{
154#ifdef CONFIG_SMP 157#ifdef CONFIG_SMP
155 struct set_mtrr_data *data = info; 158 struct set_mtrr_data *data = info;
156 unsigned long flags; 159 unsigned long flags;
157 160
161 atomic_dec(&data->count);
162 while (!atomic_read(&data->gate))
163 cpu_relax();
164
158 local_irq_save(flags); 165 local_irq_save(flags);
159 166
160 atomic_dec(&data->count); 167 atomic_dec(&data->count);
161 while (!atomic_read(&data->gate)) 168 while (atomic_read(&data->gate))
162 cpu_relax(); 169 cpu_relax();
163 170
164 /* The master has cleared me to execute */ 171 /* The master has cleared me to execute */
@@ -173,12 +180,13 @@ static void ipi_handler(void *info)
173 } 180 }
174 181
175 atomic_dec(&data->count); 182 atomic_dec(&data->count);
176 while (atomic_read(&data->gate)) 183 while (!atomic_read(&data->gate))
177 cpu_relax(); 184 cpu_relax();
178 185
179 atomic_dec(&data->count); 186 atomic_dec(&data->count);
180 local_irq_restore(flags); 187 local_irq_restore(flags);
181#endif 188#endif
189 return 0;
182} 190}
183 191
184static inline int types_compatible(mtrr_type type1, mtrr_type type2) 192static inline int types_compatible(mtrr_type type1, mtrr_type type2)
@@ -198,7 +206,7 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
198 * 206 *
199 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: 207 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
200 * 208 *
201 * 1. Send IPI to do the following: 209 * 1. Queue work to do the following on all processors:
202 * 2. Disable Interrupts 210 * 2. Disable Interrupts
203 * 3. Wait for all procs to do so 211 * 3. Wait for all procs to do so
204 * 4. Enter no-fill cache mode 212 * 4. Enter no-fill cache mode
@@ -215,14 +223,17 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2)
215 * 15. Enable interrupts. 223 * 15. Enable interrupts.
216 * 224 *
217 * What does that mean for us? Well, first we set data.count to the number 225 * What does that mean for us? Well, first we set data.count to the number
218 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait 226 * of CPUs. As each CPU announces that it started the rendezvous handler by
219 * until it hits 0 and proceed. We set the data.gate flag and reset data.count. 227 * decrementing the count, We reset data.count and set the data.gate flag
220 * Meanwhile, they are waiting for that flag to be set. Once it's set, each 228 * allowing all the cpu's to proceed with the work. As each cpu disables
229 * interrupts, it'll decrement data.count once. We wait until it hits 0 and
230 * proceed. We clear the data.gate flag and reset data.count. Meanwhile, they
231 * are waiting for that flag to be cleared. Once it's cleared, each
221 * CPU goes through the transition of updating MTRRs. 232 * CPU goes through the transition of updating MTRRs.
222 * The CPU vendors may each do it differently, 233 * The CPU vendors may each do it differently,
223 * so we call mtrr_if->set() callback and let them take care of it. 234 * so we call mtrr_if->set() callback and let them take care of it.
224 * When they're done, they again decrement data->count and wait for data.gate 235 * When they're done, they again decrement data->count and wait for data.gate
225 * to be reset. 236 * to be set.
226 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag 237 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
227 * Everyone then enables interrupts and we all continue on. 238 * Everyone then enables interrupts and we all continue on.
228 * 239 *
@@ -234,6 +245,9 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
234{ 245{
235 struct set_mtrr_data data; 246 struct set_mtrr_data data;
236 unsigned long flags; 247 unsigned long flags;
248 int cpu;
249
250 preempt_disable();
237 251
238 data.smp_reg = reg; 252 data.smp_reg = reg;
239 data.smp_base = base; 253 data.smp_base = base;
@@ -246,10 +260,15 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
246 atomic_set(&data.gate, 0); 260 atomic_set(&data.gate, 0);
247 261
248 /* Start the ball rolling on other CPUs */ 262 /* Start the ball rolling on other CPUs */
249 if (smp_call_function(ipi_handler, &data, 0) != 0) 263 for_each_online_cpu(cpu) {
250 panic("mtrr: timed out waiting for other CPUs\n"); 264 struct cpu_stop_work *work = &per_cpu(mtrr_work, cpu);
265
266 if (cpu == smp_processor_id())
267 continue;
268
269 stop_one_cpu_nowait(cpu, mtrr_work_handler, &data, work);
270 }
251 271
252 local_irq_save(flags);
253 272
254 while (atomic_read(&data.count)) 273 while (atomic_read(&data.count))
255 cpu_relax(); 274 cpu_relax();
@@ -259,6 +278,16 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
259 smp_wmb(); 278 smp_wmb();
260 atomic_set(&data.gate, 1); 279 atomic_set(&data.gate, 1);
261 280
281 local_irq_save(flags);
282
283 while (atomic_read(&data.count))
284 cpu_relax();
285
286 /* Ok, reset count and toggle gate */
287 atomic_set(&data.count, num_booting_cpus() - 1);
288 smp_wmb();
289 atomic_set(&data.gate, 0);
290
262 /* Do our MTRR business */ 291 /* Do our MTRR business */
263 292
264 /* 293 /*
@@ -279,7 +308,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
279 308
280 atomic_set(&data.count, num_booting_cpus() - 1); 309 atomic_set(&data.count, num_booting_cpus() - 1);
281 smp_wmb(); 310 smp_wmb();
282 atomic_set(&data.gate, 0); 311 atomic_set(&data.gate, 1);
283 312
284 /* 313 /*
285 * Wait here for everyone to have seen the gate change 314 * Wait here for everyone to have seen the gate change
@@ -289,6 +318,7 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ
289 cpu_relax(); 318 cpu_relax();
290 319
291 local_irq_restore(flags); 320 local_irq_restore(flags);
321 preempt_enable();
292} 322}
293 323
294/** 324/**
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index c4f33b2e77d6..11015fd1abbc 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -816,6 +816,13 @@ do_rest:
816 if (cpumask_test_cpu(cpu, cpu_callin_mask)) 816 if (cpumask_test_cpu(cpu, cpu_callin_mask))
817 break; /* It has booted */ 817 break; /* It has booted */
818 udelay(100); 818 udelay(100);
819 /*
820 * Allow other tasks to run while we wait for the
821 * AP to come online. This also gives a chance
822 * for the MTRR work(triggered by the AP coming online)
823 * to be completed in the stop machine context.
824 */
825 schedule();
819 } 826 }
820 827
821 if (cpumask_test_cpu(cpu, cpu_callin_mask)) 828 if (cpumask_test_cpu(cpu, cpu_callin_mask))