author      Zwane Mwaikambo <zwane@linuxpower.ca>      2005-06-25 17:54:50 -0400
committer   Linus Torvalds <torvalds@ppc970.osdl.org>  2005-06-25 19:24:29 -0400
commit      f370513640492641b4046bfd9a6e4714f6ae530d (patch)
tree        46da47197fcbb3614b51c5f1fac841bf26d5e572 /arch/i386/kernel
parent      d92de65cab5980c16d4a1c326c1ef9a591892883 (diff)
[PATCH] i386 CPU hotplug
(The i386 CPU hotplug patch provides infrastructure for some work which Pavel is doing, as well as for the ACPI S3 (suspend-to-RAM) work which Li Shaohua <shaohua.li@intel.com> is doing.)

The following provides i386 architecture support for safely unregistering and registering processors during runtime, updated for the current -mm tree. In order to avoid dumping cpu hotplug code into kernel/irq/*, I dropped the cpu_online check in do_IRQ() by modifying fixup_irqs(). The difference is that on cpu offline, fixup_irqs() is called before we clear the cpu from cpu_online_map, followed by a long delay, in order to ensure that we never have any queued external interrupts on the APICs. There are additional changes to s390 and ppc64 to account for this change.

1) Add CONFIG_HOTPLUG_CPU.
2) Disable the local APIC timer on dead cpus.
3) Disable preempt around irq balancing to prevent CPUs going down.
4) Print irq stats for all possible cpus.
5) Debugging check for interrupts on offline cpus.
6) Hacky fixup_irqs() to redirect irqs when cpus go off/online.
7) play_dead() for offline cpus to spin inside.
8) Handle offline cpus set in flush_tlb_others().
9) Grab lock earlier in smp_call_function() to prevent CPUs going down.
10) Implement __cpu_disable() and __cpu_die().
11) Enable local interrupts in cpu_enable() after fixup_irqs().
12) Don't fiddle with NMI on dead cpu, but leave intact on other cpus.
13) Program IRQ affinity whilst cpu is still in cpu_online_map on offline.

Signed-off-by: Zwane Mwaikambo <zwane@linuxpower.ca>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
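Taken together, the arch hooks below compose into a simple offline/online sequence. The following is a minimal sketch of how generic code might drive them, for orientation only: it is not part of the patch, the example_* wrapper names are invented, and the cpucontrol locking and the mechanics of running __cpu_disable() on the dying processor are elided.

/* Orientation sketch only -- not from the patch. The __cpu_disable(),
 * __cpu_die() and __cpu_up() hooks are the ones this diff implements;
 * the example_* wrappers are hypothetical. */
static int example_cpu_down(unsigned int cpu)
{
	int err;

	/* Executed on the dying CPU: disables its local APIC timer,
	 * redirects irq affinity away from it via fixup_irqs(), then
	 * clears it from cpu_online_map. */
	err = __cpu_disable();
	if (err)
		return err;

	/* Wait for the idle task to enter play_dead() and ack by
	 * setting per_cpu(cpu_state, cpu) = CPU_DEAD. */
	__cpu_die(cpu);
	return 0;
}

static int example_cpu_up(unsigned int cpu)
{
	/* For a CPU already parked in play_dead(), __cpu_up() takes the
	 * cpu_enable() path: flip cpu_state to CPU_UP_PREPARE and wait
	 * for the CPU to reappear in cpu_online_map. */
	return __cpu_up(cpu);
}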
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--   arch/i386/kernel/apic.c      3
-rw-r--r--   arch/i386/kernel/io_apic.c   2
-rw-r--r--   arch/i386/kernel/irq.c      67
-rw-r--r--   arch/i386/kernel/process.c  39
-rw-r--r--   arch/i386/kernel/smp.c      24
-rw-r--r--   arch/i386/kernel/smpboot.c  98
-rw-r--r--   arch/i386/kernel/traps.c     8
7 files changed, 212 insertions, 29 deletions
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 8d993fa71754..a28a088f3e75 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -26,6 +26,7 @@
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
 #include <linux/sysdev.h>
+#include <linux/cpu.h>
 
 #include <asm/atomic.h>
 #include <asm/smp.h>
@@ -1048,7 +1049,7 @@ void __init setup_secondary_APIC_clock(void)
 	setup_APIC_timer(calibration_result);
 }
 
-void __init disable_APIC_timer(void)
+void __devinit disable_APIC_timer(void)
 {
 	if (using_apic_timer) {
 		unsigned long v;
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 08540bc4ba3e..3c2b3bdfc807 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -576,9 +576,11 @@ static int balanced_irq(void *unused)
 		try_to_freeze(PF_FREEZE);
 		if (time_after(jiffies,
 				prev_balance_time+balanced_irq_interval)) {
+			preempt_disable();
 			do_irq_balance();
 			prev_balance_time = jiffies;
 			time_remaining = balanced_irq_interval;
+			preempt_enable();
 		}
 	}
 	return 0;
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 73945a3c53c4..af115004aec5 100644
--- a/arch/i386/kernel/irq.c
+++ b/arch/i386/kernel/irq.c
@@ -15,6 +15,9 @@
 #include <linux/seq_file.h>
 #include <linux/interrupt.h>
 #include <linux/kernel_stat.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
 
 DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_maxaligned_in_smp;
 EXPORT_PER_CPU_SYMBOL(irq_stat);
@@ -210,9 +213,8 @@ int show_interrupts(struct seq_file *p, void *v)
 
 	if (i == 0) {
 		seq_printf(p, " ");
-		for (j=0; j<NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "CPU%d ",j);
+		for_each_cpu(j)
+			seq_printf(p, "CPU%d ",j);
 		seq_putc(p, '\n');
 	}
 
@@ -225,9 +227,8 @@ int show_interrupts(struct seq_file *p, void *v)
 #ifndef CONFIG_SMP
 	seq_printf(p, "%10u ", kstat_irqs(i));
 #else
-	for (j = 0; j < NR_CPUS; j++)
-		if (cpu_online(j))
-			seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
+	for_each_cpu(j)
+		seq_printf(p, "%10u ", kstat_cpu(j).irqs[i]);
 #endif
 	seq_printf(p, " %14s", irq_desc[i].handler->typename);
 	seq_printf(p, " %s", action->name);
@@ -240,16 +241,14 @@ skip:
 		spin_unlock_irqrestore(&irq_desc[i].lock, flags);
 	} else if (i == NR_IRQS) {
 		seq_printf(p, "NMI: ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ", nmi_count(j));
+		for_each_cpu(j)
+			seq_printf(p, "%10u ", nmi_count(j));
 		seq_putc(p, '\n');
 #ifdef CONFIG_X86_LOCAL_APIC
 		seq_printf(p, "LOC: ");
-		for (j = 0; j < NR_CPUS; j++)
-			if (cpu_online(j))
-				seq_printf(p, "%10u ",
-					per_cpu(irq_stat,j).apic_timer_irqs);
+		for_each_cpu(j)
+			seq_printf(p, "%10u ",
+				per_cpu(irq_stat,j).apic_timer_irqs);
 		seq_putc(p, '\n');
 #endif
 		seq_printf(p, "ERR: %10u\n", atomic_read(&irq_err_count));
@@ -259,3 +258,45 @@ skip:
 	}
 	return 0;
 }
+
+#ifdef CONFIG_HOTPLUG_CPU
+#include <mach_apic.h>
+
+void fixup_irqs(cpumask_t map)
+{
+	unsigned int irq;
+	static int warned;
+
+	for (irq = 0; irq < NR_IRQS; irq++) {
+		cpumask_t mask;
+		if (irq == 2)
+			continue;
+
+		cpus_and(mask, irq_affinity[irq], map);
+		if (any_online_cpu(mask) == NR_CPUS) {
+			printk("Breaking affinity for irq %i\n", irq);
+			mask = map;
+		}
+		if (irq_desc[irq].handler->set_affinity)
+			irq_desc[irq].handler->set_affinity(irq, mask);
+		else if (irq_desc[irq].action && !(warned++))
+			printk("Cannot set affinity for irq %i\n", irq);
+	}
+
+#if 0
+	barrier();
+	/* Ingo Molnar says: "after the IO-APIC masks have been redirected
+	   [note the nop - the interrupt-enable boundary on x86 is two
+	   instructions from sti] - to flush out pending hardirqs and
+	   IPIs. After this point nothing is supposed to reach this CPU." */
+	__asm__ __volatile__("sti; nop; cli");
+	barrier();
+#else
+	/* That doesn't seem sufficient. Give it 1ms. */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+#endif
+}
+#endif
+
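A subtlety in fixup_irqs() above: with the 2.6-era cpumask API, any_online_cpu(mask) returns NR_CPUS when no online CPU is set in the mask, so the comparison is an emptiness test. Restated in isolation as a sketch (kernel cpumask types assumed; 'survivors' is an illustrative name):

	cpumask_t survivors;

	/* keep the irq's old affinity if any of it survives in 'map'... */
	cpus_and(survivors, irq_affinity[irq], map);
	/* ...otherwise "break affinity" and allow any surviving CPU */
	if (any_online_cpu(survivors) == NR_CPUS)	/* empty intersection */
		survivors = map;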
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index aea2ce1145df..c1b11e8df60b 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -13,6 +13,7 @@
 
 #include <stdarg.h>
 
+#include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
 #include <linux/fs.h>
@@ -55,6 +56,9 @@
 #include <linux/irq.h>
 #include <linux/err.h>
 
+#include <asm/tlbflush.h>
+#include <asm/cpu.h>
+
 asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
 
 static int hlt_counter;
@@ -143,14 +147,44 @@ static void poll_idle (void)
 	}
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+#include <asm/nmi.h>
+/* We don't actually take CPU down, just spin without interrupts. */
+static inline void play_dead(void)
+{
+	/* Ack it */
+	__get_cpu_var(cpu_state) = CPU_DEAD;
+
+	/* We shouldn't have to disable interrupts while dead, but
+	 * some interrupts just don't seem to go away, and this makes
+	 * it "work" for testing purposes. */
+	/* Death loop */
+	while (__get_cpu_var(cpu_state) != CPU_UP_PREPARE)
+		cpu_relax();
+
+	local_irq_disable();
+	__flush_tlb_all();
+	cpu_set(smp_processor_id(), cpu_online_map);
+	enable_APIC_timer();
+	local_irq_enable();
+}
+#else
+static inline void play_dead(void)
+{
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
 /*
  * The idle thread. There's no useful work to be
  * done, so just try to conserve power and have a
  * low exit latency (ie sit in a loop waiting for
  * somebody to say that they'd like to reschedule)
  */
-void cpu_idle (void)
+void cpu_idle(void)
 {
+	int cpu = raw_smp_processor_id();
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		while (!need_resched()) {
@@ -165,6 +199,9 @@ void cpu_idle (void)
 			if (!idle)
 				idle = default_idle;
 
+			if (cpu_is_offline(cpu))
+				play_dead();
+
 			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
 			idle();
 		}
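play_dead() above is one half of a handshake on the per-cpu cpu_state variable; cpu_enable() and __cpu_die() in smpboot.c (below) are the other half. A condensed, compilable sketch of the protocol, with identifiers borrowed from the patch and the per_cpu machinery, barriers, and cpu_relax() stubbed out as simplifications:

/* Handshake sketch only -- a simplification, not the kernel code. */
enum { CPU_UP_PREPARE = 1, CPU_DEAD = 4 };	/* values illustrative */
static volatile int cpu_state;			/* stands in for per_cpu(cpu_state) */

static void dying_cpu_side(void)		/* the play_dead() half */
{
	cpu_state = CPU_DEAD;			/* ack: __cpu_die() polls for this */
	while (cpu_state != CPU_UP_PREPARE)	/* park until re-enabled */
		;				/* (cpu_relax() in the real code) */
	/* then: __flush_tlb_all(), re-set self in cpu_online_map,
	 * enable_APIC_timer() */
}

static void controlling_cpu_side(void)		/* the cpu_enable() half */
{
	cpu_state = CPU_UP_PREPARE;		/* release the parked CPU */
	/* then: spin until cpu_online(cpu), then fixup_irqs(cpu_online_map) */
}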
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c
index 68be7d0c7238..35f521612b20 100644
--- a/arch/i386/kernel/smp.c
+++ b/arch/i386/kernel/smp.c
@@ -19,6 +19,7 @@
 #include <linux/mc146818rtc.h>
 #include <linux/cache.h>
 #include <linux/interrupt.h>
+#include <linux/cpu.h>
 #include <linux/module.h>
 
 #include <asm/mtrr.h>
@@ -164,7 +165,7 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector)
 	unsigned long flags;
 
 	local_irq_save(flags);
-
+	WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]);
 	/*
 	 * Wait for idle.
 	 */
@@ -346,21 +347,21 @@ out:
 static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm,
 						unsigned long va)
 {
-	cpumask_t tmp;
 	/*
 	 * A couple of (to be removed) sanity checks:
 	 *
-	 * - we do not send IPIs to not-yet booted CPUs.
 	 * - current CPU must not be in mask
 	 * - mask must exist :)
 	 */
 	BUG_ON(cpus_empty(cpumask));
-
-	cpus_and(tmp, cpumask, cpu_online_map);
-	BUG_ON(!cpus_equal(cpumask, tmp));
 	BUG_ON(cpu_isset(smp_processor_id(), cpumask));
 	BUG_ON(!mm);
 
+	/* If a CPU which we ran on has gone down, OK. */
+	cpus_and(cpumask, cpumask, cpu_online_map);
+	if (cpus_empty(cpumask))
+		return;
+
 	/*
 	 * i'm not happy about this global shared spinlock in the
 	 * MM hot path, but we'll see how contended it is.
@@ -476,6 +477,7 @@ void flush_tlb_all(void)
  */
 void smp_send_reschedule(int cpu)
 {
+	WARN_ON(cpu_is_offline(cpu));
 	send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
 }
 
@@ -516,10 +518,15 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
  */
 {
 	struct call_data_struct data;
-	int cpus = num_online_cpus()-1;
+	int cpus;
 
-	if (!cpus)
+	/* Holding any lock stops cpus from going down. */
+	spin_lock(&call_lock);
+	cpus = num_online_cpus() - 1;
+	if (!cpus) {
+		spin_unlock(&call_lock);
 		return 0;
+	}
 
 	/* Can deadlock when called with interrupts disabled */
 	WARN_ON(irqs_disabled());
@@ -531,7 +538,6 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic,
 	if (wait)
 		atomic_set(&data.finished, 0);
 
-	spin_lock(&call_lock);
 	call_data = &data;
 	mb();
 
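The smp_call_function() reordering is the point of item 9 in the changelog: while any spinlock is held, a CPU cannot complete going offline, so the online-CPU count must be sampled only after call_lock is taken. A comment-form sketch of the race the old ordering permitted (a reading of the pre/post-patch code above, not part of the diff):

/*
 * Old ordering (racy):
 *	cpus = num_online_cpus() - 1;	<-- a CPU can still go offline here
 *	...
 *	spin_lock(&call_lock);
 *	send IPI, wait for 'cpus' acks	<-- one target may never answer
 *
 * New ordering (safe):
 *	spin_lock(&call_lock);		<-- offline cannot complete past here
 *	cpus = num_online_cpus() - 1;	<-- count is stable until unlock
 */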
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index c20d96d5c15c..ad74a46e9ef0 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -44,6 +44,9 @@
 #include <linux/smp_lock.h>
 #include <linux/irq.h>
 #include <linux/bootmem.h>
+#include <linux/notifier.h>
+#include <linux/cpu.h>
+#include <linux/percpu.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -96,6 +99,9 @@ static int trampoline_exec;
 
 static void map_cpu_to_logical_apicid(void);
 
+/* State of each CPU. */
+DEFINE_PER_CPU(int, cpu_state) = { 0 };
+
 /*
  * Currently trivial. Write the real->protected mode
  * bootstrap into the page concerned. The caller
@@ -1119,6 +1125,9 @@ static void __init smp_boot_cpus(unsigned int max_cpus)
    who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
+	smp_commenced_mask = cpumask_of_cpu(0);
+	cpu_callin_map = cpumask_of_cpu(0);
+	mb();
 	smp_boot_cpus(max_cpus);
 }
 
@@ -1128,20 +1137,99 @@ void __devinit smp_prepare_boot_cpu(void)
 	cpu_set(smp_processor_id(), cpu_callout_map);
 }
 
-int __devinit __cpu_up(unsigned int cpu)
+#ifdef CONFIG_HOTPLUG_CPU
+
+/* must be called with the cpucontrol mutex held */
+static int __devinit cpu_enable(unsigned int cpu)
 {
-	/* This only works at boot for x86. See "rewrite" above. */
-	if (cpu_isset(cpu, smp_commenced_mask)) {
-		local_irq_enable();
-		return -ENOSYS;
+	/* get the target out of its holding state */
+	per_cpu(cpu_state, cpu) = CPU_UP_PREPARE;
+	wmb();
+
+	/* wait for the processor to ack it. timeout? */
+	while (!cpu_online(cpu))
+		cpu_relax();
+
+	fixup_irqs(cpu_online_map);
+	/* counter the disable in fixup_irqs() */
+	local_irq_enable();
+	return 0;
+}
+
+int __cpu_disable(void)
+{
+	cpumask_t map = cpu_online_map;
+	int cpu = smp_processor_id();
+
+	/*
+	 * Perhaps use cpufreq to drop frequency, but that could go
+	 * into generic code.
+	 *
+	 * We won't take down the boot processor on i386 due to some
+	 * interrupts only being able to be serviced by the BSP.
+	 * Especially so if we're not using an IOAPIC	-zwane
+	 */
+	if (cpu == 0)
+		return -EBUSY;
+
+	/* We enable the timer again on the exit path of the death loop */
+	disable_APIC_timer();
+	/* Allow any queued timer interrupts to get serviced */
+	local_irq_enable();
+	mdelay(1);
+	local_irq_disable();
+
+	cpu_clear(cpu, map);
+	fixup_irqs(map);
+	/* It's now safe to remove this processor from the online map */
+	cpu_clear(cpu, cpu_online_map);
+	return 0;
+}
+
+void __cpu_die(unsigned int cpu)
+{
+	/* We don't do anything here: idle task is faking death itself. */
+	unsigned int i;
+
+	for (i = 0; i < 10; i++) {
+		/* They ack this in play_dead by setting CPU_DEAD */
+		if (per_cpu(cpu_state, cpu) == CPU_DEAD)
+			return;
+		current->state = TASK_UNINTERRUPTIBLE;
+		schedule_timeout(HZ/10);
 	}
+	printk(KERN_ERR "CPU %u didn't die...\n", cpu);
+}
+#else /* ... !CONFIG_HOTPLUG_CPU */
+int __cpu_disable(void)
+{
+	return -ENOSYS;
+}
 
+void __cpu_die(unsigned int cpu)
+{
+	/* We said "no" in __cpu_disable */
+	BUG();
+}
+#endif /* CONFIG_HOTPLUG_CPU */
+
+int __devinit __cpu_up(unsigned int cpu)
+{
 	/* In case one didn't come up */
 	if (!cpu_isset(cpu, cpu_callin_map)) {
+		printk(KERN_DEBUG "skipping cpu%d, didn't come online\n", cpu);
 		local_irq_enable();
 		return -EIO;
 	}
 
+#ifdef CONFIG_HOTPLUG_CPU
+	/* Already up, and in cpu_quiescent now? */
+	if (cpu_isset(cpu, smp_commenced_mask)) {
+		cpu_enable(cpu);
+		return 0;
+	}
+#endif
+
 	local_irq_enable();
 	/* Unleash the CPU! */
 	cpu_set(cpu, smp_commenced_mask);
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index e4d4e2162c7a..207ea8ba7169 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -625,6 +625,14 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
 	nmi_enter();
 
 	cpu = smp_processor_id();
+
+#ifdef CONFIG_HOTPLUG_CPU
+	if (!cpu_online(cpu)) {
+		nmi_exit();
+		return;
+	}
+#endif
+
 	++nmi_count(cpu);
 
 	if (!nmi_callback(regs, cpu))