aboutsummaryrefslogtreecommitdiffstats
path: root/arch/ia64/kernel/mca.c
diff options
context:
space:
mode:
authorRuss Anderson <rja@sgi.com>2007-05-18 18:17:17 -0400
committerTony Luck <tony.luck@intel.com>2007-07-11 14:50:11 -0400
commit1612b18ccb2318563ba51268289dc3271a6052f7 (patch)
tree65e809216e33ddd35aad274eabb159b8c56edf1c /arch/ia64/kernel/mca.c
parent256a7e097ba3d1179867b4c9aba1b75fb32d44f2 (diff)
[IA64] Support multiple CPUs going through OS_MCA
Linux does not gracefully deal with multiple processors going through OS_MCA aa part of the same MCA event. The first cpu into OS_MCA grabs the ia64_mca_serialize lock. Subsequent cpus wait for that lock, preventing them from reporting in as rendezvoused. The first cpu waits 5 seconds then complains that all the cpus have not rendezvoused. The first cpu then handles its MCA and frees up all the rendezvoused cpus and releases the ia64_mca_serialize lock. One of the subsequent cpus going thought OS_MCA then gets the ia64_mca_serialize lock, waits another 5 seconds and then complains that none of the other cpus have rendezvoused. This patch allows multiple CPUs to gracefully go through OS_MCA. The first CPU into ia64_mca_handler() grabs a mca_count lock. Subsequent CPUs into ia64_mca_handler() are added to a list of cpus that need to go through OS_MCA (a bit set in mca_cpu), and report in as rendezvoused, and but spin waiting their turn. The first CPU sees everyone rendezvous, handles his MCA, wakes up one of the other CPUs waiting to process their MCA (by clearing one mca_cpu bit), and then waits for the other cpus to complete their MCA handling. The next CPU handles his MCA and the process repeats until all the CPUs have handled their MCA. When the last CPU has handled it's MCA, it sets monarch_cpu to -1, releasing all the CPUs. In testing this works more reliably and faster. Thanks to Keith Owens for suggesting numerous improvements to this code. Signed-off-by: Russ Anderson <rja@sgi.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
Diffstat (limited to 'arch/ia64/kernel/mca.c')
-rw-r--r--arch/ia64/kernel/mca.c60
1 files changed, 53 insertions, 7 deletions
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 1ead5ea6c5ce..4b5daa3cc0fe 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -57,6 +57,9 @@
57 * 57 *
58 * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com> 58 * 2006-09-15 Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
59 * Add printing support for MCA/INIT. 59 * Add printing support for MCA/INIT.
60 *
61 * 2007-04-27 Russ Anderson <rja@sgi.com>
62 * Support multiple cpus going through OS_MCA in the same event.
60 */ 63 */
61#include <linux/types.h> 64#include <linux/types.h>
62#include <linux/init.h> 65#include <linux/init.h>
@@ -96,7 +99,6 @@
96#endif 99#endif
97 100
98/* Used by mca_asm.S */ 101/* Used by mca_asm.S */
99u32 ia64_mca_serialize;
100DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */ 102DEFINE_PER_CPU(u64, ia64_mca_data); /* == __per_cpu_mca[smp_processor_id()] */
101DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */ 103DEFINE_PER_CPU(u64, ia64_mca_per_cpu_pte); /* PTE to map per-CPU area */
102DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */ 104DEFINE_PER_CPU(u64, ia64_mca_pal_pte); /* PTE to map PAL code */
@@ -963,11 +965,12 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
963 goto no_mod; 965 goto no_mod;
964 } 966 }
965 967
968 if (r13 != sos->prev_IA64_KR_CURRENT) {
969 msg = "inconsistent previous current and r13";
970 goto no_mod;
971 }
972
966 if (!mca_recover_range(ms->pmsa_iip)) { 973 if (!mca_recover_range(ms->pmsa_iip)) {
967 if (r13 != sos->prev_IA64_KR_CURRENT) {
968 msg = "inconsistent previous current and r13";
969 goto no_mod;
970 }
971 if ((r12 - r13) >= KERNEL_STACK_SIZE) { 974 if ((r12 - r13) >= KERNEL_STACK_SIZE) {
972 msg = "inconsistent r12 and r13"; 975 msg = "inconsistent r12 and r13";
973 goto no_mod; 976 goto no_mod;
@@ -1187,6 +1190,13 @@ all_in:
1187 * further MCA logging is enabled by clearing logs. 1190 * further MCA logging is enabled by clearing logs.
1188 * Monarch also has the duty of sending wakeup-IPIs to pull the 1191 * Monarch also has the duty of sending wakeup-IPIs to pull the
1189 * slave processors out of rendezvous spinloop. 1192 * slave processors out of rendezvous spinloop.
1193 *
1194 * If multiple processors call into OS_MCA, the first will become
1195 * the monarch. Subsequent cpus will be recorded in the mca_cpu
1196 * bitmask. After the first monarch has processed its MCA, it
1197 * will wake up the next cpu in the mca_cpu bitmask and then go
1198 * into the rendezvous loop. When all processors have serviced
1199 * their MCA, the last monarch frees up the rest of the processors.
1190 */ 1200 */
1191void 1201void
1192ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw, 1202ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
@@ -1196,16 +1206,32 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1196 struct task_struct *previous_current; 1206 struct task_struct *previous_current;
1197 struct ia64_mca_notify_die nd = 1207 struct ia64_mca_notify_die nd =
1198 { .sos = sos, .monarch_cpu = &monarch_cpu }; 1208 { .sos = sos, .monarch_cpu = &monarch_cpu };
1209 static atomic_t mca_count;
1210 static cpumask_t mca_cpu;
1199 1211
1212 if (atomic_add_return(1, &mca_count) == 1) {
1213 monarch_cpu = cpu;
1214 sos->monarch = 1;
1215 } else {
1216 cpu_set(cpu, mca_cpu);
1217 sos->monarch = 0;
1218 }
1200 mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d " 1219 mprintk(KERN_INFO "Entered OS MCA handler. PSP=%lx cpu=%d "
1201 "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch); 1220 "monarch=%ld\n", sos->proc_state_param, cpu, sos->monarch);
1202 1221
1203 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA"); 1222 previous_current = ia64_mca_modify_original_stack(regs, sw, sos, "MCA");
1204 monarch_cpu = cpu; 1223
1205 if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0) 1224 if (notify_die(DIE_MCA_MONARCH_ENTER, "MCA", regs, (long)&nd, 0, 0)
1206 == NOTIFY_STOP) 1225 == NOTIFY_STOP)
1207 ia64_mca_spin(__FUNCTION__); 1226 ia64_mca_spin(__FUNCTION__);
1208 ia64_wait_for_slaves(cpu, "MCA"); 1227 if (sos->monarch) {
1228 ia64_wait_for_slaves(cpu, "MCA");
1229 } else {
1230 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
1231 while (cpu_isset(cpu, mca_cpu))
1232 cpu_relax(); /* spin until monarch wakes us */
1233 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
1234 }
1209 1235
1210 /* Wakeup all the processors which are spinning in the rendezvous loop. 1236 /* Wakeup all the processors which are spinning in the rendezvous loop.
1211 * They will leave SAL, then spin in the OS with interrupts disabled 1237 * They will leave SAL, then spin in the OS with interrupts disabled
@@ -1244,6 +1270,26 @@ ia64_mca_handler(struct pt_regs *regs, struct switch_stack *sw,
1244 == NOTIFY_STOP) 1270 == NOTIFY_STOP)
1245 ia64_mca_spin(__FUNCTION__); 1271 ia64_mca_spin(__FUNCTION__);
1246 1272
1273
1274 if (atomic_dec_return(&mca_count) > 0) {
1275 int i;
1276
1277 /* wake up the next monarch cpu,
1278 * and put this cpu in the rendez loop.
1279 */
1280 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_CONCURRENT_MCA;
1281 for_each_online_cpu(i) {
1282 if (cpu_isset(i, mca_cpu)) {
1283 monarch_cpu = i;
1284 cpu_clear(i, mca_cpu); /* wake next cpu */
1285 while (monarch_cpu != -1)
1286 cpu_relax(); /* spin until last cpu leaves */
1287 ia64_mc_info.imi_rendez_checkin[cpu] = IA64_MCA_RENDEZ_CHECKIN_NOTDONE;
1288 set_curr_task(cpu, previous_current);
1289 return;
1290 }
1291 }
1292 }
1247 set_curr_task(cpu, previous_current); 1293 set_curr_task(cpu, previous_current);
1248 monarch_cpu = -1; 1294 monarch_cpu = -1;
1249} 1295}