aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJay Lan <jlan@sgi.com>2007-04-03 20:53:42 -0400
committerTony Luck <tony.luck@intel.com>2007-05-14 18:55:39 -0400
commit311f594dec9b0c8693ec7df75b82c251b6b0e7c2 (patch)
tree1cd73b7df869c0a8c274e16b818454bed532d052
parentae67e498a54259364f7211e10d9834575b340b21 (diff)
[IA64] kdump on INIT needs multi-nodes sync-up (v.2)
The current implementation of kdump on INIT events would enter kdump processing on DIE_INIT_MONARCH_ENTER and DIE_INIT_SLAVE_ENTER events. Thus, the monarch cpu would go ahead and boot up the kdump kernel without waiting for the slave cpus to sync up. On SN shub2 systems, this out-of-sync situation causes some slave cpus on different nodes to enter POD. This patch moves the kdump entry points to DIE_INIT_MONARCH_LEAVE and DIE_INIT_SLAVE_LEAVE. It also sets the kdump_in_progress variable in the DIE_INIT_MONARCH_PROCESS event so that the stack traces of all active tasks are not dumped to the console in the case of kdump. I have tested this patch on an SN machine and an HP RX2600. Signed-off-by: Jay Lan <jlan@sgi.com> Acked-by: Zou Nan hai <nanhai.zou@intel.com> Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r--arch/ia64/kernel/crash.c22
-rw-r--r--arch/ia64/kernel/mca.c4
2 files changed, 19 insertions, 7 deletions
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c
index e80c82ce6b76..1d64ef478dde 100644
--- a/arch/ia64/kernel/crash.c
+++ b/arch/ia64/kernel/crash.c
@@ -156,24 +156,30 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data)
156 if (!kdump_on_init) 156 if (!kdump_on_init)
157 return NOTIFY_DONE; 157 return NOTIFY_DONE;
158 158
159 if (val != DIE_INIT_MONARCH_ENTER && 159 if (val != DIE_INIT_MONARCH_LEAVE &&
160 val != DIE_INIT_SLAVE_ENTER && 160 val != DIE_INIT_SLAVE_LEAVE &&
161 val != DIE_INIT_MONARCH_PROCESS &&
161 val != DIE_MCA_RENDZVOUS_LEAVE && 162 val != DIE_MCA_RENDZVOUS_LEAVE &&
162 val != DIE_MCA_MONARCH_LEAVE) 163 val != DIE_MCA_MONARCH_LEAVE)
163 return NOTIFY_DONE; 164 return NOTIFY_DONE;
164 165
165 nd = (struct ia64_mca_notify_die *)args->err; 166 nd = (struct ia64_mca_notify_die *)args->err;
166 /* Reason code 1 means machine check rendezvous*/ 167 /* Reason code 1 means machine check rendezvous*/
167 if ((val == DIE_INIT_MONARCH_ENTER || val == DIE_INIT_SLAVE_ENTER) && 168 if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE
168 nd->sos->rv_rc == 1) 169 || val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1)
169 return NOTIFY_DONE; 170 return NOTIFY_DONE;
170 171
171 switch (val) { 172 switch (val) {
172 case DIE_INIT_MONARCH_ENTER: 173 case DIE_INIT_MONARCH_PROCESS:
174 atomic_set(&kdump_in_progress, 1);
175 *(nd->monarch_cpu) = -1;
176 break;
177 case DIE_INIT_MONARCH_LEAVE:
173 machine_kdump_on_init(); 178 machine_kdump_on_init();
174 break; 179 break;
175 case DIE_INIT_SLAVE_ENTER: 180 case DIE_INIT_SLAVE_LEAVE:
176 unw_init_running(kdump_cpu_freeze, NULL); 181 if (atomic_read(&kdump_in_progress))
182 unw_init_running(kdump_cpu_freeze, NULL);
177 break; 183 break;
178 case DIE_MCA_RENDZVOUS_LEAVE: 184 case DIE_MCA_RENDZVOUS_LEAVE:
179 if (atomic_read(&kdump_in_progress)) 185 if (atomic_read(&kdump_in_progress))
@@ -215,8 +221,10 @@ static ctl_table sys_table[] = {
215static int 221static int
216machine_crash_setup(void) 222machine_crash_setup(void)
217{ 223{
224 /* be notified before default_monarch_init_process */
218 static struct notifier_block kdump_init_notifier_nb = { 225 static struct notifier_block kdump_init_notifier_nb = {
219 .notifier_call = kdump_init_notifier, 226 .notifier_call = kdump_init_notifier,
227 .priority = 1,
220 }; 228 };
221 int ret; 229 int ret;
222 if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) 230 if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0)
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c
index 26814de6c29a..93e82e4d99d6 100644
--- a/arch/ia64/kernel/mca.c
+++ b/arch/ia64/kernel/mca.c
@@ -1477,6 +1477,10 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi
1477 struct task_struct *g, *t; 1477 struct task_struct *g, *t;
1478 if (val != DIE_INIT_MONARCH_PROCESS) 1478 if (val != DIE_INIT_MONARCH_PROCESS)
1479 return NOTIFY_DONE; 1479 return NOTIFY_DONE;
1480#ifdef CONFIG_KEXEC
1481 if (atomic_read(&kdump_in_progress))
1482 return NOTIFY_DONE;
1483#endif
1480 1484
1481 /* 1485 /*
1482 * FIXME: mlogbuf will brim over with INIT stack dumps. 1486 * FIXME: mlogbuf will brim over with INIT stack dumps.