diff options
author | Jay Lan <jlan@sgi.com> | 2007-04-03 20:53:42 -0400 |
---|---|---|
committer | Tony Luck <tony.luck@intel.com> | 2007-05-14 18:55:39 -0400 |
commit | 311f594dec9b0c8693ec7df75b82c251b6b0e7c2 (patch) | |
tree | 1cd73b7df869c0a8c274e16b818454bed532d052 | |
parent | ae67e498a54259364f7211e10d9834575b340b21 (diff) |
[IA64] kdump on INIT needs multi-nodes sync-up (v.2)
The current implementation of kdump on INIT events would enter
kdump processing on DIE_INIT_MONARCH_ENTER and DIE_INIT_SLAVE_ENTER
events. Thus, the monarch cpu would go ahead and boot up the kdump kernel.
On SN shub2 systems, this out-of-sync situation causes some slave
cpus on different nodes to enter POD.
This patch moves kdump entry points to DIE_INIT_MONARCH_LEAVE and
DIE_INIT_SLAVE_LEAVE. It also sets the kdump_in_progress variable in
the DIE_INIT_MONARCH_PROCESS event so that the active stack traces are
not all dumped to the console in the case of kdump.
I have tested this patch on an SN machine and an HP RX2600.
Signed-off-by: Jay Lan <jlan@sgi.com>
Acked-by: Zou Nan hai <nanhai.zou@intel.com>
Signed-off-by: Tony Luck <tony.luck@intel.com>
-rw-r--r-- | arch/ia64/kernel/crash.c | 22 | ||||
-rw-r--r-- | arch/ia64/kernel/mca.c | 4 |
2 files changed, 19 insertions, 7 deletions
diff --git a/arch/ia64/kernel/crash.c b/arch/ia64/kernel/crash.c index e80c82ce6b76..1d64ef478dde 100644 --- a/arch/ia64/kernel/crash.c +++ b/arch/ia64/kernel/crash.c | |||
@@ -156,24 +156,30 @@ kdump_init_notifier(struct notifier_block *self, unsigned long val, void *data) | |||
156 | if (!kdump_on_init) | 156 | if (!kdump_on_init) |
157 | return NOTIFY_DONE; | 157 | return NOTIFY_DONE; |
158 | 158 | ||
159 | if (val != DIE_INIT_MONARCH_ENTER && | 159 | if (val != DIE_INIT_MONARCH_LEAVE && |
160 | val != DIE_INIT_SLAVE_ENTER && | 160 | val != DIE_INIT_SLAVE_LEAVE && |
161 | val != DIE_INIT_MONARCH_PROCESS && | ||
161 | val != DIE_MCA_RENDZVOUS_LEAVE && | 162 | val != DIE_MCA_RENDZVOUS_LEAVE && |
162 | val != DIE_MCA_MONARCH_LEAVE) | 163 | val != DIE_MCA_MONARCH_LEAVE) |
163 | return NOTIFY_DONE; | 164 | return NOTIFY_DONE; |
164 | 165 | ||
165 | nd = (struct ia64_mca_notify_die *)args->err; | 166 | nd = (struct ia64_mca_notify_die *)args->err; |
166 | /* Reason code 1 means machine check rendezvous*/ | 167 | /* Reason code 1 means machine check rendezvous*/ |
167 | if ((val == DIE_INIT_MONARCH_ENTER || val == DIE_INIT_SLAVE_ENTER) && | 168 | if ((val == DIE_INIT_MONARCH_LEAVE || val == DIE_INIT_SLAVE_LEAVE |
168 | nd->sos->rv_rc == 1) | 169 | || val == DIE_INIT_MONARCH_PROCESS) && nd->sos->rv_rc == 1) |
169 | return NOTIFY_DONE; | 170 | return NOTIFY_DONE; |
170 | 171 | ||
171 | switch (val) { | 172 | switch (val) { |
172 | case DIE_INIT_MONARCH_ENTER: | 173 | case DIE_INIT_MONARCH_PROCESS: |
174 | atomic_set(&kdump_in_progress, 1); | ||
175 | *(nd->monarch_cpu) = -1; | ||
176 | break; | ||
177 | case DIE_INIT_MONARCH_LEAVE: | ||
173 | machine_kdump_on_init(); | 178 | machine_kdump_on_init(); |
174 | break; | 179 | break; |
175 | case DIE_INIT_SLAVE_ENTER: | 180 | case DIE_INIT_SLAVE_LEAVE: |
176 | unw_init_running(kdump_cpu_freeze, NULL); | 181 | if (atomic_read(&kdump_in_progress)) |
182 | unw_init_running(kdump_cpu_freeze, NULL); | ||
177 | break; | 183 | break; |
178 | case DIE_MCA_RENDZVOUS_LEAVE: | 184 | case DIE_MCA_RENDZVOUS_LEAVE: |
179 | if (atomic_read(&kdump_in_progress)) | 185 | if (atomic_read(&kdump_in_progress)) |
@@ -215,8 +221,10 @@ static ctl_table sys_table[] = { | |||
215 | static int | 221 | static int |
216 | machine_crash_setup(void) | 222 | machine_crash_setup(void) |
217 | { | 223 | { |
224 | /* be notified before default_monarch_init_process */ | ||
218 | static struct notifier_block kdump_init_notifier_nb = { | 225 | static struct notifier_block kdump_init_notifier_nb = { |
219 | .notifier_call = kdump_init_notifier, | 226 | .notifier_call = kdump_init_notifier, |
227 | .priority = 1, | ||
220 | }; | 228 | }; |
221 | int ret; | 229 | int ret; |
222 | if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) | 230 | if((ret = register_die_notifier(&kdump_init_notifier_nb)) != 0) |
diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 26814de6c29a..93e82e4d99d6 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c | |||
@@ -1477,6 +1477,10 @@ default_monarch_init_process(struct notifier_block *self, unsigned long val, voi | |||
1477 | struct task_struct *g, *t; | 1477 | struct task_struct *g, *t; |
1478 | if (val != DIE_INIT_MONARCH_PROCESS) | 1478 | if (val != DIE_INIT_MONARCH_PROCESS) |
1479 | return NOTIFY_DONE; | 1479 | return NOTIFY_DONE; |
1480 | #ifdef CONFIG_KEXEC | ||
1481 | if (atomic_read(&kdump_in_progress)) | ||
1482 | return NOTIFY_DONE; | ||
1483 | #endif | ||
1480 | 1484 | ||
1481 | /* | 1485 | /* |
1482 | * FIXME: mlogbuf will brim over with INIT stack dumps. | 1486 | * FIXME: mlogbuf will brim over with INIT stack dumps. |