aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/s390
diff options
context:
space:
mode:
author: Heiko Carstens <heiko.carstens@de.ibm.com> 2005-06-25 17:55:30 -0400
committer: Linus Torvalds <torvalds@ppc970.osdl.org> 2005-06-25 19:24:37 -0400
commit: 77fa22450de00d535de2cc8be653983560828000 (patch)
tree: 61644edb2263c3d0db3ea9e9518c6f76a60039e0 /drivers/s390
parent: f901e5d1e06b3326c100c5d0df43656311befb81 (diff)
[PATCH] s390: improved machine check handling
Improved machine check handling. Kernel is now able to receive machine checks while in kernel mode (system call, interrupt and program check handling). Also register validation is now performed.

Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
Signed-off-by: Linus Torvalds <torvalds@osdl.org>
Diffstat (limited to 'drivers/s390')
-rw-r--r-- drivers/s390/s390mach.c 321
-rw-r--r-- drivers/s390/s390mach.h 35
2 files changed, 317 insertions, 39 deletions
diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
index ffa996c8a908..5bb255e02acc 100644
--- a/drivers/s390/s390mach.c
+++ b/drivers/s390/s390mach.c
@@ -31,14 +31,14 @@ extern void css_reiterate_subchannels(void);
31extern struct workqueue_struct *slow_path_wq; 31extern struct workqueue_struct *slow_path_wq;
32extern struct work_struct slow_path_work; 32extern struct work_struct slow_path_work;
33 33
34static void 34static NORET_TYPE void
35s390_handle_damage(char *msg) 35s390_handle_damage(char *msg)
36{ 36{
37 printk(KERN_EMERG "%s\n", msg);
38#ifdef CONFIG_SMP 37#ifdef CONFIG_SMP
39 smp_send_stop(); 38 smp_send_stop();
40#endif 39#endif
41 disabled_wait((unsigned long) __builtin_return_address(0)); 40 disabled_wait((unsigned long) __builtin_return_address(0));
41 for(;;);
42} 42}
43 43
44/* 44/*
@@ -122,40 +122,39 @@ repeat:
122 return 0; 122 return 0;
123} 123}
124 124
125struct mcck_struct {
126 int kill_task;
127 int channel_report;
128 int warning;
129 unsigned long long mcck_code;
130};
131
132static DEFINE_PER_CPU(struct mcck_struct, cpu_mcck);
133
125/* 134/*
126 * machine check handler. 135 * Main machine check handler function. Will be called with interrupts enabled
136 * or disabled and machine checks enabled or disabled.
127 */ 137 */
128void 138void
129s390_do_machine_check(void) 139s390_handle_mcck(void)
130{ 140{
131 struct mci *mci; 141 unsigned long flags;
132 142 struct mcck_struct mcck;
133 mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
134 143
135 if (mci->sd) /* system damage */ 144 /*
136 s390_handle_damage("received system damage machine check\n"); 145 * Disable machine checks and get the current state of accumulated
146 * machine checks. Afterwards delete the old state and enable machine
147 * checks again.
148 */
149 local_irq_save(flags);
150 local_mcck_disable();
151 mcck = __get_cpu_var(cpu_mcck);
152 memset(&__get_cpu_var(cpu_mcck), 0, sizeof(struct mcck_struct));
153 clear_thread_flag(TIF_MCCK_PENDING);
154 local_mcck_enable();
155 local_irq_restore(flags);
137 156
138 if (mci->pd) /* instruction processing damage */ 157 if (mcck.channel_report)
139 s390_handle_damage("received instruction processing "
140 "damage machine check\n");
141
142 if (mci->se) /* storage error uncorrected */
143 s390_handle_damage("received storage error uncorrected "
144 "machine check\n");
145
146 if (mci->sc) /* storage error corrected */
147 printk(KERN_WARNING
148 "received storage error corrected machine check\n");
149
150 if (mci->ke) /* storage key-error uncorrected */
151 s390_handle_damage("received storage key-error uncorrected "
152 "machine check\n");
153
154 if (mci->ds && mci->fa) /* storage degradation */
155 s390_handle_damage("received storage degradation machine "
156 "check\n");
157
158 if (mci->cp) /* channel report word pending */
159 up(&m_sem); 158 up(&m_sem);
160 159
161#ifdef CONFIG_MACHCHK_WARNING 160#ifdef CONFIG_MACHCHK_WARNING
@@ -168,7 +167,7 @@ s390_do_machine_check(void)
168 * On VM we only get one interrupt per virtually presented machinecheck. 167 * On VM we only get one interrupt per virtually presented machinecheck.
169 * Though one suffices, we may get one interrupt per (virtual) processor. 168 * Though one suffices, we may get one interrupt per (virtual) processor.
170 */ 169 */
171 if (mci->w) { /* WARNING pending ? */ 170 if (mcck.warning) { /* WARNING pending ? */
172 static int mchchk_wng_posted = 0; 171 static int mchchk_wng_posted = 0;
173 /* 172 /*
174 * Use single machine clear, as we cannot handle smp right now 173 * Use single machine clear, as we cannot handle smp right now
@@ -178,6 +177,261 @@ s390_do_machine_check(void)
178 kill_proc(1, SIGPWR, 1); 177 kill_proc(1, SIGPWR, 1);
179 } 178 }
180#endif 179#endif
180
181 if (mcck.kill_task) {
182 local_irq_enable();
183 printk(KERN_EMERG "mcck: Terminating task because of machine "
184 "malfunction (code 0x%016llx).\n", mcck.mcck_code);
185 printk(KERN_EMERG "mcck: task: %s, pid: %d.\n",
186 current->comm, current->pid);
187 do_exit(SIGSEGV);
188 }
189}
190
191/*
192 * returns 0 if all registers could be validated
193 * returns 1 otherwise
194 */
195static int
196s390_revalidate_registers(struct mci *mci)
197{
198 int kill_task;
199 u64 tmpclock;
200 u64 zero;
201 void *fpt_save_area, *fpt_creg_save_area;
202
203 kill_task = 0;
204 zero = 0;
205 /* General purpose registers */
206 if (!mci->gr)
207 /*
208 * General purpose registers couldn't be restored and have
209 * unknown contents. Process needs to be terminated.
210 */
211 kill_task = 1;
212
213 /* Revalidate floating point registers */
214 if (!mci->fp)
215 /*
216 * Floating point registers can't be restored and
217 * therefore the process needs to be terminated.
218 */
219 kill_task = 1;
220
221#ifndef __s390x__
222 asm volatile("ld 0,0(%0)\n"
223 "ld 2,8(%0)\n"
224 "ld 4,16(%0)\n"
225 "ld 6,24(%0)"
226 : : "a" (&S390_lowcore.floating_pt_save_area));
227#endif
228
229 if (MACHINE_HAS_IEEE) {
230#ifdef __s390x__
231 fpt_save_area = &S390_lowcore.floating_pt_save_area;
232 fpt_creg_save_area = &S390_lowcore.fpt_creg_save_area;
233#else
234 fpt_save_area = (void *) S390_lowcore.extended_save_area_addr;
235 fpt_creg_save_area = fpt_save_area+128;
236#endif
237 /* Floating point control register */
238 if (!mci->fc) {
239 /*
240 * Floating point control register can't be restored.
241 * Task will be terminated.
242 */
243 asm volatile ("lfpc 0(%0)" : : "a" (&zero));
244 kill_task = 1;
245
246 }
247 else
248 asm volatile (
249 "lfpc 0(%0)"
250 : : "a" (fpt_creg_save_area));
251
252 asm volatile("ld 0,0(%0)\n"
253 "ld 1,8(%0)\n"
254 "ld 2,16(%0)\n"
255 "ld 3,24(%0)\n"
256 "ld 4,32(%0)\n"
257 "ld 5,40(%0)\n"
258 "ld 6,48(%0)\n"
259 "ld 7,56(%0)\n"
260 "ld 8,64(%0)\n"
261 "ld 9,72(%0)\n"
262 "ld 10,80(%0)\n"
263 "ld 11,88(%0)\n"
264 "ld 12,96(%0)\n"
265 "ld 13,104(%0)\n"
266 "ld 14,112(%0)\n"
267 "ld 15,120(%0)\n"
268 : : "a" (fpt_save_area));
269 }
270
271 /* Revalidate access registers */
272 asm volatile("lam 0,15,0(%0)"
273 : : "a" (&S390_lowcore.access_regs_save_area));
274 if (!mci->ar)
275 /*
276 * Access registers have unknown contents.
277 * Terminating task.
278 */
279 kill_task = 1;
280
281 /* Revalidate control registers */
282 if (!mci->cr)
283 /*
284 * Control registers have unknown contents.
285 * Can't recover and therefore stopping machine.
286 */
287 s390_handle_damage("invalid control registers.");
288 else
289#ifdef __s390x__
290 asm volatile("lctlg 0,15,0(%0)"
291 : : "a" (&S390_lowcore.cregs_save_area));
292#else
293 asm volatile("lctl 0,15,0(%0)"
294 : : "a" (&S390_lowcore.cregs_save_area));
295#endif
296
297 /*
298 * We don't even try to revalidate the TOD register, since we simply
299 * can't write something sensible into that register.
300 */
301
302#ifdef __s390x__
303 /*
304 * See if we can revalidate the TOD programmable register with its
305 * old contents (should be zero) otherwise set it to zero.
306 */
307 if (!mci->pr)
308 asm volatile("sr 0,0\n"
309 "sckpf"
310 : : : "0", "cc");
311 else
312 asm volatile(
313 "l 0,0(%0)\n"
314 "sckpf"
315 : : "a" (&S390_lowcore.tod_progreg_save_area) : "0", "cc");
316#endif
317
318 /* Revalidate clock comparator register */
319 asm volatile ("stck 0(%1)\n"
320 "sckc 0(%1)"
321 : "=m" (tmpclock) : "a" (&(tmpclock)) : "cc", "memory");
322
323 /* Check if old PSW is valid */
324 if (!mci->wp)
325 /*
326 * Can't tell if we come from user or kernel mode
327 * -> stopping machine.
328 */
329 s390_handle_damage("old psw invalid.");
330
331 if (!mci->ms || !mci->pm || !mci->ia)
332 kill_task = 1;
333
334 return kill_task;
335}
336
337/*
338 * machine check handler.
339 */
340void
341s390_do_machine_check(struct pt_regs *regs)
342{
343 struct mci *mci;
344 struct mcck_struct *mcck;
345 int umode;
346
347 mci = (struct mci *) &S390_lowcore.mcck_interruption_code;
348 mcck = &__get_cpu_var(cpu_mcck);
349 umode = user_mode(regs);
350
351 if (mci->sd)
352 /* System damage -> stopping machine */
353 s390_handle_damage("received system damage machine check.");
354
355 if (mci->pd) {
356 if (mci->b) {
357 /* Processing backup -> verify if we can survive this */
358 u64 z_mcic, o_mcic, t_mcic;
359#ifdef __s390x__
360 z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<29);
361 o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
362 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
363 1ULL<<30 | 1ULL<<21 | 1ULL<<20 | 1ULL<<17 |
364 1ULL<<16);
365#else
366 z_mcic = (1ULL<<63 | 1ULL<<59 | 1ULL<<57 | 1ULL<<50 |
367 1ULL<<29);
368 o_mcic = (1ULL<<43 | 1ULL<<42 | 1ULL<<41 | 1ULL<<40 |
369 1ULL<<36 | 1ULL<<35 | 1ULL<<34 | 1ULL<<32 |
370 1ULL<<30 | 1ULL<<20 | 1ULL<<17 | 1ULL<<16);
371#endif
372 t_mcic = *(u64 *)mci;
373
374 if (((t_mcic & z_mcic) != 0) ||
375 ((t_mcic & o_mcic) != o_mcic)) {
376 s390_handle_damage("processing backup machine "
377 "check with damage.");
378 }
379 if (!umode)
380 s390_handle_damage("processing backup machine "
381 "check in kernel mode.");
382 mcck->kill_task = 1;
383 mcck->mcck_code = *(unsigned long long *) mci;
384 }
385 else {
386 /* Processing damage -> stopping machine */
387 s390_handle_damage("received instruction processing "
388 "damage machine check.");
389 }
390 }
391 if (s390_revalidate_registers(mci)) {
392 if (umode) {
393 /*
394 * Couldn't restore all register contents while in
395 * user mode -> mark task for termination.
396 */
397 mcck->kill_task = 1;
398 mcck->mcck_code = *(unsigned long long *) mci;
399 set_thread_flag(TIF_MCCK_PENDING);
400 }
401 else
402 /*
403 * Couldn't restore all register contents while in
404 * kernel mode -> stopping machine.
405 */
406 s390_handle_damage("unable to revalidate registers.");
407 }
408
409 if (mci->se)
410 /* Storage error uncorrected */
411 s390_handle_damage("received storage error uncorrected "
412 "machine check.");
413
414 if (mci->ke)
415 /* Storage key-error uncorrected */
416 s390_handle_damage("received storage key-error uncorrected "
417 "machine check.");
418
419 if (mci->ds && mci->fa)
420 /* Storage degradation */
421 s390_handle_damage("received storage degradation machine "
422 "check.");
423
424 if (mci->cp) {
425 /* Channel report word pending */
426 mcck->channel_report = 1;
427 set_thread_flag(TIF_MCCK_PENDING);
428 }
429
430 if (mci->w) {
431 /* Warning pending */
432 mcck->warning = 1;
433 set_thread_flag(TIF_MCCK_PENDING);
434 }
181} 435}
182 436
183/* 437/*
@@ -189,9 +443,8 @@ static int
189machine_check_init(void) 443machine_check_init(void)
190{ 444{
191 init_MUTEX_LOCKED(&m_sem); 445 init_MUTEX_LOCKED(&m_sem);
192 ctl_clear_bit(14, 25); /* disable damage MCH */ 446 ctl_clear_bit(14, 25); /* disable external damage MCH */
193 ctl_set_bit(14, 26); /* enable degradation MCH */ 447 ctl_set_bit(14, 27); /* enable system recovery MCH */
194 ctl_set_bit(14, 27); /* enable system recovery MCH */
195#ifdef CONFIG_MACHCHK_WARNING 448#ifdef CONFIG_MACHCHK_WARNING
196 ctl_set_bit(14, 24); /* enable warning MCH */ 449 ctl_set_bit(14, 24); /* enable warning MCH */
197#endif 450#endif
diff --git a/drivers/s390/s390mach.h b/drivers/s390/s390mach.h
index 7e26f0f1b0dc..4eaa70179182 100644
--- a/drivers/s390/s390mach.h
+++ b/drivers/s390/s390mach.h
@@ -16,20 +16,45 @@ struct mci {
16 __u32 sd : 1; /* 00 system damage */ 16 __u32 sd : 1; /* 00 system damage */
17 __u32 pd : 1; /* 01 instruction-processing damage */ 17 __u32 pd : 1; /* 01 instruction-processing damage */
18 __u32 sr : 1; /* 02 system recovery */ 18 __u32 sr : 1; /* 02 system recovery */
19 __u32 to_be_defined_1 : 4; /* 03-06 */ 19 __u32 to_be_defined_1 : 1; /* 03 */
20 __u32 cd : 1; /* 04 timing-facility damage */
21 __u32 ed : 1; /* 05 external damage */
22 __u32 to_be_defined_2 : 1; /* 06 */
20 __u32 dg : 1; /* 07 degradation */ 23 __u32 dg : 1; /* 07 degradation */
21 __u32 w : 1; /* 08 warning pending */ 24 __u32 w : 1; /* 08 warning pending */
22 __u32 cp : 1; /* 09 channel-report pending */ 25 __u32 cp : 1; /* 09 channel-report pending */
23 __u32 to_be_defined_2 : 6; /* 10-15 */ 26 __u32 sp : 1; /* 10 service-processor damage */
27 __u32 ck : 1; /* 11 channel-subsystem damage */
28 __u32 to_be_defined_3 : 2; /* 12-13 */
29 __u32 b : 1; /* 14 backed up */
30 __u32 to_be_defined_4 : 1; /* 15 */
24 __u32 se : 1; /* 16 storage error uncorrected */ 31 __u32 se : 1; /* 16 storage error uncorrected */
25 __u32 sc : 1; /* 17 storage error corrected */ 32 __u32 sc : 1; /* 17 storage error corrected */
26 __u32 ke : 1; /* 18 storage-key error uncorrected */ 33 __u32 ke : 1; /* 18 storage-key error uncorrected */
27 __u32 ds : 1; /* 19 storage degradation */ 34 __u32 ds : 1; /* 19 storage degradation */
28 __u32 to_be_defined_3 : 4; /* 20-23 */ 35 __u32 wp : 1; /* 20 psw mwp validity */
36 __u32 ms : 1; /* 21 psw mask and key validity */
37 __u32 pm : 1; /* 22 psw program mask and cc validity */
38 __u32 ia : 1; /* 23 psw instruction address validity */
29 __u32 fa : 1; /* 24 failing storage address validity */ 39 __u32 fa : 1; /* 24 failing storage address validity */
30 __u32 to_be_defined_4 : 7; /* 25-31 */ 40 __u32 to_be_defined_5 : 1; /* 25 */
41 __u32 ec : 1; /* 26 external damage code validity */
42 __u32 fp : 1; /* 27 floating point register validity */
43 __u32 gr : 1; /* 28 general register validity */
44 __u32 cr : 1; /* 29 control register validity */
45 __u32 to_be_defined_6 : 1; /* 30 */
46 __u32 st : 1; /* 31 storage logical validity */
31 __u32 ie : 1; /* 32 indirect storage error */ 47 __u32 ie : 1; /* 32 indirect storage error */
32 __u32 to_be_defined_5 : 31; /* 33-63 */ 48 __u32 ar : 1; /* 33 access register validity */
49 __u32 da : 1; /* 34 delayed access exception */
50 __u32 to_be_defined_7 : 7; /* 35-41 */
51 __u32 pr : 1; /* 42 tod programmable register validity */
52 __u32 fc : 1; /* 43 fp control register validity */
53 __u32 ap : 1; /* 44 ancillary report */
54 __u32 to_be_defined_8 : 1; /* 45 */
55 __u32 ct : 1; /* 46 cpu timer validity */
56 __u32 cc : 1; /* 47 clock comparator validity */
57 __u32 to_be_defined_9 : 16; /* 48-63 */
33}; 58};
34 59
35/* 60/*