diff options
Diffstat (limited to 'arch/ia64/kernel/mca_drv.c')
-rw-r--r-- | arch/ia64/kernel/mca_drv.c | 639 |
1 files changed, 639 insertions, 0 deletions
diff --git a/arch/ia64/kernel/mca_drv.c b/arch/ia64/kernel/mca_drv.c new file mode 100644 index 00000000000..ab478172c34 --- /dev/null +++ b/arch/ia64/kernel/mca_drv.c | |||
@@ -0,0 +1,639 @@ | |||
1 | /* | ||
2 | * File: mca_drv.c | ||
3 | * Purpose: Generic MCA handling layer | ||
4 | * | ||
5 | * Copyright (C) 2004 FUJITSU LIMITED | ||
6 | * Copyright (C) Hidetoshi Seto (seto.hidetoshi@jp.fujitsu.com) | ||
7 | */ | ||
8 | #include <linux/config.h> | ||
9 | #include <linux/types.h> | ||
10 | #include <linux/init.h> | ||
11 | #include <linux/sched.h> | ||
12 | #include <linux/interrupt.h> | ||
13 | #include <linux/irq.h> | ||
14 | #include <linux/kallsyms.h> | ||
15 | #include <linux/smp_lock.h> | ||
16 | #include <linux/bootmem.h> | ||
17 | #include <linux/acpi.h> | ||
18 | #include <linux/timer.h> | ||
19 | #include <linux/module.h> | ||
20 | #include <linux/kernel.h> | ||
21 | #include <linux/smp.h> | ||
22 | #include <linux/workqueue.h> | ||
23 | #include <linux/mm.h> | ||
24 | |||
25 | #include <asm/delay.h> | ||
26 | #include <asm/machvec.h> | ||
27 | #include <asm/page.h> | ||
28 | #include <asm/ptrace.h> | ||
29 | #include <asm/system.h> | ||
30 | #include <asm/sal.h> | ||
31 | #include <asm/mca.h> | ||
32 | |||
33 | #include <asm/irq.h> | ||
34 | #include <asm/hw_irq.h> | ||
35 | |||
36 | #include "mca_drv.h" | ||
37 | |||
38 | /* max size of SAL error record (default) */ | ||
39 | static int sal_rec_max = 10000; | ||
40 | |||
41 | /* from mca.c */ | ||
42 | static ia64_mca_sal_to_os_state_t *sal_to_os_handoff_state; | ||
43 | static ia64_mca_os_to_sal_state_t *os_to_sal_handoff_state; | ||
44 | |||
45 | /* from mca_drv_asm.S */ | ||
46 | extern void *mca_handler_bhhook(void); | ||
47 | |||
48 | static DEFINE_SPINLOCK(mca_bh_lock); | ||
49 | |||
50 | typedef enum { | ||
51 | MCA_IS_LOCAL = 0, | ||
52 | MCA_IS_GLOBAL = 1 | ||
53 | } mca_type_t; | ||
54 | |||
55 | #define MAX_PAGE_ISOLATE 1024 | ||
56 | |||
57 | static struct page *page_isolate[MAX_PAGE_ISOLATE]; | ||
58 | static int num_page_isolate = 0; | ||
59 | |||
60 | typedef enum { | ||
61 | ISOLATE_NG = 0, | ||
62 | ISOLATE_OK = 1 | ||
63 | } isolate_status_t; | ||
64 | |||
65 | /* | ||
66 | * This pool keeps pointers to the section part of SAL error record | ||
67 | */ | ||
68 | static struct { | ||
69 | slidx_list_t *buffer; /* section pointer list pool */ | ||
70 | int cur_idx; /* Current index of section pointer list pool */ | ||
71 | int max_idx; /* Maximum index of section pointer list pool */ | ||
72 | } slidx_pool; | ||
73 | |||
74 | /** | ||
75 | * mca_page_isolate - isolate a poisoned page in order not to use it later | ||
76 | * @paddr: poisoned memory location | ||
77 | * | ||
78 | * Return value: | ||
79 | * ISOLATE_OK / ISOLATE_NG | ||
80 | */ | ||
81 | |||
82 | static isolate_status_t | ||
83 | mca_page_isolate(unsigned long paddr) | ||
84 | { | ||
85 | int i; | ||
86 | struct page *p; | ||
87 | |||
88 | /* whether physical address is valid or not */ | ||
89 | if ( !ia64_phys_addr_valid(paddr) ) | ||
90 | return ISOLATE_NG; | ||
91 | |||
92 | /* convert physical address to physical page number */ | ||
93 | p = pfn_to_page(paddr>>PAGE_SHIFT); | ||
94 | |||
95 | /* check whether a page number have been already registered or not */ | ||
96 | for( i = 0; i < num_page_isolate; i++ ) | ||
97 | if( page_isolate[i] == p ) | ||
98 | return ISOLATE_OK; /* already listed */ | ||
99 | |||
100 | /* limitation check */ | ||
101 | if( num_page_isolate == MAX_PAGE_ISOLATE ) | ||
102 | return ISOLATE_NG; | ||
103 | |||
104 | /* kick pages having attribute 'SLAB' or 'Reserved' */ | ||
105 | if( PageSlab(p) || PageReserved(p) ) | ||
106 | return ISOLATE_NG; | ||
107 | |||
108 | /* add attribute 'Reserved' and register the page */ | ||
109 | SetPageReserved(p); | ||
110 | page_isolate[num_page_isolate++] = p; | ||
111 | |||
112 | return ISOLATE_OK; | ||
113 | } | ||
114 | |||
115 | /** | ||
116 | * mca_hanlder_bh - Kill the process which occurred memory read error | ||
117 | * @paddr: poisoned address received from MCA Handler | ||
118 | */ | ||
119 | |||
120 | void | ||
121 | mca_handler_bh(unsigned long paddr) | ||
122 | { | ||
123 | printk(KERN_DEBUG "OS_MCA: process [pid: %d](%s) encounters MCA.\n", | ||
124 | current->pid, current->comm); | ||
125 | |||
126 | spin_lock(&mca_bh_lock); | ||
127 | if (mca_page_isolate(paddr) == ISOLATE_OK) { | ||
128 | printk(KERN_DEBUG "Page isolation: ( %lx ) success.\n", paddr); | ||
129 | } else { | ||
130 | printk(KERN_DEBUG "Page isolation: ( %lx ) failure.\n", paddr); | ||
131 | } | ||
132 | spin_unlock(&mca_bh_lock); | ||
133 | |||
134 | /* This process is about to be killed itself */ | ||
135 | force_sig(SIGKILL, current); | ||
136 | schedule(); | ||
137 | } | ||
138 | |||
139 | /** | ||
140 | * mca_make_peidx - Make index of processor error section | ||
141 | * @slpi: pointer to record of processor error section | ||
142 | * @peidx: pointer to index of processor error section | ||
143 | */ | ||
144 | |||
145 | static void | ||
146 | mca_make_peidx(sal_log_processor_info_t *slpi, peidx_table_t *peidx) | ||
147 | { | ||
148 | /* | ||
149 | * calculate the start address of | ||
150 | * "struct cpuid_info" and "sal_processor_static_info_t". | ||
151 | */ | ||
152 | u64 total_check_num = slpi->valid.num_cache_check | ||
153 | + slpi->valid.num_tlb_check | ||
154 | + slpi->valid.num_bus_check | ||
155 | + slpi->valid.num_reg_file_check | ||
156 | + slpi->valid.num_ms_check; | ||
157 | u64 head_size = sizeof(sal_log_mod_error_info_t) * total_check_num | ||
158 | + sizeof(sal_log_processor_info_t); | ||
159 | u64 mid_size = slpi->valid.cpuid_info * sizeof(struct sal_cpuid_info); | ||
160 | |||
161 | peidx_head(peidx) = slpi; | ||
162 | peidx_mid(peidx) = (struct sal_cpuid_info *) | ||
163 | (slpi->valid.cpuid_info ? ((char*)slpi + head_size) : NULL); | ||
164 | peidx_bottom(peidx) = (sal_processor_static_info_t *) | ||
165 | (slpi->valid.psi_static_struct ? | ||
166 | ((char*)slpi + head_size + mid_size) : NULL); | ||
167 | } | ||
168 | |||
169 | /** | ||
170 | * mca_make_slidx - Make index of SAL error record | ||
171 | * @buffer: pointer to SAL error record | ||
172 | * @slidx: pointer to index of SAL error record | ||
173 | * | ||
174 | * Return value: | ||
175 | * 1 if record has platform error / 0 if not | ||
176 | */ | ||
/*
 * LOG_INDEX_ADD_SECT_PTR - file @ptr under the section list @sect
 * @sect:	list head (an lvalue, not a pointer) to add the entry to
 * @ptr:	section header pointer to remember
 *
 * Takes the next node of slidx_pool, stores @ptr in it, links it into
 * @sect with list_add() and advances the pool cursor, wrapping around
 * at slidx_pool.max_idx.
 *
 * Wrapped in do { } while (0) so that "MACRO(...);" is a single statement
 * and stays correct under an unbraced if/else.
 */
#define LOG_INDEX_ADD_SECT_PTR(sect, ptr) \
	do { \
		slidx_list_t *hl_ = &slidx_pool.buffer[slidx_pool.cur_idx]; \
		hl_->hdr = (ptr); \
		list_add(&hl_->list, &(sect)); \
		slidx_pool.cur_idx = (slidx_pool.cur_idx + 1) % slidx_pool.max_idx; \
	} while (0)
182 | |||
183 | static int | ||
184 | mca_make_slidx(void *buffer, slidx_table_t *slidx) | ||
185 | { | ||
186 | int platform_err = 0; | ||
187 | int record_len = ((sal_log_record_header_t*)buffer)->len; | ||
188 | u32 ercd_pos; | ||
189 | int sects; | ||
190 | sal_log_section_hdr_t *sp; | ||
191 | |||
192 | /* | ||
193 | * Initialize index referring current record | ||
194 | */ | ||
195 | INIT_LIST_HEAD(&(slidx->proc_err)); | ||
196 | INIT_LIST_HEAD(&(slidx->mem_dev_err)); | ||
197 | INIT_LIST_HEAD(&(slidx->sel_dev_err)); | ||
198 | INIT_LIST_HEAD(&(slidx->pci_bus_err)); | ||
199 | INIT_LIST_HEAD(&(slidx->smbios_dev_err)); | ||
200 | INIT_LIST_HEAD(&(slidx->pci_comp_err)); | ||
201 | INIT_LIST_HEAD(&(slidx->plat_specific_err)); | ||
202 | INIT_LIST_HEAD(&(slidx->host_ctlr_err)); | ||
203 | INIT_LIST_HEAD(&(slidx->plat_bus_err)); | ||
204 | INIT_LIST_HEAD(&(slidx->unsupported)); | ||
205 | |||
206 | /* | ||
207 | * Extract a Record Header | ||
208 | */ | ||
209 | slidx->header = buffer; | ||
210 | |||
211 | /* | ||
212 | * Extract each section records | ||
213 | * (arranged from "int ia64_log_platform_info_print()") | ||
214 | */ | ||
215 | for (ercd_pos = sizeof(sal_log_record_header_t), sects = 0; | ||
216 | ercd_pos < record_len; ercd_pos += sp->len, sects++) { | ||
217 | sp = (sal_log_section_hdr_t *)((char*)buffer + ercd_pos); | ||
218 | if (!efi_guidcmp(sp->guid, SAL_PROC_DEV_ERR_SECT_GUID)) { | ||
219 | LOG_INDEX_ADD_SECT_PTR(slidx->proc_err, sp); | ||
220 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_MEM_DEV_ERR_SECT_GUID)) { | ||
221 | platform_err = 1; | ||
222 | LOG_INDEX_ADD_SECT_PTR(slidx->mem_dev_err, sp); | ||
223 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SEL_DEV_ERR_SECT_GUID)) { | ||
224 | platform_err = 1; | ||
225 | LOG_INDEX_ADD_SECT_PTR(slidx->sel_dev_err, sp); | ||
226 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_BUS_ERR_SECT_GUID)) { | ||
227 | platform_err = 1; | ||
228 | LOG_INDEX_ADD_SECT_PTR(slidx->pci_bus_err, sp); | ||
229 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SMBIOS_DEV_ERR_SECT_GUID)) { | ||
230 | platform_err = 1; | ||
231 | LOG_INDEX_ADD_SECT_PTR(slidx->smbios_dev_err, sp); | ||
232 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_PCI_COMP_ERR_SECT_GUID)) { | ||
233 | platform_err = 1; | ||
234 | LOG_INDEX_ADD_SECT_PTR(slidx->pci_comp_err, sp); | ||
235 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_SPECIFIC_ERR_SECT_GUID)) { | ||
236 | platform_err = 1; | ||
237 | LOG_INDEX_ADD_SECT_PTR(slidx->plat_specific_err, sp); | ||
238 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_HOST_CTLR_ERR_SECT_GUID)) { | ||
239 | platform_err = 1; | ||
240 | LOG_INDEX_ADD_SECT_PTR(slidx->host_ctlr_err, sp); | ||
241 | } else if (!efi_guidcmp(sp->guid, SAL_PLAT_BUS_ERR_SECT_GUID)) { | ||
242 | platform_err = 1; | ||
243 | LOG_INDEX_ADD_SECT_PTR(slidx->plat_bus_err, sp); | ||
244 | } else { | ||
245 | LOG_INDEX_ADD_SECT_PTR(slidx->unsupported, sp); | ||
246 | } | ||
247 | } | ||
248 | slidx->n_sections = sects; | ||
249 | |||
250 | return platform_err; | ||
251 | } | ||
252 | |||
253 | /** | ||
254 | * init_record_index_pools - Initialize pool of lists for SAL record index | ||
255 | * | ||
256 | * Return value: | ||
257 | * 0 on Success / -ENOMEM on Failure | ||
258 | */ | ||
259 | static int | ||
260 | init_record_index_pools(void) | ||
261 | { | ||
262 | int i; | ||
263 | int rec_max_size; /* Maximum size of SAL error records */ | ||
264 | int sect_min_size; /* Minimum size of SAL error sections */ | ||
265 | /* minimum size table of each section */ | ||
266 | static int sal_log_sect_min_sizes[] = { | ||
267 | sizeof(sal_log_processor_info_t) + sizeof(sal_processor_static_info_t), | ||
268 | sizeof(sal_log_mem_dev_err_info_t), | ||
269 | sizeof(sal_log_sel_dev_err_info_t), | ||
270 | sizeof(sal_log_pci_bus_err_info_t), | ||
271 | sizeof(sal_log_smbios_dev_err_info_t), | ||
272 | sizeof(sal_log_pci_comp_err_info_t), | ||
273 | sizeof(sal_log_plat_specific_err_info_t), | ||
274 | sizeof(sal_log_host_ctlr_err_info_t), | ||
275 | sizeof(sal_log_plat_bus_err_info_t), | ||
276 | }; | ||
277 | |||
278 | /* | ||
279 | * MCA handler cannot allocate new memory on flight, | ||
280 | * so we preallocate enough memory to handle a SAL record. | ||
281 | * | ||
282 | * Initialize a handling set of slidx_pool: | ||
283 | * 1. Pick up the max size of SAL error records | ||
284 | * 2. Pick up the min size of SAL error sections | ||
285 | * 3. Allocate the pool as enough to 2 SAL records | ||
286 | * (now we can estimate the maxinum of section in a record.) | ||
287 | */ | ||
288 | |||
289 | /* - 1 - */ | ||
290 | rec_max_size = sal_rec_max; | ||
291 | |||
292 | /* - 2 - */ | ||
293 | sect_min_size = sal_log_sect_min_sizes[0]; | ||
294 | for (i = 1; i < sizeof sal_log_sect_min_sizes/sizeof(size_t); i++) | ||
295 | if (sect_min_size > sal_log_sect_min_sizes[i]) | ||
296 | sect_min_size = sal_log_sect_min_sizes[i]; | ||
297 | |||
298 | /* - 3 - */ | ||
299 | slidx_pool.max_idx = (rec_max_size/sect_min_size) * 2 + 1; | ||
300 | slidx_pool.buffer = (slidx_list_t *) kmalloc(slidx_pool.max_idx * sizeof(slidx_list_t), GFP_KERNEL); | ||
301 | |||
302 | return slidx_pool.buffer ? 0 : -ENOMEM; | ||
303 | } | ||
304 | |||
305 | |||
306 | /***************************************************************************** | ||
307 | * Recovery functions * | ||
308 | *****************************************************************************/ | ||
309 | |||
310 | /** | ||
311 | * is_mca_global - Check whether this MCA is global or not | ||
312 | * @peidx: pointer of index of processor error section | ||
313 | * @pbci: pointer to pal_bus_check_info_t | ||
314 | * | ||
315 | * Return value: | ||
316 | * MCA_IS_LOCAL / MCA_IS_GLOBAL | ||
317 | */ | ||
318 | |||
319 | static mca_type_t | ||
320 | is_mca_global(peidx_table_t *peidx, pal_bus_check_info_t *pbci) | ||
321 | { | ||
322 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | ||
323 | |||
324 | /* | ||
325 | * PAL can request a rendezvous, if the MCA has a global scope. | ||
326 | * If "rz_always" flag is set, SAL requests MCA rendezvous | ||
327 | * in spite of global MCA. | ||
328 | * Therefore it is local MCA when rendezvous has not been requested. | ||
329 | * Failed to rendezvous, the system must be down. | ||
330 | */ | ||
331 | switch (sal_to_os_handoff_state->imsto_rendez_state) { | ||
332 | case -1: /* SAL rendezvous unsuccessful */ | ||
333 | return MCA_IS_GLOBAL; | ||
334 | case 0: /* SAL rendezvous not required */ | ||
335 | return MCA_IS_LOCAL; | ||
336 | case 1: /* SAL rendezvous successful int */ | ||
337 | case 2: /* SAL rendezvous successful int with init */ | ||
338 | default: | ||
339 | break; | ||
340 | } | ||
341 | |||
342 | /* | ||
343 | * If One or more Cache/TLB/Reg_File/Uarch_Check is here, | ||
344 | * it would be a local MCA. (i.e. processor internal error) | ||
345 | */ | ||
346 | if (psp->tc || psp->cc || psp->rc || psp->uc) | ||
347 | return MCA_IS_LOCAL; | ||
348 | |||
349 | /* | ||
350 | * Bus_Check structure with Bus_Check.ib (internal bus error) flag set | ||
351 | * would be a global MCA. (e.g. a system bus address parity error) | ||
352 | */ | ||
353 | if (!pbci || pbci->ib) | ||
354 | return MCA_IS_GLOBAL; | ||
355 | |||
356 | /* | ||
357 | * Bus_Check structure with Bus_Check.eb (external bus error) flag set | ||
358 | * could be either a local MCA or a global MCA. | ||
359 | * | ||
360 | * Referring Bus_Check.bsi: | ||
361 | * 0: Unknown/unclassified | ||
362 | * 1: BERR# | ||
363 | * 2: BINIT# | ||
364 | * 3: Hard Fail | ||
365 | * (FIXME: Are these SGI specific or generic bsi values?) | ||
366 | */ | ||
367 | if (pbci->eb) | ||
368 | switch (pbci->bsi) { | ||
369 | case 0: | ||
370 | /* e.g. a load from poisoned memory */ | ||
371 | return MCA_IS_LOCAL; | ||
372 | case 1: | ||
373 | case 2: | ||
374 | case 3: | ||
375 | return MCA_IS_GLOBAL; | ||
376 | } | ||
377 | |||
378 | return MCA_IS_GLOBAL; | ||
379 | } | ||
380 | |||
381 | /** | ||
382 | * recover_from_read_error - Try to recover the errors which type are "read"s. | ||
383 | * @slidx: pointer of index of SAL error record | ||
384 | * @peidx: pointer of index of processor error section | ||
385 | * @pbci: pointer of pal_bus_check_info | ||
386 | * | ||
387 | * Return value: | ||
388 | * 1 on Success / 0 on Failure | ||
389 | */ | ||
390 | |||
391 | static int | ||
392 | recover_from_read_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | ||
393 | { | ||
394 | sal_log_mod_error_info_t *smei; | ||
395 | pal_min_state_area_t *pmsa; | ||
396 | struct ia64_psr *psr1, *psr2; | ||
397 | ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook; | ||
398 | |||
399 | /* Is target address valid? */ | ||
400 | if (!pbci->tv) | ||
401 | return 0; | ||
402 | |||
403 | /* | ||
404 | * cpu read or memory-mapped io read | ||
405 | * | ||
406 | * offending process affected process OS MCA do | ||
407 | * kernel mode kernel mode down system | ||
408 | * kernel mode user mode kill the process | ||
409 | * user mode kernel mode down system (*) | ||
410 | * user mode user mode kill the process | ||
411 | * | ||
412 | * (*) You could terminate offending user-mode process | ||
413 | * if (pbci->pv && pbci->pl != 0) *and* if you sure | ||
414 | * the process not have any locks of kernel. | ||
415 | */ | ||
416 | |||
417 | psr1 =(struct ia64_psr *)&(peidx_minstate_area(peidx)->pmsa_ipsr); | ||
418 | |||
419 | /* | ||
420 | * Check the privilege level of interrupted context. | ||
421 | * If it is user-mode, then terminate affected process. | ||
422 | */ | ||
423 | if (psr1->cpl != 0) { | ||
424 | smei = peidx_bus_check(peidx, 0); | ||
425 | if (smei->valid.target_identifier) { | ||
426 | /* | ||
427 | * setup for resume to bottom half of MCA, | ||
428 | * "mca_handler_bhhook" | ||
429 | */ | ||
430 | pmsa = (pal_min_state_area_t *)(sal_to_os_handoff_state->pal_min_state | (6ul<<61)); | ||
431 | /* pass to bhhook as 1st argument (gr8) */ | ||
432 | pmsa->pmsa_gr[8-1] = smei->target_identifier; | ||
433 | /* set interrupted return address (but no use) */ | ||
434 | pmsa->pmsa_br0 = pmsa->pmsa_iip; | ||
435 | /* change resume address to bottom half */ | ||
436 | pmsa->pmsa_iip = mca_hdlr_bh->fp; | ||
437 | pmsa->pmsa_gr[1-1] = mca_hdlr_bh->gp; | ||
438 | /* set cpl with kernel mode */ | ||
439 | psr2 = (struct ia64_psr *)&pmsa->pmsa_ipsr; | ||
440 | psr2->cpl = 0; | ||
441 | psr2->ri = 0; | ||
442 | |||
443 | return 1; | ||
444 | } | ||
445 | |||
446 | } | ||
447 | |||
448 | return 0; | ||
449 | } | ||
450 | |||
451 | /** | ||
452 | * recover_from_platform_error - Recover from platform error. | ||
453 | * @slidx: pointer of index of SAL error record | ||
454 | * @peidx: pointer of index of processor error section | ||
455 | * @pbci: pointer of pal_bus_check_info | ||
456 | * | ||
457 | * Return value: | ||
458 | * 1 on Success / 0 on Failure | ||
459 | */ | ||
460 | |||
461 | static int | ||
462 | recover_from_platform_error(slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | ||
463 | { | ||
464 | int status = 0; | ||
465 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | ||
466 | |||
467 | if (psp->bc && pbci->eb && pbci->bsi == 0) { | ||
468 | switch(pbci->type) { | ||
469 | case 1: /* partial read */ | ||
470 | case 3: /* full line(cpu) read */ | ||
471 | case 9: /* I/O space read */ | ||
472 | status = recover_from_read_error(slidx, peidx, pbci); | ||
473 | break; | ||
474 | case 0: /* unknown */ | ||
475 | case 2: /* partial write */ | ||
476 | case 4: /* full line write */ | ||
477 | case 5: /* implicit or explicit write-back operation */ | ||
478 | case 6: /* snoop probe */ | ||
479 | case 7: /* incoming or outgoing ptc.g */ | ||
480 | case 8: /* write coalescing transactions */ | ||
481 | case 10: /* I/O space write */ | ||
482 | case 11: /* inter-processor interrupt message(IPI) */ | ||
483 | case 12: /* interrupt acknowledge or external task priority cycle */ | ||
484 | default: | ||
485 | break; | ||
486 | } | ||
487 | } | ||
488 | |||
489 | return status; | ||
490 | } | ||
491 | |||
492 | /** | ||
493 | * recover_from_processor_error | ||
494 | * @platform: whether there are some platform error section or not | ||
495 | * @slidx: pointer of index of SAL error record | ||
496 | * @peidx: pointer of index of processor error section | ||
497 | * @pbci: pointer of pal_bus_check_info | ||
498 | * | ||
499 | * Return value: | ||
500 | * 1 on Success / 0 on Failure | ||
501 | */ | ||
502 | /* | ||
503 | * Later we try to recover when below all conditions are satisfied. | ||
504 | * 1. Only one processor error section is exist. | ||
505 | * 2. BUS_CHECK is exist and the others are not exist.(Except TLB_CHECK) | ||
506 | * 3. The entry of BUS_CHECK_INFO is 1. | ||
507 | * 4. "External bus error" flag is set and the others are not set. | ||
508 | */ | ||
509 | |||
510 | static int | ||
511 | recover_from_processor_error(int platform, slidx_table_t *slidx, peidx_table_t *peidx, pal_bus_check_info_t *pbci) | ||
512 | { | ||
513 | pal_processor_state_info_t *psp = (pal_processor_state_info_t*)peidx_psp(peidx); | ||
514 | |||
515 | /* | ||
516 | * We cannot recover errors with other than bus_check. | ||
517 | */ | ||
518 | if (psp->cc || psp->rc || psp->uc) | ||
519 | return 0; | ||
520 | |||
521 | /* | ||
522 | * If there is no bus error, record is weird but we need not to recover. | ||
523 | */ | ||
524 | if (psp->bc == 0 || pbci == NULL) | ||
525 | return 1; | ||
526 | |||
527 | /* | ||
528 | * Sorry, we cannot handle so many. | ||
529 | */ | ||
530 | if (peidx_bus_check_num(peidx) > 1) | ||
531 | return 0; | ||
532 | /* | ||
533 | * Well, here is only one bus error. | ||
534 | */ | ||
535 | if (pbci->ib || pbci->cc) | ||
536 | return 0; | ||
537 | if (pbci->eb && pbci->bsi > 0) | ||
538 | return 0; | ||
539 | if (psp->ci == 0) | ||
540 | return 0; | ||
541 | |||
542 | /* | ||
543 | * This is a local MCA and estimated as recoverble external bus error. | ||
544 | * (e.g. a load from poisoned memory) | ||
545 | * This means "there are some platform errors". | ||
546 | */ | ||
547 | if (platform) | ||
548 | return recover_from_platform_error(slidx, peidx, pbci); | ||
549 | /* | ||
550 | * On account of strange SAL error record, we cannot recover. | ||
551 | */ | ||
552 | return 0; | ||
553 | } | ||
554 | |||
555 | /** | ||
556 | * mca_try_to_recover - Try to recover from MCA | ||
557 | * @rec: pointer to a SAL error record | ||
558 | * | ||
559 | * Return value: | ||
560 | * 1 on Success / 0 on Failure | ||
561 | */ | ||
562 | |||
563 | static int | ||
564 | mca_try_to_recover(void *rec, | ||
565 | ia64_mca_sal_to_os_state_t *sal_to_os_state, | ||
566 | ia64_mca_os_to_sal_state_t *os_to_sal_state) | ||
567 | { | ||
568 | int platform_err; | ||
569 | int n_proc_err; | ||
570 | slidx_table_t slidx; | ||
571 | peidx_table_t peidx; | ||
572 | pal_bus_check_info_t pbci; | ||
573 | |||
574 | /* handoff state from/to mca.c */ | ||
575 | sal_to_os_handoff_state = sal_to_os_state; | ||
576 | os_to_sal_handoff_state = os_to_sal_state; | ||
577 | |||
578 | /* Make index of SAL error record */ | ||
579 | platform_err = mca_make_slidx(rec, &slidx); | ||
580 | |||
581 | /* Count processor error sections */ | ||
582 | n_proc_err = slidx_count(&slidx, proc_err); | ||
583 | |||
584 | /* Now, OS can recover when there is one processor error section */ | ||
585 | if (n_proc_err > 1) | ||
586 | return 0; | ||
587 | else if (n_proc_err == 0) { | ||
588 | /* Weird SAL record ... We need not to recover */ | ||
589 | |||
590 | return 1; | ||
591 | } | ||
592 | |||
593 | /* Make index of processor error section */ | ||
594 | mca_make_peidx((sal_log_processor_info_t*)slidx_first_entry(&slidx.proc_err)->hdr, &peidx); | ||
595 | |||
596 | /* Extract Processor BUS_CHECK[0] */ | ||
597 | *((u64*)&pbci) = peidx_check_info(&peidx, bus_check, 0); | ||
598 | |||
599 | /* Check whether MCA is global or not */ | ||
600 | if (is_mca_global(&peidx, &pbci)) | ||
601 | return 0; | ||
602 | |||
603 | /* Try to recover a processor error */ | ||
604 | return recover_from_processor_error(platform_err, &slidx, &peidx, &pbci); | ||
605 | } | ||
606 | |||
607 | /* | ||
608 | * ============================================================================= | ||
609 | */ | ||
610 | |||
611 | int __init mca_external_handler_init(void) | ||
612 | { | ||
613 | if (init_record_index_pools()) | ||
614 | return -ENOMEM; | ||
615 | |||
616 | /* register external mca handlers */ | ||
617 | if (ia64_reg_MCA_extension(mca_try_to_recover)){ | ||
618 | printk(KERN_ERR "ia64_reg_MCA_extension failed.\n"); | ||
619 | kfree(slidx_pool.buffer); | ||
620 | return -EFAULT; | ||
621 | } | ||
622 | return 0; | ||
623 | } | ||
624 | |||
625 | void __exit mca_external_handler_exit(void) | ||
626 | { | ||
627 | /* unregister external mca handlers */ | ||
628 | ia64_unreg_MCA_extension(); | ||
629 | kfree(slidx_pool.buffer); | ||
630 | } | ||
631 | |||
632 | module_init(mca_external_handler_init); | ||
633 | module_exit(mca_external_handler_exit); | ||
634 | |||
635 | module_param(sal_rec_max, int, 0644); | ||
636 | MODULE_PARM_DESC(sal_rec_max, "Max size of SAL error record"); | ||
637 | |||
638 | MODULE_DESCRIPTION("ia64 platform dependent mca handler driver"); | ||
639 | MODULE_LICENSE("GPL"); | ||