diff options
author | Gavin Shan <gwshan@linux.vnet.ibm.com> | 2014-11-22 05:58:09 -0500 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2014-12-01 19:03:26 -0500 |
commit | a450e8f55a57d049ac3afe218f06567e12d6b4f5 (patch) | |
tree | 5d3dbf3db60f86f9cb737bf9cbd41d507720b9a8 | |
parent | b1d76a7d57762332cd8e0c020470d43c5ad3948e (diff) |
powerpc/eeh: Dump PHB diag-data early
On the PowerNV platform, PHB diag-data is dumped after stopping device
drivers. In case of recursive EEH errors, the kernel usually crashes
before the PHB diag-data for the second EEH error is dumped. Without
that PHB diag-data, it's hard to locate the root cause of the second
EEH error.
The patch adds one more EEH option, "eeh=early_log", which dumps the
PHB diag-data immediately once a frozen PE is detected, so that the
PHB diag-data for the second EEH error can be captured.
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- | arch/powerpc/include/asm/eeh.h | 1 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh.c | 2 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-ioda.c | 13 |
3 files changed, 15 insertions, 1 deletions
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h index 2e633b41712a..0652ebe117af 100644 --- a/arch/powerpc/include/asm/eeh.h +++ b/arch/powerpc/include/asm/eeh.h | |||
@@ -39,6 +39,7 @@ struct device_node; | |||
39 | #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ | 39 | #define EEH_PROBE_MODE_DEV 0x04 /* From PCI device */ |
40 | #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ | 40 | #define EEH_PROBE_MODE_DEVTREE 0x08 /* From device tree */ |
41 | #define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */ | 41 | #define EEH_ENABLE_IO_FOR_LOG 0x10 /* Enable IO for log */ |
42 | #define EEH_EARLY_DUMP_LOG 0x20 /* Dump log immediately */ | ||
42 | 43 | ||
43 | /* | 44 | /* |
44 | * Delay for PE reset, all in ms | 45 | * Delay for PE reset, all in ms |
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index f1c6b115cb37..05be77d9ea0e 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c | |||
@@ -143,6 +143,8 @@ static int __init eeh_setup(char *str) | |||
143 | { | 143 | { |
144 | if (!strcmp(str, "off")) | 144 | if (!strcmp(str, "off")) |
145 | eeh_add_flag(EEH_FORCE_DISABLED); | 145 | eeh_add_flag(EEH_FORCE_DISABLED); |
146 | else if (!strcmp(str, "early_log")) | ||
147 | eeh_add_flag(EEH_EARLY_DUMP_LOG); | ||
146 | 148 | ||
147 | return 1; | 149 | return 1; |
148 | } | 150 | } |
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index fb38fe4dba89..2809c9895288 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c | |||
@@ -353,6 +353,9 @@ static int ioda_eeh_get_phb_state(struct eeh_pe *pe) | |||
353 | } else if (!(pe->state & EEH_PE_ISOLATED)) { | 353 | } else if (!(pe->state & EEH_PE_ISOLATED)) { |
354 | eeh_pe_state_mark(pe, EEH_PE_ISOLATED); | 354 | eeh_pe_state_mark(pe, EEH_PE_ISOLATED); |
355 | ioda_eeh_phb_diag(pe); | 355 | ioda_eeh_phb_diag(pe); |
356 | |||
357 | if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) | ||
358 | pnv_pci_dump_phb_diag_data(pe->phb, pe->data); | ||
356 | } | 359 | } |
357 | 360 | ||
358 | return result; | 361 | return result; |
@@ -451,6 +454,9 @@ static int ioda_eeh_get_pe_state(struct eeh_pe *pe) | |||
451 | 454 | ||
452 | eeh_pe_state_mark(pe, EEH_PE_ISOLATED); | 455 | eeh_pe_state_mark(pe, EEH_PE_ISOLATED); |
453 | ioda_eeh_phb_diag(pe); | 456 | ioda_eeh_phb_diag(pe); |
457 | |||
458 | if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) | ||
459 | pnv_pci_dump_phb_diag_data(pe->phb, pe->data); | ||
454 | } | 460 | } |
455 | 461 | ||
456 | return result; | 462 | return result; |
@@ -730,7 +736,8 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option) | |||
730 | static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, | 736 | static int ioda_eeh_get_log(struct eeh_pe *pe, int severity, |
731 | char *drv_log, unsigned long len) | 737 | char *drv_log, unsigned long len) |
732 | { | 738 | { |
733 | pnv_pci_dump_phb_diag_data(pe->phb, pe->data); | 739 | if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) |
740 | pnv_pci_dump_phb_diag_data(pe->phb, pe->data); | ||
734 | 741 | ||
735 | return 0; | 742 | return 0; |
736 | } | 743 | } |
@@ -1086,6 +1093,10 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) | |||
1086 | !((*pe)->state & EEH_PE_ISOLATED)) { | 1093 | !((*pe)->state & EEH_PE_ISOLATED)) { |
1087 | eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); | 1094 | eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); |
1088 | ioda_eeh_phb_diag(*pe); | 1095 | ioda_eeh_phb_diag(*pe); |
1096 | |||
1097 | if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) | ||
1098 | pnv_pci_dump_phb_diag_data((*pe)->phb, | ||
1099 | (*pe)->data); | ||
1089 | } | 1100 | } |
1090 | 1101 | ||
1091 | /* | 1102 | /* |