aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
authorGavin Shan <shangw@linux.vnet.ibm.com>2014-02-25 02:28:37 -0500
committerBenjamin Herrenschmidt <benh@kernel.crashing.org>2014-02-28 02:43:10 -0500
commit947166043732b69878123bf31f51933ad0316080 (patch)
tree763a371720af25cf16aec333bf1c4bae1de1ff6c /arch
parent573ebfa6601fa58b439e7f15828762839ccd306a (diff)
powerpc/powernv: Dump PHB diag-data immediately
The PHB diag-data is important to help locating the root cause for EEH errors such as frozen PE or fenced PHB. However, the EEH core enables IO path by clearing part of HW registers before collecting this data causing it to be corrupted. This patch fixes this by dumping the PHB diag-data immediately when frozen/fenced state on PE or PHB is detected for the first time in eeh_ops::get_state() or next_error() backend. Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com> CC: <stable@vger.kernel.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch')
-rw-r--r--arch/powerpc/platforms/powernv/eeh-ioda.c96
1 files changed, 43 insertions, 53 deletions
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index f51474336460..253fefe3d1a0 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -114,6 +114,7 @@ DEFINE_SIMPLE_ATTRIBUTE(ioda_eeh_inbB_dbgfs_ops, ioda_eeh_inbB_dbgfs_get,
114 ioda_eeh_inbB_dbgfs_set, "0x%llx\n"); 114 ioda_eeh_inbB_dbgfs_set, "0x%llx\n");
115#endif /* CONFIG_DEBUG_FS */ 115#endif /* CONFIG_DEBUG_FS */
116 116
117
117/** 118/**
118 * ioda_eeh_post_init - Chip dependent post initialization 119 * ioda_eeh_post_init - Chip dependent post initialization
119 * @hose: PCI controller 120 * @hose: PCI controller
@@ -221,6 +222,22 @@ static int ioda_eeh_set_option(struct eeh_pe *pe, int option)
221 return ret; 222 return ret;
222} 223}
223 224
225static void ioda_eeh_phb_diag(struct pci_controller *hose)
226{
227 struct pnv_phb *phb = hose->private_data;
228 long rc;
229
230 rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
231 PNV_PCI_DIAG_BUF_SIZE);
232 if (rc != OPAL_SUCCESS) {
233 pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
234 __func__, hose->global_number, rc);
235 return;
236 }
237
238 pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
239}
240
224/** 241/**
225 * ioda_eeh_get_state - Retrieve the state of PE 242 * ioda_eeh_get_state - Retrieve the state of PE
226 * @pe: EEH PE 243 * @pe: EEH PE
@@ -272,6 +289,9 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
272 result |= EEH_STATE_DMA_ACTIVE; 289 result |= EEH_STATE_DMA_ACTIVE;
273 result |= EEH_STATE_MMIO_ENABLED; 290 result |= EEH_STATE_MMIO_ENABLED;
274 result |= EEH_STATE_DMA_ENABLED; 291 result |= EEH_STATE_DMA_ENABLED;
292 } else if (!(pe->state & EEH_PE_ISOLATED)) {
293 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
294 ioda_eeh_phb_diag(hose);
275 } 295 }
276 296
277 return result; 297 return result;
@@ -315,6 +335,15 @@ static int ioda_eeh_get_state(struct eeh_pe *pe)
315 __func__, fstate, hose->global_number, pe_no); 335 __func__, fstate, hose->global_number, pe_no);
316 } 336 }
317 337
338 /* Dump PHB diag-data for frozen PE */
339 if (result != EEH_STATE_NOT_SUPPORT &&
340 (result & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) !=
341 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE) &&
342 !(pe->state & EEH_PE_ISOLATED)) {
343 eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
344 ioda_eeh_phb_diag(hose);
345 }
346
318 return result; 347 return result;
319} 348}
320 349
@@ -530,42 +559,6 @@ static int ioda_eeh_reset(struct eeh_pe *pe, int option)
530} 559}
531 560
532/** 561/**
533 * ioda_eeh_get_log - Retrieve error log
534 * @pe: EEH PE
535 * @severity: Severity level of the log
536 * @drv_log: buffer to store the log
537 * @len: space of the log buffer
538 *
539 * The function is used to retrieve error log from P7IOC.
540 */
541static int ioda_eeh_get_log(struct eeh_pe *pe, int severity,
542 char *drv_log, unsigned long len)
543{
544 s64 ret;
545 unsigned long flags;
546 struct pci_controller *hose = pe->phb;
547 struct pnv_phb *phb = hose->private_data;
548
549 spin_lock_irqsave(&phb->lock, flags);
550
551 ret = opal_pci_get_phb_diag_data2(phb->opal_id,
552 phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE);
553 if (ret) {
554 spin_unlock_irqrestore(&phb->lock, flags);
555 pr_warning("%s: Can't get log for PHB#%x-PE#%x (%lld)\n",
556 __func__, hose->global_number, pe->addr, ret);
557 return -EIO;
558 }
559
560 /* The PHB diag-data is always indicative */
561 pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
562
563 spin_unlock_irqrestore(&phb->lock, flags);
564
565 return 0;
566}
567
568/**
569 * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE 562 * ioda_eeh_configure_bridge - Configure the PCI bridges for the indicated PE
570 * @pe: EEH PE 563 * @pe: EEH PE
571 * 564 *
@@ -646,22 +639,6 @@ static void ioda_eeh_hub_diag(struct pci_controller *hose)
646 } 639 }
647} 640}
648 641
649static void ioda_eeh_phb_diag(struct pci_controller *hose)
650{
651 struct pnv_phb *phb = hose->private_data;
652 long rc;
653
654 rc = opal_pci_get_phb_diag_data2(phb->opal_id, phb->diag.blob,
655 PNV_PCI_DIAG_BUF_SIZE);
656 if (rc != OPAL_SUCCESS) {
657 pr_warning("%s: Failed to get diag-data for PHB#%x (%ld)\n",
658 __func__, hose->global_number, rc);
659 return;
660 }
661
662 pnv_pci_dump_phb_diag_data(hose, phb->diag.blob);
663}
664
665static int ioda_eeh_get_phb_pe(struct pci_controller *hose, 642static int ioda_eeh_get_phb_pe(struct pci_controller *hose,
666 struct eeh_pe **pe) 643 struct eeh_pe **pe)
667{ 644{
@@ -835,6 +812,20 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
835 } 812 }
836 813
837 /* 814 /*
815 * EEH core will try recover from fenced PHB or
816 * frozen PE. In the time for frozen PE, EEH core
817 * enable IO path for that before collecting logs,
818 * but it ruins the site. So we have to dump the
819 * log in advance here.
820 */
821 if ((ret == EEH_NEXT_ERR_FROZEN_PE ||
822 ret == EEH_NEXT_ERR_FENCED_PHB) &&
823 !((*pe)->state & EEH_PE_ISOLATED)) {
824 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED);
825 ioda_eeh_phb_diag(hose);
826 }
827
828 /*
838 * If we have no errors on the specific PHB or only 829 * If we have no errors on the specific PHB or only
839 * informative error there, we continue poking it. 830 * informative error there, we continue poking it.
840 * Otherwise, we need actions to be taken by upper 831 * Otherwise, we need actions to be taken by upper
@@ -852,7 +843,6 @@ struct pnv_eeh_ops ioda_eeh_ops = {
852 .set_option = ioda_eeh_set_option, 843 .set_option = ioda_eeh_set_option,
853 .get_state = ioda_eeh_get_state, 844 .get_state = ioda_eeh_get_state,
854 .reset = ioda_eeh_reset, 845 .reset = ioda_eeh_reset,
855 .get_log = ioda_eeh_get_log,
856 .configure_bridge = ioda_eeh_configure_bridge, 846 .configure_bridge = ioda_eeh_configure_bridge,
857 .next_error = ioda_eeh_next_error 847 .next_error = ioda_eeh_next_error
858}; 848};