aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc
diff options
context:
space:
mode:
author: Gavin Shan <shangw@linux.vnet.ibm.com> 2013-06-27 01:46:46 -0400
committer: Benjamin Herrenschmidt <benh@kernel.crashing.org> 2013-06-30 21:10:33 -0400
commit: 56ca4fde90009094b1a46971de3879d5f2dd724e (patch)
tree: 2a49909519e9e41e0dd8d13ecdc82bae6dbfd75e /arch/powerpc
parent: 88b6d14b2bb48ea4f66fedfe671f98544395b305 (diff)
powerpc/eeh: Refactor the output message
We don't need the whole backtrace, only a one-line message, in the error reporting interrupt handler. For errors triggered by accessing PCI config space or MMIO, we replace "WARN(1, ...)" with pr_err() and dump_stack(). The patch also adds more output messages to indicate what the EEH core is doing. Besides, some printk() calls are replaced with pr_warning(). Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- arch/powerpc/kernel/eeh.c 9
-rw-r--r-- arch/powerpc/kernel/eeh_driver.c 23
-rw-r--r-- arch/powerpc/platforms/powernv/eeh-ioda.c 25
3 files changed, 41 insertions, 16 deletions
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 416fb432d7e2..3a8f82fd9005 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -329,7 +329,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
329 eeh_serialize_unlock(flags); 329 eeh_serialize_unlock(flags);
330 eeh_send_failure_event(phb_pe); 330 eeh_send_failure_event(phb_pe);
331 331
332 WARN(1, "EEH: PHB failure detected\n"); 332 pr_err("EEH: PHB#%x failure detected\n",
333 phb_pe->phb->global_number);
334 dump_stack();
333 335
334 return 1; 336 return 1;
335out: 337out:
@@ -458,7 +460,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev)
458 * a stack trace will help the device-driver authors figure 460 * a stack trace will help the device-driver authors figure
459 * out what happened. So print that out. 461 * out what happened. So print that out.
460 */ 462 */
461 WARN(1, "EEH: failure detected\n"); 463 pr_err("EEH: Frozen PE#%x detected on PHB#%x\n",
464 pe->addr, pe->phb->global_number);
465 dump_stack();
466
462 return 1; 467 return 1;
463 468
464dn_unlock: 469dn_unlock:
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 0974e1326842..2b1ce17cae50 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -425,6 +425,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
425 * status ... if any child can't handle the reset, then the entire 425 * status ... if any child can't handle the reset, then the entire
426 * slot is dlpar removed and added. 426 * slot is dlpar removed and added.
427 */ 427 */
428 pr_info("EEH: Notify device drivers to shutdown\n");
428 eeh_pe_dev_traverse(pe, eeh_report_error, &result); 429 eeh_pe_dev_traverse(pe, eeh_report_error, &result);
429 430
430 /* Get the current PCI slot state. This can take a long time, 431 /* Get the current PCI slot state. This can take a long time,
@@ -432,7 +433,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
432 */ 433 */
433 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); 434 rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
434 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { 435 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
435 printk(KERN_WARNING "EEH: Permanent failure\n"); 436 pr_warning("EEH: Permanent failure\n");
436 goto hard_fail; 437 goto hard_fail;
437 } 438 }
438 439
@@ -440,6 +441,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
440 * don't post the error log until after all dev drivers 441 * don't post the error log until after all dev drivers
441 * have been informed. 442 * have been informed.
442 */ 443 */
444 pr_info("EEH: Collect temporary log\n");
443 eeh_slot_error_detail(pe, EEH_LOG_TEMP); 445 eeh_slot_error_detail(pe, EEH_LOG_TEMP);
444 446
445 /* If all device drivers were EEH-unaware, then shut 447 /* If all device drivers were EEH-unaware, then shut
@@ -447,15 +449,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
447 * go down willingly, without panicing the system. 449 * go down willingly, without panicing the system.
448 */ 450 */
449 if (result == PCI_ERS_RESULT_NONE) { 451 if (result == PCI_ERS_RESULT_NONE) {
452 pr_info("EEH: Reset with hotplug activity\n");
450 rc = eeh_reset_device(pe, frozen_bus); 453 rc = eeh_reset_device(pe, frozen_bus);
451 if (rc) { 454 if (rc) {
452 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); 455 pr_warning("%s: Unable to reset, err=%d\n",
456 __func__, rc);
453 goto hard_fail; 457 goto hard_fail;
454 } 458 }
455 } 459 }
456 460
457 /* If all devices reported they can proceed, then re-enable MMIO */ 461 /* If all devices reported they can proceed, then re-enable MMIO */
458 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 462 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
463 pr_info("EEH: Enable I/O for affected devices\n");
459 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 464 rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
460 465
461 if (rc < 0) 466 if (rc < 0)
@@ -463,6 +468,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
463 if (rc) { 468 if (rc) {
464 result = PCI_ERS_RESULT_NEED_RESET; 469 result = PCI_ERS_RESULT_NEED_RESET;
465 } else { 470 } else {
471 pr_info("EEH: Notify device drivers to resume I/O\n");
466 result = PCI_ERS_RESULT_NONE; 472 result = PCI_ERS_RESULT_NONE;
467 eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); 473 eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result);
468 } 474 }
@@ -470,6 +476,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
470 476
471 /* If all devices reported they can proceed, then re-enable DMA */ 477 /* If all devices reported they can proceed, then re-enable DMA */
472 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 478 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
479 pr_info("EEH: Enabled DMA for affected devices\n");
473 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 480 rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
474 481
475 if (rc < 0) 482 if (rc < 0)
@@ -482,17 +489,22 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
482 489
483 /* If any device has a hard failure, then shut off everything. */ 490 /* If any device has a hard failure, then shut off everything. */
484 if (result == PCI_ERS_RESULT_DISCONNECT) { 491 if (result == PCI_ERS_RESULT_DISCONNECT) {
485 printk(KERN_WARNING "EEH: Device driver gave up\n"); 492 pr_warning("EEH: Device driver gave up\n");
486 goto hard_fail; 493 goto hard_fail;
487 } 494 }
488 495
489 /* If any device called out for a reset, then reset the slot */ 496 /* If any device called out for a reset, then reset the slot */
490 if (result == PCI_ERS_RESULT_NEED_RESET) { 497 if (result == PCI_ERS_RESULT_NEED_RESET) {
498 pr_info("EEH: Reset without hotplug activity\n");
491 rc = eeh_reset_device(pe, NULL); 499 rc = eeh_reset_device(pe, NULL);
492 if (rc) { 500 if (rc) {
493 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); 501 pr_warning("%s: Cannot reset, err=%d\n",
502 __func__, rc);
494 goto hard_fail; 503 goto hard_fail;
495 } 504 }
505
506 pr_info("EEH: Notify device drivers "
507 "the completion of reset\n");
496 result = PCI_ERS_RESULT_NONE; 508 result = PCI_ERS_RESULT_NONE;
497 eeh_pe_dev_traverse(pe, eeh_report_reset, &result); 509 eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
498 } 510 }
@@ -500,11 +512,12 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
500 /* All devices should claim they have recovered by now. */ 512 /* All devices should claim they have recovered by now. */
501 if ((result != PCI_ERS_RESULT_RECOVERED) && 513 if ((result != PCI_ERS_RESULT_RECOVERED) &&
502 (result != PCI_ERS_RESULT_NONE)) { 514 (result != PCI_ERS_RESULT_NONE)) {
503 printk(KERN_WARNING "EEH: Not recovered\n"); 515 pr_warning("EEH: Not recovered\n");
504 goto hard_fail; 516 goto hard_fail;
505 } 517 }
506 518
507 /* Tell all device drivers that they can resume operations */ 519 /* Tell all device drivers that they can resume operations */
520 pr_info("EEH: Notify device driver to resume\n");
508 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); 521 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
509 522
510 return; 523 return;
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c
index 85025d7e6396..0cd1c4a71755 100644
--- a/arch/powerpc/platforms/powernv/eeh-ioda.c
+++ b/arch/powerpc/platforms/powernv/eeh-ioda.c
@@ -853,11 +853,14 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
853 phb->eeh_state |= PNV_EEH_STATE_REMOVED; 853 phb->eeh_state |= PNV_EEH_STATE_REMOVED;
854 } 854 }
855 855
856 WARN(1, "EEH: dead IOC detected\n"); 856 pr_err("EEH: dead IOC detected\n");
857 ret = 4; 857 ret = 4;
858 goto out; 858 goto out;
859 } else if (severity == OPAL_EEH_SEV_INF) 859 } else if (severity == OPAL_EEH_SEV_INF) {
860 pr_info("EEH: IOC informative error "
861 "detected\n");
860 ioda_eeh_hub_diag(hose); 862 ioda_eeh_hub_diag(hose);
863 }
861 864
862 break; 865 break;
863 case OPAL_EEH_PHB_ERROR: 866 case OPAL_EEH_PHB_ERROR:
@@ -865,8 +868,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
865 if (ioda_eeh_get_phb_pe(hose, pe)) 868 if (ioda_eeh_get_phb_pe(hose, pe))
866 break; 869 break;
867 870
868 WARN(1, "EEH: dead PHB#%x detected\n", 871 pr_err("EEH: dead PHB#%x detected\n",
869 hose->global_number); 872 hose->global_number);
870 phb->eeh_state |= PNV_EEH_STATE_REMOVED; 873 phb->eeh_state |= PNV_EEH_STATE_REMOVED;
871 ret = 3; 874 ret = 3;
872 goto out; 875 goto out;
@@ -874,20 +877,24 @@ static int ioda_eeh_next_error(struct eeh_pe **pe)
874 if (ioda_eeh_get_phb_pe(hose, pe)) 877 if (ioda_eeh_get_phb_pe(hose, pe))
875 break; 878 break;
876 879
877 WARN(1, "EEH: fenced PHB#%x detected\n", 880 pr_err("EEH: fenced PHB#%x detected\n",
878 hose->global_number); 881 hose->global_number);
879 ret = 2; 882 ret = 2;
880 goto out; 883 goto out;
881 } else if (severity == OPAL_EEH_SEV_INF) 884 } else if (severity == OPAL_EEH_SEV_INF) {
885 pr_info("EEH: PHB#%x informative error "
886 "detected\n",
887 hose->global_number);
882 ioda_eeh_phb_diag(hose); 888 ioda_eeh_phb_diag(hose);
889 }
883 890
884 break; 891 break;
885 case OPAL_EEH_PE_ERROR: 892 case OPAL_EEH_PE_ERROR:
886 if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) 893 if (ioda_eeh_get_pe(hose, frozen_pe_no, pe))
887 break; 894 break;
888 895
889 WARN(1, "EEH: Frozen PE#%x on PHB#%x detected\n", 896 pr_err("EEH: Frozen PE#%x on PHB#%x detected\n",
890 (*pe)->addr, (*pe)->phb->global_number); 897 (*pe)->addr, (*pe)->phb->global_number);
891 ret = 1; 898 ret = 1;
892 goto out; 899 goto out;
893 } 900 }