diff options
author | Gavin Shan <shangw@linux.vnet.ibm.com> | 2013-06-27 01:46:46 -0400 |
---|---|---|
committer | Benjamin Herrenschmidt <benh@kernel.crashing.org> | 2013-06-30 21:10:33 -0400 |
commit | 56ca4fde90009094b1a46971de3879d5f2dd724e (patch) | |
tree | 2a49909519e9e41e0dd8d13ecdc82bae6dbfd75e /arch/powerpc | |
parent | 88b6d14b2bb48ea4f66fedfe671f98544395b305 (diff) |
powerpc/eeh: Refactor the output message
We don't need the whole backtrace, only a one-line message, in
the error reporting interrupt handler. For errors triggered by
accessing PCI config space or MMIO, we replace "WARN(1, ...)" with
pr_err() and dump_stack(). The patch also adds more output messages
to indicate what the EEH core is doing. Besides, some printk() calls
are replaced with pr_warning().
Signed-off-by: Gavin Shan <shangw@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Diffstat (limited to 'arch/powerpc')
-rw-r--r-- | arch/powerpc/kernel/eeh.c | 9 | ||||
-rw-r--r-- | arch/powerpc/kernel/eeh_driver.c | 23 | ||||
-rw-r--r-- | arch/powerpc/platforms/powernv/eeh-ioda.c | 25 |
3 files changed, 41 insertions, 16 deletions
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c index 416fb432d7e2..3a8f82fd9005 100644 --- a/arch/powerpc/kernel/eeh.c +++ b/arch/powerpc/kernel/eeh.c | |||
@@ -329,7 +329,9 @@ static int eeh_phb_check_failure(struct eeh_pe *pe) | |||
329 | eeh_serialize_unlock(flags); | 329 | eeh_serialize_unlock(flags); |
330 | eeh_send_failure_event(phb_pe); | 330 | eeh_send_failure_event(phb_pe); |
331 | 331 | ||
332 | WARN(1, "EEH: PHB failure detected\n"); | 332 | pr_err("EEH: PHB#%x failure detected\n", |
333 | phb_pe->phb->global_number); | ||
334 | dump_stack(); | ||
333 | 335 | ||
334 | return 1; | 336 | return 1; |
335 | out: | 337 | out: |
@@ -458,7 +460,10 @@ int eeh_dev_check_failure(struct eeh_dev *edev) | |||
458 | * a stack trace will help the device-driver authors figure | 460 | * a stack trace will help the device-driver authors figure |
459 | * out what happened. So print that out. | 461 | * out what happened. So print that out. |
460 | */ | 462 | */ |
461 | WARN(1, "EEH: failure detected\n"); | 463 | pr_err("EEH: Frozen PE#%x detected on PHB#%x\n", |
464 | pe->addr, pe->phb->global_number); | ||
465 | dump_stack(); | ||
466 | |||
462 | return 1; | 467 | return 1; |
463 | 468 | ||
464 | dn_unlock: | 469 | dn_unlock: |
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c index 0974e1326842..2b1ce17cae50 100644 --- a/arch/powerpc/kernel/eeh_driver.c +++ b/arch/powerpc/kernel/eeh_driver.c | |||
@@ -425,6 +425,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) | |||
425 | * status ... if any child can't handle the reset, then the entire | 425 | * status ... if any child can't handle the reset, then the entire |
426 | * slot is dlpar removed and added. | 426 | * slot is dlpar removed and added. |
427 | */ | 427 | */ |
428 | pr_info("EEH: Notify device drivers to shutdown\n"); | ||
428 | eeh_pe_dev_traverse(pe, eeh_report_error, &result); | 429 | eeh_pe_dev_traverse(pe, eeh_report_error, &result); |
429 | 430 | ||
430 | /* Get the current PCI slot state. This can take a long time, | 431 | /* Get the current PCI slot state. This can take a long time, |
@@ -432,7 +433,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) | |||
432 | */ | 433 | */ |
433 | rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); | 434 | rc = eeh_ops->wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000); |
434 | if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { | 435 | if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { |
435 | printk(KERN_WARNING "EEH: Permanent failure\n"); | 436 | pr_warning("EEH: Permanent failure\n"); |
436 | goto hard_fail; | 437 | goto hard_fail; |
437 | } | 438 | } |
438 | 439 | ||
@@ -440,6 +441,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) | |||
440 | * don't post the error log until after all dev drivers | 441 | * don't post the error log until after all dev drivers |
441 | * have been informed. | 442 | * have been informed. |
442 | */ | 443 | */ |
444 | pr_info("EEH: Collect temporary log\n"); | ||
443 | eeh_slot_error_detail(pe, EEH_LOG_TEMP); | 445 | eeh_slot_error_detail(pe, EEH_LOG_TEMP); |
444 | 446 | ||
445 | /* If all device drivers were EEH-unaware, then shut | 447 | /* If all device drivers were EEH-unaware, then shut |
@@ -447,15 +449,18 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) | |||
447 | * go down willingly, without panicing the system. | 449 | * go down willingly, without panicing the system. |
448 | */ | 450 | */ |
449 | if (result == PCI_ERS_RESULT_NONE) { | 451 | if (result == PCI_ERS_RESULT_NONE) { |
452 | pr_info("EEH: Reset with hotplug activity\n"); | ||
450 | rc = eeh_reset_device(pe, frozen_bus); | 453 | rc = eeh_reset_device(pe, frozen_bus); |
451 | if (rc) { | 454 | if (rc) { |
452 | printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); | 455 | pr_warning("%s: Unable to reset, err=%d\n", |
456 | __func__, rc); | ||
453 | goto hard_fail; | 457 | goto hard_fail; |
454 | } | 458 | } |
455 | } | 459 | } |
456 | 460 | ||
457 | /* If all devices reported they can proceed, then re-enable MMIO */ | 461 | /* If all devices reported they can proceed, then re-enable MMIO */ |
458 | if (result == PCI_ERS_RESULT_CAN_RECOVER) { | 462 | if (result == PCI_ERS_RESULT_CAN_RECOVER) { |
463 | pr_info("EEH: Enable I/O for affected devices\n"); | ||
459 | rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); | 464 | rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); |
460 | 465 | ||
461 | if (rc < 0) | 466 | if (rc < 0) |
@@ -463,6 +468,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) | |||
463 | if (rc) { | 468 | if (rc) { |
464 | result = PCI_ERS_RESULT_NEED_RESET; | 469 | result = PCI_ERS_RESULT_NEED_RESET; |
465 | } else { | 470 | } else { |
471 | pr_info("EEH: Notify device drivers to resume I/O\n"); | ||
466 | result = PCI_ERS_RESULT_NONE; | 472 | result = PCI_ERS_RESULT_NONE; |
467 | eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); | 473 | eeh_pe_dev_traverse(pe, eeh_report_mmio_enabled, &result); |
468 | } | 474 | } |
@@ -470,6 +476,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) | |||
470 | 476 | ||
471 | /* If all devices reported they can proceed, then re-enable DMA */ | 477 | /* If all devices reported they can proceed, then re-enable DMA */ |
472 | if (result == PCI_ERS_RESULT_CAN_RECOVER) { | 478 | if (result == PCI_ERS_RESULT_CAN_RECOVER) { |
479 | pr_info("EEH: Enabled DMA for affected devices\n"); | ||
473 | rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); | 480 | rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); |
474 | 481 | ||
475 | if (rc < 0) | 482 | if (rc < 0) |
@@ -482,17 +489,22 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) | |||
482 | 489 | ||
483 | /* If any device has a hard failure, then shut off everything. */ | 490 | /* If any device has a hard failure, then shut off everything. */ |
484 | if (result == PCI_ERS_RESULT_DISCONNECT) { | 491 | if (result == PCI_ERS_RESULT_DISCONNECT) { |
485 | printk(KERN_WARNING "EEH: Device driver gave up\n"); | 492 | pr_warning("EEH: Device driver gave up\n"); |
486 | goto hard_fail; | 493 | goto hard_fail; |
487 | } | 494 | } |
488 | 495 | ||
489 | /* If any device called out for a reset, then reset the slot */ | 496 | /* If any device called out for a reset, then reset the slot */ |
490 | if (result == PCI_ERS_RESULT_NEED_RESET) { | 497 | if (result == PCI_ERS_RESULT_NEED_RESET) { |
498 | pr_info("EEH: Reset without hotplug activity\n"); | ||
491 | rc = eeh_reset_device(pe, NULL); | 499 | rc = eeh_reset_device(pe, NULL); |
492 | if (rc) { | 500 | if (rc) { |
493 | printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); | 501 | pr_warning("%s: Cannot reset, err=%d\n", |
502 | __func__, rc); | ||
494 | goto hard_fail; | 503 | goto hard_fail; |
495 | } | 504 | } |
505 | |||
506 | pr_info("EEH: Notify device drivers " | ||
507 | "the completion of reset\n"); | ||
496 | result = PCI_ERS_RESULT_NONE; | 508 | result = PCI_ERS_RESULT_NONE; |
497 | eeh_pe_dev_traverse(pe, eeh_report_reset, &result); | 509 | eeh_pe_dev_traverse(pe, eeh_report_reset, &result); |
498 | } | 510 | } |
@@ -500,11 +512,12 @@ static void eeh_handle_normal_event(struct eeh_pe *pe) | |||
500 | /* All devices should claim they have recovered by now. */ | 512 | /* All devices should claim they have recovered by now. */ |
501 | if ((result != PCI_ERS_RESULT_RECOVERED) && | 513 | if ((result != PCI_ERS_RESULT_RECOVERED) && |
502 | (result != PCI_ERS_RESULT_NONE)) { | 514 | (result != PCI_ERS_RESULT_NONE)) { |
503 | printk(KERN_WARNING "EEH: Not recovered\n"); | 515 | pr_warning("EEH: Not recovered\n"); |
504 | goto hard_fail; | 516 | goto hard_fail; |
505 | } | 517 | } |
506 | 518 | ||
507 | /* Tell all device drivers that they can resume operations */ | 519 | /* Tell all device drivers that they can resume operations */ |
520 | pr_info("EEH: Notify device driver to resume\n"); | ||
508 | eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); | 521 | eeh_pe_dev_traverse(pe, eeh_report_resume, NULL); |
509 | 522 | ||
510 | return; | 523 | return; |
diff --git a/arch/powerpc/platforms/powernv/eeh-ioda.c b/arch/powerpc/platforms/powernv/eeh-ioda.c index 85025d7e6396..0cd1c4a71755 100644 --- a/arch/powerpc/platforms/powernv/eeh-ioda.c +++ b/arch/powerpc/platforms/powernv/eeh-ioda.c | |||
@@ -853,11 +853,14 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) | |||
853 | phb->eeh_state |= PNV_EEH_STATE_REMOVED; | 853 | phb->eeh_state |= PNV_EEH_STATE_REMOVED; |
854 | } | 854 | } |
855 | 855 | ||
856 | WARN(1, "EEH: dead IOC detected\n"); | 856 | pr_err("EEH: dead IOC detected\n"); |
857 | ret = 4; | 857 | ret = 4; |
858 | goto out; | 858 | goto out; |
859 | } else if (severity == OPAL_EEH_SEV_INF) | 859 | } else if (severity == OPAL_EEH_SEV_INF) { |
860 | pr_info("EEH: IOC informative error " | ||
861 | "detected\n"); | ||
860 | ioda_eeh_hub_diag(hose); | 862 | ioda_eeh_hub_diag(hose); |
863 | } | ||
861 | 864 | ||
862 | break; | 865 | break; |
863 | case OPAL_EEH_PHB_ERROR: | 866 | case OPAL_EEH_PHB_ERROR: |
@@ -865,8 +868,8 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) | |||
865 | if (ioda_eeh_get_phb_pe(hose, pe)) | 868 | if (ioda_eeh_get_phb_pe(hose, pe)) |
866 | break; | 869 | break; |
867 | 870 | ||
868 | WARN(1, "EEH: dead PHB#%x detected\n", | 871 | pr_err("EEH: dead PHB#%x detected\n", |
869 | hose->global_number); | 872 | hose->global_number); |
870 | phb->eeh_state |= PNV_EEH_STATE_REMOVED; | 873 | phb->eeh_state |= PNV_EEH_STATE_REMOVED; |
871 | ret = 3; | 874 | ret = 3; |
872 | goto out; | 875 | goto out; |
@@ -874,20 +877,24 @@ static int ioda_eeh_next_error(struct eeh_pe **pe) | |||
874 | if (ioda_eeh_get_phb_pe(hose, pe)) | 877 | if (ioda_eeh_get_phb_pe(hose, pe)) |
875 | break; | 878 | break; |
876 | 879 | ||
877 | WARN(1, "EEH: fenced PHB#%x detected\n", | 880 | pr_err("EEH: fenced PHB#%x detected\n", |
878 | hose->global_number); | 881 | hose->global_number); |
879 | ret = 2; | 882 | ret = 2; |
880 | goto out; | 883 | goto out; |
881 | } else if (severity == OPAL_EEH_SEV_INF) | 884 | } else if (severity == OPAL_EEH_SEV_INF) { |
885 | pr_info("EEH: PHB#%x informative error " | ||
886 | "detected\n", | ||
887 | hose->global_number); | ||
882 | ioda_eeh_phb_diag(hose); | 888 | ioda_eeh_phb_diag(hose); |
889 | } | ||
883 | 890 | ||
884 | break; | 891 | break; |
885 | case OPAL_EEH_PE_ERROR: | 892 | case OPAL_EEH_PE_ERROR: |
886 | if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) | 893 | if (ioda_eeh_get_pe(hose, frozen_pe_no, pe)) |
887 | break; | 894 | break; |
888 | 895 | ||
889 | WARN(1, "EEH: Frozen PE#%x on PHB#%x detected\n", | 896 | pr_err("EEH: Frozen PE#%x on PHB#%x detected\n", |
890 | (*pe)->addr, (*pe)->phb->global_number); | 897 | (*pe)->addr, (*pe)->phb->global_number); |
891 | ret = 1; | 898 | ret = 1; |
892 | goto out; | 899 | goto out; |
893 | } | 900 | } |