aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/misc
diff options
context:
space:
mode:
authorKleber Sacilotto de Souza <klebers@linux.vnet.ibm.com>2014-06-04 09:57:52 -0400
committerGreg Kroah-Hartman <gregkh@linuxfoundation.org>2014-07-09 17:14:27 -0400
commit93b772b25fa905c9158ee73c11c87b48668eabd0 (patch)
tree12816bd16086ade26b3a44794353fa31f966f3d6 /drivers/misc
parentfb145456fa4f4311f90703aeee058bab3b274bf8 (diff)
GenWQE: Improve hardware error recovery
Currently, in the event of a fatal hardware error, the driver tries a recovery procedure that calls pci_reset_function() to reset the card. This is not sufficient in some cases, where a fundamental reset is needed to bring the card back. This patch implements a call to the platform fundamental reset procedure on the error recovery path if GENWQE_PLATFORM_ERROR_RECOVERY is enabled. This is enabled by default only on PPC64, since it can cause problems on other architectures, e.g. zSeries, where the platform has its own recovery procedures, leading to a potential race condition. For these cases, the recovery is kept as it was before. Signed-off-by: Kleber Sacilotto de Souza <klebers@linux.vnet.ibm.com> Acked-by: Frank Haverkamp <haver@linux.vnet.ibm.com> Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Diffstat (limited to 'drivers/misc')
-rw-r--r--drivers/misc/genwqe/card_base.c45
1 files changed, 45 insertions, 0 deletions
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index 87ebaba9b133..abb796187ceb 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -797,6 +797,41 @@ static int genwqe_pci_fundamental_reset(struct pci_dev *pci_dev)
797 return rc; 797 return rc;
798} 798}
799 799
800
/*
 * genwqe_platform_recovery() - try to recover the card via fundamental reset
 * @cd: GenWQE device descriptor; its pci_dev and err_inject fields are used
 *
 * Logs the attempt, clears the hardware-failure, GFIR-fatal and GFIR-info
 * bits in cd->err_inject, calls genwqe_stop(), and then calls
 * genwqe_pci_fundamental_reset(). If the reset returns 0, genwqe_start() is
 * called. The outcome is logged in every case.
 *
 * Return: 0 if both the reset and genwqe_start() returned 0; otherwise the
 * non-zero code returned by whichever of the two failed.
 */
801static int genwqe_platform_recovery(struct genwqe_dev *cd)
802{
803 struct pci_dev *pci_dev = cd->pci_dev;
804 int rc;
805
806 dev_info(&pci_dev->dev,
807 "[%s] resetting card for error recovery\n", __func__);
808
809 /* Clear the error injection flags before resetting the card */
810 cd->err_inject &= ~(GENWQE_INJECT_HARDWARE_FAILURE |
811 GENWQE_INJECT_GFIR_FATAL |
812 GENWQE_INJECT_GFIR_INFO);
813
814 genwqe_stop(cd);
815
816 /* Try recovering the card with a fundamental reset */
817 rc = genwqe_pci_fundamental_reset(pci_dev);
818 if (!rc) {
/* Reset returned 0: try to start the card again */
819 rc = genwqe_start(cd);
820 if (!rc)
821 dev_info(&pci_dev->dev,
822 "[%s] card recovered\n", __func__);
823 else
824 dev_err(&pci_dev->dev,
825 "[%s] err: cannot start card services! (err=%d)\n",
826 __func__, rc);
827 } else {
828 dev_err(&pci_dev->dev,
829 "[%s] card reset failed\n", __func__);
830 }
831
/* rc is 0 only when both the reset and genwqe_start() returned 0 */
832 return rc;
833}
834
800/* 835/*
801 * genwqe_reload_bistream() - reload card bitstream 836 * genwqe_reload_bistream() - reload card bitstream
802 * 837 *
@@ -875,6 +910,7 @@ static int genwqe_health_thread(void *data)
875 struct pci_dev *pci_dev = cd->pci_dev; 910 struct pci_dev *pci_dev = cd->pci_dev;
876 u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg; 911 u64 gfir, gfir_masked, slu_unitcfg, app_unitcfg;
877 912
913 health_thread_begin:
878 while (!kthread_should_stop()) { 914 while (!kthread_should_stop()) {
879 rc = wait_event_interruptible_timeout(cd->health_waitq, 915 rc = wait_event_interruptible_timeout(cd->health_waitq,
880 (genwqe_health_check_cond(cd, &gfir) || 916 (genwqe_health_check_cond(cd, &gfir) ||
@@ -960,6 +996,15 @@ static int genwqe_health_thread(void *data)
960 /* We do nothing if the card is going over PCI recovery */ 996 /* We do nothing if the card is going over PCI recovery */
961 if (pci_channel_offline(pci_dev)) 997 if (pci_channel_offline(pci_dev))
962 return -EIO; 998 return -EIO;
999
1000 /*
1001 * If it's supported by the platform, we try a fundamental reset
1002 * to recover from a fatal error. Otherwise, we continue to wait
1003 * for an external recovery procedure to take care of it.
1004 */
1005 rc = genwqe_platform_recovery(cd);
1006 if (!rc)
1007 goto health_thread_begin;
963 } 1008 }
964 1009
965 dev_err(&pci_dev->dev, 1010 dev_err(&pci_dev->dev,