about summary refs log tree commit diff stats
path: root/drivers/misc
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/misc')
-rw-r--r-- drivers/misc/genwqe/Kconfig 6
-rw-r--r-- drivers/misc/genwqe/card_base.c 79
-rw-r--r-- drivers/misc/genwqe/card_base.h 2
-rw-r--r-- drivers/misc/genwqe/card_ddcb.c 24
-rw-r--r-- drivers/misc/genwqe/card_debugfs.c 7
-rw-r--r-- drivers/misc/genwqe/card_dev.c 5
-rw-r--r-- drivers/misc/genwqe/card_utils.c 10
7 files changed, 115 insertions, 18 deletions
diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig
index 6069d8cd79d7..4c0a033cbfdb 100644
--- a/drivers/misc/genwqe/Kconfig
+++ b/drivers/misc/genwqe/Kconfig
@@ -11,3 +11,9 @@ menuconfig GENWQE
11 Enables PCIe card driver for IBM GenWQE accelerators. 11 Enables PCIe card driver for IBM GenWQE accelerators.
12 The user-space interface is described in 12 The user-space interface is described in
13 include/linux/genwqe/genwqe_card.h. 13 include/linux/genwqe/genwqe_card.h.
14
15config GENWQE_PLATFORM_ERROR_RECOVERY
16 int "Use platform recovery procedures (0=off, 1=on)"
17 depends on GENWQE
18 default 1 if PPC64
19 default 0
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c
index e6cc3e1e7326..87ebaba9b133 100644
--- a/drivers/misc/genwqe/card_base.c
+++ b/drivers/misc/genwqe/card_base.c
@@ -140,6 +140,12 @@ static struct genwqe_dev *genwqe_dev_alloc(void)
140 cd->class_genwqe = class_genwqe; 140 cd->class_genwqe = class_genwqe;
141 cd->debugfs_genwqe = debugfs_genwqe; 141 cd->debugfs_genwqe = debugfs_genwqe;
142 142
143 /*
144 * This comes from kernel config option and can be overridden via
145 * debugfs.
146 */
147 cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY;
148
143 init_waitqueue_head(&cd->queue_waitq); 149 init_waitqueue_head(&cd->queue_waitq);
144 150
145 spin_lock_init(&cd->file_lock); 151 spin_lock_init(&cd->file_lock);
@@ -943,6 +949,19 @@ static int genwqe_health_thread(void *data)
943 return 0; 949 return 0;
944 950
945 fatal_error: 951 fatal_error:
952 if (cd->use_platform_recovery) {
953 /*
954 * Since we use raw accessors, EEH errors won't be detected
955 * by the platform until we do a non-raw MMIO or config space
956 * read
957 */
958 readq(cd->mmio + IO_SLC_CFGREG_GFIR);
959
960 /* We do nothing if the card is going through PCI recovery */
961 if (pci_channel_offline(pci_dev))
962 return -EIO;
963 }
964
946 dev_err(&pci_dev->dev, 965 dev_err(&pci_dev->dev,
947 "[%s] card unusable. Please trigger unbind!\n", __func__); 966 "[%s] card unusable. Please trigger unbind!\n", __func__);
948 967
@@ -1048,6 +1067,9 @@ static int genwqe_pci_setup(struct genwqe_dev *cd)
1048 pci_set_master(pci_dev); 1067 pci_set_master(pci_dev);
1049 pci_enable_pcie_error_reporting(pci_dev); 1068 pci_enable_pcie_error_reporting(pci_dev);
1050 1069
1070 /* EEH recovery requires PCIe fundamental reset */
1071 pci_dev->needs_freset = 1;
1072
1051 /* request complete BAR-0 space (length = 0) */ 1073 /* request complete BAR-0 space (length = 0) */
1052 cd->mmio_len = pci_resource_len(pci_dev, 0); 1074 cd->mmio_len = pci_resource_len(pci_dev, 0);
1053 cd->mmio = pci_iomap(pci_dev, 0, 0); 1075 cd->mmio = pci_iomap(pci_dev, 0, 0);
@@ -1186,23 +1208,40 @@ static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev,
1186 1208
1187 dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); 1209 dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state);
1188 1210
1189 if (pci_dev == NULL)
1190 return PCI_ERS_RESULT_NEED_RESET;
1191
1192 cd = dev_get_drvdata(&pci_dev->dev); 1211 cd = dev_get_drvdata(&pci_dev->dev);
1193 if (cd == NULL) 1212 if (cd == NULL)
1194 return PCI_ERS_RESULT_NEED_RESET; 1213 return PCI_ERS_RESULT_DISCONNECT;
1195 1214
1196 switch (state) { 1215 /* Stop the card */
1197 case pci_channel_io_normal: 1216 genwqe_health_check_stop(cd);
1198 return PCI_ERS_RESULT_CAN_RECOVER; 1217 genwqe_stop(cd);
1199 case pci_channel_io_frozen: 1218
1200 return PCI_ERS_RESULT_NEED_RESET; 1219 /*
1201 case pci_channel_io_perm_failure: 1220 * On permanent failure, the PCI code will call device remove
1221 * after the return of this function.
1222 * genwqe_stop() can be called twice.
1223 */
1224 if (state == pci_channel_io_perm_failure) {
1202 return PCI_ERS_RESULT_DISCONNECT; 1225 return PCI_ERS_RESULT_DISCONNECT;
1226 } else {
1227 genwqe_pci_remove(cd);
1228 return PCI_ERS_RESULT_NEED_RESET;
1203 } 1229 }
1230}
1231
1232static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev)
1233{
1234 int rc;
1235 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
1204 1236
1205 return PCI_ERS_RESULT_NEED_RESET; 1237 rc = genwqe_pci_setup(cd);
1238 if (!rc) {
1239 return PCI_ERS_RESULT_RECOVERED;
1240 } else {
1241 dev_err(&pci_dev->dev,
1242 "err: problems with PCI setup (err=%d)\n", rc);
1243 return PCI_ERS_RESULT_DISCONNECT;
1244 }
1206} 1245}
1207 1246
1208static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) 1247static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
@@ -1210,8 +1249,22 @@ static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev)
1210 return PCI_ERS_RESULT_NONE; 1249 return PCI_ERS_RESULT_NONE;
1211} 1250}
1212 1251
1213static void genwqe_err_resume(struct pci_dev *dev) 1252static void genwqe_err_resume(struct pci_dev *pci_dev)
1214{ 1253{
1254 int rc;
1255 struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev);
1256
1257 rc = genwqe_start(cd);
1258 if (!rc) {
1259 rc = genwqe_health_check_start(cd);
1260 if (rc)
1261 dev_err(&pci_dev->dev,
1262 "err: cannot start health checking! (err=%d)\n",
1263 rc);
1264 } else {
1265 dev_err(&pci_dev->dev,
1266 "err: cannot start card services! (err=%d)\n", rc);
1267 }
1215} 1268}
1216 1269
1217static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs) 1270static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs)
@@ -1234,7 +1287,7 @@ static struct pci_error_handlers genwqe_err_handler = {
1234 .error_detected = genwqe_err_error_detected, 1287 .error_detected = genwqe_err_error_detected,
1235 .mmio_enabled = genwqe_err_result_none, 1288 .mmio_enabled = genwqe_err_result_none,
1236 .link_reset = genwqe_err_result_none, 1289 .link_reset = genwqe_err_result_none,
1237 .slot_reset = genwqe_err_result_none, 1290 .slot_reset = genwqe_err_slot_reset,
1238 .resume = genwqe_err_resume, 1291 .resume = genwqe_err_resume,
1239}; 1292};
1240 1293
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h
index 0e608a288603..67abd8cb2247 100644
--- a/drivers/misc/genwqe/card_base.h
+++ b/drivers/misc/genwqe/card_base.h
@@ -291,6 +291,8 @@ struct genwqe_dev {
291 struct task_struct *health_thread; 291 struct task_struct *health_thread;
292 wait_queue_head_t health_waitq; 292 wait_queue_head_t health_waitq;
293 293
294 int use_platform_recovery; /* use platform recovery mechanisms */
295
294 /* char device */ 296 /* char device */
295 dev_t devnum_genwqe; /* major/minor num card */ 297 dev_t devnum_genwqe; /* major/minor num card */
296 struct class *class_genwqe; /* reference to class object */ 298 struct class *class_genwqe; /* reference to class object */
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c
index c8046db2d5a2..f0de6153bea2 100644
--- a/drivers/misc/genwqe/card_ddcb.c
+++ b/drivers/misc/genwqe/card_ddcb.c
@@ -1118,7 +1118,21 @@ static irqreturn_t genwqe_pf_isr(int irq, void *dev_id)
1118 * safer, but slower for the good-case ... See above. 1118 * safer, but slower for the good-case ... See above.
1119 */ 1119 */
1120 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 1120 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR);
1121 if ((gfir & GFIR_ERR_TRIGGER) != 0x0) { 1121 if (((gfir & GFIR_ERR_TRIGGER) != 0x0) &&
1122 !pci_channel_offline(pci_dev)) {
1123
1124 if (cd->use_platform_recovery) {
1125 /*
1126 * Since we use raw accessors, EEH errors won't be
1127 * detected by the platform until we do a non-raw
1128 * MMIO or config space read
1129 */
1130 readq(cd->mmio + IO_SLC_CFGREG_GFIR);
1131
1132 /* Don't do anything if the PCI channel is frozen */
1133 if (pci_channel_offline(pci_dev))
1134 goto exit;
1135 }
1122 1136
1123 wake_up_interruptible(&cd->health_waitq); 1137 wake_up_interruptible(&cd->health_waitq);
1124 1138
@@ -1126,12 +1140,12 @@ static irqreturn_t genwqe_pf_isr(int irq, void *dev_id)
1126 * By default GFIRs cause recovery actions. This 1140 * By default GFIRs cause recovery actions. This
1127 * count is just for debug when recovery is masked. 1141 * count is just for debug when recovery is masked.
1128 */ 1142 */
1129 printk_ratelimited(KERN_ERR 1143 dev_err_ratelimited(&pci_dev->dev,
1130 "%s %s: [%s] GFIR=%016llx\n", 1144 "[%s] GFIR=%016llx\n",
1131 GENWQE_DEVNAME, dev_name(&pci_dev->dev), 1145 __func__, gfir);
1132 __func__, gfir);
1133 } 1146 }
1134 1147
1148 exit:
1135 return IRQ_HANDLED; 1149 return IRQ_HANDLED;
1136} 1150}
1137 1151
diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c
index 0a33ade64109..c9b4d6d0eb99 100644
--- a/drivers/misc/genwqe/card_debugfs.c
+++ b/drivers/misc/genwqe/card_debugfs.c
@@ -485,6 +485,13 @@ int genwqe_init_debugfs(struct genwqe_dev *cd)
485 goto err1; 485 goto err1;
486 } 486 }
487 487
488 file = debugfs_create_u32("use_platform_recovery", 0666, root,
489 &cd->use_platform_recovery);
490 if (!file) {
491 ret = -ENOMEM;
492 goto err1;
493 }
494
488 cd->debugfs_root = root; 495 cd->debugfs_root = root;
489 return 0; 496 return 0;
490err1: 497err1:
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c
index 1d2f163a1906..aae42555e2ca 100644
--- a/drivers/misc/genwqe/card_dev.c
+++ b/drivers/misc/genwqe/card_dev.c
@@ -1048,10 +1048,15 @@ static long genwqe_ioctl(struct file *filp, unsigned int cmd,
1048 int rc = 0; 1048 int rc = 0;
1049 struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; 1049 struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data;
1050 struct genwqe_dev *cd = cfile->cd; 1050 struct genwqe_dev *cd = cfile->cd;
1051 struct pci_dev *pci_dev = cd->pci_dev;
1051 struct genwqe_reg_io __user *io; 1052 struct genwqe_reg_io __user *io;
1052 u64 val; 1053 u64 val;
1053 u32 reg_offs; 1054 u32 reg_offs;
1054 1055
1056 /* Return -EIO if card hit EEH */
1057 if (pci_channel_offline(pci_dev))
1058 return -EIO;
1059
1055 if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) 1060 if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE)
1056 return -EINVAL; 1061 return -EINVAL;
1057 1062
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c
index 62cc6bb3f62e..4a500582eef0 100644
--- a/drivers/misc/genwqe/card_utils.c
+++ b/drivers/misc/genwqe/card_utils.c
@@ -53,12 +53,17 @@
53 */ 53 */
54int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) 54int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val)
55{ 55{
56 struct pci_dev *pci_dev = cd->pci_dev;
57
56 if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 58 if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
57 return -EIO; 59 return -EIO;
58 60
59 if (cd->mmio == NULL) 61 if (cd->mmio == NULL)
60 return -EIO; 62 return -EIO;
61 63
64 if (pci_channel_offline(pci_dev))
65 return -EIO;
66
62 __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); 67 __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs);
63 return 0; 68 return 0;
64} 69}
@@ -99,12 +104,17 @@ u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs)
99 */ 104 */
100int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) 105int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val)
101{ 106{
107 struct pci_dev *pci_dev = cd->pci_dev;
108
102 if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 109 if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE)
103 return -EIO; 110 return -EIO;
104 111
105 if (cd->mmio == NULL) 112 if (cd->mmio == NULL)
106 return -EIO; 113 return -EIO;
107 114
115 if (pci_channel_offline(pci_dev))
116 return -EIO;
117
108 __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); 118 __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs);
109 return 0; 119 return 0;
110} 120}