diff options
Diffstat (limited to 'drivers/misc')
-rw-r--r-- | drivers/misc/genwqe/Kconfig | 6 | ||||
-rw-r--r-- | drivers/misc/genwqe/card_base.c | 79 | ||||
-rw-r--r-- | drivers/misc/genwqe/card_base.h | 2 | ||||
-rw-r--r-- | drivers/misc/genwqe/card_ddcb.c | 24 | ||||
-rw-r--r-- | drivers/misc/genwqe/card_debugfs.c | 7 | ||||
-rw-r--r-- | drivers/misc/genwqe/card_dev.c | 5 | ||||
-rw-r--r-- | drivers/misc/genwqe/card_utils.c | 10 |
7 files changed, 115 insertions, 18 deletions
diff --git a/drivers/misc/genwqe/Kconfig b/drivers/misc/genwqe/Kconfig index 6069d8cd79d7..4c0a033cbfdb 100644 --- a/drivers/misc/genwqe/Kconfig +++ b/drivers/misc/genwqe/Kconfig | |||
@@ -11,3 +11,9 @@ menuconfig GENWQE | |||
11 | Enables PCIe card driver for IBM GenWQE accelerators. | 11 | Enables PCIe card driver for IBM GenWQE accelerators. |
12 | The user-space interface is described in | 12 | The user-space interface is described in |
13 | include/linux/genwqe/genwqe_card.h. | 13 | include/linux/genwqe/genwqe_card.h. |
14 | |||
15 | config GENWQE_PLATFORM_ERROR_RECOVERY | ||
16 | int "Use platform recovery procedures (0=off, 1=on)" | ||
17 | depends on GENWQE | ||
18 | default 1 if PPC64 | ||
19 | default 0 | ||
diff --git a/drivers/misc/genwqe/card_base.c b/drivers/misc/genwqe/card_base.c index e6cc3e1e7326..87ebaba9b133 100644 --- a/drivers/misc/genwqe/card_base.c +++ b/drivers/misc/genwqe/card_base.c | |||
@@ -140,6 +140,12 @@ static struct genwqe_dev *genwqe_dev_alloc(void) | |||
140 | cd->class_genwqe = class_genwqe; | 140 | cd->class_genwqe = class_genwqe; |
141 | cd->debugfs_genwqe = debugfs_genwqe; | 141 | cd->debugfs_genwqe = debugfs_genwqe; |
142 | 142 | ||
143 | /* | ||
144 | * This comes from kernel config option and can be overwritten via | ||
145 | * debugfs. | ||
146 | */ | ||
147 | cd->use_platform_recovery = CONFIG_GENWQE_PLATFORM_ERROR_RECOVERY; | ||
148 | |||
143 | init_waitqueue_head(&cd->queue_waitq); | 149 | init_waitqueue_head(&cd->queue_waitq); |
144 | 150 | ||
145 | spin_lock_init(&cd->file_lock); | 151 | spin_lock_init(&cd->file_lock); |
@@ -943,6 +949,19 @@ static int genwqe_health_thread(void *data) | |||
943 | return 0; | 949 | return 0; |
944 | 950 | ||
945 | fatal_error: | 951 | fatal_error: |
952 | if (cd->use_platform_recovery) { | ||
953 | /* | ||
954 | * Since we use raw accessors, EEH errors won't be detected | ||
955 | * by the platform until we do a non-raw MMIO or config space | ||
956 | * read | ||
957 | */ | ||
958 | readq(cd->mmio + IO_SLC_CFGREG_GFIR); | ||
959 | |||
960 | /* We do nothing if the card is going over PCI recovery */ | ||
961 | if (pci_channel_offline(pci_dev)) | ||
962 | return -EIO; | ||
963 | } | ||
964 | |||
946 | dev_err(&pci_dev->dev, | 965 | dev_err(&pci_dev->dev, |
947 | "[%s] card unusable. Please trigger unbind!\n", __func__); | 966 | "[%s] card unusable. Please trigger unbind!\n", __func__); |
948 | 967 | ||
@@ -1048,6 +1067,9 @@ static int genwqe_pci_setup(struct genwqe_dev *cd) | |||
1048 | pci_set_master(pci_dev); | 1067 | pci_set_master(pci_dev); |
1049 | pci_enable_pcie_error_reporting(pci_dev); | 1068 | pci_enable_pcie_error_reporting(pci_dev); |
1050 | 1069 | ||
1070 | /* EEH recovery requires PCIe fundamental reset */ | ||
1071 | pci_dev->needs_freset = 1; | ||
1072 | |||
1051 | /* request complete BAR-0 space (length = 0) */ | 1073 | /* request complete BAR-0 space (length = 0) */ |
1052 | cd->mmio_len = pci_resource_len(pci_dev, 0); | 1074 | cd->mmio_len = pci_resource_len(pci_dev, 0); |
1053 | cd->mmio = pci_iomap(pci_dev, 0, 0); | 1075 | cd->mmio = pci_iomap(pci_dev, 0, 0); |
@@ -1186,23 +1208,40 @@ static pci_ers_result_t genwqe_err_error_detected(struct pci_dev *pci_dev, | |||
1186 | 1208 | ||
1187 | dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); | 1209 | dev_err(&pci_dev->dev, "[%s] state=%d\n", __func__, state); |
1188 | 1210 | ||
1189 | if (pci_dev == NULL) | ||
1190 | return PCI_ERS_RESULT_NEED_RESET; | ||
1191 | |||
1192 | cd = dev_get_drvdata(&pci_dev->dev); | 1211 | cd = dev_get_drvdata(&pci_dev->dev); |
1193 | if (cd == NULL) | 1212 | if (cd == NULL) |
1194 | return PCI_ERS_RESULT_NEED_RESET; | 1213 | return PCI_ERS_RESULT_DISCONNECT; |
1195 | 1214 | ||
1196 | switch (state) { | 1215 | /* Stop the card */ |
1197 | case pci_channel_io_normal: | 1216 | genwqe_health_check_stop(cd); |
1198 | return PCI_ERS_RESULT_CAN_RECOVER; | 1217 | genwqe_stop(cd); |
1199 | case pci_channel_io_frozen: | 1218 | |
1200 | return PCI_ERS_RESULT_NEED_RESET; | 1219 | /* |
1201 | case pci_channel_io_perm_failure: | 1220 | * On permanent failure, the PCI code will call device remove |
1221 | * after the return of this function. | ||
1222 | * genwqe_stop() can be called twice. | ||
1223 | */ | ||
1224 | if (state == pci_channel_io_perm_failure) { | ||
1202 | return PCI_ERS_RESULT_DISCONNECT; | 1225 | return PCI_ERS_RESULT_DISCONNECT; |
1226 | } else { | ||
1227 | genwqe_pci_remove(cd); | ||
1228 | return PCI_ERS_RESULT_NEED_RESET; | ||
1203 | } | 1229 | } |
1230 | } | ||
1231 | |||
1232 | static pci_ers_result_t genwqe_err_slot_reset(struct pci_dev *pci_dev) | ||
1233 | { | ||
1234 | int rc; | ||
1235 | struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); | ||
1204 | 1236 | ||
1205 | return PCI_ERS_RESULT_NEED_RESET; | 1237 | rc = genwqe_pci_setup(cd); |
1238 | if (!rc) { | ||
1239 | return PCI_ERS_RESULT_RECOVERED; | ||
1240 | } else { | ||
1241 | dev_err(&pci_dev->dev, | ||
1242 | "err: problems with PCI setup (err=%d)\n", rc); | ||
1243 | return PCI_ERS_RESULT_DISCONNECT; | ||
1244 | } | ||
1206 | } | 1245 | } |
1207 | 1246 | ||
1208 | static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) | 1247 | static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) |
@@ -1210,8 +1249,22 @@ static pci_ers_result_t genwqe_err_result_none(struct pci_dev *dev) | |||
1210 | return PCI_ERS_RESULT_NONE; | 1249 | return PCI_ERS_RESULT_NONE; |
1211 | } | 1250 | } |
1212 | 1251 | ||
1213 | static void genwqe_err_resume(struct pci_dev *dev) | 1252 | static void genwqe_err_resume(struct pci_dev *pci_dev) |
1214 | { | 1253 | { |
1254 | int rc; | ||
1255 | struct genwqe_dev *cd = dev_get_drvdata(&pci_dev->dev); | ||
1256 | |||
1257 | rc = genwqe_start(cd); | ||
1258 | if (!rc) { | ||
1259 | rc = genwqe_health_check_start(cd); | ||
1260 | if (rc) | ||
1261 | dev_err(&pci_dev->dev, | ||
1262 | "err: cannot start health checking! (err=%d)\n", | ||
1263 | rc); | ||
1264 | } else { | ||
1265 | dev_err(&pci_dev->dev, | ||
1266 | "err: cannot start card services! (err=%d)\n", rc); | ||
1267 | } | ||
1215 | } | 1268 | } |
1216 | 1269 | ||
1217 | static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs) | 1270 | static int genwqe_sriov_configure(struct pci_dev *dev, int numvfs) |
@@ -1234,7 +1287,7 @@ static struct pci_error_handlers genwqe_err_handler = { | |||
1234 | .error_detected = genwqe_err_error_detected, | 1287 | .error_detected = genwqe_err_error_detected, |
1235 | .mmio_enabled = genwqe_err_result_none, | 1288 | .mmio_enabled = genwqe_err_result_none, |
1236 | .link_reset = genwqe_err_result_none, | 1289 | .link_reset = genwqe_err_result_none, |
1237 | .slot_reset = genwqe_err_result_none, | 1290 | .slot_reset = genwqe_err_slot_reset, |
1238 | .resume = genwqe_err_resume, | 1291 | .resume = genwqe_err_resume, |
1239 | }; | 1292 | }; |
1240 | 1293 | ||
diff --git a/drivers/misc/genwqe/card_base.h b/drivers/misc/genwqe/card_base.h index 0e608a288603..67abd8cb2247 100644 --- a/drivers/misc/genwqe/card_base.h +++ b/drivers/misc/genwqe/card_base.h | |||
@@ -291,6 +291,8 @@ struct genwqe_dev { | |||
291 | struct task_struct *health_thread; | 291 | struct task_struct *health_thread; |
292 | wait_queue_head_t health_waitq; | 292 | wait_queue_head_t health_waitq; |
293 | 293 | ||
294 | int use_platform_recovery; /* use platform recovery mechanisms */ | ||
295 | |||
294 | /* char device */ | 296 | /* char device */ |
295 | dev_t devnum_genwqe; /* major/minor num card */ | 297 | dev_t devnum_genwqe; /* major/minor num card */ |
296 | struct class *class_genwqe; /* reference to class object */ | 298 | struct class *class_genwqe; /* reference to class object */ |
diff --git a/drivers/misc/genwqe/card_ddcb.c b/drivers/misc/genwqe/card_ddcb.c index c8046db2d5a2..f0de6153bea2 100644 --- a/drivers/misc/genwqe/card_ddcb.c +++ b/drivers/misc/genwqe/card_ddcb.c | |||
@@ -1118,7 +1118,21 @@ static irqreturn_t genwqe_pf_isr(int irq, void *dev_id) | |||
1118 | * safer, but slower for the good-case ... See above. | 1118 | * safer, but slower for the good-case ... See above. |
1119 | */ | 1119 | */ |
1120 | gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); | 1120 | gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); |
1121 | if ((gfir & GFIR_ERR_TRIGGER) != 0x0) { | 1121 | if (((gfir & GFIR_ERR_TRIGGER) != 0x0) && |
1122 | !pci_channel_offline(pci_dev)) { | ||
1123 | |||
1124 | if (cd->use_platform_recovery) { | ||
1125 | /* | ||
1126 | * Since we use raw accessors, EEH errors won't be | ||
1127 | * detected by the platform until we do a non-raw | ||
1128 | * MMIO or config space read | ||
1129 | */ | ||
1130 | readq(cd->mmio + IO_SLC_CFGREG_GFIR); | ||
1131 | |||
1132 | /* Don't do anything if the PCI channel is frozen */ | ||
1133 | if (pci_channel_offline(pci_dev)) | ||
1134 | goto exit; | ||
1135 | } | ||
1122 | 1136 | ||
1123 | wake_up_interruptible(&cd->health_waitq); | 1137 | wake_up_interruptible(&cd->health_waitq); |
1124 | 1138 | ||
@@ -1126,12 +1140,12 @@ static irqreturn_t genwqe_pf_isr(int irq, void *dev_id) | |||
1126 | * By default GFIRs cause recovery actions. This | 1140 | * By default GFIRs cause recovery actions. This |
1127 | * count is just for debug when recovery is masked. | 1141 | * count is just for debug when recovery is masked. |
1128 | */ | 1142 | */ |
1129 | printk_ratelimited(KERN_ERR | 1143 | dev_err_ratelimited(&pci_dev->dev, |
1130 | "%s %s: [%s] GFIR=%016llx\n", | 1144 | "[%s] GFIR=%016llx\n", |
1131 | GENWQE_DEVNAME, dev_name(&pci_dev->dev), | 1145 | __func__, gfir); |
1132 | __func__, gfir); | ||
1133 | } | 1146 | } |
1134 | 1147 | ||
1148 | exit: | ||
1135 | return IRQ_HANDLED; | 1149 | return IRQ_HANDLED; |
1136 | } | 1150 | } |
1137 | 1151 | ||
diff --git a/drivers/misc/genwqe/card_debugfs.c b/drivers/misc/genwqe/card_debugfs.c index 0a33ade64109..c9b4d6d0eb99 100644 --- a/drivers/misc/genwqe/card_debugfs.c +++ b/drivers/misc/genwqe/card_debugfs.c | |||
@@ -485,6 +485,13 @@ int genwqe_init_debugfs(struct genwqe_dev *cd) | |||
485 | goto err1; | 485 | goto err1; |
486 | } | 486 | } |
487 | 487 | ||
488 | file = debugfs_create_u32("use_platform_recovery", 0666, root, | ||
489 | &cd->use_platform_recovery); | ||
490 | if (!file) { | ||
491 | ret = -ENOMEM; | ||
492 | goto err1; | ||
493 | } | ||
494 | |||
488 | cd->debugfs_root = root; | 495 | cd->debugfs_root = root; |
489 | return 0; | 496 | return 0; |
490 | err1: | 497 | err1: |
diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index 1d2f163a1906..aae42555e2ca 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c | |||
@@ -1048,10 +1048,15 @@ static long genwqe_ioctl(struct file *filp, unsigned int cmd, | |||
1048 | int rc = 0; | 1048 | int rc = 0; |
1049 | struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; | 1049 | struct genwqe_file *cfile = (struct genwqe_file *)filp->private_data; |
1050 | struct genwqe_dev *cd = cfile->cd; | 1050 | struct genwqe_dev *cd = cfile->cd; |
1051 | struct pci_dev *pci_dev = cd->pci_dev; | ||
1051 | struct genwqe_reg_io __user *io; | 1052 | struct genwqe_reg_io __user *io; |
1052 | u64 val; | 1053 | u64 val; |
1053 | u32 reg_offs; | 1054 | u32 reg_offs; |
1054 | 1055 | ||
1056 | /* Return -EIO if card hit EEH */ | ||
1057 | if (pci_channel_offline(pci_dev)) | ||
1058 | return -EIO; | ||
1059 | |||
1055 | if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) | 1060 | if (_IOC_TYPE(cmd) != GENWQE_IOC_CODE) |
1056 | return -EINVAL; | 1061 | return -EINVAL; |
1057 | 1062 | ||
diff --git a/drivers/misc/genwqe/card_utils.c b/drivers/misc/genwqe/card_utils.c index 62cc6bb3f62e..4a500582eef0 100644 --- a/drivers/misc/genwqe/card_utils.c +++ b/drivers/misc/genwqe/card_utils.c | |||
@@ -53,12 +53,17 @@ | |||
53 | */ | 53 | */ |
54 | int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) | 54 | int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) |
55 | { | 55 | { |
56 | struct pci_dev *pci_dev = cd->pci_dev; | ||
57 | |||
56 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) | 58 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) |
57 | return -EIO; | 59 | return -EIO; |
58 | 60 | ||
59 | if (cd->mmio == NULL) | 61 | if (cd->mmio == NULL) |
60 | return -EIO; | 62 | return -EIO; |
61 | 63 | ||
64 | if (pci_channel_offline(pci_dev)) | ||
65 | return -EIO; | ||
66 | |||
62 | __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); | 67 | __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); |
63 | return 0; | 68 | return 0; |
64 | } | 69 | } |
@@ -99,12 +104,17 @@ u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) | |||
99 | */ | 104 | */ |
100 | int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) | 105 | int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) |
101 | { | 106 | { |
107 | struct pci_dev *pci_dev = cd->pci_dev; | ||
108 | |||
102 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) | 109 | if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) |
103 | return -EIO; | 110 | return -EIO; |
104 | 111 | ||
105 | if (cd->mmio == NULL) | 112 | if (cd->mmio == NULL) |
106 | return -EIO; | 113 | return -EIO; |
107 | 114 | ||
115 | if (pci_channel_offline(pci_dev)) | ||
116 | return -EIO; | ||
117 | |||
108 | __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); | 118 | __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); |
109 | return 0; | 119 | return 0; |
110 | } | 120 | } |