aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel
diff options
context:
space:
mode:
authorGavin Shan <gwshan@linux.vnet.ibm.com>2014-09-29 22:39:07 -0400
committerMichael Ellerman <mpe@ellerman.id.au>2014-09-30 03:15:18 -0400
commit5cfb20b96f624e9852c4f3f1c4397e81ca28d5aa (patch)
treeacf784a0da023e6ef83e60514e3ee3fa1fe30a53 /arch/powerpc/kernel
parent93e8b36d7bf5c54f1c52d8b78e34f88e52a3dfa2 (diff)
powerpc/eeh: Emulate EEH recovery for VFIO devices
When EEH functionality is enabled on passed-through devices (PE) with VFIO, the devices in the PE would be removed permanently from the guest side. In that case, the PE remains in the frozen state. When returning the PE to the host, or restarting the guest, we had a mechanism to unfreeze the PE by clearing the PESTA/B frozen bits. However, that's not enough for some adapters, which are listed in the "lspci" output below. Those adapters require a hot reset on the parent bus to bring their firmware back to a workable state. Otherwise, those adapters won't be operative, and the host (in the returning case) or the guest will invariably fail to load the drivers for those adapters. 0000:01:00.0 Ethernet controller: Emulex Corporation OneConnect \ 10Gb NIC (be3) (rev 02) 0000:01:00.0 0200: 19a2:0710 (rev 02) 0001:03:00.0 Ethernet controller: Emulex Corporation OneConnect \ NIC (Lancer) (rev 10) 0001:03:00.0 0200: 10df:e220 (rev 10) The patch adds a mechanism to emulate EEH recovery (a hot reset on the parent PCI bus) at 3 gates to fix the issue: opening or releasing one adapter of the PE, and enabling EEH functionality on one adapter of the PE. Reported-by: Murilo Fossa Vicentini <muvic@br.ibm.com> Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com> Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--arch/powerpc/kernel/eeh.c59
-rw-r--r--arch/powerpc/kernel/eeh_driver.c90
2 files changed, 143 insertions, 6 deletions
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index 693690827785..3350b8490dbc 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -1193,6 +1193,60 @@ int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
1193 return ret; 1193 return ret;
1194} 1194}
1195 1195
1196
1197static struct pci_device_id eeh_reset_ids[] = {
1198 { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */
1199 { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */
1200 { 0 }
1201};
1202
1203static int eeh_pe_change_owner(struct eeh_pe *pe)
1204{
1205 struct eeh_dev *edev, *tmp;
1206 struct pci_dev *pdev;
1207 struct pci_device_id *id;
1208 int flags, ret;
1209
1210 /* Check PE state */
1211 flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
1212 ret = eeh_ops->get_state(pe, NULL);
1213 if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
1214 return 0;
1215
1216 /* Unfrozen PE, nothing to do */
1217 if ((ret & flags) == flags)
1218 return 0;
1219
1220 /* Frozen PE, check if it needs PE level reset */
1221 eeh_pe_for_each_dev(pe, edev, tmp) {
1222 pdev = eeh_dev_to_pci_dev(edev);
1223 if (!pdev)
1224 continue;
1225
1226 for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
1227 if (id->vendor != PCI_ANY_ID &&
1228 id->vendor != pdev->vendor)
1229 continue;
1230 if (id->device != PCI_ANY_ID &&
1231 id->device != pdev->device)
1232 continue;
1233 if (id->subvendor != PCI_ANY_ID &&
1234 id->subvendor != pdev->subsystem_vendor)
1235 continue;
1236 if (id->subdevice != PCI_ANY_ID &&
1237 id->subdevice != pdev->subsystem_device)
1238 continue;
1239
1240 goto reset;
1241 }
1242 }
1243
1244 return eeh_unfreeze_pe(pe, true);
1245
1246reset:
1247 return eeh_pe_reset_and_recover(pe);
1248}
1249
1196/** 1250/**
1197 * eeh_dev_open - Increase count of pass through devices for PE 1251 * eeh_dev_open - Increase count of pass through devices for PE
1198 * @pdev: PCI device 1252 * @pdev: PCI device
@@ -1224,7 +1278,7 @@ int eeh_dev_open(struct pci_dev *pdev)
1224 * in frozen PE won't work properly. Clear the frozen state 1278 * in frozen PE won't work properly. Clear the frozen state
1225 * in advance. 1279 * in advance.
1226 */ 1280 */
1227 ret = eeh_unfreeze_pe(edev->pe, true); 1281 ret = eeh_pe_change_owner(edev->pe);
1228 if (ret) 1282 if (ret)
1229 goto out; 1283 goto out;
1230 1284
@@ -1265,6 +1319,7 @@ void eeh_dev_release(struct pci_dev *pdev)
1265 /* Decrease PE's pass through count */ 1319 /* Decrease PE's pass through count */
1266 atomic_dec(&edev->pe->pass_dev_cnt); 1320 atomic_dec(&edev->pe->pass_dev_cnt);
1267 WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0); 1321 WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
1322 eeh_pe_change_owner(edev->pe);
1268out: 1323out:
1269 mutex_unlock(&eeh_dev_mutex); 1324 mutex_unlock(&eeh_dev_mutex);
1270} 1325}
@@ -1345,7 +1400,7 @@ int eeh_pe_set_option(struct eeh_pe *pe, int option)
1345 switch (option) { 1400 switch (option) {
1346 case EEH_OPT_ENABLE: 1401 case EEH_OPT_ENABLE:
1347 if (eeh_enabled()) { 1402 if (eeh_enabled()) {
1348 ret = eeh_unfreeze_pe(pe, true); 1403 ret = eeh_pe_change_owner(pe);
1349 break; 1404 break;
1350 } 1405 }
1351 ret = -EIO; 1406 ret = -EIO;
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 948e6f99089f..3fd514f8e4b2 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -180,6 +180,22 @@ static bool eeh_dev_removed(struct eeh_dev *edev)
180 return false; 180 return false;
181} 181}
182 182
183static void *eeh_dev_save_state(void *data, void *userdata)
184{
185 struct eeh_dev *edev = data;
186 struct pci_dev *pdev;
187
188 if (!edev)
189 return NULL;
190
191 pdev = eeh_dev_to_pci_dev(edev);
192 if (!pdev)
193 return NULL;
194
195 pci_save_state(pdev);
196 return NULL;
197}
198
183/** 199/**
184 * eeh_report_error - Report pci error to each device driver 200 * eeh_report_error - Report pci error to each device driver
185 * @data: eeh device 201 * @data: eeh device
@@ -303,6 +319,22 @@ static void *eeh_report_reset(void *data, void *userdata)
303 return NULL; 319 return NULL;
304} 320}
305 321
322static void *eeh_dev_restore_state(void *data, void *userdata)
323{
324 struct eeh_dev *edev = data;
325 struct pci_dev *pdev;
326
327 if (!edev)
328 return NULL;
329
330 pdev = eeh_dev_to_pci_dev(edev);
331 if (!pdev)
332 return NULL;
333
334 pci_restore_state(pdev);
335 return NULL;
336}
337
306/** 338/**
307 * eeh_report_resume - Tell device to resume normal operations 339 * eeh_report_resume - Tell device to resume normal operations
308 * @data: eeh device 340 * @data: eeh device
@@ -450,10 +482,11 @@ static void *eeh_pe_detach_dev(void *data, void *userdata)
450static void *__eeh_clear_pe_frozen_state(void *data, void *flag) 482static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
451{ 483{
452 struct eeh_pe *pe = (struct eeh_pe *)data; 484 struct eeh_pe *pe = (struct eeh_pe *)data;
485 bool *clear_sw_state = flag;
453 int i, rc = 1; 486 int i, rc = 1;
454 487
455 for (i = 0; rc && i < 3; i++) 488 for (i = 0; rc && i < 3; i++)
456 rc = eeh_unfreeze_pe(pe, false); 489 rc = eeh_unfreeze_pe(pe, clear_sw_state);
457 490
458 /* Stop immediately on any errors */ 491 /* Stop immediately on any errors */
459 if (rc) { 492 if (rc) {
@@ -465,17 +498,66 @@ static void *__eeh_clear_pe_frozen_state(void *data, void *flag)
465 return NULL; 498 return NULL;
466} 499}
467 500
468static int eeh_clear_pe_frozen_state(struct eeh_pe *pe) 501static int eeh_clear_pe_frozen_state(struct eeh_pe *pe,
502 bool clear_sw_state)
469{ 503{
470 void *rc; 504 void *rc;
471 505
472 rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, NULL); 506 rc = eeh_pe_traverse(pe, __eeh_clear_pe_frozen_state, &clear_sw_state);
473 if (!rc) 507 if (!rc)
474 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 508 eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
475 509
476 return rc ? -EIO : 0; 510 return rc ? -EIO : 0;
477} 511}
478 512
513int eeh_pe_reset_and_recover(struct eeh_pe *pe)
514{
515 int result, ret;
516
517 /* Bail if the PE is being recovered */
518 if (pe->state & EEH_PE_RECOVERING)
519 return 0;
520
521 /* Put the PE into recovery mode */
522 eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
523
524 /* Save states */
525 eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
526
527 /* Report error */
528 eeh_pe_dev_traverse(pe, eeh_report_error, &result);
529
530 /* Issue reset */
531 eeh_pe_state_mark(pe, EEH_PE_RESET);
532 ret = eeh_reset_pe(pe);
533 if (ret) {
534 eeh_pe_state_clear(pe, EEH_PE_RECOVERING | EEH_PE_RESET);
535 return ret;
536 }
537 eeh_pe_state_clear(pe, EEH_PE_RESET);
538
539 /* Unfreeze the PE */
540 ret = eeh_clear_pe_frozen_state(pe, true);
541 if (ret) {
542 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
543 return ret;
544 }
545
546 /* Notify completion of reset */
547 eeh_pe_dev_traverse(pe, eeh_report_reset, &result);
548
549 /* Restore device state */
550 eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
551
552 /* Resume */
553 eeh_pe_dev_traverse(pe, eeh_report_resume, NULL);
554
555 /* Clear recovery mode */
556 eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
557
558 return 0;
559}
560
479/** 561/**
480 * eeh_reset_device - Perform actual reset of a pci slot 562 * eeh_reset_device - Perform actual reset of a pci slot
481 * @pe: EEH PE 563 * @pe: EEH PE
@@ -534,7 +616,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus)
534 eeh_pe_state_clear(pe, EEH_PE_RESET); 616 eeh_pe_state_clear(pe, EEH_PE_RESET);
535 617
536 /* Clear frozen state */ 618 /* Clear frozen state */
537 rc = eeh_clear_pe_frozen_state(pe); 619 rc = eeh_clear_pe_frozen_state(pe, false);
538 if (rc) 620 if (rc)
539 return rc; 621 return rc;
540 622