aboutsummaryrefslogtreecommitdiffstats
path: root/arch/powerpc/kernel/eeh_pe.c
diff options
context:
space:
mode:
author Gavin Shan <gwshan@linux.vnet.ibm.com> 2014-09-29 22:38:59 -0400
committer Michael Ellerman <mpe@ellerman.id.au> 2014-09-30 03:15:12 -0400
commit 22fca17924094113fe79c1db5135290e1a84ad4b (patch)
tree 493a7594644c7386695f78fe1edfa258cd2a04f8 /arch/powerpc/kernel/eeh_pe.c
parent d9df1b5da17cd328301def1d2ae2c2df35f3823c (diff)
powerpc/eeh: Clear frozen device state in time
The problem was reported by Carol: when an mlx4 adapter is passed through to a guest, an EEH error can be recovered successfully. However, when the device is returned to the host, the driver (mlx4_core.ko) cannot be loaded because of error number -5 (-EIO) returned from mlx4_get_ownership(), which hits an offlined PCI device.

The root cause is that we failed to put the affected devices back into the normal state when clearing the PE isolated state right after PE reset. This patch fixes the issue by putting the affected devices into the normal state when clearing the PE isolated state in eeh_pe_state_clear().

Cc: stable@vger.kernel.org
Reported-by: Carol L. Soto <clsoto@us.ibm.com>
Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Diffstat (limited to 'arch/powerpc/kernel/eeh_pe.c')
-rw-r--r-- arch/powerpc/kernel/eeh_pe.c 21
1 files changed, 18 insertions, 3 deletions
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index 5864017e2bd9..53dd0915e690 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -584,6 +584,8 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
584{ 584{
585 struct eeh_pe *pe = (struct eeh_pe *)data; 585 struct eeh_pe *pe = (struct eeh_pe *)data;
586 int state = *((int *)flag); 586 int state = *((int *)flag);
587 struct eeh_dev *edev, *tmp;
588 struct pci_dev *pdev;
587 589
588 /* Keep the state of permanently removed PE intact */ 590 /* Keep the state of permanently removed PE intact */
589 if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) && 591 if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
@@ -592,9 +594,22 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
592 594
593 pe->state &= ~state; 595 pe->state &= ~state;
594 596
595 /* Clear check count since last isolation */ 597 /*
596 if (state & EEH_PE_ISOLATED) 598 * Special treatment on clearing isolated state. Clear
597 pe->check_count = 0; 599 * check count since last isolation and put all affected
600 * devices to normal state.
601 */
602 if (!(state & EEH_PE_ISOLATED))
603 return NULL;
604
605 pe->check_count = 0;
606 eeh_pe_for_each_dev(pe, edev, tmp) {
607 pdev = eeh_dev_to_pci_dev(edev);
608 if (!pdev)
609 continue;
610
611 pdev->error_state = pci_channel_io_normal;
612 }
598 613
599 return NULL; 614 return NULL;
600} 615}