aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Gavin Shan <gwshan@linux.vnet.ibm.com> 2014-04-24 04:00:19 -0400
committer Benjamin Herrenschmidt <benh@kernel.crashing.org> 2014-04-28 03:34:32 -0400
commit d2b0f6f77ee525811b6efe864efa6a4eb82eea73 (patch)
tree 84205706f9cc2e03425ba3a48edf2a1d527e3267
parent 7f52a526f64c69c913f0027fbf43821ff0b3a7d7 (diff)
powerpc/eeh: No hotplug on permanently removed dev
The issue was detected in a somewhat complicated test case where we have
multiple hierarchical PEs, as shown in the following figure:

    +-----------------+
    | PE#3    p2p#0   |
    |         p2p#1   |
    +-----------------+
             |
    +-----------------+
    | PE#4    pdev#0  |
    |         pdev#1  |
    +-----------------+

PE#4 (which has 2 PCI devices) is the child of PE#3, which has 2 p2p
bridges. We accidentally hit a lesser-known scenario: PE#4 was removed
permanently from the system because of a permanent failure (e.g.
exceeding the maximum allowed number of failures in the last hour), then
we detected EEH errors on PE#3 and tried to recover it. However, the
eeh_dev instances for pdev#0/1 were not detached from PE#4, which was
still connected to PE#3. All of that was because we rely on the
count-based pcibios_release_device(), which isn't reliable enough. When
doing recovery for PE#3, we still applied hotplug to PE#4 and pdev#0/1,
which are not valid any more. Eventually, we ran into a kernel crash.

The patch fixes the above issue from two aspects. For unplug, we simply
skip those permanently removed PEs, whose state is
(EEH_PE_ISOLATED && !EEH_PE_RECOVERING) and whose freeze count is greater
than EEH_MAX_ALLOWED_FREEZES. For plug, we mark all permanently removed
EEH devices with EEH_DEV_REMOVED and return 0xFF's on reads of their PCI
config space so that the PCI core will omit them.

Signed-off-by: Gavin Shan <gwshan@linux.vnet.ibm.com>
Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
-rw-r--r-- arch/powerpc/include/asm/eeh.h 1
-rw-r--r-- arch/powerpc/include/asm/ppc-pci.h 1
-rw-r--r-- arch/powerpc/kernel/eeh_driver.c 48
-rw-r--r-- arch/powerpc/kernel/eeh_pe.c 47
-rw-r--r-- arch/powerpc/kernel/pci_of_scan.c 9
-rw-r--r-- arch/powerpc/platforms/powernv/pci.c 13
6 files changed, 100 insertions, 19 deletions
diff --git a/arch/powerpc/include/asm/eeh.h b/arch/powerpc/include/asm/eeh.h
index f4a93218fbcb..2841ecac4c47 100644
--- a/arch/powerpc/include/asm/eeh.h
+++ b/arch/powerpc/include/asm/eeh.h
@@ -98,6 +98,7 @@ struct eeh_pe {
98 98
99#define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */ 99#define EEH_DEV_NO_HANDLER (1 << 8) /* No error handler */
100#define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */ 100#define EEH_DEV_SYSFS (1 << 9) /* Sysfs created */
101#define EEH_DEV_REMOVED (1 << 10) /* Removed permanently */
101 102
102struct eeh_dev { 103struct eeh_dev {
103 int mode; /* EEH mode */ 104 int mode; /* EEH mode */
diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
index ed57fa7920c8..db1e2b8eff3c 100644
--- a/arch/powerpc/include/asm/ppc-pci.h
+++ b/arch/powerpc/include/asm/ppc-pci.h
@@ -58,6 +58,7 @@ int rtas_write_config(struct pci_dn *, int where, int size, u32 val);
58int rtas_read_config(struct pci_dn *, int where, int size, u32 *val); 58int rtas_read_config(struct pci_dn *, int where, int size, u32 *val);
59void eeh_pe_state_mark(struct eeh_pe *pe, int state); 59void eeh_pe_state_mark(struct eeh_pe *pe, int state);
60void eeh_pe_state_clear(struct eeh_pe *pe, int state); 60void eeh_pe_state_clear(struct eeh_pe *pe, int state);
61void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode);
61 62
62void eeh_sysfs_add_device(struct pci_dev *pdev); 63void eeh_sysfs_add_device(struct pci_dev *pdev);
63void eeh_sysfs_remove_device(struct pci_dev *pdev); 64void eeh_sysfs_remove_device(struct pci_dev *pdev);
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 1f1e2cc045a9..f99ba9b76322 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -171,6 +171,15 @@ static void eeh_enable_irq(struct pci_dev *dev)
171 } 171 }
172} 172}
173 173
174static bool eeh_dev_removed(struct eeh_dev *edev)
175{
176 /* EEH device removed ? */
177 if (!edev || (edev->mode & EEH_DEV_REMOVED))
178 return true;
179
180 return false;
181}
182
174/** 183/**
175 * eeh_report_error - Report pci error to each device driver 184 * eeh_report_error - Report pci error to each device driver
176 * @data: eeh device 185 * @data: eeh device
@@ -187,10 +196,8 @@ static void *eeh_report_error(void *data, void *userdata)
187 enum pci_ers_result rc, *res = userdata; 196 enum pci_ers_result rc, *res = userdata;
188 struct pci_driver *driver; 197 struct pci_driver *driver;
189 198
190 /* We might not have the associated PCI device, 199 if (!dev || eeh_dev_removed(edev))
191 * then we should continue for next one. 200 return NULL;
192 */
193 if (!dev) return NULL;
194 dev->error_state = pci_channel_io_frozen; 201 dev->error_state = pci_channel_io_frozen;
195 202
196 driver = eeh_pcid_get(dev); 203 driver = eeh_pcid_get(dev);
@@ -230,6 +237,9 @@ static void *eeh_report_mmio_enabled(void *data, void *userdata)
230 enum pci_ers_result rc, *res = userdata; 237 enum pci_ers_result rc, *res = userdata;
231 struct pci_driver *driver; 238 struct pci_driver *driver;
232 239
240 if (!dev || eeh_dev_removed(edev))
241 return NULL;
242
233 driver = eeh_pcid_get(dev); 243 driver = eeh_pcid_get(dev);
234 if (!driver) return NULL; 244 if (!driver) return NULL;
235 245
@@ -267,7 +277,8 @@ static void *eeh_report_reset(void *data, void *userdata)
267 enum pci_ers_result rc, *res = userdata; 277 enum pci_ers_result rc, *res = userdata;
268 struct pci_driver *driver; 278 struct pci_driver *driver;
269 279
270 if (!dev) return NULL; 280 if (!dev || eeh_dev_removed(edev))
281 return NULL;
271 dev->error_state = pci_channel_io_normal; 282 dev->error_state = pci_channel_io_normal;
272 283
273 driver = eeh_pcid_get(dev); 284 driver = eeh_pcid_get(dev);
@@ -307,7 +318,8 @@ static void *eeh_report_resume(void *data, void *userdata)
307 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 318 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
308 struct pci_driver *driver; 319 struct pci_driver *driver;
309 320
310 if (!dev) return NULL; 321 if (!dev || eeh_dev_removed(edev))
322 return NULL;
311 dev->error_state = pci_channel_io_normal; 323 dev->error_state = pci_channel_io_normal;
312 324
313 driver = eeh_pcid_get(dev); 325 driver = eeh_pcid_get(dev);
@@ -343,7 +355,8 @@ static void *eeh_report_failure(void *data, void *userdata)
343 struct pci_dev *dev = eeh_dev_to_pci_dev(edev); 355 struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
344 struct pci_driver *driver; 356 struct pci_driver *driver;
345 357
346 if (!dev) return NULL; 358 if (!dev || eeh_dev_removed(edev))
359 return NULL;
347 dev->error_state = pci_channel_io_perm_failure; 360 dev->error_state = pci_channel_io_perm_failure;
348 361
349 driver = eeh_pcid_get(dev); 362 driver = eeh_pcid_get(dev);
@@ -380,6 +393,16 @@ static void *eeh_rmv_device(void *data, void *userdata)
380 if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE)) 393 if (!dev || (dev->hdr_type & PCI_HEADER_TYPE_BRIDGE))
381 return NULL; 394 return NULL;
382 395
396 /*
397 * We rely on count-based pcibios_release_device() to
398 * detach permanently offlined PEs. Unfortunately, that's
399 * not reliable enough. We might have the permanently
400 * offlined PEs attached, but we needn't take care of
401 * them and their child devices.
402 */
403 if (eeh_dev_removed(edev))
404 return NULL;
405
383 driver = eeh_pcid_get(dev); 406 driver = eeh_pcid_get(dev);
384 if (driver) { 407 if (driver) {
385 eeh_pcid_put(dev); 408 eeh_pcid_put(dev);
@@ -694,8 +717,17 @@ perm_error:
694 /* Notify all devices that they're about to go down. */ 717 /* Notify all devices that they're about to go down. */
695 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL); 718 eeh_pe_dev_traverse(pe, eeh_report_failure, NULL);
696 719
697 /* Shut down the device drivers for good. */ 720 /* Mark the PE to be removed permanently */
721 pe->freeze_count = EEH_MAX_ALLOWED_FREEZES + 1;
722
723 /*
724 * Shut down the device drivers for good. We mark
725 * all removed devices correctly to avoid accessing
726 * their PCI config any more.
727 */
698 if (frozen_bus) { 728 if (frozen_bus) {
729 eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
730
699 pci_lock_rescan_remove(); 731 pci_lock_rescan_remove();
700 pcibios_remove_pci_devices(frozen_bus); 732 pcibios_remove_pci_devices(frozen_bus);
701 pci_unlock_rescan_remove(); 733 pci_unlock_rescan_remove();
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index f0c353fa655a..995c2a284630 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -503,13 +503,17 @@ static void *__eeh_pe_state_mark(void *data, void *flag)
503 struct eeh_dev *edev, *tmp; 503 struct eeh_dev *edev, *tmp;
504 struct pci_dev *pdev; 504 struct pci_dev *pdev;
505 505
506 /* 506 /* Keep the state of permanently removed PE intact */
507 * Mark the PE with the indicated state. Also, 507 if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
508 * the associated PCI device will be put into 508 (state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
509 * I/O frozen state to avoid I/O accesses from 509 return NULL;
510 * the PCI device driver. 510
511 */
512 pe->state |= state; 511 pe->state |= state;
512
513 /* Offline PCI devices if applicable */
514 if (state != EEH_PE_ISOLATED)
515 return NULL;
516
513 eeh_pe_for_each_dev(pe, edev, tmp) { 517 eeh_pe_for_each_dev(pe, edev, tmp) {
514 pdev = eeh_dev_to_pci_dev(edev); 518 pdev = eeh_dev_to_pci_dev(edev);
515 if (pdev) 519 if (pdev)
@@ -532,6 +536,27 @@ void eeh_pe_state_mark(struct eeh_pe *pe, int state)
532 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state); 536 eeh_pe_traverse(pe, __eeh_pe_state_mark, &state);
533} 537}
534 538
539static void *__eeh_pe_dev_mode_mark(void *data, void *flag)
540{
541 struct eeh_dev *edev = data;
542 int mode = *((int *)flag);
543
544 edev->mode |= mode;
545
546 return NULL;
547}
548
549/**
550 * eeh_pe_dev_mode_mark - Mark mode for all devices under the PE
551 * @pe: EEH PE
552 *
553 * Mark the specified mode on all child devices of the PE.
554 */
555void eeh_pe_dev_mode_mark(struct eeh_pe *pe, int mode)
556{
557 eeh_pe_dev_traverse(pe, __eeh_pe_dev_mode_mark, &mode);
558}
559
535/** 560/**
536 * __eeh_pe_state_clear - Clear state for the PE 561 * __eeh_pe_state_clear - Clear state for the PE
537 * @data: EEH PE 562 * @data: EEH PE
@@ -546,8 +571,16 @@ static void *__eeh_pe_state_clear(void *data, void *flag)
546 struct eeh_pe *pe = (struct eeh_pe *)data; 571 struct eeh_pe *pe = (struct eeh_pe *)data;
547 int state = *((int *)flag); 572 int state = *((int *)flag);
548 573
574 /* Keep the state of permanently removed PE intact */
575 if ((pe->freeze_count > EEH_MAX_ALLOWED_FREEZES) &&
576 (state & EEH_PE_ISOLATED))
577 return NULL;
578
549 pe->state &= ~state; 579 pe->state &= ~state;
550 pe->check_count = 0; 580
581 /* Clear check count since last isolation */
582 if (state & EEH_PE_ISOLATED)
583 pe->check_count = 0;
551 584
552 return NULL; 585 return NULL;
553} 586}
diff --git a/arch/powerpc/kernel/pci_of_scan.c b/arch/powerpc/kernel/pci_of_scan.c
index 83c26d829991..ea6470c21f4e 100644
--- a/arch/powerpc/kernel/pci_of_scan.c
+++ b/arch/powerpc/kernel/pci_of_scan.c
@@ -304,6 +304,9 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
304 struct pci_dev *dev = NULL; 304 struct pci_dev *dev = NULL;
305 const __be32 *reg; 305 const __be32 *reg;
306 int reglen, devfn; 306 int reglen, devfn;
307#ifdef CONFIG_EEH
308 struct eeh_dev *edev = of_node_to_eeh_dev(dn);
309#endif
307 310
308 pr_debug(" * %s\n", dn->full_name); 311 pr_debug(" * %s\n", dn->full_name);
309 if (!of_device_is_available(dn)) 312 if (!of_device_is_available(dn))
@@ -321,6 +324,12 @@ static struct pci_dev *of_scan_pci_dev(struct pci_bus *bus,
321 return dev; 324 return dev;
322 } 325 }
323 326
327 /* Device removed permanently ? */
328#ifdef CONFIG_EEH
329 if (edev && (edev->mode & EEH_DEV_REMOVED))
330 return NULL;
331#endif
332
324 /* create a new pci_dev for this device */ 333 /* create a new pci_dev for this device */
325 dev = of_create_pci_dev(dn, bus, devfn); 334 dev = of_create_pci_dev(dn, bus, devfn);
326 if (!dev) 335 if (!dev)
diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c
index f98cf99c9f8c..eefbfcc3fd8c 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -441,11 +441,16 @@ static bool pnv_pci_cfg_check(struct pci_controller *hose,
441 if (!(phb->flags & PNV_PHB_FLAG_EEH)) 441 if (!(phb->flags & PNV_PHB_FLAG_EEH))
442 return true; 442 return true;
443 443
444 /* PE reset ? */ 444 /* PE reset or device removed ? */
445 edev = of_node_to_eeh_dev(dn); 445 edev = of_node_to_eeh_dev(dn);
446 if (edev && edev->pe && 446 if (edev) {
447 (edev->pe->state & EEH_PE_RESET)) 447 if (edev->pe &&
448 return false; 448 (edev->pe->state & EEH_PE_RESET))
449 return false;
450
451 if (edev->mode & EEH_DEV_REMOVED)
452 return false;
453 }
449 454
450 return true; 455 return true;
451} 456}