aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/powerpc/include/asm/eeh_event.h7
-rw-r--r--arch/powerpc/platforms/pseries/eeh.c2
-rw-r--r--arch/powerpc/platforms/pseries/eeh_driver.c81
-rw-r--r--arch/powerpc/platforms/pseries/eeh_event.c36
4 files changed, 63 insertions, 63 deletions
diff --git a/arch/powerpc/include/asm/eeh_event.h b/arch/powerpc/include/asm/eeh_event.h
index 25ebf6a5aa57..c68b012b7797 100644
--- a/arch/powerpc/include/asm/eeh_event.h
+++ b/arch/powerpc/include/asm/eeh_event.h
@@ -28,12 +28,11 @@
28 */ 28 */
29struct eeh_event { 29struct eeh_event {
30 struct list_head list; /* to form event queue */ 30 struct list_head list; /* to form event queue */
31 struct device_node *dn; /* struct device node */ 31 struct eeh_dev *edev; /* EEH device */
32 struct pci_dev *dev; /* affected device */
33}; 32};
34 33
35int eeh_send_failure_event(struct device_node *dn, struct pci_dev *dev); 34int eeh_send_failure_event(struct eeh_dev *edev);
36struct pci_dn *handle_eeh_events(struct eeh_event *); 35struct eeh_dev *handle_eeh_events(struct eeh_event *);
37 36
38#endif /* __KERNEL__ */ 37#endif /* __KERNEL__ */
39#endif /* ASM_POWERPC_EEH_EVENT_H */ 38#endif /* ASM_POWERPC_EEH_EVENT_H */
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index aec10f66f5fe..9b1fd0c09295 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -475,7 +475,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
475 eeh_mark_slot(dn, EEH_MODE_ISOLATED); 475 eeh_mark_slot(dn, EEH_MODE_ISOLATED);
476 raw_spin_unlock_irqrestore(&confirm_error_lock, flags); 476 raw_spin_unlock_irqrestore(&confirm_error_lock, flags);
477 477
478 eeh_send_failure_event(edev->dn, edev->pdev); 478 eeh_send_failure_event(edev);
479 479
480 /* Most EEH events are due to device driver bugs. Having 480 /* Most EEH events are due to device driver bugs. Having
481 * a stack trace will help the device-driver authors figure 481 * a stack trace will help the device-driver authors figure
diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
index 3f25fab741e2..baf92cd9dfab 100644
--- a/arch/powerpc/platforms/pseries/eeh_driver.c
+++ b/arch/powerpc/platforms/pseries/eeh_driver.c
@@ -40,7 +40,7 @@
40 * This routine is used to retrieve the name of PCI device driver 40 * This routine is used to retrieve the name of PCI device driver
41 * if that's valid. 41 * if that's valid.
42 */ 42 */
43static inline const char *pcid_name(struct pci_dev *pdev) 43static inline const char *eeh_pcid_name(struct pci_dev *pdev)
44{ 44{
45 if (pdev && pdev->dev.driver) 45 if (pdev && pdev->dev.driver)
46 return pdev->dev.driver->name; 46 return pdev->dev.driver->name;
@@ -81,7 +81,7 @@ static void print_device_node_tree(struct pci_dn *pdn, int dent)
81 */ 81 */
82static void eeh_disable_irq(struct pci_dev *dev) 82static void eeh_disable_irq(struct pci_dev *dev)
83{ 83{
84 struct device_node *dn = pci_device_to_OF_node(dev); 84 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
85 85
86 /* Don't disable MSI and MSI-X interrupts. They are 86 /* Don't disable MSI and MSI-X interrupts. They are
87 * effectively disabled by the DMA Stopped state 87 * effectively disabled by the DMA Stopped state
@@ -93,7 +93,7 @@ static void eeh_disable_irq(struct pci_dev *dev)
93 if (!irq_has_action(dev->irq)) 93 if (!irq_has_action(dev->irq))
94 return; 94 return;
95 95
96 PCI_DN(dn)->eeh_mode |= EEH_MODE_IRQ_DISABLED; 96 edev->mode |= EEH_MODE_IRQ_DISABLED;
97 disable_irq_nosync(dev->irq); 97 disable_irq_nosync(dev->irq);
98} 98}
99 99
@@ -106,10 +106,10 @@ static void eeh_disable_irq(struct pci_dev *dev)
106 */ 106 */
107static void eeh_enable_irq(struct pci_dev *dev) 107static void eeh_enable_irq(struct pci_dev *dev)
108{ 108{
109 struct device_node *dn = pci_device_to_OF_node(dev); 109 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
110 110
111 if ((PCI_DN(dn)->eeh_mode) & EEH_MODE_IRQ_DISABLED) { 111 if ((edev->mode) & EEH_MODE_IRQ_DISABLED) {
112 PCI_DN(dn)->eeh_mode &= ~EEH_MODE_IRQ_DISABLED; 112 edev->mode &= ~EEH_MODE_IRQ_DISABLED;
113 enable_irq(dev->irq); 113 enable_irq(dev->irq);
114 } 114 }
115} 115}
@@ -270,20 +270,20 @@ static int eeh_report_failure(struct pci_dev *dev, void *userdata)
270 270
271/** 271/**
272 * eeh_reset_device - Perform actual reset of a pci slot 272 * eeh_reset_device - Perform actual reset of a pci slot
273 * @pe_dn: PE associated device node 273 * @edev: PE associated EEH device
274 * @bus: PCI bus corresponding to the isolcated slot 274 * @bus: PCI bus corresponding to the isolcated slot
275 * 275 *
276 * This routine must be called to do reset on the indicated PE. 276 * This routine must be called to do reset on the indicated PE.
277 * During the reset, udev might be invoked because those affected 277 * During the reset, udev might be invoked because those affected
278 * PCI devices will be removed and then added. 278 * PCI devices will be removed and then added.
279 */ 279 */
280static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus) 280static int eeh_reset_device(struct eeh_dev *edev, struct pci_bus *bus)
281{ 281{
282 struct device_node *dn; 282 struct device_node *dn;
283 int cnt, rc; 283 int cnt, rc;
284 284
285 /* pcibios will clear the counter; save the value */ 285 /* pcibios will clear the counter; save the value */
286 cnt = pe_dn->eeh_freeze_count; 286 cnt = edev->freeze_count;
287 287
288 if (bus) 288 if (bus)
289 pcibios_remove_pci_devices(bus); 289 pcibios_remove_pci_devices(bus);
@@ -292,21 +292,22 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus)
292 * Reconfigure bridges and devices. Don't try to bring the system 292 * Reconfigure bridges and devices. Don't try to bring the system
293 * up if the reset failed for some reason. 293 * up if the reset failed for some reason.
294 */ 294 */
295 rc = eeh_reset_pe(pe_dn); 295 rc = eeh_reset_pe(edev);
296 if (rc) 296 if (rc)
297 return rc; 297 return rc;
298 298
299 /* Walk over all functions on this device. */ 299 /* Walk over all functions on this device. */
300 dn = pe_dn->node; 300 dn = eeh_dev_to_of_node(edev);
301 if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) 301 if (!pcibios_find_pci_bus(dn) && of_node_to_eeh_dev(dn->parent))
302 dn = dn->parent->child; 302 dn = dn->parent->child;
303 303
304 while (dn) { 304 while (dn) {
305 struct pci_dn *ppe = PCI_DN(dn); 305 struct eeh_dev *pedev = of_node_to_eeh_dev(dn);
306
306 /* On Power4, always true because eeh_pe_config_addr=0 */ 307 /* On Power4, always true because eeh_pe_config_addr=0 */
307 if (pe_dn->eeh_pe_config_addr == ppe->eeh_pe_config_addr) { 308 if (edev->pe_config_addr == pedev->pe_config_addr) {
308 eeh_ops->configure_bridge(dn); 309 eeh_ops->configure_bridge(dn);
309 eeh_restore_bars(ppe); 310 eeh_restore_bars(pedev);
310 } 311 }
311 dn = dn->sibling; 312 dn = dn->sibling;
312 } 313 }
@@ -321,7 +322,7 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus)
321 ssleep(5); 322 ssleep(5);
322 pcibios_add_pci_devices(bus); 323 pcibios_add_pci_devices(bus);
323 } 324 }
324 pe_dn->eeh_freeze_count = cnt; 325 edev->freeze_count = cnt;
325 326
326 return 0; 327 return 0;
327} 328}
@@ -348,23 +349,22 @@ static int eeh_reset_device(struct pci_dn *pe_dn, struct pci_bus *bus)
348 * drivers (which cause a second set of hotplug events to go out to 349 * drivers (which cause a second set of hotplug events to go out to
349 * userspace). 350 * userspace).
350 */ 351 */
351struct pci_dn *handle_eeh_events(struct eeh_event *event) 352struct eeh_dev *handle_eeh_events(struct eeh_event *event)
352{ 353{
353 struct device_node *frozen_dn; 354 struct device_node *frozen_dn;
354 struct pci_dn *frozen_pdn; 355 struct eeh_dev *frozen_edev;
355 struct pci_bus *frozen_bus; 356 struct pci_bus *frozen_bus;
356 int rc = 0; 357 int rc = 0;
357 enum pci_ers_result result = PCI_ERS_RESULT_NONE; 358 enum pci_ers_result result = PCI_ERS_RESULT_NONE;
358 const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str; 359 const char *location, *pci_str, *drv_str, *bus_pci_str, *bus_drv_str;
359 360
360 frozen_dn = eeh_find_device_pe(event->dn); 361 frozen_dn = eeh_find_device_pe(eeh_dev_to_of_node(event->edev));
361 if (!frozen_dn) { 362 if (!frozen_dn) {
362 363 location = of_get_property(eeh_dev_to_of_node(event->edev), "ibm,loc-code", NULL);
363 location = of_get_property(event->dn, "ibm,loc-code", NULL);
364 location = location ? location : "unknown"; 364 location = location ? location : "unknown";
365 printk(KERN_ERR "EEH: Error: Cannot find partition endpoint " 365 printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
366 "for location=%s pci addr=%s\n", 366 "for location=%s pci addr=%s\n",
367 location, eeh_pci_name(event->dev)); 367 location, eeh_pci_name(eeh_dev_to_pci_dev(event->edev)));
368 return NULL; 368 return NULL;
369 } 369 }
370 370
@@ -389,22 +389,21 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
389 return NULL; 389 return NULL;
390 } 390 }
391 391
392 frozen_pdn = PCI_DN(frozen_dn); 392 frozen_edev = of_node_to_eeh_dev(frozen_dn);
393 frozen_pdn->eeh_freeze_count++; 393 frozen_edev->freeze_count++;
394 pci_str = eeh_pci_name(eeh_dev_to_pci_dev(event->edev));
395 drv_str = eeh_pcid_name(eeh_dev_to_pci_dev(event->edev));
394 396
395 pci_str = eeh_pci_name(event->dev); 397 if (frozen_edev->freeze_count > EEH_MAX_ALLOWED_FREEZES)
396 drv_str = pcid_name(event->dev);
397
398 if (frozen_pdn->eeh_freeze_count > EEH_MAX_ALLOWED_FREEZES)
399 goto excess_failures; 398 goto excess_failures;
400 399
401 printk(KERN_WARNING 400 printk(KERN_WARNING
402 "EEH: This PCI device has failed %d times in the last hour:\n", 401 "EEH: This PCI device has failed %d times in the last hour:\n",
403 frozen_pdn->eeh_freeze_count); 402 frozen_edev->freeze_count);
404 403
405 if (frozen_pdn->pcidev) { 404 if (frozen_edev->pdev) {
406 bus_pci_str = pci_name(frozen_pdn->pcidev); 405 bus_pci_str = pci_name(frozen_edev->pdev);
407 bus_drv_str = pcid_name(frozen_pdn->pcidev); 406 bus_drv_str = eeh_pcid_name(frozen_edev->pdev);
408 printk(KERN_WARNING 407 printk(KERN_WARNING
409 "EEH: Bus location=%s driver=%s pci addr=%s\n", 408 "EEH: Bus location=%s driver=%s pci addr=%s\n",
410 location, bus_drv_str, bus_pci_str); 409 location, bus_drv_str, bus_pci_str);
@@ -425,7 +424,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
425 /* Get the current PCI slot state. This can take a long time, 424 /* Get the current PCI slot state. This can take a long time,
426 * sometimes over 3 seconds for certain systems. 425 * sometimes over 3 seconds for certain systems.
427 */ 426 */
428 rc = eeh_ops->wait_state(frozen_pdn->node, MAX_WAIT_FOR_RECOVERY*1000); 427 rc = eeh_ops->wait_state(eeh_dev_to_of_node(frozen_edev), MAX_WAIT_FOR_RECOVERY*1000);
429 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) { 428 if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
430 printk(KERN_WARNING "EEH: Permanent failure\n"); 429 printk(KERN_WARNING "EEH: Permanent failure\n");
431 goto hard_fail; 430 goto hard_fail;
@@ -435,14 +434,14 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
435 * don't post the error log until after all dev drivers 434 * don't post the error log until after all dev drivers
436 * have been informed. 435 * have been informed.
437 */ 436 */
438 eeh_slot_error_detail(frozen_pdn, EEH_LOG_TEMP); 437 eeh_slot_error_detail(frozen_edev, EEH_LOG_TEMP);
439 438
440 /* If all device drivers were EEH-unaware, then shut 439 /* If all device drivers were EEH-unaware, then shut
441 * down all of the device drivers, and hope they 440 * down all of the device drivers, and hope they
442 * go down willingly, without panicing the system. 441 * go down willingly, without panicing the system.
443 */ 442 */
444 if (result == PCI_ERS_RESULT_NONE) { 443 if (result == PCI_ERS_RESULT_NONE) {
445 rc = eeh_reset_device(frozen_pdn, frozen_bus); 444 rc = eeh_reset_device(frozen_edev, frozen_bus);
446 if (rc) { 445 if (rc) {
447 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc); 446 printk(KERN_WARNING "EEH: Unable to reset, rc=%d\n", rc);
448 goto hard_fail; 447 goto hard_fail;
@@ -451,7 +450,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
451 450
452 /* If all devices reported they can proceed, then re-enable MMIO */ 451 /* If all devices reported they can proceed, then re-enable MMIO */
453 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 452 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
454 rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_MMIO); 453 rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_MMIO);
455 454
456 if (rc < 0) 455 if (rc < 0)
457 goto hard_fail; 456 goto hard_fail;
@@ -465,7 +464,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
465 464
466 /* If all devices reported they can proceed, then re-enable DMA */ 465 /* If all devices reported they can proceed, then re-enable DMA */
467 if (result == PCI_ERS_RESULT_CAN_RECOVER) { 466 if (result == PCI_ERS_RESULT_CAN_RECOVER) {
468 rc = eeh_pci_enable(frozen_pdn, EEH_OPT_THAW_DMA); 467 rc = eeh_pci_enable(frozen_edev, EEH_OPT_THAW_DMA);
469 468
470 if (rc < 0) 469 if (rc < 0)
471 goto hard_fail; 470 goto hard_fail;
@@ -483,7 +482,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
483 482
484 /* If any device called out for a reset, then reset the slot */ 483 /* If any device called out for a reset, then reset the slot */
485 if (result == PCI_ERS_RESULT_NEED_RESET) { 484 if (result == PCI_ERS_RESULT_NEED_RESET) {
486 rc = eeh_reset_device(frozen_pdn, NULL); 485 rc = eeh_reset_device(frozen_edev, NULL);
487 if (rc) { 486 if (rc) {
488 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc); 487 printk(KERN_WARNING "EEH: Cannot reset, rc=%d\n", rc);
489 goto hard_fail; 488 goto hard_fail;
@@ -502,7 +501,7 @@ struct pci_dn *handle_eeh_events(struct eeh_event *event)
502 /* Tell all device drivers that they can resume operations */ 501 /* Tell all device drivers that they can resume operations */
503 pci_walk_bus(frozen_bus, eeh_report_resume, NULL); 502 pci_walk_bus(frozen_bus, eeh_report_resume, NULL);
504 503
505 return frozen_pdn; 504 return frozen_edev;
506 505
507excess_failures: 506excess_failures:
508 /* 507 /*
@@ -515,7 +514,7 @@ excess_failures:
515 "has failed %d times in the last hour " 514 "has failed %d times in the last hour "
516 "and has been permanently disabled.\n" 515 "and has been permanently disabled.\n"
517 "Please try reseating this device or replacing it.\n", 516 "Please try reseating this device or replacing it.\n",
518 location, drv_str, pci_str, frozen_pdn->eeh_freeze_count); 517 location, drv_str, pci_str, frozen_edev->freeze_count);
519 goto perm_error; 518 goto perm_error;
520 519
521hard_fail: 520hard_fail:
@@ -526,7 +525,7 @@ hard_fail:
526 location, drv_str, pci_str); 525 location, drv_str, pci_str);
527 526
528perm_error: 527perm_error:
529 eeh_slot_error_detail(frozen_pdn, EEH_LOG_PERM); 528 eeh_slot_error_detail(frozen_edev, EEH_LOG_PERM);
530 529
531 /* Notify all devices that they're about to go down. */ 530 /* Notify all devices that they're about to go down. */
532 pci_walk_bus(frozen_bus, eeh_report_failure, NULL); 531 pci_walk_bus(frozen_bus, eeh_report_failure, NULL);
diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
index e98347cb9cc0..4a4752565856 100644
--- a/arch/powerpc/platforms/pseries/eeh_event.c
+++ b/arch/powerpc/platforms/pseries/eeh_event.c
@@ -56,8 +56,8 @@ DEFINE_MUTEX(eeh_event_mutex);
56static int eeh_event_handler(void * dummy) 56static int eeh_event_handler(void * dummy)
57{ 57{
58 unsigned long flags; 58 unsigned long flags;
59 struct eeh_event *event; 59 struct eeh_event *event;
60 struct pci_dn *pdn; 60 struct eeh_dev *edev;
61 61
62 daemonize("eehd"); 62 daemonize("eehd");
63 set_current_state(TASK_INTERRUPTIBLE); 63 set_current_state(TASK_INTERRUPTIBLE);
@@ -77,23 +77,26 @@ static int eeh_event_handler(void * dummy)
77 77
78 /* Serialize processing of EEH events */ 78 /* Serialize processing of EEH events */
79 mutex_lock(&eeh_event_mutex); 79 mutex_lock(&eeh_event_mutex);
80 eeh_mark_slot(event->dn, EEH_MODE_RECOVERING); 80 edev = event->edev;
81 eeh_mark_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
81 82
82 printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n", 83 printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
83 eeh_pci_name(event->dev)); 84 eeh_pci_name(edev->pdev));
85
86 edev = handle_eeh_events(event);
84 87
85 pdn = handle_eeh_events(event); 88 eeh_clear_slot(eeh_dev_to_of_node(edev), EEH_MODE_RECOVERING);
89 pci_dev_put(edev->pdev);
86 90
87 eeh_clear_slot(event->dn, EEH_MODE_RECOVERING);
88 pci_dev_put(event->dev);
89 kfree(event); 91 kfree(event);
90 mutex_unlock(&eeh_event_mutex); 92 mutex_unlock(&eeh_event_mutex);
91 93
92 /* If there are no new errors after an hour, clear the counter. */ 94 /* If there are no new errors after an hour, clear the counter. */
93 if (pdn && pdn->eeh_freeze_count>0) { 95 if (edev && edev->freeze_count>0) {
94 msleep_interruptible(3600*1000); 96 msleep_interruptible(3600*1000);
95 if (pdn->eeh_freeze_count>0) 97 if (edev->freeze_count>0)
96 pdn->eeh_freeze_count--; 98 edev->freeze_count--;
99
97 } 100 }
98 101
99 return 0; 102 return 0;
@@ -114,17 +117,17 @@ static void eeh_thread_launcher(struct work_struct *dummy)
114 117
115/** 118/**
116 * eeh_send_failure_event - Generate a PCI error event 119 * eeh_send_failure_event - Generate a PCI error event
117 * @dev: pci device 120 * @edev: EEH device
118 * 121 *
119 * This routine can be called within an interrupt context; 122 * This routine can be called within an interrupt context;
120 * the actual event will be delivered in a normal context 123 * the actual event will be delivered in a normal context
121 * (from a workqueue). 124 * (from a workqueue).
122 */ 125 */
123int eeh_send_failure_event(struct device_node *dn, 126int eeh_send_failure_event(struct eeh_dev *edev)
124 struct pci_dev *dev)
125{ 127{
126 unsigned long flags; 128 unsigned long flags;
127 struct eeh_event *event; 129 struct eeh_event *event;
130 struct device_node *dn = eeh_dev_to_of_node(edev);
128 const char *location; 131 const char *location;
129 132
130 if (!mem_init_done) { 133 if (!mem_init_done) {
@@ -140,11 +143,10 @@ int eeh_send_failure_event(struct device_node *dn,
140 return 1; 143 return 1;
141 } 144 }
142 145
143 if (dev) 146 if (edev->pdev)
144 pci_dev_get(dev); 147 pci_dev_get(edev->pdev);
145 148
146 event->dn = dn; 149 event->edev = edev;
147 event->dev = dev;
148 150
149 /* We may or may not be called in an interrupt context */ 151 /* We may or may not be called in an interrupt context */
150 spin_lock_irqsave(&eeh_eventlist_lock, flags); 152 spin_lock_irqsave(&eeh_eventlist_lock, flags);