about summary refs log tree commit diff stats
path: root/arch/powerpc/platforms/pseries/eeh.c
diff options
context:
space:
mode:
Diffstat (limited to 'arch/powerpc/platforms/pseries/eeh.c')
-rw-r--r-- arch/powerpc/platforms/pseries/eeh.c 204
1 files changed, 118 insertions, 86 deletions
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
index 6cedbc002e0f..48fbd442e9df 100644
--- a/arch/powerpc/platforms/pseries/eeh.c
+++ b/arch/powerpc/platforms/pseries/eeh.c
@@ -74,7 +74,10 @@
74 * is broken and panic. This sets the threshold for how many read 74 * is broken and panic. This sets the threshold for how many read
75 * attempts we allow before panicking. 75 * attempts we allow before panicking.
76 */ 76 */
77#define EEH_MAX_FAILS 100000 77#define EEH_MAX_FAILS 2100000
78
79/* Time to wait for a PCI slot to report status, in milliseconds */
80#define PCI_BUS_RESET_WAIT_MSEC (60*1000)
78 81
79/* RTAS tokens */ 82/* RTAS tokens */
80static int ibm_set_eeh_option; 83static int ibm_set_eeh_option;
@@ -83,6 +86,7 @@ static int ibm_read_slot_reset_state;
83static int ibm_read_slot_reset_state2; 86static int ibm_read_slot_reset_state2;
84static int ibm_slot_error_detail; 87static int ibm_slot_error_detail;
85static int ibm_get_config_addr_info; 88static int ibm_get_config_addr_info;
89static int ibm_get_config_addr_info2;
86static int ibm_configure_bridge; 90static int ibm_configure_bridge;
87 91
88int eeh_subsystem_enabled; 92int eeh_subsystem_enabled;
@@ -168,6 +172,55 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[])
168} 172}
169 173
170/** 174/**
175 * eeh_wait_for_slot_status - returns error status of slot
176 * @pdn pci device node
177 * @max_wait_msecs maximum number of millisecs to wait
178 *
179 * Return negative value if a permanent error, else return
180 * Partition Endpoint (PE) status value.
181 *
182 * If @max_wait_msecs is positive, then this routine will
183 * sleep until a valid status can be obtained, or until
184 * the max allowed wait time is exceeded, in which case
185 * a -2 is returned.
186 */
187int
188eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs)
189{
190 int rc;
191 int rets[3];
192 int mwait;
193
194 while (1) {
195 rc = read_slot_reset_state(pdn, rets);
196 if (rc) return rc;
197 if (rets[1] == 0) return -1; /* EEH is not supported */
198
199 if (rets[0] != 5) return rets[0]; /* return actual status */
200
201 if (rets[2] == 0) return -1; /* permanently unavailable */
202
203 if (max_wait_msecs <= 0) return -1;
204
205 mwait = rets[2];
206 if (mwait <= 0) {
207 printk (KERN_WARNING
208 "EEH: Firmware returned bad wait value=%d\n", mwait);
209 mwait = 1000;
210 } else if (mwait > 300*1000) {
211 printk (KERN_WARNING
212 "EEH: Firmware is taking too long, time=%d\n", mwait);
213 mwait = 300*1000;
214 }
215 max_wait_msecs -= mwait;
216 msleep (mwait);
217 }
218
219 printk(KERN_WARNING "EEH: Timed out waiting for slot status\n");
220 return -2;
221}
222
223/**
171 * eeh_token_to_phys - convert EEH address token to phys address 224 * eeh_token_to_phys - convert EEH address token to phys address
172 * @token i/o token, should be address in the form 0xA.... 225 * @token i/o token, should be address in the form 0xA....
173 */ 226 */
@@ -229,7 +282,7 @@ void eeh_mark_slot (struct device_node *dn, int mode_flag)
229 dn = find_device_pe (dn); 282 dn = find_device_pe (dn);
230 283
231 /* Back up one, since config addrs might be shared */ 284 /* Back up one, since config addrs might be shared */
232 if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr) 285 if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
233 dn = dn->parent; 286 dn = dn->parent;
234 287
235 PCI_DN(dn)->eeh_mode |= mode_flag; 288 PCI_DN(dn)->eeh_mode |= mode_flag;
@@ -263,7 +316,7 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag)
263 dn = find_device_pe (dn); 316 dn = find_device_pe (dn);
264 317
265 /* Back up one, since config addrs might be shared */ 318 /* Back up one, since config addrs might be shared */
266 if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr) 319 if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent))
267 dn = dn->parent; 320 dn = dn->parent;
268 321
269 PCI_DN(dn)->eeh_mode &= ~mode_flag; 322 PCI_DN(dn)->eeh_mode &= ~mode_flag;
@@ -293,7 +346,6 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
293 int rets[3]; 346 int rets[3];
294 unsigned long flags; 347 unsigned long flags;
295 struct pci_dn *pdn; 348 struct pci_dn *pdn;
296 enum pci_channel_state state;
297 int rc = 0; 349 int rc = 0;
298 350
299 total_mmio_ffs++; 351 total_mmio_ffs++;
@@ -367,25 +419,25 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
367 goto dn_unlock; 419 goto dn_unlock;
368 } 420 }
369 421
370 /* If EEH is not supported on this device, punt. */ 422 /* Note that config-io to empty slots may fail;
371 if (rets[1] != 1) { 423 * they are empty when they don't have children. */
372 printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", 424 if ((rets[0] == 5) && (dn->child == NULL)) {
373 ret, dn->full_name);
374 false_positives++; 425 false_positives++;
375 rc = 0; 426 rc = 0;
376 goto dn_unlock; 427 goto dn_unlock;
377 } 428 }
378 429
379 /* If not the kind of error we know about, punt. */ 430 /* If EEH is not supported on this device, punt. */
380 if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { 431 if (rets[1] != 1) {
432 printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n",
433 ret, dn->full_name);
381 false_positives++; 434 false_positives++;
382 rc = 0; 435 rc = 0;
383 goto dn_unlock; 436 goto dn_unlock;
384 } 437 }
385 438
386 /* Note that config-io to empty slots may fail; 439 /* If not the kind of error we know about, punt. */
387 * we recognize empty because they don't have children. */ 440 if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) {
388 if ((rets[0] == 5) && (dn->child == NULL)) {
389 false_positives++; 441 false_positives++;
390 rc = 0; 442 rc = 0;
391 goto dn_unlock; 443 goto dn_unlock;
@@ -399,17 +451,12 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
399 eeh_mark_slot (dn, EEH_MODE_ISOLATED); 451 eeh_mark_slot (dn, EEH_MODE_ISOLATED);
400 spin_unlock_irqrestore(&confirm_error_lock, flags); 452 spin_unlock_irqrestore(&confirm_error_lock, flags);
401 453
402 state = pci_channel_io_normal; 454 eeh_send_failure_event (dn, dev);
403 if ((rets[0] == 2) || (rets[0] == 4))
404 state = pci_channel_io_frozen;
405 if (rets[0] == 5)
406 state = pci_channel_io_perm_failure;
407 eeh_send_failure_event (dn, dev, state, rets[2]);
408 455
409 /* Most EEH events are due to device driver bugs. Having 456 /* Most EEH events are due to device driver bugs. Having
410 * a stack trace will help the device-driver authors figure 457 * a stack trace will help the device-driver authors figure
411 * out what happened. So print that out. */ 458 * out what happened. So print that out. */
412 if (rets[0] != 5) dump_stack(); 459 dump_stack();
413 return 1; 460 return 1;
414 461
415dn_unlock: 462dn_unlock:
@@ -458,38 +505,6 @@ EXPORT_SYMBOL(eeh_check_failure);
458/* The code below deals with error recovery */ 505/* The code below deals with error recovery */
459 506
460/** 507/**
461 * eeh_slot_availability - returns error status of slot
462 * @pdn pci device node
463 *
464 * Return negative value if a permanent error, else return
465 * a number of milliseconds to wait until the PCI slot is
466 * ready to be used.
467 */
468static int
469eeh_slot_availability(struct pci_dn *pdn)
470{
471 int rc;
472 int rets[3];
473
474 rc = read_slot_reset_state(pdn, rets);
475
476 if (rc) return rc;
477
478 if (rets[1] == 0) return -1; /* EEH is not supported */
479 if (rets[0] == 0) return 0; /* Oll Korrect */
480 if (rets[0] == 5) {
481 if (rets[2] == 0) return -1; /* permanently unavailable */
482 return rets[2]; /* number of millisecs to wait */
483 }
484 if (rets[0] == 1)
485 return 250;
486
487 printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n",
488 rc, rets[0], rets[1], rets[2]);
489 return -2;
490}
491
492/**
493 * rtas_pci_enable - enable MMIO or DMA transfers for this slot 508 * rtas_pci_enable - enable MMIO or DMA transfers for this slot
494 * @pdn pci device node 509 * @pdn pci device node
495 */ 510 */
@@ -512,9 +527,13 @@ rtas_pci_enable(struct pci_dn *pdn, int function)
512 function); 527 function);
513 528
514 if (rc) 529 if (rc)
515 printk(KERN_WARNING "EEH: Cannot enable function %d, err=%d dn=%s\n", 530 printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n",
516 function, rc, pdn->node->full_name); 531 function, rc, pdn->node->full_name);
517 532
533 rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC);
534 if ((rc == 4) && (function == EEH_THAW_MMIO))
535 return 0;
536
518 return rc; 537 return rc;
519} 538}
520 539
@@ -595,36 +614,24 @@ int rtas_set_slot_reset(struct pci_dn *pdn)
595{ 614{
596 int i, rc; 615 int i, rc;
597 616
598 __rtas_set_slot_reset(pdn); 617 /* Take three shots at resetting the bus */
618 for (i=0; i<3; i++) {
619 __rtas_set_slot_reset(pdn);
599 620
600 /* Now double check with the firmware to make sure the device is 621 rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC);
601 * ready to be used; if not, wait for recovery. */
602 for (i=0; i<10; i++) {
603 rc = eeh_slot_availability (pdn);
604 if (rc == 0) 622 if (rc == 0)
605 return 0; 623 return 0;
606 624
607 if (rc == -2) {
608 printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n",
609 i, pdn->node->full_name);
610 __rtas_set_slot_reset(pdn);
611 continue;
612 }
613
614 if (rc < 0) { 625 if (rc < 0) {
615 printk (KERN_ERR "EEH: unrecoverable slot failure %s\n", 626 printk (KERN_ERR "EEH: unrecoverable slot failure %s\n",
616 pdn->node->full_name); 627 pdn->node->full_name);
617 return -1; 628 return -1;
618 } 629 }
619 630 printk (KERN_ERR "EEH: bus reset %d failed on slot %s\n",
620 msleep (rc+100); 631 i+1, pdn->node->full_name);
621 } 632 }
622 633
623 rc = eeh_slot_availability (pdn); 634 return -1;
624 if (rc)
625 printk (KERN_ERR "EEH: timeout resetting slot %s\n", pdn->node->full_name);
626
627 return rc;
628} 635}
629 636
630/* ------------------------------------------------------- */ 637/* ------------------------------------------------------- */
@@ -744,16 +751,48 @@ struct eeh_early_enable_info {
744 unsigned int buid_lo; 751 unsigned int buid_lo;
745}; 752};
746 753
754static int get_pe_addr (int config_addr,
755 struct eeh_early_enable_info *info)
756{
757 unsigned int rets[3];
758 int ret;
759
760 /* Use latest config-addr token on power6 */
761 if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) {
762 /* Make sure we have a PE in hand */
763 ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets,
764 config_addr, info->buid_hi, info->buid_lo, 1);
765 if (ret || (rets[0]==0))
766 return 0;
767
768 ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets,
769 config_addr, info->buid_hi, info->buid_lo, 0);
770 if (ret)
771 return 0;
772 return rets[0];
773 }
774
775 /* Use older config-addr token on power5 */
776 if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
777 ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets,
778 config_addr, info->buid_hi, info->buid_lo, 0);
779 if (ret)
780 return 0;
781 return rets[0];
782 }
783 return 0;
784}
785
747/* Enable eeh for the given device node. */ 786/* Enable eeh for the given device node. */
748static void *early_enable_eeh(struct device_node *dn, void *data) 787static void *early_enable_eeh(struct device_node *dn, void *data)
749{ 788{
750 unsigned int rets[3]; 789 unsigned int rets[3];
751 struct eeh_early_enable_info *info = data; 790 struct eeh_early_enable_info *info = data;
752 int ret; 791 int ret;
753 const char *status = get_property(dn, "status", NULL); 792 const char *status = of_get_property(dn, "status", NULL);
754 const u32 *class_code = get_property(dn, "class-code", NULL); 793 const u32 *class_code = of_get_property(dn, "class-code", NULL);
755 const u32 *vendor_id = get_property(dn, "vendor-id", NULL); 794 const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL);
756 const u32 *device_id = get_property(dn, "device-id", NULL); 795 const u32 *device_id = of_get_property(dn, "device-id", NULL);
757 const u32 *regs; 796 const u32 *regs;
758 int enable; 797 int enable;
759 struct pci_dn *pdn = PCI_DN(dn); 798 struct pci_dn *pdn = PCI_DN(dn);
@@ -796,7 +835,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
796 835
797 /* Ok... see if this device supports EEH. Some do, some don't, 836 /* Ok... see if this device supports EEH. Some do, some don't,
798 * and the only way to find out is to check each and every one. */ 837 * and the only way to find out is to check each and every one. */
799 regs = get_property(dn, "reg", NULL); 838 regs = of_get_property(dn, "reg", NULL);
800 if (regs) { 839 if (regs) {
801 /* First register entry is addr (00BBSS00) */ 840 /* First register entry is addr (00BBSS00) */
802 /* Try to enable eeh */ 841 /* Try to enable eeh */
@@ -810,15 +849,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data)
810 849
811 /* If the newer, better, ibm,get-config-addr-info is supported, 850 /* If the newer, better, ibm,get-config-addr-info is supported,
812 * then use that instead. */ 851 * then use that instead. */
813 pdn->eeh_pe_config_addr = 0; 852 pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info);
814 if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) {
815 ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets,
816 pdn->eeh_config_addr,
817 info->buid_hi, info->buid_lo,
818 0);
819 if (ret == 0)
820 pdn->eeh_pe_config_addr = rets[0];
821 }
822 853
823 /* Some older systems (Power4) allow the 854 /* Some older systems (Power4) allow the
824 * ibm,set-eeh-option call to succeed even on nodes 855 * ibm,set-eeh-option call to succeed even on nodes
@@ -889,6 +920,7 @@ void __init eeh_init(void)
889 ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); 920 ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state");
890 ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); 921 ibm_slot_error_detail = rtas_token("ibm,slot-error-detail");
891 ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); 922 ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info");
923 ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2");
892 ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); 924 ibm_configure_bridge = rtas_token ("ibm,configure-bridge");
893 925
894 if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) 926 if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE)