diff options
Diffstat (limited to 'arch/powerpc/platforms/pseries/eeh.c')
-rw-r--r-- | arch/powerpc/platforms/pseries/eeh.c | 204 |
1 files changed, 118 insertions, 86 deletions
diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c index 6cedbc002e0f..48fbd442e9df 100644 --- a/arch/powerpc/platforms/pseries/eeh.c +++ b/arch/powerpc/platforms/pseries/eeh.c | |||
@@ -74,7 +74,10 @@ | |||
74 | * is broken and panic. This sets the threshold for how many read | 74 | * is broken and panic. This sets the threshold for how many read |
75 | * attempts we allow before panicking. | 75 | * attempts we allow before panicking. |
76 | */ | 76 | */ |
77 | #define EEH_MAX_FAILS 100000 | 77 | #define EEH_MAX_FAILS 2100000 |
78 | |||
79 | /* Time to wait for a PCI slot to report status, in milliseconds */ | |
80 | #define PCI_BUS_RESET_WAIT_MSEC (60*1000) | ||
78 | 81 | ||
79 | /* RTAS tokens */ | 82 | /* RTAS tokens */ |
80 | static int ibm_set_eeh_option; | 83 | static int ibm_set_eeh_option; |
@@ -83,6 +86,7 @@ static int ibm_read_slot_reset_state; | |||
83 | static int ibm_read_slot_reset_state2; | 86 | static int ibm_read_slot_reset_state2; |
84 | static int ibm_slot_error_detail; | 87 | static int ibm_slot_error_detail; |
85 | static int ibm_get_config_addr_info; | 88 | static int ibm_get_config_addr_info; |
89 | static int ibm_get_config_addr_info2; | ||
86 | static int ibm_configure_bridge; | 90 | static int ibm_configure_bridge; |
87 | 91 | ||
88 | int eeh_subsystem_enabled; | 92 | int eeh_subsystem_enabled; |
@@ -168,6 +172,55 @@ static int read_slot_reset_state(struct pci_dn *pdn, int rets[]) | |||
168 | } | 172 | } |
169 | 173 | ||
170 | /** | 174 | /** |
175 | * eeh_wait_for_slot_status - returns error status of slot | ||
176 | * @pdn pci device node | ||
177 | * @max_wait_msecs maximum number of millisecs to wait | |
178 | * | ||
179 | * Return negative value if a permanent error, else return | ||
180 | * Partition Endpoint (PE) status value. | ||
181 | * | ||
182 | * If @max_wait_msecs is positive, then this routine will | ||
183 | * sleep until a valid status can be obtained, or until | ||
184 | * the max allowed wait time is exceeded, in which case | ||
185 | * a -2 is returned. | ||
186 | */ | ||
187 | int | ||
188 | eeh_wait_for_slot_status(struct pci_dn *pdn, int max_wait_msecs) | ||
189 | { | ||
190 | int rc; | ||
191 | int rets[3]; | ||
192 | int mwait; | ||
193 | |||
194 | while (1) { | ||
195 | rc = read_slot_reset_state(pdn, rets); | ||
196 | if (rc) return rc; | ||
197 | if (rets[1] == 0) return -1; /* EEH is not supported */ | ||
198 | |||
199 | if (rets[0] != 5) return rets[0]; /* return actual status */ | ||
200 | |||
201 | if (rets[2] == 0) return -1; /* permanently unavailable */ | ||
202 | |||
203 | if (max_wait_msecs <= 0) return -1; | ||
204 | |||
205 | mwait = rets[2]; | ||
206 | if (mwait <= 0) { | ||
207 | printk (KERN_WARNING | ||
208 | "EEH: Firmware returned bad wait value=%d\n", mwait); | ||
209 | mwait = 1000; | ||
210 | } else if (mwait > 300*1000) { | ||
211 | printk (KERN_WARNING | ||
212 | "EEH: Firmware is taking too long, time=%d\n", mwait); | ||
213 | mwait = 300*1000; | ||
214 | } | ||
215 | max_wait_msecs -= mwait; | ||
216 | msleep (mwait); | ||
217 | } | ||
218 | |||
219 | printk(KERN_WARNING "EEH: Timed out waiting for slot status\n"); | ||
220 | return -2; | ||
221 | } | ||
222 | |||
223 | /** | ||
171 | * eeh_token_to_phys - convert EEH address token to phys address | 224 | * eeh_token_to_phys - convert EEH address token to phys address |
172 | * @token i/o token, should be address in the form 0xA.... | 225 | * @token i/o token, should be address in the form 0xA.... |
173 | */ | 226 | */ |
@@ -229,7 +282,7 @@ void eeh_mark_slot (struct device_node *dn, int mode_flag) | |||
229 | dn = find_device_pe (dn); | 282 | dn = find_device_pe (dn); |
230 | 283 | ||
231 | /* Back up one, since config addrs might be shared */ | 284 | /* Back up one, since config addrs might be shared */ |
232 | if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr) | 285 | if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) |
233 | dn = dn->parent; | 286 | dn = dn->parent; |
234 | 287 | ||
235 | PCI_DN(dn)->eeh_mode |= mode_flag; | 288 | PCI_DN(dn)->eeh_mode |= mode_flag; |
@@ -263,7 +316,7 @@ void eeh_clear_slot (struct device_node *dn, int mode_flag) | |||
263 | dn = find_device_pe (dn); | 316 | dn = find_device_pe (dn); |
264 | 317 | ||
265 | /* Back up one, since config addrs might be shared */ | 318 | /* Back up one, since config addrs might be shared */ |
266 | if (PCI_DN(dn) && PCI_DN(dn)->eeh_pe_config_addr) | 319 | if (!pcibios_find_pci_bus(dn) && PCI_DN(dn->parent)) |
267 | dn = dn->parent; | 320 | dn = dn->parent; |
268 | 321 | ||
269 | PCI_DN(dn)->eeh_mode &= ~mode_flag; | 322 | PCI_DN(dn)->eeh_mode &= ~mode_flag; |
@@ -293,7 +346,6 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) | |||
293 | int rets[3]; | 346 | int rets[3]; |
294 | unsigned long flags; | 347 | unsigned long flags; |
295 | struct pci_dn *pdn; | 348 | struct pci_dn *pdn; |
296 | enum pci_channel_state state; | ||
297 | int rc = 0; | 349 | int rc = 0; |
298 | 350 | ||
299 | total_mmio_ffs++; | 351 | total_mmio_ffs++; |
@@ -367,25 +419,25 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) | |||
367 | goto dn_unlock; | 419 | goto dn_unlock; |
368 | } | 420 | } |
369 | 421 | ||
370 | /* If EEH is not supported on this device, punt. */ | 422 | /* Note that config-io to empty slots may fail; |
371 | if (rets[1] != 1) { | 423 | * they are empty when they don't have children. */ |
372 | printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", | 424 | if ((rets[0] == 5) && (dn->child == NULL)) { |
373 | ret, dn->full_name); | ||
374 | false_positives++; | 425 | false_positives++; |
375 | rc = 0; | 426 | rc = 0; |
376 | goto dn_unlock; | 427 | goto dn_unlock; |
377 | } | 428 | } |
378 | 429 | ||
379 | /* If not the kind of error we know about, punt. */ | 430 | /* If EEH is not supported on this device, punt. */ |
380 | if (rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { | 431 | if (rets[1] != 1) { |
432 | printk(KERN_WARNING "EEH: event on unsupported device, rc=%d dn=%s\n", | ||
433 | ret, dn->full_name); | ||
381 | false_positives++; | 434 | false_positives++; |
382 | rc = 0; | 435 | rc = 0; |
383 | goto dn_unlock; | 436 | goto dn_unlock; |
384 | } | 437 | } |
385 | 438 | ||
386 | /* Note that config-io to empty slots may fail; | 439 | /* If not the kind of error we know about, punt. */ |
387 | * we recognize empty because they don't have children. */ | 440 | if (rets[0] != 1 && rets[0] != 2 && rets[0] != 4 && rets[0] != 5) { |
388 | if ((rets[0] == 5) && (dn->child == NULL)) { | ||
389 | false_positives++; | 441 | false_positives++; |
390 | rc = 0; | 442 | rc = 0; |
391 | goto dn_unlock; | 443 | goto dn_unlock; |
@@ -399,17 +451,12 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) | |||
399 | eeh_mark_slot (dn, EEH_MODE_ISOLATED); | 451 | eeh_mark_slot (dn, EEH_MODE_ISOLATED); |
400 | spin_unlock_irqrestore(&confirm_error_lock, flags); | 452 | spin_unlock_irqrestore(&confirm_error_lock, flags); |
401 | 453 | ||
402 | state = pci_channel_io_normal; | 454 | eeh_send_failure_event (dn, dev); |
403 | if ((rets[0] == 2) || (rets[0] == 4)) | ||
404 | state = pci_channel_io_frozen; | ||
405 | if (rets[0] == 5) | ||
406 | state = pci_channel_io_perm_failure; | ||
407 | eeh_send_failure_event (dn, dev, state, rets[2]); | ||
408 | 455 | ||
409 | /* Most EEH events are due to device driver bugs. Having | 456 | /* Most EEH events are due to device driver bugs. Having |
410 | * a stack trace will help the device-driver authors figure | 457 | * a stack trace will help the device-driver authors figure |
411 | * out what happened. So print that out. */ | 458 | * out what happened. So print that out. */ |
412 | if (rets[0] != 5) dump_stack(); | 459 | dump_stack(); |
413 | return 1; | 460 | return 1; |
414 | 461 | ||
415 | dn_unlock: | 462 | dn_unlock: |
@@ -458,38 +505,6 @@ EXPORT_SYMBOL(eeh_check_failure); | |||
458 | /* The code below deals with error recovery */ | 505 | /* The code below deals with error recovery */ |
459 | 506 | ||
460 | /** | 507 | /** |
461 | * eeh_slot_availability - returns error status of slot | ||
462 | * @pdn pci device node | ||
463 | * | ||
464 | * Return negative value if a permanent error, else return | ||
465 | * a number of milliseconds to wait until the PCI slot is | ||
466 | * ready to be used. | ||
467 | */ | ||
468 | static int | ||
469 | eeh_slot_availability(struct pci_dn *pdn) | ||
470 | { | ||
471 | int rc; | ||
472 | int rets[3]; | ||
473 | |||
474 | rc = read_slot_reset_state(pdn, rets); | ||
475 | |||
476 | if (rc) return rc; | ||
477 | |||
478 | if (rets[1] == 0) return -1; /* EEH is not supported */ | ||
479 | if (rets[0] == 0) return 0; /* Oll Korrect */ | ||
480 | if (rets[0] == 5) { | ||
481 | if (rets[2] == 0) return -1; /* permanently unavailable */ | ||
482 | return rets[2]; /* number of millisecs to wait */ | ||
483 | } | ||
484 | if (rets[0] == 1) | ||
485 | return 250; | ||
486 | |||
487 | printk (KERN_ERR "EEH: Slot unavailable: rc=%d, rets=%d %d %d\n", | ||
488 | rc, rets[0], rets[1], rets[2]); | ||
489 | return -2; | ||
490 | } | ||
491 | |||
492 | /** | ||
493 | * rtas_pci_enable - enable MMIO or DMA transfers for this slot | 508 | * rtas_pci_enable - enable MMIO or DMA transfers for this slot |
494 | * @pdn pci device node | 509 | * @pdn pci device node |
495 | */ | 510 | */ |
@@ -512,9 +527,13 @@ rtas_pci_enable(struct pci_dn *pdn, int function) | |||
512 | function); | 527 | function); |
513 | 528 | ||
514 | if (rc) | 529 | if (rc) |
515 | printk(KERN_WARNING "EEH: Cannot enable function %d, err=%d dn=%s\n", | 530 | printk(KERN_WARNING "EEH: Unexpected state change %d, err=%d dn=%s\n", |
516 | function, rc, pdn->node->full_name); | 531 | function, rc, pdn->node->full_name); |
517 | 532 | ||
533 | rc = eeh_wait_for_slot_status (pdn, PCI_BUS_RESET_WAIT_MSEC); | ||
534 | if ((rc == 4) && (function == EEH_THAW_MMIO)) | ||
535 | return 0; | ||
536 | |||
518 | return rc; | 537 | return rc; |
519 | } | 538 | } |
520 | 539 | ||
@@ -595,36 +614,24 @@ int rtas_set_slot_reset(struct pci_dn *pdn) | |||
595 | { | 614 | { |
596 | int i, rc; | 615 | int i, rc; |
597 | 616 | ||
598 | __rtas_set_slot_reset(pdn); | 617 | /* Take three shots at resetting the bus */ |
618 | for (i=0; i<3; i++) { | ||
619 | __rtas_set_slot_reset(pdn); | ||
599 | 620 | ||
600 | /* Now double check with the firmware to make sure the device is | 621 | rc = eeh_wait_for_slot_status(pdn, PCI_BUS_RESET_WAIT_MSEC); |
601 | * ready to be used; if not, wait for recovery. */ | ||
602 | for (i=0; i<10; i++) { | ||
603 | rc = eeh_slot_availability (pdn); | ||
604 | if (rc == 0) | 622 | if (rc == 0) |
605 | return 0; | 623 | return 0; |
606 | 624 | ||
607 | if (rc == -2) { | ||
608 | printk (KERN_ERR "EEH: failed (%d) to reset slot %s\n", | ||
609 | i, pdn->node->full_name); | ||
610 | __rtas_set_slot_reset(pdn); | ||
611 | continue; | ||
612 | } | ||
613 | |||
614 | if (rc < 0) { | 625 | if (rc < 0) { |
615 | printk (KERN_ERR "EEH: unrecoverable slot failure %s\n", | 626 | printk (KERN_ERR "EEH: unrecoverable slot failure %s\n", |
616 | pdn->node->full_name); | 627 | pdn->node->full_name); |
617 | return -1; | 628 | return -1; |
618 | } | 629 | } |
619 | 630 | printk (KERN_ERR "EEH: bus reset %d failed on slot %s\n", | |
620 | msleep (rc+100); | 631 | i+1, pdn->node->full_name); |
621 | } | 632 | } |
622 | 633 | ||
623 | rc = eeh_slot_availability (pdn); | 634 | return -1; |
624 | if (rc) | ||
625 | printk (KERN_ERR "EEH: timeout resetting slot %s\n", pdn->node->full_name); | ||
626 | |||
627 | return rc; | ||
628 | } | 635 | } |
629 | 636 | ||
630 | /* ------------------------------------------------------- */ | 637 | /* ------------------------------------------------------- */ |
@@ -744,16 +751,48 @@ struct eeh_early_enable_info { | |||
744 | unsigned int buid_lo; | 751 | unsigned int buid_lo; |
745 | }; | 752 | }; |
746 | 753 | ||
754 | static int get_pe_addr (int config_addr, | ||
755 | struct eeh_early_enable_info *info) | ||
756 | { | ||
757 | unsigned int rets[3]; | ||
758 | int ret; | ||
759 | |||
760 | /* Use latest config-addr token on power6 */ | ||
761 | if (ibm_get_config_addr_info2 != RTAS_UNKNOWN_SERVICE) { | ||
762 | /* Make sure we have a PE in hand */ | ||
763 | ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, | ||
764 | config_addr, info->buid_hi, info->buid_lo, 1); | ||
765 | if (ret || (rets[0]==0)) | ||
766 | return 0; | ||
767 | |||
768 | ret = rtas_call (ibm_get_config_addr_info2, 4, 2, rets, | ||
769 | config_addr, info->buid_hi, info->buid_lo, 0); | ||
770 | if (ret) | ||
771 | return 0; | ||
772 | return rets[0]; | ||
773 | } | ||
774 | |||
775 | /* Use older config-addr token on power5 */ | ||
776 | if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { | ||
777 | ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, | ||
778 | config_addr, info->buid_hi, info->buid_lo, 0); | ||
779 | if (ret) | ||
780 | return 0; | ||
781 | return rets[0]; | ||
782 | } | ||
783 | return 0; | ||
784 | } | ||
785 | |||
747 | /* Enable eeh for the given device node. */ | 786 | /* Enable eeh for the given device node. */ |
748 | static void *early_enable_eeh(struct device_node *dn, void *data) | 787 | static void *early_enable_eeh(struct device_node *dn, void *data) |
749 | { | 788 | { |
750 | unsigned int rets[3]; | 789 | unsigned int rets[3]; |
751 | struct eeh_early_enable_info *info = data; | 790 | struct eeh_early_enable_info *info = data; |
752 | int ret; | 791 | int ret; |
753 | const char *status = get_property(dn, "status", NULL); | 792 | const char *status = of_get_property(dn, "status", NULL); |
754 | const u32 *class_code = get_property(dn, "class-code", NULL); | 793 | const u32 *class_code = of_get_property(dn, "class-code", NULL); |
755 | const u32 *vendor_id = get_property(dn, "vendor-id", NULL); | 794 | const u32 *vendor_id = of_get_property(dn, "vendor-id", NULL); |
756 | const u32 *device_id = get_property(dn, "device-id", NULL); | 795 | const u32 *device_id = of_get_property(dn, "device-id", NULL); |
757 | const u32 *regs; | 796 | const u32 *regs; |
758 | int enable; | 797 | int enable; |
759 | struct pci_dn *pdn = PCI_DN(dn); | 798 | struct pci_dn *pdn = PCI_DN(dn); |
@@ -796,7 +835,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data) | |||
796 | 835 | ||
797 | /* Ok... see if this device supports EEH. Some do, some don't, | 836 | /* Ok... see if this device supports EEH. Some do, some don't, |
798 | * and the only way to find out is to check each and every one. */ | 837 | * and the only way to find out is to check each and every one. */ |
799 | regs = get_property(dn, "reg", NULL); | 838 | regs = of_get_property(dn, "reg", NULL); |
800 | if (regs) { | 839 | if (regs) { |
801 | /* First register entry is addr (00BBSS00) */ | 840 | /* First register entry is addr (00BBSS00) */ |
802 | /* Try to enable eeh */ | 841 | /* Try to enable eeh */ |
@@ -810,15 +849,7 @@ static void *early_enable_eeh(struct device_node *dn, void *data) | |||
810 | 849 | ||
811 | /* If the newer, better, ibm,get-config-addr-info is supported, | 850 | /* If the newer, better, ibm,get-config-addr-info is supported, |
812 | * then use that instead. */ | 851 | * then use that instead. */ |
813 | pdn->eeh_pe_config_addr = 0; | 852 | pdn->eeh_pe_config_addr = get_pe_addr(pdn->eeh_config_addr, info); |
814 | if (ibm_get_config_addr_info != RTAS_UNKNOWN_SERVICE) { | ||
815 | ret = rtas_call (ibm_get_config_addr_info, 4, 2, rets, | ||
816 | pdn->eeh_config_addr, | ||
817 | info->buid_hi, info->buid_lo, | ||
818 | 0); | ||
819 | if (ret == 0) | ||
820 | pdn->eeh_pe_config_addr = rets[0]; | ||
821 | } | ||
822 | 853 | ||
823 | /* Some older systems (Power4) allow the | 854 | /* Some older systems (Power4) allow the |
824 | * ibm,set-eeh-option call to succeed even on nodes | 855 | * ibm,set-eeh-option call to succeed even on nodes |
@@ -889,6 +920,7 @@ void __init eeh_init(void) | |||
889 | ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); | 920 | ibm_read_slot_reset_state = rtas_token("ibm,read-slot-reset-state"); |
890 | ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); | 921 | ibm_slot_error_detail = rtas_token("ibm,slot-error-detail"); |
891 | ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); | 922 | ibm_get_config_addr_info = rtas_token("ibm,get-config-addr-info"); |
923 | ibm_get_config_addr_info2 = rtas_token("ibm,get-config-addr-info2"); | ||
892 | ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); | 924 | ibm_configure_bridge = rtas_token ("ibm,configure-bridge"); |
893 | 925 | ||
894 | if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) | 926 | if (ibm_set_eeh_option == RTAS_UNKNOWN_SERVICE) |