diff options
author | Linas Vepstas <linas@linas.org> | 2005-11-03 19:48:52 -0500 |
---|---|---|
committer | Paul Mackerras <paulus@samba.org> | 2005-11-09 19:33:11 -0500 |
commit | 177bc9367efe89a642bc9a07351726fd2c31f5ff (patch) | |
tree | 04bdde1e9171766f9f46a0adbf2cc5f06691496f | |
parent | 56b0fca3a008ddec8c00cfdddbef17bd9ce188e3 (diff) |
[PATCH] ppc64: PCI error rate statistics
04-eeh-statistics.patch
This minor patch adds some statistics-gathering counters that allow the
behaviour of the EEH subsystem to be monitored. While far from perfect,
it does provide a rudimentary device that makes understanding of the
current state of the system a bit easier.
Signed-off-by: Linas Vepstas <linas@linas.org>
Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r-- | arch/ppc64/kernel/eeh.c | 43 |
1 files changed, 32 insertions, 11 deletions
diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c index 99f11b66b5a0..926d3110dfd7 100644 --- a/arch/ppc64/kernel/eeh.c +++ b/arch/ppc64/kernel/eeh.c | |||
@@ -102,6 +102,10 @@ static DEFINE_SPINLOCK(slot_errbuf_lock); | |||
102 | static int eeh_error_buf_size; | 102 | static int eeh_error_buf_size; |
103 | 103 | ||
104 | /* System monitoring statistics */ | 104 | /* System monitoring statistics */ |
105 | static DEFINE_PER_CPU(unsigned long, no_device); | ||
106 | static DEFINE_PER_CPU(unsigned long, no_dn); | ||
107 | static DEFINE_PER_CPU(unsigned long, no_cfg_addr); | ||
108 | static DEFINE_PER_CPU(unsigned long, ignored_check); | ||
105 | static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); | 109 | static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); |
106 | static DEFINE_PER_CPU(unsigned long, false_positives); | 110 | static DEFINE_PER_CPU(unsigned long, false_positives); |
107 | static DEFINE_PER_CPU(unsigned long, ignored_failures); | 111 | static DEFINE_PER_CPU(unsigned long, ignored_failures); |
@@ -493,8 +497,6 @@ static void eeh_event_handler(void *dummy) | |||
493 | notifier_call_chain (&eeh_notifier_chain, | 497 | notifier_call_chain (&eeh_notifier_chain, |
494 | EEH_NOTIFY_FREEZE, event); | 498 | EEH_NOTIFY_FREEZE, event); |
495 | 499 | ||
496 | __get_cpu_var(slot_resets)++; | ||
497 | |||
498 | pci_dev_put(event->dev); | 500 | pci_dev_put(event->dev); |
499 | kfree(event); | 501 | kfree(event); |
500 | } | 502 | } |
@@ -546,17 +548,24 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) | |||
546 | if (!eeh_subsystem_enabled) | 548 | if (!eeh_subsystem_enabled) |
547 | return 0; | 549 | return 0; |
548 | 550 | ||
549 | if (!dn) | 551 | if (!dn) { |
552 | __get_cpu_var(no_dn)++; | ||
550 | return 0; | 553 | return 0; |
554 | } | ||
551 | pdn = PCI_DN(dn); | 555 | pdn = PCI_DN(dn); |
552 | 556 | ||
553 | /* Access to IO BARs might get this far and still not want checking. */ | 557 | /* Access to IO BARs might get this far and still not want checking. */ |
554 | if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) || | 558 | if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) || |
555 | pdn->eeh_mode & EEH_MODE_NOCHECK) { | 559 | pdn->eeh_mode & EEH_MODE_NOCHECK) { |
560 | __get_cpu_var(ignored_check)++; | ||
561 | #ifdef DEBUG | ||
562 | printk ("EEH:ignored check for %s %s\n", pci_name (dev), dn->full_name); | ||
563 | #endif | ||
556 | return 0; | 564 | return 0; |
557 | } | 565 | } |
558 | 566 | ||
559 | if (!pdn->eeh_config_addr) { | 567 | if (!pdn->eeh_config_addr) { |
568 | __get_cpu_var(no_cfg_addr)++; | ||
560 | return 0; | 569 | return 0; |
561 | } | 570 | } |
562 | 571 | ||
@@ -590,6 +599,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev) | |||
590 | 599 | ||
591 | /* prevent repeated reports of this failure */ | 600 | /* prevent repeated reports of this failure */ |
592 | pdn->eeh_mode |= EEH_MODE_ISOLATED; | 601 | pdn->eeh_mode |= EEH_MODE_ISOLATED; |
602 | __get_cpu_var(slot_resets)++; | ||
593 | 603 | ||
594 | reset_state = rets[0]; | 604 | reset_state = rets[0]; |
595 | 605 | ||
@@ -657,8 +667,10 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon | |||
657 | /* Finding the phys addr + pci device; this is pretty quick. */ | 667 | /* Finding the phys addr + pci device; this is pretty quick. */ |
658 | addr = eeh_token_to_phys((unsigned long __force) token); | 668 | addr = eeh_token_to_phys((unsigned long __force) token); |
659 | dev = pci_get_device_by_addr(addr); | 669 | dev = pci_get_device_by_addr(addr); |
660 | if (!dev) | 670 | if (!dev) { |
671 | __get_cpu_var(no_device)++; | ||
661 | return val; | 672 | return val; |
673 | } | ||
662 | 674 | ||
663 | dn = pci_device_to_OF_node(dev); | 675 | dn = pci_device_to_OF_node(dev); |
664 | eeh_dn_check_failure (dn, dev); | 676 | eeh_dn_check_failure (dn, dev); |
@@ -903,12 +915,17 @@ static int proc_eeh_show(struct seq_file *m, void *v) | |||
903 | unsigned int cpu; | 915 | unsigned int cpu; |
904 | unsigned long ffs = 0, positives = 0, failures = 0; | 916 | unsigned long ffs = 0, positives = 0, failures = 0; |
905 | unsigned long resets = 0; | 917 | unsigned long resets = 0; |
918 | unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0; | ||
906 | 919 | ||
907 | for_each_cpu(cpu) { | 920 | for_each_cpu(cpu) { |
908 | ffs += per_cpu(total_mmio_ffs, cpu); | 921 | ffs += per_cpu(total_mmio_ffs, cpu); |
909 | positives += per_cpu(false_positives, cpu); | 922 | positives += per_cpu(false_positives, cpu); |
910 | failures += per_cpu(ignored_failures, cpu); | 923 | failures += per_cpu(ignored_failures, cpu); |
911 | resets += per_cpu(slot_resets, cpu); | 924 | resets += per_cpu(slot_resets, cpu); |
925 | no_dev += per_cpu(no_device, cpu); | ||
926 | no_dn += per_cpu(no_dn, cpu); | ||
927 | no_cfg += per_cpu(no_cfg_addr, cpu); | ||
928 | no_check += per_cpu(ignored_check, cpu); | ||
912 | } | 929 | } |
913 | 930 | ||
914 | if (0 == eeh_subsystem_enabled) { | 931 | if (0 == eeh_subsystem_enabled) { |
@@ -916,13 +933,17 @@ static int proc_eeh_show(struct seq_file *m, void *v) | |||
916 | seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); | 933 | seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); |
917 | } else { | 934 | } else { |
918 | seq_printf(m, "EEH Subsystem is enabled\n"); | 935 | seq_printf(m, "EEH Subsystem is enabled\n"); |
919 | seq_printf(m, "eeh_total_mmio_ffs=%ld\n" | 936 | seq_printf(m, |
920 | "eeh_false_positives=%ld\n" | 937 | "no device=%ld\n" |
921 | "eeh_ignored_failures=%ld\n" | 938 | "no device node=%ld\n" |
922 | "eeh_slot_resets=%ld\n" | 939 | "no config address=%ld\n" |
923 | "eeh_fail_count=%d\n", | 940 | "check not wanted=%ld\n" |
924 | ffs, positives, failures, resets, | 941 | "eeh_total_mmio_ffs=%ld\n" |
925 | eeh_fail_count.counter); | 942 | "eeh_false_positives=%ld\n" |
943 | "eeh_ignored_failures=%ld\n" | ||
944 | "eeh_slot_resets=%ld\n", | ||
945 | no_dev, no_dn, no_cfg, no_check, | ||
946 | ffs, positives, failures, resets); | ||
926 | } | 947 | } |
927 | 948 | ||
928 | return 0; | 949 | return 0; |