aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinas Vepstas <linas@linas.org>2005-11-03 19:48:52 -0500
committerPaul Mackerras <paulus@samba.org>2005-11-09 19:33:11 -0500
commit177bc9367efe89a642bc9a07351726fd2c31f5ff (patch)
tree04bdde1e9171766f9f46a0adbf2cc5f06691496f
parent56b0fca3a008ddec8c00cfdddbef17bd9ce188e3 (diff)
[PATCH] ppc64: PCI error rate statistics
04-eeh-statistics.patch This minor patch adds some statistics-gathering counters that allow the behaviour of the EEH subsystem to be monitored. While far from perfect, it does provide a rudimentary device that makes understanding of the current state of the system a bit easier. Signed-off-by: Linas Vepstas <linas@linas.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
-rw-r--r--arch/ppc64/kernel/eeh.c43
1 files changed, 32 insertions, 11 deletions
diff --git a/arch/ppc64/kernel/eeh.c b/arch/ppc64/kernel/eeh.c
index 99f11b66b5a0..926d3110dfd7 100644
--- a/arch/ppc64/kernel/eeh.c
+++ b/arch/ppc64/kernel/eeh.c
@@ -102,6 +102,10 @@ static DEFINE_SPINLOCK(slot_errbuf_lock);
102static int eeh_error_buf_size; 102static int eeh_error_buf_size;
103 103
104/* System monitoring statistics */ 104/* System monitoring statistics */
105static DEFINE_PER_CPU(unsigned long, no_device);
106static DEFINE_PER_CPU(unsigned long, no_dn);
107static DEFINE_PER_CPU(unsigned long, no_cfg_addr);
108static DEFINE_PER_CPU(unsigned long, ignored_check);
105static DEFINE_PER_CPU(unsigned long, total_mmio_ffs); 109static DEFINE_PER_CPU(unsigned long, total_mmio_ffs);
106static DEFINE_PER_CPU(unsigned long, false_positives); 110static DEFINE_PER_CPU(unsigned long, false_positives);
107static DEFINE_PER_CPU(unsigned long, ignored_failures); 111static DEFINE_PER_CPU(unsigned long, ignored_failures);
@@ -493,8 +497,6 @@ static void eeh_event_handler(void *dummy)
493 notifier_call_chain (&eeh_notifier_chain, 497 notifier_call_chain (&eeh_notifier_chain,
494 EEH_NOTIFY_FREEZE, event); 498 EEH_NOTIFY_FREEZE, event);
495 499
496 __get_cpu_var(slot_resets)++;
497
498 pci_dev_put(event->dev); 500 pci_dev_put(event->dev);
499 kfree(event); 501 kfree(event);
500 } 502 }
@@ -546,17 +548,24 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
546 if (!eeh_subsystem_enabled) 548 if (!eeh_subsystem_enabled)
547 return 0; 549 return 0;
548 550
549 if (!dn) 551 if (!dn) {
552 __get_cpu_var(no_dn)++;
550 return 0; 553 return 0;
554 }
551 pdn = PCI_DN(dn); 555 pdn = PCI_DN(dn);
552 556
553 /* Access to IO BARs might get this far and still not want checking. */ 557 /* Access to IO BARs might get this far and still not want checking. */
554 if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) || 558 if (!pdn->eeh_capable || !(pdn->eeh_mode & EEH_MODE_SUPPORTED) ||
555 pdn->eeh_mode & EEH_MODE_NOCHECK) { 559 pdn->eeh_mode & EEH_MODE_NOCHECK) {
560 __get_cpu_var(ignored_check)++;
561#ifdef DEBUG
562 printk ("EEH:ignored check for %s %s\n", pci_name (dev), dn->full_name);
563#endif
556 return 0; 564 return 0;
557 } 565 }
558 566
559 if (!pdn->eeh_config_addr) { 567 if (!pdn->eeh_config_addr) {
568 __get_cpu_var(no_cfg_addr)++;
560 return 0; 569 return 0;
561 } 570 }
562 571
@@ -590,6 +599,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
590 599
591 /* prevent repeated reports of this failure */ 600 /* prevent repeated reports of this failure */
592 pdn->eeh_mode |= EEH_MODE_ISOLATED; 601 pdn->eeh_mode |= EEH_MODE_ISOLATED;
602 __get_cpu_var(slot_resets)++;
593 603
594 reset_state = rets[0]; 604 reset_state = rets[0];
595 605
@@ -657,8 +667,10 @@ unsigned long eeh_check_failure(const volatile void __iomem *token, unsigned lon
657 /* Finding the phys addr + pci device; this is pretty quick. */ 667 /* Finding the phys addr + pci device; this is pretty quick. */
658 addr = eeh_token_to_phys((unsigned long __force) token); 668 addr = eeh_token_to_phys((unsigned long __force) token);
659 dev = pci_get_device_by_addr(addr); 669 dev = pci_get_device_by_addr(addr);
660 if (!dev) 670 if (!dev) {
671 __get_cpu_var(no_device)++;
661 return val; 672 return val;
673 }
662 674
663 dn = pci_device_to_OF_node(dev); 675 dn = pci_device_to_OF_node(dev);
664 eeh_dn_check_failure (dn, dev); 676 eeh_dn_check_failure (dn, dev);
@@ -903,12 +915,17 @@ static int proc_eeh_show(struct seq_file *m, void *v)
903 unsigned int cpu; 915 unsigned int cpu;
904 unsigned long ffs = 0, positives = 0, failures = 0; 916 unsigned long ffs = 0, positives = 0, failures = 0;
905 unsigned long resets = 0; 917 unsigned long resets = 0;
918 unsigned long no_dev = 0, no_dn = 0, no_cfg = 0, no_check = 0;
906 919
907 for_each_cpu(cpu) { 920 for_each_cpu(cpu) {
908 ffs += per_cpu(total_mmio_ffs, cpu); 921 ffs += per_cpu(total_mmio_ffs, cpu);
909 positives += per_cpu(false_positives, cpu); 922 positives += per_cpu(false_positives, cpu);
910 failures += per_cpu(ignored_failures, cpu); 923 failures += per_cpu(ignored_failures, cpu);
911 resets += per_cpu(slot_resets, cpu); 924 resets += per_cpu(slot_resets, cpu);
925 no_dev += per_cpu(no_device, cpu);
926 no_dn += per_cpu(no_dn, cpu);
927 no_cfg += per_cpu(no_cfg_addr, cpu);
928 no_check += per_cpu(ignored_check, cpu);
912 } 929 }
913 930
914 if (0 == eeh_subsystem_enabled) { 931 if (0 == eeh_subsystem_enabled) {
@@ -916,13 +933,17 @@ static int proc_eeh_show(struct seq_file *m, void *v)
916 seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs); 933 seq_printf(m, "eeh_total_mmio_ffs=%ld\n", ffs);
917 } else { 934 } else {
918 seq_printf(m, "EEH Subsystem is enabled\n"); 935 seq_printf(m, "EEH Subsystem is enabled\n");
919 seq_printf(m, "eeh_total_mmio_ffs=%ld\n" 936 seq_printf(m,
920 "eeh_false_positives=%ld\n" 937 "no device=%ld\n"
921 "eeh_ignored_failures=%ld\n" 938 "no device node=%ld\n"
922 "eeh_slot_resets=%ld\n" 939 "no config address=%ld\n"
923 "eeh_fail_count=%d\n", 940 "check not wanted=%ld\n"
924 ffs, positives, failures, resets, 941 "eeh_total_mmio_ffs=%ld\n"
925 eeh_fail_count.counter); 942 "eeh_false_positives=%ld\n"
943 "eeh_ignored_failures=%ld\n"
944 "eeh_slot_resets=%ld\n",
945 no_dev, no_dn, no_cfg, no_check,
946 ffs, positives, failures, resets);
926 } 947 }
927 948
928 return 0; 949 return 0;