diff options
author | Rajat Jain <rajatja@google.com> | 2018-06-30 16:07:17 -0400 |
---|---|---|
committer | Bjorn Helgaas <bhelgaas@google.com> | 2018-07-19 17:17:03 -0400 |
commit | db89ccbe52c7885644ba578c7771e57620f879b1 (patch) | |
tree | b64d2f860d9c3b47eb9abf529c6c5424a074ff1a /drivers/pci/pcie/aer.c | |
parent | 60ed982a4e78ff938824a750dbac8a10e5b472ef (diff) |
PCI/AER: Define aer_stats structure for AER capable devices
Define a structure to hold the AER statistics. There are two groups of
statistics: dev_* counters, which are collected for all AER-capable
devices, and rootport_* counters, which are collected for AER-capable
root ports only. Allocate this structure when the device is added and free
it when the device is released, so the counters persist for the lifetime
of the device.
Signed-off-by: Rajat Jain <rajatja@google.com>
Signed-off-by: Bjorn Helgaas <bhelgaas@google.com>
Diffstat (limited to 'drivers/pci/pcie/aer.c')
-rw-r--r-- | drivers/pci/pcie/aer.c | 53 |
1 files changed, 51 insertions, 2 deletions
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 0c6fe22eaf75..fe1b9d22a331 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c | |||
@@ -32,6 +32,9 @@ | |||
32 | 32 | ||
33 | #define AER_ERROR_SOURCES_MAX 100 | 33 | #define AER_ERROR_SOURCES_MAX 100 |
34 | 34 | ||
35 | #define AER_MAX_TYPEOF_COR_ERRS 16 /* as per PCI_ERR_COR_STATUS */ | ||
36 | #define AER_MAX_TYPEOF_UNCOR_ERRS 26 /* as per PCI_ERR_UNCOR_STATUS*/ | ||
37 | |||
35 | struct aer_err_source { | 38 | struct aer_err_source { |
36 | unsigned int status; | 39 | unsigned int status; |
37 | unsigned int id; | 40 | unsigned int id; |
@@ -56,6 +59,42 @@ struct aer_rpc { | |||
56 | */ | 59 | */ |
57 | }; | 60 | }; |
58 | 61 | ||
62 | /* AER stats for the device */ | ||
63 | struct aer_stats { | ||
64 | |||
65 | /* | ||
66 | * Fields for all AER capable devices. They indicate the errors | ||
67 | * "as seen by this device". Note that this may mean that if an | ||
68 | * end point is causing problems, the AER counters may increment | ||
69 | * at its link partner (e.g. root port) because the errors will be | ||
70 | * "seen" by the link partner and not the problematic end point | ||
71 | * itself (which may report all counters as 0 as it never saw any | ||
72 | * problems). | ||
73 | */ | ||
74 | /* Counters for different types of correctable errors */ | ||
75 | u64 dev_cor_errs[AER_MAX_TYPEOF_COR_ERRS]; | ||
76 | /* Counters for different types of fatal uncorrectable errors */ | ||
77 | u64 dev_fatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS]; | ||
78 | /* Counters for different types of nonfatal uncorrectable errors */ | ||
79 | u64 dev_nonfatal_errs[AER_MAX_TYPEOF_UNCOR_ERRS]; | ||
80 | /* Total number of ERR_COR sent by this device */ | ||
81 | u64 dev_total_cor_errs; | ||
82 | /* Total number of ERR_FATAL sent by this device */ | ||
83 | u64 dev_total_fatal_errs; | ||
84 | /* Total number of ERR_NONFATAL sent by this device */ | ||
85 | u64 dev_total_nonfatal_errs; | ||
86 | |||
87 | /* | ||
88 | * Fields for Root ports & root complex event collectors only, these | ||
89 | * indicate the total number of ERR_COR, ERR_FATAL, and ERR_NONFATAL | ||
90 | * messages received by the root port / event collector, INCLUDING the | ||
91 | * ones that are generated internally (by the rootport itself) | ||
92 | */ | ||
93 | u64 rootport_total_cor_errs; | ||
94 | u64 rootport_total_fatal_errs; | ||
95 | u64 rootport_total_nonfatal_errs; | ||
96 | }; | ||
97 | |||
59 | #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ | 98 | #define AER_LOG_TLP_MASKS (PCI_ERR_UNC_POISON_TLP| \ |
60 | PCI_ERR_UNC_ECRC| \ | 99 | PCI_ERR_UNC_ECRC| \ |
61 | PCI_ERR_UNC_UNSUP| \ | 100 | PCI_ERR_UNC_UNSUP| \ |
@@ -385,9 +424,19 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) | |||
385 | void pci_aer_init(struct pci_dev *dev) | 424 | void pci_aer_init(struct pci_dev *dev) |
386 | { | 425 | { |
387 | dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); | 426 | dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); |
427 | |||
428 | if (dev->aer_cap) | ||
429 | dev->aer_stats = kzalloc(sizeof(struct aer_stats), GFP_KERNEL); | ||
430 | |||
388 | pci_cleanup_aer_error_status_regs(dev); | 431 | pci_cleanup_aer_error_status_regs(dev); |
389 | } | 432 | } |
390 | 433 | ||
434 | void pci_aer_exit(struct pci_dev *dev) | ||
435 | { | ||
436 | kfree(dev->aer_stats); | ||
437 | dev->aer_stats = NULL; | ||
438 | } | ||
439 | |||
391 | #define AER_AGENT_RECEIVER 0 | 440 | #define AER_AGENT_RECEIVER 0 |
392 | #define AER_AGENT_REQUESTER 1 | 441 | #define AER_AGENT_REQUESTER 1 |
393 | #define AER_AGENT_COMPLETER 2 | 442 | #define AER_AGENT_COMPLETER 2 |
@@ -438,7 +487,7 @@ static const char *aer_error_layer[] = { | |||
438 | "Transaction Layer" | 487 | "Transaction Layer" |
439 | }; | 488 | }; |
440 | 489 | ||
441 | static const char *aer_correctable_error_string[] = { | 490 | static const char *aer_correctable_error_string[AER_MAX_TYPEOF_COR_ERRS] = { |
442 | "RxErr", /* Bit Position 0 */ | 491 | "RxErr", /* Bit Position 0 */ |
443 | NULL, | 492 | NULL, |
444 | NULL, | 493 | NULL, |
@@ -457,7 +506,7 @@ static const char *aer_correctable_error_string[] = { | |||
457 | "HeaderOF", /* Bit Position 15 */ | 506 | "HeaderOF", /* Bit Position 15 */ |
458 | }; | 507 | }; |
459 | 508 | ||
460 | static const char *aer_uncorrectable_error_string[] = { | 509 | static const char *aer_uncorrectable_error_string[AER_MAX_TYPEOF_UNCOR_ERRS] = { |
461 | "Undefined", /* Bit Position 0 */ | 510 | "Undefined", /* Bit Position 0 */ |
462 | NULL, | 511 | NULL, |
463 | NULL, | 512 | NULL, |