aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac/edac_mc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/edac_mc.c')
-rw-r--r-- drivers/edac/edac_mc.c 590
1 files changed, 303 insertions, 287 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index ea06e3a4dc35..3a7cfe88b169 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -12,7 +12,6 @@
12 * 12 *
13 */ 13 */
14 14
15#include <linux/config.h>
16#include <linux/module.h> 15#include <linux/module.h>
17#include <linux/proc_fs.h> 16#include <linux/proc_fs.h>
18#include <linux/kernel.h> 17#include <linux/kernel.h>
@@ -54,16 +53,17 @@ static int log_ce = 1;
54static int panic_on_ue; 53static int panic_on_ue;
55static int poll_msec = 1000; 54static int poll_msec = 1000;
56 55
57static int check_pci_parity = 0; /* default YES check PCI parity */
58static int panic_on_pci_parity; /* default no panic on PCI Parity */
59static atomic_t pci_parity_count = ATOMIC_INIT(0);
60
61/* lock to memory controller's control array */ 56/* lock to memory controller's control array */
62static DECLARE_MUTEX(mem_ctls_mutex); 57static DECLARE_MUTEX(mem_ctls_mutex);
63static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); 58static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);
64 59
65static struct task_struct *edac_thread; 60static struct task_struct *edac_thread;
66 61
62#ifdef CONFIG_PCI
63static int check_pci_parity = 0; /* default NO check PCI parity */
64static int panic_on_pci_parity; /* default no panic on PCI Parity */
65static atomic_t pci_parity_count = ATOMIC_INIT(0);
66
67/* Structure of the whitelist and blacklist arrays */ 67/* Structure of the whitelist and blacklist arrays */
68struct edac_pci_device_list { 68struct edac_pci_device_list {
69 unsigned int vendor; /* Vendor ID */ 69 unsigned int vendor; /* Vendor ID */
@@ -80,6 +80,12 @@ static int pci_blacklist_count;
80static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES]; 80static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES];
81static int pci_whitelist_count ; 81static int pci_whitelist_count ;
82 82
83#ifndef DISABLE_EDAC_SYSFS
84static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */
85static struct completion edac_pci_kobj_complete;
86#endif /* DISABLE_EDAC_SYSFS */
87#endif /* CONFIG_PCI */
88
83/* START sysfs data and methods */ 89/* START sysfs data and methods */
84 90
85#ifndef DISABLE_EDAC_SYSFS 91#ifndef DISABLE_EDAC_SYSFS
@@ -127,18 +133,15 @@ static struct sysdev_class edac_class = {
127 set_kset_name("edac"), 133 set_kset_name("edac"),
128}; 134};
129 135
130/* sysfs objects: 136/* sysfs object:
131 * /sys/devices/system/edac/mc 137 * /sys/devices/system/edac/mc
132 * /sys/devices/system/edac/pci
133 */ 138 */
134static struct kobject edac_memctrl_kobj; 139static struct kobject edac_memctrl_kobj;
135static struct kobject edac_pci_kobj;
136 140
137/* We use these to wait for the reference counts on edac_memctrl_kobj and 141/* We use these to wait for the reference counts on edac_memctrl_kobj and
138 * edac_pci_kobj to reach 0. 142 * edac_pci_kobj to reach 0.
139 */ 143 */
140static struct completion edac_memctrl_kobj_complete; 144static struct completion edac_memctrl_kobj_complete;
141static struct completion edac_pci_kobj_complete;
142 145
143/* 146/*
144 * /sys/devices/system/edac/mc; 147 * /sys/devices/system/edac/mc;
@@ -324,6 +327,8 @@ static void edac_sysfs_memctrl_teardown(void)
324#endif /* DISABLE_EDAC_SYSFS */ 327#endif /* DISABLE_EDAC_SYSFS */
325} 328}
326 329
330#ifdef CONFIG_PCI
331
327#ifndef DISABLE_EDAC_SYSFS 332#ifndef DISABLE_EDAC_SYSFS
328 333
329/* 334/*
@@ -624,6 +629,252 @@ static void edac_sysfs_pci_teardown(void)
624#endif 629#endif
625} 630}
626 631
632
633static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
634{
635 int where;
636 u16 status;
637
638 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
639 pci_read_config_word(dev, where, &status);
640
641 /* If we get back 0xFFFF then we must suspect that the card has been
642 * pulled but the Linux PCI layer has not yet finished cleaning up.
643 * We don't want to report on such devices
644 */
645
646 if (status == 0xFFFF) {
647 u32 sanity;
648
649 pci_read_config_dword(dev, 0, &sanity);
650
651 if (sanity == 0xFFFFFFFF)
652 return 0;
653 }
654
655 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
656 PCI_STATUS_PARITY;
657
658 if (status)
659 /* reset only the bits we are interested in */
660 pci_write_config_word(dev, where, status);
661
662 return status;
663}
664
665typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
666
667/* Clear any PCI parity errors logged by this device. */
668static void edac_pci_dev_parity_clear(struct pci_dev *dev)
669{
670 u8 header_type;
671
672 get_pci_parity_status(dev, 0);
673
674 /* read the device TYPE, looking for bridges */
675 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
676
677 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
678 get_pci_parity_status(dev, 1);
679}
680
681/*
682 * PCI Parity polling
683 *
684 */
685static void edac_pci_dev_parity_test(struct pci_dev *dev)
686{
687 u16 status;
688 u8 header_type;
689
690 /* read the STATUS register on this device
691 */
692 status = get_pci_parity_status(dev, 0);
693
694 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
695
696 /* check the status reg for errors */
697 if (status) {
698 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
699 edac_printk(KERN_CRIT, EDAC_PCI,
700 "Signaled System Error on %s\n",
701 pci_name(dev));
702
703 if (status & (PCI_STATUS_PARITY)) {
704 edac_printk(KERN_CRIT, EDAC_PCI,
705 "Master Data Parity Error on %s\n",
706 pci_name(dev));
707
708 atomic_inc(&pci_parity_count);
709 }
710
711 if (status & (PCI_STATUS_DETECTED_PARITY)) {
712 edac_printk(KERN_CRIT, EDAC_PCI,
713 "Detected Parity Error on %s\n",
714 pci_name(dev));
715
716 atomic_inc(&pci_parity_count);
717 }
718 }
719
720 /* read the device TYPE, looking for bridges */
721 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
722
723 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
724
725 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
726 /* On bridges, need to examine secondary status register */
727 status = get_pci_parity_status(dev, 1);
728
729 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
730 status, dev->dev.bus_id );
731
732 /* check the secondary status reg for errors */
733 if (status) {
734 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
735 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
736 "Signaled System Error on %s\n",
737 pci_name(dev));
738
739 if (status & (PCI_STATUS_PARITY)) {
740 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
741 "Master Data Parity Error on "
742 "%s\n", pci_name(dev));
743
744 atomic_inc(&pci_parity_count);
745 }
746
747 if (status & (PCI_STATUS_DETECTED_PARITY)) {
748 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
749 "Detected Parity Error on %s\n",
750 pci_name(dev));
751
752 atomic_inc(&pci_parity_count);
753 }
754 }
755 }
756}
757
758/*
759 * check_dev_on_list: Scan for a PCI device on a white/black list
760 * @list: an EDAC &edac_pci_device_list white/black list pointer
761 * @free_index: index of next free entry on the list
762 * @dev: PCI Device pointer
763 *
764 * see if list contains the device.
765 *
766 * Returns: 0 not found
767 * 1 found on list
768 */
769static int check_dev_on_list(struct edac_pci_device_list *list,
770 int free_index, struct pci_dev *dev)
771{
772 int i;
773 int rc = 0; /* Assume not found */
774 unsigned short vendor=dev->vendor;
775 unsigned short device=dev->device;
776
777 /* Scan the list, looking for a vendor/device match */
778 for (i = 0; i < free_index; i++, list++ ) {
779 if ((list->vendor == vendor ) && (list->device == device )) {
780 rc = 1;
781 break;
782 }
783 }
784
785 return rc;
786}
787
788/*
789 * pci_dev parity list iterator
790 * Scan the PCI device list for one iteration, looking for SERRORs
791 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
792 */
793static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
794{
795 struct pci_dev *dev = NULL;
796
797 /* request for kernel access to the next PCI device, if any,
798 * and while we are looking at it have its reference count
799 * bumped until we are done with it
800 */
801 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
802 /* if whitelist exists then it has priority, so only scan
803 * those devices on the whitelist
804 */
805 if (pci_whitelist_count > 0 ) {
806 if (check_dev_on_list(pci_whitelist,
807 pci_whitelist_count, dev))
808 fn(dev);
809 } else {
810 /*
811 * if no whitelist, then check if this device is
812 * blacklisted
813 */
814 if (!check_dev_on_list(pci_blacklist,
815 pci_blacklist_count, dev))
816 fn(dev);
817 }
818 }
819}
820
821static void do_pci_parity_check(void)
822{
823 unsigned long flags;
824 int before_count;
825
826 debugf3("%s()\n", __func__);
827
828 if (!check_pci_parity)
829 return;
830
831 before_count = atomic_read(&pci_parity_count);
832
833 /* scan all PCI devices looking for a Parity Error on devices and
834 * bridges
835 */
836 local_irq_save(flags);
837 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
838 local_irq_restore(flags);
839
840 /* Only if operator has selected panic on PCI Error */
841 if (panic_on_pci_parity) {
842 /* If the count is different 'after' from 'before' */
843 if (before_count != atomic_read(&pci_parity_count))
844 panic("EDAC: PCI Parity Error");
845 }
846}
847
848static inline void clear_pci_parity_errors(void)
849{
850 /* Clear any PCI bus parity errors that devices initially have logged
851 * in their registers.
852 */
853 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
854}
855
856#else /* CONFIG_PCI */
857
858static inline void do_pci_parity_check(void)
859{
860 /* no-op */
861}
862
863static inline void clear_pci_parity_errors(void)
864{
865 /* no-op */
866}
867
868static void edac_sysfs_pci_teardown(void)
869{
870}
871
872static int edac_sysfs_pci_setup(void)
873{
874 return 0;
875}
876#endif /* CONFIG_PCI */
877
627#ifndef DISABLE_EDAC_SYSFS 878#ifndef DISABLE_EDAC_SYSFS
628 879
629/* EDAC sysfs CSROW data structures and methods */ 880/* EDAC sysfs CSROW data structures and methods */
@@ -1132,7 +1383,7 @@ static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
1132 return err; 1383 return err;
1133 1384
1134 /* create a symlink for the device */ 1385 /* create a symlink for the device */
1135 err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj, 1386 err = sysfs_create_link(edac_mci_kobj, &mci->dev->kobj,
1136 EDAC_DEVICE_SYMLINK); 1387 EDAC_DEVICE_SYMLINK);
1137 1388
1138 if (err) 1389 if (err)
@@ -1238,7 +1489,7 @@ void edac_mc_dump_mci(struct mem_ctl_info *mci)
1238 debugf4("\tmci->edac_check = %p\n", mci->edac_check); 1489 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
1239 debugf3("\tmci->nr_csrows = %d, csrows = %p\n", 1490 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
1240 mci->nr_csrows, mci->csrows); 1491 mci->nr_csrows, mci->csrows);
1241 debugf3("\tpdev = %p\n", mci->pdev); 1492 debugf3("\tdev = %p\n", mci->dev);
1242 debugf3("\tmod_name:ctl_name = %s:%s\n", 1493 debugf3("\tmod_name:ctl_name = %s:%s\n",
1243 mci->mod_name, mci->ctl_name); 1494 mci->mod_name, mci->ctl_name);
1244 debugf3("\tpvt_info = %p\n\n", mci->pvt_info); 1495 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
@@ -1363,7 +1614,7 @@ void edac_mc_free(struct mem_ctl_info *mci)
1363} 1614}
1364EXPORT_SYMBOL_GPL(edac_mc_free); 1615EXPORT_SYMBOL_GPL(edac_mc_free);
1365 1616
1366static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev) 1617static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
1367{ 1618{
1368 struct mem_ctl_info *mci; 1619 struct mem_ctl_info *mci;
1369 struct list_head *item; 1620 struct list_head *item;
@@ -1373,54 +1624,53 @@ static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev)
1373 list_for_each(item, &mc_devices) { 1624 list_for_each(item, &mc_devices) {
1374 mci = list_entry(item, struct mem_ctl_info, link); 1625 mci = list_entry(item, struct mem_ctl_info, link);
1375 1626
1376 if (mci->pdev == pdev) 1627 if (mci->dev == dev)
1377 return mci; 1628 return mci;
1378 } 1629 }
1379 1630
1380 return NULL; 1631 return NULL;
1381} 1632}
1382 1633
1383static int add_mc_to_global_list(struct mem_ctl_info *mci) 1634/* Return 0 on success, 1 on failure.
1635 * Before calling this function, caller must
1636 * assign a unique value to mci->mc_idx.
1637 */
1638static int add_mc_to_global_list (struct mem_ctl_info *mci)
1384{ 1639{
1385 struct list_head *item, *insert_before; 1640 struct list_head *item, *insert_before;
1386 struct mem_ctl_info *p; 1641 struct mem_ctl_info *p;
1387 int i;
1388 1642
1389 if (list_empty(&mc_devices)) { 1643 insert_before = &mc_devices;
1390 mci->mc_idx = 0;
1391 insert_before = &mc_devices;
1392 } else {
1393 if (find_mci_by_pdev(mci->pdev)) {
1394 edac_printk(KERN_WARNING, EDAC_MC,
1395 "%s (%s) %s %s already assigned %d\n",
1396 mci->pdev->dev.bus_id,
1397 pci_name(mci->pdev), mci->mod_name,
1398 mci->ctl_name, mci->mc_idx);
1399 return 1;
1400 }
1401 1644
1402 insert_before = NULL; 1645 if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL))
1403 i = 0; 1646 goto fail0;
1404 1647
1405 list_for_each(item, &mc_devices) { 1648 list_for_each(item, &mc_devices) {
1406 p = list_entry(item, struct mem_ctl_info, link); 1649 p = list_entry(item, struct mem_ctl_info, link);
1407 1650
1408 if (p->mc_idx != i) { 1651 if (p->mc_idx >= mci->mc_idx) {
1409 insert_before = item; 1652 if (unlikely(p->mc_idx == mci->mc_idx))
1410 break; 1653 goto fail1;
1411 }
1412 1654
1413 i++; 1655 insert_before = item;
1656 break;
1414 } 1657 }
1415
1416 mci->mc_idx = i;
1417
1418 if (insert_before == NULL)
1419 insert_before = &mc_devices;
1420 } 1658 }
1421 1659
1422 list_add_tail_rcu(&mci->link, insert_before); 1660 list_add_tail_rcu(&mci->link, insert_before);
1423 return 0; 1661 return 0;
1662
1663fail0:
1664 edac_printk(KERN_WARNING, EDAC_MC,
1665 "%s (%s) %s %s already assigned %d\n", p->dev->bus_id,
1666 dev_name(p->dev), p->mod_name, p->ctl_name, p->mc_idx);
1667 return 1;
1668
1669fail1:
1670 edac_printk(KERN_WARNING, EDAC_MC,
1671 "bug in low-level driver: attempt to assign\n"
1672 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
1673 return 1;
1424} 1674}
1425 1675
1426static void complete_mc_list_del(struct rcu_head *head) 1676static void complete_mc_list_del(struct rcu_head *head)
@@ -1444,6 +1694,7 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
1444 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and 1694 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
1445 * create sysfs entries associated with mci structure 1695 * create sysfs entries associated with mci structure
1446 * @mci: pointer to the mci structure to be added to the list 1696 * @mci: pointer to the mci structure to be added to the list
1697 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
1447 * 1698 *
1448 * Return: 1699 * Return:
1449 * 0 Success 1700 * 0 Success
@@ -1451,9 +1702,10 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
1451 */ 1702 */
1452 1703
1453/* FIXME - should a warning be printed if no error detection? correction? */ 1704/* FIXME - should a warning be printed if no error detection? correction? */
1454int edac_mc_add_mc(struct mem_ctl_info *mci) 1705int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx)
1455{ 1706{
1456 debugf0("%s()\n", __func__); 1707 debugf0("%s()\n", __func__);
1708 mci->mc_idx = mc_idx;
1457#ifdef CONFIG_EDAC_DEBUG 1709#ifdef CONFIG_EDAC_DEBUG
1458 if (edac_debug_level >= 3) 1710 if (edac_debug_level >= 3)
1459 edac_mc_dump_mci(mci); 1711 edac_mc_dump_mci(mci);
@@ -1486,8 +1738,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
1486 } 1738 }
1487 1739
1488 /* Report action taken */ 1740 /* Report action taken */
1489 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: PCI %s\n", 1741 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n",
1490 mci->mod_name, mci->ctl_name, pci_name(mci->pdev)); 1742 mci->mod_name, mci->ctl_name, dev_name(mci->dev));
1491 1743
1492 up(&mem_ctls_mutex); 1744 up(&mem_ctls_mutex);
1493 return 0; 1745 return 0;
@@ -1504,18 +1756,18 @@ EXPORT_SYMBOL_GPL(edac_mc_add_mc);
1504/** 1756/**
1505 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and 1757 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
1506 * remove mci structure from global list 1758 * remove mci structure from global list
1507 * @pdev: Pointer to 'struct pci_dev' representing mci structure to remove. 1759 * @dev: Pointer to 'struct device' representing mci structure to remove.
1508 * 1760 *
1509 * Return pointer to removed mci structure, or NULL if device not found. 1761 * Return pointer to removed mci structure, or NULL if device not found.
1510 */ 1762 */
1511struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev) 1763struct mem_ctl_info * edac_mc_del_mc(struct device *dev)
1512{ 1764{
1513 struct mem_ctl_info *mci; 1765 struct mem_ctl_info *mci;
1514 1766
1515 debugf0("MC: %s()\n", __func__); 1767 debugf0("MC: %s()\n", __func__);
1516 down(&mem_ctls_mutex); 1768 down(&mem_ctls_mutex);
1517 1769
1518 if ((mci = find_mci_by_pdev(pdev)) == NULL) { 1770 if ((mci = find_mci_by_dev(dev)) == NULL) {
1519 up(&mem_ctls_mutex); 1771 up(&mem_ctls_mutex);
1520 return NULL; 1772 return NULL;
1521 } 1773 }
@@ -1524,8 +1776,8 @@ struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev)
1524 del_mc_from_global_list(mci); 1776 del_mc_from_global_list(mci);
1525 up(&mem_ctls_mutex); 1777 up(&mem_ctls_mutex);
1526 edac_printk(KERN_INFO, EDAC_MC, 1778 edac_printk(KERN_INFO, EDAC_MC,
1527 "Removed device %d for %s %s: PCI %s\n", mci->mc_idx, 1779 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
1528 mci->mod_name, mci->ctl_name, pci_name(mci->pdev)); 1780 mci->mod_name, mci->ctl_name, dev_name(mci->dev));
1529 return mci; 1781 return mci;
1530} 1782}
1531EXPORT_SYMBOL_GPL(edac_mc_del_mc); 1783EXPORT_SYMBOL_GPL(edac_mc_del_mc);
@@ -1739,244 +1991,6 @@ void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
1739} 1991}
1740EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); 1992EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
1741 1993
1742#ifdef CONFIG_PCI
1743
1744static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
1745{
1746 int where;
1747 u16 status;
1748
1749 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
1750 pci_read_config_word(dev, where, &status);
1751
1752 /* If we get back 0xFFFF then we must suspect that the card has been
1753 * pulled but the Linux PCI layer has not yet finished cleaning up.
1754 * We don't want to report on such devices
1755 */
1756
1757 if (status == 0xFFFF) {
1758 u32 sanity;
1759
1760 pci_read_config_dword(dev, 0, &sanity);
1761
1762 if (sanity == 0xFFFFFFFF)
1763 return 0;
1764 }
1765
1766 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
1767 PCI_STATUS_PARITY;
1768
1769 if (status)
1770 /* reset only the bits we are interested in */
1771 pci_write_config_word(dev, where, status);
1772
1773 return status;
1774}
1775
1776typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
1777
1778/* Clear any PCI parity errors logged by this device. */
1779static void edac_pci_dev_parity_clear(struct pci_dev *dev)
1780{
1781 u8 header_type;
1782
1783 get_pci_parity_status(dev, 0);
1784
1785 /* read the device TYPE, looking for bridges */
1786 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1787
1788 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
1789 get_pci_parity_status(dev, 1);
1790}
1791
1792/*
1793 * PCI Parity polling
1794 *
1795 */
1796static void edac_pci_dev_parity_test(struct pci_dev *dev)
1797{
1798 u16 status;
1799 u8 header_type;
1800
1801 /* read the STATUS register on this device
1802 */
1803 status = get_pci_parity_status(dev, 0);
1804
1805 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
1806
1807 /* check the status reg for errors */
1808 if (status) {
1809 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1810 edac_printk(KERN_CRIT, EDAC_PCI,
1811 "Signaled System Error on %s\n",
1812 pci_name(dev));
1813
1814 if (status & (PCI_STATUS_PARITY)) {
1815 edac_printk(KERN_CRIT, EDAC_PCI,
1816 "Master Data Parity Error on %s\n",
1817 pci_name(dev));
1818
1819 atomic_inc(&pci_parity_count);
1820 }
1821
1822 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1823 edac_printk(KERN_CRIT, EDAC_PCI,
1824 "Detected Parity Error on %s\n",
1825 pci_name(dev));
1826
1827 atomic_inc(&pci_parity_count);
1828 }
1829 }
1830
1831 /* read the device TYPE, looking for bridges */
1832 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1833
1834 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
1835
1836 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
1837 /* On bridges, need to examine secondary status register */
1838 status = get_pci_parity_status(dev, 1);
1839
1840 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
1841 status, dev->dev.bus_id );
1842
1843 /* check the secondary status reg for errors */
1844 if (status) {
1845 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1846 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1847 "Signaled System Error on %s\n",
1848 pci_name(dev));
1849
1850 if (status & (PCI_STATUS_PARITY)) {
1851 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1852 "Master Data Parity Error on "
1853 "%s\n", pci_name(dev));
1854
1855 atomic_inc(&pci_parity_count);
1856 }
1857
1858 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1859 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1860 "Detected Parity Error on %s\n",
1861 pci_name(dev));
1862
1863 atomic_inc(&pci_parity_count);
1864 }
1865 }
1866 }
1867}
1868
1869/*
1870 * check_dev_on_list: Scan for a PCI device on a white/black list
1871 * @list: an EDAC &edac_pci_device_list white/black list pointer
1872 * @free_index: index of next free entry on the list
1873 * @pci_dev: PCI Device pointer
1874 *
1875 * see if list contains the device.
1876 *
1877 * Returns: 0 not found
1878 * 1 found on list
1879 */
1880static int check_dev_on_list(struct edac_pci_device_list *list,
1881 int free_index, struct pci_dev *dev)
1882{
1883 int i;
1884 int rc = 0; /* Assume not found */
1885 unsigned short vendor=dev->vendor;
1886 unsigned short device=dev->device;
1887
1888 /* Scan the list, looking for a vendor/device match */
1889 for (i = 0; i < free_index; i++, list++ ) {
1890 if ((list->vendor == vendor ) && (list->device == device )) {
1891 rc = 1;
1892 break;
1893 }
1894 }
1895
1896 return rc;
1897}
1898
1899/*
1900 * pci_dev parity list iterator
1901 * Scan the PCI device list for one iteration, looking for SERRORs
1902 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
1903 */
1904static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
1905{
1906 struct pci_dev *dev = NULL;
1907
1908 /* request for kernel access to the next PCI device, if any,
1909 * and while we are looking at it have its reference count
1910 * bumped until we are done with it
1911 */
1912 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1913 /* if whitelist exists then it has priority, so only scan
1914 * those devices on the whitelist
1915 */
1916 if (pci_whitelist_count > 0 ) {
1917 if (check_dev_on_list(pci_whitelist,
1918 pci_whitelist_count, dev))
1919 fn(dev);
1920 } else {
1921 /*
1922 * if no whitelist, then check if this devices is
1923 * blacklisted
1924 */
1925 if (!check_dev_on_list(pci_blacklist,
1926 pci_blacklist_count, dev))
1927 fn(dev);
1928 }
1929 }
1930}
1931
1932static void do_pci_parity_check(void)
1933{
1934 unsigned long flags;
1935 int before_count;
1936
1937 debugf3("%s()\n", __func__);
1938
1939 if (!check_pci_parity)
1940 return;
1941
1942 before_count = atomic_read(&pci_parity_count);
1943
1944 /* scan all PCI devices looking for a Parity Error on devices and
1945 * bridges
1946 */
1947 local_irq_save(flags);
1948 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
1949 local_irq_restore(flags);
1950
1951 /* Only if operator has selected panic on PCI Error */
1952 if (panic_on_pci_parity) {
1953 /* If the count is different 'after' from 'before' */
1954 if (before_count != atomic_read(&pci_parity_count))
1955 panic("EDAC: PCI Parity Error");
1956 }
1957}
1958
1959static inline void clear_pci_parity_errors(void)
1960{
1961 /* Clear any PCI bus parity errors that devices initially have logged
1962 * in their registers.
1963 */
1964 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
1965}
1966
1967#else /* CONFIG_PCI */
1968
1969static inline void do_pci_parity_check(void)
1970{
1971 /* no-op */
1972}
1973
1974static inline void clear_pci_parity_errors(void)
1975{
1976 /* no-op */
1977}
1978
1979#endif /* CONFIG_PCI */
1980 1994
1981/* 1995/*
1982 * Iterate over all MC instances and check for ECC, et al, errors 1996 * Iterate over all MC instances and check for ECC, et al, errors
@@ -2096,10 +2110,12 @@ MODULE_DESCRIPTION("Core library routines for MC reporting");
2096 2110
2097module_param(panic_on_ue, int, 0644); 2111module_param(panic_on_ue, int, 0644);
2098MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); 2112MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
2113#ifdef CONFIG_PCI
2099module_param(check_pci_parity, int, 0644); 2114module_param(check_pci_parity, int, 0644);
2100MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); 2115MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on");
2101module_param(panic_on_pci_parity, int, 0644); 2116module_param(panic_on_pci_parity, int, 0644);
2102MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); 2117MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on");
2118#endif
2103module_param(log_ue, int, 0644); 2119module_param(log_ue, int, 0644);
2104MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); 2120MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on");
2105module_param(log_ce, int, 0644); 2121module_param(log_ce, int, 0644);