about summary refs log tree commit diff stats
path: root/drivers/edac/edac_mc.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/edac/edac_mc.c')
-rw-r--r-- drivers/edac/edac_mc.c 589
1 files changed, 303 insertions, 286 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 0499782db7c7..3a7cfe88b169 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -53,16 +53,17 @@ static int log_ce = 1;
53static int panic_on_ue; 53static int panic_on_ue;
54static int poll_msec = 1000; 54static int poll_msec = 1000;
55 55
56static int check_pci_parity = 0; /* default YES check PCI parity */
57static int panic_on_pci_parity; /* default no panic on PCI Parity */
58static atomic_t pci_parity_count = ATOMIC_INIT(0);
59
60/* lock to memory controller's control array */ 56/* lock to memory controller's control array */
61static DECLARE_MUTEX(mem_ctls_mutex); 57static DECLARE_MUTEX(mem_ctls_mutex);
62static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); 58static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices);
63 59
64static struct task_struct *edac_thread; 60static struct task_struct *edac_thread;
65 61
62#ifdef CONFIG_PCI
63static int check_pci_parity = 0; /* default NO check PCI parity */
64static int panic_on_pci_parity; /* default no panic on PCI Parity */
65static atomic_t pci_parity_count = ATOMIC_INIT(0);
66
66/* Structure of the whitelist and blacklist arrays */ 67/* Structure of the whitelist and blacklist arrays */
67struct edac_pci_device_list { 68struct edac_pci_device_list {
68 unsigned int vendor; /* Vendor ID */ 69 unsigned int vendor; /* Vendor ID */
@@ -79,6 +80,12 @@ static int pci_blacklist_count;
79static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES]; 80static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES];
80static int pci_whitelist_count ; 81static int pci_whitelist_count ;
81 82
83#ifndef DISABLE_EDAC_SYSFS
84static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */
85static struct completion edac_pci_kobj_complete;
86#endif /* DISABLE_EDAC_SYSFS */
87#endif /* CONFIG_PCI */
88
82/* START sysfs data and methods */ 89/* START sysfs data and methods */
83 90
84#ifndef DISABLE_EDAC_SYSFS 91#ifndef DISABLE_EDAC_SYSFS
@@ -126,18 +133,15 @@ static struct sysdev_class edac_class = {
126 set_kset_name("edac"), 133 set_kset_name("edac"),
127}; 134};
128 135
129/* sysfs objects: 136/* sysfs object:
130 * /sys/devices/system/edac/mc 137 * /sys/devices/system/edac/mc
131 * /sys/devices/system/edac/pci
132 */ 138 */
133static struct kobject edac_memctrl_kobj; 139static struct kobject edac_memctrl_kobj;
134static struct kobject edac_pci_kobj;
135 140
136/* We use these to wait for the reference counts on edac_memctrl_kobj and 141/* We use these to wait for the reference counts on edac_memctrl_kobj and
137 * edac_pci_kobj to reach 0. 142 * edac_pci_kobj to reach 0.
138 */ 143 */
139static struct completion edac_memctrl_kobj_complete; 144static struct completion edac_memctrl_kobj_complete;
140static struct completion edac_pci_kobj_complete;
141 145
142/* 146/*
143 * /sys/devices/system/edac/mc; 147 * /sys/devices/system/edac/mc;
@@ -323,6 +327,8 @@ static void edac_sysfs_memctrl_teardown(void)
323#endif /* DISABLE_EDAC_SYSFS */ 327#endif /* DISABLE_EDAC_SYSFS */
324} 328}
325 329
330#ifdef CONFIG_PCI
331
326#ifndef DISABLE_EDAC_SYSFS 332#ifndef DISABLE_EDAC_SYSFS
327 333
328/* 334/*
@@ -623,6 +629,252 @@ static void edac_sysfs_pci_teardown(void)
623#endif 629#endif
624} 630}
625 631
632
633static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
634{
635 int where;
636 u16 status;
637
638 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
639 pci_read_config_word(dev, where, &status);
640
641 /* If we get back 0xFFFF then we must suspect that the card has been
642 * pulled but the Linux PCI layer has not yet finished cleaning up.
643 * We don't want to report on such devices
644 */
645
646 if (status == 0xFFFF) {
647 u32 sanity;
648
649 pci_read_config_dword(dev, 0, &sanity);
650
651 if (sanity == 0xFFFFFFFF)
652 return 0;
653 }
654
655 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
656 PCI_STATUS_PARITY;
657
658 if (status)
659 /* reset only the bits we are interested in */
660 pci_write_config_word(dev, where, status);
661
662 return status;
663}
664
665typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
666
667/* Clear any PCI parity errors logged by this device. */
668static void edac_pci_dev_parity_clear(struct pci_dev *dev)
669{
670 u8 header_type;
671
672 get_pci_parity_status(dev, 0);
673
674 /* read the device TYPE, looking for bridges */
675 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
676
677 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
678 get_pci_parity_status(dev, 1);
679}
680
681/*
682 * PCI Parity polling
683 *
684 */
685static void edac_pci_dev_parity_test(struct pci_dev *dev)
686{
687 u16 status;
688 u8 header_type;
689
690 /* read the STATUS register on this device
691 */
692 status = get_pci_parity_status(dev, 0);
693
694 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
695
696 /* check the status reg for errors */
697 if (status) {
698 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
699 edac_printk(KERN_CRIT, EDAC_PCI,
700 "Signaled System Error on %s\n",
701 pci_name(dev));
702
703 if (status & (PCI_STATUS_PARITY)) {
704 edac_printk(KERN_CRIT, EDAC_PCI,
705 "Master Data Parity Error on %s\n",
706 pci_name(dev));
707
708 atomic_inc(&pci_parity_count);
709 }
710
711 if (status & (PCI_STATUS_DETECTED_PARITY)) {
712 edac_printk(KERN_CRIT, EDAC_PCI,
713 "Detected Parity Error on %s\n",
714 pci_name(dev));
715
716 atomic_inc(&pci_parity_count);
717 }
718 }
719
720 /* read the device TYPE, looking for bridges */
721 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
722
723 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
724
725 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
726 /* On bridges, need to examine secondary status register */
727 status = get_pci_parity_status(dev, 1);
728
729 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
730 status, dev->dev.bus_id );
731
732 /* check the secondary status reg for errors */
733 if (status) {
734 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
735 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
736 "Signaled System Error on %s\n",
737 pci_name(dev));
738
739 if (status & (PCI_STATUS_PARITY)) {
740 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
741 "Master Data Parity Error on "
742 "%s\n", pci_name(dev));
743
744 atomic_inc(&pci_parity_count);
745 }
746
747 if (status & (PCI_STATUS_DETECTED_PARITY)) {
748 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
749 "Detected Parity Error on %s\n",
750 pci_name(dev));
751
752 atomic_inc(&pci_parity_count);
753 }
754 }
755 }
756}
757
758/*
759 * check_dev_on_list: Scan for a PCI device on a white/black list
760 * @list: an EDAC &edac_pci_device_list white/black list pointer
761 * @free_index: index of next free entry on the list
762 * @dev: PCI Device pointer
763 *
764 * see if list contains the device.
765 *
766 * Returns: 0 not found
767 * 1 found on list
768 */
769static int check_dev_on_list(struct edac_pci_device_list *list,
770 int free_index, struct pci_dev *dev)
771{
772 int i;
773 int rc = 0; /* Assume not found */
774 unsigned short vendor=dev->vendor;
775 unsigned short device=dev->device;
776
777 /* Scan the list, looking for a vendor/device match */
778 for (i = 0; i < free_index; i++, list++ ) {
779 if ((list->vendor == vendor ) && (list->device == device )) {
780 rc = 1;
781 break;
782 }
783 }
784
785 return rc;
786}
787
788/*
789 * pci_dev parity list iterator
790 * Scan the PCI device list for one iteration, looking for SERRORs
791 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
792 */
793static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
794{
795 struct pci_dev *dev = NULL;
796
797 /* request for kernel access to the next PCI device, if any,
798 * and while we are looking at it have its reference count
799 * bumped until we are done with it
800 */
801 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
802 /* if whitelist exists then it has priority, so only scan
803 * those devices on the whitelist
804 */
805 if (pci_whitelist_count > 0 ) {
806 if (check_dev_on_list(pci_whitelist,
807 pci_whitelist_count, dev))
808 fn(dev);
809 } else {
810 /*
811 * if no whitelist, then check if this device is
812 * blacklisted
813 */
814 if (!check_dev_on_list(pci_blacklist,
815 pci_blacklist_count, dev))
816 fn(dev);
817 }
818 }
819}
820
821static void do_pci_parity_check(void)
822{
823 unsigned long flags;
824 int before_count;
825
826 debugf3("%s()\n", __func__);
827
828 if (!check_pci_parity)
829 return;
830
831 before_count = atomic_read(&pci_parity_count);
832
833 /* scan all PCI devices looking for a Parity Error on devices and
834 * bridges
835 */
836 local_irq_save(flags);
837 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
838 local_irq_restore(flags);
839
840 /* Only if operator has selected panic on PCI Error */
841 if (panic_on_pci_parity) {
842 /* If the count is different 'after' from 'before' */
843 if (before_count != atomic_read(&pci_parity_count))
844 panic("EDAC: PCI Parity Error");
845 }
846}
847
848static inline void clear_pci_parity_errors(void)
849{
850 /* Clear any PCI bus parity errors that devices initially have logged
851 * in their registers.
852 */
853 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
854}
855
856#else /* CONFIG_PCI */
857
858static inline void do_pci_parity_check(void)
859{
860 /* no-op */
861}
862
863static inline void clear_pci_parity_errors(void)
864{
865 /* no-op */
866}
867
868static void edac_sysfs_pci_teardown(void)
869{
870}
871
872static int edac_sysfs_pci_setup(void)
873{
874 return 0;
875}
876#endif /* CONFIG_PCI */
877
626#ifndef DISABLE_EDAC_SYSFS 878#ifndef DISABLE_EDAC_SYSFS
627 879
628/* EDAC sysfs CSROW data structures and methods */ 880/* EDAC sysfs CSROW data structures and methods */
@@ -1131,7 +1383,7 @@ static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci)
1131 return err; 1383 return err;
1132 1384
1133 /* create a symlink for the device */ 1385 /* create a symlink for the device */
1134 err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj, 1386 err = sysfs_create_link(edac_mci_kobj, &mci->dev->kobj,
1135 EDAC_DEVICE_SYMLINK); 1387 EDAC_DEVICE_SYMLINK);
1136 1388
1137 if (err) 1389 if (err)
@@ -1237,7 +1489,7 @@ void edac_mc_dump_mci(struct mem_ctl_info *mci)
1237 debugf4("\tmci->edac_check = %p\n", mci->edac_check); 1489 debugf4("\tmci->edac_check = %p\n", mci->edac_check);
1238 debugf3("\tmci->nr_csrows = %d, csrows = %p\n", 1490 debugf3("\tmci->nr_csrows = %d, csrows = %p\n",
1239 mci->nr_csrows, mci->csrows); 1491 mci->nr_csrows, mci->csrows);
1240 debugf3("\tpdev = %p\n", mci->pdev); 1492 debugf3("\tdev = %p\n", mci->dev);
1241 debugf3("\tmod_name:ctl_name = %s:%s\n", 1493 debugf3("\tmod_name:ctl_name = %s:%s\n",
1242 mci->mod_name, mci->ctl_name); 1494 mci->mod_name, mci->ctl_name);
1243 debugf3("\tpvt_info = %p\n\n", mci->pvt_info); 1495 debugf3("\tpvt_info = %p\n\n", mci->pvt_info);
@@ -1362,7 +1614,7 @@ void edac_mc_free(struct mem_ctl_info *mci)
1362} 1614}
1363EXPORT_SYMBOL_GPL(edac_mc_free); 1615EXPORT_SYMBOL_GPL(edac_mc_free);
1364 1616
1365static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev) 1617static struct mem_ctl_info *find_mci_by_dev(struct device *dev)
1366{ 1618{
1367 struct mem_ctl_info *mci; 1619 struct mem_ctl_info *mci;
1368 struct list_head *item; 1620 struct list_head *item;
@@ -1372,54 +1624,53 @@ static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev)
1372 list_for_each(item, &mc_devices) { 1624 list_for_each(item, &mc_devices) {
1373 mci = list_entry(item, struct mem_ctl_info, link); 1625 mci = list_entry(item, struct mem_ctl_info, link);
1374 1626
1375 if (mci->pdev == pdev) 1627 if (mci->dev == dev)
1376 return mci; 1628 return mci;
1377 } 1629 }
1378 1630
1379 return NULL; 1631 return NULL;
1380} 1632}
1381 1633
1382static int add_mc_to_global_list(struct mem_ctl_info *mci) 1634/* Return 0 on success, 1 on failure.
1635 * Before calling this function, caller must
1636 * assign a unique value to mci->mc_idx.
1637 */
1638static int add_mc_to_global_list (struct mem_ctl_info *mci)
1383{ 1639{
1384 struct list_head *item, *insert_before; 1640 struct list_head *item, *insert_before;
1385 struct mem_ctl_info *p; 1641 struct mem_ctl_info *p;
1386 int i;
1387 1642
1388 if (list_empty(&mc_devices)) { 1643 insert_before = &mc_devices;
1389 mci->mc_idx = 0;
1390 insert_before = &mc_devices;
1391 } else {
1392 if (find_mci_by_pdev(mci->pdev)) {
1393 edac_printk(KERN_WARNING, EDAC_MC,
1394 "%s (%s) %s %s already assigned %d\n",
1395 mci->pdev->dev.bus_id,
1396 pci_name(mci->pdev), mci->mod_name,
1397 mci->ctl_name, mci->mc_idx);
1398 return 1;
1399 }
1400 1644
1401 insert_before = NULL; 1645 if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL))
1402 i = 0; 1646 goto fail0;
1403 1647
1404 list_for_each(item, &mc_devices) { 1648 list_for_each(item, &mc_devices) {
1405 p = list_entry(item, struct mem_ctl_info, link); 1649 p = list_entry(item, struct mem_ctl_info, link);
1406 1650
1407 if (p->mc_idx != i) { 1651 if (p->mc_idx >= mci->mc_idx) {
1408 insert_before = item; 1652 if (unlikely(p->mc_idx == mci->mc_idx))
1409 break; 1653 goto fail1;
1410 }
1411 1654
1412 i++; 1655 insert_before = item;
1656 break;
1413 } 1657 }
1414
1415 mci->mc_idx = i;
1416
1417 if (insert_before == NULL)
1418 insert_before = &mc_devices;
1419 } 1658 }
1420 1659
1421 list_add_tail_rcu(&mci->link, insert_before); 1660 list_add_tail_rcu(&mci->link, insert_before);
1422 return 0; 1661 return 0;
1662
1663fail0:
1664 edac_printk(KERN_WARNING, EDAC_MC,
1665 "%s (%s) %s %s already assigned %d\n", p->dev->bus_id,
1666 dev_name(p->dev), p->mod_name, p->ctl_name, p->mc_idx);
1667 return 1;
1668
1669fail1:
1670 edac_printk(KERN_WARNING, EDAC_MC,
1671 "bug in low-level driver: attempt to assign\n"
1672 " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__);
1673 return 1;
1423} 1674}
1424 1675
1425static void complete_mc_list_del(struct rcu_head *head) 1676static void complete_mc_list_del(struct rcu_head *head)
@@ -1443,6 +1694,7 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
1443 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and 1694 * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and
1444 * create sysfs entries associated with mci structure 1695 * create sysfs entries associated with mci structure
1445 * @mci: pointer to the mci structure to be added to the list 1696 * @mci: pointer to the mci structure to be added to the list
1697 * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure.
1446 * 1698 *
1447 * Return: 1699 * Return:
1448 * 0 Success 1700 * 0 Success
@@ -1450,9 +1702,10 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
1450 */ 1702 */
1451 1703
1452/* FIXME - should a warning be printed if no error detection? correction? */ 1704/* FIXME - should a warning be printed if no error detection? correction? */
1453int edac_mc_add_mc(struct mem_ctl_info *mci) 1705int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx)
1454{ 1706{
1455 debugf0("%s()\n", __func__); 1707 debugf0("%s()\n", __func__);
1708 mci->mc_idx = mc_idx;
1456#ifdef CONFIG_EDAC_DEBUG 1709#ifdef CONFIG_EDAC_DEBUG
1457 if (edac_debug_level >= 3) 1710 if (edac_debug_level >= 3)
1458 edac_mc_dump_mci(mci); 1711 edac_mc_dump_mci(mci);
@@ -1485,8 +1738,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
1485 } 1738 }
1486 1739
1487 /* Report action taken */ 1740 /* Report action taken */
1488 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: PCI %s\n", 1741 edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n",
1489 mci->mod_name, mci->ctl_name, pci_name(mci->pdev)); 1742 mci->mod_name, mci->ctl_name, dev_name(mci->dev));
1490 1743
1491 up(&mem_ctls_mutex); 1744 up(&mem_ctls_mutex);
1492 return 0; 1745 return 0;
@@ -1503,18 +1756,18 @@ EXPORT_SYMBOL_GPL(edac_mc_add_mc);
1503/** 1756/**
1504 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and 1757 * edac_mc_del_mc: Remove sysfs entries for specified mci structure and
1505 * remove mci structure from global list 1758 * remove mci structure from global list
1506 * @pdev: Pointer to 'struct pci_dev' representing mci structure to remove. 1759 * @dev: Pointer to 'struct device' representing mci structure to remove.
1507 * 1760 *
1508 * Return pointer to removed mci structure, or NULL if device not found. 1761 * Return pointer to removed mci structure, or NULL if device not found.
1509 */ 1762 */
1510struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev) 1763struct mem_ctl_info * edac_mc_del_mc(struct device *dev)
1511{ 1764{
1512 struct mem_ctl_info *mci; 1765 struct mem_ctl_info *mci;
1513 1766
1514 debugf0("MC: %s()\n", __func__); 1767 debugf0("MC: %s()\n", __func__);
1515 down(&mem_ctls_mutex); 1768 down(&mem_ctls_mutex);
1516 1769
1517 if ((mci = find_mci_by_pdev(pdev)) == NULL) { 1770 if ((mci = find_mci_by_dev(dev)) == NULL) {
1518 up(&mem_ctls_mutex); 1771 up(&mem_ctls_mutex);
1519 return NULL; 1772 return NULL;
1520 } 1773 }
@@ -1523,8 +1776,8 @@ struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev)
1523 del_mc_from_global_list(mci); 1776 del_mc_from_global_list(mci);
1524 up(&mem_ctls_mutex); 1777 up(&mem_ctls_mutex);
1525 edac_printk(KERN_INFO, EDAC_MC, 1778 edac_printk(KERN_INFO, EDAC_MC,
1526 "Removed device %d for %s %s: PCI %s\n", mci->mc_idx, 1779 "Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
1527 mci->mod_name, mci->ctl_name, pci_name(mci->pdev)); 1780 mci->mod_name, mci->ctl_name, dev_name(mci->dev));
1528 return mci; 1781 return mci;
1529} 1782}
1530EXPORT_SYMBOL_GPL(edac_mc_del_mc); 1783EXPORT_SYMBOL_GPL(edac_mc_del_mc);
@@ -1738,244 +1991,6 @@ void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg)
1738} 1991}
1739EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); 1992EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info);
1740 1993
1741#ifdef CONFIG_PCI
1742
1743static u16 get_pci_parity_status(struct pci_dev *dev, int secondary)
1744{
1745 int where;
1746 u16 status;
1747
1748 where = secondary ? PCI_SEC_STATUS : PCI_STATUS;
1749 pci_read_config_word(dev, where, &status);
1750
1751 /* If we get back 0xFFFF then we must suspect that the card has been
1752 * pulled but the Linux PCI layer has not yet finished cleaning up.
1753 * We don't want to report on such devices
1754 */
1755
1756 if (status == 0xFFFF) {
1757 u32 sanity;
1758
1759 pci_read_config_dword(dev, 0, &sanity);
1760
1761 if (sanity == 0xFFFFFFFF)
1762 return 0;
1763 }
1764
1765 status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR |
1766 PCI_STATUS_PARITY;
1767
1768 if (status)
1769 /* reset only the bits we are interested in */
1770 pci_write_config_word(dev, where, status);
1771
1772 return status;
1773}
1774
1775typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev);
1776
1777/* Clear any PCI parity errors logged by this device. */
1778static void edac_pci_dev_parity_clear(struct pci_dev *dev)
1779{
1780 u8 header_type;
1781
1782 get_pci_parity_status(dev, 0);
1783
1784 /* read the device TYPE, looking for bridges */
1785 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1786
1787 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE)
1788 get_pci_parity_status(dev, 1);
1789}
1790
1791/*
1792 * PCI Parity polling
1793 *
1794 */
1795static void edac_pci_dev_parity_test(struct pci_dev *dev)
1796{
1797 u16 status;
1798 u8 header_type;
1799
1800 /* read the STATUS register on this device
1801 */
1802 status = get_pci_parity_status(dev, 0);
1803
1804 debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id );
1805
1806 /* check the status reg for errors */
1807 if (status) {
1808 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1809 edac_printk(KERN_CRIT, EDAC_PCI,
1810 "Signaled System Error on %s\n",
1811 pci_name(dev));
1812
1813 if (status & (PCI_STATUS_PARITY)) {
1814 edac_printk(KERN_CRIT, EDAC_PCI,
1815 "Master Data Parity Error on %s\n",
1816 pci_name(dev));
1817
1818 atomic_inc(&pci_parity_count);
1819 }
1820
1821 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1822 edac_printk(KERN_CRIT, EDAC_PCI,
1823 "Detected Parity Error on %s\n",
1824 pci_name(dev));
1825
1826 atomic_inc(&pci_parity_count);
1827 }
1828 }
1829
1830 /* read the device TYPE, looking for bridges */
1831 pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type);
1832
1833 debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id );
1834
1835 if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) {
1836 /* On bridges, need to examine secondary status register */
1837 status = get_pci_parity_status(dev, 1);
1838
1839 debugf2("PCI SEC_STATUS= 0x%04x %s\n",
1840 status, dev->dev.bus_id );
1841
1842 /* check the secondary status reg for errors */
1843 if (status) {
1844 if (status & (PCI_STATUS_SIG_SYSTEM_ERROR))
1845 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1846 "Signaled System Error on %s\n",
1847 pci_name(dev));
1848
1849 if (status & (PCI_STATUS_PARITY)) {
1850 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1851 "Master Data Parity Error on "
1852 "%s\n", pci_name(dev));
1853
1854 atomic_inc(&pci_parity_count);
1855 }
1856
1857 if (status & (PCI_STATUS_DETECTED_PARITY)) {
1858 edac_printk(KERN_CRIT, EDAC_PCI, "Bridge "
1859 "Detected Parity Error on %s\n",
1860 pci_name(dev));
1861
1862 atomic_inc(&pci_parity_count);
1863 }
1864 }
1865 }
1866}
1867
1868/*
1869 * check_dev_on_list: Scan for a PCI device on a white/black list
1870 * @list: an EDAC &edac_pci_device_list white/black list pointer
1871 * @free_index: index of next free entry on the list
1872 * @pci_dev: PCI Device pointer
1873 *
1874 * see if list contains the device.
1875 *
1876 * Returns: 0 not found
1877 * 1 found on list
1878 */
1879static int check_dev_on_list(struct edac_pci_device_list *list,
1880 int free_index, struct pci_dev *dev)
1881{
1882 int i;
1883 int rc = 0; /* Assume not found */
1884 unsigned short vendor=dev->vendor;
1885 unsigned short device=dev->device;
1886
1887 /* Scan the list, looking for a vendor/device match */
1888 for (i = 0; i < free_index; i++, list++ ) {
1889 if ((list->vendor == vendor ) && (list->device == device )) {
1890 rc = 1;
1891 break;
1892 }
1893 }
1894
1895 return rc;
1896}
1897
1898/*
1899 * pci_dev parity list iterator
1900 * Scan the PCI device list for one iteration, looking for SERRORs
1901 * Master Parity ERRORS or Parity ERRORs on primary or secondary devices
1902 */
1903static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn)
1904{
1905 struct pci_dev *dev = NULL;
1906
1907 /* request for kernel access to the next PCI device, if any,
1908 * and while we are looking at it have its reference count
1909 * bumped until we are done with it
1910 */
1911 while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
1912 /* if whitelist exists then it has priority, so only scan
1913 * those devices on the whitelist
1914 */
1915 if (pci_whitelist_count > 0 ) {
1916 if (check_dev_on_list(pci_whitelist,
1917 pci_whitelist_count, dev))
1918 fn(dev);
1919 } else {
1920 /*
1921 * if no whitelist, then check if this devices is
1922 * blacklisted
1923 */
1924 if (!check_dev_on_list(pci_blacklist,
1925 pci_blacklist_count, dev))
1926 fn(dev);
1927 }
1928 }
1929}
1930
1931static void do_pci_parity_check(void)
1932{
1933 unsigned long flags;
1934 int before_count;
1935
1936 debugf3("%s()\n", __func__);
1937
1938 if (!check_pci_parity)
1939 return;
1940
1941 before_count = atomic_read(&pci_parity_count);
1942
1943 /* scan all PCI devices looking for a Parity Error on devices and
1944 * bridges
1945 */
1946 local_irq_save(flags);
1947 edac_pci_dev_parity_iterator(edac_pci_dev_parity_test);
1948 local_irq_restore(flags);
1949
1950 /* Only if operator has selected panic on PCI Error */
1951 if (panic_on_pci_parity) {
1952 /* If the count is different 'after' from 'before' */
1953 if (before_count != atomic_read(&pci_parity_count))
1954 panic("EDAC: PCI Parity Error");
1955 }
1956}
1957
1958static inline void clear_pci_parity_errors(void)
1959{
1960 /* Clear any PCI bus parity errors that devices initially have logged
1961 * in their registers.
1962 */
1963 edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear);
1964}
1965
1966#else /* CONFIG_PCI */
1967
1968static inline void do_pci_parity_check(void)
1969{
1970 /* no-op */
1971}
1972
1973static inline void clear_pci_parity_errors(void)
1974{
1975 /* no-op */
1976}
1977
1978#endif /* CONFIG_PCI */
1979 1994
1980/* 1995/*
1981 * Iterate over all MC instances and check for ECC, et al, errors 1996 * Iterate over all MC instances and check for ECC, et al, errors
@@ -2095,10 +2110,12 @@ MODULE_DESCRIPTION("Core library routines for MC reporting");
2095 2110
2096module_param(panic_on_ue, int, 0644); 2111module_param(panic_on_ue, int, 0644);
2097MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); 2112MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on");
2113#ifdef CONFIG_PCI
2098module_param(check_pci_parity, int, 0644); 2114module_param(check_pci_parity, int, 0644);
2099MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); 2115MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on");
2100module_param(panic_on_pci_parity, int, 0644); 2116module_param(panic_on_pci_parity, int, 0644);
2101MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); 2117MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on");
2118#endif
2102module_param(log_ue, int, 0644); 2119module_param(log_ue, int, 0644);
2103MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); 2120MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on");
2104module_param(log_ce, int, 0644); 2121module_param(log_ce, int, 0644);