diff options
Diffstat (limited to 'drivers/edac/edac_mc.c')
-rw-r--r-- | drivers/edac/edac_mc.c | 590 |
1 files changed, 303 insertions, 287 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index ea06e3a4dc35..3a7cfe88b169 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c | |||
@@ -12,7 +12,6 @@ | |||
12 | * | 12 | * |
13 | */ | 13 | */ |
14 | 14 | ||
15 | #include <linux/config.h> | ||
16 | #include <linux/module.h> | 15 | #include <linux/module.h> |
17 | #include <linux/proc_fs.h> | 16 | #include <linux/proc_fs.h> |
18 | #include <linux/kernel.h> | 17 | #include <linux/kernel.h> |
@@ -54,16 +53,17 @@ static int log_ce = 1; | |||
54 | static int panic_on_ue; | 53 | static int panic_on_ue; |
55 | static int poll_msec = 1000; | 54 | static int poll_msec = 1000; |
56 | 55 | ||
57 | static int check_pci_parity = 0; /* default YES check PCI parity */ | ||
58 | static int panic_on_pci_parity; /* default no panic on PCI Parity */ | ||
59 | static atomic_t pci_parity_count = ATOMIC_INIT(0); | ||
60 | |||
61 | /* lock to memory controller's control array */ | 56 | /* lock to memory controller's control array */ |
62 | static DECLARE_MUTEX(mem_ctls_mutex); | 57 | static DECLARE_MUTEX(mem_ctls_mutex); |
63 | static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); | 58 | static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); |
64 | 59 | ||
65 | static struct task_struct *edac_thread; | 60 | static struct task_struct *edac_thread; |
66 | 61 | ||
62 | #ifdef CONFIG_PCI | ||
63 | static int check_pci_parity = 0; /* default NO check PCI parity */ | ||
64 | static int panic_on_pci_parity; /* default no panic on PCI Parity */ | ||
65 | static atomic_t pci_parity_count = ATOMIC_INIT(0); | ||
66 | |||
67 | /* Structure of the whitelist and blacklist arrays */ | 67 | /* Structure of the whitelist and blacklist arrays */ |
68 | struct edac_pci_device_list { | 68 | struct edac_pci_device_list { |
69 | unsigned int vendor; /* Vendor ID */ | 69 | unsigned int vendor; /* Vendor ID */ |
@@ -80,6 +80,12 @@ static int pci_blacklist_count; | |||
80 | static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES]; | 80 | static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES]; |
81 | static int pci_whitelist_count ; | 81 | static int pci_whitelist_count ; |
82 | 82 | ||
83 | #ifndef DISABLE_EDAC_SYSFS | ||
84 | static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ | ||
85 | static struct completion edac_pci_kobj_complete; | ||
86 | #endif /* DISABLE_EDAC_SYSFS */ | ||
87 | #endif /* CONFIG_PCI */ | ||
88 | |||
83 | /* START sysfs data and methods */ | 89 | /* START sysfs data and methods */ |
84 | 90 | ||
85 | #ifndef DISABLE_EDAC_SYSFS | 91 | #ifndef DISABLE_EDAC_SYSFS |
@@ -127,18 +133,15 @@ static struct sysdev_class edac_class = { | |||
127 | set_kset_name("edac"), | 133 | set_kset_name("edac"), |
128 | }; | 134 | }; |
129 | 135 | ||
130 | /* sysfs objects: | 136 | /* sysfs object: |
131 | * /sys/devices/system/edac/mc | 137 | * /sys/devices/system/edac/mc |
132 | * /sys/devices/system/edac/pci | ||
133 | */ | 138 | */ |
134 | static struct kobject edac_memctrl_kobj; | 139 | static struct kobject edac_memctrl_kobj; |
135 | static struct kobject edac_pci_kobj; | ||
136 | 140 | ||
137 | /* We use these to wait for the reference counts on edac_memctrl_kobj and | 141 | /* We use this to wait for the reference count on edac_memctrl_kobj to |
138 | * edac_pci_kobj to reach 0. | 142 | * reach 0. |
139 | */ | 143 | */ |
140 | static struct completion edac_memctrl_kobj_complete; | 144 | static struct completion edac_memctrl_kobj_complete; |
141 | static struct completion edac_pci_kobj_complete; | ||
142 | 145 | ||
143 | /* | 146 | /* |
144 | * /sys/devices/system/edac/mc; | 147 | * /sys/devices/system/edac/mc; |
@@ -324,6 +327,8 @@ static void edac_sysfs_memctrl_teardown(void) | |||
324 | #endif /* DISABLE_EDAC_SYSFS */ | 327 | #endif /* DISABLE_EDAC_SYSFS */ |
325 | } | 328 | } |
326 | 329 | ||
330 | #ifdef CONFIG_PCI | ||
331 | |||
327 | #ifndef DISABLE_EDAC_SYSFS | 332 | #ifndef DISABLE_EDAC_SYSFS |
328 | 333 | ||
329 | /* | 334 | /* |
@@ -624,6 +629,252 @@ static void edac_sysfs_pci_teardown(void) | |||
624 | #endif | 629 | #endif |
625 | } | 630 | } |
626 | 631 | ||
632 | |||
633 | static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) | ||
634 | { | ||
635 | int where; | ||
636 | u16 status; | ||
637 | |||
638 | where = secondary ? PCI_SEC_STATUS : PCI_STATUS; | ||
639 | pci_read_config_word(dev, where, &status); | ||
640 | |||
641 | /* If we get back 0xFFFF then we must suspect that the card has been | ||
642 | * pulled but the Linux PCI layer has not yet finished cleaning up. | ||
643 | * We don't want to report on such devices | ||
644 | */ | ||
645 | |||
646 | if (status == 0xFFFF) { | ||
647 | u32 sanity; | ||
648 | |||
649 | pci_read_config_dword(dev, 0, &sanity); | ||
650 | |||
651 | if (sanity == 0xFFFFFFFF) | ||
652 | return 0; | ||
653 | } | ||
654 | |||
655 | status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | | ||
656 | PCI_STATUS_PARITY; | ||
657 | |||
658 | if (status) | ||
659 | /* reset only the bits we are interested in */ | ||
660 | pci_write_config_word(dev, where, status); | ||
661 | |||
662 | return status; | ||
663 | } | ||
664 | |||
665 | typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); | ||
666 | |||
667 | /* Clear any PCI parity errors logged by this device. */ | ||
668 | static void edac_pci_dev_parity_clear(struct pci_dev *dev) | ||
669 | { | ||
670 | u8 header_type; | ||
671 | |||
672 | get_pci_parity_status(dev, 0); | ||
673 | |||
674 | /* read the device TYPE, looking for bridges */ | ||
675 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
676 | |||
677 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) | ||
678 | get_pci_parity_status(dev, 1); | ||
679 | } | ||
680 | |||
681 | /* | ||
682 | * PCI Parity polling | ||
683 | * | ||
684 | */ | ||
685 | static void edac_pci_dev_parity_test(struct pci_dev *dev) | ||
686 | { | ||
687 | u16 status; | ||
688 | u8 header_type; | ||
689 | |||
690 | /* read the STATUS register on this device | ||
691 | */ | ||
692 | status = get_pci_parity_status(dev, 0); | ||
693 | |||
694 | debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id ); | ||
695 | |||
696 | /* check the status reg for errors */ | ||
697 | if (status) { | ||
698 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) | ||
699 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
700 | "Signaled System Error on %s\n", | ||
701 | pci_name(dev)); | ||
702 | |||
703 | if (status & (PCI_STATUS_PARITY)) { | ||
704 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
705 | "Master Data Parity Error on %s\n", | ||
706 | pci_name(dev)); | ||
707 | |||
708 | atomic_inc(&pci_parity_count); | ||
709 | } | ||
710 | |||
711 | if (status & (PCI_STATUS_DETECTED_PARITY)) { | ||
712 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
713 | "Detected Parity Error on %s\n", | ||
714 | pci_name(dev)); | ||
715 | |||
716 | atomic_inc(&pci_parity_count); | ||
717 | } | ||
718 | } | ||
719 | |||
720 | /* read the device TYPE, looking for bridges */ | ||
721 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
722 | |||
723 | debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id ); | ||
724 | |||
725 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { | ||
726 | /* On bridges, need to examine secondary status register */ | ||
727 | status = get_pci_parity_status(dev, 1); | ||
728 | |||
729 | debugf2("PCI SEC_STATUS= 0x%04x %s\n", | ||
730 | status, dev->dev.bus_id ); | ||
731 | |||
732 | /* check the secondary status reg for errors */ | ||
733 | if (status) { | ||
734 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) | ||
735 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
736 | "Signaled System Error on %s\n", | ||
737 | pci_name(dev)); | ||
738 | |||
739 | if (status & (PCI_STATUS_PARITY)) { | ||
740 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
741 | "Master Data Parity Error on " | ||
742 | "%s\n", pci_name(dev)); | ||
743 | |||
744 | atomic_inc(&pci_parity_count); | ||
745 | } | ||
746 | |||
747 | if (status & (PCI_STATUS_DETECTED_PARITY)) { | ||
748 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
749 | "Detected Parity Error on %s\n", | ||
750 | pci_name(dev)); | ||
751 | |||
752 | atomic_inc(&pci_parity_count); | ||
753 | } | ||
754 | } | ||
755 | } | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * check_dev_on_list: Scan for a PCI device on a white/black list | ||
760 | * @list: an EDAC &edac_pci_device_list white/black list pointer | ||
761 | * @free_index: index of next free entry on the list | ||
763 | * @dev: PCI Device pointer | ||
763 | * | ||
764 | * see if list contains the device. | ||
765 | * | ||
766 | * Returns: 0 not found | ||
767 | * 1 found on list | ||
768 | */ | ||
769 | static int check_dev_on_list(struct edac_pci_device_list *list, | ||
770 | int free_index, struct pci_dev *dev) | ||
771 | { | ||
772 | int i; | ||
773 | int rc = 0; /* Assume not found */ | ||
774 | unsigned short vendor=dev->vendor; | ||
775 | unsigned short device=dev->device; | ||
776 | |||
777 | /* Scan the list, looking for a vendor/device match */ | ||
778 | for (i = 0; i < free_index; i++, list++ ) { | ||
779 | if ((list->vendor == vendor ) && (list->device == device )) { | ||
780 | rc = 1; | ||
781 | break; | ||
782 | } | ||
783 | } | ||
784 | |||
785 | return rc; | ||
786 | } | ||
787 | |||
788 | /* | ||
789 | * pci_dev parity list iterator | ||
790 | * Scan the PCI device list for one iteration, looking for SERRORs, | ||
791 | * Master Parity ERRORs or Parity ERRORs on primary or secondary devices | ||
792 | */ | ||
793 | static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) | ||
794 | { | ||
795 | struct pci_dev *dev = NULL; | ||
796 | |||
797 | /* request for kernel access to the next PCI device, if any, | ||
798 | * and while we are looking at it have its reference count | ||
799 | * bumped until we are done with it | ||
800 | */ | ||
801 | while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
802 | /* if whitelist exists then it has priority, so only scan | ||
803 | * those devices on the whitelist | ||
804 | */ | ||
805 | if (pci_whitelist_count > 0 ) { | ||
806 | if (check_dev_on_list(pci_whitelist, | ||
807 | pci_whitelist_count, dev)) | ||
808 | fn(dev); | ||
809 | } else { | ||
810 | /* | ||
811 | * if no whitelist, then check if this device is | ||
812 | * blacklisted | ||
813 | */ | ||
814 | if (!check_dev_on_list(pci_blacklist, | ||
815 | pci_blacklist_count, dev)) | ||
816 | fn(dev); | ||
817 | } | ||
818 | } | ||
819 | } | ||
820 | |||
821 | static void do_pci_parity_check(void) | ||
822 | { | ||
823 | unsigned long flags; | ||
824 | int before_count; | ||
825 | |||
826 | debugf3("%s()\n", __func__); | ||
827 | |||
828 | if (!check_pci_parity) | ||
829 | return; | ||
830 | |||
831 | before_count = atomic_read(&pci_parity_count); | ||
832 | |||
833 | /* scan all PCI devices looking for a Parity Error on devices and | ||
834 | * bridges | ||
835 | */ | ||
836 | local_irq_save(flags); | ||
837 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); | ||
838 | local_irq_restore(flags); | ||
839 | |||
840 | /* Only if operator has selected panic on PCI Error */ | ||
841 | if (panic_on_pci_parity) { | ||
842 | /* If the count is different 'after' from 'before' */ | ||
843 | if (before_count != atomic_read(&pci_parity_count)) | ||
844 | panic("EDAC: PCI Parity Error"); | ||
845 | } | ||
846 | } | ||
847 | |||
848 | static inline void clear_pci_parity_errors(void) | ||
849 | { | ||
850 | /* Clear any PCI bus parity errors that devices initially have logged | ||
851 | * in their registers. | ||
852 | */ | ||
853 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); | ||
854 | } | ||
855 | |||
856 | #else /* CONFIG_PCI */ | ||
857 | |||
858 | static inline void do_pci_parity_check(void) | ||
859 | { | ||
860 | /* no-op */ | ||
861 | } | ||
862 | |||
863 | static inline void clear_pci_parity_errors(void) | ||
864 | { | ||
865 | /* no-op */ | ||
866 | } | ||
867 | |||
868 | static void edac_sysfs_pci_teardown(void) | ||
869 | { | ||
870 | } | ||
871 | |||
872 | static int edac_sysfs_pci_setup(void) | ||
873 | { | ||
874 | return 0; | ||
875 | } | ||
876 | #endif /* CONFIG_PCI */ | ||
877 | |||
627 | #ifndef DISABLE_EDAC_SYSFS | 878 | #ifndef DISABLE_EDAC_SYSFS |
628 | 879 | ||
629 | /* EDAC sysfs CSROW data structures and methods */ | 880 | /* EDAC sysfs CSROW data structures and methods */ |
@@ -1132,7 +1383,7 @@ static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) | |||
1132 | return err; | 1383 | return err; |
1133 | 1384 | ||
1134 | /* create a symlink for the device */ | 1385 | /* create a symlink for the device */ |
1135 | err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj, | 1386 | err = sysfs_create_link(edac_mci_kobj, &mci->dev->kobj, |
1136 | EDAC_DEVICE_SYMLINK); | 1387 | EDAC_DEVICE_SYMLINK); |
1137 | 1388 | ||
1138 | if (err) | 1389 | if (err) |
@@ -1238,7 +1489,7 @@ void edac_mc_dump_mci(struct mem_ctl_info *mci) | |||
1238 | debugf4("\tmci->edac_check = %p\n", mci->edac_check); | 1489 | debugf4("\tmci->edac_check = %p\n", mci->edac_check); |
1239 | debugf3("\tmci->nr_csrows = %d, csrows = %p\n", | 1490 | debugf3("\tmci->nr_csrows = %d, csrows = %p\n", |
1240 | mci->nr_csrows, mci->csrows); | 1491 | mci->nr_csrows, mci->csrows); |
1241 | debugf3("\tpdev = %p\n", mci->pdev); | 1492 | debugf3("\tdev = %p\n", mci->dev); |
1242 | debugf3("\tmod_name:ctl_name = %s:%s\n", | 1493 | debugf3("\tmod_name:ctl_name = %s:%s\n", |
1243 | mci->mod_name, mci->ctl_name); | 1494 | mci->mod_name, mci->ctl_name); |
1244 | debugf3("\tpvt_info = %p\n\n", mci->pvt_info); | 1495 | debugf3("\tpvt_info = %p\n\n", mci->pvt_info); |
@@ -1363,7 +1614,7 @@ void edac_mc_free(struct mem_ctl_info *mci) | |||
1363 | } | 1614 | } |
1364 | EXPORT_SYMBOL_GPL(edac_mc_free); | 1615 | EXPORT_SYMBOL_GPL(edac_mc_free); |
1365 | 1616 | ||
1366 | static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev) | 1617 | static struct mem_ctl_info *find_mci_by_dev(struct device *dev) |
1367 | { | 1618 | { |
1368 | struct mem_ctl_info *mci; | 1619 | struct mem_ctl_info *mci; |
1369 | struct list_head *item; | 1620 | struct list_head *item; |
@@ -1373,54 +1624,53 @@ static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev) | |||
1373 | list_for_each(item, &mc_devices) { | 1624 | list_for_each(item, &mc_devices) { |
1374 | mci = list_entry(item, struct mem_ctl_info, link); | 1625 | mci = list_entry(item, struct mem_ctl_info, link); |
1375 | 1626 | ||
1376 | if (mci->pdev == pdev) | 1627 | if (mci->dev == dev) |
1377 | return mci; | 1628 | return mci; |
1378 | } | 1629 | } |
1379 | 1630 | ||
1380 | return NULL; | 1631 | return NULL; |
1381 | } | 1632 | } |
1382 | 1633 | ||
1383 | static int add_mc_to_global_list(struct mem_ctl_info *mci) | 1634 | /* Return 0 on success, 1 on failure. |
1635 | * Before calling this function, caller must | ||
1636 | * assign a unique value to mci->mc_idx. | ||
1637 | */ | ||
1638 | static int add_mc_to_global_list (struct mem_ctl_info *mci) | ||
1384 | { | 1639 | { |
1385 | struct list_head *item, *insert_before; | 1640 | struct list_head *item, *insert_before; |
1386 | struct mem_ctl_info *p; | 1641 | struct mem_ctl_info *p; |
1387 | int i; | ||
1388 | 1642 | ||
1389 | if (list_empty(&mc_devices)) { | 1643 | insert_before = &mc_devices; |
1390 | mci->mc_idx = 0; | ||
1391 | insert_before = &mc_devices; | ||
1392 | } else { | ||
1393 | if (find_mci_by_pdev(mci->pdev)) { | ||
1394 | edac_printk(KERN_WARNING, EDAC_MC, | ||
1395 | "%s (%s) %s %s already assigned %d\n", | ||
1396 | mci->pdev->dev.bus_id, | ||
1397 | pci_name(mci->pdev), mci->mod_name, | ||
1398 | mci->ctl_name, mci->mc_idx); | ||
1399 | return 1; | ||
1400 | } | ||
1401 | 1644 | ||
1402 | insert_before = NULL; | 1645 | if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL)) |
1403 | i = 0; | 1646 | goto fail0; |
1404 | 1647 | ||
1405 | list_for_each(item, &mc_devices) { | 1648 | list_for_each(item, &mc_devices) { |
1406 | p = list_entry(item, struct mem_ctl_info, link); | 1649 | p = list_entry(item, struct mem_ctl_info, link); |
1407 | 1650 | ||
1408 | if (p->mc_idx != i) { | 1651 | if (p->mc_idx >= mci->mc_idx) { |
1409 | insert_before = item; | 1652 | if (unlikely(p->mc_idx == mci->mc_idx)) |
1410 | break; | 1653 | goto fail1; |
1411 | } | ||
1412 | 1654 | ||
1413 | i++; | 1655 | insert_before = item; |
1656 | break; | ||
1414 | } | 1657 | } |
1415 | |||
1416 | mci->mc_idx = i; | ||
1417 | |||
1418 | if (insert_before == NULL) | ||
1419 | insert_before = &mc_devices; | ||
1420 | } | 1658 | } |
1421 | 1659 | ||
1422 | list_add_tail_rcu(&mci->link, insert_before); | 1660 | list_add_tail_rcu(&mci->link, insert_before); |
1423 | return 0; | 1661 | return 0; |
1662 | |||
1663 | fail0: | ||
1664 | edac_printk(KERN_WARNING, EDAC_MC, | ||
1665 | "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, | ||
1666 | dev_name(p->dev), p->mod_name, p->ctl_name, p->mc_idx); | ||
1667 | return 1; | ||
1668 | |||
1669 | fail1: | ||
1670 | edac_printk(KERN_WARNING, EDAC_MC, | ||
1671 | "bug in low-level driver: attempt to assign\n" | ||
1672 | " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); | ||
1673 | return 1; | ||
1424 | } | 1674 | } |
1425 | 1675 | ||
1426 | static void complete_mc_list_del(struct rcu_head *head) | 1676 | static void complete_mc_list_del(struct rcu_head *head) |
@@ -1444,6 +1694,7 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) | |||
1444 | * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and | 1694 | * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and |
1445 | * create sysfs entries associated with mci structure | 1695 | * create sysfs entries associated with mci structure |
1446 | * @mci: pointer to the mci structure to be added to the list | 1696 | * @mci: pointer to the mci structure to be added to the list |
1697 | * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure. | ||
1447 | * | 1698 | * |
1448 | * Return: | 1699 | * Return: |
1449 | * 0 Success | 1700 | * 0 Success |
@@ -1451,9 +1702,10 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) | |||
1451 | */ | 1702 | */ |
1452 | 1703 | ||
1453 | /* FIXME - should a warning be printed if no error detection? correction? */ | 1704 | /* FIXME - should a warning be printed if no error detection? correction? */ |
1454 | int edac_mc_add_mc(struct mem_ctl_info *mci) | 1705 | int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) |
1455 | { | 1706 | { |
1456 | debugf0("%s()\n", __func__); | 1707 | debugf0("%s()\n", __func__); |
1708 | mci->mc_idx = mc_idx; | ||
1457 | #ifdef CONFIG_EDAC_DEBUG | 1709 | #ifdef CONFIG_EDAC_DEBUG |
1458 | if (edac_debug_level >= 3) | 1710 | if (edac_debug_level >= 3) |
1459 | edac_mc_dump_mci(mci); | 1711 | edac_mc_dump_mci(mci); |
@@ -1486,8 +1738,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) | |||
1486 | } | 1738 | } |
1487 | 1739 | ||
1488 | /* Report action taken */ | 1740 | /* Report action taken */ |
1489 | edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: PCI %s\n", | 1741 | edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n", |
1490 | mci->mod_name, mci->ctl_name, pci_name(mci->pdev)); | 1742 | mci->mod_name, mci->ctl_name, dev_name(mci->dev)); |
1491 | 1743 | ||
1492 | up(&mem_ctls_mutex); | 1744 | up(&mem_ctls_mutex); |
1493 | return 0; | 1745 | return 0; |
@@ -1504,18 +1756,18 @@ EXPORT_SYMBOL_GPL(edac_mc_add_mc); | |||
1504 | /** | 1756 | /** |
1505 | * edac_mc_del_mc: Remove sysfs entries for specified mci structure and | 1757 | * edac_mc_del_mc: Remove sysfs entries for specified mci structure and |
1506 | * remove mci structure from global list | 1758 | * remove mci structure from global list |
1507 | * @pdev: Pointer to 'struct pci_dev' representing mci structure to remove. | 1759 | * @dev: Pointer to 'struct device' representing mci structure to remove. |
1508 | * | 1760 | * |
1509 | * Return pointer to removed mci structure, or NULL if device not found. | 1761 | * Return pointer to removed mci structure, or NULL if device not found. |
1510 | */ | 1762 | */ |
1511 | struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev) | 1763 | struct mem_ctl_info * edac_mc_del_mc(struct device *dev) |
1512 | { | 1764 | { |
1513 | struct mem_ctl_info *mci; | 1765 | struct mem_ctl_info *mci; |
1514 | 1766 | ||
1515 | debugf0("MC: %s()\n", __func__); | 1767 | debugf0("MC: %s()\n", __func__); |
1516 | down(&mem_ctls_mutex); | 1768 | down(&mem_ctls_mutex); |
1517 | 1769 | ||
1518 | if ((mci = find_mci_by_pdev(pdev)) == NULL) { | 1770 | if ((mci = find_mci_by_dev(dev)) == NULL) { |
1519 | up(&mem_ctls_mutex); | 1771 | up(&mem_ctls_mutex); |
1520 | return NULL; | 1772 | return NULL; |
1521 | } | 1773 | } |
@@ -1524,8 +1776,8 @@ struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev) | |||
1524 | del_mc_from_global_list(mci); | 1776 | del_mc_from_global_list(mci); |
1525 | up(&mem_ctls_mutex); | 1777 | up(&mem_ctls_mutex); |
1526 | edac_printk(KERN_INFO, EDAC_MC, | 1778 | edac_printk(KERN_INFO, EDAC_MC, |
1527 | "Removed device %d for %s %s: PCI %s\n", mci->mc_idx, | 1779 | "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, |
1528 | mci->mod_name, mci->ctl_name, pci_name(mci->pdev)); | 1780 | mci->mod_name, mci->ctl_name, dev_name(mci->dev)); |
1529 | return mci; | 1781 | return mci; |
1530 | } | 1782 | } |
1531 | EXPORT_SYMBOL_GPL(edac_mc_del_mc); | 1783 | EXPORT_SYMBOL_GPL(edac_mc_del_mc); |
@@ -1739,244 +1991,6 @@ void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) | |||
1739 | } | 1991 | } |
1740 | EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); | 1992 | EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); |
1741 | 1993 | ||
1742 | #ifdef CONFIG_PCI | ||
1743 | |||
1744 | static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) | ||
1745 | { | ||
1746 | int where; | ||
1747 | u16 status; | ||
1748 | |||
1749 | where = secondary ? PCI_SEC_STATUS : PCI_STATUS; | ||
1750 | pci_read_config_word(dev, where, &status); | ||
1751 | |||
1752 | /* If we get back 0xFFFF then we must suspect that the card has been | ||
1753 | * pulled but the Linux PCI layer has not yet finished cleaning up. | ||
1754 | * We don't want to report on such devices | ||
1755 | */ | ||
1756 | |||
1757 | if (status == 0xFFFF) { | ||
1758 | u32 sanity; | ||
1759 | |||
1760 | pci_read_config_dword(dev, 0, &sanity); | ||
1761 | |||
1762 | if (sanity == 0xFFFFFFFF) | ||
1763 | return 0; | ||
1764 | } | ||
1765 | |||
1766 | status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | | ||
1767 | PCI_STATUS_PARITY; | ||
1768 | |||
1769 | if (status) | ||
1770 | /* reset only the bits we are interested in */ | ||
1771 | pci_write_config_word(dev, where, status); | ||
1772 | |||
1773 | return status; | ||
1774 | } | ||
1775 | |||
1776 | typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); | ||
1777 | |||
1778 | /* Clear any PCI parity errors logged by this device. */ | ||
1779 | static void edac_pci_dev_parity_clear(struct pci_dev *dev) | ||
1780 | { | ||
1781 | u8 header_type; | ||
1782 | |||
1783 | get_pci_parity_status(dev, 0); | ||
1784 | |||
1785 | /* read the device TYPE, looking for bridges */ | ||
1786 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
1787 | |||
1788 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) | ||
1789 | get_pci_parity_status(dev, 1); | ||
1790 | } | ||
1791 | |||
1792 | /* | ||
1793 | * PCI Parity polling | ||
1794 | * | ||
1795 | */ | ||
1796 | static void edac_pci_dev_parity_test(struct pci_dev *dev) | ||
1797 | { | ||
1798 | u16 status; | ||
1799 | u8 header_type; | ||
1800 | |||
1801 | /* read the STATUS register on this device | ||
1802 | */ | ||
1803 | status = get_pci_parity_status(dev, 0); | ||
1804 | |||
1805 | debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id ); | ||
1806 | |||
1807 | /* check the status reg for errors */ | ||
1808 | if (status) { | ||
1809 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) | ||
1810 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
1811 | "Signaled System Error on %s\n", | ||
1812 | pci_name(dev)); | ||
1813 | |||
1814 | if (status & (PCI_STATUS_PARITY)) { | ||
1815 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
1816 | "Master Data Parity Error on %s\n", | ||
1817 | pci_name(dev)); | ||
1818 | |||
1819 | atomic_inc(&pci_parity_count); | ||
1820 | } | ||
1821 | |||
1822 | if (status & (PCI_STATUS_DETECTED_PARITY)) { | ||
1823 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
1824 | "Detected Parity Error on %s\n", | ||
1825 | pci_name(dev)); | ||
1826 | |||
1827 | atomic_inc(&pci_parity_count); | ||
1828 | } | ||
1829 | } | ||
1830 | |||
1831 | /* read the device TYPE, looking for bridges */ | ||
1832 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
1833 | |||
1834 | debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id ); | ||
1835 | |||
1836 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { | ||
1837 | /* On bridges, need to examine secondary status register */ | ||
1838 | status = get_pci_parity_status(dev, 1); | ||
1839 | |||
1840 | debugf2("PCI SEC_STATUS= 0x%04x %s\n", | ||
1841 | status, dev->dev.bus_id ); | ||
1842 | |||
1843 | /* check the secondary status reg for errors */ | ||
1844 | if (status) { | ||
1845 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) | ||
1846 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
1847 | "Signaled System Error on %s\n", | ||
1848 | pci_name(dev)); | ||
1849 | |||
1850 | if (status & (PCI_STATUS_PARITY)) { | ||
1851 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
1852 | "Master Data Parity Error on " | ||
1853 | "%s\n", pci_name(dev)); | ||
1854 | |||
1855 | atomic_inc(&pci_parity_count); | ||
1856 | } | ||
1857 | |||
1858 | if (status & (PCI_STATUS_DETECTED_PARITY)) { | ||
1859 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
1860 | "Detected Parity Error on %s\n", | ||
1861 | pci_name(dev)); | ||
1862 | |||
1863 | atomic_inc(&pci_parity_count); | ||
1864 | } | ||
1865 | } | ||
1866 | } | ||
1867 | } | ||
1868 | |||
1869 | /* | ||
1870 | * check_dev_on_list: Scan for a PCI device on a white/black list | ||
1871 | * @list: an EDAC &edac_pci_device_list white/black list pointer | ||
1872 | * @free_index: index of next free entry on the list | ||
1873 | * @pci_dev: PCI Device pointer | ||
1874 | * | ||
1875 | * see if list contains the device. | ||
1876 | * | ||
1877 | * Returns: 0 not found | ||
1878 | * 1 found on list | ||
1879 | */ | ||
1880 | static int check_dev_on_list(struct edac_pci_device_list *list, | ||
1881 | int free_index, struct pci_dev *dev) | ||
1882 | { | ||
1883 | int i; | ||
1884 | int rc = 0; /* Assume not found */ | ||
1885 | unsigned short vendor=dev->vendor; | ||
1886 | unsigned short device=dev->device; | ||
1887 | |||
1888 | /* Scan the list, looking for a vendor/device match */ | ||
1889 | for (i = 0; i < free_index; i++, list++ ) { | ||
1890 | if ((list->vendor == vendor ) && (list->device == device )) { | ||
1891 | rc = 1; | ||
1892 | break; | ||
1893 | } | ||
1894 | } | ||
1895 | |||
1896 | return rc; | ||
1897 | } | ||
1898 | |||
1899 | /* | ||
1900 | * pci_dev parity list iterator | ||
1901 | * Scan the PCI device list for one iteration, looking for SERRORs | ||
1902 | * Master Parity ERRORS or Parity ERRORs on primary or secondary devices | ||
1903 | */ | ||
1904 | static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) | ||
1905 | { | ||
1906 | struct pci_dev *dev = NULL; | ||
1907 | |||
1908 | /* request for kernel access to the next PCI device, if any, | ||
1909 | * and while we are looking at it have its reference count | ||
1910 | * bumped until we are done with it | ||
1911 | */ | ||
1912 | while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
1913 | /* if whitelist exists then it has priority, so only scan | ||
1914 | * those devices on the whitelist | ||
1915 | */ | ||
1916 | if (pci_whitelist_count > 0 ) { | ||
1917 | if (check_dev_on_list(pci_whitelist, | ||
1918 | pci_whitelist_count, dev)) | ||
1919 | fn(dev); | ||
1920 | } else { | ||
1921 | /* | ||
1922 | * if no whitelist, then check if this devices is | ||
1923 | * blacklisted | ||
1924 | */ | ||
1925 | if (!check_dev_on_list(pci_blacklist, | ||
1926 | pci_blacklist_count, dev)) | ||
1927 | fn(dev); | ||
1928 | } | ||
1929 | } | ||
1930 | } | ||
1931 | |||
1932 | static void do_pci_parity_check(void) | ||
1933 | { | ||
1934 | unsigned long flags; | ||
1935 | int before_count; | ||
1936 | |||
1937 | debugf3("%s()\n", __func__); | ||
1938 | |||
1939 | if (!check_pci_parity) | ||
1940 | return; | ||
1941 | |||
1942 | before_count = atomic_read(&pci_parity_count); | ||
1943 | |||
1944 | /* scan all PCI devices looking for a Parity Error on devices and | ||
1945 | * bridges | ||
1946 | */ | ||
1947 | local_irq_save(flags); | ||
1948 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); | ||
1949 | local_irq_restore(flags); | ||
1950 | |||
1951 | /* Only if operator has selected panic on PCI Error */ | ||
1952 | if (panic_on_pci_parity) { | ||
1953 | /* If the count is different 'after' from 'before' */ | ||
1954 | if (before_count != atomic_read(&pci_parity_count)) | ||
1955 | panic("EDAC: PCI Parity Error"); | ||
1956 | } | ||
1957 | } | ||
1958 | |||
1959 | static inline void clear_pci_parity_errors(void) | ||
1960 | { | ||
1961 | /* Clear any PCI bus parity errors that devices initially have logged | ||
1962 | * in their registers. | ||
1963 | */ | ||
1964 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); | ||
1965 | } | ||
1966 | |||
1967 | #else /* CONFIG_PCI */ | ||
1968 | |||
1969 | static inline void do_pci_parity_check(void) | ||
1970 | { | ||
1971 | /* no-op */ | ||
1972 | } | ||
1973 | |||
1974 | static inline void clear_pci_parity_errors(void) | ||
1975 | { | ||
1976 | /* no-op */ | ||
1977 | } | ||
1978 | |||
1979 | #endif /* CONFIG_PCI */ | ||
1980 | 1994 | ||
1981 | /* | 1995 | /* |
1982 | * Iterate over all MC instances and check for ECC, et al, errors | 1996 | * Iterate over all MC instances and check for ECC, et al, errors |
@@ -2096,10 +2110,12 @@ MODULE_DESCRIPTION("Core library routines for MC reporting"); | |||
2096 | 2110 | ||
2097 | module_param(panic_on_ue, int, 0644); | 2111 | module_param(panic_on_ue, int, 0644); |
2098 | MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); | 2112 | MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); |
2113 | #ifdef CONFIG_PCI | ||
2099 | module_param(check_pci_parity, int, 0644); | 2114 | module_param(check_pci_parity, int, 0644); |
2100 | MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); | 2115 | MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); |
2101 | module_param(panic_on_pci_parity, int, 0644); | 2116 | module_param(panic_on_pci_parity, int, 0644); |
2102 | MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); | 2117 | MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); |
2118 | #endif | ||
2103 | module_param(log_ue, int, 0644); | 2119 | module_param(log_ue, int, 0644); |
2104 | MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); | 2120 | MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); |
2105 | module_param(log_ce, int, 0644); | 2121 | module_param(log_ce, int, 0644); |