diff options
Diffstat (limited to 'drivers/edac/edac_mc.c')
-rw-r--r-- | drivers/edac/edac_mc.c | 589 |
1 files changed, 303 insertions, 286 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c index 0499782db7c7..3a7cfe88b169 100644 --- a/drivers/edac/edac_mc.c +++ b/drivers/edac/edac_mc.c | |||
@@ -53,16 +53,17 @@ static int log_ce = 1; | |||
53 | static int panic_on_ue; | 53 | static int panic_on_ue; |
54 | static int poll_msec = 1000; | 54 | static int poll_msec = 1000; |
55 | 55 | ||
56 | static int check_pci_parity = 0; /* default YES check PCI parity */ | ||
57 | static int panic_on_pci_parity; /* default no panic on PCI Parity */ | ||
58 | static atomic_t pci_parity_count = ATOMIC_INIT(0); | ||
59 | |||
60 | /* lock to memory controller's control array */ | 56 | /* lock to memory controller's control array */ |
61 | static DECLARE_MUTEX(mem_ctls_mutex); | 57 | static DECLARE_MUTEX(mem_ctls_mutex); |
62 | static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); | 58 | static struct list_head mc_devices = LIST_HEAD_INIT(mc_devices); |
63 | 59 | ||
64 | static struct task_struct *edac_thread; | 60 | static struct task_struct *edac_thread; |
65 | 61 | ||
62 | #ifdef CONFIG_PCI | ||
63 | static int check_pci_parity = 0; /* default YES check PCI parity */ | ||
64 | static int panic_on_pci_parity; /* default no panic on PCI Parity */ | ||
65 | static atomic_t pci_parity_count = ATOMIC_INIT(0); | ||
66 | |||
66 | /* Structure of the whitelist and blacklist arrays */ | 67 | /* Structure of the whitelist and blacklist arrays */ |
67 | struct edac_pci_device_list { | 68 | struct edac_pci_device_list { |
68 | unsigned int vendor; /* Vendor ID */ | 69 | unsigned int vendor; /* Vendor ID */ |
@@ -79,6 +80,12 @@ static int pci_blacklist_count; | |||
79 | static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES]; | 80 | static struct edac_pci_device_list pci_whitelist[MAX_LISTED_PCI_DEVICES]; |
80 | static int pci_whitelist_count ; | 81 | static int pci_whitelist_count ; |
81 | 82 | ||
83 | #ifndef DISABLE_EDAC_SYSFS | ||
84 | static struct kobject edac_pci_kobj; /* /sys/devices/system/edac/pci */ | ||
85 | static struct completion edac_pci_kobj_complete; | ||
86 | #endif /* DISABLE_EDAC_SYSFS */ | ||
87 | #endif /* CONFIG_PCI */ | ||
88 | |||
82 | /* START sysfs data and methods */ | 89 | /* START sysfs data and methods */ |
83 | 90 | ||
84 | #ifndef DISABLE_EDAC_SYSFS | 91 | #ifndef DISABLE_EDAC_SYSFS |
@@ -126,18 +133,15 @@ static struct sysdev_class edac_class = { | |||
126 | set_kset_name("edac"), | 133 | set_kset_name("edac"), |
127 | }; | 134 | }; |
128 | 135 | ||
129 | /* sysfs objects: | 136 | /* sysfs object: |
130 | * /sys/devices/system/edac/mc | 137 | * /sys/devices/system/edac/mc |
131 | * /sys/devices/system/edac/pci | ||
132 | */ | 138 | */ |
133 | static struct kobject edac_memctrl_kobj; | 139 | static struct kobject edac_memctrl_kobj; |
134 | static struct kobject edac_pci_kobj; | ||
135 | 140 | ||
136 | /* We use these to wait for the reference counts on edac_memctrl_kobj and | 141 | /* We use these to wait for the reference counts on edac_memctrl_kobj and |
137 | * edac_pci_kobj to reach 0. | 142 | * edac_pci_kobj to reach 0. |
138 | */ | 143 | */ |
139 | static struct completion edac_memctrl_kobj_complete; | 144 | static struct completion edac_memctrl_kobj_complete; |
140 | static struct completion edac_pci_kobj_complete; | ||
141 | 145 | ||
142 | /* | 146 | /* |
143 | * /sys/devices/system/edac/mc; | 147 | * /sys/devices/system/edac/mc; |
@@ -323,6 +327,8 @@ static void edac_sysfs_memctrl_teardown(void) | |||
323 | #endif /* DISABLE_EDAC_SYSFS */ | 327 | #endif /* DISABLE_EDAC_SYSFS */ |
324 | } | 328 | } |
325 | 329 | ||
330 | #ifdef CONFIG_PCI | ||
331 | |||
326 | #ifndef DISABLE_EDAC_SYSFS | 332 | #ifndef DISABLE_EDAC_SYSFS |
327 | 333 | ||
328 | /* | 334 | /* |
@@ -623,6 +629,252 @@ static void edac_sysfs_pci_teardown(void) | |||
623 | #endif | 629 | #endif |
624 | } | 630 | } |
625 | 631 | ||
632 | |||
633 | static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) | ||
634 | { | ||
635 | int where; | ||
636 | u16 status; | ||
637 | |||
638 | where = secondary ? PCI_SEC_STATUS : PCI_STATUS; | ||
639 | pci_read_config_word(dev, where, &status); | ||
640 | |||
641 | /* If we get back 0xFFFF then we must suspect that the card has been | ||
642 | * pulled but the Linux PCI layer has not yet finished cleaning up. | ||
643 | * We don't want to report on such devices | ||
644 | */ | ||
645 | |||
646 | if (status == 0xFFFF) { | ||
647 | u32 sanity; | ||
648 | |||
649 | pci_read_config_dword(dev, 0, &sanity); | ||
650 | |||
651 | if (sanity == 0xFFFFFFFF) | ||
652 | return 0; | ||
653 | } | ||
654 | |||
655 | status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | | ||
656 | PCI_STATUS_PARITY; | ||
657 | |||
658 | if (status) | ||
659 | /* reset only the bits we are interested in */ | ||
660 | pci_write_config_word(dev, where, status); | ||
661 | |||
662 | return status; | ||
663 | } | ||
664 | |||
665 | typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); | ||
666 | |||
667 | /* Clear any PCI parity errors logged by this device. */ | ||
668 | static void edac_pci_dev_parity_clear(struct pci_dev *dev) | ||
669 | { | ||
670 | u8 header_type; | ||
671 | |||
672 | get_pci_parity_status(dev, 0); | ||
673 | |||
674 | /* read the device TYPE, looking for bridges */ | ||
675 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
676 | |||
677 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) | ||
678 | get_pci_parity_status(dev, 1); | ||
679 | } | ||
680 | |||
681 | /* | ||
682 | * PCI Parity polling | ||
683 | * | ||
684 | */ | ||
685 | static void edac_pci_dev_parity_test(struct pci_dev *dev) | ||
686 | { | ||
687 | u16 status; | ||
688 | u8 header_type; | ||
689 | |||
690 | /* read the STATUS register on this device | ||
691 | */ | ||
692 | status = get_pci_parity_status(dev, 0); | ||
693 | |||
694 | debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id ); | ||
695 | |||
696 | /* check the status reg for errors */ | ||
697 | if (status) { | ||
698 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) | ||
699 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
700 | "Signaled System Error on %s\n", | ||
701 | pci_name(dev)); | ||
702 | |||
703 | if (status & (PCI_STATUS_PARITY)) { | ||
704 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
705 | "Master Data Parity Error on %s\n", | ||
706 | pci_name(dev)); | ||
707 | |||
708 | atomic_inc(&pci_parity_count); | ||
709 | } | ||
710 | |||
711 | if (status & (PCI_STATUS_DETECTED_PARITY)) { | ||
712 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
713 | "Detected Parity Error on %s\n", | ||
714 | pci_name(dev)); | ||
715 | |||
716 | atomic_inc(&pci_parity_count); | ||
717 | } | ||
718 | } | ||
719 | |||
720 | /* read the device TYPE, looking for bridges */ | ||
721 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
722 | |||
723 | debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id ); | ||
724 | |||
725 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { | ||
726 | /* On bridges, need to examine secondary status register */ | ||
727 | status = get_pci_parity_status(dev, 1); | ||
728 | |||
729 | debugf2("PCI SEC_STATUS= 0x%04x %s\n", | ||
730 | status, dev->dev.bus_id ); | ||
731 | |||
732 | /* check the secondary status reg for errors */ | ||
733 | if (status) { | ||
734 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) | ||
735 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
736 | "Signaled System Error on %s\n", | ||
737 | pci_name(dev)); | ||
738 | |||
739 | if (status & (PCI_STATUS_PARITY)) { | ||
740 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
741 | "Master Data Parity Error on " | ||
742 | "%s\n", pci_name(dev)); | ||
743 | |||
744 | atomic_inc(&pci_parity_count); | ||
745 | } | ||
746 | |||
747 | if (status & (PCI_STATUS_DETECTED_PARITY)) { | ||
748 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
749 | "Detected Parity Error on %s\n", | ||
750 | pci_name(dev)); | ||
751 | |||
752 | atomic_inc(&pci_parity_count); | ||
753 | } | ||
754 | } | ||
755 | } | ||
756 | } | ||
757 | |||
758 | /* | ||
759 | * check_dev_on_list: Scan for a PCI device on a white/black list | ||
760 | * @list: an EDAC &edac_pci_device_list white/black list pointer | ||
761 | * @free_index: index of next free entry on the list | ||
762 | * @pci_dev: PCI Device pointer | ||
763 | * | ||
764 | * see if list contains the device. | ||
765 | * | ||
766 | * Returns: 0 not found | ||
767 | * 1 found on list | ||
768 | */ | ||
769 | static int check_dev_on_list(struct edac_pci_device_list *list, | ||
770 | int free_index, struct pci_dev *dev) | ||
771 | { | ||
772 | int i; | ||
773 | int rc = 0; /* Assume not found */ | ||
774 | unsigned short vendor=dev->vendor; | ||
775 | unsigned short device=dev->device; | ||
776 | |||
777 | /* Scan the list, looking for a vendor/device match */ | ||
778 | for (i = 0; i < free_index; i++, list++ ) { | ||
779 | if ((list->vendor == vendor ) && (list->device == device )) { | ||
780 | rc = 1; | ||
781 | break; | ||
782 | } | ||
783 | } | ||
784 | |||
785 | return rc; | ||
786 | } | ||
787 | |||
788 | /* | ||
789 | * pci_dev parity list iterator | ||
790 | * Scan the PCI device list for one iteration, looking for SERRORs | ||
791 | * Master Parity ERRORS or Parity ERRORs on primary or secondary devices | ||
792 | */ | ||
793 | static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) | ||
794 | { | ||
795 | struct pci_dev *dev = NULL; | ||
796 | |||
797 | /* request for kernel access to the next PCI device, if any, | ||
798 | * and while we are looking at it have its reference count | ||
799 | * bumped until we are done with it | ||
800 | */ | ||
801 | while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
802 | /* if whitelist exists then it has priority, so only scan | ||
803 | * those devices on the whitelist | ||
804 | */ | ||
805 | if (pci_whitelist_count > 0 ) { | ||
806 | if (check_dev_on_list(pci_whitelist, | ||
807 | pci_whitelist_count, dev)) | ||
808 | fn(dev); | ||
809 | } else { | ||
810 | /* | ||
811 | * if no whitelist, then check if this devices is | ||
812 | * blacklisted | ||
813 | */ | ||
814 | if (!check_dev_on_list(pci_blacklist, | ||
815 | pci_blacklist_count, dev)) | ||
816 | fn(dev); | ||
817 | } | ||
818 | } | ||
819 | } | ||
820 | |||
821 | static void do_pci_parity_check(void) | ||
822 | { | ||
823 | unsigned long flags; | ||
824 | int before_count; | ||
825 | |||
826 | debugf3("%s()\n", __func__); | ||
827 | |||
828 | if (!check_pci_parity) | ||
829 | return; | ||
830 | |||
831 | before_count = atomic_read(&pci_parity_count); | ||
832 | |||
833 | /* scan all PCI devices looking for a Parity Error on devices and | ||
834 | * bridges | ||
835 | */ | ||
836 | local_irq_save(flags); | ||
837 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); | ||
838 | local_irq_restore(flags); | ||
839 | |||
840 | /* Only if operator has selected panic on PCI Error */ | ||
841 | if (panic_on_pci_parity) { | ||
842 | /* If the count is different 'after' from 'before' */ | ||
843 | if (before_count != atomic_read(&pci_parity_count)) | ||
844 | panic("EDAC: PCI Parity Error"); | ||
845 | } | ||
846 | } | ||
847 | |||
848 | static inline void clear_pci_parity_errors(void) | ||
849 | { | ||
850 | /* Clear any PCI bus parity errors that devices initially have logged | ||
851 | * in their registers. | ||
852 | */ | ||
853 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); | ||
854 | } | ||
855 | |||
856 | #else /* CONFIG_PCI */ | ||
857 | |||
858 | static inline void do_pci_parity_check(void) | ||
859 | { | ||
860 | /* no-op */ | ||
861 | } | ||
862 | |||
863 | static inline void clear_pci_parity_errors(void) | ||
864 | { | ||
865 | /* no-op */ | ||
866 | } | ||
867 | |||
868 | static void edac_sysfs_pci_teardown(void) | ||
869 | { | ||
870 | } | ||
871 | |||
872 | static int edac_sysfs_pci_setup(void) | ||
873 | { | ||
874 | return 0; | ||
875 | } | ||
876 | #endif /* CONFIG_PCI */ | ||
877 | |||
626 | #ifndef DISABLE_EDAC_SYSFS | 878 | #ifndef DISABLE_EDAC_SYSFS |
627 | 879 | ||
628 | /* EDAC sysfs CSROW data structures and methods */ | 880 | /* EDAC sysfs CSROW data structures and methods */ |
@@ -1131,7 +1383,7 @@ static int edac_create_sysfs_mci_device(struct mem_ctl_info *mci) | |||
1131 | return err; | 1383 | return err; |
1132 | 1384 | ||
1133 | /* create a symlink for the device */ | 1385 | /* create a symlink for the device */ |
1134 | err = sysfs_create_link(edac_mci_kobj, &mci->pdev->dev.kobj, | 1386 | err = sysfs_create_link(edac_mci_kobj, &mci->dev->kobj, |
1135 | EDAC_DEVICE_SYMLINK); | 1387 | EDAC_DEVICE_SYMLINK); |
1136 | 1388 | ||
1137 | if (err) | 1389 | if (err) |
@@ -1237,7 +1489,7 @@ void edac_mc_dump_mci(struct mem_ctl_info *mci) | |||
1237 | debugf4("\tmci->edac_check = %p\n", mci->edac_check); | 1489 | debugf4("\tmci->edac_check = %p\n", mci->edac_check); |
1238 | debugf3("\tmci->nr_csrows = %d, csrows = %p\n", | 1490 | debugf3("\tmci->nr_csrows = %d, csrows = %p\n", |
1239 | mci->nr_csrows, mci->csrows); | 1491 | mci->nr_csrows, mci->csrows); |
1240 | debugf3("\tpdev = %p\n", mci->pdev); | 1492 | debugf3("\tdev = %p\n", mci->dev); |
1241 | debugf3("\tmod_name:ctl_name = %s:%s\n", | 1493 | debugf3("\tmod_name:ctl_name = %s:%s\n", |
1242 | mci->mod_name, mci->ctl_name); | 1494 | mci->mod_name, mci->ctl_name); |
1243 | debugf3("\tpvt_info = %p\n\n", mci->pvt_info); | 1495 | debugf3("\tpvt_info = %p\n\n", mci->pvt_info); |
@@ -1362,7 +1614,7 @@ void edac_mc_free(struct mem_ctl_info *mci) | |||
1362 | } | 1614 | } |
1363 | EXPORT_SYMBOL_GPL(edac_mc_free); | 1615 | EXPORT_SYMBOL_GPL(edac_mc_free); |
1364 | 1616 | ||
1365 | static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev) | 1617 | static struct mem_ctl_info *find_mci_by_dev(struct device *dev) |
1366 | { | 1618 | { |
1367 | struct mem_ctl_info *mci; | 1619 | struct mem_ctl_info *mci; |
1368 | struct list_head *item; | 1620 | struct list_head *item; |
@@ -1372,54 +1624,53 @@ static struct mem_ctl_info *find_mci_by_pdev(struct pci_dev *pdev) | |||
1372 | list_for_each(item, &mc_devices) { | 1624 | list_for_each(item, &mc_devices) { |
1373 | mci = list_entry(item, struct mem_ctl_info, link); | 1625 | mci = list_entry(item, struct mem_ctl_info, link); |
1374 | 1626 | ||
1375 | if (mci->pdev == pdev) | 1627 | if (mci->dev == dev) |
1376 | return mci; | 1628 | return mci; |
1377 | } | 1629 | } |
1378 | 1630 | ||
1379 | return NULL; | 1631 | return NULL; |
1380 | } | 1632 | } |
1381 | 1633 | ||
1382 | static int add_mc_to_global_list(struct mem_ctl_info *mci) | 1634 | /* Return 0 on success, 1 on failure. |
1635 | * Before calling this function, caller must | ||
1636 | * assign a unique value to mci->mc_idx. | ||
1637 | */ | ||
1638 | static int add_mc_to_global_list (struct mem_ctl_info *mci) | ||
1383 | { | 1639 | { |
1384 | struct list_head *item, *insert_before; | 1640 | struct list_head *item, *insert_before; |
1385 | struct mem_ctl_info *p; | 1641 | struct mem_ctl_info *p; |
1386 | int i; | ||
1387 | 1642 | ||
1388 | if (list_empty(&mc_devices)) { | 1643 | insert_before = &mc_devices; |
1389 | mci->mc_idx = 0; | ||
1390 | insert_before = &mc_devices; | ||
1391 | } else { | ||
1392 | if (find_mci_by_pdev(mci->pdev)) { | ||
1393 | edac_printk(KERN_WARNING, EDAC_MC, | ||
1394 | "%s (%s) %s %s already assigned %d\n", | ||
1395 | mci->pdev->dev.bus_id, | ||
1396 | pci_name(mci->pdev), mci->mod_name, | ||
1397 | mci->ctl_name, mci->mc_idx); | ||
1398 | return 1; | ||
1399 | } | ||
1400 | 1644 | ||
1401 | insert_before = NULL; | 1645 | if (unlikely((p = find_mci_by_dev(mci->dev)) != NULL)) |
1402 | i = 0; | 1646 | goto fail0; |
1403 | 1647 | ||
1404 | list_for_each(item, &mc_devices) { | 1648 | list_for_each(item, &mc_devices) { |
1405 | p = list_entry(item, struct mem_ctl_info, link); | 1649 | p = list_entry(item, struct mem_ctl_info, link); |
1406 | 1650 | ||
1407 | if (p->mc_idx != i) { | 1651 | if (p->mc_idx >= mci->mc_idx) { |
1408 | insert_before = item; | 1652 | if (unlikely(p->mc_idx == mci->mc_idx)) |
1409 | break; | 1653 | goto fail1; |
1410 | } | ||
1411 | 1654 | ||
1412 | i++; | 1655 | insert_before = item; |
1656 | break; | ||
1413 | } | 1657 | } |
1414 | |||
1415 | mci->mc_idx = i; | ||
1416 | |||
1417 | if (insert_before == NULL) | ||
1418 | insert_before = &mc_devices; | ||
1419 | } | 1658 | } |
1420 | 1659 | ||
1421 | list_add_tail_rcu(&mci->link, insert_before); | 1660 | list_add_tail_rcu(&mci->link, insert_before); |
1422 | return 0; | 1661 | return 0; |
1662 | |||
1663 | fail0: | ||
1664 | edac_printk(KERN_WARNING, EDAC_MC, | ||
1665 | "%s (%s) %s %s already assigned %d\n", p->dev->bus_id, | ||
1666 | dev_name(p->dev), p->mod_name, p->ctl_name, p->mc_idx); | ||
1667 | return 1; | ||
1668 | |||
1669 | fail1: | ||
1670 | edac_printk(KERN_WARNING, EDAC_MC, | ||
1671 | "bug in low-level driver: attempt to assign\n" | ||
1672 | " duplicate mc_idx %d in %s()\n", p->mc_idx, __func__); | ||
1673 | return 1; | ||
1423 | } | 1674 | } |
1424 | 1675 | ||
1425 | static void complete_mc_list_del(struct rcu_head *head) | 1676 | static void complete_mc_list_del(struct rcu_head *head) |
@@ -1443,6 +1694,7 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) | |||
1443 | * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and | 1694 | * edac_mc_add_mc: Insert the 'mci' structure into the mci global list and |
1444 | * create sysfs entries associated with mci structure | 1695 | * create sysfs entries associated with mci structure |
1445 | * @mci: pointer to the mci structure to be added to the list | 1696 | * @mci: pointer to the mci structure to be added to the list |
1697 | * @mc_idx: A unique numeric identifier to be assigned to the 'mci' structure. | ||
1446 | * | 1698 | * |
1447 | * Return: | 1699 | * Return: |
1448 | * 0 Success | 1700 | * 0 Success |
@@ -1450,9 +1702,10 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci) | |||
1450 | */ | 1702 | */ |
1451 | 1703 | ||
1452 | /* FIXME - should a warning be printed if no error detection? correction? */ | 1704 | /* FIXME - should a warning be printed if no error detection? correction? */ |
1453 | int edac_mc_add_mc(struct mem_ctl_info *mci) | 1705 | int edac_mc_add_mc(struct mem_ctl_info *mci, int mc_idx) |
1454 | { | 1706 | { |
1455 | debugf0("%s()\n", __func__); | 1707 | debugf0("%s()\n", __func__); |
1708 | mci->mc_idx = mc_idx; | ||
1456 | #ifdef CONFIG_EDAC_DEBUG | 1709 | #ifdef CONFIG_EDAC_DEBUG |
1457 | if (edac_debug_level >= 3) | 1710 | if (edac_debug_level >= 3) |
1458 | edac_mc_dump_mci(mci); | 1711 | edac_mc_dump_mci(mci); |
@@ -1485,8 +1738,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci) | |||
1485 | } | 1738 | } |
1486 | 1739 | ||
1487 | /* Report action taken */ | 1740 | /* Report action taken */ |
1488 | edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: PCI %s\n", | 1741 | edac_mc_printk(mci, KERN_INFO, "Giving out device to %s %s: DEV %s\n", |
1489 | mci->mod_name, mci->ctl_name, pci_name(mci->pdev)); | 1742 | mci->mod_name, mci->ctl_name, dev_name(mci->dev)); |
1490 | 1743 | ||
1491 | up(&mem_ctls_mutex); | 1744 | up(&mem_ctls_mutex); |
1492 | return 0; | 1745 | return 0; |
@@ -1503,18 +1756,18 @@ EXPORT_SYMBOL_GPL(edac_mc_add_mc); | |||
1503 | /** | 1756 | /** |
1504 | * edac_mc_del_mc: Remove sysfs entries for specified mci structure and | 1757 | * edac_mc_del_mc: Remove sysfs entries for specified mci structure and |
1505 | * remove mci structure from global list | 1758 | * remove mci structure from global list |
1506 | * @pdev: Pointer to 'struct pci_dev' representing mci structure to remove. | 1759 | * @pdev: Pointer to 'struct device' representing mci structure to remove. |
1507 | * | 1760 | * |
1508 | * Return pointer to removed mci structure, or NULL if device not found. | 1761 | * Return pointer to removed mci structure, or NULL if device not found. |
1509 | */ | 1762 | */ |
1510 | struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev) | 1763 | struct mem_ctl_info * edac_mc_del_mc(struct device *dev) |
1511 | { | 1764 | { |
1512 | struct mem_ctl_info *mci; | 1765 | struct mem_ctl_info *mci; |
1513 | 1766 | ||
1514 | debugf0("MC: %s()\n", __func__); | 1767 | debugf0("MC: %s()\n", __func__); |
1515 | down(&mem_ctls_mutex); | 1768 | down(&mem_ctls_mutex); |
1516 | 1769 | ||
1517 | if ((mci = find_mci_by_pdev(pdev)) == NULL) { | 1770 | if ((mci = find_mci_by_dev(dev)) == NULL) { |
1518 | up(&mem_ctls_mutex); | 1771 | up(&mem_ctls_mutex); |
1519 | return NULL; | 1772 | return NULL; |
1520 | } | 1773 | } |
@@ -1523,8 +1776,8 @@ struct mem_ctl_info * edac_mc_del_mc(struct pci_dev *pdev) | |||
1523 | del_mc_from_global_list(mci); | 1776 | del_mc_from_global_list(mci); |
1524 | up(&mem_ctls_mutex); | 1777 | up(&mem_ctls_mutex); |
1525 | edac_printk(KERN_INFO, EDAC_MC, | 1778 | edac_printk(KERN_INFO, EDAC_MC, |
1526 | "Removed device %d for %s %s: PCI %s\n", mci->mc_idx, | 1779 | "Removed device %d for %s %s: DEV %s\n", mci->mc_idx, |
1527 | mci->mod_name, mci->ctl_name, pci_name(mci->pdev)); | 1780 | mci->mod_name, mci->ctl_name, dev_name(mci->dev)); |
1528 | return mci; | 1781 | return mci; |
1529 | } | 1782 | } |
1530 | EXPORT_SYMBOL_GPL(edac_mc_del_mc); | 1783 | EXPORT_SYMBOL_GPL(edac_mc_del_mc); |
@@ -1738,244 +1991,6 @@ void edac_mc_handle_ue_no_info(struct mem_ctl_info *mci, const char *msg) | |||
1738 | } | 1991 | } |
1739 | EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); | 1992 | EXPORT_SYMBOL_GPL(edac_mc_handle_ue_no_info); |
1740 | 1993 | ||
1741 | #ifdef CONFIG_PCI | ||
1742 | |||
1743 | static u16 get_pci_parity_status(struct pci_dev *dev, int secondary) | ||
1744 | { | ||
1745 | int where; | ||
1746 | u16 status; | ||
1747 | |||
1748 | where = secondary ? PCI_SEC_STATUS : PCI_STATUS; | ||
1749 | pci_read_config_word(dev, where, &status); | ||
1750 | |||
1751 | /* If we get back 0xFFFF then we must suspect that the card has been | ||
1752 | * pulled but the Linux PCI layer has not yet finished cleaning up. | ||
1753 | * We don't want to report on such devices | ||
1754 | */ | ||
1755 | |||
1756 | if (status == 0xFFFF) { | ||
1757 | u32 sanity; | ||
1758 | |||
1759 | pci_read_config_dword(dev, 0, &sanity); | ||
1760 | |||
1761 | if (sanity == 0xFFFFFFFF) | ||
1762 | return 0; | ||
1763 | } | ||
1764 | |||
1765 | status &= PCI_STATUS_DETECTED_PARITY | PCI_STATUS_SIG_SYSTEM_ERROR | | ||
1766 | PCI_STATUS_PARITY; | ||
1767 | |||
1768 | if (status) | ||
1769 | /* reset only the bits we are interested in */ | ||
1770 | pci_write_config_word(dev, where, status); | ||
1771 | |||
1772 | return status; | ||
1773 | } | ||
1774 | |||
1775 | typedef void (*pci_parity_check_fn_t) (struct pci_dev *dev); | ||
1776 | |||
1777 | /* Clear any PCI parity errors logged by this device. */ | ||
1778 | static void edac_pci_dev_parity_clear(struct pci_dev *dev) | ||
1779 | { | ||
1780 | u8 header_type; | ||
1781 | |||
1782 | get_pci_parity_status(dev, 0); | ||
1783 | |||
1784 | /* read the device TYPE, looking for bridges */ | ||
1785 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
1786 | |||
1787 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) | ||
1788 | get_pci_parity_status(dev, 1); | ||
1789 | } | ||
1790 | |||
1791 | /* | ||
1792 | * PCI Parity polling | ||
1793 | * | ||
1794 | */ | ||
1795 | static void edac_pci_dev_parity_test(struct pci_dev *dev) | ||
1796 | { | ||
1797 | u16 status; | ||
1798 | u8 header_type; | ||
1799 | |||
1800 | /* read the STATUS register on this device | ||
1801 | */ | ||
1802 | status = get_pci_parity_status(dev, 0); | ||
1803 | |||
1804 | debugf2("PCI STATUS= 0x%04x %s\n", status, dev->dev.bus_id ); | ||
1805 | |||
1806 | /* check the status reg for errors */ | ||
1807 | if (status) { | ||
1808 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) | ||
1809 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
1810 | "Signaled System Error on %s\n", | ||
1811 | pci_name(dev)); | ||
1812 | |||
1813 | if (status & (PCI_STATUS_PARITY)) { | ||
1814 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
1815 | "Master Data Parity Error on %s\n", | ||
1816 | pci_name(dev)); | ||
1817 | |||
1818 | atomic_inc(&pci_parity_count); | ||
1819 | } | ||
1820 | |||
1821 | if (status & (PCI_STATUS_DETECTED_PARITY)) { | ||
1822 | edac_printk(KERN_CRIT, EDAC_PCI, | ||
1823 | "Detected Parity Error on %s\n", | ||
1824 | pci_name(dev)); | ||
1825 | |||
1826 | atomic_inc(&pci_parity_count); | ||
1827 | } | ||
1828 | } | ||
1829 | |||
1830 | /* read the device TYPE, looking for bridges */ | ||
1831 | pci_read_config_byte(dev, PCI_HEADER_TYPE, &header_type); | ||
1832 | |||
1833 | debugf2("PCI HEADER TYPE= 0x%02x %s\n", header_type, dev->dev.bus_id ); | ||
1834 | |||
1835 | if ((header_type & 0x7F) == PCI_HEADER_TYPE_BRIDGE) { | ||
1836 | /* On bridges, need to examine secondary status register */ | ||
1837 | status = get_pci_parity_status(dev, 1); | ||
1838 | |||
1839 | debugf2("PCI SEC_STATUS= 0x%04x %s\n", | ||
1840 | status, dev->dev.bus_id ); | ||
1841 | |||
1842 | /* check the secondary status reg for errors */ | ||
1843 | if (status) { | ||
1844 | if (status & (PCI_STATUS_SIG_SYSTEM_ERROR)) | ||
1845 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
1846 | "Signaled System Error on %s\n", | ||
1847 | pci_name(dev)); | ||
1848 | |||
1849 | if (status & (PCI_STATUS_PARITY)) { | ||
1850 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
1851 | "Master Data Parity Error on " | ||
1852 | "%s\n", pci_name(dev)); | ||
1853 | |||
1854 | atomic_inc(&pci_parity_count); | ||
1855 | } | ||
1856 | |||
1857 | if (status & (PCI_STATUS_DETECTED_PARITY)) { | ||
1858 | edac_printk(KERN_CRIT, EDAC_PCI, "Bridge " | ||
1859 | "Detected Parity Error on %s\n", | ||
1860 | pci_name(dev)); | ||
1861 | |||
1862 | atomic_inc(&pci_parity_count); | ||
1863 | } | ||
1864 | } | ||
1865 | } | ||
1866 | } | ||
1867 | |||
1868 | /* | ||
1869 | * check_dev_on_list: Scan for a PCI device on a white/black list | ||
1870 | * @list: an EDAC &edac_pci_device_list white/black list pointer | ||
1871 | * @free_index: index of next free entry on the list | ||
1872 | * @pci_dev: PCI Device pointer | ||
1873 | * | ||
1874 | * see if list contains the device. | ||
1875 | * | ||
1876 | * Returns: 0 not found | ||
1877 | * 1 found on list | ||
1878 | */ | ||
1879 | static int check_dev_on_list(struct edac_pci_device_list *list, | ||
1880 | int free_index, struct pci_dev *dev) | ||
1881 | { | ||
1882 | int i; | ||
1883 | int rc = 0; /* Assume not found */ | ||
1884 | unsigned short vendor=dev->vendor; | ||
1885 | unsigned short device=dev->device; | ||
1886 | |||
1887 | /* Scan the list, looking for a vendor/device match */ | ||
1888 | for (i = 0; i < free_index; i++, list++ ) { | ||
1889 | if ((list->vendor == vendor ) && (list->device == device )) { | ||
1890 | rc = 1; | ||
1891 | break; | ||
1892 | } | ||
1893 | } | ||
1894 | |||
1895 | return rc; | ||
1896 | } | ||
1897 | |||
1898 | /* | ||
1899 | * pci_dev parity list iterator | ||
1900 | * Scan the PCI device list for one iteration, looking for SERRORs | ||
1901 | * Master Parity ERRORS or Parity ERRORs on primary or secondary devices | ||
1902 | */ | ||
1903 | static inline void edac_pci_dev_parity_iterator(pci_parity_check_fn_t fn) | ||
1904 | { | ||
1905 | struct pci_dev *dev = NULL; | ||
1906 | |||
1907 | /* request for kernel access to the next PCI device, if any, | ||
1908 | * and while we are looking at it have its reference count | ||
1909 | * bumped until we are done with it | ||
1910 | */ | ||
1911 | while((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
1912 | /* if whitelist exists then it has priority, so only scan | ||
1913 | * those devices on the whitelist | ||
1914 | */ | ||
1915 | if (pci_whitelist_count > 0 ) { | ||
1916 | if (check_dev_on_list(pci_whitelist, | ||
1917 | pci_whitelist_count, dev)) | ||
1918 | fn(dev); | ||
1919 | } else { | ||
1920 | /* | ||
1921 | * if no whitelist, then check if this devices is | ||
1922 | * blacklisted | ||
1923 | */ | ||
1924 | if (!check_dev_on_list(pci_blacklist, | ||
1925 | pci_blacklist_count, dev)) | ||
1926 | fn(dev); | ||
1927 | } | ||
1928 | } | ||
1929 | } | ||
1930 | |||
1931 | static void do_pci_parity_check(void) | ||
1932 | { | ||
1933 | unsigned long flags; | ||
1934 | int before_count; | ||
1935 | |||
1936 | debugf3("%s()\n", __func__); | ||
1937 | |||
1938 | if (!check_pci_parity) | ||
1939 | return; | ||
1940 | |||
1941 | before_count = atomic_read(&pci_parity_count); | ||
1942 | |||
1943 | /* scan all PCI devices looking for a Parity Error on devices and | ||
1944 | * bridges | ||
1945 | */ | ||
1946 | local_irq_save(flags); | ||
1947 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_test); | ||
1948 | local_irq_restore(flags); | ||
1949 | |||
1950 | /* Only if operator has selected panic on PCI Error */ | ||
1951 | if (panic_on_pci_parity) { | ||
1952 | /* If the count is different 'after' from 'before' */ | ||
1953 | if (before_count != atomic_read(&pci_parity_count)) | ||
1954 | panic("EDAC: PCI Parity Error"); | ||
1955 | } | ||
1956 | } | ||
1957 | |||
1958 | static inline void clear_pci_parity_errors(void) | ||
1959 | { | ||
1960 | /* Clear any PCI bus parity errors that devices initially have logged | ||
1961 | * in their registers. | ||
1962 | */ | ||
1963 | edac_pci_dev_parity_iterator(edac_pci_dev_parity_clear); | ||
1964 | } | ||
1965 | |||
1966 | #else /* CONFIG_PCI */ | ||
1967 | |||
1968 | static inline void do_pci_parity_check(void) | ||
1969 | { | ||
1970 | /* no-op */ | ||
1971 | } | ||
1972 | |||
1973 | static inline void clear_pci_parity_errors(void) | ||
1974 | { | ||
1975 | /* no-op */ | ||
1976 | } | ||
1977 | |||
1978 | #endif /* CONFIG_PCI */ | ||
1979 | 1994 | ||
1980 | /* | 1995 | /* |
1981 | * Iterate over all MC instances and check for ECC, et al, errors | 1996 | * Iterate over all MC instances and check for ECC, et al, errors |
@@ -2095,10 +2110,12 @@ MODULE_DESCRIPTION("Core library routines for MC reporting"); | |||
2095 | 2110 | ||
2096 | module_param(panic_on_ue, int, 0644); | 2111 | module_param(panic_on_ue, int, 0644); |
2097 | MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); | 2112 | MODULE_PARM_DESC(panic_on_ue, "Panic on uncorrected error: 0=off 1=on"); |
2113 | #ifdef CONFIG_PCI | ||
2098 | module_param(check_pci_parity, int, 0644); | 2114 | module_param(check_pci_parity, int, 0644); |
2099 | MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); | 2115 | MODULE_PARM_DESC(check_pci_parity, "Check for PCI bus parity errors: 0=off 1=on"); |
2100 | module_param(panic_on_pci_parity, int, 0644); | 2116 | module_param(panic_on_pci_parity, int, 0644); |
2101 | MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); | 2117 | MODULE_PARM_DESC(panic_on_pci_parity, "Panic on PCI Bus Parity error: 0=off 1=on"); |
2118 | #endif | ||
2102 | module_param(log_ue, int, 0644); | 2119 | module_param(log_ue, int, 0644); |
2103 | MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); | 2120 | MODULE_PARM_DESC(log_ue, "Log uncorrectable error to console: 0=off 1=on"); |
2104 | module_param(log_ce, int, 0644); | 2121 | module_param(log_ce, int, 0644); |