aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/edac
diff options
context:
space:
mode:
authorMauro Carvalho Chehab <mchehab@redhat.com>2012-10-31 09:42:29 -0400
committerMauro Carvalho Chehab <mchehab@redhat.com>2013-02-21 09:06:38 -0500
commit80cc7d87d5eb34375f916d282450a0906a8ead60 (patch)
treebcdec36f2945da29b13955d821d5a786f1f78139 /drivers/edac
parentc2c93dbc97622e26dc19edc71e50ebaa996d7804 (diff)
edac: lock module owner to avoid error report conflicts
APEI GHES and i7core_edac/sb_edac can currently be loaded at the same time, but those are Highlander modules: "There can be only one". There are two reasons for that:
1) Each driver assumes that it is the only one registering with the EDAC core, as it is the driver's responsibility to number the memory controllers, and all of them start from 0;
2) If the BIOS is handling the memory errors, the OS can't also be doing it, as one will interfere with the other.
So, we need to add a module owner's lock to the EDAC core, in order to avoid having two different modules handling memory errors at the same time. The best way to implement this lock seems to be to use the driver's name, as this is unique and won't require changes to every driver.
Signed-off-by: Mauro Carvalho Chehab <mchehab@redhat.com>
Diffstat (limited to 'drivers/edac')
-rw-r--r--drivers/edac/edac_mc.c25
1 files changed, 21 insertions, 4 deletions
diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
index 78d8c7d6e76a..34eb9703ed33 100644
--- a/drivers/edac/edac_mc.c
+++ b/drivers/edac/edac_mc.c
@@ -42,6 +42,12 @@
42static DEFINE_MUTEX(mem_ctls_mutex); 42static DEFINE_MUTEX(mem_ctls_mutex);
43static LIST_HEAD(mc_devices); 43static LIST_HEAD(mc_devices);
44 44
45/*
46 * Used to lock EDAC MC to just one module, preventing two drivers, e.g.
47 * apei/ghes and i7core_edac, from being used at the same time.
48 */
49static void const *edac_mc_owner;
50
45unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf, 51unsigned edac_dimm_info_location(struct dimm_info *dimm, char *buf,
46 unsigned len) 52 unsigned len)
47{ 53{
@@ -659,9 +665,9 @@ fail1:
659 return 1; 665 return 1;
660} 666}
661 667
662static void del_mc_from_global_list(struct mem_ctl_info *mci) 668static int del_mc_from_global_list(struct mem_ctl_info *mci)
663{ 669{
664 atomic_dec(&edac_handlers); 670 int handlers = atomic_dec_return(&edac_handlers);
665 list_del_rcu(&mci->link); 671 list_del_rcu(&mci->link);
666 672
667 /* these are for safe removal of devices from global list while 673 /* these are for safe removal of devices from global list while
@@ -669,6 +675,8 @@ static void del_mc_from_global_list(struct mem_ctl_info *mci)
669 */ 675 */
670 synchronize_rcu(); 676 synchronize_rcu();
671 INIT_LIST_HEAD(&mci->link); 677 INIT_LIST_HEAD(&mci->link);
678
679 return handlers;
672} 680}
673 681
674/** 682/**
@@ -712,6 +720,7 @@ EXPORT_SYMBOL(edac_mc_find);
712/* FIXME - should a warning be printed if no error detection? correction? */ 720/* FIXME - should a warning be printed if no error detection? correction? */
713int edac_mc_add_mc(struct mem_ctl_info *mci) 721int edac_mc_add_mc(struct mem_ctl_info *mci)
714{ 722{
723 int ret = -EINVAL;
715 edac_dbg(0, "\n"); 724 edac_dbg(0, "\n");
716 725
717#ifdef CONFIG_EDAC_DEBUG 726#ifdef CONFIG_EDAC_DEBUG
@@ -742,6 +751,11 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
742#endif 751#endif
743 mutex_lock(&mem_ctls_mutex); 752 mutex_lock(&mem_ctls_mutex);
744 753
754 if (edac_mc_owner && edac_mc_owner != mci->mod_name) {
755 ret = -EPERM;
756 goto fail0;
757 }
758
745 if (add_mc_to_global_list(mci)) 759 if (add_mc_to_global_list(mci))
746 goto fail0; 760 goto fail0;
747 761
@@ -768,6 +782,8 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
768 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':" 782 edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
769 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci)); 783 " DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
770 784
785 edac_mc_owner = mci->mod_name;
786
771 mutex_unlock(&mem_ctls_mutex); 787 mutex_unlock(&mem_ctls_mutex);
772 return 0; 788 return 0;
773 789
@@ -776,7 +792,7 @@ fail1:
776 792
777fail0: 793fail0:
778 mutex_unlock(&mem_ctls_mutex); 794 mutex_unlock(&mem_ctls_mutex);
779 return 1; 795 return ret;
780} 796}
781EXPORT_SYMBOL_GPL(edac_mc_add_mc); 797EXPORT_SYMBOL_GPL(edac_mc_add_mc);
782 798
@@ -802,7 +818,8 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
802 return NULL; 818 return NULL;
803 } 819 }
804 820
805 del_mc_from_global_list(mci); 821 if (!del_mc_from_global_list(mci))
822 edac_mc_owner = NULL;
806 mutex_unlock(&mem_ctls_mutex); 823 mutex_unlock(&mem_ctls_mutex);
807 824
808 /* flush workq processes */ 825 /* flush workq processes */