diff options
-rw-r--r-- | arch/x86/include/asm/mce.h | 63 | ||||
-rw-r--r-- | arch/x86/include/asm/therm_throt.h | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/Makefile | 9 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/k7.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 231 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.h | 38 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_amd.c (renamed from arch/x86/kernel/cpu/mcheck/mce_amd_64.c) | 0 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel.c | 254 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 248 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/non-fatal.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/p4.c | 48 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/p5.c | 15 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/p6.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 106 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/winchip.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/traps.c | 3 |
16 files changed, 528 insertions, 508 deletions
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 540a466e50f5..5cdd8d100ec9 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -102,15 +102,39 @@ struct mce_log { | |||
102 | 102 | ||
103 | #ifdef __KERNEL__ | 103 | #ifdef __KERNEL__ |
104 | 104 | ||
105 | #include <linux/percpu.h> | ||
106 | #include <linux/init.h> | ||
107 | #include <asm/atomic.h> | ||
108 | |||
105 | extern int mce_disabled; | 109 | extern int mce_disabled; |
110 | extern int mce_p5_enabled; | ||
106 | 111 | ||
107 | #include <asm/atomic.h> | 112 | #ifdef CONFIG_X86_MCE |
108 | #include <linux/percpu.h> | 113 | void mcheck_init(struct cpuinfo_x86 *c); |
114 | #else | ||
115 | static inline void mcheck_init(struct cpuinfo_x86 *c) {} | ||
116 | #endif | ||
117 | |||
118 | #ifdef CONFIG_X86_OLD_MCE | ||
119 | extern int nr_mce_banks; | ||
120 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
121 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
122 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
123 | #endif | ||
124 | |||
125 | #ifdef CONFIG_X86_ANCIENT_MCE | ||
126 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
127 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
128 | static inline void enable_p5_mce(void) { mce_p5_enabled = 1; } | ||
129 | #else | ||
130 | static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} | ||
131 | static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | ||
132 | static inline void enable_p5_mce(void) {} | ||
133 | #endif | ||
109 | 134 | ||
110 | void mce_setup(struct mce *m); | 135 | void mce_setup(struct mce *m); |
111 | void mce_log(struct mce *m); | 136 | void mce_log(struct mce *m); |
112 | DECLARE_PER_CPU(struct sys_device, mce_dev); | 137 | DECLARE_PER_CPU(struct sys_device, mce_dev); |
113 | extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
114 | 138 | ||
115 | /* | 139 | /* |
116 | * To support more than 128 would need to escape the predefined | 140 | * To support more than 128 would need to escape the predefined |
@@ -145,12 +169,8 @@ int mce_available(struct cpuinfo_x86 *c); | |||
145 | DECLARE_PER_CPU(unsigned, mce_exception_count); | 169 | DECLARE_PER_CPU(unsigned, mce_exception_count); |
146 | DECLARE_PER_CPU(unsigned, mce_poll_count); | 170 | DECLARE_PER_CPU(unsigned, mce_poll_count); |
147 | 171 | ||
148 | void mce_log_therm_throt_event(__u64 status); | ||
149 | |||
150 | extern atomic_t mce_entry; | 172 | extern atomic_t mce_entry; |
151 | 173 | ||
152 | void do_machine_check(struct pt_regs *, long); | ||
153 | |||
154 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); | 174 | typedef DECLARE_BITMAP(mce_banks_t, MAX_NR_BANKS); |
155 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); | 175 | DECLARE_PER_CPU(mce_banks_t, mce_poll_banks); |
156 | 176 | ||
@@ -167,13 +187,32 @@ void mce_notify_process(void); | |||
167 | DECLARE_PER_CPU(struct mce, injectm); | 187 | DECLARE_PER_CPU(struct mce, injectm); |
168 | extern struct file_operations mce_chrdev_ops; | 188 | extern struct file_operations mce_chrdev_ops; |
169 | 189 | ||
170 | #ifdef CONFIG_X86_MCE | 190 | /* |
171 | void mcheck_init(struct cpuinfo_x86 *c); | 191 | * Exception handler |
172 | #else | 192 | */ |
173 | #define mcheck_init(c) do { } while (0) | 193 | |
174 | #endif | 194 | /* Call the installed machine check handler for this CPU setup. */ |
195 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
196 | void do_machine_check(struct pt_regs *, long); | ||
197 | |||
198 | /* | ||
199 | * Threshold handler | ||
200 | */ | ||
175 | 201 | ||
176 | extern void (*mce_threshold_vector)(void); | 202 | extern void (*mce_threshold_vector)(void); |
203 | extern void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | ||
204 | |||
205 | /* | ||
206 | * Thermal handler | ||
207 | */ | ||
208 | |||
209 | void intel_init_thermal(struct cpuinfo_x86 *c); | ||
210 | |||
211 | #ifdef CONFIG_X86_NEW_MCE | ||
212 | void mce_log_therm_throt_event(__u64 status); | ||
213 | #else | ||
214 | static inline void mce_log_therm_throt_event(__u64 status) {} | ||
215 | #endif | ||
177 | 216 | ||
178 | #endif /* __KERNEL__ */ | 217 | #endif /* __KERNEL__ */ |
179 | #endif /* _ASM_X86_MCE_H */ | 218 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/include/asm/therm_throt.h b/arch/x86/include/asm/therm_throt.h deleted file mode 100644 index c62349ee7860..000000000000 --- a/arch/x86/include/asm/therm_throt.h +++ /dev/null | |||
@@ -1,9 +0,0 @@ | |||
1 | #ifndef _ASM_X86_THERM_THROT_H | ||
2 | #define _ASM_X86_THERM_THROT_H | ||
3 | |||
4 | #include <asm/atomic.h> | ||
5 | |||
6 | extern atomic_t therm_throt_en; | ||
7 | int therm_throt_process(int curr); | ||
8 | |||
9 | #endif /* _ASM_X86_THERM_THROT_H */ | ||
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 45004faf67ea..188a1ca5ad2b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -1,11 +1,12 @@ | |||
1 | obj-y = mce.o therm_throt.o | 1 | obj-y = mce.o |
2 | 2 | ||
3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o | 3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o |
4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o | 4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o |
5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o | 5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o |
6 | obj-$(CONFIG_X86_MCE_P4THERMAL) += mce_intel.o | 6 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
7 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel_64.o mce_intel.o | 7 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
8 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd_64.o | ||
9 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | 8 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o |
10 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | 9 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o |
11 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 10 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
11 | |||
12 | obj-$(CONFIG_X86_THERMAL_VECTOR) += therm_throt.o | ||
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c index 89e510424152..b945d5dbc609 100644 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ b/arch/x86/kernel/cpu/mcheck/k7.c | |||
@@ -10,10 +10,9 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* Machine Check Handler For AMD Athlon/Duron: */ | 16 | /* Machine Check Handler For AMD Athlon/Duron: */ |
18 | static void k7_machine_check(struct pt_regs *regs, long error_code) | 17 | static void k7_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 18 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index d9d77cfd8cce..284d1de968bc 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -44,7 +44,6 @@ | |||
44 | #include <asm/msr.h> | 44 | #include <asm/msr.h> |
45 | 45 | ||
46 | #include "mce-internal.h" | 46 | #include "mce-internal.h" |
47 | #include "mce.h" | ||
48 | 47 | ||
49 | /* Handle unconfigured int18 (should never happen) */ | 48 | /* Handle unconfigured int18 (should never happen) */ |
50 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | 49 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) |
@@ -57,7 +56,7 @@ static void unexpected_machine_check(struct pt_regs *regs, long error_code) | |||
57 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | 56 | void (*machine_check_vector)(struct pt_regs *, long error_code) = |
58 | unexpected_machine_check; | 57 | unexpected_machine_check; |
59 | 58 | ||
60 | int mce_disabled; | 59 | int mce_disabled __read_mostly; |
61 | 60 | ||
62 | #ifdef CONFIG_X86_NEW_MCE | 61 | #ifdef CONFIG_X86_NEW_MCE |
63 | 62 | ||
@@ -76,21 +75,22 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); | |||
76 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors | 75 | * 2: SIGBUS or log uncorrected errors (if possible), log corrected errors |
77 | * 3: never panic or SIGBUS, log all errors (for testing only) | 76 | * 3: never panic or SIGBUS, log all errors (for testing only) |
78 | */ | 77 | */ |
79 | static int tolerant = 1; | 78 | static int tolerant __read_mostly = 1; |
80 | static int banks; | 79 | static int banks __read_mostly; |
81 | static u64 *bank; | 80 | static u64 *bank __read_mostly; |
82 | static unsigned long notify_user; | 81 | static int rip_msr __read_mostly; |
83 | static int rip_msr; | 82 | static int mce_bootlog __read_mostly = -1; |
84 | static int mce_bootlog = -1; | 83 | static int monarch_timeout __read_mostly = -1; |
85 | static int monarch_timeout = -1; | 84 | static int mce_panic_timeout __read_mostly; |
86 | static int mce_panic_timeout; | 85 | static int mce_dont_log_ce __read_mostly; |
87 | static int mce_dont_log_ce; | 86 | int mce_cmci_disabled __read_mostly; |
88 | int mce_cmci_disabled; | 87 | int mce_ignore_ce __read_mostly; |
89 | int mce_ignore_ce; | 88 | int mce_ser __read_mostly; |
90 | int mce_ser; | 89 | |
91 | 90 | /* User mode helper program triggered by machine check event */ | |
92 | static char trigger[128]; | 91 | static unsigned long mce_need_notify; |
93 | static char *trigger_argv[2] = { trigger, NULL }; | 92 | static char mce_helper[128]; |
93 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | ||
94 | 94 | ||
95 | static unsigned long dont_init_banks; | 95 | static unsigned long dont_init_banks; |
96 | 96 | ||
@@ -180,7 +180,7 @@ void mce_log(struct mce *mce) | |||
180 | wmb(); | 180 | wmb(); |
181 | 181 | ||
182 | mce->finished = 1; | 182 | mce->finished = 1; |
183 | set_bit(0, &notify_user); | 183 | set_bit(0, &mce_need_notify); |
184 | } | 184 | } |
185 | 185 | ||
186 | static void print_mce(struct mce *m) | 186 | static void print_mce(struct mce *m) |
@@ -691,18 +691,21 @@ static atomic_t global_nwo; | |||
691 | * in the entry order. | 691 | * in the entry order. |
692 | * TBD double check parallel CPU hotunplug | 692 | * TBD double check parallel CPU hotunplug |
693 | */ | 693 | */ |
694 | static int mce_start(int no_way_out, int *order) | 694 | static int mce_start(int *no_way_out) |
695 | { | 695 | { |
696 | int nwo; | 696 | int order; |
697 | int cpus = num_online_cpus(); | 697 | int cpus = num_online_cpus(); |
698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; | 698 | u64 timeout = (u64)monarch_timeout * NSEC_PER_USEC; |
699 | 699 | ||
700 | if (!timeout) { | 700 | if (!timeout) |
701 | *order = -1; | 701 | return -1; |
702 | return no_way_out; | ||
703 | } | ||
704 | 702 | ||
705 | atomic_add(no_way_out, &global_nwo); | 703 | atomic_add(*no_way_out, &global_nwo); |
704 | /* | ||
705 | * global_nwo should be updated before mce_callin | ||
706 | */ | ||
707 | smp_wmb(); | ||
708 | order = atomic_add_return(1, &mce_callin); | ||
706 | 709 | ||
707 | /* | 710 | /* |
708 | * Wait for everyone. | 711 | * Wait for everyone. |
@@ -710,40 +713,43 @@ static int mce_start(int no_way_out, int *order) | |||
710 | while (atomic_read(&mce_callin) != cpus) { | 713 | while (atomic_read(&mce_callin) != cpus) { |
711 | if (mce_timed_out(&timeout)) { | 714 | if (mce_timed_out(&timeout)) { |
712 | atomic_set(&global_nwo, 0); | 715 | atomic_set(&global_nwo, 0); |
713 | *order = -1; | 716 | return -1; |
714 | return no_way_out; | ||
715 | } | 717 | } |
716 | ndelay(SPINUNIT); | 718 | ndelay(SPINUNIT); |
717 | } | 719 | } |
718 | 720 | ||
719 | /* | 721 | /* |
720 | * Cache the global no_way_out state. | 722 | * mce_callin should be read before global_nwo |
721 | */ | 723 | */ |
722 | nwo = atomic_read(&global_nwo); | 724 | smp_rmb(); |
723 | 725 | ||
724 | /* | 726 | if (order == 1) { |
725 | * Monarch starts executing now, the others wait. | 727 | /* |
726 | */ | 728 | * Monarch: Starts executing now, the others wait. |
727 | if (*order == 1) { | 729 | */ |
728 | atomic_set(&mce_executing, 1); | 730 | atomic_set(&mce_executing, 1); |
729 | return nwo; | 731 | } else { |
732 | /* | ||
733 | * Subject: Now start the scanning loop one by one in | ||
734 | * the original callin order. | ||
735 | * This way when there are any shared banks it will be | ||
736 | * only seen by one CPU before cleared, avoiding duplicates. | ||
737 | */ | ||
738 | while (atomic_read(&mce_executing) < order) { | ||
739 | if (mce_timed_out(&timeout)) { | ||
740 | atomic_set(&global_nwo, 0); | ||
741 | return -1; | ||
742 | } | ||
743 | ndelay(SPINUNIT); | ||
744 | } | ||
730 | } | 745 | } |
731 | 746 | ||
732 | /* | 747 | /* |
733 | * Now start the scanning loop one by one | 748 | * Cache the global no_way_out state. |
734 | * in the original callin order. | ||
735 | * This way when there are any shared banks it will | ||
736 | * be only seen by one CPU before cleared, avoiding duplicates. | ||
737 | */ | 749 | */ |
738 | while (atomic_read(&mce_executing) < *order) { | 750 | *no_way_out = atomic_read(&global_nwo); |
739 | if (mce_timed_out(&timeout)) { | 751 | |
740 | atomic_set(&global_nwo, 0); | 752 | return order; |
741 | *order = -1; | ||
742 | return no_way_out; | ||
743 | } | ||
744 | ndelay(SPINUNIT); | ||
745 | } | ||
746 | return nwo; | ||
747 | } | 753 | } |
748 | 754 | ||
749 | /* | 755 | /* |
@@ -863,7 +869,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
863 | * check handler. | 869 | * check handler. |
864 | */ | 870 | */ |
865 | int order; | 871 | int order; |
866 | |||
867 | /* | 872 | /* |
868 | * If no_way_out gets set, there is no safe way to recover from this | 873 | * If no_way_out gets set, there is no safe way to recover from this |
869 | * MCE. If tolerant is cranked up, we'll try anyway. | 874 | * MCE. If tolerant is cranked up, we'll try anyway. |
@@ -887,7 +892,6 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
887 | if (!banks) | 892 | if (!banks) |
888 | goto out; | 893 | goto out; |
889 | 894 | ||
890 | order = atomic_add_return(1, &mce_callin); | ||
891 | mce_setup(&m); | 895 | mce_setup(&m); |
892 | 896 | ||
893 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 897 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
@@ -909,7 +913,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
909 | * This way we don't report duplicated events on shared banks | 913 | * This way we don't report duplicated events on shared banks |
910 | * because the first one to see it will clear it. | 914 | * because the first one to see it will clear it. |
911 | */ | 915 | */ |
912 | no_way_out = mce_start(no_way_out, &order); | 916 | order = mce_start(&no_way_out); |
913 | for (i = 0; i < banks; i++) { | 917 | for (i = 0; i < banks; i++) { |
914 | __clear_bit(i, toclear); | 918 | __clear_bit(i, toclear); |
915 | if (!bank[i]) | 919 | if (!bank[i]) |
@@ -1118,7 +1122,7 @@ static void mcheck_timer(unsigned long data) | |||
1118 | 1122 | ||
1119 | static void mce_do_trigger(struct work_struct *work) | 1123 | static void mce_do_trigger(struct work_struct *work) |
1120 | { | 1124 | { |
1121 | call_usermodehelper(trigger, trigger_argv, NULL, UMH_NO_WAIT); | 1125 | call_usermodehelper(mce_helper, mce_helper_argv, NULL, UMH_NO_WAIT); |
1122 | } | 1126 | } |
1123 | 1127 | ||
1124 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); | 1128 | static DECLARE_WORK(mce_trigger_work, mce_do_trigger); |
@@ -1135,7 +1139,7 @@ int mce_notify_irq(void) | |||
1135 | 1139 | ||
1136 | clear_thread_flag(TIF_MCE_NOTIFY); | 1140 | clear_thread_flag(TIF_MCE_NOTIFY); |
1137 | 1141 | ||
1138 | if (test_and_clear_bit(0, &notify_user)) { | 1142 | if (test_and_clear_bit(0, &mce_need_notify)) { |
1139 | wake_up_interruptible(&mce_wait); | 1143 | wake_up_interruptible(&mce_wait); |
1140 | 1144 | ||
1141 | /* | 1145 | /* |
@@ -1143,7 +1147,7 @@ int mce_notify_irq(void) | |||
1143 | * work_pending is always cleared before the function is | 1147 | * work_pending is always cleared before the function is |
1144 | * executed. | 1148 | * executed. |
1145 | */ | 1149 | */ |
1146 | if (trigger[0] && !work_pending(&mce_trigger_work)) | 1150 | if (mce_helper[0] && !work_pending(&mce_trigger_work)) |
1147 | schedule_work(&mce_trigger_work); | 1151 | schedule_work(&mce_trigger_work); |
1148 | 1152 | ||
1149 | if (__ratelimit(&ratelimit)) | 1153 | if (__ratelimit(&ratelimit)) |
@@ -1282,8 +1286,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | |||
1282 | return; | 1286 | return; |
1283 | switch (c->x86_vendor) { | 1287 | switch (c->x86_vendor) { |
1284 | case X86_VENDOR_INTEL: | 1288 | case X86_VENDOR_INTEL: |
1285 | if (mce_p5_enabled()) | 1289 | intel_p5_mcheck_init(c); |
1286 | intel_p5_mcheck_init(c); | ||
1287 | break; | 1290 | break; |
1288 | case X86_VENDOR_CENTAUR: | 1291 | case X86_VENDOR_CENTAUR: |
1289 | winchip_mcheck_init(c); | 1292 | winchip_mcheck_init(c); |
@@ -1609,8 +1612,9 @@ static int mce_resume(struct sys_device *dev) | |||
1609 | static void mce_cpu_restart(void *data) | 1612 | static void mce_cpu_restart(void *data) |
1610 | { | 1613 | { |
1611 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1614 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1612 | if (mce_available(&current_cpu_data)) | 1615 | if (!mce_available(&current_cpu_data)) |
1613 | mce_init(); | 1616 | return; |
1617 | mce_init(); | ||
1614 | mce_init_timer(); | 1618 | mce_init_timer(); |
1615 | } | 1619 | } |
1616 | 1620 | ||
@@ -1620,6 +1624,26 @@ static void mce_restart(void) | |||
1620 | on_each_cpu(mce_cpu_restart, NULL, 1); | 1624 | on_each_cpu(mce_cpu_restart, NULL, 1); |
1621 | } | 1625 | } |
1622 | 1626 | ||
1627 | /* Toggle features for corrected errors */ | ||
1628 | static void mce_disable_ce(void *all) | ||
1629 | { | ||
1630 | if (!mce_available(&current_cpu_data)) | ||
1631 | return; | ||
1632 | if (all) | ||
1633 | del_timer_sync(&__get_cpu_var(mce_timer)); | ||
1634 | cmci_clear(); | ||
1635 | } | ||
1636 | |||
1637 | static void mce_enable_ce(void *all) | ||
1638 | { | ||
1639 | if (!mce_available(&current_cpu_data)) | ||
1640 | return; | ||
1641 | cmci_reenable(); | ||
1642 | cmci_recheck(); | ||
1643 | if (all) | ||
1644 | mce_init_timer(); | ||
1645 | } | ||
1646 | |||
1623 | static struct sysdev_class mce_sysclass = { | 1647 | static struct sysdev_class mce_sysclass = { |
1624 | .suspend = mce_suspend, | 1648 | .suspend = mce_suspend, |
1625 | .shutdown = mce_shutdown, | 1649 | .shutdown = mce_shutdown, |
@@ -1659,9 +1683,9 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1659 | static ssize_t | 1683 | static ssize_t |
1660 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) | 1684 | show_trigger(struct sys_device *s, struct sysdev_attribute *attr, char *buf) |
1661 | { | 1685 | { |
1662 | strcpy(buf, trigger); | 1686 | strcpy(buf, mce_helper); |
1663 | strcat(buf, "\n"); | 1687 | strcat(buf, "\n"); |
1664 | return strlen(trigger) + 1; | 1688 | return strlen(mce_helper) + 1; |
1665 | } | 1689 | } |
1666 | 1690 | ||
1667 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | 1691 | static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, |
@@ -1670,10 +1694,10 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1670 | char *p; | 1694 | char *p; |
1671 | int len; | 1695 | int len; |
1672 | 1696 | ||
1673 | strncpy(trigger, buf, sizeof(trigger)); | 1697 | strncpy(mce_helper, buf, sizeof(mce_helper)); |
1674 | trigger[sizeof(trigger)-1] = 0; | 1698 | mce_helper[sizeof(mce_helper)-1] = 0; |
1675 | len = strlen(trigger); | 1699 | len = strlen(mce_helper); |
1676 | p = strchr(trigger, '\n'); | 1700 | p = strchr(mce_helper, '\n'); |
1677 | 1701 | ||
1678 | if (*p) | 1702 | if (*p) |
1679 | *p = 0; | 1703 | *p = 0; |
@@ -1681,6 +1705,52 @@ static ssize_t set_trigger(struct sys_device *s, struct sysdev_attribute *attr, | |||
1681 | return len; | 1705 | return len; |
1682 | } | 1706 | } |
1683 | 1707 | ||
1708 | static ssize_t set_ignore_ce(struct sys_device *s, | ||
1709 | struct sysdev_attribute *attr, | ||
1710 | const char *buf, size_t size) | ||
1711 | { | ||
1712 | u64 new; | ||
1713 | |||
1714 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1715 | return -EINVAL; | ||
1716 | |||
1717 | if (mce_ignore_ce ^ !!new) { | ||
1718 | if (new) { | ||
1719 | /* disable ce features */ | ||
1720 | on_each_cpu(mce_disable_ce, (void *)1, 1); | ||
1721 | mce_ignore_ce = 1; | ||
1722 | } else { | ||
1723 | /* enable ce features */ | ||
1724 | mce_ignore_ce = 0; | ||
1725 | on_each_cpu(mce_enable_ce, (void *)1, 1); | ||
1726 | } | ||
1727 | } | ||
1728 | return size; | ||
1729 | } | ||
1730 | |||
1731 | static ssize_t set_cmci_disabled(struct sys_device *s, | ||
1732 | struct sysdev_attribute *attr, | ||
1733 | const char *buf, size_t size) | ||
1734 | { | ||
1735 | u64 new; | ||
1736 | |||
1737 | if (strict_strtoull(buf, 0, &new) < 0) | ||
1738 | return -EINVAL; | ||
1739 | |||
1740 | if (mce_cmci_disabled ^ !!new) { | ||
1741 | if (new) { | ||
1742 | /* disable cmci */ | ||
1743 | on_each_cpu(mce_disable_ce, NULL, 1); | ||
1744 | mce_cmci_disabled = 1; | ||
1745 | } else { | ||
1746 | /* enable cmci */ | ||
1747 | mce_cmci_disabled = 0; | ||
1748 | on_each_cpu(mce_enable_ce, NULL, 1); | ||
1749 | } | ||
1750 | } | ||
1751 | return size; | ||
1752 | } | ||
1753 | |||
1684 | static ssize_t store_int_with_restart(struct sys_device *s, | 1754 | static ssize_t store_int_with_restart(struct sys_device *s, |
1685 | struct sysdev_attribute *attr, | 1755 | struct sysdev_attribute *attr, |
1686 | const char *buf, size_t size) | 1756 | const char *buf, size_t size) |
@@ -1693,6 +1763,7 @@ static ssize_t store_int_with_restart(struct sys_device *s, | |||
1693 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); | 1763 | static SYSDEV_ATTR(trigger, 0644, show_trigger, set_trigger); |
1694 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); | 1764 | static SYSDEV_INT_ATTR(tolerant, 0644, tolerant); |
1695 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); | 1765 | static SYSDEV_INT_ATTR(monarch_timeout, 0644, monarch_timeout); |
1766 | static SYSDEV_INT_ATTR(dont_log_ce, 0644, mce_dont_log_ce); | ||
1696 | 1767 | ||
1697 | static struct sysdev_ext_attribute attr_check_interval = { | 1768 | static struct sysdev_ext_attribute attr_check_interval = { |
1698 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, | 1769 | _SYSDEV_ATTR(check_interval, 0644, sysdev_show_int, |
@@ -1700,9 +1771,24 @@ static struct sysdev_ext_attribute attr_check_interval = { | |||
1700 | &check_interval | 1771 | &check_interval |
1701 | }; | 1772 | }; |
1702 | 1773 | ||
1774 | static struct sysdev_ext_attribute attr_ignore_ce = { | ||
1775 | _SYSDEV_ATTR(ignore_ce, 0644, sysdev_show_int, set_ignore_ce), | ||
1776 | &mce_ignore_ce | ||
1777 | }; | ||
1778 | |||
1779 | static struct sysdev_ext_attribute attr_cmci_disabled = { | ||
1780 | _SYSDEV_ATTR(cmci_disabled, 0644, sysdev_show_int, set_cmci_disabled), | ||
1781 | &mce_cmci_disabled | ||
1782 | }; | ||
1783 | |||
1703 | static struct sysdev_attribute *mce_attrs[] = { | 1784 | static struct sysdev_attribute *mce_attrs[] = { |
1704 | &attr_tolerant.attr, &attr_check_interval.attr, &attr_trigger, | 1785 | &attr_tolerant.attr, |
1786 | &attr_check_interval.attr, | ||
1787 | &attr_trigger, | ||
1705 | &attr_monarch_timeout.attr, | 1788 | &attr_monarch_timeout.attr, |
1789 | &attr_dont_log_ce.attr, | ||
1790 | &attr_ignore_ce.attr, | ||
1791 | &attr_cmci_disabled.attr, | ||
1706 | NULL | 1792 | NULL |
1707 | }; | 1793 | }; |
1708 | 1794 | ||
@@ -1712,7 +1798,7 @@ static cpumask_var_t mce_dev_initialized; | |||
1712 | static __cpuinit int mce_create_device(unsigned int cpu) | 1798 | static __cpuinit int mce_create_device(unsigned int cpu) |
1713 | { | 1799 | { |
1714 | int err; | 1800 | int err; |
1715 | int i; | 1801 | int i, j; |
1716 | 1802 | ||
1717 | if (!mce_available(&boot_cpu_data)) | 1803 | if (!mce_available(&boot_cpu_data)) |
1718 | return -EIO; | 1804 | return -EIO; |
@@ -1730,9 +1816,9 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1730 | if (err) | 1816 | if (err) |
1731 | goto error; | 1817 | goto error; |
1732 | } | 1818 | } |
1733 | for (i = 0; i < banks; i++) { | 1819 | for (j = 0; j < banks; j++) { |
1734 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | 1820 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), |
1735 | &bank_attrs[i]); | 1821 | &bank_attrs[j]); |
1736 | if (err) | 1822 | if (err) |
1737 | goto error2; | 1823 | goto error2; |
1738 | } | 1824 | } |
@@ -1740,8 +1826,8 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1740 | 1826 | ||
1741 | return 0; | 1827 | return 0; |
1742 | error2: | 1828 | error2: |
1743 | while (--i >= 0) | 1829 | while (--j >= 0) |
1744 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | 1830 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); |
1745 | error: | 1831 | error: |
1746 | while (--i >= 0) | 1832 | while (--i >= 0) |
1747 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1833 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); |
@@ -1883,7 +1969,7 @@ static __init int mce_init_device(void) | |||
1883 | if (!mce_available(&boot_cpu_data)) | 1969 | if (!mce_available(&boot_cpu_data)) |
1884 | return -EIO; | 1970 | return -EIO; |
1885 | 1971 | ||
1886 | alloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | 1972 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); |
1887 | 1973 | ||
1888 | err = mce_init_banks(); | 1974 | err = mce_init_banks(); |
1889 | if (err) | 1975 | if (err) |
@@ -1915,7 +2001,7 @@ EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | |||
1915 | /* This has to be run for each processor */ | 2001 | /* This has to be run for each processor */ |
1916 | void mcheck_init(struct cpuinfo_x86 *c) | 2002 | void mcheck_init(struct cpuinfo_x86 *c) |
1917 | { | 2003 | { |
1918 | if (mce_disabled == 1) | 2004 | if (mce_disabled) |
1919 | return; | 2005 | return; |
1920 | 2006 | ||
1921 | switch (c->x86_vendor) { | 2007 | switch (c->x86_vendor) { |
@@ -1945,10 +2031,9 @@ void mcheck_init(struct cpuinfo_x86 *c) | |||
1945 | 2031 | ||
1946 | static int __init mcheck_enable(char *str) | 2032 | static int __init mcheck_enable(char *str) |
1947 | { | 2033 | { |
1948 | mce_disabled = -1; | 2034 | mce_p5_enabled = 1; |
1949 | return 1; | 2035 | return 1; |
1950 | } | 2036 | } |
1951 | |||
1952 | __setup("mce", mcheck_enable); | 2037 | __setup("mce", mcheck_enable); |
1953 | 2038 | ||
1954 | #endif /* CONFIG_X86_OLD_MCE */ | 2039 | #endif /* CONFIG_X86_OLD_MCE */ |
diff --git a/arch/x86/kernel/cpu/mcheck/mce.h b/arch/x86/kernel/cpu/mcheck/mce.h deleted file mode 100644 index 84a552b458c8..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce.h +++ /dev/null | |||
@@ -1,38 +0,0 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <asm/mce.h> | ||
3 | |||
4 | #ifdef CONFIG_X86_OLD_MCE | ||
5 | void amd_mcheck_init(struct cpuinfo_x86 *c); | ||
6 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c); | ||
7 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c); | ||
8 | #endif | ||
9 | |||
10 | #ifdef CONFIG_X86_ANCIENT_MCE | ||
11 | void intel_p5_mcheck_init(struct cpuinfo_x86 *c); | ||
12 | void winchip_mcheck_init(struct cpuinfo_x86 *c); | ||
13 | extern int mce_p5_enable; | ||
14 | static inline int mce_p5_enabled(void) { return mce_p5_enable; } | ||
15 | static inline void enable_p5_mce(void) { mce_p5_enable = 1; } | ||
16 | #else | ||
17 | static inline void intel_p5_mcheck_init(struct cpuinfo_x86 *c) {} | ||
18 | static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} | ||
19 | static inline int mce_p5_enabled(void) { return 0; } | ||
20 | static inline void enable_p5_mce(void) { } | ||
21 | #endif | ||
22 | |||
23 | /* Call the installed machine check handler for this CPU setup. */ | ||
24 | extern void (*machine_check_vector)(struct pt_regs *, long error_code); | ||
25 | |||
26 | #ifdef CONFIG_X86_OLD_MCE | ||
27 | |||
28 | extern int nr_mce_banks; | ||
29 | |||
30 | void intel_set_thermal_handler(void); | ||
31 | |||
32 | #else | ||
33 | |||
34 | static inline void intel_set_thermal_handler(void) { } | ||
35 | |||
36 | #endif | ||
37 | |||
38 | void intel_init_thermal(struct cpuinfo_x86 *c); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ddae21620bda..ddae21620bda 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index 61e32881f41b..e1acec0f7a32 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -1,80 +1,226 @@ | |||
1 | /* | 1 | /* |
2 | * Common code for Intel machine checks | 2 | * Intel specific MCE features. |
3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> | ||
4 | * Copyright (C) 2008, 2009 Intel Corporation | ||
5 | * Author: Andi Kleen | ||
3 | */ | 6 | */ |
4 | #include <linux/interrupt.h> | ||
5 | #include <linux/kernel.h> | ||
6 | #include <linux/types.h> | ||
7 | #include <linux/init.h> | ||
8 | #include <linux/smp.h> | ||
9 | 7 | ||
10 | #include <asm/therm_throt.h> | 8 | #include <linux/init.h> |
11 | #include <asm/processor.h> | 9 | #include <linux/interrupt.h> |
12 | #include <asm/system.h> | 10 | #include <linux/percpu.h> |
13 | #include <asm/apic.h> | 11 | #include <asm/apic.h> |
12 | #include <asm/processor.h> | ||
14 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/mce.h> | ||
15 | |||
16 | /* | ||
17 | * Support for Intel Correct Machine Check Interrupts. This allows | ||
18 | * the CPU to raise an interrupt when a corrected machine check happened. | ||
19 | * Normally we pick those up using a regular polling timer. | ||
20 | * Also supports reliable discovery of shared banks. | ||
21 | */ | ||
15 | 22 | ||
16 | #include "mce.h" | 23 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); |
17 | 24 | ||
18 | void intel_init_thermal(struct cpuinfo_x86 *c) | 25 | /* |
26 | * cmci_discover_lock protects against parallel discovery attempts | ||
27 | * which could race against each other. | ||
28 | */ | ||
29 | static DEFINE_SPINLOCK(cmci_discover_lock); | ||
30 | |||
31 | #define CMCI_THRESHOLD 1 | ||
32 | |||
33 | static int cmci_supported(int *banks) | ||
19 | { | 34 | { |
20 | unsigned int cpu = smp_processor_id(); | 35 | u64 cap; |
21 | int tm2 = 0; | 36 | |
22 | u32 l, h; | 37 | if (mce_cmci_disabled || mce_ignore_ce) |
38 | return 0; | ||
23 | 39 | ||
24 | /* | 40 | /* |
25 | * Thermal monitoring depends on ACPI, clock modulation | 41 | * Vendor check is not strictly needed, but the initial |
26 | * and APIC as well | 42 | * initialization is vendor keyed and this |
43 | * makes sure none of the backdoors are entered otherwise. | ||
27 | */ | 44 | */ |
28 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC) || | 45 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) |
29 | !cpu_has(c, X86_FEATURE_APIC)) { | 46 | return 0; |
30 | pr_debug("Thermal monitoring disabled\n"); | 47 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) |
31 | return; | 48 | return 0; |
49 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
50 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); | ||
51 | return !!(cap & MCG_CMCI_P); | ||
52 | } | ||
53 | |||
54 | /* | ||
55 | * The interrupt handler. This is called on every event. | ||
56 | * Just call the poller directly to log any events. | ||
57 | * This could in theory increase the threshold under high load, | ||
58 | * but doesn't for now. | ||
59 | */ | ||
60 | static void intel_threshold_interrupt(void) | ||
61 | { | ||
62 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
63 | mce_notify_irq(); | ||
64 | } | ||
65 | |||
66 | static void print_update(char *type, int *hdr, int num) | ||
67 | { | ||
68 | if (*hdr == 0) | ||
69 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
70 | *hdr = 1; | ||
71 | printk(KERN_CONT " %s:%d", type, num); | ||
72 | } | ||
73 | |||
74 | /* | ||
75 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | ||
76 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | ||
77 | * banks. | ||
78 | */ | ||
79 | static void cmci_discover(int banks, int boot) | ||
80 | { | ||
81 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | ||
82 | unsigned long flags; | ||
83 | int hdr = 0; | ||
84 | int i; | ||
85 | |||
86 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
87 | for (i = 0; i < banks; i++) { | ||
88 | u64 val; | ||
89 | |||
90 | if (test_bit(i, owned)) | ||
91 | continue; | ||
92 | |||
93 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
94 | |||
95 | /* Already owned by someone else? */ | ||
96 | if (val & CMCI_EN) { | ||
97 | if (test_and_clear_bit(i, owned) || boot) | ||
98 | print_update("SHD", &hdr, i); | ||
99 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
100 | continue; | ||
101 | } | ||
102 | |||
103 | val |= CMCI_EN | CMCI_THRESHOLD; | ||
104 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
105 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
106 | |||
107 | /* Did the enable bit stick? -- the bank supports CMCI */ | ||
108 | if (val & CMCI_EN) { | ||
109 | if (!test_and_set_bit(i, owned) || boot) | ||
110 | print_update("CMCI", &hdr, i); | ||
111 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
112 | } else { | ||
113 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | ||
114 | } | ||
32 | } | 115 | } |
116 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
117 | if (hdr) | ||
118 | printk(KERN_CONT "\n"); | ||
119 | } | ||
33 | 120 | ||
34 | /* | 121 | /* |
35 | * First check if its enabled already, in which case there might | 122 | * Just in case we missed an event during initialization check |
36 | * be some SMM goo which handles it, so we can't even put a handler | 123 | * all the CMCI owned banks. |
37 | * since it might be delivered via SMI already: | 124 | */ |
38 | */ | 125 | void cmci_recheck(void) |
39 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 126 | { |
40 | h = apic_read(APIC_LVTTHMR); | 127 | unsigned long flags; |
41 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 128 | int banks; |
42 | printk(KERN_DEBUG | 129 | |
43 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 130 | if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) |
44 | return; | 131 | return; |
45 | } | 132 | local_irq_save(flags); |
133 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
134 | local_irq_restore(flags); | ||
135 | } | ||
46 | 136 | ||
47 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | 137 | /* |
48 | tm2 = 1; | 138 | * Disable CMCI on this CPU for all banks it owns when it goes down. |
139 | * This allows other CPUs to claim the banks on rediscovery. | ||
140 | */ | ||
141 | void cmci_clear(void) | ||
142 | { | ||
143 | unsigned long flags; | ||
144 | int i; | ||
145 | int banks; | ||
146 | u64 val; | ||
49 | 147 | ||
50 | /* Check whether a vector already exists */ | 148 | if (!cmci_supported(&banks)) |
51 | if (h & APIC_VECTOR_MASK) { | ||
52 | printk(KERN_DEBUG | ||
53 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
54 | cpu, (h & APIC_VECTOR_MASK)); | ||
55 | return; | 149 | return; |
150 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
151 | for (i = 0; i < banks; i++) { | ||
152 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | ||
153 | continue; | ||
154 | /* Disable CMCI */ | ||
155 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
156 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | ||
157 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
158 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
56 | } | 159 | } |
160 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
161 | } | ||
57 | 162 | ||
58 | /* We'll mask the thermal vector in the lapic till we're ready: */ | 163 | /* |
59 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | 164 | * After a CPU went down cycle through all the others and rediscover |
60 | apic_write(APIC_LVTTHMR, h); | 165 | * Must run in process context. |
166 | */ | ||
167 | void cmci_rediscover(int dying) | ||
168 | { | ||
169 | int banks; | ||
170 | int cpu; | ||
171 | cpumask_var_t old; | ||
61 | 172 | ||
62 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 173 | if (!cmci_supported(&banks)) |
63 | wrmsr(MSR_IA32_THERM_INTERRUPT, | 174 | return; |
64 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | 175 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) |
176 | return; | ||
177 | cpumask_copy(old, &current->cpus_allowed); | ||
178 | |||
179 | for_each_online_cpu(cpu) { | ||
180 | if (cpu == dying) | ||
181 | continue; | ||
182 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | ||
183 | continue; | ||
184 | /* Recheck banks in case CPUs don't all have the same */ | ||
185 | if (cmci_supported(&banks)) | ||
186 | cmci_discover(banks, 0); | ||
187 | } | ||
65 | 188 | ||
66 | intel_set_thermal_handler(); | 189 | set_cpus_allowed_ptr(current, old); |
190 | free_cpumask_var(old); | ||
191 | } | ||
192 | |||
193 | /* | ||
194 | * Reenable CMCI on this CPU in case a CPU down failed. | ||
195 | */ | ||
196 | void cmci_reenable(void) | ||
197 | { | ||
198 | int banks; | ||
199 | if (cmci_supported(&banks)) | ||
200 | cmci_discover(banks, 0); | ||
201 | } | ||
67 | 202 | ||
68 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 203 | static void intel_init_cmci(void) |
69 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | 204 | { |
205 | int banks; | ||
70 | 206 | ||
71 | /* Unmask the thermal vector: */ | 207 | if (!cmci_supported(&banks)) |
72 | l = apic_read(APIC_LVTTHMR); | 208 | return; |
73 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
74 | 209 | ||
75 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | 210 | mce_threshold_vector = intel_threshold_interrupt; |
76 | cpu, tm2 ? "TM2" : "TM1"); | 211 | cmci_discover(banks, 1); |
212 | /* | ||
213 | * For CPU #0 this runs with still disabled APIC, but that's | ||
214 | * ok because only the vector is set up. We still do another | ||
215 | * check for the banks later for CPU #0 just to make sure | ||
216 | * to not miss any events. | ||
217 | */ | ||
218 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); | ||
219 | cmci_recheck(); | ||
220 | } | ||
77 | 221 | ||
78 | /* enable thermal throttle processing */ | 222 | void mce_intel_feature_init(struct cpuinfo_x86 *c) |
79 | atomic_set(&therm_throt_en, 1); | 223 | { |
224 | intel_init_thermal(c); | ||
225 | intel_init_cmci(); | ||
80 | } | 226 | } |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c deleted file mode 100644 index f2ef6952c400..000000000000 --- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c +++ /dev/null | |||
@@ -1,248 +0,0 @@ | |||
1 | /* | ||
2 | * Intel specific MCE features. | ||
3 | * Copyright 2004 Zwane Mwaikambo <zwane@linuxpower.ca> | ||
4 | * Copyright (C) 2008, 2009 Intel Corporation | ||
5 | * Author: Andi Kleen | ||
6 | */ | ||
7 | |||
8 | #include <linux/init.h> | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/percpu.h> | ||
11 | #include <asm/processor.h> | ||
12 | #include <asm/apic.h> | ||
13 | #include <asm/msr.h> | ||
14 | #include <asm/mce.h> | ||
15 | #include <asm/hw_irq.h> | ||
16 | #include <asm/idle.h> | ||
17 | #include <asm/therm_throt.h> | ||
18 | |||
19 | #include "mce.h" | ||
20 | |||
21 | asmlinkage void smp_thermal_interrupt(void) | ||
22 | { | ||
23 | __u64 msr_val; | ||
24 | |||
25 | ack_APIC_irq(); | ||
26 | |||
27 | exit_idle(); | ||
28 | irq_enter(); | ||
29 | |||
30 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
31 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | ||
32 | mce_log_therm_throt_event(msr_val); | ||
33 | |||
34 | inc_irq_stat(irq_thermal_count); | ||
35 | irq_exit(); | ||
36 | } | ||
37 | |||
38 | /* | ||
39 | * Support for Intel Correct Machine Check Interrupts. This allows | ||
40 | * the CPU to raise an interrupt when a corrected machine check happened. | ||
41 | * Normally we pick those up using a regular polling timer. | ||
42 | * Also supports reliable discovery of shared banks. | ||
43 | */ | ||
44 | |||
45 | static DEFINE_PER_CPU(mce_banks_t, mce_banks_owned); | ||
46 | |||
47 | /* | ||
48 | * cmci_discover_lock protects against parallel discovery attempts | ||
49 | * which could race against each other. | ||
50 | */ | ||
51 | static DEFINE_SPINLOCK(cmci_discover_lock); | ||
52 | |||
53 | #define CMCI_THRESHOLD 1 | ||
54 | |||
55 | static int cmci_supported(int *banks) | ||
56 | { | ||
57 | u64 cap; | ||
58 | |||
59 | if (mce_cmci_disabled || mce_ignore_ce) | ||
60 | return 0; | ||
61 | |||
62 | /* | ||
63 | * Vendor check is not strictly needed, but the initial | ||
64 | * initialization is vendor keyed and this | ||
65 | * makes sure none of the backdoors are entered otherwise. | ||
66 | */ | ||
67 | if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) | ||
68 | return 0; | ||
69 | if (!cpu_has_apic || lapic_get_maxlvt() < 6) | ||
70 | return 0; | ||
71 | rdmsrl(MSR_IA32_MCG_CAP, cap); | ||
72 | *banks = min_t(unsigned, MAX_NR_BANKS, cap & 0xff); | ||
73 | return !!(cap & MCG_CMCI_P); | ||
74 | } | ||
75 | |||
76 | /* | ||
77 | * The interrupt handler. This is called on every event. | ||
78 | * Just call the poller directly to log any events. | ||
79 | * This could in theory increase the threshold under high load, | ||
80 | * but doesn't for now. | ||
81 | */ | ||
82 | static void intel_threshold_interrupt(void) | ||
83 | { | ||
84 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
85 | mce_notify_irq(); | ||
86 | } | ||
87 | |||
88 | static void print_update(char *type, int *hdr, int num) | ||
89 | { | ||
90 | if (*hdr == 0) | ||
91 | printk(KERN_INFO "CPU %d MCA banks", smp_processor_id()); | ||
92 | *hdr = 1; | ||
93 | printk(KERN_CONT " %s:%d", type, num); | ||
94 | } | ||
95 | |||
96 | /* | ||
97 | * Enable CMCI (Corrected Machine Check Interrupt) for available MCE banks | ||
98 | * on this CPU. Use the algorithm recommended in the SDM to discover shared | ||
99 | * banks. | ||
100 | */ | ||
101 | static void cmci_discover(int banks, int boot) | ||
102 | { | ||
103 | unsigned long *owned = (void *)&__get_cpu_var(mce_banks_owned); | ||
104 | unsigned long flags; | ||
105 | int hdr = 0; | ||
106 | int i; | ||
107 | |||
108 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
109 | for (i = 0; i < banks; i++) { | ||
110 | u64 val; | ||
111 | |||
112 | if (test_bit(i, owned)) | ||
113 | continue; | ||
114 | |||
115 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
116 | |||
117 | /* Already owned by someone else? */ | ||
118 | if (val & CMCI_EN) { | ||
119 | if (test_and_clear_bit(i, owned) || boot) | ||
120 | print_update("SHD", &hdr, i); | ||
121 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
122 | continue; | ||
123 | } | ||
124 | |||
125 | val |= CMCI_EN | CMCI_THRESHOLD; | ||
126 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
127 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
128 | |||
129 | /* Did the enable bit stick? -- the bank supports CMCI */ | ||
130 | if (val & CMCI_EN) { | ||
131 | if (!test_and_set_bit(i, owned) || boot) | ||
132 | print_update("CMCI", &hdr, i); | ||
133 | __clear_bit(i, __get_cpu_var(mce_poll_banks)); | ||
134 | } else { | ||
135 | WARN_ON(!test_bit(i, __get_cpu_var(mce_poll_banks))); | ||
136 | } | ||
137 | } | ||
138 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
139 | if (hdr) | ||
140 | printk(KERN_CONT "\n"); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Just in case we missed an event during initialization check | ||
145 | * all the CMCI owned banks. | ||
146 | */ | ||
147 | void cmci_recheck(void) | ||
148 | { | ||
149 | unsigned long flags; | ||
150 | int banks; | ||
151 | |||
152 | if (!mce_available(&current_cpu_data) || !cmci_supported(&banks)) | ||
153 | return; | ||
154 | local_irq_save(flags); | ||
155 | machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned)); | ||
156 | local_irq_restore(flags); | ||
157 | } | ||
158 | |||
159 | /* | ||
160 | * Disable CMCI on this CPU for all banks it owns when it goes down. | ||
161 | * This allows other CPUs to claim the banks on rediscovery. | ||
162 | */ | ||
163 | void cmci_clear(void) | ||
164 | { | ||
165 | unsigned long flags; | ||
166 | int i; | ||
167 | int banks; | ||
168 | u64 val; | ||
169 | |||
170 | if (!cmci_supported(&banks)) | ||
171 | return; | ||
172 | spin_lock_irqsave(&cmci_discover_lock, flags); | ||
173 | for (i = 0; i < banks; i++) { | ||
174 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | ||
175 | continue; | ||
176 | /* Disable CMCI */ | ||
177 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
178 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | ||
179 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | ||
180 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | ||
181 | } | ||
182 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | ||
183 | } | ||
184 | |||
185 | /* | ||
186 | * After a CPU went down cycle through all the others and rediscover | ||
187 | * Must run in process context. | ||
188 | */ | ||
189 | void cmci_rediscover(int dying) | ||
190 | { | ||
191 | int banks; | ||
192 | int cpu; | ||
193 | cpumask_var_t old; | ||
194 | |||
195 | if (!cmci_supported(&banks)) | ||
196 | return; | ||
197 | if (!alloc_cpumask_var(&old, GFP_KERNEL)) | ||
198 | return; | ||
199 | cpumask_copy(old, &current->cpus_allowed); | ||
200 | |||
201 | for_each_online_cpu(cpu) { | ||
202 | if (cpu == dying) | ||
203 | continue; | ||
204 | if (set_cpus_allowed_ptr(current, cpumask_of(cpu))) | ||
205 | continue; | ||
206 | /* Recheck banks in case CPUs don't all have the same */ | ||
207 | if (cmci_supported(&banks)) | ||
208 | cmci_discover(banks, 0); | ||
209 | } | ||
210 | |||
211 | set_cpus_allowed_ptr(current, old); | ||
212 | free_cpumask_var(old); | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Reenable CMCI on this CPU in case a CPU down failed. | ||
217 | */ | ||
218 | void cmci_reenable(void) | ||
219 | { | ||
220 | int banks; | ||
221 | if (cmci_supported(&banks)) | ||
222 | cmci_discover(banks, 0); | ||
223 | } | ||
224 | |||
225 | static void intel_init_cmci(void) | ||
226 | { | ||
227 | int banks; | ||
228 | |||
229 | if (!cmci_supported(&banks)) | ||
230 | return; | ||
231 | |||
232 | mce_threshold_vector = intel_threshold_interrupt; | ||
233 | cmci_discover(banks, 1); | ||
234 | /* | ||
235 | * For CPU #0 this runs with still disabled APIC, but that's | ||
236 | * ok because only the vector is set up. We still do another | ||
237 | * check for the banks later for CPU #0 just to make sure | ||
238 | * to not miss any events. | ||
239 | */ | ||
240 | apic_write(APIC_LVTCMCI, THRESHOLD_APIC_VECTOR|APIC_DM_FIXED); | ||
241 | cmci_recheck(); | ||
242 | } | ||
243 | |||
244 | void mce_intel_feature_init(struct cpuinfo_x86 *c) | ||
245 | { | ||
246 | intel_init_thermal(c); | ||
247 | intel_init_cmci(); | ||
248 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c index 70b710420f74..f5f2d6f71fb6 100644 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c | |||
@@ -17,10 +17,9 @@ | |||
17 | 17 | ||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <asm/system.h> | 19 | #include <asm/system.h> |
20 | #include <asm/mce.h> | ||
20 | #include <asm/msr.h> | 21 | #include <asm/msr.h> |
21 | 22 | ||
22 | #include "mce.h" | ||
23 | |||
24 | static int firstbank; | 23 | static int firstbank; |
25 | 24 | ||
26 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ | 25 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ |
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c index 82cee108a2d3..4482aea9aa2e 100644 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ b/arch/x86/kernel/cpu/mcheck/p4.c | |||
@@ -1,21 +1,15 @@ | |||
1 | /* | 1 | /* |
2 | * P4 specific Machine Check Exception Reporting | 2 | * P4 specific Machine Check Exception Reporting |
3 | */ | 3 | */ |
4 | |||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | 4 | #include <linux/kernel.h> |
7 | #include <linux/types.h> | 5 | #include <linux/types.h> |
8 | #include <linux/init.h> | 6 | #include <linux/init.h> |
9 | #include <linux/smp.h> | 7 | #include <linux/smp.h> |
10 | 8 | ||
11 | #include <asm/therm_throt.h> | ||
12 | #include <asm/processor.h> | 9 | #include <asm/processor.h> |
13 | #include <asm/system.h> | 10 | #include <asm/mce.h> |
14 | #include <asm/apic.h> | ||
15 | #include <asm/msr.h> | 11 | #include <asm/msr.h> |
16 | 12 | ||
17 | #include "mce.h" | ||
18 | |||
19 | /* as supported by the P4/Xeon family */ | 13 | /* as supported by the P4/Xeon family */ |
20 | struct intel_mce_extended_msrs { | 14 | struct intel_mce_extended_msrs { |
21 | u32 eax; | 15 | u32 eax; |
@@ -33,46 +27,6 @@ struct intel_mce_extended_msrs { | |||
33 | 27 | ||
34 | static int mce_num_extended_msrs; | 28 | static int mce_num_extended_msrs; |
35 | 29 | ||
36 | |||
37 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
38 | |||
39 | static void unexpected_thermal_interrupt(struct pt_regs *regs) | ||
40 | { | ||
41 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
42 | smp_processor_id()); | ||
43 | add_taint(TAINT_MACHINE_CHECK); | ||
44 | } | ||
45 | |||
46 | /* P4/Xeon Thermal transition interrupt handler: */ | ||
47 | static void intel_thermal_interrupt(struct pt_regs *regs) | ||
48 | { | ||
49 | __u64 msr_val; | ||
50 | |||
51 | ack_APIC_irq(); | ||
52 | |||
53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
54 | therm_throt_process(msr_val & THERM_STATUS_PROCHOT); | ||
55 | } | ||
56 | |||
57 | /* Thermal interrupt handler for this CPU setup: */ | ||
58 | static void (*vendor_thermal_interrupt)(struct pt_regs *regs) = | ||
59 | unexpected_thermal_interrupt; | ||
60 | |||
61 | void smp_thermal_interrupt(struct pt_regs *regs) | ||
62 | { | ||
63 | irq_enter(); | ||
64 | vendor_thermal_interrupt(regs); | ||
65 | __get_cpu_var(irq_stat).irq_thermal_count++; | ||
66 | irq_exit(); | ||
67 | } | ||
68 | |||
69 | void intel_set_thermal_handler(void) | ||
70 | { | ||
71 | vendor_thermal_interrupt = intel_thermal_interrupt; | ||
72 | } | ||
73 | |||
74 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | ||
75 | |||
76 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | 30 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
77 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | 31 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
78 | { | 32 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/p5.c b/arch/x86/kernel/cpu/mcheck/p5.c index 015f481ab1b0..5c0e6533d9bc 100644 --- a/arch/x86/kernel/cpu/mcheck/p5.c +++ b/arch/x86/kernel/cpu/mcheck/p5.c | |||
@@ -10,12 +10,11 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* By default disabled */ | 16 | /* By default disabled */ |
18 | int mce_p5_enable; | 17 | int mce_p5_enabled __read_mostly; |
19 | 18 | ||
20 | /* Machine check handler for Pentium class Intel CPUs: */ | 19 | /* Machine check handler for Pentium class Intel CPUs: */ |
21 | static void pentium_machine_check(struct pt_regs *regs, long error_code) | 20 | static void pentium_machine_check(struct pt_regs *regs, long error_code) |
@@ -43,15 +42,13 @@ void intel_p5_mcheck_init(struct cpuinfo_x86 *c) | |||
43 | { | 42 | { |
44 | u32 l, h; | 43 | u32 l, h; |
45 | 44 | ||
46 | /* Check for MCE support: */ | 45 | /* Default P5 to off as its often misconnected: */ |
47 | if (!cpu_has(c, X86_FEATURE_MCE)) | 46 | if (!mce_p5_enabled) |
48 | return; | 47 | return; |
49 | 48 | ||
50 | #ifdef CONFIG_X86_OLD_MCE | 49 | /* Check for MCE support: */ |
51 | /* Default P5 to off as its often misconnected: */ | 50 | if (!cpu_has(c, X86_FEATURE_MCE)) |
52 | if (mce_disabled != -1) | ||
53 | return; | 51 | return; |
54 | #endif | ||
55 | 52 | ||
56 | machine_check_vector = pentium_machine_check; | 53 | machine_check_vector = pentium_machine_check; |
57 | /* Make sure the vector pointer is visible before we enable MCEs: */ | 54 | /* Make sure the vector pointer is visible before we enable MCEs: */ |
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c index 43c24e667457..01e4f8178183 100644 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ b/arch/x86/kernel/cpu/mcheck/p6.c | |||
@@ -10,10 +10,9 @@ | |||
10 | 10 | ||
11 | #include <asm/processor.h> | 11 | #include <asm/processor.h> |
12 | #include <asm/system.h> | 12 | #include <asm/system.h> |
13 | #include <asm/mce.h> | ||
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | 15 | ||
15 | #include "mce.h" | ||
16 | |||
17 | /* Machine Check Handler For PII/PIII */ | 16 | /* Machine Check Handler For PII/PIII */ |
18 | static void intel_machine_check(struct pt_regs *regs, long error_code) | 17 | static void intel_machine_check(struct pt_regs *regs, long error_code) |
19 | { | 18 | { |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 7b1ae2e20ba5..bff8dd191dd5 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -13,13 +13,23 @@ | |||
13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | 13 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. |
14 | * Inspired by Ross Biro's and Al Borchers' counter code. | 14 | * Inspired by Ross Biro's and Al Borchers' counter code. |
15 | */ | 15 | */ |
16 | #include <linux/interrupt.h> | ||
16 | #include <linux/notifier.h> | 17 | #include <linux/notifier.h> |
17 | #include <linux/jiffies.h> | 18 | #include <linux/jiffies.h> |
19 | #include <linux/kernel.h> | ||
18 | #include <linux/percpu.h> | 20 | #include <linux/percpu.h> |
19 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
22 | #include <linux/types.h> | ||
23 | #include <linux/init.h> | ||
24 | #include <linux/smp.h> | ||
20 | #include <linux/cpu.h> | 25 | #include <linux/cpu.h> |
21 | 26 | ||
22 | #include <asm/therm_throt.h> | 27 | #include <asm/processor.h> |
28 | #include <asm/system.h> | ||
29 | #include <asm/apic.h> | ||
30 | #include <asm/idle.h> | ||
31 | #include <asm/mce.h> | ||
32 | #include <asm/msr.h> | ||
23 | 33 | ||
24 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
25 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
@@ -27,7 +37,7 @@ | |||
27 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | 37 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
28 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | 38 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); |
29 | 39 | ||
30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | 40 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
31 | 41 | ||
32 | #ifdef CONFIG_SYSFS | 42 | #ifdef CONFIG_SYSFS |
33 | #define define_therm_throt_sysdev_one_ro(_name) \ | 43 | #define define_therm_throt_sysdev_one_ro(_name) \ |
@@ -82,7 +92,7 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
82 | * 1 : Event should be logged further, and a message has been | 92 | * 1 : Event should be logged further, and a message has been |
83 | * printed to the syslog. | 93 | * printed to the syslog. |
84 | */ | 94 | */ |
85 | int therm_throt_process(int curr) | 95 | static int therm_throt_process(int curr) |
86 | { | 96 | { |
87 | unsigned int cpu = smp_processor_id(); | 97 | unsigned int cpu = smp_processor_id(); |
88 | __u64 tmp_jiffs = get_jiffies_64(); | 98 | __u64 tmp_jiffs = get_jiffies_64(); |
@@ -186,6 +196,94 @@ static __init int thermal_throttle_init_device(void) | |||
186 | 196 | ||
187 | return 0; | 197 | return 0; |
188 | } | 198 | } |
189 | |||
190 | device_initcall(thermal_throttle_init_device); | 199 | device_initcall(thermal_throttle_init_device); |
200 | |||
191 | #endif /* CONFIG_SYSFS */ | 201 | #endif /* CONFIG_SYSFS */ |
202 | |||
203 | /* Thermal transition interrupt handler */ | ||
204 | static void intel_thermal_interrupt(void) | ||
205 | { | ||
206 | __u64 msr_val; | ||
207 | |||
208 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | ||
209 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT)) | ||
210 | mce_log_therm_throt_event(msr_val); | ||
211 | } | ||
212 | |||
213 | static void unexpected_thermal_interrupt(void) | ||
214 | { | ||
215 | printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", | ||
216 | smp_processor_id()); | ||
217 | add_taint(TAINT_MACHINE_CHECK); | ||
218 | } | ||
219 | |||
220 | static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt; | ||
221 | |||
222 | asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | ||
223 | { | ||
224 | exit_idle(); | ||
225 | irq_enter(); | ||
226 | inc_irq_stat(irq_thermal_count); | ||
227 | smp_thermal_vector(); | ||
228 | irq_exit(); | ||
229 | /* Ack only at the end to avoid potential reentry */ | ||
230 | ack_APIC_irq(); | ||
231 | } | ||
232 | |||
233 | void intel_init_thermal(struct cpuinfo_x86 *c) | ||
234 | { | ||
235 | unsigned int cpu = smp_processor_id(); | ||
236 | int tm2 = 0; | ||
237 | u32 l, h; | ||
238 | |||
239 | /* Thermal monitoring depends on ACPI and clock modulation*/ | ||
240 | if (!cpu_has(c, X86_FEATURE_ACPI) || !cpu_has(c, X86_FEATURE_ACC)) | ||
241 | return; | ||
242 | |||
243 | /* | ||
244 | * First check if its enabled already, in which case there might | ||
245 | * be some SMM goo which handles it, so we can't even put a handler | ||
246 | * since it might be delivered via SMI already: | ||
247 | */ | ||
248 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
249 | h = apic_read(APIC_LVTTHMR); | ||
250 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | ||
251 | printk(KERN_DEBUG | ||
252 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | ||
253 | return; | ||
254 | } | ||
255 | |||
256 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
257 | tm2 = 1; | ||
258 | |||
259 | /* Check whether a vector already exists */ | ||
260 | if (h & APIC_VECTOR_MASK) { | ||
261 | printk(KERN_DEBUG | ||
262 | "CPU%d: Thermal LVT vector (%#x) already installed\n", | ||
263 | cpu, (h & APIC_VECTOR_MASK)); | ||
264 | return; | ||
265 | } | ||
266 | |||
267 | /* We'll mask the thermal vector in the lapic till we're ready: */ | ||
268 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | ||
269 | apic_write(APIC_LVTTHMR, h); | ||
270 | |||
271 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | ||
272 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
273 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
274 | |||
275 | smp_thermal_vector = intel_thermal_interrupt; | ||
276 | |||
277 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | ||
278 | wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h); | ||
279 | |||
280 | /* Unmask the thermal vector: */ | ||
281 | l = apic_read(APIC_LVTTHMR); | ||
282 | apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | ||
283 | |||
284 | printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", | ||
285 | cpu, tm2 ? "TM2" : "TM1"); | ||
286 | |||
287 | /* enable thermal throttle processing */ | ||
288 | atomic_set(&therm_throt_en, 1); | ||
289 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/winchip.c b/arch/x86/kernel/cpu/mcheck/winchip.c index 81b02487090b..54060f565974 100644 --- a/arch/x86/kernel/cpu/mcheck/winchip.c +++ b/arch/x86/kernel/cpu/mcheck/winchip.c | |||
@@ -9,10 +9,9 @@ | |||
9 | 9 | ||
10 | #include <asm/processor.h> | 10 | #include <asm/processor.h> |
11 | #include <asm/system.h> | 11 | #include <asm/system.h> |
12 | #include <asm/mce.h> | ||
12 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
13 | 14 | ||
14 | #include "mce.h" | ||
15 | |||
16 | /* Machine check handler for WinChip C6: */ | 15 | /* Machine check handler for WinChip C6: */ |
17 | static void winchip_machine_check(struct pt_regs *regs, long error_code) | 16 | static void winchip_machine_check(struct pt_regs *regs, long error_code) |
18 | { | 17 | { |
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1e1e27b7d438..71f9c74814d8 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -53,6 +53,7 @@ | |||
53 | #include <asm/traps.h> | 53 | #include <asm/traps.h> |
54 | #include <asm/desc.h> | 54 | #include <asm/desc.h> |
55 | #include <asm/i387.h> | 55 | #include <asm/i387.h> |
56 | #include <asm/mce.h> | ||
56 | 57 | ||
57 | #include <asm/mach_traps.h> | 58 | #include <asm/mach_traps.h> |
58 | 59 | ||
@@ -64,8 +65,6 @@ | |||
64 | #include <asm/setup.h> | 65 | #include <asm/setup.h> |
65 | #include <asm/traps.h> | 66 | #include <asm/traps.h> |
66 | 67 | ||
67 | #include "cpu/mcheck/mce.h" | ||
68 | |||
69 | asmlinkage int system_call(void); | 68 | asmlinkage int system_call(void); |
70 | 69 | ||
71 | /* Do we ignore FPU interrupts ? */ | 70 | /* Do we ignore FPU interrupts ? */ |