diff options
-rw-r--r-- | arch/x86/include/asm/hardirq.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 14 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/common.c | 4 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 103 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 29 | ||||
-rw-r--r-- | arch/x86/kernel/irq.c | 12 | ||||
-rw-r--r-- | arch/x86/kernel/setup.c | 3 | ||||
-rw-r--r-- | drivers/edac/edac_mce_amd.c | 21 | ||||
-rw-r--r-- | include/trace/events/mce.h | 69 |
9 files changed, 198 insertions, 63 deletions
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h index 82e3e8f01043..108eb6fd1ae7 100644 --- a/arch/x86/include/asm/hardirq.h +++ b/arch/x86/include/asm/hardirq.h | |||
@@ -20,11 +20,11 @@ typedef struct { | |||
20 | unsigned int irq_call_count; | 20 | unsigned int irq_call_count; |
21 | unsigned int irq_tlb_count; | 21 | unsigned int irq_tlb_count; |
22 | #endif | 22 | #endif |
23 | #ifdef CONFIG_X86_MCE | 23 | #ifdef CONFIG_X86_THERMAL_VECTOR |
24 | unsigned int irq_thermal_count; | 24 | unsigned int irq_thermal_count; |
25 | # ifdef CONFIG_X86_MCE_THRESHOLD | 25 | #endif |
26 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
26 | unsigned int irq_threshold_count; | 27 | unsigned int irq_threshold_count; |
27 | # endif | ||
28 | #endif | 28 | #endif |
29 | } ____cacheline_aligned irq_cpustat_t; | 29 | } ____cacheline_aligned irq_cpustat_t; |
30 | 30 | ||
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index f1363b72364f..858baa061cfc 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -108,6 +108,8 @@ struct mce_log { | |||
108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) | 108 | #define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9) |
109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) | 109 | #define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0) |
110 | 110 | ||
111 | extern struct atomic_notifier_head x86_mce_decoder_chain; | ||
112 | |||
111 | #ifdef __KERNEL__ | 113 | #ifdef __KERNEL__ |
112 | 114 | ||
113 | #include <linux/percpu.h> | 115 | #include <linux/percpu.h> |
@@ -118,9 +120,11 @@ extern int mce_disabled; | |||
118 | extern int mce_p5_enabled; | 120 | extern int mce_p5_enabled; |
119 | 121 | ||
120 | #ifdef CONFIG_X86_MCE | 122 | #ifdef CONFIG_X86_MCE |
121 | void mcheck_init(struct cpuinfo_x86 *c); | 123 | int mcheck_init(void); |
124 | void mcheck_cpu_init(struct cpuinfo_x86 *c); | ||
122 | #else | 125 | #else |
123 | static inline void mcheck_init(struct cpuinfo_x86 *c) {} | 126 | static inline int mcheck_init(void) { return 0; } |
127 | static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {} | ||
124 | #endif | 128 | #endif |
125 | 129 | ||
126 | #ifdef CONFIG_X86_ANCIENT_MCE | 130 | #ifdef CONFIG_X86_ANCIENT_MCE |
@@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c); | |||
214 | 218 | ||
215 | void mce_log_therm_throt_event(__u64 status); | 219 | void mce_log_therm_throt_event(__u64 status); |
216 | 220 | ||
221 | #ifdef CONFIG_X86_THERMAL_VECTOR | ||
222 | extern void mcheck_intel_therm_init(void); | ||
223 | #else | ||
224 | static inline void mcheck_intel_therm_init(void) { } | ||
225 | #endif | ||
226 | |||
217 | #endif /* __KERNEL__ */ | 227 | #endif /* __KERNEL__ */ |
218 | #endif /* _ASM_X86_MCE_H */ | 228 | #endif /* _ASM_X86_MCE_H */ |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index cc25c2b4a567..9053be5d95cd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -837,10 +837,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
837 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; | 837 | boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; |
838 | } | 838 | } |
839 | 839 | ||
840 | #ifdef CONFIG_X86_MCE | ||
841 | /* Init Machine Check Exception if available. */ | 840 | /* Init Machine Check Exception if available. */ |
842 | mcheck_init(c); | 841 | mcheck_cpu_init(c); |
843 | #endif | ||
844 | 842 | ||
845 | select_idle_routine(c); | 843 | select_idle_routine(c); |
846 | 844 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 721a77ca8115..0bcaa3875863 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -46,6 +46,9 @@ | |||
46 | 46 | ||
47 | #include "mce-internal.h" | 47 | #include "mce-internal.h" |
48 | 48 | ||
49 | #define CREATE_TRACE_POINTS | ||
50 | #include <trace/events/mce.h> | ||
51 | |||
49 | int mce_disabled __read_mostly; | 52 | int mce_disabled __read_mostly; |
50 | 53 | ||
51 | #define MISC_MCELOG_MINOR 227 | 54 | #define MISC_MCELOG_MINOR 227 |
@@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | |||
85 | static DEFINE_PER_CPU(struct mce, mces_seen); | 88 | static DEFINE_PER_CPU(struct mce, mces_seen); |
86 | static int cpu_missing; | 89 | static int cpu_missing; |
87 | 90 | ||
88 | static void default_decode_mce(struct mce *m) | 91 | /* |
92 | * CPU/chipset specific EDAC code can register a notifier call here to print | ||
93 | * MCE errors in a human-readable form. | ||
94 | */ | ||
95 | ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain); | ||
96 | EXPORT_SYMBOL_GPL(x86_mce_decoder_chain); | ||
97 | |||
98 | static int default_decode_mce(struct notifier_block *nb, unsigned long val, | ||
99 | void *data) | ||
89 | { | 100 | { |
90 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); | 101 | pr_emerg("No human readable MCE decoding support on this CPU type.\n"); |
91 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); | 102 | pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); |
103 | |||
104 | return NOTIFY_STOP; | ||
92 | } | 105 | } |
93 | 106 | ||
94 | /* | 107 | static struct notifier_block mce_dec_nb = { |
95 | * CPU/chipset specific EDAC code can register a callback here to print | 108 | .notifier_call = default_decode_mce, |
96 | * MCE errors in a human-readable form: | 109 | .priority = -1, |
97 | */ | 110 | }; |
98 | void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; | ||
99 | EXPORT_SYMBOL(x86_mce_decode_callback); | ||
100 | 111 | ||
101 | /* MCA banks polled by the period polling timer for corrected events */ | 112 | /* MCA banks polled by the period polling timer for corrected events */ |
102 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | 113 | DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { |
@@ -141,6 +152,9 @@ void mce_log(struct mce *mce) | |||
141 | { | 152 | { |
142 | unsigned next, entry; | 153 | unsigned next, entry; |
143 | 154 | ||
155 | /* Emit the trace record: */ | ||
156 | trace_mce_record(mce); | ||
157 | |||
144 | mce->finished = 0; | 158 | mce->finished = 0; |
145 | wmb(); | 159 | wmb(); |
146 | for (;;) { | 160 | for (;;) { |
@@ -204,9 +218,9 @@ static void print_mce(struct mce *m) | |||
204 | 218 | ||
205 | /* | 219 | /* |
206 | * Print out human-readable details about the MCE error, | 220 | * Print out human-readable details about the MCE error, |
207 | * (if the CPU has an implementation for that): | 221 | * (if the CPU has an implementation for that) |
208 | */ | 222 | */ |
209 | x86_mce_decode_callback(m); | 223 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m); |
210 | } | 224 | } |
211 | 225 | ||
212 | static void print_mce_head(void) | 226 | static void print_mce_head(void) |
@@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */ | |||
1122 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ | 1136 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ |
1123 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1137 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1124 | 1138 | ||
1125 | static void mcheck_timer(unsigned long data) | 1139 | static void mce_start_timer(unsigned long data) |
1126 | { | 1140 | { |
1127 | struct timer_list *t = &per_cpu(mce_timer, data); | 1141 | struct timer_list *t = &per_cpu(mce_timer, data); |
1128 | int *n; | 1142 | int *n; |
@@ -1187,7 +1201,7 @@ int mce_notify_irq(void) | |||
1187 | } | 1201 | } |
1188 | EXPORT_SYMBOL_GPL(mce_notify_irq); | 1202 | EXPORT_SYMBOL_GPL(mce_notify_irq); |
1189 | 1203 | ||
1190 | static int mce_banks_init(void) | 1204 | static int __cpuinit __mcheck_cpu_mce_banks_init(void) |
1191 | { | 1205 | { |
1192 | int i; | 1206 | int i; |
1193 | 1207 | ||
@@ -1206,7 +1220,7 @@ static int mce_banks_init(void) | |||
1206 | /* | 1220 | /* |
1207 | * Initialize Machine Checks for a CPU. | 1221 | * Initialize Machine Checks for a CPU. |
1208 | */ | 1222 | */ |
1209 | static int __cpuinit mce_cap_init(void) | 1223 | static int __cpuinit __mcheck_cpu_cap_init(void) |
1210 | { | 1224 | { |
1211 | unsigned b; | 1225 | unsigned b; |
1212 | u64 cap; | 1226 | u64 cap; |
@@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void) | |||
1228 | WARN_ON(banks != 0 && b != banks); | 1242 | WARN_ON(banks != 0 && b != banks); |
1229 | banks = b; | 1243 | banks = b; |
1230 | if (!mce_banks) { | 1244 | if (!mce_banks) { |
1231 | int err = mce_banks_init(); | 1245 | int err = __mcheck_cpu_mce_banks_init(); |
1232 | 1246 | ||
1233 | if (err) | 1247 | if (err) |
1234 | return err; | 1248 | return err; |
@@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void) | |||
1244 | return 0; | 1258 | return 0; |
1245 | } | 1259 | } |
1246 | 1260 | ||
1247 | static void mce_init(void) | 1261 | static void __mcheck_cpu_init_generic(void) |
1248 | { | 1262 | { |
1249 | mce_banks_t all_banks; | 1263 | mce_banks_t all_banks; |
1250 | u64 cap; | 1264 | u64 cap; |
@@ -1273,7 +1287,7 @@ static void mce_init(void) | |||
1273 | } | 1287 | } |
1274 | 1288 | ||
1275 | /* Add per CPU specific workarounds here */ | 1289 | /* Add per CPU specific workarounds here */ |
1276 | static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | 1290 | static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c) |
1277 | { | 1291 | { |
1278 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1292 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1279 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1293 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); |
@@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1341 | return 0; | 1355 | return 0; |
1342 | } | 1356 | } |
1343 | 1357 | ||
1344 | static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | 1358 | static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c) |
1345 | { | 1359 | { |
1346 | if (c->x86 != 5) | 1360 | if (c->x86 != 5) |
1347 | return; | 1361 | return; |
@@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c) | |||
1355 | } | 1369 | } |
1356 | } | 1370 | } |
1357 | 1371 | ||
1358 | static void mce_cpu_features(struct cpuinfo_x86 *c) | 1372 | static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c) |
1359 | { | 1373 | { |
1360 | switch (c->x86_vendor) { | 1374 | switch (c->x86_vendor) { |
1361 | case X86_VENDOR_INTEL: | 1375 | case X86_VENDOR_INTEL: |
@@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
1369 | } | 1383 | } |
1370 | } | 1384 | } |
1371 | 1385 | ||
1372 | static void mce_init_timer(void) | 1386 | static void __mcheck_cpu_init_timer(void) |
1373 | { | 1387 | { |
1374 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1388 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1375 | int *n = &__get_cpu_var(mce_next_interval); | 1389 | int *n = &__get_cpu_var(mce_next_interval); |
@@ -1380,7 +1394,7 @@ static void mce_init_timer(void) | |||
1380 | *n = check_interval * HZ; | 1394 | *n = check_interval * HZ; |
1381 | if (!*n) | 1395 | if (!*n) |
1382 | return; | 1396 | return; |
1383 | setup_timer(t, mcheck_timer, smp_processor_id()); | 1397 | setup_timer(t, mce_start_timer, smp_processor_id()); |
1384 | t->expires = round_jiffies(jiffies + *n); | 1398 | t->expires = round_jiffies(jiffies + *n); |
1385 | add_timer_on(t, smp_processor_id()); | 1399 | add_timer_on(t, smp_processor_id()); |
1386 | } | 1400 | } |
@@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) = | |||
1400 | * Called for each booted CPU to set up machine checks. | 1414 | * Called for each booted CPU to set up machine checks. |
1401 | * Must be called with preempt off: | 1415 | * Must be called with preempt off: |
1402 | */ | 1416 | */ |
1403 | void __cpuinit mcheck_init(struct cpuinfo_x86 *c) | 1417 | void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c) |
1404 | { | 1418 | { |
1405 | if (mce_disabled) | 1419 | if (mce_disabled) |
1406 | return; | 1420 | return; |
1407 | 1421 | ||
1408 | mce_ancient_init(c); | 1422 | __mcheck_cpu_ancient_init(c); |
1409 | 1423 | ||
1410 | if (!mce_available(c)) | 1424 | if (!mce_available(c)) |
1411 | return; | 1425 | return; |
1412 | 1426 | ||
1413 | if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) { | 1427 | if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) { |
1414 | mce_disabled = 1; | 1428 | mce_disabled = 1; |
1415 | return; | 1429 | return; |
1416 | } | 1430 | } |
1417 | 1431 | ||
1418 | machine_check_vector = do_machine_check; | 1432 | machine_check_vector = do_machine_check; |
1419 | 1433 | ||
1420 | mce_init(); | 1434 | __mcheck_cpu_init_generic(); |
1421 | mce_cpu_features(c); | 1435 | __mcheck_cpu_init_vendor(c); |
1422 | mce_init_timer(); | 1436 | __mcheck_cpu_init_timer(); |
1423 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); | 1437 | INIT_WORK(&__get_cpu_var(mce_work), mce_process_work); |
1438 | |||
1424 | } | 1439 | } |
1425 | 1440 | ||
1426 | /* | 1441 | /* |
@@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str) | |||
1640 | } | 1655 | } |
1641 | __setup("mce", mcheck_enable); | 1656 | __setup("mce", mcheck_enable); |
1642 | 1657 | ||
1658 | int __init mcheck_init(void) | ||
1659 | { | ||
1660 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb); | ||
1661 | |||
1662 | mcheck_intel_therm_init(); | ||
1663 | |||
1664 | return 0; | ||
1665 | } | ||
1666 | |||
1643 | /* | 1667 | /* |
1644 | * Sysfs support | 1668 | * Sysfs support |
1645 | */ | 1669 | */ |
@@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable); | |||
1648 | * Disable machine checks on suspend and shutdown. We can't really handle | 1672 | * Disable machine checks on suspend and shutdown. We can't really handle |
1649 | * them later. | 1673 | * them later. |
1650 | */ | 1674 | */ |
1651 | static int mce_disable(void) | 1675 | static int mce_disable_error_reporting(void) |
1652 | { | 1676 | { |
1653 | int i; | 1677 | int i; |
1654 | 1678 | ||
@@ -1663,12 +1687,12 @@ static int mce_disable(void) | |||
1663 | 1687 | ||
1664 | static int mce_suspend(struct sys_device *dev, pm_message_t state) | 1688 | static int mce_suspend(struct sys_device *dev, pm_message_t state) |
1665 | { | 1689 | { |
1666 | return mce_disable(); | 1690 | return mce_disable_error_reporting(); |
1667 | } | 1691 | } |
1668 | 1692 | ||
1669 | static int mce_shutdown(struct sys_device *dev) | 1693 | static int mce_shutdown(struct sys_device *dev) |
1670 | { | 1694 | { |
1671 | return mce_disable(); | 1695 | return mce_disable_error_reporting(); |
1672 | } | 1696 | } |
1673 | 1697 | ||
1674 | /* | 1698 | /* |
@@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev) | |||
1678 | */ | 1702 | */ |
1679 | static int mce_resume(struct sys_device *dev) | 1703 | static int mce_resume(struct sys_device *dev) |
1680 | { | 1704 | { |
1681 | mce_init(); | 1705 | __mcheck_cpu_init_generic(); |
1682 | mce_cpu_features(&current_cpu_data); | 1706 | __mcheck_cpu_init_vendor(&current_cpu_data); |
1683 | 1707 | ||
1684 | return 0; | 1708 | return 0; |
1685 | } | 1709 | } |
@@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data) | |||
1689 | del_timer_sync(&__get_cpu_var(mce_timer)); | 1713 | del_timer_sync(&__get_cpu_var(mce_timer)); |
1690 | if (!mce_available(&current_cpu_data)) | 1714 | if (!mce_available(&current_cpu_data)) |
1691 | return; | 1715 | return; |
1692 | mce_init(); | 1716 | __mcheck_cpu_init_generic(); |
1693 | mce_init_timer(); | 1717 | __mcheck_cpu_init_timer(); |
1694 | } | 1718 | } |
1695 | 1719 | ||
1696 | /* Reinit MCEs after user configuration changes */ | 1720 | /* Reinit MCEs after user configuration changes */ |
@@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all) | |||
1716 | cmci_reenable(); | 1740 | cmci_reenable(); |
1717 | cmci_recheck(); | 1741 | cmci_recheck(); |
1718 | if (all) | 1742 | if (all) |
1719 | mce_init_timer(); | 1743 | __mcheck_cpu_init_timer(); |
1720 | } | 1744 | } |
1721 | 1745 | ||
1722 | static struct sysdev_class mce_sysclass = { | 1746 | static struct sysdev_class mce_sysclass = { |
@@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
1929 | } | 1953 | } |
1930 | 1954 | ||
1931 | /* Make sure there are no machine checks on offlined CPUs. */ | 1955 | /* Make sure there are no machine checks on offlined CPUs. */ |
1932 | static void mce_disable_cpu(void *h) | 1956 | static void __cpuinit mce_disable_cpu(void *h) |
1933 | { | 1957 | { |
1934 | unsigned long action = *(unsigned long *)h; | 1958 | unsigned long action = *(unsigned long *)h; |
1935 | int i; | 1959 | int i; |
1936 | 1960 | ||
1937 | if (!mce_available(&current_cpu_data)) | 1961 | if (!mce_available(&current_cpu_data)) |
1938 | return; | 1962 | return; |
1963 | |||
1939 | if (!(action & CPU_TASKS_FROZEN)) | 1964 | if (!(action & CPU_TASKS_FROZEN)) |
1940 | cmci_clear(); | 1965 | cmci_clear(); |
1941 | for (i = 0; i < banks; i++) { | 1966 | for (i = 0; i < banks; i++) { |
@@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h) | |||
1946 | } | 1971 | } |
1947 | } | 1972 | } |
1948 | 1973 | ||
1949 | static void mce_reenable_cpu(void *h) | 1974 | static void __cpuinit mce_reenable_cpu(void *h) |
1950 | { | 1975 | { |
1951 | unsigned long action = *(unsigned long *)h; | 1976 | unsigned long action = *(unsigned long *)h; |
1952 | int i; | 1977 | int i; |
@@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void) | |||
2025 | } | 2050 | } |
2026 | } | 2051 | } |
2027 | 2052 | ||
2028 | static __init int mce_init_device(void) | 2053 | static __init int mcheck_init_device(void) |
2029 | { | 2054 | { |
2030 | int err; | 2055 | int err; |
2031 | int i = 0; | 2056 | int i = 0; |
@@ -2053,7 +2078,7 @@ static __init int mce_init_device(void) | |||
2053 | return err; | 2078 | return err; |
2054 | } | 2079 | } |
2055 | 2080 | ||
2056 | device_initcall(mce_init_device); | 2081 | device_initcall(mcheck_init_device); |
2057 | 2082 | ||
2058 | /* | 2083 | /* |
2059 | * Old style boot options parsing. Only for compatibility. | 2084 | * Old style boot options parsing. Only for compatibility. |
@@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val) | |||
2101 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, | 2126 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, |
2102 | fake_panic_set, "%llu\n"); | 2127 | fake_panic_set, "%llu\n"); |
2103 | 2128 | ||
2104 | static int __init mce_debugfs_init(void) | 2129 | static int __init mcheck_debugfs_init(void) |
2105 | { | 2130 | { |
2106 | struct dentry *dmce, *ffake_panic; | 2131 | struct dentry *dmce, *ffake_panic; |
2107 | 2132 | ||
@@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void) | |||
2115 | 2140 | ||
2116 | return 0; | 2141 | return 0; |
2117 | } | 2142 | } |
2118 | late_initcall(mce_debugfs_init); | 2143 | late_initcall(mcheck_debugfs_init); |
2119 | #endif | 2144 | #endif |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index b3a1dba75330..4fef985fc221 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state); | |||
49 | 49 | ||
50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 50 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
51 | 51 | ||
52 | static u32 lvtthmr_init __read_mostly; | ||
53 | |||
52 | #ifdef CONFIG_SYSFS | 54 | #ifdef CONFIG_SYSFS |
53 | #define define_therm_throt_sysdev_one_ro(_name) \ | 55 | #define define_therm_throt_sysdev_one_ro(_name) \ |
54 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) |
@@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs) | |||
254 | ack_APIC_irq(); | 256 | ack_APIC_irq(); |
255 | } | 257 | } |
256 | 258 | ||
259 | void __init mcheck_intel_therm_init(void) | ||
260 | { | ||
261 | /* | ||
262 | * This function is only called on boot CPU. Save the init thermal | ||
263 | * LVT value on BSP and use that value to restore APs' thermal LVT | ||
264 | * entry BIOS programmed later | ||
265 | */ | ||
266 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) && | ||
267 | cpu_has(&boot_cpu_data, X86_FEATURE_ACC)) | ||
268 | lvtthmr_init = apic_read(APIC_LVTTHMR); | ||
269 | } | ||
270 | |||
257 | void intel_init_thermal(struct cpuinfo_x86 *c) | 271 | void intel_init_thermal(struct cpuinfo_x86 *c) |
258 | { | 272 | { |
259 | unsigned int cpu = smp_processor_id(); | 273 | unsigned int cpu = smp_processor_id(); |
@@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
270 | * since it might be delivered via SMI already: | 284 | * since it might be delivered via SMI already: |
271 | */ | 285 | */ |
272 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); | 286 | rdmsr(MSR_IA32_MISC_ENABLE, l, h); |
273 | h = apic_read(APIC_LVTTHMR); | 287 | |
288 | /* | ||
289 | * The initial value of thermal LVT entries on all APs always reads | ||
290 | * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI | ||
291 | * sequence to them and LVT registers are reset to 0s except for | ||
292 | * the mask bits which are set to 1s when APs receive INIT IPI. | ||
293 | * Always restore the value that BIOS has programmed on AP based on | ||
294 | * BSP's info we saved since BIOS is always setting the same value | ||
295 | * for all threads/cores | ||
296 | */ | ||
297 | apic_write(APIC_LVTTHMR, lvtthmr_init); | ||
298 | |||
299 | h = lvtthmr_init; | ||
300 | |||
274 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { | 301 | if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) { |
275 | printk(KERN_DEBUG | 302 | printk(KERN_DEBUG |
276 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); | 303 | "CPU%d: Thermal monitoring handled by SMI\n", cpu); |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 04bbd5278568..19212cb01558 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); | 92 | seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count); |
93 | seq_printf(p, " TLB shootdowns\n"); | 93 | seq_printf(p, " TLB shootdowns\n"); |
94 | #endif | 94 | #endif |
95 | #ifdef CONFIG_X86_MCE | 95 | #ifdef CONFIG_X86_THERMAL_VECTOR |
96 | seq_printf(p, "%*s: ", prec, "TRM"); | 96 | seq_printf(p, "%*s: ", prec, "TRM"); |
97 | for_each_online_cpu(j) | 97 | for_each_online_cpu(j) |
98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); | 98 | seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count); |
99 | seq_printf(p, " Thermal event interrupts\n"); | 99 | seq_printf(p, " Thermal event interrupts\n"); |
100 | # ifdef CONFIG_X86_MCE_THRESHOLD | 100 | #endif |
101 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
101 | seq_printf(p, "%*s: ", prec, "THR"); | 102 | seq_printf(p, "%*s: ", prec, "THR"); |
102 | for_each_online_cpu(j) | 103 | for_each_online_cpu(j) |
103 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); | 104 | seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count); |
104 | seq_printf(p, " Threshold APIC interrupts\n"); | 105 | seq_printf(p, " Threshold APIC interrupts\n"); |
105 | # endif | ||
106 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_MCE | 107 | #ifdef CONFIG_X86_MCE |
108 | seq_printf(p, "%*s: ", prec, "MCE"); | 108 | seq_printf(p, "%*s: ", prec, "MCE"); |
@@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
194 | sum += irq_stats(cpu)->irq_call_count; | 194 | sum += irq_stats(cpu)->irq_call_count; |
195 | sum += irq_stats(cpu)->irq_tlb_count; | 195 | sum += irq_stats(cpu)->irq_tlb_count; |
196 | #endif | 196 | #endif |
197 | #ifdef CONFIG_X86_MCE | 197 | #ifdef CONFIG_X86_THERMAL_VECTOR |
198 | sum += irq_stats(cpu)->irq_thermal_count; | 198 | sum += irq_stats(cpu)->irq_thermal_count; |
199 | # ifdef CONFIG_X86_MCE_THRESHOLD | 199 | #endif |
200 | #ifdef CONFIG_X86_MCE_THRESHOLD | ||
200 | sum += irq_stats(cpu)->irq_threshold_count; | 201 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | ||
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_MCE | 203 | #ifdef CONFIG_X86_MCE |
204 | sum += per_cpu(mce_exception_count, cpu); | 204 | sum += per_cpu(mce_exception_count, cpu); |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 2a34f9c5be21..c0ca8f921c91 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -109,6 +109,7 @@ | |||
109 | #ifdef CONFIG_X86_64 | 109 | #ifdef CONFIG_X86_64 |
110 | #include <asm/numa_64.h> | 110 | #include <asm/numa_64.h> |
111 | #endif | 111 | #endif |
112 | #include <asm/mce.h> | ||
112 | 113 | ||
113 | /* | 114 | /* |
114 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 115 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
@@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p) | |||
1031 | #endif | 1032 | #endif |
1032 | #endif | 1033 | #endif |
1033 | x86_init.oem.banner(); | 1034 | x86_init.oem.banner(); |
1035 | |||
1036 | mcheck_init(); | ||
1034 | } | 1037 | } |
1035 | 1038 | ||
1036 | #ifdef CONFIG_X86_32 | 1039 | #ifdef CONFIG_X86_32 |
diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 713ed7d37247..689cc6a6214d 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c | |||
@@ -3,7 +3,6 @@ | |||
3 | 3 | ||
4 | static bool report_gart_errors; | 4 | static bool report_gart_errors; |
5 | static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); | 5 | static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); |
6 | static void (*orig_mce_callback)(struct mce *m); | ||
7 | 6 | ||
8 | void amd_report_gart_errors(bool v) | 7 | void amd_report_gart_errors(bool v) |
9 | { | 8 | { |
@@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec) | |||
363 | pr_warning("Huh? Unknown MCE error 0x%x\n", ec); | 362 | pr_warning("Huh? Unknown MCE error 0x%x\n", ec); |
364 | } | 363 | } |
365 | 364 | ||
366 | static void amd_decode_mce(struct mce *m) | 365 | static int amd_decode_mce(struct notifier_block *nb, unsigned long val, |
366 | void *data) | ||
367 | { | 367 | { |
368 | struct mce *m = (struct mce *)data; | ||
368 | struct err_regs regs; | 369 | struct err_regs regs; |
369 | int node, ecc; | 370 | int node, ecc; |
370 | 371 | ||
@@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m) | |||
420 | } | 421 | } |
421 | 422 | ||
422 | amd_decode_err_code(m->status & 0xffff); | 423 | amd_decode_err_code(m->status & 0xffff); |
424 | |||
425 | return NOTIFY_STOP; | ||
423 | } | 426 | } |
424 | 427 | ||
428 | static struct notifier_block amd_mce_dec_nb = { | ||
429 | .notifier_call = amd_decode_mce, | ||
430 | }; | ||
431 | |||
425 | static int __init mce_amd_init(void) | 432 | static int __init mce_amd_init(void) |
426 | { | 433 | { |
427 | /* | 434 | /* |
428 | * We can decode MCEs for Opteron and later CPUs: | 435 | * We can decode MCEs for Opteron and later CPUs: |
429 | */ | 436 | */ |
430 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && | 437 | if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && |
431 | (boot_cpu_data.x86 >= 0xf)) { | 438 | (boot_cpu_data.x86 >= 0xf)) |
432 | /* safe the default decode mce callback */ | 439 | atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb); |
433 | orig_mce_callback = x86_mce_decode_callback; | ||
434 | |||
435 | x86_mce_decode_callback = amd_decode_mce; | ||
436 | } | ||
437 | 440 | ||
438 | return 0; | 441 | return 0; |
439 | } | 442 | } |
@@ -442,7 +445,7 @@ early_initcall(mce_amd_init); | |||
442 | #ifdef MODULE | 445 | #ifdef MODULE |
443 | static void __exit mce_amd_exit(void) | 446 | static void __exit mce_amd_exit(void) |
444 | { | 447 | { |
445 | x86_mce_decode_callback = orig_mce_callback; | 448 | atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb); |
446 | } | 449 | } |
447 | 450 | ||
448 | MODULE_DESCRIPTION("AMD MCE decoder"); | 451 | MODULE_DESCRIPTION("AMD MCE decoder"); |
diff --git a/include/trace/events/mce.h b/include/trace/events/mce.h new file mode 100644 index 000000000000..7eee77895cb3 --- /dev/null +++ b/include/trace/events/mce.h | |||
@@ -0,0 +1,69 @@ | |||
1 | #undef TRACE_SYSTEM | ||
2 | #define TRACE_SYSTEM mce | ||
3 | |||
4 | #if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ) | ||
5 | #define _TRACE_MCE_H | ||
6 | |||
7 | #include <linux/ktime.h> | ||
8 | #include <linux/tracepoint.h> | ||
9 | #include <asm/mce.h> | ||
10 | |||
11 | TRACE_EVENT(mce_record, | ||
12 | |||
13 | TP_PROTO(struct mce *m), | ||
14 | |||
15 | TP_ARGS(m), | ||
16 | |||
17 | TP_STRUCT__entry( | ||
18 | __field( u64, mcgcap ) | ||
19 | __field( u64, mcgstatus ) | ||
20 | __field( u8, bank ) | ||
21 | __field( u64, status ) | ||
22 | __field( u64, addr ) | ||
23 | __field( u64, misc ) | ||
24 | __field( u64, ip ) | ||
25 | __field( u8, cs ) | ||
26 | __field( u64, tsc ) | ||
27 | __field( u64, walltime ) | ||
28 | __field( u32, cpu ) | ||
29 | __field( u32, cpuid ) | ||
30 | __field( u32, apicid ) | ||
31 | __field( u32, socketid ) | ||
32 | __field( u8, cpuvendor ) | ||
33 | ), | ||
34 | |||
35 | TP_fast_assign( | ||
36 | __entry->mcgcap = m->mcgcap; | ||
37 | __entry->mcgstatus = m->mcgstatus; | ||
38 | __entry->bank = m->bank; | ||
39 | __entry->status = m->status; | ||
40 | __entry->addr = m->addr; | ||
41 | __entry->misc = m->misc; | ||
42 | __entry->ip = m->ip; | ||
43 | __entry->cs = m->cs; | ||
44 | __entry->tsc = m->tsc; | ||
45 | __entry->walltime = m->time; | ||
46 | __entry->cpu = m->extcpu; | ||
47 | __entry->cpuid = m->cpuid; | ||
48 | __entry->apicid = m->apicid; | ||
49 | __entry->socketid = m->socketid; | ||
50 | __entry->cpuvendor = m->cpuvendor; | ||
51 | ), | ||
52 | |||
53 | TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x", | ||
54 | __entry->cpu, | ||
55 | __entry->mcgcap, __entry->mcgstatus, | ||
56 | __entry->bank, __entry->status, | ||
57 | __entry->addr, __entry->misc, | ||
58 | __entry->cs, __entry->ip, | ||
59 | __entry->tsc, | ||
60 | __entry->cpuvendor, __entry->cpuid, | ||
61 | __entry->walltime, | ||
62 | __entry->socketid, | ||
63 | __entry->apicid) | ||
64 | ); | ||
65 | |||
66 | #endif /* _TRACE_MCE_H */ | ||
67 | |||
68 | /* This part must be outside protection */ | ||
69 | #include <trace/define_trace.h> | ||