Diffstat (limited to 'arch/x86/kernel/cpu/mcheck'):
 arch/x86/kernel/cpu/mcheck/mce-apei.c     |  42
 arch/x86/kernel/cpu/mcheck/mce-inject.c   |   7
 arch/x86/kernel/cpu/mcheck/mce-severity.c |   1
 arch/x86/kernel/cpu/mcheck/mce.c          |  71
 arch/x86/kernel/cpu/mcheck/mce_amd.c      | 122
 arch/x86/kernel/cpu/mcheck/mce_intel.c    |   2
 arch/x86/kernel/cpu/mcheck/therm_throt.c  |  65
 7 files changed, 196 insertions(+), 114 deletions(-)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c
index 8209472b27a5..83930deec3c6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-apei.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c
@@ -106,24 +106,34 @@ int apei_write_mce(struct mce *m)
 ssize_t apei_read_mce(struct mce *m, u64 *record_id)
 {
 	struct cper_mce_record rcd;
-	ssize_t len;
-
-	len = erst_read_next(&rcd.hdr, sizeof(rcd));
-	if (len <= 0)
-		return len;
-	/* Can not skip other records in storage via ERST unless clear them */
-	else if (len != sizeof(rcd) ||
-		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) {
-		if (printk_ratelimit())
-			pr_warning(
-			"MCE-APEI: Can not skip the unknown record in ERST");
-		return -EIO;
-	}
-
+	int rc, pos;
+
+	rc = erst_get_record_id_begin(&pos);
+	if (rc)
+		return rc;
+retry:
+	rc = erst_get_record_id_next(&pos, record_id);
+	if (rc)
+		goto out;
+	/* no more record */
+	if (*record_id == APEI_ERST_INVALID_RECORD_ID)
+		goto out;
+	rc = erst_read(*record_id, &rcd.hdr, sizeof(rcd));
+	/* someone else has cleared the record, try next one */
+	if (rc == -ENOENT)
+		goto retry;
+	else if (rc < 0)
+		goto out;
+	/* try to skip other type records in storage */
+	else if (rc != sizeof(rcd) ||
+		 uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE))
+		goto retry;
 	memcpy(m, &rcd.mce, sizeof(*m));
-	*record_id = rcd.hdr.record_id;
+	rc = sizeof(*m);
+out:
+	erst_get_record_id_end();
 
-	return sizeof(*m);
+	return rc;
 }
 
 /* Check whether there is record in ERST */
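
Note on the mce-apei.c change: apei_read_mce() now walks ERST record IDs with a begin/next/read/end sequence and retries records that were cleared concurrently or that are not MCE records, instead of relying on erst_read_next(). Below is a small standalone sketch of that retry loop; the fake_* helpers are hypothetical stand-ins for the ERST API, not real kernel functions, and only the control flow mirrors the patch.

/*
 * Sketch of the begin/next/read/end iteration used by the new apei_read_mce().
 * The fake_* helpers stand in for erst_get_record_id_begin/next/end and
 * erst_read; they are not the kernel API.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

#define INVALID_ID 0xffffffffffffffffULL
#define RECORD_SZ  64

static int fake_id_begin(int *pos) { *pos = 0; return 0; }
static void fake_id_end(void) { }

static int fake_id_next(int *pos, unsigned long long *id)
{
	static const unsigned long long ids[] = { 7, INVALID_ID };
	*id = ids[(*pos)++];
	return 0;
}

static long fake_read(unsigned long long id, void *buf, size_t sz)
{
	(void)id;
	memset(buf, 0, sz);
	return (long)sz;	/* pretend every record is an MCE record */
}

static long read_next_record(char *rec, unsigned long long *record_id)
{
	int pos;
	long rc = fake_id_begin(&pos);

	if (rc)
		return rc;
retry:
	rc = fake_id_next(&pos, record_id);
	if (rc)
		goto out;
	if (*record_id == INVALID_ID)	/* no more records */
		goto out;
	rc = fake_read(*record_id, rec, RECORD_SZ);
	if (rc == -ENOENT)		/* cleared by someone else: skip it */
		goto retry;
	else if (rc < 0)
		goto out;
	else if (rc != RECORD_SZ)	/* wrong type or size: skip it */
		goto retry;
out:
	fake_id_end();
	return rc;
}

int main(void)
{
	char rec[RECORD_SZ];
	unsigned long long id = 0;

	printf("rc=%ld record_id=%llu\n", read_next_record(rec, &id), id);
	return 0;
}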
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7bfedb..0ed633c5048b 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,13 +25,14 @@
 #include <linux/gfp.h>
 #include <asm/mce.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 
 /* Update fake mce registers on current CPU. */
 static void inject_mce(struct mce *m)
 {
 	struct mce *i = &per_cpu(injectm, m->extcpu);
 
-	/* Make sure noone reads partially written injectm */
+	/* Make sure no one reads partially written injectm */
 	i->finished = 0;
 	mb();
 	m->finished = 0;
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
 	struct die_args *args = (struct die_args *)data;
 	int cpu = smp_processor_id();
 	struct mce *m = &__get_cpu_var(injectm);
-	if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
 		return NOTIFY_DONE;
 	cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
 
 static struct notifier_block mce_raise_nb = {
 	.notifier_call = mce_raise_notify,
-	.priority = 1000,
+	.priority = NMI_LOCAL_NORMAL_PRIOR,
 };
 
 /* Inject mce on current CPU */
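
Note on the mce-inject.c change: with the DIE_NMI_IPI event gone, the injector keys its die notifier off DIE_NMI and replaces the magic priority 1000 with the symbolic NMI_LOCAL_NORMAL_PRIOR from <asm/nmi.h>. A minimal sketch of that notifier shape follows; the demo_* names and handler body are illustrative, not the injector's logic.

/*
 * Sketch of a DIE_NMI die notifier with a symbolic priority, mirroring
 * the shape of mce_raise_nb above.  demo_* names are illustrative.
 */
#include <linux/kdebug.h>
#include <linux/notifier.h>
#include <asm/nmi.h>

static int demo_nmi_notify(struct notifier_block *self,
			   unsigned long val, void *data)
{
	if (val != DIE_NMI)		/* only react to NMI die events */
		return NOTIFY_DONE;

	/* per-CPU NMI handling would go here */
	return NOTIFY_STOP;
}

static struct notifier_block demo_nmi_nb = {
	.notifier_call = demo_nmi_notify,
	.priority = NMI_LOCAL_NORMAL_PRIOR,
};

/* registered once at init time with register_die_notifier(&demo_nmi_nb) */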
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index 8a85dd1b1aa1..1e8d66c1336a 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -192,6 +192,7 @@ static const struct file_operations severities_coverage_fops = {
 	.release = seq_release,
 	.read = seq_read,
 	.write = severities_coverage_write,
+	.llseek = seq_lseek,
 };
 
 static int __init severities_debugfs_init(void)
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index ed41562909fe..ff1ae9b6464d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -21,6 +21,7 @@
 #include <linux/percpu.h>
 #include <linux/string.h>
 #include <linux/sysdev.h>
+#include <linux/syscore_ops.h>
 #include <linux/delay.h>
 #include <linux/ctype.h>
 #include <linux/sched.h>
@@ -104,20 +105,6 @@ static int cpu_missing;
 ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
 EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
 
-static int default_decode_mce(struct notifier_block *nb, unsigned long val,
-			       void *data)
-{
-	pr_emerg(HW_ERR "No human readable MCE decoding support on this CPU type.\n");
-	pr_emerg(HW_ERR "Run the message through 'mcelog --ascii' to decode.\n");
-
-	return NOTIFY_STOP;
-}
-
-static struct notifier_block mce_dec_nb = {
-	.notifier_call = default_decode_mce,
-	.priority = -1,
-};
-
 /* MCA banks polled by the period polling timer for corrected events */
 DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
 	[0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
@@ -211,6 +198,8 @@ void mce_log(struct mce *mce)
 
 static void print_mce(struct mce *m)
 {
+	int ret = 0;
+
 	pr_emerg(HW_ERR "CPU %d: Machine Check Exception: %Lx Bank %d: %016Lx\n",
 	       m->extcpu, m->mcgstatus, m->bank, m->status);
 
@@ -238,7 +227,11 @@ static void print_mce(struct mce *m)
 	 * Print out human-readable details about the MCE error,
 	 * (if the CPU has an implementation for that)
 	 */
-	atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+	ret = atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
+	if (ret == NOTIFY_STOP)
+		return;
+
+	pr_emerg_ratelimited(HW_ERR "Run the above through 'mcelog --ascii'\n");
 }
 
 #define PANIC_TIMEOUT 5 /* 5 seconds */
@@ -326,7 +319,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
 
 static int msr_to_offset(u32 msr)
 {
-	unsigned bank = __get_cpu_var(injectm.bank);
+	unsigned bank = __this_cpu_read(injectm.bank);
 
 	if (msr == rip_msr)
 		return offsetof(struct mce, ip);
@@ -346,7 +339,7 @@ static u64 mce_rdmsrl(u32 msr)
 {
 	u64 v;
 
-	if (__get_cpu_var(injectm).finished) {
+	if (__this_cpu_read(injectm.finished)) {
 		int offset = msr_to_offset(msr);
 
 		if (offset < 0)
@@ -369,7 +362,7 @@ static u64 mce_rdmsrl(u32 msr)
 
 static void mce_wrmsrl(u32 msr, u64 v)
 {
-	if (__get_cpu_var(injectm).finished) {
+	if (__this_cpu_read(injectm.finished)) {
 		int offset = msr_to_offset(msr);
 
 		if (offset >= 0)
@@ -589,7 +582,6 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
 		if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) {
 			mce_log(&m);
 			atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m);
-			add_taint(TAINT_MACHINE_CHECK);
 		}
 
 		/*
@@ -881,7 +873,7 @@ reset:
  * Check if the address reported by the CPU is in a format we can parse.
  * It would be possible to add code for most other cases, but all would
  * be somewhat complicated (e.g. segment offset would require an instruction
- * parser). So only support physical addresses upto page granuality for now.
+ * parser). So only support physical addresses up to page granuality for now.
  */
 static int mce_usable_address(struct mce *m)
 {
@@ -1159,7 +1151,7 @@ static void mce_start_timer(unsigned long data)
 
 	WARN_ON(smp_processor_id() != data);
 
-	if (mce_available(&current_cpu_data)) {
+	if (mce_available(__this_cpu_ptr(&cpu_info))) {
 		machine_check_poll(MCP_TIMESTAMP,
 				&__get_cpu_var(mce_poll_banks));
 	}
@@ -1625,7 +1617,7 @@ out:
 static unsigned int mce_poll(struct file *file, poll_table *wait)
 {
 	poll_wait(file, &mce_wait, wait);
-	if (rcu_dereference_check_mce(mcelog.next))
+	if (rcu_access_index(mcelog.next))
 		return POLLIN | POLLRDNORM;
 	if (!mce_apei_read_done && apei_check_mce())
 		return POLLIN | POLLRDNORM;
@@ -1665,6 +1657,7 @@ struct file_operations mce_chrdev_ops = {
 	.read = mce_read,
 	.poll = mce_poll,
 	.unlocked_ioctl = mce_ioctl,
+	.llseek = no_llseek,
 };
 EXPORT_SYMBOL_GPL(mce_chrdev_ops);
 
@@ -1720,8 +1713,6 @@ __setup("mce", mcheck_enable);
 
 int __init mcheck_init(void)
 {
-	atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
-
 	mcheck_intel_therm_init();
 
 	return 0;
@@ -1748,14 +1739,14 @@ static int mce_disable_error_reporting(void)
 	return 0;
 }
 
-static int mce_suspend(struct sys_device *dev, pm_message_t state)
+static int mce_suspend(void)
 {
 	return mce_disable_error_reporting();
 }
 
-static int mce_shutdown(struct sys_device *dev)
+static void mce_shutdown(void)
 {
-	return mce_disable_error_reporting();
+	mce_disable_error_reporting();
 }
 
 /*
@@ -1763,18 +1754,22 @@ static int mce_shutdown(struct sys_device *dev)
 * Only one CPU is active at this time, the others get re-added later using
 * CPU hotplug:
 */
-static int mce_resume(struct sys_device *dev)
+static void mce_resume(void)
 {
 	__mcheck_cpu_init_generic();
-	__mcheck_cpu_init_vendor(&current_cpu_data);
-
-	return 0;
+	__mcheck_cpu_init_vendor(__this_cpu_ptr(&cpu_info));
 }
 
+static struct syscore_ops mce_syscore_ops = {
+	.suspend = mce_suspend,
+	.shutdown = mce_shutdown,
+	.resume = mce_resume,
+};
+
 static void mce_cpu_restart(void *data)
 {
 	del_timer_sync(&__get_cpu_var(mce_timer));
-	if (!mce_available(&current_cpu_data))
+	if (!mce_available(__this_cpu_ptr(&cpu_info)))
 		return;
 	__mcheck_cpu_init_generic();
 	__mcheck_cpu_init_timer();
@@ -1789,7 +1784,7 @@ static void mce_restart(void)
 /* Toggle features for corrected errors */
 static void mce_disable_ce(void *all)
 {
-	if (!mce_available(&current_cpu_data))
+	if (!mce_available(__this_cpu_ptr(&cpu_info)))
 		return;
 	if (all)
 		del_timer_sync(&__get_cpu_var(mce_timer));
@@ -1798,7 +1793,7 @@ static void mce_disable_ce(void *all)
 
 static void mce_enable_ce(void *all)
 {
-	if (!mce_available(&current_cpu_data))
+	if (!mce_available(__this_cpu_ptr(&cpu_info)))
 		return;
 	cmci_reenable();
 	cmci_recheck();
@@ -1807,9 +1802,6 @@ static void mce_enable_ce(void *all)
 }
 
 static struct sysdev_class mce_sysclass = {
-	.suspend = mce_suspend,
-	.shutdown = mce_shutdown,
-	.resume = mce_resume,
 	.name = "machinecheck",
 };
 
@@ -2021,7 +2013,7 @@ static void __cpuinit mce_disable_cpu(void *h)
 	unsigned long action = *(unsigned long *)h;
 	int i;
 
-	if (!mce_available(&current_cpu_data))
+	if (!mce_available(__this_cpu_ptr(&cpu_info)))
 		return;
 
 	if (!(action & CPU_TASKS_FROZEN))
@@ -2039,7 +2031,7 @@ static void __cpuinit mce_reenable_cpu(void *h)
 	unsigned long action = *(unsigned long *)h;
 	int i;
 
-	if (!mce_available(&current_cpu_data))
+	if (!mce_available(__this_cpu_ptr(&cpu_info)))
 		return;
 
 	if (!(action & CPU_TASKS_FROZEN))
@@ -2138,6 +2130,7 @@ static __init int mcheck_init_device(void)
 		return err;
 	}
 
+	register_syscore_ops(&mce_syscore_ops);
 	register_hotcpu_notifier(&mce_cpu_notifier);
 	misc_register(&mce_log_device);
 
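
Note on the mce.c power-management change: the suspend/resume/shutdown hooks move from sysdev_class callbacks to the global struct syscore_ops interface, so they lose the sys_device argument, resume and shutdown return void, and the ops are registered once with register_syscore_ops(). A generic sketch of that conversion pattern follows; the demo_* names are illustrative, only the interface shape is taken from the patch.

/*
 * Sketch of the sysdev -> syscore_ops conversion applied to mce.c above.
 * demo_* names are illustrative.
 */
#include <linux/init.h>
#include <linux/syscore_ops.h>

static int demo_suspend(void)
{
	/* quiesce the hardware; a nonzero return aborts the suspend */
	return 0;
}

static void demo_resume(void)
{
	/* re-init on the single CPU that is running at resume time */
}

static void demo_shutdown(void)
{
	/* same quiescing as suspend, but no error can be reported */
}

static struct syscore_ops demo_syscore_ops = {
	.suspend = demo_suspend,
	.resume = demo_resume,
	.shutdown = demo_shutdown,
};

static int __init demo_init(void)
{
	register_syscore_ops(&demo_syscore_ops);
	return 0;
}
device_initcall(demo_init);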
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index 39aaee5c1ab2..bb0adad35143 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -31,8 +31,6 @@
 #include <asm/mce.h>
 #include <asm/msr.h>
 
-#define PFX               "mce_threshold: "
-#define VERSION           "version 1.1.1"
 #define NR_BANKS          6
 #define NR_BLOCKS         9
 #define THRESHOLD_MAX     0xFFF
@@ -59,12 +57,6 @@ struct threshold_block {
 	struct list_head miscj;
 };
 
-/* defaults used early on boot */
-static struct threshold_block threshold_defaults = {
-	.interrupt_enable = 0,
-	.threshold_limit = THRESHOLD_MAX,
-};
-
 struct threshold_bank {
 	struct kobject *kobj;
 	struct threshold_block *blocks;
@@ -89,49 +81,101 @@ static void amd_threshold_interrupt(void);
 struct thresh_restart {
 	struct threshold_block *b;
 	int reset;
+	int set_lvt_off;
+	int lvt_off;
 	u16 old_limit;
 };
 
+static int lvt_off_valid(struct threshold_block *b, int apic, u32 lo, u32 hi)
+{
+	int msr = (hi & MASK_LVTOFF_HI) >> 20;
+
+	if (apic < 0) {
+		pr_err(FW_BUG "cpu %d, failed to setup threshold interrupt "
+		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n", b->cpu,
+		       b->bank, b->block, b->address, hi, lo);
+		return 0;
+	}
+
+	if (apic != msr) {
+		pr_err(FW_BUG "cpu %d, invalid threshold interrupt offset %d "
+		       "for bank %d, block %d (MSR%08X=0x%x%08x)\n",
+		       b->cpu, apic, b->bank, b->block, b->address, hi, lo);
+		return 0;
+	}
+
+	return 1;
+};
+
 /* must be called with correct cpu affinity */
 /* Called via smp_call_function_single() */
 static void threshold_restart_bank(void *_tr)
 {
 	struct thresh_restart *tr = _tr;
-	u32 mci_misc_hi, mci_misc_lo;
+	u32 hi, lo;
 
-	rdmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	rdmsr(tr->b->address, lo, hi);
 
-	if (tr->b->threshold_limit < (mci_misc_hi & THRESHOLD_MAX))
+	if (tr->b->threshold_limit < (hi & THRESHOLD_MAX))
 		tr->reset = 1;	/* limit cannot be lower than err count */
 
 	if (tr->reset) {		/* reset err count and overflow bit */
-		mci_misc_hi =
-		    (mci_misc_hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
+		hi =
+		    (hi & ~(MASK_ERR_COUNT_HI | MASK_OVERFLOW_HI)) |
 		    (THRESHOLD_MAX - tr->b->threshold_limit);
 	} else if (tr->old_limit) {	/* change limit w/o reset */
-		int new_count = (mci_misc_hi & THRESHOLD_MAX) +
+		int new_count = (hi & THRESHOLD_MAX) +
 		    (tr->old_limit - tr->b->threshold_limit);
 
-		mci_misc_hi = (mci_misc_hi & ~MASK_ERR_COUNT_HI) |
+		hi = (hi & ~MASK_ERR_COUNT_HI) |
 		    (new_count & THRESHOLD_MAX);
 	}
 
+	if (tr->set_lvt_off) {
+		if (lvt_off_valid(tr->b, tr->lvt_off, lo, hi)) {
+			/* set new lvt offset */
+			hi &= ~MASK_LVTOFF_HI;
+			hi |= tr->lvt_off << 20;
+		}
+	}
+
 	tr->b->interrupt_enable ?
-	    (mci_misc_hi = (mci_misc_hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
-	    (mci_misc_hi &= ~MASK_INT_TYPE_HI);
+	    (hi = (hi & ~MASK_INT_TYPE_HI) | INT_TYPE_APIC) :
+	    (hi &= ~MASK_INT_TYPE_HI);
 
-	mci_misc_hi |= MASK_COUNT_EN_HI;
-	wrmsr(tr->b->address, mci_misc_lo, mci_misc_hi);
+	hi |= MASK_COUNT_EN_HI;
+	wrmsr(tr->b->address, lo, hi);
+}
+
+static void mce_threshold_block_init(struct threshold_block *b, int offset)
+{
+	struct thresh_restart tr = {
+		.b		= b,
+		.set_lvt_off	= 1,
+		.lvt_off	= offset,
+	};
+
+	b->threshold_limit = THRESHOLD_MAX;
+	threshold_restart_bank(&tr);
+};
+
+static int setup_APIC_mce(int reserved, int new)
+{
+	if (reserved < 0 && !setup_APIC_eilvt(new, THRESHOLD_APIC_VECTOR,
+					      APIC_EILVT_MSG_FIX, 0))
+		return new;
+
+	return reserved;
 }
 
 /* cpu init entry point, called from mce.c with preempt off */
 void mce_amd_feature_init(struct cpuinfo_x86 *c)
 {
+	struct threshold_block b;
 	unsigned int cpu = smp_processor_id();
 	u32 low = 0, high = 0, address = 0;
 	unsigned int bank, block;
-	struct thresh_restart tr;
-	u8 lvt_off;
+	int offset = -1;
 
 	for (bank = 0; bank < NR_BANKS; ++bank) {
 		for (block = 0; block < NR_BLOCKS; ++block) {
@@ -162,19 +206,16 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
 			if (shared_bank[bank] && c->cpu_core_id)
 				break;
 #endif
-			lvt_off = setup_APIC_eilvt_mce(THRESHOLD_APIC_VECTOR,
-						       APIC_EILVT_MSG_FIX, 0);
+			offset = setup_APIC_mce(offset,
+						(high & MASK_LVTOFF_HI) >> 20);
 
-			high &= ~MASK_LVTOFF_HI;
-			high |= lvt_off << 20;
-			wrmsr(address, low, high);
-
-			threshold_defaults.address = address;
-			tr.b = &threshold_defaults;
-			tr.reset = 0;
-			tr.old_limit = 0;
-			threshold_restart_bank(&tr);
+			memset(&b, 0, sizeof(b));
+			b.cpu = cpu;
+			b.bank = bank;
+			b.block = block;
+			b.address = address;
 
+			mce_threshold_block_init(&b, offset);
 			mce_threshold_vector = amd_threshold_interrupt;
 		}
 	}
@@ -277,9 +318,8 @@ store_interrupt_enable(struct threshold_block *b, const char *buf, size_t size)
 
 	b->interrupt_enable = !!new;
 
+	memset(&tr, 0, sizeof(tr));
 	tr.b = b;
-	tr.reset = 0;
-	tr.old_limit = 0;
 
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
@@ -300,10 +340,10 @@ store_threshold_limit(struct threshold_block *b, const char *buf, size_t size)
 	if (new < 1)
 		new = 1;
 
+	memset(&tr, 0, sizeof(tr));
 	tr.old_limit = b->threshold_limit;
 	b->threshold_limit = new;
 	tr.b = b;
-	tr.reset = 0;
 
 	smp_call_function_single(b->cpu, threshold_restart_bank, &tr, 1);
 
@@ -469,6 +509,7 @@ recurse:
 out_free:
 	if (b) {
 		kobject_put(&b->kobj);
+		list_del(&b->miscj);
 		kfree(b);
 	}
 	return err;
@@ -487,15 +528,12 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
 	int i, err = 0;
 	struct threshold_bank *b = NULL;
 	char name[32];
-#ifdef CONFIG_SMP
-	struct cpuinfo_x86 *c = &cpu_data(cpu);
-#endif
 
 	sprintf(name, "threshold_bank%i", bank);
 
 #ifdef CONFIG_SMP
 	if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) {	/* symlink */
-		i = cpumask_first(c->llc_shared_map);
+		i = cpumask_first(cpu_llc_shared_mask(cpu));
 
 		/* first core not up yet */
 		if (cpu_data(i).cpu_core_id)
@@ -515,7 +553,7 @@
 		if (err)
 			goto out;
 
-		cpumask_copy(b->cpus, c->llc_shared_map);
+		cpumask_copy(b->cpus, cpu_llc_shared_mask(cpu));
 		per_cpu(threshold_banks, cpu)[bank] = b;
 
 		goto out;
@@ -582,9 +620,9 @@ static __cpuinit int threshold_create_device(unsigned int cpu)
 			continue;
 		err = threshold_create_bank(cpu, bank);
 		if (err)
-			goto out;
+			return err;
 	}
-out:
+
 	return err;
 }
 
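
Note on the mce_amd.c change: struct thresh_restart grows set_lvt_off and lvt_off fields, so callers now have to start from a fully zeroed structure before filling in the fields they care about, or stale stack contents could be taken as a request to rewrite the LVT offset. Below is a small standalone sketch of the two initialization styles used above; types are simplified and demo_restart() merely stands in for threshold_restart_bank().

/*
 * Sketch of why the sysfs store handlers memset() struct thresh_restart
 * and why mce_threshold_block_init() can rely on a designated initializer.
 * Types are simplified; demo_restart() is a stand-in.
 */
#include <stdio.h>
#include <string.h>

struct demo_thresh_restart {
	void		*b;
	int		reset;
	int		set_lvt_off;	/* new field: must not hold garbage */
	int		lvt_off;
	unsigned short	old_limit;
};

static void demo_restart(const struct demo_thresh_restart *tr)
{
	printf("set_lvt_off=%d lvt_off=%d old_limit=%u\n",
	       tr->set_lvt_off, tr->lvt_off, tr->old_limit);
}

static void store_style(void *block, unsigned short old_limit)
{
	struct demo_thresh_restart tr;

	memset(&tr, 0, sizeof(tr));	/* as in store_threshold_limit() */
	tr.b = block;
	tr.old_limit = old_limit;
	demo_restart(&tr);
}

static void init_style(void *block, int offset)
{
	/* a designated initializer zeroes the unnamed members implicitly,
	 * as in mce_threshold_block_init() */
	struct demo_thresh_restart tr = {
		.b		= block,
		.set_lvt_off	= 1,
		.lvt_off	= offset,
	};

	demo_restart(&tr);
}

int main(void)
{
	int dummy;

	store_style(&dummy, 10);
	init_style(&dummy, 1);
	return 0;
}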
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index 6fcd0936194f..8694ef56459d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -130,7 +130,7 @@ void cmci_recheck(void)
 	unsigned long flags;
 	int banks;
 
-	if (!mce_available(&current_cpu_data) || !cmci_supported(&banks))
+	if (!mce_available(__this_cpu_ptr(&cpu_info)) || !cmci_supported(&banks))
 		return;
 	local_irq_save(flags);
 	machine_check_poll(MCP_TIMESTAMP, &__get_cpu_var(mce_banks_owned));
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 169d8804a9f8..27c625178bf1 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -53,8 +53,14 @@ struct thermal_state {
 	struct _thermal_state core_power_limit;
 	struct _thermal_state package_throttle;
 	struct _thermal_state package_power_limit;
+	struct _thermal_state core_thresh0;
+	struct _thermal_state core_thresh1;
 };
 
+/* Callback to handle core threshold interrupts */
+int (*platform_thermal_notify)(__u64 msr_val);
+EXPORT_SYMBOL(platform_thermal_notify);
+
 static DEFINE_PER_CPU(struct thermal_state, thermal_state);
 
 static atomic_t therm_throt_en = ATOMIC_INIT(0);
@@ -181,8 +187,6 @@ static int therm_throt_process(bool new_event, int event, int level)
 				this_cpu,
 				level == CORE_LEVEL ? "Core" : "Package",
 				state->count);
-
-		add_taint(TAINT_MACHINE_CHECK);
 		return 1;
 	}
 	if (old_event) {
@@ -200,6 +204,22 @@ static int therm_throt_process(bool new_event, int event, int level)
 	return 0;
 }
 
+static int thresh_event_valid(int event)
+{
+	struct _thermal_state *state;
+	unsigned int this_cpu = smp_processor_id();
+	struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu);
+	u64 now = get_jiffies_64();
+
+	state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1;
+
+	if (time_before64(now, state->next_check))
+		return 0;
+
+	state->next_check = now + CHECK_INTERVAL;
+	return 1;
+}
+
 #ifdef CONFIG_SYSFS
 /* Add/Remove thermal_throttle interface for CPU device: */
 static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev,
@@ -313,32 +333,50 @@ device_initcall(thermal_throttle_init_device);
 #define PACKAGE_THROTTLED	((__u64)2 << 62)
 #define PACKAGE_POWER_LIMIT	((__u64)3 << 62)
 
+static void notify_thresholds(__u64 msr_val)
+{
+	/* check whether the interrupt handler is defined;
+	 * otherwise simply return
+	 */
+	if (!platform_thermal_notify)
+		return;
+
+	/* lower threshold reached */
+	if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0))
+		platform_thermal_notify(msr_val);
+	/* higher threshold reached */
+	if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1))
+		platform_thermal_notify(msr_val);
+}
+
 /* Thermal transition interrupt handler */
 static void intel_thermal_interrupt(void)
 {
 	__u64 msr_val;
-	struct cpuinfo_x86 *c = &cpu_data(smp_processor_id());
 
 	rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
 
+	/* Check for violation of core thermal thresholds*/
+	notify_thresholds(msr_val);
+
 	if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT,
 				THERMAL_THROTTLING_EVENT,
 				CORE_LEVEL) != 0)
 		mce_log_therm_throt_event(CORE_THROTTLED | msr_val);
 
-	if (cpu_has(c, X86_FEATURE_PLN))
+	if (this_cpu_has(X86_FEATURE_PLN))
 		if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
 					CORE_LEVEL) != 0)
 			mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val);
 
-	if (cpu_has(c, X86_FEATURE_PTS)) {
+	if (this_cpu_has(X86_FEATURE_PTS)) {
 		rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val);
 		if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT,
 					THERMAL_THROTTLING_EVENT,
 					PACKAGE_LEVEL) != 0)
 			mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val);
-		if (cpu_has(c, X86_FEATURE_PLN))
+		if (this_cpu_has(X86_FEATURE_PLN))
 			if (therm_throt_process(msr_val &
 					PACKAGE_THERM_STATUS_POWER_LIMIT,
 					POWER_LIMIT_EVENT,
@@ -350,9 +388,8 @@ static void intel_thermal_interrupt(void)
 
 static void unexpected_thermal_interrupt(void)
 {
-	printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n",
+	printk(KERN_ERR "CPU%d: Unexpected LVT thermal interrupt!\n",
 	       smp_processor_id());
-	add_taint(TAINT_MACHINE_CHECK);
 }
 
 static void (*smp_thermal_vector)(void) = unexpected_thermal_interrupt;
@@ -405,18 +442,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
 	 */
 	rdmsr(MSR_IA32_MISC_ENABLE, l, h);
 
+	h = lvtthmr_init;
 	/*
 	 * The initial value of thermal LVT entries on all APs always reads
 	 * 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
 	 * sequence to them and LVT registers are reset to 0s except for
 	 * the mask bits which are set to 1s when APs receive INIT IPI.
-	 * Always restore the value that BIOS has programmed on AP based on
-	 * BSP's info we saved since BIOS is always setting the same value
-	 * for all threads/cores
+	 * If BIOS takes over the thermal interrupt and sets its interrupt
+	 * delivery mode to SMI (not fixed), it restores the value that the
+	 * BIOS has programmed on AP based on BSP's info we saved since BIOS
+	 * is always setting the same value for all threads/cores.
	 */
-	apic_write(APIC_LVTTHMR, lvtthmr_init);
+	if ((h & APIC_DM_FIXED_MASK) != APIC_DM_FIXED)
+		apic_write(APIC_LVTTHMR, lvtthmr_init);
 
-	h = lvtthmr_init;
-
 	if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
 		printk(KERN_DEBUG
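
Note on the therm_throt.c change: the exported platform_thermal_notify pointer lets a platform driver be called on core thermal-threshold interrupts, rate-limited by thresh_event_valid(). No consumer ships with this patch; the module below is a hypothetical sketch of how one might attach and detach a handler, and the header location of the declaration is an assumption.

/*
 * Hypothetical consumer of the platform_thermal_notify hook added above.
 * Not part of this patch; module boilerplate and the header location of
 * the declaration are assumptions.
 */
#include <linux/kernel.h>
#include <linux/module.h>
#include <asm/mce.h>	/* assumed: extern int (*platform_thermal_notify)(__u64) */

static int demo_thermal_notify(__u64 msr_val)
{
	pr_info("core thermal threshold event, THERM_STATUS=0x%llx\n",
		(unsigned long long)msr_val);
	return 0;
}

static int __init demo_thermal_init(void)
{
	platform_thermal_notify = demo_thermal_notify;
	return 0;
}

static void __exit demo_thermal_exit(void)
{
	platform_thermal_notify = NULL;
}

module_init(demo_thermal_init);
module_exit(demo_thermal_exit);
MODULE_LICENSE("GPL");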