diff options
author | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-09-22 15:54:53 -0400 |
---|---|---|
committer | Russell King <rmk+kernel@arm.linux.org.uk> | 2009-09-22 16:01:40 -0400 |
commit | ae19ffbadc1b2100285a5b5b3d0a4e0a11390904 (patch) | |
tree | 3c2086ab67398a019089a47ca3f362a4bc6db74f /arch/x86/kernel | |
parent | 34e84f39a27d059a3e6ec6e8b94aafa702e6f220 (diff) | |
parent | 9173a8ef24a6b1b8031507b35b8ffe5f85a87692 (diff) |
Merge branch 'master' into for-linus
Diffstat (limited to 'arch/x86/kernel')
34 files changed, 1004 insertions, 1135 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 430d5b24af7b..832cb838cb48 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -52,6 +52,7 @@ obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | |||
52 | obj-$(CONFIG_X86_32) += tls.o | 52 | obj-$(CONFIG_X86_32) += tls.o |
53 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 53 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
54 | obj-y += step.o | 54 | obj-y += step.o |
55 | obj-$(CONFIG_INTEL_TXT) += tboot.o | ||
55 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 56 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
56 | obj-y += cpu/ | 57 | obj-y += cpu/ |
57 | obj-y += acpi/ | 58 | obj-y += acpi/ |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index db7220220d09..cb66a22d98ad 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu) | |||
66 | 66 | ||
67 | static inline int mce_in_progress(void) | 67 | static inline int mce_in_progress(void) |
68 | { | 68 | { |
69 | #if defined(CONFIG_X86_NEW_MCE) | 69 | #if defined(CONFIG_X86_MCE) |
70 | return atomic_read(&mce_entry) > 0; | 70 | return atomic_read(&mce_entry) > 0; |
71 | #endif | 71 | #endif |
72 | return 0; | 72 | return 0; |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c1f253dac155..8dd30638fe44 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp) | |||
13 | 13 | ||
14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
15 | obj-y += proc.o capflags.o powerflags.o common.o | 15 | obj-y += proc.o capflags.o powerflags.o common.o |
16 | obj-y += vmware.o hypervisor.o | 16 | obj-y += vmware.o hypervisor.o sched.o |
17 | 17 | ||
18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
19 | obj-$(CONFIG_X86_64) += bugs_64.o | 19 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 22a47c82f3c0..f32fa71ccf97 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -333,6 +333,16 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
333 | #endif | 333 | #endif |
334 | } | 334 | } |
335 | 335 | ||
336 | int amd_get_nb_id(int cpu) | ||
337 | { | ||
338 | int id = 0; | ||
339 | #ifdef CONFIG_SMP | ||
340 | id = per_cpu(cpu_llc_id, cpu); | ||
341 | #endif | ||
342 | return id; | ||
343 | } | ||
344 | EXPORT_SYMBOL_GPL(amd_get_nb_id); | ||
345 | |||
336 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | 346 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) |
337 | { | 347 | { |
338 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 348 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 6b2a52dd0403..dca325c03999 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c | |||
@@ -30,8 +30,8 @@ | |||
30 | #include <asm/apic.h> | 30 | #include <asm/apic.h> |
31 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
32 | 32 | ||
33 | static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); | 33 | static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr); |
34 | static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); | 34 | static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr); |
35 | static DEFINE_PER_CPU(int, cpu_priv_count); | 35 | static DEFINE_PER_CPU(int, cpu_priv_count); |
36 | 36 | ||
37 | static DEFINE_MUTEX(cpu_debug_lock); | 37 | static DEFINE_MUTEX(cpu_debug_lock); |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ae9b503220ca..4109679863c1 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -60,7 +60,6 @@ enum { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | #define INTEL_MSR_RANGE (0xffff) | 62 | #define INTEL_MSR_RANGE (0xffff) |
63 | #define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) | ||
64 | 63 | ||
65 | struct acpi_cpufreq_data { | 64 | struct acpi_cpufreq_data { |
66 | struct acpi_processor_performance *acpi_data; | 65 | struct acpi_processor_performance *acpi_data; |
@@ -71,11 +70,7 @@ struct acpi_cpufreq_data { | |||
71 | 70 | ||
72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); | 71 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); |
73 | 72 | ||
74 | struct acpi_msr_data { | 73 | static DEFINE_PER_CPU(struct aperfmperf, old_perf); |
75 | u64 saved_aperf, saved_mperf; | ||
76 | }; | ||
77 | |||
78 | static DEFINE_PER_CPU(struct acpi_msr_data, msr_data); | ||
79 | 74 | ||
80 | DEFINE_TRACE(power_mark); | 75 | DEFINE_TRACE(power_mark); |
81 | 76 | ||
@@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
244 | return cmd.val; | 239 | return cmd.val; |
245 | } | 240 | } |
246 | 241 | ||
247 | struct perf_pair { | ||
248 | union { | ||
249 | struct { | ||
250 | u32 lo; | ||
251 | u32 hi; | ||
252 | } split; | ||
253 | u64 whole; | ||
254 | } aperf, mperf; | ||
255 | }; | ||
256 | |||
257 | /* Called via smp_call_function_single(), on the target CPU */ | 242 | /* Called via smp_call_function_single(), on the target CPU */ |
258 | static void read_measured_perf_ctrs(void *_cur) | 243 | static void read_measured_perf_ctrs(void *_cur) |
259 | { | 244 | { |
260 | struct perf_pair *cur = _cur; | 245 | struct aperfmperf *am = _cur; |
261 | 246 | ||
262 | rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); | 247 | get_aperfmperf(am); |
263 | rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); | ||
264 | } | 248 | } |
265 | 249 | ||
266 | /* | 250 | /* |
@@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur) | |||
279 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | 263 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, |
280 | unsigned int cpu) | 264 | unsigned int cpu) |
281 | { | 265 | { |
282 | struct perf_pair readin, cur; | 266 | struct aperfmperf perf; |
283 | unsigned int perf_percent; | 267 | unsigned long ratio; |
284 | unsigned int retval; | 268 | unsigned int retval; |
285 | 269 | ||
286 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) | 270 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) |
287 | return 0; | 271 | return 0; |
288 | 272 | ||
289 | cur.aperf.whole = readin.aperf.whole - | 273 | ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); |
290 | per_cpu(msr_data, cpu).saved_aperf; | 274 | per_cpu(old_perf, cpu) = perf; |
291 | cur.mperf.whole = readin.mperf.whole - | ||
292 | per_cpu(msr_data, cpu).saved_mperf; | ||
293 | per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole; | ||
294 | per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole; | ||
295 | |||
296 | #ifdef __i386__ | ||
297 | /* | ||
298 | * We dont want to do 64 bit divide with 32 bit kernel | ||
299 | * Get an approximate value. Return failure in case we cannot get | ||
300 | * an approximate value. | ||
301 | */ | ||
302 | if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { | ||
303 | int shift_count; | ||
304 | u32 h; | ||
305 | |||
306 | h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); | ||
307 | shift_count = fls(h); | ||
308 | |||
309 | cur.aperf.whole >>= shift_count; | ||
310 | cur.mperf.whole >>= shift_count; | ||
311 | } | ||
312 | |||
313 | if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { | ||
314 | int shift_count = 7; | ||
315 | cur.aperf.split.lo >>= shift_count; | ||
316 | cur.mperf.split.lo >>= shift_count; | ||
317 | } | ||
318 | |||
319 | if (cur.aperf.split.lo && cur.mperf.split.lo) | ||
320 | perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; | ||
321 | else | ||
322 | perf_percent = 0; | ||
323 | 275 | ||
324 | #else | 276 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; |
325 | if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { | ||
326 | int shift_count = 7; | ||
327 | cur.aperf.whole >>= shift_count; | ||
328 | cur.mperf.whole >>= shift_count; | ||
329 | } | ||
330 | |||
331 | if (cur.aperf.whole && cur.mperf.whole) | ||
332 | perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; | ||
333 | else | ||
334 | perf_percent = 0; | ||
335 | |||
336 | #endif | ||
337 | |||
338 | retval = (policy->cpuinfo.max_freq * perf_percent) / 100; | ||
339 | 277 | ||
340 | return retval; | 278 | return retval; |
341 | } | 279 | } |
@@ -731,12 +669,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
731 | acpi_processor_notify_smm(THIS_MODULE); | 669 | acpi_processor_notify_smm(THIS_MODULE); |
732 | 670 | ||
733 | /* Check for APERF/MPERF support in hardware */ | 671 | /* Check for APERF/MPERF support in hardware */ |
734 | if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { | 672 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
735 | unsigned int ecx; | 673 | acpi_cpufreq_driver.getavg = get_measured_perf; |
736 | ecx = cpuid_ecx(6); | ||
737 | if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) | ||
738 | acpi_cpufreq_driver.getavg = get_measured_perf; | ||
739 | } | ||
740 | 674 | ||
741 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 675 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
742 | for (i = 0; i < perf->state_count; i++) | 676 | for (i = 0; i < perf->state_count; i++) |
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 80a722a071b5..40e1835b35e8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -350,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
350 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 350 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
351 | } | 351 | } |
352 | 352 | ||
353 | if (c->cpuid_level > 6) { | ||
354 | unsigned ecx = cpuid_ecx(6); | ||
355 | if (ecx & 0x01) | ||
356 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
357 | } | ||
358 | |||
353 | if (cpu_has_xmm2) | 359 | if (cpu_has_xmm2) |
354 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 360 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
355 | if (cpu_has_ds) { | 361 | if (cpu_has_ds) { |
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 188a1ca5ad2b..4ac6d48fe11b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -1,11 +1,8 @@ | |||
1 | obj-y = mce.o | 1 | obj-y = mce.o mce-severity.o |
2 | 2 | ||
3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o | ||
4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o | ||
5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o | 3 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o |
6 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 4 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
7 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o | 5 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
8 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | ||
9 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | 6 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o |
10 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
11 | 8 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c deleted file mode 100644 index b945d5dbc609..000000000000 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ /dev/null | |||
@@ -1,116 +0,0 @@ | |||
1 | /* | ||
2 | * Athlon specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Dave Jones <davej@redhat.com> | ||
4 | */ | ||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | ||
10 | |||
11 | #include <asm/processor.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | /* Machine Check Handler For AMD Athlon/Duron: */ | ||
17 | static void k7_machine_check(struct pt_regs *regs, long error_code) | ||
18 | { | ||
19 | u32 alow, ahigh, high, low; | ||
20 | u32 mcgstl, mcgsth; | ||
21 | int recover = 1; | ||
22 | int i; | ||
23 | |||
24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
25 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
26 | recover = 0; | ||
27 | |||
28 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
29 | smp_processor_id(), mcgsth, mcgstl); | ||
30 | |||
31 | for (i = 1; i < nr_mce_banks; i++) { | ||
32 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
33 | if (high & (1<<31)) { | ||
34 | char misc[20]; | ||
35 | char addr[24]; | ||
36 | |||
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
40 | if (high & (1<<29)) | ||
41 | recover |= 1; | ||
42 | if (high & (1<<25)) | ||
43 | recover |= 2; | ||
44 | high &= ~(1<<31); | ||
45 | |||
46 | if (high & (1<<27)) { | ||
47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
49 | } | ||
50 | if (high & (1<<26)) { | ||
51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
53 | } | ||
54 | |||
55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
56 | smp_processor_id(), i, high, low, misc, addr); | ||
57 | |||
58 | /* Clear it: */ | ||
59 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
60 | /* Serialize: */ | ||
61 | wmb(); | ||
62 | add_taint(TAINT_MACHINE_CHECK); | ||
63 | } | ||
64 | } | ||
65 | |||
66 | if (recover & 2) | ||
67 | panic("CPU context corrupt"); | ||
68 | if (recover & 1) | ||
69 | panic("Unable to continue"); | ||
70 | |||
71 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
72 | |||
73 | mcgstl &= ~(1<<2); | ||
74 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
75 | } | ||
76 | |||
77 | |||
78 | /* AMD K7 machine check is Intel like: */ | ||
79 | void amd_mcheck_init(struct cpuinfo_x86 *c) | ||
80 | { | ||
81 | u32 l, h; | ||
82 | int i; | ||
83 | |||
84 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
85 | return; | ||
86 | |||
87 | machine_check_vector = k7_machine_check; | ||
88 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
89 | wmb(); | ||
90 | |||
91 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
92 | |||
93 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
94 | if (l & (1<<8)) /* Control register present ? */ | ||
95 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
96 | nr_mce_banks = l & 0xff; | ||
97 | |||
98 | /* | ||
99 | * Clear status for MC index 0 separately, we don't touch CTL, | ||
100 | * as some K7 Athlons cause spurious MCEs when its enabled: | ||
101 | */ | ||
102 | if (boot_cpu_data.x86 == 6) { | ||
103 | wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); | ||
104 | i = 1; | ||
105 | } else | ||
106 | i = 0; | ||
107 | |||
108 | for (; i < nr_mce_banks; i++) { | ||
109 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
110 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
111 | } | ||
112 | |||
113 | set_in_cr4(X86_CR4_MCE); | ||
114 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
115 | smp_processor_id()); | ||
116 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index a3a235a53f09..7029f0e2acad 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -18,7 +18,12 @@ | |||
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
21 | #include <linux/notifier.h> | ||
22 | #include <linux/kdebug.h> | ||
23 | #include <linux/cpu.h> | ||
24 | #include <linux/sched.h> | ||
21 | #include <asm/mce.h> | 25 | #include <asm/mce.h> |
26 | #include <asm/apic.h> | ||
22 | 27 | ||
23 | /* Update fake mce registers on current CPU. */ | 28 | /* Update fake mce registers on current CPU. */ |
24 | static void inject_mce(struct mce *m) | 29 | static void inject_mce(struct mce *m) |
@@ -39,44 +44,141 @@ static void inject_mce(struct mce *m) | |||
39 | i->finished = 1; | 44 | i->finished = 1; |
40 | } | 45 | } |
41 | 46 | ||
42 | struct delayed_mce { | 47 | static void raise_poll(struct mce *m) |
43 | struct timer_list timer; | 48 | { |
44 | struct mce m; | 49 | unsigned long flags; |
45 | }; | 50 | mce_banks_t b; |
46 | 51 | ||
47 | /* Inject mce on current CPU */ | 52 | memset(&b, 0xff, sizeof(mce_banks_t)); |
48 | static void raise_mce(unsigned long data) | 53 | local_irq_save(flags); |
54 | machine_check_poll(0, &b); | ||
55 | local_irq_restore(flags); | ||
56 | m->finished = 0; | ||
57 | } | ||
58 | |||
59 | static void raise_exception(struct mce *m, struct pt_regs *pregs) | ||
49 | { | 60 | { |
50 | struct delayed_mce *dm = (struct delayed_mce *)data; | 61 | struct pt_regs regs; |
51 | struct mce *m = &dm->m; | 62 | unsigned long flags; |
52 | int cpu = m->extcpu; | ||
53 | 63 | ||
54 | inject_mce(m); | 64 | if (!pregs) { |
55 | if (m->status & MCI_STATUS_UC) { | ||
56 | struct pt_regs regs; | ||
57 | memset(&regs, 0, sizeof(struct pt_regs)); | 65 | memset(&regs, 0, sizeof(struct pt_regs)); |
58 | regs.ip = m->ip; | 66 | regs.ip = m->ip; |
59 | regs.cs = m->cs; | 67 | regs.cs = m->cs; |
68 | pregs = &regs; | ||
69 | } | ||
70 | /* in mcheck exeception handler, irq will be disabled */ | ||
71 | local_irq_save(flags); | ||
72 | do_machine_check(pregs, 0); | ||
73 | local_irq_restore(flags); | ||
74 | m->finished = 0; | ||
75 | } | ||
76 | |||
77 | static cpumask_t mce_inject_cpumask; | ||
78 | |||
79 | static int mce_raise_notify(struct notifier_block *self, | ||
80 | unsigned long val, void *data) | ||
81 | { | ||
82 | struct die_args *args = (struct die_args *)data; | ||
83 | int cpu = smp_processor_id(); | ||
84 | struct mce *m = &__get_cpu_var(injectm); | ||
85 | if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) | ||
86 | return NOTIFY_DONE; | ||
87 | cpu_clear(cpu, mce_inject_cpumask); | ||
88 | if (m->inject_flags & MCJ_EXCEPTION) | ||
89 | raise_exception(m, args->regs); | ||
90 | else if (m->status) | ||
91 | raise_poll(m); | ||
92 | return NOTIFY_STOP; | ||
93 | } | ||
94 | |||
95 | static struct notifier_block mce_raise_nb = { | ||
96 | .notifier_call = mce_raise_notify, | ||
97 | .priority = 1000, | ||
98 | }; | ||
99 | |||
100 | /* Inject mce on current CPU */ | ||
101 | static int raise_local(struct mce *m) | ||
102 | { | ||
103 | int context = MCJ_CTX(m->inject_flags); | ||
104 | int ret = 0; | ||
105 | int cpu = m->extcpu; | ||
106 | |||
107 | if (m->inject_flags & MCJ_EXCEPTION) { | ||
60 | printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); | 108 | printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); |
61 | do_machine_check(&regs, 0); | 109 | switch (context) { |
110 | case MCJ_CTX_IRQ: | ||
111 | /* | ||
112 | * Could do more to fake interrupts like | ||
113 | * calling irq_enter, but the necessary | ||
114 | * machinery isn't exported currently. | ||
115 | */ | ||
116 | /*FALL THROUGH*/ | ||
117 | case MCJ_CTX_PROCESS: | ||
118 | raise_exception(m, NULL); | ||
119 | break; | ||
120 | default: | ||
121 | printk(KERN_INFO "Invalid MCE context\n"); | ||
122 | ret = -EINVAL; | ||
123 | } | ||
62 | printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); | 124 | printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); |
63 | } else { | 125 | } else if (m->status) { |
64 | mce_banks_t b; | ||
65 | memset(&b, 0xff, sizeof(mce_banks_t)); | ||
66 | printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); | 126 | printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); |
67 | machine_check_poll(0, &b); | 127 | raise_poll(m); |
68 | mce_notify_irq(); | 128 | mce_notify_irq(); |
69 | printk(KERN_INFO "Finished machine check poll on CPU %d\n", | 129 | printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); |
70 | cpu); | 130 | } else |
71 | } | 131 | m->finished = 0; |
72 | kfree(dm); | 132 | |
133 | return ret; | ||
134 | } | ||
135 | |||
136 | static void raise_mce(struct mce *m) | ||
137 | { | ||
138 | int context = MCJ_CTX(m->inject_flags); | ||
139 | |||
140 | inject_mce(m); | ||
141 | |||
142 | if (context == MCJ_CTX_RANDOM) | ||
143 | return; | ||
144 | |||
145 | #ifdef CONFIG_X86_LOCAL_APIC | ||
146 | if (m->inject_flags & MCJ_NMI_BROADCAST) { | ||
147 | unsigned long start; | ||
148 | int cpu; | ||
149 | get_online_cpus(); | ||
150 | mce_inject_cpumask = cpu_online_map; | ||
151 | cpu_clear(get_cpu(), mce_inject_cpumask); | ||
152 | for_each_online_cpu(cpu) { | ||
153 | struct mce *mcpu = &per_cpu(injectm, cpu); | ||
154 | if (!mcpu->finished || | ||
155 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) | ||
156 | cpu_clear(cpu, mce_inject_cpumask); | ||
157 | } | ||
158 | if (!cpus_empty(mce_inject_cpumask)) | ||
159 | apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); | ||
160 | start = jiffies; | ||
161 | while (!cpus_empty(mce_inject_cpumask)) { | ||
162 | if (!time_before(jiffies, start + 2*HZ)) { | ||
163 | printk(KERN_ERR | ||
164 | "Timeout waiting for mce inject NMI %lx\n", | ||
165 | *cpus_addr(mce_inject_cpumask)); | ||
166 | break; | ||
167 | } | ||
168 | cpu_relax(); | ||
169 | } | ||
170 | raise_local(m); | ||
171 | put_cpu(); | ||
172 | put_online_cpus(); | ||
173 | } else | ||
174 | #endif | ||
175 | raise_local(m); | ||
73 | } | 176 | } |
74 | 177 | ||
75 | /* Error injection interface */ | 178 | /* Error injection interface */ |
76 | static ssize_t mce_write(struct file *filp, const char __user *ubuf, | 179 | static ssize_t mce_write(struct file *filp, const char __user *ubuf, |
77 | size_t usize, loff_t *off) | 180 | size_t usize, loff_t *off) |
78 | { | 181 | { |
79 | struct delayed_mce *dm; | ||
80 | struct mce m; | 182 | struct mce m; |
81 | 183 | ||
82 | if (!capable(CAP_SYS_ADMIN)) | 184 | if (!capable(CAP_SYS_ADMIN)) |
@@ -96,19 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, | |||
96 | if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) | 198 | if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) |
97 | return -EINVAL; | 199 | return -EINVAL; |
98 | 200 | ||
99 | dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); | ||
100 | if (!dm) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | /* | 201 | /* |
104 | * Need to give user space some time to set everything up, | 202 | * Need to give user space some time to set everything up, |
105 | * so do it a jiffie or two later everywhere. | 203 | * so do it a jiffie or two later everywhere. |
106 | * Should we use a hrtimer here for better synchronization? | ||
107 | */ | 204 | */ |
108 | memcpy(&dm->m, &m, sizeof(struct mce)); | 205 | schedule_timeout(2); |
109 | setup_timer(&dm->timer, raise_mce, (unsigned long)dm); | 206 | raise_mce(&m); |
110 | dm->timer.expires = jiffies + 2; | ||
111 | add_timer_on(&dm->timer, m.extcpu); | ||
112 | return usize; | 207 | return usize; |
113 | } | 208 | } |
114 | 209 | ||
@@ -116,6 +211,7 @@ static int inject_init(void) | |||
116 | { | 211 | { |
117 | printk(KERN_INFO "Machine check injector initialized\n"); | 212 | printk(KERN_INFO "Machine check injector initialized\n"); |
118 | mce_chrdev_ops.write = mce_write; | 213 | mce_chrdev_ops.write = mce_write; |
214 | register_die_notifier(&mce_raise_nb); | ||
119 | return 0; | 215 | return 0; |
120 | } | 216 | } |
121 | 217 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 54dcb8ff12e5..32996f9fab67 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/sysdev.h> | ||
1 | #include <asm/mce.h> | 2 | #include <asm/mce.h> |
2 | 3 | ||
3 | enum severity_level { | 4 | enum severity_level { |
@@ -10,6 +11,20 @@ enum severity_level { | |||
10 | MCE_PANIC_SEVERITY, | 11 | MCE_PANIC_SEVERITY, |
11 | }; | 12 | }; |
12 | 13 | ||
14 | #define ATTR_LEN 16 | ||
15 | |||
16 | /* One object for each MCE bank, shared by all CPUs */ | ||
17 | struct mce_bank { | ||
18 | u64 ctl; /* subevents to enable */ | ||
19 | unsigned char init; /* initialise bank? */ | ||
20 | struct sysdev_attribute attr; /* sysdev attribute */ | ||
21 | char attrname[ATTR_LEN]; /* attribute name */ | ||
22 | }; | ||
23 | |||
13 | int mce_severity(struct mce *a, int tolerant, char **msg); | 24 | int mce_severity(struct mce *a, int tolerant, char **msg); |
25 | struct dentry *mce_get_debugfs_dir(void); | ||
14 | 26 | ||
15 | extern int mce_ser; | 27 | extern int mce_ser; |
28 | |||
29 | extern struct mce_bank *mce_banks; | ||
30 | |||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index ff0807f97056..8a85dd1b1aa1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg) | |||
139 | } | 139 | } |
140 | } | 140 | } |
141 | 141 | ||
142 | #ifdef CONFIG_DEBUG_FS | ||
142 | static void *s_start(struct seq_file *f, loff_t *pos) | 143 | static void *s_start(struct seq_file *f, loff_t *pos) |
143 | { | 144 | { |
144 | if (*pos >= ARRAY_SIZE(severities)) | 145 | if (*pos >= ARRAY_SIZE(severities)) |
@@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void) | |||
197 | { | 198 | { |
198 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; | 199 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; |
199 | 200 | ||
200 | dmce = debugfs_create_dir("mce", NULL); | 201 | dmce = mce_get_debugfs_dir(); |
201 | if (dmce == NULL) | 202 | if (dmce == NULL) |
202 | goto err_out; | 203 | goto err_out; |
203 | fseverities_coverage = debugfs_create_file("severities-coverage", | 204 | fseverities_coverage = debugfs_create_file("severities-coverage", |
@@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void) | |||
209 | return 0; | 210 | return 0; |
210 | 211 | ||
211 | err_out: | 212 | err_out: |
212 | if (fseverities_coverage) | ||
213 | debugfs_remove(fseverities_coverage); | ||
214 | if (dmce) | ||
215 | debugfs_remove(dmce); | ||
216 | return -ENOMEM; | 213 | return -ENOMEM; |
217 | } | 214 | } |
218 | late_initcall(severities_debugfs_init); | 215 | late_initcall(severities_debugfs_init); |
216 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 9bfe9d2ea615..2f5aab26320e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
35 | #include <linux/fs.h> | 35 | #include <linux/fs.h> |
36 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
37 | #include <linux/debugfs.h> | ||
37 | 38 | ||
38 | #include <asm/processor.h> | 39 | #include <asm/processor.h> |
39 | #include <asm/hw_irq.h> | 40 | #include <asm/hw_irq.h> |
@@ -45,21 +46,8 @@ | |||
45 | 46 | ||
46 | #include "mce-internal.h" | 47 | #include "mce-internal.h" |
47 | 48 | ||
48 | /* Handle unconfigured int18 (should never happen) */ | ||
49 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
50 | { | ||
51 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
52 | smp_processor_id()); | ||
53 | } | ||
54 | |||
55 | /* Call the installed machine check handler for this CPU setup. */ | ||
56 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
57 | unexpected_machine_check; | ||
58 | |||
59 | int mce_disabled __read_mostly; | 49 | int mce_disabled __read_mostly; |
60 | 50 | ||
61 | #ifdef CONFIG_X86_NEW_MCE | ||
62 | |||
63 | #define MISC_MCELOG_MINOR 227 | 51 | #define MISC_MCELOG_MINOR 227 |
64 | 52 | ||
65 | #define SPINUNIT 100 /* 100ns */ | 53 | #define SPINUNIT 100 /* 100ns */ |
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); | |||
77 | */ | 65 | */ |
78 | static int tolerant __read_mostly = 1; | 66 | static int tolerant __read_mostly = 1; |
79 | static int banks __read_mostly; | 67 | static int banks __read_mostly; |
80 | static u64 *bank __read_mostly; | ||
81 | static int rip_msr __read_mostly; | 68 | static int rip_msr __read_mostly; |
82 | static int mce_bootlog __read_mostly = -1; | 69 | static int mce_bootlog __read_mostly = -1; |
83 | static int monarch_timeout __read_mostly = -1; | 70 | static int monarch_timeout __read_mostly = -1; |
@@ -87,13 +74,13 @@ int mce_cmci_disabled __read_mostly; | |||
87 | int mce_ignore_ce __read_mostly; | 74 | int mce_ignore_ce __read_mostly; |
88 | int mce_ser __read_mostly; | 75 | int mce_ser __read_mostly; |
89 | 76 | ||
77 | struct mce_bank *mce_banks __read_mostly; | ||
78 | |||
90 | /* User mode helper program triggered by machine check event */ | 79 | /* User mode helper program triggered by machine check event */ |
91 | static unsigned long mce_need_notify; | 80 | static unsigned long mce_need_notify; |
92 | static char mce_helper[128]; | 81 | static char mce_helper[128]; |
93 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | 82 | static char *mce_helper_argv[2] = { mce_helper, NULL }; |
94 | 83 | ||
95 | static unsigned long dont_init_banks; | ||
96 | |||
97 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | 84 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); |
98 | static DEFINE_PER_CPU(struct mce, mces_seen); | 85 | static DEFINE_PER_CPU(struct mce, mces_seen); |
99 | static int cpu_missing; | 86 | static int cpu_missing; |
@@ -104,11 +91,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
104 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 91 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
105 | }; | 92 | }; |
106 | 93 | ||
107 | static inline int skip_bank_init(int i) | ||
108 | { | ||
109 | return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); | ||
110 | } | ||
111 | |||
112 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 94 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
113 | 95 | ||
114 | /* Do initial initialization of a struct mce */ | 96 | /* Do initial initialization of a struct mce */ |
@@ -232,6 +214,9 @@ static void print_mce_tail(void) | |||
232 | 214 | ||
233 | static atomic_t mce_paniced; | 215 | static atomic_t mce_paniced; |
234 | 216 | ||
217 | static int fake_panic; | ||
218 | static atomic_t mce_fake_paniced; | ||
219 | |||
235 | /* Panic in progress. Enable interrupts and wait for final IPI */ | 220 | /* Panic in progress. Enable interrupts and wait for final IPI */ |
236 | static void wait_for_panic(void) | 221 | static void wait_for_panic(void) |
237 | { | 222 | { |
@@ -249,15 +234,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
249 | { | 234 | { |
250 | int i; | 235 | int i; |
251 | 236 | ||
252 | /* | 237 | if (!fake_panic) { |
253 | * Make sure only one CPU runs in machine check panic | 238 | /* |
254 | */ | 239 | * Make sure only one CPU runs in machine check panic |
255 | if (atomic_add_return(1, &mce_paniced) > 1) | 240 | */ |
256 | wait_for_panic(); | 241 | if (atomic_inc_return(&mce_paniced) > 1) |
257 | barrier(); | 242 | wait_for_panic(); |
243 | barrier(); | ||
258 | 244 | ||
259 | bust_spinlocks(1); | 245 | bust_spinlocks(1); |
260 | console_verbose(); | 246 | console_verbose(); |
247 | } else { | ||
248 | /* Don't log too much for fake panic */ | ||
249 | if (atomic_inc_return(&mce_fake_paniced) > 1) | ||
250 | return; | ||
251 | } | ||
261 | print_mce_head(); | 252 | print_mce_head(); |
262 | /* First print corrected ones that are still unlogged */ | 253 | /* First print corrected ones that are still unlogged */ |
263 | for (i = 0; i < MCE_LOG_LEN; i++) { | 254 | for (i = 0; i < MCE_LOG_LEN; i++) { |
@@ -284,9 +275,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
284 | print_mce_tail(); | 275 | print_mce_tail(); |
285 | if (exp) | 276 | if (exp) |
286 | printk(KERN_EMERG "Machine check: %s\n", exp); | 277 | printk(KERN_EMERG "Machine check: %s\n", exp); |
287 | if (panic_timeout == 0) | 278 | if (!fake_panic) { |
288 | panic_timeout = mce_panic_timeout; | 279 | if (panic_timeout == 0) |
289 | panic(msg); | 280 | panic_timeout = mce_panic_timeout; |
281 | panic(msg); | ||
282 | } else | ||
283 | printk(KERN_EMERG "Fake kernel panic: %s\n", msg); | ||
290 | } | 284 | } |
291 | 285 | ||
292 | /* Support code for software error injection */ | 286 | /* Support code for software error injection */ |
@@ -296,11 +290,11 @@ static int msr_to_offset(u32 msr) | |||
296 | unsigned bank = __get_cpu_var(injectm.bank); | 290 | unsigned bank = __get_cpu_var(injectm.bank); |
297 | if (msr == rip_msr) | 291 | if (msr == rip_msr) |
298 | return offsetof(struct mce, ip); | 292 | return offsetof(struct mce, ip); |
299 | if (msr == MSR_IA32_MC0_STATUS + bank*4) | 293 | if (msr == MSR_IA32_MCx_STATUS(bank)) |
300 | return offsetof(struct mce, status); | 294 | return offsetof(struct mce, status); |
301 | if (msr == MSR_IA32_MC0_ADDR + bank*4) | 295 | if (msr == MSR_IA32_MCx_ADDR(bank)) |
302 | return offsetof(struct mce, addr); | 296 | return offsetof(struct mce, addr); |
303 | if (msr == MSR_IA32_MC0_MISC + bank*4) | 297 | if (msr == MSR_IA32_MCx_MISC(bank)) |
304 | return offsetof(struct mce, misc); | 298 | return offsetof(struct mce, misc); |
305 | if (msr == MSR_IA32_MCG_STATUS) | 299 | if (msr == MSR_IA32_MCG_STATUS) |
306 | return offsetof(struct mce, mcgstatus); | 300 | return offsetof(struct mce, mcgstatus); |
@@ -505,7 +499,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
505 | 499 | ||
506 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 500 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
507 | for (i = 0; i < banks; i++) { | 501 | for (i = 0; i < banks; i++) { |
508 | if (!bank[i] || !test_bit(i, *b)) | 502 | if (!mce_banks[i].ctl || !test_bit(i, *b)) |
509 | continue; | 503 | continue; |
510 | 504 | ||
511 | m.misc = 0; | 505 | m.misc = 0; |
@@ -514,7 +508,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
514 | m.tsc = 0; | 508 | m.tsc = 0; |
515 | 509 | ||
516 | barrier(); | 510 | barrier(); |
517 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 511 | m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
518 | if (!(m.status & MCI_STATUS_VAL)) | 512 | if (!(m.status & MCI_STATUS_VAL)) |
519 | continue; | 513 | continue; |
520 | 514 | ||
@@ -529,9 +523,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
529 | continue; | 523 | continue; |
530 | 524 | ||
531 | if (m.status & MCI_STATUS_MISCV) | 525 | if (m.status & MCI_STATUS_MISCV) |
532 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | 526 | m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); |
533 | if (m.status & MCI_STATUS_ADDRV) | 527 | if (m.status & MCI_STATUS_ADDRV) |
534 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | 528 | m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); |
535 | 529 | ||
536 | if (!(flags & MCP_TIMESTAMP)) | 530 | if (!(flags & MCP_TIMESTAMP)) |
537 | m.tsc = 0; | 531 | m.tsc = 0; |
@@ -547,7 +541,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
547 | /* | 541 | /* |
548 | * Clear state for this bank. | 542 | * Clear state for this bank. |
549 | */ | 543 | */ |
550 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 544 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
551 | } | 545 | } |
552 | 546 | ||
553 | /* | 547 | /* |
@@ -568,7 +562,7 @@ static int mce_no_way_out(struct mce *m, char **msg) | |||
568 | int i; | 562 | int i; |
569 | 563 | ||
570 | for (i = 0; i < banks; i++) { | 564 | for (i = 0; i < banks; i++) { |
571 | m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 565 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
572 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | 566 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) |
573 | return 1; | 567 | return 1; |
574 | } | 568 | } |
@@ -628,7 +622,7 @@ out: | |||
628 | * This way we prevent any potential data corruption in a unrecoverable case | 622 | * This way we prevent any potential data corruption in a unrecoverable case |
629 | * and also makes sure always all CPU's errors are examined. | 623 | * and also makes sure always all CPU's errors are examined. |
630 | * | 624 | * |
631 | * Also this detects the case of an machine check event coming from outer | 625 | * Also this detects the case of a machine check event coming from outer |
632 | * space (not detected by any CPUs) In this case some external agent wants | 626 | * space (not detected by any CPUs) In this case some external agent wants |
633 | * us to shut down, so panic too. | 627 | * us to shut down, so panic too. |
634 | * | 628 | * |
@@ -681,7 +675,7 @@ static void mce_reign(void) | |||
681 | * No machine check event found. Must be some external | 675 | * No machine check event found. Must be some external |
682 | * source or one CPU is hung. Panic. | 676 | * source or one CPU is hung. Panic. |
683 | */ | 677 | */ |
684 | if (!m && tolerant < 3) | 678 | if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) |
685 | mce_panic("Machine check from unknown source", NULL, NULL); | 679 | mce_panic("Machine check from unknown source", NULL, NULL); |
686 | 680 | ||
687 | /* | 681 | /* |
@@ -715,7 +709,7 @@ static int mce_start(int *no_way_out) | |||
715 | * global_nwo should be updated before mce_callin | 709 | * global_nwo should be updated before mce_callin |
716 | */ | 710 | */ |
717 | smp_wmb(); | 711 | smp_wmb(); |
718 | order = atomic_add_return(1, &mce_callin); | 712 | order = atomic_inc_return(&mce_callin); |
719 | 713 | ||
720 | /* | 714 | /* |
721 | * Wait for everyone. | 715 | * Wait for everyone. |
@@ -852,7 +846,7 @@ static void mce_clear_state(unsigned long *toclear) | |||
852 | 846 | ||
853 | for (i = 0; i < banks; i++) { | 847 | for (i = 0; i < banks; i++) { |
854 | if (test_bit(i, toclear)) | 848 | if (test_bit(i, toclear)) |
855 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 849 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
856 | } | 850 | } |
857 | } | 851 | } |
858 | 852 | ||
@@ -905,11 +899,11 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
905 | mce_setup(&m); | 899 | mce_setup(&m); |
906 | 900 | ||
907 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 901 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
908 | no_way_out = mce_no_way_out(&m, &msg); | ||
909 | |||
910 | final = &__get_cpu_var(mces_seen); | 902 | final = &__get_cpu_var(mces_seen); |
911 | *final = m; | 903 | *final = m; |
912 | 904 | ||
905 | no_way_out = mce_no_way_out(&m, &msg); | ||
906 | |||
913 | barrier(); | 907 | barrier(); |
914 | 908 | ||
915 | /* | 909 | /* |
@@ -926,14 +920,14 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
926 | order = mce_start(&no_way_out); | 920 | order = mce_start(&no_way_out); |
927 | for (i = 0; i < banks; i++) { | 921 | for (i = 0; i < banks; i++) { |
928 | __clear_bit(i, toclear); | 922 | __clear_bit(i, toclear); |
929 | if (!bank[i]) | 923 | if (!mce_banks[i].ctl) |
930 | continue; | 924 | continue; |
931 | 925 | ||
932 | m.misc = 0; | 926 | m.misc = 0; |
933 | m.addr = 0; | 927 | m.addr = 0; |
934 | m.bank = i; | 928 | m.bank = i; |
935 | 929 | ||
936 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 930 | m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
937 | if ((m.status & MCI_STATUS_VAL) == 0) | 931 | if ((m.status & MCI_STATUS_VAL) == 0) |
938 | continue; | 932 | continue; |
939 | 933 | ||
@@ -974,9 +968,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
974 | kill_it = 1; | 968 | kill_it = 1; |
975 | 969 | ||
976 | if (m.status & MCI_STATUS_MISCV) | 970 | if (m.status & MCI_STATUS_MISCV) |
977 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | 971 | m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); |
978 | if (m.status & MCI_STATUS_ADDRV) | 972 | if (m.status & MCI_STATUS_ADDRV) |
979 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | 973 | m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); |
980 | 974 | ||
981 | /* | 975 | /* |
982 | * Action optional error. Queue address for later processing. | 976 | * Action optional error. Queue address for later processing. |
@@ -1101,7 +1095,7 @@ void mce_log_therm_throt_event(__u64 status) | |||
1101 | */ | 1095 | */ |
1102 | static int check_interval = 5 * 60; /* 5 minutes */ | 1096 | static int check_interval = 5 * 60; /* 5 minutes */ |
1103 | 1097 | ||
1104 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | 1098 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ |
1105 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1099 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1106 | 1100 | ||
1107 | static void mcheck_timer(unsigned long data) | 1101 | static void mcheck_timer(unsigned long data) |
@@ -1120,7 +1114,7 @@ static void mcheck_timer(unsigned long data) | |||
1120 | * Alert userspace if needed. If we logged an MCE, reduce the | 1114 | * Alert userspace if needed. If we logged an MCE, reduce the |
1121 | * polling interval, otherwise increase the polling interval. | 1115 | * polling interval, otherwise increase the polling interval. |
1122 | */ | 1116 | */ |
1123 | n = &__get_cpu_var(next_interval); | 1117 | n = &__get_cpu_var(mce_next_interval); |
1124 | if (mce_notify_irq()) | 1118 | if (mce_notify_irq()) |
1125 | *n = max(*n/2, HZ/100); | 1119 | *n = max(*n/2, HZ/100); |
1126 | else | 1120 | else |
@@ -1169,10 +1163,25 @@ int mce_notify_irq(void) | |||
1169 | } | 1163 | } |
1170 | EXPORT_SYMBOL_GPL(mce_notify_irq); | 1164 | EXPORT_SYMBOL_GPL(mce_notify_irq); |
1171 | 1165 | ||
1166 | static int mce_banks_init(void) | ||
1167 | { | ||
1168 | int i; | ||
1169 | |||
1170 | mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); | ||
1171 | if (!mce_banks) | ||
1172 | return -ENOMEM; | ||
1173 | for (i = 0; i < banks; i++) { | ||
1174 | struct mce_bank *b = &mce_banks[i]; | ||
1175 | b->ctl = -1ULL; | ||
1176 | b->init = 1; | ||
1177 | } | ||
1178 | return 0; | ||
1179 | } | ||
1180 | |||
1172 | /* | 1181 | /* |
1173 | * Initialize Machine Checks for a CPU. | 1182 | * Initialize Machine Checks for a CPU. |
1174 | */ | 1183 | */ |
1175 | static int mce_cap_init(void) | 1184 | static int __cpuinit mce_cap_init(void) |
1176 | { | 1185 | { |
1177 | unsigned b; | 1186 | unsigned b; |
1178 | u64 cap; | 1187 | u64 cap; |
@@ -1192,11 +1201,10 @@ static int mce_cap_init(void) | |||
1192 | /* Don't support asymmetric configurations today */ | 1201 | /* Don't support asymmetric configurations today */ |
1193 | WARN_ON(banks != 0 && b != banks); | 1202 | WARN_ON(banks != 0 && b != banks); |
1194 | banks = b; | 1203 | banks = b; |
1195 | if (!bank) { | 1204 | if (!mce_banks) { |
1196 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | 1205 | int err = mce_banks_init(); |
1197 | if (!bank) | 1206 | if (err) |
1198 | return -ENOMEM; | 1207 | return err; |
1199 | memset(bank, 0xff, banks * sizeof(u64)); | ||
1200 | } | 1208 | } |
1201 | 1209 | ||
1202 | /* Use accurate RIP reporting if available. */ | 1210 | /* Use accurate RIP reporting if available. */ |
@@ -1228,15 +1236,16 @@ static void mce_init(void) | |||
1228 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 1236 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
1229 | 1237 | ||
1230 | for (i = 0; i < banks; i++) { | 1238 | for (i = 0; i < banks; i++) { |
1231 | if (skip_bank_init(i)) | 1239 | struct mce_bank *b = &mce_banks[i]; |
1240 | if (!b->init) | ||
1232 | continue; | 1241 | continue; |
1233 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | 1242 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); |
1234 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 1243 | wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
1235 | } | 1244 | } |
1236 | } | 1245 | } |
1237 | 1246 | ||
1238 | /* Add per CPU specific workarounds here */ | 1247 | /* Add per CPU specific workarounds here */ |
1239 | static int mce_cpu_quirks(struct cpuinfo_x86 *c) | 1248 | static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) |
1240 | { | 1249 | { |
1241 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1250 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1242 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1251 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); |
@@ -1251,7 +1260,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1251 | * trips off incorrectly with the IOMMU & 3ware | 1260 | * trips off incorrectly with the IOMMU & 3ware |
1252 | * & Cerberus: | 1261 | * & Cerberus: |
1253 | */ | 1262 | */ |
1254 | clear_bit(10, (unsigned long *)&bank[4]); | 1263 | clear_bit(10, (unsigned long *)&mce_banks[4].ctl); |
1255 | } | 1264 | } |
1256 | if (c->x86 <= 17 && mce_bootlog < 0) { | 1265 | if (c->x86 <= 17 && mce_bootlog < 0) { |
1257 | /* | 1266 | /* |
@@ -1265,7 +1274,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1265 | * by default. | 1274 | * by default. |
1266 | */ | 1275 | */ |
1267 | if (c->x86 == 6 && banks > 0) | 1276 | if (c->x86 == 6 && banks > 0) |
1268 | bank[0] = 0; | 1277 | mce_banks[0].ctl = 0; |
1269 | } | 1278 | } |
1270 | 1279 | ||
1271 | if (c->x86_vendor == X86_VENDOR_INTEL) { | 1280 | if (c->x86_vendor == X86_VENDOR_INTEL) { |
@@ -1278,8 +1287,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1278 | * valid event later, merely don't write CTL0. | 1287 | * valid event later, merely don't write CTL0. |
1279 | */ | 1288 | */ |
1280 | 1289 | ||
1281 | if (c->x86 == 6 && c->x86_model < 0x1A) | 1290 | if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) |
1282 | __set_bit(0, &dont_init_banks); | 1291 | mce_banks[0].init = 0; |
1283 | 1292 | ||
1284 | /* | 1293 | /* |
1285 | * All newer Intel systems support MCE broadcasting. Enable | 1294 | * All newer Intel systems support MCE broadcasting. Enable |
@@ -1335,7 +1344,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
1335 | static void mce_init_timer(void) | 1344 | static void mce_init_timer(void) |
1336 | { | 1345 | { |
1337 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1346 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1338 | int *n = &__get_cpu_var(next_interval); | 1347 | int *n = &__get_cpu_var(mce_next_interval); |
1339 | 1348 | ||
1340 | if (mce_ignore_ce) | 1349 | if (mce_ignore_ce) |
1341 | return; | 1350 | return; |
@@ -1348,6 +1357,17 @@ static void mce_init_timer(void) | |||
1348 | add_timer_on(t, smp_processor_id()); | 1357 | add_timer_on(t, smp_processor_id()); |
1349 | } | 1358 | } |
1350 | 1359 | ||
1360 | /* Handle unconfigured int18 (should never happen) */ | ||
1361 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
1362 | { | ||
1363 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
1364 | smp_processor_id()); | ||
1365 | } | ||
1366 | |||
1367 | /* Call the installed machine check handler for this CPU setup. */ | ||
1368 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
1369 | unexpected_machine_check; | ||
1370 | |||
1351 | /* | 1371 | /* |
1352 | * Called for each booted CPU to set up machine checks. | 1372 | * Called for each booted CPU to set up machine checks. |
1353 | * Must be called with preempt off: | 1373 | * Must be called with preempt off: |
@@ -1561,8 +1581,10 @@ static struct miscdevice mce_log_device = { | |||
1561 | */ | 1581 | */ |
1562 | static int __init mcheck_enable(char *str) | 1582 | static int __init mcheck_enable(char *str) |
1563 | { | 1583 | { |
1564 | if (*str == 0) | 1584 | if (*str == 0) { |
1565 | enable_p5_mce(); | 1585 | enable_p5_mce(); |
1586 | return 1; | ||
1587 | } | ||
1566 | if (*str == '=') | 1588 | if (*str == '=') |
1567 | str++; | 1589 | str++; |
1568 | if (!strcmp(str, "off")) | 1590 | if (!strcmp(str, "off")) |
@@ -1603,8 +1625,9 @@ static int mce_disable(void) | |||
1603 | int i; | 1625 | int i; |
1604 | 1626 | ||
1605 | for (i = 0; i < banks; i++) { | 1627 | for (i = 0; i < banks; i++) { |
1606 | if (!skip_bank_init(i)) | 1628 | struct mce_bank *b = &mce_banks[i]; |
1607 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | 1629 | if (b->init) |
1630 | wrmsrl(MSR_IA32_MCx_CTL(i), 0); | ||
1608 | } | 1631 | } |
1609 | return 0; | 1632 | return 0; |
1610 | } | 1633 | } |
@@ -1679,14 +1702,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev); | |||
1679 | __cpuinitdata | 1702 | __cpuinitdata |
1680 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | 1703 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); |
1681 | 1704 | ||
1682 | static struct sysdev_attribute *bank_attrs; | 1705 | static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) |
1706 | { | ||
1707 | return container_of(attr, struct mce_bank, attr); | ||
1708 | } | ||
1683 | 1709 | ||
1684 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1710 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, |
1685 | char *buf) | 1711 | char *buf) |
1686 | { | 1712 | { |
1687 | u64 b = bank[attr - bank_attrs]; | 1713 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); |
1688 | |||
1689 | return sprintf(buf, "%llx\n", b); | ||
1690 | } | 1714 | } |
1691 | 1715 | ||
1692 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1716 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, |
@@ -1697,7 +1721,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1697 | if (strict_strtoull(buf, 0, &new) < 0) | 1721 | if (strict_strtoull(buf, 0, &new) < 0) |
1698 | return -EINVAL; | 1722 | return -EINVAL; |
1699 | 1723 | ||
1700 | bank[attr - bank_attrs] = new; | 1724 | attr_to_bank(attr)->ctl = new; |
1701 | mce_restart(); | 1725 | mce_restart(); |
1702 | 1726 | ||
1703 | return size; | 1727 | return size; |
@@ -1839,7 +1863,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1839 | } | 1863 | } |
1840 | for (j = 0; j < banks; j++) { | 1864 | for (j = 0; j < banks; j++) { |
1841 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | 1865 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), |
1842 | &bank_attrs[j]); | 1866 | &mce_banks[j].attr); |
1843 | if (err) | 1867 | if (err) |
1844 | goto error2; | 1868 | goto error2; |
1845 | } | 1869 | } |
@@ -1848,10 +1872,10 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1848 | return 0; | 1872 | return 0; |
1849 | error2: | 1873 | error2: |
1850 | while (--j >= 0) | 1874 | while (--j >= 0) |
1851 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); | 1875 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); |
1852 | error: | 1876 | error: |
1853 | while (--i >= 0) | 1877 | while (--i >= 0) |
1854 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1878 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); |
1855 | 1879 | ||
1856 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1880 | sysdev_unregister(&per_cpu(mce_dev, cpu)); |
1857 | 1881 | ||
@@ -1869,7 +1893,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
1869 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1893 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); |
1870 | 1894 | ||
1871 | for (i = 0; i < banks; i++) | 1895 | for (i = 0; i < banks; i++) |
1872 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | 1896 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); |
1873 | 1897 | ||
1874 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1898 | sysdev_unregister(&per_cpu(mce_dev, cpu)); |
1875 | cpumask_clear_cpu(cpu, mce_dev_initialized); | 1899 | cpumask_clear_cpu(cpu, mce_dev_initialized); |
@@ -1886,8 +1910,9 @@ static void mce_disable_cpu(void *h) | |||
1886 | if (!(action & CPU_TASKS_FROZEN)) | 1910 | if (!(action & CPU_TASKS_FROZEN)) |
1887 | cmci_clear(); | 1911 | cmci_clear(); |
1888 | for (i = 0; i < banks; i++) { | 1912 | for (i = 0; i < banks; i++) { |
1889 | if (!skip_bank_init(i)) | 1913 | struct mce_bank *b = &mce_banks[i]; |
1890 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | 1914 | if (b->init) |
1915 | wrmsrl(MSR_IA32_MCx_CTL(i), 0); | ||
1891 | } | 1916 | } |
1892 | } | 1917 | } |
1893 | 1918 | ||
@@ -1902,8 +1927,9 @@ static void mce_reenable_cpu(void *h) | |||
1902 | if (!(action & CPU_TASKS_FROZEN)) | 1927 | if (!(action & CPU_TASKS_FROZEN)) |
1903 | cmci_reenable(); | 1928 | cmci_reenable(); |
1904 | for (i = 0; i < banks; i++) { | 1929 | for (i = 0; i < banks; i++) { |
1905 | if (!skip_bank_init(i)) | 1930 | struct mce_bank *b = &mce_banks[i]; |
1906 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | 1931 | if (b->init) |
1932 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); | ||
1907 | } | 1933 | } |
1908 | } | 1934 | } |
1909 | 1935 | ||
@@ -1935,7 +1961,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
1935 | case CPU_DOWN_FAILED: | 1961 | case CPU_DOWN_FAILED: |
1936 | case CPU_DOWN_FAILED_FROZEN: | 1962 | case CPU_DOWN_FAILED_FROZEN: |
1937 | t->expires = round_jiffies(jiffies + | 1963 | t->expires = round_jiffies(jiffies + |
1938 | __get_cpu_var(next_interval)); | 1964 | __get_cpu_var(mce_next_interval)); |
1939 | add_timer_on(t, cpu); | 1965 | add_timer_on(t, cpu); |
1940 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 1966 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); |
1941 | break; | 1967 | break; |
@@ -1951,35 +1977,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { | |||
1951 | .notifier_call = mce_cpu_callback, | 1977 | .notifier_call = mce_cpu_callback, |
1952 | }; | 1978 | }; |
1953 | 1979 | ||
1954 | static __init int mce_init_banks(void) | 1980 | static __init void mce_init_banks(void) |
1955 | { | 1981 | { |
1956 | int i; | 1982 | int i; |
1957 | 1983 | ||
1958 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
1959 | GFP_KERNEL); | ||
1960 | if (!bank_attrs) | ||
1961 | return -ENOMEM; | ||
1962 | |||
1963 | for (i = 0; i < banks; i++) { | 1984 | for (i = 0; i < banks; i++) { |
1964 | struct sysdev_attribute *a = &bank_attrs[i]; | 1985 | struct mce_bank *b = &mce_banks[i]; |
1986 | struct sysdev_attribute *a = &b->attr; | ||
1965 | 1987 | ||
1966 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | 1988 | a->attr.name = b->attrname; |
1967 | if (!a->attr.name) | 1989 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); |
1968 | goto nomem; | ||
1969 | 1990 | ||
1970 | a->attr.mode = 0644; | 1991 | a->attr.mode = 0644; |
1971 | a->show = show_bank; | 1992 | a->show = show_bank; |
1972 | a->store = set_bank; | 1993 | a->store = set_bank; |
1973 | } | 1994 | } |
1974 | return 0; | ||
1975 | |||
1976 | nomem: | ||
1977 | while (--i >= 0) | ||
1978 | kfree(bank_attrs[i].attr.name); | ||
1979 | kfree(bank_attrs); | ||
1980 | bank_attrs = NULL; | ||
1981 | |||
1982 | return -ENOMEM; | ||
1983 | } | 1995 | } |
1984 | 1996 | ||
1985 | static __init int mce_init_device(void) | 1997 | static __init int mce_init_device(void) |
@@ -1992,9 +2004,7 @@ static __init int mce_init_device(void) | |||
1992 | 2004 | ||
1993 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | 2005 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); |
1994 | 2006 | ||
1995 | err = mce_init_banks(); | 2007 | mce_init_banks(); |
1996 | if (err) | ||
1997 | return err; | ||
1998 | 2008 | ||
1999 | err = sysdev_class_register(&mce_sysclass); | 2009 | err = sysdev_class_register(&mce_sysclass); |
2000 | if (err) | 2010 | if (err) |
@@ -2014,57 +2024,65 @@ static __init int mce_init_device(void) | |||
2014 | 2024 | ||
2015 | device_initcall(mce_init_device); | 2025 | device_initcall(mce_init_device); |
2016 | 2026 | ||
2017 | #else /* CONFIG_X86_OLD_MCE: */ | 2027 | /* |
2018 | 2028 | * Old style boot options parsing. Only for compatibility. | |
2019 | int nr_mce_banks; | 2029 | */ |
2020 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | 2030 | static int __init mcheck_disable(char *str) |
2031 | { | ||
2032 | mce_disabled = 1; | ||
2033 | return 1; | ||
2034 | } | ||
2035 | __setup("nomce", mcheck_disable); | ||
2021 | 2036 | ||
2022 | /* This has to be run for each processor */ | 2037 | #ifdef CONFIG_DEBUG_FS |
2023 | void mcheck_init(struct cpuinfo_x86 *c) | 2038 | struct dentry *mce_get_debugfs_dir(void) |
2024 | { | 2039 | { |
2025 | if (mce_disabled) | 2040 | static struct dentry *dmce; |
2026 | return; | ||
2027 | 2041 | ||
2028 | switch (c->x86_vendor) { | 2042 | if (!dmce) |
2029 | case X86_VENDOR_AMD: | 2043 | dmce = debugfs_create_dir("mce", NULL); |
2030 | amd_mcheck_init(c); | ||
2031 | break; | ||
2032 | 2044 | ||
2033 | case X86_VENDOR_INTEL: | 2045 | return dmce; |
2034 | if (c->x86 == 5) | 2046 | } |
2035 | intel_p5_mcheck_init(c); | ||
2036 | if (c->x86 == 6) | ||
2037 | intel_p6_mcheck_init(c); | ||
2038 | if (c->x86 == 15) | ||
2039 | intel_p4_mcheck_init(c); | ||
2040 | break; | ||
2041 | 2047 | ||
2042 | case X86_VENDOR_CENTAUR: | 2048 | static void mce_reset(void) |
2043 | if (c->x86 == 5) | 2049 | { |
2044 | winchip_mcheck_init(c); | 2050 | cpu_missing = 0; |
2045 | break; | 2051 | atomic_set(&mce_fake_paniced, 0); |
2052 | atomic_set(&mce_executing, 0); | ||
2053 | atomic_set(&mce_callin, 0); | ||
2054 | atomic_set(&global_nwo, 0); | ||
2055 | } | ||
2046 | 2056 | ||
2047 | default: | 2057 | static int fake_panic_get(void *data, u64 *val) |
2048 | break; | 2058 | { |
2049 | } | 2059 | *val = fake_panic; |
2050 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); | 2060 | return 0; |
2051 | } | 2061 | } |
2052 | 2062 | ||
2053 | static int __init mcheck_enable(char *str) | 2063 | static int fake_panic_set(void *data, u64 val) |
2054 | { | 2064 | { |
2055 | mce_p5_enabled = 1; | 2065 | mce_reset(); |
2056 | return 1; | 2066 | fake_panic = val; |
2067 | return 0; | ||
2057 | } | 2068 | } |
2058 | __setup("mce", mcheck_enable); | ||
2059 | 2069 | ||
2060 | #endif /* CONFIG_X86_OLD_MCE */ | 2070 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, |
2071 | fake_panic_set, "%llu\n"); | ||
2061 | 2072 | ||
2062 | /* | 2073 | static int __init mce_debugfs_init(void) |
2063 | * Old style boot options parsing. Only for compatibility. | ||
2064 | */ | ||
2065 | static int __init mcheck_disable(char *str) | ||
2066 | { | 2074 | { |
2067 | mce_disabled = 1; | 2075 | struct dentry *dmce, *ffake_panic; |
2068 | return 1; | 2076 | |
2077 | dmce = mce_get_debugfs_dir(); | ||
2078 | if (!dmce) | ||
2079 | return -ENOMEM; | ||
2080 | ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, | ||
2081 | &fake_panic_fops); | ||
2082 | if (!ffake_panic) | ||
2083 | return -ENOMEM; | ||
2084 | |||
2085 | return 0; | ||
2069 | } | 2086 | } |
2070 | __setup("nomce", mcheck_disable); | 2087 | late_initcall(mce_debugfs_init); |
2088 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 1fecba404fd8..8cd5224943b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -69,7 +69,7 @@ struct threshold_bank { | |||
69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
70 | cpumask_var_t cpus; | 70 | cpumask_var_t cpus; |
71 | }; | 71 | }; |
72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | 72 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); |
73 | 73 | ||
74 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
75 | static unsigned char shared_bank[NR_BANKS] = { | 75 | static unsigned char shared_bank[NR_BANKS] = { |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index e1acec0f7a32..889f665fe93d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot) | |||
90 | if (test_bit(i, owned)) | 90 | if (test_bit(i, owned)) |
91 | continue; | 91 | continue; |
92 | 92 | ||
93 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 93 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
94 | 94 | ||
95 | /* Already owned by someone else? */ | 95 | /* Already owned by someone else? */ |
96 | if (val & CMCI_EN) { | 96 | if (val & CMCI_EN) { |
@@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot) | |||
101 | } | 101 | } |
102 | 102 | ||
103 | val |= CMCI_EN | CMCI_THRESHOLD; | 103 | val |= CMCI_EN | CMCI_THRESHOLD; |
104 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | 104 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
105 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 105 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
106 | 106 | ||
107 | /* Did the enable bit stick? -- the bank supports CMCI */ | 107 | /* Did the enable bit stick? -- the bank supports CMCI */ |
108 | if (val & CMCI_EN) { | 108 | if (val & CMCI_EN) { |
@@ -152,9 +152,9 @@ void cmci_clear(void) | |||
152 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | 152 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) |
153 | continue; | 153 | continue; |
154 | /* Disable CMCI */ | 154 | /* Disable CMCI */ |
155 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 155 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
156 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | 156 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); |
157 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | 157 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
158 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 158 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
159 | } | 159 | } |
160 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | 160 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c deleted file mode 100644 index f5f2d6f71fb6..000000000000 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | /* | ||
2 | * Non Fatal Machine Check Exception Reporting | ||
3 | * | ||
4 | * (C) Copyright 2002 Dave Jones. <davej@redhat.com> | ||
5 | * | ||
6 | * This file contains routines to check for non-fatal MCEs every 15s | ||
7 | * | ||
8 | */ | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/workqueue.h> | ||
11 | #include <linux/jiffies.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/smp.h> | ||
17 | |||
18 | #include <asm/processor.h> | ||
19 | #include <asm/system.h> | ||
20 | #include <asm/mce.h> | ||
21 | #include <asm/msr.h> | ||
22 | |||
23 | static int firstbank; | ||
24 | |||
25 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ | ||
26 | |||
27 | static void mce_checkregs(void *info) | ||
28 | { | ||
29 | u32 low, high; | ||
30 | int i; | ||
31 | |||
32 | for (i = firstbank; i < nr_mce_banks; i++) { | ||
33 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
34 | |||
35 | if (!(high & (1<<31))) | ||
36 | continue; | ||
37 | |||
38 | printk(KERN_INFO "MCE: The hardware reports a non fatal, " | ||
39 | "correctable incident occurred on CPU %d.\n", | ||
40 | smp_processor_id()); | ||
41 | |||
42 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | ||
43 | |||
44 | /* | ||
45 | * Scrub the error so we don't pick it up in MCE_RATE | ||
46 | * seconds time: | ||
47 | */ | ||
48 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
49 | |||
50 | /* Serialize: */ | ||
51 | wmb(); | ||
52 | add_taint(TAINT_MACHINE_CHECK); | ||
53 | } | ||
54 | } | ||
55 | |||
56 | static void mce_work_fn(struct work_struct *work); | ||
57 | static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); | ||
58 | |||
59 | static void mce_work_fn(struct work_struct *work) | ||
60 | { | ||
61 | on_each_cpu(mce_checkregs, NULL, 1); | ||
62 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | ||
63 | } | ||
64 | |||
65 | static int __init init_nonfatal_mce_checker(void) | ||
66 | { | ||
67 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
68 | |||
69 | /* Check for MCE support */ | ||
70 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
71 | return -ENODEV; | ||
72 | |||
73 | /* Check for PPro style MCA */ | ||
74 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
75 | return -ENODEV; | ||
76 | |||
77 | /* Some Athlons misbehave when we frob bank 0 */ | ||
78 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
79 | boot_cpu_data.x86 == 6) | ||
80 | firstbank = 1; | ||
81 | else | ||
82 | firstbank = 0; | ||
83 | |||
84 | /* | ||
85 | * Check for non-fatal errors every MCE_RATE s | ||
86 | */ | ||
87 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | ||
88 | printk(KERN_INFO "Machine check exception polling timer started.\n"); | ||
89 | |||
90 | return 0; | ||
91 | } | ||
92 | module_init(init_nonfatal_mce_checker); | ||
93 | |||
94 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c deleted file mode 100644 index 4482aea9aa2e..000000000000 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ /dev/null | |||
@@ -1,163 +0,0 @@ | |||
1 | /* | ||
2 | * P4 specific Machine Check Exception Reporting | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/init.h> | ||
7 | #include <linux/smp.h> | ||
8 | |||
9 | #include <asm/processor.h> | ||
10 | #include <asm/mce.h> | ||
11 | #include <asm/msr.h> | ||
12 | |||
13 | /* as supported by the P4/Xeon family */ | ||
14 | struct intel_mce_extended_msrs { | ||
15 | u32 eax; | ||
16 | u32 ebx; | ||
17 | u32 ecx; | ||
18 | u32 edx; | ||
19 | u32 esi; | ||
20 | u32 edi; | ||
21 | u32 ebp; | ||
22 | u32 esp; | ||
23 | u32 eflags; | ||
24 | u32 eip; | ||
25 | /* u32 *reserved[]; */ | ||
26 | }; | ||
27 | |||
28 | static int mce_num_extended_msrs; | ||
29 | |||
30 | /* P4/Xeon Extended MCE MSR retrieval into *r (void: no status is returned) */ | ||
31 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | ||
32 | { | ||
33 | u32 h; | ||
34 | |||
35 | rdmsr(MSR_IA32_MCG_EAX, r->eax, h); | ||
36 | rdmsr(MSR_IA32_MCG_EBX, r->ebx, h); | ||
37 | rdmsr(MSR_IA32_MCG_ECX, r->ecx, h); | ||
38 | rdmsr(MSR_IA32_MCG_EDX, r->edx, h); | ||
39 | rdmsr(MSR_IA32_MCG_ESI, r->esi, h); | ||
40 | rdmsr(MSR_IA32_MCG_EDI, r->edi, h); | ||
41 | rdmsr(MSR_IA32_MCG_EBP, r->ebp, h); | ||
42 | rdmsr(MSR_IA32_MCG_ESP, r->esp, h); | ||
43 | rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h); | ||
44 | rdmsr(MSR_IA32_MCG_EIP, r->eip, h); | ||
45 | } | ||
46 | |||
47 | static void intel_machine_check(struct pt_regs *regs, long error_code) | ||
48 | { | ||
49 | u32 alow, ahigh, high, low; | ||
50 | u32 mcgstl, mcgsth; | ||
51 | int recover = 1; | ||
52 | int i; | ||
53 | |||
54 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
55 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
56 | recover = 0; | ||
57 | |||
58 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
59 | smp_processor_id(), mcgsth, mcgstl); | ||
60 | |||
61 | if (mce_num_extended_msrs > 0) { | ||
62 | struct intel_mce_extended_msrs dbg; | ||
63 | |||
64 | intel_get_extended_msrs(&dbg); | ||
65 | |||
66 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" | ||
67 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" | ||
68 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | ||
69 | smp_processor_id(), dbg.eip, dbg.eflags, | ||
70 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx, | ||
71 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); | ||
72 | } | ||
73 | |||
74 | for (i = 0; i < nr_mce_banks; i++) { | ||
75 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
76 | if (high & (1<<31)) { | ||
77 | char misc[20]; | ||
78 | char addr[24]; | ||
79 | |||
80 | misc[0] = addr[0] = '\0'; | ||
81 | if (high & (1<<29)) | ||
82 | recover |= 1; | ||
83 | if (high & (1<<25)) | ||
84 | recover |= 2; | ||
85 | high &= ~(1<<31); | ||
86 | if (high & (1<<27)) { | ||
87 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
88 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
89 | } | ||
90 | if (high & (1<<26)) { | ||
91 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
92 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
93 | } | ||
94 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
95 | smp_processor_id(), i, high, low, misc, addr); | ||
96 | } | ||
97 | } | ||
98 | |||
99 | if (recover & 2) | ||
100 | panic("CPU context corrupt"); | ||
101 | if (recover & 1) | ||
102 | panic("Unable to continue"); | ||
103 | |||
104 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
105 | |||
106 | /* | ||
107 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
108 | * recoverable/continuable. This will allow BIOS to look at the MSRs | ||
109 | * for errors if the OS could not log the error. | ||
110 | */ | ||
111 | for (i = 0; i < nr_mce_banks; i++) { | ||
112 | u32 msr; | ||
113 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
114 | rdmsr(msr, low, high); | ||
115 | if (high&(1<<31)) { | ||
116 | /* Clear it */ | ||
117 | wrmsr(msr, 0UL, 0UL); | ||
118 | /* Serialize */ | ||
119 | wmb(); | ||
120 | add_taint(TAINT_MACHINE_CHECK); | ||
121 | } | ||
122 | } | ||
123 | mcgstl &= ~(1<<2); | ||
124 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
125 | } | ||
126 | |||
127 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) | ||
128 | { | ||
129 | u32 l, h; | ||
130 | int i; | ||
131 | |||
132 | machine_check_vector = intel_machine_check; | ||
133 | wmb(); | ||
134 | |||
135 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
136 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
137 | if (l & (1<<8)) /* Control register present ? */ | ||
138 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
139 | nr_mce_banks = l & 0xff; | ||
140 | |||
141 | for (i = 0; i < nr_mce_banks; i++) { | ||
142 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
143 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
144 | } | ||
145 | |||
146 | set_in_cr4(X86_CR4_MCE); | ||
147 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
148 | smp_processor_id()); | ||
149 | |||
150 | /* Check for P4/Xeon extended MCE MSRs */ | ||
151 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
152 | if (l & (1<<9)) {/* MCG_EXT_P */ | ||
153 | mce_num_extended_msrs = (l >> 16) & 0xff; | ||
154 | printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" | ||
155 | " available\n", | ||
156 | smp_processor_id(), mce_num_extended_msrs); | ||
157 | |||
158 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
159 | /* Check for P4/Xeon Thermal monitor */ | ||
160 | intel_init_thermal(c); | ||
161 | #endif | ||
162 | } | ||
163 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c deleted file mode 100644 index 01e4f8178183..000000000000 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ /dev/null | |||
@@ -1,127 +0,0 @@ | |||
1 | /* | ||
2 | * P6 specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | ||
4 | */ | ||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | ||
10 | |||
11 | #include <asm/processor.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | /* Machine Check Handler For PII/PIII */ | ||
17 | static void intel_machine_check(struct pt_regs *regs, long error_code) | ||
18 | { | ||
19 | u32 alow, ahigh, high, low; | ||
20 | u32 mcgstl, mcgsth; | ||
21 | int recover = 1; | ||
22 | int i; | ||
23 | |||
24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
25 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
26 | recover = 0; | ||
27 | |||
28 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
29 | smp_processor_id(), mcgsth, mcgstl); | ||
30 | |||
31 | for (i = 0; i < nr_mce_banks; i++) { | ||
32 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
33 | if (high & (1<<31)) { | ||
34 | char misc[20]; | ||
35 | char addr[24]; | ||
36 | |||
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
40 | if (high & (1<<29)) | ||
41 | recover |= 1; | ||
42 | if (high & (1<<25)) | ||
43 | recover |= 2; | ||
44 | high &= ~(1<<31); | ||
45 | |||
46 | if (high & (1<<27)) { | ||
47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
49 | } | ||
50 | if (high & (1<<26)) { | ||
51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
53 | } | ||
54 | |||
55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
56 | smp_processor_id(), i, high, low, misc, addr); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | if (recover & 2) | ||
61 | panic("CPU context corrupt"); | ||
62 | if (recover & 1) | ||
63 | panic("Unable to continue"); | ||
64 | |||
65 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
66 | /* | ||
67 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
68 | * recoverable/continuable. This will allow BIOS to look at the MSRs | ||
69 | * for errors if the OS could not log the error: | ||
70 | */ | ||
71 | for (i = 0; i < nr_mce_banks; i++) { | ||
72 | unsigned int msr; | ||
73 | |||
74 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
75 | rdmsr(msr, low, high); | ||
76 | if (high & (1<<31)) { | ||
77 | /* Clear it: */ | ||
78 | wrmsr(msr, 0UL, 0UL); | ||
79 | /* Serialize: */ | ||
80 | wmb(); | ||
81 | add_taint(TAINT_MACHINE_CHECK); | ||
82 | } | ||
83 | } | ||
84 | mcgstl &= ~(1<<2); | ||
85 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
86 | } | ||
87 | |||
88 | /* Set up machine check reporting for processors with Intel style MCE: */ | ||
89 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | ||
90 | { | ||
91 | u32 l, h; | ||
92 | int i; | ||
93 | |||
94 | /* Check for MCE support */ | ||
95 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
96 | return; | ||
97 | |||
98 | /* Check for PPro style MCA */ | ||
99 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
100 | return; | ||
101 | |||
102 | /* Ok machine check is available */ | ||
103 | machine_check_vector = intel_machine_check; | ||
104 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
105 | wmb(); | ||
106 | |||
107 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
108 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
109 | if (l & (1<<8)) /* Control register present ? */ | ||
110 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
111 | nr_mce_banks = l & 0xff; | ||
112 | |||
113 | /* | ||
114 | * Following the example in IA-32 SDM Vol 3: | ||
115 | * - MC0_CTL should not be written | ||
116 | * - Status registers on all banks should be cleared on reset | ||
117 | */ | ||
118 | for (i = 1; i < nr_mce_banks; i++) | ||
119 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
120 | |||
121 | for (i = 0; i < nr_mce_banks; i++) | ||
122 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
123 | |||
124 | set_in_cr4(X86_CR4_MCE); | ||
125 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
126 | smp_processor_id()); | ||
127 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 5957a93e5173..63a56d147e4a 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -260,9 +260,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
260 | return; | 260 | return; |
261 | } | 261 | } |
262 | 262 | ||
263 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
264 | tm2 = 1; | ||
265 | |||
266 | /* Check whether a vector already exists */ | 263 | /* Check whether a vector already exists */ |
267 | if (h & APIC_VECTOR_MASK) { | 264 | if (h & APIC_VECTOR_MASK) { |
268 | printk(KERN_DEBUG | 265 | printk(KERN_DEBUG |
@@ -271,6 +268,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
271 | return; | 268 | return; |
272 | } | 269 | } |
273 | 270 | ||
271 | /* early Pentium M models use different method for enabling TM2 */ | ||
272 | if (cpu_has(c, X86_FEATURE_TM2)) { | ||
273 | if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { | ||
274 | rdmsr(MSR_THERM2_CTL, l, h); | ||
275 | if (l & MSR_THERM2_CTL_TM_SELECT) | ||
276 | tm2 = 1; | ||
277 | } else if (l & MSR_IA32_MISC_ENABLE_TM2) | ||
278 | tm2 = 1; | ||
279 | } | ||
280 | |||
274 | /* We'll mask the thermal vector in the lapic till we're ready: */ | 281 | /* We'll mask the thermal vector in the lapic till we're ready: */ |
275 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | 282 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; |
276 | apic_write(APIC_LVTTHMR, h); | 283 | apic_write(APIC_LVTTHMR, h); |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 7af0f88a4163..84e83de54575 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -58,6 +58,7 @@ unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; | |||
58 | static DEFINE_MUTEX(mtrr_mutex); | 58 | static DEFINE_MUTEX(mtrr_mutex); |
59 | 59 | ||
60 | u64 size_or_mask, size_and_mask; | 60 | u64 size_or_mask, size_and_mask; |
61 | static bool mtrr_aps_delayed_init; | ||
61 | 62 | ||
62 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; | 63 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; |
63 | 64 | ||
@@ -163,7 +164,10 @@ static void ipi_handler(void *info) | |||
163 | if (data->smp_reg != ~0U) { | 164 | if (data->smp_reg != ~0U) { |
164 | mtrr_if->set(data->smp_reg, data->smp_base, | 165 | mtrr_if->set(data->smp_reg, data->smp_base, |
165 | data->smp_size, data->smp_type); | 166 | data->smp_size, data->smp_type); |
166 | } else { | 167 | } else if (mtrr_aps_delayed_init) { |
168 | /* | ||
169 | * Initialize the MTRRs in addition to the synchronisation. | ||
170 | */ | ||
167 | mtrr_if->set_all(); | 171 | mtrr_if->set_all(); |
168 | } | 172 | } |
169 | 173 | ||
@@ -265,6 +269,8 @@ set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type typ | |||
265 | */ | 269 | */ |
266 | if (reg != ~0U) | 270 | if (reg != ~0U) |
267 | mtrr_if->set(reg, base, size, type); | 271 | mtrr_if->set(reg, base, size, type); |
272 | else if (!mtrr_aps_delayed_init) | ||
273 | mtrr_if->set_all(); | ||
268 | 274 | ||
269 | /* Wait for the others */ | 275 | /* Wait for the others */ |
270 | while (atomic_read(&data.count)) | 276 | while (atomic_read(&data.count)) |
@@ -721,9 +727,7 @@ void __init mtrr_bp_init(void) | |||
721 | 727 | ||
722 | void mtrr_ap_init(void) | 728 | void mtrr_ap_init(void) |
723 | { | 729 | { |
724 | unsigned long flags; | 730 | if (!use_intel() || mtrr_aps_delayed_init) |
725 | |||
726 | if (!mtrr_if || !use_intel()) | ||
727 | return; | 731 | return; |
728 | /* | 732 | /* |
729 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries | 733 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries |
@@ -738,11 +742,7 @@ void mtrr_ap_init(void) | |||
738 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug | 742 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug |
739 | * lock to prevent mtrr entry changes | 743 | * lock to prevent mtrr entry changes |
740 | */ | 744 | */ |
741 | local_irq_save(flags); | 745 | set_mtrr(~0U, 0, 0, 0); |
742 | |||
743 | mtrr_if->set_all(); | ||
744 | |||
745 | local_irq_restore(flags); | ||
746 | } | 746 | } |
747 | 747 | ||
748 | /** | 748 | /** |
@@ -753,6 +753,34 @@ void mtrr_save_state(void) | |||
753 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); | 753 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); |
754 | } | 754 | } |
755 | 755 | ||
756 | void set_mtrr_aps_delayed_init(void) | ||
757 | { | ||
758 | if (!use_intel()) | ||
759 | return; | ||
760 | |||
761 | mtrr_aps_delayed_init = true; | ||
762 | } | ||
763 | |||
764 | /* | ||
765 | * MTRR initialization for all AP's | ||
766 | */ | ||
767 | void mtrr_aps_init(void) | ||
768 | { | ||
769 | if (!use_intel()) | ||
770 | return; | ||
771 | |||
772 | set_mtrr(~0U, 0, 0, 0); | ||
773 | mtrr_aps_delayed_init = false; | ||
774 | } | ||
775 | |||
776 | void mtrr_bp_restore(void) | ||
777 | { | ||
778 | if (!use_intel()) | ||
779 | return; | ||
780 | |||
781 | mtrr_if->set_all(); | ||
782 | } | ||
783 | |||
756 | static int __init mtrr_init_finialize(void) | 784 | static int __init mtrr_init_finialize(void) |
757 | { | 785 | { |
758 | if (!mtrr_if) | 786 | if (!mtrr_if) |
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index f9cd0849bd42..2732e2c1e4d3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -1211,7 +1211,7 @@ amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | |||
1211 | x86_pmu_disable_counter(hwc, idx); | 1211 | x86_pmu_disable_counter(hwc, idx); |
1212 | } | 1212 | } |
1213 | 1213 | ||
1214 | static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); | 1214 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
1215 | 1215 | ||
1216 | /* | 1216 | /* |
1217 | * Set the next IRQ period, based on the hwc->period_left value. | 1217 | * Set the next IRQ period, based on the hwc->period_left value. |
@@ -1253,7 +1253,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
1253 | if (left > x86_pmu.max_period) | 1253 | if (left > x86_pmu.max_period) |
1254 | left = x86_pmu.max_period; | 1254 | left = x86_pmu.max_period; |
1255 | 1255 | ||
1256 | per_cpu(prev_left[idx], smp_processor_id()) = left; | 1256 | per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; |
1257 | 1257 | ||
1258 | /* | 1258 | /* |
1259 | * The hw counter starts counting from this counter offset, | 1259 | * The hw counter starts counting from this counter offset, |
@@ -1470,7 +1470,7 @@ void perf_counter_print_debug(void) | |||
1470 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1470 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
1471 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1471 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
1472 | 1472 | ||
1473 | prev_left = per_cpu(prev_left[idx], cpu); | 1473 | prev_left = per_cpu(pmc_prev_left[idx], cpu); |
1474 | 1474 | ||
1475 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", | 1475 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", |
1476 | cpu, idx, pmc_ctrl); | 1476 | cpu, idx, pmc_ctrl); |
@@ -2110,8 +2110,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
2110 | entry->ip[entry->nr++] = ip; | 2110 | entry->ip[entry->nr++] = ip; |
2111 | } | 2111 | } |
2112 | 2112 | ||
2113 | static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); | 2113 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); |
2114 | static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); | 2114 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); |
2115 | static DEFINE_PER_CPU(int, in_nmi_frame); | 2115 | static DEFINE_PER_CPU(int, in_nmi_frame); |
2116 | 2116 | ||
2117 | 2117 | ||
@@ -2264,9 +2264,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
2264 | struct perf_callchain_entry *entry; | 2264 | struct perf_callchain_entry *entry; |
2265 | 2265 | ||
2266 | if (in_nmi()) | 2266 | if (in_nmi()) |
2267 | entry = &__get_cpu_var(nmi_entry); | 2267 | entry = &__get_cpu_var(pmc_nmi_entry); |
2268 | else | 2268 | else |
2269 | entry = &__get_cpu_var(irq_entry); | 2269 | entry = &__get_cpu_var(pmc_irq_entry); |
2270 | 2270 | ||
2271 | entry->nr = 0; | 2271 | entry->nr = 0; |
2272 | 2272 | ||
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c new file mode 100644 index 000000000000..a640ae5ad201 --- /dev/null +++ b/arch/x86/kernel/cpu/sched.c | |||
@@ -0,0 +1,55 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
38 | * maybe have something cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c251be745107..d59fe323807e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -146,7 +146,7 @@ ENTRY(ftrace_graph_caller) | |||
146 | END(ftrace_graph_caller) | 146 | END(ftrace_graph_caller) |
147 | 147 | ||
148 | GLOBAL(return_to_handler) | 148 | GLOBAL(return_to_handler) |
149 | subq $80, %rsp | 149 | subq $24, %rsp |
150 | 150 | ||
151 | /* Save the return values */ | 151 | /* Save the return values */ |
152 | movq %rax, (%rsp) | 152 | movq %rax, (%rsp) |
@@ -155,10 +155,10 @@ GLOBAL(return_to_handler) | |||
155 | 155 | ||
156 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
157 | 157 | ||
158 | movq %rax, 72(%rsp) | 158 | movq %rax, 16(%rsp) |
159 | movq 8(%rsp), %rdx | 159 | movq 8(%rsp), %rdx |
160 | movq (%rsp), %rax | 160 | movq (%rsp), %rax |
161 | addq $72, %rsp | 161 | addq $16, %rsp |
162 | retq | 162 | retq |
163 | #endif | 163 | #endif |
164 | 164 | ||
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b0cdde6932f5..74656d1d4e30 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
104 | seq_printf(p, " Threshold APIC interrupts\n"); | 104 | seq_printf(p, " Threshold APIC interrupts\n"); |
105 | # endif | 105 | # endif |
106 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_NEW_MCE | 107 | #ifdef CONFIG_X86_MCE |
108 | seq_printf(p, "%*s: ", prec, "MCE"); | 108 | seq_printf(p, "%*s: ", prec, "MCE"); |
109 | for_each_online_cpu(j) | 109 | for_each_online_cpu(j) |
110 | seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); | 110 | seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); |
@@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
200 | sum += irq_stats(cpu)->irq_threshold_count; | 200 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | 201 | # endif |
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_NEW_MCE | 203 | #ifdef CONFIG_X86_MCE |
204 | sum += per_cpu(mce_exception_count, cpu); | 204 | sum += per_cpu(mce_exception_count, cpu); |
205 | sum += per_cpu(mce_poll_count, cpu); | 205 | sum += per_cpu(mce_poll_count, cpu); |
206 | #endif | 206 | #endif |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 92b7703d3d58..ccf8ab54f31a 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -190,7 +190,7 @@ static void __init apic_intr_init(void) | |||
190 | #ifdef CONFIG_X86_MCE_THRESHOLD | 190 | #ifdef CONFIG_X86_MCE_THRESHOLD |
191 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 191 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
192 | #endif | 192 | #endif |
193 | #if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) | 193 | #if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC) |
194 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); | 194 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); |
195 | #endif | 195 | #endif |
196 | 196 | ||
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d71c8655905b..64b838eac18c 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -225,10 +225,8 @@ static __init int iommu_setup(char *p) | |||
225 | if (!strncmp(p, "soft", 4)) | 225 | if (!strncmp(p, "soft", 4)) |
226 | swiotlb = 1; | 226 | swiotlb = 1; |
227 | #endif | 227 | #endif |
228 | if (!strncmp(p, "pt", 2)) { | 228 | if (!strncmp(p, "pt", 2)) |
229 | iommu_pass_through = 1; | 229 | iommu_pass_through = 1; |
230 | return 1; | ||
231 | } | ||
232 | 230 | ||
233 | gart_parse_options(p); | 231 | gart_parse_options(p); |
234 | 232 | ||
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index af71d06624bf..6c3b2c6fd772 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -508,7 +508,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) | |||
508 | 508 | ||
509 | pci_read_config_dword(nb_ht, 0x60, &val); | 509 | pci_read_config_dword(nb_ht, 0x60, &val); |
510 | set_dev_node(&dev->dev, val & 7); | 510 | set_dev_node(&dev->dev, val & 7); |
511 | pci_dev_put(dev); | 511 | pci_dev_put(nb_ht); |
512 | } | 512 | } |
513 | 513 | ||
514 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | 514 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a06e8d101844..27349f92a6d7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/pm.h> | 4 | #include <linux/pm.h> |
5 | #include <linux/efi.h> | 5 | #include <linux/efi.h> |
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/tboot.h> | ||
7 | #include <acpi/reboot.h> | 8 | #include <acpi/reboot.h> |
8 | #include <asm/io.h> | 9 | #include <asm/io.h> |
9 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
@@ -508,6 +509,8 @@ static void native_machine_emergency_restart(void) | |||
508 | if (reboot_emergency) | 509 | if (reboot_emergency) |
509 | emergency_vmx_disable_all(); | 510 | emergency_vmx_disable_all(); |
510 | 511 | ||
512 | tboot_shutdown(TB_SHUTDOWN_REBOOT); | ||
513 | |||
511 | /* Tell the BIOS if we want cold or warm reboot */ | 514 | /* Tell the BIOS if we want cold or warm reboot */ |
512 | *((unsigned short *)__va(0x472)) = reboot_mode; | 515 | *((unsigned short *)__va(0x472)) = reboot_mode; |
513 | 516 | ||
@@ -634,6 +637,8 @@ static void native_machine_halt(void) | |||
634 | /* stop other cpus and apics */ | 637 | /* stop other cpus and apics */ |
635 | machine_shutdown(); | 638 | machine_shutdown(); |
636 | 639 | ||
640 | tboot_shutdown(TB_SHUTDOWN_HALT); | ||
641 | |||
637 | /* stop this cpu */ | 642 | /* stop this cpu */ |
638 | stop_this_cpu(NULL); | 643 | stop_this_cpu(NULL); |
639 | } | 644 | } |
@@ -645,6 +650,8 @@ static void native_machine_power_off(void) | |||
645 | machine_shutdown(); | 650 | machine_shutdown(); |
646 | pm_power_off(); | 651 | pm_power_off(); |
647 | } | 652 | } |
653 | /* a fallback in case there is no PM info available */ | ||
654 | tboot_shutdown(TB_SHUTDOWN_HALT); | ||
648 | } | 655 | } |
649 | 656 | ||
650 | struct machine_ops machine_ops = { | 657 | struct machine_ops machine_ops = { |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 63f32d220ef2..19f15c4076fb 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -66,6 +66,7 @@ | |||
66 | 66 | ||
67 | #include <linux/percpu.h> | 67 | #include <linux/percpu.h> |
68 | #include <linux/crash_dump.h> | 68 | #include <linux/crash_dump.h> |
69 | #include <linux/tboot.h> | ||
69 | 70 | ||
70 | #include <video/edid.h> | 71 | #include <video/edid.h> |
71 | 72 | ||
@@ -711,6 +712,21 @@ void __init setup_arch(char **cmdline_p) | |||
711 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 712 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
712 | #endif | 713 | #endif |
713 | 714 | ||
715 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | ||
716 | *cmdline_p = command_line; | ||
717 | |||
718 | #ifdef CONFIG_X86_64 | ||
719 | /* | ||
720 | * Must call this twice: Once just to detect whether hardware doesn't | ||
721 | * support NX (so that the early EHCI debug console setup can safely | ||
722 | * call set_fixmap()), and then again after parsing early parameters to | ||
723 | * honor the respective command line option. | ||
724 | */ | ||
725 | check_efer(); | ||
726 | #endif | ||
727 | |||
728 | parse_early_param(); | ||
729 | |||
714 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 730 | /* VMI may relocate the fixmap; do this before touching ioremap area */ |
715 | vmi_init(); | 731 | vmi_init(); |
716 | 732 | ||
@@ -793,11 +809,6 @@ void __init setup_arch(char **cmdline_p) | |||
793 | #endif | 809 | #endif |
794 | #endif | 810 | #endif |
795 | 811 | ||
796 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | ||
797 | *cmdline_p = command_line; | ||
798 | |||
799 | parse_early_param(); | ||
800 | |||
801 | #ifdef CONFIG_X86_64 | 812 | #ifdef CONFIG_X86_64 |
802 | check_efer(); | 813 | check_efer(); |
803 | #endif | 814 | #endif |
@@ -977,6 +988,8 @@ void __init setup_arch(char **cmdline_p) | |||
977 | paravirt_pagetable_setup_done(swapper_pg_dir); | 988 | paravirt_pagetable_setup_done(swapper_pg_dir); |
978 | paravirt_post_allocator_init(); | 989 | paravirt_post_allocator_init(); |
979 | 990 | ||
991 | tboot_probe(); | ||
992 | |||
980 | #ifdef CONFIG_X86_64 | 993 | #ifdef CONFIG_X86_64 |
981 | map_vsyscall(); | 994 | map_vsyscall(); |
982 | #endif | 995 | #endif |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 07d81916f212..d559af913e1f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset); | |||
55 | #define PERCPU_FIRST_CHUNK_RESERVE 0 | 55 | #define PERCPU_FIRST_CHUNK_RESERVE 0 |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | #ifdef CONFIG_X86_32 | ||
58 | /** | 59 | /** |
59 | * pcpu_need_numa - determine percpu allocation needs to consider NUMA | 60 | * pcpu_need_numa - determine percpu allocation needs to consider NUMA |
60 | * | 61 | * |
@@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void) | |||
83 | #endif | 84 | #endif |
84 | return false; | 85 | return false; |
85 | } | 86 | } |
87 | #endif | ||
86 | 88 | ||
87 | /** | 89 | /** |
88 | * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu | 90 | * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu |
@@ -124,308 +126,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
124 | } | 126 | } |
125 | 127 | ||
126 | /* | 128 | /* |
127 | * Large page remap allocator | 129 | * Helpers for first chunk memory allocation |
128 | * | ||
129 | * This allocator uses PMD page as unit. A PMD page is allocated for | ||
130 | * each cpu and each is remapped into vmalloc area using PMD mapping. | ||
131 | * As PMD page is quite large, only part of it is used for the first | ||
132 | * chunk. Unused part is returned to the bootmem allocator. | ||
133 | * | ||
134 | * So, the PMD pages are mapped twice - once to the physical mapping | ||
135 | * and to the vmalloc area for the first percpu chunk. The double | ||
136 | * mapping does add one more PMD TLB entry pressure but still is much | ||
137 | * better than only using 4k mappings while still being NUMA friendly. | ||
138 | */ | 130 | */ |
139 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 131 | static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) |
140 | struct pcpul_ent { | ||
141 | unsigned int cpu; | ||
142 | void *ptr; | ||
143 | }; | ||
144 | |||
145 | static size_t pcpul_size; | ||
146 | static struct pcpul_ent *pcpul_map; | ||
147 | static struct vm_struct pcpul_vm; | ||
148 | |||
149 | static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) | ||
150 | { | 132 | { |
151 | size_t off = (size_t)pageno << PAGE_SHIFT; | 133 | return pcpu_alloc_bootmem(cpu, size, align); |
152 | |||
153 | if (off >= pcpul_size) | ||
154 | return NULL; | ||
155 | |||
156 | return virt_to_page(pcpul_map[cpu].ptr + off); | ||
157 | } | 134 | } |
158 | 135 | ||
159 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | 136 | static void __init pcpu_fc_free(void *ptr, size_t size) |
160 | { | 137 | { |
161 | size_t map_size, dyn_size; | 138 | free_bootmem(__pa(ptr), size); |
162 | unsigned int cpu; | ||
163 | int i, j; | ||
164 | ssize_t ret; | ||
165 | |||
166 | if (!chosen) { | ||
167 | size_t vm_size = VMALLOC_END - VMALLOC_START; | ||
168 | size_t tot_size = nr_cpu_ids * PMD_SIZE; | ||
169 | |||
170 | /* on non-NUMA, embedding is better */ | ||
171 | if (!pcpu_need_numa()) | ||
172 | return -EINVAL; | ||
173 | |||
174 | /* don't consume more than 20% of vmalloc area */ | ||
175 | if (tot_size > vm_size / 5) { | ||
176 | pr_info("PERCPU: too large chunk size %zuMB for " | ||
177 | "large page remap\n", tot_size >> 20); | ||
178 | return -EINVAL; | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* need PSE */ | ||
183 | if (!cpu_has_pse) { | ||
184 | pr_warning("PERCPU: lpage allocator requires PSE\n"); | ||
185 | return -EINVAL; | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * Currently supports only single page. Supporting multiple | ||
190 | * pages won't be too difficult if it ever becomes necessary. | ||
191 | */ | ||
192 | pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | ||
193 | PERCPU_DYNAMIC_RESERVE); | ||
194 | if (pcpul_size > PMD_SIZE) { | ||
195 | pr_warning("PERCPU: static data is larger than large page, " | ||
196 | "can't use large page\n"); | ||
197 | return -EINVAL; | ||
198 | } | ||
199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | ||
200 | |||
201 | /* allocate pointer array and alloc large pages */ | ||
202 | map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); | ||
203 | pcpul_map = alloc_bootmem(map_size); | ||
204 | |||
205 | for_each_possible_cpu(cpu) { | ||
206 | pcpul_map[cpu].cpu = cpu; | ||
207 | pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, | ||
208 | PMD_SIZE); | ||
209 | if (!pcpul_map[cpu].ptr) { | ||
210 | pr_warning("PERCPU: failed to allocate large page " | ||
211 | "for cpu%u\n", cpu); | ||
212 | goto enomem; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Only use pcpul_size bytes and give back the rest. | ||
217 | * | ||
218 | * Ingo: The 2MB up-rounding bootmem is needed to make | ||
219 | * sure the partial 2MB page is still fully RAM - it's | ||
220 | * not well-specified to have a PAT-incompatible area | ||
221 | * (unmapped RAM, device memory, etc.) in that hole. | ||
222 | */ | ||
223 | free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), | ||
224 | PMD_SIZE - pcpul_size); | ||
225 | |||
226 | memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); | ||
227 | } | ||
228 | |||
229 | /* allocate address and map */ | ||
230 | pcpul_vm.flags = VM_ALLOC; | ||
231 | pcpul_vm.size = nr_cpu_ids * PMD_SIZE; | ||
232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); | ||
233 | |||
234 | for_each_possible_cpu(cpu) { | ||
235 | pmd_t *pmd, pmd_v; | ||
236 | |||
237 | pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + | ||
238 | cpu * PMD_SIZE); | ||
239 | pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), | ||
240 | PAGE_KERNEL_LARGE); | ||
241 | set_pmd(pmd, pmd_v); | ||
242 | } | ||
243 | |||
244 | /* we're ready, commit */ | ||
245 | pr_info("PERCPU: Remapped at %p with large pages, static data " | ||
246 | "%zu bytes\n", pcpul_vm.addr, static_size); | ||
247 | |||
248 | ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, | ||
249 | PERCPU_FIRST_CHUNK_RESERVE, dyn_size, | ||
250 | PMD_SIZE, pcpul_vm.addr, NULL); | ||
251 | |||
252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | ||
253 | for (i = 0; i < nr_cpu_ids - 1; i++) | ||
254 | for (j = i + 1; j < nr_cpu_ids; j++) | ||
255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { | ||
256 | struct pcpul_ent tmp = pcpul_map[i]; | ||
257 | pcpul_map[i] = pcpul_map[j]; | ||
258 | pcpul_map[j] = tmp; | ||
259 | } | ||
260 | |||
261 | return ret; | ||
262 | |||
263 | enomem: | ||
264 | for_each_possible_cpu(cpu) | ||
265 | if (pcpul_map[cpu].ptr) | ||
266 | free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); | ||
267 | free_bootmem(__pa(pcpul_map), map_size); | ||
268 | return -ENOMEM; | ||
269 | } | 139 | } |
270 | 140 | ||
271 | /** | 141 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
272 | * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area | ||
273 | * @kaddr: the kernel address in question | ||
274 | * | ||
275 | * Determine whether @kaddr falls in the pcpul recycled area. This is | ||
276 | * used by pageattr to detect VM aliases and break up the pcpu PMD | ||
277 | * mapping such that the same physical page is not mapped under | ||
278 | * different attributes. | ||
279 | * | ||
280 | * The recycled area is always at the tail of a partially used PMD | ||
281 | * page. | ||
282 | * | ||
283 | * RETURNS: | ||
284 | * Address of corresponding remapped pcpu address if match is found; | ||
285 | * otherwise, NULL. | ||
286 | */ | ||
287 | void *pcpu_lpage_remapped(void *kaddr) | ||
288 | { | 142 | { |
289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); | 143 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; | 144 | if (early_cpu_to_node(from) == early_cpu_to_node(to)) |
291 | int left = 0, right = nr_cpu_ids - 1; | 145 | return LOCAL_DISTANCE; |
292 | int pos; | 146 | else |
293 | 147 | return REMOTE_DISTANCE; | |
294 | /* pcpul in use at all? */ | ||
295 | if (!pcpul_map) | ||
296 | return NULL; | ||
297 | |||
298 | /* okay, perform binary search */ | ||
299 | while (left <= right) { | ||
300 | pos = (left + right) / 2; | ||
301 | |||
302 | if (pcpul_map[pos].ptr < pmd_addr) | ||
303 | left = pos + 1; | ||
304 | else if (pcpul_map[pos].ptr > pmd_addr) | ||
305 | right = pos - 1; | ||
306 | else { | ||
307 | /* it shouldn't be in the area for the first chunk */ | ||
308 | WARN_ON(offset < pcpul_size); | ||
309 | |||
310 | return pcpul_vm.addr + | ||
311 | pcpul_map[pos].cpu * PMD_SIZE + offset; | ||
312 | } | ||
313 | } | ||
314 | |||
315 | return NULL; | ||
316 | } | ||
317 | #else | 148 | #else |
318 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | 149 | return LOCAL_DISTANCE; |
319 | { | ||
320 | return -EINVAL; | ||
321 | } | ||
322 | #endif | 150 | #endif |
323 | |||
324 | /* | ||
325 | * Embedding allocator | ||
326 | * | ||
327 | * The first chunk is sized to just contain the static area plus | ||
328 | * module and dynamic reserves and embedded into linear physical | ||
329 | * mapping so that it can use PMD mapping without additional TLB | ||
330 | * pressure. | ||
331 | */ | ||
332 | static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) | ||
333 | { | ||
334 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; | ||
335 | |||
336 | /* | ||
337 | * If large page isn't supported, there's no benefit in doing | ||
338 | * this. Also, embedding allocation doesn't play well with | ||
339 | * NUMA. | ||
340 | */ | ||
341 | if (!chosen && (!cpu_has_pse || pcpu_need_numa())) | ||
342 | return -EINVAL; | ||
343 | |||
344 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, | ||
345 | reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); | ||
346 | } | 151 | } |
347 | 152 | ||
348 | /* | 153 | static void __init pcpup_populate_pte(unsigned long addr) |
349 | * 4k page allocator | ||
350 | * | ||
351 | * This is the basic allocator. Static percpu area is allocated | ||
352 | * page-by-page and most of initialization is done by the generic | ||
353 | * setup function. | ||
354 | */ | ||
355 | static struct page **pcpu4k_pages __initdata; | ||
356 | static int pcpu4k_nr_static_pages __initdata; | ||
357 | |||
358 | static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) | ||
359 | { | ||
360 | if (pageno < pcpu4k_nr_static_pages) | ||
361 | return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; | ||
362 | return NULL; | ||
363 | } | ||
364 | |||
365 | static void __init pcpu4k_populate_pte(unsigned long addr) | ||
366 | { | 154 | { |
367 | populate_extra_pte(addr); | 155 | populate_extra_pte(addr); |
368 | } | 156 | } |
369 | 157 | ||
370 | static ssize_t __init setup_pcpu_4k(size_t static_size) | ||
371 | { | ||
372 | size_t pages_size; | ||
373 | unsigned int cpu; | ||
374 | int i, j; | ||
375 | ssize_t ret; | ||
376 | |||
377 | pcpu4k_nr_static_pages = PFN_UP(static_size); | ||
378 | |||
379 | /* unaligned allocations can't be freed, round up to page size */ | ||
380 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids | ||
381 | * sizeof(pcpu4k_pages[0])); | ||
382 | pcpu4k_pages = alloc_bootmem(pages_size); | ||
383 | |||
384 | /* allocate and copy */ | ||
385 | j = 0; | ||
386 | for_each_possible_cpu(cpu) | ||
387 | for (i = 0; i < pcpu4k_nr_static_pages; i++) { | ||
388 | void *ptr; | ||
389 | |||
390 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); | ||
391 | if (!ptr) { | ||
392 | pr_warning("PERCPU: failed to allocate " | ||
393 | "4k page for cpu%u\n", cpu); | ||
394 | goto enomem; | ||
395 | } | ||
396 | |||
397 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); | ||
398 | pcpu4k_pages[j++] = virt_to_page(ptr); | ||
399 | } | ||
400 | |||
401 | /* we're ready, commit */ | ||
402 | pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", | ||
403 | pcpu4k_nr_static_pages, static_size); | ||
404 | |||
405 | ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, | ||
406 | PERCPU_FIRST_CHUNK_RESERVE, -1, | ||
407 | -1, NULL, pcpu4k_populate_pte); | ||
408 | goto out_free_ar; | ||
409 | |||
410 | enomem: | ||
411 | while (--j >= 0) | ||
412 | free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); | ||
413 | ret = -ENOMEM; | ||
414 | out_free_ar: | ||
415 | free_bootmem(__pa(pcpu4k_pages), pages_size); | ||
416 | return ret; | ||
417 | } | ||
418 | |||
419 | /* for explicit first chunk allocator selection */ | ||
420 | static char pcpu_chosen_alloc[16] __initdata; | ||
421 | |||
422 | static int __init percpu_alloc_setup(char *str) | ||
423 | { | ||
424 | strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); | ||
425 | return 0; | ||
426 | } | ||
427 | early_param("percpu_alloc", percpu_alloc_setup); | ||
428 | |||
429 | static inline void setup_percpu_segment(int cpu) | 158 | static inline void setup_percpu_segment(int cpu) |
430 | { | 159 | { |
431 | #ifdef CONFIG_X86_32 | 160 | #ifdef CONFIG_X86_32 |
@@ -441,52 +170,49 @@ static inline void setup_percpu_segment(int cpu) | |||
441 | 170 | ||
442 | void __init setup_per_cpu_areas(void) | 171 | void __init setup_per_cpu_areas(void) |
443 | { | 172 | { |
444 | size_t static_size = __per_cpu_end - __per_cpu_start; | ||
445 | unsigned int cpu; | 173 | unsigned int cpu; |
446 | unsigned long delta; | 174 | unsigned long delta; |
447 | size_t pcpu_unit_size; | 175 | int rc; |
448 | ssize_t ret; | ||
449 | 176 | ||
450 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", | 177 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", |
451 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); | 178 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); |
452 | 179 | ||
453 | /* | 180 | /* |
454 | * Allocate percpu area. If PSE is supported, try to make use | 181 | * Allocate percpu area. Embedding allocator is our favorite; |
455 | * of large page mappings. Please read comments on top of | 182 | * however, on NUMA configurations, it can result in very |
456 | * each allocator for details. | 183 | * sparse unit mapping and vmalloc area isn't spacious enough |
184 | * on 32bit. Use page in that case. | ||
457 | */ | 185 | */ |
458 | ret = -EINVAL; | 186 | #ifdef CONFIG_X86_32 |
459 | if (strlen(pcpu_chosen_alloc)) { | 187 | if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa()) |
460 | if (strcmp(pcpu_chosen_alloc, "4k")) { | 188 | pcpu_chosen_fc = PCPU_FC_PAGE; |
461 | if (!strcmp(pcpu_chosen_alloc, "lpage")) | 189 | #endif |
462 | ret = setup_pcpu_lpage(static_size, true); | 190 | rc = -EINVAL; |
463 | else if (!strcmp(pcpu_chosen_alloc, "embed")) | 191 | if (pcpu_chosen_fc != PCPU_FC_PAGE) { |
464 | ret = setup_pcpu_embed(static_size, true); | 192 | const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE; |
465 | else | 193 | const size_t dyn_size = PERCPU_MODULE_RESERVE + |
466 | pr_warning("PERCPU: unknown allocator %s " | 194 | PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; |
467 | "specified\n", pcpu_chosen_alloc); | 195 | |
468 | if (ret < 0) | 196 | rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, |
469 | pr_warning("PERCPU: %s allocator failed (%zd), " | 197 | dyn_size, atom_size, |
470 | "falling back to 4k\n", | 198 | pcpu_cpu_distance, |
471 | pcpu_chosen_alloc, ret); | 199 | pcpu_fc_alloc, pcpu_fc_free); |
472 | } | 200 | if (rc < 0) |
473 | } else { | 201 | pr_warning("PERCPU: %s allocator failed (%d), " |
474 | ret = setup_pcpu_lpage(static_size, false); | 202 | "falling back to page size\n", |
475 | if (ret < 0) | 203 | pcpu_fc_names[pcpu_chosen_fc], rc); |
476 | ret = setup_pcpu_embed(static_size, false); | ||
477 | } | 204 | } |
478 | if (ret < 0) | 205 | if (rc < 0) |
479 | ret = setup_pcpu_4k(static_size); | 206 | rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, |
480 | if (ret < 0) | 207 | pcpu_fc_alloc, pcpu_fc_free, |
481 | panic("cannot allocate static percpu area (%zu bytes, err=%zd)", | 208 | pcpup_populate_pte); |
482 | static_size, ret); | 209 | if (rc < 0) |
483 | 210 | panic("cannot initialize percpu area (err=%d)", rc); | |
484 | pcpu_unit_size = ret; | ||
485 | 211 | ||
486 | /* alrighty, percpu areas up and running */ | 212 | /* alrighty, percpu areas up and running */ |
487 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; | 213 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; |
488 | for_each_possible_cpu(cpu) { | 214 | for_each_possible_cpu(cpu) { |
489 | per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; | 215 | per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; |
490 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); | 216 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); |
491 | per_cpu(cpu_number, cpu) = cpu; | 217 | per_cpu(cpu_number, cpu) = cpu; |
492 | setup_percpu_segment(cpu); | 218 | setup_percpu_segment(cpu); |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 81e58238c4ce..6a44a76055ad 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -856,7 +856,7 @@ static void do_signal(struct pt_regs *regs) | |||
856 | void | 856 | void |
857 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 857 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
858 | { | 858 | { |
859 | #ifdef CONFIG_X86_NEW_MCE | 859 | #ifdef CONFIG_X86_MCE |
860 | /* notify userspace of pending MCEs */ | 860 | /* notify userspace of pending MCEs */ |
861 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 861 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
862 | mce_notify_process(); | 862 | mce_notify_process(); |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index c36cc1452cdc..a25eeec00080 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/bootmem.h> | 47 | #include <linux/bootmem.h> |
48 | #include <linux/err.h> | 48 | #include <linux/err.h> |
49 | #include <linux/nmi.h> | 49 | #include <linux/nmi.h> |
50 | #include <linux/tboot.h> | ||
50 | 51 | ||
51 | #include <asm/acpi.h> | 52 | #include <asm/acpi.h> |
52 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
@@ -1117,9 +1118,22 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1117 | 1118 | ||
1118 | if (is_uv_system()) | 1119 | if (is_uv_system()) |
1119 | uv_system_init(); | 1120 | uv_system_init(); |
1121 | |||
1122 | set_mtrr_aps_delayed_init(); | ||
1120 | out: | 1123 | out: |
1121 | preempt_enable(); | 1124 | preempt_enable(); |
1122 | } | 1125 | } |
1126 | |||
/*
 * Hook run at the start of bringing the non-boot CPUs back up: asks the
 * MTRR code to defer AP MTRR setup by calling set_mtrr_aps_delayed_init().
 */
void arch_enable_nonboot_cpus_begin(void)
{
	set_mtrr_aps_delayed_init();
}
1131 | |||
/*
 * Hook run once the non-boot CPUs have been brought up: performs the MTRR
 * initialisation for the APs in one go by calling mtrr_aps_init().
 */
void arch_enable_nonboot_cpus_end(void)
{
	mtrr_aps_init();
}
1136 | |||
1123 | /* | 1137 | /* |
1124 | * Early setup to make printk work. | 1138 | * Early setup to make printk work. |
1125 | */ | 1139 | */ |
@@ -1141,6 +1155,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1141 | setup_ioapic_dest(); | 1155 | setup_ioapic_dest(); |
1142 | #endif | 1156 | #endif |
1143 | check_nmi_watchdog(); | 1157 | check_nmi_watchdog(); |
1158 | mtrr_aps_init(); | ||
1144 | } | 1159 | } |
1145 | 1160 | ||
1146 | static int __initdata setup_possible_cpus = -1; | 1161 | static int __initdata setup_possible_cpus = -1; |
@@ -1318,6 +1333,7 @@ void play_dead_common(void) | |||
1318 | void native_play_dead(void) | 1333 | void native_play_dead(void) |
1319 | { | 1334 | { |
1320 | play_dead_common(); | 1335 | play_dead_common(); |
1336 | tboot_shutdown(TB_SHUTDOWN_WFS); | ||
1321 | wbinvd_halt(); | 1337 | wbinvd_halt(); |
1322 | } | 1338 | } |
1323 | 1339 | ||
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c new file mode 100644 index 000000000000..86c9f91b48ae --- /dev/null +++ b/arch/x86/kernel/tboot.c | |||
@@ -0,0 +1,447 @@ | |||
1 | /* | ||
2 | * tboot.c: main implementation of helper functions used by kernel for | ||
3 | * runtime support of Intel(R) Trusted Execution Technology | ||
4 | * | ||
5 | * Copyright (c) 2006-2009, Intel Corporation | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License along with | ||
17 | * this program; if not, write to the Free Software Foundation, Inc., | ||
18 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <linux/dma_remapping.h> | ||
23 | #include <linux/init_task.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/dmar.h> | ||
29 | #include <linux/cpu.h> | ||
30 | #include <linux/pfn.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/tboot.h> | ||
33 | |||
34 | #include <asm/trampoline.h> | ||
35 | #include <asm/processor.h> | ||
36 | #include <asm/bootparam.h> | ||
37 | #include <asm/pgtable.h> | ||
38 | #include <asm/pgalloc.h> | ||
39 | #include <asm/fixmap.h> | ||
40 | #include <asm/proto.h> | ||
41 | #include <asm/setup.h> | ||
42 | #include <asm/e820.h> | ||
43 | #include <asm/io.h> | ||
44 | |||
45 | #include "acpi/realmode/wakeup.h" | ||
46 | |||
47 | /* Global pointer to shared data; NULL means no measured launch. */ | ||
48 | struct tboot *tboot __read_mostly; | ||
49 | |||
50 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ | ||
51 | #define AP_WAIT_TIMEOUT 1 | ||
52 | |||
53 | #undef pr_fmt | ||
54 | #define pr_fmt(fmt) "tboot: " fmt | ||
55 | |||
56 | static u8 tboot_uuid[16] __initdata = TBOOT_UUID; | ||
57 | |||
58 | void __init tboot_probe(void) | ||
59 | { | ||
60 | /* Look for valid page-aligned address for shared page. */ | ||
61 | if (!boot_params.tboot_addr) | ||
62 | return; | ||
63 | /* | ||
64 | * also verify that it is mapped as we expect it before calling | ||
65 | * set_fixmap(), to reduce chance of garbage value causing crash | ||
66 | */ | ||
67 | if (!e820_any_mapped(boot_params.tboot_addr, | ||
68 | boot_params.tboot_addr, E820_RESERVED)) { | ||
69 | pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n"); | ||
70 | return; | ||
71 | } | ||
72 | |||
73 | /* only a natively booted kernel should be using TXT */ | ||
74 | if (paravirt_enabled()) { | ||
75 | pr_warning("non-0 tboot_addr but pv_ops is enabled\n"); | ||
76 | return; | ||
77 | } | ||
78 | |||
79 | /* Map and check for tboot UUID. */ | ||
80 | set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); | ||
81 | tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); | ||
82 | if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) { | ||
83 | pr_warning("tboot at 0x%llx is invalid\n", | ||
84 | boot_params.tboot_addr); | ||
85 | tboot = NULL; | ||
86 | return; | ||
87 | } | ||
88 | if (tboot->version < 5) { | ||
89 | pr_warning("tboot version is invalid: %u\n", tboot->version); | ||
90 | tboot = NULL; | ||
91 | return; | ||
92 | } | ||
93 | |||
94 | pr_info("found shared page at phys addr 0x%llx:\n", | ||
95 | boot_params.tboot_addr); | ||
96 | pr_debug("version: %d\n", tboot->version); | ||
97 | pr_debug("log_addr: 0x%08x\n", tboot->log_addr); | ||
98 | pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry); | ||
99 | pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base); | ||
100 | pr_debug("tboot_size: 0x%x\n", tboot->tboot_size); | ||
101 | } | ||
102 | |||
103 | static pgd_t *tboot_pg_dir; | ||
104 | static struct mm_struct tboot_mm = { | ||
105 | .mm_rb = RB_ROOT, | ||
106 | .pgd = swapper_pg_dir, | ||
107 | .mm_users = ATOMIC_INIT(2), | ||
108 | .mm_count = ATOMIC_INIT(1), | ||
109 | .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), | ||
110 | .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), | ||
111 | .mmlist = LIST_HEAD_INIT(init_mm.mmlist), | ||
112 | .cpu_vm_mask = CPU_MASK_ALL, | ||
113 | }; | ||
114 | |||
115 | static inline void switch_to_tboot_pt(void) | ||
116 | { | ||
117 | write_cr3(virt_to_phys(tboot_pg_dir)); | ||
118 | } | ||
119 | |||
120 | static int map_tboot_page(unsigned long vaddr, unsigned long pfn, | ||
121 | pgprot_t prot) | ||
122 | { | ||
123 | pgd_t *pgd; | ||
124 | pud_t *pud; | ||
125 | pmd_t *pmd; | ||
126 | pte_t *pte; | ||
127 | |||
128 | pgd = pgd_offset(&tboot_mm, vaddr); | ||
129 | pud = pud_alloc(&tboot_mm, pgd, vaddr); | ||
130 | if (!pud) | ||
131 | return -1; | ||
132 | pmd = pmd_alloc(&tboot_mm, pud, vaddr); | ||
133 | if (!pmd) | ||
134 | return -1; | ||
135 | pte = pte_alloc_map(&tboot_mm, pmd, vaddr); | ||
136 | if (!pte) | ||
137 | return -1; | ||
138 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); | ||
139 | pte_unmap(pte); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn, | ||
144 | unsigned long nr) | ||
145 | { | ||
146 | /* Reuse the original kernel mapping */ | ||
147 | tboot_pg_dir = pgd_alloc(&tboot_mm); | ||
148 | if (!tboot_pg_dir) | ||
149 | return -1; | ||
150 | |||
151 | for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) { | ||
152 | if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC)) | ||
153 | return -1; | ||
154 | } | ||
155 | |||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | static void tboot_create_trampoline(void) | ||
160 | { | ||
161 | u32 map_base, map_size; | ||
162 | |||
163 | /* Create identity map for tboot shutdown code. */ | ||
164 | map_base = PFN_DOWN(tboot->tboot_base); | ||
165 | map_size = PFN_UP(tboot->tboot_size); | ||
166 | if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size)) | ||
167 | panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n", | ||
168 | map_base, map_size); | ||
169 | } | ||
170 | |||
171 | #ifdef CONFIG_ACPI_SLEEP | ||
172 | |||
173 | static void add_mac_region(phys_addr_t start, unsigned long size) | ||
174 | { | ||
175 | struct tboot_mac_region *mr; | ||
176 | phys_addr_t end = start + size; | ||
177 | |||
178 | if (start && size) { | ||
179 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; | ||
180 | mr->start = round_down(start, PAGE_SIZE); | ||
181 | mr->size = round_up(end, PAGE_SIZE) - mr->start; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | static int tboot_setup_sleep(void) | ||
186 | { | ||
187 | tboot->num_mac_regions = 0; | ||
188 | |||
189 | /* S3 resume code */ | ||
190 | add_mac_region(acpi_wakeup_address, WAKEUP_SIZE); | ||
191 | |||
192 | #ifdef CONFIG_X86_TRAMPOLINE | ||
193 | /* AP trampoline code */ | ||
194 | add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE); | ||
195 | #endif | ||
196 | |||
197 | /* kernel code + data + bss */ | ||
198 | add_mac_region(virt_to_phys(_text), _end - _text); | ||
199 | |||
200 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | #else /* no CONFIG_ACPI_SLEEP */ | ||
206 | |||
/*
 * Variant built when CONFIG_ACPI_SLEEP is off: an S3 shutdown was requested
 * although the kernel has no S3 support, so this is treated as a bug.
 */
static int tboot_setup_sleep(void)
{
	BUG();

	/* only reached if BUG() returns */
	return -1;
}
213 | |||
214 | #endif | ||
215 | |||
216 | void tboot_shutdown(u32 shutdown_type) | ||
217 | { | ||
218 | void (*shutdown)(void); | ||
219 | |||
220 | if (!tboot_enabled()) | ||
221 | return; | ||
222 | |||
223 | /* | ||
224 | * if we're being called before the 1:1 mapping is set up then just | ||
225 | * return and let the normal shutdown happen; this should only be | ||
226 | * due to very early panic() | ||
227 | */ | ||
228 | if (!tboot_pg_dir) | ||
229 | return; | ||
230 | |||
231 | /* if this is S3 then set regions to MAC */ | ||
232 | if (shutdown_type == TB_SHUTDOWN_S3) | ||
233 | if (tboot_setup_sleep()) | ||
234 | return; | ||
235 | |||
236 | tboot->shutdown_type = shutdown_type; | ||
237 | |||
238 | switch_to_tboot_pt(); | ||
239 | |||
240 | shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry; | ||
241 | shutdown(); | ||
242 | |||
243 | /* should not reach here */ | ||
244 | while (1) | ||
245 | halt(); | ||
246 | } | ||
247 | |||
248 | static void tboot_copy_fadt(const struct acpi_table_fadt *fadt) | ||
249 | { | ||
250 | #define TB_COPY_GAS(tbg, g) \ | ||
251 | tbg.space_id = g.space_id; \ | ||
252 | tbg.bit_width = g.bit_width; \ | ||
253 | tbg.bit_offset = g.bit_offset; \ | ||
254 | tbg.access_width = g.access_width; \ | ||
255 | tbg.address = g.address; | ||
256 | |||
257 | TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block); | ||
258 | TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block); | ||
259 | TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block); | ||
260 | TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block); | ||
261 | |||
262 | /* | ||
263 | * We need phys addr of waking vector, but can't use virt_to_phys() on | ||
264 | * &acpi_gbl_FACS because it is ioremap'ed, so calc from FACS phys | ||
265 | * addr. | ||
266 | */ | ||
267 | tboot->acpi_sinfo.wakeup_vector = fadt->facs + | ||
268 | offsetof(struct acpi_table_facs, firmware_waking_vector); | ||
269 | } | ||
270 | |||
271 | void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) | ||
272 | { | ||
273 | static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = { | ||
274 | /* S0,1,2: */ -1, -1, -1, | ||
275 | /* S3: */ TB_SHUTDOWN_S3, | ||
276 | /* S4: */ TB_SHUTDOWN_S4, | ||
277 | /* S5: */ TB_SHUTDOWN_S5 }; | ||
278 | |||
279 | if (!tboot_enabled()) | ||
280 | return; | ||
281 | |||
282 | tboot_copy_fadt(&acpi_gbl_FADT); | ||
283 | tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control; | ||
284 | tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control; | ||
285 | /* we always use the 32b wakeup vector */ | ||
286 | tboot->acpi_sinfo.vector_width = 32; | ||
287 | |||
288 | if (sleep_state >= ACPI_S_STATE_COUNT || | ||
289 | acpi_shutdown_map[sleep_state] == -1) { | ||
290 | pr_warning("unsupported sleep state 0x%x\n", sleep_state); | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | tboot_shutdown(acpi_shutdown_map[sleep_state]); | ||
295 | } | ||
296 | |||
297 | static atomic_t ap_wfs_count; | ||
298 | |||
299 | static int tboot_wait_for_aps(int num_aps) | ||
300 | { | ||
301 | unsigned long timeout; | ||
302 | |||
303 | timeout = AP_WAIT_TIMEOUT*HZ; | ||
304 | while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps && | ||
305 | timeout) { | ||
306 | mdelay(1); | ||
307 | timeout--; | ||
308 | } | ||
309 | |||
310 | if (timeout) | ||
311 | pr_warning("tboot wait for APs timeout\n"); | ||
312 | |||
313 | return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); | ||
314 | } | ||
315 | |||
316 | static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb, | ||
317 | unsigned long action, void *hcpu) | ||
318 | { | ||
319 | switch (action) { | ||
320 | case CPU_DYING: | ||
321 | atomic_inc(&ap_wfs_count); | ||
322 | if (num_online_cpus() == 1) | ||
323 | if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) | ||
324 | return NOTIFY_BAD; | ||
325 | break; | ||
326 | } | ||
327 | return NOTIFY_OK; | ||
328 | } | ||
329 | |||
330 | static struct notifier_block tboot_cpu_notifier __cpuinitdata = | ||
331 | { | ||
332 | .notifier_call = tboot_cpu_callback, | ||
333 | }; | ||
334 | |||
335 | static __init int tboot_late_init(void) | ||
336 | { | ||
337 | if (!tboot_enabled()) | ||
338 | return 0; | ||
339 | |||
340 | tboot_create_trampoline(); | ||
341 | |||
342 | atomic_set(&ap_wfs_count, 0); | ||
343 | register_hotcpu_notifier(&tboot_cpu_notifier); | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | late_initcall(tboot_late_init); | ||
348 | |||
349 | /* | ||
350 | * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE) | ||
351 | */ | ||
352 | |||
353 | #define TXT_PUB_CONFIG_REGS_BASE 0xfed30000 | ||
354 | #define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000 | ||
355 | |||
356 | /* # pages for each config regs space - used by fixmap */ | ||
357 | #define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \ | ||
358 | TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT) | ||
359 | |||
360 | /* offsets from pub/priv config space */ | ||
361 | #define TXTCR_HEAP_BASE 0x0300 | ||
362 | #define TXTCR_HEAP_SIZE 0x0308 | ||
363 | |||
364 | #define SHA1_SIZE 20 | ||
365 | |||
366 | struct sha1_hash { | ||
367 | u8 hash[SHA1_SIZE]; | ||
368 | }; | ||
369 | |||
370 | struct sinit_mle_data { | ||
371 | u32 version; /* currently 6 */ | ||
372 | struct sha1_hash bios_acm_id; | ||
373 | u32 edx_senter_flags; | ||
374 | u64 mseg_valid; | ||
375 | struct sha1_hash sinit_hash; | ||
376 | struct sha1_hash mle_hash; | ||
377 | struct sha1_hash stm_hash; | ||
378 | struct sha1_hash lcp_policy_hash; | ||
379 | u32 lcp_policy_control; | ||
380 | u32 rlp_wakeup_addr; | ||
381 | u32 reserved; | ||
382 | u32 num_mdrs; | ||
383 | u32 mdrs_off; | ||
384 | u32 num_vtd_dmars; | ||
385 | u32 vtd_dmars_off; | ||
386 | } __packed; | ||
387 | |||
388 | struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl) | ||
389 | { | ||
390 | void *heap_base, *heap_ptr, *config; | ||
391 | |||
392 | if (!tboot_enabled()) | ||
393 | return dmar_tbl; | ||
394 | |||
395 | /* | ||
396 | * ACPI tables may not be DMA protected by tboot, so use DMAR copy | ||
397 | * SINIT saved in SinitMleData in TXT heap (which is DMA protected) | ||
398 | */ | ||
399 | |||
400 | /* map config space in order to get heap addr */ | ||
401 | config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES * | ||
402 | PAGE_SIZE); | ||
403 | if (!config) | ||
404 | return NULL; | ||
405 | |||
406 | /* now map TXT heap */ | ||
407 | heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE), | ||
408 | *(u64 *)(config + TXTCR_HEAP_SIZE)); | ||
409 | iounmap(config); | ||
410 | if (!heap_base) | ||
411 | return NULL; | ||
412 | |||
413 | /* walk heap to SinitMleData */ | ||
414 | /* skip BiosData */ | ||
415 | heap_ptr = heap_base + *(u64 *)heap_base; | ||
416 | /* skip OsMleData */ | ||
417 | heap_ptr += *(u64 *)heap_ptr; | ||
418 | /* skip OsSinitData */ | ||
419 | heap_ptr += *(u64 *)heap_ptr; | ||
420 | /* now points to SinitMleDataSize; set to SinitMleData */ | ||
421 | heap_ptr += sizeof(u64); | ||
422 | /* get addr of DMAR table */ | ||
423 | dmar_tbl = (struct acpi_table_header *)(heap_ptr + | ||
424 | ((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off - | ||
425 | sizeof(u64)); | ||
426 | |||
427 | /* don't unmap heap because dmar.c needs access to this */ | ||
428 | |||
429 | return dmar_tbl; | ||
430 | } | ||
431 | |||
432 | int tboot_force_iommu(void) | ||
433 | { | ||
434 | if (!tboot_enabled()) | ||
435 | return 0; | ||
436 | |||
437 | if (no_iommu || swiotlb || dmar_disabled) | ||
438 | pr_warning("Forcing Intel-IOMMU to enabled\n"); | ||
439 | |||
440 | dmar_disabled = 0; | ||
441 | #ifdef CONFIG_SWIOTLB | ||
442 | swiotlb = 0; | ||
443 | #endif | ||
444 | no_iommu = 0; | ||
445 | |||
446 | return 1; | ||
447 | } | ||
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9fc178255c04..0ccb57d5ee35 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -348,15 +348,12 @@ SECTIONS | |||
348 | _end = .; | 348 | _end = .; |
349 | } | 349 | } |
350 | 350 | ||
351 | /* Sections to be discarded */ | ||
352 | /DISCARD/ : { | ||
353 | *(.exitcall.exit) | ||
354 | *(.eh_frame) | ||
355 | *(.discard) | ||
356 | } | ||
357 | |||
358 | STABS_DEBUG | 351 | STABS_DEBUG |
359 | DWARF_DEBUG | 352 | DWARF_DEBUG |
353 | |||
354 | /* Sections to be discarded */ | ||
355 | DISCARDS | ||
356 | /DISCARD/ : { *(.eh_frame) } | ||
360 | } | 357 | } |
361 | 358 | ||
362 | 359 | ||