aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-09-19 05:27:32 -0400
committerIngo Molnar <mingo@elte.hu>2009-09-19 05:28:41 -0400
commit929bf0d0156562ce631728b6fa53d68004d456d2 (patch)
tree739063990a8077b29ef97e69d73bce94573daae4 /arch/x86/kernel
parentdef0a9b2573e00ab0b486cb5382625203ab4c4a6 (diff)
parent202c4675c55ddf6b443c7e057d2dff6b42ef71aa (diff)
Merge branch 'linus' into perfcounters/core
Merge reason: Bring in tracing changes we depend on. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile1
-rw-r--r--arch/x86/kernel/acpi/boot.c105
-rw-r--r--arch/x86/kernel/alternative.c58
-rw-r--r--arch/x86/kernel/apic/apic.c110
-rw-r--r--arch/x86/kernel/apic/es7000_32.c2
-rw-r--r--arch/x86/kernel/apic/io_apic.c324
-rw-r--r--arch/x86/kernel/apic/ipi.c2
-rw-r--r--arch/x86/kernel/apic/nmi.c2
-rw-r--r--arch/x86/kernel/apic/probe_64.c6
-rw-r--r--arch/x86/kernel/apm_32.c31
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/amd.c127
-rw-r--r--arch/x86/kernel/cpu/bugs.c10
-rw-r--r--arch/x86/kernel/cpu/bugs_64.c2
-rw-r--r--arch/x86/kernel/cpu/common.c70
-rw-r--r--arch/x86/kernel/cpu/cpu_debug.c4
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c88
-rw-r--r--arch/x86/kernel/cpu/cyrix.c19
-rw-r--r--arch/x86/kernel/cpu/hypervisor.c5
-rw-r--r--arch/x86/kernel/cpu/intel.c17
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c148
-rw-r--r--arch/x86/kernel/cpu/mcheck/Makefile5
-rw-r--r--arch/x86/kernel/cpu/mcheck/k7.c116
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c158
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-internal.h15
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-severity.c8
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c324
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel.c10
-rw-r--r--arch/x86/kernel/cpu/mcheck/non-fatal.c94
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c163
-rw-r--r--arch/x86/kernel/cpu/mcheck/p6.c127
-rw-r--r--arch/x86/kernel/cpu/mcheck/therm_throt.c13
-rw-r--r--arch/x86/kernel/cpu/mtrr/amd.c97
-rw-r--r--arch/x86/kernel/cpu/mtrr/centaur.c168
-rw-r--r--arch/x86/kernel/cpu/mtrr/cleanup.c390
-rw-r--r--arch/x86/kernel/cpu/mtrr/cyrix.c94
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c304
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c135
-rw-r--r--arch/x86/kernel/cpu/mtrr/main.c499
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.h19
-rw-r--r--arch/x86/kernel/cpu/mtrr/state.c68
-rw-r--r--arch/x86/kernel/cpu/perf_counter.c14
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c45
-rw-r--r--arch/x86/kernel/cpu/proc.c4
-rw-r--r--arch/x86/kernel/cpu/sched.c55
-rw-r--r--arch/x86/kernel/cpu/vmware.c18
-rw-r--r--arch/x86/kernel/doublefault_32.c4
-rw-r--r--arch/x86/kernel/ds.c6
-rw-r--r--arch/x86/kernel/dumpstack.c1
-rw-r--r--arch/x86/kernel/e820.c2
-rw-r--r--arch/x86/kernel/entry_64.S6
-rw-r--r--arch/x86/kernel/head_32.S1
-rw-r--r--arch/x86/kernel/irq.c4
-rw-r--r--arch/x86/kernel/irq_32.c5
-rw-r--r--arch/x86/kernel/irqinit.c2
-rw-r--r--arch/x86/kernel/kvm.c7
-rw-r--r--arch/x86/kernel/kvmclock.c4
-rw-r--r--arch/x86/kernel/mpparse.c10
-rw-r--r--arch/x86/kernel/msr.c61
-rw-r--r--arch/x86/kernel/paravirt.c3
-rw-r--r--arch/x86/kernel/pci-dma.c4
-rw-r--r--arch/x86/kernel/process_32.c30
-rw-r--r--arch/x86/kernel/process_64.c36
-rw-r--r--arch/x86/kernel/quirks.c2
-rw-r--r--arch/x86/kernel/reboot.c7
-rw-r--r--arch/x86/kernel/setup.c23
-rw-r--r--arch/x86/kernel/setup_percpu.c364
-rw-r--r--arch/x86/kernel/signal.c2
-rw-r--r--arch/x86/kernel/smpboot.c19
-rw-r--r--arch/x86/kernel/step.c9
-rw-r--r--arch/x86/kernel/tboot.c447
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/traps.c54
-rw-r--r--arch/x86/kernel/vmlinux.lds.S11
75 files changed, 2597 insertions, 2617 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 430d5b24af7b..832cb838cb48 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -52,6 +52,7 @@ obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o
52obj-$(CONFIG_X86_32) += tls.o 52obj-$(CONFIG_X86_32) += tls.o
53obj-$(CONFIG_IA32_EMULATION) += tls.o 53obj-$(CONFIG_IA32_EMULATION) += tls.o
54obj-y += step.o 54obj-y += step.o
55obj-$(CONFIG_INTEL_TXT) += tboot.o
55obj-$(CONFIG_STACKTRACE) += stacktrace.o 56obj-$(CONFIG_STACKTRACE) += stacktrace.o
56obj-y += cpu/ 57obj-y += cpu/
57obj-y += acpi/ 58obj-y += acpi/
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 6b8ca3a0285d..67e929b89875 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -833,106 +833,6 @@ static int __init acpi_parse_madt_lapic_entries(void)
833extern int es7000_plat; 833extern int es7000_plat;
834#endif 834#endif
835 835
836static struct {
837 int gsi_base;
838 int gsi_end;
839} mp_ioapic_routing[MAX_IO_APICS];
840
841int mp_find_ioapic(int gsi)
842{
843 int i = 0;
844
845 /* Find the IOAPIC that manages this GSI. */
846 for (i = 0; i < nr_ioapics; i++) {
847 if ((gsi >= mp_ioapic_routing[i].gsi_base)
848 && (gsi <= mp_ioapic_routing[i].gsi_end))
849 return i;
850 }
851
852 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
853 return -1;
854}
855
856int mp_find_ioapic_pin(int ioapic, int gsi)
857{
858 if (WARN_ON(ioapic == -1))
859 return -1;
860 if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end))
861 return -1;
862
863 return gsi - mp_ioapic_routing[ioapic].gsi_base;
864}
865
866static u8 __init uniq_ioapic_id(u8 id)
867{
868#ifdef CONFIG_X86_32
869 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
870 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
871 return io_apic_get_unique_id(nr_ioapics, id);
872 else
873 return id;
874#else
875 int i;
876 DECLARE_BITMAP(used, 256);
877 bitmap_zero(used, 256);
878 for (i = 0; i < nr_ioapics; i++) {
879 struct mpc_ioapic *ia = &mp_ioapics[i];
880 __set_bit(ia->apicid, used);
881 }
882 if (!test_bit(id, used))
883 return id;
884 return find_first_zero_bit(used, 256);
885#endif
886}
887
888static int bad_ioapic(unsigned long address)
889{
890 if (nr_ioapics >= MAX_IO_APICS) {
891 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
892 "(found %d)\n", MAX_IO_APICS, nr_ioapics);
893 panic("Recompile kernel with bigger MAX_IO_APICS!\n");
894 }
895 if (!address) {
896 printk(KERN_ERR "WARNING: Bogus (zero) I/O APIC address"
897 " found in table, skipping!\n");
898 return 1;
899 }
900 return 0;
901}
902
903void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
904{
905 int idx = 0;
906
907 if (bad_ioapic(address))
908 return;
909
910 idx = nr_ioapics;
911
912 mp_ioapics[idx].type = MP_IOAPIC;
913 mp_ioapics[idx].flags = MPC_APIC_USABLE;
914 mp_ioapics[idx].apicaddr = address;
915
916 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
917 mp_ioapics[idx].apicid = uniq_ioapic_id(id);
918 mp_ioapics[idx].apicver = io_apic_get_version(idx);
919
920 /*
921 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
922 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
923 */
924 mp_ioapic_routing[idx].gsi_base = gsi_base;
925 mp_ioapic_routing[idx].gsi_end = gsi_base +
926 io_apic_get_redir_entries(idx);
927
928 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
929 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
930 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
931 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
932
933 nr_ioapics++;
934}
935
936int __init acpi_probe_gsi(void) 836int __init acpi_probe_gsi(void)
937{ 837{
938 int idx; 838 int idx;
@@ -947,7 +847,7 @@ int __init acpi_probe_gsi(void)
947 847
948 max_gsi = 0; 848 max_gsi = 0;
949 for (idx = 0; idx < nr_ioapics; idx++) { 849 for (idx = 0; idx < nr_ioapics; idx++) {
950 gsi = mp_ioapic_routing[idx].gsi_end; 850 gsi = mp_gsi_routing[idx].gsi_end;
951 851
952 if (gsi > max_gsi) 852 if (gsi > max_gsi)
953 max_gsi = gsi; 853 max_gsi = gsi;
@@ -1179,9 +1079,8 @@ static int __init acpi_parse_madt_ioapic_entries(void)
1179 * If MPS is present, it will handle them, 1079 * If MPS is present, it will handle them,
1180 * otherwise the system will stay in PIC mode 1080 * otherwise the system will stay in PIC mode
1181 */ 1081 */
1182 if (acpi_disabled || acpi_noirq) { 1082 if (acpi_disabled || acpi_noirq)
1183 return -ENODEV; 1083 return -ENODEV;
1184 }
1185 1084
1186 if (!cpu_has_apic) 1085 if (!cpu_has_apic)
1187 return -ENODEV; 1086 return -ENODEV;
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index f57658702571..de7353c0ce9c 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -2,6 +2,7 @@
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/mutex.h> 3#include <linux/mutex.h>
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/stringify.h>
5#include <linux/kprobes.h> 6#include <linux/kprobes.h>
6#include <linux/mm.h> 7#include <linux/mm.h>
7#include <linux/vmalloc.h> 8#include <linux/vmalloc.h>
@@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly);
32#define smp_alt_once 1 33#define smp_alt_once 1
33#endif 34#endif
34 35
35static int debug_alternative; 36static int __initdata_or_module debug_alternative;
36 37
37static int __init debug_alt(char *str) 38static int __init debug_alt(char *str)
38{ 39{
@@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str)
51__setup("noreplace-smp", setup_noreplace_smp); 52__setup("noreplace-smp", setup_noreplace_smp);
52 53
53#ifdef CONFIG_PARAVIRT 54#ifdef CONFIG_PARAVIRT
54static int noreplace_paravirt = 0; 55static int __initdata_or_module noreplace_paravirt = 0;
55 56
56static int __init setup_noreplace_paravirt(char *str) 57static int __init setup_noreplace_paravirt(char *str)
57{ 58{
@@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt);
64#define DPRINTK(fmt, args...) if (debug_alternative) \ 65#define DPRINTK(fmt, args...) if (debug_alternative) \
65 printk(KERN_DEBUG fmt, args) 66 printk(KERN_DEBUG fmt, args)
66 67
67#ifdef GENERIC_NOP1 68#if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64)
68/* Use inline assembly to define this because the nops are defined 69/* Use inline assembly to define this because the nops are defined
69 as inline assembly strings in the include files and we cannot 70 as inline assembly strings in the include files and we cannot
70 get them easily into strings. */ 71 get them easily into strings. */
71asm("\t.section .rodata, \"a\"\nintelnops: " 72asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: "
72 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 73 GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6
73 GENERIC_NOP7 GENERIC_NOP8 74 GENERIC_NOP7 GENERIC_NOP8
74 "\t.previous"); 75 "\t.previous");
75extern const unsigned char intelnops[]; 76extern const unsigned char intelnops[];
76static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { 77static const unsigned char *const __initconst_or_module
78intel_nops[ASM_NOP_MAX+1] = {
77 NULL, 79 NULL,
78 intelnops, 80 intelnops,
79 intelnops + 1, 81 intelnops + 1,
@@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = {
87#endif 89#endif
88 90
89#ifdef K8_NOP1 91#ifdef K8_NOP1
90asm("\t.section .rodata, \"a\"\nk8nops: " 92asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: "
91 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 93 K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6
92 K8_NOP7 K8_NOP8 94 K8_NOP7 K8_NOP8
93 "\t.previous"); 95 "\t.previous");
94extern const unsigned char k8nops[]; 96extern const unsigned char k8nops[];
95static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { 97static const unsigned char *const __initconst_or_module
98k8_nops[ASM_NOP_MAX+1] = {
96 NULL, 99 NULL,
97 k8nops, 100 k8nops,
98 k8nops + 1, 101 k8nops + 1,
@@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = {
105}; 108};
106#endif 109#endif
107 110
108#ifdef K7_NOP1 111#if defined(K7_NOP1) && !defined(CONFIG_X86_64)
109asm("\t.section .rodata, \"a\"\nk7nops: " 112asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: "
110 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 113 K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6
111 K7_NOP7 K7_NOP8 114 K7_NOP7 K7_NOP8
112 "\t.previous"); 115 "\t.previous");
113extern const unsigned char k7nops[]; 116extern const unsigned char k7nops[];
114static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { 117static const unsigned char *const __initconst_or_module
118k7_nops[ASM_NOP_MAX+1] = {
115 NULL, 119 NULL,
116 k7nops, 120 k7nops,
117 k7nops + 1, 121 k7nops + 1,
@@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = {
125#endif 129#endif
126 130
127#ifdef P6_NOP1 131#ifdef P6_NOP1
128asm("\t.section .rodata, \"a\"\np6nops: " 132asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: "
129 P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 133 P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6
130 P6_NOP7 P6_NOP8 134 P6_NOP7 P6_NOP8
131 "\t.previous"); 135 "\t.previous");
132extern const unsigned char p6nops[]; 136extern const unsigned char p6nops[];
133static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { 137static const unsigned char *const __initconst_or_module
138p6_nops[ASM_NOP_MAX+1] = {
134 NULL, 139 NULL,
135 p6nops, 140 p6nops,
136 p6nops + 1, 141 p6nops + 1,
@@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
146#ifdef CONFIG_X86_64 151#ifdef CONFIG_X86_64
147 152
148extern char __vsyscall_0; 153extern char __vsyscall_0;
149const unsigned char *const *find_nop_table(void) 154static const unsigned char *const *__init_or_module find_nop_table(void)
150{ 155{
151 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && 156 if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL &&
152 boot_cpu_has(X86_FEATURE_NOPL)) 157 boot_cpu_has(X86_FEATURE_NOPL))
@@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void)
157 162
158#else /* CONFIG_X86_64 */ 163#else /* CONFIG_X86_64 */
159 164
160const unsigned char *const *find_nop_table(void) 165static const unsigned char *const *__init_or_module find_nop_table(void)
161{ 166{
162 if (boot_cpu_has(X86_FEATURE_K8)) 167 if (boot_cpu_has(X86_FEATURE_K8))
163 return k8_nops; 168 return k8_nops;
@@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void)
172#endif /* CONFIG_X86_64 */ 177#endif /* CONFIG_X86_64 */
173 178
174/* Use this to add nops to a buffer, then text_poke the whole buffer. */ 179/* Use this to add nops to a buffer, then text_poke the whole buffer. */
175void add_nops(void *insns, unsigned int len) 180static void __init_or_module add_nops(void *insns, unsigned int len)
176{ 181{
177 const unsigned char *const *noptable = find_nop_table(); 182 const unsigned char *const *noptable = find_nop_table();
178 183
@@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len)
185 len -= noplen; 190 len -= noplen;
186 } 191 }
187} 192}
188EXPORT_SYMBOL_GPL(add_nops);
189 193
190extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; 194extern struct alt_instr __alt_instructions[], __alt_instructions_end[];
191extern u8 *__smp_locks[], *__smp_locks_end[]; 195extern u8 *__smp_locks[], *__smp_locks_end[];
196static void *text_poke_early(void *addr, const void *opcode, size_t len);
192 197
193/* Replace instructions with better alternatives for this CPU type. 198/* Replace instructions with better alternatives for this CPU type.
194 This runs before SMP is initialized to avoid SMP problems with 199 This runs before SMP is initialized to avoid SMP problems with
@@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[];
196 APs have less capabilities than the boot processor are not handled. 201 APs have less capabilities than the boot processor are not handled.
197 Tough. Make sure you disable such features by hand. */ 202 Tough. Make sure you disable such features by hand. */
198 203
199void apply_alternatives(struct alt_instr *start, struct alt_instr *end) 204void __init_or_module apply_alternatives(struct alt_instr *start,
205 struct alt_instr *end)
200{ 206{
201 struct alt_instr *a; 207 struct alt_instr *a;
202 char insnbuf[MAX_PATCH_LEN]; 208 char insnbuf[MAX_PATCH_LEN];
@@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules);
279static DEFINE_MUTEX(smp_alt); 285static DEFINE_MUTEX(smp_alt);
280static int smp_mode = 1; /* protected by smp_alt */ 286static int smp_mode = 1; /* protected by smp_alt */
281 287
282void alternatives_smp_module_add(struct module *mod, char *name, 288void __init_or_module alternatives_smp_module_add(struct module *mod,
283 void *locks, void *locks_end, 289 char *name,
284 void *text, void *text_end) 290 void *locks, void *locks_end,
291 void *text, void *text_end)
285{ 292{
286 struct smp_alt_module *smp; 293 struct smp_alt_module *smp;
287 294
@@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name,
317 mutex_unlock(&smp_alt); 324 mutex_unlock(&smp_alt);
318} 325}
319 326
320void alternatives_smp_module_del(struct module *mod) 327void __init_or_module alternatives_smp_module_del(struct module *mod)
321{ 328{
322 struct smp_alt_module *item; 329 struct smp_alt_module *item;
323 330
@@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp)
386#endif 393#endif
387 394
388#ifdef CONFIG_PARAVIRT 395#ifdef CONFIG_PARAVIRT
389void apply_paravirt(struct paravirt_patch_site *start, 396void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
390 struct paravirt_patch_site *end) 397 struct paravirt_patch_site *end)
391{ 398{
392 struct paravirt_patch_site *p; 399 struct paravirt_patch_site *p;
393 char insnbuf[MAX_PATCH_LEN]; 400 char insnbuf[MAX_PATCH_LEN];
@@ -485,13 +492,14 @@ void __init alternative_instructions(void)
485 * instructions. And on the local CPU you need to be protected again NMI or MCE 492 * instructions. And on the local CPU you need to be protected again NMI or MCE
486 * handlers seeing an inconsistent instruction while you patch. 493 * handlers seeing an inconsistent instruction while you patch.
487 */ 494 */
488void *text_poke_early(void *addr, const void *opcode, size_t len) 495static void *__init_or_module text_poke_early(void *addr, const void *opcode,
496 size_t len)
489{ 497{
490 unsigned long flags; 498 unsigned long flags;
491 local_irq_save(flags); 499 local_irq_save(flags);
492 memcpy(addr, opcode, len); 500 memcpy(addr, opcode, len);
493 local_irq_restore(flags);
494 sync_core(); 501 sync_core();
502 local_irq_restore(flags);
495 /* Could also do a CLFLUSH here to speed up CPU recovery; but 503 /* Could also do a CLFLUSH here to speed up CPU recovery; but
496 that causes hangs on some VIA CPUs. */ 504 that causes hangs on some VIA CPUs. */
497 return addr; 505 return addr;
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 0a1c2830ec66..159740decc41 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -49,6 +49,7 @@
49#include <asm/mtrr.h> 49#include <asm/mtrr.h>
50#include <asm/smp.h> 50#include <asm/smp.h>
51#include <asm/mce.h> 51#include <asm/mce.h>
52#include <asm/kvm_para.h>
52 53
53unsigned int num_processors; 54unsigned int num_processors;
54 55
@@ -1361,52 +1362,80 @@ void enable_x2apic(void)
1361} 1362}
1362#endif /* CONFIG_X86_X2APIC */ 1363#endif /* CONFIG_X86_X2APIC */
1363 1364
1364void __init enable_IR_x2apic(void) 1365int __init enable_IR(void)
1365{ 1366{
1366#ifdef CONFIG_INTR_REMAP 1367#ifdef CONFIG_INTR_REMAP
1367 int ret;
1368 unsigned long flags;
1369 struct IO_APIC_route_entry **ioapic_entries = NULL;
1370
1371 ret = dmar_table_init();
1372 if (ret) {
1373 pr_debug("dmar_table_init() failed with %d:\n", ret);
1374 goto ir_failed;
1375 }
1376
1377 if (!intr_remapping_supported()) { 1368 if (!intr_remapping_supported()) {
1378 pr_debug("intr-remapping not supported\n"); 1369 pr_debug("intr-remapping not supported\n");
1379 goto ir_failed; 1370 return 0;
1380 } 1371 }
1381 1372
1382
1383 if (!x2apic_preenabled && skip_ioapic_setup) { 1373 if (!x2apic_preenabled && skip_ioapic_setup) {
1384 pr_info("Skipped enabling intr-remap because of skipping " 1374 pr_info("Skipped enabling intr-remap because of skipping "
1385 "io-apic setup\n"); 1375 "io-apic setup\n");
1386 return; 1376 return 0;
1387 } 1377 }
1388 1378
1379 if (enable_intr_remapping(x2apic_supported()))
1380 return 0;
1381
1382 pr_info("Enabled Interrupt-remapping\n");
1383
1384 return 1;
1385
1386#endif
1387 return 0;
1388}
1389
1390void __init enable_IR_x2apic(void)
1391{
1392 unsigned long flags;
1393 struct IO_APIC_route_entry **ioapic_entries = NULL;
1394 int ret, x2apic_enabled = 0;
1395 int dmar_table_init_ret = 0;
1396
1397#ifdef CONFIG_INTR_REMAP
1398 dmar_table_init_ret = dmar_table_init();
1399 if (dmar_table_init_ret)
1400 pr_debug("dmar_table_init() failed with %d:\n",
1401 dmar_table_init_ret);
1402#endif
1403
1389 ioapic_entries = alloc_ioapic_entries(); 1404 ioapic_entries = alloc_ioapic_entries();
1390 if (!ioapic_entries) { 1405 if (!ioapic_entries) {
1391 pr_info("Allocate ioapic_entries failed: %d\n", ret); 1406 pr_err("Allocate ioapic_entries failed\n");
1392 goto end; 1407 goto out;
1393 } 1408 }
1394 1409
1395 ret = save_IO_APIC_setup(ioapic_entries); 1410 ret = save_IO_APIC_setup(ioapic_entries);
1396 if (ret) { 1411 if (ret) {
1397 pr_info("Saving IO-APIC state failed: %d\n", ret); 1412 pr_info("Saving IO-APIC state failed: %d\n", ret);
1398 goto end; 1413 goto out;
1399 } 1414 }
1400 1415
1401 local_irq_save(flags); 1416 local_irq_save(flags);
1402 mask_IO_APIC_setup(ioapic_entries);
1403 mask_8259A(); 1417 mask_8259A();
1418 mask_IO_APIC_setup(ioapic_entries);
1404 1419
1405 ret = enable_intr_remapping(x2apic_supported()); 1420 if (dmar_table_init_ret)
1406 if (ret) 1421 ret = 0;
1407 goto end_restore; 1422 else
1423 ret = enable_IR();
1408 1424
1409 pr_info("Enabled Interrupt-remapping\n"); 1425 if (!ret) {
1426 /* IR is required if there is APIC ID > 255 even when running
1427 * under KVM
1428 */
1429 if (max_physical_apicid > 255 || !kvm_para_available())
1430 goto nox2apic;
1431 /*
1432 * without IR all CPUs can be addressed by IOAPIC/MSI
1433 * only in physical mode
1434 */
1435 x2apic_force_phys();
1436 }
1437
1438 x2apic_enabled = 1;
1410 1439
1411 if (x2apic_supported() && !x2apic_mode) { 1440 if (x2apic_supported() && !x2apic_mode) {
1412 x2apic_mode = 1; 1441 x2apic_mode = 1;
@@ -1414,41 +1443,25 @@ void __init enable_IR_x2apic(void)
1414 pr_info("Enabled x2apic\n"); 1443 pr_info("Enabled x2apic\n");
1415 } 1444 }
1416 1445
1417end_restore: 1446nox2apic:
1418 if (ret) 1447 if (!ret) /* IR enabling failed */
1419 /*
1420 * IR enabling failed
1421 */
1422 restore_IO_APIC_setup(ioapic_entries); 1448 restore_IO_APIC_setup(ioapic_entries);
1423
1424 unmask_8259A(); 1449 unmask_8259A();
1425 local_irq_restore(flags); 1450 local_irq_restore(flags);
1426 1451
1427end: 1452out:
1428 if (ioapic_entries) 1453 if (ioapic_entries)
1429 free_ioapic_entries(ioapic_entries); 1454 free_ioapic_entries(ioapic_entries);
1430 1455
1431 if (!ret) 1456 if (x2apic_enabled)
1432 return; 1457 return;
1433 1458
1434ir_failed:
1435 if (x2apic_preenabled) 1459 if (x2apic_preenabled)
1436 panic("x2apic enabled by bios. But IR enabling failed"); 1460 panic("x2apic: enabled by BIOS but kernel init failed.");
1437 else if (cpu_has_x2apic) 1461 else if (cpu_has_x2apic)
1438 pr_info("Not enabling x2apic,Intr-remapping\n"); 1462 pr_info("Not enabling x2apic, Intr-remapping init failed.\n");
1439#else
1440 if (!cpu_has_x2apic)
1441 return;
1442
1443 if (x2apic_preenabled)
1444 panic("x2apic enabled prior OS handover,"
1445 " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP");
1446#endif
1447
1448 return;
1449} 1463}
1450 1464
1451
1452#ifdef CONFIG_X86_64 1465#ifdef CONFIG_X86_64
1453/* 1466/*
1454 * Detect and enable local APICs on non-SMP boards. 1467 * Detect and enable local APICs on non-SMP boards.
@@ -1549,8 +1562,6 @@ no_apic:
1549#ifdef CONFIG_X86_64 1562#ifdef CONFIG_X86_64
1550void __init early_init_lapic_mapping(void) 1563void __init early_init_lapic_mapping(void)
1551{ 1564{
1552 unsigned long phys_addr;
1553
1554 /* 1565 /*
1555 * If no local APIC can be found then go out 1566 * If no local APIC can be found then go out
1556 * : it means there is no mpatable and MADT 1567 * : it means there is no mpatable and MADT
@@ -1558,11 +1569,9 @@ void __init early_init_lapic_mapping(void)
1558 if (!smp_found_config) 1569 if (!smp_found_config)
1559 return; 1570 return;
1560 1571
1561 phys_addr = mp_lapic_addr; 1572 set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr);
1562
1563 set_fixmap_nocache(FIX_APIC_BASE, phys_addr);
1564 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", 1573 apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n",
1565 APIC_BASE, phys_addr); 1574 APIC_BASE, mp_lapic_addr);
1566 1575
1567 /* 1576 /*
1568 * Fetch the APIC ID of the BSP in case we have a 1577 * Fetch the APIC ID of the BSP in case we have a
@@ -1651,7 +1660,6 @@ int __init APIC_init_uniprocessor(void)
1651 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { 1660 APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) {
1652 pr_err("BIOS bug, local APIC 0x%x not detected!...\n", 1661 pr_err("BIOS bug, local APIC 0x%x not detected!...\n",
1653 boot_cpu_physical_apicid); 1662 boot_cpu_physical_apicid);
1654 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
1655 return -1; 1663 return -1;
1656 } 1664 }
1657#endif 1665#endif
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
index 8952a5890281..89174f847b49 100644
--- a/arch/x86/kernel/apic/es7000_32.c
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -167,7 +167,7 @@ static int es7000_apic_is_cluster(void)
167{ 167{
168 /* MPENTIUMIII */ 168 /* MPENTIUMIII */
169 if (boot_cpu_data.x86 == 6 && 169 if (boot_cpu_data.x86 == 6 &&
170 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) 170 (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11))
171 return 1; 171 return 1;
172 172
173 return 0; 173 return 0;
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index d2ed6c5ddc80..3c8f9e75d038 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -66,6 +66,8 @@
66#include <asm/apic.h> 66#include <asm/apic.h>
67 67
68#define __apicdebuginit(type) static type __init 68#define __apicdebuginit(type) static type __init
69#define for_each_irq_pin(entry, head) \
70 for (entry = head; entry; entry = entry->next)
69 71
70/* 72/*
71 * Is the SiS APIC rmw bug present ? 73 * Is the SiS APIC rmw bug present ?
@@ -85,6 +87,9 @@ int nr_ioapic_registers[MAX_IO_APICS];
85struct mpc_ioapic mp_ioapics[MAX_IO_APICS]; 87struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86int nr_ioapics; 88int nr_ioapics;
87 89
90/* IO APIC gsi routing info */
91struct mp_ioapic_gsi mp_gsi_routing[MAX_IO_APICS];
92
88/* MP IRQ source entries */ 93/* MP IRQ source entries */
89struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; 94struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
90 95
@@ -116,15 +121,6 @@ static int __init parse_noapic(char *str)
116} 121}
117early_param("noapic", parse_noapic); 122early_param("noapic", parse_noapic);
118 123
119struct irq_pin_list;
120
121/*
122 * This is performance-critical, we want to do it O(1)
123 *
124 * the indexing order of this array favors 1:1 mappings
125 * between pins and IRQs.
126 */
127
128struct irq_pin_list { 124struct irq_pin_list {
129 int apic, pin; 125 int apic, pin;
130 struct irq_pin_list *next; 126 struct irq_pin_list *next;
@@ -139,6 +135,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node)
139 return pin; 135 return pin;
140} 136}
141 137
138/*
139 * This is performance-critical, we want to do it O(1)
140 *
141 * Most irqs are mapped 1:1 with pins.
142 */
142struct irq_cfg { 143struct irq_cfg {
143 struct irq_pin_list *irq_2_pin; 144 struct irq_pin_list *irq_2_pin;
144 cpumask_var_t domain; 145 cpumask_var_t domain;
@@ -414,13 +415,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
414 unsigned long flags; 415 unsigned long flags;
415 416
416 spin_lock_irqsave(&ioapic_lock, flags); 417 spin_lock_irqsave(&ioapic_lock, flags);
417 entry = cfg->irq_2_pin; 418 for_each_irq_pin(entry, cfg->irq_2_pin) {
418 for (;;) {
419 unsigned int reg; 419 unsigned int reg;
420 int pin; 420 int pin;
421 421
422 if (!entry)
423 break;
424 pin = entry->pin; 422 pin = entry->pin;
425 reg = io_apic_read(entry->apic, 0x10 + pin*2); 423 reg = io_apic_read(entry->apic, 0x10 + pin*2);
426 /* Is the remote IRR bit set? */ 424 /* Is the remote IRR bit set? */
@@ -428,9 +426,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg)
428 spin_unlock_irqrestore(&ioapic_lock, flags); 426 spin_unlock_irqrestore(&ioapic_lock, flags);
429 return true; 427 return true;
430 } 428 }
431 if (!entry->next)
432 break;
433 entry = entry->next;
434 } 429 }
435 spin_unlock_irqrestore(&ioapic_lock, flags); 430 spin_unlock_irqrestore(&ioapic_lock, flags);
436 431
@@ -498,72 +493,68 @@ static void ioapic_mask_entry(int apic, int pin)
498 * shared ISA-space IRQs, so we have to support them. We are super 493 * shared ISA-space IRQs, so we have to support them. We are super
499 * fast in the common case, and fast for shared ISA-space IRQs. 494 * fast in the common case, and fast for shared ISA-space IRQs.
500 */ 495 */
501static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) 496static int
497add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin)
502{ 498{
503 struct irq_pin_list *entry; 499 struct irq_pin_list **last, *entry;
504 500
505 entry = cfg->irq_2_pin; 501 /* don't allow duplicates */
506 if (!entry) { 502 last = &cfg->irq_2_pin;
507 entry = get_one_free_irq_2_pin(node); 503 for_each_irq_pin(entry, cfg->irq_2_pin) {
508 if (!entry) {
509 printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n",
510 apic, pin);
511 return;
512 }
513 cfg->irq_2_pin = entry;
514 entry->apic = apic;
515 entry->pin = pin;
516 return;
517 }
518
519 while (entry->next) {
520 /* not again, please */
521 if (entry->apic == apic && entry->pin == pin) 504 if (entry->apic == apic && entry->pin == pin)
522 return; 505 return 0;
523 506 last = &entry->next;
524 entry = entry->next;
525 } 507 }
526 508
527 entry->next = get_one_free_irq_2_pin(node); 509 entry = get_one_free_irq_2_pin(node);
528 entry = entry->next; 510 if (!entry) {
511 printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n",
512 node, apic, pin);
513 return -ENOMEM;
514 }
529 entry->apic = apic; 515 entry->apic = apic;
530 entry->pin = pin; 516 entry->pin = pin;
517
518 *last = entry;
519 return 0;
520}
521
522static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin)
523{
524 if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin))
525 panic("IO-APIC: failed to add irq-pin. Can not proceed\n");
531} 526}
532 527
533/* 528/*
534 * Reroute an IRQ to a different pin. 529 * Reroute an IRQ to a different pin.
535 */ 530 */
536static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, 531static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node,
537 int oldapic, int oldpin, 532 int oldapic, int oldpin,
538 int newapic, int newpin) 533 int newapic, int newpin)
539{ 534{
540 struct irq_pin_list *entry = cfg->irq_2_pin; 535 struct irq_pin_list *entry;
541 int replaced = 0;
542 536
543 while (entry) { 537 for_each_irq_pin(entry, cfg->irq_2_pin) {
544 if (entry->apic == oldapic && entry->pin == oldpin) { 538 if (entry->apic == oldapic && entry->pin == oldpin) {
545 entry->apic = newapic; 539 entry->apic = newapic;
546 entry->pin = newpin; 540 entry->pin = newpin;
547 replaced = 1;
548 /* every one is different, right? */ 541 /* every one is different, right? */
549 break; 542 return;
550 } 543 }
551 entry = entry->next;
552 } 544 }
553 545
554 /* why? call replace before add? */ 546 /* old apic/pin didn't exist, so just add new ones */
555 if (!replaced) 547 add_pin_to_irq_node(cfg, node, newapic, newpin);
556 add_pin_to_irq_node(cfg, node, newapic, newpin);
557} 548}
558 549
559static inline void io_apic_modify_irq(struct irq_cfg *cfg, 550static void io_apic_modify_irq(struct irq_cfg *cfg,
560 int mask_and, int mask_or, 551 int mask_and, int mask_or,
561 void (*final)(struct irq_pin_list *entry)) 552 void (*final)(struct irq_pin_list *entry))
562{ 553{
563 int pin; 554 int pin;
564 struct irq_pin_list *entry; 555 struct irq_pin_list *entry;
565 556
566 for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { 557 for_each_irq_pin(entry, cfg->irq_2_pin) {
567 unsigned int reg; 558 unsigned int reg;
568 pin = entry->pin; 559 pin = entry->pin;
569 reg = io_apic_read(entry->apic, 0x10 + pin * 2); 560 reg = io_apic_read(entry->apic, 0x10 + pin * 2);
@@ -580,7 +571,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg)
580 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); 571 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL);
581} 572}
582 573
583#ifdef CONFIG_X86_64
584static void io_apic_sync(struct irq_pin_list *entry) 574static void io_apic_sync(struct irq_pin_list *entry)
585{ 575{
586 /* 576 /*
@@ -596,11 +586,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
596{ 586{
597 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); 587 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync);
598} 588}
599#else /* CONFIG_X86_32 */
600static void __mask_IO_APIC_irq(struct irq_cfg *cfg)
601{
602 io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL);
603}
604 589
605static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) 590static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg)
606{ 591{
@@ -613,7 +598,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg)
613 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 598 io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED,
614 IO_APIC_REDIR_LEVEL_TRIGGER, NULL); 599 IO_APIC_REDIR_LEVEL_TRIGGER, NULL);
615} 600}
616#endif /* CONFIG_X86_32 */
617 601
618static void mask_IO_APIC_irq_desc(struct irq_desc *desc) 602static void mask_IO_APIC_irq_desc(struct irq_desc *desc)
619{ 603{
@@ -1702,12 +1686,8 @@ __apicdebuginit(void) print_IO_APIC(void)
1702 if (!entry) 1686 if (!entry)
1703 continue; 1687 continue;
1704 printk(KERN_DEBUG "IRQ%d ", irq); 1688 printk(KERN_DEBUG "IRQ%d ", irq);
1705 for (;;) { 1689 for_each_irq_pin(entry, cfg->irq_2_pin)
1706 printk("-> %d:%d", entry->apic, entry->pin); 1690 printk("-> %d:%d", entry->apic, entry->pin);
1707 if (!entry->next)
1708 break;
1709 entry = entry->next;
1710 }
1711 printk("\n"); 1691 printk("\n");
1712 } 1692 }
1713 1693
@@ -2211,7 +2191,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq)
2211 return was_pending; 2191 return was_pending;
2212} 2192}
2213 2193
2214#ifdef CONFIG_X86_64
2215static int ioapic_retrigger_irq(unsigned int irq) 2194static int ioapic_retrigger_irq(unsigned int irq)
2216{ 2195{
2217 2196
@@ -2224,14 +2203,6 @@ static int ioapic_retrigger_irq(unsigned int irq)
2224 2203
2225 return 1; 2204 return 1;
2226} 2205}
2227#else
2228static int ioapic_retrigger_irq(unsigned int irq)
2229{
2230 apic->send_IPI_self(irq_cfg(irq)->vector);
2231
2232 return 1;
2233}
2234#endif
2235 2206
2236/* 2207/*
2237 * Level and edge triggered IO-APIC interrupts need different handling, 2208 * Level and edge triggered IO-APIC interrupts need different handling,
@@ -2269,13 +2240,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2269 struct irq_pin_list *entry; 2240 struct irq_pin_list *entry;
2270 u8 vector = cfg->vector; 2241 u8 vector = cfg->vector;
2271 2242
2272 entry = cfg->irq_2_pin; 2243 for_each_irq_pin(entry, cfg->irq_2_pin) {
2273 for (;;) {
2274 unsigned int reg; 2244 unsigned int reg;
2275 2245
2276 if (!entry)
2277 break;
2278
2279 apic = entry->apic; 2246 apic = entry->apic;
2280 pin = entry->pin; 2247 pin = entry->pin;
2281 /* 2248 /*
@@ -2288,9 +2255,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq
2288 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 2255 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
2289 reg |= vector; 2256 reg |= vector;
2290 io_apic_modify(apic, 0x10 + pin*2, reg); 2257 io_apic_modify(apic, 0x10 + pin*2, reg);
2291 if (!entry->next)
2292 break;
2293 entry = entry->next;
2294 } 2258 }
2295} 2259}
2296 2260
@@ -2515,11 +2479,8 @@ atomic_t irq_mis_count;
2515static void ack_apic_level(unsigned int irq) 2479static void ack_apic_level(unsigned int irq)
2516{ 2480{
2517 struct irq_desc *desc = irq_to_desc(irq); 2481 struct irq_desc *desc = irq_to_desc(irq);
2518
2519#ifdef CONFIG_X86_32
2520 unsigned long v; 2482 unsigned long v;
2521 int i; 2483 int i;
2522#endif
2523 struct irq_cfg *cfg; 2484 struct irq_cfg *cfg;
2524 int do_unmask_irq = 0; 2485 int do_unmask_irq = 0;
2525 2486
@@ -2532,31 +2493,28 @@ static void ack_apic_level(unsigned int irq)
2532 } 2493 }
2533#endif 2494#endif
2534 2495
2535#ifdef CONFIG_X86_32
2536 /* 2496 /*
2537 * It appears there is an erratum which affects at least version 0x11 2497 * It appears there is an erratum which affects at least version 0x11
2538 * of I/O APIC (that's the 82093AA and cores integrated into various 2498 * of I/O APIC (that's the 82093AA and cores integrated into various
2539 * chipsets). Under certain conditions a level-triggered interrupt is 2499 * chipsets). Under certain conditions a level-triggered interrupt is
2540 * erroneously delivered as edge-triggered one but the respective IRR 2500 * erroneously delivered as edge-triggered one but the respective IRR
2541 * bit gets set nevertheless. As a result the I/O unit expects an EOI 2501 * bit gets set nevertheless. As a result the I/O unit expects an EOI
2542 * message but it will never arrive and further interrupts are blocked 2502 * message but it will never arrive and further interrupts are blocked
2543 * from the source. The exact reason is so far unknown, but the 2503 * from the source. The exact reason is so far unknown, but the
2544 * phenomenon was observed when two consecutive interrupt requests 2504 * phenomenon was observed when two consecutive interrupt requests
2545 * from a given source get delivered to the same CPU and the source is 2505 * from a given source get delivered to the same CPU and the source is
2546 * temporarily disabled in between. 2506 * temporarily disabled in between.
2547 * 2507 *
2548 * A workaround is to simulate an EOI message manually. We achieve it 2508 * A workaround is to simulate an EOI message manually. We achieve it
2549 * by setting the trigger mode to edge and then to level when the edge 2509 * by setting the trigger mode to edge and then to level when the edge
2550 * trigger mode gets detected in the TMR of a local APIC for a 2510 * trigger mode gets detected in the TMR of a local APIC for a
2551 * level-triggered interrupt. We mask the source for the time of the 2511 * level-triggered interrupt. We mask the source for the time of the
2552 * operation to prevent an edge-triggered interrupt escaping meanwhile. 2512 * operation to prevent an edge-triggered interrupt escaping meanwhile.
2553 * The idea is from Manfred Spraul. --macro 2513 * The idea is from Manfred Spraul. --macro
2554 */ 2514 */
2555 cfg = desc->chip_data; 2515 cfg = desc->chip_data;
2556 i = cfg->vector; 2516 i = cfg->vector;
2557
2558 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); 2517 v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1));
2559#endif
2560 2518
2561 /* 2519 /*
2562 * We must acknowledge the irq before we move it or the acknowledge will 2520 * We must acknowledge the irq before we move it or the acknowledge will
@@ -2598,7 +2556,7 @@ static void ack_apic_level(unsigned int irq)
2598 unmask_IO_APIC_irq_desc(desc); 2556 unmask_IO_APIC_irq_desc(desc);
2599 } 2557 }
2600 2558
2601#ifdef CONFIG_X86_32 2559 /* Tail end of version 0x11 I/O APIC bug workaround */
2602 if (!(v & (1 << (i & 0x1f)))) { 2560 if (!(v & (1 << (i & 0x1f)))) {
2603 atomic_inc(&irq_mis_count); 2561 atomic_inc(&irq_mis_count);
2604 spin_lock(&ioapic_lock); 2562 spin_lock(&ioapic_lock);
@@ -2606,26 +2564,15 @@ static void ack_apic_level(unsigned int irq)
2606 __unmask_and_level_IO_APIC_irq(cfg); 2564 __unmask_and_level_IO_APIC_irq(cfg);
2607 spin_unlock(&ioapic_lock); 2565 spin_unlock(&ioapic_lock);
2608 } 2566 }
2609#endif
2610} 2567}
2611 2568
2612#ifdef CONFIG_INTR_REMAP 2569#ifdef CONFIG_INTR_REMAP
2613static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) 2570static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg)
2614{ 2571{
2615 int apic, pin;
2616 struct irq_pin_list *entry; 2572 struct irq_pin_list *entry;
2617 2573
2618 entry = cfg->irq_2_pin; 2574 for_each_irq_pin(entry, cfg->irq_2_pin)
2619 for (;;) { 2575 io_apic_eoi(entry->apic, entry->pin);
2620
2621 if (!entry)
2622 break;
2623
2624 apic = entry->apic;
2625 pin = entry->pin;
2626 io_apic_eoi(apic, pin);
2627 entry = entry->next;
2628 }
2629} 2576}
2630 2577
2631static void 2578static void
@@ -3241,8 +3188,7 @@ void destroy_irq(unsigned int irq)
3241 cfg = desc->chip_data; 3188 cfg = desc->chip_data;
3242 dynamic_irq_cleanup(irq); 3189 dynamic_irq_cleanup(irq);
3243 /* connect back irq_cfg */ 3190 /* connect back irq_cfg */
3244 if (desc) 3191 desc->chip_data = cfg;
3245 desc->chip_data = cfg;
3246 3192
3247 free_irte(irq); 3193 free_irte(irq);
3248 spin_lock_irqsave(&vector_lock, flags); 3194 spin_lock_irqsave(&vector_lock, flags);
@@ -3912,7 +3858,11 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq,
3912 */ 3858 */
3913 if (irq >= NR_IRQS_LEGACY) { 3859 if (irq >= NR_IRQS_LEGACY) {
3914 cfg = desc->chip_data; 3860 cfg = desc->chip_data;
3915 add_pin_to_irq_node(cfg, node, ioapic, pin); 3861 if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) {
3862 printk(KERN_INFO "can not add pin %d for irq %d\n",
3863 pin, irq);
3864 return 0;
3865 }
3916 } 3866 }
3917 3867
3918 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); 3868 setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity);
@@ -3941,11 +3891,28 @@ int io_apic_set_pci_routing(struct device *dev, int irq,
3941 return __io_apic_set_pci_routing(dev, irq, irq_attr); 3891 return __io_apic_set_pci_routing(dev, irq, irq_attr);
3942} 3892}
3943 3893
3944/* -------------------------------------------------------------------------- 3894u8 __init io_apic_unique_id(u8 id)
3945 ACPI-based IOAPIC Configuration 3895{
3946 -------------------------------------------------------------------------- */ 3896#ifdef CONFIG_X86_32
3897 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
3898 !APIC_XAPIC(apic_version[boot_cpu_physical_apicid]))
3899 return io_apic_get_unique_id(nr_ioapics, id);
3900 else
3901 return id;
3902#else
3903 int i;
3904 DECLARE_BITMAP(used, 256);
3947 3905
3948#ifdef CONFIG_ACPI 3906 bitmap_zero(used, 256);
3907 for (i = 0; i < nr_ioapics; i++) {
3908 struct mpc_ioapic *ia = &mp_ioapics[i];
3909 __set_bit(ia->apicid, used);
3910 }
3911 if (!test_bit(id, used))
3912 return id;
3913 return find_first_zero_bit(used, 256);
3914#endif
3915}
3949 3916
3950#ifdef CONFIG_X86_32 3917#ifdef CONFIG_X86_32
3951int __init io_apic_get_unique_id(int ioapic, int apic_id) 3918int __init io_apic_get_unique_id(int ioapic, int apic_id)
@@ -4054,8 +4021,6 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4054 return 0; 4021 return 0;
4055} 4022}
4056 4023
4057#endif /* CONFIG_ACPI */
4058
4059/* 4024/*
4060 * This function currently is only a helper for the i386 smp boot process where 4025 * This function currently is only a helper for the i386 smp boot process where
4061 * we need to reprogram the ioredtbls to cater for the cpus which have come online 4026 * we need to reprogram the ioredtbls to cater for the cpus which have come online
@@ -4109,7 +4074,7 @@ void __init setup_ioapic_dest(void)
4109 4074
4110static struct resource *ioapic_resources; 4075static struct resource *ioapic_resources;
4111 4076
4112static struct resource * __init ioapic_setup_resources(void) 4077static struct resource * __init ioapic_setup_resources(int nr_ioapics)
4113{ 4078{
4114 unsigned long n; 4079 unsigned long n;
4115 struct resource *res; 4080 struct resource *res;
@@ -4125,15 +4090,13 @@ static struct resource * __init ioapic_setup_resources(void)
4125 mem = alloc_bootmem(n); 4090 mem = alloc_bootmem(n);
4126 res = (void *)mem; 4091 res = (void *)mem;
4127 4092
4128 if (mem != NULL) { 4093 mem += sizeof(struct resource) * nr_ioapics;
4129 mem += sizeof(struct resource) * nr_ioapics;
4130 4094
4131 for (i = 0; i < nr_ioapics; i++) { 4095 for (i = 0; i < nr_ioapics; i++) {
4132 res[i].name = mem; 4096 res[i].name = mem;
4133 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; 4097 res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY;
4134 sprintf(mem, "IOAPIC %u", i); 4098 sprintf(mem, "IOAPIC %u", i);
4135 mem += IOAPIC_RESOURCE_NAME_SIZE; 4099 mem += IOAPIC_RESOURCE_NAME_SIZE;
4136 }
4137 } 4100 }
4138 4101
4139 ioapic_resources = res; 4102 ioapic_resources = res;
@@ -4147,7 +4110,7 @@ void __init ioapic_init_mappings(void)
4147 struct resource *ioapic_res; 4110 struct resource *ioapic_res;
4148 int i; 4111 int i;
4149 4112
4150 ioapic_res = ioapic_setup_resources(); 4113 ioapic_res = ioapic_setup_resources(nr_ioapics);
4151 for (i = 0; i < nr_ioapics; i++) { 4114 for (i = 0; i < nr_ioapics; i++) {
4152 if (smp_found_config) { 4115 if (smp_found_config) {
4153 ioapic_phys = mp_ioapics[i].apicaddr; 4116 ioapic_phys = mp_ioapics[i].apicaddr;
@@ -4176,11 +4139,9 @@ fake_ioapic_page:
4176 __fix_to_virt(idx), ioapic_phys); 4139 __fix_to_virt(idx), ioapic_phys);
4177 idx++; 4140 idx++;
4178 4141
4179 if (ioapic_res != NULL) { 4142 ioapic_res->start = ioapic_phys;
4180 ioapic_res->start = ioapic_phys; 4143 ioapic_res->end = ioapic_phys + (4 * 1024) - 1;
4181 ioapic_res->end = ioapic_phys + (4 * 1024) - 1; 4144 ioapic_res++;
4182 ioapic_res++;
4183 }
4184 } 4145 }
4185} 4146}
4186 4147
@@ -4201,3 +4162,76 @@ void __init ioapic_insert_resources(void)
4201 r++; 4162 r++;
4202 } 4163 }
4203} 4164}
4165
4166int mp_find_ioapic(int gsi)
4167{
4168 int i = 0;
4169
4170 /* Find the IOAPIC that manages this GSI. */
4171 for (i = 0; i < nr_ioapics; i++) {
4172 if ((gsi >= mp_gsi_routing[i].gsi_base)
4173 && (gsi <= mp_gsi_routing[i].gsi_end))
4174 return i;
4175 }
4176
4177 printk(KERN_ERR "ERROR: Unable to locate IOAPIC for GSI %d\n", gsi);
4178 return -1;
4179}
4180
4181int mp_find_ioapic_pin(int ioapic, int gsi)
4182{
4183 if (WARN_ON(ioapic == -1))
4184 return -1;
4185 if (WARN_ON(gsi > mp_gsi_routing[ioapic].gsi_end))
4186 return -1;
4187
4188 return gsi - mp_gsi_routing[ioapic].gsi_base;
4189}
4190
4191static int bad_ioapic(unsigned long address)
4192{
4193 if (nr_ioapics >= MAX_IO_APICS) {
4194 printk(KERN_WARNING "WARING: Max # of I/O APICs (%d) exceeded "
4195 "(found %d), skipping\n", MAX_IO_APICS, nr_ioapics);
4196 return 1;
4197 }
4198 if (!address) {
4199 printk(KERN_WARNING "WARNING: Bogus (zero) I/O APIC address"
4200 " found in table, skipping!\n");
4201 return 1;
4202 }
4203 return 0;
4204}
4205
4206void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
4207{
4208 int idx = 0;
4209
4210 if (bad_ioapic(address))
4211 return;
4212
4213 idx = nr_ioapics;
4214
4215 mp_ioapics[idx].type = MP_IOAPIC;
4216 mp_ioapics[idx].flags = MPC_APIC_USABLE;
4217 mp_ioapics[idx].apicaddr = address;
4218
4219 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
4220 mp_ioapics[idx].apicid = io_apic_unique_id(id);
4221 mp_ioapics[idx].apicver = io_apic_get_version(idx);
4222
4223 /*
4224 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
4225 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
4226 */
4227 mp_gsi_routing[idx].gsi_base = gsi_base;
4228 mp_gsi_routing[idx].gsi_end = gsi_base +
4229 io_apic_get_redir_entries(idx);
4230
4231 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
4232 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
4233 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
4234 mp_gsi_routing[idx].gsi_base, mp_gsi_routing[idx].gsi_end);
4235
4236 nr_ioapics++;
4237}
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
index 6ef00ba4c886..08385e090a6f 100644
--- a/arch/x86/kernel/apic/ipi.c
+++ b/arch/x86/kernel/apic/ipi.c
@@ -153,7 +153,7 @@ int safe_smp_processor_id(void)
153{ 153{
154 int apicid, cpuid; 154 int apicid, cpuid;
155 155
156 if (!boot_cpu_has(X86_FEATURE_APIC)) 156 if (!cpu_has_apic)
157 return 0; 157 return 0;
158 158
159 apicid = hard_smp_processor_id(); 159 apicid = hard_smp_processor_id();
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
index db7220220d09..cb66a22d98ad 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu)
66 66
67static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
68{ 68{
69#if defined(CONFIG_X86_NEW_MCE) 69#if defined(CONFIG_X86_MCE)
70 return atomic_read(&mce_entry) > 0; 70 return atomic_read(&mce_entry) > 0;
71#endif 71#endif
72 return 0; 72 return 0;
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index fcec2f1d34a1..65edc180fc82 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -55,11 +55,11 @@ static int apicid_phys_pkg_id(int initial_apic_id, int index_msb)
55void __init default_setup_apic_routing(void) 55void __init default_setup_apic_routing(void)
56{ 56{
57#ifdef CONFIG_X86_X2APIC 57#ifdef CONFIG_X86_X2APIC
58 if (x2apic_mode && (apic != &apic_x2apic_phys && 58 if (x2apic_mode
59#ifdef CONFIG_X86_UV 59#ifdef CONFIG_X86_UV
60 apic != &apic_x2apic_uv_x && 60 && apic != &apic_x2apic_uv_x
61#endif 61#endif
62 apic != &apic_x2apic_cluster)) { 62 ) {
63 if (x2apic_phys) 63 if (x2apic_phys)
64 apic = &apic_x2apic_phys; 64 apic = &apic_x2apic_phys;
65 else 65 else
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 442b5508893f..151ace69a5aa 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -403,7 +403,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue);
403static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); 403static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue);
404static struct apm_user *user_list; 404static struct apm_user *user_list;
405static DEFINE_SPINLOCK(user_list_lock); 405static DEFINE_SPINLOCK(user_list_lock);
406static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; 406
407/*
408 * Set up a segment that references the real mode segment 0x40
409 * that extends up to the end of page zero (that we have reserved).
410 * This is for buggy BIOS's that refer to (real mode) segment 0x40
411 * even though they are called in protected mode.
412 */
413static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092,
414 (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1);
407 415
408static const char driver_version[] = "1.16ac"; /* no spaces */ 416static const char driver_version[] = "1.16ac"; /* no spaces */
409 417
@@ -2332,15 +2340,6 @@ static int __init apm_init(void)
2332 pm_flags |= PM_APM; 2340 pm_flags |= PM_APM;
2333 2341
2334 /* 2342 /*
2335 * Set up a segment that references the real mode segment 0x40
2336 * that extends up to the end of page zero (that we have reserved).
2337 * This is for buggy BIOS's that refer to (real mode) segment 0x40
2338 * even though they are called in protected mode.
2339 */
2340 set_base(bad_bios_desc, __va((unsigned long)0x40 << 4));
2341 _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4));
2342
2343 /*
2344 * Set up the long jump entry point to the APM BIOS, which is called 2343 * Set up the long jump entry point to the APM BIOS, which is called
2345 * from inline assembly. 2344 * from inline assembly.
2346 */ 2345 */
@@ -2358,12 +2357,12 @@ static int __init apm_init(void)
2358 * code to that CPU. 2357 * code to that CPU.
2359 */ 2358 */
2360 gdt = get_cpu_gdt_table(0); 2359 gdt = get_cpu_gdt_table(0);
2361 set_base(gdt[APM_CS >> 3], 2360 set_desc_base(&gdt[APM_CS >> 3],
2362 __va((unsigned long)apm_info.bios.cseg << 4)); 2361 (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4));
2363 set_base(gdt[APM_CS_16 >> 3], 2362 set_desc_base(&gdt[APM_CS_16 >> 3],
2364 __va((unsigned long)apm_info.bios.cseg_16 << 4)); 2363 (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4));
2365 set_base(gdt[APM_DS >> 3], 2364 set_desc_base(&gdt[APM_DS >> 3],
2366 __va((unsigned long)apm_info.bios.dseg << 4)); 2365 (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4));
2367 2366
2368 proc_create("apm", 0, NULL, &apm_file_ops); 2367 proc_create("apm", 0, NULL, &apm_file_ops);
2369 2368
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index c1f253dac155..8dd30638fe44 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp)
13 13
14obj-y := intel_cacheinfo.o addon_cpuid_features.o 14obj-y := intel_cacheinfo.o addon_cpuid_features.o
15obj-y += proc.o capflags.o powerflags.o common.o 15obj-y += proc.o capflags.o powerflags.o common.o
16obj-y += vmware.o hypervisor.o 16obj-y += vmware.o hypervisor.o sched.o
17 17
18obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o 18obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
19obj-$(CONFIG_X86_64) += bugs_64.o 19obj-$(CONFIG_X86_64) += bugs_64.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 63fddcd082cd..f32fa71ccf97 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -2,7 +2,7 @@
2#include <linux/bitops.h> 2#include <linux/bitops.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4 4
5#include <asm/io.h> 5#include <linux/io.h>
6#include <asm/processor.h> 6#include <asm/processor.h>
7#include <asm/apic.h> 7#include <asm/apic.h>
8#include <asm/cpu.h> 8#include <asm/cpu.h>
@@ -45,8 +45,8 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
45#define CBAR_ENB (0x80000000) 45#define CBAR_ENB (0x80000000)
46#define CBAR_KEY (0X000000CB) 46#define CBAR_KEY (0X000000CB)
47 if (c->x86_model == 9 || c->x86_model == 10) { 47 if (c->x86_model == 9 || c->x86_model == 10) {
48 if (inl (CBAR) & CBAR_ENB) 48 if (inl(CBAR) & CBAR_ENB)
49 outl (0 | CBAR_KEY, CBAR); 49 outl(0 | CBAR_KEY, CBAR);
50 } 50 }
51} 51}
52 52
@@ -87,9 +87,10 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
87 d = d2-d; 87 d = d2-d;
88 88
89 if (d > 20*K6_BUG_LOOP) 89 if (d > 20*K6_BUG_LOOP)
90 printk("system stability may be impaired when more than 32 MB are used.\n"); 90 printk(KERN_CONT
91 "system stability may be impaired when more than 32 MB are used.\n");
91 else 92 else
92 printk("probably OK (after B9730xxxx).\n"); 93 printk(KERN_CONT "probably OK (after B9730xxxx).\n");
93 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); 94 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
94 } 95 }
95 96
@@ -219,8 +220,9 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
219 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { 220 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
220 rdmsr(MSR_K7_CLK_CTL, l, h); 221 rdmsr(MSR_K7_CLK_CTL, l, h);
221 if ((l & 0xfff00000) != 0x20000000) { 222 if ((l & 0xfff00000) != 0x20000000) {
222 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, 223 printk(KERN_INFO
223 ((l & 0x000fffff)|0x20000000)); 224 "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n",
225 l, ((l & 0x000fffff)|0x20000000));
224 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); 226 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
225 } 227 }
226 } 228 }
@@ -251,6 +253,64 @@ static int __cpuinit nearby_node(int apicid)
251#endif 253#endif
252 254
253/* 255/*
256 * Fixup core topology information for AMD multi-node processors.
257 * Assumption 1: Number of cores in each internal node is the same.
258 * Assumption 2: Mixed systems with both single-node and dual-node
259 * processors are not supported.
260 */
261#ifdef CONFIG_X86_HT
262static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c)
263{
264#ifdef CONFIG_PCI
265 u32 t, cpn;
266 u8 n, n_id;
267 int cpu = smp_processor_id();
268
269 /* fixup topology information only once for a core */
270 if (cpu_has(c, X86_FEATURE_AMD_DCM))
271 return;
272
273 /* check for multi-node processor on boot cpu */
274 t = read_pci_config(0, 24, 3, 0xe8);
275 if (!(t & (1 << 29)))
276 return;
277
278 set_cpu_cap(c, X86_FEATURE_AMD_DCM);
279
280 /* cores per node: each internal node has half the number of cores */
281 cpn = c->x86_max_cores >> 1;
282
283 /* even-numbered NB_id of this dual-node processor */
284 n = c->phys_proc_id << 1;
285
286 /*
287 * determine internal node id and assign cores fifty-fifty to
288 * each node of the dual-node processor
289 */
290 t = read_pci_config(0, 24 + n, 3, 0xe8);
291 n = (t>>30) & 0x3;
292 if (n == 0) {
293 if (c->cpu_core_id < cpn)
294 n_id = 0;
295 else
296 n_id = 1;
297 } else {
298 if (c->cpu_core_id < cpn)
299 n_id = 1;
300 else
301 n_id = 0;
302 }
303
304 /* compute entire NodeID, use llc_shared_map to store sibling info */
305 per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id;
306
307 /* fixup core id to be in range from 0 to cpn */
308 c->cpu_core_id = c->cpu_core_id % cpn;
309#endif
310}
311#endif
312
313/*
254 * On a AMD dual core setup the lower bits of the APIC id distingush the cores. 314 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
255 * Assumes number of cores is a power of two. 315 * Assumes number of cores is a power of two.
256 */ 316 */
@@ -267,17 +327,31 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
267 c->phys_proc_id = c->initial_apicid >> bits; 327 c->phys_proc_id = c->initial_apicid >> bits;
268 /* use socket ID also for last level cache */ 328 /* use socket ID also for last level cache */
269 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; 329 per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
330 /* fixup topology information on multi-node processors */
331 if ((c->x86 == 0x10) && (c->x86_model == 9))
332 amd_fixup_dcm(c);
270#endif 333#endif
271} 334}
272 335
336int amd_get_nb_id(int cpu)
337{
338 int id = 0;
339#ifdef CONFIG_SMP
340 id = per_cpu(cpu_llc_id, cpu);
341#endif
342 return id;
343}
344EXPORT_SYMBOL_GPL(amd_get_nb_id);
345
273static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) 346static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
274{ 347{
275#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 348#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
276 int cpu = smp_processor_id(); 349 int cpu = smp_processor_id();
277 int node; 350 int node;
278 unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; 351 unsigned apicid = c->apicid;
352
353 node = per_cpu(cpu_llc_id, cpu);
279 354
280 node = c->phys_proc_id;
281 if (apicid_to_node[apicid] != NUMA_NO_NODE) 355 if (apicid_to_node[apicid] != NUMA_NO_NODE)
282 node = apicid_to_node[apicid]; 356 node = apicid_to_node[apicid];
283 if (!node_online(node)) { 357 if (!node_online(node)) {
@@ -398,18 +472,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
398 u32 level; 472 u32 level;
399 473
400 level = cpuid_eax(1); 474 level = cpuid_eax(1);
401 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) 475 if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
402 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 476 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
403 477
404 /* 478 /*
405 * Some BIOSes incorrectly force this feature, but only K8 479 * Some BIOSes incorrectly force this feature, but only K8
406 * revision D (model = 0x14) and later actually support it. 480 * revision D (model = 0x14) and later actually support it.
481 * (AMD Erratum #110, docId: 25759).
407 */ 482 */
408 if (c->x86_model < 0x14) 483 if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) {
484 u64 val;
485
409 clear_cpu_cap(c, X86_FEATURE_LAHF_LM); 486 clear_cpu_cap(c, X86_FEATURE_LAHF_LM);
487 if (!rdmsrl_amd_safe(0xc001100d, &val)) {
488 val &= ~(1ULL << 32);
489 wrmsrl_amd_safe(0xc001100d, val);
490 }
491 }
492
410 } 493 }
411 if (c->x86 == 0x10 || c->x86 == 0x11) 494 if (c->x86 == 0x10 || c->x86 == 0x11)
412 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 495 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
496
497 /* get apicid instead of initial apic id from cpuid */
498 c->apicid = hard_smp_processor_id();
413#else 499#else
414 500
415 /* 501 /*
@@ -494,27 +580,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
494 * benefit in doing so. 580 * benefit in doing so.
495 */ 581 */
496 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { 582 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
497 printk(KERN_DEBUG "tseg: %010llx\n", tseg); 583 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
498 if ((tseg>>PMD_SHIFT) < 584 if ((tseg>>PMD_SHIFT) <
499 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || 585 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
500 ((tseg>>PMD_SHIFT) < 586 ((tseg>>PMD_SHIFT) <
501 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && 587 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
502 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) 588 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
503 set_memory_4k((unsigned long)__va(tseg), 1); 589 set_memory_4k((unsigned long)__va(tseg), 1);
504 } 590 }
505 } 591 }
506#endif 592#endif
507} 593}
508 594
509#ifdef CONFIG_X86_32 595#ifdef CONFIG_X86_32
510static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) 596static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c,
597 unsigned int size)
511{ 598{
512 /* AMD errata T13 (order #21922) */ 599 /* AMD errata T13 (order #21922) */
513 if ((c->x86 == 6)) { 600 if ((c->x86 == 6)) {
514 if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ 601 /* Duron Rev A0 */
602 if (c->x86_model == 3 && c->x86_mask == 0)
515 size = 64; 603 size = 64;
604 /* Tbird rev A1/A2 */
516 if (c->x86_model == 4 && 605 if (c->x86_model == 4 &&
517 (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ 606 (c->x86_mask == 0 || c->x86_mask == 1))
518 size = 256; 607 size = 256;
519 } 608 }
520 return size; 609 return size;
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index c8e315f1aa83..01a265212395 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -81,7 +81,7 @@ static void __init check_fpu(void)
81 81
82 boot_cpu_data.fdiv_bug = fdiv_bug; 82 boot_cpu_data.fdiv_bug = fdiv_bug;
83 if (boot_cpu_data.fdiv_bug) 83 if (boot_cpu_data.fdiv_bug)
84 printk("Hmm, FPU with FDIV bug.\n"); 84 printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n");
85} 85}
86 86
87static void __init check_hlt(void) 87static void __init check_hlt(void)
@@ -98,7 +98,7 @@ static void __init check_hlt(void)
98 halt(); 98 halt();
99 halt(); 99 halt();
100 halt(); 100 halt();
101 printk("OK.\n"); 101 printk(KERN_CONT "OK.\n");
102} 102}
103 103
104/* 104/*
@@ -122,9 +122,9 @@ static void __init check_popad(void)
122 * CPU hard. Too bad. 122 * CPU hard. Too bad.
123 */ 123 */
124 if (res != 12345678) 124 if (res != 12345678)
125 printk("Buggy.\n"); 125 printk(KERN_CONT "Buggy.\n");
126 else 126 else
127 printk("OK.\n"); 127 printk(KERN_CONT "OK.\n");
128#endif 128#endif
129} 129}
130 130
@@ -156,7 +156,7 @@ void __init check_bugs(void)
156{ 156{
157 identify_boot_cpu(); 157 identify_boot_cpu();
158#ifndef CONFIG_SMP 158#ifndef CONFIG_SMP
159 printk("CPU: "); 159 printk(KERN_INFO "CPU: ");
160 print_cpu_info(&boot_cpu_data); 160 print_cpu_info(&boot_cpu_data);
161#endif 161#endif
162 check_config(); 162 check_config();
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c
index 9a3ed0649d4e..04f0fe5af83e 100644
--- a/arch/x86/kernel/cpu/bugs_64.c
+++ b/arch/x86/kernel/cpu/bugs_64.c
@@ -15,7 +15,7 @@ void __init check_bugs(void)
15{ 15{
16 identify_boot_cpu(); 16 identify_boot_cpu();
17#if !defined(CONFIG_SMP) 17#if !defined(CONFIG_SMP)
18 printk("CPU: "); 18 printk(KERN_INFO "CPU: ");
19 print_cpu_info(&boot_cpu_data); 19 print_cpu_info(&boot_cpu_data);
20#endif 20#endif
21 alternative_instructions(); 21 alternative_instructions();
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 5ce60a88027b..2055fc2b2e6b 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -18,8 +18,8 @@
18#include <asm/hypervisor.h> 18#include <asm/hypervisor.h>
19#include <asm/processor.h> 19#include <asm/processor.h>
20#include <asm/sections.h> 20#include <asm/sections.h>
21#include <asm/topology.h> 21#include <linux/topology.h>
22#include <asm/cpumask.h> 22#include <linux/cpumask.h>
23#include <asm/pgtable.h> 23#include <asm/pgtable.h>
24#include <asm/atomic.h> 24#include <asm/atomic.h>
25#include <asm/proto.h> 25#include <asm/proto.h>
@@ -28,13 +28,13 @@
28#include <asm/desc.h> 28#include <asm/desc.h>
29#include <asm/i387.h> 29#include <asm/i387.h>
30#include <asm/mtrr.h> 30#include <asm/mtrr.h>
31#include <asm/numa.h> 31#include <linux/numa.h>
32#include <asm/asm.h> 32#include <asm/asm.h>
33#include <asm/cpu.h> 33#include <asm/cpu.h>
34#include <asm/mce.h> 34#include <asm/mce.h>
35#include <asm/msr.h> 35#include <asm/msr.h>
36#include <asm/pat.h> 36#include <asm/pat.h>
37#include <asm/smp.h> 37#include <linux/smp.h>
38 38
39#ifdef CONFIG_X86_LOCAL_APIC 39#ifdef CONFIG_X86_LOCAL_APIC
40#include <asm/uv/uv.h> 40#include <asm/uv/uv.h>
@@ -94,45 +94,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
94 * TLS descriptors are currently at a different place compared to i386. 94 * TLS descriptors are currently at a different place compared to i386.
95 * Hopefully nobody expects them at a fixed place (Wine?) 95 * Hopefully nobody expects them at a fixed place (Wine?)
96 */ 96 */
97 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 97 [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff),
98 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 98 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff),
99 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 99 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff),
100 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 100 [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff),
101 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 101 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff),
102 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 102 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff),
103#else 103#else
104 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 104 [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff),
105 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 105 [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
106 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 106 [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff),
107 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, 107 [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff),
108 /* 108 /*
109 * Segments used for calling PnP BIOS have byte granularity. 109 * Segments used for calling PnP BIOS have byte granularity.
110 * They code segments and data segments have fixed 64k limits, 110 * They code segments and data segments have fixed 64k limits,
111 * the transfer segment sizes are set at run time. 111 * the transfer segment sizes are set at run time.
112 */ 112 */
113 /* 32-bit code */ 113 /* 32-bit code */
114 [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, 114 [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
115 /* 16-bit code */ 115 /* 16-bit code */
116 [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, 116 [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
117 /* 16-bit data */ 117 /* 16-bit data */
118 [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, 118 [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff),
119 /* 16-bit data */ 119 /* 16-bit data */
120 [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, 120 [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0),
121 /* 16-bit data */ 121 /* 16-bit data */
122 [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, 122 [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0),
123 /* 123 /*
124 * The APM segments have byte granularity and their bases 124 * The APM segments have byte granularity and their bases
125 * are set at run time. All have 64k limits. 125 * are set at run time. All have 64k limits.
126 */ 126 */
127 /* 32-bit code */ 127 /* 32-bit code */
128 [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, 128 [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff),
129 /* 16-bit code */ 129 /* 16-bit code */
130 [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, 130 [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff),
131 /* data */ 131 /* data */
132 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 132 [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff),
133 133
134 [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, 134 [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
135 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 135 [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff),
136 GDT_STACK_CANARY_INIT 136 GDT_STACK_CANARY_INIT
137#endif 137#endif
138} }; 138} };
@@ -982,18 +982,26 @@ static __init int setup_disablecpuid(char *arg)
982__setup("clearcpuid=", setup_disablecpuid); 982__setup("clearcpuid=", setup_disablecpuid);
983 983
984#ifdef CONFIG_X86_64 984#ifdef CONFIG_X86_64
985struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 985struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table };
986 986
987DEFINE_PER_CPU_FIRST(union irq_stack_union, 987DEFINE_PER_CPU_FIRST(union irq_stack_union,
988 irq_stack_union) __aligned(PAGE_SIZE); 988 irq_stack_union) __aligned(PAGE_SIZE);
989 989
990DEFINE_PER_CPU(char *, irq_stack_ptr) = 990/*
991 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 991 * The following four percpu variables are hot. Align current_task to
992 * cacheline size such that all four fall in the same cacheline.
993 */
994DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
995 &init_task;
996EXPORT_PER_CPU_SYMBOL(current_task);
992 997
993DEFINE_PER_CPU(unsigned long, kernel_stack) = 998DEFINE_PER_CPU(unsigned long, kernel_stack) =
994 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; 999 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
995EXPORT_PER_CPU_SYMBOL(kernel_stack); 1000EXPORT_PER_CPU_SYMBOL(kernel_stack);
996 1001
1002DEFINE_PER_CPU(char *, irq_stack_ptr) =
1003 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
1004
997DEFINE_PER_CPU(unsigned int, irq_count) = -1; 1005DEFINE_PER_CPU(unsigned int, irq_count) = -1;
998 1006
999/* 1007/*
@@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
1008}; 1016};
1009 1017
1010static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks 1018static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
1011 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) 1019 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
1012 __aligned(PAGE_SIZE);
1013 1020
1014/* May not be marked __init: used by software suspend */ 1021/* May not be marked __init: used by software suspend */
1015void syscall_init(void) 1022void syscall_init(void)
@@ -1042,8 +1049,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist);
1042 1049
1043#else /* CONFIG_X86_64 */ 1050#else /* CONFIG_X86_64 */
1044 1051
1052DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
1053EXPORT_PER_CPU_SYMBOL(current_task);
1054
1045#ifdef CONFIG_CC_STACKPROTECTOR 1055#ifdef CONFIG_CC_STACKPROTECTOR
1046DEFINE_PER_CPU(unsigned long, stack_canary); 1056DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
1047#endif 1057#endif
1048 1058
1049/* Make sure %fs and %gs are initialized properly in idle threads */ 1059/* Make sure %fs and %gs are initialized properly in idle threads */
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c
index 6b2a52dd0403..dca325c03999 100644
--- a/arch/x86/kernel/cpu/cpu_debug.c
+++ b/arch/x86/kernel/cpu/cpu_debug.c
@@ -30,8 +30,8 @@
30#include <asm/apic.h> 30#include <asm/apic.h>
31#include <asm/desc.h> 31#include <asm/desc.h>
32 32
33static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); 33static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr);
34static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); 34static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr);
35static DEFINE_PER_CPU(int, cpu_priv_count); 35static DEFINE_PER_CPU(int, cpu_priv_count);
36 36
37static DEFINE_MUTEX(cpu_debug_lock); 37static DEFINE_MUTEX(cpu_debug_lock);
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index ae9b503220ca..4109679863c1 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -60,7 +60,6 @@ enum {
60}; 60};
61 61
62#define INTEL_MSR_RANGE (0xffff) 62#define INTEL_MSR_RANGE (0xffff)
63#define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1)
64 63
65struct acpi_cpufreq_data { 64struct acpi_cpufreq_data {
66 struct acpi_processor_performance *acpi_data; 65 struct acpi_processor_performance *acpi_data;
@@ -71,11 +70,7 @@ struct acpi_cpufreq_data {
71 70
72static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); 71static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data);
73 72
74struct acpi_msr_data { 73static DEFINE_PER_CPU(struct aperfmperf, old_perf);
75 u64 saved_aperf, saved_mperf;
76};
77
78static DEFINE_PER_CPU(struct acpi_msr_data, msr_data);
79 74
80DEFINE_TRACE(power_mark); 75DEFINE_TRACE(power_mark);
81 76
@@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask)
244 return cmd.val; 239 return cmd.val;
245} 240}
246 241
247struct perf_pair {
248 union {
249 struct {
250 u32 lo;
251 u32 hi;
252 } split;
253 u64 whole;
254 } aperf, mperf;
255};
256
257/* Called via smp_call_function_single(), on the target CPU */ 242/* Called via smp_call_function_single(), on the target CPU */
258static void read_measured_perf_ctrs(void *_cur) 243static void read_measured_perf_ctrs(void *_cur)
259{ 244{
260 struct perf_pair *cur = _cur; 245 struct aperfmperf *am = _cur;
261 246
262 rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); 247 get_aperfmperf(am);
263 rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi);
264} 248}
265 249
266/* 250/*
@@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur)
279static unsigned int get_measured_perf(struct cpufreq_policy *policy, 263static unsigned int get_measured_perf(struct cpufreq_policy *policy,
280 unsigned int cpu) 264 unsigned int cpu)
281{ 265{
282 struct perf_pair readin, cur; 266 struct aperfmperf perf;
283 unsigned int perf_percent; 267 unsigned long ratio;
284 unsigned int retval; 268 unsigned int retval;
285 269
286 if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) 270 if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1))
287 return 0; 271 return 0;
288 272
289 cur.aperf.whole = readin.aperf.whole - 273 ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf);
290 per_cpu(msr_data, cpu).saved_aperf; 274 per_cpu(old_perf, cpu) = perf;
291 cur.mperf.whole = readin.mperf.whole -
292 per_cpu(msr_data, cpu).saved_mperf;
293 per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole;
294 per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole;
295
296#ifdef __i386__
297 /*
298 * We dont want to do 64 bit divide with 32 bit kernel
299 * Get an approximate value. Return failure in case we cannot get
300 * an approximate value.
301 */
302 if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
303 int shift_count;
304 u32 h;
305
306 h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi);
307 shift_count = fls(h);
308
309 cur.aperf.whole >>= shift_count;
310 cur.mperf.whole >>= shift_count;
311 }
312
313 if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
314 int shift_count = 7;
315 cur.aperf.split.lo >>= shift_count;
316 cur.mperf.split.lo >>= shift_count;
317 }
318
319 if (cur.aperf.split.lo && cur.mperf.split.lo)
320 perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
321 else
322 perf_percent = 0;
323 275
324#else 276 retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT;
325 if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
326 int shift_count = 7;
327 cur.aperf.whole >>= shift_count;
328 cur.mperf.whole >>= shift_count;
329 }
330
331 if (cur.aperf.whole && cur.mperf.whole)
332 perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
333 else
334 perf_percent = 0;
335
336#endif
337
338 retval = (policy->cpuinfo.max_freq * perf_percent) / 100;
339 277
340 return retval; 278 return retval;
341} 279}
@@ -731,12 +669,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
731 acpi_processor_notify_smm(THIS_MODULE); 669 acpi_processor_notify_smm(THIS_MODULE);
732 670
733 /* Check for APERF/MPERF support in hardware */ 671 /* Check for APERF/MPERF support in hardware */
734 if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { 672 if (cpu_has(c, X86_FEATURE_APERFMPERF))
735 unsigned int ecx; 673 acpi_cpufreq_driver.getavg = get_measured_perf;
736 ecx = cpuid_ecx(6);
737 if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY)
738 acpi_cpufreq_driver.getavg = get_measured_perf;
739 }
740 674
741 dprintk("CPU%u - ACPI performance management activated.\n", cpu); 675 dprintk("CPU%u - ACPI performance management activated.\n", cpu);
742 for (i = 0; i < perf->state_count; i++) 676 for (i = 0; i < perf->state_count; i++)
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 593171e967ef..19807b89f058 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -3,10 +3,10 @@
3#include <linux/delay.h> 3#include <linux/delay.h>
4#include <linux/pci.h> 4#include <linux/pci.h>
5#include <asm/dma.h> 5#include <asm/dma.h>
6#include <asm/io.h> 6#include <linux/io.h>
7#include <asm/processor-cyrix.h> 7#include <asm/processor-cyrix.h>
8#include <asm/processor-flags.h> 8#include <asm/processor-flags.h>
9#include <asm/timer.h> 9#include <linux/timer.h>
10#include <asm/pci-direct.h> 10#include <asm/pci-direct.h>
11#include <asm/tsc.h> 11#include <asm/tsc.h>
12 12
@@ -282,7 +282,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
282 * The 5510/5520 companion chips have a funky PIT. 282 * The 5510/5520 companion chips have a funky PIT.
283 */ 283 */
284 if (vendor == PCI_VENDOR_ID_CYRIX && 284 if (vendor == PCI_VENDOR_ID_CYRIX &&
285 (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) 285 (device == PCI_DEVICE_ID_CYRIX_5510 ||
286 device == PCI_DEVICE_ID_CYRIX_5520))
286 mark_tsc_unstable("cyrix 5510/5520 detected"); 287 mark_tsc_unstable("cyrix 5510/5520 detected");
287 } 288 }
288#endif 289#endif
@@ -299,7 +300,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
299 * ? : 0x7x 300 * ? : 0x7x
300 * GX1 : 0x8x GX1 datasheet 56 301 * GX1 : 0x8x GX1 datasheet 56
301 */ 302 */
302 if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) 303 if ((0x30 <= dir1 && dir1 <= 0x6f) ||
304 (0x80 <= dir1 && dir1 <= 0x8f))
303 geode_configure(); 305 geode_configure();
304 return; 306 return;
305 } else { /* MediaGX */ 307 } else { /* MediaGX */
@@ -427,9 +429,12 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
427 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); 429 printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n");
428 local_irq_save(flags); 430 local_irq_save(flags);
429 ccr3 = getCx86(CX86_CCR3); 431 ccr3 = getCx86(CX86_CCR3);
430 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 432 /* enable MAPEN */
431 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ 433 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
432 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 434 /* enable cpuid */
435 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80);
436 /* disable MAPEN */
437 setCx86(CX86_CCR3, ccr3);
433 local_irq_restore(flags); 438 local_irq_restore(flags);
434 } 439 }
435 } 440 }
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c
index fb5b86af0b01..93ba8eeb100a 100644
--- a/arch/x86/kernel/cpu/hypervisor.c
+++ b/arch/x86/kernel/cpu/hypervisor.c
@@ -28,11 +28,10 @@
28static inline void __cpuinit 28static inline void __cpuinit
29detect_hypervisor_vendor(struct cpuinfo_x86 *c) 29detect_hypervisor_vendor(struct cpuinfo_x86 *c)
30{ 30{
31 if (vmware_platform()) { 31 if (vmware_platform())
32 c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; 32 c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE;
33 } else { 33 else
34 c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; 34 c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE;
35 }
36} 35}
37 36
38unsigned long get_hypervisor_tsc_freq(void) 37unsigned long get_hypervisor_tsc_freq(void)
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 3260ab044996..40e1835b35e8 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -7,17 +7,17 @@
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/thread_info.h> 8#include <linux/thread_info.h>
9#include <linux/module.h> 9#include <linux/module.h>
10#include <linux/uaccess.h>
10 11
11#include <asm/processor.h> 12#include <asm/processor.h>
12#include <asm/pgtable.h> 13#include <asm/pgtable.h>
13#include <asm/msr.h> 14#include <asm/msr.h>
14#include <asm/uaccess.h>
15#include <asm/ds.h> 15#include <asm/ds.h>
16#include <asm/bugs.h> 16#include <asm/bugs.h>
17#include <asm/cpu.h> 17#include <asm/cpu.h>
18 18
19#ifdef CONFIG_X86_64 19#ifdef CONFIG_X86_64
20#include <asm/topology.h> 20#include <linux/topology.h>
21#include <asm/numa_64.h> 21#include <asm/numa_64.h>
22#endif 22#endif
23 23
@@ -174,7 +174,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
174#ifdef CONFIG_X86_F00F_BUG 174#ifdef CONFIG_X86_F00F_BUG
175 /* 175 /*
176 * All current models of Pentium and Pentium with MMX technology CPUs 176 * All current models of Pentium and Pentium with MMX technology CPUs
177 * have the F0 0F bug, which lets nonprivileged users lock up the system. 177 * have the F0 0F bug, which lets nonprivileged users lock up the
178 * system.
178 * Note that the workaround only should be initialized once... 179 * Note that the workaround only should be initialized once...
179 */ 180 */
180 c->f00f_bug = 0; 181 c->f00f_bug = 0;
@@ -207,7 +208,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
207 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); 208 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
208 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); 209 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
209 lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; 210 lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
210 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 211 wrmsr(MSR_IA32_MISC_ENABLE, lo, hi);
211 } 212 }
212 } 213 }
213 214
@@ -283,7 +284,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
283 /* Intel has a non-standard dependency on %ecx for this CPUID level. */ 284 /* Intel has a non-standard dependency on %ecx for this CPUID level. */
284 cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); 285 cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
285 if (eax & 0x1f) 286 if (eax & 0x1f)
286 return ((eax >> 26) + 1); 287 return (eax >> 26) + 1;
287 else 288 else
288 return 1; 289 return 1;
289} 290}
@@ -349,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
349 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); 350 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
350 } 351 }
351 352
353 if (c->cpuid_level > 6) {
354 unsigned ecx = cpuid_ecx(6);
355 if (ecx & 0x01)
356 set_cpu_cap(c, X86_FEATURE_APERFMPERF);
357 }
358
352 if (cpu_has_xmm2) 359 if (cpu_has_xmm2)
353 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); 360 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
354 if (cpu_has_ds) { 361 if (cpu_has_ds) {
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 789efe217e1a..804c40e2bc3e 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */ 8 */
9 9
@@ -16,7 +16,7 @@
16#include <linux/pci.h> 16#include <linux/pci.h>
17 17
18#include <asm/processor.h> 18#include <asm/processor.h>
19#include <asm/smp.h> 19#include <linux/smp.h>
20#include <asm/k8.h> 20#include <asm/k8.h>
21 21
22#define LVL_1_INST 1 22#define LVL_1_INST 1
@@ -25,14 +25,15 @@
25#define LVL_3 4 25#define LVL_3 4
26#define LVL_TRACE 5 26#define LVL_TRACE 5
27 27
28struct _cache_table 28struct _cache_table {
29{
30 unsigned char descriptor; 29 unsigned char descriptor;
31 char cache_type; 30 char cache_type;
32 short size; 31 short size;
33}; 32};
34 33
35/* all the cache descriptor types we care about (no TLB or trace cache entries) */ 34/* All the cache descriptor types we care about (no TLB or
35 trace cache entries) */
36
36static const struct _cache_table __cpuinitconst cache_table[] = 37static const struct _cache_table __cpuinitconst cache_table[] =
37{ 38{
38 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ 39 { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */
@@ -105,8 +106,7 @@ static const struct _cache_table __cpuinitconst cache_table[] =
105}; 106};
106 107
107 108
108enum _cache_type 109enum _cache_type {
109{
110 CACHE_TYPE_NULL = 0, 110 CACHE_TYPE_NULL = 0,
111 CACHE_TYPE_DATA = 1, 111 CACHE_TYPE_DATA = 1,
112 CACHE_TYPE_INST = 2, 112 CACHE_TYPE_INST = 2,
@@ -170,31 +170,31 @@ unsigned short num_cache_leaves;
170 Maybe later */ 170 Maybe later */
171union l1_cache { 171union l1_cache {
172 struct { 172 struct {
173 unsigned line_size : 8; 173 unsigned line_size:8;
174 unsigned lines_per_tag : 8; 174 unsigned lines_per_tag:8;
175 unsigned assoc : 8; 175 unsigned assoc:8;
176 unsigned size_in_kb : 8; 176 unsigned size_in_kb:8;
177 }; 177 };
178 unsigned val; 178 unsigned val;
179}; 179};
180 180
181union l2_cache { 181union l2_cache {
182 struct { 182 struct {
183 unsigned line_size : 8; 183 unsigned line_size:8;
184 unsigned lines_per_tag : 4; 184 unsigned lines_per_tag:4;
185 unsigned assoc : 4; 185 unsigned assoc:4;
186 unsigned size_in_kb : 16; 186 unsigned size_in_kb:16;
187 }; 187 };
188 unsigned val; 188 unsigned val;
189}; 189};
190 190
191union l3_cache { 191union l3_cache {
192 struct { 192 struct {
193 unsigned line_size : 8; 193 unsigned line_size:8;
194 unsigned lines_per_tag : 4; 194 unsigned lines_per_tag:4;
195 unsigned assoc : 4; 195 unsigned assoc:4;
196 unsigned res : 2; 196 unsigned res:2;
197 unsigned size_encoded : 14; 197 unsigned size_encoded:14;
198 }; 198 };
199 unsigned val; 199 unsigned val;
200}; 200};
@@ -241,7 +241,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
241 case 0: 241 case 0:
242 if (!l1->val) 242 if (!l1->val)
243 return; 243 return;
244 assoc = l1->assoc; 244 assoc = assocs[l1->assoc];
245 line_size = l1->line_size; 245 line_size = l1->line_size;
246 lines_per_tag = l1->lines_per_tag; 246 lines_per_tag = l1->lines_per_tag;
247 size_in_kb = l1->size_in_kb; 247 size_in_kb = l1->size_in_kb;
@@ -249,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
249 case 2: 249 case 2:
250 if (!l2.val) 250 if (!l2.val)
251 return; 251 return;
252 assoc = l2.assoc; 252 assoc = assocs[l2.assoc];
253 line_size = l2.line_size; 253 line_size = l2.line_size;
254 lines_per_tag = l2.lines_per_tag; 254 lines_per_tag = l2.lines_per_tag;
255 /* cpu_data has errata corrections for K7 applied */ 255 /* cpu_data has errata corrections for K7 applied */
@@ -258,10 +258,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
258 case 3: 258 case 3:
259 if (!l3.val) 259 if (!l3.val)
260 return; 260 return;
261 assoc = l3.assoc; 261 assoc = assocs[l3.assoc];
262 line_size = l3.line_size; 262 line_size = l3.line_size;
263 lines_per_tag = l3.lines_per_tag; 263 lines_per_tag = l3.lines_per_tag;
264 size_in_kb = l3.size_encoded * 512; 264 size_in_kb = l3.size_encoded * 512;
265 if (boot_cpu_has(X86_FEATURE_AMD_DCM)) {
266 size_in_kb = size_in_kb >> 1;
267 assoc = assoc >> 1;
268 }
265 break; 269 break;
266 default: 270 default:
267 return; 271 return;
@@ -270,18 +274,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
270 eax->split.is_self_initializing = 1; 274 eax->split.is_self_initializing = 1;
271 eax->split.type = types[leaf]; 275 eax->split.type = types[leaf];
272 eax->split.level = levels[leaf]; 276 eax->split.level = levels[leaf];
273 if (leaf == 3) 277 eax->split.num_threads_sharing = 0;
274 eax->split.num_threads_sharing =
275 current_cpu_data.x86_max_cores - 1;
276 else
277 eax->split.num_threads_sharing = 0;
278 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; 278 eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1;
279 279
280 280
281 if (assoc == 0xf) 281 if (assoc == 0xffff)
282 eax->split.is_fully_associative = 1; 282 eax->split.is_fully_associative = 1;
283 ebx->split.coherency_line_size = line_size - 1; 283 ebx->split.coherency_line_size = line_size - 1;
284 ebx->split.ways_of_associativity = assocs[assoc] - 1; 284 ebx->split.ways_of_associativity = assoc - 1;
285 ebx->split.physical_line_partition = lines_per_tag - 1; 285 ebx->split.physical_line_partition = lines_per_tag - 1;
286 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / 286 ecx->split.number_of_sets = (size_in_kb * 1024) / line_size /
287 (ebx->split.ways_of_associativity + 1) - 1; 287 (ebx->split.ways_of_associativity + 1) - 1;
@@ -350,7 +350,8 @@ static int __cpuinit find_num_cache_leaves(void)
350 350
351unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) 351unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
352{ 352{
353 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ 353 /* Cache sizes */
354 unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
354 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ 355 unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
355 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ 356 unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
356 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; 357 unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
@@ -377,8 +378,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
377 378
378 retval = cpuid4_cache_lookup_regs(i, &this_leaf); 379 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
379 if (retval >= 0) { 380 if (retval >= 0) {
380 switch(this_leaf.eax.split.level) { 381 switch (this_leaf.eax.split.level) {
381 case 1: 382 case 1:
382 if (this_leaf.eax.split.type == 383 if (this_leaf.eax.split.type ==
383 CACHE_TYPE_DATA) 384 CACHE_TYPE_DATA)
384 new_l1d = this_leaf.size/1024; 385 new_l1d = this_leaf.size/1024;
@@ -386,19 +387,20 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
386 CACHE_TYPE_INST) 387 CACHE_TYPE_INST)
387 new_l1i = this_leaf.size/1024; 388 new_l1i = this_leaf.size/1024;
388 break; 389 break;
389 case 2: 390 case 2:
390 new_l2 = this_leaf.size/1024; 391 new_l2 = this_leaf.size/1024;
391 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 392 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
392 index_msb = get_count_order(num_threads_sharing); 393 index_msb = get_count_order(num_threads_sharing);
393 l2_id = c->apicid >> index_msb; 394 l2_id = c->apicid >> index_msb;
394 break; 395 break;
395 case 3: 396 case 3:
396 new_l3 = this_leaf.size/1024; 397 new_l3 = this_leaf.size/1024;
397 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; 398 num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing;
398 index_msb = get_count_order(num_threads_sharing); 399 index_msb = get_count_order(
400 num_threads_sharing);
399 l3_id = c->apicid >> index_msb; 401 l3_id = c->apicid >> index_msb;
400 break; 402 break;
401 default: 403 default:
402 break; 404 break;
403 } 405 }
404 } 406 }
@@ -421,22 +423,21 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
421 /* Number of times to iterate */ 423 /* Number of times to iterate */
422 n = cpuid_eax(2) & 0xFF; 424 n = cpuid_eax(2) & 0xFF;
423 425
424 for ( i = 0 ; i < n ; i++ ) { 426 for (i = 0 ; i < n ; i++) {
425 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]); 427 cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]);
426 428
427 /* If bit 31 is set, this is an unknown format */ 429 /* If bit 31 is set, this is an unknown format */
428 for ( j = 0 ; j < 3 ; j++ ) { 430 for (j = 0 ; j < 3 ; j++)
429 if (regs[j] & (1 << 31)) regs[j] = 0; 431 if (regs[j] & (1 << 31))
430 } 432 regs[j] = 0;
431 433
432 /* Byte 0 is level count, not a descriptor */ 434 /* Byte 0 is level count, not a descriptor */
433 for ( j = 1 ; j < 16 ; j++ ) { 435 for (j = 1 ; j < 16 ; j++) {
434 unsigned char des = dp[j]; 436 unsigned char des = dp[j];
435 unsigned char k = 0; 437 unsigned char k = 0;
436 438
437 /* look up this descriptor in the table */ 439 /* look up this descriptor in the table */
438 while (cache_table[k].descriptor != 0) 440 while (cache_table[k].descriptor != 0) {
439 {
440 if (cache_table[k].descriptor == des) { 441 if (cache_table[k].descriptor == des) {
441 if (only_trace && cache_table[k].cache_type != LVL_TRACE) 442 if (only_trace && cache_table[k].cache_type != LVL_TRACE)
442 break; 443 break;
@@ -488,14 +489,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
488 } 489 }
489 490
490 if (trace) 491 if (trace)
491 printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); 492 printk(KERN_INFO "CPU: Trace cache: %dK uops", trace);
492 else if ( l1i ) 493 else if (l1i)
493 printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); 494 printk(KERN_INFO "CPU: L1 I cache: %dK", l1i);
494 495
495 if (l1d) 496 if (l1d)
496 printk(", L1 D cache: %dK\n", l1d); 497 printk(KERN_CONT ", L1 D cache: %dK\n", l1d);
497 else 498 else
498 printk("\n"); 499 printk(KERN_CONT "\n");
499 500
500 if (l2) 501 if (l2)
501 printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); 502 printk(KERN_INFO "CPU: L2 cache: %dK\n", l2);
@@ -522,6 +523,18 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
522 int index_msb, i; 523 int index_msb, i;
523 struct cpuinfo_x86 *c = &cpu_data(cpu); 524 struct cpuinfo_x86 *c = &cpu_data(cpu);
524 525
526 if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) {
527 struct cpuinfo_x86 *d;
528 for_each_online_cpu(i) {
529 if (!per_cpu(cpuid4_info, i))
530 continue;
531 d = &cpu_data(i);
532 this_leaf = CPUID4_INFO_IDX(i, index);
533 cpumask_copy(to_cpumask(this_leaf->shared_cpu_map),
534 d->llc_shared_map);
535 }
536 return;
537 }
525 this_leaf = CPUID4_INFO_IDX(cpu, index); 538 this_leaf = CPUID4_INFO_IDX(cpu, index);
526 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 539 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
527 540
@@ -558,8 +571,13 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
558 } 571 }
559} 572}
560#else 573#else
561static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} 574static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
562static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} 575{
576}
577
578static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
579{
580}
563#endif 581#endif
564 582
565static void __cpuinit free_cache_attributes(unsigned int cpu) 583static void __cpuinit free_cache_attributes(unsigned int cpu)
@@ -645,7 +663,7 @@ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
645static ssize_t show_##file_name \ 663static ssize_t show_##file_name \
646 (struct _cpuid4_info *this_leaf, char *buf) \ 664 (struct _cpuid4_info *this_leaf, char *buf) \
647{ \ 665{ \
648 return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ 666 return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \
649} 667}
650 668
651show_one_plus(level, eax.split.level, 0); 669show_one_plus(level, eax.split.level, 0);
@@ -656,7 +674,7 @@ show_one_plus(number_of_sets, ecx.split.number_of_sets, 1);
656 674
657static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) 675static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf)
658{ 676{
659 return sprintf (buf, "%luK\n", this_leaf->size / 1024); 677 return sprintf(buf, "%luK\n", this_leaf->size / 1024);
660} 678}
661 679
662static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, 680static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
@@ -669,7 +687,7 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
669 const struct cpumask *mask; 687 const struct cpumask *mask;
670 688
671 mask = to_cpumask(this_leaf->shared_cpu_map); 689 mask = to_cpumask(this_leaf->shared_cpu_map);
672 n = type? 690 n = type ?
673 cpulist_scnprintf(buf, len-2, mask) : 691 cpulist_scnprintf(buf, len-2, mask) :
674 cpumask_scnprintf(buf, len-2, mask); 692 cpumask_scnprintf(buf, len-2, mask);
675 buf[n++] = '\n'; 693 buf[n++] = '\n';
@@ -800,7 +818,7 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644,
800static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, 818static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644,
801 show_cache_disable_1, store_cache_disable_1); 819 show_cache_disable_1, store_cache_disable_1);
802 820
803static struct attribute * default_attrs[] = { 821static struct attribute *default_attrs[] = {
804 &type.attr, 822 &type.attr,
805 &level.attr, 823 &level.attr,
806 &coherency_line_size.attr, 824 &coherency_line_size.attr,
@@ -815,7 +833,7 @@ static struct attribute * default_attrs[] = {
815 NULL 833 NULL
816}; 834};
817 835
818static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) 836static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf)
819{ 837{
820 struct _cache_attr *fattr = to_attr(attr); 838 struct _cache_attr *fattr = to_attr(attr);
821 struct _index_kobject *this_leaf = to_object(kobj); 839 struct _index_kobject *this_leaf = to_object(kobj);
@@ -828,8 +846,8 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
828 return ret; 846 return ret;
829} 847}
830 848
831static ssize_t store(struct kobject * kobj, struct attribute * attr, 849static ssize_t store(struct kobject *kobj, struct attribute *attr,
832 const char * buf, size_t count) 850 const char *buf, size_t count)
833{ 851{
834 struct _cache_attr *fattr = to_attr(attr); 852 struct _cache_attr *fattr = to_attr(attr);
835 struct _index_kobject *this_leaf = to_object(kobj); 853 struct _index_kobject *this_leaf = to_object(kobj);
@@ -883,7 +901,7 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu)
883 goto err_out; 901 goto err_out;
884 902
885 per_cpu(index_kobject, cpu) = kzalloc( 903 per_cpu(index_kobject, cpu) = kzalloc(
886 sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); 904 sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL);
887 if (unlikely(per_cpu(index_kobject, cpu) == NULL)) 905 if (unlikely(per_cpu(index_kobject, cpu) == NULL))
888 goto err_out; 906 goto err_out;
889 907
@@ -917,7 +935,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
917 } 935 }
918 936
919 for (i = 0; i < num_cache_leaves; i++) { 937 for (i = 0; i < num_cache_leaves; i++) {
920 this_object = INDEX_KOBJECT_PTR(cpu,i); 938 this_object = INDEX_KOBJECT_PTR(cpu, i);
921 this_object->cpu = cpu; 939 this_object->cpu = cpu;
922 this_object->index = i; 940 this_object->index = i;
923 retval = kobject_init_and_add(&(this_object->kobj), 941 retval = kobject_init_and_add(&(this_object->kobj),
@@ -925,9 +943,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
925 per_cpu(cache_kobject, cpu), 943 per_cpu(cache_kobject, cpu),
926 "index%1lu", i); 944 "index%1lu", i);
927 if (unlikely(retval)) { 945 if (unlikely(retval)) {
928 for (j = 0; j < i; j++) { 946 for (j = 0; j < i; j++)
929 kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); 947 kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj));
930 }
931 kobject_put(per_cpu(cache_kobject, cpu)); 948 kobject_put(per_cpu(cache_kobject, cpu));
932 cpuid4_cache_sysfs_exit(cpu); 949 cpuid4_cache_sysfs_exit(cpu);
933 return retval; 950 return retval;
@@ -952,7 +969,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
952 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); 969 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
953 970
954 for (i = 0; i < num_cache_leaves; i++) 971 for (i = 0; i < num_cache_leaves; i++)
955 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 972 kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj));
956 kobject_put(per_cpu(cache_kobject, cpu)); 973 kobject_put(per_cpu(cache_kobject, cpu));
957 cpuid4_cache_sysfs_exit(cpu); 974 cpuid4_cache_sysfs_exit(cpu);
958} 975}
@@ -977,8 +994,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb,
977 return NOTIFY_OK; 994 return NOTIFY_OK;
978} 995}
979 996
980static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = 997static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = {
981{
982 .notifier_call = cacheinfo_cpu_callback, 998 .notifier_call = cacheinfo_cpu_callback,
983}; 999};
984 1000
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile
index 188a1ca5ad2b..4ac6d48fe11b 100644
--- a/arch/x86/kernel/cpu/mcheck/Makefile
+++ b/arch/x86/kernel/cpu/mcheck/Makefile
@@ -1,11 +1,8 @@
1obj-y = mce.o 1obj-y = mce.o mce-severity.o
2 2
3obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o
4obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o
5obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o 3obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o
6obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o 4obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o
7obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o 5obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o
8obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
9obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o 6obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o
10obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o 7obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o
11 8
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c
deleted file mode 100644
index b945d5dbc609..000000000000
--- a/arch/x86/kernel/cpu/mcheck/k7.c
+++ /dev/null
@@ -1,116 +0,0 @@
1/*
2 * Athlon specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Dave Jones <davej@redhat.com>
4 */
5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
9#include <linux/smp.h>
10
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/mce.h>
14#include <asm/msr.h>
15
16/* Machine Check Handler For AMD Athlon/Duron: */
17static void k7_machine_check(struct pt_regs *regs, long error_code)
18{
19 u32 alow, ahigh, high, low;
20 u32 mcgstl, mcgsth;
21 int recover = 1;
22 int i;
23
24 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
25 if (mcgstl & (1<<0)) /* Recoverable ? */
26 recover = 0;
27
28 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
29 smp_processor_id(), mcgsth, mcgstl);
30
31 for (i = 1; i < nr_mce_banks; i++) {
32 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
33 if (high & (1<<31)) {
34 char misc[20];
35 char addr[24];
36
37 misc[0] = '\0';
38 addr[0] = '\0';
39
40 if (high & (1<<29))
41 recover |= 1;
42 if (high & (1<<25))
43 recover |= 2;
44 high &= ~(1<<31);
45
46 if (high & (1<<27)) {
47 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
48 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
49 }
50 if (high & (1<<26)) {
51 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
52 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
53 }
54
55 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
56 smp_processor_id(), i, high, low, misc, addr);
57
58 /* Clear it: */
59 wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
60 /* Serialize: */
61 wmb();
62 add_taint(TAINT_MACHINE_CHECK);
63 }
64 }
65
66 if (recover & 2)
67 panic("CPU context corrupt");
68 if (recover & 1)
69 panic("Unable to continue");
70
71 printk(KERN_EMERG "Attempting to continue.\n");
72
73 mcgstl &= ~(1<<2);
74 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
75}
76
77
78/* AMD K7 machine check is Intel like: */
79void amd_mcheck_init(struct cpuinfo_x86 *c)
80{
81 u32 l, h;
82 int i;
83
84 if (!cpu_has(c, X86_FEATURE_MCE))
85 return;
86
87 machine_check_vector = k7_machine_check;
88 /* Make sure the vector pointer is visible before we enable MCEs: */
89 wmb();
90
91 printk(KERN_INFO "Intel machine check architecture supported.\n");
92
93 rdmsr(MSR_IA32_MCG_CAP, l, h);
94 if (l & (1<<8)) /* Control register present ? */
95 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
96 nr_mce_banks = l & 0xff;
97
98 /*
99 * Clear status for MC index 0 separately, we don't touch CTL,
100 * as some K7 Athlons cause spurious MCEs when its enabled:
101 */
102 if (boot_cpu_data.x86 == 6) {
103 wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0);
104 i = 1;
105 } else
106 i = 0;
107
108 for (; i < nr_mce_banks; i++) {
109 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
110 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
111 }
112
113 set_in_cr4(X86_CR4_MCE);
114 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
115 smp_processor_id());
116}
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index a3a235a53f09..7029f0e2acad 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -18,7 +18,12 @@
18#include <linux/string.h> 18#include <linux/string.h>
19#include <linux/fs.h> 19#include <linux/fs.h>
20#include <linux/smp.h> 20#include <linux/smp.h>
21#include <linux/notifier.h>
22#include <linux/kdebug.h>
23#include <linux/cpu.h>
24#include <linux/sched.h>
21#include <asm/mce.h> 25#include <asm/mce.h>
26#include <asm/apic.h>
22 27
23/* Update fake mce registers on current CPU. */ 28/* Update fake mce registers on current CPU. */
24static void inject_mce(struct mce *m) 29static void inject_mce(struct mce *m)
@@ -39,44 +44,141 @@ static void inject_mce(struct mce *m)
39 i->finished = 1; 44 i->finished = 1;
40} 45}
41 46
42struct delayed_mce { 47static void raise_poll(struct mce *m)
43 struct timer_list timer; 48{
44 struct mce m; 49 unsigned long flags;
45}; 50 mce_banks_t b;
46 51
47/* Inject mce on current CPU */ 52 memset(&b, 0xff, sizeof(mce_banks_t));
48static void raise_mce(unsigned long data) 53 local_irq_save(flags);
54 machine_check_poll(0, &b);
55 local_irq_restore(flags);
56 m->finished = 0;
57}
58
59static void raise_exception(struct mce *m, struct pt_regs *pregs)
49{ 60{
50 struct delayed_mce *dm = (struct delayed_mce *)data; 61 struct pt_regs regs;
51 struct mce *m = &dm->m; 62 unsigned long flags;
52 int cpu = m->extcpu;
53 63
54 inject_mce(m); 64 if (!pregs) {
55 if (m->status & MCI_STATUS_UC) {
56 struct pt_regs regs;
57 memset(&regs, 0, sizeof(struct pt_regs)); 65 memset(&regs, 0, sizeof(struct pt_regs));
58 regs.ip = m->ip; 66 regs.ip = m->ip;
59 regs.cs = m->cs; 67 regs.cs = m->cs;
68 pregs = &regs;
69 }
70 /* in mcheck exeception handler, irq will be disabled */
71 local_irq_save(flags);
72 do_machine_check(pregs, 0);
73 local_irq_restore(flags);
74 m->finished = 0;
75}
76
77static cpumask_t mce_inject_cpumask;
78
79static int mce_raise_notify(struct notifier_block *self,
80 unsigned long val, void *data)
81{
82 struct die_args *args = (struct die_args *)data;
83 int cpu = smp_processor_id();
84 struct mce *m = &__get_cpu_var(injectm);
85 if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask))
86 return NOTIFY_DONE;
87 cpu_clear(cpu, mce_inject_cpumask);
88 if (m->inject_flags & MCJ_EXCEPTION)
89 raise_exception(m, args->regs);
90 else if (m->status)
91 raise_poll(m);
92 return NOTIFY_STOP;
93}
94
95static struct notifier_block mce_raise_nb = {
96 .notifier_call = mce_raise_notify,
97 .priority = 1000,
98};
99
100/* Inject mce on current CPU */
101static int raise_local(struct mce *m)
102{
103 int context = MCJ_CTX(m->inject_flags);
104 int ret = 0;
105 int cpu = m->extcpu;
106
107 if (m->inject_flags & MCJ_EXCEPTION) {
60 printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); 108 printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu);
61 do_machine_check(&regs, 0); 109 switch (context) {
110 case MCJ_CTX_IRQ:
111 /*
112 * Could do more to fake interrupts like
113 * calling irq_enter, but the necessary
114 * machinery isn't exported currently.
115 */
116 /*FALL THROUGH*/
117 case MCJ_CTX_PROCESS:
118 raise_exception(m, NULL);
119 break;
120 default:
121 printk(KERN_INFO "Invalid MCE context\n");
122 ret = -EINVAL;
123 }
62 printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); 124 printk(KERN_INFO "MCE exception done on CPU %d\n", cpu);
63 } else { 125 } else if (m->status) {
64 mce_banks_t b;
65 memset(&b, 0xff, sizeof(mce_banks_t));
66 printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); 126 printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu);
67 machine_check_poll(0, &b); 127 raise_poll(m);
68 mce_notify_irq(); 128 mce_notify_irq();
69 printk(KERN_INFO "Finished machine check poll on CPU %d\n", 129 printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu);
70 cpu); 130 } else
71 } 131 m->finished = 0;
72 kfree(dm); 132
133 return ret;
134}
135
136static void raise_mce(struct mce *m)
137{
138 int context = MCJ_CTX(m->inject_flags);
139
140 inject_mce(m);
141
142 if (context == MCJ_CTX_RANDOM)
143 return;
144
145#ifdef CONFIG_X86_LOCAL_APIC
146 if (m->inject_flags & MCJ_NMI_BROADCAST) {
147 unsigned long start;
148 int cpu;
149 get_online_cpus();
150 mce_inject_cpumask = cpu_online_map;
151 cpu_clear(get_cpu(), mce_inject_cpumask);
152 for_each_online_cpu(cpu) {
153 struct mce *mcpu = &per_cpu(injectm, cpu);
154 if (!mcpu->finished ||
155 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
156 cpu_clear(cpu, mce_inject_cpumask);
157 }
158 if (!cpus_empty(mce_inject_cpumask))
159 apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR);
160 start = jiffies;
161 while (!cpus_empty(mce_inject_cpumask)) {
162 if (!time_before(jiffies, start + 2*HZ)) {
163 printk(KERN_ERR
164 "Timeout waiting for mce inject NMI %lx\n",
165 *cpus_addr(mce_inject_cpumask));
166 break;
167 }
168 cpu_relax();
169 }
170 raise_local(m);
171 put_cpu();
172 put_online_cpus();
173 } else
174#endif
175 raise_local(m);
73} 176}
74 177
75/* Error injection interface */ 178/* Error injection interface */
76static ssize_t mce_write(struct file *filp, const char __user *ubuf, 179static ssize_t mce_write(struct file *filp, const char __user *ubuf,
77 size_t usize, loff_t *off) 180 size_t usize, loff_t *off)
78{ 181{
79 struct delayed_mce *dm;
80 struct mce m; 182 struct mce m;
81 183
82 if (!capable(CAP_SYS_ADMIN)) 184 if (!capable(CAP_SYS_ADMIN))
@@ -96,19 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf,
96 if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) 198 if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu))
97 return -EINVAL; 199 return -EINVAL;
98 200
99 dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL);
100 if (!dm)
101 return -ENOMEM;
102
103 /* 201 /*
104 * Need to give user space some time to set everything up, 202 * Need to give user space some time to set everything up,
105 * so do it a jiffie or two later everywhere. 203 * so do it a jiffie or two later everywhere.
106 * Should we use a hrtimer here for better synchronization?
107 */ 204 */
108 memcpy(&dm->m, &m, sizeof(struct mce)); 205 schedule_timeout(2);
109 setup_timer(&dm->timer, raise_mce, (unsigned long)dm); 206 raise_mce(&m);
110 dm->timer.expires = jiffies + 2;
111 add_timer_on(&dm->timer, m.extcpu);
112 return usize; 207 return usize;
113} 208}
114 209
@@ -116,6 +211,7 @@ static int inject_init(void)
116{ 211{
117 printk(KERN_INFO "Machine check injector initialized\n"); 212 printk(KERN_INFO "Machine check injector initialized\n");
118 mce_chrdev_ops.write = mce_write; 213 mce_chrdev_ops.write = mce_write;
214 register_die_notifier(&mce_raise_nb);
119 return 0; 215 return 0;
120} 216}
121 217
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h
index 54dcb8ff12e5..32996f9fab67 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-internal.h
+++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h
@@ -1,3 +1,4 @@
1#include <linux/sysdev.h>
1#include <asm/mce.h> 2#include <asm/mce.h>
2 3
3enum severity_level { 4enum severity_level {
@@ -10,6 +11,20 @@ enum severity_level {
10 MCE_PANIC_SEVERITY, 11 MCE_PANIC_SEVERITY,
11}; 12};
12 13
14#define ATTR_LEN 16
15
16/* One object for each MCE bank, shared by all CPUs */
17struct mce_bank {
18 u64 ctl; /* subevents to enable */
19 unsigned char init; /* initialise bank? */
20 struct sysdev_attribute attr; /* sysdev attribute */
21 char attrname[ATTR_LEN]; /* attribute name */
22};
23
13int mce_severity(struct mce *a, int tolerant, char **msg); 24int mce_severity(struct mce *a, int tolerant, char **msg);
25struct dentry *mce_get_debugfs_dir(void);
14 26
15extern int mce_ser; 27extern int mce_ser;
28
29extern struct mce_bank *mce_banks;
30
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c
index ff0807f97056..8a85dd1b1aa1 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-severity.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c
@@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg)
139 } 139 }
140} 140}
141 141
142#ifdef CONFIG_DEBUG_FS
142static void *s_start(struct seq_file *f, loff_t *pos) 143static void *s_start(struct seq_file *f, loff_t *pos)
143{ 144{
144 if (*pos >= ARRAY_SIZE(severities)) 145 if (*pos >= ARRAY_SIZE(severities))
@@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void)
197{ 198{
198 struct dentry *dmce = NULL, *fseverities_coverage = NULL; 199 struct dentry *dmce = NULL, *fseverities_coverage = NULL;
199 200
200 dmce = debugfs_create_dir("mce", NULL); 201 dmce = mce_get_debugfs_dir();
201 if (dmce == NULL) 202 if (dmce == NULL)
202 goto err_out; 203 goto err_out;
203 fseverities_coverage = debugfs_create_file("severities-coverage", 204 fseverities_coverage = debugfs_create_file("severities-coverage",
@@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void)
209 return 0; 210 return 0;
210 211
211err_out: 212err_out:
212 if (fseverities_coverage)
213 debugfs_remove(fseverities_coverage);
214 if (dmce)
215 debugfs_remove(dmce);
216 return -ENOMEM; 213 return -ENOMEM;
217} 214}
218late_initcall(severities_debugfs_init); 215late_initcall(severities_debugfs_init);
216#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 01213048f62f..2f5aab26320e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -34,6 +34,7 @@
34#include <linux/smp.h> 34#include <linux/smp.h>
35#include <linux/fs.h> 35#include <linux/fs.h>
36#include <linux/mm.h> 36#include <linux/mm.h>
37#include <linux/debugfs.h>
37 38
38#include <asm/processor.h> 39#include <asm/processor.h>
39#include <asm/hw_irq.h> 40#include <asm/hw_irq.h>
@@ -45,21 +46,8 @@
45 46
46#include "mce-internal.h" 47#include "mce-internal.h"
47 48
48/* Handle unconfigured int18 (should never happen) */
49static void unexpected_machine_check(struct pt_regs *regs, long error_code)
50{
51 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
52 smp_processor_id());
53}
54
55/* Call the installed machine check handler for this CPU setup. */
56void (*machine_check_vector)(struct pt_regs *, long error_code) =
57 unexpected_machine_check;
58
59int mce_disabled __read_mostly; 49int mce_disabled __read_mostly;
60 50
61#ifdef CONFIG_X86_NEW_MCE
62
63#define MISC_MCELOG_MINOR 227 51#define MISC_MCELOG_MINOR 227
64 52
65#define SPINUNIT 100 /* 100ns */ 53#define SPINUNIT 100 /* 100ns */
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count);
77 */ 65 */
78static int tolerant __read_mostly = 1; 66static int tolerant __read_mostly = 1;
79static int banks __read_mostly; 67static int banks __read_mostly;
80static u64 *bank __read_mostly;
81static int rip_msr __read_mostly; 68static int rip_msr __read_mostly;
82static int mce_bootlog __read_mostly = -1; 69static int mce_bootlog __read_mostly = -1;
83static int monarch_timeout __read_mostly = -1; 70static int monarch_timeout __read_mostly = -1;
@@ -87,13 +74,13 @@ int mce_cmci_disabled __read_mostly;
87int mce_ignore_ce __read_mostly; 74int mce_ignore_ce __read_mostly;
88int mce_ser __read_mostly; 75int mce_ser __read_mostly;
89 76
77struct mce_bank *mce_banks __read_mostly;
78
90/* User mode helper program triggered by machine check event */ 79/* User mode helper program triggered by machine check event */
91static unsigned long mce_need_notify; 80static unsigned long mce_need_notify;
92static char mce_helper[128]; 81static char mce_helper[128];
93static char *mce_helper_argv[2] = { mce_helper, NULL }; 82static char *mce_helper_argv[2] = { mce_helper, NULL };
94 83
95static unsigned long dont_init_banks;
96
97static DECLARE_WAIT_QUEUE_HEAD(mce_wait); 84static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
98static DEFINE_PER_CPU(struct mce, mces_seen); 85static DEFINE_PER_CPU(struct mce, mces_seen);
99static int cpu_missing; 86static int cpu_missing;
@@ -104,11 +91,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
104 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL 91 [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL
105}; 92};
106 93
107static inline int skip_bank_init(int i)
108{
109 return i < BITS_PER_LONG && test_bit(i, &dont_init_banks);
110}
111
112static DEFINE_PER_CPU(struct work_struct, mce_work); 94static DEFINE_PER_CPU(struct work_struct, mce_work);
113 95
114/* Do initial initialization of a struct mce */ 96/* Do initial initialization of a struct mce */
@@ -183,6 +165,11 @@ void mce_log(struct mce *mce)
183 set_bit(0, &mce_need_notify); 165 set_bit(0, &mce_need_notify);
184} 166}
185 167
168void __weak decode_mce(struct mce *m)
169{
170 return;
171}
172
186static void print_mce(struct mce *m) 173static void print_mce(struct mce *m)
187{ 174{
188 printk(KERN_EMERG 175 printk(KERN_EMERG
@@ -205,6 +192,8 @@ static void print_mce(struct mce *m)
205 printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", 192 printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
206 m->cpuvendor, m->cpuid, m->time, m->socketid, 193 m->cpuvendor, m->cpuid, m->time, m->socketid,
207 m->apicid); 194 m->apicid);
195
196 decode_mce(m);
208} 197}
209 198
210static void print_mce_head(void) 199static void print_mce_head(void)
@@ -215,13 +204,19 @@ static void print_mce_head(void)
215static void print_mce_tail(void) 204static void print_mce_tail(void)
216{ 205{
217 printk(KERN_EMERG "This is not a software problem!\n" 206 printk(KERN_EMERG "This is not a software problem!\n"
218 "Run through mcelog --ascii to decode and contact your hardware vendor\n"); 207#if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD))
208 "Run through mcelog --ascii to decode and contact your hardware vendor\n"
209#endif
210 );
219} 211}
220 212
221#define PANIC_TIMEOUT 5 /* 5 seconds */ 213#define PANIC_TIMEOUT 5 /* 5 seconds */
222 214
223static atomic_t mce_paniced; 215static atomic_t mce_paniced;
224 216
217static int fake_panic;
218static atomic_t mce_fake_paniced;
219
225/* Panic in progress. Enable interrupts and wait for final IPI */ 220/* Panic in progress. Enable interrupts and wait for final IPI */
226static void wait_for_panic(void) 221static void wait_for_panic(void)
227{ 222{
@@ -239,15 +234,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
239{ 234{
240 int i; 235 int i;
241 236
242 /* 237 if (!fake_panic) {
243 * Make sure only one CPU runs in machine check panic 238 /*
244 */ 239 * Make sure only one CPU runs in machine check panic
245 if (atomic_add_return(1, &mce_paniced) > 1) 240 */
246 wait_for_panic(); 241 if (atomic_inc_return(&mce_paniced) > 1)
247 barrier(); 242 wait_for_panic();
243 barrier();
248 244
249 bust_spinlocks(1); 245 bust_spinlocks(1);
250 console_verbose(); 246 console_verbose();
247 } else {
248 /* Don't log too much for fake panic */
249 if (atomic_inc_return(&mce_fake_paniced) > 1)
250 return;
251 }
251 print_mce_head(); 252 print_mce_head();
252 /* First print corrected ones that are still unlogged */ 253 /* First print corrected ones that are still unlogged */
253 for (i = 0; i < MCE_LOG_LEN; i++) { 254 for (i = 0; i < MCE_LOG_LEN; i++) {
@@ -274,9 +275,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
274 print_mce_tail(); 275 print_mce_tail();
275 if (exp) 276 if (exp)
276 printk(KERN_EMERG "Machine check: %s\n", exp); 277 printk(KERN_EMERG "Machine check: %s\n", exp);
277 if (panic_timeout == 0) 278 if (!fake_panic) {
278 panic_timeout = mce_panic_timeout; 279 if (panic_timeout == 0)
279 panic(msg); 280 panic_timeout = mce_panic_timeout;
281 panic(msg);
282 } else
283 printk(KERN_EMERG "Fake kernel panic: %s\n", msg);
280} 284}
281 285
282/* Support code for software error injection */ 286/* Support code for software error injection */
@@ -286,11 +290,11 @@ static int msr_to_offset(u32 msr)
286 unsigned bank = __get_cpu_var(injectm.bank); 290 unsigned bank = __get_cpu_var(injectm.bank);
287 if (msr == rip_msr) 291 if (msr == rip_msr)
288 return offsetof(struct mce, ip); 292 return offsetof(struct mce, ip);
289 if (msr == MSR_IA32_MC0_STATUS + bank*4) 293 if (msr == MSR_IA32_MCx_STATUS(bank))
290 return offsetof(struct mce, status); 294 return offsetof(struct mce, status);
291 if (msr == MSR_IA32_MC0_ADDR + bank*4) 295 if (msr == MSR_IA32_MCx_ADDR(bank))
292 return offsetof(struct mce, addr); 296 return offsetof(struct mce, addr);
293 if (msr == MSR_IA32_MC0_MISC + bank*4) 297 if (msr == MSR_IA32_MCx_MISC(bank))
294 return offsetof(struct mce, misc); 298 return offsetof(struct mce, misc);
295 if (msr == MSR_IA32_MCG_STATUS) 299 if (msr == MSR_IA32_MCG_STATUS)
296 return offsetof(struct mce, mcgstatus); 300 return offsetof(struct mce, mcgstatus);
@@ -495,7 +499,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
495 499
496 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 500 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
497 for (i = 0; i < banks; i++) { 501 for (i = 0; i < banks; i++) {
498 if (!bank[i] || !test_bit(i, *b)) 502 if (!mce_banks[i].ctl || !test_bit(i, *b))
499 continue; 503 continue;
500 504
501 m.misc = 0; 505 m.misc = 0;
@@ -504,7 +508,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
504 m.tsc = 0; 508 m.tsc = 0;
505 509
506 barrier(); 510 barrier();
507 m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 511 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
508 if (!(m.status & MCI_STATUS_VAL)) 512 if (!(m.status & MCI_STATUS_VAL))
509 continue; 513 continue;
510 514
@@ -519,9 +523,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
519 continue; 523 continue;
520 524
521 if (m.status & MCI_STATUS_MISCV) 525 if (m.status & MCI_STATUS_MISCV)
522 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); 526 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
523 if (m.status & MCI_STATUS_ADDRV) 527 if (m.status & MCI_STATUS_ADDRV)
524 m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); 528 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
525 529
526 if (!(flags & MCP_TIMESTAMP)) 530 if (!(flags & MCP_TIMESTAMP))
527 m.tsc = 0; 531 m.tsc = 0;
@@ -537,7 +541,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b)
537 /* 541 /*
538 * Clear state for this bank. 542 * Clear state for this bank.
539 */ 543 */
540 mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 544 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
541 } 545 }
542 546
543 /* 547 /*
@@ -558,7 +562,7 @@ static int mce_no_way_out(struct mce *m, char **msg)
558 int i; 562 int i;
559 563
560 for (i = 0; i < banks; i++) { 564 for (i = 0; i < banks; i++) {
561 m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 565 m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
562 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) 566 if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY)
563 return 1; 567 return 1;
564 } 568 }
@@ -618,7 +622,7 @@ out:
618 * This way we prevent any potential data corruption in a unrecoverable case 622 * This way we prevent any potential data corruption in a unrecoverable case
619 * and also makes sure always all CPU's errors are examined. 623 * and also makes sure always all CPU's errors are examined.
620 * 624 *
621 * Also this detects the case of an machine check event coming from outer 625 * Also this detects the case of a machine check event coming from outer
622 * space (not detected by any CPUs) In this case some external agent wants 626 * space (not detected by any CPUs) In this case some external agent wants
623 * us to shut down, so panic too. 627 * us to shut down, so panic too.
624 * 628 *
@@ -671,7 +675,7 @@ static void mce_reign(void)
671 * No machine check event found. Must be some external 675 * No machine check event found. Must be some external
672 * source or one CPU is hung. Panic. 676 * source or one CPU is hung. Panic.
673 */ 677 */
674 if (!m && tolerant < 3) 678 if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3)
675 mce_panic("Machine check from unknown source", NULL, NULL); 679 mce_panic("Machine check from unknown source", NULL, NULL);
676 680
677 /* 681 /*
@@ -705,7 +709,7 @@ static int mce_start(int *no_way_out)
705 * global_nwo should be updated before mce_callin 709 * global_nwo should be updated before mce_callin
706 */ 710 */
707 smp_wmb(); 711 smp_wmb();
708 order = atomic_add_return(1, &mce_callin); 712 order = atomic_inc_return(&mce_callin);
709 713
710 /* 714 /*
711 * Wait for everyone. 715 * Wait for everyone.
@@ -842,7 +846,7 @@ static void mce_clear_state(unsigned long *toclear)
842 846
843 for (i = 0; i < banks; i++) { 847 for (i = 0; i < banks; i++) {
844 if (test_bit(i, toclear)) 848 if (test_bit(i, toclear))
845 mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 849 mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
846 } 850 }
847} 851}
848 852
@@ -895,11 +899,11 @@ void do_machine_check(struct pt_regs *regs, long error_code)
895 mce_setup(&m); 899 mce_setup(&m);
896 900
897 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); 901 m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS);
898 no_way_out = mce_no_way_out(&m, &msg);
899
900 final = &__get_cpu_var(mces_seen); 902 final = &__get_cpu_var(mces_seen);
901 *final = m; 903 *final = m;
902 904
905 no_way_out = mce_no_way_out(&m, &msg);
906
903 barrier(); 907 barrier();
904 908
905 /* 909 /*
@@ -916,14 +920,14 @@ void do_machine_check(struct pt_regs *regs, long error_code)
916 order = mce_start(&no_way_out); 920 order = mce_start(&no_way_out);
917 for (i = 0; i < banks; i++) { 921 for (i = 0; i < banks; i++) {
918 __clear_bit(i, toclear); 922 __clear_bit(i, toclear);
919 if (!bank[i]) 923 if (!mce_banks[i].ctl)
920 continue; 924 continue;
921 925
922 m.misc = 0; 926 m.misc = 0;
923 m.addr = 0; 927 m.addr = 0;
924 m.bank = i; 928 m.bank = i;
925 929
926 m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); 930 m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i));
927 if ((m.status & MCI_STATUS_VAL) == 0) 931 if ((m.status & MCI_STATUS_VAL) == 0)
928 continue; 932 continue;
929 933
@@ -964,9 +968,9 @@ void do_machine_check(struct pt_regs *regs, long error_code)
964 kill_it = 1; 968 kill_it = 1;
965 969
966 if (m.status & MCI_STATUS_MISCV) 970 if (m.status & MCI_STATUS_MISCV)
967 m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); 971 m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i));
968 if (m.status & MCI_STATUS_ADDRV) 972 if (m.status & MCI_STATUS_ADDRV)
969 m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); 973 m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i));
970 974
971 /* 975 /*
972 * Action optional error. Queue address for later processing. 976 * Action optional error. Queue address for later processing.
@@ -1091,7 +1095,7 @@ void mce_log_therm_throt_event(__u64 status)
1091 */ 1095 */
1092static int check_interval = 5 * 60; /* 5 minutes */ 1096static int check_interval = 5 * 60; /* 5 minutes */
1093 1097
1094static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ 1098static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
1095static DEFINE_PER_CPU(struct timer_list, mce_timer); 1099static DEFINE_PER_CPU(struct timer_list, mce_timer);
1096 1100
1097static void mcheck_timer(unsigned long data) 1101static void mcheck_timer(unsigned long data)
@@ -1110,7 +1114,7 @@ static void mcheck_timer(unsigned long data)
1110 * Alert userspace if needed. If we logged an MCE, reduce the 1114 * Alert userspace if needed. If we logged an MCE, reduce the
1111 * polling interval, otherwise increase the polling interval. 1115 * polling interval, otherwise increase the polling interval.
1112 */ 1116 */
1113 n = &__get_cpu_var(next_interval); 1117 n = &__get_cpu_var(mce_next_interval);
1114 if (mce_notify_irq()) 1118 if (mce_notify_irq())
1115 *n = max(*n/2, HZ/100); 1119 *n = max(*n/2, HZ/100);
1116 else 1120 else
@@ -1159,10 +1163,25 @@ int mce_notify_irq(void)
1159} 1163}
1160EXPORT_SYMBOL_GPL(mce_notify_irq); 1164EXPORT_SYMBOL_GPL(mce_notify_irq);
1161 1165
1166static int mce_banks_init(void)
1167{
1168 int i;
1169
1170 mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL);
1171 if (!mce_banks)
1172 return -ENOMEM;
1173 for (i = 0; i < banks; i++) {
1174 struct mce_bank *b = &mce_banks[i];
1175 b->ctl = -1ULL;
1176 b->init = 1;
1177 }
1178 return 0;
1179}
1180
1162/* 1181/*
1163 * Initialize Machine Checks for a CPU. 1182 * Initialize Machine Checks for a CPU.
1164 */ 1183 */
1165static int mce_cap_init(void) 1184static int __cpuinit mce_cap_init(void)
1166{ 1185{
1167 unsigned b; 1186 unsigned b;
1168 u64 cap; 1187 u64 cap;
@@ -1182,11 +1201,10 @@ static int mce_cap_init(void)
1182 /* Don't support asymmetric configurations today */ 1201 /* Don't support asymmetric configurations today */
1183 WARN_ON(banks != 0 && b != banks); 1202 WARN_ON(banks != 0 && b != banks);
1184 banks = b; 1203 banks = b;
1185 if (!bank) { 1204 if (!mce_banks) {
1186 bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); 1205 int err = mce_banks_init();
1187 if (!bank) 1206 if (err)
1188 return -ENOMEM; 1207 return err;
1189 memset(bank, 0xff, banks * sizeof(u64));
1190 } 1208 }
1191 1209
1192 /* Use accurate RIP reporting if available. */ 1210 /* Use accurate RIP reporting if available. */
@@ -1218,15 +1236,16 @@ static void mce_init(void)
1218 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); 1236 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
1219 1237
1220 for (i = 0; i < banks; i++) { 1238 for (i = 0; i < banks; i++) {
1221 if (skip_bank_init(i)) 1239 struct mce_bank *b = &mce_banks[i];
1240 if (!b->init)
1222 continue; 1241 continue;
1223 wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); 1242 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
1224 wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); 1243 wrmsrl(MSR_IA32_MCx_STATUS(i), 0);
1225 } 1244 }
1226} 1245}
1227 1246
1228/* Add per CPU specific workarounds here */ 1247/* Add per CPU specific workarounds here */
1229static int mce_cpu_quirks(struct cpuinfo_x86 *c) 1248static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
1230{ 1249{
1231 if (c->x86_vendor == X86_VENDOR_UNKNOWN) { 1250 if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
1232 pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); 1251 pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@@ -1241,7 +1260,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1241 * trips off incorrectly with the IOMMU & 3ware 1260 * trips off incorrectly with the IOMMU & 3ware
1242 * & Cerberus: 1261 * & Cerberus:
1243 */ 1262 */
1244 clear_bit(10, (unsigned long *)&bank[4]); 1263 clear_bit(10, (unsigned long *)&mce_banks[4].ctl);
1245 } 1264 }
1246 if (c->x86 <= 17 && mce_bootlog < 0) { 1265 if (c->x86 <= 17 && mce_bootlog < 0) {
1247 /* 1266 /*
@@ -1255,7 +1274,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1255 * by default. 1274 * by default.
1256 */ 1275 */
1257 if (c->x86 == 6 && banks > 0) 1276 if (c->x86 == 6 && banks > 0)
1258 bank[0] = 0; 1277 mce_banks[0].ctl = 0;
1259 } 1278 }
1260 1279
1261 if (c->x86_vendor == X86_VENDOR_INTEL) { 1280 if (c->x86_vendor == X86_VENDOR_INTEL) {
@@ -1268,8 +1287,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c)
1268 * valid event later, merely don't write CTL0. 1287 * valid event later, merely don't write CTL0.
1269 */ 1288 */
1270 1289
1271 if (c->x86 == 6 && c->x86_model < 0x1A) 1290 if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0)
1272 __set_bit(0, &dont_init_banks); 1291 mce_banks[0].init = 0;
1273 1292
1274 /* 1293 /*
1275 * All newer Intel systems support MCE broadcasting. Enable 1294 * All newer Intel systems support MCE broadcasting. Enable
@@ -1325,7 +1344,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
1325static void mce_init_timer(void) 1344static void mce_init_timer(void)
1326{ 1345{
1327 struct timer_list *t = &__get_cpu_var(mce_timer); 1346 struct timer_list *t = &__get_cpu_var(mce_timer);
1328 int *n = &__get_cpu_var(next_interval); 1347 int *n = &__get_cpu_var(mce_next_interval);
1329 1348
1330 if (mce_ignore_ce) 1349 if (mce_ignore_ce)
1331 return; 1350 return;
@@ -1338,6 +1357,17 @@ static void mce_init_timer(void)
1338 add_timer_on(t, smp_processor_id()); 1357 add_timer_on(t, smp_processor_id());
1339} 1358}
1340 1359
1360/* Handle unconfigured int18 (should never happen) */
1361static void unexpected_machine_check(struct pt_regs *regs, long error_code)
1362{
1363 printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
1364 smp_processor_id());
1365}
1366
1367/* Call the installed machine check handler for this CPU setup. */
1368void (*machine_check_vector)(struct pt_regs *, long error_code) =
1369 unexpected_machine_check;
1370
1341/* 1371/*
1342 * Called for each booted CPU to set up machine checks. 1372 * Called for each booted CPU to set up machine checks.
1343 * Must be called with preempt off: 1373 * Must be called with preempt off:
@@ -1551,8 +1581,10 @@ static struct miscdevice mce_log_device = {
1551 */ 1581 */
1552static int __init mcheck_enable(char *str) 1582static int __init mcheck_enable(char *str)
1553{ 1583{
1554 if (*str == 0) 1584 if (*str == 0) {
1555 enable_p5_mce(); 1585 enable_p5_mce();
1586 return 1;
1587 }
1556 if (*str == '=') 1588 if (*str == '=')
1557 str++; 1589 str++;
1558 if (!strcmp(str, "off")) 1590 if (!strcmp(str, "off"))
@@ -1593,8 +1625,9 @@ static int mce_disable(void)
1593 int i; 1625 int i;
1594 1626
1595 for (i = 0; i < banks; i++) { 1627 for (i = 0; i < banks; i++) {
1596 if (!skip_bank_init(i)) 1628 struct mce_bank *b = &mce_banks[i];
1597 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1629 if (b->init)
1630 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
1598 } 1631 }
1599 return 0; 1632 return 0;
1600} 1633}
@@ -1669,14 +1702,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev);
1669__cpuinitdata 1702__cpuinitdata
1670void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); 1703void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu);
1671 1704
1672static struct sysdev_attribute *bank_attrs; 1705static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr)
1706{
1707 return container_of(attr, struct mce_bank, attr);
1708}
1673 1709
1674static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, 1710static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr,
1675 char *buf) 1711 char *buf)
1676{ 1712{
1677 u64 b = bank[attr - bank_attrs]; 1713 return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl);
1678
1679 return sprintf(buf, "%llx\n", b);
1680} 1714}
1681 1715
1682static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, 1716static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
@@ -1687,7 +1721,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr,
1687 if (strict_strtoull(buf, 0, &new) < 0) 1721 if (strict_strtoull(buf, 0, &new) < 0)
1688 return -EINVAL; 1722 return -EINVAL;
1689 1723
1690 bank[attr - bank_attrs] = new; 1724 attr_to_bank(attr)->ctl = new;
1691 mce_restart(); 1725 mce_restart();
1692 1726
1693 return size; 1727 return size;
@@ -1829,7 +1863,7 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1829 } 1863 }
1830 for (j = 0; j < banks; j++) { 1864 for (j = 0; j < banks; j++) {
1831 err = sysdev_create_file(&per_cpu(mce_dev, cpu), 1865 err = sysdev_create_file(&per_cpu(mce_dev, cpu),
1832 &bank_attrs[j]); 1866 &mce_banks[j].attr);
1833 if (err) 1867 if (err)
1834 goto error2; 1868 goto error2;
1835 } 1869 }
@@ -1838,10 +1872,10 @@ static __cpuinit int mce_create_device(unsigned int cpu)
1838 return 0; 1872 return 0;
1839error2: 1873error2:
1840 while (--j >= 0) 1874 while (--j >= 0)
1841 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); 1875 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr);
1842error: 1876error:
1843 while (--i >= 0) 1877 while (--i >= 0)
1844 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1878 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
1845 1879
1846 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1880 sysdev_unregister(&per_cpu(mce_dev, cpu));
1847 1881
@@ -1859,7 +1893,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
1859 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); 1893 sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]);
1860 1894
1861 for (i = 0; i < banks; i++) 1895 for (i = 0; i < banks; i++)
1862 sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); 1896 sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr);
1863 1897
1864 sysdev_unregister(&per_cpu(mce_dev, cpu)); 1898 sysdev_unregister(&per_cpu(mce_dev, cpu));
1865 cpumask_clear_cpu(cpu, mce_dev_initialized); 1899 cpumask_clear_cpu(cpu, mce_dev_initialized);
@@ -1876,8 +1910,9 @@ static void mce_disable_cpu(void *h)
1876 if (!(action & CPU_TASKS_FROZEN)) 1910 if (!(action & CPU_TASKS_FROZEN))
1877 cmci_clear(); 1911 cmci_clear();
1878 for (i = 0; i < banks; i++) { 1912 for (i = 0; i < banks; i++) {
1879 if (!skip_bank_init(i)) 1913 struct mce_bank *b = &mce_banks[i];
1880 wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); 1914 if (b->init)
1915 wrmsrl(MSR_IA32_MCx_CTL(i), 0);
1881 } 1916 }
1882} 1917}
1883 1918
@@ -1892,8 +1927,9 @@ static void mce_reenable_cpu(void *h)
1892 if (!(action & CPU_TASKS_FROZEN)) 1927 if (!(action & CPU_TASKS_FROZEN))
1893 cmci_reenable(); 1928 cmci_reenable();
1894 for (i = 0; i < banks; i++) { 1929 for (i = 0; i < banks; i++) {
1895 if (!skip_bank_init(i)) 1930 struct mce_bank *b = &mce_banks[i];
1896 wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); 1931 if (b->init)
1932 wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl);
1897 } 1933 }
1898} 1934}
1899 1935
@@ -1925,7 +1961,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu)
1925 case CPU_DOWN_FAILED: 1961 case CPU_DOWN_FAILED:
1926 case CPU_DOWN_FAILED_FROZEN: 1962 case CPU_DOWN_FAILED_FROZEN:
1927 t->expires = round_jiffies(jiffies + 1963 t->expires = round_jiffies(jiffies +
1928 __get_cpu_var(next_interval)); 1964 __get_cpu_var(mce_next_interval));
1929 add_timer_on(t, cpu); 1965 add_timer_on(t, cpu);
1930 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); 1966 smp_call_function_single(cpu, mce_reenable_cpu, &action, 1);
1931 break; 1967 break;
@@ -1941,35 +1977,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = {
1941 .notifier_call = mce_cpu_callback, 1977 .notifier_call = mce_cpu_callback,
1942}; 1978};
1943 1979
1944static __init int mce_init_banks(void) 1980static __init void mce_init_banks(void)
1945{ 1981{
1946 int i; 1982 int i;
1947 1983
1948 bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks,
1949 GFP_KERNEL);
1950 if (!bank_attrs)
1951 return -ENOMEM;
1952
1953 for (i = 0; i < banks; i++) { 1984 for (i = 0; i < banks; i++) {
1954 struct sysdev_attribute *a = &bank_attrs[i]; 1985 struct mce_bank *b = &mce_banks[i];
1986 struct sysdev_attribute *a = &b->attr;
1955 1987
1956 a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); 1988 a->attr.name = b->attrname;
1957 if (!a->attr.name) 1989 snprintf(b->attrname, ATTR_LEN, "bank%d", i);
1958 goto nomem;
1959 1990
1960 a->attr.mode = 0644; 1991 a->attr.mode = 0644;
1961 a->show = show_bank; 1992 a->show = show_bank;
1962 a->store = set_bank; 1993 a->store = set_bank;
1963 } 1994 }
1964 return 0;
1965
1966nomem:
1967 while (--i >= 0)
1968 kfree(bank_attrs[i].attr.name);
1969 kfree(bank_attrs);
1970 bank_attrs = NULL;
1971
1972 return -ENOMEM;
1973} 1995}
1974 1996
1975static __init int mce_init_device(void) 1997static __init int mce_init_device(void)
@@ -1982,9 +2004,7 @@ static __init int mce_init_device(void)
1982 2004
1983 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); 2005 zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL);
1984 2006
1985 err = mce_init_banks(); 2007 mce_init_banks();
1986 if (err)
1987 return err;
1988 2008
1989 err = sysdev_class_register(&mce_sysclass); 2009 err = sysdev_class_register(&mce_sysclass);
1990 if (err) 2010 if (err)
@@ -2004,57 +2024,65 @@ static __init int mce_init_device(void)
2004 2024
2005device_initcall(mce_init_device); 2025device_initcall(mce_init_device);
2006 2026
2007#else /* CONFIG_X86_OLD_MCE: */ 2027/*
2008 2028 * Old style boot options parsing. Only for compatibility.
2009int nr_mce_banks; 2029 */
2010EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ 2030static int __init mcheck_disable(char *str)
2031{
2032 mce_disabled = 1;
2033 return 1;
2034}
2035__setup("nomce", mcheck_disable);
2011 2036
2012/* This has to be run for each processor */ 2037#ifdef CONFIG_DEBUG_FS
2013void mcheck_init(struct cpuinfo_x86 *c) 2038struct dentry *mce_get_debugfs_dir(void)
2014{ 2039{
2015 if (mce_disabled) 2040 static struct dentry *dmce;
2016 return;
2017 2041
2018 switch (c->x86_vendor) { 2042 if (!dmce)
2019 case X86_VENDOR_AMD: 2043 dmce = debugfs_create_dir("mce", NULL);
2020 amd_mcheck_init(c);
2021 break;
2022 2044
2023 case X86_VENDOR_INTEL: 2045 return dmce;
2024 if (c->x86 == 5) 2046}
2025 intel_p5_mcheck_init(c);
2026 if (c->x86 == 6)
2027 intel_p6_mcheck_init(c);
2028 if (c->x86 == 15)
2029 intel_p4_mcheck_init(c);
2030 break;
2031 2047
2032 case X86_VENDOR_CENTAUR: 2048static void mce_reset(void)
2033 if (c->x86 == 5) 2049{
2034 winchip_mcheck_init(c); 2050 cpu_missing = 0;
2035 break; 2051 atomic_set(&mce_fake_paniced, 0);
2052 atomic_set(&mce_executing, 0);
2053 atomic_set(&mce_callin, 0);
2054 atomic_set(&global_nwo, 0);
2055}
2036 2056
2037 default: 2057static int fake_panic_get(void *data, u64 *val)
2038 break; 2058{
2039 } 2059 *val = fake_panic;
2040 printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); 2060 return 0;
2041} 2061}
2042 2062
2043static int __init mcheck_enable(char *str) 2063static int fake_panic_set(void *data, u64 val)
2044{ 2064{
2045 mce_p5_enabled = 1; 2065 mce_reset();
2046 return 1; 2066 fake_panic = val;
2067 return 0;
2047} 2068}
2048__setup("mce", mcheck_enable);
2049 2069
2050#endif /* CONFIG_X86_OLD_MCE */ 2070DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
2071 fake_panic_set, "%llu\n");
2051 2072
2052/* 2073static int __init mce_debugfs_init(void)
2053 * Old style boot options parsing. Only for compatibility.
2054 */
2055static int __init mcheck_disable(char *str)
2056{ 2074{
2057 mce_disabled = 1; 2075 struct dentry *dmce, *ffake_panic;
2058 return 1; 2076
2077 dmce = mce_get_debugfs_dir();
2078 if (!dmce)
2079 return -ENOMEM;
2080 ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL,
2081 &fake_panic_fops);
2082 if (!ffake_panic)
2083 return -ENOMEM;
2084
2085 return 0;
2059} 2086}
2060__setup("nomce", mcheck_disable); 2087late_initcall(mce_debugfs_init);
2088#endif
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index ddae21620bda..8cd5224943b5 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -69,7 +69,7 @@ struct threshold_bank {
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_var_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks);
73 73
74#ifdef CONFIG_SMP 74#ifdef CONFIG_SMP
75static unsigned char shared_bank[NR_BANKS] = { 75static unsigned char shared_bank[NR_BANKS] = {
@@ -489,12 +489,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
489 int i, err = 0; 489 int i, err = 0;
490 struct threshold_bank *b = NULL; 490 struct threshold_bank *b = NULL;
491 char name[32]; 491 char name[32];
492 struct cpuinfo_x86 *c = &cpu_data(cpu);
493
492 494
493 sprintf(name, "threshold_bank%i", bank); 495 sprintf(name, "threshold_bank%i", bank);
494 496
495#ifdef CONFIG_SMP 497#ifdef CONFIG_SMP
496 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 498 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
497 i = cpumask_first(cpu_core_mask(cpu)); 499 i = cpumask_first(c->llc_shared_map);
498 500
499 /* first core not up yet */ 501 /* first core not up yet */
500 if (cpu_data(i).cpu_core_id) 502 if (cpu_data(i).cpu_core_id)
@@ -514,7 +516,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
514 if (err) 516 if (err)
515 goto out; 517 goto out;
516 518
517 cpumask_copy(b->cpus, cpu_core_mask(cpu)); 519 cpumask_copy(b->cpus, c->llc_shared_map);
518 per_cpu(threshold_banks, cpu)[bank] = b; 520 per_cpu(threshold_banks, cpu)[bank] = b;
519 521
520 goto out; 522 goto out;
@@ -539,7 +541,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
539#ifndef CONFIG_SMP 541#ifndef CONFIG_SMP
540 cpumask_setall(b->cpus); 542 cpumask_setall(b->cpus);
541#else 543#else
542 cpumask_copy(b->cpus, cpu_core_mask(cpu)); 544 cpumask_copy(b->cpus, c->llc_shared_map);
543#endif 545#endif
544 546
545 per_cpu(threshold_banks, cpu)[bank] = b; 547 per_cpu(threshold_banks, cpu)[bank] = b;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c
index e1acec0f7a32..889f665fe93d 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c
@@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot)
90 if (test_bit(i, owned)) 90 if (test_bit(i, owned))
91 continue; 91 continue;
92 92
93 rdmsrl(MSR_IA32_MC0_CTL2 + i, val); 93 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
94 94
95 /* Already owned by someone else? */ 95 /* Already owned by someone else? */
96 if (val & CMCI_EN) { 96 if (val & CMCI_EN) {
@@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot)
101 } 101 }
102 102
103 val |= CMCI_EN | CMCI_THRESHOLD; 103 val |= CMCI_EN | CMCI_THRESHOLD;
104 wrmsrl(MSR_IA32_MC0_CTL2 + i, val); 104 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
105 rdmsrl(MSR_IA32_MC0_CTL2 + i, val); 105 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
106 106
107 /* Did the enable bit stick? -- the bank supports CMCI */ 107 /* Did the enable bit stick? -- the bank supports CMCI */
108 if (val & CMCI_EN) { 108 if (val & CMCI_EN) {
@@ -152,9 +152,9 @@ void cmci_clear(void)
152 if (!test_bit(i, __get_cpu_var(mce_banks_owned))) 152 if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
153 continue; 153 continue;
154 /* Disable CMCI */ 154 /* Disable CMCI */
155 rdmsrl(MSR_IA32_MC0_CTL2 + i, val); 155 rdmsrl(MSR_IA32_MCx_CTL2(i), val);
156 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); 156 val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK);
157 wrmsrl(MSR_IA32_MC0_CTL2 + i, val); 157 wrmsrl(MSR_IA32_MCx_CTL2(i), val);
158 __clear_bit(i, __get_cpu_var(mce_banks_owned)); 158 __clear_bit(i, __get_cpu_var(mce_banks_owned));
159 } 159 }
160 spin_unlock_irqrestore(&cmci_discover_lock, flags); 160 spin_unlock_irqrestore(&cmci_discover_lock, flags);
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
deleted file mode 100644
index f5f2d6f71fb6..000000000000
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ /dev/null
@@ -1,94 +0,0 @@
1/*
2 * Non Fatal Machine Check Exception Reporting
3 *
4 * (C) Copyright 2002 Dave Jones. <davej@redhat.com>
5 *
6 * This file contains routines to check for non-fatal MCEs every 15s
7 *
8 */
9#include <linux/interrupt.h>
10#include <linux/workqueue.h>
11#include <linux/jiffies.h>
12#include <linux/kernel.h>
13#include <linux/module.h>
14#include <linux/types.h>
15#include <linux/init.h>
16#include <linux/smp.h>
17
18#include <asm/processor.h>
19#include <asm/system.h>
20#include <asm/mce.h>
21#include <asm/msr.h>
22
23static int firstbank;
24
25#define MCE_RATE (15*HZ) /* timer rate is 15s */
26
27static void mce_checkregs(void *info)
28{
29 u32 low, high;
30 int i;
31
32 for (i = firstbank; i < nr_mce_banks; i++) {
33 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
34
35 if (!(high & (1<<31)))
36 continue;
37
38 printk(KERN_INFO "MCE: The hardware reports a non fatal, "
39 "correctable incident occurred on CPU %d.\n",
40 smp_processor_id());
41
42 printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
43
44 /*
45 * Scrub the error so we don't pick it up in MCE_RATE
46 * seconds time:
47 */
48 wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
49
50 /* Serialize: */
51 wmb();
52 add_taint(TAINT_MACHINE_CHECK);
53 }
54}
55
56static void mce_work_fn(struct work_struct *work);
57static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
58
59static void mce_work_fn(struct work_struct *work)
60{
61 on_each_cpu(mce_checkregs, NULL, 1);
62 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
63}
64
65static int __init init_nonfatal_mce_checker(void)
66{
67 struct cpuinfo_x86 *c = &boot_cpu_data;
68
69 /* Check for MCE support */
70 if (!cpu_has(c, X86_FEATURE_MCE))
71 return -ENODEV;
72
73 /* Check for PPro style MCA */
74 if (!cpu_has(c, X86_FEATURE_MCA))
75 return -ENODEV;
76
77 /* Some Athlons misbehave when we frob bank 0 */
78 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD &&
79 boot_cpu_data.x86 == 6)
80 firstbank = 1;
81 else
82 firstbank = 0;
83
84 /*
85 * Check for non-fatal errors every MCE_RATE s
86 */
87 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
88 printk(KERN_INFO "Machine check exception polling timer started.\n");
89
90 return 0;
91}
92module_init(init_nonfatal_mce_checker);
93
94MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
deleted file mode 100644
index 4482aea9aa2e..000000000000
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ /dev/null
@@ -1,163 +0,0 @@
1/*
2 * P4 specific Machine Check Exception Reporting
3 */
4#include <linux/kernel.h>
5#include <linux/types.h>
6#include <linux/init.h>
7#include <linux/smp.h>
8
9#include <asm/processor.h>
10#include <asm/mce.h>
11#include <asm/msr.h>
12
13/* as supported by the P4/Xeon family */
14struct intel_mce_extended_msrs {
15 u32 eax;
16 u32 ebx;
17 u32 ecx;
18 u32 edx;
19 u32 esi;
20 u32 edi;
21 u32 ebp;
22 u32 esp;
23 u32 eflags;
24 u32 eip;
25 /* u32 *reserved[]; */
26};
27
28static int mce_num_extended_msrs;
29
30/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */
31static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r)
32{
33 u32 h;
34
35 rdmsr(MSR_IA32_MCG_EAX, r->eax, h);
36 rdmsr(MSR_IA32_MCG_EBX, r->ebx, h);
37 rdmsr(MSR_IA32_MCG_ECX, r->ecx, h);
38 rdmsr(MSR_IA32_MCG_EDX, r->edx, h);
39 rdmsr(MSR_IA32_MCG_ESI, r->esi, h);
40 rdmsr(MSR_IA32_MCG_EDI, r->edi, h);
41 rdmsr(MSR_IA32_MCG_EBP, r->ebp, h);
42 rdmsr(MSR_IA32_MCG_ESP, r->esp, h);
43 rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h);
44 rdmsr(MSR_IA32_MCG_EIP, r->eip, h);
45}
46
47static void intel_machine_check(struct pt_regs *regs, long error_code)
48{
49 u32 alow, ahigh, high, low;
50 u32 mcgstl, mcgsth;
51 int recover = 1;
52 int i;
53
54 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
55 if (mcgstl & (1<<0)) /* Recoverable ? */
56 recover = 0;
57
58 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
59 smp_processor_id(), mcgsth, mcgstl);
60
61 if (mce_num_extended_msrs > 0) {
62 struct intel_mce_extended_msrs dbg;
63
64 intel_get_extended_msrs(&dbg);
65
66 printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n"
67 "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n"
68 "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n",
69 smp_processor_id(), dbg.eip, dbg.eflags,
70 dbg.eax, dbg.ebx, dbg.ecx, dbg.edx,
71 dbg.esi, dbg.edi, dbg.ebp, dbg.esp);
72 }
73
74 for (i = 0; i < nr_mce_banks; i++) {
75 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
76 if (high & (1<<31)) {
77 char misc[20];
78 char addr[24];
79
80 misc[0] = addr[0] = '\0';
81 if (high & (1<<29))
82 recover |= 1;
83 if (high & (1<<25))
84 recover |= 2;
85 high &= ~(1<<31);
86 if (high & (1<<27)) {
87 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
88 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
89 }
90 if (high & (1<<26)) {
91 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
92 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
93 }
94 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
95 smp_processor_id(), i, high, low, misc, addr);
96 }
97 }
98
99 if (recover & 2)
100 panic("CPU context corrupt");
101 if (recover & 1)
102 panic("Unable to continue");
103
104 printk(KERN_EMERG "Attempting to continue.\n");
105
106 /*
107 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
108 * recoverable/continuable.This will allow BIOS to look at the MSRs
109 * for errors if the OS could not log the error.
110 */
111 for (i = 0; i < nr_mce_banks; i++) {
112 u32 msr;
113 msr = MSR_IA32_MC0_STATUS+i*4;
114 rdmsr(msr, low, high);
115 if (high&(1<<31)) {
116 /* Clear it */
117 wrmsr(msr, 0UL, 0UL);
118 /* Serialize */
119 wmb();
120 add_taint(TAINT_MACHINE_CHECK);
121 }
122 }
123 mcgstl &= ~(1<<2);
124 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
125}
126
127void intel_p4_mcheck_init(struct cpuinfo_x86 *c)
128{
129 u32 l, h;
130 int i;
131
132 machine_check_vector = intel_machine_check;
133 wmb();
134
135 printk(KERN_INFO "Intel machine check architecture supported.\n");
136 rdmsr(MSR_IA32_MCG_CAP, l, h);
137 if (l & (1<<8)) /* Control register present ? */
138 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
139 nr_mce_banks = l & 0xff;
140
141 for (i = 0; i < nr_mce_banks; i++) {
142 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
143 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
144 }
145
146 set_in_cr4(X86_CR4_MCE);
147 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
148 smp_processor_id());
149
150 /* Check for P4/Xeon extended MCE MSRs */
151 rdmsr(MSR_IA32_MCG_CAP, l, h);
152 if (l & (1<<9)) {/* MCG_EXT_P */
153 mce_num_extended_msrs = (l >> 16) & 0xff;
154 printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)"
155 " available\n",
156 smp_processor_id(), mce_num_extended_msrs);
157
158#ifdef CONFIG_X86_MCE_P4THERMAL
159 /* Check for P4/Xeon Thermal monitor */
160 intel_init_thermal(c);
161#endif
162 }
163}
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c
deleted file mode 100644
index 01e4f8178183..000000000000
--- a/arch/x86/kernel/cpu/mcheck/p6.c
+++ /dev/null
@@ -1,127 +0,0 @@
1/*
2 * P6 specific Machine Check Exception Reporting
3 * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk>
4 */
5#include <linux/interrupt.h>
6#include <linux/kernel.h>
7#include <linux/types.h>
8#include <linux/init.h>
9#include <linux/smp.h>
10
11#include <asm/processor.h>
12#include <asm/system.h>
13#include <asm/mce.h>
14#include <asm/msr.h>
15
16/* Machine Check Handler For PII/PIII */
17static void intel_machine_check(struct pt_regs *regs, long error_code)
18{
19 u32 alow, ahigh, high, low;
20 u32 mcgstl, mcgsth;
21 int recover = 1;
22 int i;
23
24 rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
25 if (mcgstl & (1<<0)) /* Recoverable ? */
26 recover = 0;
27
28 printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n",
29 smp_processor_id(), mcgsth, mcgstl);
30
31 for (i = 0; i < nr_mce_banks; i++) {
32 rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high);
33 if (high & (1<<31)) {
34 char misc[20];
35 char addr[24];
36
37 misc[0] = '\0';
38 addr[0] = '\0';
39
40 if (high & (1<<29))
41 recover |= 1;
42 if (high & (1<<25))
43 recover |= 2;
44 high &= ~(1<<31);
45
46 if (high & (1<<27)) {
47 rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh);
48 snprintf(misc, 20, "[%08x%08x]", ahigh, alow);
49 }
50 if (high & (1<<26)) {
51 rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh);
52 snprintf(addr, 24, " at %08x%08x", ahigh, alow);
53 }
54
55 printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n",
56 smp_processor_id(), i, high, low, misc, addr);
57 }
58 }
59
60 if (recover & 2)
61 panic("CPU context corrupt");
62 if (recover & 1)
63 panic("Unable to continue");
64
65 printk(KERN_EMERG "Attempting to continue.\n");
66 /*
67 * Do not clear the MSR_IA32_MCi_STATUS if the error is not
68 * recoverable/continuable.This will allow BIOS to look at the MSRs
69 * for errors if the OS could not log the error:
70 */
71 for (i = 0; i < nr_mce_banks; i++) {
72 unsigned int msr;
73
74 msr = MSR_IA32_MC0_STATUS+i*4;
75 rdmsr(msr, low, high);
76 if (high & (1<<31)) {
77 /* Clear it: */
78 wrmsr(msr, 0UL, 0UL);
79 /* Serialize: */
80 wmb();
81 add_taint(TAINT_MACHINE_CHECK);
82 }
83 }
84 mcgstl &= ~(1<<2);
85 wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth);
86}
87
88/* Set up machine check reporting for processors with Intel style MCE: */
89void intel_p6_mcheck_init(struct cpuinfo_x86 *c)
90{
91 u32 l, h;
92 int i;
93
94 /* Check for MCE support */
95 if (!cpu_has(c, X86_FEATURE_MCE))
96 return;
97
98 /* Check for PPro style MCA */
99 if (!cpu_has(c, X86_FEATURE_MCA))
100 return;
101
102 /* Ok machine check is available */
103 machine_check_vector = intel_machine_check;
104 /* Make sure the vector pointer is visible before we enable MCEs: */
105 wmb();
106
107 printk(KERN_INFO "Intel machine check architecture supported.\n");
108 rdmsr(MSR_IA32_MCG_CAP, l, h);
109 if (l & (1<<8)) /* Control register present ? */
110 wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
111 nr_mce_banks = l & 0xff;
112
113 /*
114 * Following the example in IA-32 SDM Vol 3:
115 * - MC0_CTL should not be written
116 * - Status registers on all banks should be cleared on reset
117 */
118 for (i = 1; i < nr_mce_banks; i++)
119 wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff);
120
121 for (i = 0; i < nr_mce_banks; i++)
122 wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0);
123
124 set_in_cr4(X86_CR4_MCE);
125 printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
126 smp_processor_id());
127}
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index 5957a93e5173..63a56d147e4a 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -260,9 +260,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
260 return; 260 return;
261 } 261 }
262 262
263 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
264 tm2 = 1;
265
266 /* Check whether a vector already exists */ 263 /* Check whether a vector already exists */
267 if (h & APIC_VECTOR_MASK) { 264 if (h & APIC_VECTOR_MASK) {
268 printk(KERN_DEBUG 265 printk(KERN_DEBUG
@@ -271,6 +268,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
271 return; 268 return;
272 } 269 }
273 270
271 /* early Pentium M models use different method for enabling TM2 */
272 if (cpu_has(c, X86_FEATURE_TM2)) {
273 if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) {
274 rdmsr(MSR_THERM2_CTL, l, h);
275 if (l & MSR_THERM2_CTL_TM_SELECT)
276 tm2 = 1;
277 } else if (l & MSR_IA32_MISC_ENABLE_TM2)
278 tm2 = 1;
279 }
280
274 /* We'll mask the thermal vector in the lapic till we're ready: */ 281 /* We'll mask the thermal vector in the lapic till we're ready: */
275 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; 282 h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED;
276 apic_write(APIC_LVTTHMR, h); 283 apic_write(APIC_LVTTHMR, h);
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c
index ee2331b0e58f..33af14110dfd 100644
--- a/arch/x86/kernel/cpu/mtrr/amd.c
+++ b/arch/x86/kernel/cpu/mtrr/amd.c
@@ -7,15 +7,15 @@
7 7
8static void 8static void
9amd_get_mtrr(unsigned int reg, unsigned long *base, 9amd_get_mtrr(unsigned int reg, unsigned long *base,
10 unsigned long *size, mtrr_type * type) 10 unsigned long *size, mtrr_type *type)
11{ 11{
12 unsigned long low, high; 12 unsigned long low, high;
13 13
14 rdmsr(MSR_K6_UWCCR, low, high); 14 rdmsr(MSR_K6_UWCCR, low, high);
15 /* Upper dword is region 1, lower is region 0 */ 15 /* Upper dword is region 1, lower is region 0 */
16 if (reg == 1) 16 if (reg == 1)
17 low = high; 17 low = high;
18 /* The base masks off on the right alignment */ 18 /* The base masks off on the right alignment */
19 *base = (low & 0xFFFE0000) >> PAGE_SHIFT; 19 *base = (low & 0xFFFE0000) >> PAGE_SHIFT;
20 *type = 0; 20 *type = 0;
21 if (low & 1) 21 if (low & 1)
@@ -27,74 +27,81 @@ amd_get_mtrr(unsigned int reg, unsigned long *base,
27 return; 27 return;
28 } 28 }
29 /* 29 /*
30 * This needs a little explaining. The size is stored as an 30 * This needs a little explaining. The size is stored as an
31 * inverted mask of bits of 128K granularity 15 bits long offset 31 * inverted mask of bits of 128K granularity 15 bits long offset
32 * 2 bits 32 * 2 bits.
33 * 33 *
34 * So to get a size we do invert the mask and add 1 to the lowest 34 * So to get a size we do invert the mask and add 1 to the lowest
35 * mask bit (4 as its 2 bits in). This gives us a size we then shift 35 * mask bit (4 as its 2 bits in). This gives us a size we then shift
36 * to turn into 128K blocks 36 * to turn into 128K blocks.
37 * 37 *
38 * eg 111 1111 1111 1100 is 512K 38 * eg 111 1111 1111 1100 is 512K
39 * 39 *
40 * invert 000 0000 0000 0011 40 * invert 000 0000 0000 0011
41 * +1 000 0000 0000 0100 41 * +1 000 0000 0000 0100
42 * *128K ... 42 * *128K ...
43 */ 43 */
44 low = (~low) & 0x1FFFC; 44 low = (~low) & 0x1FFFC;
45 *size = (low + 4) << (15 - PAGE_SHIFT); 45 *size = (low + 4) << (15 - PAGE_SHIFT);
46 return;
47} 46}
48 47
49static void amd_set_mtrr(unsigned int reg, unsigned long base, 48/**
50 unsigned long size, mtrr_type type) 49 * amd_set_mtrr - Set variable MTRR register on the local CPU.
51/* [SUMMARY] Set variable MTRR register on the local CPU. 50 *
52 <reg> The register to set. 51 * @reg The register to set.
53 <base> The base address of the region. 52 * @base The base address of the region.
54 <size> The size of the region. If this is 0 the region is disabled. 53 * @size The size of the region. If this is 0 the region is disabled.
55 <type> The type of the region. 54 * @type The type of the region.
56 [RETURNS] Nothing. 55 *
57*/ 56 * Returns nothing.
57 */
58static void
59amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
58{ 60{
59 u32 regs[2]; 61 u32 regs[2];
60 62
61 /* 63 /*
62 * Low is MTRR0 , High MTRR 1 64 * Low is MTRR0, High MTRR 1
63 */ 65 */
64 rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); 66 rdmsr(MSR_K6_UWCCR, regs[0], regs[1]);
65 /* 67 /*
66 * Blank to disable 68 * Blank to disable
67 */ 69 */
68 if (size == 0) 70 if (size == 0) {
69 regs[reg] = 0; 71 regs[reg] = 0;
70 else 72 } else {
71 /* Set the register to the base, the type (off by one) and an 73 /*
72 inverted bitmask of the size The size is the only odd 74 * Set the register to the base, the type (off by one) and an
73 bit. We are fed say 512K We invert this and we get 111 1111 75 * inverted bitmask of the size The size is the only odd
74 1111 1011 but if you subtract one and invert you get the 76 * bit. We are fed say 512K We invert this and we get 111 1111
75 desired 111 1111 1111 1100 mask 77 * 1111 1011 but if you subtract one and invert you get the
76 78 * desired 111 1111 1111 1100 mask
77 But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ 79 *
80 * But ~(x - 1) == ~x + 1 == -x. Two's complement rocks!
81 */
78 regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) 82 regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC)
79 | (base << PAGE_SHIFT) | (type + 1); 83 | (base << PAGE_SHIFT) | (type + 1);
84 }
80 85
81 /* 86 /*
82 * The writeback rule is quite specific. See the manual. Its 87 * The writeback rule is quite specific. See the manual. Its
83 * disable local interrupts, write back the cache, set the mtrr 88 * disable local interrupts, write back the cache, set the mtrr
84 */ 89 */
85 wbinvd(); 90 wbinvd();
86 wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); 91 wrmsr(MSR_K6_UWCCR, regs[0], regs[1]);
87} 92}
88 93
89static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) 94static int
95amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
90{ 96{
91 /* Apply the K6 block alignment and size rules 97 /*
92 In order 98 * Apply the K6 block alignment and size rules
93 o Uncached or gathering only 99 * In order
94 o 128K or bigger block 100 * o Uncached or gathering only
95 o Power of 2 block 101 * o 128K or bigger block
96 o base suitably aligned to the power 102 * o Power of 2 block
97 */ 103 * o base suitably aligned to the power
104 */
98 if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) 105 if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT))
99 || (size & ~(size - 1)) - size || (base & (size - 1))) 106 || (size & ~(size - 1)) - size || (base & (size - 1)))
100 return -EINVAL; 107 return -EINVAL;
@@ -115,5 +122,3 @@ int __init amd_init_mtrr(void)
115 set_mtrr_ops(&amd_mtrr_ops); 122 set_mtrr_ops(&amd_mtrr_ops);
116 return 0; 123 return 0;
117} 124}
118
119//arch_initcall(amd_mtrr_init);
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c
index cb9aa3a7a7ab..de89f14eff3a 100644
--- a/arch/x86/kernel/cpu/mtrr/centaur.c
+++ b/arch/x86/kernel/cpu/mtrr/centaur.c
@@ -1,7 +1,9 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/mm.h> 2#include <linux/mm.h>
3
3#include <asm/mtrr.h> 4#include <asm/mtrr.h>
4#include <asm/msr.h> 5#include <asm/msr.h>
6
5#include "mtrr.h" 7#include "mtrr.h"
6 8
7static struct { 9static struct {
@@ -12,25 +14,25 @@ static struct {
12static u8 centaur_mcr_reserved; 14static u8 centaur_mcr_reserved;
13static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ 15static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */
14 16
15/* 17/**
16 * Report boot time MCR setups 18 * centaur_get_free_region - Get a free MTRR.
19 *
20 * @base: The starting (base) address of the region.
21 * @size: The size (in bytes) of the region.
22 *
23 * Returns: the index of the region on success, else -1 on error.
17 */ 24 */
18
19static int 25static int
20centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) 26centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
21/* [SUMMARY] Get a free MTRR.
22 <base> The starting (base) address of the region.
23 <size> The size (in bytes) of the region.
24 [RETURNS] The index of the region on success, else -1 on error.
25*/
26{ 27{
27 int i, max;
28 mtrr_type ltype;
29 unsigned long lbase, lsize; 28 unsigned long lbase, lsize;
29 mtrr_type ltype;
30 int i, max;
30 31
31 max = num_var_ranges; 32 max = num_var_ranges;
32 if (replace_reg >= 0 && replace_reg < max) 33 if (replace_reg >= 0 && replace_reg < max)
33 return replace_reg; 34 return replace_reg;
35
34 for (i = 0; i < max; ++i) { 36 for (i = 0; i < max; ++i) {
35 if (centaur_mcr_reserved & (1 << i)) 37 if (centaur_mcr_reserved & (1 << i))
36 continue; 38 continue;
@@ -38,11 +40,14 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg)
38 if (lsize == 0) 40 if (lsize == 0)
39 return i; 41 return i;
40 } 42 }
43
41 return -ENOSPC; 44 return -ENOSPC;
42} 45}
43 46
44void 47/*
45mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) 48 * Report boot time MCR setups
49 */
50void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi)
46{ 51{
47 centaur_mcr[mcr].low = lo; 52 centaur_mcr[mcr].low = lo;
48 centaur_mcr[mcr].high = hi; 53 centaur_mcr[mcr].high = hi;
@@ -54,33 +59,35 @@ centaur_get_mcr(unsigned int reg, unsigned long *base,
54{ 59{
55 *base = centaur_mcr[reg].high >> PAGE_SHIFT; 60 *base = centaur_mcr[reg].high >> PAGE_SHIFT;
56 *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; 61 *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT;
57 *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ 62 *type = MTRR_TYPE_WRCOMB; /* write-combining */
63
58 if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) 64 if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2))
59 *type = MTRR_TYPE_UNCACHABLE; 65 *type = MTRR_TYPE_UNCACHABLE;
60 if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) 66 if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25)
61 *type = MTRR_TYPE_WRBACK; 67 *type = MTRR_TYPE_WRBACK;
62 if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) 68 if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31)
63 *type = MTRR_TYPE_WRBACK; 69 *type = MTRR_TYPE_WRBACK;
64
65} 70}
66 71
67static void centaur_set_mcr(unsigned int reg, unsigned long base, 72static void
68 unsigned long size, mtrr_type type) 73centaur_set_mcr(unsigned int reg, unsigned long base,
74 unsigned long size, mtrr_type type)
69{ 75{
70 unsigned long low, high; 76 unsigned long low, high;
71 77
72 if (size == 0) { 78 if (size == 0) {
73 /* Disable */ 79 /* Disable */
74 high = low = 0; 80 high = low = 0;
75 } else { 81 } else {
76 high = base << PAGE_SHIFT; 82 high = base << PAGE_SHIFT;
77 if (centaur_mcr_type == 0) 83 if (centaur_mcr_type == 0) {
78 low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ 84 /* Only support write-combining... */
79 else { 85 low = -size << PAGE_SHIFT | 0x1f;
86 } else {
80 if (type == MTRR_TYPE_UNCACHABLE) 87 if (type == MTRR_TYPE_UNCACHABLE)
81 low = -size << PAGE_SHIFT | 0x02; /* NC */ 88 low = -size << PAGE_SHIFT | 0x02; /* NC */
82 else 89 else
83 low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ 90 low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */
84 } 91 }
85 } 92 }
86 centaur_mcr[reg].high = high; 93 centaur_mcr[reg].high = high;
@@ -88,118 +95,16 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base,
88 wrmsr(MSR_IDT_MCR0 + reg, low, high); 95 wrmsr(MSR_IDT_MCR0 + reg, low, high);
89} 96}
90 97
91#if 0 98static int
92/* 99centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type)
93 * Initialise the later (saner) Winchip MCR variant. In this version
94 * the BIOS can pass us the registers it has used (but not their values)
95 * and the control register is read/write
96 */
97
98static void __init
99centaur_mcr1_init(void)
100{
101 unsigned i;
102 u32 lo, hi;
103
104 /* Unfortunately, MCR's are read-only, so there is no way to
105 * find out what the bios might have done.
106 */
107
108 rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
109 if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */
110 lo &= ~0x1C0; /* clear key */
111 lo |= 0x040; /* set key to 1 */
112 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */
113 }
114
115 centaur_mcr_type = 1;
116
117 /*
118 * Clear any unconfigured MCR's.
119 */
120
121 for (i = 0; i < 8; ++i) {
122 if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) {
123 if (!(lo & (1 << (9 + i))))
124 wrmsr(MSR_IDT_MCR0 + i, 0, 0);
125 else
126 /*
127 * If the BIOS set up an MCR we cannot see it
128 * but we don't wish to obliterate it
129 */
130 centaur_mcr_reserved |= (1 << i);
131 }
132 }
133 /*
134 * Throw the main write-combining switch...
135 * However if OOSTORE is enabled then people have already done far
136 * cleverer things and we should behave.
137 */
138
139 lo |= 15; /* Write combine enables */
140 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
141}
142
143/*
144 * Initialise the original winchip with read only MCR registers
145 * no used bitmask for the BIOS to pass on and write only control
146 */
147
148static void __init
149centaur_mcr0_init(void)
150{
151 unsigned i;
152
153 /* Unfortunately, MCR's are read-only, so there is no way to
154 * find out what the bios might have done.
155 */
156
157 /* Clear any unconfigured MCR's.
158 * This way we are sure that the centaur_mcr array contains the actual
159 * values. The disadvantage is that any BIOS tweaks are thus undone.
160 *
161 */
162 for (i = 0; i < 8; ++i) {
163 if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0)
164 wrmsr(MSR_IDT_MCR0 + i, 0, 0);
165 }
166
167 wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */
168}
169
170/*
171 * Initialise Winchip series MCR registers
172 */
173
174static void __init
175centaur_mcr_init(void)
176{
177 struct set_mtrr_context ctxt;
178
179 set_mtrr_prepare_save(&ctxt);
180 set_mtrr_cache_disable(&ctxt);
181
182 if (boot_cpu_data.x86_model == 4)
183 centaur_mcr0_init();
184 else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9)
185 centaur_mcr1_init();
186
187 set_mtrr_done(&ctxt);
188}
189#endif
190
191static int centaur_validate_add_page(unsigned long base,
192 unsigned long size, unsigned int type)
193{ 100{
194 /* 101 /*
195 * FIXME: Winchip2 supports uncached 102 * FIXME: Winchip2 supports uncached
196 */ 103 */
197 if (type != MTRR_TYPE_WRCOMB && 104 if (type != MTRR_TYPE_WRCOMB &&
198 (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { 105 (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) {
199 printk(KERN_WARNING 106 pr_warning("mtrr: only write-combining%s supported\n",
200 "mtrr: only write-combining%s supported\n", 107 centaur_mcr_type ? " and uncacheable are" : " is");
201 centaur_mcr_type ? " and uncacheable are"
202 : " is");
203 return -EINVAL; 108 return -EINVAL;
204 } 109 }
205 return 0; 110 return 0;
@@ -207,7 +112,6 @@ static int centaur_validate_add_page(unsigned long base,
207 112
208static struct mtrr_ops centaur_mtrr_ops = { 113static struct mtrr_ops centaur_mtrr_ops = {
209 .vendor = X86_VENDOR_CENTAUR, 114 .vendor = X86_VENDOR_CENTAUR,
210// .init = centaur_mcr_init,
211 .set = centaur_set_mcr, 115 .set = centaur_set_mcr,
212 .get = centaur_get_mcr, 116 .get = centaur_get_mcr,
213 .get_free_region = centaur_get_free_region, 117 .get_free_region = centaur_get_free_region,
@@ -220,5 +124,3 @@ int __init centaur_init_mtrr(void)
220 set_mtrr_ops(&centaur_mtrr_ops); 124 set_mtrr_ops(&centaur_mtrr_ops);
221 return 0; 125 return 0;
222} 126}
223
224//arch_initcall(centaur_init_mtrr);
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c
index 1d584a18a50d..315738c74aad 100644
--- a/arch/x86/kernel/cpu/mtrr/cleanup.c
+++ b/arch/x86/kernel/cpu/mtrr/cleanup.c
@@ -1,51 +1,75 @@
1/* MTRR (Memory Type Range Register) cleanup 1/*
2 2 * MTRR (Memory Type Range Register) cleanup
3 Copyright (C) 2009 Yinghai Lu 3 *
4 4 * Copyright (C) 2009 Yinghai Lu
5 This library is free software; you can redistribute it and/or 5 *
6 modify it under the terms of the GNU Library General Public 6 * This library is free software; you can redistribute it and/or
7 License as published by the Free Software Foundation; either 7 * modify it under the terms of the GNU Library General Public
8 version 2 of the License, or (at your option) any later version. 8 * License as published by the Free Software Foundation; either
9 9 * version 2 of the License, or (at your option) any later version.
10 This library is distributed in the hope that it will be useful, 10 *
11 but WITHOUT ANY WARRANTY; without even the implied warranty of 11 * This library is distributed in the hope that it will be useful,
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 Library General Public License for more details. 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 14 * Library General Public License for more details.
15 You should have received a copy of the GNU Library General Public 15 *
16 License along with this library; if not, write to the Free 16 * You should have received a copy of the GNU Library General Public
17 Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 17 * License along with this library; if not, write to the Free
18*/ 18 * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 19 */
20#include <linux/module.h> 20#include <linux/module.h>
21#include <linux/init.h> 21#include <linux/init.h>
22#include <linux/pci.h> 22#include <linux/pci.h>
23#include <linux/smp.h> 23#include <linux/smp.h>
24#include <linux/cpu.h> 24#include <linux/cpu.h>
25#include <linux/mutex.h>
26#include <linux/sort.h> 25#include <linux/sort.h>
26#include <linux/mutex.h>
27#include <linux/uaccess.h>
28#include <linux/kvm_para.h>
27 29
30#include <asm/processor.h>
28#include <asm/e820.h> 31#include <asm/e820.h>
29#include <asm/mtrr.h> 32#include <asm/mtrr.h>
30#include <asm/uaccess.h>
31#include <asm/processor.h>
32#include <asm/msr.h> 33#include <asm/msr.h>
33#include <asm/kvm_para.h>
34#include "mtrr.h"
35 34
36/* should be related to MTRR_VAR_RANGES nums */ 35#include "mtrr.h"
37#define RANGE_NUM 256
38 36
39struct res_range { 37struct res_range {
40 unsigned long start; 38 unsigned long start;
41 unsigned long end; 39 unsigned long end;
40};
41
42struct var_mtrr_range_state {
43 unsigned long base_pfn;
44 unsigned long size_pfn;
45 mtrr_type type;
46};
47
48struct var_mtrr_state {
49 unsigned long range_startk;
50 unsigned long range_sizek;
51 unsigned long chunk_sizek;
52 unsigned long gran_sizek;
53 unsigned int reg;
42}; 54};
43 55
56/* Should be related to MTRR_VAR_RANGES nums */
57#define RANGE_NUM 256
58
59static struct res_range __initdata range[RANGE_NUM];
60static int __initdata nr_range;
61
62static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
63
64static int __initdata debug_print;
65#define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0)
66
67
44static int __init 68static int __init
45add_range(struct res_range *range, int nr_range, unsigned long start, 69add_range(struct res_range *range, int nr_range,
46 unsigned long end) 70 unsigned long start, unsigned long end)
47{ 71{
48 /* out of slots */ 72 /* Out of slots: */
49 if (nr_range >= RANGE_NUM) 73 if (nr_range >= RANGE_NUM)
50 return nr_range; 74 return nr_range;
51 75
@@ -58,12 +82,12 @@ add_range(struct res_range *range, int nr_range, unsigned long start,
58} 82}
59 83
60static int __init 84static int __init
61add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, 85add_range_with_merge(struct res_range *range, int nr_range,
62 unsigned long end) 86 unsigned long start, unsigned long end)
63{ 87{
64 int i; 88 int i;
65 89
66 /* try to merge it with old one */ 90 /* Try to merge it with old one: */
67 for (i = 0; i < nr_range; i++) { 91 for (i = 0; i < nr_range; i++) {
68 unsigned long final_start, final_end; 92 unsigned long final_start, final_end;
69 unsigned long common_start, common_end; 93 unsigned long common_start, common_end;
@@ -84,7 +108,7 @@ add_range_with_merge(struct res_range *range, int nr_range, unsigned long start,
84 return nr_range; 108 return nr_range;
85 } 109 }
86 110
87 /* need to add that */ 111 /* Need to add it: */
88 return add_range(range, nr_range, start, end); 112 return add_range(range, nr_range, start, end);
89} 113}
90 114
@@ -117,7 +141,7 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end)
117 } 141 }
118 142
119 if (start > range[j].start && end < range[j].end) { 143 if (start > range[j].start && end < range[j].end) {
120 /* find the new spare */ 144 /* Find the new spare: */
121 for (i = 0; i < RANGE_NUM; i++) { 145 for (i = 0; i < RANGE_NUM; i++) {
122 if (range[i].end == 0) 146 if (range[i].end == 0)
123 break; 147 break;
@@ -146,14 +170,8 @@ static int __init cmp_range(const void *x1, const void *x2)
146 return start1 - start2; 170 return start1 - start2;
147} 171}
148 172
149struct var_mtrr_range_state { 173#define BIOS_BUG_MSG KERN_WARNING \
150 unsigned long base_pfn; 174 "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n"
151 unsigned long size_pfn;
152 mtrr_type type;
153};
154
155static struct var_mtrr_range_state __initdata range_state[RANGE_NUM];
156static int __initdata debug_print;
157 175
158static int __init 176static int __init
159x86_get_mtrr_mem_range(struct res_range *range, int nr_range, 177x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
@@ -180,7 +198,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
180 range[i].start, range[i].end + 1); 198 range[i].start, range[i].end + 1);
181 } 199 }
182 200
183 /* take out UC ranges */ 201 /* Take out UC ranges: */
184 for (i = 0; i < num_var_ranges; i++) { 202 for (i = 0; i < num_var_ranges; i++) {
185 type = range_state[i].type; 203 type = range_state[i].type;
186 if (type != MTRR_TYPE_UNCACHABLE && 204 if (type != MTRR_TYPE_UNCACHABLE &&
@@ -193,9 +211,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
193 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && 211 if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed &&
194 (mtrr_state.enabled & 1)) { 212 (mtrr_state.enabled & 1)) {
195 /* Var MTRR contains UC entry below 1M? Skip it: */ 213 /* Var MTRR contains UC entry below 1M? Skip it: */
196 printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " 214 printk(BIOS_BUG_MSG, i);
197 "contains strange UC entry under 1M, check "
198 "with your system vendor!\n", i);
199 if (base + size <= (1<<(20-PAGE_SHIFT))) 215 if (base + size <= (1<<(20-PAGE_SHIFT)))
200 continue; 216 continue;
201 size -= (1<<(20-PAGE_SHIFT)) - base; 217 size -= (1<<(20-PAGE_SHIFT)) - base;
@@ -237,17 +253,13 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range,
237 return nr_range; 253 return nr_range;
238} 254}
239 255
240static struct res_range __initdata range[RANGE_NUM];
241static int __initdata nr_range;
242
243#ifdef CONFIG_MTRR_SANITIZER 256#ifdef CONFIG_MTRR_SANITIZER
244 257
245static unsigned long __init sum_ranges(struct res_range *range, int nr_range) 258static unsigned long __init sum_ranges(struct res_range *range, int nr_range)
246{ 259{
247 unsigned long sum; 260 unsigned long sum = 0;
248 int i; 261 int i;
249 262
250 sum = 0;
251 for (i = 0; i < nr_range; i++) 263 for (i = 0; i < nr_range; i++)
252 sum += range[i].end + 1 - range[i].start; 264 sum += range[i].end + 1 - range[i].start;
253 265
@@ -278,17 +290,9 @@ static int __init mtrr_cleanup_debug_setup(char *str)
278} 290}
279early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); 291early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup);
280 292
281struct var_mtrr_state {
282 unsigned long range_startk;
283 unsigned long range_sizek;
284 unsigned long chunk_sizek;
285 unsigned long gran_sizek;
286 unsigned int reg;
287};
288
289static void __init 293static void __init
290set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, 294set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
291 unsigned char type, unsigned int address_bits) 295 unsigned char type, unsigned int address_bits)
292{ 296{
293 u32 base_lo, base_hi, mask_lo, mask_hi; 297 u32 base_lo, base_hi, mask_lo, mask_hi;
294 u64 base, mask; 298 u64 base, mask;
@@ -301,7 +305,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
301 mask = (1ULL << address_bits) - 1; 305 mask = (1ULL << address_bits) - 1;
302 mask &= ~((((u64)sizek) << 10) - 1); 306 mask &= ~((((u64)sizek) << 10) - 1);
303 307
304 base = ((u64)basek) << 10; 308 base = ((u64)basek) << 10;
305 309
306 base |= type; 310 base |= type;
307 mask |= 0x800; 311 mask |= 0x800;
@@ -317,15 +321,14 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
317 321
318static void __init 322static void __init
319save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, 323save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek,
320 unsigned char type) 324 unsigned char type)
321{ 325{
322 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); 326 range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10);
323 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); 327 range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10);
324 range_state[reg].type = type; 328 range_state[reg].type = type;
325} 329}
326 330
327static void __init 331static void __init set_var_mtrr_all(unsigned int address_bits)
328set_var_mtrr_all(unsigned int address_bits)
329{ 332{
330 unsigned long basek, sizek; 333 unsigned long basek, sizek;
331 unsigned char type; 334 unsigned char type;
@@ -342,11 +345,11 @@ set_var_mtrr_all(unsigned int address_bits)
342 345
343static unsigned long to_size_factor(unsigned long sizek, char *factorp) 346static unsigned long to_size_factor(unsigned long sizek, char *factorp)
344{ 347{
345 char factor;
346 unsigned long base = sizek; 348 unsigned long base = sizek;
349 char factor;
347 350
348 if (base & ((1<<10) - 1)) { 351 if (base & ((1<<10) - 1)) {
349 /* not MB alignment */ 352 /* Not MB-aligned: */
350 factor = 'K'; 353 factor = 'K';
351 } else if (base & ((1<<20) - 1)) { 354 } else if (base & ((1<<20) - 1)) {
352 factor = 'M'; 355 factor = 'M';
@@ -372,11 +375,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
372 unsigned long max_align, align; 375 unsigned long max_align, align;
373 unsigned long sizek; 376 unsigned long sizek;
374 377
375 /* Compute the maximum size I can make a range */ 378 /* Compute the maximum size with which we can make a range: */
376 if (range_startk) 379 if (range_startk)
377 max_align = ffs(range_startk) - 1; 380 max_align = ffs(range_startk) - 1;
378 else 381 else
379 max_align = 32; 382 max_align = 32;
383
380 align = fls(range_sizek) - 1; 384 align = fls(range_sizek) - 1;
381 if (align > max_align) 385 if (align > max_align)
382 align = max_align; 386 align = max_align;
@@ -386,11 +390,10 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk,
386 char start_factor = 'K', size_factor = 'K'; 390 char start_factor = 'K', size_factor = 'K';
387 unsigned long start_base, size_base; 391 unsigned long start_base, size_base;
388 392
389 start_base = to_size_factor(range_startk, 393 start_base = to_size_factor(range_startk, &start_factor);
390 &start_factor), 394 size_base = to_size_factor(sizek, &size_factor);
391 size_base = to_size_factor(sizek, &size_factor),
392 395
393 printk(KERN_DEBUG "Setting variable MTRR %d, " 396 Dprintk("Setting variable MTRR %d, "
394 "base: %ld%cB, range: %ld%cB, type %s\n", 397 "base: %ld%cB, range: %ld%cB, type %s\n",
395 reg, start_base, start_factor, 398 reg, start_base, start_factor,
396 size_base, size_factor, 399 size_base, size_factor,
@@ -425,10 +428,11 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
425 chunk_sizek = state->chunk_sizek; 428 chunk_sizek = state->chunk_sizek;
426 gran_sizek = state->gran_sizek; 429 gran_sizek = state->gran_sizek;
427 430
428 /* align with gran size, prevent small block used up MTRRs */ 431 /* Align with gran size, prevent small block used up MTRRs: */
429 range_basek = ALIGN(state->range_startk, gran_sizek); 432 range_basek = ALIGN(state->range_startk, gran_sizek);
430 if ((range_basek > basek) && basek) 433 if ((range_basek > basek) && basek)
431 return second_sizek; 434 return second_sizek;
435
432 state->range_sizek -= (range_basek - state->range_startk); 436 state->range_sizek -= (range_basek - state->range_startk);
433 range_sizek = ALIGN(state->range_sizek, gran_sizek); 437 range_sizek = ALIGN(state->range_sizek, gran_sizek);
434 438
@@ -439,22 +443,21 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
439 } 443 }
440 state->range_sizek = range_sizek; 444 state->range_sizek = range_sizek;
441 445
442 /* try to append some small hole */ 446 /* Try to append some small hole: */
443 range0_basek = state->range_startk; 447 range0_basek = state->range_startk;
444 range0_sizek = ALIGN(state->range_sizek, chunk_sizek); 448 range0_sizek = ALIGN(state->range_sizek, chunk_sizek);
445 449
446 /* no increase */ 450 /* No increase: */
447 if (range0_sizek == state->range_sizek) { 451 if (range0_sizek == state->range_sizek) {
448 if (debug_print) 452 Dprintk("rangeX: %016lx - %016lx\n",
449 printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", 453 range0_basek<<10,
450 range0_basek<<10, 454 (range0_basek + state->range_sizek)<<10);
451 (range0_basek + state->range_sizek)<<10);
452 state->reg = range_to_mtrr(state->reg, range0_basek, 455 state->reg = range_to_mtrr(state->reg, range0_basek,
453 state->range_sizek, MTRR_TYPE_WRBACK); 456 state->range_sizek, MTRR_TYPE_WRBACK);
454 return 0; 457 return 0;
455 } 458 }
456 459
457 /* only cut back, when it is not the last */ 460 /* Only cut back when it is not the last: */
458 if (sizek) { 461 if (sizek) {
459 while (range0_basek + range0_sizek > (basek + sizek)) { 462 while (range0_basek + range0_sizek > (basek + sizek)) {
460 if (range0_sizek >= chunk_sizek) 463 if (range0_sizek >= chunk_sizek)
@@ -470,16 +473,16 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek,
470second_try: 473second_try:
471 range_basek = range0_basek + range0_sizek; 474 range_basek = range0_basek + range0_sizek;
472 475
473 /* one hole in the middle */ 476 /* One hole in the middle: */
474 if (range_basek > basek && range_basek <= (basek + sizek)) 477 if (range_basek > basek && range_basek <= (basek + sizek))
475 second_sizek = range_basek - basek; 478 second_sizek = range_basek - basek;
476 479
477 if (range0_sizek > state->range_sizek) { 480 if (range0_sizek > state->range_sizek) {
478 481
479 /* one hole in middle or at end */ 482 /* One hole in middle or at the end: */
480 hole_sizek = range0_sizek - state->range_sizek - second_sizek; 483 hole_sizek = range0_sizek - state->range_sizek - second_sizek;
481 484
482 /* hole size should be less than half of range0 size */ 485 /* Hole size should be less than half of range0 size: */
483 if (hole_sizek >= (range0_sizek >> 1) && 486 if (hole_sizek >= (range0_sizek >> 1) &&
484 range0_sizek >= chunk_sizek) { 487 range0_sizek >= chunk_sizek) {
485 range0_sizek -= chunk_sizek; 488 range0_sizek -= chunk_sizek;
@@ -491,32 +494,30 @@ second_try:
491 } 494 }
492 495
493 if (range0_sizek) { 496 if (range0_sizek) {
494 if (debug_print) 497 Dprintk("range0: %016lx - %016lx\n",
495 printk(KERN_DEBUG "range0: %016lx - %016lx\n", 498 range0_basek<<10,
496 range0_basek<<10, 499 (range0_basek + range0_sizek)<<10);
497 (range0_basek + range0_sizek)<<10);
498 state->reg = range_to_mtrr(state->reg, range0_basek, 500 state->reg = range_to_mtrr(state->reg, range0_basek,
499 range0_sizek, MTRR_TYPE_WRBACK); 501 range0_sizek, MTRR_TYPE_WRBACK);
500 } 502 }
501 503
502 if (range0_sizek < state->range_sizek) { 504 if (range0_sizek < state->range_sizek) {
503 /* need to handle left over */ 505 /* Need to handle left over range: */
504 range_sizek = state->range_sizek - range0_sizek; 506 range_sizek = state->range_sizek - range0_sizek;
505 507
506 if (debug_print) 508 Dprintk("range: %016lx - %016lx\n",
507 printk(KERN_DEBUG "range: %016lx - %016lx\n", 509 range_basek<<10,
508 range_basek<<10, 510 (range_basek + range_sizek)<<10);
509 (range_basek + range_sizek)<<10); 511
510 state->reg = range_to_mtrr(state->reg, range_basek, 512 state->reg = range_to_mtrr(state->reg, range_basek,
511 range_sizek, MTRR_TYPE_WRBACK); 513 range_sizek, MTRR_TYPE_WRBACK);
512 } 514 }
513 515
514 if (hole_sizek) { 516 if (hole_sizek) {
515 hole_basek = range_basek - hole_sizek - second_sizek; 517 hole_basek = range_basek - hole_sizek - second_sizek;
516 if (debug_print) 518 Dprintk("hole: %016lx - %016lx\n",
517 printk(KERN_DEBUG "hole: %016lx - %016lx\n", 519 hole_basek<<10,
518 hole_basek<<10, 520 (hole_basek + hole_sizek)<<10);
519 (hole_basek + hole_sizek)<<10);
520 state->reg = range_to_mtrr(state->reg, hole_basek, 521 state->reg = range_to_mtrr(state->reg, hole_basek,
521 hole_sizek, MTRR_TYPE_UNCACHABLE); 522 hole_sizek, MTRR_TYPE_UNCACHABLE);
522 } 523 }
@@ -537,23 +538,23 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn,
537 basek = base_pfn << (PAGE_SHIFT - 10); 538 basek = base_pfn << (PAGE_SHIFT - 10);
538 sizek = size_pfn << (PAGE_SHIFT - 10); 539 sizek = size_pfn << (PAGE_SHIFT - 10);
539 540
540 /* See if I can merge with the last range */ 541 /* See if I can merge with the last range: */
541 if ((basek <= 1024) || 542 if ((basek <= 1024) ||
542 (state->range_startk + state->range_sizek == basek)) { 543 (state->range_startk + state->range_sizek == basek)) {
543 unsigned long endk = basek + sizek; 544 unsigned long endk = basek + sizek;
544 state->range_sizek = endk - state->range_startk; 545 state->range_sizek = endk - state->range_startk;
545 return; 546 return;
546 } 547 }
547 /* Write the range mtrrs */ 548 /* Write the range mtrrs: */
548 if (state->range_sizek != 0) 549 if (state->range_sizek != 0)
549 second_sizek = range_to_mtrr_with_hole(state, basek, sizek); 550 second_sizek = range_to_mtrr_with_hole(state, basek, sizek);
550 551
551 /* Allocate an msr */ 552 /* Allocate an msr: */
552 state->range_startk = basek + second_sizek; 553 state->range_startk = basek + second_sizek;
553 state->range_sizek = sizek - second_sizek; 554 state->range_sizek = sizek - second_sizek;
554} 555}
555 556
556/* mininum size of mtrr block that can take hole */ 557/* Mininum size of mtrr block that can take hole: */
557static u64 mtrr_chunk_size __initdata = (256ULL<<20); 558static u64 mtrr_chunk_size __initdata = (256ULL<<20);
558 559
559static int __init parse_mtrr_chunk_size_opt(char *p) 560static int __init parse_mtrr_chunk_size_opt(char *p)
@@ -565,7 +566,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p)
565} 566}
566early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); 567early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt);
567 568
568/* granity of mtrr of block */ 569/* Granularity of mtrr of block: */
569static u64 mtrr_gran_size __initdata; 570static u64 mtrr_gran_size __initdata;
570 571
571static int __init parse_mtrr_gran_size_opt(char *p) 572static int __init parse_mtrr_gran_size_opt(char *p)
@@ -577,7 +578,7 @@ static int __init parse_mtrr_gran_size_opt(char *p)
577} 578}
578early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); 579early_param("mtrr_gran_size", parse_mtrr_gran_size_opt);
579 580
580static int nr_mtrr_spare_reg __initdata = 581static unsigned long nr_mtrr_spare_reg __initdata =
581 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; 582 CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT;
582 583
583static int __init parse_mtrr_spare_reg(char *arg) 584static int __init parse_mtrr_spare_reg(char *arg)
@@ -586,7 +587,6 @@ static int __init parse_mtrr_spare_reg(char *arg)
586 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); 587 nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0);
587 return 0; 588 return 0;
588} 589}
589
590early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); 590early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg);
591 591
592static int __init 592static int __init
@@ -594,8 +594,8 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
594 u64 chunk_size, u64 gran_size) 594 u64 chunk_size, u64 gran_size)
595{ 595{
596 struct var_mtrr_state var_state; 596 struct var_mtrr_state var_state;
597 int i;
598 int num_reg; 597 int num_reg;
598 int i;
599 599
600 var_state.range_startk = 0; 600 var_state.range_startk = 0;
601 var_state.range_sizek = 0; 601 var_state.range_sizek = 0;
@@ -605,17 +605,18 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
605 605
606 memset(range_state, 0, sizeof(range_state)); 606 memset(range_state, 0, sizeof(range_state));
607 607
608 /* Write the range etc */ 608 /* Write the range: */
609 for (i = 0; i < nr_range; i++) 609 for (i = 0; i < nr_range; i++) {
610 set_var_mtrr_range(&var_state, range[i].start, 610 set_var_mtrr_range(&var_state, range[i].start,
611 range[i].end - range[i].start + 1); 611 range[i].end - range[i].start + 1);
612 }
612 613
613 /* Write the last range */ 614 /* Write the last range: */
614 if (var_state.range_sizek != 0) 615 if (var_state.range_sizek != 0)
615 range_to_mtrr_with_hole(&var_state, 0, 0); 616 range_to_mtrr_with_hole(&var_state, 0, 0);
616 617
617 num_reg = var_state.reg; 618 num_reg = var_state.reg;
618 /* Clear out the extra MTRR's */ 619 /* Clear out the extra MTRR's: */
619 while (var_state.reg < num_var_ranges) { 620 while (var_state.reg < num_var_ranges) {
620 save_var_mtrr(var_state.reg, 0, 0, 0); 621 save_var_mtrr(var_state.reg, 0, 0, 0);
621 var_state.reg++; 622 var_state.reg++;
@@ -625,11 +626,11 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range,
625} 626}
626 627
627struct mtrr_cleanup_result { 628struct mtrr_cleanup_result {
628 unsigned long gran_sizek; 629 unsigned long gran_sizek;
629 unsigned long chunk_sizek; 630 unsigned long chunk_sizek;
630 unsigned long lose_cover_sizek; 631 unsigned long lose_cover_sizek;
631 unsigned int num_reg; 632 unsigned int num_reg;
632 int bad; 633 int bad;
633}; 634};
634 635
635/* 636/*
@@ -645,10 +646,10 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM];
645 646
646static void __init print_out_mtrr_range_state(void) 647static void __init print_out_mtrr_range_state(void)
647{ 648{
648 int i;
649 char start_factor = 'K', size_factor = 'K'; 649 char start_factor = 'K', size_factor = 'K';
650 unsigned long start_base, size_base; 650 unsigned long start_base, size_base;
651 mtrr_type type; 651 mtrr_type type;
652 int i;
652 653
653 for (i = 0; i < num_var_ranges; i++) { 654 for (i = 0; i < num_var_ranges; i++) {
654 655
@@ -676,10 +677,10 @@ static int __init mtrr_need_cleanup(void)
676 int i; 677 int i;
677 mtrr_type type; 678 mtrr_type type;
678 unsigned long size; 679 unsigned long size;
679 /* extra one for all 0 */ 680 /* Extra one for all 0: */
680 int num[MTRR_NUM_TYPES + 1]; 681 int num[MTRR_NUM_TYPES + 1];
681 682
682 /* check entries number */ 683 /* Check entries number: */
683 memset(num, 0, sizeof(num)); 684 memset(num, 0, sizeof(num));
684 for (i = 0; i < num_var_ranges; i++) { 685 for (i = 0; i < num_var_ranges; i++) {
685 type = range_state[i].type; 686 type = range_state[i].type;
@@ -693,88 +694,86 @@ static int __init mtrr_need_cleanup(void)
693 num[type]++; 694 num[type]++;
694 } 695 }
695 696
696 /* check if we got UC entries */ 697 /* Check if we got UC entries: */
697 if (!num[MTRR_TYPE_UNCACHABLE]) 698 if (!num[MTRR_TYPE_UNCACHABLE])
698 return 0; 699 return 0;
699 700
700 /* check if we only had WB and UC */ 701 /* Check if we only had WB and UC */
701 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != 702 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
702 num_var_ranges - num[MTRR_NUM_TYPES]) 703 num_var_ranges - num[MTRR_NUM_TYPES])
703 return 0; 704 return 0;
704 705
705 return 1; 706 return 1;
706} 707}
707 708
708static unsigned long __initdata range_sums; 709static unsigned long __initdata range_sums;
709static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, 710
710 unsigned long extra_remove_base, 711static void __init
711 unsigned long extra_remove_size, 712mtrr_calc_range_state(u64 chunk_size, u64 gran_size,
712 int i) 713 unsigned long x_remove_base,
714 unsigned long x_remove_size, int i)
713{ 715{
714 int num_reg;
715 static struct res_range range_new[RANGE_NUM]; 716 static struct res_range range_new[RANGE_NUM];
716 static int nr_range_new;
717 unsigned long range_sums_new; 717 unsigned long range_sums_new;
718 static int nr_range_new;
719 int num_reg;
718 720
719 /* convert ranges to var ranges state */ 721 /* Convert ranges to var ranges state: */
720 num_reg = x86_setup_var_mtrrs(range, nr_range, 722 num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size);
721 chunk_size, gran_size);
722 723
723 /* we got new setting in range_state, check it */ 724 /* We got new setting in range_state, check it: */
724 memset(range_new, 0, sizeof(range_new)); 725 memset(range_new, 0, sizeof(range_new));
725 nr_range_new = x86_get_mtrr_mem_range(range_new, 0, 726 nr_range_new = x86_get_mtrr_mem_range(range_new, 0,
726 extra_remove_base, extra_remove_size); 727 x_remove_base, x_remove_size);
727 range_sums_new = sum_ranges(range_new, nr_range_new); 728 range_sums_new = sum_ranges(range_new, nr_range_new);
728 729
729 result[i].chunk_sizek = chunk_size >> 10; 730 result[i].chunk_sizek = chunk_size >> 10;
730 result[i].gran_sizek = gran_size >> 10; 731 result[i].gran_sizek = gran_size >> 10;
731 result[i].num_reg = num_reg; 732 result[i].num_reg = num_reg;
733
732 if (range_sums < range_sums_new) { 734 if (range_sums < range_sums_new) {
733 result[i].lose_cover_sizek = 735 result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT;
734 (range_sums_new - range_sums) << PSHIFT;
735 result[i].bad = 1; 736 result[i].bad = 1;
736 } else 737 } else {
737 result[i].lose_cover_sizek = 738 result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT;
738 (range_sums - range_sums_new) << PSHIFT; 739 }
739 740
740 /* double check it */ 741 /* Double check it: */
741 if (!result[i].bad && !result[i].lose_cover_sizek) { 742 if (!result[i].bad && !result[i].lose_cover_sizek) {
742 if (nr_range_new != nr_range || 743 if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range)))
743 memcmp(range, range_new, sizeof(range))) 744 result[i].bad = 1;
744 result[i].bad = 1;
745 } 745 }
746 746
747 if (!result[i].bad && (range_sums - range_sums_new < 747 if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg]))
748 min_loss_pfn[num_reg])) { 748 min_loss_pfn[num_reg] = range_sums - range_sums_new;
749 min_loss_pfn[num_reg] =
750 range_sums - range_sums_new;
751 }
752} 749}
753 750
754static void __init mtrr_print_out_one_result(int i) 751static void __init mtrr_print_out_one_result(int i)
755{ 752{
756 char gran_factor, chunk_factor, lose_factor;
757 unsigned long gran_base, chunk_base, lose_base; 753 unsigned long gran_base, chunk_base, lose_base;
754 char gran_factor, chunk_factor, lose_factor;
758 755
759 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), 756 gran_base = to_size_factor(result[i].gran_sizek, &gran_factor),
760 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), 757 chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor),
761 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), 758 lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor),
762 printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", 759
763 result[i].bad ? "*BAD*" : " ", 760 pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t",
764 gran_base, gran_factor, chunk_base, chunk_factor); 761 result[i].bad ? "*BAD*" : " ",
765 printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", 762 gran_base, gran_factor, chunk_base, chunk_factor);
766 result[i].num_reg, result[i].bad ? "-" : "", 763 pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n",
767 lose_base, lose_factor); 764 result[i].num_reg, result[i].bad ? "-" : "",
765 lose_base, lose_factor);
768} 766}
769 767
770static int __init mtrr_search_optimal_index(void) 768static int __init mtrr_search_optimal_index(void)
771{ 769{
772 int i;
773 int num_reg_good; 770 int num_reg_good;
774 int index_good; 771 int index_good;
772 int i;
775 773
776 if (nr_mtrr_spare_reg >= num_var_ranges) 774 if (nr_mtrr_spare_reg >= num_var_ranges)
777 nr_mtrr_spare_reg = num_var_ranges - 1; 775 nr_mtrr_spare_reg = num_var_ranges - 1;
776
778 num_reg_good = -1; 777 num_reg_good = -1;
779 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { 778 for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) {
780 if (!min_loss_pfn[i]) 779 if (!min_loss_pfn[i])
@@ -796,24 +795,24 @@ static int __init mtrr_search_optimal_index(void)
796 return index_good; 795 return index_good;
797} 796}
798 797
799
800int __init mtrr_cleanup(unsigned address_bits) 798int __init mtrr_cleanup(unsigned address_bits)
801{ 799{
802 unsigned long extra_remove_base, extra_remove_size; 800 unsigned long x_remove_base, x_remove_size;
803 unsigned long base, size, def, dummy; 801 unsigned long base, size, def, dummy;
804 mtrr_type type;
805 u64 chunk_size, gran_size; 802 u64 chunk_size, gran_size;
803 mtrr_type type;
806 int index_good; 804 int index_good;
807 int i; 805 int i;
808 806
809 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) 807 if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1)
810 return 0; 808 return 0;
809
811 rdmsr(MSR_MTRRdefType, def, dummy); 810 rdmsr(MSR_MTRRdefType, def, dummy);
812 def &= 0xff; 811 def &= 0xff;
813 if (def != MTRR_TYPE_UNCACHABLE) 812 if (def != MTRR_TYPE_UNCACHABLE)
814 return 0; 813 return 0;
815 814
816 /* get it and store it aside */ 815 /* Get it and store it aside: */
817 memset(range_state, 0, sizeof(range_state)); 816 memset(range_state, 0, sizeof(range_state));
818 for (i = 0; i < num_var_ranges; i++) { 817 for (i = 0; i < num_var_ranges; i++) {
819 mtrr_if->get(i, &base, &size, &type); 818 mtrr_if->get(i, &base, &size, &type);
@@ -822,29 +821,28 @@ int __init mtrr_cleanup(unsigned address_bits)
822 range_state[i].type = type; 821 range_state[i].type = type;
823 } 822 }
824 823
825 /* check if we need handle it and can handle it */ 824 /* Check if we need handle it and can handle it: */
826 if (!mtrr_need_cleanup()) 825 if (!mtrr_need_cleanup())
827 return 0; 826 return 0;
828 827
829 /* print original var MTRRs at first, for debugging: */ 828 /* Print original var MTRRs at first, for debugging: */
830 printk(KERN_DEBUG "original variable MTRRs\n"); 829 printk(KERN_DEBUG "original variable MTRRs\n");
831 print_out_mtrr_range_state(); 830 print_out_mtrr_range_state();
832 831
833 memset(range, 0, sizeof(range)); 832 memset(range, 0, sizeof(range));
834 extra_remove_size = 0; 833 x_remove_size = 0;
835 extra_remove_base = 1 << (32 - PAGE_SHIFT); 834 x_remove_base = 1 << (32 - PAGE_SHIFT);
836 if (mtrr_tom2) 835 if (mtrr_tom2)
837 extra_remove_size = 836 x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base;
838 (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; 837
839 nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, 838 nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size);
840 extra_remove_size);
841 /* 839 /*
842 * [0, 1M) should always be coverred by var mtrr with WB 840 * [0, 1M) should always be covered by var mtrr with WB
843 * and fixed mtrrs should take effective before var mtrr for it 841 * and fixed mtrrs should take effect before var mtrr for it:
844 */ 842 */
845 nr_range = add_range_with_merge(range, nr_range, 0, 843 nr_range = add_range_with_merge(range, nr_range, 0,
846 (1ULL<<(20 - PAGE_SHIFT)) - 1); 844 (1ULL<<(20 - PAGE_SHIFT)) - 1);
847 /* sort the ranges */ 845 /* Sort the ranges: */
848 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); 846 sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL);
849 847
850 range_sums = sum_ranges(range, nr_range); 848 range_sums = sum_ranges(range, nr_range);
@@ -854,7 +852,7 @@ int __init mtrr_cleanup(unsigned address_bits)
854 if (mtrr_chunk_size && mtrr_gran_size) { 852 if (mtrr_chunk_size && mtrr_gran_size) {
855 i = 0; 853 i = 0;
856 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, 854 mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size,
857 extra_remove_base, extra_remove_size, i); 855 x_remove_base, x_remove_size, i);
858 856
859 mtrr_print_out_one_result(i); 857 mtrr_print_out_one_result(i);
860 858
@@ -880,7 +878,7 @@ int __init mtrr_cleanup(unsigned address_bits)
880 continue; 878 continue;
881 879
882 mtrr_calc_range_state(chunk_size, gran_size, 880 mtrr_calc_range_state(chunk_size, gran_size,
883 extra_remove_base, extra_remove_size, i); 881 x_remove_base, x_remove_size, i);
884 if (debug_print) { 882 if (debug_print) {
885 mtrr_print_out_one_result(i); 883 mtrr_print_out_one_result(i);
886 printk(KERN_INFO "\n"); 884 printk(KERN_INFO "\n");
@@ -890,7 +888,7 @@ int __init mtrr_cleanup(unsigned address_bits)
890 } 888 }
891 } 889 }
892 890
893 /* try to find the optimal index */ 891 /* Try to find the optimal index: */
894 index_good = mtrr_search_optimal_index(); 892 index_good = mtrr_search_optimal_index();
895 893
896 if (index_good != -1) { 894 if (index_good != -1) {
@@ -898,7 +896,7 @@ int __init mtrr_cleanup(unsigned address_bits)
898 i = index_good; 896 i = index_good;
899 mtrr_print_out_one_result(i); 897 mtrr_print_out_one_result(i);
900 898
901 /* convert ranges to var ranges state */ 899 /* Convert ranges to var ranges state: */
902 chunk_size = result[i].chunk_sizek; 900 chunk_size = result[i].chunk_sizek;
903 chunk_size <<= 10; 901 chunk_size <<= 10;
904 gran_size = result[i].gran_sizek; 902 gran_size = result[i].gran_sizek;
@@ -941,8 +939,8 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup);
941 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't 939 * Note this won't check if the MTRRs < 4GB where the magic bit doesn't
942 * apply to are wrong, but so far we don't know of any such case in the wild. 940 * apply to are wrong, but so far we don't know of any such case in the wild.
943 */ 941 */
944#define Tom2Enabled (1U << 21) 942#define Tom2Enabled (1U << 21)
945#define Tom2ForceMemTypeWB (1U << 22) 943#define Tom2ForceMemTypeWB (1U << 22)
946 944
947int __init amd_special_default_mtrr(void) 945int __init amd_special_default_mtrr(void)
948{ 946{
@@ -952,7 +950,7 @@ int __init amd_special_default_mtrr(void)
952 return 0; 950 return 0;
953 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) 951 if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11)
954 return 0; 952 return 0;
955 /* In case some hypervisor doesn't pass SYSCFG through */ 953 /* In case some hypervisor doesn't pass SYSCFG through: */
956 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) 954 if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0)
957 return 0; 955 return 0;
958 /* 956 /*
@@ -965,19 +963,21 @@ int __init amd_special_default_mtrr(void)
965 return 0; 963 return 0;
966} 964}
967 965
968static u64 __init real_trim_memory(unsigned long start_pfn, 966static u64 __init
969 unsigned long limit_pfn) 967real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn)
970{ 968{
971 u64 trim_start, trim_size; 969 u64 trim_start, trim_size;
970
972 trim_start = start_pfn; 971 trim_start = start_pfn;
973 trim_start <<= PAGE_SHIFT; 972 trim_start <<= PAGE_SHIFT;
973
974 trim_size = limit_pfn; 974 trim_size = limit_pfn;
975 trim_size <<= PAGE_SHIFT; 975 trim_size <<= PAGE_SHIFT;
976 trim_size -= trim_start; 976 trim_size -= trim_start;
977 977
978 return e820_update_range(trim_start, trim_size, E820_RAM, 978 return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED);
979 E820_RESERVED);
980} 979}
980
981/** 981/**
982 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs 982 * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs
983 * @end_pfn: ending page frame number 983 * @end_pfn: ending page frame number
@@ -985,7 +985,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn,
985 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain 985 * Some buggy BIOSes don't setup the MTRRs properly for systems with certain
986 * memory configurations. This routine checks that the highest MTRR matches 986 * memory configurations. This routine checks that the highest MTRR matches
987 * the end of memory, to make sure the MTRRs having a write back type cover 987 * the end of memory, to make sure the MTRRs having a write back type cover
988 * all of the memory the kernel is intending to use. If not, it'll trim any 988 * all of the memory the kernel is intending to use. If not, it'll trim any
989 * memory off the end by adjusting end_pfn, removing it from the kernel's 989 * memory off the end by adjusting end_pfn, removing it from the kernel's
990 * allocation pools, warning the user with an obnoxious message. 990 * allocation pools, warning the user with an obnoxious message.
991 */ 991 */
@@ -994,21 +994,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
994 unsigned long i, base, size, highest_pfn = 0, def, dummy; 994 unsigned long i, base, size, highest_pfn = 0, def, dummy;
995 mtrr_type type; 995 mtrr_type type;
996 u64 total_trim_size; 996 u64 total_trim_size;
997
998 /* extra one for all 0 */ 997 /* extra one for all 0 */
999 int num[MTRR_NUM_TYPES + 1]; 998 int num[MTRR_NUM_TYPES + 1];
999
1000 /* 1000 /*
1001 * Make sure we only trim uncachable memory on machines that 1001 * Make sure we only trim uncachable memory on machines that
1002 * support the Intel MTRR architecture: 1002 * support the Intel MTRR architecture:
1003 */ 1003 */
1004 if (!is_cpu(INTEL) || disable_mtrr_trim) 1004 if (!is_cpu(INTEL) || disable_mtrr_trim)
1005 return 0; 1005 return 0;
1006
1006 rdmsr(MSR_MTRRdefType, def, dummy); 1007 rdmsr(MSR_MTRRdefType, def, dummy);
1007 def &= 0xff; 1008 def &= 0xff;
1008 if (def != MTRR_TYPE_UNCACHABLE) 1009 if (def != MTRR_TYPE_UNCACHABLE)
1009 return 0; 1010 return 0;
1010 1011
1011 /* get it and store it aside */ 1012 /* Get it and store it aside: */
1012 memset(range_state, 0, sizeof(range_state)); 1013 memset(range_state, 0, sizeof(range_state));
1013 for (i = 0; i < num_var_ranges; i++) { 1014 for (i = 0; i < num_var_ranges; i++) {
1014 mtrr_if->get(i, &base, &size, &type); 1015 mtrr_if->get(i, &base, &size, &type);
@@ -1017,7 +1018,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1017 range_state[i].type = type; 1018 range_state[i].type = type;
1018 } 1019 }
1019 1020
1020 /* Find highest cached pfn */ 1021 /* Find highest cached pfn: */
1021 for (i = 0; i < num_var_ranges; i++) { 1022 for (i = 0; i < num_var_ranges; i++) {
1022 type = range_state[i].type; 1023 type = range_state[i].type;
1023 if (type != MTRR_TYPE_WRBACK) 1024 if (type != MTRR_TYPE_WRBACK)
@@ -1028,13 +1029,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1028 highest_pfn = base + size; 1029 highest_pfn = base + size;
1029 } 1030 }
1030 1031
1031 /* kvm/qemu doesn't have mtrr set right, don't trim them all */ 1032 /* kvm/qemu doesn't have mtrr set right, don't trim them all: */
1032 if (!highest_pfn) { 1033 if (!highest_pfn) {
1033 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); 1034 printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n");
1034 return 0; 1035 return 0;
1035 } 1036 }
1036 1037
1037 /* check entries number */ 1038 /* Check entries number: */
1038 memset(num, 0, sizeof(num)); 1039 memset(num, 0, sizeof(num));
1039 for (i = 0; i < num_var_ranges; i++) { 1040 for (i = 0; i < num_var_ranges; i++) {
1040 type = range_state[i].type; 1041 type = range_state[i].type;
@@ -1046,11 +1047,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1046 num[type]++; 1047 num[type]++;
1047 } 1048 }
1048 1049
1049 /* no entry for WB? */ 1050 /* No entry for WB? */
1050 if (!num[MTRR_TYPE_WRBACK]) 1051 if (!num[MTRR_TYPE_WRBACK])
1051 return 0; 1052 return 0;
1052 1053
1053 /* check if we only had WB and UC */ 1054 /* Check if we only had WB and UC: */
1054 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != 1055 if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] !=
1055 num_var_ranges - num[MTRR_NUM_TYPES]) 1056 num_var_ranges - num[MTRR_NUM_TYPES])
1056 return 0; 1057 return 0;
@@ -1066,31 +1067,31 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1066 } 1067 }
1067 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); 1068 nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0);
1068 1069
1070 /* Check the head: */
1069 total_trim_size = 0; 1071 total_trim_size = 0;
1070 /* check the head */
1071 if (range[0].start) 1072 if (range[0].start)
1072 total_trim_size += real_trim_memory(0, range[0].start); 1073 total_trim_size += real_trim_memory(0, range[0].start);
1073 /* check the holes */ 1074
1075 /* Check the holes: */
1074 for (i = 0; i < nr_range - 1; i++) { 1076 for (i = 0; i < nr_range - 1; i++) {
1075 if (range[i].end + 1 < range[i+1].start) 1077 if (range[i].end + 1 < range[i+1].start)
1076 total_trim_size += real_trim_memory(range[i].end + 1, 1078 total_trim_size += real_trim_memory(range[i].end + 1,
1077 range[i+1].start); 1079 range[i+1].start);
1078 } 1080 }
1079 /* check the top */ 1081
1082 /* Check the top: */
1080 i = nr_range - 1; 1083 i = nr_range - 1;
1081 if (range[i].end + 1 < end_pfn) 1084 if (range[i].end + 1 < end_pfn)
1082 total_trim_size += real_trim_memory(range[i].end + 1, 1085 total_trim_size += real_trim_memory(range[i].end + 1,
1083 end_pfn); 1086 end_pfn);
1084 1087
1085 if (total_trim_size) { 1088 if (total_trim_size) {
1086 printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" 1089 pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20);
1087 " all of memory, losing %lluMB of RAM.\n",
1088 total_trim_size >> 20);
1089 1090
1090 if (!changed_by_mtrr_cleanup) 1091 if (!changed_by_mtrr_cleanup)
1091 WARN_ON(1); 1092 WARN_ON(1);
1092 1093
1093 printk(KERN_INFO "update e820 for mtrr\n"); 1094 pr_info("update e820 for mtrr\n");
1094 update_e820(); 1095 update_e820();
1095 1096
1096 return 1; 1097 return 1;
@@ -1098,4 +1099,3 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn)
1098 1099
1099 return 0; 1100 return 0;
1100} 1101}
1101
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c
index ff14c320040c..228d982ce09c 100644
--- a/arch/x86/kernel/cpu/mtrr/cyrix.c
+++ b/arch/x86/kernel/cpu/mtrr/cyrix.c
@@ -1,38 +1,40 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/io.h>
2#include <linux/mm.h> 3#include <linux/mm.h>
3#include <asm/mtrr.h> 4
4#include <asm/msr.h>
5#include <asm/io.h>
6#include <asm/processor-cyrix.h> 5#include <asm/processor-cyrix.h>
7#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
7#include <asm/mtrr.h>
8#include <asm/msr.h>
9
8#include "mtrr.h" 10#include "mtrr.h"
9 11
10static void 12static void
11cyrix_get_arr(unsigned int reg, unsigned long *base, 13cyrix_get_arr(unsigned int reg, unsigned long *base,
12 unsigned long *size, mtrr_type * type) 14 unsigned long *size, mtrr_type * type)
13{ 15{
14 unsigned long flags;
15 unsigned char arr, ccr3, rcr, shift; 16 unsigned char arr, ccr3, rcr, shift;
17 unsigned long flags;
16 18
17 arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ 19 arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */
18 20
19 /* Save flags and disable interrupts */
20 local_irq_save(flags); 21 local_irq_save(flags);
21 22
22 ccr3 = getCx86(CX86_CCR3); 23 ccr3 = getCx86(CX86_CCR3);
23 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 24 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
24 ((unsigned char *) base)[3] = getCx86(arr); 25 ((unsigned char *)base)[3] = getCx86(arr);
25 ((unsigned char *) base)[2] = getCx86(arr + 1); 26 ((unsigned char *)base)[2] = getCx86(arr + 1);
26 ((unsigned char *) base)[1] = getCx86(arr + 2); 27 ((unsigned char *)base)[1] = getCx86(arr + 2);
27 rcr = getCx86(CX86_RCR_BASE + reg); 28 rcr = getCx86(CX86_RCR_BASE + reg);
28 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 29 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
29 30
30 /* Enable interrupts if it was enabled previously */
31 local_irq_restore(flags); 31 local_irq_restore(flags);
32
32 shift = ((unsigned char *) base)[1] & 0x0f; 33 shift = ((unsigned char *) base)[1] & 0x0f;
33 *base >>= PAGE_SHIFT; 34 *base >>= PAGE_SHIFT;
34 35
35 /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 36 /*
37 * Power of two, at least 4K on ARR0-ARR6, 256K on ARR7
36 * Note: shift==0xf means 4G, this is unsupported. 38 * Note: shift==0xf means 4G, this is unsupported.
37 */ 39 */
38 if (shift) 40 if (shift)
@@ -76,17 +78,20 @@ cyrix_get_arr(unsigned int reg, unsigned long *base,
76 } 78 }
77} 79}
78 80
81/*
82 * cyrix_get_free_region - get a free ARR.
83 *
84 * @base: the starting (base) address of the region.
85 * @size: the size (in bytes) of the region.
86 *
87 * Returns: the index of the region on success, else -1 on error.
88*/
79static int 89static int
80cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) 90cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
81/* [SUMMARY] Get a free ARR.
82 <base> The starting (base) address of the region.
83 <size> The size (in bytes) of the region.
84 [RETURNS] The index of the region on success, else -1 on error.
85*/
86{ 91{
87 int i;
88 mtrr_type ltype;
89 unsigned long lbase, lsize; 92 unsigned long lbase, lsize;
93 mtrr_type ltype;
94 int i;
90 95
91 switch (replace_reg) { 96 switch (replace_reg) {
92 case 7: 97 case 7:
@@ -107,14 +112,17 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
107 cyrix_get_arr(7, &lbase, &lsize, &ltype); 112 cyrix_get_arr(7, &lbase, &lsize, &ltype);
108 if (lsize == 0) 113 if (lsize == 0)
109 return 7; 114 return 7;
110 /* Else try ARR0-ARR6 first */ 115 /* Else try ARR0-ARR6 first */
111 } else { 116 } else {
112 for (i = 0; i < 7; i++) { 117 for (i = 0; i < 7; i++) {
113 cyrix_get_arr(i, &lbase, &lsize, &ltype); 118 cyrix_get_arr(i, &lbase, &lsize, &ltype);
114 if (lsize == 0) 119 if (lsize == 0)
115 return i; 120 return i;
116 } 121 }
117 /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ 122 /*
123 * ARR0-ARR6 isn't free
124 * try ARR7 but its size must be at least 256K
125 */
118 cyrix_get_arr(i, &lbase, &lsize, &ltype); 126 cyrix_get_arr(i, &lbase, &lsize, &ltype);
119 if ((lsize == 0) && (size >= 0x40)) 127 if ((lsize == 0) && (size >= 0x40))
120 return i; 128 return i;
@@ -122,21 +130,22 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg)
122 return -ENOSPC; 130 return -ENOSPC;
123} 131}
124 132
125static u32 cr4 = 0; 133static u32 cr4, ccr3;
126static u32 ccr3;
127 134
128static void prepare_set(void) 135static void prepare_set(void)
129{ 136{
130 u32 cr0; 137 u32 cr0;
131 138
132 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 139 /* Save value of CR4 and clear Page Global Enable (bit 7) */
133 if ( cpu_has_pge ) { 140 if (cpu_has_pge) {
134 cr4 = read_cr4(); 141 cr4 = read_cr4();
135 write_cr4(cr4 & ~X86_CR4_PGE); 142 write_cr4(cr4 & ~X86_CR4_PGE);
136 } 143 }
137 144
138 /* Disable and flush caches. Note that wbinvd flushes the TLBs as 145 /*
139 a side-effect */ 146 * Disable and flush caches.
147 * Note that wbinvd flushes the TLBs as a side-effect
148 */
140 cr0 = read_cr0() | X86_CR0_CD; 149 cr0 = read_cr0() | X86_CR0_CD;
141 wbinvd(); 150 wbinvd();
142 write_cr0(cr0); 151 write_cr0(cr0);
@@ -147,22 +156,21 @@ static void prepare_set(void)
147 156
148 /* Cyrix ARRs - everything else was excluded at the top */ 157 /* Cyrix ARRs - everything else was excluded at the top */
149 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); 158 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10);
150
151} 159}
152 160
153static void post_set(void) 161static void post_set(void)
154{ 162{
155 /* Flush caches and TLBs */ 163 /* Flush caches and TLBs */
156 wbinvd(); 164 wbinvd();
157 165
158 /* Cyrix ARRs - everything else was excluded at the top */ 166 /* Cyrix ARRs - everything else was excluded at the top */
159 setCx86(CX86_CCR3, ccr3); 167 setCx86(CX86_CCR3, ccr3);
160 168
161 /* Enable caches */ 169 /* Enable caches */
162 write_cr0(read_cr0() & 0xbfffffff); 170 write_cr0(read_cr0() & 0xbfffffff);
163 171
164 /* Restore value of CR4 */ 172 /* Restore value of CR4 */
165 if ( cpu_has_pge ) 173 if (cpu_has_pge)
166 write_cr4(cr4); 174 write_cr4(cr4);
167} 175}
168 176
@@ -178,7 +186,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
178 size >>= 6; 186 size >>= 6;
179 187
180 size &= 0x7fff; /* make sure arr_size <= 14 */ 188 size &= 0x7fff; /* make sure arr_size <= 14 */
181 for (arr_size = 0; size; arr_size++, size >>= 1) ; 189 for (arr_size = 0; size; arr_size++, size >>= 1)
190 ;
182 191
183 if (reg < 7) { 192 if (reg < 7) {
184 switch (type) { 193 switch (type) {
@@ -215,18 +224,18 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base,
215 prepare_set(); 224 prepare_set();
216 225
217 base <<= PAGE_SHIFT; 226 base <<= PAGE_SHIFT;
218 setCx86(arr, ((unsigned char *) &base)[3]); 227 setCx86(arr + 0, ((unsigned char *)&base)[3]);
219 setCx86(arr + 1, ((unsigned char *) &base)[2]); 228 setCx86(arr + 1, ((unsigned char *)&base)[2]);
220 setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); 229 setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size);
221 setCx86(CX86_RCR_BASE + reg, arr_type); 230 setCx86(CX86_RCR_BASE + reg, arr_type);
222 231
223 post_set(); 232 post_set();
224} 233}
225 234
226typedef struct { 235typedef struct {
227 unsigned long base; 236 unsigned long base;
228 unsigned long size; 237 unsigned long size;
229 mtrr_type type; 238 mtrr_type type;
230} arr_state_t; 239} arr_state_t;
231 240
232static arr_state_t arr_state[8] = { 241static arr_state_t arr_state[8] = {
@@ -247,16 +256,17 @@ static void cyrix_set_all(void)
247 setCx86(CX86_CCR0 + i, ccr_state[i]); 256 setCx86(CX86_CCR0 + i, ccr_state[i]);
248 for (; i < 7; i++) 257 for (; i < 7; i++)
249 setCx86(CX86_CCR4 + i, ccr_state[i]); 258 setCx86(CX86_CCR4 + i, ccr_state[i]);
250 for (i = 0; i < 8; i++) 259
251 cyrix_set_arr(i, arr_state[i].base, 260 for (i = 0; i < 8; i++) {
261 cyrix_set_arr(i, arr_state[i].base,
252 arr_state[i].size, arr_state[i].type); 262 arr_state[i].size, arr_state[i].type);
263 }
253 264
254 post_set(); 265 post_set();
255} 266}
256 267
257static struct mtrr_ops cyrix_mtrr_ops = { 268static struct mtrr_ops cyrix_mtrr_ops = {
258 .vendor = X86_VENDOR_CYRIX, 269 .vendor = X86_VENDOR_CYRIX,
259// .init = cyrix_arr_init,
260 .set_all = cyrix_set_all, 270 .set_all = cyrix_set_all,
261 .set = cyrix_set_arr, 271 .set = cyrix_set_arr,
262 .get = cyrix_get_arr, 272 .get = cyrix_get_arr,
@@ -270,5 +280,3 @@ int __init cyrix_init_mtrr(void)
270 set_mtrr_ops(&cyrix_mtrr_ops); 280 set_mtrr_ops(&cyrix_mtrr_ops);
271 return 0; 281 return 0;
272} 282}
273
274//arch_initcall(cyrix_init_mtrr);
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index 0543f69f0b27..55da0c5f68dd 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -1,28 +1,34 @@
1/* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong 1/*
2 because MTRRs can span upto 40 bits (36bits on most modern x86) */ 2 * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong
3 * because MTRRs can span upto 40 bits (36bits on most modern x86)
4 */
5#define DEBUG
6
7#include <linux/module.h>
3#include <linux/init.h> 8#include <linux/init.h>
4#include <linux/slab.h> 9#include <linux/slab.h>
10#include <linux/io.h>
5#include <linux/mm.h> 11#include <linux/mm.h>
6#include <linux/module.h> 12
7#include <asm/io.h>
8#include <asm/mtrr.h>
9#include <asm/msr.h>
10#include <asm/system.h>
11#include <asm/cpufeature.h>
12#include <asm/processor-flags.h> 13#include <asm/processor-flags.h>
14#include <asm/cpufeature.h>
13#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
16#include <asm/system.h>
17#include <asm/mtrr.h>
18#include <asm/msr.h>
14#include <asm/pat.h> 19#include <asm/pat.h>
20
15#include "mtrr.h" 21#include "mtrr.h"
16 22
17struct fixed_range_block { 23struct fixed_range_block {
18 int base_msr; /* start address of an MTRR block */ 24 int base_msr; /* start address of an MTRR block */
19 int ranges; /* number of MTRRs in this block */ 25 int ranges; /* number of MTRRs in this block */
20}; 26};
21 27
22static struct fixed_range_block fixed_range_blocks[] = { 28static struct fixed_range_block fixed_range_blocks[] = {
23 { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ 29 { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */
24 { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ 30 { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */
25 { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ 31 { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */
26 {} 32 {}
27}; 33};
28 34
@@ -30,10 +36,10 @@ static unsigned long smp_changes_mask;
30static int mtrr_state_set; 36static int mtrr_state_set;
31u64 mtrr_tom2; 37u64 mtrr_tom2;
32 38
33struct mtrr_state_type mtrr_state = {}; 39struct mtrr_state_type mtrr_state;
34EXPORT_SYMBOL_GPL(mtrr_state); 40EXPORT_SYMBOL_GPL(mtrr_state);
35 41
36/** 42/*
37 * BIOS is expected to clear MtrrFixDramModEn bit, see for example 43 * BIOS is expected to clear MtrrFixDramModEn bit, see for example
38 * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD 44 * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD
39 * Opteron Processors" (26094 Rev. 3.30 February 2006), section 45 * Opteron Processors" (26094 Rev. 3.30 February 2006), section
@@ -104,9 +110,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
104 * Look of multiple ranges matching this address and pick type 110 * Look of multiple ranges matching this address and pick type
105 * as per MTRR precedence 111 * as per MTRR precedence
106 */ 112 */
107 if (!(mtrr_state.enabled & 2)) { 113 if (!(mtrr_state.enabled & 2))
108 return mtrr_state.def_type; 114 return mtrr_state.def_type;
109 }
110 115
111 prev_match = 0xFF; 116 prev_match = 0xFF;
112 for (i = 0; i < num_var_ranges; ++i) { 117 for (i = 0; i < num_var_ranges; ++i) {
@@ -125,9 +130,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
125 if (start_state != end_state) 130 if (start_state != end_state)
126 return 0xFE; 131 return 0xFE;
127 132
128 if ((start & mask) != (base & mask)) { 133 if ((start & mask) != (base & mask))
129 continue; 134 continue;
130 }
131 135
132 curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; 136 curr_match = mtrr_state.var_ranges[i].base_lo & 0xff;
133 if (prev_match == 0xFF) { 137 if (prev_match == 0xFF) {
@@ -148,9 +152,8 @@ u8 mtrr_type_lookup(u64 start, u64 end)
148 curr_match = MTRR_TYPE_WRTHROUGH; 152 curr_match = MTRR_TYPE_WRTHROUGH;
149 } 153 }
150 154
151 if (prev_match != curr_match) { 155 if (prev_match != curr_match)
152 return MTRR_TYPE_UNCACHABLE; 156 return MTRR_TYPE_UNCACHABLE;
153 }
154 } 157 }
155 158
156 if (mtrr_tom2) { 159 if (mtrr_tom2) {
@@ -164,7 +167,7 @@ u8 mtrr_type_lookup(u64 start, u64 end)
164 return mtrr_state.def_type; 167 return mtrr_state.def_type;
165} 168}
166 169
167/* Get the MSR pair relating to a var range */ 170/* Get the MSR pair relating to a var range */
168static void 171static void
169get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) 172get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
170{ 173{
@@ -172,7 +175,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr)
172 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); 175 rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi);
173} 176}
174 177
175/* fill the MSR pair relating to a var range */ 178/* Fill the MSR pair relating to a var range */
176void fill_mtrr_var_range(unsigned int index, 179void fill_mtrr_var_range(unsigned int index,
177 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) 180 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi)
178{ 181{
@@ -186,10 +189,9 @@ void fill_mtrr_var_range(unsigned int index,
186 vr[index].mask_hi = mask_hi; 189 vr[index].mask_hi = mask_hi;
187} 190}
188 191
189static void 192static void get_fixed_ranges(mtrr_type *frs)
190get_fixed_ranges(mtrr_type * frs)
191{ 193{
192 unsigned int *p = (unsigned int *) frs; 194 unsigned int *p = (unsigned int *)frs;
193 int i; 195 int i;
194 196
195 k8_check_syscfg_dram_mod_en(); 197 k8_check_syscfg_dram_mod_en();
@@ -217,22 +219,22 @@ static void __init print_fixed_last(void)
217 if (!last_fixed_end) 219 if (!last_fixed_end)
218 return; 220 return;
219 221
220 printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start, 222 pr_debug(" %05X-%05X %s\n", last_fixed_start,
221 last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); 223 last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type));
222 224
223 last_fixed_end = 0; 225 last_fixed_end = 0;
224} 226}
225 227
226static void __init update_fixed_last(unsigned base, unsigned end, 228static void __init update_fixed_last(unsigned base, unsigned end,
227 mtrr_type type) 229 mtrr_type type)
228{ 230{
229 last_fixed_start = base; 231 last_fixed_start = base;
230 last_fixed_end = end; 232 last_fixed_end = end;
231 last_fixed_type = type; 233 last_fixed_type = type;
232} 234}
233 235
234static void __init print_fixed(unsigned base, unsigned step, 236static void __init
235 const mtrr_type *types) 237print_fixed(unsigned base, unsigned step, const mtrr_type *types)
236{ 238{
237 unsigned i; 239 unsigned i;
238 240
@@ -259,54 +261,55 @@ static void __init print_mtrr_state(void)
259 unsigned int i; 261 unsigned int i;
260 int high_width; 262 int high_width;
261 263
262 printk(KERN_DEBUG "MTRR default type: %s\n", 264 pr_debug("MTRR default type: %s\n",
263 mtrr_attrib_to_str(mtrr_state.def_type)); 265 mtrr_attrib_to_str(mtrr_state.def_type));
264 if (mtrr_state.have_fixed) { 266 if (mtrr_state.have_fixed) {
265 printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n", 267 pr_debug("MTRR fixed ranges %sabled:\n",
266 mtrr_state.enabled & 1 ? "en" : "dis"); 268 mtrr_state.enabled & 1 ? "en" : "dis");
267 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); 269 print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0);
268 for (i = 0; i < 2; ++i) 270 for (i = 0; i < 2; ++i)
269 print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); 271 print_fixed(0x80000 + i * 0x20000, 0x04000,
272 mtrr_state.fixed_ranges + (i + 1) * 8);
270 for (i = 0; i < 8; ++i) 273 for (i = 0; i < 8; ++i)
271 print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); 274 print_fixed(0xC0000 + i * 0x08000, 0x01000,
275 mtrr_state.fixed_ranges + (i + 3) * 8);
272 276
273 /* tail */ 277 /* tail */
274 print_fixed_last(); 278 print_fixed_last();
275 } 279 }
276 printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", 280 pr_debug("MTRR variable ranges %sabled:\n",
277 mtrr_state.enabled & 2 ? "en" : "dis"); 281 mtrr_state.enabled & 2 ? "en" : "dis");
278 if (size_or_mask & 0xffffffffUL) 282 if (size_or_mask & 0xffffffffUL)
279 high_width = ffs(size_or_mask & 0xffffffffUL) - 1; 283 high_width = ffs(size_or_mask & 0xffffffffUL) - 1;
280 else 284 else
281 high_width = ffs(size_or_mask>>32) + 32 - 1; 285 high_width = ffs(size_or_mask>>32) + 32 - 1;
282 high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; 286 high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4;
287
283 for (i = 0; i < num_var_ranges; ++i) { 288 for (i = 0; i < num_var_ranges; ++i) {
284 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) 289 if (mtrr_state.var_ranges[i].mask_lo & (1 << 11))
285 printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", 290 pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n",
286 i, 291 i,
287 high_width, 292 high_width,
288 mtrr_state.var_ranges[i].base_hi, 293 mtrr_state.var_ranges[i].base_hi,
289 mtrr_state.var_ranges[i].base_lo >> 12, 294 mtrr_state.var_ranges[i].base_lo >> 12,
290 high_width, 295 high_width,
291 mtrr_state.var_ranges[i].mask_hi, 296 mtrr_state.var_ranges[i].mask_hi,
292 mtrr_state.var_ranges[i].mask_lo >> 12, 297 mtrr_state.var_ranges[i].mask_lo >> 12,
293 mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); 298 mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff));
294 else 299 else
295 printk(KERN_DEBUG " %u disabled\n", i); 300 pr_debug(" %u disabled\n", i);
296 }
297 if (mtrr_tom2) {
298 printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n",
299 mtrr_tom2, mtrr_tom2>>20);
300 } 301 }
302 if (mtrr_tom2)
303 pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20);
301} 304}
302 305
303/* Grab all of the MTRR state for this CPU into *state */ 306/* Grab all of the MTRR state for this CPU into *state */
304void __init get_mtrr_state(void) 307void __init get_mtrr_state(void)
305{ 308{
306 unsigned int i;
307 struct mtrr_var_range *vrs; 309 struct mtrr_var_range *vrs;
308 unsigned lo, dummy;
309 unsigned long flags; 310 unsigned long flags;
311 unsigned lo, dummy;
312 unsigned int i;
310 313
311 vrs = mtrr_state.var_ranges; 314 vrs = mtrr_state.var_ranges;
312 315
@@ -324,6 +327,7 @@ void __init get_mtrr_state(void)
324 327
325 if (amd_special_default_mtrr()) { 328 if (amd_special_default_mtrr()) {
326 unsigned low, high; 329 unsigned low, high;
330
327 /* TOP_MEM2 */ 331 /* TOP_MEM2 */
328 rdmsr(MSR_K8_TOP_MEM2, low, high); 332 rdmsr(MSR_K8_TOP_MEM2, low, high);
329 mtrr_tom2 = high; 333 mtrr_tom2 = high;
@@ -344,10 +348,9 @@ void __init get_mtrr_state(void)
344 348
345 post_set(); 349 post_set();
346 local_irq_restore(flags); 350 local_irq_restore(flags);
347
348} 351}
349 352
350/* Some BIOS's are fucked and don't set all MTRRs the same! */ 353/* Some BIOS's are messed up and don't set all MTRRs the same! */
351void __init mtrr_state_warn(void) 354void __init mtrr_state_warn(void)
352{ 355{
353 unsigned long mask = smp_changes_mask; 356 unsigned long mask = smp_changes_mask;
@@ -355,28 +358,33 @@ void __init mtrr_state_warn(void)
355 if (!mask) 358 if (!mask)
356 return; 359 return;
357 if (mask & MTRR_CHANGE_MASK_FIXED) 360 if (mask & MTRR_CHANGE_MASK_FIXED)
358 printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); 361 pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n");
359 if (mask & MTRR_CHANGE_MASK_VARIABLE) 362 if (mask & MTRR_CHANGE_MASK_VARIABLE)
360 printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); 363 pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n");
361 if (mask & MTRR_CHANGE_MASK_DEFTYPE) 364 if (mask & MTRR_CHANGE_MASK_DEFTYPE)
362 printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); 365 pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n");
366
363 printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); 367 printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n");
364 printk(KERN_INFO "mtrr: corrected configuration.\n"); 368 printk(KERN_INFO "mtrr: corrected configuration.\n");
365} 369}
366 370
367/* Doesn't attempt to pass an error out to MTRR users 371/*
368 because it's quite complicated in some cases and probably not 372 * Doesn't attempt to pass an error out to MTRR users
369 worth it because the best error handling is to ignore it. */ 373 * because it's quite complicated in some cases and probably not
374 * worth it because the best error handling is to ignore it.
375 */
370void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) 376void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b)
371{ 377{
372 if (wrmsr_safe(msr, a, b) < 0) 378 if (wrmsr_safe(msr, a, b) < 0) {
373 printk(KERN_ERR 379 printk(KERN_ERR
374 "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", 380 "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n",
375 smp_processor_id(), msr, a, b); 381 smp_processor_id(), msr, a, b);
382 }
376} 383}
377 384
378/** 385/**
379 * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have 386 * set_fixed_range - checks & updates a fixed-range MTRR if it
387 * differs from the value it should have
380 * @msr: MSR address of the MTTR which should be checked and updated 388 * @msr: MSR address of the MTTR which should be checked and updated
381 * @changed: pointer which indicates whether the MTRR needed to be changed 389 * @changed: pointer which indicates whether the MTRR needed to be changed
382 * @msrwords: pointer to the MSR values which the MSR should have 390 * @msrwords: pointer to the MSR values which the MSR should have
@@ -401,20 +409,23 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords)
401 * 409 *
402 * Returns: The index of the region on success, else negative on error. 410 * Returns: The index of the region on success, else negative on error.
403 */ 411 */
404int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) 412int
413generic_get_free_region(unsigned long base, unsigned long size, int replace_reg)
405{ 414{
406 int i, max;
407 mtrr_type ltype;
408 unsigned long lbase, lsize; 415 unsigned long lbase, lsize;
416 mtrr_type ltype;
417 int i, max;
409 418
410 max = num_var_ranges; 419 max = num_var_ranges;
411 if (replace_reg >= 0 && replace_reg < max) 420 if (replace_reg >= 0 && replace_reg < max)
412 return replace_reg; 421 return replace_reg;
422
413 for (i = 0; i < max; ++i) { 423 for (i = 0; i < max; ++i) {
414 mtrr_if->get(i, &lbase, &lsize, &ltype); 424 mtrr_if->get(i, &lbase, &lsize, &ltype);
415 if (lsize == 0) 425 if (lsize == 0)
416 return i; 426 return i;
417 } 427 }
428
418 return -ENOSPC; 429 return -ENOSPC;
419} 430}
420 431
@@ -434,7 +445,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base,
434 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); 445 rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi);
435 446
436 if ((mask_lo & 0x800) == 0) { 447 if ((mask_lo & 0x800) == 0) {
437 /* Invalid (i.e. free) range */ 448 /* Invalid (i.e. free) range */
438 *base = 0; 449 *base = 0;
439 *size = 0; 450 *size = 0;
440 *type = 0; 451 *type = 0;
@@ -471,27 +482,31 @@ out_put_cpu:
471} 482}
472 483
473/** 484/**
474 * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set 485 * set_fixed_ranges - checks & updates the fixed-range MTRRs if they
486 * differ from the saved set
475 * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() 487 * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges()
476 */ 488 */
477static int set_fixed_ranges(mtrr_type * frs) 489static int set_fixed_ranges(mtrr_type *frs)
478{ 490{
479 unsigned long long *saved = (unsigned long long *) frs; 491 unsigned long long *saved = (unsigned long long *)frs;
480 bool changed = false; 492 bool changed = false;
481 int block=-1, range; 493 int block = -1, range;
482 494
483 k8_check_syscfg_dram_mod_en(); 495 k8_check_syscfg_dram_mod_en();
484 496
485 while (fixed_range_blocks[++block].ranges) 497 while (fixed_range_blocks[++block].ranges) {
486 for (range=0; range < fixed_range_blocks[block].ranges; range++) 498 for (range = 0; range < fixed_range_blocks[block].ranges; range++)
487 set_fixed_range(fixed_range_blocks[block].base_msr + range, 499 set_fixed_range(fixed_range_blocks[block].base_msr + range,
488 &changed, (unsigned int *) saved++); 500 &changed, (unsigned int *)saved++);
501 }
489 502
490 return changed; 503 return changed;
491} 504}
492 505
493/* Set the MSR pair relating to a var range. Returns TRUE if 506/*
494 changes are made */ 507 * Set the MSR pair relating to a var range.
508 * Returns true if changes are made.
509 */
495static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) 510static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
496{ 511{
497 unsigned int lo, hi; 512 unsigned int lo, hi;
@@ -501,6 +516,7 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr)
501 if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) 516 if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL)
502 || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != 517 || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) !=
503 (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { 518 (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) {
519
504 mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); 520 mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi);
505 changed = true; 521 changed = true;
506 } 522 }
@@ -526,21 +542,26 @@ static u32 deftype_lo, deftype_hi;
526 */ 542 */
527static unsigned long set_mtrr_state(void) 543static unsigned long set_mtrr_state(void)
528{ 544{
529 unsigned int i;
530 unsigned long change_mask = 0; 545 unsigned long change_mask = 0;
546 unsigned int i;
531 547
532 for (i = 0; i < num_var_ranges; i++) 548 for (i = 0; i < num_var_ranges; i++) {
533 if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) 549 if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i]))
534 change_mask |= MTRR_CHANGE_MASK_VARIABLE; 550 change_mask |= MTRR_CHANGE_MASK_VARIABLE;
551 }
535 552
536 if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) 553 if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges))
537 change_mask |= MTRR_CHANGE_MASK_FIXED; 554 change_mask |= MTRR_CHANGE_MASK_FIXED;
538 555
539 /* Set_mtrr_restore restores the old value of MTRRdefType, 556 /*
540 so to set it we fiddle with the saved value */ 557 * Set_mtrr_restore restores the old value of MTRRdefType,
558 * so to set it we fiddle with the saved value:
559 */
541 if ((deftype_lo & 0xff) != mtrr_state.def_type 560 if ((deftype_lo & 0xff) != mtrr_state.def_type
542 || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { 561 || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) {
543 deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); 562
563 deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type |
564 (mtrr_state.enabled << 10);
544 change_mask |= MTRR_CHANGE_MASK_DEFTYPE; 565 change_mask |= MTRR_CHANGE_MASK_DEFTYPE;
545 } 566 }
546 567
@@ -548,33 +569,36 @@ static unsigned long set_mtrr_state(void)
548} 569}
549 570
550 571
551static unsigned long cr4 = 0; 572static unsigned long cr4;
552static DEFINE_SPINLOCK(set_atomicity_lock); 573static DEFINE_SPINLOCK(set_atomicity_lock);
553 574
554/* 575/*
555 * Since we are disabling the cache don't allow any interrupts - they 576 * Since we are disabling the cache don't allow any interrupts,
556 * would run extremely slow and would only increase the pain. The caller must 577 * they would run extremely slow and would only increase the pain.
557 * ensure that local interrupts are disabled and are reenabled after post_set() 578 *
558 * has been called. 579 * The caller must ensure that local interrupts are disabled and
580 * are reenabled after post_set() has been called.
559 */ 581 */
560
561static void prepare_set(void) __acquires(set_atomicity_lock) 582static void prepare_set(void) __acquires(set_atomicity_lock)
562{ 583{
563 unsigned long cr0; 584 unsigned long cr0;
564 585
565 /* Note that this is not ideal, since the cache is only flushed/disabled 586 /*
566 for this CPU while the MTRRs are changed, but changing this requires 587 * Note that this is not ideal
567 more invasive changes to the way the kernel boots */ 588 * since the cache is only flushed/disabled for this CPU while the
589 * MTRRs are changed, but changing this requires more invasive
590 * changes to the way the kernel boots
591 */
568 592
569 spin_lock(&set_atomicity_lock); 593 spin_lock(&set_atomicity_lock);
570 594
571 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ 595 /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */
572 cr0 = read_cr0() | X86_CR0_CD; 596 cr0 = read_cr0() | X86_CR0_CD;
573 write_cr0(cr0); 597 write_cr0(cr0);
574 wbinvd(); 598 wbinvd();
575 599
576 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 600 /* Save value of CR4 and clear Page Global Enable (bit 7) */
577 if ( cpu_has_pge ) { 601 if (cpu_has_pge) {
578 cr4 = read_cr4(); 602 cr4 = read_cr4();
579 write_cr4(cr4 & ~X86_CR4_PGE); 603 write_cr4(cr4 & ~X86_CR4_PGE);
580 } 604 }
@@ -582,26 +606,26 @@ static void prepare_set(void) __acquires(set_atomicity_lock)
582 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ 606 /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */
583 __flush_tlb(); 607 __flush_tlb();
584 608
585 /* Save MTRR state */ 609 /* Save MTRR state */
586 rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); 610 rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
587 611
588 /* Disable MTRRs, and set the default type to uncached */ 612 /* Disable MTRRs, and set the default type to uncached */
589 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); 613 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi);
590} 614}
591 615
592static void post_set(void) __releases(set_atomicity_lock) 616static void post_set(void) __releases(set_atomicity_lock)
593{ 617{
594 /* Flush TLBs (no need to flush caches - they are disabled) */ 618 /* Flush TLBs (no need to flush caches - they are disabled) */
595 __flush_tlb(); 619 __flush_tlb();
596 620
597 /* Intel (P6) standard MTRRs */ 621 /* Intel (P6) standard MTRRs */
598 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); 622 mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi);
599 623
600 /* Enable caches */ 624 /* Enable caches */
601 write_cr0(read_cr0() & 0xbfffffff); 625 write_cr0(read_cr0() & 0xbfffffff);
602 626
603 /* Restore value of CR4 */ 627 /* Restore value of CR4 */
604 if ( cpu_has_pge ) 628 if (cpu_has_pge)
605 write_cr4(cr4); 629 write_cr4(cr4);
606 spin_unlock(&set_atomicity_lock); 630 spin_unlock(&set_atomicity_lock);
607} 631}
@@ -623,24 +647,27 @@ static void generic_set_all(void)
623 post_set(); 647 post_set();
624 local_irq_restore(flags); 648 local_irq_restore(flags);
625 649
626 /* Use the atomic bitops to update the global mask */ 650 /* Use the atomic bitops to update the global mask */
627 for (count = 0; count < sizeof mask * 8; ++count) { 651 for (count = 0; count < sizeof mask * 8; ++count) {
628 if (mask & 0x01) 652 if (mask & 0x01)
629 set_bit(count, &smp_changes_mask); 653 set_bit(count, &smp_changes_mask);
630 mask >>= 1; 654 mask >>= 1;
631 } 655 }
632 656
633} 657}
634 658
659/**
660 * generic_set_mtrr - set variable MTRR register on the local CPU.
661 *
662 * @reg: The register to set.
663 * @base: The base address of the region.
664 * @size: The size of the region. If this is 0 the region is disabled.
665 * @type: The type of the region.
666 *
667 * Returns nothing.
668 */
635static void generic_set_mtrr(unsigned int reg, unsigned long base, 669static void generic_set_mtrr(unsigned int reg, unsigned long base,
636 unsigned long size, mtrr_type type) 670 unsigned long size, mtrr_type type)
637/* [SUMMARY] Set variable MTRR register on the local CPU.
638 <reg> The register to set.
639 <base> The base address of the region.
640 <size> The size of the region. If this is 0 the region is disabled.
641 <type> The type of the region.
642 [RETURNS] Nothing.
643*/
644{ 671{
645 unsigned long flags; 672 unsigned long flags;
646 struct mtrr_var_range *vr; 673 struct mtrr_var_range *vr;
@@ -651,8 +678,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
651 prepare_set(); 678 prepare_set();
652 679
653 if (size == 0) { 680 if (size == 0) {
654 /* The invalid bit is kept in the mask, so we simply clear the 681 /*
655 relevant mask register to disable a range. */ 682 * The invalid bit is kept in the mask, so we simply
683 * clear the relevant mask register to disable a range.
684 */
656 mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); 685 mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0);
657 memset(vr, 0, sizeof(struct mtrr_var_range)); 686 memset(vr, 0, sizeof(struct mtrr_var_range));
658 } else { 687 } else {
@@ -669,46 +698,50 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base,
669 local_irq_restore(flags); 698 local_irq_restore(flags);
670} 699}
671 700
672int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) 701int generic_validate_add_page(unsigned long base, unsigned long size,
702 unsigned int type)
673{ 703{
674 unsigned long lbase, last; 704 unsigned long lbase, last;
675 705
676 /* For Intel PPro stepping <= 7, must be 4 MiB aligned 706 /*
677 and not touch 0x70000000->0x7003FFFF */ 707 * For Intel PPro stepping <= 7
708 * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF
709 */
678 if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && 710 if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 &&
679 boot_cpu_data.x86_model == 1 && 711 boot_cpu_data.x86_model == 1 &&
680 boot_cpu_data.x86_mask <= 7) { 712 boot_cpu_data.x86_mask <= 7) {
681 if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { 713 if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) {
682 printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); 714 pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base);
683 return -EINVAL; 715 return -EINVAL;
684 } 716 }
685 if (!(base + size < 0x70000 || base > 0x7003F) && 717 if (!(base + size < 0x70000 || base > 0x7003F) &&
686 (type == MTRR_TYPE_WRCOMB 718 (type == MTRR_TYPE_WRCOMB
687 || type == MTRR_TYPE_WRBACK)) { 719 || type == MTRR_TYPE_WRBACK)) {
688 printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); 720 pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n");
689 return -EINVAL; 721 return -EINVAL;
690 } 722 }
691 } 723 }
692 724
693 /* Check upper bits of base and last are equal and lower bits are 0 725 /*
694 for base and 1 for last */ 726 * Check upper bits of base and last are equal and lower bits are 0
727 * for base and 1 for last
728 */
695 last = base + size - 1; 729 last = base + size - 1;
696 for (lbase = base; !(lbase & 1) && (last & 1); 730 for (lbase = base; !(lbase & 1) && (last & 1);
697 lbase = lbase >> 1, last = last >> 1) ; 731 lbase = lbase >> 1, last = last >> 1)
732 ;
698 if (lbase != last) { 733 if (lbase != last) {
699 printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", 734 pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size);
700 base, size);
701 return -EINVAL; 735 return -EINVAL;
702 } 736 }
703 return 0; 737 return 0;
704} 738}
705 739
706
707static int generic_have_wrcomb(void) 740static int generic_have_wrcomb(void)
708{ 741{
709 unsigned long config, dummy; 742 unsigned long config, dummy;
710 rdmsr(MSR_MTRRcap, config, dummy); 743 rdmsr(MSR_MTRRcap, config, dummy);
711 return (config & (1 << 10)); 744 return config & (1 << 10);
712} 745}
713 746
714int positive_have_wrcomb(void) 747int positive_have_wrcomb(void)
@@ -716,14 +749,15 @@ int positive_have_wrcomb(void)
716 return 1; 749 return 1;
717} 750}
718 751
719/* generic structure... 752/*
753 * Generic structure...
720 */ 754 */
721struct mtrr_ops generic_mtrr_ops = { 755struct mtrr_ops generic_mtrr_ops = {
722 .use_intel_if = 1, 756 .use_intel_if = 1,
723 .set_all = generic_set_all, 757 .set_all = generic_set_all,
724 .get = generic_get_mtrr, 758 .get = generic_get_mtrr,
725 .get_free_region = generic_get_free_region, 759 .get_free_region = generic_get_free_region,
726 .set = generic_set_mtrr, 760 .set = generic_set_mtrr,
727 .validate_add_page = generic_validate_add_page, 761 .validate_add_page = generic_validate_add_page,
728 .have_wrcomb = generic_have_wrcomb, 762 .have_wrcomb = generic_have_wrcomb,
729}; 763};
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index fb73a52913a4..08b6ea4c62b4 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -1,27 +1,28 @@
1#include <linux/init.h>
2#include <linux/proc_fs.h>
3#include <linux/capability.h> 1#include <linux/capability.h>
4#include <linux/ctype.h>
5#include <linux/module.h>
6#include <linux/seq_file.h> 2#include <linux/seq_file.h>
7#include <asm/uaccess.h> 3#include <linux/uaccess.h>
4#include <linux/proc_fs.h>
5#include <linux/module.h>
6#include <linux/ctype.h>
7#include <linux/init.h>
8 8
9#define LINE_SIZE 80 9#define LINE_SIZE 80
10 10
11#include <asm/mtrr.h> 11#include <asm/mtrr.h>
12
12#include "mtrr.h" 13#include "mtrr.h"
13 14
14#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) 15#define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private)
15 16
16static const char *const mtrr_strings[MTRR_NUM_TYPES] = 17static const char *const mtrr_strings[MTRR_NUM_TYPES] =
17{ 18{
18 "uncachable", /* 0 */ 19 "uncachable", /* 0 */
19 "write-combining", /* 1 */ 20 "write-combining", /* 1 */
20 "?", /* 2 */ 21 "?", /* 2 */
21 "?", /* 3 */ 22 "?", /* 3 */
22 "write-through", /* 4 */ 23 "write-through", /* 4 */
23 "write-protect", /* 5 */ 24 "write-protect", /* 5 */
24 "write-back", /* 6 */ 25 "write-back", /* 6 */
25}; 26};
26 27
27const char *mtrr_attrib_to_str(int x) 28const char *mtrr_attrib_to_str(int x)
@@ -35,8 +36,8 @@ static int
35mtrr_file_add(unsigned long base, unsigned long size, 36mtrr_file_add(unsigned long base, unsigned long size,
36 unsigned int type, bool increment, struct file *file, int page) 37 unsigned int type, bool increment, struct file *file, int page)
37{ 38{
39 unsigned int *fcount = FILE_FCOUNT(file);
38 int reg, max; 40 int reg, max;
39 unsigned int *fcount = FILE_FCOUNT(file);
40 41
41 max = num_var_ranges; 42 max = num_var_ranges;
42 if (fcount == NULL) { 43 if (fcount == NULL) {
@@ -61,8 +62,8 @@ static int
61mtrr_file_del(unsigned long base, unsigned long size, 62mtrr_file_del(unsigned long base, unsigned long size,
62 struct file *file, int page) 63 struct file *file, int page)
63{ 64{
64 int reg;
65 unsigned int *fcount = FILE_FCOUNT(file); 65 unsigned int *fcount = FILE_FCOUNT(file);
66 int reg;
66 67
67 if (!page) { 68 if (!page) {
68 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) 69 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1)))
@@ -81,13 +82,14 @@ mtrr_file_del(unsigned long base, unsigned long size,
81 return reg; 82 return reg;
82} 83}
83 84
84/* RED-PEN: seq_file can seek now. this is ignored. */ 85/*
86 * seq_file can seek but we ignore it.
87 *
88 * Format of control line:
89 * "base=%Lx size=%Lx type=%s" or "disable=%d"
90 */
85static ssize_t 91static ssize_t
86mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) 92mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
87/* Format of control line:
88 "base=%Lx size=%Lx type=%s" OR:
89 "disable=%d"
90*/
91{ 93{
92 int i, err; 94 int i, err;
93 unsigned long reg; 95 unsigned long reg;
@@ -100,15 +102,18 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
100 return -EPERM; 102 return -EPERM;
101 if (!len) 103 if (!len)
102 return -EINVAL; 104 return -EINVAL;
105
103 memset(line, 0, LINE_SIZE); 106 memset(line, 0, LINE_SIZE);
104 if (len > LINE_SIZE) 107 if (len > LINE_SIZE)
105 len = LINE_SIZE; 108 len = LINE_SIZE;
106 if (copy_from_user(line, buf, len - 1)) 109 if (copy_from_user(line, buf, len - 1))
107 return -EFAULT; 110 return -EFAULT;
111
108 linelen = strlen(line); 112 linelen = strlen(line);
109 ptr = line + linelen - 1; 113 ptr = line + linelen - 1;
110 if (linelen && *ptr == '\n') 114 if (linelen && *ptr == '\n')
111 *ptr = '\0'; 115 *ptr = '\0';
116
112 if (!strncmp(line, "disable=", 8)) { 117 if (!strncmp(line, "disable=", 8)) {
113 reg = simple_strtoul(line + 8, &ptr, 0); 118 reg = simple_strtoul(line + 8, &ptr, 0);
114 err = mtrr_del_page(reg, 0, 0); 119 err = mtrr_del_page(reg, 0, 0);
@@ -116,28 +121,35 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
116 return err; 121 return err;
117 return len; 122 return len;
118 } 123 }
124
119 if (strncmp(line, "base=", 5)) 125 if (strncmp(line, "base=", 5))
120 return -EINVAL; 126 return -EINVAL;
127
121 base = simple_strtoull(line + 5, &ptr, 0); 128 base = simple_strtoull(line + 5, &ptr, 0);
122 for (; isspace(*ptr); ++ptr) ; 129 for (; isspace(*ptr); ++ptr)
130 ;
131
123 if (strncmp(ptr, "size=", 5)) 132 if (strncmp(ptr, "size=", 5))
124 return -EINVAL; 133 return -EINVAL;
134
125 size = simple_strtoull(ptr + 5, &ptr, 0); 135 size = simple_strtoull(ptr + 5, &ptr, 0);
126 if ((base & 0xfff) || (size & 0xfff)) 136 if ((base & 0xfff) || (size & 0xfff))
127 return -EINVAL; 137 return -EINVAL;
128 for (; isspace(*ptr); ++ptr) ; 138 for (; isspace(*ptr); ++ptr)
139 ;
140
129 if (strncmp(ptr, "type=", 5)) 141 if (strncmp(ptr, "type=", 5))
130 return -EINVAL; 142 return -EINVAL;
131 ptr += 5; 143 ptr += 5;
132 for (; isspace(*ptr); ++ptr) ; 144 for (; isspace(*ptr); ++ptr)
145 ;
146
133 for (i = 0; i < MTRR_NUM_TYPES; ++i) { 147 for (i = 0; i < MTRR_NUM_TYPES; ++i) {
134 if (strcmp(ptr, mtrr_strings[i])) 148 if (strcmp(ptr, mtrr_strings[i]))
135 continue; 149 continue;
136 base >>= PAGE_SHIFT; 150 base >>= PAGE_SHIFT;
137 size >>= PAGE_SHIFT; 151 size >>= PAGE_SHIFT;
138 err = 152 err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true);
139 mtrr_add_page((unsigned long) base, (unsigned long) size, i,
140 true);
141 if (err < 0) 153 if (err < 0)
142 return err; 154 return err;
143 return len; 155 return len;
@@ -181,7 +193,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
181 case MTRRIOC32_SET_PAGE_ENTRY: 193 case MTRRIOC32_SET_PAGE_ENTRY:
182 case MTRRIOC32_DEL_PAGE_ENTRY: 194 case MTRRIOC32_DEL_PAGE_ENTRY:
183 case MTRRIOC32_KILL_PAGE_ENTRY: { 195 case MTRRIOC32_KILL_PAGE_ENTRY: {
184 struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; 196 struct mtrr_sentry32 __user *s32;
197
198 s32 = (struct mtrr_sentry32 __user *)__arg;
185 err = get_user(sentry.base, &s32->base); 199 err = get_user(sentry.base, &s32->base);
186 err |= get_user(sentry.size, &s32->size); 200 err |= get_user(sentry.size, &s32->size);
187 err |= get_user(sentry.type, &s32->type); 201 err |= get_user(sentry.type, &s32->type);
@@ -191,7 +205,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
191 } 205 }
192 case MTRRIOC32_GET_ENTRY: 206 case MTRRIOC32_GET_ENTRY:
193 case MTRRIOC32_GET_PAGE_ENTRY: { 207 case MTRRIOC32_GET_PAGE_ENTRY: {
194 struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; 208 struct mtrr_gentry32 __user *g32;
209
210 g32 = (struct mtrr_gentry32 __user *)__arg;
195 err = get_user(gentry.regnum, &g32->regnum); 211 err = get_user(gentry.regnum, &g32->regnum);
196 err |= get_user(gentry.base, &g32->base); 212 err |= get_user(gentry.base, &g32->base);
197 err |= get_user(gentry.size, &g32->size); 213 err |= get_user(gentry.size, &g32->size);
@@ -314,7 +330,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
314 if (err) 330 if (err)
315 return err; 331 return err;
316 332
317 switch(cmd) { 333 switch (cmd) {
318 case MTRRIOC_GET_ENTRY: 334 case MTRRIOC_GET_ENTRY:
319 case MTRRIOC_GET_PAGE_ENTRY: 335 case MTRRIOC_GET_PAGE_ENTRY:
320 if (copy_to_user(arg, &gentry, sizeof gentry)) 336 if (copy_to_user(arg, &gentry, sizeof gentry))
@@ -323,7 +339,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
323#ifdef CONFIG_COMPAT 339#ifdef CONFIG_COMPAT
324 case MTRRIOC32_GET_ENTRY: 340 case MTRRIOC32_GET_ENTRY:
325 case MTRRIOC32_GET_PAGE_ENTRY: { 341 case MTRRIOC32_GET_PAGE_ENTRY: {
326 struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; 342 struct mtrr_gentry32 __user *g32;
343
344 g32 = (struct mtrr_gentry32 __user *)__arg;
327 err = put_user(gentry.base, &g32->base); 345 err = put_user(gentry.base, &g32->base);
328 err |= put_user(gentry.size, &g32->size); 346 err |= put_user(gentry.size, &g32->size);
329 err |= put_user(gentry.regnum, &g32->regnum); 347 err |= put_user(gentry.regnum, &g32->regnum);
@@ -335,11 +353,10 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg)
335 return err; 353 return err;
336} 354}
337 355
338static int 356static int mtrr_close(struct inode *ino, struct file *file)
339mtrr_close(struct inode *ino, struct file *file)
340{ 357{
341 int i, max;
342 unsigned int *fcount = FILE_FCOUNT(file); 358 unsigned int *fcount = FILE_FCOUNT(file);
359 int i, max;
343 360
344 if (fcount != NULL) { 361 if (fcount != NULL) {
345 max = num_var_ranges; 362 max = num_var_ranges;
@@ -359,22 +376,22 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset);
359 376
360static int mtrr_open(struct inode *inode, struct file *file) 377static int mtrr_open(struct inode *inode, struct file *file)
361{ 378{
362 if (!mtrr_if) 379 if (!mtrr_if)
363 return -EIO; 380 return -EIO;
364 if (!mtrr_if->get) 381 if (!mtrr_if->get)
365 return -ENXIO; 382 return -ENXIO;
366 return single_open(file, mtrr_seq_show, NULL); 383 return single_open(file, mtrr_seq_show, NULL);
367} 384}
368 385
369static const struct file_operations mtrr_fops = { 386static const struct file_operations mtrr_fops = {
370 .owner = THIS_MODULE, 387 .owner = THIS_MODULE,
371 .open = mtrr_open, 388 .open = mtrr_open,
372 .read = seq_read, 389 .read = seq_read,
373 .llseek = seq_lseek, 390 .llseek = seq_lseek,
374 .write = mtrr_write, 391 .write = mtrr_write,
375 .unlocked_ioctl = mtrr_ioctl, 392 .unlocked_ioctl = mtrr_ioctl,
376 .compat_ioctl = mtrr_ioctl, 393 .compat_ioctl = mtrr_ioctl,
377 .release = mtrr_close, 394 .release = mtrr_close,
378}; 395};
379 396
380static int mtrr_seq_show(struct seq_file *seq, void *offset) 397static int mtrr_seq_show(struct seq_file *seq, void *offset)
@@ -388,23 +405,24 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset)
388 max = num_var_ranges; 405 max = num_var_ranges;
389 for (i = 0; i < max; i++) { 406 for (i = 0; i < max; i++) {
390 mtrr_if->get(i, &base, &size, &type); 407 mtrr_if->get(i, &base, &size, &type);
391 if (size == 0) 408 if (size == 0) {
392 mtrr_usage_table[i] = 0; 409 mtrr_usage_table[i] = 0;
393 else { 410 continue;
394 if (size < (0x100000 >> PAGE_SHIFT)) {
395 /* less than 1MB */
396 factor = 'K';
397 size <<= PAGE_SHIFT - 10;
398 } else {
399 factor = 'M';
400 size >>= 20 - PAGE_SHIFT;
401 }
402 /* RED-PEN: base can be > 32bit */
403 len += seq_printf(seq,
404 "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n",
405 i, base, base >> (20 - PAGE_SHIFT), size, factor,
406 mtrr_usage_table[i], mtrr_attrib_to_str(type));
407 } 411 }
412 if (size < (0x100000 >> PAGE_SHIFT)) {
413 /* less than 1MB */
414 factor = 'K';
415 size <<= PAGE_SHIFT - 10;
416 } else {
417 factor = 'M';
418 size >>= 20 - PAGE_SHIFT;
419 }
420 /* Base can be > 32bit */
421 len += seq_printf(seq, "reg%02i: base=0x%06lx000 "
422 "(%5luMB), size=%5lu%cB, count=%d: %s\n",
423 i, base, base >> (20 - PAGE_SHIFT), size,
424 factor, mtrr_usage_table[i],
425 mtrr_attrib_to_str(type));
408 } 426 }
409 return 0; 427 return 0;
410} 428}
@@ -422,6 +440,5 @@ static int __init mtrr_if_init(void)
422 proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); 440 proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops);
423 return 0; 441 return 0;
424} 442}
425
426arch_initcall(mtrr_if_init); 443arch_initcall(mtrr_if_init);
427#endif /* CONFIG_PROC_FS */ 444#endif /* CONFIG_PROC_FS */
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 8fc248b5aeaf..84e83de54575 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -25,43 +25,49 @@
25 Operating System Writer's Guide" (Intel document number 242692), 25 Operating System Writer's Guide" (Intel document number 242692),
26 section 11.11.7 26 section 11.11.7
27 27
28 This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> 28 This was cleaned and made readable by Patrick Mochel <mochel@osdl.org>
29 on 6-7 March 2002. 29 on 6-7 March 2002.
30 Source: Intel Architecture Software Developers Manual, Volume 3: 30 Source: Intel Architecture Software Developers Manual, Volume 3:
31 System Programming Guide; Section 9.11. (1997 edition - PPro). 31 System Programming Guide; Section 9.11. (1997 edition - PPro).
32*/ 32*/
33 33
34#define DEBUG
35
36#include <linux/types.h> /* FIXME: kvm_para.h needs this */
37
38#include <linux/kvm_para.h>
39#include <linux/uaccess.h>
34#include <linux/module.h> 40#include <linux/module.h>
41#include <linux/mutex.h>
35#include <linux/init.h> 42#include <linux/init.h>
43#include <linux/sort.h>
44#include <linux/cpu.h>
36#include <linux/pci.h> 45#include <linux/pci.h>
37#include <linux/smp.h> 46#include <linux/smp.h>
38#include <linux/cpu.h>
39#include <linux/mutex.h>
40#include <linux/sort.h>
41 47
48#include <asm/processor.h>
42#include <asm/e820.h> 49#include <asm/e820.h>
43#include <asm/mtrr.h> 50#include <asm/mtrr.h>
44#include <asm/uaccess.h>
45#include <asm/processor.h>
46#include <asm/msr.h> 51#include <asm/msr.h>
47#include <asm/kvm_para.h> 52
48#include "mtrr.h" 53#include "mtrr.h"
49 54
50u32 num_var_ranges = 0; 55u32 num_var_ranges;
51 56
52unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; 57unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
53static DEFINE_MUTEX(mtrr_mutex); 58static DEFINE_MUTEX(mtrr_mutex);
54 59
55u64 size_or_mask, size_and_mask; 60u64 size_or_mask, size_and_mask;
61static bool mtrr_aps_delayed_init;
56 62
57static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; 63static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM];
58 64
59struct mtrr_ops * mtrr_if = NULL; 65struct mtrr_ops *mtrr_if;
60 66
61static void set_mtrr(unsigned int reg, unsigned long base, 67static void set_mtrr(unsigned int reg, unsigned long base,
62 unsigned long size, mtrr_type type); 68 unsigned long size, mtrr_type type);
63 69
64void set_mtrr_ops(struct mtrr_ops * ops) 70void set_mtrr_ops(struct mtrr_ops *ops)
65{ 71{
66 if (ops->vendor && ops->vendor < X86_VENDOR_NUM) 72 if (ops->vendor && ops->vendor < X86_VENDOR_NUM)
67 mtrr_ops[ops->vendor] = ops; 73 mtrr_ops[ops->vendor] = ops;
@@ -72,30 +78,36 @@ static int have_wrcomb(void)
72{ 78{
73 struct pci_dev *dev; 79 struct pci_dev *dev;
74 u8 rev; 80 u8 rev;
75 81
76 if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { 82 dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL);
77 /* ServerWorks LE chipsets < rev 6 have problems with write-combining 83 if (dev != NULL) {
78 Don't allow it and leave room for other chipsets to be tagged */ 84 /*
85 * ServerWorks LE chipsets < rev 6 have problems with
86 * write-combining. Don't allow it and leave room for other
87 * chipsets to be tagged
88 */
79 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && 89 if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
80 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { 90 dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) {
81 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); 91 pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev);
82 if (rev <= 5) { 92 if (rev <= 5) {
83 printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); 93 pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
84 pci_dev_put(dev); 94 pci_dev_put(dev);
85 return 0; 95 return 0;
86 } 96 }
87 } 97 }
88 /* Intel 450NX errata # 23. Non ascending cacheline evictions to 98 /*
89 write combining memory may resulting in data corruption */ 99 * Intel 450NX errata # 23. Non ascending cacheline evictions to
100 * write combining memory may resulting in data corruption
101 */
90 if (dev->vendor == PCI_VENDOR_ID_INTEL && 102 if (dev->vendor == PCI_VENDOR_ID_INTEL &&
91 dev->device == PCI_DEVICE_ID_INTEL_82451NX) { 103 dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
92 printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); 104 pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
93 pci_dev_put(dev); 105 pci_dev_put(dev);
94 return 0; 106 return 0;
95 } 107 }
96 pci_dev_put(dev); 108 pci_dev_put(dev);
97 } 109 }
98 return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); 110 return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0;
99} 111}
100 112
101/* This function returns the number of variable MTRRs */ 113/* This function returns the number of variable MTRRs */
@@ -103,12 +115,13 @@ static void __init set_num_var_ranges(void)
103{ 115{
104 unsigned long config = 0, dummy; 116 unsigned long config = 0, dummy;
105 117
106 if (use_intel()) { 118 if (use_intel())
107 rdmsr(MSR_MTRRcap, config, dummy); 119 rdmsr(MSR_MTRRcap, config, dummy);
108 } else if (is_cpu(AMD)) 120 else if (is_cpu(AMD))
109 config = 2; 121 config = 2;
110 else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) 122 else if (is_cpu(CYRIX) || is_cpu(CENTAUR))
111 config = 8; 123 config = 8;
124
112 num_var_ranges = config & 0xff; 125 num_var_ranges = config & 0xff;
113} 126}
114 127
@@ -130,10 +143,12 @@ struct set_mtrr_data {
130 mtrr_type smp_type; 143 mtrr_type smp_type;
131}; 144};
132 145
146/**
147 * ipi_handler - Synchronisation handler. Executed by "other" CPUs.
148 *
149 * Returns nothing.
150 */
133static void ipi_handler(void *info) 151static void ipi_handler(void *info)
134/* [SUMMARY] Synchronisation handler. Executed by "other" CPUs.
135 [RETURNS] Nothing.
136*/
137{ 152{
138#ifdef CONFIG_SMP 153#ifdef CONFIG_SMP
139 struct set_mtrr_data *data = info; 154 struct set_mtrr_data *data = info;
@@ -142,18 +157,22 @@ static void ipi_handler(void *info)
142 local_irq_save(flags); 157 local_irq_save(flags);
143 158
144 atomic_dec(&data->count); 159 atomic_dec(&data->count);
145 while(!atomic_read(&data->gate)) 160 while (!atomic_read(&data->gate))
146 cpu_relax(); 161 cpu_relax();
147 162
148 /* The master has cleared me to execute */ 163 /* The master has cleared me to execute */
149 if (data->smp_reg != ~0U) 164 if (data->smp_reg != ~0U) {
150 mtrr_if->set(data->smp_reg, data->smp_base, 165 mtrr_if->set(data->smp_reg, data->smp_base,
151 data->smp_size, data->smp_type); 166 data->smp_size, data->smp_type);
152 else 167 } else if (mtrr_aps_delayed_init) {
168 /*
169 * Initialize the MTRRs inaddition to the synchronisation.
170 */
153 mtrr_if->set_all(); 171 mtrr_if->set_all();
172 }
154 173
155 atomic_dec(&data->count); 174 atomic_dec(&data->count);
156 while(atomic_read(&data->gate)) 175 while (atomic_read(&data->gate))
157 cpu_relax(); 176 cpu_relax();
158 177
159 atomic_dec(&data->count); 178 atomic_dec(&data->count);
@@ -161,7 +180,8 @@ static void ipi_handler(void *info)
161#endif 180#endif
162} 181}
163 182
164static inline int types_compatible(mtrr_type type1, mtrr_type type2) { 183static inline int types_compatible(mtrr_type type1, mtrr_type type2)
184{
165 return type1 == MTRR_TYPE_UNCACHABLE || 185 return type1 == MTRR_TYPE_UNCACHABLE ||
166 type2 == MTRR_TYPE_UNCACHABLE || 186 type2 == MTRR_TYPE_UNCACHABLE ||
167 (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || 187 (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) ||
@@ -176,10 +196,10 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
176 * @type: mtrr type 196 * @type: mtrr type
177 * 197 *
178 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: 198 * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly:
179 * 199 *
180 * 1. Send IPI to do the following: 200 * 1. Send IPI to do the following:
181 * 2. Disable Interrupts 201 * 2. Disable Interrupts
182 * 3. Wait for all procs to do so 202 * 3. Wait for all procs to do so
183 * 4. Enter no-fill cache mode 203 * 4. Enter no-fill cache mode
184 * 5. Flush caches 204 * 5. Flush caches
185 * 6. Clear PGE bit 205 * 6. Clear PGE bit
@@ -189,26 +209,27 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) {
189 * 10. Enable all range registers 209 * 10. Enable all range registers
190 * 11. Flush all TLBs and caches again 210 * 11. Flush all TLBs and caches again
191 * 12. Enter normal cache mode and reenable caching 211 * 12. Enter normal cache mode and reenable caching
192 * 13. Set PGE 212 * 13. Set PGE
193 * 14. Wait for buddies to catch up 213 * 14. Wait for buddies to catch up
194 * 15. Enable interrupts. 214 * 15. Enable interrupts.
195 * 215 *
196 * What does that mean for us? Well, first we set data.count to the number 216 * What does that mean for us? Well, first we set data.count to the number
197 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait 217 * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait
198 * until it hits 0 and proceed. We set the data.gate flag and reset data.count. 218 * until it hits 0 and proceed. We set the data.gate flag and reset data.count.
199 * Meanwhile, they are waiting for that flag to be set. Once it's set, each 219 * Meanwhile, they are waiting for that flag to be set. Once it's set, each
200 * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it 220 * CPU goes through the transition of updating MTRRs.
201 * differently, so we call mtrr_if->set() callback and let them take care of it. 221 * The CPU vendors may each do it differently,
202 * When they're done, they again decrement data->count and wait for data.gate to 222 * so we call mtrr_if->set() callback and let them take care of it.
203 * be reset. 223 * When they're done, they again decrement data->count and wait for data.gate
204 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. 224 * to be reset.
225 * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag
205 * Everyone then enables interrupts and we all continue on. 226 * Everyone then enables interrupts and we all continue on.
206 * 227 *
207 * Note that the mechanism is the same for UP systems, too; all the SMP stuff 228 * Note that the mechanism is the same for UP systems, too; all the SMP stuff
208 * becomes nops. 229 * becomes nops.
209 */ 230 */
210static void set_mtrr(unsigned int reg, unsigned long base, 231static void
211 unsigned long size, mtrr_type type) 232set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type)
212{ 233{
213 struct set_mtrr_data data; 234 struct set_mtrr_data data;
214 unsigned long flags; 235 unsigned long flags;
@@ -218,121 +239,124 @@ static void set_mtrr(unsigned int reg, unsigned long base,
218 data.smp_size = size; 239 data.smp_size = size;
219 data.smp_type = type; 240 data.smp_type = type;
220 atomic_set(&data.count, num_booting_cpus() - 1); 241 atomic_set(&data.count, num_booting_cpus() - 1);
221 /* make sure data.count is visible before unleashing other CPUs */ 242
243 /* Make sure data.count is visible before unleashing other CPUs */
222 smp_wmb(); 244 smp_wmb();
223 atomic_set(&data.gate,0); 245 atomic_set(&data.gate, 0);
224 246
225 /* Start the ball rolling on other CPUs */ 247 /* Start the ball rolling on other CPUs */
226 if (smp_call_function(ipi_handler, &data, 0) != 0) 248 if (smp_call_function(ipi_handler, &data, 0) != 0)
227 panic("mtrr: timed out waiting for other CPUs\n"); 249 panic("mtrr: timed out waiting for other CPUs\n");
228 250
229 local_irq_save(flags); 251 local_irq_save(flags);
230 252
231 while(atomic_read(&data.count)) 253 while (atomic_read(&data.count))
232 cpu_relax(); 254 cpu_relax();
233 255
234 /* ok, reset count and toggle gate */ 256 /* Ok, reset count and toggle gate */
235 atomic_set(&data.count, num_booting_cpus() - 1); 257 atomic_set(&data.count, num_booting_cpus() - 1);
236 smp_wmb(); 258 smp_wmb();
237 atomic_set(&data.gate,1); 259 atomic_set(&data.gate, 1);
238 260
239 /* do our MTRR business */ 261 /* Do our MTRR business */
240 262
241 /* HACK! 263 /*
264 * HACK!
242 * We use this same function to initialize the mtrrs on boot. 265 * We use this same function to initialize the mtrrs on boot.
243 * The state of the boot cpu's mtrrs has been saved, and we want 266 * The state of the boot cpu's mtrrs has been saved, and we want
244 * to replicate across all the APs. 267 * to replicate across all the APs.
245 * If we're doing that @reg is set to something special... 268 * If we're doing that @reg is set to something special...
246 */ 269 */
247 if (reg != ~0U) 270 if (reg != ~0U)
248 mtrr_if->set(reg,base,size,type); 271 mtrr_if->set(reg, base, size, type);
272 else if (!mtrr_aps_delayed_init)
273 mtrr_if->set_all();
249 274
250 /* wait for the others */ 275 /* Wait for the others */
251 while(atomic_read(&data.count)) 276 while (atomic_read(&data.count))
252 cpu_relax(); 277 cpu_relax();
253 278
254 atomic_set(&data.count, num_booting_cpus() - 1); 279 atomic_set(&data.count, num_booting_cpus() - 1);
255 smp_wmb(); 280 smp_wmb();
256 atomic_set(&data.gate,0); 281 atomic_set(&data.gate, 0);
257 282
258 /* 283 /*
259 * Wait here for everyone to have seen the gate change 284 * Wait here for everyone to have seen the gate change
260 * So we're the last ones to touch 'data' 285 * So we're the last ones to touch 'data'
261 */ 286 */
262 while(atomic_read(&data.count)) 287 while (atomic_read(&data.count))
263 cpu_relax(); 288 cpu_relax();
264 289
265 local_irq_restore(flags); 290 local_irq_restore(flags);
266} 291}
267 292
268/** 293/**
269 * mtrr_add_page - Add a memory type region 294 * mtrr_add_page - Add a memory type region
270 * @base: Physical base address of region in pages (in units of 4 kB!) 295 * @base: Physical base address of region in pages (in units of 4 kB!)
271 * @size: Physical size of region in pages (4 kB) 296 * @size: Physical size of region in pages (4 kB)
272 * @type: Type of MTRR desired 297 * @type: Type of MTRR desired
273 * @increment: If this is true do usage counting on the region 298 * @increment: If this is true do usage counting on the region
274 * 299 *
275 * Memory type region registers control the caching on newer Intel and 300 * Memory type region registers control the caching on newer Intel and
276 * non Intel processors. This function allows drivers to request an 301 * non Intel processors. This function allows drivers to request an
277 * MTRR is added. The details and hardware specifics of each processor's 302 * MTRR is added. The details and hardware specifics of each processor's
278 * implementation are hidden from the caller, but nevertheless the 303 * implementation are hidden from the caller, but nevertheless the
279 * caller should expect to need to provide a power of two size on an 304 * caller should expect to need to provide a power of two size on an
280 * equivalent power of two boundary. 305 * equivalent power of two boundary.
281 * 306 *
282 * If the region cannot be added either because all regions are in use 307 * If the region cannot be added either because all regions are in use
283 * or the CPU cannot support it a negative value is returned. On success 308 * or the CPU cannot support it a negative value is returned. On success
284 * the register number for this entry is returned, but should be treated 309 * the register number for this entry is returned, but should be treated
285 * as a cookie only. 310 * as a cookie only.
286 * 311 *
287 * On a multiprocessor machine the changes are made to all processors. 312 * On a multiprocessor machine the changes are made to all processors.
288 * This is required on x86 by the Intel processors. 313 * This is required on x86 by the Intel processors.
289 * 314 *
290 * The available types are 315 * The available types are
291 * 316 *
292 * %MTRR_TYPE_UNCACHABLE - No caching 317 * %MTRR_TYPE_UNCACHABLE - No caching
293 * 318 *
294 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever 319 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
295 * 320 *
296 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts 321 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
297 * 322 *
298 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes 323 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
299 * 324 *
300 * BUGS: Needs a quiet flag for the cases where drivers do not mind 325 * BUGS: Needs a quiet flag for the cases where drivers do not mind
301 * failures and do not wish system log messages to be sent. 326 * failures and do not wish system log messages to be sent.
302 */ 327 */
303 328int mtrr_add_page(unsigned long base, unsigned long size,
304int mtrr_add_page(unsigned long base, unsigned long size,
305 unsigned int type, bool increment) 329 unsigned int type, bool increment)
306{ 330{
331 unsigned long lbase, lsize;
307 int i, replace, error; 332 int i, replace, error;
308 mtrr_type ltype; 333 mtrr_type ltype;
309 unsigned long lbase, lsize;
310 334
311 if (!mtrr_if) 335 if (!mtrr_if)
312 return -ENXIO; 336 return -ENXIO;
313 337
314 if ((error = mtrr_if->validate_add_page(base,size,type))) 338 error = mtrr_if->validate_add_page(base, size, type);
339 if (error)
315 return error; 340 return error;
316 341
317 if (type >= MTRR_NUM_TYPES) { 342 if (type >= MTRR_NUM_TYPES) {
318 printk(KERN_WARNING "mtrr: type: %u invalid\n", type); 343 pr_warning("mtrr: type: %u invalid\n", type);
319 return -EINVAL; 344 return -EINVAL;
320 } 345 }
321 346
322 /* If the type is WC, check that this processor supports it */ 347 /* If the type is WC, check that this processor supports it */
323 if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { 348 if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
324 printk(KERN_WARNING 349 pr_warning("mtrr: your processor doesn't support write-combining\n");
325 "mtrr: your processor doesn't support write-combining\n");
326 return -ENOSYS; 350 return -ENOSYS;
327 } 351 }
328 352
329 if (!size) { 353 if (!size) {
330 printk(KERN_WARNING "mtrr: zero sized request\n"); 354 pr_warning("mtrr: zero sized request\n");
331 return -EINVAL; 355 return -EINVAL;
332 } 356 }
333 357
334 if (base & size_or_mask || size & size_or_mask) { 358 if (base & size_or_mask || size & size_or_mask) {
335 printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); 359 pr_warning("mtrr: base or size exceeds the MTRR width\n");
336 return -EINVAL; 360 return -EINVAL;
337 } 361 }
338 362
@@ -341,36 +365,40 @@ int mtrr_add_page(unsigned long base, unsigned long size,
341 365
342 /* No CPU hotplug when we change MTRR entries */ 366 /* No CPU hotplug when we change MTRR entries */
343 get_online_cpus(); 367 get_online_cpus();
344 /* Search for existing MTRR */ 368
369 /* Search for existing MTRR */
345 mutex_lock(&mtrr_mutex); 370 mutex_lock(&mtrr_mutex);
346 for (i = 0; i < num_var_ranges; ++i) { 371 for (i = 0; i < num_var_ranges; ++i) {
347 mtrr_if->get(i, &lbase, &lsize, &ltype); 372 mtrr_if->get(i, &lbase, &lsize, &ltype);
348 if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) 373 if (!lsize || base > lbase + lsize - 1 ||
374 base + size - 1 < lbase)
349 continue; 375 continue;
350 /* At this point we know there is some kind of overlap/enclosure */ 376 /*
377 * At this point we know there is some kind of
378 * overlap/enclosure
379 */
351 if (base < lbase || base + size - 1 > lbase + lsize - 1) { 380 if (base < lbase || base + size - 1 > lbase + lsize - 1) {
352 if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { 381 if (base <= lbase &&
382 base + size - 1 >= lbase + lsize - 1) {
353 /* New region encloses an existing region */ 383 /* New region encloses an existing region */
354 if (type == ltype) { 384 if (type == ltype) {
355 replace = replace == -1 ? i : -2; 385 replace = replace == -1 ? i : -2;
356 continue; 386 continue;
357 } 387 } else if (types_compatible(type, ltype))
358 else if (types_compatible(type, ltype))
359 continue; 388 continue;
360 } 389 }
361 printk(KERN_WARNING 390 pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing"
362 "mtrr: 0x%lx000,0x%lx000 overlaps existing" 391 " 0x%lx000,0x%lx000\n", base, size, lbase,
363 " 0x%lx000,0x%lx000\n", base, size, lbase, 392 lsize);
364 lsize);
365 goto out; 393 goto out;
366 } 394 }
367 /* New region is enclosed by an existing region */ 395 /* New region is enclosed by an existing region */
368 if (ltype != type) { 396 if (ltype != type) {
369 if (types_compatible(type, ltype)) 397 if (types_compatible(type, ltype))
370 continue; 398 continue;
371 printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", 399 pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
372 base, size, mtrr_attrib_to_str(ltype), 400 base, size, mtrr_attrib_to_str(ltype),
373 mtrr_attrib_to_str(type)); 401 mtrr_attrib_to_str(type));
374 goto out; 402 goto out;
375 } 403 }
376 if (increment) 404 if (increment)
@@ -378,7 +406,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
378 error = i; 406 error = i;
379 goto out; 407 goto out;
380 } 408 }
381 /* Search for an empty MTRR */ 409 /* Search for an empty MTRR */
382 i = mtrr_if->get_free_region(base, size, replace); 410 i = mtrr_if->get_free_region(base, size, replace);
383 if (i >= 0) { 411 if (i >= 0) {
384 set_mtrr(i, base, size, type); 412 set_mtrr(i, base, size, type);
@@ -393,8 +421,9 @@ int mtrr_add_page(unsigned long base, unsigned long size,
393 mtrr_usage_table[replace] = 0; 421 mtrr_usage_table[replace] = 0;
394 } 422 }
395 } 423 }
396 } else 424 } else {
397 printk(KERN_INFO "mtrr: no more MTRRs available\n"); 425 pr_info("mtrr: no more MTRRs available\n");
426 }
398 error = i; 427 error = i;
399 out: 428 out:
400 mutex_unlock(&mtrr_mutex); 429 mutex_unlock(&mtrr_mutex);
@@ -405,10 +434,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
405static int mtrr_check(unsigned long base, unsigned long size) 434static int mtrr_check(unsigned long base, unsigned long size)
406{ 435{
407 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { 436 if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
408 printk(KERN_WARNING 437 pr_warning("mtrr: size and base must be multiples of 4 kiB\n");
409 "mtrr: size and base must be multiples of 4 kiB\n"); 438 pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
410 printk(KERN_DEBUG
411 "mtrr: size: 0x%lx base: 0x%lx\n", size, base);
412 dump_stack(); 439 dump_stack();
413 return -1; 440 return -1;
414 } 441 }
@@ -416,66 +443,64 @@ static int mtrr_check(unsigned long base, unsigned long size)
416} 443}
417 444
418/** 445/**
419 * mtrr_add - Add a memory type region 446 * mtrr_add - Add a memory type region
420 * @base: Physical base address of region 447 * @base: Physical base address of region
421 * @size: Physical size of region 448 * @size: Physical size of region
422 * @type: Type of MTRR desired 449 * @type: Type of MTRR desired
423 * @increment: If this is true do usage counting on the region 450 * @increment: If this is true do usage counting on the region
424 * 451 *
425 * Memory type region registers control the caching on newer Intel and 452 * Memory type region registers control the caching on newer Intel and
426 * non Intel processors. This function allows drivers to request an 453 * non Intel processors. This function allows drivers to request an
427 * MTRR is added. The details and hardware specifics of each processor's 454 * MTRR is added. The details and hardware specifics of each processor's
428 * implementation are hidden from the caller, but nevertheless the 455 * implementation are hidden from the caller, but nevertheless the
429 * caller should expect to need to provide a power of two size on an 456 * caller should expect to need to provide a power of two size on an
430 * equivalent power of two boundary. 457 * equivalent power of two boundary.
431 * 458 *
432 * If the region cannot be added either because all regions are in use 459 * If the region cannot be added either because all regions are in use
433 * or the CPU cannot support it a negative value is returned. On success 460 * or the CPU cannot support it a negative value is returned. On success
434 * the register number for this entry is returned, but should be treated 461 * the register number for this entry is returned, but should be treated
435 * as a cookie only. 462 * as a cookie only.
436 * 463 *
437 * On a multiprocessor machine the changes are made to all processors. 464 * On a multiprocessor machine the changes are made to all processors.
438 * This is required on x86 by the Intel processors. 465 * This is required on x86 by the Intel processors.
439 * 466 *
440 * The available types are 467 * The available types are
441 * 468 *
442 * %MTRR_TYPE_UNCACHABLE - No caching 469 * %MTRR_TYPE_UNCACHABLE - No caching
443 * 470 *
444 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever 471 * %MTRR_TYPE_WRBACK - Write data back in bursts whenever
445 * 472 *
446 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts 473 * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts
447 * 474 *
448 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes 475 * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes
449 * 476 *
450 * BUGS: Needs a quiet flag for the cases where drivers do not mind 477 * BUGS: Needs a quiet flag for the cases where drivers do not mind
451 * failures and do not wish system log messages to be sent. 478 * failures and do not wish system log messages to be sent.
452 */ 479 */
453 480int mtrr_add(unsigned long base, unsigned long size, unsigned int type,
454int 481 bool increment)
455mtrr_add(unsigned long base, unsigned long size, unsigned int type,
456 bool increment)
457{ 482{
458 if (mtrr_check(base, size)) 483 if (mtrr_check(base, size))
459 return -EINVAL; 484 return -EINVAL;
460 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, 485 return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type,
461 increment); 486 increment);
462} 487}
488EXPORT_SYMBOL(mtrr_add);
463 489
464/** 490/**
465 * mtrr_del_page - delete a memory type region 491 * mtrr_del_page - delete a memory type region
466 * @reg: Register returned by mtrr_add 492 * @reg: Register returned by mtrr_add
467 * @base: Physical base address 493 * @base: Physical base address
468 * @size: Size of region 494 * @size: Size of region
469 * 495 *
470 * If register is supplied then base and size are ignored. This is 496 * If register is supplied then base and size are ignored. This is
471 * how drivers should call it. 497 * how drivers should call it.
472 * 498 *
473 * Releases an MTRR region. If the usage count drops to zero the 499 * Releases an MTRR region. If the usage count drops to zero the
474 * register is freed and the region returns to default state. 500 * register is freed and the region returns to default state.
475 * On success the register is returned, on failure a negative error 501 * On success the register is returned, on failure a negative error
476 * code. 502 * code.
477 */ 503 */
478
479int mtrr_del_page(int reg, unsigned long base, unsigned long size) 504int mtrr_del_page(int reg, unsigned long base, unsigned long size)
480{ 505{
481 int i, max; 506 int i, max;
@@ -500,22 +525,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
500 } 525 }
501 } 526 }
502 if (reg < 0) { 527 if (reg < 0) {
503 printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, 528 pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
504 size); 529 base, size);
505 goto out; 530 goto out;
506 } 531 }
507 } 532 }
508 if (reg >= max) { 533 if (reg >= max) {
509 printk(KERN_WARNING "mtrr: register: %d too big\n", reg); 534 pr_warning("mtrr: register: %d too big\n", reg);
510 goto out; 535 goto out;
511 } 536 }
512 mtrr_if->get(reg, &lbase, &lsize, &ltype); 537 mtrr_if->get(reg, &lbase, &lsize, &ltype);
513 if (lsize < 1) { 538 if (lsize < 1) {
514 printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); 539 pr_warning("mtrr: MTRR %d not used\n", reg);
515 goto out; 540 goto out;
516 } 541 }
517 if (mtrr_usage_table[reg] < 1) { 542 if (mtrr_usage_table[reg] < 1) {
518 printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); 543 pr_warning("mtrr: reg: %d has count=0\n", reg);
519 goto out; 544 goto out;
520 } 545 }
521 if (--mtrr_usage_table[reg] < 1) 546 if (--mtrr_usage_table[reg] < 1)
@@ -526,33 +551,31 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
526 put_online_cpus(); 551 put_online_cpus();
527 return error; 552 return error;
528} 553}
554
529/** 555/**
530 * mtrr_del - delete a memory type region 556 * mtrr_del - delete a memory type region
531 * @reg: Register returned by mtrr_add 557 * @reg: Register returned by mtrr_add
532 * @base: Physical base address 558 * @base: Physical base address
533 * @size: Size of region 559 * @size: Size of region
534 * 560 *
535 * If register is supplied then base and size are ignored. This is 561 * If register is supplied then base and size are ignored. This is
536 * how drivers should call it. 562 * how drivers should call it.
537 * 563 *
538 * Releases an MTRR region. If the usage count drops to zero the 564 * Releases an MTRR region. If the usage count drops to zero the
539 * register is freed and the region returns to default state. 565 * register is freed and the region returns to default state.
540 * On success the register is returned, on failure a negative error 566 * On success the register is returned, on failure a negative error
541 * code. 567 * code.
542 */ 568 */
543 569int mtrr_del(int reg, unsigned long base, unsigned long size)
544int
545mtrr_del(int reg, unsigned long base, unsigned long size)
546{ 570{
547 if (mtrr_check(base, size)) 571 if (mtrr_check(base, size))
548 return -EINVAL; 572 return -EINVAL;
549 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); 573 return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT);
550} 574}
551
552EXPORT_SYMBOL(mtrr_add);
553EXPORT_SYMBOL(mtrr_del); 575EXPORT_SYMBOL(mtrr_del);
554 576
555/* HACK ALERT! 577/*
578 * HACK ALERT!
556 * These should be called implicitly, but we can't yet until all the initcall 579 * These should be called implicitly, but we can't yet until all the initcall
557 * stuff is done... 580 * stuff is done...
558 */ 581 */
@@ -576,29 +599,28 @@ struct mtrr_value {
576 599
577static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; 600static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES];
578 601
579static int mtrr_save(struct sys_device * sysdev, pm_message_t state) 602static int mtrr_save(struct sys_device *sysdev, pm_message_t state)
580{ 603{
581 int i; 604 int i;
582 605
583 for (i = 0; i < num_var_ranges; i++) { 606 for (i = 0; i < num_var_ranges; i++) {
584 mtrr_if->get(i, 607 mtrr_if->get(i, &mtrr_value[i].lbase,
585 &mtrr_value[i].lbase, 608 &mtrr_value[i].lsize,
586 &mtrr_value[i].lsize, 609 &mtrr_value[i].ltype);
587 &mtrr_value[i].ltype);
588 } 610 }
589 return 0; 611 return 0;
590} 612}
591 613
592static int mtrr_restore(struct sys_device * sysdev) 614static int mtrr_restore(struct sys_device *sysdev)
593{ 615{
594 int i; 616 int i;
595 617
596 for (i = 0; i < num_var_ranges; i++) { 618 for (i = 0; i < num_var_ranges; i++) {
597 if (mtrr_value[i].lsize) 619 if (mtrr_value[i].lsize) {
598 set_mtrr(i, 620 set_mtrr(i, mtrr_value[i].lbase,
599 mtrr_value[i].lbase, 621 mtrr_value[i].lsize,
600 mtrr_value[i].lsize, 622 mtrr_value[i].ltype);
601 mtrr_value[i].ltype); 623 }
602 } 624 }
603 return 0; 625 return 0;
604} 626}
@@ -615,26 +637,29 @@ int __initdata changed_by_mtrr_cleanup;
615/** 637/**
616 * mtrr_bp_init - initialize mtrrs on the boot CPU 638 * mtrr_bp_init - initialize mtrrs on the boot CPU
617 * 639 *
618 * This needs to be called early; before any of the other CPUs are 640 * This needs to be called early; before any of the other CPUs are
619 * initialized (i.e. before smp_init()). 641 * initialized (i.e. before smp_init()).
620 * 642 *
621 */ 643 */
622void __init mtrr_bp_init(void) 644void __init mtrr_bp_init(void)
623{ 645{
624 u32 phys_addr; 646 u32 phys_addr;
647
625 init_ifs(); 648 init_ifs();
626 649
627 phys_addr = 32; 650 phys_addr = 32;
628 651
629 if (cpu_has_mtrr) { 652 if (cpu_has_mtrr) {
630 mtrr_if = &generic_mtrr_ops; 653 mtrr_if = &generic_mtrr_ops;
631 size_or_mask = 0xff000000; /* 36 bits */ 654 size_or_mask = 0xff000000; /* 36 bits */
632 size_and_mask = 0x00f00000; 655 size_and_mask = 0x00f00000;
633 phys_addr = 36; 656 phys_addr = 36;
634 657
635 /* This is an AMD specific MSR, but we assume(hope?) that 658 /*
636 Intel will implement it to when they extend the address 659 * This is an AMD specific MSR, but we assume(hope?) that
637 bus of the Xeon. */ 660 * Intel will implement it to when they extend the address
661 * bus of the Xeon.
662 */
638 if (cpuid_eax(0x80000000) >= 0x80000008) { 663 if (cpuid_eax(0x80000000) >= 0x80000008) {
639 phys_addr = cpuid_eax(0x80000008) & 0xff; 664 phys_addr = cpuid_eax(0x80000008) & 0xff;
640 /* CPUID workaround for Intel 0F33/0F34 CPU */ 665 /* CPUID workaround for Intel 0F33/0F34 CPU */
@@ -649,9 +674,11 @@ void __init mtrr_bp_init(void)
649 size_and_mask = ~size_or_mask & 0xfffff00000ULL; 674 size_and_mask = ~size_or_mask & 0xfffff00000ULL;
650 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && 675 } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR &&
651 boot_cpu_data.x86 == 6) { 676 boot_cpu_data.x86 == 6) {
652 /* VIA C* family have Intel style MTRRs, but 677 /*
653 don't support PAE */ 678 * VIA C* family have Intel style MTRRs,
654 size_or_mask = 0xfff00000; /* 32 bits */ 679 * but don't support PAE
680 */
681 size_or_mask = 0xfff00000; /* 32 bits */
655 size_and_mask = 0; 682 size_and_mask = 0;
656 phys_addr = 32; 683 phys_addr = 32;
657 } 684 }
@@ -694,30 +721,28 @@ void __init mtrr_bp_init(void)
694 changed_by_mtrr_cleanup = 1; 721 changed_by_mtrr_cleanup = 1;
695 mtrr_if->set_all(); 722 mtrr_if->set_all();
696 } 723 }
697
698 } 724 }
699 } 725 }
700} 726}
701 727
702void mtrr_ap_init(void) 728void mtrr_ap_init(void)
703{ 729{
704 unsigned long flags; 730 if (!use_intel() || mtrr_aps_delayed_init)
705
706 if (!mtrr_if || !use_intel())
707 return; 731 return;
708 /* 732 /*
709 * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, 733 * Ideally we should hold mtrr_mutex here to avoid mtrr entries
710 * but this routine will be called in cpu boot time, holding the lock 734 * changed, but this routine will be called in cpu boot time,
711 * breaks it. This routine is called in two cases: 1.very earily time 735 * holding the lock breaks it.
712 * of software resume, when there absolutely isn't mtrr entry changes; 736 *
713 * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to 737 * This routine is called in two cases:
714 * prevent mtrr entry changes 738 *
739 * 1. very earily time of software resume, when there absolutely
740 * isn't mtrr entry changes;
741 *
742 * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug
743 * lock to prevent mtrr entry changes
715 */ 744 */
716 local_irq_save(flags); 745 set_mtrr(~0U, 0, 0, 0);
717
718 mtrr_if->set_all();
719
720 local_irq_restore(flags);
721} 746}
722 747
723/** 748/**
@@ -728,23 +753,55 @@ void mtrr_save_state(void)
728 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); 753 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
729} 754}
730 755
756void set_mtrr_aps_delayed_init(void)
757{
758 if (!use_intel())
759 return;
760
761 mtrr_aps_delayed_init = true;
762}
763
764/*
765 * MTRR initialization for all AP's
766 */
767void mtrr_aps_init(void)
768{
769 if (!use_intel())
770 return;
771
772 set_mtrr(~0U, 0, 0, 0);
773 mtrr_aps_delayed_init = false;
774}
775
776void mtrr_bp_restore(void)
777{
778 if (!use_intel())
779 return;
780
781 mtrr_if->set_all();
782}
783
731static int __init mtrr_init_finialize(void) 784static int __init mtrr_init_finialize(void)
732{ 785{
733 if (!mtrr_if) 786 if (!mtrr_if)
734 return 0; 787 return 0;
788
735 if (use_intel()) { 789 if (use_intel()) {
736 if (!changed_by_mtrr_cleanup) 790 if (!changed_by_mtrr_cleanup)
737 mtrr_state_warn(); 791 mtrr_state_warn();
738 } else { 792 return 0;
739 /* The CPUs haven't MTRR and seem to not support SMP. They have
740 * specific drivers, we use a tricky method to support
741 * suspend/resume for them.
742 * TBD: is there any system with such CPU which supports
743 * suspend/resume? if no, we should remove the code.
744 */
745 sysdev_driver_register(&cpu_sysdev_class,
746 &mtrr_sysdev_driver);
747 } 793 }
794
795 /*
796 * The CPU has no MTRR and seems to not support SMP. They have
797 * specific drivers, we use a tricky method to support
798 * suspend/resume for them.
799 *
800 * TBD: is there any system with such CPU which supports
801 * suspend/resume? If no, we should remove the code.
802 */
803 sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver);
804
748 return 0; 805 return 0;
749} 806}
750subsys_initcall(mtrr_init_finialize); 807subsys_initcall(mtrr_init_finialize);
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h
index 7538b767f206..a501dee9a87a 100644
--- a/arch/x86/kernel/cpu/mtrr/mtrr.h
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.h
@@ -1,5 +1,5 @@
1/* 1/*
2 * local mtrr defines. 2 * local MTRR defines.
3 */ 3 */
4 4
5#include <linux/types.h> 5#include <linux/types.h>
@@ -14,13 +14,12 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES];
14struct mtrr_ops { 14struct mtrr_ops {
15 u32 vendor; 15 u32 vendor;
16 u32 use_intel_if; 16 u32 use_intel_if;
17// void (*init)(void);
18 void (*set)(unsigned int reg, unsigned long base, 17 void (*set)(unsigned int reg, unsigned long base,
19 unsigned long size, mtrr_type type); 18 unsigned long size, mtrr_type type);
20 void (*set_all)(void); 19 void (*set_all)(void);
21 20
22 void (*get)(unsigned int reg, unsigned long *base, 21 void (*get)(unsigned int reg, unsigned long *base,
23 unsigned long *size, mtrr_type * type); 22 unsigned long *size, mtrr_type *type);
24 int (*get_free_region)(unsigned long base, unsigned long size, 23 int (*get_free_region)(unsigned long base, unsigned long size,
25 int replace_reg); 24 int replace_reg);
26 int (*validate_add_page)(unsigned long base, unsigned long size, 25 int (*validate_add_page)(unsigned long base, unsigned long size,
@@ -39,11 +38,11 @@ extern int positive_have_wrcomb(void);
39 38
40/* library functions for processor-specific routines */ 39/* library functions for processor-specific routines */
41struct set_mtrr_context { 40struct set_mtrr_context {
42 unsigned long flags; 41 unsigned long flags;
43 unsigned long cr4val; 42 unsigned long cr4val;
44 u32 deftype_lo; 43 u32 deftype_lo;
45 u32 deftype_hi; 44 u32 deftype_hi;
46 u32 ccr3; 45 u32 ccr3;
47}; 46};
48 47
49void set_mtrr_done(struct set_mtrr_context *ctxt); 48void set_mtrr_done(struct set_mtrr_context *ctxt);
@@ -54,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index,
54 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); 53 u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi);
55void get_mtrr_state(void); 54void get_mtrr_state(void);
56 55
57extern void set_mtrr_ops(struct mtrr_ops * ops); 56extern void set_mtrr_ops(struct mtrr_ops *ops);
58 57
59extern u64 size_or_mask, size_and_mask; 58extern u64 size_or_mask, size_and_mask;
60extern struct mtrr_ops * mtrr_if; 59extern struct mtrr_ops *mtrr_if;
61 60
62#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) 61#define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd)
63#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) 62#define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1)
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c
index 1f5fb1588d1f..dfc80b4e6b0d 100644
--- a/arch/x86/kernel/cpu/mtrr/state.c
+++ b/arch/x86/kernel/cpu/mtrr/state.c
@@ -1,24 +1,25 @@
1#include <linux/mm.h>
2#include <linux/init.h> 1#include <linux/init.h>
3#include <asm/io.h> 2#include <linux/io.h>
4#include <asm/mtrr.h> 3#include <linux/mm.h>
5#include <asm/msr.h> 4
6#include <asm/processor-cyrix.h> 5#include <asm/processor-cyrix.h>
7#include <asm/processor-flags.h> 6#include <asm/processor-flags.h>
8#include "mtrr.h" 7#include <asm/mtrr.h>
8#include <asm/msr.h>
9 9
10#include "mtrr.h"
10 11
11/* Put the processor into a state where MTRRs can be safely set */ 12/* Put the processor into a state where MTRRs can be safely set */
12void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) 13void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
13{ 14{
14 unsigned int cr0; 15 unsigned int cr0;
15 16
16 /* Disable interrupts locally */ 17 /* Disable interrupts locally */
17 local_irq_save(ctxt->flags); 18 local_irq_save(ctxt->flags);
18 19
19 if (use_intel() || is_cpu(CYRIX)) { 20 if (use_intel() || is_cpu(CYRIX)) {
20 21
21 /* Save value of CR4 and clear Page Global Enable (bit 7) */ 22 /* Save value of CR4 and clear Page Global Enable (bit 7) */
22 if (cpu_has_pge) { 23 if (cpu_has_pge) {
23 ctxt->cr4val = read_cr4(); 24 ctxt->cr4val = read_cr4();
24 write_cr4(ctxt->cr4val & ~X86_CR4_PGE); 25 write_cr4(ctxt->cr4val & ~X86_CR4_PGE);
@@ -33,50 +34,61 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt)
33 write_cr0(cr0); 34 write_cr0(cr0);
34 wbinvd(); 35 wbinvd();
35 36
36 if (use_intel()) 37 if (use_intel()) {
37 /* Save MTRR state */ 38 /* Save MTRR state */
38 rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); 39 rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi);
39 else 40 } else {
40 /* Cyrix ARRs - everything else were excluded at the top */ 41 /*
42 * Cyrix ARRs -
43 * everything else were excluded at the top
44 */
41 ctxt->ccr3 = getCx86(CX86_CCR3); 45 ctxt->ccr3 = getCx86(CX86_CCR3);
46 }
42 } 47 }
43} 48}
44 49
45void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) 50void set_mtrr_cache_disable(struct set_mtrr_context *ctxt)
46{ 51{
47 if (use_intel()) 52 if (use_intel()) {
48 /* Disable MTRRs, and set the default type to uncached */ 53 /* Disable MTRRs, and set the default type to uncached */
49 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, 54 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL,
50 ctxt->deftype_hi); 55 ctxt->deftype_hi);
51 else if (is_cpu(CYRIX)) 56 } else {
52 /* Cyrix ARRs - everything else were excluded at the top */ 57 if (is_cpu(CYRIX)) {
53 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); 58 /* Cyrix ARRs - everything else were excluded at the top */
59 setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10);
60 }
61 }
54} 62}
55 63
56/* Restore the processor after a set_mtrr_prepare */ 64/* Restore the processor after a set_mtrr_prepare */
57void set_mtrr_done(struct set_mtrr_context *ctxt) 65void set_mtrr_done(struct set_mtrr_context *ctxt)
58{ 66{
59 if (use_intel() || is_cpu(CYRIX)) { 67 if (use_intel() || is_cpu(CYRIX)) {
60 68
61 /* Flush caches and TLBs */ 69 /* Flush caches and TLBs */
62 wbinvd(); 70 wbinvd();
63 71
64 /* Restore MTRRdefType */ 72 /* Restore MTRRdefType */
65 if (use_intel()) 73 if (use_intel()) {
66 /* Intel (P6) standard MTRRs */ 74 /* Intel (P6) standard MTRRs */
67 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); 75 mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo,
68 else 76 ctxt->deftype_hi);
69 /* Cyrix ARRs - everything else was excluded at the top */ 77 } else {
78 /*
79 * Cyrix ARRs -
80 * everything else was excluded at the top
81 */
70 setCx86(CX86_CCR3, ctxt->ccr3); 82 setCx86(CX86_CCR3, ctxt->ccr3);
83 }
71 84
72 /* Enable caches */ 85 /* Enable caches */
73 write_cr0(read_cr0() & 0xbfffffff); 86 write_cr0(read_cr0() & 0xbfffffff);
74 87
75 /* Restore value of CR4 */ 88 /* Restore value of CR4 */
76 if (cpu_has_pge) 89 if (cpu_has_pge)
77 write_cr4(ctxt->cr4val); 90 write_cr4(ctxt->cr4val);
78 } 91 }
79 /* Re-enable interrupts locally (if enabled previously) */ 92 /* Re-enable interrupts locally (if enabled previously) */
80 local_irq_restore(ctxt->flags); 93 local_irq_restore(ctxt->flags);
81} 94}
82
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 6a0e71b38126..dbdf712fae9e 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -1211,7 +1211,7 @@ amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx)
1211 x86_pmu_disable_counter(hwc, idx); 1211 x86_pmu_disable_counter(hwc, idx);
1212} 1212}
1213 1213
1214static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); 1214static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left);
1215 1215
1216/* 1216/*
1217 * Set the next IRQ period, based on the hwc->period_left value. 1217 * Set the next IRQ period, based on the hwc->period_left value.
@@ -1253,7 +1253,7 @@ x86_perf_counter_set_period(struct perf_counter *counter,
1253 if (left > x86_pmu.max_period) 1253 if (left > x86_pmu.max_period)
1254 left = x86_pmu.max_period; 1254 left = x86_pmu.max_period;
1255 1255
1256 per_cpu(prev_left[idx], smp_processor_id()) = left; 1256 per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
1257 1257
1258 /* 1258 /*
1259 * The hw counter starts counting from this counter offset, 1259 * The hw counter starts counting from this counter offset,
@@ -1470,7 +1470,7 @@ void perf_counter_print_debug(void)
1470 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); 1470 rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl);
1471 rdmsrl(x86_pmu.perfctr + idx, pmc_count); 1471 rdmsrl(x86_pmu.perfctr + idx, pmc_count);
1472 1472
1473 prev_left = per_cpu(prev_left[idx], cpu); 1473 prev_left = per_cpu(pmc_prev_left[idx], cpu);
1474 1474
1475 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", 1475 pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n",
1476 cpu, idx, pmc_ctrl); 1476 cpu, idx, pmc_ctrl);
@@ -2124,8 +2124,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip)
2124 entry->ip[entry->nr++] = ip; 2124 entry->ip[entry->nr++] = ip;
2125} 2125}
2126 2126
2127static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); 2127static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry);
2128static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); 2128static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry);
2129static DEFINE_PER_CPU(int, in_nmi_frame); 2129static DEFINE_PER_CPU(int, in_nmi_frame);
2130 2130
2131 2131
@@ -2278,9 +2278,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs)
2278 struct perf_callchain_entry *entry; 2278 struct perf_callchain_entry *entry;
2279 2279
2280 if (in_nmi()) 2280 if (in_nmi())
2281 entry = &__get_cpu_var(nmi_entry); 2281 entry = &__get_cpu_var(pmc_nmi_entry);
2282 else 2282 else
2283 entry = &__get_cpu_var(irq_entry); 2283 entry = &__get_cpu_var(pmc_irq_entry);
2284 2284
2285 entry->nr = 0; 2285 entry->nr = 0;
2286 2286
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index e60ed740d2b3..392bea43b890 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -68,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
68 /* returns the bit offset of the performance counter register */ 68 /* returns the bit offset of the performance counter register */
69 switch (boot_cpu_data.x86_vendor) { 69 switch (boot_cpu_data.x86_vendor) {
70 case X86_VENDOR_AMD: 70 case X86_VENDOR_AMD:
71 return (msr - MSR_K7_PERFCTR0); 71 return msr - MSR_K7_PERFCTR0;
72 case X86_VENDOR_INTEL: 72 case X86_VENDOR_INTEL:
73 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 73 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
74 return (msr - MSR_ARCH_PERFMON_PERFCTR0); 74 return msr - MSR_ARCH_PERFMON_PERFCTR0;
75 75
76 switch (boot_cpu_data.x86) { 76 switch (boot_cpu_data.x86) {
77 case 6: 77 case 6:
78 return (msr - MSR_P6_PERFCTR0); 78 return msr - MSR_P6_PERFCTR0;
79 case 15: 79 case 15:
80 return (msr - MSR_P4_BPU_PERFCTR0); 80 return msr - MSR_P4_BPU_PERFCTR0;
81 } 81 }
82 } 82 }
83 return 0; 83 return 0;
@@ -92,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
92 /* returns the bit offset of the event selection register */ 92 /* returns the bit offset of the event selection register */
93 switch (boot_cpu_data.x86_vendor) { 93 switch (boot_cpu_data.x86_vendor) {
94 case X86_VENDOR_AMD: 94 case X86_VENDOR_AMD:
95 return (msr - MSR_K7_EVNTSEL0); 95 return msr - MSR_K7_EVNTSEL0;
96 case X86_VENDOR_INTEL: 96 case X86_VENDOR_INTEL:
97 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) 97 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
98 return (msr - MSR_ARCH_PERFMON_EVENTSEL0); 98 return msr - MSR_ARCH_PERFMON_EVENTSEL0;
99 99
100 switch (boot_cpu_data.x86) { 100 switch (boot_cpu_data.x86) {
101 case 6: 101 case 6:
102 return (msr - MSR_P6_EVNTSEL0); 102 return msr - MSR_P6_EVNTSEL0;
103 case 15: 103 case 15:
104 return (msr - MSR_P4_BSU_ESCR0); 104 return msr - MSR_P4_BSU_ESCR0;
105 } 105 }
106 } 106 }
107 return 0; 107 return 0;
@@ -113,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
113{ 113{
114 BUG_ON(counter > NMI_MAX_COUNTER_BITS); 114 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
115 115
116 return (!test_bit(counter, perfctr_nmi_owner)); 116 return !test_bit(counter, perfctr_nmi_owner);
117} 117}
118 118
119/* checks the an msr for availability */ 119/* checks the an msr for availability */
@@ -124,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr)
124 counter = nmi_perfctr_msr_to_bit(msr); 124 counter = nmi_perfctr_msr_to_bit(msr);
125 BUG_ON(counter > NMI_MAX_COUNTER_BITS); 125 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
126 126
127 return (!test_bit(counter, perfctr_nmi_owner)); 127 return !test_bit(counter, perfctr_nmi_owner);
128} 128}
129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); 129EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
130 130
@@ -237,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz)
237 */ 237 */
238 counter_val = (u64)cpu_khz * 1000; 238 counter_val = (u64)cpu_khz * 1000;
239 do_div(counter_val, retval); 239 do_div(counter_val, retval);
240 if (counter_val > 0x7fffffffULL) { 240 if (counter_val > 0x7fffffffULL) {
241 u64 count = (u64)cpu_khz * 1000; 241 u64 count = (u64)cpu_khz * 1000;
242 do_div(count, 0x7fffffffUL); 242 do_div(count, 0x7fffffffUL);
243 retval = count + 1; 243 retval = count + 1;
@@ -251,7 +251,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr,
251 u64 count = (u64)cpu_khz * 1000; 251 u64 count = (u64)cpu_khz * 1000;
252 252
253 do_div(count, nmi_hz); 253 do_div(count, nmi_hz);
254 if(descr) 254 if (descr)
255 pr_debug("setting %s to -0x%08Lx\n", descr, count); 255 pr_debug("setting %s to -0x%08Lx\n", descr, count);
256 wrmsrl(perfctr_msr, 0 - count); 256 wrmsrl(perfctr_msr, 0 - count);
257} 257}
@@ -262,7 +262,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr,
262 u64 count = (u64)cpu_khz * 1000; 262 u64 count = (u64)cpu_khz * 1000;
263 263
264 do_div(count, nmi_hz); 264 do_div(count, nmi_hz);
265 if(descr) 265 if (descr)
266 pr_debug("setting %s to -0x%08Lx\n", descr, count); 266 pr_debug("setting %s to -0x%08Lx\n", descr, count);
267 wrmsr(perfctr_msr, (u32)(-count), 0); 267 wrmsr(perfctr_msr, (u32)(-count), 0);
268} 268}
@@ -296,7 +296,7 @@ static int setup_k7_watchdog(unsigned nmi_hz)
296 296
297 /* setup the timer */ 297 /* setup the timer */
298 wrmsr(evntsel_msr, evntsel, 0); 298 wrmsr(evntsel_msr, evntsel, 0);
299 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); 299 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz);
300 300
301 /* initialize the wd struct before enabling */ 301 /* initialize the wd struct before enabling */
302 wd->perfctr_msr = perfctr_msr; 302 wd->perfctr_msr = perfctr_msr;
@@ -387,7 +387,7 @@ static int setup_p6_watchdog(unsigned nmi_hz)
387 /* setup the timer */ 387 /* setup the timer */
388 wrmsr(evntsel_msr, evntsel, 0); 388 wrmsr(evntsel_msr, evntsel, 0);
389 nmi_hz = adjust_for_32bit_ctr(nmi_hz); 389 nmi_hz = adjust_for_32bit_ctr(nmi_hz);
390 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); 390 write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz);
391 391
392 /* initialize the wd struct before enabling */ 392 /* initialize the wd struct before enabling */
393 wd->perfctr_msr = perfctr_msr; 393 wd->perfctr_msr = perfctr_msr;
@@ -415,7 +415,7 @@ static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
415 apic_write(APIC_LVTPC, APIC_DM_NMI); 415 apic_write(APIC_LVTPC, APIC_DM_NMI);
416 416
417 /* P6/ARCH_PERFMON has 32 bit counter write */ 417 /* P6/ARCH_PERFMON has 32 bit counter write */
418 write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); 418 write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz);
419} 419}
420 420
421static const struct wd_ops p6_wd_ops = { 421static const struct wd_ops p6_wd_ops = {
@@ -490,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz)
490 if (smp_num_siblings == 2) { 490 if (smp_num_siblings == 2) {
491 unsigned int ebx, apicid; 491 unsigned int ebx, apicid;
492 492
493 ebx = cpuid_ebx(1); 493 ebx = cpuid_ebx(1);
494 apicid = (ebx >> 24) & 0xff; 494 apicid = (ebx >> 24) & 0xff;
495 ht_num = apicid & 1; 495 ht_num = apicid & 1;
496 } else 496 } else
497#endif 497#endif
498 ht_num = 0; 498 ht_num = 0;
@@ -544,7 +544,7 @@ static int setup_p4_watchdog(unsigned nmi_hz)
544 } 544 }
545 545
546 evntsel = P4_ESCR_EVENT_SELECT(0x3F) 546 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
547 | P4_ESCR_OS 547 | P4_ESCR_OS
548 | P4_ESCR_USR; 548 | P4_ESCR_USR;
549 549
550 cccr_val |= P4_CCCR_THRESHOLD(15) 550 cccr_val |= P4_CCCR_THRESHOLD(15)
@@ -612,7 +612,7 @@ static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz)
612{ 612{
613 unsigned dummy; 613 unsigned dummy;
614 /* 614 /*
615 * P4 quirks: 615 * P4 quirks:
616 * - An overflown perfctr will assert its interrupt 616 * - An overflown perfctr will assert its interrupt
617 * until the OVF flag in its CCCR is cleared. 617 * until the OVF flag in its CCCR is cleared.
618 * - LVTPC is masked on interrupt and must be 618 * - LVTPC is masked on interrupt and must be
@@ -662,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz)
662 * NOTE: Corresponding bit = 0 in ebx indicates event present. 662 * NOTE: Corresponding bit = 0 in ebx indicates event present.
663 */ 663 */
664 cpuid(10, &(eax.full), &ebx, &unused, &unused); 664 cpuid(10, &(eax.full), &ebx, &unused, &unused);
665 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || 665 if ((eax.split.mask_length <
666 (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
666 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) 667 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
667 return 0; 668 return 0;
668 669
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c
index d5e30397246b..62ac8cb6ba27 100644
--- a/arch/x86/kernel/cpu/proc.c
+++ b/arch/x86/kernel/cpu/proc.c
@@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v)
116 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); 116 seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize);
117#endif 117#endif
118 seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); 118 seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size);
119#ifdef CONFIG_X86_64
120 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); 119 seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment);
121 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", 120 seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n",
122 c->x86_phys_bits, c->x86_virt_bits); 121 c->x86_phys_bits, c->x86_virt_bits);
123#endif
124 122
125 seq_printf(m, "power management:"); 123 seq_printf(m, "power management:");
126 for (i = 0; i < 32; i++) { 124 for (i = 0; i < 32; i++) {
@@ -128,7 +126,7 @@ static int show_cpuinfo(struct seq_file *m, void *v)
128 if (i < ARRAY_SIZE(x86_power_flags) && 126 if (i < ARRAY_SIZE(x86_power_flags) &&
129 x86_power_flags[i]) 127 x86_power_flags[i])
130 seq_printf(m, "%s%s", 128 seq_printf(m, "%s%s",
131 x86_power_flags[i][0]?" ":"", 129 x86_power_flags[i][0] ? " " : "",
132 x86_power_flags[i]); 130 x86_power_flags[i]);
133 else 131 else
134 seq_printf(m, " [%d]", i); 132 seq_printf(m, " [%d]", i);
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c
new file mode 100644
index 000000000000..a640ae5ad201
--- /dev/null
+++ b/arch/x86/kernel/cpu/sched.c
@@ -0,0 +1,55 @@
1#include <linux/sched.h>
2#include <linux/math64.h>
3#include <linux/percpu.h>
4#include <linux/irqflags.h>
5
6#include <asm/cpufeature.h>
7#include <asm/processor.h>
8
9#ifdef CONFIG_SMP
10
11static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched);
12
13static unsigned long scale_aperfmperf(void)
14{
15 struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched);
16 unsigned long ratio, flags;
17
18 local_irq_save(flags);
19 get_aperfmperf(&val);
20 local_irq_restore(flags);
21
22 ratio = calc_aperfmperf_ratio(old, &val);
23 *old = val;
24
25 return ratio;
26}
27
28unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu)
29{
30 /*
31 * do aperf/mperf on the cpu level because it includes things
32 * like turbo mode, which are relevant to full cores.
33 */
34 if (boot_cpu_has(X86_FEATURE_APERFMPERF))
35 return scale_aperfmperf();
36
37 /*
38 * maybe have something cpufreq here
39 */
40
41 return default_scale_freq_power(sd, cpu);
42}
43
44unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu)
45{
46 /*
47 * aperf/mperf already includes the smt gain
48 */
49 if (boot_cpu_has(X86_FEATURE_APERFMPERF))
50 return SCHED_LOAD_SCALE;
51
52 return default_scale_smt_power(sd, cpu);
53}
54
55#endif
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
index 284c399e3234..bc24f514ec93 100644
--- a/arch/x86/kernel/cpu/vmware.c
+++ b/arch/x86/kernel/cpu/vmware.c
@@ -49,17 +49,17 @@ static inline int __vmware_platform(void)
49 49
50static unsigned long __vmware_get_tsc_khz(void) 50static unsigned long __vmware_get_tsc_khz(void)
51{ 51{
52 uint64_t tsc_hz; 52 uint64_t tsc_hz;
53 uint32_t eax, ebx, ecx, edx; 53 uint32_t eax, ebx, ecx, edx;
54 54
55 VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); 55 VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
56 56
57 if (ebx == UINT_MAX) 57 if (ebx == UINT_MAX)
58 return 0; 58 return 0;
59 tsc_hz = eax | (((uint64_t)ebx) << 32); 59 tsc_hz = eax | (((uint64_t)ebx) << 32);
60 do_div(tsc_hz, 1000); 60 do_div(tsc_hz, 1000);
61 BUG_ON(tsc_hz >> 32); 61 BUG_ON(tsc_hz >> 32);
62 return tsc_hz; 62 return tsc_hz;
63} 63}
64 64
65/* 65/*
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c
index b4f14c6c09d9..37250fe490b1 100644
--- a/arch/x86/kernel/doublefault_32.c
+++ b/arch/x86/kernel/doublefault_32.c
@@ -27,9 +27,7 @@ static void doublefault_fn(void)
27 27
28 if (ptr_ok(gdt)) { 28 if (ptr_ok(gdt)) {
29 gdt += GDT_ENTRY_TSS << 3; 29 gdt += GDT_ENTRY_TSS << 3;
30 tss = *(u16 *)(gdt+2); 30 tss = get_desc_base((struct desc_struct *)gdt);
31 tss += *(u8 *)(gdt+4) << 16;
32 tss += *(u8 *)(gdt+7) << 24;
33 printk(KERN_EMERG "double fault, tss at %08lx\n", tss); 31 printk(KERN_EMERG "double fault, tss at %08lx\n", tss);
34 32
35 if (ptr_ok(tss)) { 33 if (ptr_ok(tss)) {
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c
index 48bfe1386038..ef42a038f1a6 100644
--- a/arch/x86/kernel/ds.c
+++ b/arch/x86/kernel/ds.c
@@ -509,15 +509,15 @@ enum bts_field {
509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask) 509 bts_escape = ((unsigned long)-1 & ~bts_qual_mask)
510}; 510};
511 511
512static inline unsigned long bts_get(const char *base, enum bts_field field) 512static inline unsigned long bts_get(const char *base, unsigned long field)
513{ 513{
514 base += (ds_cfg.sizeof_ptr_field * field); 514 base += (ds_cfg.sizeof_ptr_field * field);
515 return *(unsigned long *)base; 515 return *(unsigned long *)base;
516} 516}
517 517
518static inline void bts_set(char *base, enum bts_field field, unsigned long val) 518static inline void bts_set(char *base, unsigned long field, unsigned long val)
519{ 519{
520 base += (ds_cfg.sizeof_ptr_field * field);; 520 base += (ds_cfg.sizeof_ptr_field * field);
521 (*(unsigned long *)base) = val; 521 (*(unsigned long *)base) = val;
522} 522}
523 523
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index c8405718a4c3..2d8a371d4339 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -15,7 +15,6 @@
15#include <linux/bug.h> 15#include <linux/bug.h>
16#include <linux/nmi.h> 16#include <linux/nmi.h>
17#include <linux/sysfs.h> 17#include <linux/sysfs.h>
18#include <linux/ftrace.h>
19 18
20#include <asm/stacktrace.h> 19#include <asm/stacktrace.h>
21 20
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 5cb5725b2bae..147005a1cc3c 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -115,7 +115,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size,
115{ 115{
116 int x = e820x->nr_map; 116 int x = e820x->nr_map;
117 117
118 if (x == ARRAY_SIZE(e820x->map)) { 118 if (x >= ARRAY_SIZE(e820x->map)) {
119 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); 119 printk(KERN_ERR "Ooops! Too many entries in the memory map!\n");
120 return; 120 return;
121 } 121 }
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c251be745107..d59fe323807e 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -146,7 +146,7 @@ ENTRY(ftrace_graph_caller)
146END(ftrace_graph_caller) 146END(ftrace_graph_caller)
147 147
148GLOBAL(return_to_handler) 148GLOBAL(return_to_handler)
149 subq $80, %rsp 149 subq $24, %rsp
150 150
151 /* Save the return values */ 151 /* Save the return values */
152 movq %rax, (%rsp) 152 movq %rax, (%rsp)
@@ -155,10 +155,10 @@ GLOBAL(return_to_handler)
155 155
156 call ftrace_return_to_handler 156 call ftrace_return_to_handler
157 157
158 movq %rax, 72(%rsp) 158 movq %rax, 16(%rsp)
159 movq 8(%rsp), %rdx 159 movq 8(%rsp), %rdx
160 movq (%rsp), %rax 160 movq (%rsp), %rax
161 addq $72, %rsp 161 addq $16, %rsp
162 retq 162 retq
163#endif 163#endif
164 164
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index cc827ac9e8d3..7ffec6b3b331 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -439,7 +439,6 @@ is386: movl $2,%ecx # set MP
439 jne 1f 439 jne 1f
440 movl $per_cpu__gdt_page,%eax 440 movl $per_cpu__gdt_page,%eax
441 movl $per_cpu__stack_canary,%ecx 441 movl $per_cpu__stack_canary,%ecx
442 subl $20, %ecx
443 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) 442 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
444 shrl $16, %ecx 443 shrl $16, %ecx
445 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) 444 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index b0cdde6932f5..74656d1d4e30 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec)
104 seq_printf(p, " Threshold APIC interrupts\n"); 104 seq_printf(p, " Threshold APIC interrupts\n");
105# endif 105# endif
106#endif 106#endif
107#ifdef CONFIG_X86_NEW_MCE 107#ifdef CONFIG_X86_MCE
108 seq_printf(p, "%*s: ", prec, "MCE"); 108 seq_printf(p, "%*s: ", prec, "MCE");
109 for_each_online_cpu(j) 109 for_each_online_cpu(j)
110 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); 110 seq_printf(p, "%10u ", per_cpu(mce_exception_count, j));
@@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
200 sum += irq_stats(cpu)->irq_threshold_count; 200 sum += irq_stats(cpu)->irq_threshold_count;
201# endif 201# endif
202#endif 202#endif
203#ifdef CONFIG_X86_NEW_MCE 203#ifdef CONFIG_X86_MCE
204 sum += per_cpu(mce_exception_count, cpu); 204 sum += per_cpu(mce_exception_count, cpu);
205 sum += per_cpu(mce_poll_count, cpu); 205 sum += per_cpu(mce_poll_count, cpu);
206#endif 206#endif
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 3b09634a5153..7d35d0fe2329 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs)
218void fixup_irqs(void) 218void fixup_irqs(void)
219{ 219{
220 unsigned int irq; 220 unsigned int irq;
221 static int warned;
222 struct irq_desc *desc; 221 struct irq_desc *desc;
223 222
224 for_each_irq_desc(irq, desc) { 223 for_each_irq_desc(irq, desc) {
@@ -236,8 +235,8 @@ void fixup_irqs(void)
236 } 235 }
237 if (desc->chip->set_affinity) 236 if (desc->chip->set_affinity)
238 desc->chip->set_affinity(irq, affinity); 237 desc->chip->set_affinity(irq, affinity);
239 else if (desc->action && !(warned++)) 238 else if (desc->action)
240 printk("Cannot set affinity for irq %i\n", irq); 239 printk_once("Cannot set affinity for irq %i\n", irq);
241 } 240 }
242 241
243#if 0 242#if 0
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c
index 92b7703d3d58..ccf8ab54f31a 100644
--- a/arch/x86/kernel/irqinit.c
+++ b/arch/x86/kernel/irqinit.c
@@ -190,7 +190,7 @@ static void __init apic_intr_init(void)
190#ifdef CONFIG_X86_MCE_THRESHOLD 190#ifdef CONFIG_X86_MCE_THRESHOLD
191 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); 191 alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt);
192#endif 192#endif
193#if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) 193#if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC)
194 alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); 194 alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt);
195#endif 195#endif
196 196
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index c664d515f613..63b0ec8d3d4a 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -34,7 +34,6 @@
34struct kvm_para_state { 34struct kvm_para_state {
35 u8 mmu_queue[MMU_QUEUE_SIZE]; 35 u8 mmu_queue[MMU_QUEUE_SIZE];
36 int mmu_queue_len; 36 int mmu_queue_len;
37 enum paravirt_lazy_mode mode;
38}; 37};
39 38
40static DEFINE_PER_CPU(struct kvm_para_state, para_state); 39static DEFINE_PER_CPU(struct kvm_para_state, para_state);
@@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
77{ 76{
78 struct kvm_para_state *state = kvm_para_state(); 77 struct kvm_para_state *state = kvm_para_state();
79 78
80 if (state->mode != PARAVIRT_LAZY_MMU) { 79 if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) {
81 kvm_mmu_op(buffer, len); 80 kvm_mmu_op(buffer, len);
82 return; 81 return;
83 } 82 }
@@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn)
185 184
186static void kvm_enter_lazy_mmu(void) 185static void kvm_enter_lazy_mmu(void)
187{ 186{
188 struct kvm_para_state *state = kvm_para_state();
189
190 paravirt_enter_lazy_mmu(); 187 paravirt_enter_lazy_mmu();
191 state->mode = paravirt_get_lazy_mode();
192} 188}
193 189
194static void kvm_leave_lazy_mmu(void) 190static void kvm_leave_lazy_mmu(void)
@@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void)
197 193
198 mmu_queue_flush(state); 194 mmu_queue_flush(state);
199 paravirt_leave_lazy_mmu(); 195 paravirt_leave_lazy_mmu();
200 state->mode = paravirt_get_lazy_mode();
201} 196}
202 197
203static void __init paravirt_ops_setup(void) 198static void __init paravirt_ops_setup(void)
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 223af43f1526..e5efcdcca31b 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -50,8 +50,8 @@ static unsigned long kvm_get_wallclock(void)
50 struct timespec ts; 50 struct timespec ts;
51 int low, high; 51 int low, high;
52 52
53 low = (int)__pa(&wall_clock); 53 low = (int)__pa_symbol(&wall_clock);
54 high = ((u64)__pa(&wall_clock) >> 32); 54 high = ((u64)__pa_symbol(&wall_clock) >> 32);
55 native_write_msr(MSR_KVM_WALL_CLOCK, low, high); 55 native_write_msr(MSR_KVM_WALL_CLOCK, low, high);
56 56
57 vcpu_time = &get_cpu_var(hv_clock); 57 vcpu_time = &get_cpu_var(hv_clock);
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 651c93b28862..fcd513bf2846 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -482,11 +482,11 @@ static void __init construct_ioapic_table(int mpc_default_type)
482 MP_bus_info(&bus); 482 MP_bus_info(&bus);
483 } 483 }
484 484
485 ioapic.type = MP_IOAPIC; 485 ioapic.type = MP_IOAPIC;
486 ioapic.apicid = 2; 486 ioapic.apicid = 2;
487 ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; 487 ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01;
488 ioapic.flags = MPC_APIC_USABLE; 488 ioapic.flags = MPC_APIC_USABLE;
489 ioapic.apicaddr = 0xFEC00000; 489 ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE;
490 MP_ioapic_info(&ioapic); 490 MP_ioapic_info(&ioapic);
491 491
492 /* 492 /*
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 98fd6cd4e3a4..7dd950094178 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -1,6 +1,7 @@
1/* ----------------------------------------------------------------------- * 1/* ----------------------------------------------------------------------- *
2 * 2 *
3 * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved 3 * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved
4 * Copyright 2009 Intel Corporation; author: H. Peter Anvin
4 * 5 *
5 * This program is free software; you can redistribute it and/or modify 6 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by 7 * it under the terms of the GNU General Public License as published by
@@ -80,11 +81,8 @@ static ssize_t msr_read(struct file *file, char __user *buf,
80 81
81 for (; count; count -= 8) { 82 for (; count; count -= 8) {
82 err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); 83 err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]);
83 if (err) { 84 if (err)
84 if (err == -EFAULT) /* Fix idiotic error code */
85 err = -EIO;
86 break; 85 break;
87 }
88 if (copy_to_user(tmp, &data, 8)) { 86 if (copy_to_user(tmp, &data, 8)) {
89 err = -EFAULT; 87 err = -EFAULT;
90 break; 88 break;
@@ -115,11 +113,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
115 break; 113 break;
116 } 114 }
117 err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); 115 err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]);
118 if (err) { 116 if (err)
119 if (err == -EFAULT) /* Fix idiotic error code */
120 err = -EIO;
121 break; 117 break;
122 }
123 tmp += 2; 118 tmp += 2;
124 bytes += 8; 119 bytes += 8;
125 } 120 }
@@ -127,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf,
127 return bytes ? bytes : err; 122 return bytes ? bytes : err;
128} 123}
129 124
125static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg)
126{
127 u32 __user *uregs = (u32 __user *)arg;
128 u32 regs[8];
129 int cpu = iminor(file->f_path.dentry->d_inode);
130 int err;
131
132 switch (ioc) {
133 case X86_IOC_RDMSR_REGS:
134 if (!(file->f_mode & FMODE_READ)) {
135 err = -EBADF;
136 break;
137 }
138 if (copy_from_user(&regs, uregs, sizeof regs)) {
139 err = -EFAULT;
140 break;
141 }
142 err = rdmsr_safe_regs_on_cpu(cpu, regs);
143 if (err)
144 break;
145 if (copy_to_user(uregs, &regs, sizeof regs))
146 err = -EFAULT;
147 break;
148
149 case X86_IOC_WRMSR_REGS:
150 if (!(file->f_mode & FMODE_WRITE)) {
151 err = -EBADF;
152 break;
153 }
154 if (copy_from_user(&regs, uregs, sizeof regs)) {
155 err = -EFAULT;
156 break;
157 }
158 err = wrmsr_safe_regs_on_cpu(cpu, regs);
159 if (err)
160 break;
161 if (copy_to_user(uregs, &regs, sizeof regs))
162 err = -EFAULT;
163 break;
164
165 default:
166 err = -ENOTTY;
167 break;
168 }
169
170 return err;
171}
172
130static int msr_open(struct inode *inode, struct file *file) 173static int msr_open(struct inode *inode, struct file *file)
131{ 174{
132 unsigned int cpu = iminor(file->f_path.dentry->d_inode); 175 unsigned int cpu = iminor(file->f_path.dentry->d_inode);
@@ -157,6 +200,8 @@ static const struct file_operations msr_fops = {
157 .read = msr_read, 200 .read = msr_read,
158 .write = msr_write, 201 .write = msr_write,
159 .open = msr_open, 202 .open = msr_open,
203 .unlocked_ioctl = msr_ioctl,
204 .compat_ioctl = msr_ioctl,
160}; 205};
161 206
162static int __cpuinit msr_device_create(int cpu) 207static int __cpuinit msr_device_create(int cpu)
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index 70ec9b951d76..f5b0b4a01fb2 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -362,8 +362,9 @@ struct pv_cpu_ops pv_cpu_ops = {
362#endif 362#endif
363 .wbinvd = native_wbinvd, 363 .wbinvd = native_wbinvd,
364 .read_msr = native_read_msr_safe, 364 .read_msr = native_read_msr_safe,
365 .read_msr_amd = native_read_msr_amd_safe, 365 .rdmsr_regs = native_rdmsr_safe_regs,
366 .write_msr = native_write_msr_safe, 366 .write_msr = native_write_msr_safe,
367 .wrmsr_regs = native_wrmsr_safe_regs,
367 .read_tsc = native_read_tsc, 368 .read_tsc = native_read_tsc,
368 .read_pmc = native_read_pmc, 369 .read_pmc = native_read_pmc,
369 .read_tscp = native_read_tscp, 370 .read_tscp = native_read_tscp,
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c
index d71c8655905b..64b838eac18c 100644
--- a/arch/x86/kernel/pci-dma.c
+++ b/arch/x86/kernel/pci-dma.c
@@ -225,10 +225,8 @@ static __init int iommu_setup(char *p)
225 if (!strncmp(p, "soft", 4)) 225 if (!strncmp(p, "soft", 4))
226 swiotlb = 1; 226 swiotlb = 1;
227#endif 227#endif
228 if (!strncmp(p, "pt", 2)) { 228 if (!strncmp(p, "pt", 2))
229 iommu_pass_through = 1; 229 iommu_pass_through = 1;
230 return 1;
231 }
232 230
233 gart_parse_options(p); 231 gart_parse_options(p);
234 232
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 59f4524984af..4cf79567cdab 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -61,9 +61,6 @@
61 61
62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); 62asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
63 63
64DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
65EXPORT_PER_CPU_SYMBOL(current_task);
66
67/* 64/*
68 * Return saved PC of a blocked thread. 65 * Return saved PC of a blocked thread.
69 */ 66 */
@@ -350,14 +347,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
350 *next = &next_p->thread; 347 *next = &next_p->thread;
351 int cpu = smp_processor_id(); 348 int cpu = smp_processor_id();
352 struct tss_struct *tss = &per_cpu(init_tss, cpu); 349 struct tss_struct *tss = &per_cpu(init_tss, cpu);
350 bool preload_fpu;
353 351
354 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ 352 /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */
355 353
356 __unlazy_fpu(prev_p); 354 /*
355 * If the task has used fpu the last 5 timeslices, just do a full
356 * restore of the math state immediately to avoid the trap; the
357 * chances of needing FPU soon are obviously high now
358 */
359 preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
357 360
361 __unlazy_fpu(prev_p);
358 362
359 /* we're going to use this soon, after a few expensive things */ 363 /* we're going to use this soon, after a few expensive things */
360 if (next_p->fpu_counter > 5) 364 if (preload_fpu)
361 prefetch(next->xstate); 365 prefetch(next->xstate);
362 366
363 /* 367 /*
@@ -398,6 +402,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
398 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) 402 task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT))
399 __switch_to_xtra(prev_p, next_p, tss); 403 __switch_to_xtra(prev_p, next_p, tss);
400 404
405 /* If we're going to preload the fpu context, make sure clts
406 is run while we're batching the cpu state updates. */
407 if (preload_fpu)
408 clts();
409
401 /* 410 /*
402 * Leave lazy mode, flushing any hypercalls made here. 411 * Leave lazy mode, flushing any hypercalls made here.
403 * This must be done before restoring TLS segments so 412 * This must be done before restoring TLS segments so
@@ -407,15 +416,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
407 */ 416 */
408 arch_end_context_switch(next_p); 417 arch_end_context_switch(next_p);
409 418
410 /* If the task has used fpu the last 5 timeslices, just do a full 419 if (preload_fpu)
411 * restore of the math state immediately to avoid the trap; the 420 __math_state_restore();
412 * chances of needing FPU soon are obviously high now
413 *
414 * tsk_used_math() checks prevent calling math_state_restore(),
415 * which can sleep in the case of !tsk_used_math()
416 */
417 if (tsk_used_math(next_p) && next_p->fpu_counter > 5)
418 math_state_restore();
419 421
420 /* 422 /*
421 * Restore %gs if needed (which is common) 423 * Restore %gs if needed (which is common)
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ebefb5407b9d..ad535b683170 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -55,9 +55,6 @@
55 55
56asmlinkage extern void ret_from_fork(void); 56asmlinkage extern void ret_from_fork(void);
57 57
58DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
59EXPORT_PER_CPU_SYMBOL(current_task);
60
61DEFINE_PER_CPU(unsigned long, old_rsp); 58DEFINE_PER_CPU(unsigned long, old_rsp);
62static DEFINE_PER_CPU(unsigned char, is_idle); 59static DEFINE_PER_CPU(unsigned char, is_idle);
63 60
@@ -386,9 +383,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
386 int cpu = smp_processor_id(); 383 int cpu = smp_processor_id();
387 struct tss_struct *tss = &per_cpu(init_tss, cpu); 384 struct tss_struct *tss = &per_cpu(init_tss, cpu);
388 unsigned fsindex, gsindex; 385 unsigned fsindex, gsindex;
386 bool preload_fpu;
387
388 /*
389 * If the task has used fpu the last 5 timeslices, just do a full
390 * restore of the math state immediately to avoid the trap; the
391 * chances of needing FPU soon are obviously high now
392 */
393 preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5;
389 394
390 /* we're going to use this soon, after a few expensive things */ 395 /* we're going to use this soon, after a few expensive things */
391 if (next_p->fpu_counter > 5) 396 if (preload_fpu)
392 prefetch(next->xstate); 397 prefetch(next->xstate);
393 398
394 /* 399 /*
@@ -419,6 +424,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
419 424
420 load_TLS(next, cpu); 425 load_TLS(next, cpu);
421 426
427 /* Must be after DS reload */
428 unlazy_fpu(prev_p);
429
430 /* Make sure cpu is ready for new context */
431 if (preload_fpu)
432 clts();
433
422 /* 434 /*
423 * Leave lazy mode, flushing any hypercalls made here. 435 * Leave lazy mode, flushing any hypercalls made here.
424 * This must be done before restoring TLS segments so 436 * This must be done before restoring TLS segments so
@@ -459,9 +471,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
459 wrmsrl(MSR_KERNEL_GS_BASE, next->gs); 471 wrmsrl(MSR_KERNEL_GS_BASE, next->gs);
460 prev->gsindex = gsindex; 472 prev->gsindex = gsindex;
461 473
462 /* Must be after DS reload */
463 unlazy_fpu(prev_p);
464
465 /* 474 /*
466 * Switch the PDA and FPU contexts. 475 * Switch the PDA and FPU contexts.
467 */ 476 */
@@ -480,15 +489,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
480 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) 489 task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV))
481 __switch_to_xtra(prev_p, next_p, tss); 490 __switch_to_xtra(prev_p, next_p, tss);
482 491
483 /* If the task has used fpu the last 5 timeslices, just do a full 492 /*
484 * restore of the math state immediately to avoid the trap; the 493 * Preload the FPU context, now that we've determined that the
485 * chances of needing FPU soon are obviously high now 494 * task is likely to be using it.
486 *
487 * tsk_used_math() checks prevent calling math_state_restore(),
488 * which can sleep in the case of !tsk_used_math()
489 */ 495 */
490 if (tsk_used_math(next_p) && next_p->fpu_counter > 5) 496 if (preload_fpu)
491 math_state_restore(); 497 __math_state_restore();
492 return prev_p; 498 return prev_p;
493} 499}
494 500
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index af71d06624bf..6c3b2c6fd772 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -508,7 +508,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev)
508 508
509 pci_read_config_dword(nb_ht, 0x60, &val); 509 pci_read_config_dword(nb_ht, 0x60, &val);
510 set_dev_node(&dev->dev, val & 7); 510 set_dev_node(&dev->dev, val & 7);
511 pci_dev_put(dev); 511 pci_dev_put(nb_ht);
512} 512}
513 513
514DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, 514DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB,
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index a06e8d101844..27349f92a6d7 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -4,6 +4,7 @@
4#include <linux/pm.h> 4#include <linux/pm.h>
5#include <linux/efi.h> 5#include <linux/efi.h>
6#include <linux/dmi.h> 6#include <linux/dmi.h>
7#include <linux/tboot.h>
7#include <acpi/reboot.h> 8#include <acpi/reboot.h>
8#include <asm/io.h> 9#include <asm/io.h>
9#include <asm/apic.h> 10#include <asm/apic.h>
@@ -508,6 +509,8 @@ static void native_machine_emergency_restart(void)
508 if (reboot_emergency) 509 if (reboot_emergency)
509 emergency_vmx_disable_all(); 510 emergency_vmx_disable_all();
510 511
512 tboot_shutdown(TB_SHUTDOWN_REBOOT);
513
511 /* Tell the BIOS if we want cold or warm reboot */ 514 /* Tell the BIOS if we want cold or warm reboot */
512 *((unsigned short *)__va(0x472)) = reboot_mode; 515 *((unsigned short *)__va(0x472)) = reboot_mode;
513 516
@@ -634,6 +637,8 @@ static void native_machine_halt(void)
634 /* stop other cpus and apics */ 637 /* stop other cpus and apics */
635 machine_shutdown(); 638 machine_shutdown();
636 639
640 tboot_shutdown(TB_SHUTDOWN_HALT);
641
637 /* stop this cpu */ 642 /* stop this cpu */
638 stop_this_cpu(NULL); 643 stop_this_cpu(NULL);
639} 644}
@@ -645,6 +650,8 @@ static void native_machine_power_off(void)
645 machine_shutdown(); 650 machine_shutdown();
646 pm_power_off(); 651 pm_power_off();
647 } 652 }
653 /* a fallback in case there is no PM info available */
654 tboot_shutdown(TB_SHUTDOWN_HALT);
648} 655}
649 656
650struct machine_ops machine_ops = { 657struct machine_ops machine_ops = {
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 63f32d220ef2..19f15c4076fb 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -66,6 +66,7 @@
66 66
67#include <linux/percpu.h> 67#include <linux/percpu.h>
68#include <linux/crash_dump.h> 68#include <linux/crash_dump.h>
69#include <linux/tboot.h>
69 70
70#include <video/edid.h> 71#include <video/edid.h>
71 72
@@ -711,6 +712,21 @@ void __init setup_arch(char **cmdline_p)
711 printk(KERN_INFO "Command line: %s\n", boot_command_line); 712 printk(KERN_INFO "Command line: %s\n", boot_command_line);
712#endif 713#endif
713 714
715 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
716 *cmdline_p = command_line;
717
718#ifdef CONFIG_X86_64
719 /*
720 * Must call this twice: Once just to detect whether hardware doesn't
721 * support NX (so that the early EHCI debug console setup can safely
722 * call set_fixmap(), and then again after parsing early parameters to
723 * honor the respective command line option.
724 */
725 check_efer();
726#endif
727
728 parse_early_param();
729
714 /* VMI may relocate the fixmap; do this before touching ioremap area */ 730 /* VMI may relocate the fixmap; do this before touching ioremap area */
715 vmi_init(); 731 vmi_init();
716 732
@@ -793,11 +809,6 @@ void __init setup_arch(char **cmdline_p)
793#endif 809#endif
794#endif 810#endif
795 811
796 strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE);
797 *cmdline_p = command_line;
798
799 parse_early_param();
800
801#ifdef CONFIG_X86_64 812#ifdef CONFIG_X86_64
802 check_efer(); 813 check_efer();
803#endif 814#endif
@@ -977,6 +988,8 @@ void __init setup_arch(char **cmdline_p)
977 paravirt_pagetable_setup_done(swapper_pg_dir); 988 paravirt_pagetable_setup_done(swapper_pg_dir);
978 paravirt_post_allocator_init(); 989 paravirt_post_allocator_init();
979 990
991 tboot_probe();
992
980#ifdef CONFIG_X86_64 993#ifdef CONFIG_X86_64
981 map_vsyscall(); 994 map_vsyscall();
982#endif 995#endif
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 07d81916f212..d559af913e1f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset);
55#define PERCPU_FIRST_CHUNK_RESERVE 0 55#define PERCPU_FIRST_CHUNK_RESERVE 0
56#endif 56#endif
57 57
58#ifdef CONFIG_X86_32
58/** 59/**
59 * pcpu_need_numa - determine percpu allocation needs to consider NUMA 60 * pcpu_need_numa - determine percpu allocation needs to consider NUMA
60 * 61 *
@@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void)
83#endif 84#endif
84 return false; 85 return false;
85} 86}
87#endif
86 88
87/** 89/**
88 * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu 90 * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu
@@ -124,308 +126,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size,
124} 126}
125 127
126/* 128/*
127 * Large page remap allocator 129 * Helpers for first chunk memory allocation
128 *
129 * This allocator uses PMD page as unit. A PMD page is allocated for
130 * each cpu and each is remapped into vmalloc area using PMD mapping.
131 * As PMD page is quite large, only part of it is used for the first
132 * chunk. Unused part is returned to the bootmem allocator.
133 *
134 * So, the PMD pages are mapped twice - once to the physical mapping
135 * and to the vmalloc area for the first percpu chunk. The double
136 * mapping does add one more PMD TLB entry pressure but still is much
137 * better than only using 4k mappings while still being NUMA friendly.
138 */ 130 */
139#ifdef CONFIG_NEED_MULTIPLE_NODES 131static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
140struct pcpul_ent {
141 unsigned int cpu;
142 void *ptr;
143};
144
145static size_t pcpul_size;
146static struct pcpul_ent *pcpul_map;
147static struct vm_struct pcpul_vm;
148
149static struct page * __init pcpul_get_page(unsigned int cpu, int pageno)
150{ 132{
151 size_t off = (size_t)pageno << PAGE_SHIFT; 133 return pcpu_alloc_bootmem(cpu, size, align);
152
153 if (off >= pcpul_size)
154 return NULL;
155
156 return virt_to_page(pcpul_map[cpu].ptr + off);
157} 134}
158 135
159static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) 136static void __init pcpu_fc_free(void *ptr, size_t size)
160{ 137{
161 size_t map_size, dyn_size; 138 free_bootmem(__pa(ptr), size);
162 unsigned int cpu;
163 int i, j;
164 ssize_t ret;
165
166 if (!chosen) {
167 size_t vm_size = VMALLOC_END - VMALLOC_START;
168 size_t tot_size = nr_cpu_ids * PMD_SIZE;
169
170 /* on non-NUMA, embedding is better */
171 if (!pcpu_need_numa())
172 return -EINVAL;
173
174 /* don't consume more than 20% of vmalloc area */
175 if (tot_size > vm_size / 5) {
176 pr_info("PERCPU: too large chunk size %zuMB for "
177 "large page remap\n", tot_size >> 20);
178 return -EINVAL;
179 }
180 }
181
182 /* need PSE */
183 if (!cpu_has_pse) {
184 pr_warning("PERCPU: lpage allocator requires PSE\n");
185 return -EINVAL;
186 }
187
188 /*
189 * Currently supports only single page. Supporting multiple
190 * pages won't be too difficult if it ever becomes necessary.
191 */
192 pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE +
193 PERCPU_DYNAMIC_RESERVE);
194 if (pcpul_size > PMD_SIZE) {
195 pr_warning("PERCPU: static data is larger than large page, "
196 "can't use large page\n");
197 return -EINVAL;
198 }
199 dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE;
200
201 /* allocate pointer array and alloc large pages */
202 map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0]));
203 pcpul_map = alloc_bootmem(map_size);
204
205 for_each_possible_cpu(cpu) {
206 pcpul_map[cpu].cpu = cpu;
207 pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE,
208 PMD_SIZE);
209 if (!pcpul_map[cpu].ptr) {
210 pr_warning("PERCPU: failed to allocate large page "
211 "for cpu%u\n", cpu);
212 goto enomem;
213 }
214
215 /*
216 * Only use pcpul_size bytes and give back the rest.
217 *
218 * Ingo: The 2MB up-rounding bootmem is needed to make
219 * sure the partial 2MB page is still fully RAM - it's
220 * not well-specified to have a PAT-incompatible area
221 * (unmapped RAM, device memory, etc.) in that hole.
222 */
223 free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size),
224 PMD_SIZE - pcpul_size);
225
226 memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size);
227 }
228
229 /* allocate address and map */
230 pcpul_vm.flags = VM_ALLOC;
231 pcpul_vm.size = nr_cpu_ids * PMD_SIZE;
232 vm_area_register_early(&pcpul_vm, PMD_SIZE);
233
234 for_each_possible_cpu(cpu) {
235 pmd_t *pmd, pmd_v;
236
237 pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr +
238 cpu * PMD_SIZE);
239 pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)),
240 PAGE_KERNEL_LARGE);
241 set_pmd(pmd, pmd_v);
242 }
243
244 /* we're ready, commit */
245 pr_info("PERCPU: Remapped at %p with large pages, static data "
246 "%zu bytes\n", pcpul_vm.addr, static_size);
247
248 ret = pcpu_setup_first_chunk(pcpul_get_page, static_size,
249 PERCPU_FIRST_CHUNK_RESERVE, dyn_size,
250 PMD_SIZE, pcpul_vm.addr, NULL);
251
252 /* sort pcpul_map array for pcpu_lpage_remapped() */
253 for (i = 0; i < nr_cpu_ids - 1; i++)
254 for (j = i + 1; j < nr_cpu_ids; j++)
255 if (pcpul_map[i].ptr > pcpul_map[j].ptr) {
256 struct pcpul_ent tmp = pcpul_map[i];
257 pcpul_map[i] = pcpul_map[j];
258 pcpul_map[j] = tmp;
259 }
260
261 return ret;
262
263enomem:
264 for_each_possible_cpu(cpu)
265 if (pcpul_map[cpu].ptr)
266 free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size);
267 free_bootmem(__pa(pcpul_map), map_size);
268 return -ENOMEM;
269} 139}
270 140
271/** 141static int __init pcpu_cpu_distance(unsigned int from, unsigned int to)
272 * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area
273 * @kaddr: the kernel address in question
274 *
275 * Determine whether @kaddr falls in the pcpul recycled area. This is
276 * used by pageattr to detect VM aliases and break up the pcpu PMD
277 * mapping such that the same physical page is not mapped under
278 * different attributes.
279 *
280 * The recycled area is always at the tail of a partially used PMD
281 * page.
282 *
283 * RETURNS:
284 * Address of corresponding remapped pcpu address if match is found;
285 * otherwise, NULL.
286 */
287void *pcpu_lpage_remapped(void *kaddr)
288{ 142{
289 void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); 143#ifdef CONFIG_NEED_MULTIPLE_NODES
290 unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; 144 if (early_cpu_to_node(from) == early_cpu_to_node(to))
291 int left = 0, right = nr_cpu_ids - 1; 145 return LOCAL_DISTANCE;
292 int pos; 146 else
293 147 return REMOTE_DISTANCE;
294 /* pcpul in use at all? */
295 if (!pcpul_map)
296 return NULL;
297
298 /* okay, perform binary search */
299 while (left <= right) {
300 pos = (left + right) / 2;
301
302 if (pcpul_map[pos].ptr < pmd_addr)
303 left = pos + 1;
304 else if (pcpul_map[pos].ptr > pmd_addr)
305 right = pos - 1;
306 else {
307 /* it shouldn't be in the area for the first chunk */
308 WARN_ON(offset < pcpul_size);
309
310 return pcpul_vm.addr +
311 pcpul_map[pos].cpu * PMD_SIZE + offset;
312 }
313 }
314
315 return NULL;
316}
317#else 148#else
318static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) 149 return LOCAL_DISTANCE;
319{
320 return -EINVAL;
321}
322#endif 150#endif
323
324/*
325 * Embedding allocator
326 *
327 * The first chunk is sized to just contain the static area plus
328 * module and dynamic reserves and embedded into linear physical
329 * mapping so that it can use PMD mapping without additional TLB
330 * pressure.
331 */
332static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen)
333{
334 size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE;
335
336 /*
337 * If large page isn't supported, there's no benefit in doing
338 * this. Also, embedding allocation doesn't play well with
339 * NUMA.
340 */
341 if (!chosen && (!cpu_has_pse || pcpu_need_numa()))
342 return -EINVAL;
343
344 return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE,
345 reserve - PERCPU_FIRST_CHUNK_RESERVE, -1);
346} 151}
347 152
348/* 153static void __init pcpup_populate_pte(unsigned long addr)
349 * 4k page allocator
350 *
351 * This is the basic allocator. Static percpu area is allocated
352 * page-by-page and most of initialization is done by the generic
353 * setup function.
354 */
355static struct page **pcpu4k_pages __initdata;
356static int pcpu4k_nr_static_pages __initdata;
357
358static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno)
359{
360 if (pageno < pcpu4k_nr_static_pages)
361 return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno];
362 return NULL;
363}
364
365static void __init pcpu4k_populate_pte(unsigned long addr)
366{ 154{
367 populate_extra_pte(addr); 155 populate_extra_pte(addr);
368} 156}
369 157
370static ssize_t __init setup_pcpu_4k(size_t static_size)
371{
372 size_t pages_size;
373 unsigned int cpu;
374 int i, j;
375 ssize_t ret;
376
377 pcpu4k_nr_static_pages = PFN_UP(static_size);
378
379 /* unaligned allocations can't be freed, round up to page size */
380 pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids
381 * sizeof(pcpu4k_pages[0]));
382 pcpu4k_pages = alloc_bootmem(pages_size);
383
384 /* allocate and copy */
385 j = 0;
386 for_each_possible_cpu(cpu)
387 for (i = 0; i < pcpu4k_nr_static_pages; i++) {
388 void *ptr;
389
390 ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE);
391 if (!ptr) {
392 pr_warning("PERCPU: failed to allocate "
393 "4k page for cpu%u\n", cpu);
394 goto enomem;
395 }
396
397 memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE);
398 pcpu4k_pages[j++] = virt_to_page(ptr);
399 }
400
401 /* we're ready, commit */
402 pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n",
403 pcpu4k_nr_static_pages, static_size);
404
405 ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size,
406 PERCPU_FIRST_CHUNK_RESERVE, -1,
407 -1, NULL, pcpu4k_populate_pte);
408 goto out_free_ar;
409
410enomem:
411 while (--j >= 0)
412 free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE);
413 ret = -ENOMEM;
414out_free_ar:
415 free_bootmem(__pa(pcpu4k_pages), pages_size);
416 return ret;
417}
418
419/* for explicit first chunk allocator selection */
420static char pcpu_chosen_alloc[16] __initdata;
421
422static int __init percpu_alloc_setup(char *str)
423{
424 strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1);
425 return 0;
426}
427early_param("percpu_alloc", percpu_alloc_setup);
428
429static inline void setup_percpu_segment(int cpu) 158static inline void setup_percpu_segment(int cpu)
430{ 159{
431#ifdef CONFIG_X86_32 160#ifdef CONFIG_X86_32
@@ -441,52 +170,49 @@ static inline void setup_percpu_segment(int cpu)
441 170
442void __init setup_per_cpu_areas(void) 171void __init setup_per_cpu_areas(void)
443{ 172{
444 size_t static_size = __per_cpu_end - __per_cpu_start;
445 unsigned int cpu; 173 unsigned int cpu;
446 unsigned long delta; 174 unsigned long delta;
447 size_t pcpu_unit_size; 175 int rc;
448 ssize_t ret;
449 176
450 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", 177 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
451 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); 178 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
452 179
453 /* 180 /*
454 * Allocate percpu area. If PSE is supported, try to make use 181 * Allocate percpu area. Embedding allocator is our favorite;
455 * of large page mappings. Please read comments on top of 182 * however, on NUMA configurations, it can result in very
456 * each allocator for details. 183 * sparse unit mapping and vmalloc area isn't spacious enough
184 * on 32bit. Use page in that case.
457 */ 185 */
458 ret = -EINVAL; 186#ifdef CONFIG_X86_32
459 if (strlen(pcpu_chosen_alloc)) { 187 if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa())
460 if (strcmp(pcpu_chosen_alloc, "4k")) { 188 pcpu_chosen_fc = PCPU_FC_PAGE;
461 if (!strcmp(pcpu_chosen_alloc, "lpage")) 189#endif
462 ret = setup_pcpu_lpage(static_size, true); 190 rc = -EINVAL;
463 else if (!strcmp(pcpu_chosen_alloc, "embed")) 191 if (pcpu_chosen_fc != PCPU_FC_PAGE) {
464 ret = setup_pcpu_embed(static_size, true); 192 const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE;
465 else 193 const size_t dyn_size = PERCPU_MODULE_RESERVE +
466 pr_warning("PERCPU: unknown allocator %s " 194 PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE;
467 "specified\n", pcpu_chosen_alloc); 195
468 if (ret < 0) 196 rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
469 pr_warning("PERCPU: %s allocator failed (%zd), " 197 dyn_size, atom_size,
470 "falling back to 4k\n", 198 pcpu_cpu_distance,
471 pcpu_chosen_alloc, ret); 199 pcpu_fc_alloc, pcpu_fc_free);
472 } 200 if (rc < 0)
473 } else { 201 pr_warning("PERCPU: %s allocator failed (%d), "
474 ret = setup_pcpu_lpage(static_size, false); 202 "falling back to page size\n",
475 if (ret < 0) 203 pcpu_fc_names[pcpu_chosen_fc], rc);
476 ret = setup_pcpu_embed(static_size, false);
477 } 204 }
478 if (ret < 0) 205 if (rc < 0)
479 ret = setup_pcpu_4k(static_size); 206 rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE,
480 if (ret < 0) 207 pcpu_fc_alloc, pcpu_fc_free,
481 panic("cannot allocate static percpu area (%zu bytes, err=%zd)", 208 pcpup_populate_pte);
482 static_size, ret); 209 if (rc < 0)
483 210 panic("cannot initialize percpu area (err=%d)", rc);
484 pcpu_unit_size = ret;
485 211
486 /* alrighty, percpu areas up and running */ 212 /* alrighty, percpu areas up and running */
487 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; 213 delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start;
488 for_each_possible_cpu(cpu) { 214 for_each_possible_cpu(cpu) {
489 per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; 215 per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu];
490 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); 216 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
491 per_cpu(cpu_number, cpu) = cpu; 217 per_cpu(cpu_number, cpu) = cpu;
492 setup_percpu_segment(cpu); 218 setup_percpu_segment(cpu);
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 81e58238c4ce..6a44a76055ad 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -856,7 +856,7 @@ static void do_signal(struct pt_regs *regs)
856void 856void
857do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) 857do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags)
858{ 858{
859#ifdef CONFIG_X86_NEW_MCE 859#ifdef CONFIG_X86_MCE
860 /* notify userspace of pending MCEs */ 860 /* notify userspace of pending MCEs */
861 if (thread_info_flags & _TIF_MCE_NOTIFY) 861 if (thread_info_flags & _TIF_MCE_NOTIFY)
862 mce_notify_process(); 862 mce_notify_process();
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 2fecda69ee64..a25eeec00080 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -47,6 +47,7 @@
47#include <linux/bootmem.h> 47#include <linux/bootmem.h>
48#include <linux/err.h> 48#include <linux/err.h>
49#include <linux/nmi.h> 49#include <linux/nmi.h>
50#include <linux/tboot.h>
50 51
51#include <asm/acpi.h> 52#include <asm/acpi.h>
52#include <asm/desc.h> 53#include <asm/desc.h>
@@ -434,7 +435,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu)
434 * For perf, we return last level cache shared map. 435 * For perf, we return last level cache shared map.
435 * And for power savings, we return cpu_core_map 436 * And for power savings, we return cpu_core_map
436 */ 437 */
437 if (sched_mc_power_savings || sched_smt_power_savings) 438 if ((sched_mc_power_savings || sched_smt_power_savings) &&
439 !(cpu_has(c, X86_FEATURE_AMD_DCM)))
438 return cpu_core_mask(cpu); 440 return cpu_core_mask(cpu);
439 else 441 else
440 return c->llc_shared_map; 442 return c->llc_shared_map;
@@ -1116,9 +1118,22 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1116 1118
1117 if (is_uv_system()) 1119 if (is_uv_system())
1118 uv_system_init(); 1120 uv_system_init();
1121
1122 set_mtrr_aps_delayed_init();
1119out: 1123out:
1120 preempt_enable(); 1124 preempt_enable();
1121} 1125}
1126
1127void arch_enable_nonboot_cpus_begin(void)
1128{
1129 set_mtrr_aps_delayed_init();
1130}
1131
1132void arch_enable_nonboot_cpus_end(void)
1133{
1134 mtrr_aps_init();
1135}
1136
1122/* 1137/*
1123 * Early setup to make printk work. 1138 * Early setup to make printk work.
1124 */ 1139 */
@@ -1140,6 +1155,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus)
1140 setup_ioapic_dest(); 1155 setup_ioapic_dest();
1141#endif 1156#endif
1142 check_nmi_watchdog(); 1157 check_nmi_watchdog();
1158 mtrr_aps_init();
1143} 1159}
1144 1160
1145static int __initdata setup_possible_cpus = -1; 1161static int __initdata setup_possible_cpus = -1;
@@ -1317,6 +1333,7 @@ void play_dead_common(void)
1317void native_play_dead(void) 1333void native_play_dead(void)
1318{ 1334{
1319 play_dead_common(); 1335 play_dead_common();
1336 tboot_shutdown(TB_SHUTDOWN_WFS);
1320 wbinvd_halt(); 1337 wbinvd_halt();
1321} 1338}
1322 1339
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c
index e8b9863ef8c4..3149032ff107 100644
--- a/arch/x86/kernel/step.c
+++ b/arch/x86/kernel/step.c
@@ -4,6 +4,7 @@
4#include <linux/sched.h> 4#include <linux/sched.h>
5#include <linux/mm.h> 5#include <linux/mm.h>
6#include <linux/ptrace.h> 6#include <linux/ptrace.h>
7#include <asm/desc.h>
7 8
8unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) 9unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs)
9{ 10{
@@ -23,7 +24,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
23 * and APM bios ones we just ignore here. 24 * and APM bios ones we just ignore here.
24 */ 25 */
25 if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { 26 if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) {
26 u32 *desc; 27 struct desc_struct *desc;
27 unsigned long base; 28 unsigned long base;
28 29
29 seg &= ~7UL; 30 seg &= ~7UL;
@@ -33,12 +34,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re
33 addr = -1L; /* bogus selector, access would fault */ 34 addr = -1L; /* bogus selector, access would fault */
34 else { 35 else {
35 desc = child->mm->context.ldt + seg; 36 desc = child->mm->context.ldt + seg;
36 base = ((desc[0] >> 16) | 37 base = get_desc_base(desc);
37 ((desc[1] & 0xff) << 16) |
38 (desc[1] & 0xff000000));
39 38
40 /* 16-bit code segment? */ 39 /* 16-bit code segment? */
41 if (!((desc[1] >> 22) & 1)) 40 if (!desc->d)
42 addr &= 0xffff; 41 addr &= 0xffff;
43 addr += base; 42 addr += base;
44 } 43 }
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c
new file mode 100644
index 000000000000..86c9f91b48ae
--- /dev/null
+++ b/arch/x86/kernel/tboot.c
@@ -0,0 +1,447 @@
1/*
2 * tboot.c: main implementation of helper functions used by kernel for
3 * runtime support of Intel(R) Trusted Execution Technology
4 *
5 * Copyright (c) 2006-2009, Intel Corporation
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms and conditions of the GNU General Public License,
9 * version 2, as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
14 * more details.
15 *
16 * You should have received a copy of the GNU General Public License along with
17 * this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
19 *
20 */
21
22#include <linux/dma_remapping.h>
23#include <linux/init_task.h>
24#include <linux/spinlock.h>
25#include <linux/delay.h>
26#include <linux/sched.h>
27#include <linux/init.h>
28#include <linux/dmar.h>
29#include <linux/cpu.h>
30#include <linux/pfn.h>
31#include <linux/mm.h>
32#include <linux/tboot.h>
33
34#include <asm/trampoline.h>
35#include <asm/processor.h>
36#include <asm/bootparam.h>
37#include <asm/pgtable.h>
38#include <asm/pgalloc.h>
39#include <asm/fixmap.h>
40#include <asm/proto.h>
41#include <asm/setup.h>
42#include <asm/e820.h>
43#include <asm/io.h>
44
45#include "acpi/realmode/wakeup.h"
46
47/* Global pointer to shared data; NULL means no measured launch. */
48struct tboot *tboot __read_mostly;
49
50/* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */
51#define AP_WAIT_TIMEOUT 1
52
53#undef pr_fmt
54#define pr_fmt(fmt) "tboot: " fmt
55
56static u8 tboot_uuid[16] __initdata = TBOOT_UUID;
57
58void __init tboot_probe(void)
59{
60 /* Look for valid page-aligned address for shared page. */
61 if (!boot_params.tboot_addr)
62 return;
63 /*
64 * also verify that it is mapped as we expect it before calling
65 * set_fixmap(), to reduce chance of garbage value causing crash
66 */
67 if (!e820_any_mapped(boot_params.tboot_addr,
68 boot_params.tboot_addr, E820_RESERVED)) {
69 pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n");
70 return;
71 }
72
73 /* only a natively booted kernel should be using TXT */
74 if (paravirt_enabled()) {
75 pr_warning("non-0 tboot_addr but pv_ops is enabled\n");
76 return;
77 }
78
79 /* Map and check for tboot UUID. */
80 set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr);
81 tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE);
82 if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) {
83 pr_warning("tboot at 0x%llx is invalid\n",
84 boot_params.tboot_addr);
85 tboot = NULL;
86 return;
87 }
88 if (tboot->version < 5) {
89 pr_warning("tboot version is invalid: %u\n", tboot->version);
90 tboot = NULL;
91 return;
92 }
93
94 pr_info("found shared page at phys addr 0x%llx:\n",
95 boot_params.tboot_addr);
96 pr_debug("version: %d\n", tboot->version);
97 pr_debug("log_addr: 0x%08x\n", tboot->log_addr);
98 pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry);
99 pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base);
100 pr_debug("tboot_size: 0x%x\n", tboot->tboot_size);
101}
102
103static pgd_t *tboot_pg_dir;
104static struct mm_struct tboot_mm = {
105 .mm_rb = RB_ROOT,
106 .pgd = swapper_pg_dir,
107 .mm_users = ATOMIC_INIT(2),
108 .mm_count = ATOMIC_INIT(1),
109 .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem),
110 .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock),
111 .mmlist = LIST_HEAD_INIT(init_mm.mmlist),
112 .cpu_vm_mask = CPU_MASK_ALL,
113};
114
115static inline void switch_to_tboot_pt(void)
116{
117 write_cr3(virt_to_phys(tboot_pg_dir));
118}
119
120static int map_tboot_page(unsigned long vaddr, unsigned long pfn,
121 pgprot_t prot)
122{
123 pgd_t *pgd;
124 pud_t *pud;
125 pmd_t *pmd;
126 pte_t *pte;
127
128 pgd = pgd_offset(&tboot_mm, vaddr);
129 pud = pud_alloc(&tboot_mm, pgd, vaddr);
130 if (!pud)
131 return -1;
132 pmd = pmd_alloc(&tboot_mm, pud, vaddr);
133 if (!pmd)
134 return -1;
135 pte = pte_alloc_map(&tboot_mm, pmd, vaddr);
136 if (!pte)
137 return -1;
138 set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot));
139 pte_unmap(pte);
140 return 0;
141}
142
143static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn,
144 unsigned long nr)
145{
146 /* Reuse the original kernel mapping */
147 tboot_pg_dir = pgd_alloc(&tboot_mm);
148 if (!tboot_pg_dir)
149 return -1;
150
151 for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) {
152 if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC))
153 return -1;
154 }
155
156 return 0;
157}
158
159static void tboot_create_trampoline(void)
160{
161 u32 map_base, map_size;
162
163 /* Create identity map for tboot shutdown code. */
164 map_base = PFN_DOWN(tboot->tboot_base);
165 map_size = PFN_UP(tboot->tboot_size);
166 if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size))
167 panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n",
168 map_base, map_size);
169}
170
171#ifdef CONFIG_ACPI_SLEEP
172
173static void add_mac_region(phys_addr_t start, unsigned long size)
174{
175 struct tboot_mac_region *mr;
176 phys_addr_t end = start + size;
177
178 if (start && size) {
179 mr = &tboot->mac_regions[tboot->num_mac_regions++];
180 mr->start = round_down(start, PAGE_SIZE);
181 mr->size = round_up(end, PAGE_SIZE) - mr->start;
182 }
183}
184
185static int tboot_setup_sleep(void)
186{
187 tboot->num_mac_regions = 0;
188
189 /* S3 resume code */
190 add_mac_region(acpi_wakeup_address, WAKEUP_SIZE);
191
192#ifdef CONFIG_X86_TRAMPOLINE
193 /* AP trampoline code */
194 add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE);
195#endif
196
197 /* kernel code + data + bss */
198 add_mac_region(virt_to_phys(_text), _end - _text);
199
200 tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address;
201
202 return 0;
203}
204
205#else /* no CONFIG_ACPI_SLEEP */
206
207static int tboot_setup_sleep(void)
208{
209 /* S3 shutdown requested, but S3 not supported by the kernel... */
210 BUG();
211 return -1;
212}
213
214#endif
215
216void tboot_shutdown(u32 shutdown_type)
217{
218 void (*shutdown)(void);
219
220 if (!tboot_enabled())
221 return;
222
223 /*
224 * if we're being called before the 1:1 mapping is set up then just
225 * return and let the normal shutdown happen; this should only be
226 * due to very early panic()
227 */
228 if (!tboot_pg_dir)
229 return;
230
231 /* if this is S3 then set regions to MAC */
232 if (shutdown_type == TB_SHUTDOWN_S3)
233 if (tboot_setup_sleep())
234 return;
235
236 tboot->shutdown_type = shutdown_type;
237
238 switch_to_tboot_pt();
239
240 shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry;
241 shutdown();
242
243 /* should not reach here */
244 while (1)
245 halt();
246}
247
248static void tboot_copy_fadt(const struct acpi_table_fadt *fadt)
249{
250#define TB_COPY_GAS(tbg, g) \
251 tbg.space_id = g.space_id; \
252 tbg.bit_width = g.bit_width; \
253 tbg.bit_offset = g.bit_offset; \
254 tbg.access_width = g.access_width; \
255 tbg.address = g.address;
256
257 TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block);
258 TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block);
259 TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block);
260 TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block);
261
262 /*
263 * We need phys addr of waking vector, but can't use virt_to_phys() on
264 * &acpi_gbl_FACS because it is ioremap'ed, so calc from FACS phys
265 * addr.
266 */
267 tboot->acpi_sinfo.wakeup_vector = fadt->facs +
268 offsetof(struct acpi_table_facs, firmware_waking_vector);
269}
270
271void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control)
272{
273 static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = {
274 /* S0,1,2: */ -1, -1, -1,
275 /* S3: */ TB_SHUTDOWN_S3,
276 /* S4: */ TB_SHUTDOWN_S4,
277 /* S5: */ TB_SHUTDOWN_S5 };
278
279 if (!tboot_enabled())
280 return;
281
282 tboot_copy_fadt(&acpi_gbl_FADT);
283 tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control;
284 tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control;
285 /* we always use the 32b wakeup vector */
286 tboot->acpi_sinfo.vector_width = 32;
287
288 if (sleep_state >= ACPI_S_STATE_COUNT ||
289 acpi_shutdown_map[sleep_state] == -1) {
290 pr_warning("unsupported sleep state 0x%x\n", sleep_state);
291 return;
292 }
293
294 tboot_shutdown(acpi_shutdown_map[sleep_state]);
295}
296
297static atomic_t ap_wfs_count;
298
299static int tboot_wait_for_aps(int num_aps)
300{
301 unsigned long timeout;
302
303 timeout = AP_WAIT_TIMEOUT*HZ;
304 while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps &&
305 timeout) {
306 mdelay(1);
307 timeout--;
308 }
309
310 if (timeout)
311 pr_warning("tboot wait for APs timeout\n");
312
313 return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps);
314}
315
316static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb,
317 unsigned long action, void *hcpu)
318{
319 switch (action) {
320 case CPU_DYING:
321 atomic_inc(&ap_wfs_count);
322 if (num_online_cpus() == 1)
323 if (tboot_wait_for_aps(atomic_read(&ap_wfs_count)))
324 return NOTIFY_BAD;
325 break;
326 }
327 return NOTIFY_OK;
328}
329
330static struct notifier_block tboot_cpu_notifier __cpuinitdata =
331{
332 .notifier_call = tboot_cpu_callback,
333};
334
335static __init int tboot_late_init(void)
336{
337 if (!tboot_enabled())
338 return 0;
339
340 tboot_create_trampoline();
341
342 atomic_set(&ap_wfs_count, 0);
343 register_hotcpu_notifier(&tboot_cpu_notifier);
344 return 0;
345}
346
347late_initcall(tboot_late_init);
348
349/*
350 * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE)
351 */
352
353#define TXT_PUB_CONFIG_REGS_BASE 0xfed30000
354#define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000
355
356/* # pages for each config regs space - used by fixmap */
357#define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \
358 TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT)
359
360/* offsets from pub/priv config space */
361#define TXTCR_HEAP_BASE 0x0300
362#define TXTCR_HEAP_SIZE 0x0308
363
364#define SHA1_SIZE 20
365
366struct sha1_hash {
367 u8 hash[SHA1_SIZE];
368};
369
370struct sinit_mle_data {
371 u32 version; /* currently 6 */
372 struct sha1_hash bios_acm_id;
373 u32 edx_senter_flags;
374 u64 mseg_valid;
375 struct sha1_hash sinit_hash;
376 struct sha1_hash mle_hash;
377 struct sha1_hash stm_hash;
378 struct sha1_hash lcp_policy_hash;
379 u32 lcp_policy_control;
380 u32 rlp_wakeup_addr;
381 u32 reserved;
382 u32 num_mdrs;
383 u32 mdrs_off;
384 u32 num_vtd_dmars;
385 u32 vtd_dmars_off;
386} __packed;
387
388struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl)
389{
390 void *heap_base, *heap_ptr, *config;
391
392 if (!tboot_enabled())
393 return dmar_tbl;
394
395 /*
396 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
397 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
398 */
399
400 /* map config space in order to get heap addr */
401 config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES *
402 PAGE_SIZE);
403 if (!config)
404 return NULL;
405
406 /* now map TXT heap */
407 heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE),
408 *(u64 *)(config + TXTCR_HEAP_SIZE));
409 iounmap(config);
410 if (!heap_base)
411 return NULL;
412
413 /* walk heap to SinitMleData */
414 /* skip BiosData */
415 heap_ptr = heap_base + *(u64 *)heap_base;
416 /* skip OsMleData */
417 heap_ptr += *(u64 *)heap_ptr;
418 /* skip OsSinitData */
419 heap_ptr += *(u64 *)heap_ptr;
420 /* now points to SinitMleDataSize; set to SinitMleData */
421 heap_ptr += sizeof(u64);
422 /* get addr of DMAR table */
423 dmar_tbl = (struct acpi_table_header *)(heap_ptr +
424 ((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off -
425 sizeof(u64));
426
427 /* don't unmap heap because dmar.c needs access to this */
428
429 return dmar_tbl;
430}
431
432int tboot_force_iommu(void)
433{
434 if (!tboot_enabled())
435 return 0;
436
437 if (no_iommu || swiotlb || dmar_disabled)
438 pr_warning("Forcing Intel-IOMMU to enabled\n");
439
440 dmar_disabled = 0;
441#ifdef CONFIG_SWIOTLB
442 swiotlb = 0;
443#endif
444 no_iommu = 0;
445
446 return 1;
447}
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 77b9689f8edb..503c1f2e8835 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -640,13 +640,13 @@ static int __init uv_ptc_init(void)
640 if (!is_uv_system()) 640 if (!is_uv_system())
641 return 0; 641 return 0;
642 642
643 proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); 643 proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL,
644 &proc_uv_ptc_operations);
644 if (!proc_uv_ptc) { 645 if (!proc_uv_ptc) {
645 printk(KERN_ERR "unable to create %s proc entry\n", 646 printk(KERN_ERR "unable to create %s proc entry\n",
646 UV_PTC_BASENAME); 647 UV_PTC_BASENAME);
647 return -EINVAL; 648 return -EINVAL;
648 } 649 }
649 proc_uv_ptc->proc_fops = &proc_uv_ptc_operations;
650 return 0; 650 return 0;
651} 651}
652 652
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 5204332f475d..83264922a878 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -76,7 +76,7 @@ char ignore_fpu_irq;
76 * F0 0F bug workaround.. We have a special link segment 76 * F0 0F bug workaround.. We have a special link segment
77 * for this. 77 * for this.
78 */ 78 */
79gate_desc idt_table[256] 79gate_desc idt_table[NR_VECTORS]
80 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; 80 __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, };
81#endif 81#endif
82 82
@@ -786,33 +786,34 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code)
786#endif 786#endif
787} 787}
788 788
789#ifdef CONFIG_X86_32 789asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void)
790unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp)
791{ 790{
792 struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id());
793 unsigned long base = (kesp - uesp) & -THREAD_SIZE;
794 unsigned long new_kesp = kesp - base;
795 unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT;
796 __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS];
797
798 /* Set up base for espfix segment */
799 desc &= 0x00f0ff0000000000ULL;
800 desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) |
801 ((((__u64)base) << 32) & 0xff00000000000000ULL) |
802 ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) |
803 (lim_pages & 0xffff);
804 *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc;
805
806 return new_kesp;
807} 791}
808#endif
809 792
810asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) 793asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void)
811{ 794{
812} 795}
813 796
814asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) 797/*
798 * __math_state_restore assumes that cr0.TS is already clear and the
799 * fpu state is all ready for use. Used during context switch.
800 */
801void __math_state_restore(void)
815{ 802{
803 struct thread_info *thread = current_thread_info();
804 struct task_struct *tsk = thread->task;
805
806 /*
807 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
808 */
809 if (unlikely(restore_fpu_checking(tsk))) {
810 stts();
811 force_sig(SIGSEGV, tsk);
812 return;
813 }
814
815 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */
816 tsk->fpu_counter++;
816} 817}
817 818
818/* 819/*
@@ -846,17 +847,8 @@ asmlinkage void math_state_restore(void)
846 } 847 }
847 848
848 clts(); /* Allow maths ops (or we recurse) */ 849 clts(); /* Allow maths ops (or we recurse) */
849 /*
850 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
851 */
852 if (unlikely(restore_fpu_checking(tsk))) {
853 stts();
854 force_sig(SIGSEGV, tsk);
855 return;
856 }
857 850
858 thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ 851 __math_state_restore();
859 tsk->fpu_counter++;
860} 852}
861EXPORT_SYMBOL_GPL(math_state_restore); 853EXPORT_SYMBOL_GPL(math_state_restore);
862 854
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S
index 9fc178255c04..0ccb57d5ee35 100644
--- a/arch/x86/kernel/vmlinux.lds.S
+++ b/arch/x86/kernel/vmlinux.lds.S
@@ -348,15 +348,12 @@ SECTIONS
348 _end = .; 348 _end = .;
349 } 349 }
350 350
351 /* Sections to be discarded */
352 /DISCARD/ : {
353 *(.exitcall.exit)
354 *(.eh_frame)
355 *(.discard)
356 }
357
358 STABS_DEBUG 351 STABS_DEBUG
359 DWARF_DEBUG 352 DWARF_DEBUG
353
354 /* Sections to be discarded */
355 DISCARDS
356 /DISCARD/ : { *(.eh_frame) }
360} 357}
361 358
362 359