diff options
author | Len Brown <len.brown@intel.com> | 2009-09-19 00:11:26 -0400 |
---|---|---|
committer | Len Brown <len.brown@intel.com> | 2009-09-19 00:11:26 -0400 |
commit | c602c65b2f81d14456771d1e3f15d1381f4b7efa (patch) | |
tree | f1f833c8dd6c1519eeb101be32f7fe54a9605af5 /arch/x86/kernel | |
parent | 3834f47291df475be3f0f0fb7ccaa098967cc054 (diff) | |
parent | 78f28b7c555359c67c2a0d23f7436e915329421e (diff) |
Merge branch 'linus' into sfi-release
Conflicts:
arch/x86/kernel/setup.c
drivers/acpi/power.c
init/main.c
Signed-off-by: Len Brown <len.brown@intel.com>
Diffstat (limited to 'arch/x86/kernel')
104 files changed, 3857 insertions, 3433 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 6321afaafb26..d8e5d0cdd678 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile | |||
@@ -31,8 +31,8 @@ GCOV_PROFILE_paravirt.o := n | |||
31 | 31 | ||
32 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o | 32 | obj-y := process_$(BITS).o signal.o entry_$(BITS).o |
33 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o | 33 | obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o |
34 | obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o | 34 | obj-y += time.o ioport.o ldt.o dumpstack.o |
35 | obj-y += setup.o i8259.o irqinit.o | 35 | obj-y += setup.o x86_init.o i8259.o irqinit.o |
36 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o | 36 | obj-$(CONFIG_X86_VISWS) += visws_quirks.o |
37 | obj-$(CONFIG_X86_32) += probe_roms_32.o | 37 | obj-$(CONFIG_X86_32) += probe_roms_32.o |
38 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o | 38 | obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o |
@@ -52,6 +52,7 @@ obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o | |||
52 | obj-$(CONFIG_X86_32) += tls.o | 52 | obj-$(CONFIG_X86_32) += tls.o |
53 | obj-$(CONFIG_IA32_EMULATION) += tls.o | 53 | obj-$(CONFIG_IA32_EMULATION) += tls.o |
54 | obj-y += step.o | 54 | obj-y += step.o |
55 | obj-$(CONFIG_INTEL_TXT) += tboot.o | ||
55 | obj-$(CONFIG_STACKTRACE) += stacktrace.o | 56 | obj-$(CONFIG_STACKTRACE) += stacktrace.o |
56 | obj-y += cpu/ | 57 | obj-y += cpu/ |
57 | obj-y += acpi/ | 58 | obj-y += acpi/ |
@@ -105,6 +106,7 @@ obj-$(CONFIG_SCx200) += scx200.o | |||
105 | scx200-y += scx200_32.o | 106 | scx200-y += scx200_32.o |
106 | 107 | ||
107 | obj-$(CONFIG_OLPC) += olpc.o | 108 | obj-$(CONFIG_OLPC) += olpc.o |
109 | obj-$(CONFIG_X86_MRST) += mrst.o | ||
108 | 110 | ||
109 | microcode-y := microcode_core.o | 111 | microcode-y := microcode_core.o |
110 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o | 112 | microcode-$(CONFIG_MICROCODE_INTEL) += microcode_intel.o |
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 7e62d1eb8e30..67e929b89875 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c | |||
@@ -1079,9 +1079,8 @@ static int __init acpi_parse_madt_ioapic_entries(void) | |||
1079 | * If MPS is present, it will handle them, | 1079 | * If MPS is present, it will handle them, |
1080 | * otherwise the system will stay in PIC mode | 1080 | * otherwise the system will stay in PIC mode |
1081 | */ | 1081 | */ |
1082 | if (acpi_disabled || acpi_noirq) { | 1082 | if (acpi_disabled || acpi_noirq) |
1083 | return -ENODEV; | 1083 | return -ENODEV; |
1084 | } | ||
1085 | 1084 | ||
1086 | if (!cpu_has_apic) | 1085 | if (!cpu_has_apic) |
1087 | return -ENODEV; | 1086 | return -ENODEV; |
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index f57658702571..de7353c0ce9c 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c | |||
@@ -2,6 +2,7 @@ | |||
2 | #include <linux/sched.h> | 2 | #include <linux/sched.h> |
3 | #include <linux/mutex.h> | 3 | #include <linux/mutex.h> |
4 | #include <linux/list.h> | 4 | #include <linux/list.h> |
5 | #include <linux/stringify.h> | ||
5 | #include <linux/kprobes.h> | 6 | #include <linux/kprobes.h> |
6 | #include <linux/mm.h> | 7 | #include <linux/mm.h> |
7 | #include <linux/vmalloc.h> | 8 | #include <linux/vmalloc.h> |
@@ -32,7 +33,7 @@ __setup("smp-alt-boot", bootonly); | |||
32 | #define smp_alt_once 1 | 33 | #define smp_alt_once 1 |
33 | #endif | 34 | #endif |
34 | 35 | ||
35 | static int debug_alternative; | 36 | static int __initdata_or_module debug_alternative; |
36 | 37 | ||
37 | static int __init debug_alt(char *str) | 38 | static int __init debug_alt(char *str) |
38 | { | 39 | { |
@@ -51,7 +52,7 @@ static int __init setup_noreplace_smp(char *str) | |||
51 | __setup("noreplace-smp", setup_noreplace_smp); | 52 | __setup("noreplace-smp", setup_noreplace_smp); |
52 | 53 | ||
53 | #ifdef CONFIG_PARAVIRT | 54 | #ifdef CONFIG_PARAVIRT |
54 | static int noreplace_paravirt = 0; | 55 | static int __initdata_or_module noreplace_paravirt = 0; |
55 | 56 | ||
56 | static int __init setup_noreplace_paravirt(char *str) | 57 | static int __init setup_noreplace_paravirt(char *str) |
57 | { | 58 | { |
@@ -64,16 +65,17 @@ __setup("noreplace-paravirt", setup_noreplace_paravirt); | |||
64 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | 65 | #define DPRINTK(fmt, args...) if (debug_alternative) \ |
65 | printk(KERN_DEBUG fmt, args) | 66 | printk(KERN_DEBUG fmt, args) |
66 | 67 | ||
67 | #ifdef GENERIC_NOP1 | 68 | #if defined(GENERIC_NOP1) && !defined(CONFIG_X86_64) |
68 | /* Use inline assembly to define this because the nops are defined | 69 | /* Use inline assembly to define this because the nops are defined |
69 | as inline assembly strings in the include files and we cannot | 70 | as inline assembly strings in the include files and we cannot |
70 | get them easily into strings. */ | 71 | get them easily into strings. */ |
71 | asm("\t.section .rodata, \"a\"\nintelnops: " | 72 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nintelnops: " |
72 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 | 73 | GENERIC_NOP1 GENERIC_NOP2 GENERIC_NOP3 GENERIC_NOP4 GENERIC_NOP5 GENERIC_NOP6 |
73 | GENERIC_NOP7 GENERIC_NOP8 | 74 | GENERIC_NOP7 GENERIC_NOP8 |
74 | "\t.previous"); | 75 | "\t.previous"); |
75 | extern const unsigned char intelnops[]; | 76 | extern const unsigned char intelnops[]; |
76 | static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { | 77 | static const unsigned char *const __initconst_or_module |
78 | intel_nops[ASM_NOP_MAX+1] = { | ||
77 | NULL, | 79 | NULL, |
78 | intelnops, | 80 | intelnops, |
79 | intelnops + 1, | 81 | intelnops + 1, |
@@ -87,12 +89,13 @@ static const unsigned char *const intel_nops[ASM_NOP_MAX+1] = { | |||
87 | #endif | 89 | #endif |
88 | 90 | ||
89 | #ifdef K8_NOP1 | 91 | #ifdef K8_NOP1 |
90 | asm("\t.section .rodata, \"a\"\nk8nops: " | 92 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk8nops: " |
91 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 | 93 | K8_NOP1 K8_NOP2 K8_NOP3 K8_NOP4 K8_NOP5 K8_NOP6 |
92 | K8_NOP7 K8_NOP8 | 94 | K8_NOP7 K8_NOP8 |
93 | "\t.previous"); | 95 | "\t.previous"); |
94 | extern const unsigned char k8nops[]; | 96 | extern const unsigned char k8nops[]; |
95 | static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { | 97 | static const unsigned char *const __initconst_or_module |
98 | k8_nops[ASM_NOP_MAX+1] = { | ||
96 | NULL, | 99 | NULL, |
97 | k8nops, | 100 | k8nops, |
98 | k8nops + 1, | 101 | k8nops + 1, |
@@ -105,13 +108,14 @@ static const unsigned char *const k8_nops[ASM_NOP_MAX+1] = { | |||
105 | }; | 108 | }; |
106 | #endif | 109 | #endif |
107 | 110 | ||
108 | #ifdef K7_NOP1 | 111 | #if defined(K7_NOP1) && !defined(CONFIG_X86_64) |
109 | asm("\t.section .rodata, \"a\"\nk7nops: " | 112 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\nk7nops: " |
110 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 | 113 | K7_NOP1 K7_NOP2 K7_NOP3 K7_NOP4 K7_NOP5 K7_NOP6 |
111 | K7_NOP7 K7_NOP8 | 114 | K7_NOP7 K7_NOP8 |
112 | "\t.previous"); | 115 | "\t.previous"); |
113 | extern const unsigned char k7nops[]; | 116 | extern const unsigned char k7nops[]; |
114 | static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { | 117 | static const unsigned char *const __initconst_or_module |
118 | k7_nops[ASM_NOP_MAX+1] = { | ||
115 | NULL, | 119 | NULL, |
116 | k7nops, | 120 | k7nops, |
117 | k7nops + 1, | 121 | k7nops + 1, |
@@ -125,12 +129,13 @@ static const unsigned char *const k7_nops[ASM_NOP_MAX+1] = { | |||
125 | #endif | 129 | #endif |
126 | 130 | ||
127 | #ifdef P6_NOP1 | 131 | #ifdef P6_NOP1 |
128 | asm("\t.section .rodata, \"a\"\np6nops: " | 132 | asm("\t" __stringify(__INITRODATA_OR_MODULE) "\np6nops: " |
129 | P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 | 133 | P6_NOP1 P6_NOP2 P6_NOP3 P6_NOP4 P6_NOP5 P6_NOP6 |
130 | P6_NOP7 P6_NOP8 | 134 | P6_NOP7 P6_NOP8 |
131 | "\t.previous"); | 135 | "\t.previous"); |
132 | extern const unsigned char p6nops[]; | 136 | extern const unsigned char p6nops[]; |
133 | static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { | 137 | static const unsigned char *const __initconst_or_module |
138 | p6_nops[ASM_NOP_MAX+1] = { | ||
134 | NULL, | 139 | NULL, |
135 | p6nops, | 140 | p6nops, |
136 | p6nops + 1, | 141 | p6nops + 1, |
@@ -146,7 +151,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = { | |||
146 | #ifdef CONFIG_X86_64 | 151 | #ifdef CONFIG_X86_64 |
147 | 152 | ||
148 | extern char __vsyscall_0; | 153 | extern char __vsyscall_0; |
149 | const unsigned char *const *find_nop_table(void) | 154 | static const unsigned char *const *__init_or_module find_nop_table(void) |
150 | { | 155 | { |
151 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && | 156 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && |
152 | boot_cpu_has(X86_FEATURE_NOPL)) | 157 | boot_cpu_has(X86_FEATURE_NOPL)) |
@@ -157,7 +162,7 @@ const unsigned char *const *find_nop_table(void) | |||
157 | 162 | ||
158 | #else /* CONFIG_X86_64 */ | 163 | #else /* CONFIG_X86_64 */ |
159 | 164 | ||
160 | const unsigned char *const *find_nop_table(void) | 165 | static const unsigned char *const *__init_or_module find_nop_table(void) |
161 | { | 166 | { |
162 | if (boot_cpu_has(X86_FEATURE_K8)) | 167 | if (boot_cpu_has(X86_FEATURE_K8)) |
163 | return k8_nops; | 168 | return k8_nops; |
@@ -172,7 +177,7 @@ const unsigned char *const *find_nop_table(void) | |||
172 | #endif /* CONFIG_X86_64 */ | 177 | #endif /* CONFIG_X86_64 */ |
173 | 178 | ||
174 | /* Use this to add nops to a buffer, then text_poke the whole buffer. */ | 179 | /* Use this to add nops to a buffer, then text_poke the whole buffer. */ |
175 | void add_nops(void *insns, unsigned int len) | 180 | static void __init_or_module add_nops(void *insns, unsigned int len) |
176 | { | 181 | { |
177 | const unsigned char *const *noptable = find_nop_table(); | 182 | const unsigned char *const *noptable = find_nop_table(); |
178 | 183 | ||
@@ -185,10 +190,10 @@ void add_nops(void *insns, unsigned int len) | |||
185 | len -= noplen; | 190 | len -= noplen; |
186 | } | 191 | } |
187 | } | 192 | } |
188 | EXPORT_SYMBOL_GPL(add_nops); | ||
189 | 193 | ||
190 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 194 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
191 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 195 | extern u8 *__smp_locks[], *__smp_locks_end[]; |
196 | static void *text_poke_early(void *addr, const void *opcode, size_t len); | ||
192 | 197 | ||
193 | /* Replace instructions with better alternatives for this CPU type. | 198 | /* Replace instructions with better alternatives for this CPU type. |
194 | This runs before SMP is initialized to avoid SMP problems with | 199 | This runs before SMP is initialized to avoid SMP problems with |
@@ -196,7 +201,8 @@ extern u8 *__smp_locks[], *__smp_locks_end[]; | |||
196 | APs have less capabilities than the boot processor are not handled. | 201 | APs have less capabilities than the boot processor are not handled. |
197 | Tough. Make sure you disable such features by hand. */ | 202 | Tough. Make sure you disable such features by hand. */ |
198 | 203 | ||
199 | void apply_alternatives(struct alt_instr *start, struct alt_instr *end) | 204 | void __init_or_module apply_alternatives(struct alt_instr *start, |
205 | struct alt_instr *end) | ||
200 | { | 206 | { |
201 | struct alt_instr *a; | 207 | struct alt_instr *a; |
202 | char insnbuf[MAX_PATCH_LEN]; | 208 | char insnbuf[MAX_PATCH_LEN]; |
@@ -279,9 +285,10 @@ static LIST_HEAD(smp_alt_modules); | |||
279 | static DEFINE_MUTEX(smp_alt); | 285 | static DEFINE_MUTEX(smp_alt); |
280 | static int smp_mode = 1; /* protected by smp_alt */ | 286 | static int smp_mode = 1; /* protected by smp_alt */ |
281 | 287 | ||
282 | void alternatives_smp_module_add(struct module *mod, char *name, | 288 | void __init_or_module alternatives_smp_module_add(struct module *mod, |
283 | void *locks, void *locks_end, | 289 | char *name, |
284 | void *text, void *text_end) | 290 | void *locks, void *locks_end, |
291 | void *text, void *text_end) | ||
285 | { | 292 | { |
286 | struct smp_alt_module *smp; | 293 | struct smp_alt_module *smp; |
287 | 294 | ||
@@ -317,7 +324,7 @@ void alternatives_smp_module_add(struct module *mod, char *name, | |||
317 | mutex_unlock(&smp_alt); | 324 | mutex_unlock(&smp_alt); |
318 | } | 325 | } |
319 | 326 | ||
320 | void alternatives_smp_module_del(struct module *mod) | 327 | void __init_or_module alternatives_smp_module_del(struct module *mod) |
321 | { | 328 | { |
322 | struct smp_alt_module *item; | 329 | struct smp_alt_module *item; |
323 | 330 | ||
@@ -386,8 +393,8 @@ void alternatives_smp_switch(int smp) | |||
386 | #endif | 393 | #endif |
387 | 394 | ||
388 | #ifdef CONFIG_PARAVIRT | 395 | #ifdef CONFIG_PARAVIRT |
389 | void apply_paravirt(struct paravirt_patch_site *start, | 396 | void __init_or_module apply_paravirt(struct paravirt_patch_site *start, |
390 | struct paravirt_patch_site *end) | 397 | struct paravirt_patch_site *end) |
391 | { | 398 | { |
392 | struct paravirt_patch_site *p; | 399 | struct paravirt_patch_site *p; |
393 | char insnbuf[MAX_PATCH_LEN]; | 400 | char insnbuf[MAX_PATCH_LEN]; |
@@ -485,13 +492,14 @@ void __init alternative_instructions(void) | |||
485 | * instructions. And on the local CPU you need to be protected again NMI or MCE | 492 | * instructions. And on the local CPU you need to be protected again NMI or MCE |
486 | * handlers seeing an inconsistent instruction while you patch. | 493 | * handlers seeing an inconsistent instruction while you patch. |
487 | */ | 494 | */ |
488 | void *text_poke_early(void *addr, const void *opcode, size_t len) | 495 | static void *__init_or_module text_poke_early(void *addr, const void *opcode, |
496 | size_t len) | ||
489 | { | 497 | { |
490 | unsigned long flags; | 498 | unsigned long flags; |
491 | local_irq_save(flags); | 499 | local_irq_save(flags); |
492 | memcpy(addr, opcode, len); | 500 | memcpy(addr, opcode, len); |
493 | local_irq_restore(flags); | ||
494 | sync_core(); | 501 | sync_core(); |
502 | local_irq_restore(flags); | ||
495 | /* Could also do a CLFLUSH here to speed up CPU recovery; but | 503 | /* Could also do a CLFLUSH here to speed up CPU recovery; but |
496 | that causes hangs on some VIA CPUs. */ | 504 | that causes hangs on some VIA CPUs. */ |
497 | return addr; | 505 | return addr; |
diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f5037801..98f230f6a28d 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c | |||
@@ -41,9 +41,13 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); | |||
41 | static LIST_HEAD(iommu_pd_list); | 41 | static LIST_HEAD(iommu_pd_list); |
42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); | 42 | static DEFINE_SPINLOCK(iommu_pd_list_lock); |
43 | 43 | ||
44 | #ifdef CONFIG_IOMMU_API | 44 | /* |
45 | * Domain for untranslated devices - only allocated | ||
46 | * if iommu=pt passed on kernel cmd line. | ||
47 | */ | ||
48 | static struct protection_domain *pt_domain; | ||
49 | |||
45 | static struct iommu_ops amd_iommu_ops; | 50 | static struct iommu_ops amd_iommu_ops; |
46 | #endif | ||
47 | 51 | ||
48 | /* | 52 | /* |
49 | * general struct to manage commands send to an IOMMU | 53 | * general struct to manage commands send to an IOMMU |
@@ -55,16 +59,16 @@ struct iommu_cmd { | |||
55 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | 59 | static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, |
56 | struct unity_map_entry *e); | 60 | struct unity_map_entry *e); |
57 | static struct dma_ops_domain *find_protection_domain(u16 devid); | 61 | static struct dma_ops_domain *find_protection_domain(u16 devid); |
58 | static u64* alloc_pte(struct protection_domain *dom, | 62 | static u64 *alloc_pte(struct protection_domain *domain, |
59 | unsigned long address, u64 | 63 | unsigned long address, int end_lvl, |
60 | **pte_page, gfp_t gfp); | 64 | u64 **pte_page, gfp_t gfp); |
61 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, | 65 | static void dma_ops_reserve_addresses(struct dma_ops_domain *dom, |
62 | unsigned long start_page, | 66 | unsigned long start_page, |
63 | unsigned int pages); | 67 | unsigned int pages); |
64 | 68 | static void reset_iommu_command_buffer(struct amd_iommu *iommu); | |
65 | #ifndef BUS_NOTIFY_UNBOUND_DRIVER | 69 | static u64 *fetch_pte(struct protection_domain *domain, |
66 | #define BUS_NOTIFY_UNBOUND_DRIVER 0x0005 | 70 | unsigned long address, int map_size); |
67 | #endif | 71 | static void update_domain(struct protection_domain *domain); |
68 | 72 | ||
69 | #ifdef CONFIG_AMD_IOMMU_STATS | 73 | #ifdef CONFIG_AMD_IOMMU_STATS |
70 | 74 | ||
@@ -138,7 +142,25 @@ static int iommu_has_npcache(struct amd_iommu *iommu) | |||
138 | * | 142 | * |
139 | ****************************************************************************/ | 143 | ****************************************************************************/ |
140 | 144 | ||
141 | static void iommu_print_event(void *__evt) | 145 | static void dump_dte_entry(u16 devid) |
146 | { | ||
147 | int i; | ||
148 | |||
149 | for (i = 0; i < 8; ++i) | ||
150 | pr_err("AMD-Vi: DTE[%d]: %08x\n", i, | ||
151 | amd_iommu_dev_table[devid].data[i]); | ||
152 | } | ||
153 | |||
154 | static void dump_command(unsigned long phys_addr) | ||
155 | { | ||
156 | struct iommu_cmd *cmd = phys_to_virt(phys_addr); | ||
157 | int i; | ||
158 | |||
159 | for (i = 0; i < 4; ++i) | ||
160 | pr_err("AMD-Vi: CMD[%d]: %08x\n", i, cmd->data[i]); | ||
161 | } | ||
162 | |||
163 | static void iommu_print_event(struct amd_iommu *iommu, void *__evt) | ||
142 | { | 164 | { |
143 | u32 *event = __evt; | 165 | u32 *event = __evt; |
144 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; | 166 | int type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; |
@@ -147,7 +169,7 @@ static void iommu_print_event(void *__evt) | |||
147 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; | 169 | int flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; |
148 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; | 170 | u64 address = (u64)(((u64)event[3]) << 32) | event[2]; |
149 | 171 | ||
150 | printk(KERN_ERR "AMD IOMMU: Event logged ["); | 172 | printk(KERN_ERR "AMD-Vi: Event logged ["); |
151 | 173 | ||
152 | switch (type) { | 174 | switch (type) { |
153 | case EVENT_TYPE_ILL_DEV: | 175 | case EVENT_TYPE_ILL_DEV: |
@@ -155,6 +177,7 @@ static void iommu_print_event(void *__evt) | |||
155 | "address=0x%016llx flags=0x%04x]\n", | 177 | "address=0x%016llx flags=0x%04x]\n", |
156 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), | 178 | PCI_BUS(devid), PCI_SLOT(devid), PCI_FUNC(devid), |
157 | address, flags); | 179 | address, flags); |
180 | dump_dte_entry(devid); | ||
158 | break; | 181 | break; |
159 | case EVENT_TYPE_IO_FAULT: | 182 | case EVENT_TYPE_IO_FAULT: |
160 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " | 183 | printk("IO_PAGE_FAULT device=%02x:%02x.%x " |
@@ -176,6 +199,8 @@ static void iommu_print_event(void *__evt) | |||
176 | break; | 199 | break; |
177 | case EVENT_TYPE_ILL_CMD: | 200 | case EVENT_TYPE_ILL_CMD: |
178 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); | 201 | printk("ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); |
202 | reset_iommu_command_buffer(iommu); | ||
203 | dump_command(address); | ||
179 | break; | 204 | break; |
180 | case EVENT_TYPE_CMD_HARD_ERR: | 205 | case EVENT_TYPE_CMD_HARD_ERR: |
181 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " | 206 | printk("COMMAND_HARDWARE_ERROR address=0x%016llx " |
@@ -209,7 +234,7 @@ static void iommu_poll_events(struct amd_iommu *iommu) | |||
209 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); | 234 | tail = readl(iommu->mmio_base + MMIO_EVT_TAIL_OFFSET); |
210 | 235 | ||
211 | while (head != tail) { | 236 | while (head != tail) { |
212 | iommu_print_event(iommu->evt_buf + head); | 237 | iommu_print_event(iommu, iommu->evt_buf + head); |
213 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; | 238 | head = (head + EVENT_ENTRY_SIZE) % iommu->evt_buf_size; |
214 | } | 239 | } |
215 | 240 | ||
@@ -296,8 +321,11 @@ static void __iommu_wait_for_completion(struct amd_iommu *iommu) | |||
296 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; | 321 | status &= ~MMIO_STATUS_COM_WAIT_INT_MASK; |
297 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); | 322 | writel(status, iommu->mmio_base + MMIO_STATUS_OFFSET); |
298 | 323 | ||
299 | if (unlikely(i == EXIT_LOOP_COUNT)) | 324 | if (unlikely(i == EXIT_LOOP_COUNT)) { |
300 | panic("AMD IOMMU: Completion wait loop failed\n"); | 325 | spin_unlock(&iommu->lock); |
326 | reset_iommu_command_buffer(iommu); | ||
327 | spin_lock(&iommu->lock); | ||
328 | } | ||
301 | } | 329 | } |
302 | 330 | ||
303 | /* | 331 | /* |
@@ -445,47 +473,78 @@ static void iommu_flush_tlb_pde(struct amd_iommu *iommu, u16 domid) | |||
445 | } | 473 | } |
446 | 474 | ||
447 | /* | 475 | /* |
476 | * This function flushes one domain on one IOMMU | ||
477 | */ | ||
478 | static void flush_domain_on_iommu(struct amd_iommu *iommu, u16 domid) | ||
479 | { | ||
480 | struct iommu_cmd cmd; | ||
481 | unsigned long flags; | ||
482 | |||
483 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | ||
484 | domid, 1, 1); | ||
485 | |||
486 | spin_lock_irqsave(&iommu->lock, flags); | ||
487 | __iommu_queue_command(iommu, &cmd); | ||
488 | __iommu_completion_wait(iommu); | ||
489 | __iommu_wait_for_completion(iommu); | ||
490 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
491 | } | ||
492 | |||
493 | static void flush_all_domains_on_iommu(struct amd_iommu *iommu) | ||
494 | { | ||
495 | int i; | ||
496 | |||
497 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | ||
498 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | ||
499 | continue; | ||
500 | flush_domain_on_iommu(iommu, i); | ||
501 | } | ||
502 | |||
503 | } | ||
504 | |||
505 | /* | ||
448 | * This function is used to flush the IO/TLB for a given protection domain | 506 | * This function is used to flush the IO/TLB for a given protection domain |
449 | * on every IOMMU in the system | 507 | * on every IOMMU in the system |
450 | */ | 508 | */ |
451 | static void iommu_flush_domain(u16 domid) | 509 | static void iommu_flush_domain(u16 domid) |
452 | { | 510 | { |
453 | unsigned long flags; | ||
454 | struct amd_iommu *iommu; | 511 | struct amd_iommu *iommu; |
455 | struct iommu_cmd cmd; | ||
456 | 512 | ||
457 | INC_STATS_COUNTER(domain_flush_all); | 513 | INC_STATS_COUNTER(domain_flush_all); |
458 | 514 | ||
459 | __iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS, | 515 | for_each_iommu(iommu) |
460 | domid, 1, 1); | 516 | flush_domain_on_iommu(iommu, domid); |
461 | |||
462 | for_each_iommu(iommu) { | ||
463 | spin_lock_irqsave(&iommu->lock, flags); | ||
464 | __iommu_queue_command(iommu, &cmd); | ||
465 | __iommu_completion_wait(iommu); | ||
466 | __iommu_wait_for_completion(iommu); | ||
467 | spin_unlock_irqrestore(&iommu->lock, flags); | ||
468 | } | ||
469 | } | 517 | } |
470 | 518 | ||
471 | void amd_iommu_flush_all_domains(void) | 519 | void amd_iommu_flush_all_domains(void) |
472 | { | 520 | { |
521 | struct amd_iommu *iommu; | ||
522 | |||
523 | for_each_iommu(iommu) | ||
524 | flush_all_domains_on_iommu(iommu); | ||
525 | } | ||
526 | |||
527 | static void flush_all_devices_for_iommu(struct amd_iommu *iommu) | ||
528 | { | ||
473 | int i; | 529 | int i; |
474 | 530 | ||
475 | for (i = 1; i < MAX_DOMAIN_ID; ++i) { | 531 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
476 | if (!test_bit(i, amd_iommu_pd_alloc_bitmap)) | 532 | if (iommu != amd_iommu_rlookup_table[i]) |
477 | continue; | 533 | continue; |
478 | iommu_flush_domain(i); | 534 | |
535 | iommu_queue_inv_dev_entry(iommu, i); | ||
536 | iommu_completion_wait(iommu); | ||
479 | } | 537 | } |
480 | } | 538 | } |
481 | 539 | ||
482 | void amd_iommu_flush_all_devices(void) | 540 | static void flush_devices_by_domain(struct protection_domain *domain) |
483 | { | 541 | { |
484 | struct amd_iommu *iommu; | 542 | struct amd_iommu *iommu; |
485 | int i; | 543 | int i; |
486 | 544 | ||
487 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | 545 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { |
488 | if (amd_iommu_pd_table[i] == NULL) | 546 | if ((domain == NULL && amd_iommu_pd_table[i] == NULL) || |
547 | (amd_iommu_pd_table[i] != domain)) | ||
489 | continue; | 548 | continue; |
490 | 549 | ||
491 | iommu = amd_iommu_rlookup_table[i]; | 550 | iommu = amd_iommu_rlookup_table[i]; |
@@ -497,6 +556,27 @@ void amd_iommu_flush_all_devices(void) | |||
497 | } | 556 | } |
498 | } | 557 | } |
499 | 558 | ||
559 | static void reset_iommu_command_buffer(struct amd_iommu *iommu) | ||
560 | { | ||
561 | pr_err("AMD-Vi: Resetting IOMMU command buffer\n"); | ||
562 | |||
563 | if (iommu->reset_in_progress) | ||
564 | panic("AMD-Vi: ILLEGAL_COMMAND_ERROR while resetting command buffer\n"); | ||
565 | |||
566 | iommu->reset_in_progress = true; | ||
567 | |||
568 | amd_iommu_reset_cmd_buffer(iommu); | ||
569 | flush_all_devices_for_iommu(iommu); | ||
570 | flush_all_domains_on_iommu(iommu); | ||
571 | |||
572 | iommu->reset_in_progress = false; | ||
573 | } | ||
574 | |||
575 | void amd_iommu_flush_all_devices(void) | ||
576 | { | ||
577 | flush_devices_by_domain(NULL); | ||
578 | } | ||
579 | |||
500 | /**************************************************************************** | 580 | /**************************************************************************** |
501 | * | 581 | * |
502 | * The functions below are used the create the page table mappings for | 582 | * The functions below are used the create the page table mappings for |
@@ -514,18 +594,21 @@ void amd_iommu_flush_all_devices(void) | |||
514 | static int iommu_map_page(struct protection_domain *dom, | 594 | static int iommu_map_page(struct protection_domain *dom, |
515 | unsigned long bus_addr, | 595 | unsigned long bus_addr, |
516 | unsigned long phys_addr, | 596 | unsigned long phys_addr, |
517 | int prot) | 597 | int prot, |
598 | int map_size) | ||
518 | { | 599 | { |
519 | u64 __pte, *pte; | 600 | u64 __pte, *pte; |
520 | 601 | ||
521 | bus_addr = PAGE_ALIGN(bus_addr); | 602 | bus_addr = PAGE_ALIGN(bus_addr); |
522 | phys_addr = PAGE_ALIGN(phys_addr); | 603 | phys_addr = PAGE_ALIGN(phys_addr); |
523 | 604 | ||
524 | /* only support 512GB address spaces for now */ | 605 | BUG_ON(!PM_ALIGNED(map_size, bus_addr)); |
525 | if (bus_addr > IOMMU_MAP_SIZE_L3 || !(prot & IOMMU_PROT_MASK)) | 606 | BUG_ON(!PM_ALIGNED(map_size, phys_addr)); |
607 | |||
608 | if (!(prot & IOMMU_PROT_MASK)) | ||
526 | return -EINVAL; | 609 | return -EINVAL; |
527 | 610 | ||
528 | pte = alloc_pte(dom, bus_addr, NULL, GFP_KERNEL); | 611 | pte = alloc_pte(dom, bus_addr, map_size, NULL, GFP_KERNEL); |
529 | 612 | ||
530 | if (IOMMU_PTE_PRESENT(*pte)) | 613 | if (IOMMU_PTE_PRESENT(*pte)) |
531 | return -EBUSY; | 614 | return -EBUSY; |
@@ -538,29 +621,18 @@ static int iommu_map_page(struct protection_domain *dom, | |||
538 | 621 | ||
539 | *pte = __pte; | 622 | *pte = __pte; |
540 | 623 | ||
624 | update_domain(dom); | ||
625 | |||
541 | return 0; | 626 | return 0; |
542 | } | 627 | } |
543 | 628 | ||
544 | static void iommu_unmap_page(struct protection_domain *dom, | 629 | static void iommu_unmap_page(struct protection_domain *dom, |
545 | unsigned long bus_addr) | 630 | unsigned long bus_addr, int map_size) |
546 | { | 631 | { |
547 | u64 *pte; | 632 | u64 *pte = fetch_pte(dom, bus_addr, map_size); |
548 | |||
549 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)]; | ||
550 | |||
551 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
552 | return; | ||
553 | |||
554 | pte = IOMMU_PTE_PAGE(*pte); | ||
555 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
556 | 633 | ||
557 | if (!IOMMU_PTE_PRESENT(*pte)) | 634 | if (pte) |
558 | return; | 635 | *pte = 0; |
559 | |||
560 | pte = IOMMU_PTE_PAGE(*pte); | ||
561 | pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)]; | ||
562 | |||
563 | *pte = 0; | ||
564 | } | 636 | } |
565 | 637 | ||
566 | /* | 638 | /* |
@@ -615,7 +687,8 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom, | |||
615 | 687 | ||
616 | for (addr = e->address_start; addr < e->address_end; | 688 | for (addr = e->address_start; addr < e->address_end; |
617 | addr += PAGE_SIZE) { | 689 | addr += PAGE_SIZE) { |
618 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot); | 690 | ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot, |
691 | PM_MAP_4k); | ||
619 | if (ret) | 692 | if (ret) |
620 | return ret; | 693 | return ret; |
621 | /* | 694 | /* |
@@ -670,24 +743,29 @@ static int init_unity_mappings_for_device(struct dma_ops_domain *dma_dom, | |||
670 | * This function checks if there is a PTE for a given dma address. If | 743 | * This function checks if there is a PTE for a given dma address. If |
671 | * there is one, it returns the pointer to it. | 744 | * there is one, it returns the pointer to it. |
672 | */ | 745 | */ |
673 | static u64* fetch_pte(struct protection_domain *domain, | 746 | static u64 *fetch_pte(struct protection_domain *domain, |
674 | unsigned long address) | 747 | unsigned long address, int map_size) |
675 | { | 748 | { |
749 | int level; | ||
676 | u64 *pte; | 750 | u64 *pte; |
677 | 751 | ||
678 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 752 | level = domain->mode - 1; |
753 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; | ||
679 | 754 | ||
680 | if (!IOMMU_PTE_PRESENT(*pte)) | 755 | while (level > map_size) { |
681 | return NULL; | 756 | if (!IOMMU_PTE_PRESENT(*pte)) |
757 | return NULL; | ||
682 | 758 | ||
683 | pte = IOMMU_PTE_PAGE(*pte); | 759 | level -= 1; |
684 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | ||
685 | 760 | ||
686 | if (!IOMMU_PTE_PRESENT(*pte)) | 761 | pte = IOMMU_PTE_PAGE(*pte); |
687 | return NULL; | 762 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
688 | 763 | ||
689 | pte = IOMMU_PTE_PAGE(*pte); | 764 | if ((PM_PTE_LEVEL(*pte) == 0) && level != map_size) { |
690 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 765 | pte = NULL; |
766 | break; | ||
767 | } | ||
768 | } | ||
691 | 769 | ||
692 | return pte; | 770 | return pte; |
693 | } | 771 | } |
@@ -727,7 +805,7 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
727 | u64 *pte, *pte_page; | 805 | u64 *pte, *pte_page; |
728 | 806 | ||
729 | for (i = 0; i < num_ptes; ++i) { | 807 | for (i = 0; i < num_ptes; ++i) { |
730 | pte = alloc_pte(&dma_dom->domain, address, | 808 | pte = alloc_pte(&dma_dom->domain, address, PM_MAP_4k, |
731 | &pte_page, gfp); | 809 | &pte_page, gfp); |
732 | if (!pte) | 810 | if (!pte) |
733 | goto out_free; | 811 | goto out_free; |
@@ -760,16 +838,20 @@ static int alloc_new_range(struct amd_iommu *iommu, | |||
760 | for (i = dma_dom->aperture[index]->offset; | 838 | for (i = dma_dom->aperture[index]->offset; |
761 | i < dma_dom->aperture_size; | 839 | i < dma_dom->aperture_size; |
762 | i += PAGE_SIZE) { | 840 | i += PAGE_SIZE) { |
763 | u64 *pte = fetch_pte(&dma_dom->domain, i); | 841 | u64 *pte = fetch_pte(&dma_dom->domain, i, PM_MAP_4k); |
764 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) | 842 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
765 | continue; | 843 | continue; |
766 | 844 | ||
767 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); | 845 | dma_ops_reserve_addresses(dma_dom, i << PAGE_SHIFT, 1); |
768 | } | 846 | } |
769 | 847 | ||
848 | update_domain(&dma_dom->domain); | ||
849 | |||
770 | return 0; | 850 | return 0; |
771 | 851 | ||
772 | out_free: | 852 | out_free: |
853 | update_domain(&dma_dom->domain); | ||
854 | |||
773 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); | 855 | free_page((unsigned long)dma_dom->aperture[index]->bitmap); |
774 | 856 | ||
775 | kfree(dma_dom->aperture[index]); | 857 | kfree(dma_dom->aperture[index]); |
@@ -1009,7 +1091,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu) | |||
1009 | dma_dom->domain.id = domain_id_alloc(); | 1091 | dma_dom->domain.id = domain_id_alloc(); |
1010 | if (dma_dom->domain.id == 0) | 1092 | if (dma_dom->domain.id == 0) |
1011 | goto free_dma_dom; | 1093 | goto free_dma_dom; |
1012 | dma_dom->domain.mode = PAGE_MODE_3_LEVEL; | 1094 | dma_dom->domain.mode = PAGE_MODE_2_LEVEL; |
1013 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 1095 | dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
1014 | dma_dom->domain.flags = PD_DMA_OPS_MASK; | 1096 | dma_dom->domain.flags = PD_DMA_OPS_MASK; |
1015 | dma_dom->domain.priv = dma_dom; | 1097 | dma_dom->domain.priv = dma_dom; |
@@ -1063,6 +1145,41 @@ static struct protection_domain *domain_for_device(u16 devid) | |||
1063 | return dom; | 1145 | return dom; |
1064 | } | 1146 | } |
1065 | 1147 | ||
1148 | static void set_dte_entry(u16 devid, struct protection_domain *domain) | ||
1149 | { | ||
1150 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1151 | |||
1152 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1153 | << DEV_ENTRY_MODE_SHIFT; | ||
1154 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1155 | |||
1156 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1157 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1158 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | ||
1159 | |||
1160 | amd_iommu_pd_table[devid] = domain; | ||
1161 | } | ||
1162 | |||
1163 | /* | ||
1164 | * If a device is not yet associated with a domain, this function does | ||
1165 | * assigns it visible for the hardware | ||
1166 | */ | ||
1167 | static void __attach_device(struct amd_iommu *iommu, | ||
1168 | struct protection_domain *domain, | ||
1169 | u16 devid) | ||
1170 | { | ||
1171 | /* lock domain */ | ||
1172 | spin_lock(&domain->lock); | ||
1173 | |||
1174 | /* update DTE entry */ | ||
1175 | set_dte_entry(devid, domain); | ||
1176 | |||
1177 | domain->dev_cnt += 1; | ||
1178 | |||
1179 | /* ready */ | ||
1180 | spin_unlock(&domain->lock); | ||
1181 | } | ||
1182 | |||
1066 | /* | 1183 | /* |
1067 | * If a device is not yet associated with a domain, this function does | 1184 | * If a device is not yet associated with a domain, this function does |
1068 | * assigns it visible for the hardware | 1185 | * assigns it visible for the hardware |
@@ -1072,27 +1189,16 @@ static void attach_device(struct amd_iommu *iommu, | |||
1072 | u16 devid) | 1189 | u16 devid) |
1073 | { | 1190 | { |
1074 | unsigned long flags; | 1191 | unsigned long flags; |
1075 | u64 pte_root = virt_to_phys(domain->pt_root); | ||
1076 | |||
1077 | domain->dev_cnt += 1; | ||
1078 | |||
1079 | pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) | ||
1080 | << DEV_ENTRY_MODE_SHIFT; | ||
1081 | pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; | ||
1082 | 1192 | ||
1083 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | 1193 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); |
1084 | amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); | 1194 | __attach_device(iommu, domain, devid); |
1085 | amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); | ||
1086 | amd_iommu_dev_table[devid].data[2] = domain->id; | ||
1087 | |||
1088 | amd_iommu_pd_table[devid] = domain; | ||
1089 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 1195 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1090 | 1196 | ||
1091 | /* | 1197 | /* |
1092 | * We might boot into a crash-kernel here. The crashed kernel | 1198 | * We might boot into a crash-kernel here. The crashed kernel |
1093 | * left the caches in the IOMMU dirty. So we have to flush | 1199 | * left the caches in the IOMMU dirty. So we have to flush |
1094 | * here to evict all dirty stuff. | 1200 | * here to evict all dirty stuff. |
1095 | */ | 1201 | */ |
1096 | iommu_queue_inv_dev_entry(iommu, devid); | 1202 | iommu_queue_inv_dev_entry(iommu, devid); |
1097 | iommu_flush_tlb_pde(iommu, domain->id); | 1203 | iommu_flush_tlb_pde(iommu, domain->id); |
1098 | } | 1204 | } |
@@ -1119,6 +1225,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid) | |||
1119 | 1225 | ||
1120 | /* ready */ | 1226 | /* ready */ |
1121 | spin_unlock(&domain->lock); | 1227 | spin_unlock(&domain->lock); |
1228 | |||
1229 | /* | ||
1230 | * If we run in passthrough mode the device must be assigned to the | ||
1231 | * passthrough domain if it is detached from any other domain | ||
1232 | */ | ||
1233 | if (iommu_pass_through) { | ||
1234 | struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; | ||
1235 | __attach_device(iommu, pt_domain, devid); | ||
1236 | } | ||
1122 | } | 1237 | } |
1123 | 1238 | ||
1124 | /* | 1239 | /* |
@@ -1164,6 +1279,8 @@ static int device_change_notifier(struct notifier_block *nb, | |||
1164 | case BUS_NOTIFY_UNBOUND_DRIVER: | 1279 | case BUS_NOTIFY_UNBOUND_DRIVER: |
1165 | if (!domain) | 1280 | if (!domain) |
1166 | goto out; | 1281 | goto out; |
1282 | if (iommu_pass_through) | ||
1283 | break; | ||
1167 | detach_device(domain, devid); | 1284 | detach_device(domain, devid); |
1168 | break; | 1285 | break; |
1169 | case BUS_NOTIFY_ADD_DEVICE: | 1286 | case BUS_NOTIFY_ADD_DEVICE: |
@@ -1292,39 +1409,91 @@ static int get_device_resources(struct device *dev, | |||
1292 | return 1; | 1409 | return 1; |
1293 | } | 1410 | } |
1294 | 1411 | ||
1412 | static void update_device_table(struct protection_domain *domain) | ||
1413 | { | ||
1414 | unsigned long flags; | ||
1415 | int i; | ||
1416 | |||
1417 | for (i = 0; i <= amd_iommu_last_bdf; ++i) { | ||
1418 | if (amd_iommu_pd_table[i] != domain) | ||
1419 | continue; | ||
1420 | write_lock_irqsave(&amd_iommu_devtable_lock, flags); | ||
1421 | set_dte_entry(i, domain); | ||
1422 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | ||
1423 | } | ||
1424 | } | ||
1425 | |||
1426 | static void update_domain(struct protection_domain *domain) | ||
1427 | { | ||
1428 | if (!domain->updated) | ||
1429 | return; | ||
1430 | |||
1431 | update_device_table(domain); | ||
1432 | flush_devices_by_domain(domain); | ||
1433 | iommu_flush_domain(domain->id); | ||
1434 | |||
1435 | domain->updated = false; | ||
1436 | } | ||
1437 | |||
1295 | /* | 1438 | /* |
1296 | * If the pte_page is not yet allocated this function is called | 1439 | * This function is used to add another level to an IO page table. Adding |
1440 | * another level increases the size of the address space by 9 bits to a size up | ||
1441 | * to 64 bits. | ||
1297 | */ | 1442 | */ |
1298 | static u64* alloc_pte(struct protection_domain *dom, | 1443 | static bool increase_address_space(struct protection_domain *domain, |
1299 | unsigned long address, u64 **pte_page, gfp_t gfp) | 1444 | gfp_t gfp) |
1445 | { | ||
1446 | u64 *pte; | ||
1447 | |||
1448 | if (domain->mode == PAGE_MODE_6_LEVEL) | ||
1449 | /* address space already 64 bit large */ | ||
1450 | return false; | ||
1451 | |||
1452 | pte = (void *)get_zeroed_page(gfp); | ||
1453 | if (!pte) | ||
1454 | return false; | ||
1455 | |||
1456 | *pte = PM_LEVEL_PDE(domain->mode, | ||
1457 | virt_to_phys(domain->pt_root)); | ||
1458 | domain->pt_root = pte; | ||
1459 | domain->mode += 1; | ||
1460 | domain->updated = true; | ||
1461 | |||
1462 | return true; | ||
1463 | } | ||
1464 | |||
1465 | static u64 *alloc_pte(struct protection_domain *domain, | ||
1466 | unsigned long address, | ||
1467 | int end_lvl, | ||
1468 | u64 **pte_page, | ||
1469 | gfp_t gfp) | ||
1300 | { | 1470 | { |
1301 | u64 *pte, *page; | 1471 | u64 *pte, *page; |
1472 | int level; | ||
1302 | 1473 | ||
1303 | pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(address)]; | 1474 | while (address > PM_LEVEL_SIZE(domain->mode)) |
1475 | increase_address_space(domain, gfp); | ||
1304 | 1476 | ||
1305 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1477 | level = domain->mode - 1; |
1306 | page = (u64 *)get_zeroed_page(gfp); | 1478 | pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; |
1307 | if (!page) | ||
1308 | return NULL; | ||
1309 | *pte = IOMMU_L2_PDE(virt_to_phys(page)); | ||
1310 | } | ||
1311 | 1479 | ||
1312 | pte = IOMMU_PTE_PAGE(*pte); | 1480 | while (level > end_lvl) { |
1313 | pte = &pte[IOMMU_PTE_L1_INDEX(address)]; | 1481 | if (!IOMMU_PTE_PRESENT(*pte)) { |
1482 | page = (u64 *)get_zeroed_page(gfp); | ||
1483 | if (!page) | ||
1484 | return NULL; | ||
1485 | *pte = PM_LEVEL_PDE(level, virt_to_phys(page)); | ||
1486 | } | ||
1314 | 1487 | ||
1315 | if (!IOMMU_PTE_PRESENT(*pte)) { | 1488 | level -= 1; |
1316 | page = (u64 *)get_zeroed_page(gfp); | ||
1317 | if (!page) | ||
1318 | return NULL; | ||
1319 | *pte = IOMMU_L1_PDE(virt_to_phys(page)); | ||
1320 | } | ||
1321 | 1489 | ||
1322 | pte = IOMMU_PTE_PAGE(*pte); | 1490 | pte = IOMMU_PTE_PAGE(*pte); |
1323 | 1491 | ||
1324 | if (pte_page) | 1492 | if (pte_page && level == end_lvl) |
1325 | *pte_page = pte; | 1493 | *pte_page = pte; |
1326 | 1494 | ||
1327 | pte = &pte[IOMMU_PTE_L0_INDEX(address)]; | 1495 | pte = &pte[PM_LEVEL_INDEX(level, address)]; |
1496 | } | ||
1328 | 1497 | ||
1329 | return pte; | 1498 | return pte; |
1330 | } | 1499 | } |
@@ -1344,10 +1513,13 @@ static u64* dma_ops_get_pte(struct dma_ops_domain *dom, | |||
1344 | 1513 | ||
1345 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; | 1514 | pte = aperture->pte_pages[APERTURE_PAGE_INDEX(address)]; |
1346 | if (!pte) { | 1515 | if (!pte) { |
1347 | pte = alloc_pte(&dom->domain, address, &pte_page, GFP_ATOMIC); | 1516 | pte = alloc_pte(&dom->domain, address, PM_MAP_4k, &pte_page, |
1517 | GFP_ATOMIC); | ||
1348 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; | 1518 | aperture->pte_pages[APERTURE_PAGE_INDEX(address)] = pte_page; |
1349 | } else | 1519 | } else |
1350 | pte += IOMMU_PTE_L0_INDEX(address); | 1520 | pte += PM_LEVEL_INDEX(0, address); |
1521 | |||
1522 | update_domain(&dom->domain); | ||
1351 | 1523 | ||
1352 | return pte; | 1524 | return pte; |
1353 | } | 1525 | } |
@@ -1409,7 +1581,7 @@ static void dma_ops_domain_unmap(struct amd_iommu *iommu, | |||
1409 | if (!pte) | 1581 | if (!pte) |
1410 | return; | 1582 | return; |
1411 | 1583 | ||
1412 | pte += IOMMU_PTE_L0_INDEX(address); | 1584 | pte += PM_LEVEL_INDEX(0, address); |
1413 | 1585 | ||
1414 | WARN_ON(!*pte); | 1586 | WARN_ON(!*pte); |
1415 | 1587 | ||
@@ -1988,19 +2160,47 @@ static void cleanup_domain(struct protection_domain *domain) | |||
1988 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); | 2160 | write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); |
1989 | } | 2161 | } |
1990 | 2162 | ||
1991 | static int amd_iommu_domain_init(struct iommu_domain *dom) | 2163 | static void protection_domain_free(struct protection_domain *domain) |
2164 | { | ||
2165 | if (!domain) | ||
2166 | return; | ||
2167 | |||
2168 | if (domain->id) | ||
2169 | domain_id_free(domain->id); | ||
2170 | |||
2171 | kfree(domain); | ||
2172 | } | ||
2173 | |||
2174 | static struct protection_domain *protection_domain_alloc(void) | ||
1992 | { | 2175 | { |
1993 | struct protection_domain *domain; | 2176 | struct protection_domain *domain; |
1994 | 2177 | ||
1995 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); | 2178 | domain = kzalloc(sizeof(*domain), GFP_KERNEL); |
1996 | if (!domain) | 2179 | if (!domain) |
1997 | return -ENOMEM; | 2180 | return NULL; |
1998 | 2181 | ||
1999 | spin_lock_init(&domain->lock); | 2182 | spin_lock_init(&domain->lock); |
2000 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2001 | domain->id = domain_id_alloc(); | 2183 | domain->id = domain_id_alloc(); |
2002 | if (!domain->id) | 2184 | if (!domain->id) |
2185 | goto out_err; | ||
2186 | |||
2187 | return domain; | ||
2188 | |||
2189 | out_err: | ||
2190 | kfree(domain); | ||
2191 | |||
2192 | return NULL; | ||
2193 | } | ||
2194 | |||
2195 | static int amd_iommu_domain_init(struct iommu_domain *dom) | ||
2196 | { | ||
2197 | struct protection_domain *domain; | ||
2198 | |||
2199 | domain = protection_domain_alloc(); | ||
2200 | if (!domain) | ||
2003 | goto out_free; | 2201 | goto out_free; |
2202 | |||
2203 | domain->mode = PAGE_MODE_3_LEVEL; | ||
2004 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); | 2204 | domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); |
2005 | if (!domain->pt_root) | 2205 | if (!domain->pt_root) |
2006 | goto out_free; | 2206 | goto out_free; |
@@ -2010,7 +2210,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) | |||
2010 | return 0; | 2210 | return 0; |
2011 | 2211 | ||
2012 | out_free: | 2212 | out_free: |
2013 | kfree(domain); | 2213 | protection_domain_free(domain); |
2014 | 2214 | ||
2015 | return -ENOMEM; | 2215 | return -ENOMEM; |
2016 | } | 2216 | } |
@@ -2115,7 +2315,7 @@ static int amd_iommu_map_range(struct iommu_domain *dom, | |||
2115 | paddr &= PAGE_MASK; | 2315 | paddr &= PAGE_MASK; |
2116 | 2316 | ||
2117 | for (i = 0; i < npages; ++i) { | 2317 | for (i = 0; i < npages; ++i) { |
2118 | ret = iommu_map_page(domain, iova, paddr, prot); | 2318 | ret = iommu_map_page(domain, iova, paddr, prot, PM_MAP_4k); |
2119 | if (ret) | 2319 | if (ret) |
2120 | return ret; | 2320 | return ret; |
2121 | 2321 | ||
@@ -2136,7 +2336,7 @@ static void amd_iommu_unmap_range(struct iommu_domain *dom, | |||
2136 | iova &= PAGE_MASK; | 2336 | iova &= PAGE_MASK; |
2137 | 2337 | ||
2138 | for (i = 0; i < npages; ++i) { | 2338 | for (i = 0; i < npages; ++i) { |
2139 | iommu_unmap_page(domain, iova); | 2339 | iommu_unmap_page(domain, iova, PM_MAP_4k); |
2140 | iova += PAGE_SIZE; | 2340 | iova += PAGE_SIZE; |
2141 | } | 2341 | } |
2142 | 2342 | ||
@@ -2151,21 +2351,9 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, | |||
2151 | phys_addr_t paddr; | 2351 | phys_addr_t paddr; |
2152 | u64 *pte; | 2352 | u64 *pte; |
2153 | 2353 | ||
2154 | pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)]; | 2354 | pte = fetch_pte(domain, iova, PM_MAP_4k); |
2155 | |||
2156 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
2157 | return 0; | ||
2158 | |||
2159 | pte = IOMMU_PTE_PAGE(*pte); | ||
2160 | pte = &pte[IOMMU_PTE_L1_INDEX(iova)]; | ||
2161 | |||
2162 | if (!IOMMU_PTE_PRESENT(*pte)) | ||
2163 | return 0; | ||
2164 | |||
2165 | pte = IOMMU_PTE_PAGE(*pte); | ||
2166 | pte = &pte[IOMMU_PTE_L0_INDEX(iova)]; | ||
2167 | 2355 | ||
2168 | if (!IOMMU_PTE_PRESENT(*pte)) | 2356 | if (!pte || !IOMMU_PTE_PRESENT(*pte)) |
2169 | return 0; | 2357 | return 0; |
2170 | 2358 | ||
2171 | paddr = *pte & IOMMU_PAGE_MASK; | 2359 | paddr = *pte & IOMMU_PAGE_MASK; |
@@ -2191,3 +2379,46 @@ static struct iommu_ops amd_iommu_ops = { | |||
2191 | .domain_has_cap = amd_iommu_domain_has_cap, | 2379 | .domain_has_cap = amd_iommu_domain_has_cap, |
2192 | }; | 2380 | }; |
2193 | 2381 | ||
2382 | /***************************************************************************** | ||
2383 | * | ||
2384 | * The next functions do a basic initialization of IOMMU for pass through | ||
2385 | * mode | ||
2386 | * | ||
2387 | * In passthrough mode the IOMMU is initialized and enabled but not used for | ||
2388 | * DMA-API translation. | ||
2389 | * | ||
2390 | *****************************************************************************/ | ||
2391 | |||
2392 | int __init amd_iommu_init_passthrough(void) | ||
2393 | { | ||
2394 | struct pci_dev *dev = NULL; | ||
2395 | u16 devid, devid2; | ||
2396 | |||
2397 | /* allocate passthroug domain */ | ||
2398 | pt_domain = protection_domain_alloc(); | ||
2399 | if (!pt_domain) | ||
2400 | return -ENOMEM; | ||
2401 | |||
2402 | pt_domain->mode |= PAGE_MODE_NONE; | ||
2403 | |||
2404 | while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { | ||
2405 | struct amd_iommu *iommu; | ||
2406 | |||
2407 | devid = calc_devid(dev->bus->number, dev->devfn); | ||
2408 | if (devid > amd_iommu_last_bdf) | ||
2409 | continue; | ||
2410 | |||
2411 | devid2 = amd_iommu_alias_table[devid]; | ||
2412 | |||
2413 | iommu = amd_iommu_rlookup_table[devid2]; | ||
2414 | if (!iommu) | ||
2415 | continue; | ||
2416 | |||
2417 | __attach_device(iommu, pt_domain, devid); | ||
2418 | __attach_device(iommu, pt_domain, devid2); | ||
2419 | } | ||
2420 | |||
2421 | pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); | ||
2422 | |||
2423 | return 0; | ||
2424 | } | ||
diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252e..b4b61d462dcc 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c | |||
@@ -252,7 +252,7 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit) | |||
252 | /* Function to enable the hardware */ | 252 | /* Function to enable the hardware */ |
253 | static void iommu_enable(struct amd_iommu *iommu) | 253 | static void iommu_enable(struct amd_iommu *iommu) |
254 | { | 254 | { |
255 | printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n", | 255 | printk(KERN_INFO "AMD-Vi: Enabling IOMMU at %s cap 0x%hx\n", |
256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); | 256 | dev_name(&iommu->dev->dev), iommu->cap_ptr); |
257 | 257 | ||
258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); | 258 | iommu_feature_enable(iommu, CONTROL_IOMMU_EN); |
@@ -435,6 +435,20 @@ static u8 * __init alloc_command_buffer(struct amd_iommu *iommu) | |||
435 | } | 435 | } |
436 | 436 | ||
437 | /* | 437 | /* |
438 | * This function resets the command buffer if the IOMMU stopped fetching | ||
439 | * commands from it. | ||
440 | */ | ||
441 | void amd_iommu_reset_cmd_buffer(struct amd_iommu *iommu) | ||
442 | { | ||
443 | iommu_feature_disable(iommu, CONTROL_CMDBUF_EN); | ||
444 | |||
445 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
446 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
447 | |||
448 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
449 | } | ||
450 | |||
451 | /* | ||
438 | * This function writes the command buffer address to the hardware and | 452 | * This function writes the command buffer address to the hardware and |
439 | * enables it. | 453 | * enables it. |
440 | */ | 454 | */ |
@@ -450,11 +464,7 @@ static void iommu_enable_command_buffer(struct amd_iommu *iommu) | |||
450 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, | 464 | memcpy_toio(iommu->mmio_base + MMIO_CMD_BUF_OFFSET, |
451 | &entry, sizeof(entry)); | 465 | &entry, sizeof(entry)); |
452 | 466 | ||
453 | /* set head and tail to zero manually */ | 467 | amd_iommu_reset_cmd_buffer(iommu); |
454 | writel(0x00, iommu->mmio_base + MMIO_CMD_HEAD_OFFSET); | ||
455 | writel(0x00, iommu->mmio_base + MMIO_CMD_TAIL_OFFSET); | ||
456 | |||
457 | iommu_feature_enable(iommu, CONTROL_CMDBUF_EN); | ||
458 | } | 468 | } |
459 | 469 | ||
460 | static void __init free_command_buffer(struct amd_iommu *iommu) | 470 | static void __init free_command_buffer(struct amd_iommu *iommu) |
@@ -858,7 +868,7 @@ static int __init init_iommu_all(struct acpi_table_header *table) | |||
858 | switch (*p) { | 868 | switch (*p) { |
859 | case ACPI_IVHD_TYPE: | 869 | case ACPI_IVHD_TYPE: |
860 | 870 | ||
861 | DUMP_printk("IOMMU: device: %02x:%02x.%01x cap: %04x " | 871 | DUMP_printk("device: %02x:%02x.%01x cap: %04x " |
862 | "seg: %d flags: %01x info %04x\n", | 872 | "seg: %d flags: %01x info %04x\n", |
863 | PCI_BUS(h->devid), PCI_SLOT(h->devid), | 873 | PCI_BUS(h->devid), PCI_SLOT(h->devid), |
864 | PCI_FUNC(h->devid), h->cap_ptr, | 874 | PCI_FUNC(h->devid), h->cap_ptr, |
@@ -902,7 +912,7 @@ static int __init iommu_setup_msi(struct amd_iommu *iommu) | |||
902 | 912 | ||
903 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, | 913 | r = request_irq(iommu->dev->irq, amd_iommu_int_handler, |
904 | IRQF_SAMPLE_RANDOM, | 914 | IRQF_SAMPLE_RANDOM, |
905 | "AMD IOMMU", | 915 | "AMD-Vi", |
906 | NULL); | 916 | NULL); |
907 | 917 | ||
908 | if (r) { | 918 | if (r) { |
@@ -1150,7 +1160,7 @@ int __init amd_iommu_init(void) | |||
1150 | 1160 | ||
1151 | 1161 | ||
1152 | if (no_iommu) { | 1162 | if (no_iommu) { |
1153 | printk(KERN_INFO "AMD IOMMU disabled by kernel command line\n"); | 1163 | printk(KERN_INFO "AMD-Vi disabled by kernel command line\n"); |
1154 | return 0; | 1164 | return 0; |
1155 | } | 1165 | } |
1156 | 1166 | ||
@@ -1242,22 +1252,28 @@ int __init amd_iommu_init(void) | |||
1242 | if (ret) | 1252 | if (ret) |
1243 | goto free; | 1253 | goto free; |
1244 | 1254 | ||
1245 | ret = amd_iommu_init_dma_ops(); | 1255 | if (iommu_pass_through) |
1256 | ret = amd_iommu_init_passthrough(); | ||
1257 | else | ||
1258 | ret = amd_iommu_init_dma_ops(); | ||
1246 | if (ret) | 1259 | if (ret) |
1247 | goto free; | 1260 | goto free; |
1248 | 1261 | ||
1249 | enable_iommus(); | 1262 | enable_iommus(); |
1250 | 1263 | ||
1251 | printk(KERN_INFO "AMD IOMMU: device isolation "); | 1264 | if (iommu_pass_through) |
1265 | goto out; | ||
1266 | |||
1267 | printk(KERN_INFO "AMD-Vi: device isolation "); | ||
1252 | if (amd_iommu_isolate) | 1268 | if (amd_iommu_isolate) |
1253 | printk("enabled\n"); | 1269 | printk("enabled\n"); |
1254 | else | 1270 | else |
1255 | printk("disabled\n"); | 1271 | printk("disabled\n"); |
1256 | 1272 | ||
1257 | if (amd_iommu_unmap_flush) | 1273 | if (amd_iommu_unmap_flush) |
1258 | printk(KERN_INFO "AMD IOMMU: IO/TLB flush on unmap enabled\n"); | 1274 | printk(KERN_INFO "AMD-Vi: IO/TLB flush on unmap enabled\n"); |
1259 | else | 1275 | else |
1260 | printk(KERN_INFO "AMD IOMMU: Lazy IO/TLB flushing enabled\n"); | 1276 | printk(KERN_INFO "AMD-Vi: Lazy IO/TLB flushing enabled\n"); |
1261 | 1277 | ||
1262 | out: | 1278 | out: |
1263 | return ret; | 1279 | return ret; |
diff --git a/arch/x86/kernel/aperture_64.c b/arch/x86/kernel/aperture_64.c index 676debfc1702..128111d8ffe0 100644 --- a/arch/x86/kernel/aperture_64.c +++ b/arch/x86/kernel/aperture_64.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/bitops.h> | 20 | #include <linux/bitops.h> |
21 | #include <linux/ioport.h> | 21 | #include <linux/ioport.h> |
22 | #include <linux/suspend.h> | 22 | #include <linux/suspend.h> |
23 | #include <linux/kmemleak.h> | ||
23 | #include <asm/e820.h> | 24 | #include <asm/e820.h> |
24 | #include <asm/io.h> | 25 | #include <asm/io.h> |
25 | #include <asm/iommu.h> | 26 | #include <asm/iommu.h> |
@@ -94,6 +95,11 @@ static u32 __init allocate_aperture(void) | |||
94 | * code for safe | 95 | * code for safe |
95 | */ | 96 | */ |
96 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); | 97 | p = __alloc_bootmem_nopanic(aper_size, aper_size, 512ULL<<20); |
98 | /* | ||
99 | * Kmemleak should not scan this block as it may not be mapped via the | ||
100 | * kernel direct mapping. | ||
101 | */ | ||
102 | kmemleak_ignore(p); | ||
97 | if (!p || __pa(p)+aper_size > 0xffffffff) { | 103 | if (!p || __pa(p)+aper_size > 0xffffffff) { |
98 | printk(KERN_ERR | 104 | printk(KERN_ERR |
99 | "Cannot allocate aperture memory hole (%p,%uK)\n", | 105 | "Cannot allocate aperture memory hole (%p,%uK)\n", |
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 0a1c2830ec66..a34601f52987 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c | |||
@@ -36,6 +36,7 @@ | |||
36 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
37 | 37 | ||
38 | #include <asm/perf_counter.h> | 38 | #include <asm/perf_counter.h> |
39 | #include <asm/x86_init.h> | ||
39 | #include <asm/pgalloc.h> | 40 | #include <asm/pgalloc.h> |
40 | #include <asm/atomic.h> | 41 | #include <asm/atomic.h> |
41 | #include <asm/mpspec.h> | 42 | #include <asm/mpspec.h> |
@@ -49,6 +50,7 @@ | |||
49 | #include <asm/mtrr.h> | 50 | #include <asm/mtrr.h> |
50 | #include <asm/smp.h> | 51 | #include <asm/smp.h> |
51 | #include <asm/mce.h> | 52 | #include <asm/mce.h> |
53 | #include <asm/kvm_para.h> | ||
52 | 54 | ||
53 | unsigned int num_processors; | 55 | unsigned int num_processors; |
54 | 56 | ||
@@ -1361,52 +1363,80 @@ void enable_x2apic(void) | |||
1361 | } | 1363 | } |
1362 | #endif /* CONFIG_X86_X2APIC */ | 1364 | #endif /* CONFIG_X86_X2APIC */ |
1363 | 1365 | ||
1364 | void __init enable_IR_x2apic(void) | 1366 | int __init enable_IR(void) |
1365 | { | 1367 | { |
1366 | #ifdef CONFIG_INTR_REMAP | 1368 | #ifdef CONFIG_INTR_REMAP |
1367 | int ret; | ||
1368 | unsigned long flags; | ||
1369 | struct IO_APIC_route_entry **ioapic_entries = NULL; | ||
1370 | |||
1371 | ret = dmar_table_init(); | ||
1372 | if (ret) { | ||
1373 | pr_debug("dmar_table_init() failed with %d:\n", ret); | ||
1374 | goto ir_failed; | ||
1375 | } | ||
1376 | |||
1377 | if (!intr_remapping_supported()) { | 1369 | if (!intr_remapping_supported()) { |
1378 | pr_debug("intr-remapping not supported\n"); | 1370 | pr_debug("intr-remapping not supported\n"); |
1379 | goto ir_failed; | 1371 | return 0; |
1380 | } | 1372 | } |
1381 | 1373 | ||
1382 | |||
1383 | if (!x2apic_preenabled && skip_ioapic_setup) { | 1374 | if (!x2apic_preenabled && skip_ioapic_setup) { |
1384 | pr_info("Skipped enabling intr-remap because of skipping " | 1375 | pr_info("Skipped enabling intr-remap because of skipping " |
1385 | "io-apic setup\n"); | 1376 | "io-apic setup\n"); |
1386 | return; | 1377 | return 0; |
1387 | } | 1378 | } |
1388 | 1379 | ||
1380 | if (enable_intr_remapping(x2apic_supported())) | ||
1381 | return 0; | ||
1382 | |||
1383 | pr_info("Enabled Interrupt-remapping\n"); | ||
1384 | |||
1385 | return 1; | ||
1386 | |||
1387 | #endif | ||
1388 | return 0; | ||
1389 | } | ||
1390 | |||
1391 | void __init enable_IR_x2apic(void) | ||
1392 | { | ||
1393 | unsigned long flags; | ||
1394 | struct IO_APIC_route_entry **ioapic_entries = NULL; | ||
1395 | int ret, x2apic_enabled = 0; | ||
1396 | int dmar_table_init_ret = 0; | ||
1397 | |||
1398 | #ifdef CONFIG_INTR_REMAP | ||
1399 | dmar_table_init_ret = dmar_table_init(); | ||
1400 | if (dmar_table_init_ret) | ||
1401 | pr_debug("dmar_table_init() failed with %d:\n", | ||
1402 | dmar_table_init_ret); | ||
1403 | #endif | ||
1404 | |||
1389 | ioapic_entries = alloc_ioapic_entries(); | 1405 | ioapic_entries = alloc_ioapic_entries(); |
1390 | if (!ioapic_entries) { | 1406 | if (!ioapic_entries) { |
1391 | pr_info("Allocate ioapic_entries failed: %d\n", ret); | 1407 | pr_err("Allocate ioapic_entries failed\n"); |
1392 | goto end; | 1408 | goto out; |
1393 | } | 1409 | } |
1394 | 1410 | ||
1395 | ret = save_IO_APIC_setup(ioapic_entries); | 1411 | ret = save_IO_APIC_setup(ioapic_entries); |
1396 | if (ret) { | 1412 | if (ret) { |
1397 | pr_info("Saving IO-APIC state failed: %d\n", ret); | 1413 | pr_info("Saving IO-APIC state failed: %d\n", ret); |
1398 | goto end; | 1414 | goto out; |
1399 | } | 1415 | } |
1400 | 1416 | ||
1401 | local_irq_save(flags); | 1417 | local_irq_save(flags); |
1402 | mask_IO_APIC_setup(ioapic_entries); | ||
1403 | mask_8259A(); | 1418 | mask_8259A(); |
1419 | mask_IO_APIC_setup(ioapic_entries); | ||
1404 | 1420 | ||
1405 | ret = enable_intr_remapping(x2apic_supported()); | 1421 | if (dmar_table_init_ret) |
1406 | if (ret) | 1422 | ret = 0; |
1407 | goto end_restore; | 1423 | else |
1424 | ret = enable_IR(); | ||
1408 | 1425 | ||
1409 | pr_info("Enabled Interrupt-remapping\n"); | 1426 | if (!ret) { |
1427 | /* IR is required if there is APIC ID > 255 even when running | ||
1428 | * under KVM | ||
1429 | */ | ||
1430 | if (max_physical_apicid > 255 || !kvm_para_available()) | ||
1431 | goto nox2apic; | ||
1432 | /* | ||
1433 | * without IR all CPUs can be addressed by IOAPIC/MSI | ||
1434 | * only in physical mode | ||
1435 | */ | ||
1436 | x2apic_force_phys(); | ||
1437 | } | ||
1438 | |||
1439 | x2apic_enabled = 1; | ||
1410 | 1440 | ||
1411 | if (x2apic_supported() && !x2apic_mode) { | 1441 | if (x2apic_supported() && !x2apic_mode) { |
1412 | x2apic_mode = 1; | 1442 | x2apic_mode = 1; |
@@ -1414,41 +1444,25 @@ void __init enable_IR_x2apic(void) | |||
1414 | pr_info("Enabled x2apic\n"); | 1444 | pr_info("Enabled x2apic\n"); |
1415 | } | 1445 | } |
1416 | 1446 | ||
1417 | end_restore: | 1447 | nox2apic: |
1418 | if (ret) | 1448 | if (!ret) /* IR enabling failed */ |
1419 | /* | ||
1420 | * IR enabling failed | ||
1421 | */ | ||
1422 | restore_IO_APIC_setup(ioapic_entries); | 1449 | restore_IO_APIC_setup(ioapic_entries); |
1423 | |||
1424 | unmask_8259A(); | 1450 | unmask_8259A(); |
1425 | local_irq_restore(flags); | 1451 | local_irq_restore(flags); |
1426 | 1452 | ||
1427 | end: | 1453 | out: |
1428 | if (ioapic_entries) | 1454 | if (ioapic_entries) |
1429 | free_ioapic_entries(ioapic_entries); | 1455 | free_ioapic_entries(ioapic_entries); |
1430 | 1456 | ||
1431 | if (!ret) | 1457 | if (x2apic_enabled) |
1432 | return; | 1458 | return; |
1433 | 1459 | ||
1434 | ir_failed: | ||
1435 | if (x2apic_preenabled) | 1460 | if (x2apic_preenabled) |
1436 | panic("x2apic enabled by bios. But IR enabling failed"); | 1461 | panic("x2apic: enabled by BIOS but kernel init failed."); |
1437 | else if (cpu_has_x2apic) | 1462 | else if (cpu_has_x2apic) |
1438 | pr_info("Not enabling x2apic,Intr-remapping\n"); | 1463 | pr_info("Not enabling x2apic, Intr-remapping init failed.\n"); |
1439 | #else | ||
1440 | if (!cpu_has_x2apic) | ||
1441 | return; | ||
1442 | |||
1443 | if (x2apic_preenabled) | ||
1444 | panic("x2apic enabled prior OS handover," | ||
1445 | " enable CONFIG_X86_X2APIC, CONFIG_INTR_REMAP"); | ||
1446 | #endif | ||
1447 | |||
1448 | return; | ||
1449 | } | 1464 | } |
1450 | 1465 | ||
1451 | |||
1452 | #ifdef CONFIG_X86_64 | 1466 | #ifdef CONFIG_X86_64 |
1453 | /* | 1467 | /* |
1454 | * Detect and enable local APICs on non-SMP boards. | 1468 | * Detect and enable local APICs on non-SMP boards. |
@@ -1549,8 +1563,6 @@ no_apic: | |||
1549 | #ifdef CONFIG_X86_64 | 1563 | #ifdef CONFIG_X86_64 |
1550 | void __init early_init_lapic_mapping(void) | 1564 | void __init early_init_lapic_mapping(void) |
1551 | { | 1565 | { |
1552 | unsigned long phys_addr; | ||
1553 | |||
1554 | /* | 1566 | /* |
1555 | * If no local APIC can be found then go out | 1567 | * If no local APIC can be found then go out |
1556 | * : it means there is no mpatable and MADT | 1568 | * : it means there is no mpatable and MADT |
@@ -1558,11 +1570,9 @@ void __init early_init_lapic_mapping(void) | |||
1558 | if (!smp_found_config) | 1570 | if (!smp_found_config) |
1559 | return; | 1571 | return; |
1560 | 1572 | ||
1561 | phys_addr = mp_lapic_addr; | 1573 | set_fixmap_nocache(FIX_APIC_BASE, mp_lapic_addr); |
1562 | |||
1563 | set_fixmap_nocache(FIX_APIC_BASE, phys_addr); | ||
1564 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", | 1574 | apic_printk(APIC_VERBOSE, "mapped APIC to %16lx (%16lx)\n", |
1565 | APIC_BASE, phys_addr); | 1575 | APIC_BASE, mp_lapic_addr); |
1566 | 1576 | ||
1567 | /* | 1577 | /* |
1568 | * Fetch the APIC ID of the BSP in case we have a | 1578 | * Fetch the APIC ID of the BSP in case we have a |
@@ -1651,7 +1661,6 @@ int __init APIC_init_uniprocessor(void) | |||
1651 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { | 1661 | APIC_INTEGRATED(apic_version[boot_cpu_physical_apicid])) { |
1652 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", | 1662 | pr_err("BIOS bug, local APIC 0x%x not detected!...\n", |
1653 | boot_cpu_physical_apicid); | 1663 | boot_cpu_physical_apicid); |
1654 | clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); | ||
1655 | return -1; | 1664 | return -1; |
1656 | } | 1665 | } |
1657 | #endif | 1666 | #endif |
@@ -1701,7 +1710,7 @@ int __init APIC_init_uniprocessor(void) | |||
1701 | localise_nmi_watchdog(); | 1710 | localise_nmi_watchdog(); |
1702 | #endif | 1711 | #endif |
1703 | 1712 | ||
1704 | setup_boot_clock(); | 1713 | x86_init.timers.setup_percpu_clockev(); |
1705 | #ifdef CONFIG_X86_64 | 1714 | #ifdef CONFIG_X86_64 |
1706 | check_nmi_watchdog(); | 1715 | check_nmi_watchdog(); |
1707 | #endif | 1716 | #endif |
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c index 676cdac385c0..77a06413b6b2 100644 --- a/arch/x86/kernel/apic/bigsmp_32.c +++ b/arch/x86/kernel/apic/bigsmp_32.c | |||
@@ -112,7 +112,7 @@ static physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map) | |||
112 | return physids_promote(0xFFL); | 112 | return physids_promote(0xFFL); |
113 | } | 113 | } |
114 | 114 | ||
115 | static int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid) | 115 | static int bigsmp_check_phys_apicid_present(int phys_apicid) |
116 | { | 116 | { |
117 | return 1; | 117 | return 1; |
118 | } | 118 | } |
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c index 8952a5890281..89174f847b49 100644 --- a/arch/x86/kernel/apic/es7000_32.c +++ b/arch/x86/kernel/apic/es7000_32.c | |||
@@ -167,7 +167,7 @@ static int es7000_apic_is_cluster(void) | |||
167 | { | 167 | { |
168 | /* MPENTIUMIII */ | 168 | /* MPENTIUMIII */ |
169 | if (boot_cpu_data.x86 == 6 && | 169 | if (boot_cpu_data.x86 == 6 && |
170 | (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) | 170 | (boot_cpu_data.x86_model >= 7 && boot_cpu_data.x86_model <= 11)) |
171 | return 1; | 171 | return 1; |
172 | 172 | ||
173 | return 0; | 173 | return 0; |
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index a8c0232b3893..809e1cf86d6b 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c | |||
@@ -66,6 +66,8 @@ | |||
66 | #include <asm/apic.h> | 66 | #include <asm/apic.h> |
67 | 67 | ||
68 | #define __apicdebuginit(type) static type __init | 68 | #define __apicdebuginit(type) static type __init |
69 | #define for_each_irq_pin(entry, head) \ | ||
70 | for (entry = head; entry; entry = entry->next) | ||
69 | 71 | ||
70 | /* | 72 | /* |
71 | * Is the SiS APIC rmw bug present ? | 73 | * Is the SiS APIC rmw bug present ? |
@@ -94,6 +96,11 @@ struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES]; | |||
94 | /* # of MP IRQ source entries */ | 96 | /* # of MP IRQ source entries */ |
95 | int mp_irq_entries; | 97 | int mp_irq_entries; |
96 | 98 | ||
99 | /* Number of legacy interrupts */ | ||
100 | static int nr_legacy_irqs __read_mostly = NR_IRQS_LEGACY; | ||
101 | /* GSI interrupts */ | ||
102 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
103 | |||
97 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) | 104 | #if defined (CONFIG_MCA) || defined (CONFIG_EISA) |
98 | int mp_bus_id_to_type[MAX_MP_BUSSES]; | 105 | int mp_bus_id_to_type[MAX_MP_BUSSES]; |
99 | #endif | 106 | #endif |
@@ -119,15 +126,6 @@ static int __init parse_noapic(char *str) | |||
119 | } | 126 | } |
120 | early_param("noapic", parse_noapic); | 127 | early_param("noapic", parse_noapic); |
121 | 128 | ||
122 | struct irq_pin_list; | ||
123 | |||
124 | /* | ||
125 | * This is performance-critical, we want to do it O(1) | ||
126 | * | ||
127 | * the indexing order of this array favors 1:1 mappings | ||
128 | * between pins and IRQs. | ||
129 | */ | ||
130 | |||
131 | struct irq_pin_list { | 129 | struct irq_pin_list { |
132 | int apic, pin; | 130 | int apic, pin; |
133 | struct irq_pin_list *next; | 131 | struct irq_pin_list *next; |
@@ -142,6 +140,11 @@ static struct irq_pin_list *get_one_free_irq_2_pin(int node) | |||
142 | return pin; | 140 | return pin; |
143 | } | 141 | } |
144 | 142 | ||
143 | /* | ||
144 | * This is performance-critical, we want to do it O(1) | ||
145 | * | ||
146 | * Most irqs are mapped 1:1 with pins. | ||
147 | */ | ||
145 | struct irq_cfg { | 148 | struct irq_cfg { |
146 | struct irq_pin_list *irq_2_pin; | 149 | struct irq_pin_list *irq_2_pin; |
147 | cpumask_var_t domain; | 150 | cpumask_var_t domain; |
@@ -175,6 +178,12 @@ static struct irq_cfg irq_cfgx[NR_IRQS] = { | |||
175 | [15] = { .vector = IRQ15_VECTOR, }, | 178 | [15] = { .vector = IRQ15_VECTOR, }, |
176 | }; | 179 | }; |
177 | 180 | ||
181 | void __init io_apic_disable_legacy(void) | ||
182 | { | ||
183 | nr_legacy_irqs = 0; | ||
184 | nr_irqs_gsi = 0; | ||
185 | } | ||
186 | |||
178 | int __init arch_early_irq_init(void) | 187 | int __init arch_early_irq_init(void) |
179 | { | 188 | { |
180 | struct irq_cfg *cfg; | 189 | struct irq_cfg *cfg; |
@@ -192,7 +201,7 @@ int __init arch_early_irq_init(void) | |||
192 | desc->chip_data = &cfg[i]; | 201 | desc->chip_data = &cfg[i]; |
193 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); | 202 | zalloc_cpumask_var_node(&cfg[i].domain, GFP_NOWAIT, node); |
194 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); | 203 | zalloc_cpumask_var_node(&cfg[i].old_domain, GFP_NOWAIT, node); |
195 | if (i < NR_IRQS_LEGACY) | 204 | if (i < nr_legacy_irqs) |
196 | cpumask_setall(cfg[i].domain); | 205 | cpumask_setall(cfg[i].domain); |
197 | } | 206 | } |
198 | 207 | ||
@@ -417,13 +426,10 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
417 | unsigned long flags; | 426 | unsigned long flags; |
418 | 427 | ||
419 | spin_lock_irqsave(&ioapic_lock, flags); | 428 | spin_lock_irqsave(&ioapic_lock, flags); |
420 | entry = cfg->irq_2_pin; | 429 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
421 | for (;;) { | ||
422 | unsigned int reg; | 430 | unsigned int reg; |
423 | int pin; | 431 | int pin; |
424 | 432 | ||
425 | if (!entry) | ||
426 | break; | ||
427 | pin = entry->pin; | 433 | pin = entry->pin; |
428 | reg = io_apic_read(entry->apic, 0x10 + pin*2); | 434 | reg = io_apic_read(entry->apic, 0x10 + pin*2); |
429 | /* Is the remote IRR bit set? */ | 435 | /* Is the remote IRR bit set? */ |
@@ -431,9 +437,6 @@ static bool io_apic_level_ack_pending(struct irq_cfg *cfg) | |||
431 | spin_unlock_irqrestore(&ioapic_lock, flags); | 437 | spin_unlock_irqrestore(&ioapic_lock, flags); |
432 | return true; | 438 | return true; |
433 | } | 439 | } |
434 | if (!entry->next) | ||
435 | break; | ||
436 | entry = entry->next; | ||
437 | } | 440 | } |
438 | spin_unlock_irqrestore(&ioapic_lock, flags); | 441 | spin_unlock_irqrestore(&ioapic_lock, flags); |
439 | 442 | ||
@@ -501,72 +504,68 @@ static void ioapic_mask_entry(int apic, int pin) | |||
501 | * shared ISA-space IRQs, so we have to support them. We are super | 504 | * shared ISA-space IRQs, so we have to support them. We are super |
502 | * fast in the common case, and fast for shared ISA-space IRQs. | 505 | * fast in the common case, and fast for shared ISA-space IRQs. |
503 | */ | 506 | */ |
504 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) | 507 | static int |
508 | add_pin_to_irq_node_nopanic(struct irq_cfg *cfg, int node, int apic, int pin) | ||
505 | { | 509 | { |
506 | struct irq_pin_list *entry; | 510 | struct irq_pin_list **last, *entry; |
507 | |||
508 | entry = cfg->irq_2_pin; | ||
509 | if (!entry) { | ||
510 | entry = get_one_free_irq_2_pin(node); | ||
511 | if (!entry) { | ||
512 | printk(KERN_ERR "can not alloc irq_2_pin to add %d - %d\n", | ||
513 | apic, pin); | ||
514 | return; | ||
515 | } | ||
516 | cfg->irq_2_pin = entry; | ||
517 | entry->apic = apic; | ||
518 | entry->pin = pin; | ||
519 | return; | ||
520 | } | ||
521 | 511 | ||
522 | while (entry->next) { | 512 | /* don't allow duplicates */ |
523 | /* not again, please */ | 513 | last = &cfg->irq_2_pin; |
514 | for_each_irq_pin(entry, cfg->irq_2_pin) { | ||
524 | if (entry->apic == apic && entry->pin == pin) | 515 | if (entry->apic == apic && entry->pin == pin) |
525 | return; | 516 | return 0; |
526 | 517 | last = &entry->next; | |
527 | entry = entry->next; | ||
528 | } | 518 | } |
529 | 519 | ||
530 | entry->next = get_one_free_irq_2_pin(node); | 520 | entry = get_one_free_irq_2_pin(node); |
531 | entry = entry->next; | 521 | if (!entry) { |
522 | printk(KERN_ERR "can not alloc irq_pin_list (%d,%d,%d)\n", | ||
523 | node, apic, pin); | ||
524 | return -ENOMEM; | ||
525 | } | ||
532 | entry->apic = apic; | 526 | entry->apic = apic; |
533 | entry->pin = pin; | 527 | entry->pin = pin; |
528 | |||
529 | *last = entry; | ||
530 | return 0; | ||
531 | } | ||
532 | |||
533 | static void add_pin_to_irq_node(struct irq_cfg *cfg, int node, int apic, int pin) | ||
534 | { | ||
535 | if (add_pin_to_irq_node_nopanic(cfg, node, apic, pin)) | ||
536 | panic("IO-APIC: failed to add irq-pin. Can not proceed\n"); | ||
534 | } | 537 | } |
535 | 538 | ||
536 | /* | 539 | /* |
537 | * Reroute an IRQ to a different pin. | 540 | * Reroute an IRQ to a different pin. |
538 | */ | 541 | */ |
539 | static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, | 542 | static void __init replace_pin_at_irq_node(struct irq_cfg *cfg, int node, |
540 | int oldapic, int oldpin, | 543 | int oldapic, int oldpin, |
541 | int newapic, int newpin) | 544 | int newapic, int newpin) |
542 | { | 545 | { |
543 | struct irq_pin_list *entry = cfg->irq_2_pin; | 546 | struct irq_pin_list *entry; |
544 | int replaced = 0; | ||
545 | 547 | ||
546 | while (entry) { | 548 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
547 | if (entry->apic == oldapic && entry->pin == oldpin) { | 549 | if (entry->apic == oldapic && entry->pin == oldpin) { |
548 | entry->apic = newapic; | 550 | entry->apic = newapic; |
549 | entry->pin = newpin; | 551 | entry->pin = newpin; |
550 | replaced = 1; | ||
551 | /* every one is different, right? */ | 552 | /* every one is different, right? */ |
552 | break; | 553 | return; |
553 | } | 554 | } |
554 | entry = entry->next; | ||
555 | } | 555 | } |
556 | 556 | ||
557 | /* why? call replace before add? */ | 557 | /* old apic/pin didn't exist, so just add new ones */ |
558 | if (!replaced) | 558 | add_pin_to_irq_node(cfg, node, newapic, newpin); |
559 | add_pin_to_irq_node(cfg, node, newapic, newpin); | ||
560 | } | 559 | } |
561 | 560 | ||
562 | static inline void io_apic_modify_irq(struct irq_cfg *cfg, | 561 | static void io_apic_modify_irq(struct irq_cfg *cfg, |
563 | int mask_and, int mask_or, | 562 | int mask_and, int mask_or, |
564 | void (*final)(struct irq_pin_list *entry)) | 563 | void (*final)(struct irq_pin_list *entry)) |
565 | { | 564 | { |
566 | int pin; | 565 | int pin; |
567 | struct irq_pin_list *entry; | 566 | struct irq_pin_list *entry; |
568 | 567 | ||
569 | for (entry = cfg->irq_2_pin; entry != NULL; entry = entry->next) { | 568 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
570 | unsigned int reg; | 569 | unsigned int reg; |
571 | pin = entry->pin; | 570 | pin = entry->pin; |
572 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); | 571 | reg = io_apic_read(entry->apic, 0x10 + pin * 2); |
@@ -583,7 +582,6 @@ static void __unmask_IO_APIC_irq(struct irq_cfg *cfg) | |||
583 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); | 582 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, 0, NULL); |
584 | } | 583 | } |
585 | 584 | ||
586 | #ifdef CONFIG_X86_64 | ||
587 | static void io_apic_sync(struct irq_pin_list *entry) | 585 | static void io_apic_sync(struct irq_pin_list *entry) |
588 | { | 586 | { |
589 | /* | 587 | /* |
@@ -599,11 +597,6 @@ static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | |||
599 | { | 597 | { |
600 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); | 598 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, &io_apic_sync); |
601 | } | 599 | } |
602 | #else /* CONFIG_X86_32 */ | ||
603 | static void __mask_IO_APIC_irq(struct irq_cfg *cfg) | ||
604 | { | ||
605 | io_apic_modify_irq(cfg, ~0, IO_APIC_REDIR_MASKED, NULL); | ||
606 | } | ||
607 | 600 | ||
608 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) | 601 | static void __mask_and_edge_IO_APIC_irq(struct irq_cfg *cfg) |
609 | { | 602 | { |
@@ -616,7 +609,6 @@ static void __unmask_and_level_IO_APIC_irq(struct irq_cfg *cfg) | |||
616 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, | 609 | io_apic_modify_irq(cfg, ~IO_APIC_REDIR_MASKED, |
617 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); | 610 | IO_APIC_REDIR_LEVEL_TRIGGER, NULL); |
618 | } | 611 | } |
619 | #endif /* CONFIG_X86_32 */ | ||
620 | 612 | ||
621 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) | 613 | static void mask_IO_APIC_irq_desc(struct irq_desc *desc) |
622 | { | 614 | { |
@@ -886,7 +878,7 @@ static int __init find_isa_irq_apic(int irq, int type) | |||
886 | */ | 878 | */ |
887 | static int EISA_ELCR(unsigned int irq) | 879 | static int EISA_ELCR(unsigned int irq) |
888 | { | 880 | { |
889 | if (irq < NR_IRQS_LEGACY) { | 881 | if (irq < nr_legacy_irqs) { |
890 | unsigned int port = 0x4d0 + (irq >> 3); | 882 | unsigned int port = 0x4d0 + (irq >> 3); |
891 | return (inb(port) >> (irq & 7)) & 1; | 883 | return (inb(port) >> (irq & 7)) & 1; |
892 | } | 884 | } |
@@ -1483,7 +1475,7 @@ static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq | |||
1483 | } | 1475 | } |
1484 | 1476 | ||
1485 | ioapic_register_intr(irq, desc, trigger); | 1477 | ioapic_register_intr(irq, desc, trigger); |
1486 | if (irq < NR_IRQS_LEGACY) | 1478 | if (irq < nr_legacy_irqs) |
1487 | disable_8259A_irq(irq); | 1479 | disable_8259A_irq(irq); |
1488 | 1480 | ||
1489 | ioapic_write_entry(apic_id, pin, entry); | 1481 | ioapic_write_entry(apic_id, pin, entry); |
@@ -1705,12 +1697,8 @@ __apicdebuginit(void) print_IO_APIC(void) | |||
1705 | if (!entry) | 1697 | if (!entry) |
1706 | continue; | 1698 | continue; |
1707 | printk(KERN_DEBUG "IRQ%d ", irq); | 1699 | printk(KERN_DEBUG "IRQ%d ", irq); |
1708 | for (;;) { | 1700 | for_each_irq_pin(entry, cfg->irq_2_pin) |
1709 | printk("-> %d:%d", entry->apic, entry->pin); | 1701 | printk("-> %d:%d", entry->apic, entry->pin); |
1710 | if (!entry->next) | ||
1711 | break; | ||
1712 | entry = entry->next; | ||
1713 | } | ||
1714 | printk("\n"); | 1702 | printk("\n"); |
1715 | } | 1703 | } |
1716 | 1704 | ||
@@ -1854,7 +1842,7 @@ __apicdebuginit(void) print_PIC(void) | |||
1854 | unsigned int v; | 1842 | unsigned int v; |
1855 | unsigned long flags; | 1843 | unsigned long flags; |
1856 | 1844 | ||
1857 | if (apic_verbosity == APIC_QUIET) | 1845 | if (apic_verbosity == APIC_QUIET || !nr_legacy_irqs) |
1858 | return; | 1846 | return; |
1859 | 1847 | ||
1860 | printk(KERN_DEBUG "\nprinting PIC contents\n"); | 1848 | printk(KERN_DEBUG "\nprinting PIC contents\n"); |
@@ -1917,6 +1905,10 @@ void __init enable_IO_APIC(void) | |||
1917 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1905 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1918 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; | 1906 | nr_ioapic_registers[apic] = reg_01.bits.entries+1; |
1919 | } | 1907 | } |
1908 | |||
1909 | if (!nr_legacy_irqs) | ||
1910 | return; | ||
1911 | |||
1920 | for(apic = 0; apic < nr_ioapics; apic++) { | 1912 | for(apic = 0; apic < nr_ioapics; apic++) { |
1921 | int pin; | 1913 | int pin; |
1922 | /* See if any of the pins is in ExtINT mode */ | 1914 | /* See if any of the pins is in ExtINT mode */ |
@@ -1971,6 +1963,9 @@ void disable_IO_APIC(void) | |||
1971 | */ | 1963 | */ |
1972 | clear_IO_APIC(); | 1964 | clear_IO_APIC(); |
1973 | 1965 | ||
1966 | if (!nr_legacy_irqs) | ||
1967 | return; | ||
1968 | |||
1974 | /* | 1969 | /* |
1975 | * If the i8259 is routed through an IOAPIC | 1970 | * If the i8259 is routed through an IOAPIC |
1976 | * Put that IOAPIC in virtual wire mode | 1971 | * Put that IOAPIC in virtual wire mode |
@@ -2017,7 +2012,7 @@ void disable_IO_APIC(void) | |||
2017 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 | 2012 | * by Matt Domsch <Matt_Domsch@dell.com> Tue Dec 21 12:25:05 CST 1999 |
2018 | */ | 2013 | */ |
2019 | 2014 | ||
2020 | static void __init setup_ioapic_ids_from_mpc(void) | 2015 | void __init setup_ioapic_ids_from_mpc(void) |
2021 | { | 2016 | { |
2022 | union IO_APIC_reg_00 reg_00; | 2017 | union IO_APIC_reg_00 reg_00; |
2023 | physid_mask_t phys_id_present_map; | 2018 | physid_mask_t phys_id_present_map; |
@@ -2026,9 +2021,8 @@ static void __init setup_ioapic_ids_from_mpc(void) | |||
2026 | unsigned char old_id; | 2021 | unsigned char old_id; |
2027 | unsigned long flags; | 2022 | unsigned long flags; |
2028 | 2023 | ||
2029 | if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids()) | 2024 | if (acpi_ioapic) |
2030 | return; | 2025 | return; |
2031 | |||
2032 | /* | 2026 | /* |
2033 | * Don't check I/O APIC IDs for xAPIC systems. They have | 2027 | * Don't check I/O APIC IDs for xAPIC systems. They have |
2034 | * no meaning without the serial APIC bus. | 2028 | * no meaning without the serial APIC bus. |
@@ -2202,7 +2196,7 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2202 | struct irq_cfg *cfg; | 2196 | struct irq_cfg *cfg; |
2203 | 2197 | ||
2204 | spin_lock_irqsave(&ioapic_lock, flags); | 2198 | spin_lock_irqsave(&ioapic_lock, flags); |
2205 | if (irq < NR_IRQS_LEGACY) { | 2199 | if (irq < nr_legacy_irqs) { |
2206 | disable_8259A_irq(irq); | 2200 | disable_8259A_irq(irq); |
2207 | if (i8259A_irq_pending(irq)) | 2201 | if (i8259A_irq_pending(irq)) |
2208 | was_pending = 1; | 2202 | was_pending = 1; |
@@ -2214,7 +2208,6 @@ static unsigned int startup_ioapic_irq(unsigned int irq) | |||
2214 | return was_pending; | 2208 | return was_pending; |
2215 | } | 2209 | } |
2216 | 2210 | ||
2217 | #ifdef CONFIG_X86_64 | ||
2218 | static int ioapic_retrigger_irq(unsigned int irq) | 2211 | static int ioapic_retrigger_irq(unsigned int irq) |
2219 | { | 2212 | { |
2220 | 2213 | ||
@@ -2227,14 +2220,6 @@ static int ioapic_retrigger_irq(unsigned int irq) | |||
2227 | 2220 | ||
2228 | return 1; | 2221 | return 1; |
2229 | } | 2222 | } |
2230 | #else | ||
2231 | static int ioapic_retrigger_irq(unsigned int irq) | ||
2232 | { | ||
2233 | apic->send_IPI_self(irq_cfg(irq)->vector); | ||
2234 | |||
2235 | return 1; | ||
2236 | } | ||
2237 | #endif | ||
2238 | 2223 | ||
2239 | /* | 2224 | /* |
2240 | * Level and edge triggered IO-APIC interrupts need different handling, | 2225 | * Level and edge triggered IO-APIC interrupts need different handling, |
@@ -2272,13 +2257,9 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
2272 | struct irq_pin_list *entry; | 2257 | struct irq_pin_list *entry; |
2273 | u8 vector = cfg->vector; | 2258 | u8 vector = cfg->vector; |
2274 | 2259 | ||
2275 | entry = cfg->irq_2_pin; | 2260 | for_each_irq_pin(entry, cfg->irq_2_pin) { |
2276 | for (;;) { | ||
2277 | unsigned int reg; | 2261 | unsigned int reg; |
2278 | 2262 | ||
2279 | if (!entry) | ||
2280 | break; | ||
2281 | |||
2282 | apic = entry->apic; | 2263 | apic = entry->apic; |
2283 | pin = entry->pin; | 2264 | pin = entry->pin; |
2284 | /* | 2265 | /* |
@@ -2291,9 +2272,6 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, struct irq | |||
2291 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; | 2272 | reg &= ~IO_APIC_REDIR_VECTOR_MASK; |
2292 | reg |= vector; | 2273 | reg |= vector; |
2293 | io_apic_modify(apic, 0x10 + pin*2, reg); | 2274 | io_apic_modify(apic, 0x10 + pin*2, reg); |
2294 | if (!entry->next) | ||
2295 | break; | ||
2296 | entry = entry->next; | ||
2297 | } | 2275 | } |
2298 | } | 2276 | } |
2299 | 2277 | ||
@@ -2518,11 +2496,8 @@ atomic_t irq_mis_count; | |||
2518 | static void ack_apic_level(unsigned int irq) | 2496 | static void ack_apic_level(unsigned int irq) |
2519 | { | 2497 | { |
2520 | struct irq_desc *desc = irq_to_desc(irq); | 2498 | struct irq_desc *desc = irq_to_desc(irq); |
2521 | |||
2522 | #ifdef CONFIG_X86_32 | ||
2523 | unsigned long v; | 2499 | unsigned long v; |
2524 | int i; | 2500 | int i; |
2525 | #endif | ||
2526 | struct irq_cfg *cfg; | 2501 | struct irq_cfg *cfg; |
2527 | int do_unmask_irq = 0; | 2502 | int do_unmask_irq = 0; |
2528 | 2503 | ||
@@ -2535,31 +2510,28 @@ static void ack_apic_level(unsigned int irq) | |||
2535 | } | 2510 | } |
2536 | #endif | 2511 | #endif |
2537 | 2512 | ||
2538 | #ifdef CONFIG_X86_32 | ||
2539 | /* | 2513 | /* |
2540 | * It appears there is an erratum which affects at least version 0x11 | 2514 | * It appears there is an erratum which affects at least version 0x11 |
2541 | * of I/O APIC (that's the 82093AA and cores integrated into various | 2515 | * of I/O APIC (that's the 82093AA and cores integrated into various |
2542 | * chipsets). Under certain conditions a level-triggered interrupt is | 2516 | * chipsets). Under certain conditions a level-triggered interrupt is |
2543 | * erroneously delivered as edge-triggered one but the respective IRR | 2517 | * erroneously delivered as edge-triggered one but the respective IRR |
2544 | * bit gets set nevertheless. As a result the I/O unit expects an EOI | 2518 | * bit gets set nevertheless. As a result the I/O unit expects an EOI |
2545 | * message but it will never arrive and further interrupts are blocked | 2519 | * message but it will never arrive and further interrupts are blocked |
2546 | * from the source. The exact reason is so far unknown, but the | 2520 | * from the source. The exact reason is so far unknown, but the |
2547 | * phenomenon was observed when two consecutive interrupt requests | 2521 | * phenomenon was observed when two consecutive interrupt requests |
2548 | * from a given source get delivered to the same CPU and the source is | 2522 | * from a given source get delivered to the same CPU and the source is |
2549 | * temporarily disabled in between. | 2523 | * temporarily disabled in between. |
2550 | * | 2524 | * |
2551 | * A workaround is to simulate an EOI message manually. We achieve it | 2525 | * A workaround is to simulate an EOI message manually. We achieve it |
2552 | * by setting the trigger mode to edge and then to level when the edge | 2526 | * by setting the trigger mode to edge and then to level when the edge |
2553 | * trigger mode gets detected in the TMR of a local APIC for a | 2527 | * trigger mode gets detected in the TMR of a local APIC for a |
2554 | * level-triggered interrupt. We mask the source for the time of the | 2528 | * level-triggered interrupt. We mask the source for the time of the |
2555 | * operation to prevent an edge-triggered interrupt escaping meanwhile. | 2529 | * operation to prevent an edge-triggered interrupt escaping meanwhile. |
2556 | * The idea is from Manfred Spraul. --macro | 2530 | * The idea is from Manfred Spraul. --macro |
2557 | */ | 2531 | */ |
2558 | cfg = desc->chip_data; | 2532 | cfg = desc->chip_data; |
2559 | i = cfg->vector; | 2533 | i = cfg->vector; |
2560 | |||
2561 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); | 2534 | v = apic_read(APIC_TMR + ((i & ~0x1f) >> 1)); |
2562 | #endif | ||
2563 | 2535 | ||
2564 | /* | 2536 | /* |
2565 | * We must acknowledge the irq before we move it or the acknowledge will | 2537 | * We must acknowledge the irq before we move it or the acknowledge will |
@@ -2601,7 +2573,7 @@ static void ack_apic_level(unsigned int irq) | |||
2601 | unmask_IO_APIC_irq_desc(desc); | 2573 | unmask_IO_APIC_irq_desc(desc); |
2602 | } | 2574 | } |
2603 | 2575 | ||
2604 | #ifdef CONFIG_X86_32 | 2576 | /* Tail end of version 0x11 I/O APIC bug workaround */ |
2605 | if (!(v & (1 << (i & 0x1f)))) { | 2577 | if (!(v & (1 << (i & 0x1f)))) { |
2606 | atomic_inc(&irq_mis_count); | 2578 | atomic_inc(&irq_mis_count); |
2607 | spin_lock(&ioapic_lock); | 2579 | spin_lock(&ioapic_lock); |
@@ -2609,26 +2581,15 @@ static void ack_apic_level(unsigned int irq) | |||
2609 | __unmask_and_level_IO_APIC_irq(cfg); | 2581 | __unmask_and_level_IO_APIC_irq(cfg); |
2610 | spin_unlock(&ioapic_lock); | 2582 | spin_unlock(&ioapic_lock); |
2611 | } | 2583 | } |
2612 | #endif | ||
2613 | } | 2584 | } |
2614 | 2585 | ||
2615 | #ifdef CONFIG_INTR_REMAP | 2586 | #ifdef CONFIG_INTR_REMAP |
2616 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) | 2587 | static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) |
2617 | { | 2588 | { |
2618 | int apic, pin; | ||
2619 | struct irq_pin_list *entry; | 2589 | struct irq_pin_list *entry; |
2620 | 2590 | ||
2621 | entry = cfg->irq_2_pin; | 2591 | for_each_irq_pin(entry, cfg->irq_2_pin) |
2622 | for (;;) { | 2592 | io_apic_eoi(entry->apic, entry->pin); |
2623 | |||
2624 | if (!entry) | ||
2625 | break; | ||
2626 | |||
2627 | apic = entry->apic; | ||
2628 | pin = entry->pin; | ||
2629 | io_apic_eoi(apic, pin); | ||
2630 | entry = entry->next; | ||
2631 | } | ||
2632 | } | 2593 | } |
2633 | 2594 | ||
2634 | static void | 2595 | static void |
@@ -2713,7 +2674,7 @@ static inline void init_IO_APIC_traps(void) | |||
2713 | * so default to an old-fashioned 8259 | 2674 | * so default to an old-fashioned 8259 |
2714 | * interrupt if we can.. | 2675 | * interrupt if we can.. |
2715 | */ | 2676 | */ |
2716 | if (irq < NR_IRQS_LEGACY) | 2677 | if (irq < nr_legacy_irqs) |
2717 | make_8259A_irq(irq); | 2678 | make_8259A_irq(irq); |
2718 | else | 2679 | else |
2719 | /* Strange. Oh, well.. */ | 2680 | /* Strange. Oh, well.. */ |
@@ -3049,7 +3010,7 @@ out: | |||
3049 | * the I/O APIC in all cases now. No actual device should request | 3010 | * the I/O APIC in all cases now. No actual device should request |
3050 | * it anyway. --macro | 3011 | * it anyway. --macro |
3051 | */ | 3012 | */ |
3052 | #define PIC_IRQS (1 << PIC_CASCADE_IR) | 3013 | #define PIC_IRQS (1UL << PIC_CASCADE_IR) |
3053 | 3014 | ||
3054 | void __init setup_IO_APIC(void) | 3015 | void __init setup_IO_APIC(void) |
3055 | { | 3016 | { |
@@ -3057,21 +3018,19 @@ void __init setup_IO_APIC(void) | |||
3057 | /* | 3018 | /* |
3058 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP | 3019 | * calling enable_IO_APIC() is moved to setup_local_APIC for BP |
3059 | */ | 3020 | */ |
3060 | 3021 | io_apic_irqs = nr_legacy_irqs ? ~PIC_IRQS : ~0UL; | |
3061 | io_apic_irqs = ~PIC_IRQS; | ||
3062 | 3022 | ||
3063 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); | 3023 | apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); |
3064 | /* | 3024 | /* |
3065 | * Set up IO-APIC IRQ routing. | 3025 | * Set up IO-APIC IRQ routing. |
3066 | */ | 3026 | */ |
3067 | #ifdef CONFIG_X86_32 | 3027 | x86_init.mpparse.setup_ioapic_ids(); |
3068 | if (!acpi_ioapic) | 3028 | |
3069 | setup_ioapic_ids_from_mpc(); | ||
3070 | #endif | ||
3071 | sync_Arb_IDs(); | 3029 | sync_Arb_IDs(); |
3072 | setup_IO_APIC_irqs(); | 3030 | setup_IO_APIC_irqs(); |
3073 | init_IO_APIC_traps(); | 3031 | init_IO_APIC_traps(); |
3074 | check_timer(); | 3032 | if (nr_legacy_irqs) |
3033 | check_timer(); | ||
3075 | } | 3034 | } |
3076 | 3035 | ||
3077 | /* | 3036 | /* |
@@ -3172,7 +3131,6 @@ static int __init ioapic_init_sysfs(void) | |||
3172 | 3131 | ||
3173 | device_initcall(ioapic_init_sysfs); | 3132 | device_initcall(ioapic_init_sysfs); |
3174 | 3133 | ||
3175 | static int nr_irqs_gsi = NR_IRQS_LEGACY; | ||
3176 | /* | 3134 | /* |
3177 | * Dynamic irq allocate and deallocation | 3135 | * Dynamic irq allocate and deallocation |
3178 | */ | 3136 | */ |
@@ -3244,8 +3202,7 @@ void destroy_irq(unsigned int irq) | |||
3244 | cfg = desc->chip_data; | 3202 | cfg = desc->chip_data; |
3245 | dynamic_irq_cleanup(irq); | 3203 | dynamic_irq_cleanup(irq); |
3246 | /* connect back irq_cfg */ | 3204 | /* connect back irq_cfg */ |
3247 | if (desc) | 3205 | desc->chip_data = cfg; |
3248 | desc->chip_data = cfg; | ||
3249 | 3206 | ||
3250 | free_irte(irq); | 3207 | free_irte(irq); |
3251 | spin_lock_irqsave(&vector_lock, flags); | 3208 | spin_lock_irqsave(&vector_lock, flags); |
@@ -3913,9 +3870,13 @@ static int __io_apic_set_pci_routing(struct device *dev, int irq, | |||
3913 | /* | 3870 | /* |
3914 | * IRQs < 16 are already in the irq_2_pin[] map | 3871 | * IRQs < 16 are already in the irq_2_pin[] map |
3915 | */ | 3872 | */ |
3916 | if (irq >= NR_IRQS_LEGACY) { | 3873 | if (irq >= nr_legacy_irqs) { |
3917 | cfg = desc->chip_data; | 3874 | cfg = desc->chip_data; |
3918 | add_pin_to_irq_node(cfg, node, ioapic, pin); | 3875 | if (add_pin_to_irq_node_nopanic(cfg, node, ioapic, pin)) { |
3876 | printk(KERN_INFO "can not add pin %d for irq %d\n", | ||
3877 | pin, irq); | ||
3878 | return 0; | ||
3879 | } | ||
3919 | } | 3880 | } |
3920 | 3881 | ||
3921 | setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); | 3882 | setup_IO_APIC_irq(ioapic, pin, irq, desc, trigger, polarity); |
@@ -4127,7 +4088,7 @@ void __init setup_ioapic_dest(void) | |||
4127 | 4088 | ||
4128 | static struct resource *ioapic_resources; | 4089 | static struct resource *ioapic_resources; |
4129 | 4090 | ||
4130 | static struct resource * __init ioapic_setup_resources(void) | 4091 | static struct resource * __init ioapic_setup_resources(int nr_ioapics) |
4131 | { | 4092 | { |
4132 | unsigned long n; | 4093 | unsigned long n; |
4133 | struct resource *res; | 4094 | struct resource *res; |
@@ -4143,15 +4104,13 @@ static struct resource * __init ioapic_setup_resources(void) | |||
4143 | mem = alloc_bootmem(n); | 4104 | mem = alloc_bootmem(n); |
4144 | res = (void *)mem; | 4105 | res = (void *)mem; |
4145 | 4106 | ||
4146 | if (mem != NULL) { | 4107 | mem += sizeof(struct resource) * nr_ioapics; |
4147 | mem += sizeof(struct resource) * nr_ioapics; | ||
4148 | 4108 | ||
4149 | for (i = 0; i < nr_ioapics; i++) { | 4109 | for (i = 0; i < nr_ioapics; i++) { |
4150 | res[i].name = mem; | 4110 | res[i].name = mem; |
4151 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; | 4111 | res[i].flags = IORESOURCE_MEM | IORESOURCE_BUSY; |
4152 | sprintf(mem, "IOAPIC %u", i); | 4112 | sprintf(mem, "IOAPIC %u", i); |
4153 | mem += IOAPIC_RESOURCE_NAME_SIZE; | 4113 | mem += IOAPIC_RESOURCE_NAME_SIZE; |
4154 | } | ||
4155 | } | 4114 | } |
4156 | 4115 | ||
4157 | ioapic_resources = res; | 4116 | ioapic_resources = res; |
@@ -4165,7 +4124,7 @@ void __init ioapic_init_mappings(void) | |||
4165 | struct resource *ioapic_res; | 4124 | struct resource *ioapic_res; |
4166 | int i; | 4125 | int i; |
4167 | 4126 | ||
4168 | ioapic_res = ioapic_setup_resources(); | 4127 | ioapic_res = ioapic_setup_resources(nr_ioapics); |
4169 | for (i = 0; i < nr_ioapics; i++) { | 4128 | for (i = 0; i < nr_ioapics; i++) { |
4170 | if (smp_found_config) { | 4129 | if (smp_found_config) { |
4171 | ioapic_phys = mp_ioapics[i].apicaddr; | 4130 | ioapic_phys = mp_ioapics[i].apicaddr; |
@@ -4194,11 +4153,9 @@ fake_ioapic_page: | |||
4194 | __fix_to_virt(idx), ioapic_phys); | 4153 | __fix_to_virt(idx), ioapic_phys); |
4195 | idx++; | 4154 | idx++; |
4196 | 4155 | ||
4197 | if (ioapic_res != NULL) { | 4156 | ioapic_res->start = ioapic_phys; |
4198 | ioapic_res->start = ioapic_phys; | 4157 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; |
4199 | ioapic_res->end = ioapic_phys + (4 * 1024) - 1; | 4158 | ioapic_res++; |
4200 | ioapic_res++; | ||
4201 | } | ||
4202 | } | 4159 | } |
4203 | } | 4160 | } |
4204 | 4161 | ||
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c index 6ef00ba4c886..08385e090a6f 100644 --- a/arch/x86/kernel/apic/ipi.c +++ b/arch/x86/kernel/apic/ipi.c | |||
@@ -153,7 +153,7 @@ int safe_smp_processor_id(void) | |||
153 | { | 153 | { |
154 | int apicid, cpuid; | 154 | int apicid, cpuid; |
155 | 155 | ||
156 | if (!boot_cpu_has(X86_FEATURE_APIC)) | 156 | if (!cpu_has_apic) |
157 | return 0; | 157 | return 0; |
158 | 158 | ||
159 | apicid = hard_smp_processor_id(); | 159 | apicid = hard_smp_processor_id(); |
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c index b3025b43b63a..cb66a22d98ad 100644 --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c | |||
@@ -39,7 +39,7 @@ | |||
39 | int unknown_nmi_panic; | 39 | int unknown_nmi_panic; |
40 | int nmi_watchdog_enabled; | 40 | int nmi_watchdog_enabled; |
41 | 41 | ||
42 | static cpumask_var_t backtrace_mask; | 42 | static cpumask_t backtrace_mask __read_mostly; |
43 | 43 | ||
44 | /* nmi_active: | 44 | /* nmi_active: |
45 | * >0: the lapic NMI watchdog is active, but can be disabled | 45 | * >0: the lapic NMI watchdog is active, but can be disabled |
@@ -66,7 +66,7 @@ static inline unsigned int get_nmi_count(int cpu) | |||
66 | 66 | ||
67 | static inline int mce_in_progress(void) | 67 | static inline int mce_in_progress(void) |
68 | { | 68 | { |
69 | #if defined(CONFIG_X86_NEW_MCE) | 69 | #if defined(CONFIG_X86_MCE) |
70 | return atomic_read(&mce_entry) > 0; | 70 | return atomic_read(&mce_entry) > 0; |
71 | #endif | 71 | #endif |
72 | return 0; | 72 | return 0; |
@@ -138,7 +138,6 @@ int __init check_nmi_watchdog(void) | |||
138 | if (!prev_nmi_count) | 138 | if (!prev_nmi_count) |
139 | goto error; | 139 | goto error; |
140 | 140 | ||
141 | alloc_cpumask_var(&backtrace_mask, GFP_KERNEL|__GFP_ZERO); | ||
142 | printk(KERN_INFO "Testing NMI watchdog ... "); | 141 | printk(KERN_INFO "Testing NMI watchdog ... "); |
143 | 142 | ||
144 | #ifdef CONFIG_SMP | 143 | #ifdef CONFIG_SMP |
@@ -415,14 +414,17 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) | |||
415 | } | 414 | } |
416 | 415 | ||
417 | /* We can be called before check_nmi_watchdog, hence NULL check. */ | 416 | /* We can be called before check_nmi_watchdog, hence NULL check. */ |
418 | if (backtrace_mask != NULL && cpumask_test_cpu(cpu, backtrace_mask)) { | 417 | if (cpumask_test_cpu(cpu, &backtrace_mask)) { |
419 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ | 418 | static DEFINE_SPINLOCK(lock); /* Serialise the printks */ |
420 | 419 | ||
421 | spin_lock(&lock); | 420 | spin_lock(&lock); |
422 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); | 421 | printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu); |
422 | show_regs(regs); | ||
423 | dump_stack(); | 423 | dump_stack(); |
424 | spin_unlock(&lock); | 424 | spin_unlock(&lock); |
425 | cpumask_clear_cpu(cpu, backtrace_mask); | 425 | cpumask_clear_cpu(cpu, &backtrace_mask); |
426 | |||
427 | rc = 1; | ||
426 | } | 428 | } |
427 | 429 | ||
428 | /* Could check oops_in_progress here too, but it's safer not to */ | 430 | /* Could check oops_in_progress here too, but it's safer not to */ |
@@ -552,14 +554,18 @@ int do_nmi_callback(struct pt_regs *regs, int cpu) | |||
552 | return 0; | 554 | return 0; |
553 | } | 555 | } |
554 | 556 | ||
555 | void __trigger_all_cpu_backtrace(void) | 557 | void arch_trigger_all_cpu_backtrace(void) |
556 | { | 558 | { |
557 | int i; | 559 | int i; |
558 | 560 | ||
559 | cpumask_copy(backtrace_mask, cpu_online_mask); | 561 | cpumask_copy(&backtrace_mask, cpu_online_mask); |
562 | |||
563 | printk(KERN_INFO "sending NMI to all CPUs:\n"); | ||
564 | apic->send_IPI_all(NMI_VECTOR); | ||
565 | |||
560 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ | 566 | /* Wait for up to 10 seconds for all CPUs to do the backtrace */ |
561 | for (i = 0; i < 10 * 1000; i++) { | 567 | for (i = 0; i < 10 * 1000; i++) { |
562 | if (cpumask_empty(backtrace_mask)) | 568 | if (cpumask_empty(&backtrace_mask)) |
563 | break; | 569 | break; |
564 | mdelay(1); | 570 | mdelay(1); |
565 | } | 571 | } |
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c index ca96e68f0d23..efa00e2b8505 100644 --- a/arch/x86/kernel/apic/numaq_32.c +++ b/arch/x86/kernel/apic/numaq_32.c | |||
@@ -66,7 +66,6 @@ struct mpc_trans { | |||
66 | unsigned short trans_reserved; | 66 | unsigned short trans_reserved; |
67 | }; | 67 | }; |
68 | 68 | ||
69 | /* x86_quirks member */ | ||
70 | static int mpc_record; | 69 | static int mpc_record; |
71 | 70 | ||
72 | static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; | 71 | static struct mpc_trans *translation_table[MAX_MPC_ENTRY]; |
@@ -130,10 +129,9 @@ void __cpuinit numaq_tsc_disable(void) | |||
130 | } | 129 | } |
131 | } | 130 | } |
132 | 131 | ||
133 | static int __init numaq_pre_time_init(void) | 132 | static void __init numaq_tsc_init(void) |
134 | { | 133 | { |
135 | numaq_tsc_disable(); | 134 | numaq_tsc_disable(); |
136 | return 0; | ||
137 | } | 135 | } |
138 | 136 | ||
139 | static inline int generate_logical_apicid(int quad, int phys_apicid) | 137 | static inline int generate_logical_apicid(int quad, int phys_apicid) |
@@ -177,6 +175,19 @@ static void mpc_oem_pci_bus(struct mpc_bus *m) | |||
177 | quad_local_to_mp_bus_id[quad][local] = m->busid; | 175 | quad_local_to_mp_bus_id[quad][local] = m->busid; |
178 | } | 176 | } |
179 | 177 | ||
178 | /* | ||
179 | * Called from mpparse code. | ||
180 | * mode = 0: prescan | ||
181 | * mode = 1: one mpc entry scanned | ||
182 | */ | ||
183 | static void numaq_mpc_record(unsigned int mode) | ||
184 | { | ||
185 | if (!mode) | ||
186 | mpc_record = 0; | ||
187 | else | ||
188 | mpc_record++; | ||
189 | } | ||
190 | |||
180 | static void __init MP_translation_info(struct mpc_trans *m) | 191 | static void __init MP_translation_info(struct mpc_trans *m) |
181 | { | 192 | { |
182 | printk(KERN_INFO | 193 | printk(KERN_INFO |
@@ -206,9 +217,9 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
206 | /* | 217 | /* |
207 | * Read/parse the MPC oem tables | 218 | * Read/parse the MPC oem tables |
208 | */ | 219 | */ |
209 | static void __init | 220 | static void __init smp_read_mpc_oem(struct mpc_table *mpc) |
210 | smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize) | ||
211 | { | 221 | { |
222 | struct mpc_oemtable *oemtable = (void *)(long)mpc->oemptr; | ||
212 | int count = sizeof(*oemtable); /* the header size */ | 223 | int count = sizeof(*oemtable); /* the header size */ |
213 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; | 224 | unsigned char *oemptr = ((unsigned char *)oemtable) + count; |
214 | 225 | ||
@@ -250,29 +261,6 @@ static void __init | |||
250 | } | 261 | } |
251 | } | 262 | } |
252 | 263 | ||
253 | static int __init numaq_setup_ioapic_ids(void) | ||
254 | { | ||
255 | /* so can skip it */ | ||
256 | return 1; | ||
257 | } | ||
258 | |||
259 | static struct x86_quirks numaq_x86_quirks __initdata = { | ||
260 | .arch_pre_time_init = numaq_pre_time_init, | ||
261 | .arch_time_init = NULL, | ||
262 | .arch_pre_intr_init = NULL, | ||
263 | .arch_memory_setup = NULL, | ||
264 | .arch_intr_init = NULL, | ||
265 | .arch_trap_init = NULL, | ||
266 | .mach_get_smp_config = NULL, | ||
267 | .mach_find_smp_config = NULL, | ||
268 | .mpc_record = &mpc_record, | ||
269 | .mpc_apic_id = mpc_apic_id, | ||
270 | .mpc_oem_bus_info = mpc_oem_bus_info, | ||
271 | .mpc_oem_pci_bus = mpc_oem_pci_bus, | ||
272 | .smp_read_mpc_oem = smp_read_mpc_oem, | ||
273 | .setup_ioapic_ids = numaq_setup_ioapic_ids, | ||
274 | }; | ||
275 | |||
276 | static __init void early_check_numaq(void) | 264 | static __init void early_check_numaq(void) |
277 | { | 265 | { |
278 | /* | 266 | /* |
@@ -286,8 +274,15 @@ static __init void early_check_numaq(void) | |||
286 | if (smp_found_config) | 274 | if (smp_found_config) |
287 | early_get_smp_config(); | 275 | early_get_smp_config(); |
288 | 276 | ||
289 | if (found_numaq) | 277 | if (found_numaq) { |
290 | x86_quirks = &numaq_x86_quirks; | 278 | x86_init.mpparse.mpc_record = numaq_mpc_record; |
279 | x86_init.mpparse.setup_ioapic_ids = x86_init_noop; | ||
280 | x86_init.mpparse.mpc_apic_id = mpc_apic_id; | ||
281 | x86_init.mpparse.smp_read_mpc_oem = smp_read_mpc_oem; | ||
282 | x86_init.mpparse.mpc_oem_pci_bus = mpc_oem_pci_bus; | ||
283 | x86_init.mpparse.mpc_oem_bus_info = mpc_oem_bus_info; | ||
284 | x86_init.timers.tsc_pre_init = numaq_tsc_init; | ||
285 | } | ||
291 | } | 286 | } |
292 | 287 | ||
293 | int __init get_memcfg_numaq(void) | 288 | int __init get_memcfg_numaq(void) |
@@ -418,7 +413,7 @@ static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid) | |||
418 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ | 413 | /* Where the IO area was mapped on multiquad, always 0 otherwise */ |
419 | void *xquad_portio; | 414 | void *xquad_portio; |
420 | 415 | ||
421 | static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid) | 416 | static inline int numaq_check_phys_apicid_present(int phys_apicid) |
422 | { | 417 | { |
423 | return 1; | 418 | return 1; |
424 | } | 419 | } |
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index bc3e880f9b82..65edc180fc82 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c | |||
@@ -44,17 +44,22 @@ static struct apic *apic_probe[] __initdata = { | |||
44 | NULL, | 44 | NULL, |
45 | }; | 45 | }; |
46 | 46 | ||
47 | static int apicid_phys_pkg_id(int initial_apic_id, int index_msb) | ||
48 | { | ||
49 | return hard_smp_processor_id() >> index_msb; | ||
50 | } | ||
51 | |||
47 | /* | 52 | /* |
48 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. | 53 | * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. |
49 | */ | 54 | */ |
50 | void __init default_setup_apic_routing(void) | 55 | void __init default_setup_apic_routing(void) |
51 | { | 56 | { |
52 | #ifdef CONFIG_X86_X2APIC | 57 | #ifdef CONFIG_X86_X2APIC |
53 | if (x2apic_mode && (apic != &apic_x2apic_phys && | 58 | if (x2apic_mode |
54 | #ifdef CONFIG_X86_UV | 59 | #ifdef CONFIG_X86_UV |
55 | apic != &apic_x2apic_uv_x && | 60 | && apic != &apic_x2apic_uv_x |
56 | #endif | 61 | #endif |
57 | apic != &apic_x2apic_cluster)) { | 62 | ) { |
58 | if (x2apic_phys) | 63 | if (x2apic_phys) |
59 | apic = &apic_x2apic_phys; | 64 | apic = &apic_x2apic_phys; |
60 | else | 65 | else |
@@ -69,6 +74,11 @@ void __init default_setup_apic_routing(void) | |||
69 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); | 74 | printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); |
70 | } | 75 | } |
71 | 76 | ||
77 | if (is_vsmp_box()) { | ||
78 | /* need to update phys_pkg_id */ | ||
79 | apic->phys_pkg_id = apicid_phys_pkg_id; | ||
80 | } | ||
81 | |||
72 | /* | 82 | /* |
73 | * Now that apic routing model is selected, configure the | 83 | * Now that apic routing model is selected, configure the |
74 | * fault handling for intr remapping. | 84 | * fault handling for intr remapping. |
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c index eafdfbd1ea95..645ecc4ff0be 100644 --- a/arch/x86/kernel/apic/summit_32.c +++ b/arch/x86/kernel/apic/summit_32.c | |||
@@ -272,7 +272,7 @@ static physid_mask_t summit_apicid_to_cpu_present(int apicid) | |||
272 | return physid_mask_of_physid(0); | 272 | return physid_mask_of_physid(0); |
273 | } | 273 | } |
274 | 274 | ||
275 | static int summit_check_phys_apicid_present(int boot_cpu_physical_apicid) | 275 | static int summit_check_phys_apicid_present(int physical_apicid) |
276 | { | 276 | { |
277 | return 1; | 277 | return 1; |
278 | } | 278 | } |
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 442b5508893f..151ace69a5aa 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c | |||
@@ -403,7 +403,15 @@ static DECLARE_WAIT_QUEUE_HEAD(apm_waitqueue); | |||
403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); | 403 | static DECLARE_WAIT_QUEUE_HEAD(apm_suspend_waitqueue); |
404 | static struct apm_user *user_list; | 404 | static struct apm_user *user_list; |
405 | static DEFINE_SPINLOCK(user_list_lock); | 405 | static DEFINE_SPINLOCK(user_list_lock); |
406 | static const struct desc_struct bad_bios_desc = { { { 0, 0x00409200 } } }; | 406 | |
407 | /* | ||
408 | * Set up a segment that references the real mode segment 0x40 | ||
409 | * that extends up to the end of page zero (that we have reserved). | ||
410 | * This is for buggy BIOS's that refer to (real mode) segment 0x40 | ||
411 | * even though they are called in protected mode. | ||
412 | */ | ||
413 | static struct desc_struct bad_bios_desc = GDT_ENTRY_INIT(0x4092, | ||
414 | (unsigned long)__va(0x400UL), PAGE_SIZE - 0x400 - 1); | ||
407 | 415 | ||
408 | static const char driver_version[] = "1.16ac"; /* no spaces */ | 416 | static const char driver_version[] = "1.16ac"; /* no spaces */ |
409 | 417 | ||
@@ -2332,15 +2340,6 @@ static int __init apm_init(void) | |||
2332 | pm_flags |= PM_APM; | 2340 | pm_flags |= PM_APM; |
2333 | 2341 | ||
2334 | /* | 2342 | /* |
2335 | * Set up a segment that references the real mode segment 0x40 | ||
2336 | * that extends up to the end of page zero (that we have reserved). | ||
2337 | * This is for buggy BIOS's that refer to (real mode) segment 0x40 | ||
2338 | * even though they are called in protected mode. | ||
2339 | */ | ||
2340 | set_base(bad_bios_desc, __va((unsigned long)0x40 << 4)); | ||
2341 | _set_limit((char *)&bad_bios_desc, 4095 - (0x40 << 4)); | ||
2342 | |||
2343 | /* | ||
2344 | * Set up the long jump entry point to the APM BIOS, which is called | 2343 | * Set up the long jump entry point to the APM BIOS, which is called |
2345 | * from inline assembly. | 2344 | * from inline assembly. |
2346 | */ | 2345 | */ |
@@ -2358,12 +2357,12 @@ static int __init apm_init(void) | |||
2358 | * code to that CPU. | 2357 | * code to that CPU. |
2359 | */ | 2358 | */ |
2360 | gdt = get_cpu_gdt_table(0); | 2359 | gdt = get_cpu_gdt_table(0); |
2361 | set_base(gdt[APM_CS >> 3], | 2360 | set_desc_base(&gdt[APM_CS >> 3], |
2362 | __va((unsigned long)apm_info.bios.cseg << 4)); | 2361 | (unsigned long)__va((unsigned long)apm_info.bios.cseg << 4)); |
2363 | set_base(gdt[APM_CS_16 >> 3], | 2362 | set_desc_base(&gdt[APM_CS_16 >> 3], |
2364 | __va((unsigned long)apm_info.bios.cseg_16 << 4)); | 2363 | (unsigned long)__va((unsigned long)apm_info.bios.cseg_16 << 4)); |
2365 | set_base(gdt[APM_DS >> 3], | 2364 | set_desc_base(&gdt[APM_DS >> 3], |
2366 | __va((unsigned long)apm_info.bios.dseg << 4)); | 2365 | (unsigned long)__va((unsigned long)apm_info.bios.dseg << 4)); |
2367 | 2366 | ||
2368 | proc_create("apm", 0, NULL, &apm_file_ops); | 2367 | proc_create("apm", 0, NULL, &apm_file_ops); |
2369 | 2368 | ||
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 898ecc47e129..4a6aeedcd965 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c | |||
@@ -3,6 +3,7 @@ | |||
3 | * This code generates raw asm output which is post-processed to extract | 3 | * This code generates raw asm output which is post-processed to extract |
4 | * and format the required data. | 4 | * and format the required data. |
5 | */ | 5 | */ |
6 | #define COMPILE_OFFSETS | ||
6 | 7 | ||
7 | #include <linux/crypto.h> | 8 | #include <linux/crypto.h> |
8 | #include <linux/sched.h> | 9 | #include <linux/sched.h> |
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile index c1f253dac155..8dd30638fe44 100644 --- a/arch/x86/kernel/cpu/Makefile +++ b/arch/x86/kernel/cpu/Makefile | |||
@@ -13,7 +13,7 @@ CFLAGS_common.o := $(nostackp) | |||
13 | 13 | ||
14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o | 14 | obj-y := intel_cacheinfo.o addon_cpuid_features.o |
15 | obj-y += proc.o capflags.o powerflags.o common.o | 15 | obj-y += proc.o capflags.o powerflags.o common.o |
16 | obj-y += vmware.o hypervisor.o | 16 | obj-y += vmware.o hypervisor.o sched.o |
17 | 17 | ||
18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o | 18 | obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o |
19 | obj-$(CONFIG_X86_64) += bugs_64.o | 19 | obj-$(CONFIG_X86_64) += bugs_64.o |
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 63fddcd082cd..f32fa71ccf97 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c | |||
@@ -2,7 +2,7 @@ | |||
2 | #include <linux/bitops.h> | 2 | #include <linux/bitops.h> |
3 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
4 | 4 | ||
5 | #include <asm/io.h> | 5 | #include <linux/io.h> |
6 | #include <asm/processor.h> | 6 | #include <asm/processor.h> |
7 | #include <asm/apic.h> | 7 | #include <asm/apic.h> |
8 | #include <asm/cpu.h> | 8 | #include <asm/cpu.h> |
@@ -45,8 +45,8 @@ static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c) | |||
45 | #define CBAR_ENB (0x80000000) | 45 | #define CBAR_ENB (0x80000000) |
46 | #define CBAR_KEY (0X000000CB) | 46 | #define CBAR_KEY (0X000000CB) |
47 | if (c->x86_model == 9 || c->x86_model == 10) { | 47 | if (c->x86_model == 9 || c->x86_model == 10) { |
48 | if (inl (CBAR) & CBAR_ENB) | 48 | if (inl(CBAR) & CBAR_ENB) |
49 | outl (0 | CBAR_KEY, CBAR); | 49 | outl(0 | CBAR_KEY, CBAR); |
50 | } | 50 | } |
51 | } | 51 | } |
52 | 52 | ||
@@ -87,9 +87,10 @@ static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c) | |||
87 | d = d2-d; | 87 | d = d2-d; |
88 | 88 | ||
89 | if (d > 20*K6_BUG_LOOP) | 89 | if (d > 20*K6_BUG_LOOP) |
90 | printk("system stability may be impaired when more than 32 MB are used.\n"); | 90 | printk(KERN_CONT |
91 | "system stability may be impaired when more than 32 MB are used.\n"); | ||
91 | else | 92 | else |
92 | printk("probably OK (after B9730xxxx).\n"); | 93 | printk(KERN_CONT "probably OK (after B9730xxxx).\n"); |
93 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); | 94 | printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n"); |
94 | } | 95 | } |
95 | 96 | ||
@@ -219,8 +220,9 @@ static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c) | |||
219 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { | 220 | if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) { |
220 | rdmsr(MSR_K7_CLK_CTL, l, h); | 221 | rdmsr(MSR_K7_CLK_CTL, l, h); |
221 | if ((l & 0xfff00000) != 0x20000000) { | 222 | if ((l & 0xfff00000) != 0x20000000) { |
222 | printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l, | 223 | printk(KERN_INFO |
223 | ((l & 0x000fffff)|0x20000000)); | 224 | "CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", |
225 | l, ((l & 0x000fffff)|0x20000000)); | ||
224 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); | 226 | wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h); |
225 | } | 227 | } |
226 | } | 228 | } |
@@ -251,6 +253,64 @@ static int __cpuinit nearby_node(int apicid) | |||
251 | #endif | 253 | #endif |
252 | 254 | ||
253 | /* | 255 | /* |
256 | * Fixup core topology information for AMD multi-node processors. | ||
257 | * Assumption 1: Number of cores in each internal node is the same. | ||
258 | * Assumption 2: Mixed systems with both single-node and dual-node | ||
259 | * processors are not supported. | ||
260 | */ | ||
261 | #ifdef CONFIG_X86_HT | ||
262 | static void __cpuinit amd_fixup_dcm(struct cpuinfo_x86 *c) | ||
263 | { | ||
264 | #ifdef CONFIG_PCI | ||
265 | u32 t, cpn; | ||
266 | u8 n, n_id; | ||
267 | int cpu = smp_processor_id(); | ||
268 | |||
269 | /* fixup topology information only once for a core */ | ||
270 | if (cpu_has(c, X86_FEATURE_AMD_DCM)) | ||
271 | return; | ||
272 | |||
273 | /* check for multi-node processor on boot cpu */ | ||
274 | t = read_pci_config(0, 24, 3, 0xe8); | ||
275 | if (!(t & (1 << 29))) | ||
276 | return; | ||
277 | |||
278 | set_cpu_cap(c, X86_FEATURE_AMD_DCM); | ||
279 | |||
280 | /* cores per node: each internal node has half the number of cores */ | ||
281 | cpn = c->x86_max_cores >> 1; | ||
282 | |||
283 | /* even-numbered NB_id of this dual-node processor */ | ||
284 | n = c->phys_proc_id << 1; | ||
285 | |||
286 | /* | ||
287 | * determine internal node id and assign cores fifty-fifty to | ||
288 | * each node of the dual-node processor | ||
289 | */ | ||
290 | t = read_pci_config(0, 24 + n, 3, 0xe8); | ||
291 | n = (t>>30) & 0x3; | ||
292 | if (n == 0) { | ||
293 | if (c->cpu_core_id < cpn) | ||
294 | n_id = 0; | ||
295 | else | ||
296 | n_id = 1; | ||
297 | } else { | ||
298 | if (c->cpu_core_id < cpn) | ||
299 | n_id = 1; | ||
300 | else | ||
301 | n_id = 0; | ||
302 | } | ||
303 | |||
304 | /* compute entire NodeID, use llc_shared_map to store sibling info */ | ||
305 | per_cpu(cpu_llc_id, cpu) = (c->phys_proc_id << 1) + n_id; | ||
306 | |||
307 | /* fixup core id to be in range from 0 to cpn */ | ||
308 | c->cpu_core_id = c->cpu_core_id % cpn; | ||
309 | #endif | ||
310 | } | ||
311 | #endif | ||
312 | |||
313 | /* | ||
254 | * On a AMD dual core setup the lower bits of the APIC id distingush the cores. | 314 | * On a AMD dual core setup the lower bits of the APIC id distingush the cores. |
255 | * Assumes number of cores is a power of two. | 315 | * Assumes number of cores is a power of two. |
256 | */ | 316 | */ |
@@ -267,17 +327,31 @@ static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c) | |||
267 | c->phys_proc_id = c->initial_apicid >> bits; | 327 | c->phys_proc_id = c->initial_apicid >> bits; |
268 | /* use socket ID also for last level cache */ | 328 | /* use socket ID also for last level cache */ |
269 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; | 329 | per_cpu(cpu_llc_id, cpu) = c->phys_proc_id; |
330 | /* fixup topology information on multi-node processors */ | ||
331 | if ((c->x86 == 0x10) && (c->x86_model == 9)) | ||
332 | amd_fixup_dcm(c); | ||
270 | #endif | 333 | #endif |
271 | } | 334 | } |
272 | 335 | ||
336 | int amd_get_nb_id(int cpu) | ||
337 | { | ||
338 | int id = 0; | ||
339 | #ifdef CONFIG_SMP | ||
340 | id = per_cpu(cpu_llc_id, cpu); | ||
341 | #endif | ||
342 | return id; | ||
343 | } | ||
344 | EXPORT_SYMBOL_GPL(amd_get_nb_id); | ||
345 | |||
273 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) | 346 | static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c) |
274 | { | 347 | { |
275 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) | 348 | #if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) |
276 | int cpu = smp_processor_id(); | 349 | int cpu = smp_processor_id(); |
277 | int node; | 350 | int node; |
278 | unsigned apicid = cpu_has_apic ? hard_smp_processor_id() : c->apicid; | 351 | unsigned apicid = c->apicid; |
352 | |||
353 | node = per_cpu(cpu_llc_id, cpu); | ||
279 | 354 | ||
280 | node = c->phys_proc_id; | ||
281 | if (apicid_to_node[apicid] != NUMA_NO_NODE) | 355 | if (apicid_to_node[apicid] != NUMA_NO_NODE) |
282 | node = apicid_to_node[apicid]; | 356 | node = apicid_to_node[apicid]; |
283 | if (!node_online(node)) { | 357 | if (!node_online(node)) { |
@@ -398,18 +472,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
398 | u32 level; | 472 | u32 level; |
399 | 473 | ||
400 | level = cpuid_eax(1); | 474 | level = cpuid_eax(1); |
401 | if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) | 475 | if ((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58) |
402 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 476 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
403 | 477 | ||
404 | /* | 478 | /* |
405 | * Some BIOSes incorrectly force this feature, but only K8 | 479 | * Some BIOSes incorrectly force this feature, but only K8 |
406 | * revision D (model = 0x14) and later actually support it. | 480 | * revision D (model = 0x14) and later actually support it. |
481 | * (AMD Erratum #110, docId: 25759). | ||
407 | */ | 482 | */ |
408 | if (c->x86_model < 0x14) | 483 | if (c->x86_model < 0x14 && cpu_has(c, X86_FEATURE_LAHF_LM)) { |
484 | u64 val; | ||
485 | |||
409 | clear_cpu_cap(c, X86_FEATURE_LAHF_LM); | 486 | clear_cpu_cap(c, X86_FEATURE_LAHF_LM); |
487 | if (!rdmsrl_amd_safe(0xc001100d, &val)) { | ||
488 | val &= ~(1ULL << 32); | ||
489 | wrmsrl_amd_safe(0xc001100d, val); | ||
490 | } | ||
491 | } | ||
492 | |||
410 | } | 493 | } |
411 | if (c->x86 == 0x10 || c->x86 == 0x11) | 494 | if (c->x86 == 0x10 || c->x86 == 0x11) |
412 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); | 495 | set_cpu_cap(c, X86_FEATURE_REP_GOOD); |
496 | |||
497 | /* get apicid instead of initial apic id from cpuid */ | ||
498 | c->apicid = hard_smp_processor_id(); | ||
413 | #else | 499 | #else |
414 | 500 | ||
415 | /* | 501 | /* |
@@ -494,27 +580,30 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
494 | * benefit in doing so. | 580 | * benefit in doing so. |
495 | */ | 581 | */ |
496 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { | 582 | if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) { |
497 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); | 583 | printk(KERN_DEBUG "tseg: %010llx\n", tseg); |
498 | if ((tseg>>PMD_SHIFT) < | 584 | if ((tseg>>PMD_SHIFT) < |
499 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || | 585 | (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) || |
500 | ((tseg>>PMD_SHIFT) < | 586 | ((tseg>>PMD_SHIFT) < |
501 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && | 587 | (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) && |
502 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) | 588 | (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT)))) |
503 | set_memory_4k((unsigned long)__va(tseg), 1); | 589 | set_memory_4k((unsigned long)__va(tseg), 1); |
504 | } | 590 | } |
505 | } | 591 | } |
506 | #endif | 592 | #endif |
507 | } | 593 | } |
508 | 594 | ||
509 | #ifdef CONFIG_X86_32 | 595 | #ifdef CONFIG_X86_32 |
510 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) | 596 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, |
597 | unsigned int size) | ||
511 | { | 598 | { |
512 | /* AMD errata T13 (order #21922) */ | 599 | /* AMD errata T13 (order #21922) */ |
513 | if ((c->x86 == 6)) { | 600 | if ((c->x86 == 6)) { |
514 | if (c->x86_model == 3 && c->x86_mask == 0) /* Duron Rev A0 */ | 601 | /* Duron Rev A0 */ |
602 | if (c->x86_model == 3 && c->x86_mask == 0) | ||
515 | size = 64; | 603 | size = 64; |
604 | /* Tbird rev A1/A2 */ | ||
516 | if (c->x86_model == 4 && | 605 | if (c->x86_model == 4 && |
517 | (c->x86_mask == 0 || c->x86_mask == 1)) /* Tbird rev A1/A2 */ | 606 | (c->x86_mask == 0 || c->x86_mask == 1)) |
518 | size = 256; | 607 | size = 256; |
519 | } | 608 | } |
520 | return size; | 609 | return size; |
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c index c8e315f1aa83..01a265212395 100644 --- a/arch/x86/kernel/cpu/bugs.c +++ b/arch/x86/kernel/cpu/bugs.c | |||
@@ -81,7 +81,7 @@ static void __init check_fpu(void) | |||
81 | 81 | ||
82 | boot_cpu_data.fdiv_bug = fdiv_bug; | 82 | boot_cpu_data.fdiv_bug = fdiv_bug; |
83 | if (boot_cpu_data.fdiv_bug) | 83 | if (boot_cpu_data.fdiv_bug) |
84 | printk("Hmm, FPU with FDIV bug.\n"); | 84 | printk(KERN_WARNING "Hmm, FPU with FDIV bug.\n"); |
85 | } | 85 | } |
86 | 86 | ||
87 | static void __init check_hlt(void) | 87 | static void __init check_hlt(void) |
@@ -98,7 +98,7 @@ static void __init check_hlt(void) | |||
98 | halt(); | 98 | halt(); |
99 | halt(); | 99 | halt(); |
100 | halt(); | 100 | halt(); |
101 | printk("OK.\n"); | 101 | printk(KERN_CONT "OK.\n"); |
102 | } | 102 | } |
103 | 103 | ||
104 | /* | 104 | /* |
@@ -122,9 +122,9 @@ static void __init check_popad(void) | |||
122 | * CPU hard. Too bad. | 122 | * CPU hard. Too bad. |
123 | */ | 123 | */ |
124 | if (res != 12345678) | 124 | if (res != 12345678) |
125 | printk("Buggy.\n"); | 125 | printk(KERN_CONT "Buggy.\n"); |
126 | else | 126 | else |
127 | printk("OK.\n"); | 127 | printk(KERN_CONT "OK.\n"); |
128 | #endif | 128 | #endif |
129 | } | 129 | } |
130 | 130 | ||
@@ -156,7 +156,7 @@ void __init check_bugs(void) | |||
156 | { | 156 | { |
157 | identify_boot_cpu(); | 157 | identify_boot_cpu(); |
158 | #ifndef CONFIG_SMP | 158 | #ifndef CONFIG_SMP |
159 | printk("CPU: "); | 159 | printk(KERN_INFO "CPU: "); |
160 | print_cpu_info(&boot_cpu_data); | 160 | print_cpu_info(&boot_cpu_data); |
161 | #endif | 161 | #endif |
162 | check_config(); | 162 | check_config(); |
diff --git a/arch/x86/kernel/cpu/bugs_64.c b/arch/x86/kernel/cpu/bugs_64.c index 9a3ed0649d4e..04f0fe5af83e 100644 --- a/arch/x86/kernel/cpu/bugs_64.c +++ b/arch/x86/kernel/cpu/bugs_64.c | |||
@@ -15,7 +15,7 @@ void __init check_bugs(void) | |||
15 | { | 15 | { |
16 | identify_boot_cpu(); | 16 | identify_boot_cpu(); |
17 | #if !defined(CONFIG_SMP) | 17 | #if !defined(CONFIG_SMP) |
18 | printk("CPU: "); | 18 | printk(KERN_INFO "CPU: "); |
19 | print_cpu_info(&boot_cpu_data); | 19 | print_cpu_info(&boot_cpu_data); |
20 | #endif | 20 | #endif |
21 | alternative_instructions(); | 21 | alternative_instructions(); |
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 5ce60a88027b..2055fc2b2e6b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c | |||
@@ -18,8 +18,8 @@ | |||
18 | #include <asm/hypervisor.h> | 18 | #include <asm/hypervisor.h> |
19 | #include <asm/processor.h> | 19 | #include <asm/processor.h> |
20 | #include <asm/sections.h> | 20 | #include <asm/sections.h> |
21 | #include <asm/topology.h> | 21 | #include <linux/topology.h> |
22 | #include <asm/cpumask.h> | 22 | #include <linux/cpumask.h> |
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/atomic.h> | 24 | #include <asm/atomic.h> |
25 | #include <asm/proto.h> | 25 | #include <asm/proto.h> |
@@ -28,13 +28,13 @@ | |||
28 | #include <asm/desc.h> | 28 | #include <asm/desc.h> |
29 | #include <asm/i387.h> | 29 | #include <asm/i387.h> |
30 | #include <asm/mtrr.h> | 30 | #include <asm/mtrr.h> |
31 | #include <asm/numa.h> | 31 | #include <linux/numa.h> |
32 | #include <asm/asm.h> | 32 | #include <asm/asm.h> |
33 | #include <asm/cpu.h> | 33 | #include <asm/cpu.h> |
34 | #include <asm/mce.h> | 34 | #include <asm/mce.h> |
35 | #include <asm/msr.h> | 35 | #include <asm/msr.h> |
36 | #include <asm/pat.h> | 36 | #include <asm/pat.h> |
37 | #include <asm/smp.h> | 37 | #include <linux/smp.h> |
38 | 38 | ||
39 | #ifdef CONFIG_X86_LOCAL_APIC | 39 | #ifdef CONFIG_X86_LOCAL_APIC |
40 | #include <asm/uv/uv.h> | 40 | #include <asm/uv/uv.h> |
@@ -94,45 +94,45 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = { | |||
94 | * TLS descriptors are currently at a different place compared to i386. | 94 | * TLS descriptors are currently at a different place compared to i386. |
95 | * Hopefully nobody expects them at a fixed place (Wine?) | 95 | * Hopefully nobody expects them at a fixed place (Wine?) |
96 | */ | 96 | */ |
97 | [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, | 97 | [GDT_ENTRY_KERNEL32_CS] = GDT_ENTRY_INIT(0xc09b, 0, 0xfffff), |
98 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, | 98 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xa09b, 0, 0xfffff), |
99 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, | 99 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc093, 0, 0xfffff), |
100 | [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, | 100 | [GDT_ENTRY_DEFAULT_USER32_CS] = GDT_ENTRY_INIT(0xc0fb, 0, 0xfffff), |
101 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, | 101 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f3, 0, 0xfffff), |
102 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, | 102 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xa0fb, 0, 0xfffff), |
103 | #else | 103 | #else |
104 | [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, | 104 | [GDT_ENTRY_KERNEL_CS] = GDT_ENTRY_INIT(0xc09a, 0, 0xfffff), |
105 | [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, | 105 | [GDT_ENTRY_KERNEL_DS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
106 | [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, | 106 | [GDT_ENTRY_DEFAULT_USER_CS] = GDT_ENTRY_INIT(0xc0fa, 0, 0xfffff), |
107 | [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff200 } } }, | 107 | [GDT_ENTRY_DEFAULT_USER_DS] = GDT_ENTRY_INIT(0xc0f2, 0, 0xfffff), |
108 | /* | 108 | /* |
109 | * Segments used for calling PnP BIOS have byte granularity. | 109 | * Segments used for calling PnP BIOS have byte granularity. |
110 | * They code segments and data segments have fixed 64k limits, | 110 | * They code segments and data segments have fixed 64k limits, |
111 | * the transfer segment sizes are set at run time. | 111 | * the transfer segment sizes are set at run time. |
112 | */ | 112 | */ |
113 | /* 32-bit code */ | 113 | /* 32-bit code */ |
114 | [GDT_ENTRY_PNPBIOS_CS32] = { { { 0x0000ffff, 0x00409a00 } } }, | 114 | [GDT_ENTRY_PNPBIOS_CS32] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), |
115 | /* 16-bit code */ | 115 | /* 16-bit code */ |
116 | [GDT_ENTRY_PNPBIOS_CS16] = { { { 0x0000ffff, 0x00009a00 } } }, | 116 | [GDT_ENTRY_PNPBIOS_CS16] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), |
117 | /* 16-bit data */ | 117 | /* 16-bit data */ |
118 | [GDT_ENTRY_PNPBIOS_DS] = { { { 0x0000ffff, 0x00009200 } } }, | 118 | [GDT_ENTRY_PNPBIOS_DS] = GDT_ENTRY_INIT(0x0092, 0, 0xffff), |
119 | /* 16-bit data */ | 119 | /* 16-bit data */ |
120 | [GDT_ENTRY_PNPBIOS_TS1] = { { { 0x00000000, 0x00009200 } } }, | 120 | [GDT_ENTRY_PNPBIOS_TS1] = GDT_ENTRY_INIT(0x0092, 0, 0), |
121 | /* 16-bit data */ | 121 | /* 16-bit data */ |
122 | [GDT_ENTRY_PNPBIOS_TS2] = { { { 0x00000000, 0x00009200 } } }, | 122 | [GDT_ENTRY_PNPBIOS_TS2] = GDT_ENTRY_INIT(0x0092, 0, 0), |
123 | /* | 123 | /* |
124 | * The APM segments have byte granularity and their bases | 124 | * The APM segments have byte granularity and their bases |
125 | * are set at run time. All have 64k limits. | 125 | * are set at run time. All have 64k limits. |
126 | */ | 126 | */ |
127 | /* 32-bit code */ | 127 | /* 32-bit code */ |
128 | [GDT_ENTRY_APMBIOS_BASE] = { { { 0x0000ffff, 0x00409a00 } } }, | 128 | [GDT_ENTRY_APMBIOS_BASE] = GDT_ENTRY_INIT(0x409a, 0, 0xffff), |
129 | /* 16-bit code */ | 129 | /* 16-bit code */ |
130 | [GDT_ENTRY_APMBIOS_BASE+1] = { { { 0x0000ffff, 0x00009a00 } } }, | 130 | [GDT_ENTRY_APMBIOS_BASE+1] = GDT_ENTRY_INIT(0x009a, 0, 0xffff), |
131 | /* data */ | 131 | /* data */ |
132 | [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, | 132 | [GDT_ENTRY_APMBIOS_BASE+2] = GDT_ENTRY_INIT(0x4092, 0, 0xffff), |
133 | 133 | ||
134 | [GDT_ENTRY_ESPFIX_SS] = { { { 0x0000ffff, 0x00cf9200 } } }, | 134 | [GDT_ENTRY_ESPFIX_SS] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
135 | [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, | 135 | [GDT_ENTRY_PERCPU] = GDT_ENTRY_INIT(0xc092, 0, 0xfffff), |
136 | GDT_STACK_CANARY_INIT | 136 | GDT_STACK_CANARY_INIT |
137 | #endif | 137 | #endif |
138 | } }; | 138 | } }; |
@@ -982,18 +982,26 @@ static __init int setup_disablecpuid(char *arg) | |||
982 | __setup("clearcpuid=", setup_disablecpuid); | 982 | __setup("clearcpuid=", setup_disablecpuid); |
983 | 983 | ||
984 | #ifdef CONFIG_X86_64 | 984 | #ifdef CONFIG_X86_64 |
985 | struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; | 985 | struct desc_ptr idt_descr = { NR_VECTORS * 16 - 1, (unsigned long) idt_table }; |
986 | 986 | ||
987 | DEFINE_PER_CPU_FIRST(union irq_stack_union, | 987 | DEFINE_PER_CPU_FIRST(union irq_stack_union, |
988 | irq_stack_union) __aligned(PAGE_SIZE); | 988 | irq_stack_union) __aligned(PAGE_SIZE); |
989 | 989 | ||
990 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | 990 | /* |
991 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | 991 | * The following four percpu variables are hot. Align current_task to |
992 | * cacheline size such that all four fall in the same cacheline. | ||
993 | */ | ||
994 | DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned = | ||
995 | &init_task; | ||
996 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
992 | 997 | ||
993 | DEFINE_PER_CPU(unsigned long, kernel_stack) = | 998 | DEFINE_PER_CPU(unsigned long, kernel_stack) = |
994 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; | 999 | (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; |
995 | EXPORT_PER_CPU_SYMBOL(kernel_stack); | 1000 | EXPORT_PER_CPU_SYMBOL(kernel_stack); |
996 | 1001 | ||
1002 | DEFINE_PER_CPU(char *, irq_stack_ptr) = | ||
1003 | init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; | ||
1004 | |||
997 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; | 1005 | DEFINE_PER_CPU(unsigned int, irq_count) = -1; |
998 | 1006 | ||
999 | /* | 1007 | /* |
@@ -1008,8 +1016,7 @@ static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = { | |||
1008 | }; | 1016 | }; |
1009 | 1017 | ||
1010 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks | 1018 | static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks |
1011 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) | 1019 | [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]); |
1012 | __aligned(PAGE_SIZE); | ||
1013 | 1020 | ||
1014 | /* May not be marked __init: used by software suspend */ | 1021 | /* May not be marked __init: used by software suspend */ |
1015 | void syscall_init(void) | 1022 | void syscall_init(void) |
@@ -1042,8 +1049,11 @@ DEFINE_PER_CPU(struct orig_ist, orig_ist); | |||
1042 | 1049 | ||
1043 | #else /* CONFIG_X86_64 */ | 1050 | #else /* CONFIG_X86_64 */ |
1044 | 1051 | ||
1052 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
1053 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
1054 | |||
1045 | #ifdef CONFIG_CC_STACKPROTECTOR | 1055 | #ifdef CONFIG_CC_STACKPROTECTOR |
1046 | DEFINE_PER_CPU(unsigned long, stack_canary); | 1056 | DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); |
1047 | #endif | 1057 | #endif |
1048 | 1058 | ||
1049 | /* Make sure %fs and %gs are initialized properly in idle threads */ | 1059 | /* Make sure %fs and %gs are initialized properly in idle threads */ |
diff --git a/arch/x86/kernel/cpu/cpu_debug.c b/arch/x86/kernel/cpu/cpu_debug.c index 6b2a52dd0403..dca325c03999 100644 --- a/arch/x86/kernel/cpu/cpu_debug.c +++ b/arch/x86/kernel/cpu/cpu_debug.c | |||
@@ -30,8 +30,8 @@ | |||
30 | #include <asm/apic.h> | 30 | #include <asm/apic.h> |
31 | #include <asm/desc.h> | 31 | #include <asm/desc.h> |
32 | 32 | ||
33 | static DEFINE_PER_CPU(struct cpu_cpuX_base, cpu_arr[CPU_REG_ALL_BIT]); | 33 | static DEFINE_PER_CPU(struct cpu_cpuX_base [CPU_REG_ALL_BIT], cpu_arr); |
34 | static DEFINE_PER_CPU(struct cpu_private *, priv_arr[MAX_CPU_FILES]); | 34 | static DEFINE_PER_CPU(struct cpu_private * [MAX_CPU_FILES], priv_arr); |
35 | static DEFINE_PER_CPU(int, cpu_priv_count); | 35 | static DEFINE_PER_CPU(int, cpu_priv_count); |
36 | 36 | ||
37 | static DEFINE_MUTEX(cpu_debug_lock); | 37 | static DEFINE_MUTEX(cpu_debug_lock); |
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c index ae9b503220ca..7bb676c533aa 100644 --- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c +++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c | |||
@@ -60,7 +60,6 @@ enum { | |||
60 | }; | 60 | }; |
61 | 61 | ||
62 | #define INTEL_MSR_RANGE (0xffff) | 62 | #define INTEL_MSR_RANGE (0xffff) |
63 | #define CPUID_6_ECX_APERFMPERF_CAPABILITY (0x1) | ||
64 | 63 | ||
65 | struct acpi_cpufreq_data { | 64 | struct acpi_cpufreq_data { |
66 | struct acpi_processor_performance *acpi_data; | 65 | struct acpi_processor_performance *acpi_data; |
@@ -71,11 +70,7 @@ struct acpi_cpufreq_data { | |||
71 | 70 | ||
72 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); | 71 | static DEFINE_PER_CPU(struct acpi_cpufreq_data *, drv_data); |
73 | 72 | ||
74 | struct acpi_msr_data { | 73 | static DEFINE_PER_CPU(struct aperfmperf, old_perf); |
75 | u64 saved_aperf, saved_mperf; | ||
76 | }; | ||
77 | |||
78 | static DEFINE_PER_CPU(struct acpi_msr_data, msr_data); | ||
79 | 74 | ||
80 | DEFINE_TRACE(power_mark); | 75 | DEFINE_TRACE(power_mark); |
81 | 76 | ||
@@ -244,23 +239,12 @@ static u32 get_cur_val(const struct cpumask *mask) | |||
244 | return cmd.val; | 239 | return cmd.val; |
245 | } | 240 | } |
246 | 241 | ||
247 | struct perf_pair { | ||
248 | union { | ||
249 | struct { | ||
250 | u32 lo; | ||
251 | u32 hi; | ||
252 | } split; | ||
253 | u64 whole; | ||
254 | } aperf, mperf; | ||
255 | }; | ||
256 | |||
257 | /* Called via smp_call_function_single(), on the target CPU */ | 242 | /* Called via smp_call_function_single(), on the target CPU */ |
258 | static void read_measured_perf_ctrs(void *_cur) | 243 | static void read_measured_perf_ctrs(void *_cur) |
259 | { | 244 | { |
260 | struct perf_pair *cur = _cur; | 245 | struct aperfmperf *am = _cur; |
261 | 246 | ||
262 | rdmsr(MSR_IA32_APERF, cur->aperf.split.lo, cur->aperf.split.hi); | 247 | get_aperfmperf(am); |
263 | rdmsr(MSR_IA32_MPERF, cur->mperf.split.lo, cur->mperf.split.hi); | ||
264 | } | 248 | } |
265 | 249 | ||
266 | /* | 250 | /* |
@@ -279,63 +263,17 @@ static void read_measured_perf_ctrs(void *_cur) | |||
279 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, | 263 | static unsigned int get_measured_perf(struct cpufreq_policy *policy, |
280 | unsigned int cpu) | 264 | unsigned int cpu) |
281 | { | 265 | { |
282 | struct perf_pair readin, cur; | 266 | struct aperfmperf perf; |
283 | unsigned int perf_percent; | 267 | unsigned long ratio; |
284 | unsigned int retval; | 268 | unsigned int retval; |
285 | 269 | ||
286 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &readin, 1)) | 270 | if (smp_call_function_single(cpu, read_measured_perf_ctrs, &perf, 1)) |
287 | return 0; | 271 | return 0; |
288 | 272 | ||
289 | cur.aperf.whole = readin.aperf.whole - | 273 | ratio = calc_aperfmperf_ratio(&per_cpu(old_perf, cpu), &perf); |
290 | per_cpu(msr_data, cpu).saved_aperf; | 274 | per_cpu(old_perf, cpu) = perf; |
291 | cur.mperf.whole = readin.mperf.whole - | ||
292 | per_cpu(msr_data, cpu).saved_mperf; | ||
293 | per_cpu(msr_data, cpu).saved_aperf = readin.aperf.whole; | ||
294 | per_cpu(msr_data, cpu).saved_mperf = readin.mperf.whole; | ||
295 | |||
296 | #ifdef __i386__ | ||
297 | /* | ||
298 | * We dont want to do 64 bit divide with 32 bit kernel | ||
299 | * Get an approximate value. Return failure in case we cannot get | ||
300 | * an approximate value. | ||
301 | */ | ||
302 | if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) { | ||
303 | int shift_count; | ||
304 | u32 h; | ||
305 | |||
306 | h = max_t(u32, cur.aperf.split.hi, cur.mperf.split.hi); | ||
307 | shift_count = fls(h); | ||
308 | |||
309 | cur.aperf.whole >>= shift_count; | ||
310 | cur.mperf.whole >>= shift_count; | ||
311 | } | ||
312 | |||
313 | if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) { | ||
314 | int shift_count = 7; | ||
315 | cur.aperf.split.lo >>= shift_count; | ||
316 | cur.mperf.split.lo >>= shift_count; | ||
317 | } | ||
318 | |||
319 | if (cur.aperf.split.lo && cur.mperf.split.lo) | ||
320 | perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo; | ||
321 | else | ||
322 | perf_percent = 0; | ||
323 | 275 | ||
324 | #else | 276 | retval = (policy->cpuinfo.max_freq * ratio) >> APERFMPERF_SHIFT; |
325 | if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) { | ||
326 | int shift_count = 7; | ||
327 | cur.aperf.whole >>= shift_count; | ||
328 | cur.mperf.whole >>= shift_count; | ||
329 | } | ||
330 | |||
331 | if (cur.aperf.whole && cur.mperf.whole) | ||
332 | perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole; | ||
333 | else | ||
334 | perf_percent = 0; | ||
335 | |||
336 | #endif | ||
337 | |||
338 | retval = (policy->cpuinfo.max_freq * perf_percent) / 100; | ||
339 | 277 | ||
340 | return retval; | 278 | return retval; |
341 | } | 279 | } |
@@ -588,6 +526,21 @@ static const struct dmi_system_id sw_any_bug_dmi_table[] = { | |||
588 | }, | 526 | }, |
589 | { } | 527 | { } |
590 | }; | 528 | }; |
529 | |||
530 | static int acpi_cpufreq_blacklist(struct cpuinfo_x86 *c) | ||
531 | { | ||
532 | /* http://www.intel.com/Assets/PDF/specupdate/314554.pdf | ||
533 | * AL30: A Machine Check Exception (MCE) Occurring during an | ||
534 | * Enhanced Intel SpeedStep Technology Ratio Change May Cause | ||
535 | * Both Processor Cores to Lock Up when HT is enabled*/ | ||
536 | if (c->x86_vendor == X86_VENDOR_INTEL) { | ||
537 | if ((c->x86 == 15) && | ||
538 | (c->x86_model == 6) && | ||
539 | (c->x86_mask == 8) && smt_capable()) | ||
540 | return -ENODEV; | ||
541 | } | ||
542 | return 0; | ||
543 | } | ||
591 | #endif | 544 | #endif |
592 | 545 | ||
593 | static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | 546 | static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) |
@@ -602,6 +555,12 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
602 | 555 | ||
603 | dprintk("acpi_cpufreq_cpu_init\n"); | 556 | dprintk("acpi_cpufreq_cpu_init\n"); |
604 | 557 | ||
558 | #ifdef CONFIG_SMP | ||
559 | result = acpi_cpufreq_blacklist(c); | ||
560 | if (result) | ||
561 | return result; | ||
562 | #endif | ||
563 | |||
605 | data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); | 564 | data = kzalloc(sizeof(struct acpi_cpufreq_data), GFP_KERNEL); |
606 | if (!data) | 565 | if (!data) |
607 | return -ENOMEM; | 566 | return -ENOMEM; |
@@ -731,12 +690,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) | |||
731 | acpi_processor_notify_smm(THIS_MODULE); | 690 | acpi_processor_notify_smm(THIS_MODULE); |
732 | 691 | ||
733 | /* Check for APERF/MPERF support in hardware */ | 692 | /* Check for APERF/MPERF support in hardware */ |
734 | if (c->x86_vendor == X86_VENDOR_INTEL && c->cpuid_level >= 6) { | 693 | if (cpu_has(c, X86_FEATURE_APERFMPERF)) |
735 | unsigned int ecx; | 694 | acpi_cpufreq_driver.getavg = get_measured_perf; |
736 | ecx = cpuid_ecx(6); | ||
737 | if (ecx & CPUID_6_ECX_APERFMPERF_CAPABILITY) | ||
738 | acpi_cpufreq_driver.getavg = get_measured_perf; | ||
739 | } | ||
740 | 695 | ||
741 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); | 696 | dprintk("CPU%u - ACPI performance management activated.\n", cpu); |
742 | for (i = 0; i < perf->state_count; i++) | 697 | for (i = 0; i < perf->state_count; i++) |
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index 2a50ef891000..6394aa5c7985 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c | |||
@@ -605,9 +605,10 @@ static int check_pst_table(struct powernow_k8_data *data, struct pst_s *pst, | |||
605 | return 0; | 605 | return 0; |
606 | } | 606 | } |
607 | 607 | ||
608 | static void invalidate_entry(struct powernow_k8_data *data, unsigned int entry) | 608 | static void invalidate_entry(struct cpufreq_frequency_table *powernow_table, |
609 | unsigned int entry) | ||
609 | { | 610 | { |
610 | data->powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; | 611 | powernow_table[entry].frequency = CPUFREQ_ENTRY_INVALID; |
611 | } | 612 | } |
612 | 613 | ||
613 | static void print_basics(struct powernow_k8_data *data) | 614 | static void print_basics(struct powernow_k8_data *data) |
@@ -854,6 +855,10 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
854 | goto err_out; | 855 | goto err_out; |
855 | } | 856 | } |
856 | 857 | ||
858 | /* fill in data */ | ||
859 | data->numps = data->acpi_data.state_count; | ||
860 | powernow_k8_acpi_pst_values(data, 0); | ||
861 | |||
857 | if (cpu_family == CPU_HW_PSTATE) | 862 | if (cpu_family == CPU_HW_PSTATE) |
858 | ret_val = fill_powernow_table_pstate(data, powernow_table); | 863 | ret_val = fill_powernow_table_pstate(data, powernow_table); |
859 | else | 864 | else |
@@ -866,11 +871,8 @@ static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) | |||
866 | powernow_table[data->acpi_data.state_count].index = 0; | 871 | powernow_table[data->acpi_data.state_count].index = 0; |
867 | data->powernow_table = powernow_table; | 872 | data->powernow_table = powernow_table; |
868 | 873 | ||
869 | /* fill in data */ | ||
870 | data->numps = data->acpi_data.state_count; | ||
871 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) | 874 | if (cpumask_first(cpu_core_mask(data->cpu)) == data->cpu) |
872 | print_basics(data); | 875 | print_basics(data); |
873 | powernow_k8_acpi_pst_values(data, 0); | ||
874 | 876 | ||
875 | /* notify BIOS that we exist */ | 877 | /* notify BIOS that we exist */ |
876 | acpi_processor_notify_smm(THIS_MODULE); | 878 | acpi_processor_notify_smm(THIS_MODULE); |
@@ -914,13 +916,13 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, | |||
914 | "bad value %d.\n", i, index); | 916 | "bad value %d.\n", i, index); |
915 | printk(KERN_ERR PFX "Please report to BIOS " | 917 | printk(KERN_ERR PFX "Please report to BIOS " |
916 | "manufacturer\n"); | 918 | "manufacturer\n"); |
917 | invalidate_entry(data, i); | 919 | invalidate_entry(powernow_table, i); |
918 | continue; | 920 | continue; |
919 | } | 921 | } |
920 | rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); | 922 | rdmsr(MSR_PSTATE_DEF_BASE + index, lo, hi); |
921 | if (!(hi & HW_PSTATE_VALID_MASK)) { | 923 | if (!(hi & HW_PSTATE_VALID_MASK)) { |
922 | dprintk("invalid pstate %d, ignoring\n", index); | 924 | dprintk("invalid pstate %d, ignoring\n", index); |
923 | invalidate_entry(data, i); | 925 | invalidate_entry(powernow_table, i); |
924 | continue; | 926 | continue; |
925 | } | 927 | } |
926 | 928 | ||
@@ -941,7 +943,6 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
941 | struct cpufreq_frequency_table *powernow_table) | 943 | struct cpufreq_frequency_table *powernow_table) |
942 | { | 944 | { |
943 | int i; | 945 | int i; |
944 | int cntlofreq = 0; | ||
945 | 946 | ||
946 | for (i = 0; i < data->acpi_data.state_count; i++) { | 947 | for (i = 0; i < data->acpi_data.state_count; i++) { |
947 | u32 fid; | 948 | u32 fid; |
@@ -970,7 +971,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
970 | /* verify frequency is OK */ | 971 | /* verify frequency is OK */ |
971 | if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { | 972 | if ((freq > (MAX_FREQ * 1000)) || (freq < (MIN_FREQ * 1000))) { |
972 | dprintk("invalid freq %u kHz, ignoring\n", freq); | 973 | dprintk("invalid freq %u kHz, ignoring\n", freq); |
973 | invalidate_entry(data, i); | 974 | invalidate_entry(powernow_table, i); |
974 | continue; | 975 | continue; |
975 | } | 976 | } |
976 | 977 | ||
@@ -978,38 +979,17 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, | |||
978 | * BIOSs are using "off" to indicate invalid */ | 979 | * BIOSs are using "off" to indicate invalid */ |
979 | if (vid == VID_OFF) { | 980 | if (vid == VID_OFF) { |
980 | dprintk("invalid vid %u, ignoring\n", vid); | 981 | dprintk("invalid vid %u, ignoring\n", vid); |
981 | invalidate_entry(data, i); | 982 | invalidate_entry(powernow_table, i); |
982 | continue; | 983 | continue; |
983 | } | 984 | } |
984 | 985 | ||
985 | /* verify only 1 entry from the lo frequency table */ | ||
986 | if (fid < HI_FID_TABLE_BOTTOM) { | ||
987 | if (cntlofreq) { | ||
988 | /* if both entries are the same, | ||
989 | * ignore this one ... */ | ||
990 | if ((freq != powernow_table[cntlofreq].frequency) || | ||
991 | (index != powernow_table[cntlofreq].index)) { | ||
992 | printk(KERN_ERR PFX | ||
993 | "Too many lo freq table " | ||
994 | "entries\n"); | ||
995 | return 1; | ||
996 | } | ||
997 | |||
998 | dprintk("double low frequency table entry, " | ||
999 | "ignoring it.\n"); | ||
1000 | invalidate_entry(data, i); | ||
1001 | continue; | ||
1002 | } else | ||
1003 | cntlofreq = i; | ||
1004 | } | ||
1005 | |||
1006 | if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { | 986 | if (freq != (data->acpi_data.states[i].core_frequency * 1000)) { |
1007 | printk(KERN_INFO PFX "invalid freq entries " | 987 | printk(KERN_INFO PFX "invalid freq entries " |
1008 | "%u kHz vs. %u kHz\n", freq, | 988 | "%u kHz vs. %u kHz\n", freq, |
1009 | (unsigned int) | 989 | (unsigned int) |
1010 | (data->acpi_data.states[i].core_frequency | 990 | (data->acpi_data.states[i].core_frequency |
1011 | * 1000)); | 991 | * 1000)); |
1012 | invalidate_entry(data, i); | 992 | invalidate_entry(powernow_table, i); |
1013 | continue; | 993 | continue; |
1014 | } | 994 | } |
1015 | } | 995 | } |
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 593171e967ef..19807b89f058 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c | |||
@@ -3,10 +3,10 @@ | |||
3 | #include <linux/delay.h> | 3 | #include <linux/delay.h> |
4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
5 | #include <asm/dma.h> | 5 | #include <asm/dma.h> |
6 | #include <asm/io.h> | 6 | #include <linux/io.h> |
7 | #include <asm/processor-cyrix.h> | 7 | #include <asm/processor-cyrix.h> |
8 | #include <asm/processor-flags.h> | 8 | #include <asm/processor-flags.h> |
9 | #include <asm/timer.h> | 9 | #include <linux/timer.h> |
10 | #include <asm/pci-direct.h> | 10 | #include <asm/pci-direct.h> |
11 | #include <asm/tsc.h> | 11 | #include <asm/tsc.h> |
12 | 12 | ||
@@ -282,7 +282,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
282 | * The 5510/5520 companion chips have a funky PIT. | 282 | * The 5510/5520 companion chips have a funky PIT. |
283 | */ | 283 | */ |
284 | if (vendor == PCI_VENDOR_ID_CYRIX && | 284 | if (vendor == PCI_VENDOR_ID_CYRIX && |
285 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) | 285 | (device == PCI_DEVICE_ID_CYRIX_5510 || |
286 | device == PCI_DEVICE_ID_CYRIX_5520)) | ||
286 | mark_tsc_unstable("cyrix 5510/5520 detected"); | 287 | mark_tsc_unstable("cyrix 5510/5520 detected"); |
287 | } | 288 | } |
288 | #endif | 289 | #endif |
@@ -299,7 +300,8 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
299 | * ? : 0x7x | 300 | * ? : 0x7x |
300 | * GX1 : 0x8x GX1 datasheet 56 | 301 | * GX1 : 0x8x GX1 datasheet 56 |
301 | */ | 302 | */ |
302 | if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) | 303 | if ((0x30 <= dir1 && dir1 <= 0x6f) || |
304 | (0x80 <= dir1 && dir1 <= 0x8f)) | ||
303 | geode_configure(); | 305 | geode_configure(); |
304 | return; | 306 | return; |
305 | } else { /* MediaGX */ | 307 | } else { /* MediaGX */ |
@@ -427,9 +429,12 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c) | |||
427 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); | 429 | printk(KERN_INFO "Enabling CPUID on Cyrix processor.\n"); |
428 | local_irq_save(flags); | 430 | local_irq_save(flags); |
429 | ccr3 = getCx86(CX86_CCR3); | 431 | ccr3 = getCx86(CX86_CCR3); |
430 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 432 | /* enable MAPEN */ |
431 | setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */ | 433 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); |
432 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | 434 | /* enable cpuid */ |
435 | setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); | ||
436 | /* disable MAPEN */ | ||
437 | setCx86(CX86_CCR3, ccr3); | ||
433 | local_irq_restore(flags); | 438 | local_irq_restore(flags); |
434 | } | 439 | } |
435 | } | 440 | } |
diff --git a/arch/x86/kernel/cpu/hypervisor.c b/arch/x86/kernel/cpu/hypervisor.c index fb5b86af0b01..08be922de33a 100644 --- a/arch/x86/kernel/cpu/hypervisor.c +++ b/arch/x86/kernel/cpu/hypervisor.c | |||
@@ -28,18 +28,10 @@ | |||
28 | static inline void __cpuinit | 28 | static inline void __cpuinit |
29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) | 29 | detect_hypervisor_vendor(struct cpuinfo_x86 *c) |
30 | { | 30 | { |
31 | if (vmware_platform()) { | 31 | if (vmware_platform()) |
32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; | 32 | c->x86_hyper_vendor = X86_HYPER_VENDOR_VMWARE; |
33 | } else { | 33 | else |
34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; | 34 | c->x86_hyper_vendor = X86_HYPER_VENDOR_NONE; |
35 | } | ||
36 | } | ||
37 | |||
38 | unsigned long get_hypervisor_tsc_freq(void) | ||
39 | { | ||
40 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | ||
41 | return vmware_get_tsc_khz(); | ||
42 | return 0; | ||
43 | } | 35 | } |
44 | 36 | ||
45 | static inline void __cpuinit | 37 | static inline void __cpuinit |
@@ -56,3 +48,10 @@ void __cpuinit init_hypervisor(struct cpuinfo_x86 *c) | |||
56 | detect_hypervisor_vendor(c); | 48 | detect_hypervisor_vendor(c); |
57 | hypervisor_set_feature_bits(c); | 49 | hypervisor_set_feature_bits(c); |
58 | } | 50 | } |
51 | |||
52 | void __init init_hypervisor_platform(void) | ||
53 | { | ||
54 | init_hypervisor(&boot_cpu_data); | ||
55 | if (boot_cpu_data.x86_hyper_vendor == X86_HYPER_VENDOR_VMWARE) | ||
56 | vmware_platform_setup(); | ||
57 | } | ||
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 3260ab044996..40e1835b35e8 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c | |||
@@ -7,17 +7,17 @@ | |||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/thread_info.h> | 8 | #include <linux/thread_info.h> |
9 | #include <linux/module.h> | 9 | #include <linux/module.h> |
10 | #include <linux/uaccess.h> | ||
10 | 11 | ||
11 | #include <asm/processor.h> | 12 | #include <asm/processor.h> |
12 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
13 | #include <asm/msr.h> | 14 | #include <asm/msr.h> |
14 | #include <asm/uaccess.h> | ||
15 | #include <asm/ds.h> | 15 | #include <asm/ds.h> |
16 | #include <asm/bugs.h> | 16 | #include <asm/bugs.h> |
17 | #include <asm/cpu.h> | 17 | #include <asm/cpu.h> |
18 | 18 | ||
19 | #ifdef CONFIG_X86_64 | 19 | #ifdef CONFIG_X86_64 |
20 | #include <asm/topology.h> | 20 | #include <linux/topology.h> |
21 | #include <asm/numa_64.h> | 21 | #include <asm/numa_64.h> |
22 | #endif | 22 | #endif |
23 | 23 | ||
@@ -174,7 +174,8 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | |||
174 | #ifdef CONFIG_X86_F00F_BUG | 174 | #ifdef CONFIG_X86_F00F_BUG |
175 | /* | 175 | /* |
176 | * All current models of Pentium and Pentium with MMX technology CPUs | 176 | * All current models of Pentium and Pentium with MMX technology CPUs |
177 | * have the F0 0F bug, which lets nonprivileged users lock up the system. | 177 | * have the F0 0F bug, which lets nonprivileged users lock up the |
178 | * system. | ||
178 | * Note that the workaround only should be initialized once... | 179 | * Note that the workaround only should be initialized once... |
179 | */ | 180 | */ |
180 | c->f00f_bug = 0; | 181 | c->f00f_bug = 0; |
@@ -207,7 +208,7 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c) | |||
207 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); | 208 | printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); |
208 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); | 209 | printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); |
209 | lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; | 210 | lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE; |
210 | wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); | 211 | wrmsr(MSR_IA32_MISC_ENABLE, lo, hi); |
211 | } | 212 | } |
212 | } | 213 | } |
213 | 214 | ||
@@ -283,7 +284,7 @@ static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c) | |||
283 | /* Intel has a non-standard dependency on %ecx for this CPUID level. */ | 284 | /* Intel has a non-standard dependency on %ecx for this CPUID level. */ |
284 | cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); | 285 | cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); |
285 | if (eax & 0x1f) | 286 | if (eax & 0x1f) |
286 | return ((eax >> 26) + 1); | 287 | return (eax >> 26) + 1; |
287 | else | 288 | else |
288 | return 1; | 289 | return 1; |
289 | } | 290 | } |
@@ -349,6 +350,12 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
349 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); | 350 | set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); |
350 | } | 351 | } |
351 | 352 | ||
353 | if (c->cpuid_level > 6) { | ||
354 | unsigned ecx = cpuid_ecx(6); | ||
355 | if (ecx & 0x01) | ||
356 | set_cpu_cap(c, X86_FEATURE_APERFMPERF); | ||
357 | } | ||
358 | |||
352 | if (cpu_has_xmm2) | 359 | if (cpu_has_xmm2) |
353 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); | 360 | set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); |
354 | if (cpu_has_ds) { | 361 | if (cpu_has_ds) { |
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 789efe217e1a..804c40e2bc3e 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c | |||
@@ -3,7 +3,7 @@ | |||
3 | * | 3 | * |
4 | * Changes: | 4 | * Changes: |
5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) | 5 | * Venkatesh Pallipadi : Adding cache identification through cpuid(4) |
6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. | 6 | * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. |
7 | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. | 7 | * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. |
8 | */ | 8 | */ |
9 | 9 | ||
@@ -16,7 +16,7 @@ | |||
16 | #include <linux/pci.h> | 16 | #include <linux/pci.h> |
17 | 17 | ||
18 | #include <asm/processor.h> | 18 | #include <asm/processor.h> |
19 | #include <asm/smp.h> | 19 | #include <linux/smp.h> |
20 | #include <asm/k8.h> | 20 | #include <asm/k8.h> |
21 | 21 | ||
22 | #define LVL_1_INST 1 | 22 | #define LVL_1_INST 1 |
@@ -25,14 +25,15 @@ | |||
25 | #define LVL_3 4 | 25 | #define LVL_3 4 |
26 | #define LVL_TRACE 5 | 26 | #define LVL_TRACE 5 |
27 | 27 | ||
28 | struct _cache_table | 28 | struct _cache_table { |
29 | { | ||
30 | unsigned char descriptor; | 29 | unsigned char descriptor; |
31 | char cache_type; | 30 | char cache_type; |
32 | short size; | 31 | short size; |
33 | }; | 32 | }; |
34 | 33 | ||
35 | /* all the cache descriptor types we care about (no TLB or trace cache entries) */ | 34 | /* All the cache descriptor types we care about (no TLB or |
35 | trace cache entries) */ | ||
36 | |||
36 | static const struct _cache_table __cpuinitconst cache_table[] = | 37 | static const struct _cache_table __cpuinitconst cache_table[] = |
37 | { | 38 | { |
38 | { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ | 39 | { 0x06, LVL_1_INST, 8 }, /* 4-way set assoc, 32 byte line size */ |
@@ -105,8 +106,7 @@ static const struct _cache_table __cpuinitconst cache_table[] = | |||
105 | }; | 106 | }; |
106 | 107 | ||
107 | 108 | ||
108 | enum _cache_type | 109 | enum _cache_type { |
109 | { | ||
110 | CACHE_TYPE_NULL = 0, | 110 | CACHE_TYPE_NULL = 0, |
111 | CACHE_TYPE_DATA = 1, | 111 | CACHE_TYPE_DATA = 1, |
112 | CACHE_TYPE_INST = 2, | 112 | CACHE_TYPE_INST = 2, |
@@ -170,31 +170,31 @@ unsigned short num_cache_leaves; | |||
170 | Maybe later */ | 170 | Maybe later */ |
171 | union l1_cache { | 171 | union l1_cache { |
172 | struct { | 172 | struct { |
173 | unsigned line_size : 8; | 173 | unsigned line_size:8; |
174 | unsigned lines_per_tag : 8; | 174 | unsigned lines_per_tag:8; |
175 | unsigned assoc : 8; | 175 | unsigned assoc:8; |
176 | unsigned size_in_kb : 8; | 176 | unsigned size_in_kb:8; |
177 | }; | 177 | }; |
178 | unsigned val; | 178 | unsigned val; |
179 | }; | 179 | }; |
180 | 180 | ||
181 | union l2_cache { | 181 | union l2_cache { |
182 | struct { | 182 | struct { |
183 | unsigned line_size : 8; | 183 | unsigned line_size:8; |
184 | unsigned lines_per_tag : 4; | 184 | unsigned lines_per_tag:4; |
185 | unsigned assoc : 4; | 185 | unsigned assoc:4; |
186 | unsigned size_in_kb : 16; | 186 | unsigned size_in_kb:16; |
187 | }; | 187 | }; |
188 | unsigned val; | 188 | unsigned val; |
189 | }; | 189 | }; |
190 | 190 | ||
191 | union l3_cache { | 191 | union l3_cache { |
192 | struct { | 192 | struct { |
193 | unsigned line_size : 8; | 193 | unsigned line_size:8; |
194 | unsigned lines_per_tag : 4; | 194 | unsigned lines_per_tag:4; |
195 | unsigned assoc : 4; | 195 | unsigned assoc:4; |
196 | unsigned res : 2; | 196 | unsigned res:2; |
197 | unsigned size_encoded : 14; | 197 | unsigned size_encoded:14; |
198 | }; | 198 | }; |
199 | unsigned val; | 199 | unsigned val; |
200 | }; | 200 | }; |
@@ -241,7 +241,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
241 | case 0: | 241 | case 0: |
242 | if (!l1->val) | 242 | if (!l1->val) |
243 | return; | 243 | return; |
244 | assoc = l1->assoc; | 244 | assoc = assocs[l1->assoc]; |
245 | line_size = l1->line_size; | 245 | line_size = l1->line_size; |
246 | lines_per_tag = l1->lines_per_tag; | 246 | lines_per_tag = l1->lines_per_tag; |
247 | size_in_kb = l1->size_in_kb; | 247 | size_in_kb = l1->size_in_kb; |
@@ -249,7 +249,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
249 | case 2: | 249 | case 2: |
250 | if (!l2.val) | 250 | if (!l2.val) |
251 | return; | 251 | return; |
252 | assoc = l2.assoc; | 252 | assoc = assocs[l2.assoc]; |
253 | line_size = l2.line_size; | 253 | line_size = l2.line_size; |
254 | lines_per_tag = l2.lines_per_tag; | 254 | lines_per_tag = l2.lines_per_tag; |
255 | /* cpu_data has errata corrections for K7 applied */ | 255 | /* cpu_data has errata corrections for K7 applied */ |
@@ -258,10 +258,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
258 | case 3: | 258 | case 3: |
259 | if (!l3.val) | 259 | if (!l3.val) |
260 | return; | 260 | return; |
261 | assoc = l3.assoc; | 261 | assoc = assocs[l3.assoc]; |
262 | line_size = l3.line_size; | 262 | line_size = l3.line_size; |
263 | lines_per_tag = l3.lines_per_tag; | 263 | lines_per_tag = l3.lines_per_tag; |
264 | size_in_kb = l3.size_encoded * 512; | 264 | size_in_kb = l3.size_encoded * 512; |
265 | if (boot_cpu_has(X86_FEATURE_AMD_DCM)) { | ||
266 | size_in_kb = size_in_kb >> 1; | ||
267 | assoc = assoc >> 1; | ||
268 | } | ||
265 | break; | 269 | break; |
266 | default: | 270 | default: |
267 | return; | 271 | return; |
@@ -270,18 +274,14 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, | |||
270 | eax->split.is_self_initializing = 1; | 274 | eax->split.is_self_initializing = 1; |
271 | eax->split.type = types[leaf]; | 275 | eax->split.type = types[leaf]; |
272 | eax->split.level = levels[leaf]; | 276 | eax->split.level = levels[leaf]; |
273 | if (leaf == 3) | 277 | eax->split.num_threads_sharing = 0; |
274 | eax->split.num_threads_sharing = | ||
275 | current_cpu_data.x86_max_cores - 1; | ||
276 | else | ||
277 | eax->split.num_threads_sharing = 0; | ||
278 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; | 278 | eax->split.num_cores_on_die = current_cpu_data.x86_max_cores - 1; |
279 | 279 | ||
280 | 280 | ||
281 | if (assoc == 0xf) | 281 | if (assoc == 0xffff) |
282 | eax->split.is_fully_associative = 1; | 282 | eax->split.is_fully_associative = 1; |
283 | ebx->split.coherency_line_size = line_size - 1; | 283 | ebx->split.coherency_line_size = line_size - 1; |
284 | ebx->split.ways_of_associativity = assocs[assoc] - 1; | 284 | ebx->split.ways_of_associativity = assoc - 1; |
285 | ebx->split.physical_line_partition = lines_per_tag - 1; | 285 | ebx->split.physical_line_partition = lines_per_tag - 1; |
286 | ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / | 286 | ecx->split.number_of_sets = (size_in_kb * 1024) / line_size / |
287 | (ebx->split.ways_of_associativity + 1) - 1; | 287 | (ebx->split.ways_of_associativity + 1) - 1; |
@@ -350,7 +350,8 @@ static int __cpuinit find_num_cache_leaves(void) | |||
350 | 350 | ||
351 | unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | 351 | unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) |
352 | { | 352 | { |
353 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; /* Cache sizes */ | 353 | /* Cache sizes */ |
354 | unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0; | ||
354 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ | 355 | unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */ |
355 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ | 356 | unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */ |
356 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; | 357 | unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb; |
@@ -377,8 +378,8 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
377 | 378 | ||
378 | retval = cpuid4_cache_lookup_regs(i, &this_leaf); | 379 | retval = cpuid4_cache_lookup_regs(i, &this_leaf); |
379 | if (retval >= 0) { | 380 | if (retval >= 0) { |
380 | switch(this_leaf.eax.split.level) { | 381 | switch (this_leaf.eax.split.level) { |
381 | case 1: | 382 | case 1: |
382 | if (this_leaf.eax.split.type == | 383 | if (this_leaf.eax.split.type == |
383 | CACHE_TYPE_DATA) | 384 | CACHE_TYPE_DATA) |
384 | new_l1d = this_leaf.size/1024; | 385 | new_l1d = this_leaf.size/1024; |
@@ -386,19 +387,20 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
386 | CACHE_TYPE_INST) | 387 | CACHE_TYPE_INST) |
387 | new_l1i = this_leaf.size/1024; | 388 | new_l1i = this_leaf.size/1024; |
388 | break; | 389 | break; |
389 | case 2: | 390 | case 2: |
390 | new_l2 = this_leaf.size/1024; | 391 | new_l2 = this_leaf.size/1024; |
391 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | 392 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; |
392 | index_msb = get_count_order(num_threads_sharing); | 393 | index_msb = get_count_order(num_threads_sharing); |
393 | l2_id = c->apicid >> index_msb; | 394 | l2_id = c->apicid >> index_msb; |
394 | break; | 395 | break; |
395 | case 3: | 396 | case 3: |
396 | new_l3 = this_leaf.size/1024; | 397 | new_l3 = this_leaf.size/1024; |
397 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; | 398 | num_threads_sharing = 1 + this_leaf.eax.split.num_threads_sharing; |
398 | index_msb = get_count_order(num_threads_sharing); | 399 | index_msb = get_count_order( |
400 | num_threads_sharing); | ||
399 | l3_id = c->apicid >> index_msb; | 401 | l3_id = c->apicid >> index_msb; |
400 | break; | 402 | break; |
401 | default: | 403 | default: |
402 | break; | 404 | break; |
403 | } | 405 | } |
404 | } | 406 | } |
@@ -421,22 +423,21 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
421 | /* Number of times to iterate */ | 423 | /* Number of times to iterate */ |
422 | n = cpuid_eax(2) & 0xFF; | 424 | n = cpuid_eax(2) & 0xFF; |
423 | 425 | ||
424 | for ( i = 0 ; i < n ; i++ ) { | 426 | for (i = 0 ; i < n ; i++) { |
425 | cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); | 427 | cpuid(2, ®s[0], ®s[1], ®s[2], ®s[3]); |
426 | 428 | ||
427 | /* If bit 31 is set, this is an unknown format */ | 429 | /* If bit 31 is set, this is an unknown format */ |
428 | for ( j = 0 ; j < 3 ; j++ ) { | 430 | for (j = 0 ; j < 3 ; j++) |
429 | if (regs[j] & (1 << 31)) regs[j] = 0; | 431 | if (regs[j] & (1 << 31)) |
430 | } | 432 | regs[j] = 0; |
431 | 433 | ||
432 | /* Byte 0 is level count, not a descriptor */ | 434 | /* Byte 0 is level count, not a descriptor */ |
433 | for ( j = 1 ; j < 16 ; j++ ) { | 435 | for (j = 1 ; j < 16 ; j++) { |
434 | unsigned char des = dp[j]; | 436 | unsigned char des = dp[j]; |
435 | unsigned char k = 0; | 437 | unsigned char k = 0; |
436 | 438 | ||
437 | /* look up this descriptor in the table */ | 439 | /* look up this descriptor in the table */ |
438 | while (cache_table[k].descriptor != 0) | 440 | while (cache_table[k].descriptor != 0) { |
439 | { | ||
440 | if (cache_table[k].descriptor == des) { | 441 | if (cache_table[k].descriptor == des) { |
441 | if (only_trace && cache_table[k].cache_type != LVL_TRACE) | 442 | if (only_trace && cache_table[k].cache_type != LVL_TRACE) |
442 | break; | 443 | break; |
@@ -488,14 +489,14 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c) | |||
488 | } | 489 | } |
489 | 490 | ||
490 | if (trace) | 491 | if (trace) |
491 | printk (KERN_INFO "CPU: Trace cache: %dK uops", trace); | 492 | printk(KERN_INFO "CPU: Trace cache: %dK uops", trace); |
492 | else if ( l1i ) | 493 | else if (l1i) |
493 | printk (KERN_INFO "CPU: L1 I cache: %dK", l1i); | 494 | printk(KERN_INFO "CPU: L1 I cache: %dK", l1i); |
494 | 495 | ||
495 | if (l1d) | 496 | if (l1d) |
496 | printk(", L1 D cache: %dK\n", l1d); | 497 | printk(KERN_CONT ", L1 D cache: %dK\n", l1d); |
497 | else | 498 | else |
498 | printk("\n"); | 499 | printk(KERN_CONT "\n"); |
499 | 500 | ||
500 | if (l2) | 501 | if (l2) |
501 | printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); | 502 | printk(KERN_INFO "CPU: L2 cache: %dK\n", l2); |
@@ -522,6 +523,18 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) | |||
522 | int index_msb, i; | 523 | int index_msb, i; |
523 | struct cpuinfo_x86 *c = &cpu_data(cpu); | 524 | struct cpuinfo_x86 *c = &cpu_data(cpu); |
524 | 525 | ||
526 | if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { | ||
527 | struct cpuinfo_x86 *d; | ||
528 | for_each_online_cpu(i) { | ||
529 | if (!per_cpu(cpuid4_info, i)) | ||
530 | continue; | ||
531 | d = &cpu_data(i); | ||
532 | this_leaf = CPUID4_INFO_IDX(i, index); | ||
533 | cpumask_copy(to_cpumask(this_leaf->shared_cpu_map), | ||
534 | d->llc_shared_map); | ||
535 | } | ||
536 | return; | ||
537 | } | ||
525 | this_leaf = CPUID4_INFO_IDX(cpu, index); | 538 | this_leaf = CPUID4_INFO_IDX(cpu, index); |
526 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; | 539 | num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; |
527 | 540 | ||
@@ -558,8 +571,13 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | |||
558 | } | 571 | } |
559 | } | 572 | } |
560 | #else | 573 | #else |
561 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) {} | 574 | static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) |
562 | static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) {} | 575 | { |
576 | } | ||
577 | |||
578 | static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index) | ||
579 | { | ||
580 | } | ||
563 | #endif | 581 | #endif |
564 | 582 | ||
565 | static void __cpuinit free_cache_attributes(unsigned int cpu) | 583 | static void __cpuinit free_cache_attributes(unsigned int cpu) |
@@ -645,7 +663,7 @@ static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); | |||
645 | static ssize_t show_##file_name \ | 663 | static ssize_t show_##file_name \ |
646 | (struct _cpuid4_info *this_leaf, char *buf) \ | 664 | (struct _cpuid4_info *this_leaf, char *buf) \ |
647 | { \ | 665 | { \ |
648 | return sprintf (buf, "%lu\n", (unsigned long)this_leaf->object + val); \ | 666 | return sprintf(buf, "%lu\n", (unsigned long)this_leaf->object + val); \ |
649 | } | 667 | } |
650 | 668 | ||
651 | show_one_plus(level, eax.split.level, 0); | 669 | show_one_plus(level, eax.split.level, 0); |
@@ -656,7 +674,7 @@ show_one_plus(number_of_sets, ecx.split.number_of_sets, 1); | |||
656 | 674 | ||
657 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) | 675 | static ssize_t show_size(struct _cpuid4_info *this_leaf, char *buf) |
658 | { | 676 | { |
659 | return sprintf (buf, "%luK\n", this_leaf->size / 1024); | 677 | return sprintf(buf, "%luK\n", this_leaf->size / 1024); |
660 | } | 678 | } |
661 | 679 | ||
662 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | 680 | static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, |
@@ -669,7 +687,7 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf, | |||
669 | const struct cpumask *mask; | 687 | const struct cpumask *mask; |
670 | 688 | ||
671 | mask = to_cpumask(this_leaf->shared_cpu_map); | 689 | mask = to_cpumask(this_leaf->shared_cpu_map); |
672 | n = type? | 690 | n = type ? |
673 | cpulist_scnprintf(buf, len-2, mask) : | 691 | cpulist_scnprintf(buf, len-2, mask) : |
674 | cpumask_scnprintf(buf, len-2, mask); | 692 | cpumask_scnprintf(buf, len-2, mask); |
675 | buf[n++] = '\n'; | 693 | buf[n++] = '\n'; |
@@ -800,7 +818,7 @@ static struct _cache_attr cache_disable_0 = __ATTR(cache_disable_0, 0644, | |||
800 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, | 818 | static struct _cache_attr cache_disable_1 = __ATTR(cache_disable_1, 0644, |
801 | show_cache_disable_1, store_cache_disable_1); | 819 | show_cache_disable_1, store_cache_disable_1); |
802 | 820 | ||
803 | static struct attribute * default_attrs[] = { | 821 | static struct attribute *default_attrs[] = { |
804 | &type.attr, | 822 | &type.attr, |
805 | &level.attr, | 823 | &level.attr, |
806 | &coherency_line_size.attr, | 824 | &coherency_line_size.attr, |
@@ -815,7 +833,7 @@ static struct attribute * default_attrs[] = { | |||
815 | NULL | 833 | NULL |
816 | }; | 834 | }; |
817 | 835 | ||
818 | static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) | 836 | static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) |
819 | { | 837 | { |
820 | struct _cache_attr *fattr = to_attr(attr); | 838 | struct _cache_attr *fattr = to_attr(attr); |
821 | struct _index_kobject *this_leaf = to_object(kobj); | 839 | struct _index_kobject *this_leaf = to_object(kobj); |
@@ -828,8 +846,8 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) | |||
828 | return ret; | 846 | return ret; |
829 | } | 847 | } |
830 | 848 | ||
831 | static ssize_t store(struct kobject * kobj, struct attribute * attr, | 849 | static ssize_t store(struct kobject *kobj, struct attribute *attr, |
832 | const char * buf, size_t count) | 850 | const char *buf, size_t count) |
833 | { | 851 | { |
834 | struct _cache_attr *fattr = to_attr(attr); | 852 | struct _cache_attr *fattr = to_attr(attr); |
835 | struct _index_kobject *this_leaf = to_object(kobj); | 853 | struct _index_kobject *this_leaf = to_object(kobj); |
@@ -883,7 +901,7 @@ static int __cpuinit cpuid4_cache_sysfs_init(unsigned int cpu) | |||
883 | goto err_out; | 901 | goto err_out; |
884 | 902 | ||
885 | per_cpu(index_kobject, cpu) = kzalloc( | 903 | per_cpu(index_kobject, cpu) = kzalloc( |
886 | sizeof(struct _index_kobject ) * num_cache_leaves, GFP_KERNEL); | 904 | sizeof(struct _index_kobject) * num_cache_leaves, GFP_KERNEL); |
887 | if (unlikely(per_cpu(index_kobject, cpu) == NULL)) | 905 | if (unlikely(per_cpu(index_kobject, cpu) == NULL)) |
888 | goto err_out; | 906 | goto err_out; |
889 | 907 | ||
@@ -917,7 +935,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
917 | } | 935 | } |
918 | 936 | ||
919 | for (i = 0; i < num_cache_leaves; i++) { | 937 | for (i = 0; i < num_cache_leaves; i++) { |
920 | this_object = INDEX_KOBJECT_PTR(cpu,i); | 938 | this_object = INDEX_KOBJECT_PTR(cpu, i); |
921 | this_object->cpu = cpu; | 939 | this_object->cpu = cpu; |
922 | this_object->index = i; | 940 | this_object->index = i; |
923 | retval = kobject_init_and_add(&(this_object->kobj), | 941 | retval = kobject_init_and_add(&(this_object->kobj), |
@@ -925,9 +943,8 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev) | |||
925 | per_cpu(cache_kobject, cpu), | 943 | per_cpu(cache_kobject, cpu), |
926 | "index%1lu", i); | 944 | "index%1lu", i); |
927 | if (unlikely(retval)) { | 945 | if (unlikely(retval)) { |
928 | for (j = 0; j < i; j++) { | 946 | for (j = 0; j < i; j++) |
929 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,j)->kobj)); | 947 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, j)->kobj)); |
930 | } | ||
931 | kobject_put(per_cpu(cache_kobject, cpu)); | 948 | kobject_put(per_cpu(cache_kobject, cpu)); |
932 | cpuid4_cache_sysfs_exit(cpu); | 949 | cpuid4_cache_sysfs_exit(cpu); |
933 | return retval; | 950 | return retval; |
@@ -952,7 +969,7 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev) | |||
952 | cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); | 969 | cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map)); |
953 | 970 | ||
954 | for (i = 0; i < num_cache_leaves; i++) | 971 | for (i = 0; i < num_cache_leaves; i++) |
955 | kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); | 972 | kobject_put(&(INDEX_KOBJECT_PTR(cpu, i)->kobj)); |
956 | kobject_put(per_cpu(cache_kobject, cpu)); | 973 | kobject_put(per_cpu(cache_kobject, cpu)); |
957 | cpuid4_cache_sysfs_exit(cpu); | 974 | cpuid4_cache_sysfs_exit(cpu); |
958 | } | 975 | } |
@@ -977,8 +994,7 @@ static int __cpuinit cacheinfo_cpu_callback(struct notifier_block *nfb, | |||
977 | return NOTIFY_OK; | 994 | return NOTIFY_OK; |
978 | } | 995 | } |
979 | 996 | ||
980 | static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = | 997 | static struct notifier_block __cpuinitdata cacheinfo_cpu_notifier = { |
981 | { | ||
982 | .notifier_call = cacheinfo_cpu_callback, | 998 | .notifier_call = cacheinfo_cpu_callback, |
983 | }; | 999 | }; |
984 | 1000 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/Makefile b/arch/x86/kernel/cpu/mcheck/Makefile index 188a1ca5ad2b..4ac6d48fe11b 100644 --- a/arch/x86/kernel/cpu/mcheck/Makefile +++ b/arch/x86/kernel/cpu/mcheck/Makefile | |||
@@ -1,11 +1,8 @@ | |||
1 | obj-y = mce.o | 1 | obj-y = mce.o mce-severity.o |
2 | 2 | ||
3 | obj-$(CONFIG_X86_NEW_MCE) += mce-severity.o | ||
4 | obj-$(CONFIG_X86_OLD_MCE) += k7.o p4.o p6.o | ||
5 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o | 3 | obj-$(CONFIG_X86_ANCIENT_MCE) += winchip.o p5.o |
6 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o | 4 | obj-$(CONFIG_X86_MCE_INTEL) += mce_intel.o |
7 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o | 5 | obj-$(CONFIG_X86_MCE_AMD) += mce_amd.o |
8 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | ||
9 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o | 6 | obj-$(CONFIG_X86_MCE_THRESHOLD) += threshold.o |
10 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o | 7 | obj-$(CONFIG_X86_MCE_INJECT) += mce-inject.o |
11 | 8 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/k7.c b/arch/x86/kernel/cpu/mcheck/k7.c deleted file mode 100644 index b945d5dbc609..000000000000 --- a/arch/x86/kernel/cpu/mcheck/k7.c +++ /dev/null | |||
@@ -1,116 +0,0 @@ | |||
1 | /* | ||
2 | * Athlon specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Dave Jones <davej@redhat.com> | ||
4 | */ | ||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | ||
10 | |||
11 | #include <asm/processor.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | /* Machine Check Handler For AMD Athlon/Duron: */ | ||
17 | static void k7_machine_check(struct pt_regs *regs, long error_code) | ||
18 | { | ||
19 | u32 alow, ahigh, high, low; | ||
20 | u32 mcgstl, mcgsth; | ||
21 | int recover = 1; | ||
22 | int i; | ||
23 | |||
24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
25 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
26 | recover = 0; | ||
27 | |||
28 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
29 | smp_processor_id(), mcgsth, mcgstl); | ||
30 | |||
31 | for (i = 1; i < nr_mce_banks; i++) { | ||
32 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
33 | if (high & (1<<31)) { | ||
34 | char misc[20]; | ||
35 | char addr[24]; | ||
36 | |||
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
40 | if (high & (1<<29)) | ||
41 | recover |= 1; | ||
42 | if (high & (1<<25)) | ||
43 | recover |= 2; | ||
44 | high &= ~(1<<31); | ||
45 | |||
46 | if (high & (1<<27)) { | ||
47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
49 | } | ||
50 | if (high & (1<<26)) { | ||
51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
53 | } | ||
54 | |||
55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
56 | smp_processor_id(), i, high, low, misc, addr); | ||
57 | |||
58 | /* Clear it: */ | ||
59 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
60 | /* Serialize: */ | ||
61 | wmb(); | ||
62 | add_taint(TAINT_MACHINE_CHECK); | ||
63 | } | ||
64 | } | ||
65 | |||
66 | if (recover & 2) | ||
67 | panic("CPU context corrupt"); | ||
68 | if (recover & 1) | ||
69 | panic("Unable to continue"); | ||
70 | |||
71 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
72 | |||
73 | mcgstl &= ~(1<<2); | ||
74 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
75 | } | ||
76 | |||
77 | |||
78 | /* AMD K7 machine check is Intel like: */ | ||
79 | void amd_mcheck_init(struct cpuinfo_x86 *c) | ||
80 | { | ||
81 | u32 l, h; | ||
82 | int i; | ||
83 | |||
84 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
85 | return; | ||
86 | |||
87 | machine_check_vector = k7_machine_check; | ||
88 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
89 | wmb(); | ||
90 | |||
91 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
92 | |||
93 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
94 | if (l & (1<<8)) /* Control register present ? */ | ||
95 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
96 | nr_mce_banks = l & 0xff; | ||
97 | |||
98 | /* | ||
99 | * Clear status for MC index 0 separately, we don't touch CTL, | ||
100 | * as some K7 Athlons cause spurious MCEs when its enabled: | ||
101 | */ | ||
102 | if (boot_cpu_data.x86 == 6) { | ||
103 | wrmsr(MSR_IA32_MC0_STATUS, 0x0, 0x0); | ||
104 | i = 1; | ||
105 | } else | ||
106 | i = 0; | ||
107 | |||
108 | for (; i < nr_mce_banks; i++) { | ||
109 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
110 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
111 | } | ||
112 | |||
113 | set_in_cr4(X86_CR4_MCE); | ||
114 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
115 | smp_processor_id()); | ||
116 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index a3a235a53f09..7029f0e2acad 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c | |||
@@ -18,7 +18,12 @@ | |||
18 | #include <linux/string.h> | 18 | #include <linux/string.h> |
19 | #include <linux/fs.h> | 19 | #include <linux/fs.h> |
20 | #include <linux/smp.h> | 20 | #include <linux/smp.h> |
21 | #include <linux/notifier.h> | ||
22 | #include <linux/kdebug.h> | ||
23 | #include <linux/cpu.h> | ||
24 | #include <linux/sched.h> | ||
21 | #include <asm/mce.h> | 25 | #include <asm/mce.h> |
26 | #include <asm/apic.h> | ||
22 | 27 | ||
23 | /* Update fake mce registers on current CPU. */ | 28 | /* Update fake mce registers on current CPU. */ |
24 | static void inject_mce(struct mce *m) | 29 | static void inject_mce(struct mce *m) |
@@ -39,44 +44,141 @@ static void inject_mce(struct mce *m) | |||
39 | i->finished = 1; | 44 | i->finished = 1; |
40 | } | 45 | } |
41 | 46 | ||
42 | struct delayed_mce { | 47 | static void raise_poll(struct mce *m) |
43 | struct timer_list timer; | 48 | { |
44 | struct mce m; | 49 | unsigned long flags; |
45 | }; | 50 | mce_banks_t b; |
46 | 51 | ||
47 | /* Inject mce on current CPU */ | 52 | memset(&b, 0xff, sizeof(mce_banks_t)); |
48 | static void raise_mce(unsigned long data) | 53 | local_irq_save(flags); |
54 | machine_check_poll(0, &b); | ||
55 | local_irq_restore(flags); | ||
56 | m->finished = 0; | ||
57 | } | ||
58 | |||
59 | static void raise_exception(struct mce *m, struct pt_regs *pregs) | ||
49 | { | 60 | { |
50 | struct delayed_mce *dm = (struct delayed_mce *)data; | 61 | struct pt_regs regs; |
51 | struct mce *m = &dm->m; | 62 | unsigned long flags; |
52 | int cpu = m->extcpu; | ||
53 | 63 | ||
54 | inject_mce(m); | 64 | if (!pregs) { |
55 | if (m->status & MCI_STATUS_UC) { | ||
56 | struct pt_regs regs; | ||
57 | memset(®s, 0, sizeof(struct pt_regs)); | 65 | memset(®s, 0, sizeof(struct pt_regs)); |
58 | regs.ip = m->ip; | 66 | regs.ip = m->ip; |
59 | regs.cs = m->cs; | 67 | regs.cs = m->cs; |
68 | pregs = ®s; | ||
69 | } | ||
70 | /* in mcheck exeception handler, irq will be disabled */ | ||
71 | local_irq_save(flags); | ||
72 | do_machine_check(pregs, 0); | ||
73 | local_irq_restore(flags); | ||
74 | m->finished = 0; | ||
75 | } | ||
76 | |||
77 | static cpumask_t mce_inject_cpumask; | ||
78 | |||
79 | static int mce_raise_notify(struct notifier_block *self, | ||
80 | unsigned long val, void *data) | ||
81 | { | ||
82 | struct die_args *args = (struct die_args *)data; | ||
83 | int cpu = smp_processor_id(); | ||
84 | struct mce *m = &__get_cpu_var(injectm); | ||
85 | if (val != DIE_NMI_IPI || !cpu_isset(cpu, mce_inject_cpumask)) | ||
86 | return NOTIFY_DONE; | ||
87 | cpu_clear(cpu, mce_inject_cpumask); | ||
88 | if (m->inject_flags & MCJ_EXCEPTION) | ||
89 | raise_exception(m, args->regs); | ||
90 | else if (m->status) | ||
91 | raise_poll(m); | ||
92 | return NOTIFY_STOP; | ||
93 | } | ||
94 | |||
95 | static struct notifier_block mce_raise_nb = { | ||
96 | .notifier_call = mce_raise_notify, | ||
97 | .priority = 1000, | ||
98 | }; | ||
99 | |||
100 | /* Inject mce on current CPU */ | ||
101 | static int raise_local(struct mce *m) | ||
102 | { | ||
103 | int context = MCJ_CTX(m->inject_flags); | ||
104 | int ret = 0; | ||
105 | int cpu = m->extcpu; | ||
106 | |||
107 | if (m->inject_flags & MCJ_EXCEPTION) { | ||
60 | printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); | 108 | printk(KERN_INFO "Triggering MCE exception on CPU %d\n", cpu); |
61 | do_machine_check(®s, 0); | 109 | switch (context) { |
110 | case MCJ_CTX_IRQ: | ||
111 | /* | ||
112 | * Could do more to fake interrupts like | ||
113 | * calling irq_enter, but the necessary | ||
114 | * machinery isn't exported currently. | ||
115 | */ | ||
116 | /*FALL THROUGH*/ | ||
117 | case MCJ_CTX_PROCESS: | ||
118 | raise_exception(m, NULL); | ||
119 | break; | ||
120 | default: | ||
121 | printk(KERN_INFO "Invalid MCE context\n"); | ||
122 | ret = -EINVAL; | ||
123 | } | ||
62 | printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); | 124 | printk(KERN_INFO "MCE exception done on CPU %d\n", cpu); |
63 | } else { | 125 | } else if (m->status) { |
64 | mce_banks_t b; | ||
65 | memset(&b, 0xff, sizeof(mce_banks_t)); | ||
66 | printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); | 126 | printk(KERN_INFO "Starting machine check poll CPU %d\n", cpu); |
67 | machine_check_poll(0, &b); | 127 | raise_poll(m); |
68 | mce_notify_irq(); | 128 | mce_notify_irq(); |
69 | printk(KERN_INFO "Finished machine check poll on CPU %d\n", | 129 | printk(KERN_INFO "Machine check poll done on CPU %d\n", cpu); |
70 | cpu); | 130 | } else |
71 | } | 131 | m->finished = 0; |
72 | kfree(dm); | 132 | |
133 | return ret; | ||
134 | } | ||
135 | |||
136 | static void raise_mce(struct mce *m) | ||
137 | { | ||
138 | int context = MCJ_CTX(m->inject_flags); | ||
139 | |||
140 | inject_mce(m); | ||
141 | |||
142 | if (context == MCJ_CTX_RANDOM) | ||
143 | return; | ||
144 | |||
145 | #ifdef CONFIG_X86_LOCAL_APIC | ||
146 | if (m->inject_flags & MCJ_NMI_BROADCAST) { | ||
147 | unsigned long start; | ||
148 | int cpu; | ||
149 | get_online_cpus(); | ||
150 | mce_inject_cpumask = cpu_online_map; | ||
151 | cpu_clear(get_cpu(), mce_inject_cpumask); | ||
152 | for_each_online_cpu(cpu) { | ||
153 | struct mce *mcpu = &per_cpu(injectm, cpu); | ||
154 | if (!mcpu->finished || | ||
155 | MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM) | ||
156 | cpu_clear(cpu, mce_inject_cpumask); | ||
157 | } | ||
158 | if (!cpus_empty(mce_inject_cpumask)) | ||
159 | apic->send_IPI_mask(&mce_inject_cpumask, NMI_VECTOR); | ||
160 | start = jiffies; | ||
161 | while (!cpus_empty(mce_inject_cpumask)) { | ||
162 | if (!time_before(jiffies, start + 2*HZ)) { | ||
163 | printk(KERN_ERR | ||
164 | "Timeout waiting for mce inject NMI %lx\n", | ||
165 | *cpus_addr(mce_inject_cpumask)); | ||
166 | break; | ||
167 | } | ||
168 | cpu_relax(); | ||
169 | } | ||
170 | raise_local(m); | ||
171 | put_cpu(); | ||
172 | put_online_cpus(); | ||
173 | } else | ||
174 | #endif | ||
175 | raise_local(m); | ||
73 | } | 176 | } |
74 | 177 | ||
75 | /* Error injection interface */ | 178 | /* Error injection interface */ |
76 | static ssize_t mce_write(struct file *filp, const char __user *ubuf, | 179 | static ssize_t mce_write(struct file *filp, const char __user *ubuf, |
77 | size_t usize, loff_t *off) | 180 | size_t usize, loff_t *off) |
78 | { | 181 | { |
79 | struct delayed_mce *dm; | ||
80 | struct mce m; | 182 | struct mce m; |
81 | 183 | ||
82 | if (!capable(CAP_SYS_ADMIN)) | 184 | if (!capable(CAP_SYS_ADMIN)) |
@@ -96,19 +198,12 @@ static ssize_t mce_write(struct file *filp, const char __user *ubuf, | |||
96 | if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) | 198 | if (m.extcpu >= num_possible_cpus() || !cpu_online(m.extcpu)) |
97 | return -EINVAL; | 199 | return -EINVAL; |
98 | 200 | ||
99 | dm = kmalloc(sizeof(struct delayed_mce), GFP_KERNEL); | ||
100 | if (!dm) | ||
101 | return -ENOMEM; | ||
102 | |||
103 | /* | 201 | /* |
104 | * Need to give user space some time to set everything up, | 202 | * Need to give user space some time to set everything up, |
105 | * so do it a jiffie or two later everywhere. | 203 | * so do it a jiffie or two later everywhere. |
106 | * Should we use a hrtimer here for better synchronization? | ||
107 | */ | 204 | */ |
108 | memcpy(&dm->m, &m, sizeof(struct mce)); | 205 | schedule_timeout(2); |
109 | setup_timer(&dm->timer, raise_mce, (unsigned long)dm); | 206 | raise_mce(&m); |
110 | dm->timer.expires = jiffies + 2; | ||
111 | add_timer_on(&dm->timer, m.extcpu); | ||
112 | return usize; | 207 | return usize; |
113 | } | 208 | } |
114 | 209 | ||
@@ -116,6 +211,7 @@ static int inject_init(void) | |||
116 | { | 211 | { |
117 | printk(KERN_INFO "Machine check injector initialized\n"); | 212 | printk(KERN_INFO "Machine check injector initialized\n"); |
118 | mce_chrdev_ops.write = mce_write; | 213 | mce_chrdev_ops.write = mce_write; |
214 | register_die_notifier(&mce_raise_nb); | ||
119 | return 0; | 215 | return 0; |
120 | } | 216 | } |
121 | 217 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index 54dcb8ff12e5..32996f9fab67 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h | |||
@@ -1,3 +1,4 @@ | |||
1 | #include <linux/sysdev.h> | ||
1 | #include <asm/mce.h> | 2 | #include <asm/mce.h> |
2 | 3 | ||
3 | enum severity_level { | 4 | enum severity_level { |
@@ -10,6 +11,20 @@ enum severity_level { | |||
10 | MCE_PANIC_SEVERITY, | 11 | MCE_PANIC_SEVERITY, |
11 | }; | 12 | }; |
12 | 13 | ||
14 | #define ATTR_LEN 16 | ||
15 | |||
16 | /* One object for each MCE bank, shared by all CPUs */ | ||
17 | struct mce_bank { | ||
18 | u64 ctl; /* subevents to enable */ | ||
19 | unsigned char init; /* initialise bank? */ | ||
20 | struct sysdev_attribute attr; /* sysdev attribute */ | ||
21 | char attrname[ATTR_LEN]; /* attribute name */ | ||
22 | }; | ||
23 | |||
13 | int mce_severity(struct mce *a, int tolerant, char **msg); | 24 | int mce_severity(struct mce *a, int tolerant, char **msg); |
25 | struct dentry *mce_get_debugfs_dir(void); | ||
14 | 26 | ||
15 | extern int mce_ser; | 27 | extern int mce_ser; |
28 | |||
29 | extern struct mce_bank *mce_banks; | ||
30 | |||
diff --git a/arch/x86/kernel/cpu/mcheck/mce-severity.c b/arch/x86/kernel/cpu/mcheck/mce-severity.c index ff0807f97056..8a85dd1b1aa1 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-severity.c +++ b/arch/x86/kernel/cpu/mcheck/mce-severity.c | |||
@@ -139,6 +139,7 @@ int mce_severity(struct mce *a, int tolerant, char **msg) | |||
139 | } | 139 | } |
140 | } | 140 | } |
141 | 141 | ||
142 | #ifdef CONFIG_DEBUG_FS | ||
142 | static void *s_start(struct seq_file *f, loff_t *pos) | 143 | static void *s_start(struct seq_file *f, loff_t *pos) |
143 | { | 144 | { |
144 | if (*pos >= ARRAY_SIZE(severities)) | 145 | if (*pos >= ARRAY_SIZE(severities)) |
@@ -197,7 +198,7 @@ static int __init severities_debugfs_init(void) | |||
197 | { | 198 | { |
198 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; | 199 | struct dentry *dmce = NULL, *fseverities_coverage = NULL; |
199 | 200 | ||
200 | dmce = debugfs_create_dir("mce", NULL); | 201 | dmce = mce_get_debugfs_dir(); |
201 | if (dmce == NULL) | 202 | if (dmce == NULL) |
202 | goto err_out; | 203 | goto err_out; |
203 | fseverities_coverage = debugfs_create_file("severities-coverage", | 204 | fseverities_coverage = debugfs_create_file("severities-coverage", |
@@ -209,10 +210,7 @@ static int __init severities_debugfs_init(void) | |||
209 | return 0; | 210 | return 0; |
210 | 211 | ||
211 | err_out: | 212 | err_out: |
212 | if (fseverities_coverage) | ||
213 | debugfs_remove(fseverities_coverage); | ||
214 | if (dmce) | ||
215 | debugfs_remove(dmce); | ||
216 | return -ENOMEM; | 213 | return -ENOMEM; |
217 | } | 214 | } |
218 | late_initcall(severities_debugfs_init); | 215 | late_initcall(severities_debugfs_init); |
216 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 01213048f62f..2f5aab26320e 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -34,6 +34,7 @@ | |||
34 | #include <linux/smp.h> | 34 | #include <linux/smp.h> |
35 | #include <linux/fs.h> | 35 | #include <linux/fs.h> |
36 | #include <linux/mm.h> | 36 | #include <linux/mm.h> |
37 | #include <linux/debugfs.h> | ||
37 | 38 | ||
38 | #include <asm/processor.h> | 39 | #include <asm/processor.h> |
39 | #include <asm/hw_irq.h> | 40 | #include <asm/hw_irq.h> |
@@ -45,21 +46,8 @@ | |||
45 | 46 | ||
46 | #include "mce-internal.h" | 47 | #include "mce-internal.h" |
47 | 48 | ||
48 | /* Handle unconfigured int18 (should never happen) */ | ||
49 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
50 | { | ||
51 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
52 | smp_processor_id()); | ||
53 | } | ||
54 | |||
55 | /* Call the installed machine check handler for this CPU setup. */ | ||
56 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
57 | unexpected_machine_check; | ||
58 | |||
59 | int mce_disabled __read_mostly; | 49 | int mce_disabled __read_mostly; |
60 | 50 | ||
61 | #ifdef CONFIG_X86_NEW_MCE | ||
62 | |||
63 | #define MISC_MCELOG_MINOR 227 | 51 | #define MISC_MCELOG_MINOR 227 |
64 | 52 | ||
65 | #define SPINUNIT 100 /* 100ns */ | 53 | #define SPINUNIT 100 /* 100ns */ |
@@ -77,7 +65,6 @@ DEFINE_PER_CPU(unsigned, mce_exception_count); | |||
77 | */ | 65 | */ |
78 | static int tolerant __read_mostly = 1; | 66 | static int tolerant __read_mostly = 1; |
79 | static int banks __read_mostly; | 67 | static int banks __read_mostly; |
80 | static u64 *bank __read_mostly; | ||
81 | static int rip_msr __read_mostly; | 68 | static int rip_msr __read_mostly; |
82 | static int mce_bootlog __read_mostly = -1; | 69 | static int mce_bootlog __read_mostly = -1; |
83 | static int monarch_timeout __read_mostly = -1; | 70 | static int monarch_timeout __read_mostly = -1; |
@@ -87,13 +74,13 @@ int mce_cmci_disabled __read_mostly; | |||
87 | int mce_ignore_ce __read_mostly; | 74 | int mce_ignore_ce __read_mostly; |
88 | int mce_ser __read_mostly; | 75 | int mce_ser __read_mostly; |
89 | 76 | ||
77 | struct mce_bank *mce_banks __read_mostly; | ||
78 | |||
90 | /* User mode helper program triggered by machine check event */ | 79 | /* User mode helper program triggered by machine check event */ |
91 | static unsigned long mce_need_notify; | 80 | static unsigned long mce_need_notify; |
92 | static char mce_helper[128]; | 81 | static char mce_helper[128]; |
93 | static char *mce_helper_argv[2] = { mce_helper, NULL }; | 82 | static char *mce_helper_argv[2] = { mce_helper, NULL }; |
94 | 83 | ||
95 | static unsigned long dont_init_banks; | ||
96 | |||
97 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); | 84 | static DECLARE_WAIT_QUEUE_HEAD(mce_wait); |
98 | static DEFINE_PER_CPU(struct mce, mces_seen); | 85 | static DEFINE_PER_CPU(struct mce, mces_seen); |
99 | static int cpu_missing; | 86 | static int cpu_missing; |
@@ -104,11 +91,6 @@ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { | |||
104 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL | 91 | [0 ... BITS_TO_LONGS(MAX_NR_BANKS)-1] = ~0UL |
105 | }; | 92 | }; |
106 | 93 | ||
107 | static inline int skip_bank_init(int i) | ||
108 | { | ||
109 | return i < BITS_PER_LONG && test_bit(i, &dont_init_banks); | ||
110 | } | ||
111 | |||
112 | static DEFINE_PER_CPU(struct work_struct, mce_work); | 94 | static DEFINE_PER_CPU(struct work_struct, mce_work); |
113 | 95 | ||
114 | /* Do initial initialization of a struct mce */ | 96 | /* Do initial initialization of a struct mce */ |
@@ -183,6 +165,11 @@ void mce_log(struct mce *mce) | |||
183 | set_bit(0, &mce_need_notify); | 165 | set_bit(0, &mce_need_notify); |
184 | } | 166 | } |
185 | 167 | ||
168 | void __weak decode_mce(struct mce *m) | ||
169 | { | ||
170 | return; | ||
171 | } | ||
172 | |||
186 | static void print_mce(struct mce *m) | 173 | static void print_mce(struct mce *m) |
187 | { | 174 | { |
188 | printk(KERN_EMERG | 175 | printk(KERN_EMERG |
@@ -205,6 +192,8 @@ static void print_mce(struct mce *m) | |||
205 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", | 192 | printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", |
206 | m->cpuvendor, m->cpuid, m->time, m->socketid, | 193 | m->cpuvendor, m->cpuid, m->time, m->socketid, |
207 | m->apicid); | 194 | m->apicid); |
195 | |||
196 | decode_mce(m); | ||
208 | } | 197 | } |
209 | 198 | ||
210 | static void print_mce_head(void) | 199 | static void print_mce_head(void) |
@@ -215,13 +204,19 @@ static void print_mce_head(void) | |||
215 | static void print_mce_tail(void) | 204 | static void print_mce_tail(void) |
216 | { | 205 | { |
217 | printk(KERN_EMERG "This is not a software problem!\n" | 206 | printk(KERN_EMERG "This is not a software problem!\n" |
218 | "Run through mcelog --ascii to decode and contact your hardware vendor\n"); | 207 | #if (!defined(CONFIG_EDAC) || !defined(CONFIG_CPU_SUP_AMD)) |
208 | "Run through mcelog --ascii to decode and contact your hardware vendor\n" | ||
209 | #endif | ||
210 | ); | ||
219 | } | 211 | } |
220 | 212 | ||
221 | #define PANIC_TIMEOUT 5 /* 5 seconds */ | 213 | #define PANIC_TIMEOUT 5 /* 5 seconds */ |
222 | 214 | ||
223 | static atomic_t mce_paniced; | 215 | static atomic_t mce_paniced; |
224 | 216 | ||
217 | static int fake_panic; | ||
218 | static atomic_t mce_fake_paniced; | ||
219 | |||
225 | /* Panic in progress. Enable interrupts and wait for final IPI */ | 220 | /* Panic in progress. Enable interrupts and wait for final IPI */ |
226 | static void wait_for_panic(void) | 221 | static void wait_for_panic(void) |
227 | { | 222 | { |
@@ -239,15 +234,21 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
239 | { | 234 | { |
240 | int i; | 235 | int i; |
241 | 236 | ||
242 | /* | 237 | if (!fake_panic) { |
243 | * Make sure only one CPU runs in machine check panic | 238 | /* |
244 | */ | 239 | * Make sure only one CPU runs in machine check panic |
245 | if (atomic_add_return(1, &mce_paniced) > 1) | 240 | */ |
246 | wait_for_panic(); | 241 | if (atomic_inc_return(&mce_paniced) > 1) |
247 | barrier(); | 242 | wait_for_panic(); |
243 | barrier(); | ||
248 | 244 | ||
249 | bust_spinlocks(1); | 245 | bust_spinlocks(1); |
250 | console_verbose(); | 246 | console_verbose(); |
247 | } else { | ||
248 | /* Don't log too much for fake panic */ | ||
249 | if (atomic_inc_return(&mce_fake_paniced) > 1) | ||
250 | return; | ||
251 | } | ||
251 | print_mce_head(); | 252 | print_mce_head(); |
252 | /* First print corrected ones that are still unlogged */ | 253 | /* First print corrected ones that are still unlogged */ |
253 | for (i = 0; i < MCE_LOG_LEN; i++) { | 254 | for (i = 0; i < MCE_LOG_LEN; i++) { |
@@ -274,9 +275,12 @@ static void mce_panic(char *msg, struct mce *final, char *exp) | |||
274 | print_mce_tail(); | 275 | print_mce_tail(); |
275 | if (exp) | 276 | if (exp) |
276 | printk(KERN_EMERG "Machine check: %s\n", exp); | 277 | printk(KERN_EMERG "Machine check: %s\n", exp); |
277 | if (panic_timeout == 0) | 278 | if (!fake_panic) { |
278 | panic_timeout = mce_panic_timeout; | 279 | if (panic_timeout == 0) |
279 | panic(msg); | 280 | panic_timeout = mce_panic_timeout; |
281 | panic(msg); | ||
282 | } else | ||
283 | printk(KERN_EMERG "Fake kernel panic: %s\n", msg); | ||
280 | } | 284 | } |
281 | 285 | ||
282 | /* Support code for software error injection */ | 286 | /* Support code for software error injection */ |
@@ -286,11 +290,11 @@ static int msr_to_offset(u32 msr) | |||
286 | unsigned bank = __get_cpu_var(injectm.bank); | 290 | unsigned bank = __get_cpu_var(injectm.bank); |
287 | if (msr == rip_msr) | 291 | if (msr == rip_msr) |
288 | return offsetof(struct mce, ip); | 292 | return offsetof(struct mce, ip); |
289 | if (msr == MSR_IA32_MC0_STATUS + bank*4) | 293 | if (msr == MSR_IA32_MCx_STATUS(bank)) |
290 | return offsetof(struct mce, status); | 294 | return offsetof(struct mce, status); |
291 | if (msr == MSR_IA32_MC0_ADDR + bank*4) | 295 | if (msr == MSR_IA32_MCx_ADDR(bank)) |
292 | return offsetof(struct mce, addr); | 296 | return offsetof(struct mce, addr); |
293 | if (msr == MSR_IA32_MC0_MISC + bank*4) | 297 | if (msr == MSR_IA32_MCx_MISC(bank)) |
294 | return offsetof(struct mce, misc); | 298 | return offsetof(struct mce, misc); |
295 | if (msr == MSR_IA32_MCG_STATUS) | 299 | if (msr == MSR_IA32_MCG_STATUS) |
296 | return offsetof(struct mce, mcgstatus); | 300 | return offsetof(struct mce, mcgstatus); |
@@ -495,7 +499,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
495 | 499 | ||
496 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 500 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
497 | for (i = 0; i < banks; i++) { | 501 | for (i = 0; i < banks; i++) { |
498 | if (!bank[i] || !test_bit(i, *b)) | 502 | if (!mce_banks[i].ctl || !test_bit(i, *b)) |
499 | continue; | 503 | continue; |
500 | 504 | ||
501 | m.misc = 0; | 505 | m.misc = 0; |
@@ -504,7 +508,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
504 | m.tsc = 0; | 508 | m.tsc = 0; |
505 | 509 | ||
506 | barrier(); | 510 | barrier(); |
507 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 511 | m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
508 | if (!(m.status & MCI_STATUS_VAL)) | 512 | if (!(m.status & MCI_STATUS_VAL)) |
509 | continue; | 513 | continue; |
510 | 514 | ||
@@ -519,9 +523,9 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
519 | continue; | 523 | continue; |
520 | 524 | ||
521 | if (m.status & MCI_STATUS_MISCV) | 525 | if (m.status & MCI_STATUS_MISCV) |
522 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | 526 | m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); |
523 | if (m.status & MCI_STATUS_ADDRV) | 527 | if (m.status & MCI_STATUS_ADDRV) |
524 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | 528 | m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); |
525 | 529 | ||
526 | if (!(flags & MCP_TIMESTAMP)) | 530 | if (!(flags & MCP_TIMESTAMP)) |
527 | m.tsc = 0; | 531 | m.tsc = 0; |
@@ -537,7 +541,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
537 | /* | 541 | /* |
538 | * Clear state for this bank. | 542 | * Clear state for this bank. |
539 | */ | 543 | */ |
540 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 544 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
541 | } | 545 | } |
542 | 546 | ||
543 | /* | 547 | /* |
@@ -558,7 +562,7 @@ static int mce_no_way_out(struct mce *m, char **msg) | |||
558 | int i; | 562 | int i; |
559 | 563 | ||
560 | for (i = 0; i < banks; i++) { | 564 | for (i = 0; i < banks; i++) { |
561 | m->status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 565 | m->status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
562 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) | 566 | if (mce_severity(m, tolerant, msg) >= MCE_PANIC_SEVERITY) |
563 | return 1; | 567 | return 1; |
564 | } | 568 | } |
@@ -618,7 +622,7 @@ out: | |||
618 | * This way we prevent any potential data corruption in a unrecoverable case | 622 | * This way we prevent any potential data corruption in a unrecoverable case |
619 | * and also makes sure always all CPU's errors are examined. | 623 | * and also makes sure always all CPU's errors are examined. |
620 | * | 624 | * |
621 | * Also this detects the case of an machine check event coming from outer | 625 | * Also this detects the case of a machine check event coming from outer |
622 | * space (not detected by any CPUs) In this case some external agent wants | 626 | * space (not detected by any CPUs) In this case some external agent wants |
623 | * us to shut down, so panic too. | 627 | * us to shut down, so panic too. |
624 | * | 628 | * |
@@ -671,7 +675,7 @@ static void mce_reign(void) | |||
671 | * No machine check event found. Must be some external | 675 | * No machine check event found. Must be some external |
672 | * source or one CPU is hung. Panic. | 676 | * source or one CPU is hung. Panic. |
673 | */ | 677 | */ |
674 | if (!m && tolerant < 3) | 678 | if (global_worst <= MCE_KEEP_SEVERITY && tolerant < 3) |
675 | mce_panic("Machine check from unknown source", NULL, NULL); | 679 | mce_panic("Machine check from unknown source", NULL, NULL); |
676 | 680 | ||
677 | /* | 681 | /* |
@@ -705,7 +709,7 @@ static int mce_start(int *no_way_out) | |||
705 | * global_nwo should be updated before mce_callin | 709 | * global_nwo should be updated before mce_callin |
706 | */ | 710 | */ |
707 | smp_wmb(); | 711 | smp_wmb(); |
708 | order = atomic_add_return(1, &mce_callin); | 712 | order = atomic_inc_return(&mce_callin); |
709 | 713 | ||
710 | /* | 714 | /* |
711 | * Wait for everyone. | 715 | * Wait for everyone. |
@@ -842,7 +846,7 @@ static void mce_clear_state(unsigned long *toclear) | |||
842 | 846 | ||
843 | for (i = 0; i < banks; i++) { | 847 | for (i = 0; i < banks; i++) { |
844 | if (test_bit(i, toclear)) | 848 | if (test_bit(i, toclear)) |
845 | mce_wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 849 | mce_wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
846 | } | 850 | } |
847 | } | 851 | } |
848 | 852 | ||
@@ -895,11 +899,11 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
895 | mce_setup(&m); | 899 | mce_setup(&m); |
896 | 900 | ||
897 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); | 901 | m.mcgstatus = mce_rdmsrl(MSR_IA32_MCG_STATUS); |
898 | no_way_out = mce_no_way_out(&m, &msg); | ||
899 | |||
900 | final = &__get_cpu_var(mces_seen); | 902 | final = &__get_cpu_var(mces_seen); |
901 | *final = m; | 903 | *final = m; |
902 | 904 | ||
905 | no_way_out = mce_no_way_out(&m, &msg); | ||
906 | |||
903 | barrier(); | 907 | barrier(); |
904 | 908 | ||
905 | /* | 909 | /* |
@@ -916,14 +920,14 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
916 | order = mce_start(&no_way_out); | 920 | order = mce_start(&no_way_out); |
917 | for (i = 0; i < banks; i++) { | 921 | for (i = 0; i < banks; i++) { |
918 | __clear_bit(i, toclear); | 922 | __clear_bit(i, toclear); |
919 | if (!bank[i]) | 923 | if (!mce_banks[i].ctl) |
920 | continue; | 924 | continue; |
921 | 925 | ||
922 | m.misc = 0; | 926 | m.misc = 0; |
923 | m.addr = 0; | 927 | m.addr = 0; |
924 | m.bank = i; | 928 | m.bank = i; |
925 | 929 | ||
926 | m.status = mce_rdmsrl(MSR_IA32_MC0_STATUS + i*4); | 930 | m.status = mce_rdmsrl(MSR_IA32_MCx_STATUS(i)); |
927 | if ((m.status & MCI_STATUS_VAL) == 0) | 931 | if ((m.status & MCI_STATUS_VAL) == 0) |
928 | continue; | 932 | continue; |
929 | 933 | ||
@@ -964,9 +968,9 @@ void do_machine_check(struct pt_regs *regs, long error_code) | |||
964 | kill_it = 1; | 968 | kill_it = 1; |
965 | 969 | ||
966 | if (m.status & MCI_STATUS_MISCV) | 970 | if (m.status & MCI_STATUS_MISCV) |
967 | m.misc = mce_rdmsrl(MSR_IA32_MC0_MISC + i*4); | 971 | m.misc = mce_rdmsrl(MSR_IA32_MCx_MISC(i)); |
968 | if (m.status & MCI_STATUS_ADDRV) | 972 | if (m.status & MCI_STATUS_ADDRV) |
969 | m.addr = mce_rdmsrl(MSR_IA32_MC0_ADDR + i*4); | 973 | m.addr = mce_rdmsrl(MSR_IA32_MCx_ADDR(i)); |
970 | 974 | ||
971 | /* | 975 | /* |
972 | * Action optional error. Queue address for later processing. | 976 | * Action optional error. Queue address for later processing. |
@@ -1091,7 +1095,7 @@ void mce_log_therm_throt_event(__u64 status) | |||
1091 | */ | 1095 | */ |
1092 | static int check_interval = 5 * 60; /* 5 minutes */ | 1096 | static int check_interval = 5 * 60; /* 5 minutes */ |
1093 | 1097 | ||
1094 | static DEFINE_PER_CPU(int, next_interval); /* in jiffies */ | 1098 | static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */ |
1095 | static DEFINE_PER_CPU(struct timer_list, mce_timer); | 1099 | static DEFINE_PER_CPU(struct timer_list, mce_timer); |
1096 | 1100 | ||
1097 | static void mcheck_timer(unsigned long data) | 1101 | static void mcheck_timer(unsigned long data) |
@@ -1110,7 +1114,7 @@ static void mcheck_timer(unsigned long data) | |||
1110 | * Alert userspace if needed. If we logged an MCE, reduce the | 1114 | * Alert userspace if needed. If we logged an MCE, reduce the |
1111 | * polling interval, otherwise increase the polling interval. | 1115 | * polling interval, otherwise increase the polling interval. |
1112 | */ | 1116 | */ |
1113 | n = &__get_cpu_var(next_interval); | 1117 | n = &__get_cpu_var(mce_next_interval); |
1114 | if (mce_notify_irq()) | 1118 | if (mce_notify_irq()) |
1115 | *n = max(*n/2, HZ/100); | 1119 | *n = max(*n/2, HZ/100); |
1116 | else | 1120 | else |
@@ -1159,10 +1163,25 @@ int mce_notify_irq(void) | |||
1159 | } | 1163 | } |
1160 | EXPORT_SYMBOL_GPL(mce_notify_irq); | 1164 | EXPORT_SYMBOL_GPL(mce_notify_irq); |
1161 | 1165 | ||
1166 | static int mce_banks_init(void) | ||
1167 | { | ||
1168 | int i; | ||
1169 | |||
1170 | mce_banks = kzalloc(banks * sizeof(struct mce_bank), GFP_KERNEL); | ||
1171 | if (!mce_banks) | ||
1172 | return -ENOMEM; | ||
1173 | for (i = 0; i < banks; i++) { | ||
1174 | struct mce_bank *b = &mce_banks[i]; | ||
1175 | b->ctl = -1ULL; | ||
1176 | b->init = 1; | ||
1177 | } | ||
1178 | return 0; | ||
1179 | } | ||
1180 | |||
1162 | /* | 1181 | /* |
1163 | * Initialize Machine Checks for a CPU. | 1182 | * Initialize Machine Checks for a CPU. |
1164 | */ | 1183 | */ |
1165 | static int mce_cap_init(void) | 1184 | static int __cpuinit mce_cap_init(void) |
1166 | { | 1185 | { |
1167 | unsigned b; | 1186 | unsigned b; |
1168 | u64 cap; | 1187 | u64 cap; |
@@ -1182,11 +1201,10 @@ static int mce_cap_init(void) | |||
1182 | /* Don't support asymmetric configurations today */ | 1201 | /* Don't support asymmetric configurations today */ |
1183 | WARN_ON(banks != 0 && b != banks); | 1202 | WARN_ON(banks != 0 && b != banks); |
1184 | banks = b; | 1203 | banks = b; |
1185 | if (!bank) { | 1204 | if (!mce_banks) { |
1186 | bank = kmalloc(banks * sizeof(u64), GFP_KERNEL); | 1205 | int err = mce_banks_init(); |
1187 | if (!bank) | 1206 | if (err) |
1188 | return -ENOMEM; | 1207 | return err; |
1189 | memset(bank, 0xff, banks * sizeof(u64)); | ||
1190 | } | 1208 | } |
1191 | 1209 | ||
1192 | /* Use accurate RIP reporting if available. */ | 1210 | /* Use accurate RIP reporting if available. */ |
@@ -1218,15 +1236,16 @@ static void mce_init(void) | |||
1218 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | 1236 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); |
1219 | 1237 | ||
1220 | for (i = 0; i < banks; i++) { | 1238 | for (i = 0; i < banks; i++) { |
1221 | if (skip_bank_init(i)) | 1239 | struct mce_bank *b = &mce_banks[i]; |
1240 | if (!b->init) | ||
1222 | continue; | 1241 | continue; |
1223 | wrmsrl(MSR_IA32_MC0_CTL+4*i, bank[i]); | 1242 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); |
1224 | wrmsrl(MSR_IA32_MC0_STATUS+4*i, 0); | 1243 | wrmsrl(MSR_IA32_MCx_STATUS(i), 0); |
1225 | } | 1244 | } |
1226 | } | 1245 | } |
1227 | 1246 | ||
1228 | /* Add per CPU specific workarounds here */ | 1247 | /* Add per CPU specific workarounds here */ |
1229 | static int mce_cpu_quirks(struct cpuinfo_x86 *c) | 1248 | static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c) |
1230 | { | 1249 | { |
1231 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { | 1250 | if (c->x86_vendor == X86_VENDOR_UNKNOWN) { |
1232 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); | 1251 | pr_info("MCE: unknown CPU type - not enabling MCE support.\n"); |
@@ -1241,7 +1260,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1241 | * trips off incorrectly with the IOMMU & 3ware | 1260 | * trips off incorrectly with the IOMMU & 3ware |
1242 | * & Cerberus: | 1261 | * & Cerberus: |
1243 | */ | 1262 | */ |
1244 | clear_bit(10, (unsigned long *)&bank[4]); | 1263 | clear_bit(10, (unsigned long *)&mce_banks[4].ctl); |
1245 | } | 1264 | } |
1246 | if (c->x86 <= 17 && mce_bootlog < 0) { | 1265 | if (c->x86 <= 17 && mce_bootlog < 0) { |
1247 | /* | 1266 | /* |
@@ -1255,7 +1274,7 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1255 | * by default. | 1274 | * by default. |
1256 | */ | 1275 | */ |
1257 | if (c->x86 == 6 && banks > 0) | 1276 | if (c->x86 == 6 && banks > 0) |
1258 | bank[0] = 0; | 1277 | mce_banks[0].ctl = 0; |
1259 | } | 1278 | } |
1260 | 1279 | ||
1261 | if (c->x86_vendor == X86_VENDOR_INTEL) { | 1280 | if (c->x86_vendor == X86_VENDOR_INTEL) { |
@@ -1268,8 +1287,8 @@ static int mce_cpu_quirks(struct cpuinfo_x86 *c) | |||
1268 | * valid event later, merely don't write CTL0. | 1287 | * valid event later, merely don't write CTL0. |
1269 | */ | 1288 | */ |
1270 | 1289 | ||
1271 | if (c->x86 == 6 && c->x86_model < 0x1A) | 1290 | if (c->x86 == 6 && c->x86_model < 0x1A && banks > 0) |
1272 | __set_bit(0, &dont_init_banks); | 1291 | mce_banks[0].init = 0; |
1273 | 1292 | ||
1274 | /* | 1293 | /* |
1275 | * All newer Intel systems support MCE broadcasting. Enable | 1294 | * All newer Intel systems support MCE broadcasting. Enable |
@@ -1325,7 +1344,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c) | |||
1325 | static void mce_init_timer(void) | 1344 | static void mce_init_timer(void) |
1326 | { | 1345 | { |
1327 | struct timer_list *t = &__get_cpu_var(mce_timer); | 1346 | struct timer_list *t = &__get_cpu_var(mce_timer); |
1328 | int *n = &__get_cpu_var(next_interval); | 1347 | int *n = &__get_cpu_var(mce_next_interval); |
1329 | 1348 | ||
1330 | if (mce_ignore_ce) | 1349 | if (mce_ignore_ce) |
1331 | return; | 1350 | return; |
@@ -1338,6 +1357,17 @@ static void mce_init_timer(void) | |||
1338 | add_timer_on(t, smp_processor_id()); | 1357 | add_timer_on(t, smp_processor_id()); |
1339 | } | 1358 | } |
1340 | 1359 | ||
1360 | /* Handle unconfigured int18 (should never happen) */ | ||
1361 | static void unexpected_machine_check(struct pt_regs *regs, long error_code) | ||
1362 | { | ||
1363 | printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", | ||
1364 | smp_processor_id()); | ||
1365 | } | ||
1366 | |||
1367 | /* Call the installed machine check handler for this CPU setup. */ | ||
1368 | void (*machine_check_vector)(struct pt_regs *, long error_code) = | ||
1369 | unexpected_machine_check; | ||
1370 | |||
1341 | /* | 1371 | /* |
1342 | * Called for each booted CPU to set up machine checks. | 1372 | * Called for each booted CPU to set up machine checks. |
1343 | * Must be called with preempt off: | 1373 | * Must be called with preempt off: |
@@ -1551,8 +1581,10 @@ static struct miscdevice mce_log_device = { | |||
1551 | */ | 1581 | */ |
1552 | static int __init mcheck_enable(char *str) | 1582 | static int __init mcheck_enable(char *str) |
1553 | { | 1583 | { |
1554 | if (*str == 0) | 1584 | if (*str == 0) { |
1555 | enable_p5_mce(); | 1585 | enable_p5_mce(); |
1586 | return 1; | ||
1587 | } | ||
1556 | if (*str == '=') | 1588 | if (*str == '=') |
1557 | str++; | 1589 | str++; |
1558 | if (!strcmp(str, "off")) | 1590 | if (!strcmp(str, "off")) |
@@ -1593,8 +1625,9 @@ static int mce_disable(void) | |||
1593 | int i; | 1625 | int i; |
1594 | 1626 | ||
1595 | for (i = 0; i < banks; i++) { | 1627 | for (i = 0; i < banks; i++) { |
1596 | if (!skip_bank_init(i)) | 1628 | struct mce_bank *b = &mce_banks[i]; |
1597 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | 1629 | if (b->init) |
1630 | wrmsrl(MSR_IA32_MCx_CTL(i), 0); | ||
1598 | } | 1631 | } |
1599 | return 0; | 1632 | return 0; |
1600 | } | 1633 | } |
@@ -1669,14 +1702,15 @@ DEFINE_PER_CPU(struct sys_device, mce_dev); | |||
1669 | __cpuinitdata | 1702 | __cpuinitdata |
1670 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); | 1703 | void (*threshold_cpu_callback)(unsigned long action, unsigned int cpu); |
1671 | 1704 | ||
1672 | static struct sysdev_attribute *bank_attrs; | 1705 | static inline struct mce_bank *attr_to_bank(struct sysdev_attribute *attr) |
1706 | { | ||
1707 | return container_of(attr, struct mce_bank, attr); | ||
1708 | } | ||
1673 | 1709 | ||
1674 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1710 | static ssize_t show_bank(struct sys_device *s, struct sysdev_attribute *attr, |
1675 | char *buf) | 1711 | char *buf) |
1676 | { | 1712 | { |
1677 | u64 b = bank[attr - bank_attrs]; | 1713 | return sprintf(buf, "%llx\n", attr_to_bank(attr)->ctl); |
1678 | |||
1679 | return sprintf(buf, "%llx\n", b); | ||
1680 | } | 1714 | } |
1681 | 1715 | ||
1682 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | 1716 | static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, |
@@ -1687,7 +1721,7 @@ static ssize_t set_bank(struct sys_device *s, struct sysdev_attribute *attr, | |||
1687 | if (strict_strtoull(buf, 0, &new) < 0) | 1721 | if (strict_strtoull(buf, 0, &new) < 0) |
1688 | return -EINVAL; | 1722 | return -EINVAL; |
1689 | 1723 | ||
1690 | bank[attr - bank_attrs] = new; | 1724 | attr_to_bank(attr)->ctl = new; |
1691 | mce_restart(); | 1725 | mce_restart(); |
1692 | 1726 | ||
1693 | return size; | 1727 | return size; |
@@ -1829,7 +1863,7 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1829 | } | 1863 | } |
1830 | for (j = 0; j < banks; j++) { | 1864 | for (j = 0; j < banks; j++) { |
1831 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), | 1865 | err = sysdev_create_file(&per_cpu(mce_dev, cpu), |
1832 | &bank_attrs[j]); | 1866 | &mce_banks[j].attr); |
1833 | if (err) | 1867 | if (err) |
1834 | goto error2; | 1868 | goto error2; |
1835 | } | 1869 | } |
@@ -1838,10 +1872,10 @@ static __cpuinit int mce_create_device(unsigned int cpu) | |||
1838 | return 0; | 1872 | return 0; |
1839 | error2: | 1873 | error2: |
1840 | while (--j >= 0) | 1874 | while (--j >= 0) |
1841 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[j]); | 1875 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[j].attr); |
1842 | error: | 1876 | error: |
1843 | while (--i >= 0) | 1877 | while (--i >= 0) |
1844 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1878 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); |
1845 | 1879 | ||
1846 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1880 | sysdev_unregister(&per_cpu(mce_dev, cpu)); |
1847 | 1881 | ||
@@ -1859,7 +1893,7 @@ static __cpuinit void mce_remove_device(unsigned int cpu) | |||
1859 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); | 1893 | sysdev_remove_file(&per_cpu(mce_dev, cpu), mce_attrs[i]); |
1860 | 1894 | ||
1861 | for (i = 0; i < banks; i++) | 1895 | for (i = 0; i < banks; i++) |
1862 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &bank_attrs[i]); | 1896 | sysdev_remove_file(&per_cpu(mce_dev, cpu), &mce_banks[i].attr); |
1863 | 1897 | ||
1864 | sysdev_unregister(&per_cpu(mce_dev, cpu)); | 1898 | sysdev_unregister(&per_cpu(mce_dev, cpu)); |
1865 | cpumask_clear_cpu(cpu, mce_dev_initialized); | 1899 | cpumask_clear_cpu(cpu, mce_dev_initialized); |
@@ -1876,8 +1910,9 @@ static void mce_disable_cpu(void *h) | |||
1876 | if (!(action & CPU_TASKS_FROZEN)) | 1910 | if (!(action & CPU_TASKS_FROZEN)) |
1877 | cmci_clear(); | 1911 | cmci_clear(); |
1878 | for (i = 0; i < banks; i++) { | 1912 | for (i = 0; i < banks; i++) { |
1879 | if (!skip_bank_init(i)) | 1913 | struct mce_bank *b = &mce_banks[i]; |
1880 | wrmsrl(MSR_IA32_MC0_CTL + i*4, 0); | 1914 | if (b->init) |
1915 | wrmsrl(MSR_IA32_MCx_CTL(i), 0); | ||
1881 | } | 1916 | } |
1882 | } | 1917 | } |
1883 | 1918 | ||
@@ -1892,8 +1927,9 @@ static void mce_reenable_cpu(void *h) | |||
1892 | if (!(action & CPU_TASKS_FROZEN)) | 1927 | if (!(action & CPU_TASKS_FROZEN)) |
1893 | cmci_reenable(); | 1928 | cmci_reenable(); |
1894 | for (i = 0; i < banks; i++) { | 1929 | for (i = 0; i < banks; i++) { |
1895 | if (!skip_bank_init(i)) | 1930 | struct mce_bank *b = &mce_banks[i]; |
1896 | wrmsrl(MSR_IA32_MC0_CTL + i*4, bank[i]); | 1931 | if (b->init) |
1932 | wrmsrl(MSR_IA32_MCx_CTL(i), b->ctl); | ||
1897 | } | 1933 | } |
1898 | } | 1934 | } |
1899 | 1935 | ||
@@ -1925,7 +1961,7 @@ mce_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) | |||
1925 | case CPU_DOWN_FAILED: | 1961 | case CPU_DOWN_FAILED: |
1926 | case CPU_DOWN_FAILED_FROZEN: | 1962 | case CPU_DOWN_FAILED_FROZEN: |
1927 | t->expires = round_jiffies(jiffies + | 1963 | t->expires = round_jiffies(jiffies + |
1928 | __get_cpu_var(next_interval)); | 1964 | __get_cpu_var(mce_next_interval)); |
1929 | add_timer_on(t, cpu); | 1965 | add_timer_on(t, cpu); |
1930 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); | 1966 | smp_call_function_single(cpu, mce_reenable_cpu, &action, 1); |
1931 | break; | 1967 | break; |
@@ -1941,35 +1977,21 @@ static struct notifier_block mce_cpu_notifier __cpuinitdata = { | |||
1941 | .notifier_call = mce_cpu_callback, | 1977 | .notifier_call = mce_cpu_callback, |
1942 | }; | 1978 | }; |
1943 | 1979 | ||
1944 | static __init int mce_init_banks(void) | 1980 | static __init void mce_init_banks(void) |
1945 | { | 1981 | { |
1946 | int i; | 1982 | int i; |
1947 | 1983 | ||
1948 | bank_attrs = kzalloc(sizeof(struct sysdev_attribute) * banks, | ||
1949 | GFP_KERNEL); | ||
1950 | if (!bank_attrs) | ||
1951 | return -ENOMEM; | ||
1952 | |||
1953 | for (i = 0; i < banks; i++) { | 1984 | for (i = 0; i < banks; i++) { |
1954 | struct sysdev_attribute *a = &bank_attrs[i]; | 1985 | struct mce_bank *b = &mce_banks[i]; |
1986 | struct sysdev_attribute *a = &b->attr; | ||
1955 | 1987 | ||
1956 | a->attr.name = kasprintf(GFP_KERNEL, "bank%d", i); | 1988 | a->attr.name = b->attrname; |
1957 | if (!a->attr.name) | 1989 | snprintf(b->attrname, ATTR_LEN, "bank%d", i); |
1958 | goto nomem; | ||
1959 | 1990 | ||
1960 | a->attr.mode = 0644; | 1991 | a->attr.mode = 0644; |
1961 | a->show = show_bank; | 1992 | a->show = show_bank; |
1962 | a->store = set_bank; | 1993 | a->store = set_bank; |
1963 | } | 1994 | } |
1964 | return 0; | ||
1965 | |||
1966 | nomem: | ||
1967 | while (--i >= 0) | ||
1968 | kfree(bank_attrs[i].attr.name); | ||
1969 | kfree(bank_attrs); | ||
1970 | bank_attrs = NULL; | ||
1971 | |||
1972 | return -ENOMEM; | ||
1973 | } | 1995 | } |
1974 | 1996 | ||
1975 | static __init int mce_init_device(void) | 1997 | static __init int mce_init_device(void) |
@@ -1982,9 +2004,7 @@ static __init int mce_init_device(void) | |||
1982 | 2004 | ||
1983 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); | 2005 | zalloc_cpumask_var(&mce_dev_initialized, GFP_KERNEL); |
1984 | 2006 | ||
1985 | err = mce_init_banks(); | 2007 | mce_init_banks(); |
1986 | if (err) | ||
1987 | return err; | ||
1988 | 2008 | ||
1989 | err = sysdev_class_register(&mce_sysclass); | 2009 | err = sysdev_class_register(&mce_sysclass); |
1990 | if (err) | 2010 | if (err) |
@@ -2004,57 +2024,65 @@ static __init int mce_init_device(void) | |||
2004 | 2024 | ||
2005 | device_initcall(mce_init_device); | 2025 | device_initcall(mce_init_device); |
2006 | 2026 | ||
2007 | #else /* CONFIG_X86_OLD_MCE: */ | 2027 | /* |
2008 | 2028 | * Old style boot options parsing. Only for compatibility. | |
2009 | int nr_mce_banks; | 2029 | */ |
2010 | EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */ | 2030 | static int __init mcheck_disable(char *str) |
2031 | { | ||
2032 | mce_disabled = 1; | ||
2033 | return 1; | ||
2034 | } | ||
2035 | __setup("nomce", mcheck_disable); | ||
2011 | 2036 | ||
2012 | /* This has to be run for each processor */ | 2037 | #ifdef CONFIG_DEBUG_FS |
2013 | void mcheck_init(struct cpuinfo_x86 *c) | 2038 | struct dentry *mce_get_debugfs_dir(void) |
2014 | { | 2039 | { |
2015 | if (mce_disabled) | 2040 | static struct dentry *dmce; |
2016 | return; | ||
2017 | 2041 | ||
2018 | switch (c->x86_vendor) { | 2042 | if (!dmce) |
2019 | case X86_VENDOR_AMD: | 2043 | dmce = debugfs_create_dir("mce", NULL); |
2020 | amd_mcheck_init(c); | ||
2021 | break; | ||
2022 | 2044 | ||
2023 | case X86_VENDOR_INTEL: | 2045 | return dmce; |
2024 | if (c->x86 == 5) | 2046 | } |
2025 | intel_p5_mcheck_init(c); | ||
2026 | if (c->x86 == 6) | ||
2027 | intel_p6_mcheck_init(c); | ||
2028 | if (c->x86 == 15) | ||
2029 | intel_p4_mcheck_init(c); | ||
2030 | break; | ||
2031 | 2047 | ||
2032 | case X86_VENDOR_CENTAUR: | 2048 | static void mce_reset(void) |
2033 | if (c->x86 == 5) | 2049 | { |
2034 | winchip_mcheck_init(c); | 2050 | cpu_missing = 0; |
2035 | break; | 2051 | atomic_set(&mce_fake_paniced, 0); |
2052 | atomic_set(&mce_executing, 0); | ||
2053 | atomic_set(&mce_callin, 0); | ||
2054 | atomic_set(&global_nwo, 0); | ||
2055 | } | ||
2036 | 2056 | ||
2037 | default: | 2057 | static int fake_panic_get(void *data, u64 *val) |
2038 | break; | 2058 | { |
2039 | } | 2059 | *val = fake_panic; |
2040 | printk(KERN_INFO "mce: CPU supports %d MCE banks\n", nr_mce_banks); | 2060 | return 0; |
2041 | } | 2061 | } |
2042 | 2062 | ||
2043 | static int __init mcheck_enable(char *str) | 2063 | static int fake_panic_set(void *data, u64 val) |
2044 | { | 2064 | { |
2045 | mce_p5_enabled = 1; | 2065 | mce_reset(); |
2046 | return 1; | 2066 | fake_panic = val; |
2067 | return 0; | ||
2047 | } | 2068 | } |
2048 | __setup("mce", mcheck_enable); | ||
2049 | 2069 | ||
2050 | #endif /* CONFIG_X86_OLD_MCE */ | 2070 | DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get, |
2071 | fake_panic_set, "%llu\n"); | ||
2051 | 2072 | ||
2052 | /* | 2073 | static int __init mce_debugfs_init(void) |
2053 | * Old style boot options parsing. Only for compatibility. | ||
2054 | */ | ||
2055 | static int __init mcheck_disable(char *str) | ||
2056 | { | 2074 | { |
2057 | mce_disabled = 1; | 2075 | struct dentry *dmce, *ffake_panic; |
2058 | return 1; | 2076 | |
2077 | dmce = mce_get_debugfs_dir(); | ||
2078 | if (!dmce) | ||
2079 | return -ENOMEM; | ||
2080 | ffake_panic = debugfs_create_file("fake_panic", 0444, dmce, NULL, | ||
2081 | &fake_panic_fops); | ||
2082 | if (!ffake_panic) | ||
2083 | return -ENOMEM; | ||
2084 | |||
2085 | return 0; | ||
2059 | } | 2086 | } |
2060 | __setup("nomce", mcheck_disable); | 2087 | late_initcall(mce_debugfs_init); |
2088 | #endif | ||
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index ddae21620bda..8cd5224943b5 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c | |||
@@ -69,7 +69,7 @@ struct threshold_bank { | |||
69 | struct threshold_block *blocks; | 69 | struct threshold_block *blocks; |
70 | cpumask_var_t cpus; | 70 | cpumask_var_t cpus; |
71 | }; | 71 | }; |
72 | static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); | 72 | static DEFINE_PER_CPU(struct threshold_bank * [NR_BANKS], threshold_banks); |
73 | 73 | ||
74 | #ifdef CONFIG_SMP | 74 | #ifdef CONFIG_SMP |
75 | static unsigned char shared_bank[NR_BANKS] = { | 75 | static unsigned char shared_bank[NR_BANKS] = { |
@@ -489,12 +489,14 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
489 | int i, err = 0; | 489 | int i, err = 0; |
490 | struct threshold_bank *b = NULL; | 490 | struct threshold_bank *b = NULL; |
491 | char name[32]; | 491 | char name[32]; |
492 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
493 | |||
492 | 494 | ||
493 | sprintf(name, "threshold_bank%i", bank); | 495 | sprintf(name, "threshold_bank%i", bank); |
494 | 496 | ||
495 | #ifdef CONFIG_SMP | 497 | #ifdef CONFIG_SMP |
496 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ | 498 | if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ |
497 | i = cpumask_first(cpu_core_mask(cpu)); | 499 | i = cpumask_first(c->llc_shared_map); |
498 | 500 | ||
499 | /* first core not up yet */ | 501 | /* first core not up yet */ |
500 | if (cpu_data(i).cpu_core_id) | 502 | if (cpu_data(i).cpu_core_id) |
@@ -514,7 +516,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
514 | if (err) | 516 | if (err) |
515 | goto out; | 517 | goto out; |
516 | 518 | ||
517 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 519 | cpumask_copy(b->cpus, c->llc_shared_map); |
518 | per_cpu(threshold_banks, cpu)[bank] = b; | 520 | per_cpu(threshold_banks, cpu)[bank] = b; |
519 | 521 | ||
520 | goto out; | 522 | goto out; |
@@ -539,7 +541,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) | |||
539 | #ifndef CONFIG_SMP | 541 | #ifndef CONFIG_SMP |
540 | cpumask_setall(b->cpus); | 542 | cpumask_setall(b->cpus); |
541 | #else | 543 | #else |
542 | cpumask_copy(b->cpus, cpu_core_mask(cpu)); | 544 | cpumask_copy(b->cpus, c->llc_shared_map); |
543 | #endif | 545 | #endif |
544 | 546 | ||
545 | per_cpu(threshold_banks, cpu)[bank] = b; | 547 | per_cpu(threshold_banks, cpu)[bank] = b; |
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel.c b/arch/x86/kernel/cpu/mcheck/mce_intel.c index e1acec0f7a32..889f665fe93d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_intel.c +++ b/arch/x86/kernel/cpu/mcheck/mce_intel.c | |||
@@ -90,7 +90,7 @@ static void cmci_discover(int banks, int boot) | |||
90 | if (test_bit(i, owned)) | 90 | if (test_bit(i, owned)) |
91 | continue; | 91 | continue; |
92 | 92 | ||
93 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 93 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
94 | 94 | ||
95 | /* Already owned by someone else? */ | 95 | /* Already owned by someone else? */ |
96 | if (val & CMCI_EN) { | 96 | if (val & CMCI_EN) { |
@@ -101,8 +101,8 @@ static void cmci_discover(int banks, int boot) | |||
101 | } | 101 | } |
102 | 102 | ||
103 | val |= CMCI_EN | CMCI_THRESHOLD; | 103 | val |= CMCI_EN | CMCI_THRESHOLD; |
104 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | 104 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
105 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 105 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
106 | 106 | ||
107 | /* Did the enable bit stick? -- the bank supports CMCI */ | 107 | /* Did the enable bit stick? -- the bank supports CMCI */ |
108 | if (val & CMCI_EN) { | 108 | if (val & CMCI_EN) { |
@@ -152,9 +152,9 @@ void cmci_clear(void) | |||
152 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) | 152 | if (!test_bit(i, __get_cpu_var(mce_banks_owned))) |
153 | continue; | 153 | continue; |
154 | /* Disable CMCI */ | 154 | /* Disable CMCI */ |
155 | rdmsrl(MSR_IA32_MC0_CTL2 + i, val); | 155 | rdmsrl(MSR_IA32_MCx_CTL2(i), val); |
156 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); | 156 | val &= ~(CMCI_EN|CMCI_THRESHOLD_MASK); |
157 | wrmsrl(MSR_IA32_MC0_CTL2 + i, val); | 157 | wrmsrl(MSR_IA32_MCx_CTL2(i), val); |
158 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); | 158 | __clear_bit(i, __get_cpu_var(mce_banks_owned)); |
159 | } | 159 | } |
160 | spin_unlock_irqrestore(&cmci_discover_lock, flags); | 160 | spin_unlock_irqrestore(&cmci_discover_lock, flags); |
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c deleted file mode 100644 index f5f2d6f71fb6..000000000000 --- a/arch/x86/kernel/cpu/mcheck/non-fatal.c +++ /dev/null | |||
@@ -1,94 +0,0 @@ | |||
1 | /* | ||
2 | * Non Fatal Machine Check Exception Reporting | ||
3 | * | ||
4 | * (C) Copyright 2002 Dave Jones. <davej@redhat.com> | ||
5 | * | ||
6 | * This file contains routines to check for non-fatal MCEs every 15s | ||
7 | * | ||
8 | */ | ||
9 | #include <linux/interrupt.h> | ||
10 | #include <linux/workqueue.h> | ||
11 | #include <linux/jiffies.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/module.h> | ||
14 | #include <linux/types.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/smp.h> | ||
17 | |||
18 | #include <asm/processor.h> | ||
19 | #include <asm/system.h> | ||
20 | #include <asm/mce.h> | ||
21 | #include <asm/msr.h> | ||
22 | |||
23 | static int firstbank; | ||
24 | |||
25 | #define MCE_RATE (15*HZ) /* timer rate is 15s */ | ||
26 | |||
27 | static void mce_checkregs(void *info) | ||
28 | { | ||
29 | u32 low, high; | ||
30 | int i; | ||
31 | |||
32 | for (i = firstbank; i < nr_mce_banks; i++) { | ||
33 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
34 | |||
35 | if (!(high & (1<<31))) | ||
36 | continue; | ||
37 | |||
38 | printk(KERN_INFO "MCE: The hardware reports a non fatal, " | ||
39 | "correctable incident occurred on CPU %d.\n", | ||
40 | smp_processor_id()); | ||
41 | |||
42 | printk(KERN_INFO "Bank %d: %08x%08x\n", i, high, low); | ||
43 | |||
44 | /* | ||
45 | * Scrub the error so we don't pick it up in MCE_RATE | ||
46 | * seconds time: | ||
47 | */ | ||
48 | wrmsr(MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL); | ||
49 | |||
50 | /* Serialize: */ | ||
51 | wmb(); | ||
52 | add_taint(TAINT_MACHINE_CHECK); | ||
53 | } | ||
54 | } | ||
55 | |||
56 | static void mce_work_fn(struct work_struct *work); | ||
57 | static DECLARE_DELAYED_WORK(mce_work, mce_work_fn); | ||
58 | |||
59 | static void mce_work_fn(struct work_struct *work) | ||
60 | { | ||
61 | on_each_cpu(mce_checkregs, NULL, 1); | ||
62 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | ||
63 | } | ||
64 | |||
65 | static int __init init_nonfatal_mce_checker(void) | ||
66 | { | ||
67 | struct cpuinfo_x86 *c = &boot_cpu_data; | ||
68 | |||
69 | /* Check for MCE support */ | ||
70 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
71 | return -ENODEV; | ||
72 | |||
73 | /* Check for PPro style MCA */ | ||
74 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
75 | return -ENODEV; | ||
76 | |||
77 | /* Some Athlons misbehave when we frob bank 0 */ | ||
78 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
79 | boot_cpu_data.x86 == 6) | ||
80 | firstbank = 1; | ||
81 | else | ||
82 | firstbank = 0; | ||
83 | |||
84 | /* | ||
85 | * Check for non-fatal errors every MCE_RATE s | ||
86 | */ | ||
87 | schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); | ||
88 | printk(KERN_INFO "Machine check exception polling timer started.\n"); | ||
89 | |||
90 | return 0; | ||
91 | } | ||
92 | module_init(init_nonfatal_mce_checker); | ||
93 | |||
94 | MODULE_LICENSE("GPL"); | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c deleted file mode 100644 index 4482aea9aa2e..000000000000 --- a/arch/x86/kernel/cpu/mcheck/p4.c +++ /dev/null | |||
@@ -1,163 +0,0 @@ | |||
1 | /* | ||
2 | * P4 specific Machine Check Exception Reporting | ||
3 | */ | ||
4 | #include <linux/kernel.h> | ||
5 | #include <linux/types.h> | ||
6 | #include <linux/init.h> | ||
7 | #include <linux/smp.h> | ||
8 | |||
9 | #include <asm/processor.h> | ||
10 | #include <asm/mce.h> | ||
11 | #include <asm/msr.h> | ||
12 | |||
13 | /* as supported by the P4/Xeon family */ | ||
14 | struct intel_mce_extended_msrs { | ||
15 | u32 eax; | ||
16 | u32 ebx; | ||
17 | u32 ecx; | ||
18 | u32 edx; | ||
19 | u32 esi; | ||
20 | u32 edi; | ||
21 | u32 ebp; | ||
22 | u32 esp; | ||
23 | u32 eflags; | ||
24 | u32 eip; | ||
25 | /* u32 *reserved[]; */ | ||
26 | }; | ||
27 | |||
28 | static int mce_num_extended_msrs; | ||
29 | |||
30 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | ||
31 | static void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | ||
32 | { | ||
33 | u32 h; | ||
34 | |||
35 | rdmsr(MSR_IA32_MCG_EAX, r->eax, h); | ||
36 | rdmsr(MSR_IA32_MCG_EBX, r->ebx, h); | ||
37 | rdmsr(MSR_IA32_MCG_ECX, r->ecx, h); | ||
38 | rdmsr(MSR_IA32_MCG_EDX, r->edx, h); | ||
39 | rdmsr(MSR_IA32_MCG_ESI, r->esi, h); | ||
40 | rdmsr(MSR_IA32_MCG_EDI, r->edi, h); | ||
41 | rdmsr(MSR_IA32_MCG_EBP, r->ebp, h); | ||
42 | rdmsr(MSR_IA32_MCG_ESP, r->esp, h); | ||
43 | rdmsr(MSR_IA32_MCG_EFLAGS, r->eflags, h); | ||
44 | rdmsr(MSR_IA32_MCG_EIP, r->eip, h); | ||
45 | } | ||
46 | |||
47 | static void intel_machine_check(struct pt_regs *regs, long error_code) | ||
48 | { | ||
49 | u32 alow, ahigh, high, low; | ||
50 | u32 mcgstl, mcgsth; | ||
51 | int recover = 1; | ||
52 | int i; | ||
53 | |||
54 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
55 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
56 | recover = 0; | ||
57 | |||
58 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
59 | smp_processor_id(), mcgsth, mcgstl); | ||
60 | |||
61 | if (mce_num_extended_msrs > 0) { | ||
62 | struct intel_mce_extended_msrs dbg; | ||
63 | |||
64 | intel_get_extended_msrs(&dbg); | ||
65 | |||
66 | printk(KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" | ||
67 | "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" | ||
68 | "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", | ||
69 | smp_processor_id(), dbg.eip, dbg.eflags, | ||
70 | dbg.eax, dbg.ebx, dbg.ecx, dbg.edx, | ||
71 | dbg.esi, dbg.edi, dbg.ebp, dbg.esp); | ||
72 | } | ||
73 | |||
74 | for (i = 0; i < nr_mce_banks; i++) { | ||
75 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
76 | if (high & (1<<31)) { | ||
77 | char misc[20]; | ||
78 | char addr[24]; | ||
79 | |||
80 | misc[0] = addr[0] = '\0'; | ||
81 | if (high & (1<<29)) | ||
82 | recover |= 1; | ||
83 | if (high & (1<<25)) | ||
84 | recover |= 2; | ||
85 | high &= ~(1<<31); | ||
86 | if (high & (1<<27)) { | ||
87 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
88 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
89 | } | ||
90 | if (high & (1<<26)) { | ||
91 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
92 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
93 | } | ||
94 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
95 | smp_processor_id(), i, high, low, misc, addr); | ||
96 | } | ||
97 | } | ||
98 | |||
99 | if (recover & 2) | ||
100 | panic("CPU context corrupt"); | ||
101 | if (recover & 1) | ||
102 | panic("Unable to continue"); | ||
103 | |||
104 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
105 | |||
106 | /* | ||
107 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
108 | * recoverable/continuable.This will allow BIOS to look at the MSRs | ||
109 | * for errors if the OS could not log the error. | ||
110 | */ | ||
111 | for (i = 0; i < nr_mce_banks; i++) { | ||
112 | u32 msr; | ||
113 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
114 | rdmsr(msr, low, high); | ||
115 | if (high&(1<<31)) { | ||
116 | /* Clear it */ | ||
117 | wrmsr(msr, 0UL, 0UL); | ||
118 | /* Serialize */ | ||
119 | wmb(); | ||
120 | add_taint(TAINT_MACHINE_CHECK); | ||
121 | } | ||
122 | } | ||
123 | mcgstl &= ~(1<<2); | ||
124 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
125 | } | ||
126 | |||
127 | void intel_p4_mcheck_init(struct cpuinfo_x86 *c) | ||
128 | { | ||
129 | u32 l, h; | ||
130 | int i; | ||
131 | |||
132 | machine_check_vector = intel_machine_check; | ||
133 | wmb(); | ||
134 | |||
135 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
136 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
137 | if (l & (1<<8)) /* Control register present ? */ | ||
138 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
139 | nr_mce_banks = l & 0xff; | ||
140 | |||
141 | for (i = 0; i < nr_mce_banks; i++) { | ||
142 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
143 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
144 | } | ||
145 | |||
146 | set_in_cr4(X86_CR4_MCE); | ||
147 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
148 | smp_processor_id()); | ||
149 | |||
150 | /* Check for P4/Xeon extended MCE MSRs */ | ||
151 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
152 | if (l & (1<<9)) {/* MCG_EXT_P */ | ||
153 | mce_num_extended_msrs = (l >> 16) & 0xff; | ||
154 | printk(KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" | ||
155 | " available\n", | ||
156 | smp_processor_id(), mce_num_extended_msrs); | ||
157 | |||
158 | #ifdef CONFIG_X86_MCE_P4THERMAL | ||
159 | /* Check for P4/Xeon Thermal monitor */ | ||
160 | intel_init_thermal(c); | ||
161 | #endif | ||
162 | } | ||
163 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/p6.c b/arch/x86/kernel/cpu/mcheck/p6.c deleted file mode 100644 index 01e4f8178183..000000000000 --- a/arch/x86/kernel/cpu/mcheck/p6.c +++ /dev/null | |||
@@ -1,127 +0,0 @@ | |||
1 | /* | ||
2 | * P6 specific Machine Check Exception Reporting | ||
3 | * (C) Copyright 2002 Alan Cox <alan@lxorguk.ukuu.org.uk> | ||
4 | */ | ||
5 | #include <linux/interrupt.h> | ||
6 | #include <linux/kernel.h> | ||
7 | #include <linux/types.h> | ||
8 | #include <linux/init.h> | ||
9 | #include <linux/smp.h> | ||
10 | |||
11 | #include <asm/processor.h> | ||
12 | #include <asm/system.h> | ||
13 | #include <asm/mce.h> | ||
14 | #include <asm/msr.h> | ||
15 | |||
16 | /* Machine Check Handler For PII/PIII */ | ||
17 | static void intel_machine_check(struct pt_regs *regs, long error_code) | ||
18 | { | ||
19 | u32 alow, ahigh, high, low; | ||
20 | u32 mcgstl, mcgsth; | ||
21 | int recover = 1; | ||
22 | int i; | ||
23 | |||
24 | rdmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
25 | if (mcgstl & (1<<0)) /* Recoverable ? */ | ||
26 | recover = 0; | ||
27 | |||
28 | printk(KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | ||
29 | smp_processor_id(), mcgsth, mcgstl); | ||
30 | |||
31 | for (i = 0; i < nr_mce_banks; i++) { | ||
32 | rdmsr(MSR_IA32_MC0_STATUS+i*4, low, high); | ||
33 | if (high & (1<<31)) { | ||
34 | char misc[20]; | ||
35 | char addr[24]; | ||
36 | |||
37 | misc[0] = '\0'; | ||
38 | addr[0] = '\0'; | ||
39 | |||
40 | if (high & (1<<29)) | ||
41 | recover |= 1; | ||
42 | if (high & (1<<25)) | ||
43 | recover |= 2; | ||
44 | high &= ~(1<<31); | ||
45 | |||
46 | if (high & (1<<27)) { | ||
47 | rdmsr(MSR_IA32_MC0_MISC+i*4, alow, ahigh); | ||
48 | snprintf(misc, 20, "[%08x%08x]", ahigh, alow); | ||
49 | } | ||
50 | if (high & (1<<26)) { | ||
51 | rdmsr(MSR_IA32_MC0_ADDR+i*4, alow, ahigh); | ||
52 | snprintf(addr, 24, " at %08x%08x", ahigh, alow); | ||
53 | } | ||
54 | |||
55 | printk(KERN_EMERG "CPU %d: Bank %d: %08x%08x%s%s\n", | ||
56 | smp_processor_id(), i, high, low, misc, addr); | ||
57 | } | ||
58 | } | ||
59 | |||
60 | if (recover & 2) | ||
61 | panic("CPU context corrupt"); | ||
62 | if (recover & 1) | ||
63 | panic("Unable to continue"); | ||
64 | |||
65 | printk(KERN_EMERG "Attempting to continue.\n"); | ||
66 | /* | ||
67 | * Do not clear the MSR_IA32_MCi_STATUS if the error is not | ||
68 | * recoverable/continuable.This will allow BIOS to look at the MSRs | ||
69 | * for errors if the OS could not log the error: | ||
70 | */ | ||
71 | for (i = 0; i < nr_mce_banks; i++) { | ||
72 | unsigned int msr; | ||
73 | |||
74 | msr = MSR_IA32_MC0_STATUS+i*4; | ||
75 | rdmsr(msr, low, high); | ||
76 | if (high & (1<<31)) { | ||
77 | /* Clear it: */ | ||
78 | wrmsr(msr, 0UL, 0UL); | ||
79 | /* Serialize: */ | ||
80 | wmb(); | ||
81 | add_taint(TAINT_MACHINE_CHECK); | ||
82 | } | ||
83 | } | ||
84 | mcgstl &= ~(1<<2); | ||
85 | wrmsr(MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | ||
86 | } | ||
87 | |||
88 | /* Set up machine check reporting for processors with Intel style MCE: */ | ||
89 | void intel_p6_mcheck_init(struct cpuinfo_x86 *c) | ||
90 | { | ||
91 | u32 l, h; | ||
92 | int i; | ||
93 | |||
94 | /* Check for MCE support */ | ||
95 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
96 | return; | ||
97 | |||
98 | /* Check for PPro style MCA */ | ||
99 | if (!cpu_has(c, X86_FEATURE_MCA)) | ||
100 | return; | ||
101 | |||
102 | /* Ok machine check is available */ | ||
103 | machine_check_vector = intel_machine_check; | ||
104 | /* Make sure the vector pointer is visible before we enable MCEs: */ | ||
105 | wmb(); | ||
106 | |||
107 | printk(KERN_INFO "Intel machine check architecture supported.\n"); | ||
108 | rdmsr(MSR_IA32_MCG_CAP, l, h); | ||
109 | if (l & (1<<8)) /* Control register present ? */ | ||
110 | wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff); | ||
111 | nr_mce_banks = l & 0xff; | ||
112 | |||
113 | /* | ||
114 | * Following the example in IA-32 SDM Vol 3: | ||
115 | * - MC0_CTL should not be written | ||
116 | * - Status registers on all banks should be cleared on reset | ||
117 | */ | ||
118 | for (i = 1; i < nr_mce_banks; i++) | ||
119 | wrmsr(MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | ||
120 | |||
121 | for (i = 0; i < nr_mce_banks; i++) | ||
122 | wrmsr(MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | ||
123 | |||
124 | set_in_cr4(X86_CR4_MCE); | ||
125 | printk(KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n", | ||
126 | smp_processor_id()); | ||
127 | } | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 5957a93e5173..63a56d147e4a 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -260,9 +260,6 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
260 | return; | 260 | return; |
261 | } | 261 | } |
262 | 262 | ||
263 | if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2)) | ||
264 | tm2 = 1; | ||
265 | |||
266 | /* Check whether a vector already exists */ | 263 | /* Check whether a vector already exists */ |
267 | if (h & APIC_VECTOR_MASK) { | 264 | if (h & APIC_VECTOR_MASK) { |
268 | printk(KERN_DEBUG | 265 | printk(KERN_DEBUG |
@@ -271,6 +268,16 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
271 | return; | 268 | return; |
272 | } | 269 | } |
273 | 270 | ||
271 | /* early Pentium M models use different method for enabling TM2 */ | ||
272 | if (cpu_has(c, X86_FEATURE_TM2)) { | ||
273 | if (c->x86 == 6 && (c->x86_model == 9 || c->x86_model == 13)) { | ||
274 | rdmsr(MSR_THERM2_CTL, l, h); | ||
275 | if (l & MSR_THERM2_CTL_TM_SELECT) | ||
276 | tm2 = 1; | ||
277 | } else if (l & MSR_IA32_MISC_ENABLE_TM2) | ||
278 | tm2 = 1; | ||
279 | } | ||
280 | |||
274 | /* We'll mask the thermal vector in the lapic till we're ready: */ | 281 | /* We'll mask the thermal vector in the lapic till we're ready: */ |
275 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; | 282 | h = THERMAL_APIC_VECTOR | APIC_DM_FIXED | APIC_LVT_MASKED; |
276 | apic_write(APIC_LVTTHMR, h); | 283 | apic_write(APIC_LVTTHMR, h); |
diff --git a/arch/x86/kernel/cpu/mtrr/amd.c b/arch/x86/kernel/cpu/mtrr/amd.c index ee2331b0e58f..33af14110dfd 100644 --- a/arch/x86/kernel/cpu/mtrr/amd.c +++ b/arch/x86/kernel/cpu/mtrr/amd.c | |||
@@ -7,15 +7,15 @@ | |||
7 | 7 | ||
8 | static void | 8 | static void |
9 | amd_get_mtrr(unsigned int reg, unsigned long *base, | 9 | amd_get_mtrr(unsigned int reg, unsigned long *base, |
10 | unsigned long *size, mtrr_type * type) | 10 | unsigned long *size, mtrr_type *type) |
11 | { | 11 | { |
12 | unsigned long low, high; | 12 | unsigned long low, high; |
13 | 13 | ||
14 | rdmsr(MSR_K6_UWCCR, low, high); | 14 | rdmsr(MSR_K6_UWCCR, low, high); |
15 | /* Upper dword is region 1, lower is region 0 */ | 15 | /* Upper dword is region 1, lower is region 0 */ |
16 | if (reg == 1) | 16 | if (reg == 1) |
17 | low = high; | 17 | low = high; |
18 | /* The base masks off on the right alignment */ | 18 | /* The base masks off on the right alignment */ |
19 | *base = (low & 0xFFFE0000) >> PAGE_SHIFT; | 19 | *base = (low & 0xFFFE0000) >> PAGE_SHIFT; |
20 | *type = 0; | 20 | *type = 0; |
21 | if (low & 1) | 21 | if (low & 1) |
@@ -27,74 +27,81 @@ amd_get_mtrr(unsigned int reg, unsigned long *base, | |||
27 | return; | 27 | return; |
28 | } | 28 | } |
29 | /* | 29 | /* |
30 | * This needs a little explaining. The size is stored as an | 30 | * This needs a little explaining. The size is stored as an |
31 | * inverted mask of bits of 128K granularity 15 bits long offset | 31 | * inverted mask of bits of 128K granularity 15 bits long offset |
32 | * 2 bits | 32 | * 2 bits. |
33 | * | 33 | * |
34 | * So to get a size we do invert the mask and add 1 to the lowest | 34 | * So to get a size we do invert the mask and add 1 to the lowest |
35 | * mask bit (4 as its 2 bits in). This gives us a size we then shift | 35 | * mask bit (4 as its 2 bits in). This gives us a size we then shift |
36 | * to turn into 128K blocks | 36 | * to turn into 128K blocks. |
37 | * | 37 | * |
38 | * eg 111 1111 1111 1100 is 512K | 38 | * eg 111 1111 1111 1100 is 512K |
39 | * | 39 | * |
40 | * invert 000 0000 0000 0011 | 40 | * invert 000 0000 0000 0011 |
41 | * +1 000 0000 0000 0100 | 41 | * +1 000 0000 0000 0100 |
42 | * *128K ... | 42 | * *128K ... |
43 | */ | 43 | */ |
44 | low = (~low) & 0x1FFFC; | 44 | low = (~low) & 0x1FFFC; |
45 | *size = (low + 4) << (15 - PAGE_SHIFT); | 45 | *size = (low + 4) << (15 - PAGE_SHIFT); |
46 | return; | ||
47 | } | 46 | } |
48 | 47 | ||
49 | static void amd_set_mtrr(unsigned int reg, unsigned long base, | 48 | /** |
50 | unsigned long size, mtrr_type type) | 49 | * amd_set_mtrr - Set variable MTRR register on the local CPU. |
51 | /* [SUMMARY] Set variable MTRR register on the local CPU. | 50 | * |
52 | <reg> The register to set. | 51 | * @reg The register to set. |
53 | <base> The base address of the region. | 52 | * @base The base address of the region. |
54 | <size> The size of the region. If this is 0 the region is disabled. | 53 | * @size The size of the region. If this is 0 the region is disabled. |
55 | <type> The type of the region. | 54 | * @type The type of the region. |
56 | [RETURNS] Nothing. | 55 | * |
57 | */ | 56 | * Returns nothing. |
57 | */ | ||
58 | static void | ||
59 | amd_set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) | ||
58 | { | 60 | { |
59 | u32 regs[2]; | 61 | u32 regs[2]; |
60 | 62 | ||
61 | /* | 63 | /* |
62 | * Low is MTRR0 , High MTRR 1 | 64 | * Low is MTRR0, High MTRR 1 |
63 | */ | 65 | */ |
64 | rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); | 66 | rdmsr(MSR_K6_UWCCR, regs[0], regs[1]); |
65 | /* | 67 | /* |
66 | * Blank to disable | 68 | * Blank to disable |
67 | */ | 69 | */ |
68 | if (size == 0) | 70 | if (size == 0) { |
69 | regs[reg] = 0; | 71 | regs[reg] = 0; |
70 | else | 72 | } else { |
71 | /* Set the register to the base, the type (off by one) and an | 73 | /* |
72 | inverted bitmask of the size The size is the only odd | 74 | * Set the register to the base, the type (off by one) and an |
73 | bit. We are fed say 512K We invert this and we get 111 1111 | 75 | * inverted bitmask of the size The size is the only odd |
74 | 1111 1011 but if you subtract one and invert you get the | 76 | * bit. We are fed say 512K We invert this and we get 111 1111 |
75 | desired 111 1111 1111 1100 mask | 77 | * 1111 1011 but if you subtract one and invert you get the |
76 | 78 | * desired 111 1111 1111 1100 mask | |
77 | But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! */ | 79 | * |
80 | * But ~(x - 1) == ~x + 1 == -x. Two's complement rocks! | ||
81 | */ | ||
78 | regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) | 82 | regs[reg] = (-size >> (15 - PAGE_SHIFT) & 0x0001FFFC) |
79 | | (base << PAGE_SHIFT) | (type + 1); | 83 | | (base << PAGE_SHIFT) | (type + 1); |
84 | } | ||
80 | 85 | ||
81 | /* | 86 | /* |
82 | * The writeback rule is quite specific. See the manual. Its | 87 | * The writeback rule is quite specific. See the manual. Its |
83 | * disable local interrupts, write back the cache, set the mtrr | 88 | * disable local interrupts, write back the cache, set the mtrr |
84 | */ | 89 | */ |
85 | wbinvd(); | 90 | wbinvd(); |
86 | wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); | 91 | wrmsr(MSR_K6_UWCCR, regs[0], regs[1]); |
87 | } | 92 | } |
88 | 93 | ||
89 | static int amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | 94 | static int |
95 | amd_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | ||
90 | { | 96 | { |
91 | /* Apply the K6 block alignment and size rules | 97 | /* |
92 | In order | 98 | * Apply the K6 block alignment and size rules |
93 | o Uncached or gathering only | 99 | * In order |
94 | o 128K or bigger block | 100 | * o Uncached or gathering only |
95 | o Power of 2 block | 101 | * o 128K or bigger block |
96 | o base suitably aligned to the power | 102 | * o Power of 2 block |
97 | */ | 103 | * o base suitably aligned to the power |
104 | */ | ||
98 | if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) | 105 | if (type > MTRR_TYPE_WRCOMB || size < (1 << (17 - PAGE_SHIFT)) |
99 | || (size & ~(size - 1)) - size || (base & (size - 1))) | 106 | || (size & ~(size - 1)) - size || (base & (size - 1))) |
100 | return -EINVAL; | 107 | return -EINVAL; |
@@ -115,5 +122,3 @@ int __init amd_init_mtrr(void) | |||
115 | set_mtrr_ops(&amd_mtrr_ops); | 122 | set_mtrr_ops(&amd_mtrr_ops); |
116 | return 0; | 123 | return 0; |
117 | } | 124 | } |
118 | |||
119 | //arch_initcall(amd_mtrr_init); | ||
diff --git a/arch/x86/kernel/cpu/mtrr/centaur.c b/arch/x86/kernel/cpu/mtrr/centaur.c index cb9aa3a7a7ab..de89f14eff3a 100644 --- a/arch/x86/kernel/cpu/mtrr/centaur.c +++ b/arch/x86/kernel/cpu/mtrr/centaur.c | |||
@@ -1,7 +1,9 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/mm.h> | 2 | #include <linux/mm.h> |
3 | |||
3 | #include <asm/mtrr.h> | 4 | #include <asm/mtrr.h> |
4 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
6 | |||
5 | #include "mtrr.h" | 7 | #include "mtrr.h" |
6 | 8 | ||
7 | static struct { | 9 | static struct { |
@@ -12,25 +14,25 @@ static struct { | |||
12 | static u8 centaur_mcr_reserved; | 14 | static u8 centaur_mcr_reserved; |
13 | static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ | 15 | static u8 centaur_mcr_type; /* 0 for winchip, 1 for winchip2 */ |
14 | 16 | ||
15 | /* | 17 | /** |
16 | * Report boot time MCR setups | 18 | * centaur_get_free_region - Get a free MTRR. |
19 | * | ||
20 | * @base: The starting (base) address of the region. | ||
21 | * @size: The size (in bytes) of the region. | ||
22 | * | ||
23 | * Returns: the index of the region on success, else -1 on error. | ||
17 | */ | 24 | */ |
18 | |||
19 | static int | 25 | static int |
20 | centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 26 | centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) |
21 | /* [SUMMARY] Get a free MTRR. | ||
22 | <base> The starting (base) address of the region. | ||
23 | <size> The size (in bytes) of the region. | ||
24 | [RETURNS] The index of the region on success, else -1 on error. | ||
25 | */ | ||
26 | { | 27 | { |
27 | int i, max; | ||
28 | mtrr_type ltype; | ||
29 | unsigned long lbase, lsize; | 28 | unsigned long lbase, lsize; |
29 | mtrr_type ltype; | ||
30 | int i, max; | ||
30 | 31 | ||
31 | max = num_var_ranges; | 32 | max = num_var_ranges; |
32 | if (replace_reg >= 0 && replace_reg < max) | 33 | if (replace_reg >= 0 && replace_reg < max) |
33 | return replace_reg; | 34 | return replace_reg; |
35 | |||
34 | for (i = 0; i < max; ++i) { | 36 | for (i = 0; i < max; ++i) { |
35 | if (centaur_mcr_reserved & (1 << i)) | 37 | if (centaur_mcr_reserved & (1 << i)) |
36 | continue; | 38 | continue; |
@@ -38,11 +40,14 @@ centaur_get_free_region(unsigned long base, unsigned long size, int replace_reg) | |||
38 | if (lsize == 0) | 40 | if (lsize == 0) |
39 | return i; | 41 | return i; |
40 | } | 42 | } |
43 | |||
41 | return -ENOSPC; | 44 | return -ENOSPC; |
42 | } | 45 | } |
43 | 46 | ||
44 | void | 47 | /* |
45 | mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) | 48 | * Report boot time MCR setups |
49 | */ | ||
50 | void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi) | ||
46 | { | 51 | { |
47 | centaur_mcr[mcr].low = lo; | 52 | centaur_mcr[mcr].low = lo; |
48 | centaur_mcr[mcr].high = hi; | 53 | centaur_mcr[mcr].high = hi; |
@@ -54,33 +59,35 @@ centaur_get_mcr(unsigned int reg, unsigned long *base, | |||
54 | { | 59 | { |
55 | *base = centaur_mcr[reg].high >> PAGE_SHIFT; | 60 | *base = centaur_mcr[reg].high >> PAGE_SHIFT; |
56 | *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; | 61 | *size = -(centaur_mcr[reg].low & 0xfffff000) >> PAGE_SHIFT; |
57 | *type = MTRR_TYPE_WRCOMB; /* If it is there, it is write-combining */ | 62 | *type = MTRR_TYPE_WRCOMB; /* write-combining */ |
63 | |||
58 | if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) | 64 | if (centaur_mcr_type == 1 && ((centaur_mcr[reg].low & 31) & 2)) |
59 | *type = MTRR_TYPE_UNCACHABLE; | 65 | *type = MTRR_TYPE_UNCACHABLE; |
60 | if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) | 66 | if (centaur_mcr_type == 1 && (centaur_mcr[reg].low & 31) == 25) |
61 | *type = MTRR_TYPE_WRBACK; | 67 | *type = MTRR_TYPE_WRBACK; |
62 | if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) | 68 | if (centaur_mcr_type == 0 && (centaur_mcr[reg].low & 31) == 31) |
63 | *type = MTRR_TYPE_WRBACK; | 69 | *type = MTRR_TYPE_WRBACK; |
64 | |||
65 | } | 70 | } |
66 | 71 | ||
67 | static void centaur_set_mcr(unsigned int reg, unsigned long base, | 72 | static void |
68 | unsigned long size, mtrr_type type) | 73 | centaur_set_mcr(unsigned int reg, unsigned long base, |
74 | unsigned long size, mtrr_type type) | ||
69 | { | 75 | { |
70 | unsigned long low, high; | 76 | unsigned long low, high; |
71 | 77 | ||
72 | if (size == 0) { | 78 | if (size == 0) { |
73 | /* Disable */ | 79 | /* Disable */ |
74 | high = low = 0; | 80 | high = low = 0; |
75 | } else { | 81 | } else { |
76 | high = base << PAGE_SHIFT; | 82 | high = base << PAGE_SHIFT; |
77 | if (centaur_mcr_type == 0) | 83 | if (centaur_mcr_type == 0) { |
78 | low = -size << PAGE_SHIFT | 0x1f; /* only support write-combining... */ | 84 | /* Only support write-combining... */ |
79 | else { | 85 | low = -size << PAGE_SHIFT | 0x1f; |
86 | } else { | ||
80 | if (type == MTRR_TYPE_UNCACHABLE) | 87 | if (type == MTRR_TYPE_UNCACHABLE) |
81 | low = -size << PAGE_SHIFT | 0x02; /* NC */ | 88 | low = -size << PAGE_SHIFT | 0x02; /* NC */ |
82 | else | 89 | else |
83 | low = -size << PAGE_SHIFT | 0x09; /* WWO,WC */ | 90 | low = -size << PAGE_SHIFT | 0x09; /* WWO, WC */ |
84 | } | 91 | } |
85 | } | 92 | } |
86 | centaur_mcr[reg].high = high; | 93 | centaur_mcr[reg].high = high; |
@@ -88,118 +95,16 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base, | |||
88 | wrmsr(MSR_IDT_MCR0 + reg, low, high); | 95 | wrmsr(MSR_IDT_MCR0 + reg, low, high); |
89 | } | 96 | } |
90 | 97 | ||
91 | #if 0 | 98 | static int |
92 | /* | 99 | centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type) |
93 | * Initialise the later (saner) Winchip MCR variant. In this version | ||
94 | * the BIOS can pass us the registers it has used (but not their values) | ||
95 | * and the control register is read/write | ||
96 | */ | ||
97 | |||
98 | static void __init | ||
99 | centaur_mcr1_init(void) | ||
100 | { | ||
101 | unsigned i; | ||
102 | u32 lo, hi; | ||
103 | |||
104 | /* Unfortunately, MCR's are read-only, so there is no way to | ||
105 | * find out what the bios might have done. | ||
106 | */ | ||
107 | |||
108 | rdmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
109 | if (((lo >> 17) & 7) == 1) { /* Type 1 Winchip2 MCR */ | ||
110 | lo &= ~0x1C0; /* clear key */ | ||
111 | lo |= 0x040; /* set key to 1 */ | ||
112 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); /* unlock MCR */ | ||
113 | } | ||
114 | |||
115 | centaur_mcr_type = 1; | ||
116 | |||
117 | /* | ||
118 | * Clear any unconfigured MCR's. | ||
119 | */ | ||
120 | |||
121 | for (i = 0; i < 8; ++i) { | ||
122 | if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) { | ||
123 | if (!(lo & (1 << (9 + i)))) | ||
124 | wrmsr(MSR_IDT_MCR0 + i, 0, 0); | ||
125 | else | ||
126 | /* | ||
127 | * If the BIOS set up an MCR we cannot see it | ||
128 | * but we don't wish to obliterate it | ||
129 | */ | ||
130 | centaur_mcr_reserved |= (1 << i); | ||
131 | } | ||
132 | } | ||
133 | /* | ||
134 | * Throw the main write-combining switch... | ||
135 | * However if OOSTORE is enabled then people have already done far | ||
136 | * cleverer things and we should behave. | ||
137 | */ | ||
138 | |||
139 | lo |= 15; /* Write combine enables */ | ||
140 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | ||
141 | } | ||
142 | |||
143 | /* | ||
144 | * Initialise the original winchip with read only MCR registers | ||
145 | * no used bitmask for the BIOS to pass on and write only control | ||
146 | */ | ||
147 | |||
148 | static void __init | ||
149 | centaur_mcr0_init(void) | ||
150 | { | ||
151 | unsigned i; | ||
152 | |||
153 | /* Unfortunately, MCR's are read-only, so there is no way to | ||
154 | * find out what the bios might have done. | ||
155 | */ | ||
156 | |||
157 | /* Clear any unconfigured MCR's. | ||
158 | * This way we are sure that the centaur_mcr array contains the actual | ||
159 | * values. The disadvantage is that any BIOS tweaks are thus undone. | ||
160 | * | ||
161 | */ | ||
162 | for (i = 0; i < 8; ++i) { | ||
163 | if (centaur_mcr[i].high == 0 && centaur_mcr[i].low == 0) | ||
164 | wrmsr(MSR_IDT_MCR0 + i, 0, 0); | ||
165 | } | ||
166 | |||
167 | wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0); /* Write only */ | ||
168 | } | ||
169 | |||
170 | /* | ||
171 | * Initialise Winchip series MCR registers | ||
172 | */ | ||
173 | |||
174 | static void __init | ||
175 | centaur_mcr_init(void) | ||
176 | { | ||
177 | struct set_mtrr_context ctxt; | ||
178 | |||
179 | set_mtrr_prepare_save(&ctxt); | ||
180 | set_mtrr_cache_disable(&ctxt); | ||
181 | |||
182 | if (boot_cpu_data.x86_model == 4) | ||
183 | centaur_mcr0_init(); | ||
184 | else if (boot_cpu_data.x86_model == 8 || boot_cpu_data.x86_model == 9) | ||
185 | centaur_mcr1_init(); | ||
186 | |||
187 | set_mtrr_done(&ctxt); | ||
188 | } | ||
189 | #endif | ||
190 | |||
191 | static int centaur_validate_add_page(unsigned long base, | ||
192 | unsigned long size, unsigned int type) | ||
193 | { | 100 | { |
194 | /* | 101 | /* |
195 | * FIXME: Winchip2 supports uncached | 102 | * FIXME: Winchip2 supports uncached |
196 | */ | 103 | */ |
197 | if (type != MTRR_TYPE_WRCOMB && | 104 | if (type != MTRR_TYPE_WRCOMB && |
198 | (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { | 105 | (centaur_mcr_type == 0 || type != MTRR_TYPE_UNCACHABLE)) { |
199 | printk(KERN_WARNING | 106 | pr_warning("mtrr: only write-combining%s supported\n", |
200 | "mtrr: only write-combining%s supported\n", | 107 | centaur_mcr_type ? " and uncacheable are" : " is"); |
201 | centaur_mcr_type ? " and uncacheable are" | ||
202 | : " is"); | ||
203 | return -EINVAL; | 108 | return -EINVAL; |
204 | } | 109 | } |
205 | return 0; | 110 | return 0; |
@@ -207,7 +112,6 @@ static int centaur_validate_add_page(unsigned long base, | |||
207 | 112 | ||
208 | static struct mtrr_ops centaur_mtrr_ops = { | 113 | static struct mtrr_ops centaur_mtrr_ops = { |
209 | .vendor = X86_VENDOR_CENTAUR, | 114 | .vendor = X86_VENDOR_CENTAUR, |
210 | // .init = centaur_mcr_init, | ||
211 | .set = centaur_set_mcr, | 115 | .set = centaur_set_mcr, |
212 | .get = centaur_get_mcr, | 116 | .get = centaur_get_mcr, |
213 | .get_free_region = centaur_get_free_region, | 117 | .get_free_region = centaur_get_free_region, |
@@ -220,5 +124,3 @@ int __init centaur_init_mtrr(void) | |||
220 | set_mtrr_ops(¢aur_mtrr_ops); | 124 | set_mtrr_ops(¢aur_mtrr_ops); |
221 | return 0; | 125 | return 0; |
222 | } | 126 | } |
223 | |||
224 | //arch_initcall(centaur_init_mtrr); | ||
diff --git a/arch/x86/kernel/cpu/mtrr/cleanup.c b/arch/x86/kernel/cpu/mtrr/cleanup.c index 1d584a18a50d..315738c74aad 100644 --- a/arch/x86/kernel/cpu/mtrr/cleanup.c +++ b/arch/x86/kernel/cpu/mtrr/cleanup.c | |||
@@ -1,51 +1,75 @@ | |||
1 | /* MTRR (Memory Type Range Register) cleanup | 1 | /* |
2 | 2 | * MTRR (Memory Type Range Register) cleanup | |
3 | Copyright (C) 2009 Yinghai Lu | 3 | * |
4 | 4 | * Copyright (C) 2009 Yinghai Lu | |
5 | This library is free software; you can redistribute it and/or | 5 | * |
6 | modify it under the terms of the GNU Library General Public | 6 | * This library is free software; you can redistribute it and/or |
7 | License as published by the Free Software Foundation; either | 7 | * modify it under the terms of the GNU Library General Public |
8 | version 2 of the License, or (at your option) any later version. | 8 | * License as published by the Free Software Foundation; either |
9 | 9 | * version 2 of the License, or (at your option) any later version. | |
10 | This library is distributed in the hope that it will be useful, | 10 | * |
11 | but WITHOUT ANY WARRANTY; without even the implied warranty of | 11 | * This library is distributed in the hope that it will be useful, |
12 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | Library General Public License for more details. | 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | 14 | * Library General Public License for more details. | |
15 | You should have received a copy of the GNU Library General Public | 15 | * |
16 | License along with this library; if not, write to the Free | 16 | * You should have received a copy of the GNU Library General Public |
17 | Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | 17 | * License along with this library; if not, write to the Free |
18 | */ | 18 | * Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. |
19 | 19 | */ | |
20 | #include <linux/module.h> | 20 | #include <linux/module.h> |
21 | #include <linux/init.h> | 21 | #include <linux/init.h> |
22 | #include <linux/pci.h> | 22 | #include <linux/pci.h> |
23 | #include <linux/smp.h> | 23 | #include <linux/smp.h> |
24 | #include <linux/cpu.h> | 24 | #include <linux/cpu.h> |
25 | #include <linux/mutex.h> | ||
26 | #include <linux/sort.h> | 25 | #include <linux/sort.h> |
26 | #include <linux/mutex.h> | ||
27 | #include <linux/uaccess.h> | ||
28 | #include <linux/kvm_para.h> | ||
27 | 29 | ||
30 | #include <asm/processor.h> | ||
28 | #include <asm/e820.h> | 31 | #include <asm/e820.h> |
29 | #include <asm/mtrr.h> | 32 | #include <asm/mtrr.h> |
30 | #include <asm/uaccess.h> | ||
31 | #include <asm/processor.h> | ||
32 | #include <asm/msr.h> | 33 | #include <asm/msr.h> |
33 | #include <asm/kvm_para.h> | ||
34 | #include "mtrr.h" | ||
35 | 34 | ||
36 | /* should be related to MTRR_VAR_RANGES nums */ | 35 | #include "mtrr.h" |
37 | #define RANGE_NUM 256 | ||
38 | 36 | ||
39 | struct res_range { | 37 | struct res_range { |
40 | unsigned long start; | 38 | unsigned long start; |
41 | unsigned long end; | 39 | unsigned long end; |
40 | }; | ||
41 | |||
42 | struct var_mtrr_range_state { | ||
43 | unsigned long base_pfn; | ||
44 | unsigned long size_pfn; | ||
45 | mtrr_type type; | ||
46 | }; | ||
47 | |||
48 | struct var_mtrr_state { | ||
49 | unsigned long range_startk; | ||
50 | unsigned long range_sizek; | ||
51 | unsigned long chunk_sizek; | ||
52 | unsigned long gran_sizek; | ||
53 | unsigned int reg; | ||
42 | }; | 54 | }; |
43 | 55 | ||
56 | /* Should be related to MTRR_VAR_RANGES nums */ | ||
57 | #define RANGE_NUM 256 | ||
58 | |||
59 | static struct res_range __initdata range[RANGE_NUM]; | ||
60 | static int __initdata nr_range; | ||
61 | |||
62 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | ||
63 | |||
64 | static int __initdata debug_print; | ||
65 | #define Dprintk(x...) do { if (debug_print) printk(KERN_DEBUG x); } while (0) | ||
66 | |||
67 | |||
44 | static int __init | 68 | static int __init |
45 | add_range(struct res_range *range, int nr_range, unsigned long start, | 69 | add_range(struct res_range *range, int nr_range, |
46 | unsigned long end) | 70 | unsigned long start, unsigned long end) |
47 | { | 71 | { |
48 | /* out of slots */ | 72 | /* Out of slots: */ |
49 | if (nr_range >= RANGE_NUM) | 73 | if (nr_range >= RANGE_NUM) |
50 | return nr_range; | 74 | return nr_range; |
51 | 75 | ||
@@ -58,12 +82,12 @@ add_range(struct res_range *range, int nr_range, unsigned long start, | |||
58 | } | 82 | } |
59 | 83 | ||
60 | static int __init | 84 | static int __init |
61 | add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, | 85 | add_range_with_merge(struct res_range *range, int nr_range, |
62 | unsigned long end) | 86 | unsigned long start, unsigned long end) |
63 | { | 87 | { |
64 | int i; | 88 | int i; |
65 | 89 | ||
66 | /* try to merge it with old one */ | 90 | /* Try to merge it with old one: */ |
67 | for (i = 0; i < nr_range; i++) { | 91 | for (i = 0; i < nr_range; i++) { |
68 | unsigned long final_start, final_end; | 92 | unsigned long final_start, final_end; |
69 | unsigned long common_start, common_end; | 93 | unsigned long common_start, common_end; |
@@ -84,7 +108,7 @@ add_range_with_merge(struct res_range *range, int nr_range, unsigned long start, | |||
84 | return nr_range; | 108 | return nr_range; |
85 | } | 109 | } |
86 | 110 | ||
87 | /* need to add that */ | 111 | /* Need to add it: */ |
88 | return add_range(range, nr_range, start, end); | 112 | return add_range(range, nr_range, start, end); |
89 | } | 113 | } |
90 | 114 | ||
@@ -117,7 +141,7 @@ subtract_range(struct res_range *range, unsigned long start, unsigned long end) | |||
117 | } | 141 | } |
118 | 142 | ||
119 | if (start > range[j].start && end < range[j].end) { | 143 | if (start > range[j].start && end < range[j].end) { |
120 | /* find the new spare */ | 144 | /* Find the new spare: */ |
121 | for (i = 0; i < RANGE_NUM; i++) { | 145 | for (i = 0; i < RANGE_NUM; i++) { |
122 | if (range[i].end == 0) | 146 | if (range[i].end == 0) |
123 | break; | 147 | break; |
@@ -146,14 +170,8 @@ static int __init cmp_range(const void *x1, const void *x2) | |||
146 | return start1 - start2; | 170 | return start1 - start2; |
147 | } | 171 | } |
148 | 172 | ||
149 | struct var_mtrr_range_state { | 173 | #define BIOS_BUG_MSG KERN_WARNING \ |
150 | unsigned long base_pfn; | 174 | "WARNING: BIOS bug: VAR MTRR %d contains strange UC entry under 1M, check with your system vendor!\n" |
151 | unsigned long size_pfn; | ||
152 | mtrr_type type; | ||
153 | }; | ||
154 | |||
155 | static struct var_mtrr_range_state __initdata range_state[RANGE_NUM]; | ||
156 | static int __initdata debug_print; | ||
157 | 175 | ||
158 | static int __init | 176 | static int __init |
159 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | 177 | x86_get_mtrr_mem_range(struct res_range *range, int nr_range, |
@@ -180,7 +198,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
180 | range[i].start, range[i].end + 1); | 198 | range[i].start, range[i].end + 1); |
181 | } | 199 | } |
182 | 200 | ||
183 | /* take out UC ranges */ | 201 | /* Take out UC ranges: */ |
184 | for (i = 0; i < num_var_ranges; i++) { | 202 | for (i = 0; i < num_var_ranges; i++) { |
185 | type = range_state[i].type; | 203 | type = range_state[i].type; |
186 | if (type != MTRR_TYPE_UNCACHABLE && | 204 | if (type != MTRR_TYPE_UNCACHABLE && |
@@ -193,9 +211,7 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
193 | if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && | 211 | if (base < (1<<(20-PAGE_SHIFT)) && mtrr_state.have_fixed && |
194 | (mtrr_state.enabled & 1)) { | 212 | (mtrr_state.enabled & 1)) { |
195 | /* Var MTRR contains UC entry below 1M? Skip it: */ | 213 | /* Var MTRR contains UC entry below 1M? Skip it: */ |
196 | printk(KERN_WARNING "WARNING: BIOS bug: VAR MTRR %d " | 214 | printk(BIOS_BUG_MSG, i); |
197 | "contains strange UC entry under 1M, check " | ||
198 | "with your system vendor!\n", i); | ||
199 | if (base + size <= (1<<(20-PAGE_SHIFT))) | 215 | if (base + size <= (1<<(20-PAGE_SHIFT))) |
200 | continue; | 216 | continue; |
201 | size -= (1<<(20-PAGE_SHIFT)) - base; | 217 | size -= (1<<(20-PAGE_SHIFT)) - base; |
@@ -237,17 +253,13 @@ x86_get_mtrr_mem_range(struct res_range *range, int nr_range, | |||
237 | return nr_range; | 253 | return nr_range; |
238 | } | 254 | } |
239 | 255 | ||
240 | static struct res_range __initdata range[RANGE_NUM]; | ||
241 | static int __initdata nr_range; | ||
242 | |||
243 | #ifdef CONFIG_MTRR_SANITIZER | 256 | #ifdef CONFIG_MTRR_SANITIZER |
244 | 257 | ||
245 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) | 258 | static unsigned long __init sum_ranges(struct res_range *range, int nr_range) |
246 | { | 259 | { |
247 | unsigned long sum; | 260 | unsigned long sum = 0; |
248 | int i; | 261 | int i; |
249 | 262 | ||
250 | sum = 0; | ||
251 | for (i = 0; i < nr_range; i++) | 263 | for (i = 0; i < nr_range; i++) |
252 | sum += range[i].end + 1 - range[i].start; | 264 | sum += range[i].end + 1 - range[i].start; |
253 | 265 | ||
@@ -278,17 +290,9 @@ static int __init mtrr_cleanup_debug_setup(char *str) | |||
278 | } | 290 | } |
279 | early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); | 291 | early_param("mtrr_cleanup_debug", mtrr_cleanup_debug_setup); |
280 | 292 | ||
281 | struct var_mtrr_state { | ||
282 | unsigned long range_startk; | ||
283 | unsigned long range_sizek; | ||
284 | unsigned long chunk_sizek; | ||
285 | unsigned long gran_sizek; | ||
286 | unsigned int reg; | ||
287 | }; | ||
288 | |||
289 | static void __init | 293 | static void __init |
290 | set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | 294 | set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, |
291 | unsigned char type, unsigned int address_bits) | 295 | unsigned char type, unsigned int address_bits) |
292 | { | 296 | { |
293 | u32 base_lo, base_hi, mask_lo, mask_hi; | 297 | u32 base_lo, base_hi, mask_lo, mask_hi; |
294 | u64 base, mask; | 298 | u64 base, mask; |
@@ -301,7 +305,7 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | |||
301 | mask = (1ULL << address_bits) - 1; | 305 | mask = (1ULL << address_bits) - 1; |
302 | mask &= ~((((u64)sizek) << 10) - 1); | 306 | mask &= ~((((u64)sizek) << 10) - 1); |
303 | 307 | ||
304 | base = ((u64)basek) << 10; | 308 | base = ((u64)basek) << 10; |
305 | 309 | ||
306 | base |= type; | 310 | base |= type; |
307 | mask |= 0x800; | 311 | mask |= 0x800; |
@@ -317,15 +321,14 @@ set_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | |||
317 | 321 | ||
318 | static void __init | 322 | static void __init |
319 | save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, | 323 | save_var_mtrr(unsigned int reg, unsigned long basek, unsigned long sizek, |
320 | unsigned char type) | 324 | unsigned char type) |
321 | { | 325 | { |
322 | range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); | 326 | range_state[reg].base_pfn = basek >> (PAGE_SHIFT - 10); |
323 | range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); | 327 | range_state[reg].size_pfn = sizek >> (PAGE_SHIFT - 10); |
324 | range_state[reg].type = type; | 328 | range_state[reg].type = type; |
325 | } | 329 | } |
326 | 330 | ||
327 | static void __init | 331 | static void __init set_var_mtrr_all(unsigned int address_bits) |
328 | set_var_mtrr_all(unsigned int address_bits) | ||
329 | { | 332 | { |
330 | unsigned long basek, sizek; | 333 | unsigned long basek, sizek; |
331 | unsigned char type; | 334 | unsigned char type; |
@@ -342,11 +345,11 @@ set_var_mtrr_all(unsigned int address_bits) | |||
342 | 345 | ||
343 | static unsigned long to_size_factor(unsigned long sizek, char *factorp) | 346 | static unsigned long to_size_factor(unsigned long sizek, char *factorp) |
344 | { | 347 | { |
345 | char factor; | ||
346 | unsigned long base = sizek; | 348 | unsigned long base = sizek; |
349 | char factor; | ||
347 | 350 | ||
348 | if (base & ((1<<10) - 1)) { | 351 | if (base & ((1<<10) - 1)) { |
349 | /* not MB alignment */ | 352 | /* Not MB-aligned: */ |
350 | factor = 'K'; | 353 | factor = 'K'; |
351 | } else if (base & ((1<<20) - 1)) { | 354 | } else if (base & ((1<<20) - 1)) { |
352 | factor = 'M'; | 355 | factor = 'M'; |
@@ -372,11 +375,12 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
372 | unsigned long max_align, align; | 375 | unsigned long max_align, align; |
373 | unsigned long sizek; | 376 | unsigned long sizek; |
374 | 377 | ||
375 | /* Compute the maximum size I can make a range */ | 378 | /* Compute the maximum size with which we can make a range: */ |
376 | if (range_startk) | 379 | if (range_startk) |
377 | max_align = ffs(range_startk) - 1; | 380 | max_align = ffs(range_startk) - 1; |
378 | else | 381 | else |
379 | max_align = 32; | 382 | max_align = 32; |
383 | |||
380 | align = fls(range_sizek) - 1; | 384 | align = fls(range_sizek) - 1; |
381 | if (align > max_align) | 385 | if (align > max_align) |
382 | align = max_align; | 386 | align = max_align; |
@@ -386,11 +390,10 @@ range_to_mtrr(unsigned int reg, unsigned long range_startk, | |||
386 | char start_factor = 'K', size_factor = 'K'; | 390 | char start_factor = 'K', size_factor = 'K'; |
387 | unsigned long start_base, size_base; | 391 | unsigned long start_base, size_base; |
388 | 392 | ||
389 | start_base = to_size_factor(range_startk, | 393 | start_base = to_size_factor(range_startk, &start_factor); |
390 | &start_factor), | 394 | size_base = to_size_factor(sizek, &size_factor); |
391 | size_base = to_size_factor(sizek, &size_factor), | ||
392 | 395 | ||
393 | printk(KERN_DEBUG "Setting variable MTRR %d, " | 396 | Dprintk("Setting variable MTRR %d, " |
394 | "base: %ld%cB, range: %ld%cB, type %s\n", | 397 | "base: %ld%cB, range: %ld%cB, type %s\n", |
395 | reg, start_base, start_factor, | 398 | reg, start_base, start_factor, |
396 | size_base, size_factor, | 399 | size_base, size_factor, |
@@ -425,10 +428,11 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
425 | chunk_sizek = state->chunk_sizek; | 428 | chunk_sizek = state->chunk_sizek; |
426 | gran_sizek = state->gran_sizek; | 429 | gran_sizek = state->gran_sizek; |
427 | 430 | ||
428 | /* align with gran size, prevent small block used up MTRRs */ | 431 | /* Align with gran size, prevent small block used up MTRRs: */ |
429 | range_basek = ALIGN(state->range_startk, gran_sizek); | 432 | range_basek = ALIGN(state->range_startk, gran_sizek); |
430 | if ((range_basek > basek) && basek) | 433 | if ((range_basek > basek) && basek) |
431 | return second_sizek; | 434 | return second_sizek; |
435 | |||
432 | state->range_sizek -= (range_basek - state->range_startk); | 436 | state->range_sizek -= (range_basek - state->range_startk); |
433 | range_sizek = ALIGN(state->range_sizek, gran_sizek); | 437 | range_sizek = ALIGN(state->range_sizek, gran_sizek); |
434 | 438 | ||
@@ -439,22 +443,21 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
439 | } | 443 | } |
440 | state->range_sizek = range_sizek; | 444 | state->range_sizek = range_sizek; |
441 | 445 | ||
442 | /* try to append some small hole */ | 446 | /* Try to append some small hole: */ |
443 | range0_basek = state->range_startk; | 447 | range0_basek = state->range_startk; |
444 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); | 448 | range0_sizek = ALIGN(state->range_sizek, chunk_sizek); |
445 | 449 | ||
446 | /* no increase */ | 450 | /* No increase: */ |
447 | if (range0_sizek == state->range_sizek) { | 451 | if (range0_sizek == state->range_sizek) { |
448 | if (debug_print) | 452 | Dprintk("rangeX: %016lx - %016lx\n", |
449 | printk(KERN_DEBUG "rangeX: %016lx - %016lx\n", | 453 | range0_basek<<10, |
450 | range0_basek<<10, | 454 | (range0_basek + state->range_sizek)<<10); |
451 | (range0_basek + state->range_sizek)<<10); | ||
452 | state->reg = range_to_mtrr(state->reg, range0_basek, | 455 | state->reg = range_to_mtrr(state->reg, range0_basek, |
453 | state->range_sizek, MTRR_TYPE_WRBACK); | 456 | state->range_sizek, MTRR_TYPE_WRBACK); |
454 | return 0; | 457 | return 0; |
455 | } | 458 | } |
456 | 459 | ||
457 | /* only cut back, when it is not the last */ | 460 | /* Only cut back when it is not the last: */ |
458 | if (sizek) { | 461 | if (sizek) { |
459 | while (range0_basek + range0_sizek > (basek + sizek)) { | 462 | while (range0_basek + range0_sizek > (basek + sizek)) { |
460 | if (range0_sizek >= chunk_sizek) | 463 | if (range0_sizek >= chunk_sizek) |
@@ -470,16 +473,16 @@ range_to_mtrr_with_hole(struct var_mtrr_state *state, unsigned long basek, | |||
470 | second_try: | 473 | second_try: |
471 | range_basek = range0_basek + range0_sizek; | 474 | range_basek = range0_basek + range0_sizek; |
472 | 475 | ||
473 | /* one hole in the middle */ | 476 | /* One hole in the middle: */ |
474 | if (range_basek > basek && range_basek <= (basek + sizek)) | 477 | if (range_basek > basek && range_basek <= (basek + sizek)) |
475 | second_sizek = range_basek - basek; | 478 | second_sizek = range_basek - basek; |
476 | 479 | ||
477 | if (range0_sizek > state->range_sizek) { | 480 | if (range0_sizek > state->range_sizek) { |
478 | 481 | ||
479 | /* one hole in middle or at end */ | 482 | /* One hole in middle or at the end: */ |
480 | hole_sizek = range0_sizek - state->range_sizek - second_sizek; | 483 | hole_sizek = range0_sizek - state->range_sizek - second_sizek; |
481 | 484 | ||
482 | /* hole size should be less than half of range0 size */ | 485 | /* Hole size should be less than half of range0 size: */ |
483 | if (hole_sizek >= (range0_sizek >> 1) && | 486 | if (hole_sizek >= (range0_sizek >> 1) && |
484 | range0_sizek >= chunk_sizek) { | 487 | range0_sizek >= chunk_sizek) { |
485 | range0_sizek -= chunk_sizek; | 488 | range0_sizek -= chunk_sizek; |
@@ -491,32 +494,30 @@ second_try: | |||
491 | } | 494 | } |
492 | 495 | ||
493 | if (range0_sizek) { | 496 | if (range0_sizek) { |
494 | if (debug_print) | 497 | Dprintk("range0: %016lx - %016lx\n", |
495 | printk(KERN_DEBUG "range0: %016lx - %016lx\n", | 498 | range0_basek<<10, |
496 | range0_basek<<10, | 499 | (range0_basek + range0_sizek)<<10); |
497 | (range0_basek + range0_sizek)<<10); | ||
498 | state->reg = range_to_mtrr(state->reg, range0_basek, | 500 | state->reg = range_to_mtrr(state->reg, range0_basek, |
499 | range0_sizek, MTRR_TYPE_WRBACK); | 501 | range0_sizek, MTRR_TYPE_WRBACK); |
500 | } | 502 | } |
501 | 503 | ||
502 | if (range0_sizek < state->range_sizek) { | 504 | if (range0_sizek < state->range_sizek) { |
503 | /* need to handle left over */ | 505 | /* Need to handle left over range: */ |
504 | range_sizek = state->range_sizek - range0_sizek; | 506 | range_sizek = state->range_sizek - range0_sizek; |
505 | 507 | ||
506 | if (debug_print) | 508 | Dprintk("range: %016lx - %016lx\n", |
507 | printk(KERN_DEBUG "range: %016lx - %016lx\n", | 509 | range_basek<<10, |
508 | range_basek<<10, | 510 | (range_basek + range_sizek)<<10); |
509 | (range_basek + range_sizek)<<10); | 511 | |
510 | state->reg = range_to_mtrr(state->reg, range_basek, | 512 | state->reg = range_to_mtrr(state->reg, range_basek, |
511 | range_sizek, MTRR_TYPE_WRBACK); | 513 | range_sizek, MTRR_TYPE_WRBACK); |
512 | } | 514 | } |
513 | 515 | ||
514 | if (hole_sizek) { | 516 | if (hole_sizek) { |
515 | hole_basek = range_basek - hole_sizek - second_sizek; | 517 | hole_basek = range_basek - hole_sizek - second_sizek; |
516 | if (debug_print) | 518 | Dprintk("hole: %016lx - %016lx\n", |
517 | printk(KERN_DEBUG "hole: %016lx - %016lx\n", | 519 | hole_basek<<10, |
518 | hole_basek<<10, | 520 | (hole_basek + hole_sizek)<<10); |
519 | (hole_basek + hole_sizek)<<10); | ||
520 | state->reg = range_to_mtrr(state->reg, hole_basek, | 521 | state->reg = range_to_mtrr(state->reg, hole_basek, |
521 | hole_sizek, MTRR_TYPE_UNCACHABLE); | 522 | hole_sizek, MTRR_TYPE_UNCACHABLE); |
522 | } | 523 | } |
@@ -537,23 +538,23 @@ set_var_mtrr_range(struct var_mtrr_state *state, unsigned long base_pfn, | |||
537 | basek = base_pfn << (PAGE_SHIFT - 10); | 538 | basek = base_pfn << (PAGE_SHIFT - 10); |
538 | sizek = size_pfn << (PAGE_SHIFT - 10); | 539 | sizek = size_pfn << (PAGE_SHIFT - 10); |
539 | 540 | ||
540 | /* See if I can merge with the last range */ | 541 | /* See if I can merge with the last range: */ |
541 | if ((basek <= 1024) || | 542 | if ((basek <= 1024) || |
542 | (state->range_startk + state->range_sizek == basek)) { | 543 | (state->range_startk + state->range_sizek == basek)) { |
543 | unsigned long endk = basek + sizek; | 544 | unsigned long endk = basek + sizek; |
544 | state->range_sizek = endk - state->range_startk; | 545 | state->range_sizek = endk - state->range_startk; |
545 | return; | 546 | return; |
546 | } | 547 | } |
547 | /* Write the range mtrrs */ | 548 | /* Write the range mtrrs: */ |
548 | if (state->range_sizek != 0) | 549 | if (state->range_sizek != 0) |
549 | second_sizek = range_to_mtrr_with_hole(state, basek, sizek); | 550 | second_sizek = range_to_mtrr_with_hole(state, basek, sizek); |
550 | 551 | ||
551 | /* Allocate an msr */ | 552 | /* Allocate an msr: */ |
552 | state->range_startk = basek + second_sizek; | 553 | state->range_startk = basek + second_sizek; |
553 | state->range_sizek = sizek - second_sizek; | 554 | state->range_sizek = sizek - second_sizek; |
554 | } | 555 | } |
555 | 556 | ||
556 | /* mininum size of mtrr block that can take hole */ | 557 | /* Mininum size of mtrr block that can take hole: */ |
557 | static u64 mtrr_chunk_size __initdata = (256ULL<<20); | 558 | static u64 mtrr_chunk_size __initdata = (256ULL<<20); |
558 | 559 | ||
559 | static int __init parse_mtrr_chunk_size_opt(char *p) | 560 | static int __init parse_mtrr_chunk_size_opt(char *p) |
@@ -565,7 +566,7 @@ static int __init parse_mtrr_chunk_size_opt(char *p) | |||
565 | } | 566 | } |
566 | early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); | 567 | early_param("mtrr_chunk_size", parse_mtrr_chunk_size_opt); |
567 | 568 | ||
568 | /* granity of mtrr of block */ | 569 | /* Granularity of mtrr of block: */ |
569 | static u64 mtrr_gran_size __initdata; | 570 | static u64 mtrr_gran_size __initdata; |
570 | 571 | ||
571 | static int __init parse_mtrr_gran_size_opt(char *p) | 572 | static int __init parse_mtrr_gran_size_opt(char *p) |
@@ -577,7 +578,7 @@ static int __init parse_mtrr_gran_size_opt(char *p) | |||
577 | } | 578 | } |
578 | early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); | 579 | early_param("mtrr_gran_size", parse_mtrr_gran_size_opt); |
579 | 580 | ||
580 | static int nr_mtrr_spare_reg __initdata = | 581 | static unsigned long nr_mtrr_spare_reg __initdata = |
581 | CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; | 582 | CONFIG_MTRR_SANITIZER_SPARE_REG_NR_DEFAULT; |
582 | 583 | ||
583 | static int __init parse_mtrr_spare_reg(char *arg) | 584 | static int __init parse_mtrr_spare_reg(char *arg) |
@@ -586,7 +587,6 @@ static int __init parse_mtrr_spare_reg(char *arg) | |||
586 | nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); | 587 | nr_mtrr_spare_reg = simple_strtoul(arg, NULL, 0); |
587 | return 0; | 588 | return 0; |
588 | } | 589 | } |
589 | |||
590 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); | 590 | early_param("mtrr_spare_reg_nr", parse_mtrr_spare_reg); |
591 | 591 | ||
592 | static int __init | 592 | static int __init |
@@ -594,8 +594,8 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
594 | u64 chunk_size, u64 gran_size) | 594 | u64 chunk_size, u64 gran_size) |
595 | { | 595 | { |
596 | struct var_mtrr_state var_state; | 596 | struct var_mtrr_state var_state; |
597 | int i; | ||
598 | int num_reg; | 597 | int num_reg; |
598 | int i; | ||
599 | 599 | ||
600 | var_state.range_startk = 0; | 600 | var_state.range_startk = 0; |
601 | var_state.range_sizek = 0; | 601 | var_state.range_sizek = 0; |
@@ -605,17 +605,18 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
605 | 605 | ||
606 | memset(range_state, 0, sizeof(range_state)); | 606 | memset(range_state, 0, sizeof(range_state)); |
607 | 607 | ||
608 | /* Write the range etc */ | 608 | /* Write the range: */ |
609 | for (i = 0; i < nr_range; i++) | 609 | for (i = 0; i < nr_range; i++) { |
610 | set_var_mtrr_range(&var_state, range[i].start, | 610 | set_var_mtrr_range(&var_state, range[i].start, |
611 | range[i].end - range[i].start + 1); | 611 | range[i].end - range[i].start + 1); |
612 | } | ||
612 | 613 | ||
613 | /* Write the last range */ | 614 | /* Write the last range: */ |
614 | if (var_state.range_sizek != 0) | 615 | if (var_state.range_sizek != 0) |
615 | range_to_mtrr_with_hole(&var_state, 0, 0); | 616 | range_to_mtrr_with_hole(&var_state, 0, 0); |
616 | 617 | ||
617 | num_reg = var_state.reg; | 618 | num_reg = var_state.reg; |
618 | /* Clear out the extra MTRR's */ | 619 | /* Clear out the extra MTRR's: */ |
619 | while (var_state.reg < num_var_ranges) { | 620 | while (var_state.reg < num_var_ranges) { |
620 | save_var_mtrr(var_state.reg, 0, 0, 0); | 621 | save_var_mtrr(var_state.reg, 0, 0, 0); |
621 | var_state.reg++; | 622 | var_state.reg++; |
@@ -625,11 +626,11 @@ x86_setup_var_mtrrs(struct res_range *range, int nr_range, | |||
625 | } | 626 | } |
626 | 627 | ||
627 | struct mtrr_cleanup_result { | 628 | struct mtrr_cleanup_result { |
628 | unsigned long gran_sizek; | 629 | unsigned long gran_sizek; |
629 | unsigned long chunk_sizek; | 630 | unsigned long chunk_sizek; |
630 | unsigned long lose_cover_sizek; | 631 | unsigned long lose_cover_sizek; |
631 | unsigned int num_reg; | 632 | unsigned int num_reg; |
632 | int bad; | 633 | int bad; |
633 | }; | 634 | }; |
634 | 635 | ||
635 | /* | 636 | /* |
@@ -645,10 +646,10 @@ static unsigned long __initdata min_loss_pfn[RANGE_NUM]; | |||
645 | 646 | ||
646 | static void __init print_out_mtrr_range_state(void) | 647 | static void __init print_out_mtrr_range_state(void) |
647 | { | 648 | { |
648 | int i; | ||
649 | char start_factor = 'K', size_factor = 'K'; | 649 | char start_factor = 'K', size_factor = 'K'; |
650 | unsigned long start_base, size_base; | 650 | unsigned long start_base, size_base; |
651 | mtrr_type type; | 651 | mtrr_type type; |
652 | int i; | ||
652 | 653 | ||
653 | for (i = 0; i < num_var_ranges; i++) { | 654 | for (i = 0; i < num_var_ranges; i++) { |
654 | 655 | ||
@@ -676,10 +677,10 @@ static int __init mtrr_need_cleanup(void) | |||
676 | int i; | 677 | int i; |
677 | mtrr_type type; | 678 | mtrr_type type; |
678 | unsigned long size; | 679 | unsigned long size; |
679 | /* extra one for all 0 */ | 680 | /* Extra one for all 0: */ |
680 | int num[MTRR_NUM_TYPES + 1]; | 681 | int num[MTRR_NUM_TYPES + 1]; |
681 | 682 | ||
682 | /* check entries number */ | 683 | /* Check entries number: */ |
683 | memset(num, 0, sizeof(num)); | 684 | memset(num, 0, sizeof(num)); |
684 | for (i = 0; i < num_var_ranges; i++) { | 685 | for (i = 0; i < num_var_ranges; i++) { |
685 | type = range_state[i].type; | 686 | type = range_state[i].type; |
@@ -693,88 +694,86 @@ static int __init mtrr_need_cleanup(void) | |||
693 | num[type]++; | 694 | num[type]++; |
694 | } | 695 | } |
695 | 696 | ||
696 | /* check if we got UC entries */ | 697 | /* Check if we got UC entries: */ |
697 | if (!num[MTRR_TYPE_UNCACHABLE]) | 698 | if (!num[MTRR_TYPE_UNCACHABLE]) |
698 | return 0; | 699 | return 0; |
699 | 700 | ||
700 | /* check if we only had WB and UC */ | 701 | /* Check if we only had WB and UC */ |
701 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | 702 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != |
702 | num_var_ranges - num[MTRR_NUM_TYPES]) | 703 | num_var_ranges - num[MTRR_NUM_TYPES]) |
703 | return 0; | 704 | return 0; |
704 | 705 | ||
705 | return 1; | 706 | return 1; |
706 | } | 707 | } |
707 | 708 | ||
708 | static unsigned long __initdata range_sums; | 709 | static unsigned long __initdata range_sums; |
709 | static void __init mtrr_calc_range_state(u64 chunk_size, u64 gran_size, | 710 | |
710 | unsigned long extra_remove_base, | 711 | static void __init |
711 | unsigned long extra_remove_size, | 712 | mtrr_calc_range_state(u64 chunk_size, u64 gran_size, |
712 | int i) | 713 | unsigned long x_remove_base, |
714 | unsigned long x_remove_size, int i) | ||
713 | { | 715 | { |
714 | int num_reg; | ||
715 | static struct res_range range_new[RANGE_NUM]; | 716 | static struct res_range range_new[RANGE_NUM]; |
716 | static int nr_range_new; | ||
717 | unsigned long range_sums_new; | 717 | unsigned long range_sums_new; |
718 | static int nr_range_new; | ||
719 | int num_reg; | ||
718 | 720 | ||
719 | /* convert ranges to var ranges state */ | 721 | /* Convert ranges to var ranges state: */ |
720 | num_reg = x86_setup_var_mtrrs(range, nr_range, | 722 | num_reg = x86_setup_var_mtrrs(range, nr_range, chunk_size, gran_size); |
721 | chunk_size, gran_size); | ||
722 | 723 | ||
723 | /* we got new setting in range_state, check it */ | 724 | /* We got new setting in range_state, check it: */ |
724 | memset(range_new, 0, sizeof(range_new)); | 725 | memset(range_new, 0, sizeof(range_new)); |
725 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, | 726 | nr_range_new = x86_get_mtrr_mem_range(range_new, 0, |
726 | extra_remove_base, extra_remove_size); | 727 | x_remove_base, x_remove_size); |
727 | range_sums_new = sum_ranges(range_new, nr_range_new); | 728 | range_sums_new = sum_ranges(range_new, nr_range_new); |
728 | 729 | ||
729 | result[i].chunk_sizek = chunk_size >> 10; | 730 | result[i].chunk_sizek = chunk_size >> 10; |
730 | result[i].gran_sizek = gran_size >> 10; | 731 | result[i].gran_sizek = gran_size >> 10; |
731 | result[i].num_reg = num_reg; | 732 | result[i].num_reg = num_reg; |
733 | |||
732 | if (range_sums < range_sums_new) { | 734 | if (range_sums < range_sums_new) { |
733 | result[i].lose_cover_sizek = | 735 | result[i].lose_cover_sizek = (range_sums_new - range_sums) << PSHIFT; |
734 | (range_sums_new - range_sums) << PSHIFT; | ||
735 | result[i].bad = 1; | 736 | result[i].bad = 1; |
736 | } else | 737 | } else { |
737 | result[i].lose_cover_sizek = | 738 | result[i].lose_cover_sizek = (range_sums - range_sums_new) << PSHIFT; |
738 | (range_sums - range_sums_new) << PSHIFT; | 739 | } |
739 | 740 | ||
740 | /* double check it */ | 741 | /* Double check it: */ |
741 | if (!result[i].bad && !result[i].lose_cover_sizek) { | 742 | if (!result[i].bad && !result[i].lose_cover_sizek) { |
742 | if (nr_range_new != nr_range || | 743 | if (nr_range_new != nr_range || memcmp(range, range_new, sizeof(range))) |
743 | memcmp(range, range_new, sizeof(range))) | 744 | result[i].bad = 1; |
744 | result[i].bad = 1; | ||
745 | } | 745 | } |
746 | 746 | ||
747 | if (!result[i].bad && (range_sums - range_sums_new < | 747 | if (!result[i].bad && (range_sums - range_sums_new < min_loss_pfn[num_reg])) |
748 | min_loss_pfn[num_reg])) { | 748 | min_loss_pfn[num_reg] = range_sums - range_sums_new; |
749 | min_loss_pfn[num_reg] = | ||
750 | range_sums - range_sums_new; | ||
751 | } | ||
752 | } | 749 | } |
753 | 750 | ||
754 | static void __init mtrr_print_out_one_result(int i) | 751 | static void __init mtrr_print_out_one_result(int i) |
755 | { | 752 | { |
756 | char gran_factor, chunk_factor, lose_factor; | ||
757 | unsigned long gran_base, chunk_base, lose_base; | 753 | unsigned long gran_base, chunk_base, lose_base; |
754 | char gran_factor, chunk_factor, lose_factor; | ||
758 | 755 | ||
759 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), | 756 | gran_base = to_size_factor(result[i].gran_sizek, &gran_factor), |
760 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), | 757 | chunk_base = to_size_factor(result[i].chunk_sizek, &chunk_factor), |
761 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), | 758 | lose_base = to_size_factor(result[i].lose_cover_sizek, &lose_factor), |
762 | printk(KERN_INFO "%sgran_size: %ld%c \tchunk_size: %ld%c \t", | 759 | |
763 | result[i].bad ? "*BAD*" : " ", | 760 | pr_info("%sgran_size: %ld%c \tchunk_size: %ld%c \t", |
764 | gran_base, gran_factor, chunk_base, chunk_factor); | 761 | result[i].bad ? "*BAD*" : " ", |
765 | printk(KERN_CONT "num_reg: %d \tlose cover RAM: %s%ld%c\n", | 762 | gran_base, gran_factor, chunk_base, chunk_factor); |
766 | result[i].num_reg, result[i].bad ? "-" : "", | 763 | pr_cont("num_reg: %d \tlose cover RAM: %s%ld%c\n", |
767 | lose_base, lose_factor); | 764 | result[i].num_reg, result[i].bad ? "-" : "", |
765 | lose_base, lose_factor); | ||
768 | } | 766 | } |
769 | 767 | ||
770 | static int __init mtrr_search_optimal_index(void) | 768 | static int __init mtrr_search_optimal_index(void) |
771 | { | 769 | { |
772 | int i; | ||
773 | int num_reg_good; | 770 | int num_reg_good; |
774 | int index_good; | 771 | int index_good; |
772 | int i; | ||
775 | 773 | ||
776 | if (nr_mtrr_spare_reg >= num_var_ranges) | 774 | if (nr_mtrr_spare_reg >= num_var_ranges) |
777 | nr_mtrr_spare_reg = num_var_ranges - 1; | 775 | nr_mtrr_spare_reg = num_var_ranges - 1; |
776 | |||
778 | num_reg_good = -1; | 777 | num_reg_good = -1; |
779 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { | 778 | for (i = num_var_ranges - nr_mtrr_spare_reg; i > 0; i--) { |
780 | if (!min_loss_pfn[i]) | 779 | if (!min_loss_pfn[i]) |
@@ -796,24 +795,24 @@ static int __init mtrr_search_optimal_index(void) | |||
796 | return index_good; | 795 | return index_good; |
797 | } | 796 | } |
798 | 797 | ||
799 | |||
800 | int __init mtrr_cleanup(unsigned address_bits) | 798 | int __init mtrr_cleanup(unsigned address_bits) |
801 | { | 799 | { |
802 | unsigned long extra_remove_base, extra_remove_size; | 800 | unsigned long x_remove_base, x_remove_size; |
803 | unsigned long base, size, def, dummy; | 801 | unsigned long base, size, def, dummy; |
804 | mtrr_type type; | ||
805 | u64 chunk_size, gran_size; | 802 | u64 chunk_size, gran_size; |
803 | mtrr_type type; | ||
806 | int index_good; | 804 | int index_good; |
807 | int i; | 805 | int i; |
808 | 806 | ||
809 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) | 807 | if (!is_cpu(INTEL) || enable_mtrr_cleanup < 1) |
810 | return 0; | 808 | return 0; |
809 | |||
811 | rdmsr(MSR_MTRRdefType, def, dummy); | 810 | rdmsr(MSR_MTRRdefType, def, dummy); |
812 | def &= 0xff; | 811 | def &= 0xff; |
813 | if (def != MTRR_TYPE_UNCACHABLE) | 812 | if (def != MTRR_TYPE_UNCACHABLE) |
814 | return 0; | 813 | return 0; |
815 | 814 | ||
816 | /* get it and store it aside */ | 815 | /* Get it and store it aside: */ |
817 | memset(range_state, 0, sizeof(range_state)); | 816 | memset(range_state, 0, sizeof(range_state)); |
818 | for (i = 0; i < num_var_ranges; i++) { | 817 | for (i = 0; i < num_var_ranges; i++) { |
819 | mtrr_if->get(i, &base, &size, &type); | 818 | mtrr_if->get(i, &base, &size, &type); |
@@ -822,29 +821,28 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
822 | range_state[i].type = type; | 821 | range_state[i].type = type; |
823 | } | 822 | } |
824 | 823 | ||
825 | /* check if we need handle it and can handle it */ | 824 | /* Check if we need handle it and can handle it: */ |
826 | if (!mtrr_need_cleanup()) | 825 | if (!mtrr_need_cleanup()) |
827 | return 0; | 826 | return 0; |
828 | 827 | ||
829 | /* print original var MTRRs at first, for debugging: */ | 828 | /* Print original var MTRRs at first, for debugging: */ |
830 | printk(KERN_DEBUG "original variable MTRRs\n"); | 829 | printk(KERN_DEBUG "original variable MTRRs\n"); |
831 | print_out_mtrr_range_state(); | 830 | print_out_mtrr_range_state(); |
832 | 831 | ||
833 | memset(range, 0, sizeof(range)); | 832 | memset(range, 0, sizeof(range)); |
834 | extra_remove_size = 0; | 833 | x_remove_size = 0; |
835 | extra_remove_base = 1 << (32 - PAGE_SHIFT); | 834 | x_remove_base = 1 << (32 - PAGE_SHIFT); |
836 | if (mtrr_tom2) | 835 | if (mtrr_tom2) |
837 | extra_remove_size = | 836 | x_remove_size = (mtrr_tom2 >> PAGE_SHIFT) - x_remove_base; |
838 | (mtrr_tom2 >> PAGE_SHIFT) - extra_remove_base; | 837 | |
839 | nr_range = x86_get_mtrr_mem_range(range, 0, extra_remove_base, | 838 | nr_range = x86_get_mtrr_mem_range(range, 0, x_remove_base, x_remove_size); |
840 | extra_remove_size); | ||
841 | /* | 839 | /* |
842 | * [0, 1M) should always be coverred by var mtrr with WB | 840 | * [0, 1M) should always be covered by var mtrr with WB |
843 | * and fixed mtrrs should take effective before var mtrr for it | 841 | * and fixed mtrrs should take effect before var mtrr for it: |
844 | */ | 842 | */ |
845 | nr_range = add_range_with_merge(range, nr_range, 0, | 843 | nr_range = add_range_with_merge(range, nr_range, 0, |
846 | (1ULL<<(20 - PAGE_SHIFT)) - 1); | 844 | (1ULL<<(20 - PAGE_SHIFT)) - 1); |
847 | /* sort the ranges */ | 845 | /* Sort the ranges: */ |
848 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); | 846 | sort(range, nr_range, sizeof(struct res_range), cmp_range, NULL); |
849 | 847 | ||
850 | range_sums = sum_ranges(range, nr_range); | 848 | range_sums = sum_ranges(range, nr_range); |
@@ -854,7 +852,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
854 | if (mtrr_chunk_size && mtrr_gran_size) { | 852 | if (mtrr_chunk_size && mtrr_gran_size) { |
855 | i = 0; | 853 | i = 0; |
856 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, | 854 | mtrr_calc_range_state(mtrr_chunk_size, mtrr_gran_size, |
857 | extra_remove_base, extra_remove_size, i); | 855 | x_remove_base, x_remove_size, i); |
858 | 856 | ||
859 | mtrr_print_out_one_result(i); | 857 | mtrr_print_out_one_result(i); |
860 | 858 | ||
@@ -880,7 +878,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
880 | continue; | 878 | continue; |
881 | 879 | ||
882 | mtrr_calc_range_state(chunk_size, gran_size, | 880 | mtrr_calc_range_state(chunk_size, gran_size, |
883 | extra_remove_base, extra_remove_size, i); | 881 | x_remove_base, x_remove_size, i); |
884 | if (debug_print) { | 882 | if (debug_print) { |
885 | mtrr_print_out_one_result(i); | 883 | mtrr_print_out_one_result(i); |
886 | printk(KERN_INFO "\n"); | 884 | printk(KERN_INFO "\n"); |
@@ -890,7 +888,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
890 | } | 888 | } |
891 | } | 889 | } |
892 | 890 | ||
893 | /* try to find the optimal index */ | 891 | /* Try to find the optimal index: */ |
894 | index_good = mtrr_search_optimal_index(); | 892 | index_good = mtrr_search_optimal_index(); |
895 | 893 | ||
896 | if (index_good != -1) { | 894 | if (index_good != -1) { |
@@ -898,7 +896,7 @@ int __init mtrr_cleanup(unsigned address_bits) | |||
898 | i = index_good; | 896 | i = index_good; |
899 | mtrr_print_out_one_result(i); | 897 | mtrr_print_out_one_result(i); |
900 | 898 | ||
901 | /* convert ranges to var ranges state */ | 899 | /* Convert ranges to var ranges state: */ |
902 | chunk_size = result[i].chunk_sizek; | 900 | chunk_size = result[i].chunk_sizek; |
903 | chunk_size <<= 10; | 901 | chunk_size <<= 10; |
904 | gran_size = result[i].gran_sizek; | 902 | gran_size = result[i].gran_sizek; |
@@ -941,8 +939,8 @@ early_param("disable_mtrr_trim", disable_mtrr_trim_setup); | |||
941 | * Note this won't check if the MTRRs < 4GB where the magic bit doesn't | 939 | * Note this won't check if the MTRRs < 4GB where the magic bit doesn't |
942 | * apply to are wrong, but so far we don't know of any such case in the wild. | 940 | * apply to are wrong, but so far we don't know of any such case in the wild. |
943 | */ | 941 | */ |
944 | #define Tom2Enabled (1U << 21) | 942 | #define Tom2Enabled (1U << 21) |
945 | #define Tom2ForceMemTypeWB (1U << 22) | 943 | #define Tom2ForceMemTypeWB (1U << 22) |
946 | 944 | ||
947 | int __init amd_special_default_mtrr(void) | 945 | int __init amd_special_default_mtrr(void) |
948 | { | 946 | { |
@@ -952,7 +950,7 @@ int __init amd_special_default_mtrr(void) | |||
952 | return 0; | 950 | return 0; |
953 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) | 951 | if (boot_cpu_data.x86 < 0xf || boot_cpu_data.x86 > 0x11) |
954 | return 0; | 952 | return 0; |
955 | /* In case some hypervisor doesn't pass SYSCFG through */ | 953 | /* In case some hypervisor doesn't pass SYSCFG through: */ |
956 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) | 954 | if (rdmsr_safe(MSR_K8_SYSCFG, &l, &h) < 0) |
957 | return 0; | 955 | return 0; |
958 | /* | 956 | /* |
@@ -965,19 +963,21 @@ int __init amd_special_default_mtrr(void) | |||
965 | return 0; | 963 | return 0; |
966 | } | 964 | } |
967 | 965 | ||
968 | static u64 __init real_trim_memory(unsigned long start_pfn, | 966 | static u64 __init |
969 | unsigned long limit_pfn) | 967 | real_trim_memory(unsigned long start_pfn, unsigned long limit_pfn) |
970 | { | 968 | { |
971 | u64 trim_start, trim_size; | 969 | u64 trim_start, trim_size; |
970 | |||
972 | trim_start = start_pfn; | 971 | trim_start = start_pfn; |
973 | trim_start <<= PAGE_SHIFT; | 972 | trim_start <<= PAGE_SHIFT; |
973 | |||
974 | trim_size = limit_pfn; | 974 | trim_size = limit_pfn; |
975 | trim_size <<= PAGE_SHIFT; | 975 | trim_size <<= PAGE_SHIFT; |
976 | trim_size -= trim_start; | 976 | trim_size -= trim_start; |
977 | 977 | ||
978 | return e820_update_range(trim_start, trim_size, E820_RAM, | 978 | return e820_update_range(trim_start, trim_size, E820_RAM, E820_RESERVED); |
979 | E820_RESERVED); | ||
980 | } | 979 | } |
980 | |||
981 | /** | 981 | /** |
982 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs | 982 | * mtrr_trim_uncached_memory - trim RAM not covered by MTRRs |
983 | * @end_pfn: ending page frame number | 983 | * @end_pfn: ending page frame number |
@@ -985,7 +985,7 @@ static u64 __init real_trim_memory(unsigned long start_pfn, | |||
985 | * Some buggy BIOSes don't setup the MTRRs properly for systems with certain | 985 | * Some buggy BIOSes don't setup the MTRRs properly for systems with certain |
986 | * memory configurations. This routine checks that the highest MTRR matches | 986 | * memory configurations. This routine checks that the highest MTRR matches |
987 | * the end of memory, to make sure the MTRRs having a write back type cover | 987 | * the end of memory, to make sure the MTRRs having a write back type cover |
988 | * all of the memory the kernel is intending to use. If not, it'll trim any | 988 | * all of the memory the kernel is intending to use. If not, it'll trim any |
989 | * memory off the end by adjusting end_pfn, removing it from the kernel's | 989 | * memory off the end by adjusting end_pfn, removing it from the kernel's |
990 | * allocation pools, warning the user with an obnoxious message. | 990 | * allocation pools, warning the user with an obnoxious message. |
991 | */ | 991 | */ |
@@ -994,21 +994,22 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
994 | unsigned long i, base, size, highest_pfn = 0, def, dummy; | 994 | unsigned long i, base, size, highest_pfn = 0, def, dummy; |
995 | mtrr_type type; | 995 | mtrr_type type; |
996 | u64 total_trim_size; | 996 | u64 total_trim_size; |
997 | |||
998 | /* extra one for all 0 */ | 997 | /* extra one for all 0 */ |
999 | int num[MTRR_NUM_TYPES + 1]; | 998 | int num[MTRR_NUM_TYPES + 1]; |
999 | |||
1000 | /* | 1000 | /* |
1001 | * Make sure we only trim uncachable memory on machines that | 1001 | * Make sure we only trim uncachable memory on machines that |
1002 | * support the Intel MTRR architecture: | 1002 | * support the Intel MTRR architecture: |
1003 | */ | 1003 | */ |
1004 | if (!is_cpu(INTEL) || disable_mtrr_trim) | 1004 | if (!is_cpu(INTEL) || disable_mtrr_trim) |
1005 | return 0; | 1005 | return 0; |
1006 | |||
1006 | rdmsr(MSR_MTRRdefType, def, dummy); | 1007 | rdmsr(MSR_MTRRdefType, def, dummy); |
1007 | def &= 0xff; | 1008 | def &= 0xff; |
1008 | if (def != MTRR_TYPE_UNCACHABLE) | 1009 | if (def != MTRR_TYPE_UNCACHABLE) |
1009 | return 0; | 1010 | return 0; |
1010 | 1011 | ||
1011 | /* get it and store it aside */ | 1012 | /* Get it and store it aside: */ |
1012 | memset(range_state, 0, sizeof(range_state)); | 1013 | memset(range_state, 0, sizeof(range_state)); |
1013 | for (i = 0; i < num_var_ranges; i++) { | 1014 | for (i = 0; i < num_var_ranges; i++) { |
1014 | mtrr_if->get(i, &base, &size, &type); | 1015 | mtrr_if->get(i, &base, &size, &type); |
@@ -1017,7 +1018,7 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1017 | range_state[i].type = type; | 1018 | range_state[i].type = type; |
1018 | } | 1019 | } |
1019 | 1020 | ||
1020 | /* Find highest cached pfn */ | 1021 | /* Find highest cached pfn: */ |
1021 | for (i = 0; i < num_var_ranges; i++) { | 1022 | for (i = 0; i < num_var_ranges; i++) { |
1022 | type = range_state[i].type; | 1023 | type = range_state[i].type; |
1023 | if (type != MTRR_TYPE_WRBACK) | 1024 | if (type != MTRR_TYPE_WRBACK) |
@@ -1028,13 +1029,13 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1028 | highest_pfn = base + size; | 1029 | highest_pfn = base + size; |
1029 | } | 1030 | } |
1030 | 1031 | ||
1031 | /* kvm/qemu doesn't have mtrr set right, don't trim them all */ | 1032 | /* kvm/qemu doesn't have mtrr set right, don't trim them all: */ |
1032 | if (!highest_pfn) { | 1033 | if (!highest_pfn) { |
1033 | printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); | 1034 | printk(KERN_INFO "CPU MTRRs all blank - virtualized system.\n"); |
1034 | return 0; | 1035 | return 0; |
1035 | } | 1036 | } |
1036 | 1037 | ||
1037 | /* check entries number */ | 1038 | /* Check entries number: */ |
1038 | memset(num, 0, sizeof(num)); | 1039 | memset(num, 0, sizeof(num)); |
1039 | for (i = 0; i < num_var_ranges; i++) { | 1040 | for (i = 0; i < num_var_ranges; i++) { |
1040 | type = range_state[i].type; | 1041 | type = range_state[i].type; |
@@ -1046,11 +1047,11 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1046 | num[type]++; | 1047 | num[type]++; |
1047 | } | 1048 | } |
1048 | 1049 | ||
1049 | /* no entry for WB? */ | 1050 | /* No entry for WB? */ |
1050 | if (!num[MTRR_TYPE_WRBACK]) | 1051 | if (!num[MTRR_TYPE_WRBACK]) |
1051 | return 0; | 1052 | return 0; |
1052 | 1053 | ||
1053 | /* check if we only had WB and UC */ | 1054 | /* Check if we only had WB and UC: */ |
1054 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != | 1055 | if (num[MTRR_TYPE_WRBACK] + num[MTRR_TYPE_UNCACHABLE] != |
1055 | num_var_ranges - num[MTRR_NUM_TYPES]) | 1056 | num_var_ranges - num[MTRR_NUM_TYPES]) |
1056 | return 0; | 1057 | return 0; |
@@ -1066,31 +1067,31 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1066 | } | 1067 | } |
1067 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); | 1068 | nr_range = x86_get_mtrr_mem_range(range, nr_range, 0, 0); |
1068 | 1069 | ||
1070 | /* Check the head: */ | ||
1069 | total_trim_size = 0; | 1071 | total_trim_size = 0; |
1070 | /* check the head */ | ||
1071 | if (range[0].start) | 1072 | if (range[0].start) |
1072 | total_trim_size += real_trim_memory(0, range[0].start); | 1073 | total_trim_size += real_trim_memory(0, range[0].start); |
1073 | /* check the holes */ | 1074 | |
1075 | /* Check the holes: */ | ||
1074 | for (i = 0; i < nr_range - 1; i++) { | 1076 | for (i = 0; i < nr_range - 1; i++) { |
1075 | if (range[i].end + 1 < range[i+1].start) | 1077 | if (range[i].end + 1 < range[i+1].start) |
1076 | total_trim_size += real_trim_memory(range[i].end + 1, | 1078 | total_trim_size += real_trim_memory(range[i].end + 1, |
1077 | range[i+1].start); | 1079 | range[i+1].start); |
1078 | } | 1080 | } |
1079 | /* check the top */ | 1081 | |
1082 | /* Check the top: */ | ||
1080 | i = nr_range - 1; | 1083 | i = nr_range - 1; |
1081 | if (range[i].end + 1 < end_pfn) | 1084 | if (range[i].end + 1 < end_pfn) |
1082 | total_trim_size += real_trim_memory(range[i].end + 1, | 1085 | total_trim_size += real_trim_memory(range[i].end + 1, |
1083 | end_pfn); | 1086 | end_pfn); |
1084 | 1087 | ||
1085 | if (total_trim_size) { | 1088 | if (total_trim_size) { |
1086 | printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" | 1089 | pr_warning("WARNING: BIOS bug: CPU MTRRs don't cover all of memory, losing %lluMB of RAM.\n", total_trim_size >> 20); |
1087 | " all of memory, losing %lluMB of RAM.\n", | ||
1088 | total_trim_size >> 20); | ||
1089 | 1090 | ||
1090 | if (!changed_by_mtrr_cleanup) | 1091 | if (!changed_by_mtrr_cleanup) |
1091 | WARN_ON(1); | 1092 | WARN_ON(1); |
1092 | 1093 | ||
1093 | printk(KERN_INFO "update e820 for mtrr\n"); | 1094 | pr_info("update e820 for mtrr\n"); |
1094 | update_e820(); | 1095 | update_e820(); |
1095 | 1096 | ||
1096 | return 1; | 1097 | return 1; |
@@ -1098,4 +1099,3 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) | |||
1098 | 1099 | ||
1099 | return 0; | 1100 | return 0; |
1100 | } | 1101 | } |
1101 | |||
diff --git a/arch/x86/kernel/cpu/mtrr/cyrix.c b/arch/x86/kernel/cpu/mtrr/cyrix.c index ff14c320040c..228d982ce09c 100644 --- a/arch/x86/kernel/cpu/mtrr/cyrix.c +++ b/arch/x86/kernel/cpu/mtrr/cyrix.c | |||
@@ -1,38 +1,40 @@ | |||
1 | #include <linux/init.h> | 1 | #include <linux/init.h> |
2 | #include <linux/io.h> | ||
2 | #include <linux/mm.h> | 3 | #include <linux/mm.h> |
3 | #include <asm/mtrr.h> | 4 | |
4 | #include <asm/msr.h> | ||
5 | #include <asm/io.h> | ||
6 | #include <asm/processor-cyrix.h> | 5 | #include <asm/processor-cyrix.h> |
7 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
7 | #include <asm/mtrr.h> | ||
8 | #include <asm/msr.h> | ||
9 | |||
8 | #include "mtrr.h" | 10 | #include "mtrr.h" |
9 | 11 | ||
10 | static void | 12 | static void |
11 | cyrix_get_arr(unsigned int reg, unsigned long *base, | 13 | cyrix_get_arr(unsigned int reg, unsigned long *base, |
12 | unsigned long *size, mtrr_type * type) | 14 | unsigned long *size, mtrr_type * type) |
13 | { | 15 | { |
14 | unsigned long flags; | ||
15 | unsigned char arr, ccr3, rcr, shift; | 16 | unsigned char arr, ccr3, rcr, shift; |
17 | unsigned long flags; | ||
16 | 18 | ||
17 | arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ | 19 | arr = CX86_ARR_BASE + (reg << 1) + reg; /* avoid multiplication by 3 */ |
18 | 20 | ||
19 | /* Save flags and disable interrupts */ | ||
20 | local_irq_save(flags); | 21 | local_irq_save(flags); |
21 | 22 | ||
22 | ccr3 = getCx86(CX86_CCR3); | 23 | ccr3 = getCx86(CX86_CCR3); |
23 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ | 24 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ |
24 | ((unsigned char *) base)[3] = getCx86(arr); | 25 | ((unsigned char *)base)[3] = getCx86(arr); |
25 | ((unsigned char *) base)[2] = getCx86(arr + 1); | 26 | ((unsigned char *)base)[2] = getCx86(arr + 1); |
26 | ((unsigned char *) base)[1] = getCx86(arr + 2); | 27 | ((unsigned char *)base)[1] = getCx86(arr + 2); |
27 | rcr = getCx86(CX86_RCR_BASE + reg); | 28 | rcr = getCx86(CX86_RCR_BASE + reg); |
28 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ | 29 | setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ |
29 | 30 | ||
30 | /* Enable interrupts if it was enabled previously */ | ||
31 | local_irq_restore(flags); | 31 | local_irq_restore(flags); |
32 | |||
32 | shift = ((unsigned char *) base)[1] & 0x0f; | 33 | shift = ((unsigned char *) base)[1] & 0x0f; |
33 | *base >>= PAGE_SHIFT; | 34 | *base >>= PAGE_SHIFT; |
34 | 35 | ||
35 | /* Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 | 36 | /* |
37 | * Power of two, at least 4K on ARR0-ARR6, 256K on ARR7 | ||
36 | * Note: shift==0xf means 4G, this is unsupported. | 38 | * Note: shift==0xf means 4G, this is unsupported. |
37 | */ | 39 | */ |
38 | if (shift) | 40 | if (shift) |
@@ -76,17 +78,20 @@ cyrix_get_arr(unsigned int reg, unsigned long *base, | |||
76 | } | 78 | } |
77 | } | 79 | } |
78 | 80 | ||
81 | /* | ||
82 | * cyrix_get_free_region - get a free ARR. | ||
83 | * | ||
84 | * @base: the starting (base) address of the region. | ||
85 | * @size: the size (in bytes) of the region. | ||
86 | * | ||
87 | * Returns: the index of the region on success, else -1 on error. | ||
88 | */ | ||
79 | static int | 89 | static int |
80 | cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 90 | cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) |
81 | /* [SUMMARY] Get a free ARR. | ||
82 | <base> The starting (base) address of the region. | ||
83 | <size> The size (in bytes) of the region. | ||
84 | [RETURNS] The index of the region on success, else -1 on error. | ||
85 | */ | ||
86 | { | 91 | { |
87 | int i; | ||
88 | mtrr_type ltype; | ||
89 | unsigned long lbase, lsize; | 92 | unsigned long lbase, lsize; |
93 | mtrr_type ltype; | ||
94 | int i; | ||
90 | 95 | ||
91 | switch (replace_reg) { | 96 | switch (replace_reg) { |
92 | case 7: | 97 | case 7: |
@@ -107,14 +112,17 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) | |||
107 | cyrix_get_arr(7, &lbase, &lsize, <ype); | 112 | cyrix_get_arr(7, &lbase, &lsize, <ype); |
108 | if (lsize == 0) | 113 | if (lsize == 0) |
109 | return 7; | 114 | return 7; |
110 | /* Else try ARR0-ARR6 first */ | 115 | /* Else try ARR0-ARR6 first */ |
111 | } else { | 116 | } else { |
112 | for (i = 0; i < 7; i++) { | 117 | for (i = 0; i < 7; i++) { |
113 | cyrix_get_arr(i, &lbase, &lsize, <ype); | 118 | cyrix_get_arr(i, &lbase, &lsize, <ype); |
114 | if (lsize == 0) | 119 | if (lsize == 0) |
115 | return i; | 120 | return i; |
116 | } | 121 | } |
117 | /* ARR0-ARR6 isn't free, try ARR7 but its size must be at least 256K */ | 122 | /* |
123 | * ARR0-ARR6 isn't free | ||
124 | * try ARR7 but its size must be at least 256K | ||
125 | */ | ||
118 | cyrix_get_arr(i, &lbase, &lsize, <ype); | 126 | cyrix_get_arr(i, &lbase, &lsize, <ype); |
119 | if ((lsize == 0) && (size >= 0x40)) | 127 | if ((lsize == 0) && (size >= 0x40)) |
120 | return i; | 128 | return i; |
@@ -122,21 +130,22 @@ cyrix_get_free_region(unsigned long base, unsigned long size, int replace_reg) | |||
122 | return -ENOSPC; | 130 | return -ENOSPC; |
123 | } | 131 | } |
124 | 132 | ||
125 | static u32 cr4 = 0; | 133 | static u32 cr4, ccr3; |
126 | static u32 ccr3; | ||
127 | 134 | ||
128 | static void prepare_set(void) | 135 | static void prepare_set(void) |
129 | { | 136 | { |
130 | u32 cr0; | 137 | u32 cr0; |
131 | 138 | ||
132 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 139 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
133 | if ( cpu_has_pge ) { | 140 | if (cpu_has_pge) { |
134 | cr4 = read_cr4(); | 141 | cr4 = read_cr4(); |
135 | write_cr4(cr4 & ~X86_CR4_PGE); | 142 | write_cr4(cr4 & ~X86_CR4_PGE); |
136 | } | 143 | } |
137 | 144 | ||
138 | /* Disable and flush caches. Note that wbinvd flushes the TLBs as | 145 | /* |
139 | a side-effect */ | 146 | * Disable and flush caches. |
147 | * Note that wbinvd flushes the TLBs as a side-effect | ||
148 | */ | ||
140 | cr0 = read_cr0() | X86_CR0_CD; | 149 | cr0 = read_cr0() | X86_CR0_CD; |
141 | wbinvd(); | 150 | wbinvd(); |
142 | write_cr0(cr0); | 151 | write_cr0(cr0); |
@@ -147,22 +156,21 @@ static void prepare_set(void) | |||
147 | 156 | ||
148 | /* Cyrix ARRs - everything else was excluded at the top */ | 157 | /* Cyrix ARRs - everything else was excluded at the top */ |
149 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); | 158 | setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); |
150 | |||
151 | } | 159 | } |
152 | 160 | ||
153 | static void post_set(void) | 161 | static void post_set(void) |
154 | { | 162 | { |
155 | /* Flush caches and TLBs */ | 163 | /* Flush caches and TLBs */ |
156 | wbinvd(); | 164 | wbinvd(); |
157 | 165 | ||
158 | /* Cyrix ARRs - everything else was excluded at the top */ | 166 | /* Cyrix ARRs - everything else was excluded at the top */ |
159 | setCx86(CX86_CCR3, ccr3); | 167 | setCx86(CX86_CCR3, ccr3); |
160 | 168 | ||
161 | /* Enable caches */ | 169 | /* Enable caches */ |
162 | write_cr0(read_cr0() & 0xbfffffff); | 170 | write_cr0(read_cr0() & 0xbfffffff); |
163 | 171 | ||
164 | /* Restore value of CR4 */ | 172 | /* Restore value of CR4 */ |
165 | if ( cpu_has_pge ) | 173 | if (cpu_has_pge) |
166 | write_cr4(cr4); | 174 | write_cr4(cr4); |
167 | } | 175 | } |
168 | 176 | ||
@@ -178,7 +186,8 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, | |||
178 | size >>= 6; | 186 | size >>= 6; |
179 | 187 | ||
180 | size &= 0x7fff; /* make sure arr_size <= 14 */ | 188 | size &= 0x7fff; /* make sure arr_size <= 14 */ |
181 | for (arr_size = 0; size; arr_size++, size >>= 1) ; | 189 | for (arr_size = 0; size; arr_size++, size >>= 1) |
190 | ; | ||
182 | 191 | ||
183 | if (reg < 7) { | 192 | if (reg < 7) { |
184 | switch (type) { | 193 | switch (type) { |
@@ -215,18 +224,18 @@ static void cyrix_set_arr(unsigned int reg, unsigned long base, | |||
215 | prepare_set(); | 224 | prepare_set(); |
216 | 225 | ||
217 | base <<= PAGE_SHIFT; | 226 | base <<= PAGE_SHIFT; |
218 | setCx86(arr, ((unsigned char *) &base)[3]); | 227 | setCx86(arr + 0, ((unsigned char *)&base)[3]); |
219 | setCx86(arr + 1, ((unsigned char *) &base)[2]); | 228 | setCx86(arr + 1, ((unsigned char *)&base)[2]); |
220 | setCx86(arr + 2, (((unsigned char *) &base)[1]) | arr_size); | 229 | setCx86(arr + 2, (((unsigned char *)&base)[1]) | arr_size); |
221 | setCx86(CX86_RCR_BASE + reg, arr_type); | 230 | setCx86(CX86_RCR_BASE + reg, arr_type); |
222 | 231 | ||
223 | post_set(); | 232 | post_set(); |
224 | } | 233 | } |
225 | 234 | ||
226 | typedef struct { | 235 | typedef struct { |
227 | unsigned long base; | 236 | unsigned long base; |
228 | unsigned long size; | 237 | unsigned long size; |
229 | mtrr_type type; | 238 | mtrr_type type; |
230 | } arr_state_t; | 239 | } arr_state_t; |
231 | 240 | ||
232 | static arr_state_t arr_state[8] = { | 241 | static arr_state_t arr_state[8] = { |
@@ -247,16 +256,17 @@ static void cyrix_set_all(void) | |||
247 | setCx86(CX86_CCR0 + i, ccr_state[i]); | 256 | setCx86(CX86_CCR0 + i, ccr_state[i]); |
248 | for (; i < 7; i++) | 257 | for (; i < 7; i++) |
249 | setCx86(CX86_CCR4 + i, ccr_state[i]); | 258 | setCx86(CX86_CCR4 + i, ccr_state[i]); |
250 | for (i = 0; i < 8; i++) | 259 | |
251 | cyrix_set_arr(i, arr_state[i].base, | 260 | for (i = 0; i < 8; i++) { |
261 | cyrix_set_arr(i, arr_state[i].base, | ||
252 | arr_state[i].size, arr_state[i].type); | 262 | arr_state[i].size, arr_state[i].type); |
263 | } | ||
253 | 264 | ||
254 | post_set(); | 265 | post_set(); |
255 | } | 266 | } |
256 | 267 | ||
257 | static struct mtrr_ops cyrix_mtrr_ops = { | 268 | static struct mtrr_ops cyrix_mtrr_ops = { |
258 | .vendor = X86_VENDOR_CYRIX, | 269 | .vendor = X86_VENDOR_CYRIX, |
259 | // .init = cyrix_arr_init, | ||
260 | .set_all = cyrix_set_all, | 270 | .set_all = cyrix_set_all, |
261 | .set = cyrix_set_arr, | 271 | .set = cyrix_set_arr, |
262 | .get = cyrix_get_arr, | 272 | .get = cyrix_get_arr, |
@@ -270,5 +280,3 @@ int __init cyrix_init_mtrr(void) | |||
270 | set_mtrr_ops(&cyrix_mtrr_ops); | 280 | set_mtrr_ops(&cyrix_mtrr_ops); |
271 | return 0; | 281 | return 0; |
272 | } | 282 | } |
273 | |||
274 | //arch_initcall(cyrix_init_mtrr); | ||
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c index 0543f69f0b27..55da0c5f68dd 100644 --- a/arch/x86/kernel/cpu/mtrr/generic.c +++ b/arch/x86/kernel/cpu/mtrr/generic.c | |||
@@ -1,28 +1,34 @@ | |||
1 | /* This only handles 32bit MTRR on 32bit hosts. This is strictly wrong | 1 | /* |
2 | because MTRRs can span upto 40 bits (36bits on most modern x86) */ | 2 | * This only handles 32bit MTRR on 32bit hosts. This is strictly wrong |
3 | * because MTRRs can span upto 40 bits (36bits on most modern x86) | ||
4 | */ | ||
5 | #define DEBUG | ||
6 | |||
7 | #include <linux/module.h> | ||
3 | #include <linux/init.h> | 8 | #include <linux/init.h> |
4 | #include <linux/slab.h> | 9 | #include <linux/slab.h> |
10 | #include <linux/io.h> | ||
5 | #include <linux/mm.h> | 11 | #include <linux/mm.h> |
6 | #include <linux/module.h> | 12 | |
7 | #include <asm/io.h> | ||
8 | #include <asm/mtrr.h> | ||
9 | #include <asm/msr.h> | ||
10 | #include <asm/system.h> | ||
11 | #include <asm/cpufeature.h> | ||
12 | #include <asm/processor-flags.h> | 13 | #include <asm/processor-flags.h> |
14 | #include <asm/cpufeature.h> | ||
13 | #include <asm/tlbflush.h> | 15 | #include <asm/tlbflush.h> |
16 | #include <asm/system.h> | ||
17 | #include <asm/mtrr.h> | ||
18 | #include <asm/msr.h> | ||
14 | #include <asm/pat.h> | 19 | #include <asm/pat.h> |
20 | |||
15 | #include "mtrr.h" | 21 | #include "mtrr.h" |
16 | 22 | ||
17 | struct fixed_range_block { | 23 | struct fixed_range_block { |
18 | int base_msr; /* start address of an MTRR block */ | 24 | int base_msr; /* start address of an MTRR block */ |
19 | int ranges; /* number of MTRRs in this block */ | 25 | int ranges; /* number of MTRRs in this block */ |
20 | }; | 26 | }; |
21 | 27 | ||
22 | static struct fixed_range_block fixed_range_blocks[] = { | 28 | static struct fixed_range_block fixed_range_blocks[] = { |
23 | { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ | 29 | { MSR_MTRRfix64K_00000, 1 }, /* one 64k MTRR */ |
24 | { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ | 30 | { MSR_MTRRfix16K_80000, 2 }, /* two 16k MTRRs */ |
25 | { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ | 31 | { MSR_MTRRfix4K_C0000, 8 }, /* eight 4k MTRRs */ |
26 | {} | 32 | {} |
27 | }; | 33 | }; |
28 | 34 | ||
@@ -30,10 +36,10 @@ static unsigned long smp_changes_mask; | |||
30 | static int mtrr_state_set; | 36 | static int mtrr_state_set; |
31 | u64 mtrr_tom2; | 37 | u64 mtrr_tom2; |
32 | 38 | ||
33 | struct mtrr_state_type mtrr_state = {}; | 39 | struct mtrr_state_type mtrr_state; |
34 | EXPORT_SYMBOL_GPL(mtrr_state); | 40 | EXPORT_SYMBOL_GPL(mtrr_state); |
35 | 41 | ||
36 | /** | 42 | /* |
37 | * BIOS is expected to clear MtrrFixDramModEn bit, see for example | 43 | * BIOS is expected to clear MtrrFixDramModEn bit, see for example |
38 | * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD | 44 | * "BIOS and Kernel Developer's Guide for the AMD Athlon 64 and AMD |
39 | * Opteron Processors" (26094 Rev. 3.30 February 2006), section | 45 | * Opteron Processors" (26094 Rev. 3.30 February 2006), section |
@@ -104,9 +110,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
104 | * Look of multiple ranges matching this address and pick type | 110 | * Look of multiple ranges matching this address and pick type |
105 | * as per MTRR precedence | 111 | * as per MTRR precedence |
106 | */ | 112 | */ |
107 | if (!(mtrr_state.enabled & 2)) { | 113 | if (!(mtrr_state.enabled & 2)) |
108 | return mtrr_state.def_type; | 114 | return mtrr_state.def_type; |
109 | } | ||
110 | 115 | ||
111 | prev_match = 0xFF; | 116 | prev_match = 0xFF; |
112 | for (i = 0; i < num_var_ranges; ++i) { | 117 | for (i = 0; i < num_var_ranges; ++i) { |
@@ -125,9 +130,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
125 | if (start_state != end_state) | 130 | if (start_state != end_state) |
126 | return 0xFE; | 131 | return 0xFE; |
127 | 132 | ||
128 | if ((start & mask) != (base & mask)) { | 133 | if ((start & mask) != (base & mask)) |
129 | continue; | 134 | continue; |
130 | } | ||
131 | 135 | ||
132 | curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; | 136 | curr_match = mtrr_state.var_ranges[i].base_lo & 0xff; |
133 | if (prev_match == 0xFF) { | 137 | if (prev_match == 0xFF) { |
@@ -148,9 +152,8 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
148 | curr_match = MTRR_TYPE_WRTHROUGH; | 152 | curr_match = MTRR_TYPE_WRTHROUGH; |
149 | } | 153 | } |
150 | 154 | ||
151 | if (prev_match != curr_match) { | 155 | if (prev_match != curr_match) |
152 | return MTRR_TYPE_UNCACHABLE; | 156 | return MTRR_TYPE_UNCACHABLE; |
153 | } | ||
154 | } | 157 | } |
155 | 158 | ||
156 | if (mtrr_tom2) { | 159 | if (mtrr_tom2) { |
@@ -164,7 +167,7 @@ u8 mtrr_type_lookup(u64 start, u64 end) | |||
164 | return mtrr_state.def_type; | 167 | return mtrr_state.def_type; |
165 | } | 168 | } |
166 | 169 | ||
167 | /* Get the MSR pair relating to a var range */ | 170 | /* Get the MSR pair relating to a var range */ |
168 | static void | 171 | static void |
169 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | 172 | get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) |
170 | { | 173 | { |
@@ -172,7 +175,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | |||
172 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); | 175 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); |
173 | } | 176 | } |
174 | 177 | ||
175 | /* fill the MSR pair relating to a var range */ | 178 | /* Fill the MSR pair relating to a var range */ |
176 | void fill_mtrr_var_range(unsigned int index, | 179 | void fill_mtrr_var_range(unsigned int index, |
177 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) | 180 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi) |
178 | { | 181 | { |
@@ -186,10 +189,9 @@ void fill_mtrr_var_range(unsigned int index, | |||
186 | vr[index].mask_hi = mask_hi; | 189 | vr[index].mask_hi = mask_hi; |
187 | } | 190 | } |
188 | 191 | ||
189 | static void | 192 | static void get_fixed_ranges(mtrr_type *frs) |
190 | get_fixed_ranges(mtrr_type * frs) | ||
191 | { | 193 | { |
192 | unsigned int *p = (unsigned int *) frs; | 194 | unsigned int *p = (unsigned int *)frs; |
193 | int i; | 195 | int i; |
194 | 196 | ||
195 | k8_check_syscfg_dram_mod_en(); | 197 | k8_check_syscfg_dram_mod_en(); |
@@ -217,22 +219,22 @@ static void __init print_fixed_last(void) | |||
217 | if (!last_fixed_end) | 219 | if (!last_fixed_end) |
218 | return; | 220 | return; |
219 | 221 | ||
220 | printk(KERN_DEBUG " %05X-%05X %s\n", last_fixed_start, | 222 | pr_debug(" %05X-%05X %s\n", last_fixed_start, |
221 | last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); | 223 | last_fixed_end - 1, mtrr_attrib_to_str(last_fixed_type)); |
222 | 224 | ||
223 | last_fixed_end = 0; | 225 | last_fixed_end = 0; |
224 | } | 226 | } |
225 | 227 | ||
226 | static void __init update_fixed_last(unsigned base, unsigned end, | 228 | static void __init update_fixed_last(unsigned base, unsigned end, |
227 | mtrr_type type) | 229 | mtrr_type type) |
228 | { | 230 | { |
229 | last_fixed_start = base; | 231 | last_fixed_start = base; |
230 | last_fixed_end = end; | 232 | last_fixed_end = end; |
231 | last_fixed_type = type; | 233 | last_fixed_type = type; |
232 | } | 234 | } |
233 | 235 | ||
234 | static void __init print_fixed(unsigned base, unsigned step, | 236 | static void __init |
235 | const mtrr_type *types) | 237 | print_fixed(unsigned base, unsigned step, const mtrr_type *types) |
236 | { | 238 | { |
237 | unsigned i; | 239 | unsigned i; |
238 | 240 | ||
@@ -259,54 +261,55 @@ static void __init print_mtrr_state(void) | |||
259 | unsigned int i; | 261 | unsigned int i; |
260 | int high_width; | 262 | int high_width; |
261 | 263 | ||
262 | printk(KERN_DEBUG "MTRR default type: %s\n", | 264 | pr_debug("MTRR default type: %s\n", |
263 | mtrr_attrib_to_str(mtrr_state.def_type)); | 265 | mtrr_attrib_to_str(mtrr_state.def_type)); |
264 | if (mtrr_state.have_fixed) { | 266 | if (mtrr_state.have_fixed) { |
265 | printk(KERN_DEBUG "MTRR fixed ranges %sabled:\n", | 267 | pr_debug("MTRR fixed ranges %sabled:\n", |
266 | mtrr_state.enabled & 1 ? "en" : "dis"); | 268 | mtrr_state.enabled & 1 ? "en" : "dis"); |
267 | print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); | 269 | print_fixed(0x00000, 0x10000, mtrr_state.fixed_ranges + 0); |
268 | for (i = 0; i < 2; ++i) | 270 | for (i = 0; i < 2; ++i) |
269 | print_fixed(0x80000 + i * 0x20000, 0x04000, mtrr_state.fixed_ranges + (i + 1) * 8); | 271 | print_fixed(0x80000 + i * 0x20000, 0x04000, |
272 | mtrr_state.fixed_ranges + (i + 1) * 8); | ||
270 | for (i = 0; i < 8; ++i) | 273 | for (i = 0; i < 8; ++i) |
271 | print_fixed(0xC0000 + i * 0x08000, 0x01000, mtrr_state.fixed_ranges + (i + 3) * 8); | 274 | print_fixed(0xC0000 + i * 0x08000, 0x01000, |
275 | mtrr_state.fixed_ranges + (i + 3) * 8); | ||
272 | 276 | ||
273 | /* tail */ | 277 | /* tail */ |
274 | print_fixed_last(); | 278 | print_fixed_last(); |
275 | } | 279 | } |
276 | printk(KERN_DEBUG "MTRR variable ranges %sabled:\n", | 280 | pr_debug("MTRR variable ranges %sabled:\n", |
277 | mtrr_state.enabled & 2 ? "en" : "dis"); | 281 | mtrr_state.enabled & 2 ? "en" : "dis"); |
278 | if (size_or_mask & 0xffffffffUL) | 282 | if (size_or_mask & 0xffffffffUL) |
279 | high_width = ffs(size_or_mask & 0xffffffffUL) - 1; | 283 | high_width = ffs(size_or_mask & 0xffffffffUL) - 1; |
280 | else | 284 | else |
281 | high_width = ffs(size_or_mask>>32) + 32 - 1; | 285 | high_width = ffs(size_or_mask>>32) + 32 - 1; |
282 | high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; | 286 | high_width = (high_width - (32 - PAGE_SHIFT) + 3) / 4; |
287 | |||
283 | for (i = 0; i < num_var_ranges; ++i) { | 288 | for (i = 0; i < num_var_ranges; ++i) { |
284 | if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) | 289 | if (mtrr_state.var_ranges[i].mask_lo & (1 << 11)) |
285 | printk(KERN_DEBUG " %u base %0*X%05X000 mask %0*X%05X000 %s\n", | 290 | pr_debug(" %u base %0*X%05X000 mask %0*X%05X000 %s\n", |
286 | i, | 291 | i, |
287 | high_width, | 292 | high_width, |
288 | mtrr_state.var_ranges[i].base_hi, | 293 | mtrr_state.var_ranges[i].base_hi, |
289 | mtrr_state.var_ranges[i].base_lo >> 12, | 294 | mtrr_state.var_ranges[i].base_lo >> 12, |
290 | high_width, | 295 | high_width, |
291 | mtrr_state.var_ranges[i].mask_hi, | 296 | mtrr_state.var_ranges[i].mask_hi, |
292 | mtrr_state.var_ranges[i].mask_lo >> 12, | 297 | mtrr_state.var_ranges[i].mask_lo >> 12, |
293 | mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); | 298 | mtrr_attrib_to_str(mtrr_state.var_ranges[i].base_lo & 0xff)); |
294 | else | 299 | else |
295 | printk(KERN_DEBUG " %u disabled\n", i); | 300 | pr_debug(" %u disabled\n", i); |
296 | } | ||
297 | if (mtrr_tom2) { | ||
298 | printk(KERN_DEBUG "TOM2: %016llx aka %lldM\n", | ||
299 | mtrr_tom2, mtrr_tom2>>20); | ||
300 | } | 301 | } |
302 | if (mtrr_tom2) | ||
303 | pr_debug("TOM2: %016llx aka %lldM\n", mtrr_tom2, mtrr_tom2>>20); | ||
301 | } | 304 | } |
302 | 305 | ||
303 | /* Grab all of the MTRR state for this CPU into *state */ | 306 | /* Grab all of the MTRR state for this CPU into *state */ |
304 | void __init get_mtrr_state(void) | 307 | void __init get_mtrr_state(void) |
305 | { | 308 | { |
306 | unsigned int i; | ||
307 | struct mtrr_var_range *vrs; | 309 | struct mtrr_var_range *vrs; |
308 | unsigned lo, dummy; | ||
309 | unsigned long flags; | 310 | unsigned long flags; |
311 | unsigned lo, dummy; | ||
312 | unsigned int i; | ||
310 | 313 | ||
311 | vrs = mtrr_state.var_ranges; | 314 | vrs = mtrr_state.var_ranges; |
312 | 315 | ||
@@ -324,6 +327,7 @@ void __init get_mtrr_state(void) | |||
324 | 327 | ||
325 | if (amd_special_default_mtrr()) { | 328 | if (amd_special_default_mtrr()) { |
326 | unsigned low, high; | 329 | unsigned low, high; |
330 | |||
327 | /* TOP_MEM2 */ | 331 | /* TOP_MEM2 */ |
328 | rdmsr(MSR_K8_TOP_MEM2, low, high); | 332 | rdmsr(MSR_K8_TOP_MEM2, low, high); |
329 | mtrr_tom2 = high; | 333 | mtrr_tom2 = high; |
@@ -344,10 +348,9 @@ void __init get_mtrr_state(void) | |||
344 | 348 | ||
345 | post_set(); | 349 | post_set(); |
346 | local_irq_restore(flags); | 350 | local_irq_restore(flags); |
347 | |||
348 | } | 351 | } |
349 | 352 | ||
350 | /* Some BIOS's are fucked and don't set all MTRRs the same! */ | 353 | /* Some BIOS's are messed up and don't set all MTRRs the same! */ |
351 | void __init mtrr_state_warn(void) | 354 | void __init mtrr_state_warn(void) |
352 | { | 355 | { |
353 | unsigned long mask = smp_changes_mask; | 356 | unsigned long mask = smp_changes_mask; |
@@ -355,28 +358,33 @@ void __init mtrr_state_warn(void) | |||
355 | if (!mask) | 358 | if (!mask) |
356 | return; | 359 | return; |
357 | if (mask & MTRR_CHANGE_MASK_FIXED) | 360 | if (mask & MTRR_CHANGE_MASK_FIXED) |
358 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent fixed MTRR settings\n"); | 361 | pr_warning("mtrr: your CPUs had inconsistent fixed MTRR settings\n"); |
359 | if (mask & MTRR_CHANGE_MASK_VARIABLE) | 362 | if (mask & MTRR_CHANGE_MASK_VARIABLE) |
360 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent variable MTRR settings\n"); | 363 | pr_warning("mtrr: your CPUs had inconsistent variable MTRR settings\n"); |
361 | if (mask & MTRR_CHANGE_MASK_DEFTYPE) | 364 | if (mask & MTRR_CHANGE_MASK_DEFTYPE) |
362 | printk(KERN_WARNING "mtrr: your CPUs had inconsistent MTRRdefType settings\n"); | 365 | pr_warning("mtrr: your CPUs had inconsistent MTRRdefType settings\n"); |
366 | |||
363 | printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); | 367 | printk(KERN_INFO "mtrr: probably your BIOS does not setup all CPUs.\n"); |
364 | printk(KERN_INFO "mtrr: corrected configuration.\n"); | 368 | printk(KERN_INFO "mtrr: corrected configuration.\n"); |
365 | } | 369 | } |
366 | 370 | ||
367 | /* Doesn't attempt to pass an error out to MTRR users | 371 | /* |
368 | because it's quite complicated in some cases and probably not | 372 | * Doesn't attempt to pass an error out to MTRR users |
369 | worth it because the best error handling is to ignore it. */ | 373 | * because it's quite complicated in some cases and probably not |
374 | * worth it because the best error handling is to ignore it. | ||
375 | */ | ||
370 | void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) | 376 | void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) |
371 | { | 377 | { |
372 | if (wrmsr_safe(msr, a, b) < 0) | 378 | if (wrmsr_safe(msr, a, b) < 0) { |
373 | printk(KERN_ERR | 379 | printk(KERN_ERR |
374 | "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", | 380 | "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", |
375 | smp_processor_id(), msr, a, b); | 381 | smp_processor_id(), msr, a, b); |
382 | } | ||
376 | } | 383 | } |
377 | 384 | ||
378 | /** | 385 | /** |
379 | * set_fixed_range - checks & updates a fixed-range MTRR if it differs from the value it should have | 386 | * set_fixed_range - checks & updates a fixed-range MTRR if it |
387 | * differs from the value it should have | ||
380 | * @msr: MSR address of the MTTR which should be checked and updated | 388 | * @msr: MSR address of the MTTR which should be checked and updated |
381 | * @changed: pointer which indicates whether the MTRR needed to be changed | 389 | * @changed: pointer which indicates whether the MTRR needed to be changed |
382 | * @msrwords: pointer to the MSR values which the MSR should have | 390 | * @msrwords: pointer to the MSR values which the MSR should have |
@@ -401,20 +409,23 @@ static void set_fixed_range(int msr, bool *changed, unsigned int *msrwords) | |||
401 | * | 409 | * |
402 | * Returns: The index of the region on success, else negative on error. | 410 | * Returns: The index of the region on success, else negative on error. |
403 | */ | 411 | */ |
404 | int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 412 | int |
413 | generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) | ||
405 | { | 414 | { |
406 | int i, max; | ||
407 | mtrr_type ltype; | ||
408 | unsigned long lbase, lsize; | 415 | unsigned long lbase, lsize; |
416 | mtrr_type ltype; | ||
417 | int i, max; | ||
409 | 418 | ||
410 | max = num_var_ranges; | 419 | max = num_var_ranges; |
411 | if (replace_reg >= 0 && replace_reg < max) | 420 | if (replace_reg >= 0 && replace_reg < max) |
412 | return replace_reg; | 421 | return replace_reg; |
422 | |||
413 | for (i = 0; i < max; ++i) { | 423 | for (i = 0; i < max; ++i) { |
414 | mtrr_if->get(i, &lbase, &lsize, <ype); | 424 | mtrr_if->get(i, &lbase, &lsize, <ype); |
415 | if (lsize == 0) | 425 | if (lsize == 0) |
416 | return i; | 426 | return i; |
417 | } | 427 | } |
428 | |||
418 | return -ENOSPC; | 429 | return -ENOSPC; |
419 | } | 430 | } |
420 | 431 | ||
@@ -434,7 +445,7 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
434 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); | 445 | rdmsr(MTRRphysMask_MSR(reg), mask_lo, mask_hi); |
435 | 446 | ||
436 | if ((mask_lo & 0x800) == 0) { | 447 | if ((mask_lo & 0x800) == 0) { |
437 | /* Invalid (i.e. free) range */ | 448 | /* Invalid (i.e. free) range */ |
438 | *base = 0; | 449 | *base = 0; |
439 | *size = 0; | 450 | *size = 0; |
440 | *type = 0; | 451 | *type = 0; |
@@ -471,27 +482,31 @@ out_put_cpu: | |||
471 | } | 482 | } |
472 | 483 | ||
473 | /** | 484 | /** |
474 | * set_fixed_ranges - checks & updates the fixed-range MTRRs if they differ from the saved set | 485 | * set_fixed_ranges - checks & updates the fixed-range MTRRs if they |
486 | * differ from the saved set | ||
475 | * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() | 487 | * @frs: pointer to fixed-range MTRR values, saved by get_fixed_ranges() |
476 | */ | 488 | */ |
477 | static int set_fixed_ranges(mtrr_type * frs) | 489 | static int set_fixed_ranges(mtrr_type *frs) |
478 | { | 490 | { |
479 | unsigned long long *saved = (unsigned long long *) frs; | 491 | unsigned long long *saved = (unsigned long long *)frs; |
480 | bool changed = false; | 492 | bool changed = false; |
481 | int block=-1, range; | 493 | int block = -1, range; |
482 | 494 | ||
483 | k8_check_syscfg_dram_mod_en(); | 495 | k8_check_syscfg_dram_mod_en(); |
484 | 496 | ||
485 | while (fixed_range_blocks[++block].ranges) | 497 | while (fixed_range_blocks[++block].ranges) { |
486 | for (range=0; range < fixed_range_blocks[block].ranges; range++) | 498 | for (range = 0; range < fixed_range_blocks[block].ranges; range++) |
487 | set_fixed_range(fixed_range_blocks[block].base_msr + range, | 499 | set_fixed_range(fixed_range_blocks[block].base_msr + range, |
488 | &changed, (unsigned int *) saved++); | 500 | &changed, (unsigned int *)saved++); |
501 | } | ||
489 | 502 | ||
490 | return changed; | 503 | return changed; |
491 | } | 504 | } |
492 | 505 | ||
493 | /* Set the MSR pair relating to a var range. Returns TRUE if | 506 | /* |
494 | changes are made */ | 507 | * Set the MSR pair relating to a var range. |
508 | * Returns true if changes are made. | ||
509 | */ | ||
495 | static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) | 510 | static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) |
496 | { | 511 | { |
497 | unsigned int lo, hi; | 512 | unsigned int lo, hi; |
@@ -501,6 +516,7 @@ static bool set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) | |||
501 | if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) | 516 | if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) |
502 | || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != | 517 | || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != |
503 | (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { | 518 | (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { |
519 | |||
504 | mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); | 520 | mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); |
505 | changed = true; | 521 | changed = true; |
506 | } | 522 | } |
@@ -526,21 +542,26 @@ static u32 deftype_lo, deftype_hi; | |||
526 | */ | 542 | */ |
527 | static unsigned long set_mtrr_state(void) | 543 | static unsigned long set_mtrr_state(void) |
528 | { | 544 | { |
529 | unsigned int i; | ||
530 | unsigned long change_mask = 0; | 545 | unsigned long change_mask = 0; |
546 | unsigned int i; | ||
531 | 547 | ||
532 | for (i = 0; i < num_var_ranges; i++) | 548 | for (i = 0; i < num_var_ranges; i++) { |
533 | if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) | 549 | if (set_mtrr_var_ranges(i, &mtrr_state.var_ranges[i])) |
534 | change_mask |= MTRR_CHANGE_MASK_VARIABLE; | 550 | change_mask |= MTRR_CHANGE_MASK_VARIABLE; |
551 | } | ||
535 | 552 | ||
536 | if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) | 553 | if (mtrr_state.have_fixed && set_fixed_ranges(mtrr_state.fixed_ranges)) |
537 | change_mask |= MTRR_CHANGE_MASK_FIXED; | 554 | change_mask |= MTRR_CHANGE_MASK_FIXED; |
538 | 555 | ||
539 | /* Set_mtrr_restore restores the old value of MTRRdefType, | 556 | /* |
540 | so to set it we fiddle with the saved value */ | 557 | * Set_mtrr_restore restores the old value of MTRRdefType, |
558 | * so to set it we fiddle with the saved value: | ||
559 | */ | ||
541 | if ((deftype_lo & 0xff) != mtrr_state.def_type | 560 | if ((deftype_lo & 0xff) != mtrr_state.def_type |
542 | || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { | 561 | || ((deftype_lo & 0xc00) >> 10) != mtrr_state.enabled) { |
543 | deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | (mtrr_state.enabled << 10); | 562 | |
563 | deftype_lo = (deftype_lo & ~0xcff) | mtrr_state.def_type | | ||
564 | (mtrr_state.enabled << 10); | ||
544 | change_mask |= MTRR_CHANGE_MASK_DEFTYPE; | 565 | change_mask |= MTRR_CHANGE_MASK_DEFTYPE; |
545 | } | 566 | } |
546 | 567 | ||
@@ -548,33 +569,36 @@ static unsigned long set_mtrr_state(void) | |||
548 | } | 569 | } |
549 | 570 | ||
550 | 571 | ||
551 | static unsigned long cr4 = 0; | 572 | static unsigned long cr4; |
552 | static DEFINE_SPINLOCK(set_atomicity_lock); | 573 | static DEFINE_SPINLOCK(set_atomicity_lock); |
553 | 574 | ||
554 | /* | 575 | /* |
555 | * Since we are disabling the cache don't allow any interrupts - they | 576 | * Since we are disabling the cache don't allow any interrupts, |
556 | * would run extremely slow and would only increase the pain. The caller must | 577 | * they would run extremely slow and would only increase the pain. |
557 | * ensure that local interrupts are disabled and are reenabled after post_set() | 578 | * |
558 | * has been called. | 579 | * The caller must ensure that local interrupts are disabled and |
580 | * are reenabled after post_set() has been called. | ||
559 | */ | 581 | */ |
560 | |||
561 | static void prepare_set(void) __acquires(set_atomicity_lock) | 582 | static void prepare_set(void) __acquires(set_atomicity_lock) |
562 | { | 583 | { |
563 | unsigned long cr0; | 584 | unsigned long cr0; |
564 | 585 | ||
565 | /* Note that this is not ideal, since the cache is only flushed/disabled | 586 | /* |
566 | for this CPU while the MTRRs are changed, but changing this requires | 587 | * Note that this is not ideal |
567 | more invasive changes to the way the kernel boots */ | 588 | * since the cache is only flushed/disabled for this CPU while the |
589 | * MTRRs are changed, but changing this requires more invasive | ||
590 | * changes to the way the kernel boots | ||
591 | */ | ||
568 | 592 | ||
569 | spin_lock(&set_atomicity_lock); | 593 | spin_lock(&set_atomicity_lock); |
570 | 594 | ||
571 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ | 595 | /* Enter the no-fill (CD=1, NW=0) cache mode and flush caches. */ |
572 | cr0 = read_cr0() | X86_CR0_CD; | 596 | cr0 = read_cr0() | X86_CR0_CD; |
573 | write_cr0(cr0); | 597 | write_cr0(cr0); |
574 | wbinvd(); | 598 | wbinvd(); |
575 | 599 | ||
576 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 600 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
577 | if ( cpu_has_pge ) { | 601 | if (cpu_has_pge) { |
578 | cr4 = read_cr4(); | 602 | cr4 = read_cr4(); |
579 | write_cr4(cr4 & ~X86_CR4_PGE); | 603 | write_cr4(cr4 & ~X86_CR4_PGE); |
580 | } | 604 | } |
@@ -582,26 +606,26 @@ static void prepare_set(void) __acquires(set_atomicity_lock) | |||
582 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ | 606 | /* Flush all TLBs via a mov %cr3, %reg; mov %reg, %cr3 */ |
583 | __flush_tlb(); | 607 | __flush_tlb(); |
584 | 608 | ||
585 | /* Save MTRR state */ | 609 | /* Save MTRR state */ |
586 | rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); | 610 | rdmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); |
587 | 611 | ||
588 | /* Disable MTRRs, and set the default type to uncached */ | 612 | /* Disable MTRRs, and set the default type to uncached */ |
589 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); | 613 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo & ~0xcff, deftype_hi); |
590 | } | 614 | } |
591 | 615 | ||
592 | static void post_set(void) __releases(set_atomicity_lock) | 616 | static void post_set(void) __releases(set_atomicity_lock) |
593 | { | 617 | { |
594 | /* Flush TLBs (no need to flush caches - they are disabled) */ | 618 | /* Flush TLBs (no need to flush caches - they are disabled) */ |
595 | __flush_tlb(); | 619 | __flush_tlb(); |
596 | 620 | ||
597 | /* Intel (P6) standard MTRRs */ | 621 | /* Intel (P6) standard MTRRs */ |
598 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); | 622 | mtrr_wrmsr(MSR_MTRRdefType, deftype_lo, deftype_hi); |
599 | 623 | ||
600 | /* Enable caches */ | 624 | /* Enable caches */ |
601 | write_cr0(read_cr0() & 0xbfffffff); | 625 | write_cr0(read_cr0() & 0xbfffffff); |
602 | 626 | ||
603 | /* Restore value of CR4 */ | 627 | /* Restore value of CR4 */ |
604 | if ( cpu_has_pge ) | 628 | if (cpu_has_pge) |
605 | write_cr4(cr4); | 629 | write_cr4(cr4); |
606 | spin_unlock(&set_atomicity_lock); | 630 | spin_unlock(&set_atomicity_lock); |
607 | } | 631 | } |
@@ -623,24 +647,27 @@ static void generic_set_all(void) | |||
623 | post_set(); | 647 | post_set(); |
624 | local_irq_restore(flags); | 648 | local_irq_restore(flags); |
625 | 649 | ||
626 | /* Use the atomic bitops to update the global mask */ | 650 | /* Use the atomic bitops to update the global mask */ |
627 | for (count = 0; count < sizeof mask * 8; ++count) { | 651 | for (count = 0; count < sizeof mask * 8; ++count) { |
628 | if (mask & 0x01) | 652 | if (mask & 0x01) |
629 | set_bit(count, &smp_changes_mask); | 653 | set_bit(count, &smp_changes_mask); |
630 | mask >>= 1; | 654 | mask >>= 1; |
631 | } | 655 | } |
632 | 656 | ||
633 | } | 657 | } |
634 | 658 | ||
659 | /** | ||
660 | * generic_set_mtrr - set variable MTRR register on the local CPU. | ||
661 | * | ||
662 | * @reg: The register to set. | ||
663 | * @base: The base address of the region. | ||
664 | * @size: The size of the region. If this is 0 the region is disabled. | ||
665 | * @type: The type of the region. | ||
666 | * | ||
667 | * Returns nothing. | ||
668 | */ | ||
635 | static void generic_set_mtrr(unsigned int reg, unsigned long base, | 669 | static void generic_set_mtrr(unsigned int reg, unsigned long base, |
636 | unsigned long size, mtrr_type type) | 670 | unsigned long size, mtrr_type type) |
637 | /* [SUMMARY] Set variable MTRR register on the local CPU. | ||
638 | <reg> The register to set. | ||
639 | <base> The base address of the region. | ||
640 | <size> The size of the region. If this is 0 the region is disabled. | ||
641 | <type> The type of the region. | ||
642 | [RETURNS] Nothing. | ||
643 | */ | ||
644 | { | 671 | { |
645 | unsigned long flags; | 672 | unsigned long flags; |
646 | struct mtrr_var_range *vr; | 673 | struct mtrr_var_range *vr; |
@@ -651,8 +678,10 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, | |||
651 | prepare_set(); | 678 | prepare_set(); |
652 | 679 | ||
653 | if (size == 0) { | 680 | if (size == 0) { |
654 | /* The invalid bit is kept in the mask, so we simply clear the | 681 | /* |
655 | relevant mask register to disable a range. */ | 682 | * The invalid bit is kept in the mask, so we simply |
683 | * clear the relevant mask register to disable a range. | ||
684 | */ | ||
656 | mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); | 685 | mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); |
657 | memset(vr, 0, sizeof(struct mtrr_var_range)); | 686 | memset(vr, 0, sizeof(struct mtrr_var_range)); |
658 | } else { | 687 | } else { |
@@ -669,46 +698,50 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, | |||
669 | local_irq_restore(flags); | 698 | local_irq_restore(flags); |
670 | } | 699 | } |
671 | 700 | ||
672 | int generic_validate_add_page(unsigned long base, unsigned long size, unsigned int type) | 701 | int generic_validate_add_page(unsigned long base, unsigned long size, |
702 | unsigned int type) | ||
673 | { | 703 | { |
674 | unsigned long lbase, last; | 704 | unsigned long lbase, last; |
675 | 705 | ||
676 | /* For Intel PPro stepping <= 7, must be 4 MiB aligned | 706 | /* |
677 | and not touch 0x70000000->0x7003FFFF */ | 707 | * For Intel PPro stepping <= 7 |
708 | * must be 4 MiB aligned and not touch 0x70000000 -> 0x7003FFFF | ||
709 | */ | ||
678 | if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && | 710 | if (is_cpu(INTEL) && boot_cpu_data.x86 == 6 && |
679 | boot_cpu_data.x86_model == 1 && | 711 | boot_cpu_data.x86_model == 1 && |
680 | boot_cpu_data.x86_mask <= 7) { | 712 | boot_cpu_data.x86_mask <= 7) { |
681 | if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { | 713 | if (base & ((1 << (22 - PAGE_SHIFT)) - 1)) { |
682 | printk(KERN_WARNING "mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); | 714 | pr_warning("mtrr: base(0x%lx000) is not 4 MiB aligned\n", base); |
683 | return -EINVAL; | 715 | return -EINVAL; |
684 | } | 716 | } |
685 | if (!(base + size < 0x70000 || base > 0x7003F) && | 717 | if (!(base + size < 0x70000 || base > 0x7003F) && |
686 | (type == MTRR_TYPE_WRCOMB | 718 | (type == MTRR_TYPE_WRCOMB |
687 | || type == MTRR_TYPE_WRBACK)) { | 719 | || type == MTRR_TYPE_WRBACK)) { |
688 | printk(KERN_WARNING "mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); | 720 | pr_warning("mtrr: writable mtrr between 0x70000000 and 0x7003FFFF may hang the CPU.\n"); |
689 | return -EINVAL; | 721 | return -EINVAL; |
690 | } | 722 | } |
691 | } | 723 | } |
692 | 724 | ||
693 | /* Check upper bits of base and last are equal and lower bits are 0 | 725 | /* |
694 | for base and 1 for last */ | 726 | * Check upper bits of base and last are equal and lower bits are 0 |
727 | * for base and 1 for last | ||
728 | */ | ||
695 | last = base + size - 1; | 729 | last = base + size - 1; |
696 | for (lbase = base; !(lbase & 1) && (last & 1); | 730 | for (lbase = base; !(lbase & 1) && (last & 1); |
697 | lbase = lbase >> 1, last = last >> 1) ; | 731 | lbase = lbase >> 1, last = last >> 1) |
732 | ; | ||
698 | if (lbase != last) { | 733 | if (lbase != last) { |
699 | printk(KERN_WARNING "mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", | 734 | pr_warning("mtrr: base(0x%lx000) is not aligned on a size(0x%lx000) boundary\n", base, size); |
700 | base, size); | ||
701 | return -EINVAL; | 735 | return -EINVAL; |
702 | } | 736 | } |
703 | return 0; | 737 | return 0; |
704 | } | 738 | } |
705 | 739 | ||
706 | |||
707 | static int generic_have_wrcomb(void) | 740 | static int generic_have_wrcomb(void) |
708 | { | 741 | { |
709 | unsigned long config, dummy; | 742 | unsigned long config, dummy; |
710 | rdmsr(MSR_MTRRcap, config, dummy); | 743 | rdmsr(MSR_MTRRcap, config, dummy); |
711 | return (config & (1 << 10)); | 744 | return config & (1 << 10); |
712 | } | 745 | } |
713 | 746 | ||
714 | int positive_have_wrcomb(void) | 747 | int positive_have_wrcomb(void) |
@@ -716,14 +749,15 @@ int positive_have_wrcomb(void) | |||
716 | return 1; | 749 | return 1; |
717 | } | 750 | } |
718 | 751 | ||
719 | /* generic structure... | 752 | /* |
753 | * Generic structure... | ||
720 | */ | 754 | */ |
721 | struct mtrr_ops generic_mtrr_ops = { | 755 | struct mtrr_ops generic_mtrr_ops = { |
722 | .use_intel_if = 1, | 756 | .use_intel_if = 1, |
723 | .set_all = generic_set_all, | 757 | .set_all = generic_set_all, |
724 | .get = generic_get_mtrr, | 758 | .get = generic_get_mtrr, |
725 | .get_free_region = generic_get_free_region, | 759 | .get_free_region = generic_get_free_region, |
726 | .set = generic_set_mtrr, | 760 | .set = generic_set_mtrr, |
727 | .validate_add_page = generic_validate_add_page, | 761 | .validate_add_page = generic_validate_add_page, |
728 | .have_wrcomb = generic_have_wrcomb, | 762 | .have_wrcomb = generic_have_wrcomb, |
729 | }; | 763 | }; |
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index fb73a52913a4..08b6ea4c62b4 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c | |||
@@ -1,27 +1,28 @@ | |||
1 | #include <linux/init.h> | ||
2 | #include <linux/proc_fs.h> | ||
3 | #include <linux/capability.h> | 1 | #include <linux/capability.h> |
4 | #include <linux/ctype.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/seq_file.h> | 2 | #include <linux/seq_file.h> |
7 | #include <asm/uaccess.h> | 3 | #include <linux/uaccess.h> |
4 | #include <linux/proc_fs.h> | ||
5 | #include <linux/module.h> | ||
6 | #include <linux/ctype.h> | ||
7 | #include <linux/init.h> | ||
8 | 8 | ||
9 | #define LINE_SIZE 80 | 9 | #define LINE_SIZE 80 |
10 | 10 | ||
11 | #include <asm/mtrr.h> | 11 | #include <asm/mtrr.h> |
12 | |||
12 | #include "mtrr.h" | 13 | #include "mtrr.h" |
13 | 14 | ||
14 | #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) | 15 | #define FILE_FCOUNT(f) (((struct seq_file *)((f)->private_data))->private) |
15 | 16 | ||
16 | static const char *const mtrr_strings[MTRR_NUM_TYPES] = | 17 | static const char *const mtrr_strings[MTRR_NUM_TYPES] = |
17 | { | 18 | { |
18 | "uncachable", /* 0 */ | 19 | "uncachable", /* 0 */ |
19 | "write-combining", /* 1 */ | 20 | "write-combining", /* 1 */ |
20 | "?", /* 2 */ | 21 | "?", /* 2 */ |
21 | "?", /* 3 */ | 22 | "?", /* 3 */ |
22 | "write-through", /* 4 */ | 23 | "write-through", /* 4 */ |
23 | "write-protect", /* 5 */ | 24 | "write-protect", /* 5 */ |
24 | "write-back", /* 6 */ | 25 | "write-back", /* 6 */ |
25 | }; | 26 | }; |
26 | 27 | ||
27 | const char *mtrr_attrib_to_str(int x) | 28 | const char *mtrr_attrib_to_str(int x) |
@@ -35,8 +36,8 @@ static int | |||
35 | mtrr_file_add(unsigned long base, unsigned long size, | 36 | mtrr_file_add(unsigned long base, unsigned long size, |
36 | unsigned int type, bool increment, struct file *file, int page) | 37 | unsigned int type, bool increment, struct file *file, int page) |
37 | { | 38 | { |
39 | unsigned int *fcount = FILE_FCOUNT(file); | ||
38 | int reg, max; | 40 | int reg, max; |
39 | unsigned int *fcount = FILE_FCOUNT(file); | ||
40 | 41 | ||
41 | max = num_var_ranges; | 42 | max = num_var_ranges; |
42 | if (fcount == NULL) { | 43 | if (fcount == NULL) { |
@@ -61,8 +62,8 @@ static int | |||
61 | mtrr_file_del(unsigned long base, unsigned long size, | 62 | mtrr_file_del(unsigned long base, unsigned long size, |
62 | struct file *file, int page) | 63 | struct file *file, int page) |
63 | { | 64 | { |
64 | int reg; | ||
65 | unsigned int *fcount = FILE_FCOUNT(file); | 65 | unsigned int *fcount = FILE_FCOUNT(file); |
66 | int reg; | ||
66 | 67 | ||
67 | if (!page) { | 68 | if (!page) { |
68 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) | 69 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) |
@@ -81,13 +82,14 @@ mtrr_file_del(unsigned long base, unsigned long size, | |||
81 | return reg; | 82 | return reg; |
82 | } | 83 | } |
83 | 84 | ||
84 | /* RED-PEN: seq_file can seek now. this is ignored. */ | 85 | /* |
86 | * seq_file can seek but we ignore it. | ||
87 | * | ||
88 | * Format of control line: | ||
89 | * "base=%Lx size=%Lx type=%s" or "disable=%d" | ||
90 | */ | ||
85 | static ssize_t | 91 | static ssize_t |
86 | mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | 92 | mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) |
87 | /* Format of control line: | ||
88 | "base=%Lx size=%Lx type=%s" OR: | ||
89 | "disable=%d" | ||
90 | */ | ||
91 | { | 93 | { |
92 | int i, err; | 94 | int i, err; |
93 | unsigned long reg; | 95 | unsigned long reg; |
@@ -100,15 +102,18 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
100 | return -EPERM; | 102 | return -EPERM; |
101 | if (!len) | 103 | if (!len) |
102 | return -EINVAL; | 104 | return -EINVAL; |
105 | |||
103 | memset(line, 0, LINE_SIZE); | 106 | memset(line, 0, LINE_SIZE); |
104 | if (len > LINE_SIZE) | 107 | if (len > LINE_SIZE) |
105 | len = LINE_SIZE; | 108 | len = LINE_SIZE; |
106 | if (copy_from_user(line, buf, len - 1)) | 109 | if (copy_from_user(line, buf, len - 1)) |
107 | return -EFAULT; | 110 | return -EFAULT; |
111 | |||
108 | linelen = strlen(line); | 112 | linelen = strlen(line); |
109 | ptr = line + linelen - 1; | 113 | ptr = line + linelen - 1; |
110 | if (linelen && *ptr == '\n') | 114 | if (linelen && *ptr == '\n') |
111 | *ptr = '\0'; | 115 | *ptr = '\0'; |
116 | |||
112 | if (!strncmp(line, "disable=", 8)) { | 117 | if (!strncmp(line, "disable=", 8)) { |
113 | reg = simple_strtoul(line + 8, &ptr, 0); | 118 | reg = simple_strtoul(line + 8, &ptr, 0); |
114 | err = mtrr_del_page(reg, 0, 0); | 119 | err = mtrr_del_page(reg, 0, 0); |
@@ -116,28 +121,35 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) | |||
116 | return err; | 121 | return err; |
117 | return len; | 122 | return len; |
118 | } | 123 | } |
124 | |||
119 | if (strncmp(line, "base=", 5)) | 125 | if (strncmp(line, "base=", 5)) |
120 | return -EINVAL; | 126 | return -EINVAL; |
127 | |||
121 | base = simple_strtoull(line + 5, &ptr, 0); | 128 | base = simple_strtoull(line + 5, &ptr, 0); |
122 | for (; isspace(*ptr); ++ptr) ; | 129 | for (; isspace(*ptr); ++ptr) |
130 | ; | ||
131 | |||
123 | if (strncmp(ptr, "size=", 5)) | 132 | if (strncmp(ptr, "size=", 5)) |
124 | return -EINVAL; | 133 | return -EINVAL; |
134 | |||
125 | size = simple_strtoull(ptr + 5, &ptr, 0); | 135 | size = simple_strtoull(ptr + 5, &ptr, 0); |
126 | if ((base & 0xfff) || (size & 0xfff)) | 136 | if ((base & 0xfff) || (size & 0xfff)) |
127 | return -EINVAL; | 137 | return -EINVAL; |
128 | for (; isspace(*ptr); ++ptr) ; | 138 | for (; isspace(*ptr); ++ptr) |
139 | ; | ||
140 | |||
129 | if (strncmp(ptr, "type=", 5)) | 141 | if (strncmp(ptr, "type=", 5)) |
130 | return -EINVAL; | 142 | return -EINVAL; |
131 | ptr += 5; | 143 | ptr += 5; |
132 | for (; isspace(*ptr); ++ptr) ; | 144 | for (; isspace(*ptr); ++ptr) |
145 | ; | ||
146 | |||
133 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { | 147 | for (i = 0; i < MTRR_NUM_TYPES; ++i) { |
134 | if (strcmp(ptr, mtrr_strings[i])) | 148 | if (strcmp(ptr, mtrr_strings[i])) |
135 | continue; | 149 | continue; |
136 | base >>= PAGE_SHIFT; | 150 | base >>= PAGE_SHIFT; |
137 | size >>= PAGE_SHIFT; | 151 | size >>= PAGE_SHIFT; |
138 | err = | 152 | err = mtrr_add_page((unsigned long)base, (unsigned long)size, i, true); |
139 | mtrr_add_page((unsigned long) base, (unsigned long) size, i, | ||
140 | true); | ||
141 | if (err < 0) | 153 | if (err < 0) |
142 | return err; | 154 | return err; |
143 | return len; | 155 | return len; |
@@ -181,7 +193,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
181 | case MTRRIOC32_SET_PAGE_ENTRY: | 193 | case MTRRIOC32_SET_PAGE_ENTRY: |
182 | case MTRRIOC32_DEL_PAGE_ENTRY: | 194 | case MTRRIOC32_DEL_PAGE_ENTRY: |
183 | case MTRRIOC32_KILL_PAGE_ENTRY: { | 195 | case MTRRIOC32_KILL_PAGE_ENTRY: { |
184 | struct mtrr_sentry32 __user *s32 = (struct mtrr_sentry32 __user *)__arg; | 196 | struct mtrr_sentry32 __user *s32; |
197 | |||
198 | s32 = (struct mtrr_sentry32 __user *)__arg; | ||
185 | err = get_user(sentry.base, &s32->base); | 199 | err = get_user(sentry.base, &s32->base); |
186 | err |= get_user(sentry.size, &s32->size); | 200 | err |= get_user(sentry.size, &s32->size); |
187 | err |= get_user(sentry.type, &s32->type); | 201 | err |= get_user(sentry.type, &s32->type); |
@@ -191,7 +205,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
191 | } | 205 | } |
192 | case MTRRIOC32_GET_ENTRY: | 206 | case MTRRIOC32_GET_ENTRY: |
193 | case MTRRIOC32_GET_PAGE_ENTRY: { | 207 | case MTRRIOC32_GET_PAGE_ENTRY: { |
194 | struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; | 208 | struct mtrr_gentry32 __user *g32; |
209 | |||
210 | g32 = (struct mtrr_gentry32 __user *)__arg; | ||
195 | err = get_user(gentry.regnum, &g32->regnum); | 211 | err = get_user(gentry.regnum, &g32->regnum); |
196 | err |= get_user(gentry.base, &g32->base); | 212 | err |= get_user(gentry.base, &g32->base); |
197 | err |= get_user(gentry.size, &g32->size); | 213 | err |= get_user(gentry.size, &g32->size); |
@@ -314,7 +330,7 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
314 | if (err) | 330 | if (err) |
315 | return err; | 331 | return err; |
316 | 332 | ||
317 | switch(cmd) { | 333 | switch (cmd) { |
318 | case MTRRIOC_GET_ENTRY: | 334 | case MTRRIOC_GET_ENTRY: |
319 | case MTRRIOC_GET_PAGE_ENTRY: | 335 | case MTRRIOC_GET_PAGE_ENTRY: |
320 | if (copy_to_user(arg, &gentry, sizeof gentry)) | 336 | if (copy_to_user(arg, &gentry, sizeof gentry)) |
@@ -323,7 +339,9 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
323 | #ifdef CONFIG_COMPAT | 339 | #ifdef CONFIG_COMPAT |
324 | case MTRRIOC32_GET_ENTRY: | 340 | case MTRRIOC32_GET_ENTRY: |
325 | case MTRRIOC32_GET_PAGE_ENTRY: { | 341 | case MTRRIOC32_GET_PAGE_ENTRY: { |
326 | struct mtrr_gentry32 __user *g32 = (struct mtrr_gentry32 __user *)__arg; | 342 | struct mtrr_gentry32 __user *g32; |
343 | |||
344 | g32 = (struct mtrr_gentry32 __user *)__arg; | ||
327 | err = put_user(gentry.base, &g32->base); | 345 | err = put_user(gentry.base, &g32->base); |
328 | err |= put_user(gentry.size, &g32->size); | 346 | err |= put_user(gentry.size, &g32->size); |
329 | err |= put_user(gentry.regnum, &g32->regnum); | 347 | err |= put_user(gentry.regnum, &g32->regnum); |
@@ -335,11 +353,10 @@ mtrr_ioctl(struct file *file, unsigned int cmd, unsigned long __arg) | |||
335 | return err; | 353 | return err; |
336 | } | 354 | } |
337 | 355 | ||
338 | static int | 356 | static int mtrr_close(struct inode *ino, struct file *file) |
339 | mtrr_close(struct inode *ino, struct file *file) | ||
340 | { | 357 | { |
341 | int i, max; | ||
342 | unsigned int *fcount = FILE_FCOUNT(file); | 358 | unsigned int *fcount = FILE_FCOUNT(file); |
359 | int i, max; | ||
343 | 360 | ||
344 | if (fcount != NULL) { | 361 | if (fcount != NULL) { |
345 | max = num_var_ranges; | 362 | max = num_var_ranges; |
@@ -359,22 +376,22 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset); | |||
359 | 376 | ||
360 | static int mtrr_open(struct inode *inode, struct file *file) | 377 | static int mtrr_open(struct inode *inode, struct file *file) |
361 | { | 378 | { |
362 | if (!mtrr_if) | 379 | if (!mtrr_if) |
363 | return -EIO; | 380 | return -EIO; |
364 | if (!mtrr_if->get) | 381 | if (!mtrr_if->get) |
365 | return -ENXIO; | 382 | return -ENXIO; |
366 | return single_open(file, mtrr_seq_show, NULL); | 383 | return single_open(file, mtrr_seq_show, NULL); |
367 | } | 384 | } |
368 | 385 | ||
369 | static const struct file_operations mtrr_fops = { | 386 | static const struct file_operations mtrr_fops = { |
370 | .owner = THIS_MODULE, | 387 | .owner = THIS_MODULE, |
371 | .open = mtrr_open, | 388 | .open = mtrr_open, |
372 | .read = seq_read, | 389 | .read = seq_read, |
373 | .llseek = seq_lseek, | 390 | .llseek = seq_lseek, |
374 | .write = mtrr_write, | 391 | .write = mtrr_write, |
375 | .unlocked_ioctl = mtrr_ioctl, | 392 | .unlocked_ioctl = mtrr_ioctl, |
376 | .compat_ioctl = mtrr_ioctl, | 393 | .compat_ioctl = mtrr_ioctl, |
377 | .release = mtrr_close, | 394 | .release = mtrr_close, |
378 | }; | 395 | }; |
379 | 396 | ||
380 | static int mtrr_seq_show(struct seq_file *seq, void *offset) | 397 | static int mtrr_seq_show(struct seq_file *seq, void *offset) |
@@ -388,23 +405,24 @@ static int mtrr_seq_show(struct seq_file *seq, void *offset) | |||
388 | max = num_var_ranges; | 405 | max = num_var_ranges; |
389 | for (i = 0; i < max; i++) { | 406 | for (i = 0; i < max; i++) { |
390 | mtrr_if->get(i, &base, &size, &type); | 407 | mtrr_if->get(i, &base, &size, &type); |
391 | if (size == 0) | 408 | if (size == 0) { |
392 | mtrr_usage_table[i] = 0; | 409 | mtrr_usage_table[i] = 0; |
393 | else { | 410 | continue; |
394 | if (size < (0x100000 >> PAGE_SHIFT)) { | ||
395 | /* less than 1MB */ | ||
396 | factor = 'K'; | ||
397 | size <<= PAGE_SHIFT - 10; | ||
398 | } else { | ||
399 | factor = 'M'; | ||
400 | size >>= 20 - PAGE_SHIFT; | ||
401 | } | ||
402 | /* RED-PEN: base can be > 32bit */ | ||
403 | len += seq_printf(seq, | ||
404 | "reg%02i: base=0x%06lx000 (%5luMB), size=%5lu%cB, count=%d: %s\n", | ||
405 | i, base, base >> (20 - PAGE_SHIFT), size, factor, | ||
406 | mtrr_usage_table[i], mtrr_attrib_to_str(type)); | ||
407 | } | 411 | } |
412 | if (size < (0x100000 >> PAGE_SHIFT)) { | ||
413 | /* less than 1MB */ | ||
414 | factor = 'K'; | ||
415 | size <<= PAGE_SHIFT - 10; | ||
416 | } else { | ||
417 | factor = 'M'; | ||
418 | size >>= 20 - PAGE_SHIFT; | ||
419 | } | ||
420 | /* Base can be > 32bit */ | ||
421 | len += seq_printf(seq, "reg%02i: base=0x%06lx000 " | ||
422 | "(%5luMB), size=%5lu%cB, count=%d: %s\n", | ||
423 | i, base, base >> (20 - PAGE_SHIFT), size, | ||
424 | factor, mtrr_usage_table[i], | ||
425 | mtrr_attrib_to_str(type)); | ||
408 | } | 426 | } |
409 | return 0; | 427 | return 0; |
410 | } | 428 | } |
@@ -422,6 +440,5 @@ static int __init mtrr_if_init(void) | |||
422 | proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); | 440 | proc_create("mtrr", S_IWUSR | S_IRUGO, NULL, &mtrr_fops); |
423 | return 0; | 441 | return 0; |
424 | } | 442 | } |
425 | |||
426 | arch_initcall(mtrr_if_init); | 443 | arch_initcall(mtrr_if_init); |
427 | #endif /* CONFIG_PROC_FS */ | 444 | #endif /* CONFIG_PROC_FS */ |
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 8fc248b5aeaf..84e83de54575 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c | |||
@@ -25,43 +25,49 @@ | |||
25 | Operating System Writer's Guide" (Intel document number 242692), | 25 | Operating System Writer's Guide" (Intel document number 242692), |
26 | section 11.11.7 | 26 | section 11.11.7 |
27 | 27 | ||
28 | This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> | 28 | This was cleaned and made readable by Patrick Mochel <mochel@osdl.org> |
29 | on 6-7 March 2002. | 29 | on 6-7 March 2002. |
30 | Source: Intel Architecture Software Developers Manual, Volume 3: | 30 | Source: Intel Architecture Software Developers Manual, Volume 3: |
31 | System Programming Guide; Section 9.11. (1997 edition - PPro). | 31 | System Programming Guide; Section 9.11. (1997 edition - PPro). |
32 | */ | 32 | */ |
33 | 33 | ||
34 | #define DEBUG | ||
35 | |||
36 | #include <linux/types.h> /* FIXME: kvm_para.h needs this */ | ||
37 | |||
38 | #include <linux/kvm_para.h> | ||
39 | #include <linux/uaccess.h> | ||
34 | #include <linux/module.h> | 40 | #include <linux/module.h> |
41 | #include <linux/mutex.h> | ||
35 | #include <linux/init.h> | 42 | #include <linux/init.h> |
43 | #include <linux/sort.h> | ||
44 | #include <linux/cpu.h> | ||
36 | #include <linux/pci.h> | 45 | #include <linux/pci.h> |
37 | #include <linux/smp.h> | 46 | #include <linux/smp.h> |
38 | #include <linux/cpu.h> | ||
39 | #include <linux/mutex.h> | ||
40 | #include <linux/sort.h> | ||
41 | 47 | ||
48 | #include <asm/processor.h> | ||
42 | #include <asm/e820.h> | 49 | #include <asm/e820.h> |
43 | #include <asm/mtrr.h> | 50 | #include <asm/mtrr.h> |
44 | #include <asm/uaccess.h> | ||
45 | #include <asm/processor.h> | ||
46 | #include <asm/msr.h> | 51 | #include <asm/msr.h> |
47 | #include <asm/kvm_para.h> | 52 | |
48 | #include "mtrr.h" | 53 | #include "mtrr.h" |
49 | 54 | ||
50 | u32 num_var_ranges = 0; | 55 | u32 num_var_ranges; |
51 | 56 | ||
52 | unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; | 57 | unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; |
53 | static DEFINE_MUTEX(mtrr_mutex); | 58 | static DEFINE_MUTEX(mtrr_mutex); |
54 | 59 | ||
55 | u64 size_or_mask, size_and_mask; | 60 | u64 size_or_mask, size_and_mask; |
61 | static bool mtrr_aps_delayed_init; | ||
56 | 62 | ||
57 | static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; | 63 | static struct mtrr_ops *mtrr_ops[X86_VENDOR_NUM]; |
58 | 64 | ||
59 | struct mtrr_ops * mtrr_if = NULL; | 65 | struct mtrr_ops *mtrr_if; |
60 | 66 | ||
61 | static void set_mtrr(unsigned int reg, unsigned long base, | 67 | static void set_mtrr(unsigned int reg, unsigned long base, |
62 | unsigned long size, mtrr_type type); | 68 | unsigned long size, mtrr_type type); |
63 | 69 | ||
64 | void set_mtrr_ops(struct mtrr_ops * ops) | 70 | void set_mtrr_ops(struct mtrr_ops *ops) |
65 | { | 71 | { |
66 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) | 72 | if (ops->vendor && ops->vendor < X86_VENDOR_NUM) |
67 | mtrr_ops[ops->vendor] = ops; | 73 | mtrr_ops[ops->vendor] = ops; |
@@ -72,30 +78,36 @@ static int have_wrcomb(void) | |||
72 | { | 78 | { |
73 | struct pci_dev *dev; | 79 | struct pci_dev *dev; |
74 | u8 rev; | 80 | u8 rev; |
75 | 81 | ||
76 | if ((dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL)) != NULL) { | 82 | dev = pci_get_class(PCI_CLASS_BRIDGE_HOST << 8, NULL); |
77 | /* ServerWorks LE chipsets < rev 6 have problems with write-combining | 83 | if (dev != NULL) { |
78 | Don't allow it and leave room for other chipsets to be tagged */ | 84 | /* |
85 | * ServerWorks LE chipsets < rev 6 have problems with | ||
86 | * write-combining. Don't allow it and leave room for other | ||
87 | * chipsets to be tagged | ||
88 | */ | ||
79 | if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && | 89 | if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS && |
80 | dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { | 90 | dev->device == PCI_DEVICE_ID_SERVERWORKS_LE) { |
81 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | 91 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); |
82 | if (rev <= 5) { | 92 | if (rev <= 5) { |
83 | printk(KERN_INFO "mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); | 93 | pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n"); |
84 | pci_dev_put(dev); | 94 | pci_dev_put(dev); |
85 | return 0; | 95 | return 0; |
86 | } | 96 | } |
87 | } | 97 | } |
88 | /* Intel 450NX errata # 23. Non ascending cacheline evictions to | 98 | /* |
89 | write combining memory may resulting in data corruption */ | 99 | * Intel 450NX errata # 23. Non ascending cacheline evictions to |
100 | * write combining memory may resulting in data corruption | ||
101 | */ | ||
90 | if (dev->vendor == PCI_VENDOR_ID_INTEL && | 102 | if (dev->vendor == PCI_VENDOR_ID_INTEL && |
91 | dev->device == PCI_DEVICE_ID_INTEL_82451NX) { | 103 | dev->device == PCI_DEVICE_ID_INTEL_82451NX) { |
92 | printk(KERN_INFO "mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); | 104 | pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n"); |
93 | pci_dev_put(dev); | 105 | pci_dev_put(dev); |
94 | return 0; | 106 | return 0; |
95 | } | 107 | } |
96 | pci_dev_put(dev); | 108 | pci_dev_put(dev); |
97 | } | 109 | } |
98 | return (mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0); | 110 | return mtrr_if->have_wrcomb ? mtrr_if->have_wrcomb() : 0; |
99 | } | 111 | } |
100 | 112 | ||
101 | /* This function returns the number of variable MTRRs */ | 113 | /* This function returns the number of variable MTRRs */ |
@@ -103,12 +115,13 @@ static void __init set_num_var_ranges(void) | |||
103 | { | 115 | { |
104 | unsigned long config = 0, dummy; | 116 | unsigned long config = 0, dummy; |
105 | 117 | ||
106 | if (use_intel()) { | 118 | if (use_intel()) |
107 | rdmsr(MSR_MTRRcap, config, dummy); | 119 | rdmsr(MSR_MTRRcap, config, dummy); |
108 | } else if (is_cpu(AMD)) | 120 | else if (is_cpu(AMD)) |
109 | config = 2; | 121 | config = 2; |
110 | else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) | 122 | else if (is_cpu(CYRIX) || is_cpu(CENTAUR)) |
111 | config = 8; | 123 | config = 8; |
124 | |||
112 | num_var_ranges = config & 0xff; | 125 | num_var_ranges = config & 0xff; |
113 | } | 126 | } |
114 | 127 | ||
@@ -130,10 +143,12 @@ struct set_mtrr_data { | |||
130 | mtrr_type smp_type; | 143 | mtrr_type smp_type; |
131 | }; | 144 | }; |
132 | 145 | ||
146 | /** | ||
147 | * ipi_handler - Synchronisation handler. Executed by "other" CPUs. | ||
148 | * | ||
149 | * Returns nothing. | ||
150 | */ | ||
133 | static void ipi_handler(void *info) | 151 | static void ipi_handler(void *info) |
134 | /* [SUMMARY] Synchronisation handler. Executed by "other" CPUs. | ||
135 | [RETURNS] Nothing. | ||
136 | */ | ||
137 | { | 152 | { |
138 | #ifdef CONFIG_SMP | 153 | #ifdef CONFIG_SMP |
139 | struct set_mtrr_data *data = info; | 154 | struct set_mtrr_data *data = info; |
@@ -142,18 +157,22 @@ static void ipi_handler(void *info) | |||
142 | local_irq_save(flags); | 157 | local_irq_save(flags); |
143 | 158 | ||
144 | atomic_dec(&data->count); | 159 | atomic_dec(&data->count); |
145 | while(!atomic_read(&data->gate)) | 160 | while (!atomic_read(&data->gate)) |
146 | cpu_relax(); | 161 | cpu_relax(); |
147 | 162 | ||
148 | /* The master has cleared me to execute */ | 163 | /* The master has cleared me to execute */ |
149 | if (data->smp_reg != ~0U) | 164 | if (data->smp_reg != ~0U) { |
150 | mtrr_if->set(data->smp_reg, data->smp_base, | 165 | mtrr_if->set(data->smp_reg, data->smp_base, |
151 | data->smp_size, data->smp_type); | 166 | data->smp_size, data->smp_type); |
152 | else | 167 | } else if (mtrr_aps_delayed_init) { |
168 | /* | ||
169 | * Initialize the MTRRs inaddition to the synchronisation. | ||
170 | */ | ||
153 | mtrr_if->set_all(); | 171 | mtrr_if->set_all(); |
172 | } | ||
154 | 173 | ||
155 | atomic_dec(&data->count); | 174 | atomic_dec(&data->count); |
156 | while(atomic_read(&data->gate)) | 175 | while (atomic_read(&data->gate)) |
157 | cpu_relax(); | 176 | cpu_relax(); |
158 | 177 | ||
159 | atomic_dec(&data->count); | 178 | atomic_dec(&data->count); |
@@ -161,7 +180,8 @@ static void ipi_handler(void *info) | |||
161 | #endif | 180 | #endif |
162 | } | 181 | } |
163 | 182 | ||
164 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) { | 183 | static inline int types_compatible(mtrr_type type1, mtrr_type type2) |
184 | { | ||
165 | return type1 == MTRR_TYPE_UNCACHABLE || | 185 | return type1 == MTRR_TYPE_UNCACHABLE || |
166 | type2 == MTRR_TYPE_UNCACHABLE || | 186 | type2 == MTRR_TYPE_UNCACHABLE || |
167 | (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || | 187 | (type1 == MTRR_TYPE_WRTHROUGH && type2 == MTRR_TYPE_WRBACK) || |
@@ -176,10 +196,10 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { | |||
176 | * @type: mtrr type | 196 | * @type: mtrr type |
177 | * | 197 | * |
178 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: | 198 | * This is kinda tricky, but fortunately, Intel spelled it out for us cleanly: |
179 | * | 199 | * |
180 | * 1. Send IPI to do the following: | 200 | * 1. Send IPI to do the following: |
181 | * 2. Disable Interrupts | 201 | * 2. Disable Interrupts |
182 | * 3. Wait for all procs to do so | 202 | * 3. Wait for all procs to do so |
183 | * 4. Enter no-fill cache mode | 203 | * 4. Enter no-fill cache mode |
184 | * 5. Flush caches | 204 | * 5. Flush caches |
185 | * 6. Clear PGE bit | 205 | * 6. Clear PGE bit |
@@ -189,26 +209,27 @@ static inline int types_compatible(mtrr_type type1, mtrr_type type2) { | |||
189 | * 10. Enable all range registers | 209 | * 10. Enable all range registers |
190 | * 11. Flush all TLBs and caches again | 210 | * 11. Flush all TLBs and caches again |
191 | * 12. Enter normal cache mode and reenable caching | 211 | * 12. Enter normal cache mode and reenable caching |
192 | * 13. Set PGE | 212 | * 13. Set PGE |
193 | * 14. Wait for buddies to catch up | 213 | * 14. Wait for buddies to catch up |
194 | * 15. Enable interrupts. | 214 | * 15. Enable interrupts. |
195 | * | 215 | * |
196 | * What does that mean for us? Well, first we set data.count to the number | 216 | * What does that mean for us? Well, first we set data.count to the number |
197 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait | 217 | * of CPUs. As each CPU disables interrupts, it'll decrement it once. We wait |
198 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. | 218 | * until it hits 0 and proceed. We set the data.gate flag and reset data.count. |
199 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each | 219 | * Meanwhile, they are waiting for that flag to be set. Once it's set, each |
200 | * CPU goes through the transition of updating MTRRs. The CPU vendors may each do it | 220 | * CPU goes through the transition of updating MTRRs. |
201 | * differently, so we call mtrr_if->set() callback and let them take care of it. | 221 | * The CPU vendors may each do it differently, |
202 | * When they're done, they again decrement data->count and wait for data.gate to | 222 | * so we call mtrr_if->set() callback and let them take care of it. |
203 | * be reset. | 223 | * When they're done, they again decrement data->count and wait for data.gate |
204 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag. | 224 | * to be reset. |
225 | * When we finish, we wait for data.count to hit 0 and toggle the data.gate flag | ||
205 | * Everyone then enables interrupts and we all continue on. | 226 | * Everyone then enables interrupts and we all continue on. |
206 | * | 227 | * |
207 | * Note that the mechanism is the same for UP systems, too; all the SMP stuff | 228 | * Note that the mechanism is the same for UP systems, too; all the SMP stuff |
208 | * becomes nops. | 229 | * becomes nops. |
209 | */ | 230 | */ |
210 | static void set_mtrr(unsigned int reg, unsigned long base, | 231 | static void |
211 | unsigned long size, mtrr_type type) | 232 | set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type) |
212 | { | 233 | { |
213 | struct set_mtrr_data data; | 234 | struct set_mtrr_data data; |
214 | unsigned long flags; | 235 | unsigned long flags; |
@@ -218,121 +239,124 @@ static void set_mtrr(unsigned int reg, unsigned long base, | |||
218 | data.smp_size = size; | 239 | data.smp_size = size; |
219 | data.smp_type = type; | 240 | data.smp_type = type; |
220 | atomic_set(&data.count, num_booting_cpus() - 1); | 241 | atomic_set(&data.count, num_booting_cpus() - 1); |
221 | /* make sure data.count is visible before unleashing other CPUs */ | 242 | |
243 | /* Make sure data.count is visible before unleashing other CPUs */ | ||
222 | smp_wmb(); | 244 | smp_wmb(); |
223 | atomic_set(&data.gate,0); | 245 | atomic_set(&data.gate, 0); |
224 | 246 | ||
225 | /* Start the ball rolling on other CPUs */ | 247 | /* Start the ball rolling on other CPUs */ |
226 | if (smp_call_function(ipi_handler, &data, 0) != 0) | 248 | if (smp_call_function(ipi_handler, &data, 0) != 0) |
227 | panic("mtrr: timed out waiting for other CPUs\n"); | 249 | panic("mtrr: timed out waiting for other CPUs\n"); |
228 | 250 | ||
229 | local_irq_save(flags); | 251 | local_irq_save(flags); |
230 | 252 | ||
231 | while(atomic_read(&data.count)) | 253 | while (atomic_read(&data.count)) |
232 | cpu_relax(); | 254 | cpu_relax(); |
233 | 255 | ||
234 | /* ok, reset count and toggle gate */ | 256 | /* Ok, reset count and toggle gate */ |
235 | atomic_set(&data.count, num_booting_cpus() - 1); | 257 | atomic_set(&data.count, num_booting_cpus() - 1); |
236 | smp_wmb(); | 258 | smp_wmb(); |
237 | atomic_set(&data.gate,1); | 259 | atomic_set(&data.gate, 1); |
238 | 260 | ||
239 | /* do our MTRR business */ | 261 | /* Do our MTRR business */ |
240 | 262 | ||
241 | /* HACK! | 263 | /* |
264 | * HACK! | ||
242 | * We use this same function to initialize the mtrrs on boot. | 265 | * We use this same function to initialize the mtrrs on boot. |
243 | * The state of the boot cpu's mtrrs has been saved, and we want | 266 | * The state of the boot cpu's mtrrs has been saved, and we want |
244 | * to replicate across all the APs. | 267 | * to replicate across all the APs. |
245 | * If we're doing that @reg is set to something special... | 268 | * If we're doing that @reg is set to something special... |
246 | */ | 269 | */ |
247 | if (reg != ~0U) | 270 | if (reg != ~0U) |
248 | mtrr_if->set(reg,base,size,type); | 271 | mtrr_if->set(reg, base, size, type); |
272 | else if (!mtrr_aps_delayed_init) | ||
273 | mtrr_if->set_all(); | ||
249 | 274 | ||
250 | /* wait for the others */ | 275 | /* Wait for the others */ |
251 | while(atomic_read(&data.count)) | 276 | while (atomic_read(&data.count)) |
252 | cpu_relax(); | 277 | cpu_relax(); |
253 | 278 | ||
254 | atomic_set(&data.count, num_booting_cpus() - 1); | 279 | atomic_set(&data.count, num_booting_cpus() - 1); |
255 | smp_wmb(); | 280 | smp_wmb(); |
256 | atomic_set(&data.gate,0); | 281 | atomic_set(&data.gate, 0); |
257 | 282 | ||
258 | /* | 283 | /* |
259 | * Wait here for everyone to have seen the gate change | 284 | * Wait here for everyone to have seen the gate change |
260 | * So we're the last ones to touch 'data' | 285 | * So we're the last ones to touch 'data' |
261 | */ | 286 | */ |
262 | while(atomic_read(&data.count)) | 287 | while (atomic_read(&data.count)) |
263 | cpu_relax(); | 288 | cpu_relax(); |
264 | 289 | ||
265 | local_irq_restore(flags); | 290 | local_irq_restore(flags); |
266 | } | 291 | } |
267 | 292 | ||
268 | /** | 293 | /** |
269 | * mtrr_add_page - Add a memory type region | 294 | * mtrr_add_page - Add a memory type region |
270 | * @base: Physical base address of region in pages (in units of 4 kB!) | 295 | * @base: Physical base address of region in pages (in units of 4 kB!) |
271 | * @size: Physical size of region in pages (4 kB) | 296 | * @size: Physical size of region in pages (4 kB) |
272 | * @type: Type of MTRR desired | 297 | * @type: Type of MTRR desired |
273 | * @increment: If this is true do usage counting on the region | 298 | * @increment: If this is true do usage counting on the region |
274 | * | 299 | * |
275 | * Memory type region registers control the caching on newer Intel and | 300 | * Memory type region registers control the caching on newer Intel and |
276 | * non Intel processors. This function allows drivers to request an | 301 | * non Intel processors. This function allows drivers to request an |
277 | * MTRR is added. The details and hardware specifics of each processor's | 302 | * MTRR is added. The details and hardware specifics of each processor's |
278 | * implementation are hidden from the caller, but nevertheless the | 303 | * implementation are hidden from the caller, but nevertheless the |
279 | * caller should expect to need to provide a power of two size on an | 304 | * caller should expect to need to provide a power of two size on an |
280 | * equivalent power of two boundary. | 305 | * equivalent power of two boundary. |
281 | * | 306 | * |
282 | * If the region cannot be added either because all regions are in use | 307 | * If the region cannot be added either because all regions are in use |
283 | * or the CPU cannot support it a negative value is returned. On success | 308 | * or the CPU cannot support it a negative value is returned. On success |
284 | * the register number for this entry is returned, but should be treated | 309 | * the register number for this entry is returned, but should be treated |
285 | * as a cookie only. | 310 | * as a cookie only. |
286 | * | 311 | * |
287 | * On a multiprocessor machine the changes are made to all processors. | 312 | * On a multiprocessor machine the changes are made to all processors. |
288 | * This is required on x86 by the Intel processors. | 313 | * This is required on x86 by the Intel processors. |
289 | * | 314 | * |
290 | * The available types are | 315 | * The available types are |
291 | * | 316 | * |
292 | * %MTRR_TYPE_UNCACHABLE - No caching | 317 | * %MTRR_TYPE_UNCACHABLE - No caching |
293 | * | 318 | * |
294 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever | 319 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever |
295 | * | 320 | * |
296 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts | 321 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts |
297 | * | 322 | * |
298 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes | 323 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes |
299 | * | 324 | * |
300 | * BUGS: Needs a quiet flag for the cases where drivers do not mind | 325 | * BUGS: Needs a quiet flag for the cases where drivers do not mind |
301 | * failures and do not wish system log messages to be sent. | 326 | * failures and do not wish system log messages to be sent. |
302 | */ | 327 | */ |
303 | 328 | int mtrr_add_page(unsigned long base, unsigned long size, | |
304 | int mtrr_add_page(unsigned long base, unsigned long size, | ||
305 | unsigned int type, bool increment) | 329 | unsigned int type, bool increment) |
306 | { | 330 | { |
331 | unsigned long lbase, lsize; | ||
307 | int i, replace, error; | 332 | int i, replace, error; |
308 | mtrr_type ltype; | 333 | mtrr_type ltype; |
309 | unsigned long lbase, lsize; | ||
310 | 334 | ||
311 | if (!mtrr_if) | 335 | if (!mtrr_if) |
312 | return -ENXIO; | 336 | return -ENXIO; |
313 | 337 | ||
314 | if ((error = mtrr_if->validate_add_page(base,size,type))) | 338 | error = mtrr_if->validate_add_page(base, size, type); |
339 | if (error) | ||
315 | return error; | 340 | return error; |
316 | 341 | ||
317 | if (type >= MTRR_NUM_TYPES) { | 342 | if (type >= MTRR_NUM_TYPES) { |
318 | printk(KERN_WARNING "mtrr: type: %u invalid\n", type); | 343 | pr_warning("mtrr: type: %u invalid\n", type); |
319 | return -EINVAL; | 344 | return -EINVAL; |
320 | } | 345 | } |
321 | 346 | ||
322 | /* If the type is WC, check that this processor supports it */ | 347 | /* If the type is WC, check that this processor supports it */ |
323 | if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { | 348 | if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) { |
324 | printk(KERN_WARNING | 349 | pr_warning("mtrr: your processor doesn't support write-combining\n"); |
325 | "mtrr: your processor doesn't support write-combining\n"); | ||
326 | return -ENOSYS; | 350 | return -ENOSYS; |
327 | } | 351 | } |
328 | 352 | ||
329 | if (!size) { | 353 | if (!size) { |
330 | printk(KERN_WARNING "mtrr: zero sized request\n"); | 354 | pr_warning("mtrr: zero sized request\n"); |
331 | return -EINVAL; | 355 | return -EINVAL; |
332 | } | 356 | } |
333 | 357 | ||
334 | if (base & size_or_mask || size & size_or_mask) { | 358 | if (base & size_or_mask || size & size_or_mask) { |
335 | printk(KERN_WARNING "mtrr: base or size exceeds the MTRR width\n"); | 359 | pr_warning("mtrr: base or size exceeds the MTRR width\n"); |
336 | return -EINVAL; | 360 | return -EINVAL; |
337 | } | 361 | } |
338 | 362 | ||
@@ -341,36 +365,40 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
341 | 365 | ||
342 | /* No CPU hotplug when we change MTRR entries */ | 366 | /* No CPU hotplug when we change MTRR entries */ |
343 | get_online_cpus(); | 367 | get_online_cpus(); |
344 | /* Search for existing MTRR */ | 368 | |
369 | /* Search for existing MTRR */ | ||
345 | mutex_lock(&mtrr_mutex); | 370 | mutex_lock(&mtrr_mutex); |
346 | for (i = 0; i < num_var_ranges; ++i) { | 371 | for (i = 0; i < num_var_ranges; ++i) { |
347 | mtrr_if->get(i, &lbase, &lsize, <ype); | 372 | mtrr_if->get(i, &lbase, &lsize, <ype); |
348 | if (!lsize || base > lbase + lsize - 1 || base + size - 1 < lbase) | 373 | if (!lsize || base > lbase + lsize - 1 || |
374 | base + size - 1 < lbase) | ||
349 | continue; | 375 | continue; |
350 | /* At this point we know there is some kind of overlap/enclosure */ | 376 | /* |
377 | * At this point we know there is some kind of | ||
378 | * overlap/enclosure | ||
379 | */ | ||
351 | if (base < lbase || base + size - 1 > lbase + lsize - 1) { | 380 | if (base < lbase || base + size - 1 > lbase + lsize - 1) { |
352 | if (base <= lbase && base + size - 1 >= lbase + lsize - 1) { | 381 | if (base <= lbase && |
382 | base + size - 1 >= lbase + lsize - 1) { | ||
353 | /* New region encloses an existing region */ | 383 | /* New region encloses an existing region */ |
354 | if (type == ltype) { | 384 | if (type == ltype) { |
355 | replace = replace == -1 ? i : -2; | 385 | replace = replace == -1 ? i : -2; |
356 | continue; | 386 | continue; |
357 | } | 387 | } else if (types_compatible(type, ltype)) |
358 | else if (types_compatible(type, ltype)) | ||
359 | continue; | 388 | continue; |
360 | } | 389 | } |
361 | printk(KERN_WARNING | 390 | pr_warning("mtrr: 0x%lx000,0x%lx000 overlaps existing" |
362 | "mtrr: 0x%lx000,0x%lx000 overlaps existing" | 391 | " 0x%lx000,0x%lx000\n", base, size, lbase, |
363 | " 0x%lx000,0x%lx000\n", base, size, lbase, | 392 | lsize); |
364 | lsize); | ||
365 | goto out; | 393 | goto out; |
366 | } | 394 | } |
367 | /* New region is enclosed by an existing region */ | 395 | /* New region is enclosed by an existing region */ |
368 | if (ltype != type) { | 396 | if (ltype != type) { |
369 | if (types_compatible(type, ltype)) | 397 | if (types_compatible(type, ltype)) |
370 | continue; | 398 | continue; |
371 | printk (KERN_WARNING "mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", | 399 | pr_warning("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n", |
372 | base, size, mtrr_attrib_to_str(ltype), | 400 | base, size, mtrr_attrib_to_str(ltype), |
373 | mtrr_attrib_to_str(type)); | 401 | mtrr_attrib_to_str(type)); |
374 | goto out; | 402 | goto out; |
375 | } | 403 | } |
376 | if (increment) | 404 | if (increment) |
@@ -378,7 +406,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
378 | error = i; | 406 | error = i; |
379 | goto out; | 407 | goto out; |
380 | } | 408 | } |
381 | /* Search for an empty MTRR */ | 409 | /* Search for an empty MTRR */ |
382 | i = mtrr_if->get_free_region(base, size, replace); | 410 | i = mtrr_if->get_free_region(base, size, replace); |
383 | if (i >= 0) { | 411 | if (i >= 0) { |
384 | set_mtrr(i, base, size, type); | 412 | set_mtrr(i, base, size, type); |
@@ -393,8 +421,9 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
393 | mtrr_usage_table[replace] = 0; | 421 | mtrr_usage_table[replace] = 0; |
394 | } | 422 | } |
395 | } | 423 | } |
396 | } else | 424 | } else { |
397 | printk(KERN_INFO "mtrr: no more MTRRs available\n"); | 425 | pr_info("mtrr: no more MTRRs available\n"); |
426 | } | ||
398 | error = i; | 427 | error = i; |
399 | out: | 428 | out: |
400 | mutex_unlock(&mtrr_mutex); | 429 | mutex_unlock(&mtrr_mutex); |
@@ -405,10 +434,8 @@ int mtrr_add_page(unsigned long base, unsigned long size, | |||
405 | static int mtrr_check(unsigned long base, unsigned long size) | 434 | static int mtrr_check(unsigned long base, unsigned long size) |
406 | { | 435 | { |
407 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { | 436 | if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { |
408 | printk(KERN_WARNING | 437 | pr_warning("mtrr: size and base must be multiples of 4 kiB\n"); |
409 | "mtrr: size and base must be multiples of 4 kiB\n"); | 438 | pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base); |
410 | printk(KERN_DEBUG | ||
411 | "mtrr: size: 0x%lx base: 0x%lx\n", size, base); | ||
412 | dump_stack(); | 439 | dump_stack(); |
413 | return -1; | 440 | return -1; |
414 | } | 441 | } |
@@ -416,66 +443,64 @@ static int mtrr_check(unsigned long base, unsigned long size) | |||
416 | } | 443 | } |
417 | 444 | ||
418 | /** | 445 | /** |
419 | * mtrr_add - Add a memory type region | 446 | * mtrr_add - Add a memory type region |
420 | * @base: Physical base address of region | 447 | * @base: Physical base address of region |
421 | * @size: Physical size of region | 448 | * @size: Physical size of region |
422 | * @type: Type of MTRR desired | 449 | * @type: Type of MTRR desired |
423 | * @increment: If this is true do usage counting on the region | 450 | * @increment: If this is true do usage counting on the region |
424 | * | 451 | * |
425 | * Memory type region registers control the caching on newer Intel and | 452 | * Memory type region registers control the caching on newer Intel and |
426 | * non Intel processors. This function allows drivers to request an | 453 | * non Intel processors. This function allows drivers to request an |
427 | * MTRR is added. The details and hardware specifics of each processor's | 454 | * MTRR is added. The details and hardware specifics of each processor's |
428 | * implementation are hidden from the caller, but nevertheless the | 455 | * implementation are hidden from the caller, but nevertheless the |
429 | * caller should expect to need to provide a power of two size on an | 456 | * caller should expect to need to provide a power of two size on an |
430 | * equivalent power of two boundary. | 457 | * equivalent power of two boundary. |
431 | * | 458 | * |
432 | * If the region cannot be added either because all regions are in use | 459 | * If the region cannot be added either because all regions are in use |
433 | * or the CPU cannot support it a negative value is returned. On success | 460 | * or the CPU cannot support it a negative value is returned. On success |
434 | * the register number for this entry is returned, but should be treated | 461 | * the register number for this entry is returned, but should be treated |
435 | * as a cookie only. | 462 | * as a cookie only. |
436 | * | 463 | * |
437 | * On a multiprocessor machine the changes are made to all processors. | 464 | * On a multiprocessor machine the changes are made to all processors. |
438 | * This is required on x86 by the Intel processors. | 465 | * This is required on x86 by the Intel processors. |
439 | * | 466 | * |
440 | * The available types are | 467 | * The available types are |
441 | * | 468 | * |
442 | * %MTRR_TYPE_UNCACHABLE - No caching | 469 | * %MTRR_TYPE_UNCACHABLE - No caching |
443 | * | 470 | * |
444 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever | 471 | * %MTRR_TYPE_WRBACK - Write data back in bursts whenever |
445 | * | 472 | * |
446 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts | 473 | * %MTRR_TYPE_WRCOMB - Write data back soon but allow bursts |
447 | * | 474 | * |
448 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes | 475 | * %MTRR_TYPE_WRTHROUGH - Cache reads but not writes |
449 | * | 476 | * |
450 | * BUGS: Needs a quiet flag for the cases where drivers do not mind | 477 | * BUGS: Needs a quiet flag for the cases where drivers do not mind |
451 | * failures and do not wish system log messages to be sent. | 478 | * failures and do not wish system log messages to be sent. |
452 | */ | 479 | */ |
453 | 480 | int mtrr_add(unsigned long base, unsigned long size, unsigned int type, | |
454 | int | 481 | bool increment) |
455 | mtrr_add(unsigned long base, unsigned long size, unsigned int type, | ||
456 | bool increment) | ||
457 | { | 482 | { |
458 | if (mtrr_check(base, size)) | 483 | if (mtrr_check(base, size)) |
459 | return -EINVAL; | 484 | return -EINVAL; |
460 | return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, | 485 | return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, |
461 | increment); | 486 | increment); |
462 | } | 487 | } |
488 | EXPORT_SYMBOL(mtrr_add); | ||
463 | 489 | ||
464 | /** | 490 | /** |
465 | * mtrr_del_page - delete a memory type region | 491 | * mtrr_del_page - delete a memory type region |
466 | * @reg: Register returned by mtrr_add | 492 | * @reg: Register returned by mtrr_add |
467 | * @base: Physical base address | 493 | * @base: Physical base address |
468 | * @size: Size of region | 494 | * @size: Size of region |
469 | * | 495 | * |
470 | * If register is supplied then base and size are ignored. This is | 496 | * If register is supplied then base and size are ignored. This is |
471 | * how drivers should call it. | 497 | * how drivers should call it. |
472 | * | 498 | * |
473 | * Releases an MTRR region. If the usage count drops to zero the | 499 | * Releases an MTRR region. If the usage count drops to zero the |
474 | * register is freed and the region returns to default state. | 500 | * register is freed and the region returns to default state. |
475 | * On success the register is returned, on failure a negative error | 501 | * On success the register is returned, on failure a negative error |
476 | * code. | 502 | * code. |
477 | */ | 503 | */ |
478 | |||
479 | int mtrr_del_page(int reg, unsigned long base, unsigned long size) | 504 | int mtrr_del_page(int reg, unsigned long base, unsigned long size) |
480 | { | 505 | { |
481 | int i, max; | 506 | int i, max; |
@@ -500,22 +525,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) | |||
500 | } | 525 | } |
501 | } | 526 | } |
502 | if (reg < 0) { | 527 | if (reg < 0) { |
503 | printk(KERN_DEBUG "mtrr: no MTRR for %lx000,%lx000 found\n", base, | 528 | pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n", |
504 | size); | 529 | base, size); |
505 | goto out; | 530 | goto out; |
506 | } | 531 | } |
507 | } | 532 | } |
508 | if (reg >= max) { | 533 | if (reg >= max) { |
509 | printk(KERN_WARNING "mtrr: register: %d too big\n", reg); | 534 | pr_warning("mtrr: register: %d too big\n", reg); |
510 | goto out; | 535 | goto out; |
511 | } | 536 | } |
512 | mtrr_if->get(reg, &lbase, &lsize, <ype); | 537 | mtrr_if->get(reg, &lbase, &lsize, <ype); |
513 | if (lsize < 1) { | 538 | if (lsize < 1) { |
514 | printk(KERN_WARNING "mtrr: MTRR %d not used\n", reg); | 539 | pr_warning("mtrr: MTRR %d not used\n", reg); |
515 | goto out; | 540 | goto out; |
516 | } | 541 | } |
517 | if (mtrr_usage_table[reg] < 1) { | 542 | if (mtrr_usage_table[reg] < 1) { |
518 | printk(KERN_WARNING "mtrr: reg: %d has count=0\n", reg); | 543 | pr_warning("mtrr: reg: %d has count=0\n", reg); |
519 | goto out; | 544 | goto out; |
520 | } | 545 | } |
521 | if (--mtrr_usage_table[reg] < 1) | 546 | if (--mtrr_usage_table[reg] < 1) |
@@ -526,33 +551,31 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) | |||
526 | put_online_cpus(); | 551 | put_online_cpus(); |
527 | return error; | 552 | return error; |
528 | } | 553 | } |
554 | |||
529 | /** | 555 | /** |
530 | * mtrr_del - delete a memory type region | 556 | * mtrr_del - delete a memory type region |
531 | * @reg: Register returned by mtrr_add | 557 | * @reg: Register returned by mtrr_add |
532 | * @base: Physical base address | 558 | * @base: Physical base address |
533 | * @size: Size of region | 559 | * @size: Size of region |
534 | * | 560 | * |
535 | * If register is supplied then base and size are ignored. This is | 561 | * If register is supplied then base and size are ignored. This is |
536 | * how drivers should call it. | 562 | * how drivers should call it. |
537 | * | 563 | * |
538 | * Releases an MTRR region. If the usage count drops to zero the | 564 | * Releases an MTRR region. If the usage count drops to zero the |
539 | * register is freed and the region returns to default state. | 565 | * register is freed and the region returns to default state. |
540 | * On success the register is returned, on failure a negative error | 566 | * On success the register is returned, on failure a negative error |
541 | * code. | 567 | * code. |
542 | */ | 568 | */ |
543 | 569 | int mtrr_del(int reg, unsigned long base, unsigned long size) | |
544 | int | ||
545 | mtrr_del(int reg, unsigned long base, unsigned long size) | ||
546 | { | 570 | { |
547 | if (mtrr_check(base, size)) | 571 | if (mtrr_check(base, size)) |
548 | return -EINVAL; | 572 | return -EINVAL; |
549 | return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); | 573 | return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); |
550 | } | 574 | } |
551 | |||
552 | EXPORT_SYMBOL(mtrr_add); | ||
553 | EXPORT_SYMBOL(mtrr_del); | 575 | EXPORT_SYMBOL(mtrr_del); |
554 | 576 | ||
555 | /* HACK ALERT! | 577 | /* |
578 | * HACK ALERT! | ||
556 | * These should be called implicitly, but we can't yet until all the initcall | 579 | * These should be called implicitly, but we can't yet until all the initcall |
557 | * stuff is done... | 580 | * stuff is done... |
558 | */ | 581 | */ |
@@ -576,29 +599,28 @@ struct mtrr_value { | |||
576 | 599 | ||
577 | static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; | 600 | static struct mtrr_value mtrr_value[MTRR_MAX_VAR_RANGES]; |
578 | 601 | ||
579 | static int mtrr_save(struct sys_device * sysdev, pm_message_t state) | 602 | static int mtrr_save(struct sys_device *sysdev, pm_message_t state) |
580 | { | 603 | { |
581 | int i; | 604 | int i; |
582 | 605 | ||
583 | for (i = 0; i < num_var_ranges; i++) { | 606 | for (i = 0; i < num_var_ranges; i++) { |
584 | mtrr_if->get(i, | 607 | mtrr_if->get(i, &mtrr_value[i].lbase, |
585 | &mtrr_value[i].lbase, | 608 | &mtrr_value[i].lsize, |
586 | &mtrr_value[i].lsize, | 609 | &mtrr_value[i].ltype); |
587 | &mtrr_value[i].ltype); | ||
588 | } | 610 | } |
589 | return 0; | 611 | return 0; |
590 | } | 612 | } |
591 | 613 | ||
592 | static int mtrr_restore(struct sys_device * sysdev) | 614 | static int mtrr_restore(struct sys_device *sysdev) |
593 | { | 615 | { |
594 | int i; | 616 | int i; |
595 | 617 | ||
596 | for (i = 0; i < num_var_ranges; i++) { | 618 | for (i = 0; i < num_var_ranges; i++) { |
597 | if (mtrr_value[i].lsize) | 619 | if (mtrr_value[i].lsize) { |
598 | set_mtrr(i, | 620 | set_mtrr(i, mtrr_value[i].lbase, |
599 | mtrr_value[i].lbase, | 621 | mtrr_value[i].lsize, |
600 | mtrr_value[i].lsize, | 622 | mtrr_value[i].ltype); |
601 | mtrr_value[i].ltype); | 623 | } |
602 | } | 624 | } |
603 | return 0; | 625 | return 0; |
604 | } | 626 | } |
@@ -615,26 +637,29 @@ int __initdata changed_by_mtrr_cleanup; | |||
615 | /** | 637 | /** |
616 | * mtrr_bp_init - initialize mtrrs on the boot CPU | 638 | * mtrr_bp_init - initialize mtrrs on the boot CPU |
617 | * | 639 | * |
618 | * This needs to be called early; before any of the other CPUs are | 640 | * This needs to be called early; before any of the other CPUs are |
619 | * initialized (i.e. before smp_init()). | 641 | * initialized (i.e. before smp_init()). |
620 | * | 642 | * |
621 | */ | 643 | */ |
622 | void __init mtrr_bp_init(void) | 644 | void __init mtrr_bp_init(void) |
623 | { | 645 | { |
624 | u32 phys_addr; | 646 | u32 phys_addr; |
647 | |||
625 | init_ifs(); | 648 | init_ifs(); |
626 | 649 | ||
627 | phys_addr = 32; | 650 | phys_addr = 32; |
628 | 651 | ||
629 | if (cpu_has_mtrr) { | 652 | if (cpu_has_mtrr) { |
630 | mtrr_if = &generic_mtrr_ops; | 653 | mtrr_if = &generic_mtrr_ops; |
631 | size_or_mask = 0xff000000; /* 36 bits */ | 654 | size_or_mask = 0xff000000; /* 36 bits */ |
632 | size_and_mask = 0x00f00000; | 655 | size_and_mask = 0x00f00000; |
633 | phys_addr = 36; | 656 | phys_addr = 36; |
634 | 657 | ||
635 | /* This is an AMD specific MSR, but we assume(hope?) that | 658 | /* |
636 | Intel will implement it to when they extend the address | 659 | * This is an AMD specific MSR, but we assume(hope?) that |
637 | bus of the Xeon. */ | 660 | * Intel will implement it to when they extend the address |
661 | * bus of the Xeon. | ||
662 | */ | ||
638 | if (cpuid_eax(0x80000000) >= 0x80000008) { | 663 | if (cpuid_eax(0x80000000) >= 0x80000008) { |
639 | phys_addr = cpuid_eax(0x80000008) & 0xff; | 664 | phys_addr = cpuid_eax(0x80000008) & 0xff; |
640 | /* CPUID workaround for Intel 0F33/0F34 CPU */ | 665 | /* CPUID workaround for Intel 0F33/0F34 CPU */ |
@@ -649,9 +674,11 @@ void __init mtrr_bp_init(void) | |||
649 | size_and_mask = ~size_or_mask & 0xfffff00000ULL; | 674 | size_and_mask = ~size_or_mask & 0xfffff00000ULL; |
650 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && | 675 | } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && |
651 | boot_cpu_data.x86 == 6) { | 676 | boot_cpu_data.x86 == 6) { |
652 | /* VIA C* family have Intel style MTRRs, but | 677 | /* |
653 | don't support PAE */ | 678 | * VIA C* family have Intel style MTRRs, |
654 | size_or_mask = 0xfff00000; /* 32 bits */ | 679 | * but don't support PAE |
680 | */ | ||
681 | size_or_mask = 0xfff00000; /* 32 bits */ | ||
655 | size_and_mask = 0; | 682 | size_and_mask = 0; |
656 | phys_addr = 32; | 683 | phys_addr = 32; |
657 | } | 684 | } |
@@ -694,30 +721,28 @@ void __init mtrr_bp_init(void) | |||
694 | changed_by_mtrr_cleanup = 1; | 721 | changed_by_mtrr_cleanup = 1; |
695 | mtrr_if->set_all(); | 722 | mtrr_if->set_all(); |
696 | } | 723 | } |
697 | |||
698 | } | 724 | } |
699 | } | 725 | } |
700 | } | 726 | } |
701 | 727 | ||
702 | void mtrr_ap_init(void) | 728 | void mtrr_ap_init(void) |
703 | { | 729 | { |
704 | unsigned long flags; | 730 | if (!use_intel() || mtrr_aps_delayed_init) |
705 | |||
706 | if (!mtrr_if || !use_intel()) | ||
707 | return; | 731 | return; |
708 | /* | 732 | /* |
709 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, | 733 | * Ideally we should hold mtrr_mutex here to avoid mtrr entries |
710 | * but this routine will be called in cpu boot time, holding the lock | 734 | * changed, but this routine will be called in cpu boot time, |
711 | * breaks it. This routine is called in two cases: 1.very earily time | 735 | * holding the lock breaks it. |
712 | * of software resume, when there absolutely isn't mtrr entry changes; | 736 | * |
713 | * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to | 737 | * This routine is called in two cases: |
714 | * prevent mtrr entry changes | 738 | * |
739 | * 1. very earily time of software resume, when there absolutely | ||
740 | * isn't mtrr entry changes; | ||
741 | * | ||
742 | * 2. cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug | ||
743 | * lock to prevent mtrr entry changes | ||
715 | */ | 744 | */ |
716 | local_irq_save(flags); | 745 | set_mtrr(~0U, 0, 0, 0); |
717 | |||
718 | mtrr_if->set_all(); | ||
719 | |||
720 | local_irq_restore(flags); | ||
721 | } | 746 | } |
722 | 747 | ||
723 | /** | 748 | /** |
@@ -728,23 +753,55 @@ void mtrr_save_state(void) | |||
728 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); | 753 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1); |
729 | } | 754 | } |
730 | 755 | ||
756 | void set_mtrr_aps_delayed_init(void) | ||
757 | { | ||
758 | if (!use_intel()) | ||
759 | return; | ||
760 | |||
761 | mtrr_aps_delayed_init = true; | ||
762 | } | ||
763 | |||
764 | /* | ||
765 | * MTRR initialization for all AP's | ||
766 | */ | ||
767 | void mtrr_aps_init(void) | ||
768 | { | ||
769 | if (!use_intel()) | ||
770 | return; | ||
771 | |||
772 | set_mtrr(~0U, 0, 0, 0); | ||
773 | mtrr_aps_delayed_init = false; | ||
774 | } | ||
775 | |||
776 | void mtrr_bp_restore(void) | ||
777 | { | ||
778 | if (!use_intel()) | ||
779 | return; | ||
780 | |||
781 | mtrr_if->set_all(); | ||
782 | } | ||
783 | |||
731 | static int __init mtrr_init_finialize(void) | 784 | static int __init mtrr_init_finialize(void) |
732 | { | 785 | { |
733 | if (!mtrr_if) | 786 | if (!mtrr_if) |
734 | return 0; | 787 | return 0; |
788 | |||
735 | if (use_intel()) { | 789 | if (use_intel()) { |
736 | if (!changed_by_mtrr_cleanup) | 790 | if (!changed_by_mtrr_cleanup) |
737 | mtrr_state_warn(); | 791 | mtrr_state_warn(); |
738 | } else { | 792 | return 0; |
739 | /* The CPUs haven't MTRR and seem to not support SMP. They have | ||
740 | * specific drivers, we use a tricky method to support | ||
741 | * suspend/resume for them. | ||
742 | * TBD: is there any system with such CPU which supports | ||
743 | * suspend/resume? if no, we should remove the code. | ||
744 | */ | ||
745 | sysdev_driver_register(&cpu_sysdev_class, | ||
746 | &mtrr_sysdev_driver); | ||
747 | } | 793 | } |
794 | |||
795 | /* | ||
796 | * The CPU has no MTRR and seems to not support SMP. They have | ||
797 | * specific drivers, we use a tricky method to support | ||
798 | * suspend/resume for them. | ||
799 | * | ||
800 | * TBD: is there any system with such CPU which supports | ||
801 | * suspend/resume? If no, we should remove the code. | ||
802 | */ | ||
803 | sysdev_driver_register(&cpu_sysdev_class, &mtrr_sysdev_driver); | ||
804 | |||
748 | return 0; | 805 | return 0; |
749 | } | 806 | } |
750 | subsys_initcall(mtrr_init_finialize); | 807 | subsys_initcall(mtrr_init_finialize); |
diff --git a/arch/x86/kernel/cpu/mtrr/mtrr.h b/arch/x86/kernel/cpu/mtrr/mtrr.h index 7538b767f206..a501dee9a87a 100644 --- a/arch/x86/kernel/cpu/mtrr/mtrr.h +++ b/arch/x86/kernel/cpu/mtrr/mtrr.h | |||
@@ -1,5 +1,5 @@ | |||
1 | /* | 1 | /* |
2 | * local mtrr defines. | 2 | * local MTRR defines. |
3 | */ | 3 | */ |
4 | 4 | ||
5 | #include <linux/types.h> | 5 | #include <linux/types.h> |
@@ -14,13 +14,12 @@ extern unsigned int mtrr_usage_table[MTRR_MAX_VAR_RANGES]; | |||
14 | struct mtrr_ops { | 14 | struct mtrr_ops { |
15 | u32 vendor; | 15 | u32 vendor; |
16 | u32 use_intel_if; | 16 | u32 use_intel_if; |
17 | // void (*init)(void); | ||
18 | void (*set)(unsigned int reg, unsigned long base, | 17 | void (*set)(unsigned int reg, unsigned long base, |
19 | unsigned long size, mtrr_type type); | 18 | unsigned long size, mtrr_type type); |
20 | void (*set_all)(void); | 19 | void (*set_all)(void); |
21 | 20 | ||
22 | void (*get)(unsigned int reg, unsigned long *base, | 21 | void (*get)(unsigned int reg, unsigned long *base, |
23 | unsigned long *size, mtrr_type * type); | 22 | unsigned long *size, mtrr_type *type); |
24 | int (*get_free_region)(unsigned long base, unsigned long size, | 23 | int (*get_free_region)(unsigned long base, unsigned long size, |
25 | int replace_reg); | 24 | int replace_reg); |
26 | int (*validate_add_page)(unsigned long base, unsigned long size, | 25 | int (*validate_add_page)(unsigned long base, unsigned long size, |
@@ -39,11 +38,11 @@ extern int positive_have_wrcomb(void); | |||
39 | 38 | ||
40 | /* library functions for processor-specific routines */ | 39 | /* library functions for processor-specific routines */ |
41 | struct set_mtrr_context { | 40 | struct set_mtrr_context { |
42 | unsigned long flags; | 41 | unsigned long flags; |
43 | unsigned long cr4val; | 42 | unsigned long cr4val; |
44 | u32 deftype_lo; | 43 | u32 deftype_lo; |
45 | u32 deftype_hi; | 44 | u32 deftype_hi; |
46 | u32 ccr3; | 45 | u32 ccr3; |
47 | }; | 46 | }; |
48 | 47 | ||
49 | void set_mtrr_done(struct set_mtrr_context *ctxt); | 48 | void set_mtrr_done(struct set_mtrr_context *ctxt); |
@@ -54,10 +53,10 @@ void fill_mtrr_var_range(unsigned int index, | |||
54 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); | 53 | u32 base_lo, u32 base_hi, u32 mask_lo, u32 mask_hi); |
55 | void get_mtrr_state(void); | 54 | void get_mtrr_state(void); |
56 | 55 | ||
57 | extern void set_mtrr_ops(struct mtrr_ops * ops); | 56 | extern void set_mtrr_ops(struct mtrr_ops *ops); |
58 | 57 | ||
59 | extern u64 size_or_mask, size_and_mask; | 58 | extern u64 size_or_mask, size_and_mask; |
60 | extern struct mtrr_ops * mtrr_if; | 59 | extern struct mtrr_ops *mtrr_if; |
61 | 60 | ||
62 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) | 61 | #define is_cpu(vnd) (mtrr_if && mtrr_if->vendor == X86_VENDOR_##vnd) |
63 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) | 62 | #define use_intel() (mtrr_if && mtrr_if->use_intel_if == 1) |
diff --git a/arch/x86/kernel/cpu/mtrr/state.c b/arch/x86/kernel/cpu/mtrr/state.c index 1f5fb1588d1f..dfc80b4e6b0d 100644 --- a/arch/x86/kernel/cpu/mtrr/state.c +++ b/arch/x86/kernel/cpu/mtrr/state.c | |||
@@ -1,24 +1,25 @@ | |||
1 | #include <linux/mm.h> | ||
2 | #include <linux/init.h> | 1 | #include <linux/init.h> |
3 | #include <asm/io.h> | 2 | #include <linux/io.h> |
4 | #include <asm/mtrr.h> | 3 | #include <linux/mm.h> |
5 | #include <asm/msr.h> | 4 | |
6 | #include <asm/processor-cyrix.h> | 5 | #include <asm/processor-cyrix.h> |
7 | #include <asm/processor-flags.h> | 6 | #include <asm/processor-flags.h> |
8 | #include "mtrr.h" | 7 | #include <asm/mtrr.h> |
8 | #include <asm/msr.h> | ||
9 | 9 | ||
10 | #include "mtrr.h" | ||
10 | 11 | ||
11 | /* Put the processor into a state where MTRRs can be safely set */ | 12 | /* Put the processor into a state where MTRRs can be safely set */ |
12 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | 13 | void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) |
13 | { | 14 | { |
14 | unsigned int cr0; | 15 | unsigned int cr0; |
15 | 16 | ||
16 | /* Disable interrupts locally */ | 17 | /* Disable interrupts locally */ |
17 | local_irq_save(ctxt->flags); | 18 | local_irq_save(ctxt->flags); |
18 | 19 | ||
19 | if (use_intel() || is_cpu(CYRIX)) { | 20 | if (use_intel() || is_cpu(CYRIX)) { |
20 | 21 | ||
21 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ | 22 | /* Save value of CR4 and clear Page Global Enable (bit 7) */ |
22 | if (cpu_has_pge) { | 23 | if (cpu_has_pge) { |
23 | ctxt->cr4val = read_cr4(); | 24 | ctxt->cr4val = read_cr4(); |
24 | write_cr4(ctxt->cr4val & ~X86_CR4_PGE); | 25 | write_cr4(ctxt->cr4val & ~X86_CR4_PGE); |
@@ -33,50 +34,61 @@ void set_mtrr_prepare_save(struct set_mtrr_context *ctxt) | |||
33 | write_cr0(cr0); | 34 | write_cr0(cr0); |
34 | wbinvd(); | 35 | wbinvd(); |
35 | 36 | ||
36 | if (use_intel()) | 37 | if (use_intel()) { |
37 | /* Save MTRR state */ | 38 | /* Save MTRR state */ |
38 | rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); | 39 | rdmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); |
39 | else | 40 | } else { |
40 | /* Cyrix ARRs - everything else were excluded at the top */ | 41 | /* |
42 | * Cyrix ARRs - | ||
43 | * everything else were excluded at the top | ||
44 | */ | ||
41 | ctxt->ccr3 = getCx86(CX86_CCR3); | 45 | ctxt->ccr3 = getCx86(CX86_CCR3); |
46 | } | ||
42 | } | 47 | } |
43 | } | 48 | } |
44 | 49 | ||
45 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) | 50 | void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) |
46 | { | 51 | { |
47 | if (use_intel()) | 52 | if (use_intel()) { |
48 | /* Disable MTRRs, and set the default type to uncached */ | 53 | /* Disable MTRRs, and set the default type to uncached */ |
49 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, | 54 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo & 0xf300UL, |
50 | ctxt->deftype_hi); | 55 | ctxt->deftype_hi); |
51 | else if (is_cpu(CYRIX)) | 56 | } else { |
52 | /* Cyrix ARRs - everything else were excluded at the top */ | 57 | if (is_cpu(CYRIX)) { |
53 | setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); | 58 | /* Cyrix ARRs - everything else were excluded at the top */ |
59 | setCx86(CX86_CCR3, (ctxt->ccr3 & 0x0f) | 0x10); | ||
60 | } | ||
61 | } | ||
54 | } | 62 | } |
55 | 63 | ||
56 | /* Restore the processor after a set_mtrr_prepare */ | 64 | /* Restore the processor after a set_mtrr_prepare */ |
57 | void set_mtrr_done(struct set_mtrr_context *ctxt) | 65 | void set_mtrr_done(struct set_mtrr_context *ctxt) |
58 | { | 66 | { |
59 | if (use_intel() || is_cpu(CYRIX)) { | 67 | if (use_intel() || is_cpu(CYRIX)) { |
60 | 68 | ||
61 | /* Flush caches and TLBs */ | 69 | /* Flush caches and TLBs */ |
62 | wbinvd(); | 70 | wbinvd(); |
63 | 71 | ||
64 | /* Restore MTRRdefType */ | 72 | /* Restore MTRRdefType */ |
65 | if (use_intel()) | 73 | if (use_intel()) { |
66 | /* Intel (P6) standard MTRRs */ | 74 | /* Intel (P6) standard MTRRs */ |
67 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, ctxt->deftype_hi); | 75 | mtrr_wrmsr(MSR_MTRRdefType, ctxt->deftype_lo, |
68 | else | 76 | ctxt->deftype_hi); |
69 | /* Cyrix ARRs - everything else was excluded at the top */ | 77 | } else { |
78 | /* | ||
79 | * Cyrix ARRs - | ||
80 | * everything else was excluded at the top | ||
81 | */ | ||
70 | setCx86(CX86_CCR3, ctxt->ccr3); | 82 | setCx86(CX86_CCR3, ctxt->ccr3); |
83 | } | ||
71 | 84 | ||
72 | /* Enable caches */ | 85 | /* Enable caches */ |
73 | write_cr0(read_cr0() & 0xbfffffff); | 86 | write_cr0(read_cr0() & 0xbfffffff); |
74 | 87 | ||
75 | /* Restore value of CR4 */ | 88 | /* Restore value of CR4 */ |
76 | if (cpu_has_pge) | 89 | if (cpu_has_pge) |
77 | write_cr4(ctxt->cr4val); | 90 | write_cr4(ctxt->cr4val); |
78 | } | 91 | } |
79 | /* Re-enable interrupts locally (if enabled previously) */ | 92 | /* Re-enable interrupts locally (if enabled previously) */ |
80 | local_irq_restore(ctxt->flags); | 93 | local_irq_restore(ctxt->flags); |
81 | } | 94 | } |
82 | |||
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c index 900332b800f8..2732e2c1e4d3 100644 --- a/arch/x86/kernel/cpu/perf_counter.c +++ b/arch/x86/kernel/cpu/perf_counter.c | |||
@@ -6,6 +6,7 @@ | |||
6 | * Copyright (C) 2009 Jaswinder Singh Rajput | 6 | * Copyright (C) 2009 Jaswinder Singh Rajput |
7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter | 7 | * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter |
8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> | 8 | * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra <pzijlstr@redhat.com> |
9 | * Copyright (C) 2009 Intel Corporation, <markus.t.metzger@intel.com> | ||
9 | * | 10 | * |
10 | * For licencing details see kernel-base/COPYING | 11 | * For licencing details see kernel-base/COPYING |
11 | */ | 12 | */ |
@@ -20,6 +21,7 @@ | |||
20 | #include <linux/sched.h> | 21 | #include <linux/sched.h> |
21 | #include <linux/uaccess.h> | 22 | #include <linux/uaccess.h> |
22 | #include <linux/highmem.h> | 23 | #include <linux/highmem.h> |
24 | #include <linux/cpu.h> | ||
23 | 25 | ||
24 | #include <asm/apic.h> | 26 | #include <asm/apic.h> |
25 | #include <asm/stacktrace.h> | 27 | #include <asm/stacktrace.h> |
@@ -27,12 +29,52 @@ | |||
27 | 29 | ||
28 | static u64 perf_counter_mask __read_mostly; | 30 | static u64 perf_counter_mask __read_mostly; |
29 | 31 | ||
32 | /* The maximal number of PEBS counters: */ | ||
33 | #define MAX_PEBS_COUNTERS 4 | ||
34 | |||
35 | /* The size of a BTS record in bytes: */ | ||
36 | #define BTS_RECORD_SIZE 24 | ||
37 | |||
38 | /* The size of a per-cpu BTS buffer in bytes: */ | ||
39 | #define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 1024) | ||
40 | |||
41 | /* The BTS overflow threshold in bytes from the end of the buffer: */ | ||
42 | #define BTS_OVFL_TH (BTS_RECORD_SIZE * 64) | ||
43 | |||
44 | |||
45 | /* | ||
46 | * Bits in the debugctlmsr controlling branch tracing. | ||
47 | */ | ||
48 | #define X86_DEBUGCTL_TR (1 << 6) | ||
49 | #define X86_DEBUGCTL_BTS (1 << 7) | ||
50 | #define X86_DEBUGCTL_BTINT (1 << 8) | ||
51 | #define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) | ||
52 | #define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) | ||
53 | |||
54 | /* | ||
55 | * A debug store configuration. | ||
56 | * | ||
57 | * We only support architectures that use 64bit fields. | ||
58 | */ | ||
59 | struct debug_store { | ||
60 | u64 bts_buffer_base; | ||
61 | u64 bts_index; | ||
62 | u64 bts_absolute_maximum; | ||
63 | u64 bts_interrupt_threshold; | ||
64 | u64 pebs_buffer_base; | ||
65 | u64 pebs_index; | ||
66 | u64 pebs_absolute_maximum; | ||
67 | u64 pebs_interrupt_threshold; | ||
68 | u64 pebs_counter_reset[MAX_PEBS_COUNTERS]; | ||
69 | }; | ||
70 | |||
30 | struct cpu_hw_counters { | 71 | struct cpu_hw_counters { |
31 | struct perf_counter *counters[X86_PMC_IDX_MAX]; | 72 | struct perf_counter *counters[X86_PMC_IDX_MAX]; |
32 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 73 | unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
33 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; | 74 | unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; |
34 | unsigned long interrupts; | 75 | unsigned long interrupts; |
35 | int enabled; | 76 | int enabled; |
77 | struct debug_store *ds; | ||
36 | }; | 78 | }; |
37 | 79 | ||
38 | /* | 80 | /* |
@@ -58,6 +100,8 @@ struct x86_pmu { | |||
58 | int apic; | 100 | int apic; |
59 | u64 max_period; | 101 | u64 max_period; |
60 | u64 intel_ctrl; | 102 | u64 intel_ctrl; |
103 | void (*enable_bts)(u64 config); | ||
104 | void (*disable_bts)(void); | ||
61 | }; | 105 | }; |
62 | 106 | ||
63 | static struct x86_pmu x86_pmu __read_mostly; | 107 | static struct x86_pmu x86_pmu __read_mostly; |
@@ -577,6 +621,9 @@ x86_perf_counter_update(struct perf_counter *counter, | |||
577 | u64 prev_raw_count, new_raw_count; | 621 | u64 prev_raw_count, new_raw_count; |
578 | s64 delta; | 622 | s64 delta; |
579 | 623 | ||
624 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
625 | return 0; | ||
626 | |||
580 | /* | 627 | /* |
581 | * Careful: an NMI might modify the previous counter value. | 628 | * Careful: an NMI might modify the previous counter value. |
582 | * | 629 | * |
@@ -666,10 +713,110 @@ static void release_pmc_hardware(void) | |||
666 | #endif | 713 | #endif |
667 | } | 714 | } |
668 | 715 | ||
716 | static inline bool bts_available(void) | ||
717 | { | ||
718 | return x86_pmu.enable_bts != NULL; | ||
719 | } | ||
720 | |||
721 | static inline void init_debug_store_on_cpu(int cpu) | ||
722 | { | ||
723 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | ||
724 | |||
725 | if (!ds) | ||
726 | return; | ||
727 | |||
728 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, | ||
729 | (u32)((u64)(unsigned long)ds), | ||
730 | (u32)((u64)(unsigned long)ds >> 32)); | ||
731 | } | ||
732 | |||
733 | static inline void fini_debug_store_on_cpu(int cpu) | ||
734 | { | ||
735 | if (!per_cpu(cpu_hw_counters, cpu).ds) | ||
736 | return; | ||
737 | |||
738 | wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); | ||
739 | } | ||
740 | |||
741 | static void release_bts_hardware(void) | ||
742 | { | ||
743 | int cpu; | ||
744 | |||
745 | if (!bts_available()) | ||
746 | return; | ||
747 | |||
748 | get_online_cpus(); | ||
749 | |||
750 | for_each_online_cpu(cpu) | ||
751 | fini_debug_store_on_cpu(cpu); | ||
752 | |||
753 | for_each_possible_cpu(cpu) { | ||
754 | struct debug_store *ds = per_cpu(cpu_hw_counters, cpu).ds; | ||
755 | |||
756 | if (!ds) | ||
757 | continue; | ||
758 | |||
759 | per_cpu(cpu_hw_counters, cpu).ds = NULL; | ||
760 | |||
761 | kfree((void *)(unsigned long)ds->bts_buffer_base); | ||
762 | kfree(ds); | ||
763 | } | ||
764 | |||
765 | put_online_cpus(); | ||
766 | } | ||
767 | |||
768 | static int reserve_bts_hardware(void) | ||
769 | { | ||
770 | int cpu, err = 0; | ||
771 | |||
772 | if (!bts_available()) | ||
773 | return 0; | ||
774 | |||
775 | get_online_cpus(); | ||
776 | |||
777 | for_each_possible_cpu(cpu) { | ||
778 | struct debug_store *ds; | ||
779 | void *buffer; | ||
780 | |||
781 | err = -ENOMEM; | ||
782 | buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); | ||
783 | if (unlikely(!buffer)) | ||
784 | break; | ||
785 | |||
786 | ds = kzalloc(sizeof(*ds), GFP_KERNEL); | ||
787 | if (unlikely(!ds)) { | ||
788 | kfree(buffer); | ||
789 | break; | ||
790 | } | ||
791 | |||
792 | ds->bts_buffer_base = (u64)(unsigned long)buffer; | ||
793 | ds->bts_index = ds->bts_buffer_base; | ||
794 | ds->bts_absolute_maximum = | ||
795 | ds->bts_buffer_base + BTS_BUFFER_SIZE; | ||
796 | ds->bts_interrupt_threshold = | ||
797 | ds->bts_absolute_maximum - BTS_OVFL_TH; | ||
798 | |||
799 | per_cpu(cpu_hw_counters, cpu).ds = ds; | ||
800 | err = 0; | ||
801 | } | ||
802 | |||
803 | if (err) | ||
804 | release_bts_hardware(); | ||
805 | else { | ||
806 | for_each_online_cpu(cpu) | ||
807 | init_debug_store_on_cpu(cpu); | ||
808 | } | ||
809 | |||
810 | put_online_cpus(); | ||
811 | |||
812 | return err; | ||
813 | } | ||
814 | |||
669 | static void hw_perf_counter_destroy(struct perf_counter *counter) | 815 | static void hw_perf_counter_destroy(struct perf_counter *counter) |
670 | { | 816 | { |
671 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { | 817 | if (atomic_dec_and_mutex_lock(&active_counters, &pmc_reserve_mutex)) { |
672 | release_pmc_hardware(); | 818 | release_pmc_hardware(); |
819 | release_bts_hardware(); | ||
673 | mutex_unlock(&pmc_reserve_mutex); | 820 | mutex_unlock(&pmc_reserve_mutex); |
674 | } | 821 | } |
675 | } | 822 | } |
@@ -712,6 +859,42 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr) | |||
712 | return 0; | 859 | return 0; |
713 | } | 860 | } |
714 | 861 | ||
862 | static void intel_pmu_enable_bts(u64 config) | ||
863 | { | ||
864 | unsigned long debugctlmsr; | ||
865 | |||
866 | debugctlmsr = get_debugctlmsr(); | ||
867 | |||
868 | debugctlmsr |= X86_DEBUGCTL_TR; | ||
869 | debugctlmsr |= X86_DEBUGCTL_BTS; | ||
870 | debugctlmsr |= X86_DEBUGCTL_BTINT; | ||
871 | |||
872 | if (!(config & ARCH_PERFMON_EVENTSEL_OS)) | ||
873 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; | ||
874 | |||
875 | if (!(config & ARCH_PERFMON_EVENTSEL_USR)) | ||
876 | debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; | ||
877 | |||
878 | update_debugctlmsr(debugctlmsr); | ||
879 | } | ||
880 | |||
881 | static void intel_pmu_disable_bts(void) | ||
882 | { | ||
883 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
884 | unsigned long debugctlmsr; | ||
885 | |||
886 | if (!cpuc->ds) | ||
887 | return; | ||
888 | |||
889 | debugctlmsr = get_debugctlmsr(); | ||
890 | |||
891 | debugctlmsr &= | ||
892 | ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | | ||
893 | X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); | ||
894 | |||
895 | update_debugctlmsr(debugctlmsr); | ||
896 | } | ||
897 | |||
715 | /* | 898 | /* |
716 | * Setup the hardware configuration for a given attr_type | 899 | * Setup the hardware configuration for a given attr_type |
717 | */ | 900 | */ |
@@ -728,9 +911,13 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
728 | err = 0; | 911 | err = 0; |
729 | if (!atomic_inc_not_zero(&active_counters)) { | 912 | if (!atomic_inc_not_zero(&active_counters)) { |
730 | mutex_lock(&pmc_reserve_mutex); | 913 | mutex_lock(&pmc_reserve_mutex); |
731 | if (atomic_read(&active_counters) == 0 && !reserve_pmc_hardware()) | 914 | if (atomic_read(&active_counters) == 0) { |
732 | err = -EBUSY; | 915 | if (!reserve_pmc_hardware()) |
733 | else | 916 | err = -EBUSY; |
917 | else | ||
918 | err = reserve_bts_hardware(); | ||
919 | } | ||
920 | if (!err) | ||
734 | atomic_inc(&active_counters); | 921 | atomic_inc(&active_counters); |
735 | mutex_unlock(&pmc_reserve_mutex); | 922 | mutex_unlock(&pmc_reserve_mutex); |
736 | } | 923 | } |
@@ -793,6 +980,20 @@ static int __hw_perf_counter_init(struct perf_counter *counter) | |||
793 | if (config == -1LL) | 980 | if (config == -1LL) |
794 | return -EINVAL; | 981 | return -EINVAL; |
795 | 982 | ||
983 | /* | ||
984 | * Branch tracing: | ||
985 | */ | ||
986 | if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && | ||
987 | (hwc->sample_period == 1)) { | ||
988 | /* BTS is not supported by this architecture. */ | ||
989 | if (!bts_available()) | ||
990 | return -EOPNOTSUPP; | ||
991 | |||
992 | /* BTS is currently only allowed for user-mode. */ | ||
993 | if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) | ||
994 | return -EOPNOTSUPP; | ||
995 | } | ||
996 | |||
796 | hwc->config |= config; | 997 | hwc->config |= config; |
797 | 998 | ||
798 | return 0; | 999 | return 0; |
@@ -817,7 +1018,18 @@ static void p6_pmu_disable_all(void) | |||
817 | 1018 | ||
818 | static void intel_pmu_disable_all(void) | 1019 | static void intel_pmu_disable_all(void) |
819 | { | 1020 | { |
1021 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1022 | |||
1023 | if (!cpuc->enabled) | ||
1024 | return; | ||
1025 | |||
1026 | cpuc->enabled = 0; | ||
1027 | barrier(); | ||
1028 | |||
820 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); | 1029 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); |
1030 | |||
1031 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) | ||
1032 | intel_pmu_disable_bts(); | ||
821 | } | 1033 | } |
822 | 1034 | ||
823 | static void amd_pmu_disable_all(void) | 1035 | static void amd_pmu_disable_all(void) |
@@ -875,7 +1087,25 @@ static void p6_pmu_enable_all(void) | |||
875 | 1087 | ||
876 | static void intel_pmu_enable_all(void) | 1088 | static void intel_pmu_enable_all(void) |
877 | { | 1089 | { |
1090 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | ||
1091 | |||
1092 | if (cpuc->enabled) | ||
1093 | return; | ||
1094 | |||
1095 | cpuc->enabled = 1; | ||
1096 | barrier(); | ||
1097 | |||
878 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); | 1098 | wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); |
1099 | |||
1100 | if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { | ||
1101 | struct perf_counter *counter = | ||
1102 | cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | ||
1103 | |||
1104 | if (WARN_ON_ONCE(!counter)) | ||
1105 | return; | ||
1106 | |||
1107 | intel_pmu_enable_bts(counter->hw.config); | ||
1108 | } | ||
879 | } | 1109 | } |
880 | 1110 | ||
881 | static void amd_pmu_enable_all(void) | 1111 | static void amd_pmu_enable_all(void) |
@@ -962,6 +1192,11 @@ p6_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | |||
962 | static inline void | 1192 | static inline void |
963 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | 1193 | intel_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) |
964 | { | 1194 | { |
1195 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1196 | intel_pmu_disable_bts(); | ||
1197 | return; | ||
1198 | } | ||
1199 | |||
965 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1200 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
966 | intel_pmu_disable_fixed(hwc, idx); | 1201 | intel_pmu_disable_fixed(hwc, idx); |
967 | return; | 1202 | return; |
@@ -976,7 +1211,7 @@ amd_pmu_disable_counter(struct hw_perf_counter *hwc, int idx) | |||
976 | x86_pmu_disable_counter(hwc, idx); | 1211 | x86_pmu_disable_counter(hwc, idx); |
977 | } | 1212 | } |
978 | 1213 | ||
979 | static DEFINE_PER_CPU(u64, prev_left[X86_PMC_IDX_MAX]); | 1214 | static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); |
980 | 1215 | ||
981 | /* | 1216 | /* |
982 | * Set the next IRQ period, based on the hwc->period_left value. | 1217 | * Set the next IRQ period, based on the hwc->period_left value. |
@@ -990,6 +1225,9 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
990 | s64 period = hwc->sample_period; | 1225 | s64 period = hwc->sample_period; |
991 | int err, ret = 0; | 1226 | int err, ret = 0; |
992 | 1227 | ||
1228 | if (idx == X86_PMC_IDX_FIXED_BTS) | ||
1229 | return 0; | ||
1230 | |||
993 | /* | 1231 | /* |
994 | * If we are way outside a reasoable range then just skip forward: | 1232 | * If we are way outside a reasoable range then just skip forward: |
995 | */ | 1233 | */ |
@@ -1015,7 +1253,7 @@ x86_perf_counter_set_period(struct perf_counter *counter, | |||
1015 | if (left > x86_pmu.max_period) | 1253 | if (left > x86_pmu.max_period) |
1016 | left = x86_pmu.max_period; | 1254 | left = x86_pmu.max_period; |
1017 | 1255 | ||
1018 | per_cpu(prev_left[idx], smp_processor_id()) = left; | 1256 | per_cpu(pmc_prev_left[idx], smp_processor_id()) = left; |
1019 | 1257 | ||
1020 | /* | 1258 | /* |
1021 | * The hw counter starts counting from this counter offset, | 1259 | * The hw counter starts counting from this counter offset, |
@@ -1072,6 +1310,14 @@ static void p6_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | |||
1072 | 1310 | ||
1073 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) | 1311 | static void intel_pmu_enable_counter(struct hw_perf_counter *hwc, int idx) |
1074 | { | 1312 | { |
1313 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1314 | if (!__get_cpu_var(cpu_hw_counters).enabled) | ||
1315 | return; | ||
1316 | |||
1317 | intel_pmu_enable_bts(hwc->config); | ||
1318 | return; | ||
1319 | } | ||
1320 | |||
1075 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { | 1321 | if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { |
1076 | intel_pmu_enable_fixed(hwc, idx); | 1322 | intel_pmu_enable_fixed(hwc, idx); |
1077 | return; | 1323 | return; |
@@ -1093,11 +1339,16 @@ fixed_mode_idx(struct perf_counter *counter, struct hw_perf_counter *hwc) | |||
1093 | { | 1339 | { |
1094 | unsigned int event; | 1340 | unsigned int event; |
1095 | 1341 | ||
1342 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
1343 | |||
1344 | if (unlikely((event == | ||
1345 | x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && | ||
1346 | (hwc->sample_period == 1))) | ||
1347 | return X86_PMC_IDX_FIXED_BTS; | ||
1348 | |||
1096 | if (!x86_pmu.num_counters_fixed) | 1349 | if (!x86_pmu.num_counters_fixed) |
1097 | return -1; | 1350 | return -1; |
1098 | 1351 | ||
1099 | event = hwc->config & ARCH_PERFMON_EVENT_MASK; | ||
1100 | |||
1101 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) | 1352 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) |
1102 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; | 1353 | return X86_PMC_IDX_FIXED_INSTRUCTIONS; |
1103 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) | 1354 | if (unlikely(event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) |
@@ -1118,7 +1369,15 @@ static int x86_pmu_enable(struct perf_counter *counter) | |||
1118 | int idx; | 1369 | int idx; |
1119 | 1370 | ||
1120 | idx = fixed_mode_idx(counter, hwc); | 1371 | idx = fixed_mode_idx(counter, hwc); |
1121 | if (idx >= 0) { | 1372 | if (idx == X86_PMC_IDX_FIXED_BTS) { |
1373 | /* BTS is already occupied. */ | ||
1374 | if (test_and_set_bit(idx, cpuc->used_mask)) | ||
1375 | return -EAGAIN; | ||
1376 | |||
1377 | hwc->config_base = 0; | ||
1378 | hwc->counter_base = 0; | ||
1379 | hwc->idx = idx; | ||
1380 | } else if (idx >= 0) { | ||
1122 | /* | 1381 | /* |
1123 | * Try to get the fixed counter, if that is already taken | 1382 | * Try to get the fixed counter, if that is already taken |
1124 | * then try to get a generic counter: | 1383 | * then try to get a generic counter: |
@@ -1211,7 +1470,7 @@ void perf_counter_print_debug(void) | |||
1211 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); | 1470 | rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); |
1212 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); | 1471 | rdmsrl(x86_pmu.perfctr + idx, pmc_count); |
1213 | 1472 | ||
1214 | prev_left = per_cpu(prev_left[idx], cpu); | 1473 | prev_left = per_cpu(pmc_prev_left[idx], cpu); |
1215 | 1474 | ||
1216 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", | 1475 | pr_info("CPU#%d: gen-PMC%d ctrl: %016llx\n", |
1217 | cpu, idx, pmc_ctrl); | 1476 | cpu, idx, pmc_ctrl); |
@@ -1229,6 +1488,44 @@ void perf_counter_print_debug(void) | |||
1229 | local_irq_restore(flags); | 1488 | local_irq_restore(flags); |
1230 | } | 1489 | } |
1231 | 1490 | ||
1491 | static void intel_pmu_drain_bts_buffer(struct cpu_hw_counters *cpuc, | ||
1492 | struct perf_sample_data *data) | ||
1493 | { | ||
1494 | struct debug_store *ds = cpuc->ds; | ||
1495 | struct bts_record { | ||
1496 | u64 from; | ||
1497 | u64 to; | ||
1498 | u64 flags; | ||
1499 | }; | ||
1500 | struct perf_counter *counter = cpuc->counters[X86_PMC_IDX_FIXED_BTS]; | ||
1501 | unsigned long orig_ip = data->regs->ip; | ||
1502 | struct bts_record *at, *top; | ||
1503 | |||
1504 | if (!counter) | ||
1505 | return; | ||
1506 | |||
1507 | if (!ds) | ||
1508 | return; | ||
1509 | |||
1510 | at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; | ||
1511 | top = (struct bts_record *)(unsigned long)ds->bts_index; | ||
1512 | |||
1513 | ds->bts_index = ds->bts_buffer_base; | ||
1514 | |||
1515 | for (; at < top; at++) { | ||
1516 | data->regs->ip = at->from; | ||
1517 | data->addr = at->to; | ||
1518 | |||
1519 | perf_counter_output(counter, 1, data); | ||
1520 | } | ||
1521 | |||
1522 | data->regs->ip = orig_ip; | ||
1523 | data->addr = 0; | ||
1524 | |||
1525 | /* There's new data available. */ | ||
1526 | counter->pending_kill = POLL_IN; | ||
1527 | } | ||
1528 | |||
1232 | static void x86_pmu_disable(struct perf_counter *counter) | 1529 | static void x86_pmu_disable(struct perf_counter *counter) |
1233 | { | 1530 | { |
1234 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); | 1531 | struct cpu_hw_counters *cpuc = &__get_cpu_var(cpu_hw_counters); |
@@ -1253,6 +1550,15 @@ static void x86_pmu_disable(struct perf_counter *counter) | |||
1253 | * that we are disabling: | 1550 | * that we are disabling: |
1254 | */ | 1551 | */ |
1255 | x86_perf_counter_update(counter, hwc, idx); | 1552 | x86_perf_counter_update(counter, hwc, idx); |
1553 | |||
1554 | /* Drain the remaining BTS records. */ | ||
1555 | if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { | ||
1556 | struct perf_sample_data data; | ||
1557 | struct pt_regs regs; | ||
1558 | |||
1559 | data.regs = ®s; | ||
1560 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
1561 | } | ||
1256 | cpuc->counters[idx] = NULL; | 1562 | cpuc->counters[idx] = NULL; |
1257 | clear_bit(idx, cpuc->used_mask); | 1563 | clear_bit(idx, cpuc->used_mask); |
1258 | 1564 | ||
@@ -1280,6 +1586,7 @@ static int intel_pmu_save_and_restart(struct perf_counter *counter) | |||
1280 | 1586 | ||
1281 | static void intel_pmu_reset(void) | 1587 | static void intel_pmu_reset(void) |
1282 | { | 1588 | { |
1589 | struct debug_store *ds = __get_cpu_var(cpu_hw_counters).ds; | ||
1283 | unsigned long flags; | 1590 | unsigned long flags; |
1284 | int idx; | 1591 | int idx; |
1285 | 1592 | ||
@@ -1297,6 +1604,8 @@ static void intel_pmu_reset(void) | |||
1297 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { | 1604 | for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { |
1298 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); | 1605 | checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); |
1299 | } | 1606 | } |
1607 | if (ds) | ||
1608 | ds->bts_index = ds->bts_buffer_base; | ||
1300 | 1609 | ||
1301 | local_irq_restore(flags); | 1610 | local_irq_restore(flags); |
1302 | } | 1611 | } |
@@ -1362,6 +1671,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) | |||
1362 | cpuc = &__get_cpu_var(cpu_hw_counters); | 1671 | cpuc = &__get_cpu_var(cpu_hw_counters); |
1363 | 1672 | ||
1364 | perf_disable(); | 1673 | perf_disable(); |
1674 | intel_pmu_drain_bts_buffer(cpuc, &data); | ||
1365 | status = intel_pmu_get_status(); | 1675 | status = intel_pmu_get_status(); |
1366 | if (!status) { | 1676 | if (!status) { |
1367 | perf_enable(); | 1677 | perf_enable(); |
@@ -1571,6 +1881,8 @@ static struct x86_pmu intel_pmu = { | |||
1571 | * the generic counter period: | 1881 | * the generic counter period: |
1572 | */ | 1882 | */ |
1573 | .max_period = (1ULL << 31) - 1, | 1883 | .max_period = (1ULL << 31) - 1, |
1884 | .enable_bts = intel_pmu_enable_bts, | ||
1885 | .disable_bts = intel_pmu_disable_bts, | ||
1574 | }; | 1886 | }; |
1575 | 1887 | ||
1576 | static struct x86_pmu amd_pmu = { | 1888 | static struct x86_pmu amd_pmu = { |
@@ -1798,8 +2110,8 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) | |||
1798 | entry->ip[entry->nr++] = ip; | 2110 | entry->ip[entry->nr++] = ip; |
1799 | } | 2111 | } |
1800 | 2112 | ||
1801 | static DEFINE_PER_CPU(struct perf_callchain_entry, irq_entry); | 2113 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); |
1802 | static DEFINE_PER_CPU(struct perf_callchain_entry, nmi_entry); | 2114 | static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); |
1803 | static DEFINE_PER_CPU(int, in_nmi_frame); | 2115 | static DEFINE_PER_CPU(int, in_nmi_frame); |
1804 | 2116 | ||
1805 | 2117 | ||
@@ -1952,9 +2264,9 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1952 | struct perf_callchain_entry *entry; | 2264 | struct perf_callchain_entry *entry; |
1953 | 2265 | ||
1954 | if (in_nmi()) | 2266 | if (in_nmi()) |
1955 | entry = &__get_cpu_var(nmi_entry); | 2267 | entry = &__get_cpu_var(pmc_nmi_entry); |
1956 | else | 2268 | else |
1957 | entry = &__get_cpu_var(irq_entry); | 2269 | entry = &__get_cpu_var(pmc_irq_entry); |
1958 | 2270 | ||
1959 | entry->nr = 0; | 2271 | entry->nr = 0; |
1960 | 2272 | ||
@@ -1962,3 +2274,8 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) | |||
1962 | 2274 | ||
1963 | return entry; | 2275 | return entry; |
1964 | } | 2276 | } |
2277 | |||
2278 | void hw_perf_counter_setup_online(int cpu) | ||
2279 | { | ||
2280 | init_debug_store_on_cpu(cpu); | ||
2281 | } | ||
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index e60ed740d2b3..392bea43b890 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c | |||
@@ -68,16 +68,16 @@ static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | |||
68 | /* returns the bit offset of the performance counter register */ | 68 | /* returns the bit offset of the performance counter register */ |
69 | switch (boot_cpu_data.x86_vendor) { | 69 | switch (boot_cpu_data.x86_vendor) { |
70 | case X86_VENDOR_AMD: | 70 | case X86_VENDOR_AMD: |
71 | return (msr - MSR_K7_PERFCTR0); | 71 | return msr - MSR_K7_PERFCTR0; |
72 | case X86_VENDOR_INTEL: | 72 | case X86_VENDOR_INTEL: |
73 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 73 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
74 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | 74 | return msr - MSR_ARCH_PERFMON_PERFCTR0; |
75 | 75 | ||
76 | switch (boot_cpu_data.x86) { | 76 | switch (boot_cpu_data.x86) { |
77 | case 6: | 77 | case 6: |
78 | return (msr - MSR_P6_PERFCTR0); | 78 | return msr - MSR_P6_PERFCTR0; |
79 | case 15: | 79 | case 15: |
80 | return (msr - MSR_P4_BPU_PERFCTR0); | 80 | return msr - MSR_P4_BPU_PERFCTR0; |
81 | } | 81 | } |
82 | } | 82 | } |
83 | return 0; | 83 | return 0; |
@@ -92,16 +92,16 @@ static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | |||
92 | /* returns the bit offset of the event selection register */ | 92 | /* returns the bit offset of the event selection register */ |
93 | switch (boot_cpu_data.x86_vendor) { | 93 | switch (boot_cpu_data.x86_vendor) { |
94 | case X86_VENDOR_AMD: | 94 | case X86_VENDOR_AMD: |
95 | return (msr - MSR_K7_EVNTSEL0); | 95 | return msr - MSR_K7_EVNTSEL0; |
96 | case X86_VENDOR_INTEL: | 96 | case X86_VENDOR_INTEL: |
97 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | 97 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) |
98 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | 98 | return msr - MSR_ARCH_PERFMON_EVENTSEL0; |
99 | 99 | ||
100 | switch (boot_cpu_data.x86) { | 100 | switch (boot_cpu_data.x86) { |
101 | case 6: | 101 | case 6: |
102 | return (msr - MSR_P6_EVNTSEL0); | 102 | return msr - MSR_P6_EVNTSEL0; |
103 | case 15: | 103 | case 15: |
104 | return (msr - MSR_P4_BSU_ESCR0); | 104 | return msr - MSR_P4_BSU_ESCR0; |
105 | } | 105 | } |
106 | } | 106 | } |
107 | return 0; | 107 | return 0; |
@@ -113,7 +113,7 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | |||
113 | { | 113 | { |
114 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | 114 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); |
115 | 115 | ||
116 | return (!test_bit(counter, perfctr_nmi_owner)); | 116 | return !test_bit(counter, perfctr_nmi_owner); |
117 | } | 117 | } |
118 | 118 | ||
119 | /* checks the an msr for availability */ | 119 | /* checks the an msr for availability */ |
@@ -124,7 +124,7 @@ int avail_to_resrv_perfctr_nmi(unsigned int msr) | |||
124 | counter = nmi_perfctr_msr_to_bit(msr); | 124 | counter = nmi_perfctr_msr_to_bit(msr); |
125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | 125 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); |
126 | 126 | ||
127 | return (!test_bit(counter, perfctr_nmi_owner)); | 127 | return !test_bit(counter, perfctr_nmi_owner); |
128 | } | 128 | } |
129 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | 129 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
130 | 130 | ||
@@ -237,7 +237,7 @@ static unsigned int adjust_for_32bit_ctr(unsigned int hz) | |||
237 | */ | 237 | */ |
238 | counter_val = (u64)cpu_khz * 1000; | 238 | counter_val = (u64)cpu_khz * 1000; |
239 | do_div(counter_val, retval); | 239 | do_div(counter_val, retval); |
240 | if (counter_val > 0x7fffffffULL) { | 240 | if (counter_val > 0x7fffffffULL) { |
241 | u64 count = (u64)cpu_khz * 1000; | 241 | u64 count = (u64)cpu_khz * 1000; |
242 | do_div(count, 0x7fffffffUL); | 242 | do_div(count, 0x7fffffffUL); |
243 | retval = count + 1; | 243 | retval = count + 1; |
@@ -251,7 +251,7 @@ static void write_watchdog_counter(unsigned int perfctr_msr, | |||
251 | u64 count = (u64)cpu_khz * 1000; | 251 | u64 count = (u64)cpu_khz * 1000; |
252 | 252 | ||
253 | do_div(count, nmi_hz); | 253 | do_div(count, nmi_hz); |
254 | if(descr) | 254 | if (descr) |
255 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | 255 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
256 | wrmsrl(perfctr_msr, 0 - count); | 256 | wrmsrl(perfctr_msr, 0 - count); |
257 | } | 257 | } |
@@ -262,7 +262,7 @@ static void write_watchdog_counter32(unsigned int perfctr_msr, | |||
262 | u64 count = (u64)cpu_khz * 1000; | 262 | u64 count = (u64)cpu_khz * 1000; |
263 | 263 | ||
264 | do_div(count, nmi_hz); | 264 | do_div(count, nmi_hz); |
265 | if(descr) | 265 | if (descr) |
266 | pr_debug("setting %s to -0x%08Lx\n", descr, count); | 266 | pr_debug("setting %s to -0x%08Lx\n", descr, count); |
267 | wrmsr(perfctr_msr, (u32)(-count), 0); | 267 | wrmsr(perfctr_msr, (u32)(-count), 0); |
268 | } | 268 | } |
@@ -296,7 +296,7 @@ static int setup_k7_watchdog(unsigned nmi_hz) | |||
296 | 296 | ||
297 | /* setup the timer */ | 297 | /* setup the timer */ |
298 | wrmsr(evntsel_msr, evntsel, 0); | 298 | wrmsr(evntsel_msr, evntsel, 0); |
299 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); | 299 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0", nmi_hz); |
300 | 300 | ||
301 | /* initialize the wd struct before enabling */ | 301 | /* initialize the wd struct before enabling */ |
302 | wd->perfctr_msr = perfctr_msr; | 302 | wd->perfctr_msr = perfctr_msr; |
@@ -387,7 +387,7 @@ static int setup_p6_watchdog(unsigned nmi_hz) | |||
387 | /* setup the timer */ | 387 | /* setup the timer */ |
388 | wrmsr(evntsel_msr, evntsel, 0); | 388 | wrmsr(evntsel_msr, evntsel, 0); |
389 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | 389 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); |
390 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); | 390 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0", nmi_hz); |
391 | 391 | ||
392 | /* initialize the wd struct before enabling */ | 392 | /* initialize the wd struct before enabling */ |
393 | wd->perfctr_msr = perfctr_msr; | 393 | wd->perfctr_msr = perfctr_msr; |
@@ -415,7 +415,7 @@ static void __kprobes p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | |||
415 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 415 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
416 | 416 | ||
417 | /* P6/ARCH_PERFMON has 32 bit counter write */ | 417 | /* P6/ARCH_PERFMON has 32 bit counter write */ |
418 | write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); | 418 | write_watchdog_counter32(wd->perfctr_msr, NULL, nmi_hz); |
419 | } | 419 | } |
420 | 420 | ||
421 | static const struct wd_ops p6_wd_ops = { | 421 | static const struct wd_ops p6_wd_ops = { |
@@ -490,9 +490,9 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
490 | if (smp_num_siblings == 2) { | 490 | if (smp_num_siblings == 2) { |
491 | unsigned int ebx, apicid; | 491 | unsigned int ebx, apicid; |
492 | 492 | ||
493 | ebx = cpuid_ebx(1); | 493 | ebx = cpuid_ebx(1); |
494 | apicid = (ebx >> 24) & 0xff; | 494 | apicid = (ebx >> 24) & 0xff; |
495 | ht_num = apicid & 1; | 495 | ht_num = apicid & 1; |
496 | } else | 496 | } else |
497 | #endif | 497 | #endif |
498 | ht_num = 0; | 498 | ht_num = 0; |
@@ -544,7 +544,7 @@ static int setup_p4_watchdog(unsigned nmi_hz) | |||
544 | } | 544 | } |
545 | 545 | ||
546 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | 546 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
547 | | P4_ESCR_OS | 547 | | P4_ESCR_OS |
548 | | P4_ESCR_USR; | 548 | | P4_ESCR_USR; |
549 | 549 | ||
550 | cccr_val |= P4_CCCR_THRESHOLD(15) | 550 | cccr_val |= P4_CCCR_THRESHOLD(15) |
@@ -612,7 +612,7 @@ static void __kprobes p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | |||
612 | { | 612 | { |
613 | unsigned dummy; | 613 | unsigned dummy; |
614 | /* | 614 | /* |
615 | * P4 quirks: | 615 | * P4 quirks: |
616 | * - An overflown perfctr will assert its interrupt | 616 | * - An overflown perfctr will assert its interrupt |
617 | * until the OVF flag in its CCCR is cleared. | 617 | * until the OVF flag in its CCCR is cleared. |
618 | * - LVTPC is masked on interrupt and must be | 618 | * - LVTPC is masked on interrupt and must be |
@@ -662,7 +662,8 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) | |||
662 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | 662 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
663 | */ | 663 | */ |
664 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | 664 | cpuid(10, &(eax.full), &ebx, &unused, &unused); |
665 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | 665 | if ((eax.split.mask_length < |
666 | (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
666 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 667 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) |
667 | return 0; | 668 | return 0; |
668 | 669 | ||
diff --git a/arch/x86/kernel/cpu/proc.c b/arch/x86/kernel/cpu/proc.c index d5e30397246b..62ac8cb6ba27 100644 --- a/arch/x86/kernel/cpu/proc.c +++ b/arch/x86/kernel/cpu/proc.c | |||
@@ -116,11 +116,9 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
116 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); | 116 | seq_printf(m, "TLB size\t: %d 4K pages\n", c->x86_tlbsize); |
117 | #endif | 117 | #endif |
118 | seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); | 118 | seq_printf(m, "clflush size\t: %u\n", c->x86_clflush_size); |
119 | #ifdef CONFIG_X86_64 | ||
120 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); | 119 | seq_printf(m, "cache_alignment\t: %d\n", c->x86_cache_alignment); |
121 | seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", | 120 | seq_printf(m, "address sizes\t: %u bits physical, %u bits virtual\n", |
122 | c->x86_phys_bits, c->x86_virt_bits); | 121 | c->x86_phys_bits, c->x86_virt_bits); |
123 | #endif | ||
124 | 122 | ||
125 | seq_printf(m, "power management:"); | 123 | seq_printf(m, "power management:"); |
126 | for (i = 0; i < 32; i++) { | 124 | for (i = 0; i < 32; i++) { |
@@ -128,7 +126,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
128 | if (i < ARRAY_SIZE(x86_power_flags) && | 126 | if (i < ARRAY_SIZE(x86_power_flags) && |
129 | x86_power_flags[i]) | 127 | x86_power_flags[i]) |
130 | seq_printf(m, "%s%s", | 128 | seq_printf(m, "%s%s", |
131 | x86_power_flags[i][0]?" ":"", | 129 | x86_power_flags[i][0] ? " " : "", |
132 | x86_power_flags[i]); | 130 | x86_power_flags[i]); |
133 | else | 131 | else |
134 | seq_printf(m, " [%d]", i); | 132 | seq_printf(m, " [%d]", i); |
diff --git a/arch/x86/kernel/cpu/sched.c b/arch/x86/kernel/cpu/sched.c new file mode 100644 index 000000000000..a640ae5ad201 --- /dev/null +++ b/arch/x86/kernel/cpu/sched.c | |||
@@ -0,0 +1,55 @@ | |||
1 | #include <linux/sched.h> | ||
2 | #include <linux/math64.h> | ||
3 | #include <linux/percpu.h> | ||
4 | #include <linux/irqflags.h> | ||
5 | |||
6 | #include <asm/cpufeature.h> | ||
7 | #include <asm/processor.h> | ||
8 | |||
9 | #ifdef CONFIG_SMP | ||
10 | |||
11 | static DEFINE_PER_CPU(struct aperfmperf, old_perf_sched); | ||
12 | |||
13 | static unsigned long scale_aperfmperf(void) | ||
14 | { | ||
15 | struct aperfmperf val, *old = &__get_cpu_var(old_perf_sched); | ||
16 | unsigned long ratio, flags; | ||
17 | |||
18 | local_irq_save(flags); | ||
19 | get_aperfmperf(&val); | ||
20 | local_irq_restore(flags); | ||
21 | |||
22 | ratio = calc_aperfmperf_ratio(old, &val); | ||
23 | *old = val; | ||
24 | |||
25 | return ratio; | ||
26 | } | ||
27 | |||
28 | unsigned long arch_scale_freq_power(struct sched_domain *sd, int cpu) | ||
29 | { | ||
30 | /* | ||
31 | * do aperf/mperf on the cpu level because it includes things | ||
32 | * like turbo mode, which are relevant to full cores. | ||
33 | */ | ||
34 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
35 | return scale_aperfmperf(); | ||
36 | |||
37 | /* | ||
38 | * maybe have something cpufreq here | ||
39 | */ | ||
40 | |||
41 | return default_scale_freq_power(sd, cpu); | ||
42 | } | ||
43 | |||
44 | unsigned long arch_scale_smt_power(struct sched_domain *sd, int cpu) | ||
45 | { | ||
46 | /* | ||
47 | * aperf/mperf already includes the smt gain | ||
48 | */ | ||
49 | if (boot_cpu_has(X86_FEATURE_APERFMPERF)) | ||
50 | return SCHED_LOAD_SCALE; | ||
51 | |||
52 | return default_scale_smt_power(sd, cpu); | ||
53 | } | ||
54 | |||
55 | #endif | ||
diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 284c399e3234..0a46b4df5d80 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c | |||
@@ -24,6 +24,7 @@ | |||
24 | #include <linux/dmi.h> | 24 | #include <linux/dmi.h> |
25 | #include <asm/div64.h> | 25 | #include <asm/div64.h> |
26 | #include <asm/vmware.h> | 26 | #include <asm/vmware.h> |
27 | #include <asm/x86_init.h> | ||
27 | 28 | ||
28 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 | 29 | #define CPUID_VMWARE_INFO_LEAF 0x40000000 |
29 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 | 30 | #define VMWARE_HYPERVISOR_MAGIC 0x564D5868 |
@@ -47,19 +48,27 @@ static inline int __vmware_platform(void) | |||
47 | return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; | 48 | return eax != (uint32_t)-1 && ebx == VMWARE_HYPERVISOR_MAGIC; |
48 | } | 49 | } |
49 | 50 | ||
50 | static unsigned long __vmware_get_tsc_khz(void) | 51 | static unsigned long vmware_get_tsc_khz(void) |
51 | { | 52 | { |
52 | uint64_t tsc_hz; | 53 | uint64_t tsc_hz; |
53 | uint32_t eax, ebx, ecx, edx; | 54 | uint32_t eax, ebx, ecx, edx; |
55 | |||
56 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | ||
57 | |||
58 | tsc_hz = eax | (((uint64_t)ebx) << 32); | ||
59 | do_div(tsc_hz, 1000); | ||
60 | BUG_ON(tsc_hz >> 32); | ||
61 | return tsc_hz; | ||
62 | } | ||
63 | |||
64 | void __init vmware_platform_setup(void) | ||
65 | { | ||
66 | uint32_t eax, ebx, ecx, edx; | ||
54 | 67 | ||
55 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); | 68 | VMWARE_PORT(GETHZ, eax, ebx, ecx, edx); |
56 | 69 | ||
57 | if (ebx == UINT_MAX) | 70 | if (ebx != UINT_MAX) |
58 | return 0; | 71 | x86_platform.calibrate_tsc = vmware_get_tsc_khz; |
59 | tsc_hz = eax | (((uint64_t)ebx) << 32); | ||
60 | do_div(tsc_hz, 1000); | ||
61 | BUG_ON(tsc_hz >> 32); | ||
62 | return tsc_hz; | ||
63 | } | 72 | } |
64 | 73 | ||
65 | /* | 74 | /* |
@@ -87,12 +96,6 @@ int vmware_platform(void) | |||
87 | return 0; | 96 | return 0; |
88 | } | 97 | } |
89 | 98 | ||
90 | unsigned long vmware_get_tsc_khz(void) | ||
91 | { | ||
92 | BUG_ON(!vmware_platform()); | ||
93 | return __vmware_get_tsc_khz(); | ||
94 | } | ||
95 | |||
96 | /* | 99 | /* |
97 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. | 100 | * VMware hypervisor takes care of exporting a reliable TSC to the guest. |
98 | * Still, due to timing difference when running on virtual cpus, the TSC can | 101 | * Still, due to timing difference when running on virtual cpus, the TSC can |
diff --git a/arch/x86/kernel/doublefault_32.c b/arch/x86/kernel/doublefault_32.c index b4f14c6c09d9..37250fe490b1 100644 --- a/arch/x86/kernel/doublefault_32.c +++ b/arch/x86/kernel/doublefault_32.c | |||
@@ -27,9 +27,7 @@ static void doublefault_fn(void) | |||
27 | 27 | ||
28 | if (ptr_ok(gdt)) { | 28 | if (ptr_ok(gdt)) { |
29 | gdt += GDT_ENTRY_TSS << 3; | 29 | gdt += GDT_ENTRY_TSS << 3; |
30 | tss = *(u16 *)(gdt+2); | 30 | tss = get_desc_base((struct desc_struct *)gdt); |
31 | tss += *(u8 *)(gdt+4) << 16; | ||
32 | tss += *(u8 *)(gdt+7) << 24; | ||
33 | printk(KERN_EMERG "double fault, tss at %08lx\n", tss); | 31 | printk(KERN_EMERG "double fault, tss at %08lx\n", tss); |
34 | 32 | ||
35 | if (ptr_ok(tss)) { | 33 | if (ptr_ok(tss)) { |
diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c index 48bfe1386038..ef42a038f1a6 100644 --- a/arch/x86/kernel/ds.c +++ b/arch/x86/kernel/ds.c | |||
@@ -509,15 +509,15 @@ enum bts_field { | |||
509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) | 509 | bts_escape = ((unsigned long)-1 & ~bts_qual_mask) |
510 | }; | 510 | }; |
511 | 511 | ||
512 | static inline unsigned long bts_get(const char *base, enum bts_field field) | 512 | static inline unsigned long bts_get(const char *base, unsigned long field) |
513 | { | 513 | { |
514 | base += (ds_cfg.sizeof_ptr_field * field); | 514 | base += (ds_cfg.sizeof_ptr_field * field); |
515 | return *(unsigned long *)base; | 515 | return *(unsigned long *)base; |
516 | } | 516 | } |
517 | 517 | ||
518 | static inline void bts_set(char *base, enum bts_field field, unsigned long val) | 518 | static inline void bts_set(char *base, unsigned long field, unsigned long val) |
519 | { | 519 | { |
520 | base += (ds_cfg.sizeof_ptr_field * field);; | 520 | base += (ds_cfg.sizeof_ptr_field * field); |
521 | (*(unsigned long *)base) = val; | 521 | (*(unsigned long *)base) = val; |
522 | } | 522 | } |
523 | 523 | ||
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index c8405718a4c3..2d8a371d4339 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c | |||
@@ -15,7 +15,6 @@ | |||
15 | #include <linux/bug.h> | 15 | #include <linux/bug.h> |
16 | #include <linux/nmi.h> | 16 | #include <linux/nmi.h> |
17 | #include <linux/sysfs.h> | 17 | #include <linux/sysfs.h> |
18 | #include <linux/ftrace.h> | ||
19 | 18 | ||
20 | #include <asm/stacktrace.h> | 19 | #include <asm/stacktrace.h> |
21 | 20 | ||
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 5cb5725b2bae..a3210ce1eccd 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c | |||
@@ -115,7 +115,7 @@ static void __init __e820_add_region(struct e820map *e820x, u64 start, u64 size, | |||
115 | { | 115 | { |
116 | int x = e820x->nr_map; | 116 | int x = e820x->nr_map; |
117 | 117 | ||
118 | if (x == ARRAY_SIZE(e820x->map)) { | 118 | if (x >= ARRAY_SIZE(e820x->map)) { |
119 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); | 119 | printk(KERN_ERR "Ooops! Too many entries in the memory map!\n"); |
120 | return; | 120 | return; |
121 | } | 121 | } |
@@ -1455,28 +1455,11 @@ char *__init default_machine_specific_memory_setup(void) | |||
1455 | return who; | 1455 | return who; |
1456 | } | 1456 | } |
1457 | 1457 | ||
1458 | char *__init __attribute__((weak)) machine_specific_memory_setup(void) | ||
1459 | { | ||
1460 | if (x86_quirks->arch_memory_setup) { | ||
1461 | char *who = x86_quirks->arch_memory_setup(); | ||
1462 | |||
1463 | if (who) | ||
1464 | return who; | ||
1465 | } | ||
1466 | return default_machine_specific_memory_setup(); | ||
1467 | } | ||
1468 | |||
1469 | /* Overridden in paravirt.c if CONFIG_PARAVIRT */ | ||
1470 | char * __init __attribute__((weak)) memory_setup(void) | ||
1471 | { | ||
1472 | return machine_specific_memory_setup(); | ||
1473 | } | ||
1474 | |||
1475 | void __init setup_memory_map(void) | 1458 | void __init setup_memory_map(void) |
1476 | { | 1459 | { |
1477 | char *who; | 1460 | char *who; |
1478 | 1461 | ||
1479 | who = memory_setup(); | 1462 | who = x86_init.resources.memory_setup(); |
1480 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); | 1463 | memcpy(&e820_saved, &e820, sizeof(struct e820map)); |
1481 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); | 1464 | printk(KERN_INFO "BIOS-provided physical RAM map:\n"); |
1482 | e820_print_map(who); | 1465 | e820_print_map(who); |
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index fe26ba3e3451..ad5bd988fb79 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <asm/time.h> | 42 | #include <asm/time.h> |
43 | #include <asm/cacheflush.h> | 43 | #include <asm/cacheflush.h> |
44 | #include <asm/tlbflush.h> | 44 | #include <asm/tlbflush.h> |
45 | #include <asm/x86_init.h> | ||
45 | 46 | ||
46 | #define EFI_DEBUG 1 | 47 | #define EFI_DEBUG 1 |
47 | #define PFX "EFI: " | 48 | #define PFX "EFI: " |
@@ -453,6 +454,9 @@ void __init efi_init(void) | |||
453 | if (add_efi_memmap) | 454 | if (add_efi_memmap) |
454 | do_add_efi_memmap(); | 455 | do_add_efi_memmap(); |
455 | 456 | ||
457 | x86_platform.get_wallclock = efi_get_time; | ||
458 | x86_platform.set_wallclock = efi_set_rtc_mmss; | ||
459 | |||
456 | /* Setup for EFI runtime service */ | 460 | /* Setup for EFI runtime service */ |
457 | reboot_type = BOOT_EFI; | 461 | reboot_type = BOOT_EFI; |
458 | 462 | ||
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index c251be745107..d59fe323807e 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S | |||
@@ -146,7 +146,7 @@ ENTRY(ftrace_graph_caller) | |||
146 | END(ftrace_graph_caller) | 146 | END(ftrace_graph_caller) |
147 | 147 | ||
148 | GLOBAL(return_to_handler) | 148 | GLOBAL(return_to_handler) |
149 | subq $80, %rsp | 149 | subq $24, %rsp |
150 | 150 | ||
151 | /* Save the return values */ | 151 | /* Save the return values */ |
152 | movq %rax, (%rsp) | 152 | movq %rax, (%rsp) |
@@ -155,10 +155,10 @@ GLOBAL(return_to_handler) | |||
155 | 155 | ||
156 | call ftrace_return_to_handler | 156 | call ftrace_return_to_handler |
157 | 157 | ||
158 | movq %rax, 72(%rsp) | 158 | movq %rax, 16(%rsp) |
159 | movq 8(%rsp), %rdx | 159 | movq 8(%rsp), %rdx |
160 | movq (%rsp), %rax | 160 | movq (%rsp), %rax |
161 | addq $72, %rsp | 161 | addq $16, %rsp |
162 | retq | 162 | retq |
163 | #endif | 163 | #endif |
164 | 164 | ||
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index d94e1ea3b9fe..9dbb527e1652 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c | |||
@@ -417,10 +417,6 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr, | |||
417 | unsigned long return_hooker = (unsigned long) | 417 | unsigned long return_hooker = (unsigned long) |
418 | &return_to_handler; | 418 | &return_to_handler; |
419 | 419 | ||
420 | /* Nmi's are currently unsupported */ | ||
421 | if (unlikely(in_nmi())) | ||
422 | return; | ||
423 | |||
424 | if (unlikely(atomic_read(¤t->tracing_graph_pause))) | 420 | if (unlikely(atomic_read(¤t->tracing_graph_pause))) |
425 | return; | 421 | return; |
426 | 422 | ||
@@ -498,37 +494,56 @@ static struct syscall_metadata *find_syscall_meta(unsigned long *syscall) | |||
498 | 494 | ||
499 | struct syscall_metadata *syscall_nr_to_meta(int nr) | 495 | struct syscall_metadata *syscall_nr_to_meta(int nr) |
500 | { | 496 | { |
501 | if (!syscalls_metadata || nr >= FTRACE_SYSCALL_MAX || nr < 0) | 497 | if (!syscalls_metadata || nr >= NR_syscalls || nr < 0) |
502 | return NULL; | 498 | return NULL; |
503 | 499 | ||
504 | return syscalls_metadata[nr]; | 500 | return syscalls_metadata[nr]; |
505 | } | 501 | } |
506 | 502 | ||
507 | void arch_init_ftrace_syscalls(void) | 503 | int syscall_name_to_nr(char *name) |
504 | { | ||
505 | int i; | ||
506 | |||
507 | if (!syscalls_metadata) | ||
508 | return -1; | ||
509 | |||
510 | for (i = 0; i < NR_syscalls; i++) { | ||
511 | if (syscalls_metadata[i]) { | ||
512 | if (!strcmp(syscalls_metadata[i]->name, name)) | ||
513 | return i; | ||
514 | } | ||
515 | } | ||
516 | return -1; | ||
517 | } | ||
518 | |||
519 | void set_syscall_enter_id(int num, int id) | ||
520 | { | ||
521 | syscalls_metadata[num]->enter_id = id; | ||
522 | } | ||
523 | |||
524 | void set_syscall_exit_id(int num, int id) | ||
525 | { | ||
526 | syscalls_metadata[num]->exit_id = id; | ||
527 | } | ||
528 | |||
529 | static int __init arch_init_ftrace_syscalls(void) | ||
508 | { | 530 | { |
509 | int i; | 531 | int i; |
510 | struct syscall_metadata *meta; | 532 | struct syscall_metadata *meta; |
511 | unsigned long **psys_syscall_table = &sys_call_table; | 533 | unsigned long **psys_syscall_table = &sys_call_table; |
512 | static atomic_t refs; | ||
513 | |||
514 | if (atomic_inc_return(&refs) != 1) | ||
515 | goto end; | ||
516 | 534 | ||
517 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * | 535 | syscalls_metadata = kzalloc(sizeof(*syscalls_metadata) * |
518 | FTRACE_SYSCALL_MAX, GFP_KERNEL); | 536 | NR_syscalls, GFP_KERNEL); |
519 | if (!syscalls_metadata) { | 537 | if (!syscalls_metadata) { |
520 | WARN_ON(1); | 538 | WARN_ON(1); |
521 | return; | 539 | return -ENOMEM; |
522 | } | 540 | } |
523 | 541 | ||
524 | for (i = 0; i < FTRACE_SYSCALL_MAX; i++) { | 542 | for (i = 0; i < NR_syscalls; i++) { |
525 | meta = find_syscall_meta(psys_syscall_table[i]); | 543 | meta = find_syscall_meta(psys_syscall_table[i]); |
526 | syscalls_metadata[i] = meta; | 544 | syscalls_metadata[i] = meta; |
527 | } | 545 | } |
528 | return; | 546 | return 0; |
529 | |||
530 | /* Paranoid: avoid overflow */ | ||
531 | end: | ||
532 | atomic_dec(&refs); | ||
533 | } | 547 | } |
548 | arch_initcall(arch_init_ftrace_syscalls); | ||
534 | #endif | 549 | #endif |
diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index 3f8579f8d42c..4f8e2507e8f3 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c | |||
@@ -11,8 +11,21 @@ | |||
11 | #include <asm/setup.h> | 11 | #include <asm/setup.h> |
12 | #include <asm/sections.h> | 12 | #include <asm/sections.h> |
13 | #include <asm/e820.h> | 13 | #include <asm/e820.h> |
14 | #include <asm/bios_ebda.h> | 14 | #include <asm/page.h> |
15 | #include <asm/trampoline.h> | 15 | #include <asm/trampoline.h> |
16 | #include <asm/apic.h> | ||
17 | #include <asm/io_apic.h> | ||
18 | #include <asm/bios_ebda.h> | ||
19 | |||
20 | static void __init i386_default_early_setup(void) | ||
21 | { | ||
22 | /* Initilize 32bit specific setup functions */ | ||
23 | x86_init.resources.probe_roms = probe_roms; | ||
24 | x86_init.resources.reserve_resources = i386_reserve_resources; | ||
25 | x86_init.mpparse.setup_ioapic_ids = setup_ioapic_ids_from_mpc; | ||
26 | |||
27 | reserve_ebda_region(); | ||
28 | } | ||
16 | 29 | ||
17 | void __init i386_start_kernel(void) | 30 | void __init i386_start_kernel(void) |
18 | { | 31 | { |
@@ -29,7 +42,16 @@ void __init i386_start_kernel(void) | |||
29 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); | 42 | reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); |
30 | } | 43 | } |
31 | #endif | 44 | #endif |
32 | reserve_ebda_region(); | 45 | |
46 | /* Call the subarch specific early setup function */ | ||
47 | switch (boot_params.hdr.hardware_subarch) { | ||
48 | case X86_SUBARCH_MRST: | ||
49 | x86_mrst_early_setup(); | ||
50 | break; | ||
51 | default: | ||
52 | i386_default_early_setup(); | ||
53 | break; | ||
54 | } | ||
33 | 55 | ||
34 | /* | 56 | /* |
35 | * At this point everything still needed from the boot loader | 57 | * At this point everything still needed from the boot loader |
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 70eaa852c732..0b06cd778fd9 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c | |||
@@ -23,8 +23,8 @@ | |||
23 | #include <asm/sections.h> | 23 | #include <asm/sections.h> |
24 | #include <asm/kdebug.h> | 24 | #include <asm/kdebug.h> |
25 | #include <asm/e820.h> | 25 | #include <asm/e820.h> |
26 | #include <asm/bios_ebda.h> | ||
27 | #include <asm/trampoline.h> | 26 | #include <asm/trampoline.h> |
27 | #include <asm/bios_ebda.h> | ||
28 | 28 | ||
29 | static void __init zap_identity_mappings(void) | 29 | static void __init zap_identity_mappings(void) |
30 | { | 30 | { |
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index cc827ac9e8d3..b766e8c7252d 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S | |||
@@ -157,6 +157,7 @@ subarch_entries: | |||
157 | .long default_entry /* normal x86/PC */ | 157 | .long default_entry /* normal x86/PC */ |
158 | .long lguest_entry /* lguest hypervisor */ | 158 | .long lguest_entry /* lguest hypervisor */ |
159 | .long xen_entry /* Xen hypervisor */ | 159 | .long xen_entry /* Xen hypervisor */ |
160 | .long default_entry /* Moorestown MID */ | ||
160 | num_subarch_entries = (. - subarch_entries) / 4 | 161 | num_subarch_entries = (. - subarch_entries) / 4 |
161 | .previous | 162 | .previous |
162 | #endif /* CONFIG_PARAVIRT */ | 163 | #endif /* CONFIG_PARAVIRT */ |
@@ -439,7 +440,6 @@ is386: movl $2,%ecx # set MP | |||
439 | jne 1f | 440 | jne 1f |
440 | movl $per_cpu__gdt_page,%eax | 441 | movl $per_cpu__gdt_page,%eax |
441 | movl $per_cpu__stack_canary,%ecx | 442 | movl $per_cpu__stack_canary,%ecx |
442 | subl $20, %ecx | ||
443 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) | 443 | movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax) |
444 | shrl $16, %ecx | 444 | shrl $16, %ecx |
445 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) | 445 | movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax) |
diff --git a/arch/x86/kernel/i8253.c b/arch/x86/kernel/i8253.c index 5cf36c053ac4..23c167925a5c 100644 --- a/arch/x86/kernel/i8253.c +++ b/arch/x86/kernel/i8253.c | |||
@@ -19,12 +19,6 @@ | |||
19 | DEFINE_SPINLOCK(i8253_lock); | 19 | DEFINE_SPINLOCK(i8253_lock); |
20 | EXPORT_SYMBOL(i8253_lock); | 20 | EXPORT_SYMBOL(i8253_lock); |
21 | 21 | ||
22 | #ifdef CONFIG_X86_32 | ||
23 | static void pit_disable_clocksource(void); | ||
24 | #else | ||
25 | static inline void pit_disable_clocksource(void) { } | ||
26 | #endif | ||
27 | |||
28 | /* | 22 | /* |
29 | * HPET replaces the PIT, when enabled. So we need to know, which of | 23 | * HPET replaces the PIT, when enabled. So we need to know, which of |
30 | * the two timers is used | 24 | * the two timers is used |
@@ -57,12 +51,10 @@ static void init_pit_timer(enum clock_event_mode mode, | |||
57 | outb_pit(0, PIT_CH0); | 51 | outb_pit(0, PIT_CH0); |
58 | outb_pit(0, PIT_CH0); | 52 | outb_pit(0, PIT_CH0); |
59 | } | 53 | } |
60 | pit_disable_clocksource(); | ||
61 | break; | 54 | break; |
62 | 55 | ||
63 | case CLOCK_EVT_MODE_ONESHOT: | 56 | case CLOCK_EVT_MODE_ONESHOT: |
64 | /* One shot setup */ | 57 | /* One shot setup */ |
65 | pit_disable_clocksource(); | ||
66 | outb_pit(0x38, PIT_MODE); | 58 | outb_pit(0x38, PIT_MODE); |
67 | break; | 59 | break; |
68 | 60 | ||
@@ -200,17 +192,6 @@ static struct clocksource pit_cs = { | |||
200 | .shift = 20, | 192 | .shift = 20, |
201 | }; | 193 | }; |
202 | 194 | ||
203 | static void pit_disable_clocksource(void) | ||
204 | { | ||
205 | /* | ||
206 | * Use mult to check whether it is registered or not | ||
207 | */ | ||
208 | if (pit_cs.mult) { | ||
209 | clocksource_unregister(&pit_cs); | ||
210 | pit_cs.mult = 0; | ||
211 | } | ||
212 | } | ||
213 | |||
214 | static int __init init_pit_clocksource(void) | 195 | static int __init init_pit_clocksource(void) |
215 | { | 196 | { |
216 | /* | 197 | /* |
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index b0cdde6932f5..74656d1d4e30 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c | |||
@@ -104,7 +104,7 @@ static int show_other_interrupts(struct seq_file *p, int prec) | |||
104 | seq_printf(p, " Threshold APIC interrupts\n"); | 104 | seq_printf(p, " Threshold APIC interrupts\n"); |
105 | # endif | 105 | # endif |
106 | #endif | 106 | #endif |
107 | #ifdef CONFIG_X86_NEW_MCE | 107 | #ifdef CONFIG_X86_MCE |
108 | seq_printf(p, "%*s: ", prec, "MCE"); | 108 | seq_printf(p, "%*s: ", prec, "MCE"); |
109 | for_each_online_cpu(j) | 109 | for_each_online_cpu(j) |
110 | seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); | 110 | seq_printf(p, "%10u ", per_cpu(mce_exception_count, j)); |
@@ -200,7 +200,7 @@ u64 arch_irq_stat_cpu(unsigned int cpu) | |||
200 | sum += irq_stats(cpu)->irq_threshold_count; | 200 | sum += irq_stats(cpu)->irq_threshold_count; |
201 | # endif | 201 | # endif |
202 | #endif | 202 | #endif |
203 | #ifdef CONFIG_X86_NEW_MCE | 203 | #ifdef CONFIG_X86_MCE |
204 | sum += per_cpu(mce_exception_count, cpu); | 204 | sum += per_cpu(mce_exception_count, cpu); |
205 | sum += per_cpu(mce_poll_count, cpu); | 205 | sum += per_cpu(mce_poll_count, cpu); |
206 | #endif | 206 | #endif |
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c index 3b09634a5153..7d35d0fe2329 100644 --- a/arch/x86/kernel/irq_32.c +++ b/arch/x86/kernel/irq_32.c | |||
@@ -218,7 +218,6 @@ bool handle_irq(unsigned irq, struct pt_regs *regs) | |||
218 | void fixup_irqs(void) | 218 | void fixup_irqs(void) |
219 | { | 219 | { |
220 | unsigned int irq; | 220 | unsigned int irq; |
221 | static int warned; | ||
222 | struct irq_desc *desc; | 221 | struct irq_desc *desc; |
223 | 222 | ||
224 | for_each_irq_desc(irq, desc) { | 223 | for_each_irq_desc(irq, desc) { |
@@ -236,8 +235,8 @@ void fixup_irqs(void) | |||
236 | } | 235 | } |
237 | if (desc->chip->set_affinity) | 236 | if (desc->chip->set_affinity) |
238 | desc->chip->set_affinity(irq, affinity); | 237 | desc->chip->set_affinity(irq, affinity); |
239 | else if (desc->action && !(warned++)) | 238 | else if (desc->action) |
240 | printk("Cannot set affinity for irq %i\n", irq); | 239 | printk_once("Cannot set affinity for irq %i\n", irq); |
241 | } | 240 | } |
242 | 241 | ||
243 | #if 0 | 242 | #if 0 |
diff --git a/arch/x86/kernel/irqinit.c b/arch/x86/kernel/irqinit.c index 92b7703d3d58..300883112e3d 100644 --- a/arch/x86/kernel/irqinit.c +++ b/arch/x86/kernel/irqinit.c | |||
@@ -116,7 +116,7 @@ int vector_used_by_percpu_irq(unsigned int vector) | |||
116 | return 0; | 116 | return 0; |
117 | } | 117 | } |
118 | 118 | ||
119 | static void __init init_ISA_irqs(void) | 119 | void __init init_ISA_irqs(void) |
120 | { | 120 | { |
121 | int i; | 121 | int i; |
122 | 122 | ||
@@ -140,8 +140,10 @@ static void __init init_ISA_irqs(void) | |||
140 | } | 140 | } |
141 | } | 141 | } |
142 | 142 | ||
143 | /* Overridden in paravirt.c */ | 143 | void __init init_IRQ(void) |
144 | void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); | 144 | { |
145 | x86_init.irqs.intr_init(); | ||
146 | } | ||
145 | 147 | ||
146 | static void __init smp_intr_init(void) | 148 | static void __init smp_intr_init(void) |
147 | { | 149 | { |
@@ -190,7 +192,7 @@ static void __init apic_intr_init(void) | |||
190 | #ifdef CONFIG_X86_MCE_THRESHOLD | 192 | #ifdef CONFIG_X86_MCE_THRESHOLD |
191 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); | 193 | alloc_intr_gate(THRESHOLD_APIC_VECTOR, threshold_interrupt); |
192 | #endif | 194 | #endif |
193 | #if defined(CONFIG_X86_NEW_MCE) && defined(CONFIG_X86_LOCAL_APIC) | 195 | #if defined(CONFIG_X86_MCE) && defined(CONFIG_X86_LOCAL_APIC) |
194 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); | 196 | alloc_intr_gate(MCE_SELF_VECTOR, mce_self_interrupt); |
195 | #endif | 197 | #endif |
196 | 198 | ||
@@ -213,32 +215,12 @@ static void __init apic_intr_init(void) | |||
213 | #endif | 215 | #endif |
214 | } | 216 | } |
215 | 217 | ||
216 | /** | ||
217 | * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors | ||
218 | * | ||
219 | * Description: | ||
220 | * Perform any necessary interrupt initialisation prior to setting up | ||
221 | * the "ordinary" interrupt call gates. For legacy reasons, the ISA | ||
222 | * interrupts should be initialised here if the machine emulates a PC | ||
223 | * in any way. | ||
224 | **/ | ||
225 | static void __init x86_quirk_pre_intr_init(void) | ||
226 | { | ||
227 | #ifdef CONFIG_X86_32 | ||
228 | if (x86_quirks->arch_pre_intr_init) { | ||
229 | if (x86_quirks->arch_pre_intr_init()) | ||
230 | return; | ||
231 | } | ||
232 | #endif | ||
233 | init_ISA_irqs(); | ||
234 | } | ||
235 | |||
236 | void __init native_init_IRQ(void) | 218 | void __init native_init_IRQ(void) |
237 | { | 219 | { |
238 | int i; | 220 | int i; |
239 | 221 | ||
240 | /* Execute any quirks before the call gates are initialised: */ | 222 | /* Execute any quirks before the call gates are initialised: */ |
241 | x86_quirk_pre_intr_init(); | 223 | x86_init.irqs.pre_vector_init(); |
242 | 224 | ||
243 | apic_intr_init(); | 225 | apic_intr_init(); |
244 | 226 | ||
@@ -258,12 +240,6 @@ void __init native_init_IRQ(void) | |||
258 | 240 | ||
259 | #ifdef CONFIG_X86_32 | 241 | #ifdef CONFIG_X86_32 |
260 | /* | 242 | /* |
261 | * Call quirks after call gates are initialised (usually add in | ||
262 | * the architecture specific gates): | ||
263 | */ | ||
264 | x86_quirk_intr_init(); | ||
265 | |||
266 | /* | ||
267 | * External FPU? Set up irq13 if so, for | 243 | * External FPU? Set up irq13 if so, for |
268 | * original braindamaged IBM FERR coupling. | 244 | * original braindamaged IBM FERR coupling. |
269 | */ | 245 | */ |
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index c664d515f613..63b0ec8d3d4a 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c | |||
@@ -34,7 +34,6 @@ | |||
34 | struct kvm_para_state { | 34 | struct kvm_para_state { |
35 | u8 mmu_queue[MMU_QUEUE_SIZE]; | 35 | u8 mmu_queue[MMU_QUEUE_SIZE]; |
36 | int mmu_queue_len; | 36 | int mmu_queue_len; |
37 | enum paravirt_lazy_mode mode; | ||
38 | }; | 37 | }; |
39 | 38 | ||
40 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); | 39 | static DEFINE_PER_CPU(struct kvm_para_state, para_state); |
@@ -77,7 +76,7 @@ static void kvm_deferred_mmu_op(void *buffer, int len) | |||
77 | { | 76 | { |
78 | struct kvm_para_state *state = kvm_para_state(); | 77 | struct kvm_para_state *state = kvm_para_state(); |
79 | 78 | ||
80 | if (state->mode != PARAVIRT_LAZY_MMU) { | 79 | if (paravirt_get_lazy_mode() != PARAVIRT_LAZY_MMU) { |
81 | kvm_mmu_op(buffer, len); | 80 | kvm_mmu_op(buffer, len); |
82 | return; | 81 | return; |
83 | } | 82 | } |
@@ -185,10 +184,7 @@ static void kvm_release_pt(unsigned long pfn) | |||
185 | 184 | ||
186 | static void kvm_enter_lazy_mmu(void) | 185 | static void kvm_enter_lazy_mmu(void) |
187 | { | 186 | { |
188 | struct kvm_para_state *state = kvm_para_state(); | ||
189 | |||
190 | paravirt_enter_lazy_mmu(); | 187 | paravirt_enter_lazy_mmu(); |
191 | state->mode = paravirt_get_lazy_mode(); | ||
192 | } | 188 | } |
193 | 189 | ||
194 | static void kvm_leave_lazy_mmu(void) | 190 | static void kvm_leave_lazy_mmu(void) |
@@ -197,7 +193,6 @@ static void kvm_leave_lazy_mmu(void) | |||
197 | 193 | ||
198 | mmu_queue_flush(state); | 194 | mmu_queue_flush(state); |
199 | paravirt_leave_lazy_mmu(); | 195 | paravirt_leave_lazy_mmu(); |
200 | state->mode = paravirt_get_lazy_mode(); | ||
201 | } | 196 | } |
202 | 197 | ||
203 | static void __init paravirt_ops_setup(void) | 198 | static void __init paravirt_ops_setup(void) |
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 223af43f1526..feaeb0d3aa4f 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
23 | #include <asm/apic.h> | 23 | #include <asm/apic.h> |
24 | #include <linux/percpu.h> | 24 | #include <linux/percpu.h> |
25 | |||
26 | #include <asm/x86_init.h> | ||
25 | #include <asm/reboot.h> | 27 | #include <asm/reboot.h> |
26 | 28 | ||
27 | #define KVM_SCALE 22 | 29 | #define KVM_SCALE 22 |
@@ -50,8 +52,8 @@ static unsigned long kvm_get_wallclock(void) | |||
50 | struct timespec ts; | 52 | struct timespec ts; |
51 | int low, high; | 53 | int low, high; |
52 | 54 | ||
53 | low = (int)__pa(&wall_clock); | 55 | low = (int)__pa_symbol(&wall_clock); |
54 | high = ((u64)__pa(&wall_clock) >> 32); | 56 | high = ((u64)__pa_symbol(&wall_clock) >> 32); |
55 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); | 57 | native_write_msr(MSR_KVM_WALL_CLOCK, low, high); |
56 | 58 | ||
57 | vcpu_time = &get_cpu_var(hv_clock); | 59 | vcpu_time = &get_cpu_var(hv_clock); |
@@ -182,12 +184,13 @@ void __init kvmclock_init(void) | |||
182 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { | 184 | if (kvmclock && kvm_para_has_feature(KVM_FEATURE_CLOCKSOURCE)) { |
183 | if (kvm_register_clock("boot clock")) | 185 | if (kvm_register_clock("boot clock")) |
184 | return; | 186 | return; |
185 | pv_time_ops.get_wallclock = kvm_get_wallclock; | ||
186 | pv_time_ops.set_wallclock = kvm_set_wallclock; | ||
187 | pv_time_ops.sched_clock = kvm_clock_read; | 187 | pv_time_ops.sched_clock = kvm_clock_read; |
188 | pv_time_ops.get_tsc_khz = kvm_get_tsc_khz; | 188 | x86_platform.calibrate_tsc = kvm_get_tsc_khz; |
189 | x86_platform.get_wallclock = kvm_get_wallclock; | ||
190 | x86_platform.set_wallclock = kvm_set_wallclock; | ||
189 | #ifdef CONFIG_X86_LOCAL_APIC | 191 | #ifdef CONFIG_X86_LOCAL_APIC |
190 | pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock; | 192 | x86_cpuinit.setup_percpu_clockev = |
193 | kvm_setup_secondary_clock; | ||
191 | #endif | 194 | #endif |
192 | #ifdef CONFIG_SMP | 195 | #ifdef CONFIG_SMP |
193 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; | 196 | smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; |
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 651c93b28862..5be95ef4ffec 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c | |||
@@ -45,6 +45,11 @@ static int __init mpf_checksum(unsigned char *mp, int len) | |||
45 | return sum & 0xFF; | 45 | return sum & 0xFF; |
46 | } | 46 | } |
47 | 47 | ||
48 | int __init default_mpc_apic_id(struct mpc_cpu *m) | ||
49 | { | ||
50 | return m->apicid; | ||
51 | } | ||
52 | |||
48 | static void __init MP_processor_info(struct mpc_cpu *m) | 53 | static void __init MP_processor_info(struct mpc_cpu *m) |
49 | { | 54 | { |
50 | int apicid; | 55 | int apicid; |
@@ -55,10 +60,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
55 | return; | 60 | return; |
56 | } | 61 | } |
57 | 62 | ||
58 | if (x86_quirks->mpc_apic_id) | 63 | apicid = x86_init.mpparse.mpc_apic_id(m); |
59 | apicid = x86_quirks->mpc_apic_id(m); | ||
60 | else | ||
61 | apicid = m->apicid; | ||
62 | 64 | ||
63 | if (m->cpuflag & CPU_BOOTPROCESSOR) { | 65 | if (m->cpuflag & CPU_BOOTPROCESSOR) { |
64 | bootup_cpu = " (Bootup-CPU)"; | 66 | bootup_cpu = " (Bootup-CPU)"; |
@@ -70,16 +72,18 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
70 | } | 72 | } |
71 | 73 | ||
72 | #ifdef CONFIG_X86_IO_APIC | 74 | #ifdef CONFIG_X86_IO_APIC |
73 | static void __init MP_bus_info(struct mpc_bus *m) | 75 | void __init default_mpc_oem_bus_info(struct mpc_bus *m, char *str) |
74 | { | 76 | { |
75 | char str[7]; | ||
76 | memcpy(str, m->bustype, 6); | 77 | memcpy(str, m->bustype, 6); |
77 | str[6] = 0; | 78 | str[6] = 0; |
79 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); | ||
80 | } | ||
78 | 81 | ||
79 | if (x86_quirks->mpc_oem_bus_info) | 82 | static void __init MP_bus_info(struct mpc_bus *m) |
80 | x86_quirks->mpc_oem_bus_info(m, str); | 83 | { |
81 | else | 84 | char str[7]; |
82 | apic_printk(APIC_VERBOSE, "Bus #%d is %s\n", m->busid, str); | 85 | |
86 | x86_init.mpparse.mpc_oem_bus_info(m, str); | ||
83 | 87 | ||
84 | #if MAX_MP_BUSSES < 256 | 88 | #if MAX_MP_BUSSES < 256 |
85 | if (m->busid >= MAX_MP_BUSSES) { | 89 | if (m->busid >= MAX_MP_BUSSES) { |
@@ -96,8 +100,8 @@ static void __init MP_bus_info(struct mpc_bus *m) | |||
96 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; | 100 | mp_bus_id_to_type[m->busid] = MP_BUS_ISA; |
97 | #endif | 101 | #endif |
98 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { | 102 | } else if (strncmp(str, BUSTYPE_PCI, sizeof(BUSTYPE_PCI) - 1) == 0) { |
99 | if (x86_quirks->mpc_oem_pci_bus) | 103 | if (x86_init.mpparse.mpc_oem_pci_bus) |
100 | x86_quirks->mpc_oem_pci_bus(m); | 104 | x86_init.mpparse.mpc_oem_pci_bus(m); |
101 | 105 | ||
102 | clear_bit(m->busid, mp_bus_not_pci); | 106 | clear_bit(m->busid, mp_bus_not_pci); |
103 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) | 107 | #if defined(CONFIG_EISA) || defined(CONFIG_MCA) |
@@ -291,6 +295,8 @@ static void __init smp_dump_mptable(struct mpc_table *mpc, unsigned char *mpt) | |||
291 | 1, mpc, mpc->length, 1); | 295 | 1, mpc, mpc->length, 1); |
292 | } | 296 | } |
293 | 297 | ||
298 | void __init default_smp_read_mpc_oem(struct mpc_table *mpc) { } | ||
299 | |||
294 | static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | 300 | static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) |
295 | { | 301 | { |
296 | char str[16]; | 302 | char str[16]; |
@@ -312,16 +318,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
312 | if (early) | 318 | if (early) |
313 | return 1; | 319 | return 1; |
314 | 320 | ||
315 | if (mpc->oemptr && x86_quirks->smp_read_mpc_oem) { | 321 | if (mpc->oemptr) |
316 | struct mpc_oemtable *oem_table = (void *)(long)mpc->oemptr; | 322 | x86_init.mpparse.smp_read_mpc_oem(mpc); |
317 | x86_quirks->smp_read_mpc_oem(oem_table, mpc->oemsize); | ||
318 | } | ||
319 | 323 | ||
320 | /* | 324 | /* |
321 | * Now process the configuration blocks. | 325 | * Now process the configuration blocks. |
322 | */ | 326 | */ |
323 | if (x86_quirks->mpc_record) | 327 | x86_init.mpparse.mpc_record(0); |
324 | *x86_quirks->mpc_record = 0; | ||
325 | 328 | ||
326 | while (count < mpc->length) { | 329 | while (count < mpc->length) { |
327 | switch (*mpt) { | 330 | switch (*mpt) { |
@@ -353,8 +356,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) | |||
353 | count = mpc->length; | 356 | count = mpc->length; |
354 | break; | 357 | break; |
355 | } | 358 | } |
356 | if (x86_quirks->mpc_record) | 359 | x86_init.mpparse.mpc_record(1); |
357 | (*x86_quirks->mpc_record)++; | ||
358 | } | 360 | } |
359 | 361 | ||
360 | #ifdef CONFIG_X86_BIGSMP | 362 | #ifdef CONFIG_X86_BIGSMP |
@@ -482,11 +484,11 @@ static void __init construct_ioapic_table(int mpc_default_type) | |||
482 | MP_bus_info(&bus); | 484 | MP_bus_info(&bus); |
483 | } | 485 | } |
484 | 486 | ||
485 | ioapic.type = MP_IOAPIC; | 487 | ioapic.type = MP_IOAPIC; |
486 | ioapic.apicid = 2; | 488 | ioapic.apicid = 2; |
487 | ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; | 489 | ioapic.apicver = mpc_default_type > 4 ? 0x10 : 0x01; |
488 | ioapic.flags = MPC_APIC_USABLE; | 490 | ioapic.flags = MPC_APIC_USABLE; |
489 | ioapic.apicaddr = 0xFEC00000; | 491 | ioapic.apicaddr = IO_APIC_DEFAULT_PHYS_BASE; |
490 | MP_ioapic_info(&ioapic); | 492 | MP_ioapic_info(&ioapic); |
491 | 493 | ||
492 | /* | 494 | /* |
@@ -608,7 +610,7 @@ static int __init check_physptr(struct mpf_intel *mpf, unsigned int early) | |||
608 | /* | 610 | /* |
609 | * Scan the memory blocks for an SMP configuration block. | 611 | * Scan the memory blocks for an SMP configuration block. |
610 | */ | 612 | */ |
611 | static void __init __get_smp_config(unsigned int early) | 613 | void __init default_get_smp_config(unsigned int early) |
612 | { | 614 | { |
613 | struct mpf_intel *mpf = mpf_found; | 615 | struct mpf_intel *mpf = mpf_found; |
614 | 616 | ||
@@ -625,11 +627,6 @@ static void __init __get_smp_config(unsigned int early) | |||
625 | if (acpi_lapic && acpi_ioapic) | 627 | if (acpi_lapic && acpi_ioapic) |
626 | return; | 628 | return; |
627 | 629 | ||
628 | if (x86_quirks->mach_get_smp_config) { | ||
629 | if (x86_quirks->mach_get_smp_config(early)) | ||
630 | return; | ||
631 | } | ||
632 | |||
633 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", | 630 | printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", |
634 | mpf->specification); | 631 | mpf->specification); |
635 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) | 632 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) |
@@ -670,16 +667,6 @@ static void __init __get_smp_config(unsigned int early) | |||
670 | */ | 667 | */ |
671 | } | 668 | } |
672 | 669 | ||
673 | void __init early_get_smp_config(void) | ||
674 | { | ||
675 | __get_smp_config(1); | ||
676 | } | ||
677 | |||
678 | void __init get_smp_config(void) | ||
679 | { | ||
680 | __get_smp_config(0); | ||
681 | } | ||
682 | |||
683 | static void __init smp_reserve_bootmem(struct mpf_intel *mpf) | 670 | static void __init smp_reserve_bootmem(struct mpf_intel *mpf) |
684 | { | 671 | { |
685 | unsigned long size = get_mpc_size(mpf->physptr); | 672 | unsigned long size = get_mpc_size(mpf->physptr); |
@@ -745,14 +732,10 @@ static int __init smp_scan_config(unsigned long base, unsigned long length, | |||
745 | return 0; | 732 | return 0; |
746 | } | 733 | } |
747 | 734 | ||
748 | static void __init __find_smp_config(unsigned int reserve) | 735 | void __init default_find_smp_config(unsigned int reserve) |
749 | { | 736 | { |
750 | unsigned int address; | 737 | unsigned int address; |
751 | 738 | ||
752 | if (x86_quirks->mach_find_smp_config) { | ||
753 | if (x86_quirks->mach_find_smp_config(reserve)) | ||
754 | return; | ||
755 | } | ||
756 | /* | 739 | /* |
757 | * FIXME: Linux assumes you have 640K of base ram.. | 740 | * FIXME: Linux assumes you have 640K of base ram.. |
758 | * this continues the error... | 741 | * this continues the error... |
@@ -787,16 +770,6 @@ static void __init __find_smp_config(unsigned int reserve) | |||
787 | smp_scan_config(address, 0x400, reserve); | 770 | smp_scan_config(address, 0x400, reserve); |
788 | } | 771 | } |
789 | 772 | ||
790 | void __init early_find_smp_config(void) | ||
791 | { | ||
792 | __find_smp_config(0); | ||
793 | } | ||
794 | |||
795 | void __init find_smp_config(void) | ||
796 | { | ||
797 | __find_smp_config(1); | ||
798 | } | ||
799 | |||
800 | #ifdef CONFIG_X86_IO_APIC | 773 | #ifdef CONFIG_X86_IO_APIC |
801 | static u8 __initdata irq_used[MAX_IRQ_SOURCES]; | 774 | static u8 __initdata irq_used[MAX_IRQ_SOURCES]; |
802 | 775 | ||
diff --git a/arch/x86/kernel/mrst.c b/arch/x86/kernel/mrst.c new file mode 100644 index 000000000000..3b7078abc871 --- /dev/null +++ b/arch/x86/kernel/mrst.c | |||
@@ -0,0 +1,24 @@ | |||
1 | /* | ||
2 | * mrst.c: Intel Moorestown platform specific setup code | ||
3 | * | ||
4 | * (C) Copyright 2008 Intel Corporation | ||
5 | * Author: Jacob Pan (jacob.jun.pan@intel.com) | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or | ||
8 | * modify it under the terms of the GNU General Public License | ||
9 | * as published by the Free Software Foundation; version 2 | ||
10 | * of the License. | ||
11 | */ | ||
12 | #include <linux/init.h> | ||
13 | |||
14 | #include <asm/setup.h> | ||
15 | |||
16 | /* | ||
17 | * Moorestown specific x86_init function overrides and early setup | ||
18 | * calls. | ||
19 | */ | ||
20 | void __init x86_mrst_early_setup(void) | ||
21 | { | ||
22 | x86_init.resources.probe_roms = x86_init_noop; | ||
23 | x86_init.resources.reserve_resources = x86_init_noop; | ||
24 | } | ||
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c index 98fd6cd4e3a4..7dd950094178 100644 --- a/arch/x86/kernel/msr.c +++ b/arch/x86/kernel/msr.c | |||
@@ -1,6 +1,7 @@ | |||
1 | /* ----------------------------------------------------------------------- * | 1 | /* ----------------------------------------------------------------------- * |
2 | * | 2 | * |
3 | * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved | 3 | * Copyright 2000-2008 H. Peter Anvin - All Rights Reserved |
4 | * Copyright 2009 Intel Corporation; author: H. Peter Anvin | ||
4 | * | 5 | * |
5 | * This program is free software; you can redistribute it and/or modify | 6 | * This program is free software; you can redistribute it and/or modify |
6 | * it under the terms of the GNU General Public License as published by | 7 | * it under the terms of the GNU General Public License as published by |
@@ -80,11 +81,8 @@ static ssize_t msr_read(struct file *file, char __user *buf, | |||
80 | 81 | ||
81 | for (; count; count -= 8) { | 82 | for (; count; count -= 8) { |
82 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); | 83 | err = rdmsr_safe_on_cpu(cpu, reg, &data[0], &data[1]); |
83 | if (err) { | 84 | if (err) |
84 | if (err == -EFAULT) /* Fix idiotic error code */ | ||
85 | err = -EIO; | ||
86 | break; | 85 | break; |
87 | } | ||
88 | if (copy_to_user(tmp, &data, 8)) { | 86 | if (copy_to_user(tmp, &data, 8)) { |
89 | err = -EFAULT; | 87 | err = -EFAULT; |
90 | break; | 88 | break; |
@@ -115,11 +113,8 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
115 | break; | 113 | break; |
116 | } | 114 | } |
117 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); | 115 | err = wrmsr_safe_on_cpu(cpu, reg, data[0], data[1]); |
118 | if (err) { | 116 | if (err) |
119 | if (err == -EFAULT) /* Fix idiotic error code */ | ||
120 | err = -EIO; | ||
121 | break; | 117 | break; |
122 | } | ||
123 | tmp += 2; | 118 | tmp += 2; |
124 | bytes += 8; | 119 | bytes += 8; |
125 | } | 120 | } |
@@ -127,6 +122,54 @@ static ssize_t msr_write(struct file *file, const char __user *buf, | |||
127 | return bytes ? bytes : err; | 122 | return bytes ? bytes : err; |
128 | } | 123 | } |
129 | 124 | ||
125 | static long msr_ioctl(struct file *file, unsigned int ioc, unsigned long arg) | ||
126 | { | ||
127 | u32 __user *uregs = (u32 __user *)arg; | ||
128 | u32 regs[8]; | ||
129 | int cpu = iminor(file->f_path.dentry->d_inode); | ||
130 | int err; | ||
131 | |||
132 | switch (ioc) { | ||
133 | case X86_IOC_RDMSR_REGS: | ||
134 | if (!(file->f_mode & FMODE_READ)) { | ||
135 | err = -EBADF; | ||
136 | break; | ||
137 | } | ||
138 | if (copy_from_user(®s, uregs, sizeof regs)) { | ||
139 | err = -EFAULT; | ||
140 | break; | ||
141 | } | ||
142 | err = rdmsr_safe_regs_on_cpu(cpu, regs); | ||
143 | if (err) | ||
144 | break; | ||
145 | if (copy_to_user(uregs, ®s, sizeof regs)) | ||
146 | err = -EFAULT; | ||
147 | break; | ||
148 | |||
149 | case X86_IOC_WRMSR_REGS: | ||
150 | if (!(file->f_mode & FMODE_WRITE)) { | ||
151 | err = -EBADF; | ||
152 | break; | ||
153 | } | ||
154 | if (copy_from_user(®s, uregs, sizeof regs)) { | ||
155 | err = -EFAULT; | ||
156 | break; | ||
157 | } | ||
158 | err = wrmsr_safe_regs_on_cpu(cpu, regs); | ||
159 | if (err) | ||
160 | break; | ||
161 | if (copy_to_user(uregs, ®s, sizeof regs)) | ||
162 | err = -EFAULT; | ||
163 | break; | ||
164 | |||
165 | default: | ||
166 | err = -ENOTTY; | ||
167 | break; | ||
168 | } | ||
169 | |||
170 | return err; | ||
171 | } | ||
172 | |||
130 | static int msr_open(struct inode *inode, struct file *file) | 173 | static int msr_open(struct inode *inode, struct file *file) |
131 | { | 174 | { |
132 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); | 175 | unsigned int cpu = iminor(file->f_path.dentry->d_inode); |
@@ -157,6 +200,8 @@ static const struct file_operations msr_fops = { | |||
157 | .read = msr_read, | 200 | .read = msr_read, |
158 | .write = msr_write, | 201 | .write = msr_write, |
159 | .open = msr_open, | 202 | .open = msr_open, |
203 | .unlocked_ioctl = msr_ioctl, | ||
204 | .compat_ioctl = msr_ioctl, | ||
160 | }; | 205 | }; |
161 | 206 | ||
162 | static int __cpuinit msr_device_create(int cpu) | 207 | static int __cpuinit msr_device_create(int cpu) |
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index 70ec9b951d76..1b1739d16310 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c | |||
@@ -54,17 +54,12 @@ u64 _paravirt_ident_64(u64 x) | |||
54 | return x; | 54 | return x; |
55 | } | 55 | } |
56 | 56 | ||
57 | static void __init default_banner(void) | 57 | void __init default_banner(void) |
58 | { | 58 | { |
59 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", | 59 | printk(KERN_INFO "Booting paravirtualized kernel on %s\n", |
60 | pv_info.name); | 60 | pv_info.name); |
61 | } | 61 | } |
62 | 62 | ||
63 | char *memory_setup(void) | ||
64 | { | ||
65 | return pv_init_ops.memory_setup(); | ||
66 | } | ||
67 | |||
68 | /* Simple instruction patching code. */ | 63 | /* Simple instruction patching code. */ |
69 | #define DEF_NATIVE(ops, name, code) \ | 64 | #define DEF_NATIVE(ops, name, code) \ |
70 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ | 65 | extern const char start_##ops##_##name[], end_##ops##_##name[]; \ |
@@ -188,11 +183,6 @@ unsigned paravirt_patch_insns(void *insnbuf, unsigned len, | |||
188 | return insn_len; | 183 | return insn_len; |
189 | } | 184 | } |
190 | 185 | ||
191 | void init_IRQ(void) | ||
192 | { | ||
193 | pv_irq_ops.init_IRQ(); | ||
194 | } | ||
195 | |||
196 | static void native_flush_tlb(void) | 186 | static void native_flush_tlb(void) |
197 | { | 187 | { |
198 | __native_flush_tlb(); | 188 | __native_flush_tlb(); |
@@ -218,13 +208,6 @@ extern void native_irq_enable_sysexit(void); | |||
218 | extern void native_usergs_sysret32(void); | 208 | extern void native_usergs_sysret32(void); |
219 | extern void native_usergs_sysret64(void); | 209 | extern void native_usergs_sysret64(void); |
220 | 210 | ||
221 | static int __init print_banner(void) | ||
222 | { | ||
223 | pv_init_ops.banner(); | ||
224 | return 0; | ||
225 | } | ||
226 | core_initcall(print_banner); | ||
227 | |||
228 | static struct resource reserve_ioports = { | 211 | static struct resource reserve_ioports = { |
229 | .start = 0, | 212 | .start = 0, |
230 | .end = IO_SPACE_LIMIT, | 213 | .end = IO_SPACE_LIMIT, |
@@ -320,21 +303,13 @@ struct pv_info pv_info = { | |||
320 | 303 | ||
321 | struct pv_init_ops pv_init_ops = { | 304 | struct pv_init_ops pv_init_ops = { |
322 | .patch = native_patch, | 305 | .patch = native_patch, |
323 | .banner = default_banner, | ||
324 | .arch_setup = paravirt_nop, | ||
325 | .memory_setup = machine_specific_memory_setup, | ||
326 | }; | 306 | }; |
327 | 307 | ||
328 | struct pv_time_ops pv_time_ops = { | 308 | struct pv_time_ops pv_time_ops = { |
329 | .time_init = hpet_time_init, | ||
330 | .get_wallclock = native_get_wallclock, | ||
331 | .set_wallclock = native_set_wallclock, | ||
332 | .sched_clock = native_sched_clock, | 309 | .sched_clock = native_sched_clock, |
333 | .get_tsc_khz = native_calibrate_tsc, | ||
334 | }; | 310 | }; |
335 | 311 | ||
336 | struct pv_irq_ops pv_irq_ops = { | 312 | struct pv_irq_ops pv_irq_ops = { |
337 | .init_IRQ = native_init_IRQ, | ||
338 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), | 313 | .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl), |
339 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), | 314 | .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl), |
340 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), | 315 | .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable), |
@@ -362,8 +337,9 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
362 | #endif | 337 | #endif |
363 | .wbinvd = native_wbinvd, | 338 | .wbinvd = native_wbinvd, |
364 | .read_msr = native_read_msr_safe, | 339 | .read_msr = native_read_msr_safe, |
365 | .read_msr_amd = native_read_msr_amd_safe, | 340 | .rdmsr_regs = native_rdmsr_safe_regs, |
366 | .write_msr = native_write_msr_safe, | 341 | .write_msr = native_write_msr_safe, |
342 | .wrmsr_regs = native_wrmsr_safe_regs, | ||
367 | .read_tsc = native_read_tsc, | 343 | .read_tsc = native_read_tsc, |
368 | .read_pmc = native_read_pmc, | 344 | .read_pmc = native_read_pmc, |
369 | .read_tscp = native_read_tscp, | 345 | .read_tscp = native_read_tscp, |
@@ -408,8 +384,6 @@ struct pv_cpu_ops pv_cpu_ops = { | |||
408 | 384 | ||
409 | struct pv_apic_ops pv_apic_ops = { | 385 | struct pv_apic_ops pv_apic_ops = { |
410 | #ifdef CONFIG_X86_LOCAL_APIC | 386 | #ifdef CONFIG_X86_LOCAL_APIC |
411 | .setup_boot_clock = setup_boot_APIC_clock, | ||
412 | .setup_secondary_clock = setup_secondary_APIC_clock, | ||
413 | .startup_ipi_hook = paravirt_nop, | 387 | .startup_ipi_hook = paravirt_nop, |
414 | #endif | 388 | #endif |
415 | }; | 389 | }; |
@@ -423,13 +397,6 @@ struct pv_apic_ops pv_apic_ops = { | |||
423 | #endif | 397 | #endif |
424 | 398 | ||
425 | struct pv_mmu_ops pv_mmu_ops = { | 399 | struct pv_mmu_ops pv_mmu_ops = { |
426 | #ifndef CONFIG_X86_64 | ||
427 | .pagetable_setup_start = native_pagetable_setup_start, | ||
428 | .pagetable_setup_done = native_pagetable_setup_done, | ||
429 | #else | ||
430 | .pagetable_setup_start = paravirt_nop, | ||
431 | .pagetable_setup_done = paravirt_nop, | ||
432 | #endif | ||
433 | 400 | ||
434 | .read_cr2 = native_read_cr2, | 401 | .read_cr2 = native_read_cr2, |
435 | .write_cr2 = native_write_cr2, | 402 | .write_cr2 = native_write_cr2, |
diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506b..64b838eac18c 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c | |||
@@ -3,6 +3,7 @@ | |||
3 | #include <linux/dmar.h> | 3 | #include <linux/dmar.h> |
4 | #include <linux/bootmem.h> | 4 | #include <linux/bootmem.h> |
5 | #include <linux/pci.h> | 5 | #include <linux/pci.h> |
6 | #include <linux/kmemleak.h> | ||
6 | 7 | ||
7 | #include <asm/proto.h> | 8 | #include <asm/proto.h> |
8 | #include <asm/dma.h> | 9 | #include <asm/dma.h> |
@@ -32,7 +33,14 @@ int no_iommu __read_mostly; | |||
32 | /* Set this to 1 if there is a HW IOMMU in the system */ | 33 | /* Set this to 1 if there is a HW IOMMU in the system */ |
33 | int iommu_detected __read_mostly = 0; | 34 | int iommu_detected __read_mostly = 0; |
34 | 35 | ||
35 | int iommu_pass_through; | 36 | /* |
37 | * This variable becomes 1 if iommu=pt is passed on the kernel command line. | ||
38 | * If this variable is 1, IOMMU implementations do no DMA ranslation for | ||
39 | * devices and allow every device to access to whole physical memory. This is | ||
40 | * useful if a user want to use an IOMMU only for KVM device assignment to | ||
41 | * guests and not for driver dma translation. | ||
42 | */ | ||
43 | int iommu_pass_through __read_mostly; | ||
36 | 44 | ||
37 | dma_addr_t bad_dma_address __read_mostly = 0; | 45 | dma_addr_t bad_dma_address __read_mostly = 0; |
38 | EXPORT_SYMBOL(bad_dma_address); | 46 | EXPORT_SYMBOL(bad_dma_address); |
@@ -88,6 +96,11 @@ void __init dma32_reserve_bootmem(void) | |||
88 | size = roundup(dma32_bootmem_size, align); | 96 | size = roundup(dma32_bootmem_size, align); |
89 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, | 97 | dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, |
90 | 512ULL<<20); | 98 | 512ULL<<20); |
99 | /* | ||
100 | * Kmemleak should not scan this block as it may not be mapped via the | ||
101 | * kernel direct mapping. | ||
102 | */ | ||
103 | kmemleak_ignore(dma32_bootmem_ptr); | ||
91 | if (dma32_bootmem_ptr) | 104 | if (dma32_bootmem_ptr) |
92 | dma32_bootmem_size = size; | 105 | dma32_bootmem_size = size; |
93 | else | 106 | else |
@@ -147,7 +160,7 @@ again: | |||
147 | return NULL; | 160 | return NULL; |
148 | 161 | ||
149 | addr = page_to_phys(page); | 162 | addr = page_to_phys(page); |
150 | if (!is_buffer_dma_capable(dma_mask, addr, size)) { | 163 | if (addr + size > dma_mask) { |
151 | __free_pages(page, get_order(size)); | 164 | __free_pages(page, get_order(size)); |
152 | 165 | ||
153 | if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { | 166 | if (dma_mask < DMA_BIT_MASK(32) && !(flag & GFP_DMA)) { |
@@ -212,10 +225,8 @@ static __init int iommu_setup(char *p) | |||
212 | if (!strncmp(p, "soft", 4)) | 225 | if (!strncmp(p, "soft", 4)) |
213 | swiotlb = 1; | 226 | swiotlb = 1; |
214 | #endif | 227 | #endif |
215 | if (!strncmp(p, "pt", 2)) { | 228 | if (!strncmp(p, "pt", 2)) |
216 | iommu_pass_through = 1; | 229 | iommu_pass_through = 1; |
217 | return 1; | ||
218 | } | ||
219 | 230 | ||
220 | gart_parse_options(p); | 231 | gart_parse_options(p); |
221 | 232 | ||
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index d2e56b8f48e7..98a827ee9ed7 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c | |||
@@ -190,14 +190,13 @@ static void iommu_full(struct device *dev, size_t size, int dir) | |||
190 | static inline int | 190 | static inline int |
191 | need_iommu(struct device *dev, unsigned long addr, size_t size) | 191 | need_iommu(struct device *dev, unsigned long addr, size_t size) |
192 | { | 192 | { |
193 | return force_iommu || | 193 | return force_iommu || !dma_capable(dev, addr, size); |
194 | !is_buffer_dma_capable(*dev->dma_mask, addr, size); | ||
195 | } | 194 | } |
196 | 195 | ||
197 | static inline int | 196 | static inline int |
198 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) | 197 | nonforced_iommu(struct device *dev, unsigned long addr, size_t size) |
199 | { | 198 | { |
200 | return !is_buffer_dma_capable(*dev->dma_mask, addr, size); | 199 | return !dma_capable(dev, addr, size); |
201 | } | 200 | } |
202 | 201 | ||
203 | /* Map a single continuous physical area into the IOMMU. | 202 | /* Map a single continuous physical area into the IOMMU. |
diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 71d412a09f30..a3933d4330cd 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c | |||
@@ -14,7 +14,7 @@ | |||
14 | static int | 14 | static int |
15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) | 15 | check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) |
16 | { | 16 | { |
17 | if (hwdev && !is_buffer_dma_capable(*hwdev->dma_mask, bus, size)) { | 17 | if (hwdev && !dma_capable(hwdev, bus, size)) { |
18 | if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) | 18 | if (*hwdev->dma_mask >= DMA_BIT_MASK(32)) |
19 | printk(KERN_ERR | 19 | printk(KERN_ERR |
20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", | 20 | "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", |
@@ -79,12 +79,29 @@ static void nommu_free_coherent(struct device *dev, size_t size, void *vaddr, | |||
79 | free_pages((unsigned long)vaddr, get_order(size)); | 79 | free_pages((unsigned long)vaddr, get_order(size)); |
80 | } | 80 | } |
81 | 81 | ||
82 | static void nommu_sync_single_for_device(struct device *dev, | ||
83 | dma_addr_t addr, size_t size, | ||
84 | enum dma_data_direction dir) | ||
85 | { | ||
86 | flush_write_buffers(); | ||
87 | } | ||
88 | |||
89 | |||
90 | static void nommu_sync_sg_for_device(struct device *dev, | ||
91 | struct scatterlist *sg, int nelems, | ||
92 | enum dma_data_direction dir) | ||
93 | { | ||
94 | flush_write_buffers(); | ||
95 | } | ||
96 | |||
82 | struct dma_map_ops nommu_dma_ops = { | 97 | struct dma_map_ops nommu_dma_ops = { |
83 | .alloc_coherent = dma_generic_alloc_coherent, | 98 | .alloc_coherent = dma_generic_alloc_coherent, |
84 | .free_coherent = nommu_free_coherent, | 99 | .free_coherent = nommu_free_coherent, |
85 | .map_sg = nommu_map_sg, | 100 | .map_sg = nommu_map_sg, |
86 | .map_page = nommu_map_page, | 101 | .map_page = nommu_map_page, |
87 | .is_phys = 1, | 102 | .sync_single_for_device = nommu_sync_single_for_device, |
103 | .sync_sg_for_device = nommu_sync_sg_for_device, | ||
104 | .is_phys = 1, | ||
88 | }; | 105 | }; |
89 | 106 | ||
90 | void __init no_iommu_init(void) | 107 | void __init no_iommu_init(void) |
diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 6af96ee44200..e8a35016115f 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c | |||
@@ -13,31 +13,6 @@ | |||
13 | 13 | ||
14 | int swiotlb __read_mostly; | 14 | int swiotlb __read_mostly; |
15 | 15 | ||
16 | void * __init swiotlb_alloc_boot(size_t size, unsigned long nslabs) | ||
17 | { | ||
18 | return alloc_bootmem_low_pages(size); | ||
19 | } | ||
20 | |||
21 | void *swiotlb_alloc(unsigned order, unsigned long nslabs) | ||
22 | { | ||
23 | return (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, order); | ||
24 | } | ||
25 | |||
26 | dma_addr_t swiotlb_phys_to_bus(struct device *hwdev, phys_addr_t paddr) | ||
27 | { | ||
28 | return paddr; | ||
29 | } | ||
30 | |||
31 | phys_addr_t swiotlb_bus_to_phys(struct device *hwdev, dma_addr_t baddr) | ||
32 | { | ||
33 | return baddr; | ||
34 | } | ||
35 | |||
36 | int __weak swiotlb_arch_range_needs_mapping(phys_addr_t paddr, size_t size) | ||
37 | { | ||
38 | return 0; | ||
39 | } | ||
40 | |||
41 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, | 16 | static void *x86_swiotlb_alloc_coherent(struct device *hwdev, size_t size, |
42 | dma_addr_t *dma_handle, gfp_t flags) | 17 | dma_addr_t *dma_handle, gfp_t flags) |
43 | { | 18 | { |
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 59f4524984af..4cf79567cdab 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c | |||
@@ -61,9 +61,6 @@ | |||
61 | 61 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 63 | ||
64 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
65 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
66 | |||
67 | /* | 64 | /* |
68 | * Return saved PC of a blocked thread. | 65 | * Return saved PC of a blocked thread. |
69 | */ | 66 | */ |
@@ -350,14 +347,21 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
350 | *next = &next_p->thread; | 347 | *next = &next_p->thread; |
351 | int cpu = smp_processor_id(); | 348 | int cpu = smp_processor_id(); |
352 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 349 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
350 | bool preload_fpu; | ||
353 | 351 | ||
354 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ | 352 | /* never put a printk in __switch_to... printk() calls wake_up*() indirectly */ |
355 | 353 | ||
356 | __unlazy_fpu(prev_p); | 354 | /* |
355 | * If the task has used fpu the last 5 timeslices, just do a full | ||
356 | * restore of the math state immediately to avoid the trap; the | ||
357 | * chances of needing FPU soon are obviously high now | ||
358 | */ | ||
359 | preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; | ||
357 | 360 | ||
361 | __unlazy_fpu(prev_p); | ||
358 | 362 | ||
359 | /* we're going to use this soon, after a few expensive things */ | 363 | /* we're going to use this soon, after a few expensive things */ |
360 | if (next_p->fpu_counter > 5) | 364 | if (preload_fpu) |
361 | prefetch(next->xstate); | 365 | prefetch(next->xstate); |
362 | 366 | ||
363 | /* | 367 | /* |
@@ -398,6 +402,11 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
398 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) | 402 | task_thread_info(next_p)->flags & _TIF_WORK_CTXSW_NEXT)) |
399 | __switch_to_xtra(prev_p, next_p, tss); | 403 | __switch_to_xtra(prev_p, next_p, tss); |
400 | 404 | ||
405 | /* If we're going to preload the fpu context, make sure clts | ||
406 | is run while we're batching the cpu state updates. */ | ||
407 | if (preload_fpu) | ||
408 | clts(); | ||
409 | |||
401 | /* | 410 | /* |
402 | * Leave lazy mode, flushing any hypercalls made here. | 411 | * Leave lazy mode, flushing any hypercalls made here. |
403 | * This must be done before restoring TLS segments so | 412 | * This must be done before restoring TLS segments so |
@@ -407,15 +416,8 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
407 | */ | 416 | */ |
408 | arch_end_context_switch(next_p); | 417 | arch_end_context_switch(next_p); |
409 | 418 | ||
410 | /* If the task has used fpu the last 5 timeslices, just do a full | 419 | if (preload_fpu) |
411 | * restore of the math state immediately to avoid the trap; the | 420 | __math_state_restore(); |
412 | * chances of needing FPU soon are obviously high now | ||
413 | * | ||
414 | * tsk_used_math() checks prevent calling math_state_restore(), | ||
415 | * which can sleep in the case of !tsk_used_math() | ||
416 | */ | ||
417 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) | ||
418 | math_state_restore(); | ||
419 | 421 | ||
420 | /* | 422 | /* |
421 | * Restore %gs if needed (which is common) | 423 | * Restore %gs if needed (which is common) |
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index ebefb5407b9d..ad535b683170 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c | |||
@@ -55,9 +55,6 @@ | |||
55 | 55 | ||
56 | asmlinkage extern void ret_from_fork(void); | 56 | asmlinkage extern void ret_from_fork(void); |
57 | 57 | ||
58 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
59 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
60 | |||
61 | DEFINE_PER_CPU(unsigned long, old_rsp); | 58 | DEFINE_PER_CPU(unsigned long, old_rsp); |
62 | static DEFINE_PER_CPU(unsigned char, is_idle); | 59 | static DEFINE_PER_CPU(unsigned char, is_idle); |
63 | 60 | ||
@@ -386,9 +383,17 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
386 | int cpu = smp_processor_id(); | 383 | int cpu = smp_processor_id(); |
387 | struct tss_struct *tss = &per_cpu(init_tss, cpu); | 384 | struct tss_struct *tss = &per_cpu(init_tss, cpu); |
388 | unsigned fsindex, gsindex; | 385 | unsigned fsindex, gsindex; |
386 | bool preload_fpu; | ||
387 | |||
388 | /* | ||
389 | * If the task has used fpu the last 5 timeslices, just do a full | ||
390 | * restore of the math state immediately to avoid the trap; the | ||
391 | * chances of needing FPU soon are obviously high now | ||
392 | */ | ||
393 | preload_fpu = tsk_used_math(next_p) && next_p->fpu_counter > 5; | ||
389 | 394 | ||
390 | /* we're going to use this soon, after a few expensive things */ | 395 | /* we're going to use this soon, after a few expensive things */ |
391 | if (next_p->fpu_counter > 5) | 396 | if (preload_fpu) |
392 | prefetch(next->xstate); | 397 | prefetch(next->xstate); |
393 | 398 | ||
394 | /* | 399 | /* |
@@ -419,6 +424,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
419 | 424 | ||
420 | load_TLS(next, cpu); | 425 | load_TLS(next, cpu); |
421 | 426 | ||
427 | /* Must be after DS reload */ | ||
428 | unlazy_fpu(prev_p); | ||
429 | |||
430 | /* Make sure cpu is ready for new context */ | ||
431 | if (preload_fpu) | ||
432 | clts(); | ||
433 | |||
422 | /* | 434 | /* |
423 | * Leave lazy mode, flushing any hypercalls made here. | 435 | * Leave lazy mode, flushing any hypercalls made here. |
424 | * This must be done before restoring TLS segments so | 436 | * This must be done before restoring TLS segments so |
@@ -459,9 +471,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
459 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); | 471 | wrmsrl(MSR_KERNEL_GS_BASE, next->gs); |
460 | prev->gsindex = gsindex; | 472 | prev->gsindex = gsindex; |
461 | 473 | ||
462 | /* Must be after DS reload */ | ||
463 | unlazy_fpu(prev_p); | ||
464 | |||
465 | /* | 474 | /* |
466 | * Switch the PDA and FPU contexts. | 475 | * Switch the PDA and FPU contexts. |
467 | */ | 476 | */ |
@@ -480,15 +489,12 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) | |||
480 | task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) | 489 | task_thread_info(prev_p)->flags & _TIF_WORK_CTXSW_PREV)) |
481 | __switch_to_xtra(prev_p, next_p, tss); | 490 | __switch_to_xtra(prev_p, next_p, tss); |
482 | 491 | ||
483 | /* If the task has used fpu the last 5 timeslices, just do a full | 492 | /* |
484 | * restore of the math state immediately to avoid the trap; the | 493 | * Preload the FPU context, now that we've determined that the |
485 | * chances of needing FPU soon are obviously high now | 494 | * task is likely to be using it. |
486 | * | ||
487 | * tsk_used_math() checks prevent calling math_state_restore(), | ||
488 | * which can sleep in the case of !tsk_used_math() | ||
489 | */ | 495 | */ |
490 | if (tsk_used_math(next_p) && next_p->fpu_counter > 5) | 496 | if (preload_fpu) |
491 | math_state_restore(); | 497 | __math_state_restore(); |
492 | return prev_p; | 498 | return prev_p; |
493 | } | 499 | } |
494 | 500 | ||
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 09ecbde91c13..8d7d5c9c1be3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c | |||
@@ -35,10 +35,11 @@ | |||
35 | #include <asm/proto.h> | 35 | #include <asm/proto.h> |
36 | #include <asm/ds.h> | 36 | #include <asm/ds.h> |
37 | 37 | ||
38 | #include <trace/syscall.h> | ||
39 | |||
40 | #include "tls.h" | 38 | #include "tls.h" |
41 | 39 | ||
40 | #define CREATE_TRACE_POINTS | ||
41 | #include <trace/events/syscalls.h> | ||
42 | |||
42 | enum x86_regset { | 43 | enum x86_regset { |
43 | REGSET_GENERAL, | 44 | REGSET_GENERAL, |
44 | REGSET_FP, | 45 | REGSET_FP, |
@@ -1497,8 +1498,8 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs) | |||
1497 | tracehook_report_syscall_entry(regs)) | 1498 | tracehook_report_syscall_entry(regs)) |
1498 | ret = -1L; | 1499 | ret = -1L; |
1499 | 1500 | ||
1500 | if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) | 1501 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1501 | ftrace_syscall_enter(regs); | 1502 | trace_sys_enter(regs, regs->orig_ax); |
1502 | 1503 | ||
1503 | if (unlikely(current->audit_context)) { | 1504 | if (unlikely(current->audit_context)) { |
1504 | if (IS_IA32) | 1505 | if (IS_IA32) |
@@ -1523,8 +1524,8 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs) | |||
1523 | if (unlikely(current->audit_context)) | 1524 | if (unlikely(current->audit_context)) |
1524 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); | 1525 | audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax); |
1525 | 1526 | ||
1526 | if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE))) | 1527 | if (unlikely(test_thread_flag(TIF_SYSCALL_TRACEPOINT))) |
1527 | ftrace_syscall_exit(regs); | 1528 | trace_sys_exit(regs, regs->ax); |
1528 | 1529 | ||
1529 | if (test_thread_flag(TIF_SYSCALL_TRACE)) | 1530 | if (test_thread_flag(TIF_SYSCALL_TRACE)) |
1530 | tracehook_report_syscall_exit(regs, 0); | 1531 | tracehook_report_syscall_exit(regs, 0); |
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index af71d06624bf..6c3b2c6fd772 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c | |||
@@ -508,7 +508,7 @@ static void __init quirk_amd_nb_node(struct pci_dev *dev) | |||
508 | 508 | ||
509 | pci_read_config_dword(nb_ht, 0x60, &val); | 509 | pci_read_config_dword(nb_ht, 0x60, &val); |
510 | set_dev_node(&dev->dev, val & 7); | 510 | set_dev_node(&dev->dev, val & 7); |
511 | pci_dev_put(dev); | 511 | pci_dev_put(nb_ht); |
512 | } | 512 | } |
513 | 513 | ||
514 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, | 514 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_K8_NB, |
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index a06e8d101844..27349f92a6d7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/pm.h> | 4 | #include <linux/pm.h> |
5 | #include <linux/efi.h> | 5 | #include <linux/efi.h> |
6 | #include <linux/dmi.h> | 6 | #include <linux/dmi.h> |
7 | #include <linux/tboot.h> | ||
7 | #include <acpi/reboot.h> | 8 | #include <acpi/reboot.h> |
8 | #include <asm/io.h> | 9 | #include <asm/io.h> |
9 | #include <asm/apic.h> | 10 | #include <asm/apic.h> |
@@ -508,6 +509,8 @@ static void native_machine_emergency_restart(void) | |||
508 | if (reboot_emergency) | 509 | if (reboot_emergency) |
509 | emergency_vmx_disable_all(); | 510 | emergency_vmx_disable_all(); |
510 | 511 | ||
512 | tboot_shutdown(TB_SHUTDOWN_REBOOT); | ||
513 | |||
511 | /* Tell the BIOS if we want cold or warm reboot */ | 514 | /* Tell the BIOS if we want cold or warm reboot */ |
512 | *((unsigned short *)__va(0x472)) = reboot_mode; | 515 | *((unsigned short *)__va(0x472)) = reboot_mode; |
513 | 516 | ||
@@ -634,6 +637,8 @@ static void native_machine_halt(void) | |||
634 | /* stop other cpus and apics */ | 637 | /* stop other cpus and apics */ |
635 | machine_shutdown(); | 638 | machine_shutdown(); |
636 | 639 | ||
640 | tboot_shutdown(TB_SHUTDOWN_HALT); | ||
641 | |||
637 | /* stop this cpu */ | 642 | /* stop this cpu */ |
638 | stop_this_cpu(NULL); | 643 | stop_this_cpu(NULL); |
639 | } | 644 | } |
@@ -645,6 +650,8 @@ static void native_machine_power_off(void) | |||
645 | machine_shutdown(); | 650 | machine_shutdown(); |
646 | pm_power_off(); | 651 | pm_power_off(); |
647 | } | 652 | } |
653 | /* a fallback in case there is no PM info available */ | ||
654 | tboot_shutdown(TB_SHUTDOWN_HALT); | ||
648 | } | 655 | } |
649 | 656 | ||
650 | struct machine_ops machine_ops = { | 657 | struct machine_ops machine_ops = { |
diff --git a/arch/x86/kernel/rtc.c b/arch/x86/kernel/rtc.c index 5d465b207e72..1cfbbfc3ae26 100644 --- a/arch/x86/kernel/rtc.c +++ b/arch/x86/kernel/rtc.c | |||
@@ -8,6 +8,7 @@ | |||
8 | #include <linux/pnp.h> | 8 | #include <linux/pnp.h> |
9 | 9 | ||
10 | #include <asm/vsyscall.h> | 10 | #include <asm/vsyscall.h> |
11 | #include <asm/x86_init.h> | ||
11 | #include <asm/time.h> | 12 | #include <asm/time.h> |
12 | 13 | ||
13 | #ifdef CONFIG_X86_32 | 14 | #ifdef CONFIG_X86_32 |
@@ -165,33 +166,29 @@ void rtc_cmos_write(unsigned char val, unsigned char addr) | |||
165 | } | 166 | } |
166 | EXPORT_SYMBOL(rtc_cmos_write); | 167 | EXPORT_SYMBOL(rtc_cmos_write); |
167 | 168 | ||
168 | static int set_rtc_mmss(unsigned long nowtime) | 169 | int update_persistent_clock(struct timespec now) |
169 | { | 170 | { |
170 | unsigned long flags; | 171 | unsigned long flags; |
171 | int retval; | 172 | int retval; |
172 | 173 | ||
173 | spin_lock_irqsave(&rtc_lock, flags); | 174 | spin_lock_irqsave(&rtc_lock, flags); |
174 | retval = set_wallclock(nowtime); | 175 | retval = x86_platform.set_wallclock(now.tv_sec); |
175 | spin_unlock_irqrestore(&rtc_lock, flags); | 176 | spin_unlock_irqrestore(&rtc_lock, flags); |
176 | 177 | ||
177 | return retval; | 178 | return retval; |
178 | } | 179 | } |
179 | 180 | ||
180 | /* not static: needed by APM */ | 181 | /* not static: needed by APM */ |
181 | unsigned long read_persistent_clock(void) | 182 | void read_persistent_clock(struct timespec *ts) |
182 | { | 183 | { |
183 | unsigned long retval, flags; | 184 | unsigned long retval, flags; |
184 | 185 | ||
185 | spin_lock_irqsave(&rtc_lock, flags); | 186 | spin_lock_irqsave(&rtc_lock, flags); |
186 | retval = get_wallclock(); | 187 | retval = x86_platform.get_wallclock(); |
187 | spin_unlock_irqrestore(&rtc_lock, flags); | 188 | spin_unlock_irqrestore(&rtc_lock, flags); |
188 | 189 | ||
189 | return retval; | 190 | ts->tv_sec = retval; |
190 | } | 191 | ts->tv_nsec = 0; |
191 | |||
192 | int update_persistent_clock(struct timespec now) | ||
193 | { | ||
194 | return set_rtc_mmss(now.tv_sec); | ||
195 | } | 192 | } |
196 | 193 | ||
197 | unsigned long long native_read_tsc(void) | 194 | unsigned long long native_read_tsc(void) |
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index d784ea207606..6c7886890fa6 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c | |||
@@ -67,6 +67,7 @@ | |||
67 | 67 | ||
68 | #include <linux/percpu.h> | 68 | #include <linux/percpu.h> |
69 | #include <linux/crash_dump.h> | 69 | #include <linux/crash_dump.h> |
70 | #include <linux/tboot.h> | ||
70 | 71 | ||
71 | #include <video/edid.h> | 72 | #include <video/edid.h> |
72 | 73 | ||
@@ -109,10 +110,6 @@ | |||
109 | #include <asm/numa_64.h> | 110 | #include <asm/numa_64.h> |
110 | #endif | 111 | #endif |
111 | 112 | ||
112 | #ifndef ARCH_SETUP | ||
113 | #define ARCH_SETUP | ||
114 | #endif | ||
115 | |||
116 | /* | 113 | /* |
117 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. | 114 | * end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries. |
118 | * The direct mapping extends to max_pfn_mapped, so that we can directly access | 115 | * The direct mapping extends to max_pfn_mapped, so that we can directly access |
@@ -134,9 +131,9 @@ int default_cpu_present_to_apicid(int mps_cpu) | |||
134 | return __default_cpu_present_to_apicid(mps_cpu); | 131 | return __default_cpu_present_to_apicid(mps_cpu); |
135 | } | 132 | } |
136 | 133 | ||
137 | int default_check_phys_apicid_present(int boot_cpu_physical_apicid) | 134 | int default_check_phys_apicid_present(int phys_apicid) |
138 | { | 135 | { |
139 | return __default_check_phys_apicid_present(boot_cpu_physical_apicid); | 136 | return __default_check_phys_apicid_present(phys_apicid); |
140 | } | 137 | } |
141 | #endif | 138 | #endif |
142 | 139 | ||
@@ -172,13 +169,6 @@ static struct resource bss_resource = { | |||
172 | 169 | ||
173 | 170 | ||
174 | #ifdef CONFIG_X86_32 | 171 | #ifdef CONFIG_X86_32 |
175 | static struct resource video_ram_resource = { | ||
176 | .name = "Video RAM area", | ||
177 | .start = 0xa0000, | ||
178 | .end = 0xbffff, | ||
179 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM | ||
180 | }; | ||
181 | |||
182 | /* cpu data as detected by the assembly code in head.S */ | 172 | /* cpu data as detected by the assembly code in head.S */ |
183 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; | 173 | struct cpuinfo_x86 new_cpu_data __cpuinitdata = {0, 0, 0, 0, -1, 1, 0, 0, -1}; |
184 | /* common cpu data for all cpus */ | 174 | /* common cpu data for all cpus */ |
@@ -606,7 +596,7 @@ static struct resource standard_io_resources[] = { | |||
606 | .flags = IORESOURCE_BUSY | IORESOURCE_IO } | 596 | .flags = IORESOURCE_BUSY | IORESOURCE_IO } |
607 | }; | 597 | }; |
608 | 598 | ||
609 | static void __init reserve_standard_io_resources(void) | 599 | void __init reserve_standard_io_resources(void) |
610 | { | 600 | { |
611 | int i; | 601 | int i; |
612 | 602 | ||
@@ -638,10 +628,6 @@ static int __init setup_elfcorehdr(char *arg) | |||
638 | early_param("elfcorehdr", setup_elfcorehdr); | 628 | early_param("elfcorehdr", setup_elfcorehdr); |
639 | #endif | 629 | #endif |
640 | 630 | ||
641 | static struct x86_quirks default_x86_quirks __initdata; | ||
642 | |||
643 | struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; | ||
644 | |||
645 | #ifdef CONFIG_X86_RESERVE_LOW_64K | 631 | #ifdef CONFIG_X86_RESERVE_LOW_64K |
646 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) | 632 | static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) |
647 | { | 633 | { |
@@ -712,6 +698,21 @@ void __init setup_arch(char **cmdline_p) | |||
712 | printk(KERN_INFO "Command line: %s\n", boot_command_line); | 698 | printk(KERN_INFO "Command line: %s\n", boot_command_line); |
713 | #endif | 699 | #endif |
714 | 700 | ||
701 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | ||
702 | *cmdline_p = command_line; | ||
703 | |||
704 | #ifdef CONFIG_X86_64 | ||
705 | /* | ||
706 | * Must call this twice: Once just to detect whether hardware doesn't | ||
707 | * support NX (so that the early EHCI debug console setup can safely | ||
708 | * call set_fixmap(), and then again after parsing early parameters to | ||
709 | * honor the respective command line option. | ||
710 | */ | ||
711 | check_efer(); | ||
712 | #endif | ||
713 | |||
714 | parse_early_param(); | ||
715 | |||
715 | /* VMI may relocate the fixmap; do this before touching ioremap area */ | 716 | /* VMI may relocate the fixmap; do this before touching ioremap area */ |
716 | vmi_init(); | 717 | vmi_init(); |
717 | 718 | ||
@@ -758,7 +759,7 @@ void __init setup_arch(char **cmdline_p) | |||
758 | } | 759 | } |
759 | #endif | 760 | #endif |
760 | 761 | ||
761 | ARCH_SETUP | 762 | x86_init.oem.arch_setup(); |
762 | 763 | ||
763 | setup_memory_map(); | 764 | setup_memory_map(); |
764 | parse_setup_data(); | 765 | parse_setup_data(); |
@@ -794,11 +795,6 @@ void __init setup_arch(char **cmdline_p) | |||
794 | #endif | 795 | #endif |
795 | #endif | 796 | #endif |
796 | 797 | ||
797 | strlcpy(command_line, boot_command_line, COMMAND_LINE_SIZE); | ||
798 | *cmdline_p = command_line; | ||
799 | |||
800 | parse_early_param(); | ||
801 | |||
802 | #ifdef CONFIG_X86_64 | 798 | #ifdef CONFIG_X86_64 |
803 | check_efer(); | 799 | check_efer(); |
804 | #endif | 800 | #endif |
@@ -834,11 +830,9 @@ void __init setup_arch(char **cmdline_p) | |||
834 | * VMware detection requires dmi to be available, so this | 830 | * VMware detection requires dmi to be available, so this |
835 | * needs to be done after dmi_scan_machine, for the BP. | 831 | * needs to be done after dmi_scan_machine, for the BP. |
836 | */ | 832 | */ |
837 | init_hypervisor(&boot_cpu_data); | 833 | init_hypervisor_platform(); |
838 | 834 | ||
839 | #ifdef CONFIG_X86_32 | 835 | x86_init.resources.probe_roms(); |
840 | probe_roms(); | ||
841 | #endif | ||
842 | 836 | ||
843 | /* after parse_early_param, so could debug it */ | 837 | /* after parse_early_param, so could debug it */ |
844 | insert_resource(&iomem_resource, &code_resource); | 838 | insert_resource(&iomem_resource, &code_resource); |
@@ -973,10 +967,11 @@ void __init setup_arch(char **cmdline_p) | |||
973 | kvmclock_init(); | 967 | kvmclock_init(); |
974 | #endif | 968 | #endif |
975 | 969 | ||
976 | paravirt_pagetable_setup_start(swapper_pg_dir); | 970 | x86_init.paging.pagetable_setup_start(swapper_pg_dir); |
977 | paging_init(); | 971 | paging_init(); |
978 | paravirt_pagetable_setup_done(swapper_pg_dir); | 972 | x86_init.paging.pagetable_setup_done(swapper_pg_dir); |
979 | paravirt_post_allocator_init(); | 973 | |
974 | tboot_probe(); | ||
980 | 975 | ||
981 | #ifdef CONFIG_X86_64 | 976 | #ifdef CONFIG_X86_64 |
982 | map_vsyscall(); | 977 | map_vsyscall(); |
@@ -993,13 +988,11 @@ void __init setup_arch(char **cmdline_p) | |||
993 | 988 | ||
994 | sfi_init(); | 989 | sfi_init(); |
995 | 990 | ||
996 | #if defined(CONFIG_X86_MPPARSE) || defined(CONFIG_X86_VISWS) | ||
997 | /* | 991 | /* |
998 | * get boot-time SMP configuration: | 992 | * get boot-time SMP configuration: |
999 | */ | 993 | */ |
1000 | if (smp_found_config) | 994 | if (smp_found_config) |
1001 | get_smp_config(); | 995 | get_smp_config(); |
1002 | #endif | ||
1003 | 996 | ||
1004 | prefill_possible_map(); | 997 | prefill_possible_map(); |
1005 | 998 | ||
@@ -1018,10 +1011,7 @@ void __init setup_arch(char **cmdline_p) | |||
1018 | e820_reserve_resources(); | 1011 | e820_reserve_resources(); |
1019 | e820_mark_nosave_regions(max_low_pfn); | 1012 | e820_mark_nosave_regions(max_low_pfn); |
1020 | 1013 | ||
1021 | #ifdef CONFIG_X86_32 | 1014 | x86_init.resources.reserve_resources(); |
1022 | request_resource(&iomem_resource, &video_ram_resource); | ||
1023 | #endif | ||
1024 | reserve_standard_io_resources(); | ||
1025 | 1015 | ||
1026 | e820_setup_gap(); | 1016 | e820_setup_gap(); |
1027 | 1017 | ||
@@ -1033,78 +1023,22 @@ void __init setup_arch(char **cmdline_p) | |||
1033 | conswitchp = &dummy_con; | 1023 | conswitchp = &dummy_con; |
1034 | #endif | 1024 | #endif |
1035 | #endif | 1025 | #endif |
1026 | x86_init.oem.banner(); | ||
1036 | } | 1027 | } |
1037 | 1028 | ||
1038 | #ifdef CONFIG_X86_32 | 1029 | #ifdef CONFIG_X86_32 |
1039 | 1030 | ||
1040 | /** | 1031 | static struct resource video_ram_resource = { |
1041 | * x86_quirk_intr_init - post gate setup interrupt initialisation | 1032 | .name = "Video RAM area", |
1042 | * | 1033 | .start = 0xa0000, |
1043 | * Description: | 1034 | .end = 0xbffff, |
1044 | * Fill in any interrupts that may have been left out by the general | 1035 | .flags = IORESOURCE_BUSY | IORESOURCE_MEM |
1045 | * init_IRQ() routine. interrupts having to do with the machine rather | ||
1046 | * than the devices on the I/O bus (like APIC interrupts in intel MP | ||
1047 | * systems) are started here. | ||
1048 | **/ | ||
1049 | void __init x86_quirk_intr_init(void) | ||
1050 | { | ||
1051 | if (x86_quirks->arch_intr_init) { | ||
1052 | if (x86_quirks->arch_intr_init()) | ||
1053 | return; | ||
1054 | } | ||
1055 | } | ||
1056 | |||
1057 | /** | ||
1058 | * x86_quirk_trap_init - initialise system specific traps | ||
1059 | * | ||
1060 | * Description: | ||
1061 | * Called as the final act of trap_init(). Used in VISWS to initialise | ||
1062 | * the various board specific APIC traps. | ||
1063 | **/ | ||
1064 | void __init x86_quirk_trap_init(void) | ||
1065 | { | ||
1066 | if (x86_quirks->arch_trap_init) { | ||
1067 | if (x86_quirks->arch_trap_init()) | ||
1068 | return; | ||
1069 | } | ||
1070 | } | ||
1071 | |||
1072 | static struct irqaction irq0 = { | ||
1073 | .handler = timer_interrupt, | ||
1074 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, | ||
1075 | .name = "timer" | ||
1076 | }; | 1036 | }; |
1077 | 1037 | ||
1078 | /** | 1038 | void __init i386_reserve_resources(void) |
1079 | * x86_quirk_pre_time_init - do any specific initialisations before. | ||
1080 | * | ||
1081 | **/ | ||
1082 | void __init x86_quirk_pre_time_init(void) | ||
1083 | { | 1039 | { |
1084 | if (x86_quirks->arch_pre_time_init) | 1040 | request_resource(&iomem_resource, &video_ram_resource); |
1085 | x86_quirks->arch_pre_time_init(); | 1041 | reserve_standard_io_resources(); |
1086 | } | 1042 | } |
1087 | 1043 | ||
1088 | /** | ||
1089 | * x86_quirk_time_init - do any specific initialisations for the system timer. | ||
1090 | * | ||
1091 | * Description: | ||
1092 | * Must plug the system timer interrupt source at HZ into the IRQ listed | ||
1093 | * in irq_vectors.h:TIMER_IRQ | ||
1094 | **/ | ||
1095 | void __init x86_quirk_time_init(void) | ||
1096 | { | ||
1097 | if (x86_quirks->arch_time_init) { | ||
1098 | /* | ||
1099 | * A nonzero return code does not mean failure, it means | ||
1100 | * that the architecture quirk does not want any | ||
1101 | * generic (timer) setup to be performed after this: | ||
1102 | */ | ||
1103 | if (x86_quirks->arch_time_init()) | ||
1104 | return; | ||
1105 | } | ||
1106 | |||
1107 | irq0.mask = cpumask_of_cpu(0); | ||
1108 | setup_irq(0, &irq0); | ||
1109 | } | ||
1110 | #endif /* CONFIG_X86_32 */ | 1044 | #endif /* CONFIG_X86_32 */ |
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c index 07d81916f212..d559af913e1f 100644 --- a/arch/x86/kernel/setup_percpu.c +++ b/arch/x86/kernel/setup_percpu.c | |||
@@ -55,6 +55,7 @@ EXPORT_SYMBOL(__per_cpu_offset); | |||
55 | #define PERCPU_FIRST_CHUNK_RESERVE 0 | 55 | #define PERCPU_FIRST_CHUNK_RESERVE 0 |
56 | #endif | 56 | #endif |
57 | 57 | ||
58 | #ifdef CONFIG_X86_32 | ||
58 | /** | 59 | /** |
59 | * pcpu_need_numa - determine percpu allocation needs to consider NUMA | 60 | * pcpu_need_numa - determine percpu allocation needs to consider NUMA |
60 | * | 61 | * |
@@ -83,6 +84,7 @@ static bool __init pcpu_need_numa(void) | |||
83 | #endif | 84 | #endif |
84 | return false; | 85 | return false; |
85 | } | 86 | } |
87 | #endif | ||
86 | 88 | ||
87 | /** | 89 | /** |
88 | * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu | 90 | * pcpu_alloc_bootmem - NUMA friendly alloc_bootmem wrapper for percpu |
@@ -124,308 +126,35 @@ static void * __init pcpu_alloc_bootmem(unsigned int cpu, unsigned long size, | |||
124 | } | 126 | } |
125 | 127 | ||
126 | /* | 128 | /* |
127 | * Large page remap allocator | 129 | * Helpers for first chunk memory allocation |
128 | * | ||
129 | * This allocator uses PMD page as unit. A PMD page is allocated for | ||
130 | * each cpu and each is remapped into vmalloc area using PMD mapping. | ||
131 | * As PMD page is quite large, only part of it is used for the first | ||
132 | * chunk. Unused part is returned to the bootmem allocator. | ||
133 | * | ||
134 | * So, the PMD pages are mapped twice - once to the physical mapping | ||
135 | * and to the vmalloc area for the first percpu chunk. The double | ||
136 | * mapping does add one more PMD TLB entry pressure but still is much | ||
137 | * better than only using 4k mappings while still being NUMA friendly. | ||
138 | */ | 130 | */ |
139 | #ifdef CONFIG_NEED_MULTIPLE_NODES | 131 | static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align) |
140 | struct pcpul_ent { | ||
141 | unsigned int cpu; | ||
142 | void *ptr; | ||
143 | }; | ||
144 | |||
145 | static size_t pcpul_size; | ||
146 | static struct pcpul_ent *pcpul_map; | ||
147 | static struct vm_struct pcpul_vm; | ||
148 | |||
149 | static struct page * __init pcpul_get_page(unsigned int cpu, int pageno) | ||
150 | { | 132 | { |
151 | size_t off = (size_t)pageno << PAGE_SHIFT; | 133 | return pcpu_alloc_bootmem(cpu, size, align); |
152 | |||
153 | if (off >= pcpul_size) | ||
154 | return NULL; | ||
155 | |||
156 | return virt_to_page(pcpul_map[cpu].ptr + off); | ||
157 | } | 134 | } |
158 | 135 | ||
159 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | 136 | static void __init pcpu_fc_free(void *ptr, size_t size) |
160 | { | 137 | { |
161 | size_t map_size, dyn_size; | 138 | free_bootmem(__pa(ptr), size); |
162 | unsigned int cpu; | ||
163 | int i, j; | ||
164 | ssize_t ret; | ||
165 | |||
166 | if (!chosen) { | ||
167 | size_t vm_size = VMALLOC_END - VMALLOC_START; | ||
168 | size_t tot_size = nr_cpu_ids * PMD_SIZE; | ||
169 | |||
170 | /* on non-NUMA, embedding is better */ | ||
171 | if (!pcpu_need_numa()) | ||
172 | return -EINVAL; | ||
173 | |||
174 | /* don't consume more than 20% of vmalloc area */ | ||
175 | if (tot_size > vm_size / 5) { | ||
176 | pr_info("PERCPU: too large chunk size %zuMB for " | ||
177 | "large page remap\n", tot_size >> 20); | ||
178 | return -EINVAL; | ||
179 | } | ||
180 | } | ||
181 | |||
182 | /* need PSE */ | ||
183 | if (!cpu_has_pse) { | ||
184 | pr_warning("PERCPU: lpage allocator requires PSE\n"); | ||
185 | return -EINVAL; | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * Currently supports only single page. Supporting multiple | ||
190 | * pages won't be too difficult if it ever becomes necessary. | ||
191 | */ | ||
192 | pcpul_size = PFN_ALIGN(static_size + PERCPU_MODULE_RESERVE + | ||
193 | PERCPU_DYNAMIC_RESERVE); | ||
194 | if (pcpul_size > PMD_SIZE) { | ||
195 | pr_warning("PERCPU: static data is larger than large page, " | ||
196 | "can't use large page\n"); | ||
197 | return -EINVAL; | ||
198 | } | ||
199 | dyn_size = pcpul_size - static_size - PERCPU_FIRST_CHUNK_RESERVE; | ||
200 | |||
201 | /* allocate pointer array and alloc large pages */ | ||
202 | map_size = PFN_ALIGN(nr_cpu_ids * sizeof(pcpul_map[0])); | ||
203 | pcpul_map = alloc_bootmem(map_size); | ||
204 | |||
205 | for_each_possible_cpu(cpu) { | ||
206 | pcpul_map[cpu].cpu = cpu; | ||
207 | pcpul_map[cpu].ptr = pcpu_alloc_bootmem(cpu, PMD_SIZE, | ||
208 | PMD_SIZE); | ||
209 | if (!pcpul_map[cpu].ptr) { | ||
210 | pr_warning("PERCPU: failed to allocate large page " | ||
211 | "for cpu%u\n", cpu); | ||
212 | goto enomem; | ||
213 | } | ||
214 | |||
215 | /* | ||
216 | * Only use pcpul_size bytes and give back the rest. | ||
217 | * | ||
218 | * Ingo: The 2MB up-rounding bootmem is needed to make | ||
219 | * sure the partial 2MB page is still fully RAM - it's | ||
220 | * not well-specified to have a PAT-incompatible area | ||
221 | * (unmapped RAM, device memory, etc.) in that hole. | ||
222 | */ | ||
223 | free_bootmem(__pa(pcpul_map[cpu].ptr + pcpul_size), | ||
224 | PMD_SIZE - pcpul_size); | ||
225 | |||
226 | memcpy(pcpul_map[cpu].ptr, __per_cpu_load, static_size); | ||
227 | } | ||
228 | |||
229 | /* allocate address and map */ | ||
230 | pcpul_vm.flags = VM_ALLOC; | ||
231 | pcpul_vm.size = nr_cpu_ids * PMD_SIZE; | ||
232 | vm_area_register_early(&pcpul_vm, PMD_SIZE); | ||
233 | |||
234 | for_each_possible_cpu(cpu) { | ||
235 | pmd_t *pmd, pmd_v; | ||
236 | |||
237 | pmd = populate_extra_pmd((unsigned long)pcpul_vm.addr + | ||
238 | cpu * PMD_SIZE); | ||
239 | pmd_v = pfn_pmd(page_to_pfn(virt_to_page(pcpul_map[cpu].ptr)), | ||
240 | PAGE_KERNEL_LARGE); | ||
241 | set_pmd(pmd, pmd_v); | ||
242 | } | ||
243 | |||
244 | /* we're ready, commit */ | ||
245 | pr_info("PERCPU: Remapped at %p with large pages, static data " | ||
246 | "%zu bytes\n", pcpul_vm.addr, static_size); | ||
247 | |||
248 | ret = pcpu_setup_first_chunk(pcpul_get_page, static_size, | ||
249 | PERCPU_FIRST_CHUNK_RESERVE, dyn_size, | ||
250 | PMD_SIZE, pcpul_vm.addr, NULL); | ||
251 | |||
252 | /* sort pcpul_map array for pcpu_lpage_remapped() */ | ||
253 | for (i = 0; i < nr_cpu_ids - 1; i++) | ||
254 | for (j = i + 1; j < nr_cpu_ids; j++) | ||
255 | if (pcpul_map[i].ptr > pcpul_map[j].ptr) { | ||
256 | struct pcpul_ent tmp = pcpul_map[i]; | ||
257 | pcpul_map[i] = pcpul_map[j]; | ||
258 | pcpul_map[j] = tmp; | ||
259 | } | ||
260 | |||
261 | return ret; | ||
262 | |||
263 | enomem: | ||
264 | for_each_possible_cpu(cpu) | ||
265 | if (pcpul_map[cpu].ptr) | ||
266 | free_bootmem(__pa(pcpul_map[cpu].ptr), pcpul_size); | ||
267 | free_bootmem(__pa(pcpul_map), map_size); | ||
268 | return -ENOMEM; | ||
269 | } | 139 | } |
270 | 140 | ||
271 | /** | 141 | static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) |
272 | * pcpu_lpage_remapped - determine whether a kaddr is in pcpul recycled area | ||
273 | * @kaddr: the kernel address in question | ||
274 | * | ||
275 | * Determine whether @kaddr falls in the pcpul recycled area. This is | ||
276 | * used by pageattr to detect VM aliases and break up the pcpu PMD | ||
277 | * mapping such that the same physical page is not mapped under | ||
278 | * different attributes. | ||
279 | * | ||
280 | * The recycled area is always at the tail of a partially used PMD | ||
281 | * page. | ||
282 | * | ||
283 | * RETURNS: | ||
284 | * Address of corresponding remapped pcpu address if match is found; | ||
285 | * otherwise, NULL. | ||
286 | */ | ||
287 | void *pcpu_lpage_remapped(void *kaddr) | ||
288 | { | 142 | { |
289 | void *pmd_addr = (void *)((unsigned long)kaddr & PMD_MASK); | 143 | #ifdef CONFIG_NEED_MULTIPLE_NODES |
290 | unsigned long offset = (unsigned long)kaddr & ~PMD_MASK; | 144 | if (early_cpu_to_node(from) == early_cpu_to_node(to)) |
291 | int left = 0, right = nr_cpu_ids - 1; | 145 | return LOCAL_DISTANCE; |
292 | int pos; | 146 | else |
293 | 147 | return REMOTE_DISTANCE; | |
294 | /* pcpul in use at all? */ | ||
295 | if (!pcpul_map) | ||
296 | return NULL; | ||
297 | |||
298 | /* okay, perform binary search */ | ||
299 | while (left <= right) { | ||
300 | pos = (left + right) / 2; | ||
301 | |||
302 | if (pcpul_map[pos].ptr < pmd_addr) | ||
303 | left = pos + 1; | ||
304 | else if (pcpul_map[pos].ptr > pmd_addr) | ||
305 | right = pos - 1; | ||
306 | else { | ||
307 | /* it shouldn't be in the area for the first chunk */ | ||
308 | WARN_ON(offset < pcpul_size); | ||
309 | |||
310 | return pcpul_vm.addr + | ||
311 | pcpul_map[pos].cpu * PMD_SIZE + offset; | ||
312 | } | ||
313 | } | ||
314 | |||
315 | return NULL; | ||
316 | } | ||
317 | #else | 148 | #else |
318 | static ssize_t __init setup_pcpu_lpage(size_t static_size, bool chosen) | 149 | return LOCAL_DISTANCE; |
319 | { | ||
320 | return -EINVAL; | ||
321 | } | ||
322 | #endif | 150 | #endif |
323 | |||
324 | /* | ||
325 | * Embedding allocator | ||
326 | * | ||
327 | * The first chunk is sized to just contain the static area plus | ||
328 | * module and dynamic reserves and embedded into linear physical | ||
329 | * mapping so that it can use PMD mapping without additional TLB | ||
330 | * pressure. | ||
331 | */ | ||
332 | static ssize_t __init setup_pcpu_embed(size_t static_size, bool chosen) | ||
333 | { | ||
334 | size_t reserve = PERCPU_MODULE_RESERVE + PERCPU_DYNAMIC_RESERVE; | ||
335 | |||
336 | /* | ||
337 | * If large page isn't supported, there's no benefit in doing | ||
338 | * this. Also, embedding allocation doesn't play well with | ||
339 | * NUMA. | ||
340 | */ | ||
341 | if (!chosen && (!cpu_has_pse || pcpu_need_numa())) | ||
342 | return -EINVAL; | ||
343 | |||
344 | return pcpu_embed_first_chunk(static_size, PERCPU_FIRST_CHUNK_RESERVE, | ||
345 | reserve - PERCPU_FIRST_CHUNK_RESERVE, -1); | ||
346 | } | 151 | } |
347 | 152 | ||
348 | /* | 153 | static void __init pcpup_populate_pte(unsigned long addr) |
349 | * 4k page allocator | ||
350 | * | ||
351 | * This is the basic allocator. Static percpu area is allocated | ||
352 | * page-by-page and most of initialization is done by the generic | ||
353 | * setup function. | ||
354 | */ | ||
355 | static struct page **pcpu4k_pages __initdata; | ||
356 | static int pcpu4k_nr_static_pages __initdata; | ||
357 | |||
358 | static struct page * __init pcpu4k_get_page(unsigned int cpu, int pageno) | ||
359 | { | ||
360 | if (pageno < pcpu4k_nr_static_pages) | ||
361 | return pcpu4k_pages[cpu * pcpu4k_nr_static_pages + pageno]; | ||
362 | return NULL; | ||
363 | } | ||
364 | |||
365 | static void __init pcpu4k_populate_pte(unsigned long addr) | ||
366 | { | 154 | { |
367 | populate_extra_pte(addr); | 155 | populate_extra_pte(addr); |
368 | } | 156 | } |
369 | 157 | ||
370 | static ssize_t __init setup_pcpu_4k(size_t static_size) | ||
371 | { | ||
372 | size_t pages_size; | ||
373 | unsigned int cpu; | ||
374 | int i, j; | ||
375 | ssize_t ret; | ||
376 | |||
377 | pcpu4k_nr_static_pages = PFN_UP(static_size); | ||
378 | |||
379 | /* unaligned allocations can't be freed, round up to page size */ | ||
380 | pages_size = PFN_ALIGN(pcpu4k_nr_static_pages * nr_cpu_ids | ||
381 | * sizeof(pcpu4k_pages[0])); | ||
382 | pcpu4k_pages = alloc_bootmem(pages_size); | ||
383 | |||
384 | /* allocate and copy */ | ||
385 | j = 0; | ||
386 | for_each_possible_cpu(cpu) | ||
387 | for (i = 0; i < pcpu4k_nr_static_pages; i++) { | ||
388 | void *ptr; | ||
389 | |||
390 | ptr = pcpu_alloc_bootmem(cpu, PAGE_SIZE, PAGE_SIZE); | ||
391 | if (!ptr) { | ||
392 | pr_warning("PERCPU: failed to allocate " | ||
393 | "4k page for cpu%u\n", cpu); | ||
394 | goto enomem; | ||
395 | } | ||
396 | |||
397 | memcpy(ptr, __per_cpu_load + i * PAGE_SIZE, PAGE_SIZE); | ||
398 | pcpu4k_pages[j++] = virt_to_page(ptr); | ||
399 | } | ||
400 | |||
401 | /* we're ready, commit */ | ||
402 | pr_info("PERCPU: Allocated %d 4k pages, static data %zu bytes\n", | ||
403 | pcpu4k_nr_static_pages, static_size); | ||
404 | |||
405 | ret = pcpu_setup_first_chunk(pcpu4k_get_page, static_size, | ||
406 | PERCPU_FIRST_CHUNK_RESERVE, -1, | ||
407 | -1, NULL, pcpu4k_populate_pte); | ||
408 | goto out_free_ar; | ||
409 | |||
410 | enomem: | ||
411 | while (--j >= 0) | ||
412 | free_bootmem(__pa(page_address(pcpu4k_pages[j])), PAGE_SIZE); | ||
413 | ret = -ENOMEM; | ||
414 | out_free_ar: | ||
415 | free_bootmem(__pa(pcpu4k_pages), pages_size); | ||
416 | return ret; | ||
417 | } | ||
418 | |||
419 | /* for explicit first chunk allocator selection */ | ||
420 | static char pcpu_chosen_alloc[16] __initdata; | ||
421 | |||
422 | static int __init percpu_alloc_setup(char *str) | ||
423 | { | ||
424 | strncpy(pcpu_chosen_alloc, str, sizeof(pcpu_chosen_alloc) - 1); | ||
425 | return 0; | ||
426 | } | ||
427 | early_param("percpu_alloc", percpu_alloc_setup); | ||
428 | |||
429 | static inline void setup_percpu_segment(int cpu) | 158 | static inline void setup_percpu_segment(int cpu) |
430 | { | 159 | { |
431 | #ifdef CONFIG_X86_32 | 160 | #ifdef CONFIG_X86_32 |
@@ -441,52 +170,49 @@ static inline void setup_percpu_segment(int cpu) | |||
441 | 170 | ||
442 | void __init setup_per_cpu_areas(void) | 171 | void __init setup_per_cpu_areas(void) |
443 | { | 172 | { |
444 | size_t static_size = __per_cpu_end - __per_cpu_start; | ||
445 | unsigned int cpu; | 173 | unsigned int cpu; |
446 | unsigned long delta; | 174 | unsigned long delta; |
447 | size_t pcpu_unit_size; | 175 | int rc; |
448 | ssize_t ret; | ||
449 | 176 | ||
450 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", | 177 | pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", |
451 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); | 178 | NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); |
452 | 179 | ||
453 | /* | 180 | /* |
454 | * Allocate percpu area. If PSE is supported, try to make use | 181 | * Allocate percpu area. Embedding allocator is our favorite; |
455 | * of large page mappings. Please read comments on top of | 182 | * however, on NUMA configurations, it can result in very |
456 | * each allocator for details. | 183 | * sparse unit mapping and vmalloc area isn't spacious enough |
184 | * on 32bit. Use page in that case. | ||
457 | */ | 185 | */ |
458 | ret = -EINVAL; | 186 | #ifdef CONFIG_X86_32 |
459 | if (strlen(pcpu_chosen_alloc)) { | 187 | if (pcpu_chosen_fc == PCPU_FC_AUTO && pcpu_need_numa()) |
460 | if (strcmp(pcpu_chosen_alloc, "4k")) { | 188 | pcpu_chosen_fc = PCPU_FC_PAGE; |
461 | if (!strcmp(pcpu_chosen_alloc, "lpage")) | 189 | #endif |
462 | ret = setup_pcpu_lpage(static_size, true); | 190 | rc = -EINVAL; |
463 | else if (!strcmp(pcpu_chosen_alloc, "embed")) | 191 | if (pcpu_chosen_fc != PCPU_FC_PAGE) { |
464 | ret = setup_pcpu_embed(static_size, true); | 192 | const size_t atom_size = cpu_has_pse ? PMD_SIZE : PAGE_SIZE; |
465 | else | 193 | const size_t dyn_size = PERCPU_MODULE_RESERVE + |
466 | pr_warning("PERCPU: unknown allocator %s " | 194 | PERCPU_DYNAMIC_RESERVE - PERCPU_FIRST_CHUNK_RESERVE; |
467 | "specified\n", pcpu_chosen_alloc); | 195 | |
468 | if (ret < 0) | 196 | rc = pcpu_embed_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, |
469 | pr_warning("PERCPU: %s allocator failed (%zd), " | 197 | dyn_size, atom_size, |
470 | "falling back to 4k\n", | 198 | pcpu_cpu_distance, |
471 | pcpu_chosen_alloc, ret); | 199 | pcpu_fc_alloc, pcpu_fc_free); |
472 | } | 200 | if (rc < 0) |
473 | } else { | 201 | pr_warning("PERCPU: %s allocator failed (%d), " |
474 | ret = setup_pcpu_lpage(static_size, false); | 202 | "falling back to page size\n", |
475 | if (ret < 0) | 203 | pcpu_fc_names[pcpu_chosen_fc], rc); |
476 | ret = setup_pcpu_embed(static_size, false); | ||
477 | } | 204 | } |
478 | if (ret < 0) | 205 | if (rc < 0) |
479 | ret = setup_pcpu_4k(static_size); | 206 | rc = pcpu_page_first_chunk(PERCPU_FIRST_CHUNK_RESERVE, |
480 | if (ret < 0) | 207 | pcpu_fc_alloc, pcpu_fc_free, |
481 | panic("cannot allocate static percpu area (%zu bytes, err=%zd)", | 208 | pcpup_populate_pte); |
482 | static_size, ret); | 209 | if (rc < 0) |
483 | 210 | panic("cannot initialize percpu area (err=%d)", rc); | |
484 | pcpu_unit_size = ret; | ||
485 | 211 | ||
486 | /* alrighty, percpu areas up and running */ | 212 | /* alrighty, percpu areas up and running */ |
487 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; | 213 | delta = (unsigned long)pcpu_base_addr - (unsigned long)__per_cpu_start; |
488 | for_each_possible_cpu(cpu) { | 214 | for_each_possible_cpu(cpu) { |
489 | per_cpu_offset(cpu) = delta + cpu * pcpu_unit_size; | 215 | per_cpu_offset(cpu) = delta + pcpu_unit_offsets[cpu]; |
490 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); | 216 | per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); |
491 | per_cpu(cpu_number, cpu) = cpu; | 217 | per_cpu(cpu_number, cpu) = cpu; |
492 | setup_percpu_segment(cpu); | 218 | setup_percpu_segment(cpu); |
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 4c578751e94e..6a44a76055ad 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c | |||
@@ -856,7 +856,7 @@ static void do_signal(struct pt_regs *regs) | |||
856 | void | 856 | void |
857 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | 857 | do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) |
858 | { | 858 | { |
859 | #ifdef CONFIG_X86_NEW_MCE | 859 | #ifdef CONFIG_X86_MCE |
860 | /* notify userspace of pending MCEs */ | 860 | /* notify userspace of pending MCEs */ |
861 | if (thread_info_flags & _TIF_MCE_NOTIFY) | 861 | if (thread_info_flags & _TIF_MCE_NOTIFY) |
862 | mce_notify_process(); | 862 | mce_notify_process(); |
@@ -869,6 +869,8 @@ do_notify_resume(struct pt_regs *regs, void *unused, __u32 thread_info_flags) | |||
869 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { | 869 | if (thread_info_flags & _TIF_NOTIFY_RESUME) { |
870 | clear_thread_flag(TIF_NOTIFY_RESUME); | 870 | clear_thread_flag(TIF_NOTIFY_RESUME); |
871 | tracehook_notify_resume(regs); | 871 | tracehook_notify_resume(regs); |
872 | if (current->replacement_session_keyring) | ||
873 | key_replace_session_keyring(); | ||
872 | } | 874 | } |
873 | 875 | ||
874 | #ifdef CONFIG_X86_32 | 876 | #ifdef CONFIG_X86_32 |
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 2fecda69ee64..09c5e077dff7 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c | |||
@@ -47,6 +47,7 @@ | |||
47 | #include <linux/bootmem.h> | 47 | #include <linux/bootmem.h> |
48 | #include <linux/err.h> | 48 | #include <linux/err.h> |
49 | #include <linux/nmi.h> | 49 | #include <linux/nmi.h> |
50 | #include <linux/tboot.h> | ||
50 | 51 | ||
51 | #include <asm/acpi.h> | 52 | #include <asm/acpi.h> |
52 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
@@ -323,7 +324,7 @@ notrace static void __cpuinit start_secondary(void *unused) | |||
323 | /* enable local interrupts */ | 324 | /* enable local interrupts */ |
324 | local_irq_enable(); | 325 | local_irq_enable(); |
325 | 326 | ||
326 | setup_secondary_clock(); | 327 | x86_cpuinit.setup_percpu_clockev(); |
327 | 328 | ||
328 | wmb(); | 329 | wmb(); |
329 | cpu_idle(); | 330 | cpu_idle(); |
@@ -434,7 +435,8 @@ const struct cpumask *cpu_coregroup_mask(int cpu) | |||
434 | * For perf, we return last level cache shared map. | 435 | * For perf, we return last level cache shared map. |
435 | * And for power savings, we return cpu_core_map | 436 | * And for power savings, we return cpu_core_map |
436 | */ | 437 | */ |
437 | if (sched_mc_power_savings || sched_smt_power_savings) | 438 | if ((sched_mc_power_savings || sched_smt_power_savings) && |
439 | !(cpu_has(c, X86_FEATURE_AMD_DCM))) | ||
438 | return cpu_core_mask(cpu); | 440 | return cpu_core_mask(cpu); |
439 | else | 441 | else |
440 | return c->llc_shared_map; | 442 | return c->llc_shared_map; |
@@ -1112,13 +1114,26 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) | |||
1112 | 1114 | ||
1113 | printk(KERN_INFO "CPU%d: ", 0); | 1115 | printk(KERN_INFO "CPU%d: ", 0); |
1114 | print_cpu_info(&cpu_data(0)); | 1116 | print_cpu_info(&cpu_data(0)); |
1115 | setup_boot_clock(); | 1117 | x86_init.timers.setup_percpu_clockev(); |
1116 | 1118 | ||
1117 | if (is_uv_system()) | 1119 | if (is_uv_system()) |
1118 | uv_system_init(); | 1120 | uv_system_init(); |
1121 | |||
1122 | set_mtrr_aps_delayed_init(); | ||
1119 | out: | 1123 | out: |
1120 | preempt_enable(); | 1124 | preempt_enable(); |
1121 | } | 1125 | } |
1126 | |||
1127 | void arch_enable_nonboot_cpus_begin(void) | ||
1128 | { | ||
1129 | set_mtrr_aps_delayed_init(); | ||
1130 | } | ||
1131 | |||
1132 | void arch_enable_nonboot_cpus_end(void) | ||
1133 | { | ||
1134 | mtrr_aps_init(); | ||
1135 | } | ||
1136 | |||
1122 | /* | 1137 | /* |
1123 | * Early setup to make printk work. | 1138 | * Early setup to make printk work. |
1124 | */ | 1139 | */ |
@@ -1140,6 +1155,7 @@ void __init native_smp_cpus_done(unsigned int max_cpus) | |||
1140 | setup_ioapic_dest(); | 1155 | setup_ioapic_dest(); |
1141 | #endif | 1156 | #endif |
1142 | check_nmi_watchdog(); | 1157 | check_nmi_watchdog(); |
1158 | mtrr_aps_init(); | ||
1143 | } | 1159 | } |
1144 | 1160 | ||
1145 | static int __initdata setup_possible_cpus = -1; | 1161 | static int __initdata setup_possible_cpus = -1; |
@@ -1317,6 +1333,7 @@ void play_dead_common(void) | |||
1317 | void native_play_dead(void) | 1333 | void native_play_dead(void) |
1318 | { | 1334 | { |
1319 | play_dead_common(); | 1335 | play_dead_common(); |
1336 | tboot_shutdown(TB_SHUTDOWN_WFS); | ||
1320 | wbinvd_halt(); | 1337 | wbinvd_halt(); |
1321 | } | 1338 | } |
1322 | 1339 | ||
diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index e8b9863ef8c4..3149032ff107 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c | |||
@@ -4,6 +4,7 @@ | |||
4 | #include <linux/sched.h> | 4 | #include <linux/sched.h> |
5 | #include <linux/mm.h> | 5 | #include <linux/mm.h> |
6 | #include <linux/ptrace.h> | 6 | #include <linux/ptrace.h> |
7 | #include <asm/desc.h> | ||
7 | 8 | ||
8 | unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) | 9 | unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *regs) |
9 | { | 10 | { |
@@ -23,7 +24,7 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re | |||
23 | * and APM bios ones we just ignore here. | 24 | * and APM bios ones we just ignore here. |
24 | */ | 25 | */ |
25 | if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { | 26 | if ((seg & SEGMENT_TI_MASK) == SEGMENT_LDT) { |
26 | u32 *desc; | 27 | struct desc_struct *desc; |
27 | unsigned long base; | 28 | unsigned long base; |
28 | 29 | ||
29 | seg &= ~7UL; | 30 | seg &= ~7UL; |
@@ -33,12 +34,10 @@ unsigned long convert_ip_to_linear(struct task_struct *child, struct pt_regs *re | |||
33 | addr = -1L; /* bogus selector, access would fault */ | 34 | addr = -1L; /* bogus selector, access would fault */ |
34 | else { | 35 | else { |
35 | desc = child->mm->context.ldt + seg; | 36 | desc = child->mm->context.ldt + seg; |
36 | base = ((desc[0] >> 16) | | 37 | base = get_desc_base(desc); |
37 | ((desc[1] & 0xff) << 16) | | ||
38 | (desc[1] & 0xff000000)); | ||
39 | 38 | ||
40 | /* 16-bit code segment? */ | 39 | /* 16-bit code segment? */ |
41 | if (!((desc[1] >> 22) & 1)) | 40 | if (!desc->d) |
42 | addr &= 0xffff; | 41 | addr &= 0xffff; |
43 | addr += base; | 42 | addr += base; |
44 | } | 43 | } |
diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index 6bc211accf08..45e00eb09c3a 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c | |||
@@ -18,9 +18,9 @@ | |||
18 | #include <asm/ia32.h> | 18 | #include <asm/ia32.h> |
19 | #include <asm/syscalls.h> | 19 | #include <asm/syscalls.h> |
20 | 20 | ||
21 | asmlinkage long sys_mmap(unsigned long addr, unsigned long len, | 21 | SYSCALL_DEFINE6(mmap, unsigned long, addr, unsigned long, len, |
22 | unsigned long prot, unsigned long flags, | 22 | unsigned long, prot, unsigned long, flags, |
23 | unsigned long fd, unsigned long off) | 23 | unsigned long, fd, unsigned long, off) |
24 | { | 24 | { |
25 | long error; | 25 | long error; |
26 | struct file *file; | 26 | struct file *file; |
@@ -226,7 +226,7 @@ bottomup: | |||
226 | } | 226 | } |
227 | 227 | ||
228 | 228 | ||
229 | asmlinkage long sys_uname(struct new_utsname __user *name) | 229 | SYSCALL_DEFINE1(uname, struct new_utsname __user *, name) |
230 | { | 230 | { |
231 | int err; | 231 | int err; |
232 | down_read(&uts_sem); | 232 | down_read(&uts_sem); |
diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c new file mode 100644 index 000000000000..86c9f91b48ae --- /dev/null +++ b/arch/x86/kernel/tboot.c | |||
@@ -0,0 +1,447 @@ | |||
1 | /* | ||
2 | * tboot.c: main implementation of helper functions used by kernel for | ||
3 | * runtime support of Intel(R) Trusted Execution Technology | ||
4 | * | ||
5 | * Copyright (c) 2006-2009, Intel Corporation | ||
6 | * | ||
7 | * This program is free software; you can redistribute it and/or modify it | ||
8 | * under the terms and conditions of the GNU General Public License, | ||
9 | * version 2, as published by the Free Software Foundation. | ||
10 | * | ||
11 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
14 | * more details. | ||
15 | * | ||
16 | * You should have received a copy of the GNU General Public License along with | ||
17 | * this program; if not, write to the Free Software Foundation, Inc., | ||
18 | * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. | ||
19 | * | ||
20 | */ | ||
21 | |||
22 | #include <linux/dma_remapping.h> | ||
23 | #include <linux/init_task.h> | ||
24 | #include <linux/spinlock.h> | ||
25 | #include <linux/delay.h> | ||
26 | #include <linux/sched.h> | ||
27 | #include <linux/init.h> | ||
28 | #include <linux/dmar.h> | ||
29 | #include <linux/cpu.h> | ||
30 | #include <linux/pfn.h> | ||
31 | #include <linux/mm.h> | ||
32 | #include <linux/tboot.h> | ||
33 | |||
34 | #include <asm/trampoline.h> | ||
35 | #include <asm/processor.h> | ||
36 | #include <asm/bootparam.h> | ||
37 | #include <asm/pgtable.h> | ||
38 | #include <asm/pgalloc.h> | ||
39 | #include <asm/fixmap.h> | ||
40 | #include <asm/proto.h> | ||
41 | #include <asm/setup.h> | ||
42 | #include <asm/e820.h> | ||
43 | #include <asm/io.h> | ||
44 | |||
45 | #include "acpi/realmode/wakeup.h" | ||
46 | |||
47 | /* Global pointer to shared data; NULL means no measured launch. */ | ||
48 | struct tboot *tboot __read_mostly; | ||
49 | |||
50 | /* timeout for APs (in secs) to enter wait-for-SIPI state during shutdown */ | ||
51 | #define AP_WAIT_TIMEOUT 1 | ||
52 | |||
53 | #undef pr_fmt | ||
54 | #define pr_fmt(fmt) "tboot: " fmt | ||
55 | |||
56 | static u8 tboot_uuid[16] __initdata = TBOOT_UUID; | ||
57 | |||
58 | void __init tboot_probe(void) | ||
59 | { | ||
60 | /* Look for valid page-aligned address for shared page. */ | ||
61 | if (!boot_params.tboot_addr) | ||
62 | return; | ||
63 | /* | ||
64 | * also verify that it is mapped as we expect it before calling | ||
65 | * set_fixmap(), to reduce chance of garbage value causing crash | ||
66 | */ | ||
67 | if (!e820_any_mapped(boot_params.tboot_addr, | ||
68 | boot_params.tboot_addr, E820_RESERVED)) { | ||
69 | pr_warning("non-0 tboot_addr but it is not of type E820_RESERVED\n"); | ||
70 | return; | ||
71 | } | ||
72 | |||
73 | /* only a natively booted kernel should be using TXT */ | ||
74 | if (paravirt_enabled()) { | ||
75 | pr_warning("non-0 tboot_addr but pv_ops is enabled\n"); | ||
76 | return; | ||
77 | } | ||
78 | |||
79 | /* Map and check for tboot UUID. */ | ||
80 | set_fixmap(FIX_TBOOT_BASE, boot_params.tboot_addr); | ||
81 | tboot = (struct tboot *)fix_to_virt(FIX_TBOOT_BASE); | ||
82 | if (memcmp(&tboot_uuid, &tboot->uuid, sizeof(tboot->uuid))) { | ||
83 | pr_warning("tboot at 0x%llx is invalid\n", | ||
84 | boot_params.tboot_addr); | ||
85 | tboot = NULL; | ||
86 | return; | ||
87 | } | ||
88 | if (tboot->version < 5) { | ||
89 | pr_warning("tboot version is invalid: %u\n", tboot->version); | ||
90 | tboot = NULL; | ||
91 | return; | ||
92 | } | ||
93 | |||
94 | pr_info("found shared page at phys addr 0x%llx:\n", | ||
95 | boot_params.tboot_addr); | ||
96 | pr_debug("version: %d\n", tboot->version); | ||
97 | pr_debug("log_addr: 0x%08x\n", tboot->log_addr); | ||
98 | pr_debug("shutdown_entry: 0x%x\n", tboot->shutdown_entry); | ||
99 | pr_debug("tboot_base: 0x%08x\n", tboot->tboot_base); | ||
100 | pr_debug("tboot_size: 0x%x\n", tboot->tboot_size); | ||
101 | } | ||
102 | |||
103 | static pgd_t *tboot_pg_dir; | ||
104 | static struct mm_struct tboot_mm = { | ||
105 | .mm_rb = RB_ROOT, | ||
106 | .pgd = swapper_pg_dir, | ||
107 | .mm_users = ATOMIC_INIT(2), | ||
108 | .mm_count = ATOMIC_INIT(1), | ||
109 | .mmap_sem = __RWSEM_INITIALIZER(init_mm.mmap_sem), | ||
110 | .page_table_lock = __SPIN_LOCK_UNLOCKED(init_mm.page_table_lock), | ||
111 | .mmlist = LIST_HEAD_INIT(init_mm.mmlist), | ||
112 | .cpu_vm_mask = CPU_MASK_ALL, | ||
113 | }; | ||
114 | |||
115 | static inline void switch_to_tboot_pt(void) | ||
116 | { | ||
117 | write_cr3(virt_to_phys(tboot_pg_dir)); | ||
118 | } | ||
119 | |||
120 | static int map_tboot_page(unsigned long vaddr, unsigned long pfn, | ||
121 | pgprot_t prot) | ||
122 | { | ||
123 | pgd_t *pgd; | ||
124 | pud_t *pud; | ||
125 | pmd_t *pmd; | ||
126 | pte_t *pte; | ||
127 | |||
128 | pgd = pgd_offset(&tboot_mm, vaddr); | ||
129 | pud = pud_alloc(&tboot_mm, pgd, vaddr); | ||
130 | if (!pud) | ||
131 | return -1; | ||
132 | pmd = pmd_alloc(&tboot_mm, pud, vaddr); | ||
133 | if (!pmd) | ||
134 | return -1; | ||
135 | pte = pte_alloc_map(&tboot_mm, pmd, vaddr); | ||
136 | if (!pte) | ||
137 | return -1; | ||
138 | set_pte_at(&tboot_mm, vaddr, pte, pfn_pte(pfn, prot)); | ||
139 | pte_unmap(pte); | ||
140 | return 0; | ||
141 | } | ||
142 | |||
143 | static int map_tboot_pages(unsigned long vaddr, unsigned long start_pfn, | ||
144 | unsigned long nr) | ||
145 | { | ||
146 | /* Reuse the original kernel mapping */ | ||
147 | tboot_pg_dir = pgd_alloc(&tboot_mm); | ||
148 | if (!tboot_pg_dir) | ||
149 | return -1; | ||
150 | |||
151 | for (; nr > 0; nr--, vaddr += PAGE_SIZE, start_pfn++) { | ||
152 | if (map_tboot_page(vaddr, start_pfn, PAGE_KERNEL_EXEC)) | ||
153 | return -1; | ||
154 | } | ||
155 | |||
156 | return 0; | ||
157 | } | ||
158 | |||
159 | static void tboot_create_trampoline(void) | ||
160 | { | ||
161 | u32 map_base, map_size; | ||
162 | |||
163 | /* Create identity map for tboot shutdown code. */ | ||
164 | map_base = PFN_DOWN(tboot->tboot_base); | ||
165 | map_size = PFN_UP(tboot->tboot_size); | ||
166 | if (map_tboot_pages(map_base << PAGE_SHIFT, map_base, map_size)) | ||
167 | panic("tboot: Error mapping tboot pages (mfns) @ 0x%x, 0x%x\n", | ||
168 | map_base, map_size); | ||
169 | } | ||
170 | |||
171 | #ifdef CONFIG_ACPI_SLEEP | ||
172 | |||
173 | static void add_mac_region(phys_addr_t start, unsigned long size) | ||
174 | { | ||
175 | struct tboot_mac_region *mr; | ||
176 | phys_addr_t end = start + size; | ||
177 | |||
178 | if (start && size) { | ||
179 | mr = &tboot->mac_regions[tboot->num_mac_regions++]; | ||
180 | mr->start = round_down(start, PAGE_SIZE); | ||
181 | mr->size = round_up(end, PAGE_SIZE) - mr->start; | ||
182 | } | ||
183 | } | ||
184 | |||
185 | static int tboot_setup_sleep(void) | ||
186 | { | ||
187 | tboot->num_mac_regions = 0; | ||
188 | |||
189 | /* S3 resume code */ | ||
190 | add_mac_region(acpi_wakeup_address, WAKEUP_SIZE); | ||
191 | |||
192 | #ifdef CONFIG_X86_TRAMPOLINE | ||
193 | /* AP trampoline code */ | ||
194 | add_mac_region(virt_to_phys(trampoline_base), TRAMPOLINE_SIZE); | ||
195 | #endif | ||
196 | |||
197 | /* kernel code + data + bss */ | ||
198 | add_mac_region(virt_to_phys(_text), _end - _text); | ||
199 | |||
200 | tboot->acpi_sinfo.kernel_s3_resume_vector = acpi_wakeup_address; | ||
201 | |||
202 | return 0; | ||
203 | } | ||
204 | |||
205 | #else /* no CONFIG_ACPI_SLEEP */ | ||
206 | |||
207 | static int tboot_setup_sleep(void) | ||
208 | { | ||
209 | /* S3 shutdown requested, but S3 not supported by the kernel... */ | ||
210 | BUG(); | ||
211 | return -1; | ||
212 | } | ||
213 | |||
214 | #endif | ||
215 | |||
216 | void tboot_shutdown(u32 shutdown_type) | ||
217 | { | ||
218 | void (*shutdown)(void); | ||
219 | |||
220 | if (!tboot_enabled()) | ||
221 | return; | ||
222 | |||
223 | /* | ||
224 | * if we're being called before the 1:1 mapping is set up then just | ||
225 | * return and let the normal shutdown happen; this should only be | ||
226 | * due to very early panic() | ||
227 | */ | ||
228 | if (!tboot_pg_dir) | ||
229 | return; | ||
230 | |||
231 | /* if this is S3 then set regions to MAC */ | ||
232 | if (shutdown_type == TB_SHUTDOWN_S3) | ||
233 | if (tboot_setup_sleep()) | ||
234 | return; | ||
235 | |||
236 | tboot->shutdown_type = shutdown_type; | ||
237 | |||
238 | switch_to_tboot_pt(); | ||
239 | |||
240 | shutdown = (void(*)(void))(unsigned long)tboot->shutdown_entry; | ||
241 | shutdown(); | ||
242 | |||
243 | /* should not reach here */ | ||
244 | while (1) | ||
245 | halt(); | ||
246 | } | ||
247 | |||
248 | static void tboot_copy_fadt(const struct acpi_table_fadt *fadt) | ||
249 | { | ||
250 | #define TB_COPY_GAS(tbg, g) \ | ||
251 | tbg.space_id = g.space_id; \ | ||
252 | tbg.bit_width = g.bit_width; \ | ||
253 | tbg.bit_offset = g.bit_offset; \ | ||
254 | tbg.access_width = g.access_width; \ | ||
255 | tbg.address = g.address; | ||
256 | |||
257 | TB_COPY_GAS(tboot->acpi_sinfo.pm1a_cnt_blk, fadt->xpm1a_control_block); | ||
258 | TB_COPY_GAS(tboot->acpi_sinfo.pm1b_cnt_blk, fadt->xpm1b_control_block); | ||
259 | TB_COPY_GAS(tboot->acpi_sinfo.pm1a_evt_blk, fadt->xpm1a_event_block); | ||
260 | TB_COPY_GAS(tboot->acpi_sinfo.pm1b_evt_blk, fadt->xpm1b_event_block); | ||
261 | |||
262 | /* | ||
263 | * We need phys addr of waking vector, but can't use virt_to_phys() on | ||
264 | * &acpi_gbl_FACS because it is ioremap'ed, so calc from FACS phys | ||
265 | * addr. | ||
266 | */ | ||
267 | tboot->acpi_sinfo.wakeup_vector = fadt->facs + | ||
268 | offsetof(struct acpi_table_facs, firmware_waking_vector); | ||
269 | } | ||
270 | |||
271 | void tboot_sleep(u8 sleep_state, u32 pm1a_control, u32 pm1b_control) | ||
272 | { | ||
273 | static u32 acpi_shutdown_map[ACPI_S_STATE_COUNT] = { | ||
274 | /* S0,1,2: */ -1, -1, -1, | ||
275 | /* S3: */ TB_SHUTDOWN_S3, | ||
276 | /* S4: */ TB_SHUTDOWN_S4, | ||
277 | /* S5: */ TB_SHUTDOWN_S5 }; | ||
278 | |||
279 | if (!tboot_enabled()) | ||
280 | return; | ||
281 | |||
282 | tboot_copy_fadt(&acpi_gbl_FADT); | ||
283 | tboot->acpi_sinfo.pm1a_cnt_val = pm1a_control; | ||
284 | tboot->acpi_sinfo.pm1b_cnt_val = pm1b_control; | ||
285 | /* we always use the 32b wakeup vector */ | ||
286 | tboot->acpi_sinfo.vector_width = 32; | ||
287 | |||
288 | if (sleep_state >= ACPI_S_STATE_COUNT || | ||
289 | acpi_shutdown_map[sleep_state] == -1) { | ||
290 | pr_warning("unsupported sleep state 0x%x\n", sleep_state); | ||
291 | return; | ||
292 | } | ||
293 | |||
294 | tboot_shutdown(acpi_shutdown_map[sleep_state]); | ||
295 | } | ||
296 | |||
297 | static atomic_t ap_wfs_count; | ||
298 | |||
299 | static int tboot_wait_for_aps(int num_aps) | ||
300 | { | ||
301 | unsigned long timeout; | ||
302 | |||
303 | timeout = AP_WAIT_TIMEOUT*HZ; | ||
304 | while (atomic_read((atomic_t *)&tboot->num_in_wfs) != num_aps && | ||
305 | timeout) { | ||
306 | mdelay(1); | ||
307 | timeout--; | ||
308 | } | ||
309 | |||
310 | if (timeout) | ||
311 | pr_warning("tboot wait for APs timeout\n"); | ||
312 | |||
313 | return !(atomic_read((atomic_t *)&tboot->num_in_wfs) == num_aps); | ||
314 | } | ||
315 | |||
316 | static int __cpuinit tboot_cpu_callback(struct notifier_block *nfb, | ||
317 | unsigned long action, void *hcpu) | ||
318 | { | ||
319 | switch (action) { | ||
320 | case CPU_DYING: | ||
321 | atomic_inc(&ap_wfs_count); | ||
322 | if (num_online_cpus() == 1) | ||
323 | if (tboot_wait_for_aps(atomic_read(&ap_wfs_count))) | ||
324 | return NOTIFY_BAD; | ||
325 | break; | ||
326 | } | ||
327 | return NOTIFY_OK; | ||
328 | } | ||
329 | |||
330 | static struct notifier_block tboot_cpu_notifier __cpuinitdata = | ||
331 | { | ||
332 | .notifier_call = tboot_cpu_callback, | ||
333 | }; | ||
334 | |||
335 | static __init int tboot_late_init(void) | ||
336 | { | ||
337 | if (!tboot_enabled()) | ||
338 | return 0; | ||
339 | |||
340 | tboot_create_trampoline(); | ||
341 | |||
342 | atomic_set(&ap_wfs_count, 0); | ||
343 | register_hotcpu_notifier(&tboot_cpu_notifier); | ||
344 | return 0; | ||
345 | } | ||
346 | |||
347 | late_initcall(tboot_late_init); | ||
348 | |||
349 | /* | ||
350 | * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE) | ||
351 | */ | ||
352 | |||
353 | #define TXT_PUB_CONFIG_REGS_BASE 0xfed30000 | ||
354 | #define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000 | ||
355 | |||
356 | /* # pages for each config regs space - used by fixmap */ | ||
357 | #define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \ | ||
358 | TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT) | ||
359 | |||
360 | /* offsets from pub/priv config space */ | ||
361 | #define TXTCR_HEAP_BASE 0x0300 | ||
362 | #define TXTCR_HEAP_SIZE 0x0308 | ||
363 | |||
364 | #define SHA1_SIZE 20 | ||
365 | |||
366 | struct sha1_hash { | ||
367 | u8 hash[SHA1_SIZE]; | ||
368 | }; | ||
369 | |||
370 | struct sinit_mle_data { | ||
371 | u32 version; /* currently 6 */ | ||
372 | struct sha1_hash bios_acm_id; | ||
373 | u32 edx_senter_flags; | ||
374 | u64 mseg_valid; | ||
375 | struct sha1_hash sinit_hash; | ||
376 | struct sha1_hash mle_hash; | ||
377 | struct sha1_hash stm_hash; | ||
378 | struct sha1_hash lcp_policy_hash; | ||
379 | u32 lcp_policy_control; | ||
380 | u32 rlp_wakeup_addr; | ||
381 | u32 reserved; | ||
382 | u32 num_mdrs; | ||
383 | u32 mdrs_off; | ||
384 | u32 num_vtd_dmars; | ||
385 | u32 vtd_dmars_off; | ||
386 | } __packed; | ||
387 | |||
388 | struct acpi_table_header *tboot_get_dmar_table(struct acpi_table_header *dmar_tbl) | ||
389 | { | ||
390 | void *heap_base, *heap_ptr, *config; | ||
391 | |||
392 | if (!tboot_enabled()) | ||
393 | return dmar_tbl; | ||
394 | |||
395 | /* | ||
396 | * ACPI tables may not be DMA protected by tboot, so use DMAR copy | ||
397 | * SINIT saved in SinitMleData in TXT heap (which is DMA protected) | ||
398 | */ | ||
399 | |||
400 | /* map config space in order to get heap addr */ | ||
401 | config = ioremap(TXT_PUB_CONFIG_REGS_BASE, NR_TXT_CONFIG_PAGES * | ||
402 | PAGE_SIZE); | ||
403 | if (!config) | ||
404 | return NULL; | ||
405 | |||
406 | /* now map TXT heap */ | ||
407 | heap_base = ioremap(*(u64 *)(config + TXTCR_HEAP_BASE), | ||
408 | *(u64 *)(config + TXTCR_HEAP_SIZE)); | ||
409 | iounmap(config); | ||
410 | if (!heap_base) | ||
411 | return NULL; | ||
412 | |||
413 | /* walk heap to SinitMleData */ | ||
414 | /* skip BiosData */ | ||
415 | heap_ptr = heap_base + *(u64 *)heap_base; | ||
416 | /* skip OsMleData */ | ||
417 | heap_ptr += *(u64 *)heap_ptr; | ||
418 | /* skip OsSinitData */ | ||
419 | heap_ptr += *(u64 *)heap_ptr; | ||
420 | /* now points to SinitMleDataSize; set to SinitMleData */ | ||
421 | heap_ptr += sizeof(u64); | ||
422 | /* get addr of DMAR table */ | ||
423 | dmar_tbl = (struct acpi_table_header *)(heap_ptr + | ||
424 | ((struct sinit_mle_data *)heap_ptr)->vtd_dmars_off - | ||
425 | sizeof(u64)); | ||
426 | |||
427 | /* don't unmap heap because dmar.c needs access to this */ | ||
428 | |||
429 | return dmar_tbl; | ||
430 | } | ||
431 | |||
432 | int tboot_force_iommu(void) | ||
433 | { | ||
434 | if (!tboot_enabled()) | ||
435 | return 0; | ||
436 | |||
437 | if (no_iommu || swiotlb || dmar_disabled) | ||
438 | pr_warning("Forcing Intel-IOMMU to enabled\n"); | ||
439 | |||
440 | dmar_disabled = 0; | ||
441 | #ifdef CONFIG_SWIOTLB | ||
442 | swiotlb = 0; | ||
443 | #endif | ||
444 | no_iommu = 0; | ||
445 | |||
446 | return 1; | ||
447 | } | ||
diff --git a/arch/x86/kernel/time.c b/arch/x86/kernel/time.c new file mode 100644 index 000000000000..e293ac56c723 --- /dev/null +++ b/arch/x86/kernel/time.c | |||
@@ -0,0 +1,121 @@ | |||
1 | /* | ||
2 | * Copyright (c) 1991,1992,1995 Linus Torvalds | ||
3 | * Copyright (c) 1994 Alan Modra | ||
4 | * Copyright (c) 1995 Markus Kuhn | ||
5 | * Copyright (c) 1996 Ingo Molnar | ||
6 | * Copyright (c) 1998 Andrea Arcangeli | ||
7 | * Copyright (c) 2002,2006 Vojtech Pavlik | ||
8 | * Copyright (c) 2003 Andi Kleen | ||
9 | * | ||
10 | */ | ||
11 | |||
12 | #include <linux/clockchips.h> | ||
13 | #include <linux/interrupt.h> | ||
14 | #include <linux/time.h> | ||
15 | #include <linux/mca.h> | ||
16 | |||
17 | #include <asm/vsyscall.h> | ||
18 | #include <asm/x86_init.h> | ||
19 | #include <asm/i8259.h> | ||
20 | #include <asm/i8253.h> | ||
21 | #include <asm/timer.h> | ||
22 | #include <asm/hpet.h> | ||
23 | #include <asm/time.h> | ||
24 | |||
25 | #if defined(CONFIG_X86_32) && defined(CONFIG_X86_IO_APIC) | ||
26 | int timer_ack; | ||
27 | #endif | ||
28 | |||
29 | #ifdef CONFIG_X86_64 | ||
30 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | ||
31 | #endif | ||
32 | |||
33 | unsigned long profile_pc(struct pt_regs *regs) | ||
34 | { | ||
35 | unsigned long pc = instruction_pointer(regs); | ||
36 | |||
37 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | ||
38 | #ifdef CONFIG_FRAME_POINTER | ||
39 | return *(unsigned long *)(regs->bp + sizeof(long)); | ||
40 | #else | ||
41 | unsigned long *sp = (unsigned long *)regs->sp; | ||
42 | /* | ||
43 | * Return address is either directly at stack pointer | ||
44 | * or above a saved flags. Eflags has bits 22-31 zero, | ||
45 | * kernel addresses don't. | ||
46 | */ | ||
47 | if (sp[0] >> 22) | ||
48 | return sp[0]; | ||
49 | if (sp[1] >> 22) | ||
50 | return sp[1]; | ||
51 | #endif | ||
52 | } | ||
53 | return pc; | ||
54 | } | ||
55 | EXPORT_SYMBOL(profile_pc); | ||
56 | |||
57 | /* | ||
58 | * Default timer interrupt handler for PIT/HPET | ||
59 | */ | ||
60 | static irqreturn_t timer_interrupt(int irq, void *dev_id) | ||
61 | { | ||
62 | /* Keep nmi watchdog up to date */ | ||
63 | inc_irq_stat(irq0_irqs); | ||
64 | |||
65 | /* Optimized out for !IO_APIC and x86_64 */ | ||
66 | if (timer_ack) { | ||
67 | /* | ||
68 | * Subtle, when I/O APICs are used we have to ack timer IRQ | ||
69 | * manually to deassert NMI lines for the watchdog if run | ||
70 | * on an 82489DX-based system. | ||
71 | */ | ||
72 | spin_lock(&i8259A_lock); | ||
73 | outb(0x0c, PIC_MASTER_OCW3); | ||
74 | /* Ack the IRQ; AEOI will end it automatically. */ | ||
75 | inb(PIC_MASTER_POLL); | ||
76 | spin_unlock(&i8259A_lock); | ||
77 | } | ||
78 | |||
79 | global_clock_event->event_handler(global_clock_event); | ||
80 | |||
81 | /* MCA bus quirk: Acknowledge irq0 by setting bit 7 in port 0x61 */ | ||
82 | if (MCA_bus) | ||
83 | outb_p(inb_p(0x61)| 0x80, 0x61); | ||
84 | |||
85 | return IRQ_HANDLED; | ||
86 | } | ||
87 | |||
88 | static struct irqaction irq0 = { | ||
89 | .handler = timer_interrupt, | ||
90 | .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER, | ||
91 | .name = "timer" | ||
92 | }; | ||
93 | |||
94 | void __init setup_default_timer_irq(void) | ||
95 | { | ||
96 | irq0.mask = cpumask_of_cpu(0); | ||
97 | setup_irq(0, &irq0); | ||
98 | } | ||
99 | |||
100 | /* Default timer init function */ | ||
101 | void __init hpet_time_init(void) | ||
102 | { | ||
103 | if (!hpet_enable()) | ||
104 | setup_pit_timer(); | ||
105 | setup_default_timer_irq(); | ||
106 | } | ||
107 | |||
108 | static __init void x86_late_time_init(void) | ||
109 | { | ||
110 | x86_init.timers.timer_init(); | ||
111 | tsc_init(); | ||
112 | } | ||
113 | |||
114 | /* | ||
115 | * Initialize TSC and delay the periodic timer init to | ||
116 | * late x86_late_time_init() so ioremap works. | ||
117 | */ | ||
118 | void __init time_init(void) | ||
119 | { | ||
120 | late_time_init = x86_late_time_init; | ||
121 | } | ||
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c deleted file mode 100644 index 5c5d87f0b2e1..000000000000 --- a/arch/x86/kernel/time_32.c +++ /dev/null | |||
@@ -1,137 +0,0 @@ | |||
1 | /* | ||
2 | * Copyright (C) 1991, 1992, 1995 Linus Torvalds | ||
3 | * | ||
4 | * This file contains the PC-specific time handling details: | ||
5 | * reading the RTC at bootup, etc.. | ||
6 | * 1994-07-02 Alan Modra | ||
7 | * fixed set_rtc_mmss, fixed time.year for >= 2000, new mktime | ||
8 | * 1995-03-26 Markus Kuhn | ||
9 | * fixed 500 ms bug at call to set_rtc_mmss, fixed DS12887 | ||
10 | * precision CMOS clock update | ||
11 | * 1996-05-03 Ingo Molnar | ||
12 | * fixed time warps in do_[slow|fast]_gettimeoffset() | ||
13 | * 1997-09-10 Updated NTP code according to technical memorandum Jan '96 | ||
14 | * "A Kernel Model for Precision Timekeeping" by Dave Mills | ||
15 | * 1998-09-05 (Various) | ||
16 | * More robust do_fast_gettimeoffset() algorithm implemented | ||
17 | * (works with APM, Cyrix 6x86MX and Centaur C6), | ||
18 | * monotonic gettimeofday() with fast_get_timeoffset(), | ||
19 | * drift-proof precision TSC calibration on boot | ||
20 | * (C. Scott Ananian <cananian@alumni.princeton.edu>, Andrew D. | ||
21 | * Balsa <andrebalsa@altern.org>, Philip Gladstone <philip@raptor.com>; | ||
22 | * ported from 2.0.35 Jumbo-9 by Michael Krause <m.krause@tu-harburg.de>). | ||
23 | * 1998-12-16 Andrea Arcangeli | ||
24 | * Fixed Jumbo-9 code in 2.1.131: do_gettimeofday was missing 1 jiffy | ||
25 | * because was not accounting lost_ticks. | ||
26 | * 1998-12-24 Copyright (C) 1998 Andrea Arcangeli | ||
27 | * Fixed a xtime SMP race (we need the xtime_lock rw spinlock to | ||
28 | * serialize accesses to xtime/lost_ticks). | ||
29 | */ | ||
30 | |||
31 | #include <linux/init.h> | ||
32 | #include <linux/interrupt.h> | ||
33 | #include <linux/time.h> | ||
34 | #include <linux/mca.h> | ||
35 | |||
36 | #include <asm/setup.h> | ||
37 | #include <asm/hpet.h> | ||
38 | #include <asm/time.h> | ||
39 | #include <asm/timer.h> | ||
40 | |||
41 | #include <asm/do_timer.h> | ||
42 | |||
43 | int timer_ack; | ||
44 | |||
45 | unsigned long profile_pc(struct pt_regs *regs) | ||
46 | { | ||
47 | unsigned long pc = instruction_pointer(regs); | ||
48 | |||
49 | #ifdef CONFIG_SMP | ||
50 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | ||
51 | #ifdef CONFIG_FRAME_POINTER | ||
52 | return *(unsigned long *)(regs->bp + sizeof(long)); | ||
53 | #else | ||
54 | unsigned long *sp = (unsigned long *)®s->sp; | ||
55 | |||
56 | /* Return address is either directly at stack pointer | ||
57 | or above a saved flags. Eflags has bits 22-31 zero, | ||
58 | kernel addresses don't. */ | ||
59 | if (sp[0] >> 22) | ||
60 | return sp[0]; | ||
61 | if (sp[1] >> 22) | ||
62 | return sp[1]; | ||
63 | #endif | ||
64 | } | ||
65 | #endif | ||
66 | return pc; | ||
67 | } | ||
68 | EXPORT_SYMBOL(profile_pc); | ||
69 | |||
70 | /* | ||
71 | * This is the same as the above, except we _also_ save the current | ||
72 | * Time Stamp Counter value at the time of the timer interrupt, so that | ||
73 | * we later on can estimate the time of day more exactly. | ||
74 | */ | ||
75 | irqreturn_t timer_interrupt(int irq, void *dev_id) | ||
76 | { | ||
77 | /* Keep nmi watchdog up to date */ | ||
78 | inc_irq_stat(irq0_irqs); | ||
79 | |||
80 | #ifdef CONFIG_X86_IO_APIC | ||
81 | if (timer_ack) { | ||
82 | /* | ||
83 | * Subtle, when I/O APICs are used we have to ack timer IRQ | ||
84 | * manually to deassert NMI lines for the watchdog if run | ||
85 | * on an 82489DX-based system. | ||
86 | */ | ||
87 | spin_lock(&i8259A_lock); | ||
88 | outb(0x0c, PIC_MASTER_OCW3); | ||
89 | /* Ack the IRQ; AEOI will end it automatically. */ | ||
90 | inb(PIC_MASTER_POLL); | ||
91 | spin_unlock(&i8259A_lock); | ||
92 | } | ||
93 | #endif | ||
94 | |||
95 | do_timer_interrupt_hook(); | ||
96 | |||
97 | #ifdef CONFIG_MCA | ||
98 | if (MCA_bus) { | ||
99 | /* The PS/2 uses level-triggered interrupts. You can't | ||
100 | turn them off, nor would you want to (any attempt to | ||
101 | enable edge-triggered interrupts usually gets intercepted by a | ||
102 | special hardware circuit). Hence we have to acknowledge | ||
103 | the timer interrupt. Through some incredibly stupid | ||
104 | design idea, the reset for IRQ 0 is done by setting the | ||
105 | high bit of the PPI port B (0x61). Note that some PS/2s, | ||
106 | notably the 55SX, work fine if this is removed. */ | ||
107 | |||
108 | u8 irq_v = inb_p(0x61); /* read the current state */ | ||
109 | outb_p(irq_v | 0x80, 0x61); /* reset the IRQ */ | ||
110 | } | ||
111 | #endif | ||
112 | |||
113 | return IRQ_HANDLED; | ||
114 | } | ||
115 | |||
116 | /* Duplicate of time_init() below, with hpet_enable part added */ | ||
117 | void __init hpet_time_init(void) | ||
118 | { | ||
119 | if (!hpet_enable()) | ||
120 | setup_pit_timer(); | ||
121 | x86_quirk_time_init(); | ||
122 | } | ||
123 | |||
124 | /* | ||
125 | * This is called directly from init code; we must delay timer setup in the | ||
126 | * HPET case as we can't make the decision to turn on HPET this early in the | ||
127 | * boot process. | ||
128 | * | ||
129 | * The chosen time_init function will usually be hpet_time_init, above, but | ||
130 | * in the case of virtual hardware, an alternative function may be substituted. | ||
131 | */ | ||
132 | void __init time_init(void) | ||
133 | { | ||
134 | x86_quirk_pre_time_init(); | ||
135 | tsc_init(); | ||
136 | late_time_init = choose_time_init(); | ||
137 | } | ||
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c deleted file mode 100644 index 5ba343e61844..000000000000 --- a/arch/x86/kernel/time_64.c +++ /dev/null | |||
@@ -1,135 +0,0 @@ | |||
1 | /* | ||
2 | * "High Precision Event Timer" based timekeeping. | ||
3 | * | ||
4 | * Copyright (c) 1991,1992,1995 Linus Torvalds | ||
5 | * Copyright (c) 1994 Alan Modra | ||
6 | * Copyright (c) 1995 Markus Kuhn | ||
7 | * Copyright (c) 1996 Ingo Molnar | ||
8 | * Copyright (c) 1998 Andrea Arcangeli | ||
9 | * Copyright (c) 2002,2006 Vojtech Pavlik | ||
10 | * Copyright (c) 2003 Andi Kleen | ||
11 | * RTC support code taken from arch/i386/kernel/timers/time_hpet.c | ||
12 | */ | ||
13 | |||
14 | #include <linux/clockchips.h> | ||
15 | #include <linux/init.h> | ||
16 | #include <linux/interrupt.h> | ||
17 | #include <linux/module.h> | ||
18 | #include <linux/time.h> | ||
19 | #include <linux/mca.h> | ||
20 | #include <linux/nmi.h> | ||
21 | |||
22 | #include <asm/i8253.h> | ||
23 | #include <asm/hpet.h> | ||
24 | #include <asm/vgtod.h> | ||
25 | #include <asm/time.h> | ||
26 | #include <asm/timer.h> | ||
27 | |||
28 | volatile unsigned long __jiffies __section_jiffies = INITIAL_JIFFIES; | ||
29 | |||
30 | unsigned long profile_pc(struct pt_regs *regs) | ||
31 | { | ||
32 | unsigned long pc = instruction_pointer(regs); | ||
33 | |||
34 | /* Assume the lock function has either no stack frame or a copy | ||
35 | of flags from PUSHF | ||
36 | Eflags always has bits 22 and up cleared unlike kernel addresses. */ | ||
37 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | ||
38 | #ifdef CONFIG_FRAME_POINTER | ||
39 | return *(unsigned long *)(regs->bp + sizeof(long)); | ||
40 | #else | ||
41 | unsigned long *sp = (unsigned long *)regs->sp; | ||
42 | if (sp[0] >> 22) | ||
43 | return sp[0]; | ||
44 | if (sp[1] >> 22) | ||
45 | return sp[1]; | ||
46 | #endif | ||
47 | } | ||
48 | return pc; | ||
49 | } | ||
50 | EXPORT_SYMBOL(profile_pc); | ||
51 | |||
52 | static irqreturn_t timer_interrupt(int irq, void *dev_id) | ||
53 | { | ||
54 | inc_irq_stat(irq0_irqs); | ||
55 | |||
56 | global_clock_event->event_handler(global_clock_event); | ||
57 | |||
58 | #ifdef CONFIG_MCA | ||
59 | if (MCA_bus) { | ||
60 | u8 irq_v = inb_p(0x61); /* read the current state */ | ||
61 | outb_p(irq_v|0x80, 0x61); /* reset the IRQ */ | ||
62 | } | ||
63 | #endif | ||
64 | |||
65 | return IRQ_HANDLED; | ||
66 | } | ||
67 | |||
68 | /* calibrate_cpu is used on systems with fixed rate TSCs to determine | ||
69 | * processor frequency */ | ||
70 | #define TICK_COUNT 100000000 | ||
71 | unsigned long __init calibrate_cpu(void) | ||
72 | { | ||
73 | int tsc_start, tsc_now; | ||
74 | int i, no_ctr_free; | ||
75 | unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; | ||
76 | unsigned long flags; | ||
77 | |||
78 | for (i = 0; i < 4; i++) | ||
79 | if (avail_to_resrv_perfctr_nmi_bit(i)) | ||
80 | break; | ||
81 | no_ctr_free = (i == 4); | ||
82 | if (no_ctr_free) { | ||
83 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
84 | "cpu_khz value may be incorrect.\n"); | ||
85 | i = 3; | ||
86 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
87 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
88 | rdmsrl(MSR_K7_PERFCTR3, pmc3); | ||
89 | } else { | ||
90 | reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
91 | reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
92 | } | ||
93 | local_irq_save(flags); | ||
94 | /* start measuring cycles, incrementing from 0 */ | ||
95 | wrmsrl(MSR_K7_PERFCTR0 + i, 0); | ||
96 | wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); | ||
97 | rdtscl(tsc_start); | ||
98 | do { | ||
99 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); | ||
100 | tsc_now = get_cycles(); | ||
101 | } while ((tsc_now - tsc_start) < TICK_COUNT); | ||
102 | |||
103 | local_irq_restore(flags); | ||
104 | if (no_ctr_free) { | ||
105 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
106 | wrmsrl(MSR_K7_PERFCTR3, pmc3); | ||
107 | wrmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
108 | } else { | ||
109 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
110 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
111 | } | ||
112 | |||
113 | return pmc_now * tsc_khz / (tsc_now - tsc_start); | ||
114 | } | ||
115 | |||
116 | static struct irqaction irq0 = { | ||
117 | .handler = timer_interrupt, | ||
118 | .flags = IRQF_DISABLED | IRQF_IRQPOLL | IRQF_NOBALANCING | IRQF_TIMER, | ||
119 | .name = "timer" | ||
120 | }; | ||
121 | |||
122 | void __init hpet_time_init(void) | ||
123 | { | ||
124 | if (!hpet_enable()) | ||
125 | setup_pit_timer(); | ||
126 | |||
127 | setup_irq(0, &irq0); | ||
128 | } | ||
129 | |||
130 | void __init time_init(void) | ||
131 | { | ||
132 | tsc_init(); | ||
133 | |||
134 | late_time_init = choose_time_init(); | ||
135 | } | ||
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c index 77b9689f8edb..503c1f2e8835 100644 --- a/arch/x86/kernel/tlb_uv.c +++ b/arch/x86/kernel/tlb_uv.c | |||
@@ -640,13 +640,13 @@ static int __init uv_ptc_init(void) | |||
640 | if (!is_uv_system()) | 640 | if (!is_uv_system()) |
641 | return 0; | 641 | return 0; |
642 | 642 | ||
643 | proc_uv_ptc = create_proc_entry(UV_PTC_BASENAME, 0444, NULL); | 643 | proc_uv_ptc = proc_create(UV_PTC_BASENAME, 0444, NULL, |
644 | &proc_uv_ptc_operations); | ||
644 | if (!proc_uv_ptc) { | 645 | if (!proc_uv_ptc) { |
645 | printk(KERN_ERR "unable to create %s proc entry\n", | 646 | printk(KERN_ERR "unable to create %s proc entry\n", |
646 | UV_PTC_BASENAME); | 647 | UV_PTC_BASENAME); |
647 | return -EINVAL; | 648 | return -EINVAL; |
648 | } | 649 | } |
649 | proc_uv_ptc->proc_fops = &proc_uv_ptc_operations; | ||
650 | return 0; | 650 | return 0; |
651 | } | 651 | } |
652 | 652 | ||
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 5204332f475d..7dc0de9d1ed9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c | |||
@@ -59,6 +59,7 @@ | |||
59 | #include <asm/mach_traps.h> | 59 | #include <asm/mach_traps.h> |
60 | 60 | ||
61 | #ifdef CONFIG_X86_64 | 61 | #ifdef CONFIG_X86_64 |
62 | #include <asm/x86_init.h> | ||
62 | #include <asm/pgalloc.h> | 63 | #include <asm/pgalloc.h> |
63 | #include <asm/proto.h> | 64 | #include <asm/proto.h> |
64 | #else | 65 | #else |
@@ -76,7 +77,7 @@ char ignore_fpu_irq; | |||
76 | * F0 0F bug workaround.. We have a special link segment | 77 | * F0 0F bug workaround.. We have a special link segment |
77 | * for this. | 78 | * for this. |
78 | */ | 79 | */ |
79 | gate_desc idt_table[256] | 80 | gate_desc idt_table[NR_VECTORS] |
80 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; | 81 | __attribute__((__section__(".data.idt"))) = { { { { 0, 0 } } }, }; |
81 | #endif | 82 | #endif |
82 | 83 | ||
@@ -786,33 +787,34 @@ do_spurious_interrupt_bug(struct pt_regs *regs, long error_code) | |||
786 | #endif | 787 | #endif |
787 | } | 788 | } |
788 | 789 | ||
789 | #ifdef CONFIG_X86_32 | 790 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) |
790 | unsigned long patch_espfix_desc(unsigned long uesp, unsigned long kesp) | ||
791 | { | 791 | { |
792 | struct desc_struct *gdt = get_cpu_gdt_table(smp_processor_id()); | ||
793 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; | ||
794 | unsigned long new_kesp = kesp - base; | ||
795 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | ||
796 | __u64 desc = *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS]; | ||
797 | |||
798 | /* Set up base for espfix segment */ | ||
799 | desc &= 0x00f0ff0000000000ULL; | ||
800 | desc |= ((((__u64)base) << 16) & 0x000000ffffff0000ULL) | | ||
801 | ((((__u64)base) << 32) & 0xff00000000000000ULL) | | ||
802 | ((((__u64)lim_pages) << 32) & 0x000f000000000000ULL) | | ||
803 | (lim_pages & 0xffff); | ||
804 | *(__u64 *)&gdt[GDT_ENTRY_ESPFIX_SS] = desc; | ||
805 | |||
806 | return new_kesp; | ||
807 | } | 792 | } |
808 | #endif | ||
809 | 793 | ||
810 | asmlinkage void __attribute__((weak)) smp_thermal_interrupt(void) | 794 | asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) |
811 | { | 795 | { |
812 | } | 796 | } |
813 | 797 | ||
814 | asmlinkage void __attribute__((weak)) smp_threshold_interrupt(void) | 798 | /* |
799 | * __math_state_restore assumes that cr0.TS is already clear and the | ||
800 | * fpu state is all ready for use. Used during context switch. | ||
801 | */ | ||
802 | void __math_state_restore(void) | ||
815 | { | 803 | { |
804 | struct thread_info *thread = current_thread_info(); | ||
805 | struct task_struct *tsk = thread->task; | ||
806 | |||
807 | /* | ||
808 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
809 | */ | ||
810 | if (unlikely(restore_fpu_checking(tsk))) { | ||
811 | stts(); | ||
812 | force_sig(SIGSEGV, tsk); | ||
813 | return; | ||
814 | } | ||
815 | |||
816 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | ||
817 | tsk->fpu_counter++; | ||
816 | } | 818 | } |
817 | 819 | ||
818 | /* | 820 | /* |
@@ -846,17 +848,8 @@ asmlinkage void math_state_restore(void) | |||
846 | } | 848 | } |
847 | 849 | ||
848 | clts(); /* Allow maths ops (or we recurse) */ | 850 | clts(); /* Allow maths ops (or we recurse) */ |
849 | /* | ||
850 | * Paranoid restore. send a SIGSEGV if we fail to restore the state. | ||
851 | */ | ||
852 | if (unlikely(restore_fpu_checking(tsk))) { | ||
853 | stts(); | ||
854 | force_sig(SIGSEGV, tsk); | ||
855 | return; | ||
856 | } | ||
857 | 851 | ||
858 | thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ | 852 | __math_state_restore(); |
859 | tsk->fpu_counter++; | ||
860 | } | 853 | } |
861 | EXPORT_SYMBOL_GPL(math_state_restore); | 854 | EXPORT_SYMBOL_GPL(math_state_restore); |
862 | 855 | ||
@@ -980,7 +973,5 @@ void __init trap_init(void) | |||
980 | */ | 973 | */ |
981 | cpu_init(); | 974 | cpu_init(); |
982 | 975 | ||
983 | #ifdef CONFIG_X86_32 | 976 | x86_init.irqs.trap_init(); |
984 | x86_quirk_trap_init(); | ||
985 | #endif | ||
986 | } | 977 | } |
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 71f4368b357e..17409e8d1097 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c | |||
@@ -17,6 +17,8 @@ | |||
17 | #include <asm/time.h> | 17 | #include <asm/time.h> |
18 | #include <asm/delay.h> | 18 | #include <asm/delay.h> |
19 | #include <asm/hypervisor.h> | 19 | #include <asm/hypervisor.h> |
20 | #include <asm/nmi.h> | ||
21 | #include <asm/x86_init.h> | ||
20 | 22 | ||
21 | unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ | 23 | unsigned int __read_mostly cpu_khz; /* TSC clocks / usec, not used here */ |
22 | EXPORT_SYMBOL(cpu_khz); | 24 | EXPORT_SYMBOL(cpu_khz); |
@@ -400,15 +402,9 @@ unsigned long native_calibrate_tsc(void) | |||
400 | { | 402 | { |
401 | u64 tsc1, tsc2, delta, ref1, ref2; | 403 | u64 tsc1, tsc2, delta, ref1, ref2; |
402 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; | 404 | unsigned long tsc_pit_min = ULONG_MAX, tsc_ref_min = ULONG_MAX; |
403 | unsigned long flags, latch, ms, fast_calibrate, hv_tsc_khz; | 405 | unsigned long flags, latch, ms, fast_calibrate; |
404 | int hpet = is_hpet_enabled(), i, loopmin; | 406 | int hpet = is_hpet_enabled(), i, loopmin; |
405 | 407 | ||
406 | hv_tsc_khz = get_hypervisor_tsc_freq(); | ||
407 | if (hv_tsc_khz) { | ||
408 | printk(KERN_INFO "TSC: Frequency read from the hypervisor\n"); | ||
409 | return hv_tsc_khz; | ||
410 | } | ||
411 | |||
412 | local_irq_save(flags); | 408 | local_irq_save(flags); |
413 | fast_calibrate = quick_pit_calibrate(); | 409 | fast_calibrate = quick_pit_calibrate(); |
414 | local_irq_restore(flags); | 410 | local_irq_restore(flags); |
@@ -566,7 +562,7 @@ int recalibrate_cpu_khz(void) | |||
566 | unsigned long cpu_khz_old = cpu_khz; | 562 | unsigned long cpu_khz_old = cpu_khz; |
567 | 563 | ||
568 | if (cpu_has_tsc) { | 564 | if (cpu_has_tsc) { |
569 | tsc_khz = calibrate_tsc(); | 565 | tsc_khz = x86_platform.calibrate_tsc(); |
570 | cpu_khz = tsc_khz; | 566 | cpu_khz = tsc_khz; |
571 | cpu_data(0).loops_per_jiffy = | 567 | cpu_data(0).loops_per_jiffy = |
572 | cpufreq_scale(cpu_data(0).loops_per_jiffy, | 568 | cpufreq_scale(cpu_data(0).loops_per_jiffy, |
@@ -744,10 +740,16 @@ static cycle_t __vsyscall_fn vread_tsc(void) | |||
744 | } | 740 | } |
745 | #endif | 741 | #endif |
746 | 742 | ||
743 | static void resume_tsc(void) | ||
744 | { | ||
745 | clocksource_tsc.cycle_last = 0; | ||
746 | } | ||
747 | |||
747 | static struct clocksource clocksource_tsc = { | 748 | static struct clocksource clocksource_tsc = { |
748 | .name = "tsc", | 749 | .name = "tsc", |
749 | .rating = 300, | 750 | .rating = 300, |
750 | .read = read_tsc, | 751 | .read = read_tsc, |
752 | .resume = resume_tsc, | ||
751 | .mask = CLOCKSOURCE_MASK(64), | 753 | .mask = CLOCKSOURCE_MASK(64), |
752 | .shift = 22, | 754 | .shift = 22, |
753 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | | 755 | .flags = CLOCK_SOURCE_IS_CONTINUOUS | |
@@ -761,12 +763,14 @@ void mark_tsc_unstable(char *reason) | |||
761 | { | 763 | { |
762 | if (!tsc_unstable) { | 764 | if (!tsc_unstable) { |
763 | tsc_unstable = 1; | 765 | tsc_unstable = 1; |
764 | printk("Marking TSC unstable due to %s\n", reason); | 766 | printk(KERN_INFO "Marking TSC unstable due to %s\n", reason); |
765 | /* Change only the rating, when not registered */ | 767 | /* Change only the rating, when not registered */ |
766 | if (clocksource_tsc.mult) | 768 | if (clocksource_tsc.mult) |
767 | clocksource_change_rating(&clocksource_tsc, 0); | 769 | clocksource_mark_unstable(&clocksource_tsc); |
768 | else | 770 | else { |
771 | clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; | ||
769 | clocksource_tsc.rating = 0; | 772 | clocksource_tsc.rating = 0; |
773 | } | ||
770 | } | 774 | } |
771 | } | 775 | } |
772 | 776 | ||
@@ -852,15 +856,71 @@ static void __init init_tsc_clocksource(void) | |||
852 | clocksource_register(&clocksource_tsc); | 856 | clocksource_register(&clocksource_tsc); |
853 | } | 857 | } |
854 | 858 | ||
859 | #ifdef CONFIG_X86_64 | ||
860 | /* | ||
861 | * calibrate_cpu is used on systems with fixed rate TSCs to determine | ||
862 | * processor frequency | ||
863 | */ | ||
864 | #define TICK_COUNT 100000000 | ||
865 | static unsigned long __init calibrate_cpu(void) | ||
866 | { | ||
867 | int tsc_start, tsc_now; | ||
868 | int i, no_ctr_free; | ||
869 | unsigned long evntsel3 = 0, pmc3 = 0, pmc_now = 0; | ||
870 | unsigned long flags; | ||
871 | |||
872 | for (i = 0; i < 4; i++) | ||
873 | if (avail_to_resrv_perfctr_nmi_bit(i)) | ||
874 | break; | ||
875 | no_ctr_free = (i == 4); | ||
876 | if (no_ctr_free) { | ||
877 | WARN(1, KERN_WARNING "Warning: AMD perfctrs busy ... " | ||
878 | "cpu_khz value may be incorrect.\n"); | ||
879 | i = 3; | ||
880 | rdmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
881 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
882 | rdmsrl(MSR_K7_PERFCTR3, pmc3); | ||
883 | } else { | ||
884 | reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
885 | reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
886 | } | ||
887 | local_irq_save(flags); | ||
888 | /* start measuring cycles, incrementing from 0 */ | ||
889 | wrmsrl(MSR_K7_PERFCTR0 + i, 0); | ||
890 | wrmsrl(MSR_K7_EVNTSEL0 + i, 1 << 22 | 3 << 16 | 0x76); | ||
891 | rdtscl(tsc_start); | ||
892 | do { | ||
893 | rdmsrl(MSR_K7_PERFCTR0 + i, pmc_now); | ||
894 | tsc_now = get_cycles(); | ||
895 | } while ((tsc_now - tsc_start) < TICK_COUNT); | ||
896 | |||
897 | local_irq_restore(flags); | ||
898 | if (no_ctr_free) { | ||
899 | wrmsrl(MSR_K7_EVNTSEL3, 0); | ||
900 | wrmsrl(MSR_K7_PERFCTR3, pmc3); | ||
901 | wrmsrl(MSR_K7_EVNTSEL3, evntsel3); | ||
902 | } else { | ||
903 | release_perfctr_nmi(MSR_K7_PERFCTR0 + i); | ||
904 | release_evntsel_nmi(MSR_K7_EVNTSEL0 + i); | ||
905 | } | ||
906 | |||
907 | return pmc_now * tsc_khz / (tsc_now - tsc_start); | ||
908 | } | ||
909 | #else | ||
910 | static inline unsigned long calibrate_cpu(void) { return cpu_khz; } | ||
911 | #endif | ||
912 | |||
855 | void __init tsc_init(void) | 913 | void __init tsc_init(void) |
856 | { | 914 | { |
857 | u64 lpj; | 915 | u64 lpj; |
858 | int cpu; | 916 | int cpu; |
859 | 917 | ||
918 | x86_init.timers.tsc_pre_init(); | ||
919 | |||
860 | if (!cpu_has_tsc) | 920 | if (!cpu_has_tsc) |
861 | return; | 921 | return; |
862 | 922 | ||
863 | tsc_khz = calibrate_tsc(); | 923 | tsc_khz = x86_platform.calibrate_tsc(); |
864 | cpu_khz = tsc_khz; | 924 | cpu_khz = tsc_khz; |
865 | 925 | ||
866 | if (!tsc_khz) { | 926 | if (!tsc_khz) { |
@@ -868,11 +928,9 @@ void __init tsc_init(void) | |||
868 | return; | 928 | return; |
869 | } | 929 | } |
870 | 930 | ||
871 | #ifdef CONFIG_X86_64 | ||
872 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && | 931 | if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) && |
873 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) | 932 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)) |
874 | cpu_khz = calibrate_cpu(); | 933 | cpu_khz = calibrate_cpu(); |
875 | #endif | ||
876 | 934 | ||
877 | printk("Detected %lu.%03lu MHz processor.\n", | 935 | printk("Detected %lu.%03lu MHz processor.\n", |
878 | (unsigned long)cpu_khz / 1000, | 936 | (unsigned long)cpu_khz / 1000, |
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c index 31ffc24eec4d..f068553a1b17 100644 --- a/arch/x86/kernel/visws_quirks.c +++ b/arch/x86/kernel/visws_quirks.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/setup.h> | 30 | #include <asm/setup.h> |
31 | #include <asm/apic.h> | 31 | #include <asm/apic.h> |
32 | #include <asm/e820.h> | 32 | #include <asm/e820.h> |
33 | #include <asm/time.h> | ||
33 | #include <asm/io.h> | 34 | #include <asm/io.h> |
34 | 35 | ||
35 | #include <linux/kernel_stat.h> | 36 | #include <linux/kernel_stat.h> |
@@ -53,7 +54,7 @@ int is_visws_box(void) | |||
53 | return visws_board_type >= 0; | 54 | return visws_board_type >= 0; |
54 | } | 55 | } |
55 | 56 | ||
56 | static int __init visws_time_init(void) | 57 | static void __init visws_time_init(void) |
57 | { | 58 | { |
58 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); | 59 | printk(KERN_INFO "Starting Cobalt Timer system clock\n"); |
59 | 60 | ||
@@ -66,21 +67,13 @@ static int __init visws_time_init(void) | |||
66 | /* Enable (unmask) the timer interrupt */ | 67 | /* Enable (unmask) the timer interrupt */ |
67 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); | 68 | co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK); |
68 | 69 | ||
69 | /* | 70 | setup_default_timer_irq(); |
70 | * Zero return means the generic timer setup code will set up | ||
71 | * the standard vector: | ||
72 | */ | ||
73 | return 0; | ||
74 | } | 71 | } |
75 | 72 | ||
76 | static int __init visws_pre_intr_init(void) | 73 | /* Replaces the default init_ISA_irqs in the generic setup */ |
74 | static void __init visws_pre_intr_init(void) | ||
77 | { | 75 | { |
78 | init_VISWS_APIC_irqs(); | 76 | init_VISWS_APIC_irqs(); |
79 | |||
80 | /* | ||
81 | * We dont want ISA irqs to be set up by the generic code: | ||
82 | */ | ||
83 | return 1; | ||
84 | } | 77 | } |
85 | 78 | ||
86 | /* Quirk for machine specific memory setup. */ | 79 | /* Quirk for machine specific memory setup. */ |
@@ -156,12 +149,8 @@ static void visws_machine_power_off(void) | |||
156 | outl(PIIX_SPECIAL_STOP, 0xCFC); | 149 | outl(PIIX_SPECIAL_STOP, 0xCFC); |
157 | } | 150 | } |
158 | 151 | ||
159 | static int __init visws_get_smp_config(unsigned int early) | 152 | static void __init visws_get_smp_config(unsigned int early) |
160 | { | 153 | { |
161 | /* | ||
162 | * Prevent MP-table parsing by the generic code: | ||
163 | */ | ||
164 | return 1; | ||
165 | } | 154 | } |
166 | 155 | ||
167 | /* | 156 | /* |
@@ -208,7 +197,7 @@ static void __init MP_processor_info(struct mpc_cpu *m) | |||
208 | apic_version[m->apicid] = ver; | 197 | apic_version[m->apicid] = ver; |
209 | } | 198 | } |
210 | 199 | ||
211 | static int __init visws_find_smp_config(unsigned int reserve) | 200 | static void __init visws_find_smp_config(unsigned int reserve) |
212 | { | 201 | { |
213 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); | 202 | struct mpc_cpu *mp = phys_to_virt(CO_CPU_TAB_PHYS); |
214 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); | 203 | unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS)); |
@@ -230,21 +219,9 @@ static int __init visws_find_smp_config(unsigned int reserve) | |||
230 | MP_processor_info(mp++); | 219 | MP_processor_info(mp++); |
231 | 220 | ||
232 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; | 221 | mp_lapic_addr = APIC_DEFAULT_PHYS_BASE; |
233 | |||
234 | return 1; | ||
235 | } | 222 | } |
236 | 223 | ||
237 | static int visws_trap_init(void); | 224 | static void visws_trap_init(void); |
238 | |||
239 | static struct x86_quirks visws_x86_quirks __initdata = { | ||
240 | .arch_time_init = visws_time_init, | ||
241 | .arch_pre_intr_init = visws_pre_intr_init, | ||
242 | .arch_memory_setup = visws_memory_setup, | ||
243 | .arch_intr_init = NULL, | ||
244 | .arch_trap_init = visws_trap_init, | ||
245 | .mach_get_smp_config = visws_get_smp_config, | ||
246 | .mach_find_smp_config = visws_find_smp_config, | ||
247 | }; | ||
248 | 225 | ||
249 | void __init visws_early_detect(void) | 226 | void __init visws_early_detect(void) |
250 | { | 227 | { |
@@ -257,11 +234,14 @@ void __init visws_early_detect(void) | |||
257 | return; | 234 | return; |
258 | 235 | ||
259 | /* | 236 | /* |
260 | * Install special quirks for timer, interrupt and memory setup: | 237 | * Override the default platform setup functions |
261 | * Fall back to generic behavior for traps: | ||
262 | * Override generic MP-table parsing: | ||
263 | */ | 238 | */ |
264 | x86_quirks = &visws_x86_quirks; | 239 | x86_init.resources.memory_setup = visws_memory_setup; |
240 | x86_init.mpparse.get_smp_config = visws_get_smp_config; | ||
241 | x86_init.mpparse.find_smp_config = visws_find_smp_config; | ||
242 | x86_init.irqs.pre_vector_init = visws_pre_intr_init; | ||
243 | x86_init.irqs.trap_init = visws_trap_init; | ||
244 | x86_init.timers.timer_init = visws_time_init; | ||
265 | 245 | ||
266 | /* | 246 | /* |
267 | * Install reboot quirks: | 247 | * Install reboot quirks: |
@@ -400,12 +380,10 @@ static __init void cobalt_init(void) | |||
400 | co_apic_read(CO_APIC_ID)); | 380 | co_apic_read(CO_APIC_ID)); |
401 | } | 381 | } |
402 | 382 | ||
403 | static int __init visws_trap_init(void) | 383 | static void __init visws_trap_init(void) |
404 | { | 384 | { |
405 | lithium_init(); | 385 | lithium_init(); |
406 | cobalt_init(); | 386 | cobalt_init(); |
407 | |||
408 | return 1; | ||
409 | } | 387 | } |
410 | 388 | ||
411 | /* | 389 | /* |
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c index 95a7289e4b0c..31e6f6cfe53e 100644 --- a/arch/x86/kernel/vmi_32.c +++ b/arch/x86/kernel/vmi_32.c | |||
@@ -817,15 +817,15 @@ static inline int __init activate_vmi(void) | |||
817 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); | 817 | vmi_timer_ops.set_alarm = vmi_get_function(VMI_CALL_SetAlarm); |
818 | vmi_timer_ops.cancel_alarm = | 818 | vmi_timer_ops.cancel_alarm = |
819 | vmi_get_function(VMI_CALL_CancelAlarm); | 819 | vmi_get_function(VMI_CALL_CancelAlarm); |
820 | pv_time_ops.time_init = vmi_time_init; | 820 | x86_init.timers.timer_init = vmi_time_init; |
821 | pv_time_ops.get_wallclock = vmi_get_wallclock; | ||
822 | pv_time_ops.set_wallclock = vmi_set_wallclock; | ||
823 | #ifdef CONFIG_X86_LOCAL_APIC | 821 | #ifdef CONFIG_X86_LOCAL_APIC |
824 | pv_apic_ops.setup_boot_clock = vmi_time_bsp_init; | 822 | x86_init.timers.setup_percpu_clockev = vmi_time_bsp_init; |
825 | pv_apic_ops.setup_secondary_clock = vmi_time_ap_init; | 823 | x86_cpuinit.setup_percpu_clockev = vmi_time_ap_init; |
826 | #endif | 824 | #endif |
827 | pv_time_ops.sched_clock = vmi_sched_clock; | 825 | pv_time_ops.sched_clock = vmi_sched_clock; |
828 | pv_time_ops.get_tsc_khz = vmi_tsc_khz; | 826 | x86_platform.calibrate_tsc = vmi_tsc_khz; |
827 | x86_platform.get_wallclock = vmi_get_wallclock; | ||
828 | x86_platform.set_wallclock = vmi_set_wallclock; | ||
829 | 829 | ||
830 | /* We have true wallclock functions; disable CMOS clock sync */ | 830 | /* We have true wallclock functions; disable CMOS clock sync */ |
831 | no_sync_cmos_clock = 1; | 831 | no_sync_cmos_clock = 1; |
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c index 2b3eb82efeeb..611b9e2360d3 100644 --- a/arch/x86/kernel/vmiclock_32.c +++ b/arch/x86/kernel/vmiclock_32.c | |||
@@ -68,7 +68,7 @@ unsigned long long vmi_sched_clock(void) | |||
68 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); | 68 | return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE)); |
69 | } | 69 | } |
70 | 70 | ||
71 | /* paravirt_ops.get_tsc_khz = vmi_tsc_khz */ | 71 | /* x86_platform.calibrate_tsc = vmi_tsc_khz */ |
72 | unsigned long vmi_tsc_khz(void) | 72 | unsigned long vmi_tsc_khz(void) |
73 | { | 73 | { |
74 | unsigned long long khz; | 74 | unsigned long long khz; |
diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index 9fc178255c04..0ccb57d5ee35 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S | |||
@@ -348,15 +348,12 @@ SECTIONS | |||
348 | _end = .; | 348 | _end = .; |
349 | } | 349 | } |
350 | 350 | ||
351 | /* Sections to be discarded */ | ||
352 | /DISCARD/ : { | ||
353 | *(.exitcall.exit) | ||
354 | *(.eh_frame) | ||
355 | *(.discard) | ||
356 | } | ||
357 | |||
358 | STABS_DEBUG | 351 | STABS_DEBUG |
359 | DWARF_DEBUG | 352 | DWARF_DEBUG |
353 | |||
354 | /* Sections to be discarded */ | ||
355 | DISCARDS | ||
356 | /DISCARD/ : { *(.eh_frame) } | ||
360 | } | 357 | } |
361 | 358 | ||
362 | 359 | ||
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index 25ee06a80aad..cf53a78e2dcf 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c | |||
@@ -87,6 +87,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock) | |||
87 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; | 87 | vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec; |
88 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; | 88 | vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec; |
89 | vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; | 89 | vsyscall_gtod_data.wall_to_monotonic = wall_to_monotonic; |
90 | vsyscall_gtod_data.wall_time_coarse = __current_kernel_time(); | ||
90 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); | 91 | write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags); |
91 | } | 92 | } |
92 | 93 | ||
diff --git a/arch/x86/kernel/x86_init.c b/arch/x86/kernel/x86_init.c new file mode 100644 index 000000000000..4449a4a2c2ed --- /dev/null +++ b/arch/x86/kernel/x86_init.c | |||
@@ -0,0 +1,75 @@ | |||
1 | /* | ||
2 | * Copyright (C) 2009 Thomas Gleixner <tglx@linutronix.de> | ||
3 | * | ||
4 | * For licencing details see kernel-base/COPYING | ||
5 | */ | ||
6 | #include <linux/init.h> | ||
7 | |||
8 | #include <asm/bios_ebda.h> | ||
9 | #include <asm/paravirt.h> | ||
10 | #include <asm/mpspec.h> | ||
11 | #include <asm/setup.h> | ||
12 | #include <asm/apic.h> | ||
13 | #include <asm/e820.h> | ||
14 | #include <asm/time.h> | ||
15 | #include <asm/irq.h> | ||
16 | #include <asm/tsc.h> | ||
17 | |||
18 | void __cpuinit x86_init_noop(void) { } | ||
19 | void __init x86_init_uint_noop(unsigned int unused) { } | ||
20 | void __init x86_init_pgd_noop(pgd_t *unused) { } | ||
21 | |||
22 | /* | ||
23 | * The platform setup functions are preset with the default functions | ||
24 | * for standard PC hardware. | ||
25 | */ | ||
26 | struct x86_init_ops x86_init __initdata = { | ||
27 | |||
28 | .resources = { | ||
29 | .probe_roms = x86_init_noop, | ||
30 | .reserve_resources = reserve_standard_io_resources, | ||
31 | .memory_setup = default_machine_specific_memory_setup, | ||
32 | }, | ||
33 | |||
34 | .mpparse = { | ||
35 | .mpc_record = x86_init_uint_noop, | ||
36 | .setup_ioapic_ids = x86_init_noop, | ||
37 | .mpc_apic_id = default_mpc_apic_id, | ||
38 | .smp_read_mpc_oem = default_smp_read_mpc_oem, | ||
39 | .mpc_oem_bus_info = default_mpc_oem_bus_info, | ||
40 | .find_smp_config = default_find_smp_config, | ||
41 | .get_smp_config = default_get_smp_config, | ||
42 | }, | ||
43 | |||
44 | .irqs = { | ||
45 | .pre_vector_init = init_ISA_irqs, | ||
46 | .intr_init = native_init_IRQ, | ||
47 | .trap_init = x86_init_noop, | ||
48 | }, | ||
49 | |||
50 | .oem = { | ||
51 | .arch_setup = x86_init_noop, | ||
52 | .banner = default_banner, | ||
53 | }, | ||
54 | |||
55 | .paging = { | ||
56 | .pagetable_setup_start = native_pagetable_setup_start, | ||
57 | .pagetable_setup_done = native_pagetable_setup_done, | ||
58 | }, | ||
59 | |||
60 | .timers = { | ||
61 | .setup_percpu_clockev = setup_boot_APIC_clock, | ||
62 | .tsc_pre_init = x86_init_noop, | ||
63 | .timer_init = hpet_time_init, | ||
64 | }, | ||
65 | }; | ||
66 | |||
67 | struct x86_cpuinit_ops x86_cpuinit __cpuinitdata = { | ||
68 | .setup_percpu_clockev = setup_secondary_APIC_clock, | ||
69 | }; | ||
70 | |||
71 | struct x86_platform_ops x86_platform = { | ||
72 | .calibrate_tsc = native_calibrate_tsc, | ||
73 | .get_wallclock = mach_get_cmos_time, | ||
74 | .set_wallclock = mach_set_rtc_mmss, | ||
75 | }; | ||