diff options
Diffstat (limited to 'arch/i386/kernel')
54 files changed, 2406 insertions, 2696 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 4ae3dcf1d2f0..4f98516b9f94 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -39,12 +39,10 @@ obj-$(CONFIG_EARLY_PRINTK) += early_printk.o | |||
39 | obj-$(CONFIG_HPET_TIMER) += hpet.o | 39 | obj-$(CONFIG_HPET_TIMER) += hpet.o |
40 | obj-$(CONFIG_K8_NB) += k8.o | 40 | obj-$(CONFIG_K8_NB) += k8.o |
41 | 41 | ||
42 | obj-$(CONFIG_VMI) += vmi.o vmitime.o | 42 | obj-$(CONFIG_VMI) += vmi.o vmiclock.o |
43 | obj-$(CONFIG_PARAVIRT) += paravirt.o | 43 | obj-$(CONFIG_PARAVIRT) += paravirt.o |
44 | obj-y += pcspeaker.o | 44 | obj-y += pcspeaker.o |
45 | 45 | ||
46 | EXTRA_AFLAGS := -traditional | ||
47 | |||
48 | obj-$(CONFIG_SCx200) += scx200.o | 46 | obj-$(CONFIG_SCx200) += scx200.o |
49 | 47 | ||
50 | # vsyscall.o contains the vsyscall DSO images as __initdata. | 48 | # vsyscall.o contains the vsyscall DSO images as __initdata. |
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index 9ea5b8ecc7e1..280898b045b2 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c | |||
@@ -874,7 +874,7 @@ static void __init acpi_process_madt(void) | |||
874 | acpi_ioapic = 1; | 874 | acpi_ioapic = 1; |
875 | 875 | ||
876 | smp_found_config = 1; | 876 | smp_found_config = 1; |
877 | clustered_apic_check(); | 877 | setup_apic_routing(); |
878 | } | 878 | } |
879 | } | 879 | } |
880 | if (error == -EINVAL) { | 880 | if (error == -EINVAL) { |
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 8f7efd38254d..23f78efc577d 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c | |||
@@ -10,7 +10,6 @@ | |||
10 | #include <asm/pci-direct.h> | 10 | #include <asm/pci-direct.h> |
11 | #include <asm/acpi.h> | 11 | #include <asm/acpi.h> |
12 | #include <asm/apic.h> | 12 | #include <asm/apic.h> |
13 | #include <asm/irq.h> | ||
14 | 13 | ||
15 | #ifdef CONFIG_ACPI | 14 | #ifdef CONFIG_ACPI |
16 | 15 | ||
@@ -48,24 +47,6 @@ static int __init check_bridge(int vendor, int device) | |||
48 | return 0; | 47 | return 0; |
49 | } | 48 | } |
50 | 49 | ||
51 | static void check_intel(void) | ||
52 | { | ||
53 | u16 vendor, device; | ||
54 | |||
55 | vendor = read_pci_config_16(0, 0, 0, PCI_VENDOR_ID); | ||
56 | |||
57 | if (vendor != PCI_VENDOR_ID_INTEL) | ||
58 | return; | ||
59 | |||
60 | device = read_pci_config_16(0, 0, 0, PCI_DEVICE_ID); | ||
61 | #ifdef CONFIG_SMP | ||
62 | if (device == PCI_DEVICE_ID_INTEL_E7320_MCH || | ||
63 | device == PCI_DEVICE_ID_INTEL_E7520_MCH || | ||
64 | device == PCI_DEVICE_ID_INTEL_E7525_MCH) | ||
65 | quirk_intel_irqbalance(); | ||
66 | #endif | ||
67 | } | ||
68 | |||
69 | void __init check_acpi_pci(void) | 50 | void __init check_acpi_pci(void) |
70 | { | 51 | { |
71 | int num, slot, func; | 52 | int num, slot, func; |
@@ -77,8 +58,6 @@ void __init check_acpi_pci(void) | |||
77 | if (!early_pci_allowed()) | 58 | if (!early_pci_allowed()) |
78 | return; | 59 | return; |
79 | 60 | ||
80 | check_intel(); | ||
81 | |||
82 | /* Poor man's PCI discovery */ | 61 | /* Poor man's PCI discovery */ |
83 | for (num = 0; num < 32; num++) { | 62 | for (num = 0; num < 32; num++) { |
84 | for (slot = 0; slot < 32; slot++) { | 63 | for (slot = 0; slot < 32; slot++) { |
diff --git a/arch/i386/kernel/alternative.c b/arch/i386/kernel/alternative.c index 426f59b0106b..e5cec6685cc5 100644 --- a/arch/i386/kernel/alternative.c +++ b/arch/i386/kernel/alternative.c | |||
@@ -5,6 +5,7 @@ | |||
5 | #include <asm/alternative.h> | 5 | #include <asm/alternative.h> |
6 | #include <asm/sections.h> | 6 | #include <asm/sections.h> |
7 | 7 | ||
8 | static int noreplace_smp = 0; | ||
8 | static int smp_alt_once = 0; | 9 | static int smp_alt_once = 0; |
9 | static int debug_alternative = 0; | 10 | static int debug_alternative = 0; |
10 | 11 | ||
@@ -13,15 +14,33 @@ static int __init bootonly(char *str) | |||
13 | smp_alt_once = 1; | 14 | smp_alt_once = 1; |
14 | return 1; | 15 | return 1; |
15 | } | 16 | } |
17 | __setup("smp-alt-boot", bootonly); | ||
18 | |||
16 | static int __init debug_alt(char *str) | 19 | static int __init debug_alt(char *str) |
17 | { | 20 | { |
18 | debug_alternative = 1; | 21 | debug_alternative = 1; |
19 | return 1; | 22 | return 1; |
20 | } | 23 | } |
21 | |||
22 | __setup("smp-alt-boot", bootonly); | ||
23 | __setup("debug-alternative", debug_alt); | 24 | __setup("debug-alternative", debug_alt); |
24 | 25 | ||
26 | static int __init setup_noreplace_smp(char *str) | ||
27 | { | ||
28 | noreplace_smp = 1; | ||
29 | return 1; | ||
30 | } | ||
31 | __setup("noreplace-smp", setup_noreplace_smp); | ||
32 | |||
33 | #ifdef CONFIG_PARAVIRT | ||
34 | static int noreplace_paravirt = 0; | ||
35 | |||
36 | static int __init setup_noreplace_paravirt(char *str) | ||
37 | { | ||
38 | noreplace_paravirt = 1; | ||
39 | return 1; | ||
40 | } | ||
41 | __setup("noreplace-paravirt", setup_noreplace_paravirt); | ||
42 | #endif | ||
43 | |||
25 | #define DPRINTK(fmt, args...) if (debug_alternative) \ | 44 | #define DPRINTK(fmt, args...) if (debug_alternative) \ |
26 | printk(KERN_DEBUG fmt, args) | 45 | printk(KERN_DEBUG fmt, args) |
27 | 46 | ||
@@ -132,11 +151,8 @@ static void nop_out(void *insns, unsigned int len) | |||
132 | } | 151 | } |
133 | 152 | ||
134 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; | 153 | extern struct alt_instr __alt_instructions[], __alt_instructions_end[]; |
135 | extern struct alt_instr __smp_alt_instructions[], __smp_alt_instructions_end[]; | ||
136 | extern u8 *__smp_locks[], *__smp_locks_end[]; | 154 | extern u8 *__smp_locks[], *__smp_locks_end[]; |
137 | 155 | ||
138 | extern u8 __smp_alt_begin[], __smp_alt_end[]; | ||
139 | |||
140 | /* Replace instructions with better alternatives for this CPU type. | 156 | /* Replace instructions with better alternatives for this CPU type. |
141 | This runs before SMP is initialized to avoid SMP problems with | 157 | This runs before SMP is initialized to avoid SMP problems with |
142 | self modifying code. This implies that assymetric systems where | 158 | self modifying code. This implies that assymetric systems where |
@@ -171,29 +187,6 @@ void apply_alternatives(struct alt_instr *start, struct alt_instr *end) | |||
171 | 187 | ||
172 | #ifdef CONFIG_SMP | 188 | #ifdef CONFIG_SMP |
173 | 189 | ||
174 | static void alternatives_smp_save(struct alt_instr *start, struct alt_instr *end) | ||
175 | { | ||
176 | struct alt_instr *a; | ||
177 | |||
178 | DPRINTK("%s: alt table %p-%p\n", __FUNCTION__, start, end); | ||
179 | for (a = start; a < end; a++) { | ||
180 | memcpy(a->replacement + a->replacementlen, | ||
181 | a->instr, | ||
182 | a->instrlen); | ||
183 | } | ||
184 | } | ||
185 | |||
186 | static void alternatives_smp_apply(struct alt_instr *start, struct alt_instr *end) | ||
187 | { | ||
188 | struct alt_instr *a; | ||
189 | |||
190 | for (a = start; a < end; a++) { | ||
191 | memcpy(a->instr, | ||
192 | a->replacement + a->replacementlen, | ||
193 | a->instrlen); | ||
194 | } | ||
195 | } | ||
196 | |||
197 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) | 190 | static void alternatives_smp_lock(u8 **start, u8 **end, u8 *text, u8 *text_end) |
198 | { | 191 | { |
199 | u8 **ptr; | 192 | u8 **ptr; |
@@ -211,6 +204,9 @@ static void alternatives_smp_unlock(u8 **start, u8 **end, u8 *text, u8 *text_end | |||
211 | { | 204 | { |
212 | u8 **ptr; | 205 | u8 **ptr; |
213 | 206 | ||
207 | if (noreplace_smp) | ||
208 | return; | ||
209 | |||
214 | for (ptr = start; ptr < end; ptr++) { | 210 | for (ptr = start; ptr < end; ptr++) { |
215 | if (*ptr < text) | 211 | if (*ptr < text) |
216 | continue; | 212 | continue; |
@@ -245,6 +241,9 @@ void alternatives_smp_module_add(struct module *mod, char *name, | |||
245 | struct smp_alt_module *smp; | 241 | struct smp_alt_module *smp; |
246 | unsigned long flags; | 242 | unsigned long flags; |
247 | 243 | ||
244 | if (noreplace_smp) | ||
245 | return; | ||
246 | |||
248 | if (smp_alt_once) { | 247 | if (smp_alt_once) { |
249 | if (boot_cpu_has(X86_FEATURE_UP)) | 248 | if (boot_cpu_has(X86_FEATURE_UP)) |
250 | alternatives_smp_unlock(locks, locks_end, | 249 | alternatives_smp_unlock(locks, locks_end, |
@@ -279,7 +278,7 @@ void alternatives_smp_module_del(struct module *mod) | |||
279 | struct smp_alt_module *item; | 278 | struct smp_alt_module *item; |
280 | unsigned long flags; | 279 | unsigned long flags; |
281 | 280 | ||
282 | if (smp_alt_once) | 281 | if (smp_alt_once || noreplace_smp) |
283 | return; | 282 | return; |
284 | 283 | ||
285 | spin_lock_irqsave(&smp_alt, flags); | 284 | spin_lock_irqsave(&smp_alt, flags); |
@@ -310,7 +309,7 @@ void alternatives_smp_switch(int smp) | |||
310 | return; | 309 | return; |
311 | #endif | 310 | #endif |
312 | 311 | ||
313 | if (smp_alt_once) | 312 | if (noreplace_smp || smp_alt_once) |
314 | return; | 313 | return; |
315 | BUG_ON(!smp && (num_online_cpus() > 1)); | 314 | BUG_ON(!smp && (num_online_cpus() > 1)); |
316 | 315 | ||
@@ -319,8 +318,6 @@ void alternatives_smp_switch(int smp) | |||
319 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); | 318 | printk(KERN_INFO "SMP alternatives: switching to SMP code\n"); |
320 | clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 319 | clear_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); |
321 | clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); | 320 | clear_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); |
322 | alternatives_smp_apply(__smp_alt_instructions, | ||
323 | __smp_alt_instructions_end); | ||
324 | list_for_each_entry(mod, &smp_alt_modules, next) | 321 | list_for_each_entry(mod, &smp_alt_modules, next) |
325 | alternatives_smp_lock(mod->locks, mod->locks_end, | 322 | alternatives_smp_lock(mod->locks, mod->locks_end, |
326 | mod->text, mod->text_end); | 323 | mod->text, mod->text_end); |
@@ -328,8 +325,6 @@ void alternatives_smp_switch(int smp) | |||
328 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 325 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); |
329 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 326 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); |
330 | set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); | 327 | set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); |
331 | apply_alternatives(__smp_alt_instructions, | ||
332 | __smp_alt_instructions_end); | ||
333 | list_for_each_entry(mod, &smp_alt_modules, next) | 328 | list_for_each_entry(mod, &smp_alt_modules, next) |
334 | alternatives_smp_unlock(mod->locks, mod->locks_end, | 329 | alternatives_smp_unlock(mod->locks, mod->locks_end, |
335 | mod->text, mod->text_end); | 330 | mod->text, mod->text_end); |
@@ -340,36 +335,31 @@ void alternatives_smp_switch(int smp) | |||
340 | #endif | 335 | #endif |
341 | 336 | ||
342 | #ifdef CONFIG_PARAVIRT | 337 | #ifdef CONFIG_PARAVIRT |
343 | void apply_paravirt(struct paravirt_patch *start, struct paravirt_patch *end) | 338 | void apply_paravirt(struct paravirt_patch_site *start, |
339 | struct paravirt_patch_site *end) | ||
344 | { | 340 | { |
345 | struct paravirt_patch *p; | 341 | struct paravirt_patch_site *p; |
342 | |||
343 | if (noreplace_paravirt) | ||
344 | return; | ||
346 | 345 | ||
347 | for (p = start; p < end; p++) { | 346 | for (p = start; p < end; p++) { |
348 | unsigned int used; | 347 | unsigned int used; |
349 | 348 | ||
350 | used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, | 349 | used = paravirt_ops.patch(p->instrtype, p->clobbers, p->instr, |
351 | p->len); | 350 | p->len); |
352 | #ifdef CONFIG_DEBUG_PARAVIRT | 351 | |
353 | { | 352 | BUG_ON(used > p->len); |
354 | int i; | 353 | |
355 | /* Deliberately clobber regs using "not %reg" to find bugs. */ | ||
356 | for (i = 0; i < 3; i++) { | ||
357 | if (p->len - used >= 2 && (p->clobbers & (1 << i))) { | ||
358 | memcpy(p->instr + used, "\xf7\xd0", 2); | ||
359 | p->instr[used+1] |= i; | ||
360 | used += 2; | ||
361 | } | ||
362 | } | ||
363 | } | ||
364 | #endif | ||
365 | /* Pad the rest with nops */ | 354 | /* Pad the rest with nops */ |
366 | nop_out(p->instr + used, p->len - used); | 355 | nop_out(p->instr + used, p->len - used); |
367 | } | 356 | } |
368 | 357 | ||
369 | /* Sync to be conservative, in case we patched following instructions */ | 358 | /* Sync to be conservative, in case we patched following |
359 | * instructions */ | ||
370 | sync_core(); | 360 | sync_core(); |
371 | } | 361 | } |
372 | extern struct paravirt_patch __start_parainstructions[], | 362 | extern struct paravirt_patch_site __start_parainstructions[], |
373 | __stop_parainstructions[]; | 363 | __stop_parainstructions[]; |
374 | #endif /* CONFIG_PARAVIRT */ | 364 | #endif /* CONFIG_PARAVIRT */ |
375 | 365 | ||
@@ -396,23 +386,19 @@ void __init alternative_instructions(void) | |||
396 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); | 386 | printk(KERN_INFO "SMP alternatives: switching to UP code\n"); |
397 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); | 387 | set_bit(X86_FEATURE_UP, boot_cpu_data.x86_capability); |
398 | set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); | 388 | set_bit(X86_FEATURE_UP, cpu_data[0].x86_capability); |
399 | apply_alternatives(__smp_alt_instructions, | ||
400 | __smp_alt_instructions_end); | ||
401 | alternatives_smp_unlock(__smp_locks, __smp_locks_end, | 389 | alternatives_smp_unlock(__smp_locks, __smp_locks_end, |
402 | _text, _etext); | 390 | _text, _etext); |
403 | } | 391 | } |
404 | free_init_pages("SMP alternatives", | 392 | free_init_pages("SMP alternatives", |
405 | (unsigned long)__smp_alt_begin, | 393 | __pa_symbol(&__smp_locks), |
406 | (unsigned long)__smp_alt_end); | 394 | __pa_symbol(&__smp_locks_end)); |
407 | } else { | 395 | } else { |
408 | alternatives_smp_save(__smp_alt_instructions, | ||
409 | __smp_alt_instructions_end); | ||
410 | alternatives_smp_module_add(NULL, "core kernel", | 396 | alternatives_smp_module_add(NULL, "core kernel", |
411 | __smp_locks, __smp_locks_end, | 397 | __smp_locks, __smp_locks_end, |
412 | _text, _etext); | 398 | _text, _etext); |
413 | alternatives_smp_switch(0); | 399 | alternatives_smp_switch(0); |
414 | } | 400 | } |
415 | #endif | 401 | #endif |
416 | apply_paravirt(__start_parainstructions, __stop_parainstructions); | 402 | apply_paravirt(__parainstructions, __parainstructions_end); |
417 | local_irq_restore(flags); | 403 | local_irq_restore(flags); |
418 | } | 404 | } |
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 93aa911646ad..aca054cc0552 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -129,6 +129,28 @@ static int modern_apic(void) | |||
129 | return lapic_get_version() >= 0x14; | 129 | return lapic_get_version() >= 0x14; |
130 | } | 130 | } |
131 | 131 | ||
132 | void apic_wait_icr_idle(void) | ||
133 | { | ||
134 | while (apic_read(APIC_ICR) & APIC_ICR_BUSY) | ||
135 | cpu_relax(); | ||
136 | } | ||
137 | |||
138 | unsigned long safe_apic_wait_icr_idle(void) | ||
139 | { | ||
140 | unsigned long send_status; | ||
141 | int timeout; | ||
142 | |||
143 | timeout = 0; | ||
144 | do { | ||
145 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
146 | if (!send_status) | ||
147 | break; | ||
148 | udelay(100); | ||
149 | } while (timeout++ < 1000); | ||
150 | |||
151 | return send_status; | ||
152 | } | ||
153 | |||
132 | /** | 154 | /** |
133 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 | 155 | * enable_NMI_through_LVT0 - enable NMI through local vector table 0 |
134 | */ | 156 | */ |
diff --git a/arch/i386/kernel/apm.c b/arch/i386/kernel/apm.c index 064bbf2861f4..367ff1d930cb 100644 --- a/arch/i386/kernel/apm.c +++ b/arch/i386/kernel/apm.c | |||
@@ -233,11 +233,10 @@ | |||
233 | #include <asm/desc.h> | 233 | #include <asm/desc.h> |
234 | #include <asm/i8253.h> | 234 | #include <asm/i8253.h> |
235 | #include <asm/paravirt.h> | 235 | #include <asm/paravirt.h> |
236 | #include <asm/reboot.h> | ||
236 | 237 | ||
237 | #include "io_ports.h" | 238 | #include "io_ports.h" |
238 | 239 | ||
239 | extern void machine_real_restart(unsigned char *, int); | ||
240 | |||
241 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) | 240 | #if defined(CONFIG_APM_DISPLAY_BLANK) && defined(CONFIG_VT) |
242 | extern int (*console_blank_hook)(int); | 241 | extern int (*console_blank_hook)(int); |
243 | #endif | 242 | #endif |
@@ -384,13 +383,6 @@ static int ignore_sys_suspend; | |||
384 | static int ignore_normal_resume; | 383 | static int ignore_normal_resume; |
385 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; | 384 | static int bounce_interval __read_mostly = DEFAULT_BOUNCE_INTERVAL; |
386 | 385 | ||
387 | #ifdef CONFIG_APM_RTC_IS_GMT | ||
388 | # define clock_cmos_diff 0 | ||
389 | # define got_clock_diff 1 | ||
390 | #else | ||
391 | static long clock_cmos_diff; | ||
392 | static int got_clock_diff; | ||
393 | #endif | ||
394 | static int debug __read_mostly; | 386 | static int debug __read_mostly; |
395 | static int smp __read_mostly; | 387 | static int smp __read_mostly; |
396 | static int apm_disabled = -1; | 388 | static int apm_disabled = -1; |
diff --git a/arch/i386/kernel/asm-offsets.c b/arch/i386/kernel/asm-offsets.c index c37535163bfc..27a776c9044d 100644 --- a/arch/i386/kernel/asm-offsets.c +++ b/arch/i386/kernel/asm-offsets.c | |||
@@ -11,11 +11,11 @@ | |||
11 | #include <linux/suspend.h> | 11 | #include <linux/suspend.h> |
12 | #include <asm/ucontext.h> | 12 | #include <asm/ucontext.h> |
13 | #include "sigframe.h" | 13 | #include "sigframe.h" |
14 | #include <asm/pgtable.h> | ||
14 | #include <asm/fixmap.h> | 15 | #include <asm/fixmap.h> |
15 | #include <asm/processor.h> | 16 | #include <asm/processor.h> |
16 | #include <asm/thread_info.h> | 17 | #include <asm/thread_info.h> |
17 | #include <asm/elf.h> | 18 | #include <asm/elf.h> |
18 | #include <asm/pda.h> | ||
19 | 19 | ||
20 | #define DEFINE(sym, val) \ | 20 | #define DEFINE(sym, val) \ |
21 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) | 21 | asm volatile("\n->" #sym " %0 " #val : : "i" (val)) |
@@ -25,6 +25,9 @@ | |||
25 | #define OFFSET(sym, str, mem) \ | 25 | #define OFFSET(sym, str, mem) \ |
26 | DEFINE(sym, offsetof(struct str, mem)); | 26 | DEFINE(sym, offsetof(struct str, mem)); |
27 | 27 | ||
28 | /* workaround for a warning with -Wmissing-prototypes */ | ||
29 | void foo(void); | ||
30 | |||
28 | void foo(void) | 31 | void foo(void) |
29 | { | 32 | { |
30 | OFFSET(SIGCONTEXT_eax, sigcontext, eax); | 33 | OFFSET(SIGCONTEXT_eax, sigcontext, eax); |
@@ -90,17 +93,18 @@ void foo(void) | |||
90 | OFFSET(pbe_next, pbe, next); | 93 | OFFSET(pbe_next, pbe, next); |
91 | 94 | ||
92 | /* Offset from the sysenter stack to tss.esp0 */ | 95 | /* Offset from the sysenter stack to tss.esp0 */ |
93 | DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, esp0) - | 96 | DEFINE(TSS_sysenter_esp0, offsetof(struct tss_struct, x86_tss.esp0) - |
94 | sizeof(struct tss_struct)); | 97 | sizeof(struct tss_struct)); |
95 | 98 | ||
96 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); | 99 | DEFINE(PAGE_SIZE_asm, PAGE_SIZE); |
97 | DEFINE(VDSO_PRELINK, VDSO_PRELINK); | 100 | DEFINE(PAGE_SHIFT_asm, PAGE_SHIFT); |
101 | DEFINE(PTRS_PER_PTE, PTRS_PER_PTE); | ||
102 | DEFINE(PTRS_PER_PMD, PTRS_PER_PMD); | ||
103 | DEFINE(PTRS_PER_PGD, PTRS_PER_PGD); | ||
98 | 104 | ||
99 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); | 105 | DEFINE(VDSO_PRELINK_asm, VDSO_PRELINK); |
100 | 106 | ||
101 | BLANK(); | 107 | OFFSET(crypto_tfm_ctx_offset, crypto_tfm, __crt_ctx); |
102 | OFFSET(PDA_cpu, i386_pda, cpu_number); | ||
103 | OFFSET(PDA_pcurrent, i386_pda, pcurrent); | ||
104 | 108 | ||
105 | #ifdef CONFIG_PARAVIRT | 109 | #ifdef CONFIG_PARAVIRT |
106 | BLANK(); | 110 | BLANK(); |
diff --git a/arch/i386/kernel/cpu/Makefile b/arch/i386/kernel/cpu/Makefile index 010aecfffbc1..74f27a463db0 100644 --- a/arch/i386/kernel/cpu/Makefile +++ b/arch/i386/kernel/cpu/Makefile | |||
@@ -2,7 +2,7 @@ | |||
2 | # Makefile for x86-compatible CPU details and quirks | 2 | # Makefile for x86-compatible CPU details and quirks |
3 | # | 3 | # |
4 | 4 | ||
5 | obj-y := common.o proc.o | 5 | obj-y := common.o proc.o bugs.o |
6 | 6 | ||
7 | obj-y += amd.o | 7 | obj-y += amd.o |
8 | obj-y += cyrix.o | 8 | obj-y += cyrix.o |
@@ -17,3 +17,5 @@ obj-$(CONFIG_X86_MCE) += mcheck/ | |||
17 | 17 | ||
18 | obj-$(CONFIG_MTRR) += mtrr/ | 18 | obj-$(CONFIG_MTRR) += mtrr/ |
19 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ | 19 | obj-$(CONFIG_CPU_FREQ) += cpufreq/ |
20 | |||
21 | obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o | ||
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index 2d47db482972..4fec702afd7e 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c | |||
@@ -53,6 +53,8 @@ static __cpuinit int amd_apic_timer_broken(void) | |||
53 | return 0; | 53 | return 0; |
54 | } | 54 | } |
55 | 55 | ||
56 | int force_mwait __cpuinitdata; | ||
57 | |||
56 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) | 58 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
57 | { | 59 | { |
58 | u32 l, h; | 60 | u32 l, h; |
@@ -275,6 +277,9 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c) | |||
275 | 277 | ||
276 | if (amd_apic_timer_broken()) | 278 | if (amd_apic_timer_broken()) |
277 | set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability); | 279 | set_bit(X86_FEATURE_LAPIC_TIMER_BROKEN, c->x86_capability); |
280 | |||
281 | if (c->x86 == 0x10 && !force_mwait) | ||
282 | clear_bit(X86_FEATURE_MWAIT, c->x86_capability); | ||
278 | } | 283 | } |
279 | 284 | ||
280 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 285 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
@@ -314,13 +319,3 @@ int __init amd_init_cpu(void) | |||
314 | cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; | 319 | cpu_devs[X86_VENDOR_AMD] = &amd_cpu_dev; |
315 | return 0; | 320 | return 0; |
316 | } | 321 | } |
317 | |||
318 | //early_arch_initcall(amd_init_cpu); | ||
319 | |||
320 | static int __init amd_exit_cpu(void) | ||
321 | { | ||
322 | cpu_devs[X86_VENDOR_AMD] = NULL; | ||
323 | return 0; | ||
324 | } | ||
325 | |||
326 | late_initcall(amd_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/bugs.c b/arch/i386/kernel/cpu/bugs.c new file mode 100644 index 000000000000..54428a2500f3 --- /dev/null +++ b/arch/i386/kernel/cpu/bugs.c | |||
@@ -0,0 +1,191 @@ | |||
1 | /* | ||
2 | * arch/i386/cpu/bugs.c | ||
3 | * | ||
4 | * Copyright (C) 1994 Linus Torvalds | ||
5 | * | ||
6 | * Cyrix stuff, June 1998 by: | ||
7 | * - Rafael R. Reilova (moved everything from head.S), | ||
8 | * <rreilova@ececs.uc.edu> | ||
9 | * - Channing Corn (tests & fixes), | ||
10 | * - Andrew D. Balsa (code cleanup). | ||
11 | */ | ||
12 | #include <linux/init.h> | ||
13 | #include <linux/utsname.h> | ||
14 | #include <asm/processor.h> | ||
15 | #include <asm/i387.h> | ||
16 | #include <asm/msr.h> | ||
17 | #include <asm/paravirt.h> | ||
18 | #include <asm/alternative.h> | ||
19 | |||
20 | static int __init no_halt(char *s) | ||
21 | { | ||
22 | boot_cpu_data.hlt_works_ok = 0; | ||
23 | return 1; | ||
24 | } | ||
25 | |||
26 | __setup("no-hlt", no_halt); | ||
27 | |||
28 | static int __init mca_pentium(char *s) | ||
29 | { | ||
30 | mca_pentium_flag = 1; | ||
31 | return 1; | ||
32 | } | ||
33 | |||
34 | __setup("mca-pentium", mca_pentium); | ||
35 | |||
36 | static int __init no_387(char *s) | ||
37 | { | ||
38 | boot_cpu_data.hard_math = 0; | ||
39 | write_cr0(0xE | read_cr0()); | ||
40 | return 1; | ||
41 | } | ||
42 | |||
43 | __setup("no387", no_387); | ||
44 | |||
45 | static double __initdata x = 4195835.0; | ||
46 | static double __initdata y = 3145727.0; | ||
47 | |||
48 | /* | ||
49 | * This used to check for exceptions.. | ||
50 | * However, it turns out that to support that, | ||
51 | * the XMM trap handlers basically had to | ||
52 | * be buggy. So let's have a correct XMM trap | ||
53 | * handler, and forget about printing out | ||
54 | * some status at boot. | ||
55 | * | ||
56 | * We should really only care about bugs here | ||
57 | * anyway. Not features. | ||
58 | */ | ||
59 | static void __init check_fpu(void) | ||
60 | { | ||
61 | if (!boot_cpu_data.hard_math) { | ||
62 | #ifndef CONFIG_MATH_EMULATION | ||
63 | printk(KERN_EMERG "No coprocessor found and no math emulation present.\n"); | ||
64 | printk(KERN_EMERG "Giving up.\n"); | ||
65 | for (;;) ; | ||
66 | #endif | ||
67 | return; | ||
68 | } | ||
69 | |||
70 | /* trap_init() enabled FXSR and company _before_ testing for FP problems here. */ | ||
71 | /* Test for the divl bug.. */ | ||
72 | __asm__("fninit\n\t" | ||
73 | "fldl %1\n\t" | ||
74 | "fdivl %2\n\t" | ||
75 | "fmull %2\n\t" | ||
76 | "fldl %1\n\t" | ||
77 | "fsubp %%st,%%st(1)\n\t" | ||
78 | "fistpl %0\n\t" | ||
79 | "fwait\n\t" | ||
80 | "fninit" | ||
81 | : "=m" (*&boot_cpu_data.fdiv_bug) | ||
82 | : "m" (*&x), "m" (*&y)); | ||
83 | if (boot_cpu_data.fdiv_bug) | ||
84 | printk("Hmm, FPU with FDIV bug.\n"); | ||
85 | } | ||
86 | |||
87 | static void __init check_hlt(void) | ||
88 | { | ||
89 | if (paravirt_enabled()) | ||
90 | return; | ||
91 | |||
92 | printk(KERN_INFO "Checking 'hlt' instruction... "); | ||
93 | if (!boot_cpu_data.hlt_works_ok) { | ||
94 | printk("disabled\n"); | ||
95 | return; | ||
96 | } | ||
97 | halt(); | ||
98 | halt(); | ||
99 | halt(); | ||
100 | halt(); | ||
101 | printk("OK.\n"); | ||
102 | } | ||
103 | |||
104 | /* | ||
105 | * Most 386 processors have a bug where a POPAD can lock the | ||
106 | * machine even from user space. | ||
107 | */ | ||
108 | |||
109 | static void __init check_popad(void) | ||
110 | { | ||
111 | #ifndef CONFIG_X86_POPAD_OK | ||
112 | int res, inp = (int) &res; | ||
113 | |||
114 | printk(KERN_INFO "Checking for popad bug... "); | ||
115 | __asm__ __volatile__( | ||
116 | "movl $12345678,%%eax; movl $0,%%edi; pusha; popa; movl (%%edx,%%edi),%%ecx " | ||
117 | : "=&a" (res) | ||
118 | : "d" (inp) | ||
119 | : "ecx", "edi" ); | ||
120 | /* If this fails, it means that any user program may lock the CPU hard. Too bad. */ | ||
121 | if (res != 12345678) printk( "Buggy.\n" ); | ||
122 | else printk( "OK.\n" ); | ||
123 | #endif | ||
124 | } | ||
125 | |||
126 | /* | ||
127 | * Check whether we are able to run this kernel safely on SMP. | ||
128 | * | ||
129 | * - In order to run on a i386, we need to be compiled for i386 | ||
130 | * (for due to lack of "invlpg" and working WP on a i386) | ||
131 | * - In order to run on anything without a TSC, we need to be | ||
132 | * compiled for a i486. | ||
133 | * - In order to support the local APIC on a buggy Pentium machine, | ||
134 | * we need to be compiled with CONFIG_X86_GOOD_APIC disabled, | ||
135 | * which happens implicitly if compiled for a Pentium or lower | ||
136 | * (unless an advanced selection of CPU features is used) as an | ||
137 | * otherwise config implies a properly working local APIC without | ||
138 | * the need to do extra reads from the APIC. | ||
139 | */ | ||
140 | |||
141 | static void __init check_config(void) | ||
142 | { | ||
143 | /* | ||
144 | * We'd better not be a i386 if we're configured to use some | ||
145 | * i486+ only features! (WP works in supervisor mode and the | ||
146 | * new "invlpg" and "bswap" instructions) | ||
147 | */ | ||
148 | #if defined(CONFIG_X86_WP_WORKS_OK) || defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_BSWAP) | ||
149 | if (boot_cpu_data.x86 == 3) | ||
150 | panic("Kernel requires i486+ for 'invlpg' and other features"); | ||
151 | #endif | ||
152 | |||
153 | /* | ||
154 | * If we configured ourselves for a TSC, we'd better have one! | ||
155 | */ | ||
156 | #ifdef CONFIG_X86_TSC | ||
157 | if (!cpu_has_tsc && !tsc_disable) | ||
158 | panic("Kernel compiled for Pentium+, requires TSC feature!"); | ||
159 | #endif | ||
160 | |||
161 | /* | ||
162 | * If we were told we had a good local APIC, check for buggy Pentia, | ||
163 | * i.e. all B steppings and the C2 stepping of P54C when using their | ||
164 | * integrated APIC (see 11AP erratum in "Pentium Processor | ||
165 | * Specification Update"). | ||
166 | */ | ||
167 | #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_GOOD_APIC) | ||
168 | if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL | ||
169 | && cpu_has_apic | ||
170 | && boot_cpu_data.x86 == 5 | ||
171 | && boot_cpu_data.x86_model == 2 | ||
172 | && (boot_cpu_data.x86_mask < 6 || boot_cpu_data.x86_mask == 11)) | ||
173 | panic("Kernel compiled for PMMX+, assumes a local APIC without the read-before-write bug!"); | ||
174 | #endif | ||
175 | } | ||
176 | |||
177 | |||
178 | void __init check_bugs(void) | ||
179 | { | ||
180 | identify_boot_cpu(); | ||
181 | #ifndef CONFIG_SMP | ||
182 | printk("CPU: "); | ||
183 | print_cpu_info(&boot_cpu_data); | ||
184 | #endif | ||
185 | check_config(); | ||
186 | check_fpu(); | ||
187 | check_hlt(); | ||
188 | check_popad(); | ||
189 | init_utsname()->machine[1] = '0' + (boot_cpu_data.x86 > 6 ? 6 : boot_cpu_data.x86); | ||
190 | alternative_instructions(); | ||
191 | } | ||
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index 8c25047975c0..473eac883c7b 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c | |||
@@ -469,13 +469,3 @@ int __init centaur_init_cpu(void) | |||
469 | cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev; | 469 | cpu_devs[X86_VENDOR_CENTAUR] = ¢aur_cpu_dev; |
470 | return 0; | 470 | return 0; |
471 | } | 471 | } |
472 | |||
473 | //early_arch_initcall(centaur_init_cpu); | ||
474 | |||
475 | static int __init centaur_exit_cpu(void) | ||
476 | { | ||
477 | cpu_devs[X86_VENDOR_CENTAUR] = NULL; | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | late_initcall(centaur_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index dcbbd0a8bfc2..794d593c47eb 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -18,15 +18,37 @@ | |||
18 | #include <asm/apic.h> | 18 | #include <asm/apic.h> |
19 | #include <mach_apic.h> | 19 | #include <mach_apic.h> |
20 | #endif | 20 | #endif |
21 | #include <asm/pda.h> | ||
22 | 21 | ||
23 | #include "cpu.h" | 22 | #include "cpu.h" |
24 | 23 | ||
25 | DEFINE_PER_CPU(struct Xgt_desc_struct, cpu_gdt_descr); | 24 | DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { |
26 | EXPORT_PER_CPU_SYMBOL(cpu_gdt_descr); | 25 | [GDT_ENTRY_KERNEL_CS] = { 0x0000ffff, 0x00cf9a00 }, |
26 | [GDT_ENTRY_KERNEL_DS] = { 0x0000ffff, 0x00cf9200 }, | ||
27 | [GDT_ENTRY_DEFAULT_USER_CS] = { 0x0000ffff, 0x00cffa00 }, | ||
28 | [GDT_ENTRY_DEFAULT_USER_DS] = { 0x0000ffff, 0x00cff200 }, | ||
29 | /* | ||
30 | * Segments used for calling PnP BIOS have byte granularity. | ||
31 | * They code segments and data segments have fixed 64k limits, | ||
32 | * the transfer segment sizes are set at run time. | ||
33 | */ | ||
34 | [GDT_ENTRY_PNPBIOS_CS32] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | ||
35 | [GDT_ENTRY_PNPBIOS_CS16] = { 0x0000ffff, 0x00009a00 },/* 16-bit code */ | ||
36 | [GDT_ENTRY_PNPBIOS_DS] = { 0x0000ffff, 0x00009200 }, /* 16-bit data */ | ||
37 | [GDT_ENTRY_PNPBIOS_TS1] = { 0x00000000, 0x00009200 },/* 16-bit data */ | ||
38 | [GDT_ENTRY_PNPBIOS_TS2] = { 0x00000000, 0x00009200 },/* 16-bit data */ | ||
39 | /* | ||
40 | * The APM segments have byte granularity and their bases | ||
41 | * are set at run time. All have 64k limits. | ||
42 | */ | ||
43 | [GDT_ENTRY_APMBIOS_BASE] = { 0x0000ffff, 0x00409a00 },/* 32-bit code */ | ||
44 | /* 16-bit code */ | ||
45 | [GDT_ENTRY_APMBIOS_BASE+1] = { 0x0000ffff, 0x00009a00 }, | ||
46 | [GDT_ENTRY_APMBIOS_BASE+2] = { 0x0000ffff, 0x00409200 }, /* data */ | ||
27 | 47 | ||
28 | struct i386_pda *_cpu_pda[NR_CPUS] __read_mostly; | 48 | [GDT_ENTRY_ESPFIX_SS] = { 0x00000000, 0x00c09200 }, |
29 | EXPORT_SYMBOL(_cpu_pda); | 49 | [GDT_ENTRY_PERCPU] = { 0x00000000, 0x00000000 }, |
50 | } }; | ||
51 | EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); | ||
30 | 52 | ||
31 | static int cachesize_override __cpuinitdata = -1; | 53 | static int cachesize_override __cpuinitdata = -1; |
32 | static int disable_x86_fxsr __cpuinitdata; | 54 | static int disable_x86_fxsr __cpuinitdata; |
@@ -368,7 +390,7 @@ __setup("serialnumber", x86_serial_nr_setup); | |||
368 | /* | 390 | /* |
369 | * This does the hard work of actually picking apart the CPU stuff... | 391 | * This does the hard work of actually picking apart the CPU stuff... |
370 | */ | 392 | */ |
371 | void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | 393 | static void __cpuinit identify_cpu(struct cpuinfo_x86 *c) |
372 | { | 394 | { |
373 | int i; | 395 | int i; |
374 | 396 | ||
@@ -479,15 +501,22 @@ void __cpuinit identify_cpu(struct cpuinfo_x86 *c) | |||
479 | 501 | ||
480 | /* Init Machine Check Exception if available. */ | 502 | /* Init Machine Check Exception if available. */ |
481 | mcheck_init(c); | 503 | mcheck_init(c); |
504 | } | ||
482 | 505 | ||
483 | if (c == &boot_cpu_data) | 506 | void __init identify_boot_cpu(void) |
484 | sysenter_setup(); | 507 | { |
508 | identify_cpu(&boot_cpu_data); | ||
509 | sysenter_setup(); | ||
485 | enable_sep_cpu(); | 510 | enable_sep_cpu(); |
511 | mtrr_bp_init(); | ||
512 | } | ||
486 | 513 | ||
487 | if (c == &boot_cpu_data) | 514 | void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) |
488 | mtrr_bp_init(); | 515 | { |
489 | else | 516 | BUG_ON(c == &boot_cpu_data); |
490 | mtrr_ap_init(); | 517 | identify_cpu(c); |
518 | enable_sep_cpu(); | ||
519 | mtrr_ap_init(); | ||
491 | } | 520 | } |
492 | 521 | ||
493 | #ifdef CONFIG_X86_HT | 522 | #ifdef CONFIG_X86_HT |
@@ -601,129 +630,36 @@ void __init early_cpu_init(void) | |||
601 | #endif | 630 | #endif |
602 | } | 631 | } |
603 | 632 | ||
604 | /* Make sure %gs is initialized properly in idle threads */ | 633 | /* Make sure %fs is initialized properly in idle threads */ |
605 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) | 634 | struct pt_regs * __devinit idle_regs(struct pt_regs *regs) |
606 | { | 635 | { |
607 | memset(regs, 0, sizeof(struct pt_regs)); | 636 | memset(regs, 0, sizeof(struct pt_regs)); |
608 | regs->xfs = __KERNEL_PDA; | 637 | regs->xfs = __KERNEL_PERCPU; |
609 | return regs; | 638 | return regs; |
610 | } | 639 | } |
611 | 640 | ||
612 | static __cpuinit int alloc_gdt(int cpu) | 641 | /* Current gdt points %fs at the "master" per-cpu area: after this, |
642 | * it's on the real one. */ | ||
643 | void switch_to_new_gdt(void) | ||
613 | { | 644 | { |
614 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | 645 | struct Xgt_desc_struct gdt_descr; |
615 | struct desc_struct *gdt; | ||
616 | struct i386_pda *pda; | ||
617 | |||
618 | gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
619 | pda = cpu_pda(cpu); | ||
620 | |||
621 | /* | ||
622 | * This is a horrible hack to allocate the GDT. The problem | ||
623 | * is that cpu_init() is called really early for the boot CPU | ||
624 | * (and hence needs bootmem) but much later for the secondary | ||
625 | * CPUs, when bootmem will have gone away | ||
626 | */ | ||
627 | if (NODE_DATA(0)->bdata->node_bootmem_map) { | ||
628 | BUG_ON(gdt != NULL || pda != NULL); | ||
629 | |||
630 | gdt = alloc_bootmem_pages(PAGE_SIZE); | ||
631 | pda = alloc_bootmem(sizeof(*pda)); | ||
632 | /* alloc_bootmem(_pages) panics on failure, so no check */ | ||
633 | |||
634 | memset(gdt, 0, PAGE_SIZE); | ||
635 | memset(pda, 0, sizeof(*pda)); | ||
636 | } else { | ||
637 | /* GDT and PDA might already have been allocated if | ||
638 | this is a CPU hotplug re-insertion. */ | ||
639 | if (gdt == NULL) | ||
640 | gdt = (struct desc_struct *)get_zeroed_page(GFP_KERNEL); | ||
641 | |||
642 | if (pda == NULL) | ||
643 | pda = kmalloc_node(sizeof(*pda), GFP_KERNEL, cpu_to_node(cpu)); | ||
644 | |||
645 | if (unlikely(!gdt || !pda)) { | ||
646 | free_pages((unsigned long)gdt, 0); | ||
647 | kfree(pda); | ||
648 | return 0; | ||
649 | } | ||
650 | } | ||
651 | |||
652 | cpu_gdt_descr->address = (unsigned long)gdt; | ||
653 | cpu_pda(cpu) = pda; | ||
654 | |||
655 | return 1; | ||
656 | } | ||
657 | 646 | ||
658 | /* Initial PDA used by boot CPU */ | 647 | gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); |
659 | struct i386_pda boot_pda = { | 648 | gdt_descr.size = GDT_SIZE - 1; |
660 | ._pda = &boot_pda, | 649 | load_gdt(&gdt_descr); |
661 | .cpu_number = 0, | 650 | asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); |
662 | .pcurrent = &init_task, | ||
663 | }; | ||
664 | |||
665 | static inline void set_kernel_fs(void) | ||
666 | { | ||
667 | /* Set %fs for this CPU's PDA. Memory clobber is to create a | ||
668 | barrier with respect to any PDA operations, so the compiler | ||
669 | doesn't move any before here. */ | ||
670 | asm volatile ("mov %0, %%fs" : : "r" (__KERNEL_PDA) : "memory"); | ||
671 | } | 651 | } |
672 | 652 | ||
673 | /* Initialize the CPU's GDT and PDA. The boot CPU does this for | 653 | /* |
674 | itself, but secondaries find this done for them. */ | 654 | * cpu_init() initializes state that is per-CPU. Some data is already |
675 | __cpuinit int init_gdt(int cpu, struct task_struct *idle) | 655 | * initialized (naturally) in the bootstrap process, such as the GDT |
676 | { | 656 | * and IDT. We reload them nevertheless, this function acts as a |
677 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | 657 | * 'CPU state barrier', nothing should get across. |
678 | struct desc_struct *gdt; | 658 | */ |
679 | struct i386_pda *pda; | 659 | void __cpuinit cpu_init(void) |
680 | |||
681 | /* For non-boot CPUs, the GDT and PDA should already have been | ||
682 | allocated. */ | ||
683 | if (!alloc_gdt(cpu)) { | ||
684 | printk(KERN_CRIT "CPU%d failed to allocate GDT or PDA\n", cpu); | ||
685 | return 0; | ||
686 | } | ||
687 | |||
688 | gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
689 | pda = cpu_pda(cpu); | ||
690 | |||
691 | BUG_ON(gdt == NULL || pda == NULL); | ||
692 | |||
693 | /* | ||
694 | * Initialize the per-CPU GDT with the boot GDT, | ||
695 | * and set up the GDT descriptor: | ||
696 | */ | ||
697 | memcpy(gdt, cpu_gdt_table, GDT_SIZE); | ||
698 | cpu_gdt_descr->size = GDT_SIZE - 1; | ||
699 | |||
700 | pack_descriptor((u32 *)&gdt[GDT_ENTRY_PDA].a, | ||
701 | (u32 *)&gdt[GDT_ENTRY_PDA].b, | ||
702 | (unsigned long)pda, sizeof(*pda) - 1, | ||
703 | 0x80 | DESCTYPE_S | 0x2, 0); /* present read-write data segment */ | ||
704 | |||
705 | memset(pda, 0, sizeof(*pda)); | ||
706 | pda->_pda = pda; | ||
707 | pda->cpu_number = cpu; | ||
708 | pda->pcurrent = idle; | ||
709 | |||
710 | return 1; | ||
711 | } | ||
712 | |||
713 | void __cpuinit cpu_set_gdt(int cpu) | ||
714 | { | ||
715 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
716 | |||
717 | /* Reinit these anyway, even if they've already been done (on | ||
718 | the boot CPU, this will transition from the boot gdt+pda to | ||
719 | the real ones). */ | ||
720 | load_gdt(cpu_gdt_descr); | ||
721 | set_kernel_fs(); | ||
722 | } | ||
723 | |||
724 | /* Common CPU init for both boot and secondary CPUs */ | ||
725 | static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | ||
726 | { | 660 | { |
661 | int cpu = smp_processor_id(); | ||
662 | struct task_struct *curr = current; | ||
727 | struct tss_struct * t = &per_cpu(init_tss, cpu); | 663 | struct tss_struct * t = &per_cpu(init_tss, cpu); |
728 | struct thread_struct *thread = &curr->thread; | 664 | struct thread_struct *thread = &curr->thread; |
729 | 665 | ||
@@ -744,6 +680,7 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | |||
744 | } | 680 | } |
745 | 681 | ||
746 | load_idt(&idt_descr); | 682 | load_idt(&idt_descr); |
683 | switch_to_new_gdt(); | ||
747 | 684 | ||
748 | /* | 685 | /* |
749 | * Set up and load the per-CPU TSS and LDT | 686 | * Set up and load the per-CPU TSS and LDT |
@@ -783,38 +720,6 @@ static void __cpuinit _cpu_init(int cpu, struct task_struct *curr) | |||
783 | mxcsr_feature_mask_init(); | 720 | mxcsr_feature_mask_init(); |
784 | } | 721 | } |
785 | 722 | ||
786 | /* Entrypoint to initialize secondary CPU */ | ||
787 | void __cpuinit secondary_cpu_init(void) | ||
788 | { | ||
789 | int cpu = smp_processor_id(); | ||
790 | struct task_struct *curr = current; | ||
791 | |||
792 | _cpu_init(cpu, curr); | ||
793 | } | ||
794 | |||
795 | /* | ||
796 | * cpu_init() initializes state that is per-CPU. Some data is already | ||
797 | * initialized (naturally) in the bootstrap process, such as the GDT | ||
798 | * and IDT. We reload them nevertheless, this function acts as a | ||
799 | * 'CPU state barrier', nothing should get across. | ||
800 | */ | ||
801 | void __cpuinit cpu_init(void) | ||
802 | { | ||
803 | int cpu = smp_processor_id(); | ||
804 | struct task_struct *curr = current; | ||
805 | |||
806 | /* Set up the real GDT and PDA, so we can transition from the | ||
807 | boot versions. */ | ||
808 | if (!init_gdt(cpu, curr)) { | ||
809 | /* failed to allocate something; not much we can do... */ | ||
810 | for (;;) | ||
811 | local_irq_enable(); | ||
812 | } | ||
813 | |||
814 | cpu_set_gdt(cpu); | ||
815 | _cpu_init(cpu, curr); | ||
816 | } | ||
817 | |||
818 | #ifdef CONFIG_HOTPLUG_CPU | 723 | #ifdef CONFIG_HOTPLUG_CPU |
819 | void __cpuinit cpu_uninit(void) | 724 | void __cpuinit cpu_uninit(void) |
820 | { | 725 | { |
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index de27bd07bc9c..0b8411a864fb 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -279,7 +279,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) | |||
279 | */ | 279 | */ |
280 | if (vendor == PCI_VENDOR_ID_CYRIX && | 280 | if (vendor == PCI_VENDOR_ID_CYRIX && |
281 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) | 281 | (device == PCI_DEVICE_ID_CYRIX_5510 || device == PCI_DEVICE_ID_CYRIX_5520)) |
282 | pit_latch_buggy = 1; | 282 | mark_tsc_unstable("cyrix 5510/5520 detected"); |
283 | } | 283 | } |
284 | #endif | 284 | #endif |
285 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ | 285 | c->x86_cache_size=16; /* Yep 16K integrated cache thats it */ |
@@ -448,16 +448,6 @@ int __init cyrix_init_cpu(void) | |||
448 | return 0; | 448 | return 0; |
449 | } | 449 | } |
450 | 450 | ||
451 | //early_arch_initcall(cyrix_init_cpu); | ||
452 | |||
453 | static int __init cyrix_exit_cpu(void) | ||
454 | { | ||
455 | cpu_devs[X86_VENDOR_CYRIX] = NULL; | ||
456 | return 0; | ||
457 | } | ||
458 | |||
459 | late_initcall(cyrix_exit_cpu); | ||
460 | |||
461 | static struct cpu_dev nsc_cpu_dev __cpuinitdata = { | 451 | static struct cpu_dev nsc_cpu_dev __cpuinitdata = { |
462 | .c_vendor = "NSC", | 452 | .c_vendor = "NSC", |
463 | .c_ident = { "Geode by NSC" }, | 453 | .c_ident = { "Geode by NSC" }, |
@@ -470,12 +460,3 @@ int __init nsc_init_cpu(void) | |||
470 | return 0; | 460 | return 0; |
471 | } | 461 | } |
472 | 462 | ||
473 | //early_arch_initcall(nsc_init_cpu); | ||
474 | |||
475 | static int __init nsc_exit_cpu(void) | ||
476 | { | ||
477 | cpu_devs[X86_VENDOR_NSC] = NULL; | ||
478 | return 0; | ||
479 | } | ||
480 | |||
481 | late_initcall(nsc_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 56fe26584957..dc4e08147b1f 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c | |||
@@ -188,8 +188,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
188 | } | 188 | } |
189 | #endif | 189 | #endif |
190 | 190 | ||
191 | if (c->x86 == 15) | 191 | if (c->x86 == 15) { |
192 | set_bit(X86_FEATURE_P4, c->x86_capability); | 192 | set_bit(X86_FEATURE_P4, c->x86_capability); |
193 | set_bit(X86_FEATURE_SYNC_RDTSC, c->x86_capability); | ||
194 | } | ||
193 | if (c->x86 == 6) | 195 | if (c->x86 == 6) |
194 | set_bit(X86_FEATURE_P3, c->x86_capability); | 196 | set_bit(X86_FEATURE_P3, c->x86_capability); |
195 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || | 197 | if ((c->x86 == 0xf && c->x86_model >= 0x03) || |
diff --git a/arch/i386/kernel/cpu/mcheck/k7.c b/arch/i386/kernel/cpu/mcheck/k7.c index b0862af595aa..f9fa4142551e 100644 --- a/arch/i386/kernel/cpu/mcheck/k7.c +++ b/arch/i386/kernel/cpu/mcheck/k7.c | |||
@@ -75,6 +75,9 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) | |||
75 | machine_check_vector = k7_machine_check; | 75 | machine_check_vector = k7_machine_check; |
76 | wmb(); | 76 | wmb(); |
77 | 77 | ||
78 | if (!cpu_has(c, X86_FEATURE_MCE)) | ||
79 | return; | ||
80 | |||
78 | printk (KERN_INFO "Intel machine check architecture supported.\n"); | 81 | printk (KERN_INFO "Intel machine check architecture supported.\n"); |
79 | rdmsr (MSR_IA32_MCG_CAP, l, h); | 82 | rdmsr (MSR_IA32_MCG_CAP, l, h); |
80 | if (l & (1<<8)) /* Control register present ? */ | 83 | if (l & (1<<8)) /* Control register present ? */ |
@@ -82,9 +85,13 @@ void amd_mcheck_init(struct cpuinfo_x86 *c) | |||
82 | nr_mce_banks = l & 0xff; | 85 | nr_mce_banks = l & 0xff; |
83 | 86 | ||
84 | /* Clear status for MC index 0 separately, we don't touch CTL, | 87 | /* Clear status for MC index 0 separately, we don't touch CTL, |
85 | * as some Athlons cause spurious MCEs when its enabled. */ | 88 | * as some K7 Athlons cause spurious MCEs when its enabled. */ |
86 | wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); | 89 | if (boot_cpu_data.x86 == 6) { |
87 | for (i=1; i<nr_mce_banks; i++) { | 90 | wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0); |
91 | i = 1; | ||
92 | } else | ||
93 | i = 0; | ||
94 | for (; i<nr_mce_banks; i++) { | ||
88 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); | 95 | wrmsr (MSR_IA32_MC0_CTL+4*i, 0xffffffff, 0xffffffff); |
89 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); | 96 | wrmsr (MSR_IA32_MC0_STATUS+4*i, 0x0, 0x0); |
90 | } | 97 | } |
diff --git a/arch/i386/kernel/cpu/mcheck/mce.c b/arch/i386/kernel/cpu/mcheck/mce.c index 4f10c62d180c..56cd485b127c 100644 --- a/arch/i386/kernel/cpu/mcheck/mce.c +++ b/arch/i386/kernel/cpu/mcheck/mce.c | |||
@@ -38,8 +38,7 @@ void mcheck_init(struct cpuinfo_x86 *c) | |||
38 | 38 | ||
39 | switch (c->x86_vendor) { | 39 | switch (c->x86_vendor) { |
40 | case X86_VENDOR_AMD: | 40 | case X86_VENDOR_AMD: |
41 | if (c->x86==6 || c->x86==15) | 41 | amd_mcheck_init(c); |
42 | amd_mcheck_init(c); | ||
43 | break; | 42 | break; |
44 | 43 | ||
45 | case X86_VENDOR_INTEL: | 44 | case X86_VENDOR_INTEL: |
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index 504434a46011..1509edfb2313 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c | |||
@@ -124,13 +124,10 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
124 | 124 | ||
125 | 125 | ||
126 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ | 126 | /* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ |
127 | static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | 127 | static inline void intel_get_extended_msrs(struct intel_mce_extended_msrs *r) |
128 | { | 128 | { |
129 | u32 h; | 129 | u32 h; |
130 | 130 | ||
131 | if (mce_num_extended_msrs == 0) | ||
132 | goto done; | ||
133 | |||
134 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); | 131 | rdmsr (MSR_IA32_MCG_EAX, r->eax, h); |
135 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); | 132 | rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); |
136 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); | 133 | rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); |
@@ -141,12 +138,6 @@ static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) | |||
141 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); | 138 | rdmsr (MSR_IA32_MCG_ESP, r->esp, h); |
142 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); | 139 | rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); |
143 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); | 140 | rdmsr (MSR_IA32_MCG_EIP, r->eip, h); |
144 | |||
145 | /* can we rely on kmalloc to do a dynamic | ||
146 | * allocation for the reserved registers? | ||
147 | */ | ||
148 | done: | ||
149 | return mce_num_extended_msrs; | ||
150 | } | 141 | } |
151 | 142 | ||
152 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | 143 | static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) |
@@ -155,7 +146,6 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | |||
155 | u32 alow, ahigh, high, low; | 146 | u32 alow, ahigh, high, low; |
156 | u32 mcgstl, mcgsth; | 147 | u32 mcgstl, mcgsth; |
157 | int i; | 148 | int i; |
158 | struct intel_mce_extended_msrs dbg; | ||
159 | 149 | ||
160 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); | 150 | rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); |
161 | if (mcgstl & (1<<0)) /* Recoverable ? */ | 151 | if (mcgstl & (1<<0)) /* Recoverable ? */ |
@@ -164,7 +154,9 @@ static fastcall void intel_machine_check(struct pt_regs * regs, long error_code) | |||
164 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", | 154 | printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", |
165 | smp_processor_id(), mcgsth, mcgstl); | 155 | smp_processor_id(), mcgsth, mcgstl); |
166 | 156 | ||
167 | if (intel_get_extended_msrs(&dbg)) { | 157 | if (mce_num_extended_msrs > 0) { |
158 | struct intel_mce_extended_msrs dbg; | ||
159 | intel_get_extended_msrs(&dbg); | ||
168 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", | 160 | printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", |
169 | smp_processor_id(), dbg.eip, dbg.eflags); | 161 | smp_processor_id(), dbg.eip, dbg.eflags); |
170 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", | 162 | printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", |
diff --git a/arch/i386/kernel/cpu/mtrr/generic.c b/arch/i386/kernel/cpu/mtrr/generic.c index f77fc53db654..5367e32e0403 100644 --- a/arch/i386/kernel/cpu/mtrr/generic.c +++ b/arch/i386/kernel/cpu/mtrr/generic.c | |||
@@ -20,13 +20,25 @@ struct mtrr_state { | |||
20 | mtrr_type def_type; | 20 | mtrr_type def_type; |
21 | }; | 21 | }; |
22 | 22 | ||
23 | struct fixed_range_block { | ||
24 | int base_msr; /* start address of an MTRR block */ | ||
25 | int ranges; /* number of MTRRs in this block */ | ||
26 | }; | ||
27 | |||
28 | static struct fixed_range_block fixed_range_blocks[] = { | ||
29 | { MTRRfix64K_00000_MSR, 1 }, /* one 64k MTRR */ | ||
30 | { MTRRfix16K_80000_MSR, 2 }, /* two 16k MTRRs */ | ||
31 | { MTRRfix4K_C0000_MSR, 8 }, /* eight 4k MTRRs */ | ||
32 | {} | ||
33 | }; | ||
34 | |||
23 | static unsigned long smp_changes_mask; | 35 | static unsigned long smp_changes_mask; |
24 | static struct mtrr_state mtrr_state = {}; | 36 | static struct mtrr_state mtrr_state = {}; |
25 | 37 | ||
26 | #undef MODULE_PARAM_PREFIX | 38 | #undef MODULE_PARAM_PREFIX |
27 | #define MODULE_PARAM_PREFIX "mtrr." | 39 | #define MODULE_PARAM_PREFIX "mtrr." |
28 | 40 | ||
29 | static __initdata int mtrr_show; | 41 | static int mtrr_show; |
30 | module_param_named(show, mtrr_show, bool, 0); | 42 | module_param_named(show, mtrr_show, bool, 0); |
31 | 43 | ||
32 | /* Get the MSR pair relating to a var range */ | 44 | /* Get the MSR pair relating to a var range */ |
@@ -37,7 +49,7 @@ get_mtrr_var_range(unsigned int index, struct mtrr_var_range *vr) | |||
37 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); | 49 | rdmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); |
38 | } | 50 | } |
39 | 51 | ||
40 | static void __init | 52 | static void |
41 | get_fixed_ranges(mtrr_type * frs) | 53 | get_fixed_ranges(mtrr_type * frs) |
42 | { | 54 | { |
43 | unsigned int *p = (unsigned int *) frs; | 55 | unsigned int *p = (unsigned int *) frs; |
@@ -51,12 +63,18 @@ get_fixed_ranges(mtrr_type * frs) | |||
51 | rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); | 63 | rdmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); |
52 | } | 64 | } |
53 | 65 | ||
54 | static void __init print_fixed(unsigned base, unsigned step, const mtrr_type*types) | 66 | void mtrr_save_fixed_ranges(void *info) |
67 | { | ||
68 | get_fixed_ranges(mtrr_state.fixed_ranges); | ||
69 | } | ||
70 | |||
71 | static void __cpuinit print_fixed(unsigned base, unsigned step, const mtrr_type*types) | ||
55 | { | 72 | { |
56 | unsigned i; | 73 | unsigned i; |
57 | 74 | ||
58 | for (i = 0; i < 8; ++i, ++types, base += step) | 75 | for (i = 0; i < 8; ++i, ++types, base += step) |
59 | printk(KERN_INFO "MTRR %05X-%05X %s\n", base, base + step - 1, mtrr_attrib_to_str(*types)); | 76 | printk(KERN_INFO "MTRR %05X-%05X %s\n", |
77 | base, base + step - 1, mtrr_attrib_to_str(*types)); | ||
60 | } | 78 | } |
61 | 79 | ||
62 | /* Grab all of the MTRR state for this CPU into *state */ | 80 | /* Grab all of the MTRR state for this CPU into *state */ |
@@ -147,6 +165,44 @@ void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) | |||
147 | smp_processor_id(), msr, a, b); | 165 | smp_processor_id(), msr, a, b); |
148 | } | 166 | } |
149 | 167 | ||
168 | /** | ||
169 | * Enable and allow read/write of extended fixed-range MTRR bits on K8 CPUs | ||
170 | * see AMD publication no. 24593, chapter 3.2.1 for more information | ||
171 | */ | ||
172 | static inline void k8_enable_fixed_iorrs(void) | ||
173 | { | ||
174 | unsigned lo, hi; | ||
175 | |||
176 | rdmsr(MSR_K8_SYSCFG, lo, hi); | ||
177 | mtrr_wrmsr(MSR_K8_SYSCFG, lo | ||
178 | | K8_MTRRFIXRANGE_DRAM_ENABLE | ||
179 | | K8_MTRRFIXRANGE_DRAM_MODIFY, hi); | ||
180 | } | ||
181 | |||
182 | /** | ||
183 | * Checks and updates a fixed-range MTRR if it differs from the value it | ||
184 | * should have. If K8 extensions are wanted, update the K8 SYSCFG MSR also. | ||
185 | * see AMD publication no. 24593, chapter 7.8.1, page 233 for more information | ||
186 | * \param msr MSR address of the MTRR which should be checked and updated | ||
187 | * \param changed pointer which indicates whether the MTRR needed to be changed | ||
188 | * \param msrwords pointer to the MSR values which the MSR should have | ||
189 | */ | ||
190 | static void set_fixed_range(int msr, int * changed, unsigned int * msrwords) | ||
191 | { | ||
192 | unsigned lo, hi; | ||
193 | |||
194 | rdmsr(msr, lo, hi); | ||
195 | |||
196 | if (lo != msrwords[0] || hi != msrwords[1]) { | ||
197 | if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD && | ||
198 | boot_cpu_data.x86 == 15 && | ||
199 | ((msrwords[0] | msrwords[1]) & K8_MTRR_RDMEM_WRMEM_MASK)) | ||
200 | k8_enable_fixed_iorrs(); | ||
201 | mtrr_wrmsr(msr, msrwords[0], msrwords[1]); | ||
202 | *changed = TRUE; | ||
203 | } | ||
204 | } | ||
205 | |||
150 | int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) | 206 | int generic_get_free_region(unsigned long base, unsigned long size, int replace_reg) |
151 | /* [SUMMARY] Get a free MTRR. | 207 | /* [SUMMARY] Get a free MTRR. |
152 | <base> The starting (base) address of the region. | 208 | <base> The starting (base) address of the region. |
@@ -196,36 +252,21 @@ static void generic_get_mtrr(unsigned int reg, unsigned long *base, | |||
196 | *type = base_lo & 0xff; | 252 | *type = base_lo & 0xff; |
197 | } | 253 | } |
198 | 254 | ||
255 | /** | ||
256 | * Checks and updates the fixed-range MTRRs if they differ from the saved set | ||
257 | * \param frs pointer to fixed-range MTRR values, saved by get_fixed_ranges() | ||
258 | */ | ||
199 | static int set_fixed_ranges(mtrr_type * frs) | 259 | static int set_fixed_ranges(mtrr_type * frs) |
200 | { | 260 | { |
201 | unsigned int *p = (unsigned int *) frs; | 261 | unsigned long long *saved = (unsigned long long *) frs; |
202 | int changed = FALSE; | 262 | int changed = FALSE; |
203 | int i; | 263 | int block=-1, range; |
204 | unsigned int lo, hi; | ||
205 | 264 | ||
206 | rdmsr(MTRRfix64K_00000_MSR, lo, hi); | 265 | while (fixed_range_blocks[++block].ranges) |
207 | if (p[0] != lo || p[1] != hi) { | 266 | for (range=0; range < fixed_range_blocks[block].ranges; range++) |
208 | mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]); | 267 | set_fixed_range(fixed_range_blocks[block].base_msr + range, |
209 | changed = TRUE; | 268 | &changed, (unsigned int *) saved++); |
210 | } | ||
211 | 269 | ||
212 | for (i = 0; i < 2; i++) { | ||
213 | rdmsr(MTRRfix16K_80000_MSR + i, lo, hi); | ||
214 | if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) { | ||
215 | mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], | ||
216 | p[3 + i * 2]); | ||
217 | changed = TRUE; | ||
218 | } | ||
219 | } | ||
220 | |||
221 | for (i = 0; i < 8; i++) { | ||
222 | rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi); | ||
223 | if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) { | ||
224 | mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], | ||
225 | p[7 + i * 2]); | ||
226 | changed = TRUE; | ||
227 | } | ||
228 | } | ||
229 | return changed; | 270 | return changed; |
230 | } | 271 | } |
231 | 272 | ||
@@ -428,7 +469,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i | |||
428 | } | 469 | } |
429 | } | 470 | } |
430 | 471 | ||
431 | if (base + size < 0x100) { | 472 | if (base < 0x100) { |
432 | printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n", | 473 | printk(KERN_WARNING "mtrr: cannot set region below 1 MiB (0x%lx000,0x%lx000)\n", |
433 | base, size); | 474 | base, size); |
434 | return -EINVAL; | 475 | return -EINVAL; |
diff --git a/arch/i386/kernel/cpu/mtrr/main.c b/arch/i386/kernel/cpu/mtrr/main.c index 0acfb6a5a220..02a2f39e5e0a 100644 --- a/arch/i386/kernel/cpu/mtrr/main.c +++ b/arch/i386/kernel/cpu/mtrr/main.c | |||
@@ -729,6 +729,17 @@ void mtrr_ap_init(void) | |||
729 | local_irq_restore(flags); | 729 | local_irq_restore(flags); |
730 | } | 730 | } |
731 | 731 | ||
732 | /** | ||
733 | * Save current fixed-range MTRR state of the BSP | ||
734 | */ | ||
735 | void mtrr_save_state(void) | ||
736 | { | ||
737 | if (smp_processor_id() == 0) | ||
738 | mtrr_save_fixed_ranges(NULL); | ||
739 | else | ||
740 | smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); | ||
741 | } | ||
742 | |||
732 | static int __init mtrr_init_finialize(void) | 743 | static int __init mtrr_init_finialize(void) |
733 | { | 744 | { |
734 | if (!mtrr_if) | 745 | if (!mtrr_if) |
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index 8bf23cc80c63..961fbe1a748f 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c | |||
@@ -58,13 +58,3 @@ int __init nexgen_init_cpu(void) | |||
58 | cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; | 58 | cpu_devs[X86_VENDOR_NEXGEN] = &nexgen_cpu_dev; |
59 | return 0; | 59 | return 0; |
60 | } | 60 | } |
61 | |||
62 | //early_arch_initcall(nexgen_init_cpu); | ||
63 | |||
64 | static int __init nexgen_exit_cpu(void) | ||
65 | { | ||
66 | cpu_devs[X86_VENDOR_NEXGEN] = NULL; | ||
67 | return 0; | ||
68 | } | ||
69 | |||
70 | late_initcall(nexgen_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/perfctr-watchdog.c b/arch/i386/kernel/cpu/perfctr-watchdog.c new file mode 100644 index 000000000000..2b04c8f1db62 --- /dev/null +++ b/arch/i386/kernel/cpu/perfctr-watchdog.c | |||
@@ -0,0 +1,658 @@ | |||
1 | /* local apic based NMI watchdog for various CPUs. | ||
2 | This file also handles reservation of performance counters for coordination | ||
3 | with other users (like oprofile). | ||
4 | |||
5 | Note that these events normally don't tick when the CPU idles. This means | ||
6 | the frequency varies with CPU load. | ||
7 | |||
8 | Original code for K7/P6 written by Keith Owens */ | ||
9 | |||
10 | #include <linux/percpu.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/kernel.h> | ||
13 | #include <linux/bitops.h> | ||
14 | #include <linux/smp.h> | ||
15 | #include <linux/nmi.h> | ||
16 | #include <asm/apic.h> | ||
17 | #include <asm/intel_arch_perfmon.h> | ||
18 | |||
19 | struct nmi_watchdog_ctlblk { | ||
20 | unsigned int cccr_msr; | ||
21 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
22 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
23 | }; | ||
24 | |||
25 | /* Interface defining a CPU specific perfctr watchdog */ | ||
26 | struct wd_ops { | ||
27 | int (*reserve)(void); | ||
28 | void (*unreserve)(void); | ||
29 | int (*setup)(unsigned nmi_hz); | ||
30 | void (*rearm)(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz); | ||
31 | void (*stop)(void *); | ||
32 | unsigned perfctr; | ||
33 | unsigned evntsel; | ||
34 | u64 checkbit; | ||
35 | }; | ||
36 | |||
37 | static struct wd_ops *wd_ops; | ||
38 | |||
39 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its | ||
40 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | ||
41 | */ | ||
42 | #define NMI_MAX_COUNTER_BITS 66 | ||
43 | |||
44 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: | ||
45 | * evtsel_nmi_owner tracks the ownership of the event selection | ||
46 | * - different performance counters/ event selection may be reserved for | ||
47 | * different subsystems this reservation system just tries to coordinate | ||
48 | * things a little | ||
49 | */ | ||
50 | static DECLARE_BITMAP(perfctr_nmi_owner, NMI_MAX_COUNTER_BITS); | ||
51 | static DECLARE_BITMAP(evntsel_nmi_owner, NMI_MAX_COUNTER_BITS); | ||
52 | |||
53 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
54 | |||
55 | /* converts an msr to an appropriate reservation bit */ | ||
56 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | ||
57 | { | ||
58 | return wd_ops ? msr - wd_ops->perfctr : 0; | ||
59 | } | ||
60 | |||
61 | /* converts an msr to an appropriate reservation bit */ | ||
62 | /* returns the bit offset of the event selection register */ | ||
63 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | ||
64 | { | ||
65 | return wd_ops ? msr - wd_ops->evntsel : 0; | ||
66 | } | ||
67 | |||
68 | /* checks for a bit availability (hack for oprofile) */ | ||
69 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | ||
70 | { | ||
71 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
72 | |||
73 | return (!test_bit(counter, perfctr_nmi_owner)); | ||
74 | } | ||
75 | |||
76 | /* checks an msr for availability */ | ||
77 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
78 | { | ||
79 | unsigned int counter; | ||
80 | |||
81 | counter = nmi_perfctr_msr_to_bit(msr); | ||
82 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
83 | |||
84 | return (!test_bit(counter, perfctr_nmi_owner)); | ||
85 | } | ||
86 | |||
87 | int reserve_perfctr_nmi(unsigned int msr) | ||
88 | { | ||
89 | unsigned int counter; | ||
90 | |||
91 | counter = nmi_perfctr_msr_to_bit(msr); | ||
92 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
93 | |||
94 | if (!test_and_set_bit(counter, perfctr_nmi_owner)) | ||
95 | return 1; | ||
96 | return 0; | ||
97 | } | ||
98 | |||
99 | void release_perfctr_nmi(unsigned int msr) | ||
100 | { | ||
101 | unsigned int counter; | ||
102 | |||
103 | counter = nmi_perfctr_msr_to_bit(msr); | ||
104 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
105 | |||
106 | clear_bit(counter, perfctr_nmi_owner); | ||
107 | } | ||
108 | |||
109 | int reserve_evntsel_nmi(unsigned int msr) | ||
110 | { | ||
111 | unsigned int counter; | ||
112 | |||
113 | counter = nmi_evntsel_msr_to_bit(msr); | ||
114 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
115 | |||
116 | if (!test_and_set_bit(counter, evntsel_nmi_owner)) | ||
117 | return 1; | ||
118 | return 0; | ||
119 | } | ||
120 | |||
121 | void release_evntsel_nmi(unsigned int msr) | ||
122 | { | ||
123 | unsigned int counter; | ||
124 | |||
125 | counter = nmi_evntsel_msr_to_bit(msr); | ||
126 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
127 | |||
128 | clear_bit(counter, evntsel_nmi_owner); | ||
129 | } | ||
130 | |||
131 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | ||
132 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | ||
133 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
134 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
135 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
136 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
137 | |||
138 | void disable_lapic_nmi_watchdog(void) | ||
139 | { | ||
140 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
141 | |||
142 | if (atomic_read(&nmi_active) <= 0) | ||
143 | return; | ||
144 | |||
145 | on_each_cpu(wd_ops->stop, NULL, 0, 1); | ||
146 | wd_ops->unreserve(); | ||
147 | |||
148 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
149 | } | ||
150 | |||
151 | void enable_lapic_nmi_watchdog(void) | ||
152 | { | ||
153 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
154 | |||
155 | /* are we already enabled */ | ||
156 | if (atomic_read(&nmi_active) != 0) | ||
157 | return; | ||
158 | |||
159 | /* are we lapic aware */ | ||
160 | if (!wd_ops) | ||
161 | return; | ||
162 | if (!wd_ops->reserve()) { | ||
163 | printk(KERN_ERR "NMI watchdog: cannot reserve perfctrs\n"); | ||
164 | return; | ||
165 | } | ||
166 | |||
167 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | ||
168 | touch_nmi_watchdog(); | ||
169 | } | ||
170 | |||
171 | /* | ||
172 | * Activate the NMI watchdog via the local APIC. | ||
173 | */ | ||
174 | |||
175 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
176 | { | ||
177 | u64 counter_val; | ||
178 | unsigned int retval = hz; | ||
179 | |||
180 | /* | ||
181 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
182 | * are writable, with higher bits sign extending from bit 31. | ||
183 | * So, we can only program the counter with 31 bit values and | ||
184 | * 32nd bit should be 1, for 33.. to be 1. | ||
185 | * Find the appropriate nmi_hz | ||
186 | */ | ||
187 | counter_val = (u64)cpu_khz * 1000; | ||
188 | do_div(counter_val, retval); | ||
189 | if (counter_val > 0x7fffffffULL) { | ||
190 | u64 count = (u64)cpu_khz * 1000; | ||
191 | do_div(count, 0x7fffffffUL); | ||
192 | retval = count + 1; | ||
193 | } | ||
194 | return retval; | ||
195 | } | ||
196 | |||
197 | static void | ||
198 | write_watchdog_counter(unsigned int perfctr_msr, const char *descr, unsigned nmi_hz) | ||
199 | { | ||
200 | u64 count = (u64)cpu_khz * 1000; | ||
201 | |||
202 | do_div(count, nmi_hz); | ||
203 | if(descr) | ||
204 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
205 | wrmsrl(perfctr_msr, 0 - count); | ||
206 | } | ||
207 | |||
208 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
209 | const char *descr, unsigned nmi_hz) | ||
210 | { | ||
211 | u64 count = (u64)cpu_khz * 1000; | ||
212 | |||
213 | do_div(count, nmi_hz); | ||
214 | if(descr) | ||
215 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
216 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
217 | } | ||
218 | |||
219 | /* AMD K7/K8/Family10h/Family11h support. AMD keeps this interface | ||
220 | nicely stable so there is not much variety */ | ||
221 | |||
222 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
223 | #define K7_EVNTSEL_INT (1 << 20) | ||
224 | #define K7_EVNTSEL_OS (1 << 17) | ||
225 | #define K7_EVNTSEL_USR (1 << 16) | ||
226 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
227 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
228 | |||
229 | static int setup_k7_watchdog(unsigned nmi_hz) | ||
230 | { | ||
231 | unsigned int perfctr_msr, evntsel_msr; | ||
232 | unsigned int evntsel; | ||
233 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
234 | |||
235 | perfctr_msr = MSR_K7_PERFCTR0; | ||
236 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
237 | |||
238 | wrmsrl(perfctr_msr, 0UL); | ||
239 | |||
240 | evntsel = K7_EVNTSEL_INT | ||
241 | | K7_EVNTSEL_OS | ||
242 | | K7_EVNTSEL_USR | ||
243 | | K7_NMI_EVENT; | ||
244 | |||
245 | /* setup the timer */ | ||
246 | wrmsr(evntsel_msr, evntsel, 0); | ||
247 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0",nmi_hz); | ||
248 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
249 | evntsel |= K7_EVNTSEL_ENABLE; | ||
250 | wrmsr(evntsel_msr, evntsel, 0); | ||
251 | |||
252 | wd->perfctr_msr = perfctr_msr; | ||
253 | wd->evntsel_msr = evntsel_msr; | ||
254 | wd->cccr_msr = 0; //unused | ||
255 | return 1; | ||
256 | } | ||
257 | |||
258 | static void single_msr_stop_watchdog(void *arg) | ||
259 | { | ||
260 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
261 | |||
262 | wrmsr(wd->evntsel_msr, 0, 0); | ||
263 | } | ||
264 | |||
265 | static int single_msr_reserve(void) | ||
266 | { | ||
267 | if (!reserve_perfctr_nmi(wd_ops->perfctr)) | ||
268 | return 0; | ||
269 | |||
270 | if (!reserve_evntsel_nmi(wd_ops->evntsel)) { | ||
271 | release_perfctr_nmi(wd_ops->perfctr); | ||
272 | return 0; | ||
273 | } | ||
274 | return 1; | ||
275 | } | ||
276 | |||
277 | static void single_msr_unreserve(void) | ||
278 | { | ||
279 | release_evntsel_nmi(wd_ops->perfctr); | ||
280 | release_perfctr_nmi(wd_ops->evntsel); | ||
281 | } | ||
282 | |||
283 | static void single_msr_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
284 | { | ||
285 | /* start the cycle over again */ | ||
286 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
287 | } | ||
288 | |||
289 | static struct wd_ops k7_wd_ops = { | ||
290 | .reserve = single_msr_reserve, | ||
291 | .unreserve = single_msr_unreserve, | ||
292 | .setup = setup_k7_watchdog, | ||
293 | .rearm = single_msr_rearm, | ||
294 | .stop = single_msr_stop_watchdog, | ||
295 | .perfctr = MSR_K7_PERFCTR0, | ||
296 | .evntsel = MSR_K7_EVNTSEL0, | ||
297 | .checkbit = 1ULL<<63, | ||
298 | }; | ||
299 | |||
300 | /* Intel Model 6 (PPro+,P2,P3,P-M,Core1) */ | ||
301 | |||
302 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
303 | #define P6_EVNTSEL_INT (1 << 20) | ||
304 | #define P6_EVNTSEL_OS (1 << 17) | ||
305 | #define P6_EVNTSEL_USR (1 << 16) | ||
306 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
307 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
308 | |||
309 | static int setup_p6_watchdog(unsigned nmi_hz) | ||
310 | { | ||
311 | unsigned int perfctr_msr, evntsel_msr; | ||
312 | unsigned int evntsel; | ||
313 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
314 | |||
315 | perfctr_msr = MSR_P6_PERFCTR0; | ||
316 | evntsel_msr = MSR_P6_EVNTSEL0; | ||
317 | |||
318 | wrmsrl(perfctr_msr, 0UL); | ||
319 | |||
320 | evntsel = P6_EVNTSEL_INT | ||
321 | | P6_EVNTSEL_OS | ||
322 | | P6_EVNTSEL_USR | ||
323 | | P6_NMI_EVENT; | ||
324 | |||
325 | /* setup the timer */ | ||
326 | wrmsr(evntsel_msr, evntsel, 0); | ||
327 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
328 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0",nmi_hz); | ||
329 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
330 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
331 | wrmsr(evntsel_msr, evntsel, 0); | ||
332 | |||
333 | wd->perfctr_msr = perfctr_msr; | ||
334 | wd->evntsel_msr = evntsel_msr; | ||
335 | wd->cccr_msr = 0; //unused | ||
336 | return 1; | ||
337 | } | ||
338 | |||
339 | static void p6_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
340 | { | ||
341 | /* P6 based Pentium M need to re-unmask | ||
342 | * the apic vector but it doesn't hurt | ||
343 | * other P6 variant. | ||
344 | * ArchPerfom/Core Duo also needs this */ | ||
345 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
346 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
347 | write_watchdog_counter32(wd->perfctr_msr, NULL,nmi_hz); | ||
348 | } | ||
349 | |||
350 | static struct wd_ops p6_wd_ops = { | ||
351 | .reserve = single_msr_reserve, | ||
352 | .unreserve = single_msr_unreserve, | ||
353 | .setup = setup_p6_watchdog, | ||
354 | .rearm = p6_rearm, | ||
355 | .stop = single_msr_stop_watchdog, | ||
356 | .perfctr = MSR_P6_PERFCTR0, | ||
357 | .evntsel = MSR_P6_EVNTSEL0, | ||
358 | .checkbit = 1ULL<<39, | ||
359 | }; | ||
360 | |||
361 | /* Intel P4 performance counters. By far the most complicated of all. */ | ||
362 | |||
363 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
364 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
365 | #define P4_ESCR_OS (1<<3) | ||
366 | #define P4_ESCR_USR (1<<2) | ||
367 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
368 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
369 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
370 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
371 | #define P4_CCCR_COMPARE (1<<18) | ||
372 | #define P4_CCCR_REQUIRED (3<<16) | ||
373 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
374 | #define P4_CCCR_ENABLE (1<<12) | ||
375 | #define P4_CCCR_OVF (1<<31) | ||
376 | |||
377 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
378 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
379 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
380 | |||
381 | static int setup_p4_watchdog(unsigned nmi_hz) | ||
382 | { | ||
383 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
384 | unsigned int evntsel, cccr_val; | ||
385 | unsigned int misc_enable, dummy; | ||
386 | unsigned int ht_num; | ||
387 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
388 | |||
389 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
390 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
391 | return 0; | ||
392 | |||
393 | #ifdef CONFIG_SMP | ||
394 | /* detect which hyperthread we are on */ | ||
395 | if (smp_num_siblings == 2) { | ||
396 | unsigned int ebx, apicid; | ||
397 | |||
398 | ebx = cpuid_ebx(1); | ||
399 | apicid = (ebx >> 24) & 0xff; | ||
400 | ht_num = apicid & 1; | ||
401 | } else | ||
402 | #endif | ||
403 | ht_num = 0; | ||
404 | |||
405 | /* performance counters are shared resources | ||
406 | * assign each hyperthread its own set | ||
407 | * (re-use the ESCR0 register, seems safe | ||
408 | * and keeps the cccr_val the same) | ||
409 | */ | ||
410 | if (!ht_num) { | ||
411 | /* logical cpu 0 */ | ||
412 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
413 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
414 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
415 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
416 | } else { | ||
417 | /* logical cpu 1 */ | ||
418 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
419 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
420 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
421 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
422 | } | ||
423 | |||
424 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
425 | | P4_ESCR_OS | ||
426 | | P4_ESCR_USR; | ||
427 | |||
428 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
429 | | P4_CCCR_COMPLEMENT | ||
430 | | P4_CCCR_COMPARE | ||
431 | | P4_CCCR_REQUIRED; | ||
432 | |||
433 | wrmsr(evntsel_msr, evntsel, 0); | ||
434 | wrmsr(cccr_msr, cccr_val, 0); | ||
435 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0", nmi_hz); | ||
436 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
437 | cccr_val |= P4_CCCR_ENABLE; | ||
438 | wrmsr(cccr_msr, cccr_val, 0); | ||
439 | wd->perfctr_msr = perfctr_msr; | ||
440 | wd->evntsel_msr = evntsel_msr; | ||
441 | wd->cccr_msr = cccr_msr; | ||
442 | return 1; | ||
443 | } | ||
444 | |||
445 | static void stop_p4_watchdog(void *arg) | ||
446 | { | ||
447 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
448 | wrmsr(wd->cccr_msr, 0, 0); | ||
449 | wrmsr(wd->evntsel_msr, 0, 0); | ||
450 | } | ||
451 | |||
452 | static int p4_reserve(void) | ||
453 | { | ||
454 | if (!reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR0)) | ||
455 | return 0; | ||
456 | #ifdef CONFIG_SMP | ||
457 | if (smp_num_siblings > 1 && !reserve_perfctr_nmi(MSR_P4_IQ_PERFCTR1)) | ||
458 | goto fail1; | ||
459 | #endif | ||
460 | if (!reserve_evntsel_nmi(MSR_P4_CRU_ESCR0)) | ||
461 | goto fail2; | ||
462 | /* RED-PEN why is ESCR1 not reserved here? */ | ||
463 | return 1; | ||
464 | fail2: | ||
465 | #ifdef CONFIG_SMP | ||
466 | if (smp_num_siblings > 1) | ||
467 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR1); | ||
468 | fail1: | ||
469 | #endif | ||
470 | release_perfctr_nmi(MSR_P4_IQ_PERFCTR0); | ||
471 | return 0; | ||
472 | } | ||
473 | |||
474 | static void p4_unreserve(void) | ||
475 | { | ||
476 | #ifdef CONFIG_SMP | ||
477 | if (smp_num_siblings > 1) | ||
478 | release_evntsel_nmi(MSR_P4_IQ_PERFCTR1); | ||
479 | #endif | ||
480 | release_evntsel_nmi(MSR_P4_IQ_PERFCTR0); | ||
481 | release_perfctr_nmi(MSR_P4_CRU_ESCR0); | ||
482 | } | ||
483 | |||
484 | static void p4_rearm(struct nmi_watchdog_ctlblk *wd, unsigned nmi_hz) | ||
485 | { | ||
486 | unsigned dummy; | ||
487 | /* | ||
488 | * P4 quirks: | ||
489 | * - An overflown perfctr will assert its interrupt | ||
490 | * until the OVF flag in its CCCR is cleared. | ||
491 | * - LVTPC is masked on interrupt and must be | ||
492 | * unmasked by the LVTPC handler. | ||
493 | */ | ||
494 | rdmsrl(wd->cccr_msr, dummy); | ||
495 | dummy &= ~P4_CCCR_OVF; | ||
496 | wrmsrl(wd->cccr_msr, dummy); | ||
497 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
498 | /* start the cycle over again */ | ||
499 | write_watchdog_counter(wd->perfctr_msr, NULL, nmi_hz); | ||
500 | } | ||
501 | |||
502 | static struct wd_ops p4_wd_ops = { | ||
503 | .reserve = p4_reserve, | ||
504 | .unreserve = p4_unreserve, | ||
505 | .setup = setup_p4_watchdog, | ||
506 | .rearm = p4_rearm, | ||
507 | .stop = stop_p4_watchdog, | ||
508 | /* RED-PEN this is wrong for the other sibling */ | ||
509 | .perfctr = MSR_P4_BPU_PERFCTR0, | ||
510 | .evntsel = MSR_P4_BSU_ESCR0, | ||
511 | .checkbit = 1ULL<<39, | ||
512 | }; | ||
513 | |||
514 | /* Watchdog using the Intel architected PerfMon. Used for Core2 and hopefully | ||
515 | all future Intel CPUs. */ | ||
516 | |||
517 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
518 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
519 | |||
520 | static int setup_intel_arch_watchdog(unsigned nmi_hz) | ||
521 | { | ||
522 | unsigned int ebx; | ||
523 | union cpuid10_eax eax; | ||
524 | unsigned int unused; | ||
525 | unsigned int perfctr_msr, evntsel_msr; | ||
526 | unsigned int evntsel; | ||
527 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
528 | |||
529 | /* | ||
530 | * Check whether the Architectural PerfMon supports | ||
531 | * Unhalted Core Cycles Event or not. | ||
532 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
533 | */ | ||
534 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
535 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
536 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
537 | return 0; | ||
538 | |||
539 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR1; | ||
540 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL1; | ||
541 | |||
542 | wrmsrl(perfctr_msr, 0UL); | ||
543 | |||
544 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
545 | | ARCH_PERFMON_EVENTSEL_OS | ||
546 | | ARCH_PERFMON_EVENTSEL_USR | ||
547 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
548 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
549 | |||
550 | /* setup the timer */ | ||
551 | wrmsr(evntsel_msr, evntsel, 0); | ||
552 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
553 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0", nmi_hz); | ||
554 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
555 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
556 | wrmsr(evntsel_msr, evntsel, 0); | ||
557 | |||
558 | wd->perfctr_msr = perfctr_msr; | ||
559 | wd->evntsel_msr = evntsel_msr; | ||
560 | wd->cccr_msr = 0; //unused | ||
561 | wd_ops->checkbit = 1ULL << (eax.split.bit_width - 1); | ||
562 | return 1; | ||
563 | } | ||
564 | |||
565 | static struct wd_ops intel_arch_wd_ops = { | ||
566 | .reserve = single_msr_reserve, | ||
567 | .unreserve = single_msr_unreserve, | ||
568 | .setup = setup_intel_arch_watchdog, | ||
569 | .rearm = p6_rearm, | ||
570 | .stop = single_msr_stop_watchdog, | ||
571 | .perfctr = MSR_ARCH_PERFMON_PERFCTR0, | ||
572 | .evntsel = MSR_ARCH_PERFMON_EVENTSEL0, | ||
573 | }; | ||
574 | |||
575 | static void probe_nmi_watchdog(void) | ||
576 | { | ||
577 | switch (boot_cpu_data.x86_vendor) { | ||
578 | case X86_VENDOR_AMD: | ||
579 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | ||
580 | boot_cpu_data.x86 != 16) | ||
581 | return; | ||
582 | wd_ops = &k7_wd_ops; | ||
583 | break; | ||
584 | case X86_VENDOR_INTEL: | ||
585 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
586 | wd_ops = &intel_arch_wd_ops; | ||
587 | break; | ||
588 | } | ||
589 | switch (boot_cpu_data.x86) { | ||
590 | case 6: | ||
591 | if (boot_cpu_data.x86_model > 0xd) | ||
592 | return; | ||
593 | |||
594 | wd_ops = &p6_wd_ops; | ||
595 | break; | ||
596 | case 15: | ||
597 | if (boot_cpu_data.x86_model > 0x4) | ||
598 | return; | ||
599 | |||
600 | wd_ops = &p4_wd_ops; | ||
601 | break; | ||
602 | default: | ||
603 | return; | ||
604 | } | ||
605 | break; | ||
606 | } | ||
607 | } | ||
608 | |||
609 | /* Interface to nmi.c */ | ||
610 | |||
611 | int lapic_watchdog_init(unsigned nmi_hz) | ||
612 | { | ||
613 | if (!wd_ops) { | ||
614 | probe_nmi_watchdog(); | ||
615 | if (!wd_ops) | ||
616 | return -1; | ||
617 | } | ||
618 | |||
619 | if (!(wd_ops->setup(nmi_hz))) { | ||
620 | printk(KERN_ERR "Cannot setup NMI watchdog on CPU %d\n", | ||
621 | raw_smp_processor_id()); | ||
622 | return -1; | ||
623 | } | ||
624 | |||
625 | return 0; | ||
626 | } | ||
627 | |||
628 | void lapic_watchdog_stop(void) | ||
629 | { | ||
630 | if (wd_ops) | ||
631 | wd_ops->stop(NULL); | ||
632 | } | ||
633 | |||
634 | unsigned lapic_adjust_nmi_hz(unsigned hz) | ||
635 | { | ||
636 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
637 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
638 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR1) | ||
639 | hz = adjust_for_32bit_ctr(hz); | ||
640 | return hz; | ||
641 | } | ||
642 | |||
643 | int lapic_wd_event(unsigned nmi_hz) | ||
644 | { | ||
645 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
646 | u64 ctr; | ||
647 | rdmsrl(wd->perfctr_msr, ctr); | ||
648 | if (ctr & wd_ops->checkbit) { /* perfctr still running? */ | ||
649 | return 0; | ||
650 | } | ||
651 | wd_ops->rearm(wd, nmi_hz); | ||
652 | return 1; | ||
653 | } | ||
654 | |||
655 | int lapic_watchdog_ok(void) | ||
656 | { | ||
657 | return wd_ops != NULL; | ||
658 | } | ||
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index 47e3ebbfb28d..89d91e6cc972 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -72,8 +72,7 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
72 | "stc", | 72 | "stc", |
73 | "100mhzsteps", | 73 | "100mhzsteps", |
74 | "hwpstate", | 74 | "hwpstate", |
75 | NULL, | 75 | "", /* constant_tsc - moved to flags */ |
76 | NULL, /* constant_tsc - moved to flags */ | ||
77 | /* nothing */ | 76 | /* nothing */ |
78 | }; | 77 | }; |
79 | struct cpuinfo_x86 *c = v; | 78 | struct cpuinfo_x86 *c = v; |
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c index 9317f7414989..50076f22e90f 100644 --- a/arch/i386/kernel/cpu/rise.c +++ b/arch/i386/kernel/cpu/rise.c | |||
@@ -50,12 +50,3 @@ int __init rise_init_cpu(void) | |||
50 | return 0; | 50 | return 0; |
51 | } | 51 | } |
52 | 52 | ||
53 | //early_arch_initcall(rise_init_cpu); | ||
54 | |||
55 | static int __init rise_exit_cpu(void) | ||
56 | { | ||
57 | cpu_devs[X86_VENDOR_RISE] = NULL; | ||
58 | return 0; | ||
59 | } | ||
60 | |||
61 | late_initcall(rise_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 5678d46863c6..6471a5a13202 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c | |||
@@ -112,13 +112,3 @@ int __init transmeta_init_cpu(void) | |||
112 | cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; | 112 | cpu_devs[X86_VENDOR_TRANSMETA] = &transmeta_cpu_dev; |
113 | return 0; | 113 | return 0; |
114 | } | 114 | } |
115 | |||
116 | //early_arch_initcall(transmeta_init_cpu); | ||
117 | |||
118 | static int __init transmeta_exit_cpu(void) | ||
119 | { | ||
120 | cpu_devs[X86_VENDOR_TRANSMETA] = NULL; | ||
121 | return 0; | ||
122 | } | ||
123 | |||
124 | late_initcall(transmeta_exit_cpu); | ||
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c index 1bf3f87e9c5b..a7a4e75bdcd7 100644 --- a/arch/i386/kernel/cpu/umc.c +++ b/arch/i386/kernel/cpu/umc.c | |||
@@ -24,13 +24,3 @@ int __init umc_init_cpu(void) | |||
24 | cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; | 24 | cpu_devs[X86_VENDOR_UMC] = &umc_cpu_dev; |
25 | return 0; | 25 | return 0; |
26 | } | 26 | } |
27 | |||
28 | //early_arch_initcall(umc_init_cpu); | ||
29 | |||
30 | static int __init umc_exit_cpu(void) | ||
31 | { | ||
32 | cpu_devs[X86_VENDOR_UMC] = NULL; | ||
33 | return 0; | ||
34 | } | ||
35 | |||
36 | late_initcall(umc_exit_cpu); | ||
diff --git a/arch/i386/kernel/doublefault.c b/arch/i386/kernel/doublefault.c index b4d14c2eb345..265c5597efb0 100644 --- a/arch/i386/kernel/doublefault.c +++ b/arch/i386/kernel/doublefault.c | |||
@@ -33,7 +33,7 @@ static void doublefault_fn(void) | |||
33 | printk("double fault, tss at %08lx\n", tss); | 33 | printk("double fault, tss at %08lx\n", tss); |
34 | 34 | ||
35 | if (ptr_ok(tss)) { | 35 | if (ptr_ok(tss)) { |
36 | struct tss_struct *t = (struct tss_struct *)tss; | 36 | struct i386_hw_tss *t = (struct i386_hw_tss *)tss; |
37 | 37 | ||
38 | printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp); | 38 | printk("eip = %08lx, esp = %08lx\n", t->eip, t->esp); |
39 | 39 | ||
@@ -49,18 +49,21 @@ static void doublefault_fn(void) | |||
49 | } | 49 | } |
50 | 50 | ||
51 | struct tss_struct doublefault_tss __cacheline_aligned = { | 51 | struct tss_struct doublefault_tss __cacheline_aligned = { |
52 | .esp0 = STACK_START, | 52 | .x86_tss = { |
53 | .ss0 = __KERNEL_DS, | 53 | .esp0 = STACK_START, |
54 | .ldt = 0, | 54 | .ss0 = __KERNEL_DS, |
55 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | 55 | .ldt = 0, |
56 | .io_bitmap_base = INVALID_IO_BITMAP_OFFSET, | ||
56 | 57 | ||
57 | .eip = (unsigned long) doublefault_fn, | 58 | .eip = (unsigned long) doublefault_fn, |
58 | .eflags = X86_EFLAGS_SF | 0x2, /* 0x2 bit is always set */ | 59 | /* 0x2 bit is always set */ |
59 | .esp = STACK_START, | 60 | .eflags = X86_EFLAGS_SF | 0x2, |
60 | .es = __USER_DS, | 61 | .esp = STACK_START, |
61 | .cs = __KERNEL_CS, | 62 | .es = __USER_DS, |
62 | .ss = __KERNEL_DS, | 63 | .cs = __KERNEL_CS, |
63 | .ds = __USER_DS, | 64 | .ss = __KERNEL_DS, |
65 | .ds = __USER_DS, | ||
64 | 66 | ||
65 | .__cr3 = __pa(swapper_pg_dir) | 67 | .__cr3 = __pa(swapper_pg_dir) |
68 | } | ||
66 | }; | 69 | }; |
diff --git a/arch/i386/kernel/e820.c b/arch/i386/kernel/e820.c index 70f39560846a..9645bb51f76a 100644 --- a/arch/i386/kernel/e820.c +++ b/arch/i386/kernel/e820.c | |||
@@ -161,26 +161,27 @@ static struct resource standard_io_resources[] = { { | |||
161 | 161 | ||
162 | static int __init romsignature(const unsigned char *rom) | 162 | static int __init romsignature(const unsigned char *rom) |
163 | { | 163 | { |
164 | const unsigned short * const ptr = (const unsigned short *)rom; | ||
164 | unsigned short sig; | 165 | unsigned short sig; |
165 | 166 | ||
166 | return probe_kernel_address((const unsigned short *)rom, sig) == 0 && | 167 | return probe_kernel_address(ptr, sig) == 0 && sig == ROMSIGNATURE; |
167 | sig == ROMSIGNATURE; | ||
168 | } | 168 | } |
169 | 169 | ||
170 | static int __init romchecksum(unsigned char *rom, unsigned long length) | 170 | static int __init romchecksum(const unsigned char *rom, unsigned long length) |
171 | { | 171 | { |
172 | unsigned char sum; | 172 | unsigned char sum, c; |
173 | 173 | ||
174 | for (sum = 0; length; length--) | 174 | for (sum = 0; length && probe_kernel_address(rom++, c) == 0; length--) |
175 | sum += *rom++; | 175 | sum += c; |
176 | return sum == 0; | 176 | return !length && !sum; |
177 | } | 177 | } |
178 | 178 | ||
179 | static void __init probe_roms(void) | 179 | static void __init probe_roms(void) |
180 | { | 180 | { |
181 | const unsigned char *rom; | ||
181 | unsigned long start, length, upper; | 182 | unsigned long start, length, upper; |
182 | unsigned char *rom; | 183 | unsigned char c; |
183 | int i; | 184 | int i; |
184 | 185 | ||
185 | /* video rom */ | 186 | /* video rom */ |
186 | upper = adapter_rom_resources[0].start; | 187 | upper = adapter_rom_resources[0].start; |
@@ -191,8 +192,11 @@ static void __init probe_roms(void) | |||
191 | 192 | ||
192 | video_rom_resource.start = start; | 193 | video_rom_resource.start = start; |
193 | 194 | ||
195 | if (probe_kernel_address(rom + 2, c) != 0) | ||
196 | continue; | ||
197 | |||
194 | /* 0 < length <= 0x7f * 512, historically */ | 198 | /* 0 < length <= 0x7f * 512, historically */ |
195 | length = rom[2] * 512; | 199 | length = c * 512; |
196 | 200 | ||
197 | /* if checksum okay, trust length byte */ | 201 | /* if checksum okay, trust length byte */ |
198 | if (length && romchecksum(rom, length)) | 202 | if (length && romchecksum(rom, length)) |
@@ -226,8 +230,11 @@ static void __init probe_roms(void) | |||
226 | if (!romsignature(rom)) | 230 | if (!romsignature(rom)) |
227 | continue; | 231 | continue; |
228 | 232 | ||
233 | if (probe_kernel_address(rom + 2, c) != 0) | ||
234 | continue; | ||
235 | |||
229 | /* 0 < length <= 0x7f * 512, historically */ | 236 | /* 0 < length <= 0x7f * 512, historically */ |
230 | length = rom[2] * 512; | 237 | length = c * 512; |
231 | 238 | ||
232 | /* but accept any length that fits if checksum okay */ | 239 | /* but accept any length that fits if checksum okay */ |
233 | if (!length || start + length > upper || !romchecksum(rom, length)) | 240 | if (!length || start + length > upper || !romchecksum(rom, length)) |
@@ -386,10 +393,8 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
386 | ____________________33__ | 393 | ____________________33__ |
387 | ______________________4_ | 394 | ______________________4_ |
388 | */ | 395 | */ |
389 | printk("sanitize start\n"); | ||
390 | /* if there's only one memory region, don't bother */ | 396 | /* if there's only one memory region, don't bother */ |
391 | if (*pnr_map < 2) { | 397 | if (*pnr_map < 2) { |
392 | printk("sanitize bail 0\n"); | ||
393 | return -1; | 398 | return -1; |
394 | } | 399 | } |
395 | 400 | ||
@@ -398,7 +403,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
398 | /* bail out if we find any unreasonable addresses in bios map */ | 403 | /* bail out if we find any unreasonable addresses in bios map */ |
399 | for (i=0; i<old_nr; i++) | 404 | for (i=0; i<old_nr; i++) |
400 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { | 405 | if (biosmap[i].addr + biosmap[i].size < biosmap[i].addr) { |
401 | printk("sanitize bail 1\n"); | ||
402 | return -1; | 406 | return -1; |
403 | } | 407 | } |
404 | 408 | ||
@@ -494,7 +498,6 @@ int __init sanitize_e820_map(struct e820entry * biosmap, char * pnr_map) | |||
494 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); | 498 | memcpy(biosmap, new_bios, new_nr*sizeof(struct e820entry)); |
495 | *pnr_map = new_nr; | 499 | *pnr_map = new_nr; |
496 | 500 | ||
497 | printk("sanitize end\n"); | ||
498 | return 0; | 501 | return 0; |
499 | } | 502 | } |
500 | 503 | ||
@@ -525,7 +528,6 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |||
525 | unsigned long long size = biosmap->size; | 528 | unsigned long long size = biosmap->size; |
526 | unsigned long long end = start + size; | 529 | unsigned long long end = start + size; |
527 | unsigned long type = biosmap->type; | 530 | unsigned long type = biosmap->type; |
528 | printk("copy_e820_map() start: %016Lx size: %016Lx end: %016Lx type: %ld\n", start, size, end, type); | ||
529 | 531 | ||
530 | /* Overflow in 64 bits? Ignore the memory map. */ | 532 | /* Overflow in 64 bits? Ignore the memory map. */ |
531 | if (start > end) | 533 | if (start > end) |
@@ -536,17 +538,11 @@ int __init copy_e820_map(struct e820entry * biosmap, int nr_map) | |||
536 | * Not right. Fix it up. | 538 | * Not right. Fix it up. |
537 | */ | 539 | */ |
538 | if (type == E820_RAM) { | 540 | if (type == E820_RAM) { |
539 | printk("copy_e820_map() type is E820_RAM\n"); | ||
540 | if (start < 0x100000ULL && end > 0xA0000ULL) { | 541 | if (start < 0x100000ULL && end > 0xA0000ULL) { |
541 | printk("copy_e820_map() lies in range...\n"); | 542 | if (start < 0xA0000ULL) |
542 | if (start < 0xA0000ULL) { | ||
543 | printk("copy_e820_map() start < 0xA0000ULL\n"); | ||
544 | add_memory_region(start, 0xA0000ULL-start, type); | 543 | add_memory_region(start, 0xA0000ULL-start, type); |
545 | } | 544 | if (end <= 0x100000ULL) |
546 | if (end <= 0x100000ULL) { | ||
547 | printk("copy_e820_map() end <= 0x100000ULL\n"); | ||
548 | continue; | 545 | continue; |
549 | } | ||
550 | start = 0x100000ULL; | 546 | start = 0x100000ULL; |
551 | size = end - start; | 547 | size = end - start; |
552 | } | 548 | } |
@@ -818,6 +814,26 @@ void __init limit_regions(unsigned long long size) | |||
818 | print_memory_map("limit_regions endfunc"); | 814 | print_memory_map("limit_regions endfunc"); |
819 | } | 815 | } |
820 | 816 | ||
817 | /* | ||
818 | * This function checks if any part of the range <start,end> is mapped | ||
819 | * with type. | ||
820 | */ | ||
821 | int | ||
822 | e820_any_mapped(u64 start, u64 end, unsigned type) | ||
823 | { | ||
824 | int i; | ||
825 | for (i = 0; i < e820.nr_map; i++) { | ||
826 | const struct e820entry *ei = &e820.map[i]; | ||
827 | if (type && ei->type != type) | ||
828 | continue; | ||
829 | if (ei->addr >= end || ei->addr + ei->size <= start) | ||
830 | continue; | ||
831 | return 1; | ||
832 | } | ||
833 | return 0; | ||
834 | } | ||
835 | EXPORT_SYMBOL_GPL(e820_any_mapped); | ||
836 | |||
821 | /* | 837 | /* |
822 | * This function checks if the entire range <start,end> is mapped with type. | 838 | * This function checks if the entire range <start,end> is mapped with type. |
823 | * | 839 | * |
diff --git a/arch/i386/kernel/efi.c b/arch/i386/kernel/efi.c index 8f9c624ace6f..dd9e7faafa7c 100644 --- a/arch/i386/kernel/efi.c +++ b/arch/i386/kernel/efi.c | |||
@@ -69,13 +69,11 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) | |||
69 | { | 69 | { |
70 | unsigned long cr4; | 70 | unsigned long cr4; |
71 | unsigned long temp; | 71 | unsigned long temp; |
72 | struct Xgt_desc_struct *cpu_gdt_descr; | 72 | struct Xgt_desc_struct gdt_descr; |
73 | 73 | ||
74 | spin_lock(&efi_rt_lock); | 74 | spin_lock(&efi_rt_lock); |
75 | local_irq_save(efi_rt_eflags); | 75 | local_irq_save(efi_rt_eflags); |
76 | 76 | ||
77 | cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); | ||
78 | |||
79 | /* | 77 | /* |
80 | * If I don't have PSE, I should just duplicate two entries in page | 78 | * If I don't have PSE, I should just duplicate two entries in page |
81 | * directory. If I have PSE, I just need to duplicate one entry in | 79 | * directory. If I have PSE, I just need to duplicate one entry in |
@@ -105,17 +103,19 @@ static void efi_call_phys_prelog(void) __acquires(efi_rt_lock) | |||
105 | */ | 103 | */ |
106 | local_flush_tlb(); | 104 | local_flush_tlb(); |
107 | 105 | ||
108 | cpu_gdt_descr->address = __pa(cpu_gdt_descr->address); | 106 | gdt_descr.address = __pa(get_cpu_gdt_table(0)); |
109 | load_gdt(cpu_gdt_descr); | 107 | gdt_descr.size = GDT_SIZE - 1; |
108 | load_gdt(&gdt_descr); | ||
110 | } | 109 | } |
111 | 110 | ||
112 | static void efi_call_phys_epilog(void) __releases(efi_rt_lock) | 111 | static void efi_call_phys_epilog(void) __releases(efi_rt_lock) |
113 | { | 112 | { |
114 | unsigned long cr4; | 113 | unsigned long cr4; |
115 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, 0); | 114 | struct Xgt_desc_struct gdt_descr; |
116 | 115 | ||
117 | cpu_gdt_descr->address = (unsigned long)__va(cpu_gdt_descr->address); | 116 | gdt_descr.address = (unsigned long)get_cpu_gdt_table(0); |
118 | load_gdt(cpu_gdt_descr); | 117 | gdt_descr.size = GDT_SIZE - 1; |
118 | load_gdt(&gdt_descr); | ||
119 | 119 | ||
120 | cr4 = read_cr4(); | 120 | cr4 = read_cr4(); |
121 | 121 | ||
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 18bddcb8e9e8..b1f16ee65e4d 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -15,7 +15,7 @@ | |||
15 | * I changed all the .align's to 4 (16 byte alignment), as that's faster | 15 | * I changed all the .align's to 4 (16 byte alignment), as that's faster |
16 | * on a 486. | 16 | * on a 486. |
17 | * | 17 | * |
18 | * Stack layout in 'ret_from_system_call': | 18 | * Stack layout in 'syscall_exit': |
19 | * ptrace needs to have all regs on the stack. | 19 | * ptrace needs to have all regs on the stack. |
20 | * if the order here is changed, it needs to be | 20 | * if the order here is changed, it needs to be |
21 | * updated in fork.c:copy_process, signal.c:do_signal, | 21 | * updated in fork.c:copy_process, signal.c:do_signal, |
@@ -132,7 +132,7 @@ VM_MASK = 0x00020000 | |||
132 | movl $(__USER_DS), %edx; \ | 132 | movl $(__USER_DS), %edx; \ |
133 | movl %edx, %ds; \ | 133 | movl %edx, %ds; \ |
134 | movl %edx, %es; \ | 134 | movl %edx, %es; \ |
135 | movl $(__KERNEL_PDA), %edx; \ | 135 | movl $(__KERNEL_PERCPU), %edx; \ |
136 | movl %edx, %fs | 136 | movl %edx, %fs |
137 | 137 | ||
138 | #define RESTORE_INT_REGS \ | 138 | #define RESTORE_INT_REGS \ |
@@ -305,16 +305,12 @@ sysenter_past_esp: | |||
305 | pushl $(__USER_CS) | 305 | pushl $(__USER_CS) |
306 | CFI_ADJUST_CFA_OFFSET 4 | 306 | CFI_ADJUST_CFA_OFFSET 4 |
307 | /*CFI_REL_OFFSET cs, 0*/ | 307 | /*CFI_REL_OFFSET cs, 0*/ |
308 | #ifndef CONFIG_COMPAT_VDSO | ||
309 | /* | 308 | /* |
310 | * Push current_thread_info()->sysenter_return to the stack. | 309 | * Push current_thread_info()->sysenter_return to the stack. |
311 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words | 310 | * A tiny bit of offset fixup is necessary - 4*4 means the 4 words |
312 | * pushed above; +8 corresponds to copy_thread's esp0 setting. | 311 | * pushed above; +8 corresponds to copy_thread's esp0 setting. |
313 | */ | 312 | */ |
314 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) | 313 | pushl (TI_sysenter_return-THREAD_SIZE+8+4*4)(%esp) |
315 | #else | ||
316 | pushl $SYSENTER_RETURN | ||
317 | #endif | ||
318 | CFI_ADJUST_CFA_OFFSET 4 | 314 | CFI_ADJUST_CFA_OFFSET 4 |
319 | CFI_REL_OFFSET eip, 0 | 315 | CFI_REL_OFFSET eip, 0 |
320 | 316 | ||
@@ -342,7 +338,7 @@ sysenter_past_esp: | |||
342 | jae syscall_badsys | 338 | jae syscall_badsys |
343 | call *sys_call_table(,%eax,4) | 339 | call *sys_call_table(,%eax,4) |
344 | movl %eax,PT_EAX(%esp) | 340 | movl %eax,PT_EAX(%esp) |
345 | DISABLE_INTERRUPTS(CLBR_ECX|CLBR_EDX) | 341 | DISABLE_INTERRUPTS(CLBR_ANY) |
346 | TRACE_IRQS_OFF | 342 | TRACE_IRQS_OFF |
347 | movl TI_flags(%ebp), %ecx | 343 | movl TI_flags(%ebp), %ecx |
348 | testw $_TIF_ALLWORK_MASK, %cx | 344 | testw $_TIF_ALLWORK_MASK, %cx |
@@ -560,9 +556,7 @@ END(syscall_badsys) | |||
560 | 556 | ||
561 | #define FIXUP_ESPFIX_STACK \ | 557 | #define FIXUP_ESPFIX_STACK \ |
562 | /* since we are on a wrong stack, we cant make it a C code :( */ \ | 558 | /* since we are on a wrong stack, we cant make it a C code :( */ \ |
563 | movl %fs:PDA_cpu, %ebx; \ | 559 | PER_CPU(gdt_page, %ebx); \ |
564 | PER_CPU(cpu_gdt_descr, %ebx); \ | ||
565 | movl GDS_address(%ebx), %ebx; \ | ||
566 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ | 560 | GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ |
567 | addl %esp, %eax; \ | 561 | addl %esp, %eax; \ |
568 | pushl $__KERNEL_DS; \ | 562 | pushl $__KERNEL_DS; \ |
@@ -635,7 +629,7 @@ ENTRY(name) \ | |||
635 | SAVE_ALL; \ | 629 | SAVE_ALL; \ |
636 | TRACE_IRQS_OFF \ | 630 | TRACE_IRQS_OFF \ |
637 | movl %esp,%eax; \ | 631 | movl %esp,%eax; \ |
638 | call smp_/**/name; \ | 632 | call smp_##name; \ |
639 | jmp ret_from_intr; \ | 633 | jmp ret_from_intr; \ |
640 | CFI_ENDPROC; \ | 634 | CFI_ENDPROC; \ |
641 | ENDPROC(name) | 635 | ENDPROC(name) |
@@ -643,11 +637,6 @@ ENDPROC(name) | |||
643 | /* The include is where all of the SMP etc. interrupts come from */ | 637 | /* The include is where all of the SMP etc. interrupts come from */ |
644 | #include "entry_arch.h" | 638 | #include "entry_arch.h" |
645 | 639 | ||
646 | /* This alternate entry is needed because we hijack the apic LVTT */ | ||
647 | #if defined(CONFIG_VMI) && defined(CONFIG_X86_LOCAL_APIC) | ||
648 | BUILD_INTERRUPT(apic_vmi_timer_interrupt,LOCAL_TIMER_VECTOR) | ||
649 | #endif | ||
650 | |||
651 | KPROBE_ENTRY(page_fault) | 640 | KPROBE_ENTRY(page_fault) |
652 | RING0_EC_FRAME | 641 | RING0_EC_FRAME |
653 | pushl $do_page_fault | 642 | pushl $do_page_fault |
@@ -686,7 +675,7 @@ error_code: | |||
686 | pushl %fs | 675 | pushl %fs |
687 | CFI_ADJUST_CFA_OFFSET 4 | 676 | CFI_ADJUST_CFA_OFFSET 4 |
688 | /*CFI_REL_OFFSET fs, 0*/ | 677 | /*CFI_REL_OFFSET fs, 0*/ |
689 | movl $(__KERNEL_PDA), %ecx | 678 | movl $(__KERNEL_PERCPU), %ecx |
690 | movl %ecx, %fs | 679 | movl %ecx, %fs |
691 | UNWIND_ESPFIX_STACK | 680 | UNWIND_ESPFIX_STACK |
692 | popl %ecx | 681 | popl %ecx |
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index 3fa7f9389afe..9b10af65faaa 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S | |||
@@ -34,17 +34,32 @@ | |||
34 | 34 | ||
35 | /* | 35 | /* |
36 | * This is how much memory *in addition to the memory covered up to | 36 | * This is how much memory *in addition to the memory covered up to |
37 | * and including _end* we need mapped initially. We need one bit for | 37 | * and including _end* we need mapped initially. |
38 | * each possible page, but only in low memory, which means | 38 | * We need: |
39 | * 2^32/4096/8 = 128K worst case (4G/4G split.) | 39 | * - one bit for each possible page, but only in low memory, which means |
40 | * 2^32/4096/8 = 128K worst case (4G/4G split.) | ||
41 | * - enough space to map all low memory, which means | ||
42 | * (2^32/4096) / 1024 pages (worst case, non PAE) | ||
43 | * (2^32/4096) / 512 + 4 pages (worst case for PAE) | ||
44 | * - a few pages for allocator use before the kernel pagetable has | ||
45 | * been set up | ||
40 | * | 46 | * |
41 | * Modulo rounding, each megabyte assigned here requires a kilobyte of | 47 | * Modulo rounding, each megabyte assigned here requires a kilobyte of |
42 | * memory, which is currently unreclaimed. | 48 | * memory, which is currently unreclaimed. |
43 | * | 49 | * |
44 | * This should be a multiple of a page. | 50 | * This should be a multiple of a page. |
45 | */ | 51 | */ |
46 | #define INIT_MAP_BEYOND_END (128*1024) | 52 | LOW_PAGES = 1<<(32-PAGE_SHIFT_asm) |
47 | 53 | ||
54 | #if PTRS_PER_PMD > 1 | ||
55 | PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PMD) + PTRS_PER_PGD | ||
56 | #else | ||
57 | PAGE_TABLE_SIZE = (LOW_PAGES / PTRS_PER_PGD) | ||
58 | #endif | ||
59 | BOOTBITMAP_SIZE = LOW_PAGES / 8 | ||
60 | ALLOCATOR_SLOP = 4 | ||
61 | |||
62 | INIT_MAP_BEYOND_END = BOOTBITMAP_SIZE + (PAGE_TABLE_SIZE + ALLOCATOR_SLOP)*PAGE_SIZE_asm | ||
48 | 63 | ||
49 | /* | 64 | /* |
50 | * 32-bit kernel entrypoint; only used by the boot CPU. On entry, | 65 | * 32-bit kernel entrypoint; only used by the boot CPU. On entry, |
@@ -147,8 +162,7 @@ page_pde_offset = (__PAGE_OFFSET >> 20); | |||
147 | /* | 162 | /* |
148 | * Non-boot CPU entry point; entered from trampoline.S | 163 | * Non-boot CPU entry point; entered from trampoline.S |
149 | * We can't lgdt here, because lgdt itself uses a data segment, but | 164 | * We can't lgdt here, because lgdt itself uses a data segment, but |
150 | * we know the trampoline has already loaded the boot_gdt_table GDT | 165 | * we know the trampoline has already loaded the boot_gdt for us. |
151 | * for us. | ||
152 | * | 166 | * |
153 | * If cpu hotplug is not supported then this code can go in init section | 167 | * If cpu hotplug is not supported then this code can go in init section |
154 | * which will be freed later | 168 | * which will be freed later |
@@ -318,12 +332,12 @@ is386: movl $2,%ecx # set MP | |||
318 | movl %eax,%cr0 | 332 | movl %eax,%cr0 |
319 | 333 | ||
320 | call check_x87 | 334 | call check_x87 |
321 | call setup_pda | ||
322 | lgdt early_gdt_descr | 335 | lgdt early_gdt_descr |
323 | lidt idt_descr | 336 | lidt idt_descr |
324 | ljmp $(__KERNEL_CS),$1f | 337 | ljmp $(__KERNEL_CS),$1f |
325 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers | 338 | 1: movl $(__KERNEL_DS),%eax # reload all the segment registers |
326 | movl %eax,%ss # after changing gdt. | 339 | movl %eax,%ss # after changing gdt. |
340 | movl %eax,%fs # gets reset once there's real percpu | ||
327 | 341 | ||
328 | movl $(__USER_DS),%eax # DS/ES contains default USER segment | 342 | movl $(__USER_DS),%eax # DS/ES contains default USER segment |
329 | movl %eax,%ds | 343 | movl %eax,%ds |
@@ -333,16 +347,17 @@ is386: movl $2,%ecx # set MP | |||
333 | movl %eax,%gs | 347 | movl %eax,%gs |
334 | lldt %ax | 348 | lldt %ax |
335 | 349 | ||
336 | movl $(__KERNEL_PDA),%eax | ||
337 | mov %eax,%fs | ||
338 | |||
339 | cld # gcc2 wants the direction flag cleared at all times | 350 | cld # gcc2 wants the direction flag cleared at all times |
340 | pushl $0 # fake return address for unwinder | 351 | pushl $0 # fake return address for unwinder |
341 | #ifdef CONFIG_SMP | 352 | #ifdef CONFIG_SMP |
342 | movb ready, %cl | 353 | movb ready, %cl |
343 | movb $1, ready | 354 | movb $1, ready |
344 | cmpb $0,%cl # the first CPU calls start_kernel | 355 | cmpb $0,%cl # the first CPU calls start_kernel |
345 | jne initialize_secondary # all other CPUs call initialize_secondary | 356 | je 1f |
357 | movl $(__KERNEL_PERCPU), %eax | ||
358 | movl %eax,%fs # set this cpu's percpu | ||
359 | jmp initialize_secondary # all other CPUs call initialize_secondary | ||
360 | 1: | ||
346 | #endif /* CONFIG_SMP */ | 361 | #endif /* CONFIG_SMP */ |
347 | jmp start_kernel | 362 | jmp start_kernel |
348 | 363 | ||
@@ -366,23 +381,6 @@ check_x87: | |||
366 | ret | 381 | ret |
367 | 382 | ||
368 | /* | 383 | /* |
369 | * Point the GDT at this CPU's PDA. On boot this will be | ||
370 | * cpu_gdt_table and boot_pda; for secondary CPUs, these will be | ||
371 | * that CPU's GDT and PDA. | ||
372 | */ | ||
373 | ENTRY(setup_pda) | ||
374 | /* get the PDA pointer */ | ||
375 | movl start_pda, %eax | ||
376 | |||
377 | /* slot the PDA address into the GDT */ | ||
378 | mov early_gdt_descr+2, %ecx | ||
379 | mov %ax, (__KERNEL_PDA+0+2)(%ecx) /* base & 0x0000ffff */ | ||
380 | shr $16, %eax | ||
381 | mov %al, (__KERNEL_PDA+4+0)(%ecx) /* base & 0x00ff0000 */ | ||
382 | mov %ah, (__KERNEL_PDA+4+3)(%ecx) /* base & 0xff000000 */ | ||
383 | ret | ||
384 | |||
385 | /* | ||
386 | * setup_idt | 384 | * setup_idt |
387 | * | 385 | * |
388 | * sets up a idt with 256 entries pointing to | 386 | * sets up a idt with 256 entries pointing to |
@@ -554,9 +552,6 @@ ENTRY(empty_zero_page) | |||
554 | * This starts the data section. | 552 | * This starts the data section. |
555 | */ | 553 | */ |
556 | .data | 554 | .data |
557 | ENTRY(start_pda) | ||
558 | .long boot_pda | ||
559 | |||
560 | ENTRY(stack_start) | 555 | ENTRY(stack_start) |
561 | .long init_thread_union+THREAD_SIZE | 556 | .long init_thread_union+THREAD_SIZE |
562 | .long __BOOT_DS | 557 | .long __BOOT_DS |
@@ -588,7 +583,7 @@ fault_msg: | |||
588 | .word 0 # 32 bit align gdt_desc.address | 583 | .word 0 # 32 bit align gdt_desc.address |
589 | boot_gdt_descr: | 584 | boot_gdt_descr: |
590 | .word __BOOT_DS+7 | 585 | .word __BOOT_DS+7 |
591 | .long boot_gdt_table - __PAGE_OFFSET | 586 | .long boot_gdt - __PAGE_OFFSET |
592 | 587 | ||
593 | .word 0 # 32-bit align idt_desc.address | 588 | .word 0 # 32-bit align idt_desc.address |
594 | idt_descr: | 589 | idt_descr: |
@@ -599,67 +594,14 @@ idt_descr: | |||
599 | .word 0 # 32 bit align gdt_desc.address | 594 | .word 0 # 32 bit align gdt_desc.address |
600 | ENTRY(early_gdt_descr) | 595 | ENTRY(early_gdt_descr) |
601 | .word GDT_ENTRIES*8-1 | 596 | .word GDT_ENTRIES*8-1 |
602 | .long cpu_gdt_table | 597 | .long per_cpu__gdt_page /* Overwritten for secondary CPUs */ |
603 | 598 | ||
604 | /* | 599 | /* |
605 | * The boot_gdt_table must mirror the equivalent in setup.S and is | 600 | * The boot_gdt must mirror the equivalent in setup.S and is |
606 | * used only for booting. | 601 | * used only for booting. |
607 | */ | 602 | */ |
608 | .align L1_CACHE_BYTES | 603 | .align L1_CACHE_BYTES |
609 | ENTRY(boot_gdt_table) | 604 | ENTRY(boot_gdt) |
610 | .fill GDT_ENTRY_BOOT_CS,8,0 | 605 | .fill GDT_ENTRY_BOOT_CS,8,0 |
611 | .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ | 606 | .quad 0x00cf9a000000ffff /* kernel 4GB code at 0x00000000 */ |
612 | .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ | 607 | .quad 0x00cf92000000ffff /* kernel 4GB data at 0x00000000 */ |
613 | |||
614 | /* | ||
615 | * The Global Descriptor Table contains 28 quadwords, per-CPU. | ||
616 | */ | ||
617 | .align L1_CACHE_BYTES | ||
618 | ENTRY(cpu_gdt_table) | ||
619 | .quad 0x0000000000000000 /* NULL descriptor */ | ||
620 | .quad 0x0000000000000000 /* 0x0b reserved */ | ||
621 | .quad 0x0000000000000000 /* 0x13 reserved */ | ||
622 | .quad 0x0000000000000000 /* 0x1b reserved */ | ||
623 | .quad 0x0000000000000000 /* 0x20 unused */ | ||
624 | .quad 0x0000000000000000 /* 0x28 unused */ | ||
625 | .quad 0x0000000000000000 /* 0x33 TLS entry 1 */ | ||
626 | .quad 0x0000000000000000 /* 0x3b TLS entry 2 */ | ||
627 | .quad 0x0000000000000000 /* 0x43 TLS entry 3 */ | ||
628 | .quad 0x0000000000000000 /* 0x4b reserved */ | ||
629 | .quad 0x0000000000000000 /* 0x53 reserved */ | ||
630 | .quad 0x0000000000000000 /* 0x5b reserved */ | ||
631 | |||
632 | .quad 0x00cf9a000000ffff /* 0x60 kernel 4GB code at 0x00000000 */ | ||
633 | .quad 0x00cf92000000ffff /* 0x68 kernel 4GB data at 0x00000000 */ | ||
634 | .quad 0x00cffa000000ffff /* 0x73 user 4GB code at 0x00000000 */ | ||
635 | .quad 0x00cff2000000ffff /* 0x7b user 4GB data at 0x00000000 */ | ||
636 | |||
637 | .quad 0x0000000000000000 /* 0x80 TSS descriptor */ | ||
638 | .quad 0x0000000000000000 /* 0x88 LDT descriptor */ | ||
639 | |||
640 | /* | ||
641 | * Segments used for calling PnP BIOS have byte granularity. | ||
642 | * They code segments and data segments have fixed 64k limits, | ||
643 | * the transfer segment sizes are set at run time. | ||
644 | */ | ||
645 | .quad 0x00409a000000ffff /* 0x90 32-bit code */ | ||
646 | .quad 0x00009a000000ffff /* 0x98 16-bit code */ | ||
647 | .quad 0x000092000000ffff /* 0xa0 16-bit data */ | ||
648 | .quad 0x0000920000000000 /* 0xa8 16-bit data */ | ||
649 | .quad 0x0000920000000000 /* 0xb0 16-bit data */ | ||
650 | |||
651 | /* | ||
652 | * The APM segments have byte granularity and their bases | ||
653 | * are set at run time. All have 64k limits. | ||
654 | */ | ||
655 | .quad 0x00409a000000ffff /* 0xb8 APM CS code */ | ||
656 | .quad 0x00009a000000ffff /* 0xc0 APM CS 16 code (16 bit) */ | ||
657 | .quad 0x004092000000ffff /* 0xc8 APM DS data */ | ||
658 | |||
659 | .quad 0x00c0920000000000 /* 0xd0 - ESPFIX SS */ | ||
660 | .quad 0x00cf92000000ffff /* 0xd8 - PDA */ | ||
661 | .quad 0x0000000000000000 /* 0xe0 - unused */ | ||
662 | .quad 0x0000000000000000 /* 0xe8 - unused */ | ||
663 | .quad 0x0000000000000000 /* 0xf0 - unused */ | ||
664 | .quad 0x0000000000000000 /* 0xf8 - GDT entry 31: double-fault TSS */ | ||
665 | |||
diff --git a/arch/i386/kernel/i386_ksyms.c b/arch/i386/kernel/i386_ksyms.c index 4afe26e86260..e3d4b73bfdb0 100644 --- a/arch/i386/kernel/i386_ksyms.c +++ b/arch/i386/kernel/i386_ksyms.c | |||
@@ -28,5 +28,3 @@ EXPORT_SYMBOL(__read_lock_failed); | |||
28 | #endif | 28 | #endif |
29 | 29 | ||
30 | EXPORT_SYMBOL(csum_partial); | 30 | EXPORT_SYMBOL(csum_partial); |
31 | |||
32 | EXPORT_SYMBOL(_proxy_pda); | ||
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 89d85d244926..1b623cda3a64 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -35,6 +35,7 @@ | |||
35 | #include <linux/msi.h> | 35 | #include <linux/msi.h> |
36 | #include <linux/htirq.h> | 36 | #include <linux/htirq.h> |
37 | #include <linux/freezer.h> | 37 | #include <linux/freezer.h> |
38 | #include <linux/kthread.h> | ||
38 | 39 | ||
39 | #include <asm/io.h> | 40 | #include <asm/io.h> |
40 | #include <asm/smp.h> | 41 | #include <asm/smp.h> |
@@ -661,8 +662,6 @@ static int balanced_irq(void *unused) | |||
661 | unsigned long prev_balance_time = jiffies; | 662 | unsigned long prev_balance_time = jiffies; |
662 | long time_remaining = balanced_irq_interval; | 663 | long time_remaining = balanced_irq_interval; |
663 | 664 | ||
664 | daemonize("kirqd"); | ||
665 | |||
666 | /* push everything to CPU 0 to give us a starting point. */ | 665 | /* push everything to CPU 0 to give us a starting point. */ |
667 | for (i = 0 ; i < NR_IRQS ; i++) { | 666 | for (i = 0 ; i < NR_IRQS ; i++) { |
668 | irq_desc[i].pending_mask = cpumask_of_cpu(0); | 667 | irq_desc[i].pending_mask = cpumask_of_cpu(0); |
@@ -722,10 +721,9 @@ static int __init balanced_irq_init(void) | |||
722 | } | 721 | } |
723 | 722 | ||
724 | printk(KERN_INFO "Starting balanced_irq\n"); | 723 | printk(KERN_INFO "Starting balanced_irq\n"); |
725 | if (kernel_thread(balanced_irq, NULL, CLONE_KERNEL) >= 0) | 724 | if (!IS_ERR(kthread_run(balanced_irq, NULL, "kirqd"))) |
726 | return 0; | 725 | return 0; |
727 | else | 726 | printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); |
728 | printk(KERN_ERR "balanced_irq_init: failed to spawn balanced_irq"); | ||
729 | failed: | 727 | failed: |
730 | for_each_possible_cpu(i) { | 728 | for_each_possible_cpu(i) { |
731 | kfree(irq_cpu_data[i].irq_delta); | 729 | kfree(irq_cpu_data[i].irq_delta); |
@@ -1403,10 +1401,6 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in | |||
1403 | enable_8259A_irq(0); | 1401 | enable_8259A_irq(0); |
1404 | } | 1402 | } |
1405 | 1403 | ||
1406 | static inline void UNEXPECTED_IO_APIC(void) | ||
1407 | { | ||
1408 | } | ||
1409 | |||
1410 | void __init print_IO_APIC(void) | 1404 | void __init print_IO_APIC(void) |
1411 | { | 1405 | { |
1412 | int apic, i; | 1406 | int apic, i; |
@@ -1446,34 +1440,12 @@ void __init print_IO_APIC(void) | |||
1446 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); | 1440 | printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); |
1447 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); | 1441 | printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); |
1448 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); | 1442 | printk(KERN_DEBUG "....... : LTS : %X\n", reg_00.bits.LTS); |
1449 | if (reg_00.bits.ID >= get_physical_broadcast()) | ||
1450 | UNEXPECTED_IO_APIC(); | ||
1451 | if (reg_00.bits.__reserved_1 || reg_00.bits.__reserved_2) | ||
1452 | UNEXPECTED_IO_APIC(); | ||
1453 | 1443 | ||
1454 | printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); | 1444 | printk(KERN_DEBUG ".... register #01: %08X\n", reg_01.raw); |
1455 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); | 1445 | printk(KERN_DEBUG "....... : max redirection entries: %04X\n", reg_01.bits.entries); |
1456 | if ( (reg_01.bits.entries != 0x0f) && /* older (Neptune) boards */ | ||
1457 | (reg_01.bits.entries != 0x17) && /* typical ISA+PCI boards */ | ||
1458 | (reg_01.bits.entries != 0x1b) && /* Compaq Proliant boards */ | ||
1459 | (reg_01.bits.entries != 0x1f) && /* dual Xeon boards */ | ||
1460 | (reg_01.bits.entries != 0x22) && /* bigger Xeon boards */ | ||
1461 | (reg_01.bits.entries != 0x2E) && | ||
1462 | (reg_01.bits.entries != 0x3F) | ||
1463 | ) | ||
1464 | UNEXPECTED_IO_APIC(); | ||
1465 | 1446 | ||
1466 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); | 1447 | printk(KERN_DEBUG "....... : PRQ implemented: %X\n", reg_01.bits.PRQ); |
1467 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); | 1448 | printk(KERN_DEBUG "....... : IO APIC version: %04X\n", reg_01.bits.version); |
1468 | if ( (reg_01.bits.version != 0x01) && /* 82489DX IO-APICs */ | ||
1469 | (reg_01.bits.version != 0x10) && /* oldest IO-APICs */ | ||
1470 | (reg_01.bits.version != 0x11) && /* Pentium/Pro IO-APICs */ | ||
1471 | (reg_01.bits.version != 0x13) && /* Xeon IO-APICs */ | ||
1472 | (reg_01.bits.version != 0x20) /* Intel P64H (82806 AA) */ | ||
1473 | ) | ||
1474 | UNEXPECTED_IO_APIC(); | ||
1475 | if (reg_01.bits.__reserved_1 || reg_01.bits.__reserved_2) | ||
1476 | UNEXPECTED_IO_APIC(); | ||
1477 | 1449 | ||
1478 | /* | 1450 | /* |
1479 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, | 1451 | * Some Intel chipsets with IO APIC VERSION of 0x1? don't have reg_02, |
@@ -1483,8 +1455,6 @@ void __init print_IO_APIC(void) | |||
1483 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { | 1455 | if (reg_01.bits.version >= 0x10 && reg_02.raw != reg_01.raw) { |
1484 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); | 1456 | printk(KERN_DEBUG ".... register #02: %08X\n", reg_02.raw); |
1485 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); | 1457 | printk(KERN_DEBUG "....... : arbitration: %02X\n", reg_02.bits.arbitration); |
1486 | if (reg_02.bits.__reserved_1 || reg_02.bits.__reserved_2) | ||
1487 | UNEXPECTED_IO_APIC(); | ||
1488 | } | 1458 | } |
1489 | 1459 | ||
1490 | /* | 1460 | /* |
@@ -1496,8 +1466,6 @@ void __init print_IO_APIC(void) | |||
1496 | reg_03.raw != reg_01.raw) { | 1466 | reg_03.raw != reg_01.raw) { |
1497 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); | 1467 | printk(KERN_DEBUG ".... register #03: %08X\n", reg_03.raw); |
1498 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); | 1468 | printk(KERN_DEBUG "....... : Boot DT : %X\n", reg_03.bits.boot_DT); |
1499 | if (reg_03.bits.__reserved_1) | ||
1500 | UNEXPECTED_IO_APIC(); | ||
1501 | } | 1469 | } |
1502 | 1470 | ||
1503 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); | 1471 | printk(KERN_DEBUG ".... IRQ redirection table:\n"); |
diff --git a/arch/i386/kernel/ioport.c b/arch/i386/kernel/ioport.c index 498e8bc197d5..d1e42e0dbe67 100644 --- a/arch/i386/kernel/ioport.c +++ b/arch/i386/kernel/ioport.c | |||
@@ -16,6 +16,7 @@ | |||
16 | #include <linux/stddef.h> | 16 | #include <linux/stddef.h> |
17 | #include <linux/slab.h> | 17 | #include <linux/slab.h> |
18 | #include <linux/thread_info.h> | 18 | #include <linux/thread_info.h> |
19 | #include <linux/syscalls.h> | ||
19 | 20 | ||
20 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ | 21 | /* Set EXTENT bits starting at BASE in BITMAP to value TURN_ON. */ |
21 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) | 22 | static void set_bitmap(unsigned long *bitmap, unsigned int base, unsigned int extent, int new_value) |
@@ -113,7 +114,7 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) | |||
113 | * Reset the owner so that a process switch will not set | 114 | * Reset the owner so that a process switch will not set |
114 | * tss->io_bitmap_base to IO_BITMAP_OFFSET. | 115 | * tss->io_bitmap_base to IO_BITMAP_OFFSET. |
115 | */ | 116 | */ |
116 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | 117 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; |
117 | tss->io_bitmap_owner = NULL; | 118 | tss->io_bitmap_owner = NULL; |
118 | 119 | ||
119 | put_cpu(); | 120 | put_cpu(); |
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 8db8d514c9c0..d2daf672f4a2 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c | |||
@@ -24,6 +24,9 @@ | |||
24 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; | 24 | DEFINE_PER_CPU(irq_cpustat_t, irq_stat) ____cacheline_internodealigned_in_smp; |
25 | EXPORT_PER_CPU_SYMBOL(irq_stat); | 25 | EXPORT_PER_CPU_SYMBOL(irq_stat); |
26 | 26 | ||
27 | DEFINE_PER_CPU(struct pt_regs *, irq_regs); | ||
28 | EXPORT_PER_CPU_SYMBOL(irq_regs); | ||
29 | |||
27 | /* | 30 | /* |
28 | * 'what should we do if we get a hw irq event on an illegal vector'. | 31 | * 'what should we do if we get a hw irq event on an illegal vector'. |
29 | * each architecture has to answer this themselves. | 32 | * each architecture has to answer this themselves. |
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index 4f5983c98669..0952eccd8f28 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c | |||
@@ -477,7 +477,7 @@ static int __init smp_read_mpc(struct mp_config_table *mpc) | |||
477 | } | 477 | } |
478 | ++mpc_record; | 478 | ++mpc_record; |
479 | } | 479 | } |
480 | clustered_apic_check(); | 480 | setup_apic_routing(); |
481 | if (!num_processors) | 481 | if (!num_processors) |
482 | printk(KERN_ERR "SMP mptable: no processors registered!\n"); | 482 | printk(KERN_ERR "SMP mptable: no processors registered!\n"); |
483 | return num_processors; | 483 | return num_processors; |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index 84c3497efb60..33cf2f3c444f 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -20,7 +20,6 @@ | |||
20 | #include <linux/sysdev.h> | 20 | #include <linux/sysdev.h> |
21 | #include <linux/sysctl.h> | 21 | #include <linux/sysctl.h> |
22 | #include <linux/percpu.h> | 22 | #include <linux/percpu.h> |
23 | #include <linux/dmi.h> | ||
24 | #include <linux/kprobes.h> | 23 | #include <linux/kprobes.h> |
25 | #include <linux/cpumask.h> | 24 | #include <linux/cpumask.h> |
26 | #include <linux/kernel_stat.h> | 25 | #include <linux/kernel_stat.h> |
@@ -28,30 +27,14 @@ | |||
28 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
29 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
30 | #include <asm/kdebug.h> | 29 | #include <asm/kdebug.h> |
31 | #include <asm/intel_arch_perfmon.h> | ||
32 | 30 | ||
33 | #include "mach_traps.h" | 31 | #include "mach_traps.h" |
34 | 32 | ||
35 | int unknown_nmi_panic; | 33 | int unknown_nmi_panic; |
36 | int nmi_watchdog_enabled; | 34 | int nmi_watchdog_enabled; |
37 | 35 | ||
38 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: | ||
39 | * evtsel_nmi_owner tracks the ownership of the event selection | ||
40 | * - different performance counters/ event selection may be reserved for | ||
41 | * different subsystems this reservation system just tries to coordinate | ||
42 | * things a little | ||
43 | */ | ||
44 | |||
45 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's | ||
46 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) | ||
47 | */ | ||
48 | #define NMI_MAX_COUNTER_BITS 66 | ||
49 | #define NMI_MAX_COUNTER_LONGS BITS_TO_LONGS(NMI_MAX_COUNTER_BITS) | ||
50 | |||
51 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner[NMI_MAX_COUNTER_LONGS]); | ||
52 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[NMI_MAX_COUNTER_LONGS]); | ||
53 | |||
54 | static cpumask_t backtrace_mask = CPU_MASK_NONE; | 36 | static cpumask_t backtrace_mask = CPU_MASK_NONE; |
37 | |||
55 | /* nmi_active: | 38 | /* nmi_active: |
56 | * >0: the lapic NMI watchdog is active, but can be disabled | 39 | * >0: the lapic NMI watchdog is active, but can be disabled |
57 | * <0: the lapic NMI watchdog has not been set up, and cannot | 40 | * <0: the lapic NMI watchdog has not been set up, and cannot |
@@ -63,206 +46,11 @@ atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ | |||
63 | unsigned int nmi_watchdog = NMI_DEFAULT; | 46 | unsigned int nmi_watchdog = NMI_DEFAULT; |
64 | static unsigned int nmi_hz = HZ; | 47 | static unsigned int nmi_hz = HZ; |
65 | 48 | ||
66 | struct nmi_watchdog_ctlblk { | 49 | static DEFINE_PER_CPU(short, wd_enabled); |
67 | int enabled; | ||
68 | u64 check_bit; | ||
69 | unsigned int cccr_msr; | ||
70 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ | ||
71 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ | ||
72 | }; | ||
73 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
74 | 50 | ||
75 | /* local prototypes */ | 51 | /* local prototypes */ |
76 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); | 52 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
77 | 53 | ||
78 | extern void show_registers(struct pt_regs *regs); | ||
79 | extern int unknown_nmi_panic; | ||
80 | |||
81 | /* converts an msr to an appropriate reservation bit */ | ||
82 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | ||
83 | { | ||
84 | /* returns the bit offset of the performance counter register */ | ||
85 | switch (boot_cpu_data.x86_vendor) { | ||
86 | case X86_VENDOR_AMD: | ||
87 | return (msr - MSR_K7_PERFCTR0); | ||
88 | case X86_VENDOR_INTEL: | ||
89 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
90 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
91 | |||
92 | switch (boot_cpu_data.x86) { | ||
93 | case 6: | ||
94 | return (msr - MSR_P6_PERFCTR0); | ||
95 | case 15: | ||
96 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
97 | } | ||
98 | } | ||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* converts an msr to an appropriate reservation bit */ | ||
103 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | ||
104 | { | ||
105 | /* returns the bit offset of the event selection register */ | ||
106 | switch (boot_cpu_data.x86_vendor) { | ||
107 | case X86_VENDOR_AMD: | ||
108 | return (msr - MSR_K7_EVNTSEL0); | ||
109 | case X86_VENDOR_INTEL: | ||
110 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
111 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | ||
112 | |||
113 | switch (boot_cpu_data.x86) { | ||
114 | case 6: | ||
115 | return (msr - MSR_P6_EVNTSEL0); | ||
116 | case 15: | ||
117 | return (msr - MSR_P4_BSU_ESCR0); | ||
118 | } | ||
119 | } | ||
120 | return 0; | ||
121 | } | ||
122 | |||
123 | /* checks for a bit availability (hack for oprofile) */ | ||
124 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | ||
125 | { | ||
126 | int cpu; | ||
127 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
128 | for_each_possible_cpu (cpu) { | ||
129 | if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0])) | ||
130 | return 0; | ||
131 | } | ||
132 | return 1; | ||
133 | } | ||
134 | |||
135 | /* checks the an msr for availability */ | ||
136 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
137 | { | ||
138 | unsigned int counter; | ||
139 | int cpu; | ||
140 | |||
141 | counter = nmi_perfctr_msr_to_bit(msr); | ||
142 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
143 | |||
144 | for_each_possible_cpu (cpu) { | ||
145 | if (test_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0])) | ||
146 | return 0; | ||
147 | } | ||
148 | return 1; | ||
149 | } | ||
150 | |||
151 | static int __reserve_perfctr_nmi(int cpu, unsigned int msr) | ||
152 | { | ||
153 | unsigned int counter; | ||
154 | if (cpu < 0) | ||
155 | cpu = smp_processor_id(); | ||
156 | |||
157 | counter = nmi_perfctr_msr_to_bit(msr); | ||
158 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
159 | |||
160 | if (!test_and_set_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0])) | ||
161 | return 1; | ||
162 | return 0; | ||
163 | } | ||
164 | |||
165 | static void __release_perfctr_nmi(int cpu, unsigned int msr) | ||
166 | { | ||
167 | unsigned int counter; | ||
168 | if (cpu < 0) | ||
169 | cpu = smp_processor_id(); | ||
170 | |||
171 | counter = nmi_perfctr_msr_to_bit(msr); | ||
172 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
173 | |||
174 | clear_bit(counter, &per_cpu(perfctr_nmi_owner, cpu)[0]); | ||
175 | } | ||
176 | |||
177 | int reserve_perfctr_nmi(unsigned int msr) | ||
178 | { | ||
179 | int cpu, i; | ||
180 | for_each_possible_cpu (cpu) { | ||
181 | if (!__reserve_perfctr_nmi(cpu, msr)) { | ||
182 | for_each_possible_cpu (i) { | ||
183 | if (i >= cpu) | ||
184 | break; | ||
185 | __release_perfctr_nmi(i, msr); | ||
186 | } | ||
187 | return 0; | ||
188 | } | ||
189 | } | ||
190 | return 1; | ||
191 | } | ||
192 | |||
193 | void release_perfctr_nmi(unsigned int msr) | ||
194 | { | ||
195 | int cpu; | ||
196 | for_each_possible_cpu (cpu) { | ||
197 | __release_perfctr_nmi(cpu, msr); | ||
198 | } | ||
199 | } | ||
200 | |||
201 | int __reserve_evntsel_nmi(int cpu, unsigned int msr) | ||
202 | { | ||
203 | unsigned int counter; | ||
204 | if (cpu < 0) | ||
205 | cpu = smp_processor_id(); | ||
206 | |||
207 | counter = nmi_evntsel_msr_to_bit(msr); | ||
208 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
209 | |||
210 | if (!test_and_set_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0])) | ||
211 | return 1; | ||
212 | return 0; | ||
213 | } | ||
214 | |||
215 | static void __release_evntsel_nmi(int cpu, unsigned int msr) | ||
216 | { | ||
217 | unsigned int counter; | ||
218 | if (cpu < 0) | ||
219 | cpu = smp_processor_id(); | ||
220 | |||
221 | counter = nmi_evntsel_msr_to_bit(msr); | ||
222 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
223 | |||
224 | clear_bit(counter, &per_cpu(evntsel_nmi_owner, cpu)[0]); | ||
225 | } | ||
226 | |||
227 | int reserve_evntsel_nmi(unsigned int msr) | ||
228 | { | ||
229 | int cpu, i; | ||
230 | for_each_possible_cpu (cpu) { | ||
231 | if (!__reserve_evntsel_nmi(cpu, msr)) { | ||
232 | for_each_possible_cpu (i) { | ||
233 | if (i >= cpu) | ||
234 | break; | ||
235 | __release_evntsel_nmi(i, msr); | ||
236 | } | ||
237 | return 0; | ||
238 | } | ||
239 | } | ||
240 | return 1; | ||
241 | } | ||
242 | |||
243 | void release_evntsel_nmi(unsigned int msr) | ||
244 | { | ||
245 | int cpu; | ||
246 | for_each_possible_cpu (cpu) { | ||
247 | __release_evntsel_nmi(cpu, msr); | ||
248 | } | ||
249 | } | ||
250 | |||
251 | static __cpuinit inline int nmi_known_cpu(void) | ||
252 | { | ||
253 | switch (boot_cpu_data.x86_vendor) { | ||
254 | case X86_VENDOR_AMD: | ||
255 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6) | ||
256 | || (boot_cpu_data.x86 == 16)); | ||
257 | case X86_VENDOR_INTEL: | ||
258 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
259 | return 1; | ||
260 | else | ||
261 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
262 | } | ||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | static int endflag __initdata = 0; | 54 | static int endflag __initdata = 0; |
267 | 55 | ||
268 | #ifdef CONFIG_SMP | 56 | #ifdef CONFIG_SMP |
@@ -284,28 +72,6 @@ static __init void nmi_cpu_busy(void *data) | |||
284 | } | 72 | } |
285 | #endif | 73 | #endif |
286 | 74 | ||
287 | static unsigned int adjust_for_32bit_ctr(unsigned int hz) | ||
288 | { | ||
289 | u64 counter_val; | ||
290 | unsigned int retval = hz; | ||
291 | |||
292 | /* | ||
293 | * On Intel CPUs with P6/ARCH_PERFMON only 32 bits in the counter | ||
294 | * are writable, with higher bits sign extending from bit 31. | ||
295 | * So, we can only program the counter with 31 bit values and | ||
296 | * 32nd bit should be 1, for 33.. to be 1. | ||
297 | * Find the appropriate nmi_hz | ||
298 | */ | ||
299 | counter_val = (u64)cpu_khz * 1000; | ||
300 | do_div(counter_val, retval); | ||
301 | if (counter_val > 0x7fffffffULL) { | ||
302 | u64 count = (u64)cpu_khz * 1000; | ||
303 | do_div(count, 0x7fffffffUL); | ||
304 | retval = count + 1; | ||
305 | } | ||
306 | return retval; | ||
307 | } | ||
308 | |||
309 | static int __init check_nmi_watchdog(void) | 75 | static int __init check_nmi_watchdog(void) |
310 | { | 76 | { |
311 | unsigned int *prev_nmi_count; | 77 | unsigned int *prev_nmi_count; |
@@ -338,14 +104,14 @@ static int __init check_nmi_watchdog(void) | |||
338 | if (!cpu_isset(cpu, cpu_callin_map)) | 104 | if (!cpu_isset(cpu, cpu_callin_map)) |
339 | continue; | 105 | continue; |
340 | #endif | 106 | #endif |
341 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | 107 | if (!per_cpu(wd_enabled, cpu)) |
342 | continue; | 108 | continue; |
343 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 109 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { |
344 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 110 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
345 | cpu, | 111 | cpu, |
346 | prev_nmi_count[cpu], | 112 | prev_nmi_count[cpu], |
347 | nmi_count(cpu)); | 113 | nmi_count(cpu)); |
348 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; | 114 | per_cpu(wd_enabled, cpu) = 0; |
349 | atomic_dec(&nmi_active); | 115 | atomic_dec(&nmi_active); |
350 | } | 116 | } |
351 | } | 117 | } |
@@ -359,16 +125,8 @@ static int __init check_nmi_watchdog(void) | |||
359 | 125 | ||
360 | /* now that we know it works we can reduce NMI frequency to | 126 | /* now that we know it works we can reduce NMI frequency to |
361 | something more reasonable; makes a difference in some configs */ | 127 | something more reasonable; makes a difference in some configs */ |
362 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 128 | if (nmi_watchdog == NMI_LOCAL_APIC) |
363 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 129 | nmi_hz = lapic_adjust_nmi_hz(1); |
364 | |||
365 | nmi_hz = 1; | ||
366 | |||
367 | if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
368 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
369 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
370 | } | ||
371 | } | ||
372 | 130 | ||
373 | kfree(prev_nmi_count); | 131 | kfree(prev_nmi_count); |
374 | return 0; | 132 | return 0; |
@@ -391,85 +149,8 @@ static int __init setup_nmi_watchdog(char *str) | |||
391 | 149 | ||
392 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 150 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
393 | 151 | ||
394 | static void disable_lapic_nmi_watchdog(void) | ||
395 | { | ||
396 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
397 | |||
398 | if (atomic_read(&nmi_active) <= 0) | ||
399 | return; | ||
400 | |||
401 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); | ||
402 | |||
403 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
404 | } | ||
405 | |||
406 | static void enable_lapic_nmi_watchdog(void) | ||
407 | { | ||
408 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); | ||
409 | |||
410 | /* are we already enabled */ | ||
411 | if (atomic_read(&nmi_active) != 0) | ||
412 | return; | ||
413 | |||
414 | /* are we lapic aware */ | ||
415 | if (nmi_known_cpu() <= 0) | ||
416 | return; | ||
417 | 152 | ||
418 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | 153 | /* Suspend/resume support */ |
419 | touch_nmi_watchdog(); | ||
420 | } | ||
421 | |||
422 | void disable_timer_nmi_watchdog(void) | ||
423 | { | ||
424 | BUG_ON(nmi_watchdog != NMI_IO_APIC); | ||
425 | |||
426 | if (atomic_read(&nmi_active) <= 0) | ||
427 | return; | ||
428 | |||
429 | disable_irq(0); | ||
430 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); | ||
431 | |||
432 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
433 | } | ||
434 | |||
435 | void enable_timer_nmi_watchdog(void) | ||
436 | { | ||
437 | BUG_ON(nmi_watchdog != NMI_IO_APIC); | ||
438 | |||
439 | if (atomic_read(&nmi_active) == 0) { | ||
440 | touch_nmi_watchdog(); | ||
441 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); | ||
442 | enable_irq(0); | ||
443 | } | ||
444 | } | ||
445 | |||
446 | static void __acpi_nmi_disable(void *__unused) | ||
447 | { | ||
448 | apic_write_around(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); | ||
449 | } | ||
450 | |||
451 | /* | ||
452 | * Disable timer based NMIs on all CPUs: | ||
453 | */ | ||
454 | void acpi_nmi_disable(void) | ||
455 | { | ||
456 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
457 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); | ||
458 | } | ||
459 | |||
460 | static void __acpi_nmi_enable(void *__unused) | ||
461 | { | ||
462 | apic_write_around(APIC_LVT0, APIC_DM_NMI); | ||
463 | } | ||
464 | |||
465 | /* | ||
466 | * Enable timer based NMIs on all CPUs: | ||
467 | */ | ||
468 | void acpi_nmi_enable(void) | ||
469 | { | ||
470 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) | ||
471 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | ||
472 | } | ||
473 | 154 | ||
474 | #ifdef CONFIG_PM | 155 | #ifdef CONFIG_PM |
475 | 156 | ||
@@ -516,7 +197,7 @@ static int __init init_lapic_nmi_sysfs(void) | |||
516 | if (nmi_watchdog != NMI_LOCAL_APIC) | 197 | if (nmi_watchdog != NMI_LOCAL_APIC) |
517 | return 0; | 198 | return 0; |
518 | 199 | ||
519 | if ( atomic_read(&nmi_active) < 0 ) | 200 | if (atomic_read(&nmi_active) < 0) |
520 | return 0; | 201 | return 0; |
521 | 202 | ||
522 | error = sysdev_class_register(&nmi_sysclass); | 203 | error = sysdev_class_register(&nmi_sysclass); |
@@ -529,433 +210,69 @@ late_initcall(init_lapic_nmi_sysfs); | |||
529 | 210 | ||
530 | #endif /* CONFIG_PM */ | 211 | #endif /* CONFIG_PM */ |
531 | 212 | ||
532 | /* | 213 | static void __acpi_nmi_enable(void *__unused) |
533 | * Activate the NMI watchdog via the local APIC. | ||
534 | * Original code written by Keith Owens. | ||
535 | */ | ||
536 | |||
537 | static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) | ||
538 | { | ||
539 | u64 count = (u64)cpu_khz * 1000; | ||
540 | |||
541 | do_div(count, nmi_hz); | ||
542 | if(descr) | ||
543 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
544 | wrmsrl(perfctr_msr, 0 - count); | ||
545 | } | ||
546 | |||
547 | static void write_watchdog_counter32(unsigned int perfctr_msr, | ||
548 | const char *descr) | ||
549 | { | ||
550 | u64 count = (u64)cpu_khz * 1000; | ||
551 | |||
552 | do_div(count, nmi_hz); | ||
553 | if(descr) | ||
554 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | ||
555 | wrmsr(perfctr_msr, (u32)(-count), 0); | ||
556 | } | ||
557 | |||
558 | /* Note that these events don't tick when the CPU idles. This means | ||
559 | the frequency varies with CPU load. */ | ||
560 | |||
561 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
562 | #define K7_EVNTSEL_INT (1 << 20) | ||
563 | #define K7_EVNTSEL_OS (1 << 17) | ||
564 | #define K7_EVNTSEL_USR (1 << 16) | ||
565 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
566 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
567 | |||
568 | static int setup_k7_watchdog(void) | ||
569 | { | ||
570 | unsigned int perfctr_msr, evntsel_msr; | ||
571 | unsigned int evntsel; | ||
572 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
573 | |||
574 | perfctr_msr = MSR_K7_PERFCTR0; | ||
575 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
576 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
577 | goto fail; | ||
578 | |||
579 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
580 | goto fail1; | ||
581 | |||
582 | wrmsrl(perfctr_msr, 0UL); | ||
583 | |||
584 | evntsel = K7_EVNTSEL_INT | ||
585 | | K7_EVNTSEL_OS | ||
586 | | K7_EVNTSEL_USR | ||
587 | | K7_NMI_EVENT; | ||
588 | |||
589 | /* setup the timer */ | ||
590 | wrmsr(evntsel_msr, evntsel, 0); | ||
591 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0"); | ||
592 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
593 | evntsel |= K7_EVNTSEL_ENABLE; | ||
594 | wrmsr(evntsel_msr, evntsel, 0); | ||
595 | |||
596 | wd->perfctr_msr = perfctr_msr; | ||
597 | wd->evntsel_msr = evntsel_msr; | ||
598 | wd->cccr_msr = 0; //unused | ||
599 | wd->check_bit = 1ULL<<63; | ||
600 | return 1; | ||
601 | fail1: | ||
602 | __release_perfctr_nmi(-1, perfctr_msr); | ||
603 | fail: | ||
604 | return 0; | ||
605 | } | ||
606 | |||
607 | static void stop_k7_watchdog(void) | ||
608 | { | ||
609 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
610 | |||
611 | wrmsr(wd->evntsel_msr, 0, 0); | ||
612 | |||
613 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
614 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
615 | } | ||
616 | |||
617 | #define P6_EVNTSEL0_ENABLE (1 << 22) | ||
618 | #define P6_EVNTSEL_INT (1 << 20) | ||
619 | #define P6_EVNTSEL_OS (1 << 17) | ||
620 | #define P6_EVNTSEL_USR (1 << 16) | ||
621 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
622 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
623 | |||
624 | static int setup_p6_watchdog(void) | ||
625 | { | ||
626 | unsigned int perfctr_msr, evntsel_msr; | ||
627 | unsigned int evntsel; | ||
628 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
629 | |||
630 | perfctr_msr = MSR_P6_PERFCTR0; | ||
631 | evntsel_msr = MSR_P6_EVNTSEL0; | ||
632 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
633 | goto fail; | ||
634 | |||
635 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
636 | goto fail1; | ||
637 | |||
638 | wrmsrl(perfctr_msr, 0UL); | ||
639 | |||
640 | evntsel = P6_EVNTSEL_INT | ||
641 | | P6_EVNTSEL_OS | ||
642 | | P6_EVNTSEL_USR | ||
643 | | P6_NMI_EVENT; | ||
644 | |||
645 | /* setup the timer */ | ||
646 | wrmsr(evntsel_msr, evntsel, 0); | ||
647 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
648 | write_watchdog_counter32(perfctr_msr, "P6_PERFCTR0"); | ||
649 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
650 | evntsel |= P6_EVNTSEL0_ENABLE; | ||
651 | wrmsr(evntsel_msr, evntsel, 0); | ||
652 | |||
653 | wd->perfctr_msr = perfctr_msr; | ||
654 | wd->evntsel_msr = evntsel_msr; | ||
655 | wd->cccr_msr = 0; //unused | ||
656 | wd->check_bit = 1ULL<<39; | ||
657 | return 1; | ||
658 | fail1: | ||
659 | __release_perfctr_nmi(-1, perfctr_msr); | ||
660 | fail: | ||
661 | return 0; | ||
662 | } | ||
663 | |||
664 | static void stop_p6_watchdog(void) | ||
665 | { | ||
666 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
667 | |||
668 | wrmsr(wd->evntsel_msr, 0, 0); | ||
669 | |||
670 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
671 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
672 | } | ||
673 | |||
674 | /* Note that these events don't tick when the CPU idles. This means | ||
675 | the frequency varies with CPU load. */ | ||
676 | |||
677 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
678 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
679 | #define P4_ESCR_OS (1<<3) | ||
680 | #define P4_ESCR_USR (1<<2) | ||
681 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
682 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
683 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
684 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
685 | #define P4_CCCR_COMPARE (1<<18) | ||
686 | #define P4_CCCR_REQUIRED (3<<16) | ||
687 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
688 | #define P4_CCCR_ENABLE (1<<12) | ||
689 | #define P4_CCCR_OVF (1<<31) | ||
690 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
691 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
692 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
693 | |||
694 | static int setup_p4_watchdog(void) | ||
695 | { | 214 | { |
696 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | 215 | apic_write_around(APIC_LVT0, APIC_DM_NMI); |
697 | unsigned int evntsel, cccr_val; | ||
698 | unsigned int misc_enable, dummy; | ||
699 | unsigned int ht_num; | ||
700 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
701 | |||
702 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); | ||
703 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | ||
704 | return 0; | ||
705 | |||
706 | #ifdef CONFIG_SMP | ||
707 | /* detect which hyperthread we are on */ | ||
708 | if (smp_num_siblings == 2) { | ||
709 | unsigned int ebx, apicid; | ||
710 | |||
711 | ebx = cpuid_ebx(1); | ||
712 | apicid = (ebx >> 24) & 0xff; | ||
713 | ht_num = apicid & 1; | ||
714 | } else | ||
715 | #endif | ||
716 | ht_num = 0; | ||
717 | |||
718 | /* performance counters are shared resources | ||
719 | * assign each hyperthread its own set | ||
720 | * (re-use the ESCR0 register, seems safe | ||
721 | * and keeps the cccr_val the same) | ||
722 | */ | ||
723 | if (!ht_num) { | ||
724 | /* logical cpu 0 */ | ||
725 | perfctr_msr = MSR_P4_IQ_PERFCTR0; | ||
726 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
727 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
728 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
729 | } else { | ||
730 | /* logical cpu 1 */ | ||
731 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
732 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
733 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
734 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
735 | } | ||
736 | |||
737 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
738 | goto fail; | ||
739 | |||
740 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
741 | goto fail1; | ||
742 | |||
743 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) | ||
744 | | P4_ESCR_OS | ||
745 | | P4_ESCR_USR; | ||
746 | |||
747 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
748 | | P4_CCCR_COMPLEMENT | ||
749 | | P4_CCCR_COMPARE | ||
750 | | P4_CCCR_REQUIRED; | ||
751 | |||
752 | wrmsr(evntsel_msr, evntsel, 0); | ||
753 | wrmsr(cccr_msr, cccr_val, 0); | ||
754 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0"); | ||
755 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
756 | cccr_val |= P4_CCCR_ENABLE; | ||
757 | wrmsr(cccr_msr, cccr_val, 0); | ||
758 | wd->perfctr_msr = perfctr_msr; | ||
759 | wd->evntsel_msr = evntsel_msr; | ||
760 | wd->cccr_msr = cccr_msr; | ||
761 | wd->check_bit = 1ULL<<39; | ||
762 | return 1; | ||
763 | fail1: | ||
764 | __release_perfctr_nmi(-1, perfctr_msr); | ||
765 | fail: | ||
766 | return 0; | ||
767 | } | 216 | } |
768 | 217 | ||
769 | static void stop_p4_watchdog(void) | 218 | /* |
219 | * Enable timer based NMIs on all CPUs: | ||
220 | */ | ||
221 | void acpi_nmi_enable(void) | ||
770 | { | 222 | { |
771 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 223 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) |
772 | 224 | on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); | |
773 | wrmsr(wd->cccr_msr, 0, 0); | ||
774 | wrmsr(wd->evntsel_msr, 0, 0); | ||
775 | |||
776 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
777 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
778 | } | 225 | } |
779 | 226 | ||
780 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | 227 | static void __acpi_nmi_disable(void *__unused) |
781 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
782 | |||
783 | static int setup_intel_arch_watchdog(void) | ||
784 | { | 228 | { |
785 | unsigned int ebx; | 229 | apic_write(APIC_LVT0, APIC_DM_NMI | APIC_LVT_MASKED); |
786 | union cpuid10_eax eax; | ||
787 | unsigned int unused; | ||
788 | unsigned int perfctr_msr, evntsel_msr; | ||
789 | unsigned int evntsel; | ||
790 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
791 | |||
792 | /* | ||
793 | * Check whether the Architectural PerfMon supports | ||
794 | * Unhalted Core Cycles Event or not. | ||
795 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
796 | */ | ||
797 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
798 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
799 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
800 | goto fail; | ||
801 | |||
802 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
803 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
804 | |||
805 | if (!__reserve_perfctr_nmi(-1, perfctr_msr)) | ||
806 | goto fail; | ||
807 | |||
808 | if (!__reserve_evntsel_nmi(-1, evntsel_msr)) | ||
809 | goto fail1; | ||
810 | |||
811 | wrmsrl(perfctr_msr, 0UL); | ||
812 | |||
813 | evntsel = ARCH_PERFMON_EVENTSEL_INT | ||
814 | | ARCH_PERFMON_EVENTSEL_OS | ||
815 | | ARCH_PERFMON_EVENTSEL_USR | ||
816 | | ARCH_PERFMON_NMI_EVENT_SEL | ||
817 | | ARCH_PERFMON_NMI_EVENT_UMASK; | ||
818 | |||
819 | /* setup the timer */ | ||
820 | wrmsr(evntsel_msr, evntsel, 0); | ||
821 | nmi_hz = adjust_for_32bit_ctr(nmi_hz); | ||
822 | write_watchdog_counter32(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | ||
823 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
824 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | ||
825 | wrmsr(evntsel_msr, evntsel, 0); | ||
826 | |||
827 | wd->perfctr_msr = perfctr_msr; | ||
828 | wd->evntsel_msr = evntsel_msr; | ||
829 | wd->cccr_msr = 0; //unused | ||
830 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
831 | return 1; | ||
832 | fail1: | ||
833 | __release_perfctr_nmi(-1, perfctr_msr); | ||
834 | fail: | ||
835 | return 0; | ||
836 | } | 230 | } |
837 | 231 | ||
838 | static void stop_intel_arch_watchdog(void) | 232 | /* |
233 | * Disable timer based NMIs on all CPUs: | ||
234 | */ | ||
235 | void acpi_nmi_disable(void) | ||
839 | { | 236 | { |
840 | unsigned int ebx; | 237 | if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) |
841 | union cpuid10_eax eax; | 238 | on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); |
842 | unsigned int unused; | ||
843 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
844 | |||
845 | /* | ||
846 | * Check whether the Architectural PerfMon supports | ||
847 | * Unhalted Core Cycles Event or not. | ||
848 | * NOTE: Corresponding bit = 0 in ebx indicates event present. | ||
849 | */ | ||
850 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
851 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
852 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
853 | return; | ||
854 | |||
855 | wrmsr(wd->evntsel_msr, 0, 0); | ||
856 | __release_evntsel_nmi(-1, wd->evntsel_msr); | ||
857 | __release_perfctr_nmi(-1, wd->perfctr_msr); | ||
858 | } | 239 | } |
859 | 240 | ||
860 | void setup_apic_nmi_watchdog (void *unused) | 241 | void setup_apic_nmi_watchdog (void *unused) |
861 | { | 242 | { |
862 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | 243 | if (__get_cpu_var(wd_enabled)) |
863 | 244 | return; | |
864 | /* only support LOCAL and IO APICs for now */ | ||
865 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
866 | (nmi_watchdog != NMI_IO_APIC)) | ||
867 | return; | ||
868 | |||
869 | if (wd->enabled == 1) | ||
870 | return; | ||
871 | 245 | ||
872 | /* cheap hack to support suspend/resume */ | 246 | /* cheap hack to support suspend/resume */ |
873 | /* if cpu0 is not active neither should the other cpus */ | 247 | /* if cpu0 is not active neither should the other cpus */ |
874 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) | 248 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) |
875 | return; | 249 | return; |
876 | 250 | ||
877 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 251 | switch (nmi_watchdog) { |
878 | switch (boot_cpu_data.x86_vendor) { | 252 | case NMI_LOCAL_APIC: |
879 | case X86_VENDOR_AMD: | 253 | __get_cpu_var(wd_enabled) = 1; /* enable it before to avoid race with handler */ |
880 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15 && | 254 | if (lapic_watchdog_init(nmi_hz) < 0) { |
881 | boot_cpu_data.x86 != 16) | 255 | __get_cpu_var(wd_enabled) = 0; |
882 | return; | ||
883 | if (!setup_k7_watchdog()) | ||
884 | return; | ||
885 | break; | ||
886 | case X86_VENDOR_INTEL: | ||
887 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
888 | if (!setup_intel_arch_watchdog()) | ||
889 | return; | ||
890 | break; | ||
891 | } | ||
892 | switch (boot_cpu_data.x86) { | ||
893 | case 6: | ||
894 | if (boot_cpu_data.x86_model > 0xd) | ||
895 | return; | ||
896 | |||
897 | if (!setup_p6_watchdog()) | ||
898 | return; | ||
899 | break; | ||
900 | case 15: | ||
901 | if (boot_cpu_data.x86_model > 0x4) | ||
902 | return; | ||
903 | |||
904 | if (!setup_p4_watchdog()) | ||
905 | return; | ||
906 | break; | ||
907 | default: | ||
908 | return; | ||
909 | } | ||
910 | break; | ||
911 | default: | ||
912 | return; | 256 | return; |
913 | } | 257 | } |
258 | /* FALL THROUGH */ | ||
259 | case NMI_IO_APIC: | ||
260 | __get_cpu_var(wd_enabled) = 1; | ||
261 | atomic_inc(&nmi_active); | ||
914 | } | 262 | } |
915 | wd->enabled = 1; | ||
916 | atomic_inc(&nmi_active); | ||
917 | } | 263 | } |
918 | 264 | ||
919 | void stop_apic_nmi_watchdog(void *unused) | 265 | void stop_apic_nmi_watchdog(void *unused) |
920 | { | 266 | { |
921 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
922 | |||
923 | /* only support LOCAL and IO APICs for now */ | 267 | /* only support LOCAL and IO APICs for now */ |
924 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | 268 | if ((nmi_watchdog != NMI_LOCAL_APIC) && |
925 | (nmi_watchdog != NMI_IO_APIC)) | 269 | (nmi_watchdog != NMI_IO_APIC)) |
926 | return; | 270 | return; |
927 | 271 | if (__get_cpu_var(wd_enabled) == 0) | |
928 | if (wd->enabled == 0) | ||
929 | return; | 272 | return; |
930 | 273 | if (nmi_watchdog == NMI_LOCAL_APIC) | |
931 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 274 | lapic_watchdog_stop(); |
932 | switch (boot_cpu_data.x86_vendor) { | 275 | __get_cpu_var(wd_enabled) = 0; |
933 | case X86_VENDOR_AMD: | ||
934 | stop_k7_watchdog(); | ||
935 | break; | ||
936 | case X86_VENDOR_INTEL: | ||
937 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
938 | stop_intel_arch_watchdog(); | ||
939 | break; | ||
940 | } | ||
941 | switch (boot_cpu_data.x86) { | ||
942 | case 6: | ||
943 | if (boot_cpu_data.x86_model > 0xd) | ||
944 | break; | ||
945 | stop_p6_watchdog(); | ||
946 | break; | ||
947 | case 15: | ||
948 | if (boot_cpu_data.x86_model > 0x4) | ||
949 | break; | ||
950 | stop_p4_watchdog(); | ||
951 | break; | ||
952 | } | ||
953 | break; | ||
954 | default: | ||
955 | return; | ||
956 | } | ||
957 | } | ||
958 | wd->enabled = 0; | ||
959 | atomic_dec(&nmi_active); | 276 | atomic_dec(&nmi_active); |
960 | } | 277 | } |
961 | 278 | ||
@@ -1011,8 +328,6 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
1011 | unsigned int sum; | 328 | unsigned int sum; |
1012 | int touched = 0; | 329 | int touched = 0; |
1013 | int cpu = smp_processor_id(); | 330 | int cpu = smp_processor_id(); |
1014 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
1015 | u64 dummy; | ||
1016 | int rc=0; | 331 | int rc=0; |
1017 | 332 | ||
1018 | /* check for other users first */ | 333 | /* check for other users first */ |
@@ -1055,53 +370,20 @@ __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) | |||
1055 | alert_counter[cpu] = 0; | 370 | alert_counter[cpu] = 0; |
1056 | } | 371 | } |
1057 | /* see if the nmi watchdog went off */ | 372 | /* see if the nmi watchdog went off */ |
1058 | if (wd->enabled) { | 373 | if (!__get_cpu_var(wd_enabled)) |
1059 | if (nmi_watchdog == NMI_LOCAL_APIC) { | 374 | return rc; |
1060 | rdmsrl(wd->perfctr_msr, dummy); | 375 | switch (nmi_watchdog) { |
1061 | if (dummy & wd->check_bit){ | 376 | case NMI_LOCAL_APIC: |
1062 | /* this wasn't a watchdog timer interrupt */ | 377 | rc |= lapic_wd_event(nmi_hz); |
1063 | goto done; | 378 | break; |
1064 | } | 379 | case NMI_IO_APIC: |
1065 | 380 | /* don't know how to accurately check for this. | |
1066 | /* only Intel P4 uses the cccr msr */ | 381 | * just assume it was a watchdog timer interrupt |
1067 | if (wd->cccr_msr != 0) { | 382 | * This matches the old behaviour. |
1068 | /* | 383 | */ |
1069 | * P4 quirks: | 384 | rc = 1; |
1070 | * - An overflown perfctr will assert its interrupt | 385 | break; |
1071 | * until the OVF flag in its CCCR is cleared. | ||
1072 | * - LVTPC is masked on interrupt and must be | ||
1073 | * unmasked by the LVTPC handler. | ||
1074 | */ | ||
1075 | rdmsrl(wd->cccr_msr, dummy); | ||
1076 | dummy &= ~P4_CCCR_OVF; | ||
1077 | wrmsrl(wd->cccr_msr, dummy); | ||
1078 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1079 | /* start the cycle over again */ | ||
1080 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
1081 | } | ||
1082 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
1083 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
1084 | /* P6 based Pentium M need to re-unmask | ||
1085 | * the apic vector but it doesn't hurt | ||
1086 | * other P6 variant. | ||
1087 | * ArchPerfom/Core Duo also needs this */ | ||
1088 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
1089 | /* P6/ARCH_PERFMON has 32 bit counter write */ | ||
1090 | write_watchdog_counter32(wd->perfctr_msr, NULL); | ||
1091 | } else { | ||
1092 | /* start the cycle over again */ | ||
1093 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
1094 | } | ||
1095 | rc = 1; | ||
1096 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
1097 | /* don't know how to accurately check for this. | ||
1098 | * just assume it was a watchdog timer interrupt | ||
1099 | * This matches the old behaviour. | ||
1100 | */ | ||
1101 | rc = 1; | ||
1102 | } | ||
1103 | } | 386 | } |
1104 | done: | ||
1105 | return rc; | 387 | return rc; |
1106 | } | 388 | } |
1107 | 389 | ||
@@ -1146,7 +428,7 @@ int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, | |||
1146 | } | 428 | } |
1147 | 429 | ||
1148 | if (nmi_watchdog == NMI_DEFAULT) { | 430 | if (nmi_watchdog == NMI_DEFAULT) { |
1149 | if (nmi_known_cpu() > 0) | 431 | if (lapic_watchdog_ok()) |
1150 | nmi_watchdog = NMI_LOCAL_APIC; | 432 | nmi_watchdog = NMI_LOCAL_APIC; |
1151 | else | 433 | else |
1152 | nmi_watchdog = NMI_IO_APIC; | 434 | nmi_watchdog = NMI_IO_APIC; |
@@ -1182,11 +464,3 @@ void __trigger_all_cpu_backtrace(void) | |||
1182 | 464 | ||
1183 | EXPORT_SYMBOL(nmi_active); | 465 | EXPORT_SYMBOL(nmi_active); |
1184 | EXPORT_SYMBOL(nmi_watchdog); | 466 | EXPORT_SYMBOL(nmi_watchdog); |
1185 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); | ||
1186 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); | ||
1187 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
1188 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
1189 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
1190 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
1191 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | ||
1192 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | ||
diff --git a/arch/i386/kernel/paravirt.c b/arch/i386/kernel/paravirt.c index 2ec331e03fa9..5c10f376bce1 100644 --- a/arch/i386/kernel/paravirt.c +++ b/arch/i386/kernel/paravirt.c | |||
@@ -20,6 +20,7 @@ | |||
20 | #include <linux/efi.h> | 20 | #include <linux/efi.h> |
21 | #include <linux/bcd.h> | 21 | #include <linux/bcd.h> |
22 | #include <linux/start_kernel.h> | 22 | #include <linux/start_kernel.h> |
23 | #include <linux/highmem.h> | ||
23 | 24 | ||
24 | #include <asm/bug.h> | 25 | #include <asm/bug.h> |
25 | #include <asm/paravirt.h> | 26 | #include <asm/paravirt.h> |
@@ -35,7 +36,7 @@ | |||
35 | #include <asm/timer.h> | 36 | #include <asm/timer.h> |
36 | 37 | ||
37 | /* nop stub */ | 38 | /* nop stub */ |
38 | static void native_nop(void) | 39 | void _paravirt_nop(void) |
39 | { | 40 | { |
40 | } | 41 | } |
41 | 42 | ||
@@ -54,331 +55,148 @@ char *memory_setup(void) | |||
54 | #define DEF_NATIVE(name, code) \ | 55 | #define DEF_NATIVE(name, code) \ |
55 | extern const char start_##name[], end_##name[]; \ | 56 | extern const char start_##name[], end_##name[]; \ |
56 | asm("start_" #name ": " code "; end_" #name ":") | 57 | asm("start_" #name ": " code "; end_" #name ":") |
57 | DEF_NATIVE(cli, "cli"); | 58 | |
58 | DEF_NATIVE(sti, "sti"); | 59 | DEF_NATIVE(irq_disable, "cli"); |
59 | DEF_NATIVE(popf, "push %eax; popf"); | 60 | DEF_NATIVE(irq_enable, "sti"); |
60 | DEF_NATIVE(pushf, "pushf; pop %eax"); | 61 | DEF_NATIVE(restore_fl, "push %eax; popf"); |
61 | DEF_NATIVE(pushf_cli, "pushf; pop %eax; cli"); | 62 | DEF_NATIVE(save_fl, "pushf; pop %eax"); |
62 | DEF_NATIVE(iret, "iret"); | 63 | DEF_NATIVE(iret, "iret"); |
63 | DEF_NATIVE(sti_sysexit, "sti; sysexit"); | 64 | DEF_NATIVE(irq_enable_sysexit, "sti; sysexit"); |
65 | DEF_NATIVE(read_cr2, "mov %cr2, %eax"); | ||
66 | DEF_NATIVE(write_cr3, "mov %eax, %cr3"); | ||
67 | DEF_NATIVE(read_cr3, "mov %cr3, %eax"); | ||
68 | DEF_NATIVE(clts, "clts"); | ||
69 | DEF_NATIVE(read_tsc, "rdtsc"); | ||
64 | 70 | ||
65 | static const struct native_insns | 71 | DEF_NATIVE(ud2a, "ud2a"); |
66 | { | ||
67 | const char *start, *end; | ||
68 | } native_insns[] = { | ||
69 | [PARAVIRT_IRQ_DISABLE] = { start_cli, end_cli }, | ||
70 | [PARAVIRT_IRQ_ENABLE] = { start_sti, end_sti }, | ||
71 | [PARAVIRT_RESTORE_FLAGS] = { start_popf, end_popf }, | ||
72 | [PARAVIRT_SAVE_FLAGS] = { start_pushf, end_pushf }, | ||
73 | [PARAVIRT_SAVE_FLAGS_IRQ_DISABLE] = { start_pushf_cli, end_pushf_cli }, | ||
74 | [PARAVIRT_INTERRUPT_RETURN] = { start_iret, end_iret }, | ||
75 | [PARAVIRT_STI_SYSEXIT] = { start_sti_sysexit, end_sti_sysexit }, | ||
76 | }; | ||
77 | 72 | ||
78 | static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) | 73 | static unsigned native_patch(u8 type, u16 clobbers, void *insns, unsigned len) |
79 | { | 74 | { |
80 | unsigned int insn_len; | 75 | const unsigned char *start, *end; |
81 | 76 | unsigned ret; | |
82 | /* Don't touch it if we don't have a replacement */ | 77 | |
83 | if (type >= ARRAY_SIZE(native_insns) || !native_insns[type].start) | 78 | switch(type) { |
84 | return len; | 79 | #define SITE(x) case PARAVIRT_PATCH(x): start = start_##x; end = end_##x; goto patch_site |
85 | 80 | SITE(irq_disable); | |
86 | insn_len = native_insns[type].end - native_insns[type].start; | 81 | SITE(irq_enable); |
87 | 82 | SITE(restore_fl); | |
88 | /* Similarly if we can't fit replacement. */ | 83 | SITE(save_fl); |
89 | if (len < insn_len) | 84 | SITE(iret); |
90 | return len; | 85 | SITE(irq_enable_sysexit); |
86 | SITE(read_cr2); | ||
87 | SITE(read_cr3); | ||
88 | SITE(write_cr3); | ||
89 | SITE(clts); | ||
90 | SITE(read_tsc); | ||
91 | #undef SITE | ||
92 | |||
93 | patch_site: | ||
94 | ret = paravirt_patch_insns(insns, len, start, end); | ||
95 | break; | ||
91 | 96 | ||
92 | memcpy(insns, native_insns[type].start, insn_len); | 97 | case PARAVIRT_PATCH(make_pgd): |
93 | return insn_len; | 98 | case PARAVIRT_PATCH(make_pte): |
94 | } | 99 | case PARAVIRT_PATCH(pgd_val): |
100 | case PARAVIRT_PATCH(pte_val): | ||
101 | #ifdef CONFIG_X86_PAE | ||
102 | case PARAVIRT_PATCH(make_pmd): | ||
103 | case PARAVIRT_PATCH(pmd_val): | ||
104 | #endif | ||
105 | /* These functions end up returning exactly what | ||
106 | they're passed, in the same registers. */ | ||
107 | ret = paravirt_patch_nop(); | ||
108 | break; | ||
95 | 109 | ||
96 | static unsigned long native_get_debugreg(int regno) | ||
97 | { | ||
98 | unsigned long val = 0; /* Damn you, gcc! */ | ||
99 | |||
100 | switch (regno) { | ||
101 | case 0: | ||
102 | asm("movl %%db0, %0" :"=r" (val)); break; | ||
103 | case 1: | ||
104 | asm("movl %%db1, %0" :"=r" (val)); break; | ||
105 | case 2: | ||
106 | asm("movl %%db2, %0" :"=r" (val)); break; | ||
107 | case 3: | ||
108 | asm("movl %%db3, %0" :"=r" (val)); break; | ||
109 | case 6: | ||
110 | asm("movl %%db6, %0" :"=r" (val)); break; | ||
111 | case 7: | ||
112 | asm("movl %%db7, %0" :"=r" (val)); break; | ||
113 | default: | 110 | default: |
114 | BUG(); | 111 | ret = paravirt_patch_default(type, clobbers, insns, len); |
115 | } | ||
116 | return val; | ||
117 | } | ||
118 | |||
119 | static void native_set_debugreg(int regno, unsigned long value) | ||
120 | { | ||
121 | switch (regno) { | ||
122 | case 0: | ||
123 | asm("movl %0,%%db0" : /* no output */ :"r" (value)); | ||
124 | break; | ||
125 | case 1: | ||
126 | asm("movl %0,%%db1" : /* no output */ :"r" (value)); | ||
127 | break; | ||
128 | case 2: | ||
129 | asm("movl %0,%%db2" : /* no output */ :"r" (value)); | ||
130 | break; | 112 | break; |
131 | case 3: | ||
132 | asm("movl %0,%%db3" : /* no output */ :"r" (value)); | ||
133 | break; | ||
134 | case 6: | ||
135 | asm("movl %0,%%db6" : /* no output */ :"r" (value)); | ||
136 | break; | ||
137 | case 7: | ||
138 | asm("movl %0,%%db7" : /* no output */ :"r" (value)); | ||
139 | break; | ||
140 | default: | ||
141 | BUG(); | ||
142 | } | 113 | } |
143 | } | ||
144 | |||
145 | void init_IRQ(void) | ||
146 | { | ||
147 | paravirt_ops.init_IRQ(); | ||
148 | } | ||
149 | |||
150 | static void native_clts(void) | ||
151 | { | ||
152 | asm volatile ("clts"); | ||
153 | } | ||
154 | |||
155 | static unsigned long native_read_cr0(void) | ||
156 | { | ||
157 | unsigned long val; | ||
158 | asm volatile("movl %%cr0,%0\n\t" :"=r" (val)); | ||
159 | return val; | ||
160 | } | ||
161 | |||
162 | static void native_write_cr0(unsigned long val) | ||
163 | { | ||
164 | asm volatile("movl %0,%%cr0": :"r" (val)); | ||
165 | } | ||
166 | |||
167 | static unsigned long native_read_cr2(void) | ||
168 | { | ||
169 | unsigned long val; | ||
170 | asm volatile("movl %%cr2,%0\n\t" :"=r" (val)); | ||
171 | return val; | ||
172 | } | ||
173 | |||
174 | static void native_write_cr2(unsigned long val) | ||
175 | { | ||
176 | asm volatile("movl %0,%%cr2": :"r" (val)); | ||
177 | } | ||
178 | |||
179 | static unsigned long native_read_cr3(void) | ||
180 | { | ||
181 | unsigned long val; | ||
182 | asm volatile("movl %%cr3,%0\n\t" :"=r" (val)); | ||
183 | return val; | ||
184 | } | ||
185 | |||
186 | static void native_write_cr3(unsigned long val) | ||
187 | { | ||
188 | asm volatile("movl %0,%%cr3": :"r" (val)); | ||
189 | } | ||
190 | |||
191 | static unsigned long native_read_cr4(void) | ||
192 | { | ||
193 | unsigned long val; | ||
194 | asm volatile("movl %%cr4,%0\n\t" :"=r" (val)); | ||
195 | return val; | ||
196 | } | ||
197 | |||
198 | static unsigned long native_read_cr4_safe(void) | ||
199 | { | ||
200 | unsigned long val; | ||
201 | /* This could fault if %cr4 does not exist */ | ||
202 | asm("1: movl %%cr4, %0 \n" | ||
203 | "2: \n" | ||
204 | ".section __ex_table,\"a\" \n" | ||
205 | ".long 1b,2b \n" | ||
206 | ".previous \n" | ||
207 | : "=r" (val): "0" (0)); | ||
208 | return val; | ||
209 | } | ||
210 | |||
211 | static void native_write_cr4(unsigned long val) | ||
212 | { | ||
213 | asm volatile("movl %0,%%cr4": :"r" (val)); | ||
214 | } | ||
215 | |||
216 | static unsigned long native_save_fl(void) | ||
217 | { | ||
218 | unsigned long f; | ||
219 | asm volatile("pushfl ; popl %0":"=g" (f): /* no input */); | ||
220 | return f; | ||
221 | } | ||
222 | |||
223 | static void native_restore_fl(unsigned long f) | ||
224 | { | ||
225 | asm volatile("pushl %0 ; popfl": /* no output */ | ||
226 | :"g" (f) | ||
227 | :"memory", "cc"); | ||
228 | } | ||
229 | |||
230 | static void native_irq_disable(void) | ||
231 | { | ||
232 | asm volatile("cli": : :"memory"); | ||
233 | } | ||
234 | |||
235 | static void native_irq_enable(void) | ||
236 | { | ||
237 | asm volatile("sti": : :"memory"); | ||
238 | } | ||
239 | |||
240 | static void native_safe_halt(void) | ||
241 | { | ||
242 | asm volatile("sti; hlt": : :"memory"); | ||
243 | } | ||
244 | 114 | ||
245 | static void native_halt(void) | 115 | return ret; |
246 | { | ||
247 | asm volatile("hlt": : :"memory"); | ||
248 | } | 116 | } |
249 | 117 | ||
250 | static void native_wbinvd(void) | 118 | unsigned paravirt_patch_nop(void) |
251 | { | 119 | { |
252 | asm volatile("wbinvd": : :"memory"); | 120 | return 0; |
253 | } | 121 | } |
254 | 122 | ||
255 | static unsigned long long native_read_msr(unsigned int msr, int *err) | 123 | unsigned paravirt_patch_ignore(unsigned len) |
256 | { | 124 | { |
257 | unsigned long long val; | 125 | return len; |
258 | |||
259 | asm volatile("2: rdmsr ; xorl %0,%0\n" | ||
260 | "1:\n\t" | ||
261 | ".section .fixup,\"ax\"\n\t" | ||
262 | "3: movl %3,%0 ; jmp 1b\n\t" | ||
263 | ".previous\n\t" | ||
264 | ".section __ex_table,\"a\"\n" | ||
265 | " .align 4\n\t" | ||
266 | " .long 2b,3b\n\t" | ||
267 | ".previous" | ||
268 | : "=r" (*err), "=A" (val) | ||
269 | : "c" (msr), "i" (-EFAULT)); | ||
270 | |||
271 | return val; | ||
272 | } | 126 | } |
273 | 127 | ||
274 | static int native_write_msr(unsigned int msr, unsigned long long val) | 128 | unsigned paravirt_patch_call(void *target, u16 tgt_clobbers, |
129 | void *site, u16 site_clobbers, | ||
130 | unsigned len) | ||
275 | { | 131 | { |
276 | int err; | 132 | unsigned char *call = site; |
277 | asm volatile("2: wrmsr ; xorl %0,%0\n" | 133 | unsigned long delta = (unsigned long)target - (unsigned long)(call+5); |
278 | "1:\n\t" | ||
279 | ".section .fixup,\"ax\"\n\t" | ||
280 | "3: movl %4,%0 ; jmp 1b\n\t" | ||
281 | ".previous\n\t" | ||
282 | ".section __ex_table,\"a\"\n" | ||
283 | " .align 4\n\t" | ||
284 | " .long 2b,3b\n\t" | ||
285 | ".previous" | ||
286 | : "=a" (err) | ||
287 | : "c" (msr), "0" ((u32)val), "d" ((u32)(val>>32)), | ||
288 | "i" (-EFAULT)); | ||
289 | return err; | ||
290 | } | ||
291 | 134 | ||
292 | static unsigned long long native_read_tsc(void) | 135 | if (tgt_clobbers & ~site_clobbers) |
293 | { | 136 | return len; /* target would clobber too much for this site */ |
294 | unsigned long long val; | 137 | if (len < 5) |
295 | asm volatile("rdtsc" : "=A" (val)); | 138 | return len; /* call too long for patch site */ |
296 | return val; | ||
297 | } | ||
298 | 139 | ||
299 | static unsigned long long native_read_pmc(void) | 140 | *call++ = 0xe8; /* call */ |
300 | { | 141 | *(unsigned long *)call = delta; |
301 | unsigned long long val; | ||
302 | asm volatile("rdpmc" : "=A" (val)); | ||
303 | return val; | ||
304 | } | ||
305 | 142 | ||
306 | static void native_load_tr_desc(void) | 143 | return 5; |
307 | { | ||
308 | asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); | ||
309 | } | 144 | } |
310 | 145 | ||
311 | static void native_load_gdt(const struct Xgt_desc_struct *dtr) | 146 | unsigned paravirt_patch_jmp(void *target, void *site, unsigned len) |
312 | { | 147 | { |
313 | asm volatile("lgdt %0"::"m" (*dtr)); | 148 | unsigned char *jmp = site; |
314 | } | 149 | unsigned long delta = (unsigned long)target - (unsigned long)(jmp+5); |
315 | 150 | ||
316 | static void native_load_idt(const struct Xgt_desc_struct *dtr) | 151 | if (len < 5) |
317 | { | 152 | return len; /* call too long for patch site */ |
318 | asm volatile("lidt %0"::"m" (*dtr)); | ||
319 | } | ||
320 | 153 | ||
321 | static void native_store_gdt(struct Xgt_desc_struct *dtr) | 154 | *jmp++ = 0xe9; /* jmp */ |
322 | { | 155 | *(unsigned long *)jmp = delta; |
323 | asm ("sgdt %0":"=m" (*dtr)); | ||
324 | } | ||
325 | 156 | ||
326 | static void native_store_idt(struct Xgt_desc_struct *dtr) | 157 | return 5; |
327 | { | ||
328 | asm ("sidt %0":"=m" (*dtr)); | ||
329 | } | 158 | } |
330 | 159 | ||
331 | static unsigned long native_store_tr(void) | 160 | unsigned paravirt_patch_default(u8 type, u16 clobbers, void *site, unsigned len) |
332 | { | 161 | { |
333 | unsigned long tr; | 162 | void *opfunc = *((void **)&paravirt_ops + type); |
334 | asm ("str %0":"=r" (tr)); | 163 | unsigned ret; |
335 | return tr; | ||
336 | } | ||
337 | 164 | ||
338 | static void native_load_tls(struct thread_struct *t, unsigned int cpu) | 165 | if (opfunc == NULL) |
339 | { | 166 | /* If there's no function, patch it with a ud2a (BUG) */ |
340 | #define C(i) get_cpu_gdt_table(cpu)[GDT_ENTRY_TLS_MIN + i] = t->tls_array[i] | 167 | ret = paravirt_patch_insns(site, len, start_ud2a, end_ud2a); |
341 | C(0); C(1); C(2); | 168 | else if (opfunc == paravirt_nop) |
342 | #undef C | 169 | /* If the operation is a nop, then nop the callsite */ |
343 | } | 170 | ret = paravirt_patch_nop(); |
171 | else if (type == PARAVIRT_PATCH(iret) || | ||
172 | type == PARAVIRT_PATCH(irq_enable_sysexit)) | ||
173 | /* If operation requires a jmp, then jmp */ | ||
174 | ret = paravirt_patch_jmp(opfunc, site, len); | ||
175 | else | ||
176 | /* Otherwise call the function; assume target could | ||
177 | clobber any caller-save reg */ | ||
178 | ret = paravirt_patch_call(opfunc, CLBR_ANY, | ||
179 | site, clobbers, len); | ||
344 | 180 | ||
345 | static inline void native_write_dt_entry(void *dt, int entry, u32 entry_low, u32 entry_high) | 181 | return ret; |
346 | { | ||
347 | u32 *lp = (u32 *)((char *)dt + entry*8); | ||
348 | lp[0] = entry_low; | ||
349 | lp[1] = entry_high; | ||
350 | } | 182 | } |
351 | 183 | ||
352 | static void native_write_ldt_entry(void *dt, int entrynum, u32 low, u32 high) | 184 | unsigned paravirt_patch_insns(void *site, unsigned len, |
185 | const char *start, const char *end) | ||
353 | { | 186 | { |
354 | native_write_dt_entry(dt, entrynum, low, high); | 187 | unsigned insn_len = end - start; |
355 | } | ||
356 | 188 | ||
357 | static void native_write_gdt_entry(void *dt, int entrynum, u32 low, u32 high) | 189 | if (insn_len > len || start == NULL) |
358 | { | 190 | insn_len = len; |
359 | native_write_dt_entry(dt, entrynum, low, high); | 191 | else |
360 | } | 192 | memcpy(site, start, insn_len); |
361 | |||
362 | static void native_write_idt_entry(void *dt, int entrynum, u32 low, u32 high) | ||
363 | { | ||
364 | native_write_dt_entry(dt, entrynum, low, high); | ||
365 | } | ||
366 | 193 | ||
367 | static void native_load_esp0(struct tss_struct *tss, | 194 | return insn_len; |
368 | struct thread_struct *thread) | ||
369 | { | ||
370 | tss->esp0 = thread->esp0; | ||
371 | |||
372 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | ||
373 | if (unlikely(tss->ss1 != thread->sysenter_cs)) { | ||
374 | tss->ss1 = thread->sysenter_cs; | ||
375 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | ||
376 | } | ||
377 | } | 195 | } |
378 | 196 | ||
379 | static void native_io_delay(void) | 197 | void init_IRQ(void) |
380 | { | 198 | { |
381 | asm volatile("outb %al,$0x80"); | 199 | paravirt_ops.init_IRQ(); |
382 | } | 200 | } |
383 | 201 | ||
384 | static void native_flush_tlb(void) | 202 | static void native_flush_tlb(void) |
@@ -395,83 +213,11 @@ static void native_flush_tlb_global(void) | |||
395 | __native_flush_tlb_global(); | 213 | __native_flush_tlb_global(); |
396 | } | 214 | } |
397 | 215 | ||
398 | static void native_flush_tlb_single(u32 addr) | 216 | static void native_flush_tlb_single(unsigned long addr) |
399 | { | 217 | { |
400 | __native_flush_tlb_single(addr); | 218 | __native_flush_tlb_single(addr); |
401 | } | 219 | } |
402 | 220 | ||
403 | #ifndef CONFIG_X86_PAE | ||
404 | static void native_set_pte(pte_t *ptep, pte_t pteval) | ||
405 | { | ||
406 | *ptep = pteval; | ||
407 | } | ||
408 | |||
409 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval) | ||
410 | { | ||
411 | *ptep = pteval; | ||
412 | } | ||
413 | |||
414 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
415 | { | ||
416 | *pmdp = pmdval; | ||
417 | } | ||
418 | |||
419 | #else /* CONFIG_X86_PAE */ | ||
420 | |||
421 | static void native_set_pte(pte_t *ptep, pte_t pte) | ||
422 | { | ||
423 | ptep->pte_high = pte.pte_high; | ||
424 | smp_wmb(); | ||
425 | ptep->pte_low = pte.pte_low; | ||
426 | } | ||
427 | |||
428 | static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | ||
429 | { | ||
430 | ptep->pte_high = pte.pte_high; | ||
431 | smp_wmb(); | ||
432 | ptep->pte_low = pte.pte_low; | ||
433 | } | ||
434 | |||
435 | static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) | ||
436 | { | ||
437 | ptep->pte_low = 0; | ||
438 | smp_wmb(); | ||
439 | ptep->pte_high = pte.pte_high; | ||
440 | smp_wmb(); | ||
441 | ptep->pte_low = pte.pte_low; | ||
442 | } | ||
443 | |||
444 | static void native_set_pte_atomic(pte_t *ptep, pte_t pteval) | ||
445 | { | ||
446 | set_64bit((unsigned long long *)ptep,pte_val(pteval)); | ||
447 | } | ||
448 | |||
449 | static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval) | ||
450 | { | ||
451 | set_64bit((unsigned long long *)pmdp,pmd_val(pmdval)); | ||
452 | } | ||
453 | |||
454 | static void native_set_pud(pud_t *pudp, pud_t pudval) | ||
455 | { | ||
456 | *pudp = pudval; | ||
457 | } | ||
458 | |||
459 | static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | ||
460 | { | ||
461 | ptep->pte_low = 0; | ||
462 | smp_wmb(); | ||
463 | ptep->pte_high = 0; | ||
464 | } | ||
465 | |||
466 | static void native_pmd_clear(pmd_t *pmd) | ||
467 | { | ||
468 | u32 *tmp = (u32 *)pmd; | ||
469 | *tmp = 0; | ||
470 | smp_wmb(); | ||
471 | *(tmp + 1) = 0; | ||
472 | } | ||
473 | #endif /* CONFIG_X86_PAE */ | ||
474 | |||
475 | /* These are in entry.S */ | 221 | /* These are in entry.S */ |
476 | extern void native_iret(void); | 222 | extern void native_iret(void); |
477 | extern void native_irq_enable_sysexit(void); | 223 | extern void native_irq_enable_sysexit(void); |
@@ -487,10 +233,11 @@ struct paravirt_ops paravirt_ops = { | |||
487 | .name = "bare hardware", | 233 | .name = "bare hardware", |
488 | .paravirt_enabled = 0, | 234 | .paravirt_enabled = 0, |
489 | .kernel_rpl = 0, | 235 | .kernel_rpl = 0, |
236 | .shared_kernel_pmd = 1, /* Only used when CONFIG_X86_PAE is set */ | ||
490 | 237 | ||
491 | .patch = native_patch, | 238 | .patch = native_patch, |
492 | .banner = default_banner, | 239 | .banner = default_banner, |
493 | .arch_setup = native_nop, | 240 | .arch_setup = paravirt_nop, |
494 | .memory_setup = machine_specific_memory_setup, | 241 | .memory_setup = machine_specific_memory_setup, |
495 | .get_wallclock = native_get_wallclock, | 242 | .get_wallclock = native_get_wallclock, |
496 | .set_wallclock = native_set_wallclock, | 243 | .set_wallclock = native_set_wallclock, |
@@ -517,8 +264,8 @@ struct paravirt_ops paravirt_ops = { | |||
517 | .safe_halt = native_safe_halt, | 264 | .safe_halt = native_safe_halt, |
518 | .halt = native_halt, | 265 | .halt = native_halt, |
519 | .wbinvd = native_wbinvd, | 266 | .wbinvd = native_wbinvd, |
520 | .read_msr = native_read_msr, | 267 | .read_msr = native_read_msr_safe, |
521 | .write_msr = native_write_msr, | 268 | .write_msr = native_write_msr_safe, |
522 | .read_tsc = native_read_tsc, | 269 | .read_tsc = native_read_tsc, |
523 | .read_pmc = native_read_pmc, | 270 | .read_pmc = native_read_pmc, |
524 | .get_scheduled_cycles = native_read_tsc, | 271 | .get_scheduled_cycles = native_read_tsc, |
@@ -531,9 +278,9 @@ struct paravirt_ops paravirt_ops = { | |||
531 | .store_idt = native_store_idt, | 278 | .store_idt = native_store_idt, |
532 | .store_tr = native_store_tr, | 279 | .store_tr = native_store_tr, |
533 | .load_tls = native_load_tls, | 280 | .load_tls = native_load_tls, |
534 | .write_ldt_entry = native_write_ldt_entry, | 281 | .write_ldt_entry = write_dt_entry, |
535 | .write_gdt_entry = native_write_gdt_entry, | 282 | .write_gdt_entry = write_dt_entry, |
536 | .write_idt_entry = native_write_idt_entry, | 283 | .write_idt_entry = write_dt_entry, |
537 | .load_esp0 = native_load_esp0, | 284 | .load_esp0 = native_load_esp0, |
538 | 285 | ||
539 | .set_iopl_mask = native_set_iopl_mask, | 286 | .set_iopl_mask = native_set_iopl_mask, |
@@ -545,44 +292,57 @@ struct paravirt_ops paravirt_ops = { | |||
545 | .apic_read = native_apic_read, | 292 | .apic_read = native_apic_read, |
546 | .setup_boot_clock = setup_boot_APIC_clock, | 293 | .setup_boot_clock = setup_boot_APIC_clock, |
547 | .setup_secondary_clock = setup_secondary_APIC_clock, | 294 | .setup_secondary_clock = setup_secondary_APIC_clock, |
295 | .startup_ipi_hook = paravirt_nop, | ||
548 | #endif | 296 | #endif |
549 | .set_lazy_mode = (void *)native_nop, | 297 | .set_lazy_mode = paravirt_nop, |
298 | |||
299 | .pagetable_setup_start = native_pagetable_setup_start, | ||
300 | .pagetable_setup_done = native_pagetable_setup_done, | ||
550 | 301 | ||
551 | .flush_tlb_user = native_flush_tlb, | 302 | .flush_tlb_user = native_flush_tlb, |
552 | .flush_tlb_kernel = native_flush_tlb_global, | 303 | .flush_tlb_kernel = native_flush_tlb_global, |
553 | .flush_tlb_single = native_flush_tlb_single, | 304 | .flush_tlb_single = native_flush_tlb_single, |
305 | .flush_tlb_others = native_flush_tlb_others, | ||
554 | 306 | ||
555 | .map_pt_hook = (void *)native_nop, | 307 | .alloc_pt = paravirt_nop, |
556 | 308 | .alloc_pd = paravirt_nop, | |
557 | .alloc_pt = (void *)native_nop, | 309 | .alloc_pd_clone = paravirt_nop, |
558 | .alloc_pd = (void *)native_nop, | 310 | .release_pt = paravirt_nop, |
559 | .alloc_pd_clone = (void *)native_nop, | 311 | .release_pd = paravirt_nop, |
560 | .release_pt = (void *)native_nop, | ||
561 | .release_pd = (void *)native_nop, | ||
562 | 312 | ||
563 | .set_pte = native_set_pte, | 313 | .set_pte = native_set_pte, |
564 | .set_pte_at = native_set_pte_at, | 314 | .set_pte_at = native_set_pte_at, |
565 | .set_pmd = native_set_pmd, | 315 | .set_pmd = native_set_pmd, |
566 | .pte_update = (void *)native_nop, | 316 | .pte_update = paravirt_nop, |
567 | .pte_update_defer = (void *)native_nop, | 317 | .pte_update_defer = paravirt_nop, |
318 | |||
319 | #ifdef CONFIG_HIGHPTE | ||
320 | .kmap_atomic_pte = kmap_atomic, | ||
321 | #endif | ||
322 | |||
568 | #ifdef CONFIG_X86_PAE | 323 | #ifdef CONFIG_X86_PAE |
569 | .set_pte_atomic = native_set_pte_atomic, | 324 | .set_pte_atomic = native_set_pte_atomic, |
570 | .set_pte_present = native_set_pte_present, | 325 | .set_pte_present = native_set_pte_present, |
571 | .set_pud = native_set_pud, | 326 | .set_pud = native_set_pud, |
572 | .pte_clear = native_pte_clear, | 327 | .pte_clear = native_pte_clear, |
573 | .pmd_clear = native_pmd_clear, | 328 | .pmd_clear = native_pmd_clear, |
329 | |||
330 | .pmd_val = native_pmd_val, | ||
331 | .make_pmd = native_make_pmd, | ||
574 | #endif | 332 | #endif |
575 | 333 | ||
334 | .pte_val = native_pte_val, | ||
335 | .pgd_val = native_pgd_val, | ||
336 | |||
337 | .make_pte = native_make_pte, | ||
338 | .make_pgd = native_make_pgd, | ||
339 | |||
576 | .irq_enable_sysexit = native_irq_enable_sysexit, | 340 | .irq_enable_sysexit = native_irq_enable_sysexit, |
577 | .iret = native_iret, | 341 | .iret = native_iret, |
578 | 342 | ||
579 | .startup_ipi_hook = (void *)native_nop, | 343 | .dup_mmap = paravirt_nop, |
344 | .exit_mmap = paravirt_nop, | ||
345 | .activate_mm = paravirt_nop, | ||
580 | }; | 346 | }; |
581 | 347 | ||
582 | /* | 348 | EXPORT_SYMBOL(paravirt_ops); |
583 | * NOTE: CONFIG_PARAVIRT is experimental and the paravirt_ops | ||
584 | * semantics are subject to change. Hence we only do this | ||
585 | * internal-only export of this, until it gets sorted out and | ||
586 | * all lowlevel CPU ops used by modules are separately exported. | ||
587 | */ | ||
588 | EXPORT_SYMBOL_GPL(paravirt_ops); | ||
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 393a67d5d943..61999479b7a4 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -39,6 +39,7 @@ | |||
39 | #include <linux/random.h> | 39 | #include <linux/random.h> |
40 | #include <linux/personality.h> | 40 | #include <linux/personality.h> |
41 | #include <linux/tick.h> | 41 | #include <linux/tick.h> |
42 | #include <linux/percpu.h> | ||
42 | 43 | ||
43 | #include <asm/uaccess.h> | 44 | #include <asm/uaccess.h> |
44 | #include <asm/pgtable.h> | 45 | #include <asm/pgtable.h> |
@@ -57,7 +58,6 @@ | |||
57 | 58 | ||
58 | #include <asm/tlbflush.h> | 59 | #include <asm/tlbflush.h> |
59 | #include <asm/cpu.h> | 60 | #include <asm/cpu.h> |
60 | #include <asm/pda.h> | ||
61 | 61 | ||
62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); | 62 | asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); |
63 | 63 | ||
@@ -66,6 +66,12 @@ static int hlt_counter; | |||
66 | unsigned long boot_option_idle_override = 0; | 66 | unsigned long boot_option_idle_override = 0; |
67 | EXPORT_SYMBOL(boot_option_idle_override); | 67 | EXPORT_SYMBOL(boot_option_idle_override); |
68 | 68 | ||
69 | DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; | ||
70 | EXPORT_PER_CPU_SYMBOL(current_task); | ||
71 | |||
72 | DEFINE_PER_CPU(int, cpu_number); | ||
73 | EXPORT_PER_CPU_SYMBOL(cpu_number); | ||
74 | |||
69 | /* | 75 | /* |
70 | * Return saved PC of a blocked thread. | 76 | * Return saved PC of a blocked thread. |
71 | */ | 77 | */ |
@@ -272,25 +278,24 @@ void __devinit select_idle_routine(const struct cpuinfo_x86 *c) | |||
272 | } | 278 | } |
273 | } | 279 | } |
274 | 280 | ||
275 | static int __init idle_setup (char *str) | 281 | static int __init idle_setup(char *str) |
276 | { | 282 | { |
277 | if (!strncmp(str, "poll", 4)) { | 283 | if (!strcmp(str, "poll")) { |
278 | printk("using polling idle threads.\n"); | 284 | printk("using polling idle threads.\n"); |
279 | pm_idle = poll_idle; | 285 | pm_idle = poll_idle; |
280 | #ifdef CONFIG_X86_SMP | 286 | #ifdef CONFIG_X86_SMP |
281 | if (smp_num_siblings > 1) | 287 | if (smp_num_siblings > 1) |
282 | printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); | 288 | printk("WARNING: polling idle and HT enabled, performance may degrade.\n"); |
283 | #endif | 289 | #endif |
284 | } else if (!strncmp(str, "halt", 4)) { | 290 | } else if (!strcmp(str, "mwait")) |
285 | printk("using halt in idle threads.\n"); | 291 | force_mwait = 1; |
286 | pm_idle = default_idle; | 292 | else |
287 | } | 293 | return -1; |
288 | 294 | ||
289 | boot_option_idle_override = 1; | 295 | boot_option_idle_override = 1; |
290 | return 1; | 296 | return 0; |
291 | } | 297 | } |
292 | 298 | early_param("idle", idle_setup); | |
293 | __setup("idle=", idle_setup); | ||
294 | 299 | ||
295 | void show_regs(struct pt_regs * regs) | 300 | void show_regs(struct pt_regs * regs) |
296 | { | 301 | { |
@@ -343,7 +348,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
343 | 348 | ||
344 | regs.xds = __USER_DS; | 349 | regs.xds = __USER_DS; |
345 | regs.xes = __USER_DS; | 350 | regs.xes = __USER_DS; |
346 | regs.xfs = __KERNEL_PDA; | 351 | regs.xfs = __KERNEL_PERCPU; |
347 | regs.orig_eax = -1; | 352 | regs.orig_eax = -1; |
348 | regs.eip = (unsigned long) kernel_thread_helper; | 353 | regs.eip = (unsigned long) kernel_thread_helper; |
349 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); | 354 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
@@ -376,7 +381,7 @@ void exit_thread(void) | |||
376 | t->io_bitmap_max = 0; | 381 | t->io_bitmap_max = 0; |
377 | tss->io_bitmap_owner = NULL; | 382 | tss->io_bitmap_owner = NULL; |
378 | tss->io_bitmap_max = 0; | 383 | tss->io_bitmap_max = 0; |
379 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 384 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
380 | put_cpu(); | 385 | put_cpu(); |
381 | } | 386 | } |
382 | } | 387 | } |
@@ -555,7 +560,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, | |||
555 | * Disable the bitmap via an invalid offset. We still cache | 560 | * Disable the bitmap via an invalid offset. We still cache |
556 | * the previous bitmap owner and the IO bitmap contents: | 561 | * the previous bitmap owner and the IO bitmap contents: |
557 | */ | 562 | */ |
558 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET; | 563 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET; |
559 | return; | 564 | return; |
560 | } | 565 | } |
561 | 566 | ||
@@ -565,7 +570,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, | |||
565 | * matches the next task, we dont have to do anything but | 570 | * matches the next task, we dont have to do anything but |
566 | * to set a valid offset in the TSS: | 571 | * to set a valid offset in the TSS: |
567 | */ | 572 | */ |
568 | tss->io_bitmap_base = IO_BITMAP_OFFSET; | 573 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
569 | return; | 574 | return; |
570 | } | 575 | } |
571 | /* | 576 | /* |
@@ -577,7 +582,7 @@ static noinline void __switch_to_xtra(struct task_struct *next_p, | |||
577 | * redundant copies when the currently switched task does not | 582 | * redundant copies when the currently switched task does not |
578 | * perform any I/O during its timeslice. | 583 | * perform any I/O during its timeslice. |
579 | */ | 584 | */ |
580 | tss->io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; | 585 | tss->x86_tss.io_bitmap_base = INVALID_IO_BITMAP_OFFSET_LAZY; |
581 | } | 586 | } |
582 | 587 | ||
583 | /* | 588 | /* |
@@ -712,7 +717,7 @@ struct task_struct fastcall * __switch_to(struct task_struct *prev_p, struct tas | |||
712 | if (prev->gs | next->gs) | 717 | if (prev->gs | next->gs) |
713 | loadsegment(gs, next->gs); | 718 | loadsegment(gs, next->gs); |
714 | 719 | ||
715 | write_pda(pcurrent, next_p); | 720 | x86_write_percpu(current_task, next_p); |
716 | 721 | ||
717 | return prev_p; | 722 | return prev_p; |
718 | } | 723 | } |
diff --git a/arch/i386/kernel/quirks.c b/arch/i386/kernel/quirks.c index 34874c398b44..9f6ab1789bb0 100644 --- a/arch/i386/kernel/quirks.c +++ b/arch/i386/kernel/quirks.c | |||
@@ -3,12 +3,10 @@ | |||
3 | */ | 3 | */ |
4 | #include <linux/pci.h> | 4 | #include <linux/pci.h> |
5 | #include <linux/irq.h> | 5 | #include <linux/irq.h> |
6 | #include <asm/pci-direct.h> | ||
7 | #include <asm/genapic.h> | ||
8 | #include <asm/cpu.h> | ||
9 | 6 | ||
10 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) | 7 | #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI) |
11 | static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | 8 | |
9 | static void __devinit quirk_intel_irqbalance(struct pci_dev *dev) | ||
12 | { | 10 | { |
13 | u8 config, rev; | 11 | u8 config, rev; |
14 | u32 word; | 12 | u32 word; |
@@ -16,12 +14,14 @@ static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | |||
16 | /* BIOS may enable hardware IRQ balancing for | 14 | /* BIOS may enable hardware IRQ balancing for |
17 | * E7520/E7320/E7525(revision ID 0x9 and below) | 15 | * E7520/E7320/E7525(revision ID 0x9 and below) |
18 | * based platforms. | 16 | * based platforms. |
19 | * For those platforms, make sure that the genapic is set to 'flat' | 17 | * Disable SW irqbalance/affinity on those platforms. |
20 | */ | 18 | */ |
21 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); | 19 | pci_read_config_byte(dev, PCI_CLASS_REVISION, &rev); |
22 | if (rev > 0x9) | 20 | if (rev > 0x9) |
23 | return; | 21 | return; |
24 | 22 | ||
23 | printk(KERN_INFO "Intel E7520/7320/7525 detected."); | ||
24 | |||
25 | /* enable access to config space*/ | 25 | /* enable access to config space*/ |
26 | pci_read_config_byte(dev, 0xf4, &config); | 26 | pci_read_config_byte(dev, 0xf4, &config); |
27 | pci_write_config_byte(dev, 0xf4, config|0x2); | 27 | pci_write_config_byte(dev, 0xf4, config|0x2); |
@@ -30,44 +30,6 @@ static void __devinit verify_quirk_intel_irqbalance(struct pci_dev *dev) | |||
30 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); | 30 | raw_pci_ops->read(0, 0, 0x40, 0x4c, 2, &word); |
31 | 31 | ||
32 | if (!(word & (1 << 13))) { | 32 | if (!(word & (1 << 13))) { |
33 | #ifdef CONFIG_X86_64 | ||
34 | if (genapic != &apic_flat) | ||
35 | panic("APIC mode must be flat on this system\n"); | ||
36 | #elif defined(CONFIG_X86_GENERICARCH) | ||
37 | if (genapic != &apic_default) | ||
38 | panic("APIC mode must be default(flat) on this system. Use apic=default\n"); | ||
39 | #endif | ||
40 | } | ||
41 | |||
42 | /* put back the original value for config space*/ | ||
43 | if (!(config & 0x2)) | ||
44 | pci_write_config_byte(dev, 0xf4, config); | ||
45 | } | ||
46 | |||
47 | void __init quirk_intel_irqbalance(void) | ||
48 | { | ||
49 | u8 config, rev; | ||
50 | u32 word; | ||
51 | |||
52 | /* BIOS may enable hardware IRQ balancing for | ||
53 | * E7520/E7320/E7525(revision ID 0x9 and below) | ||
54 | * based platforms. | ||
55 | * Disable SW irqbalance/affinity on those platforms. | ||
56 | */ | ||
57 | rev = read_pci_config_byte(0, 0, 0, PCI_CLASS_REVISION); | ||
58 | if (rev > 0x9) | ||
59 | return; | ||
60 | |||
61 | printk(KERN_INFO "Intel E7520/7320/7525 detected."); | ||
62 | |||
63 | /* enable access to config space */ | ||
64 | config = read_pci_config_byte(0, 0, 0, 0xf4); | ||
65 | write_pci_config_byte(0, 0, 0, 0xf4, config|0x2); | ||
66 | |||
67 | /* read xTPR register */ | ||
68 | word = read_pci_config_16(0, 0, 0x40, 0x4c); | ||
69 | |||
70 | if (!(word & (1 << 13))) { | ||
71 | printk(KERN_INFO "Disabling irq balancing and affinity\n"); | 33 | printk(KERN_INFO "Disabling irq balancing and affinity\n"); |
72 | #ifdef CONFIG_IRQBALANCE | 34 | #ifdef CONFIG_IRQBALANCE |
73 | irqbalance_disable(""); | 35 | irqbalance_disable(""); |
@@ -76,24 +38,13 @@ void __init quirk_intel_irqbalance(void) | |||
76 | #ifdef CONFIG_PROC_FS | 38 | #ifdef CONFIG_PROC_FS |
77 | no_irq_affinity = 1; | 39 | no_irq_affinity = 1; |
78 | #endif | 40 | #endif |
79 | #ifdef CONFIG_HOTPLUG_CPU | ||
80 | printk(KERN_INFO "Disabling cpu hotplug control\n"); | ||
81 | enable_cpu_hotplug = 0; | ||
82 | #endif | ||
83 | #ifdef CONFIG_X86_64 | ||
84 | /* force the genapic selection to flat mode so that | ||
85 | * interrupts can be redirected to more than one CPU. | ||
86 | */ | ||
87 | genapic_force = &apic_flat; | ||
88 | #endif | ||
89 | } | 41 | } |
90 | 42 | ||
91 | /* put back the original value for config space */ | 43 | /* put back the original value for config space*/ |
92 | if (!(config & 0x2)) | 44 | if (!(config & 0x2)) |
93 | write_pci_config_byte(0, 0, 0, 0xf4, config); | 45 | pci_write_config_byte(dev, 0xf4, config); |
94 | } | 46 | } |
95 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, verify_quirk_intel_irqbalance); | 47 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7320_MCH, quirk_intel_irqbalance); |
96 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, verify_quirk_intel_irqbalance); | 48 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7525_MCH, quirk_intel_irqbalance); |
97 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, verify_quirk_intel_irqbalance); | 49 | DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_E7520_MCH, quirk_intel_irqbalance); |
98 | |||
99 | #endif | 50 | #endif |
diff --git a/arch/i386/kernel/reboot.c b/arch/i386/kernel/reboot.c index 3514b4153f7f..50dfc65319cd 100644 --- a/arch/i386/kernel/reboot.c +++ b/arch/i386/kernel/reboot.c | |||
@@ -17,7 +17,8 @@ | |||
17 | #include <asm/apic.h> | 17 | #include <asm/apic.h> |
18 | #include <asm/desc.h> | 18 | #include <asm/desc.h> |
19 | #include "mach_reboot.h" | 19 | #include "mach_reboot.h" |
20 | #include <linux/reboot_fixups.h> | 20 | #include <asm/reboot_fixups.h> |
21 | #include <asm/reboot.h> | ||
21 | 22 | ||
22 | /* | 23 | /* |
23 | * Power off function, if any | 24 | * Power off function, if any |
@@ -197,8 +198,6 @@ static unsigned char jump_to_bios [] = | |||
197 | */ | 198 | */ |
198 | void machine_real_restart(unsigned char *code, int length) | 199 | void machine_real_restart(unsigned char *code, int length) |
199 | { | 200 | { |
200 | unsigned long flags; | ||
201 | |||
202 | local_irq_disable(); | 201 | local_irq_disable(); |
203 | 202 | ||
204 | /* Write zero to CMOS register number 0x0f, which the BIOS POST | 203 | /* Write zero to CMOS register number 0x0f, which the BIOS POST |
@@ -211,9 +210,9 @@ void machine_real_restart(unsigned char *code, int length) | |||
211 | safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) | 210 | safe side. (Yes, CMOS_WRITE does outb_p's. - Paul G.) |
212 | */ | 211 | */ |
213 | 212 | ||
214 | spin_lock_irqsave(&rtc_lock, flags); | 213 | spin_lock(&rtc_lock); |
215 | CMOS_WRITE(0x00, 0x8f); | 214 | CMOS_WRITE(0x00, 0x8f); |
216 | spin_unlock_irqrestore(&rtc_lock, flags); | 215 | spin_unlock(&rtc_lock); |
217 | 216 | ||
218 | /* Remap the kernel at virtual address zero, as well as offset zero | 217 | /* Remap the kernel at virtual address zero, as well as offset zero |
219 | from the kernel segment. This assumes the kernel segment starts at | 218 | from the kernel segment. This assumes the kernel segment starts at |
@@ -280,7 +279,7 @@ void machine_real_restart(unsigned char *code, int length) | |||
280 | EXPORT_SYMBOL(machine_real_restart); | 279 | EXPORT_SYMBOL(machine_real_restart); |
281 | #endif | 280 | #endif |
282 | 281 | ||
283 | void machine_shutdown(void) | 282 | static void native_machine_shutdown(void) |
284 | { | 283 | { |
285 | #ifdef CONFIG_SMP | 284 | #ifdef CONFIG_SMP |
286 | int reboot_cpu_id; | 285 | int reboot_cpu_id; |
@@ -316,7 +315,11 @@ void machine_shutdown(void) | |||
316 | #endif | 315 | #endif |
317 | } | 316 | } |
318 | 317 | ||
319 | void machine_emergency_restart(void) | 318 | void __attribute__((weak)) mach_reboot_fixups(void) |
319 | { | ||
320 | } | ||
321 | |||
322 | static void native_machine_emergency_restart(void) | ||
320 | { | 323 | { |
321 | if (!reboot_thru_bios) { | 324 | if (!reboot_thru_bios) { |
322 | if (efi_enabled) { | 325 | if (efi_enabled) { |
@@ -340,17 +343,17 @@ void machine_emergency_restart(void) | |||
340 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); | 343 | machine_real_restart(jump_to_bios, sizeof(jump_to_bios)); |
341 | } | 344 | } |
342 | 345 | ||
343 | void machine_restart(char * __unused) | 346 | static void native_machine_restart(char * __unused) |
344 | { | 347 | { |
345 | machine_shutdown(); | 348 | machine_shutdown(); |
346 | machine_emergency_restart(); | 349 | machine_emergency_restart(); |
347 | } | 350 | } |
348 | 351 | ||
349 | void machine_halt(void) | 352 | static void native_machine_halt(void) |
350 | { | 353 | { |
351 | } | 354 | } |
352 | 355 | ||
353 | void machine_power_off(void) | 356 | static void native_machine_power_off(void) |
354 | { | 357 | { |
355 | if (pm_power_off) { | 358 | if (pm_power_off) { |
356 | machine_shutdown(); | 359 | machine_shutdown(); |
@@ -359,3 +362,35 @@ void machine_power_off(void) | |||
359 | } | 362 | } |
360 | 363 | ||
361 | 364 | ||
365 | struct machine_ops machine_ops = { | ||
366 | .power_off = native_machine_power_off, | ||
367 | .shutdown = native_machine_shutdown, | ||
368 | .emergency_restart = native_machine_emergency_restart, | ||
369 | .restart = native_machine_restart, | ||
370 | .halt = native_machine_halt, | ||
371 | }; | ||
372 | |||
373 | void machine_power_off(void) | ||
374 | { | ||
375 | machine_ops.power_off(); | ||
376 | } | ||
377 | |||
378 | void machine_shutdown(void) | ||
379 | { | ||
380 | machine_ops.shutdown(); | ||
381 | } | ||
382 | |||
383 | void machine_emergency_restart(void) | ||
384 | { | ||
385 | machine_ops.emergency_restart(); | ||
386 | } | ||
387 | |||
388 | void machine_restart(char *cmd) | ||
389 | { | ||
390 | machine_ops.restart(cmd); | ||
391 | } | ||
392 | |||
393 | void machine_halt(void) | ||
394 | { | ||
395 | machine_ops.halt(); | ||
396 | } | ||
diff --git a/arch/i386/kernel/reboot_fixups.c b/arch/i386/kernel/reboot_fixups.c index 99aab41a05b0..2d78d918340f 100644 --- a/arch/i386/kernel/reboot_fixups.c +++ b/arch/i386/kernel/reboot_fixups.c | |||
@@ -10,7 +10,7 @@ | |||
10 | 10 | ||
11 | #include <asm/delay.h> | 11 | #include <asm/delay.h> |
12 | #include <linux/pci.h> | 12 | #include <linux/pci.h> |
13 | #include <linux/reboot_fixups.h> | 13 | #include <asm/reboot_fixups.h> |
14 | 14 | ||
15 | static void cs5530a_warm_reset(struct pci_dev *dev) | 15 | static void cs5530a_warm_reset(struct pci_dev *dev) |
16 | { | 16 | { |
diff --git a/arch/i386/kernel/smp.c b/arch/i386/kernel/smp.c index 0e8977871b1f..89a45a9ddcd4 100644 --- a/arch/i386/kernel/smp.c +++ b/arch/i386/kernel/smp.c | |||
@@ -165,20 +165,20 @@ void fastcall send_IPI_self(int vector) | |||
165 | } | 165 | } |
166 | 166 | ||
167 | /* | 167 | /* |
168 | * This is only used on smaller machines. | 168 | * This is used to send an IPI with no shorthand notation (the destination is |
169 | * specified in bits 56 to 63 of the ICR). | ||
169 | */ | 170 | */ |
170 | void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | 171 | static inline void __send_IPI_dest_field(unsigned long mask, int vector) |
171 | { | 172 | { |
172 | unsigned long mask = cpus_addr(cpumask)[0]; | ||
173 | unsigned long cfg; | 173 | unsigned long cfg; |
174 | unsigned long flags; | ||
175 | 174 | ||
176 | local_irq_save(flags); | ||
177 | WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); | ||
178 | /* | 175 | /* |
179 | * Wait for idle. | 176 | * Wait for idle. |
180 | */ | 177 | */ |
181 | apic_wait_icr_idle(); | 178 | if (unlikely(vector == NMI_VECTOR)) |
179 | safe_apic_wait_icr_idle(); | ||
180 | else | ||
181 | apic_wait_icr_idle(); | ||
182 | 182 | ||
183 | /* | 183 | /* |
184 | * prepare target chip field | 184 | * prepare target chip field |
@@ -195,13 +195,25 @@ void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | |||
195 | * Send the IPI. The write to APIC_ICR fires this off. | 195 | * Send the IPI. The write to APIC_ICR fires this off. |
196 | */ | 196 | */ |
197 | apic_write_around(APIC_ICR, cfg); | 197 | apic_write_around(APIC_ICR, cfg); |
198 | } | ||
199 | |||
200 | /* | ||
201 | * This is only used on smaller machines. | ||
202 | */ | ||
203 | void send_IPI_mask_bitmask(cpumask_t cpumask, int vector) | ||
204 | { | ||
205 | unsigned long mask = cpus_addr(cpumask)[0]; | ||
206 | unsigned long flags; | ||
198 | 207 | ||
208 | local_irq_save(flags); | ||
209 | WARN_ON(mask & ~cpus_addr(cpu_online_map)[0]); | ||
210 | __send_IPI_dest_field(mask, vector); | ||
199 | local_irq_restore(flags); | 211 | local_irq_restore(flags); |
200 | } | 212 | } |
201 | 213 | ||
202 | void send_IPI_mask_sequence(cpumask_t mask, int vector) | 214 | void send_IPI_mask_sequence(cpumask_t mask, int vector) |
203 | { | 215 | { |
204 | unsigned long cfg, flags; | 216 | unsigned long flags; |
205 | unsigned int query_cpu; | 217 | unsigned int query_cpu; |
206 | 218 | ||
207 | /* | 219 | /* |
@@ -211,30 +223,10 @@ void send_IPI_mask_sequence(cpumask_t mask, int vector) | |||
211 | */ | 223 | */ |
212 | 224 | ||
213 | local_irq_save(flags); | 225 | local_irq_save(flags); |
214 | |||
215 | for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { | 226 | for (query_cpu = 0; query_cpu < NR_CPUS; ++query_cpu) { |
216 | if (cpu_isset(query_cpu, mask)) { | 227 | if (cpu_isset(query_cpu, mask)) { |
217 | 228 | __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), | |
218 | /* | 229 | vector); |
219 | * Wait for idle. | ||
220 | */ | ||
221 | apic_wait_icr_idle(); | ||
222 | |||
223 | /* | ||
224 | * prepare target chip field | ||
225 | */ | ||
226 | cfg = __prepare_ICR2(cpu_to_logical_apicid(query_cpu)); | ||
227 | apic_write_around(APIC_ICR2, cfg); | ||
228 | |||
229 | /* | ||
230 | * program the ICR | ||
231 | */ | ||
232 | cfg = __prepare_ICR(0, vector); | ||
233 | |||
234 | /* | ||
235 | * Send the IPI. The write to APIC_ICR fires this off. | ||
236 | */ | ||
237 | apic_write_around(APIC_ICR, cfg); | ||
238 | } | 230 | } |
239 | } | 231 | } |
240 | local_irq_restore(flags); | 232 | local_irq_restore(flags); |
@@ -256,7 +248,6 @@ static cpumask_t flush_cpumask; | |||
256 | static struct mm_struct * flush_mm; | 248 | static struct mm_struct * flush_mm; |
257 | static unsigned long flush_va; | 249 | static unsigned long flush_va; |
258 | static DEFINE_SPINLOCK(tlbstate_lock); | 250 | static DEFINE_SPINLOCK(tlbstate_lock); |
259 | #define FLUSH_ALL 0xffffffff | ||
260 | 251 | ||
261 | /* | 252 | /* |
262 | * We cannot call mmdrop() because we are in interrupt context, | 253 | * We cannot call mmdrop() because we are in interrupt context, |
@@ -338,7 +329,7 @@ fastcall void smp_invalidate_interrupt(struct pt_regs *regs) | |||
338 | 329 | ||
339 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { | 330 | if (flush_mm == per_cpu(cpu_tlbstate, cpu).active_mm) { |
340 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { | 331 | if (per_cpu(cpu_tlbstate, cpu).state == TLBSTATE_OK) { |
341 | if (flush_va == FLUSH_ALL) | 332 | if (flush_va == TLB_FLUSH_ALL) |
342 | local_flush_tlb(); | 333 | local_flush_tlb(); |
343 | else | 334 | else |
344 | __flush_tlb_one(flush_va); | 335 | __flush_tlb_one(flush_va); |
@@ -353,9 +344,11 @@ out: | |||
353 | put_cpu_no_resched(); | 344 | put_cpu_no_resched(); |
354 | } | 345 | } |
355 | 346 | ||
356 | static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | 347 | void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, |
357 | unsigned long va) | 348 | unsigned long va) |
358 | { | 349 | { |
350 | cpumask_t cpumask = *cpumaskp; | ||
351 | |||
359 | /* | 352 | /* |
360 | * A couple of (to be removed) sanity checks: | 353 | * A couple of (to be removed) sanity checks: |
361 | * | 354 | * |
@@ -366,10 +359,12 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
366 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); | 359 | BUG_ON(cpu_isset(smp_processor_id(), cpumask)); |
367 | BUG_ON(!mm); | 360 | BUG_ON(!mm); |
368 | 361 | ||
362 | #ifdef CONFIG_HOTPLUG_CPU | ||
369 | /* If a CPU which we ran on has gone down, OK. */ | 363 | /* If a CPU which we ran on has gone down, OK. */ |
370 | cpus_and(cpumask, cpumask, cpu_online_map); | 364 | cpus_and(cpumask, cpumask, cpu_online_map); |
371 | if (cpus_empty(cpumask)) | 365 | if (unlikely(cpus_empty(cpumask))) |
372 | return; | 366 | return; |
367 | #endif | ||
373 | 368 | ||
374 | /* | 369 | /* |
375 | * i'm not happy about this global shared spinlock in the | 370 | * i'm not happy about this global shared spinlock in the |
@@ -380,17 +375,7 @@ static void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, | |||
380 | 375 | ||
381 | flush_mm = mm; | 376 | flush_mm = mm; |
382 | flush_va = va; | 377 | flush_va = va; |
383 | #if NR_CPUS <= BITS_PER_LONG | 378 | cpus_or(flush_cpumask, cpumask, flush_cpumask); |
384 | atomic_set_mask(cpumask, &flush_cpumask); | ||
385 | #else | ||
386 | { | ||
387 | int k; | ||
388 | unsigned long *flush_mask = (unsigned long *)&flush_cpumask; | ||
389 | unsigned long *cpu_mask = (unsigned long *)&cpumask; | ||
390 | for (k = 0; k < BITS_TO_LONGS(NR_CPUS); ++k) | ||
391 | atomic_set_mask(cpu_mask[k], &flush_mask[k]); | ||
392 | } | ||
393 | #endif | ||
394 | /* | 379 | /* |
395 | * We have to send the IPI only to | 380 | * We have to send the IPI only to |
396 | * CPUs affected. | 381 | * CPUs affected. |
@@ -417,7 +402,7 @@ void flush_tlb_current_task(void) | |||
417 | 402 | ||
418 | local_flush_tlb(); | 403 | local_flush_tlb(); |
419 | if (!cpus_empty(cpu_mask)) | 404 | if (!cpus_empty(cpu_mask)) |
420 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | 405 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
421 | preempt_enable(); | 406 | preempt_enable(); |
422 | } | 407 | } |
423 | 408 | ||
@@ -436,7 +421,7 @@ void flush_tlb_mm (struct mm_struct * mm) | |||
436 | leave_mm(smp_processor_id()); | 421 | leave_mm(smp_processor_id()); |
437 | } | 422 | } |
438 | if (!cpus_empty(cpu_mask)) | 423 | if (!cpus_empty(cpu_mask)) |
439 | flush_tlb_others(cpu_mask, mm, FLUSH_ALL); | 424 | flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); |
440 | 425 | ||
441 | preempt_enable(); | 426 | preempt_enable(); |
442 | } | 427 | } |
@@ -483,7 +468,7 @@ void flush_tlb_all(void) | |||
483 | * it goes straight through and wastes no time serializing | 468 | * it goes straight through and wastes no time serializing |
484 | * anything. Worst case is that we lose a reschedule ... | 469 | * anything. Worst case is that we lose a reschedule ... |
485 | */ | 470 | */ |
486 | void smp_send_reschedule(int cpu) | 471 | void native_smp_send_reschedule(int cpu) |
487 | { | 472 | { |
488 | WARN_ON(cpu_is_offline(cpu)); | 473 | WARN_ON(cpu_is_offline(cpu)); |
489 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); | 474 | send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); |
@@ -515,36 +500,78 @@ void unlock_ipi_call_lock(void) | |||
515 | 500 | ||
516 | static struct call_data_struct *call_data; | 501 | static struct call_data_struct *call_data; |
517 | 502 | ||
503 | static void __smp_call_function(void (*func) (void *info), void *info, | ||
504 | int nonatomic, int wait) | ||
505 | { | ||
506 | struct call_data_struct data; | ||
507 | int cpus = num_online_cpus() - 1; | ||
508 | |||
509 | if (!cpus) | ||
510 | return; | ||
511 | |||
512 | data.func = func; | ||
513 | data.info = info; | ||
514 | atomic_set(&data.started, 0); | ||
515 | data.wait = wait; | ||
516 | if (wait) | ||
517 | atomic_set(&data.finished, 0); | ||
518 | |||
519 | call_data = &data; | ||
520 | mb(); | ||
521 | |||
522 | /* Send a message to all other CPUs and wait for them to respond */ | ||
523 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
524 | |||
525 | /* Wait for response */ | ||
526 | while (atomic_read(&data.started) != cpus) | ||
527 | cpu_relax(); | ||
528 | |||
529 | if (wait) | ||
530 | while (atomic_read(&data.finished) != cpus) | ||
531 | cpu_relax(); | ||
532 | } | ||
533 | |||
534 | |||
518 | /** | 535 | /** |
519 | * smp_call_function(): Run a function on all other CPUs. | 536 | * smp_call_function_mask(): Run a function on a set of other CPUs. |
537 | * @mask: The set of cpus to run on. Must not include the current cpu. | ||
520 | * @func: The function to run. This must be fast and non-blocking. | 538 | * @func: The function to run. This must be fast and non-blocking. |
521 | * @info: An arbitrary pointer to pass to the function. | 539 | * @info: An arbitrary pointer to pass to the function. |
522 | * @nonatomic: currently unused. | ||
523 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | 540 | * @wait: If true, wait (atomically) until function has completed on other CPUs. |
524 | * | 541 | * |
525 | * Returns 0 on success, else a negative status code. Does not return until | 542 | * Returns 0 on success, else a negative status code. |
526 | * remote CPUs are nearly ready to execute <<func>> or are or have executed. | 543 | * |
544 | * If @wait is true, then returns once @func has returned; otherwise | ||
545 | * it returns just before the target cpu calls @func. | ||
527 | * | 546 | * |
528 | * You must not call this function with disabled interrupts or from a | 547 | * You must not call this function with disabled interrupts or from a |
529 | * hardware interrupt handler or from a bottom half handler. | 548 | * hardware interrupt handler or from a bottom half handler. |
530 | */ | 549 | */ |
531 | int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | 550 | int native_smp_call_function_mask(cpumask_t mask, |
532 | int wait) | 551 | void (*func)(void *), void *info, |
552 | int wait) | ||
533 | { | 553 | { |
534 | struct call_data_struct data; | 554 | struct call_data_struct data; |
555 | cpumask_t allbutself; | ||
535 | int cpus; | 556 | int cpus; |
536 | 557 | ||
558 | /* Can deadlock when called with interrupts disabled */ | ||
559 | WARN_ON(irqs_disabled()); | ||
560 | |||
537 | /* Holding any lock stops cpus from going down. */ | 561 | /* Holding any lock stops cpus from going down. */ |
538 | spin_lock(&call_lock); | 562 | spin_lock(&call_lock); |
539 | cpus = num_online_cpus() - 1; | 563 | |
564 | allbutself = cpu_online_map; | ||
565 | cpu_clear(smp_processor_id(), allbutself); | ||
566 | |||
567 | cpus_and(mask, mask, allbutself); | ||
568 | cpus = cpus_weight(mask); | ||
569 | |||
540 | if (!cpus) { | 570 | if (!cpus) { |
541 | spin_unlock(&call_lock); | 571 | spin_unlock(&call_lock); |
542 | return 0; | 572 | return 0; |
543 | } | 573 | } |
544 | 574 | ||
545 | /* Can deadlock when called with interrupts disabled */ | ||
546 | WARN_ON(irqs_disabled()); | ||
547 | |||
548 | data.func = func; | 575 | data.func = func; |
549 | data.info = info; | 576 | data.info = info; |
550 | atomic_set(&data.started, 0); | 577 | atomic_set(&data.started, 0); |
@@ -554,9 +581,12 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
554 | 581 | ||
555 | call_data = &data; | 582 | call_data = &data; |
556 | mb(); | 583 | mb(); |
557 | 584 | ||
558 | /* Send a message to all other CPUs and wait for them to respond */ | 585 | /* Send a message to other CPUs */ |
559 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | 586 | if (cpus_equal(mask, allbutself)) |
587 | send_IPI_allbutself(CALL_FUNCTION_VECTOR); | ||
588 | else | ||
589 | send_IPI_mask(mask, CALL_FUNCTION_VECTOR); | ||
560 | 590 | ||
561 | /* Wait for response */ | 591 | /* Wait for response */ |
562 | while (atomic_read(&data.started) != cpus) | 592 | while (atomic_read(&data.started) != cpus) |
@@ -569,15 +599,68 @@ int smp_call_function (void (*func) (void *info), void *info, int nonatomic, | |||
569 | 599 | ||
570 | return 0; | 600 | return 0; |
571 | } | 601 | } |
602 | |||
603 | /** | ||
604 | * smp_call_function(): Run a function on all other CPUs. | ||
605 | * @func: The function to run. This must be fast and non-blocking. | ||
606 | * @info: An arbitrary pointer to pass to the function. | ||
607 | * @nonatomic: Unused. | ||
608 | * @wait: If true, wait (atomically) until function has completed on other CPUs. | ||
609 | * | ||
610 | * Returns 0 on success, else a negative status code. | ||
611 | * | ||
612 | * If @wait is true, then returns once @func has returned; otherwise | ||
613 | * it returns just before the target cpu calls @func. | ||
614 | * | ||
615 | * You must not call this function with disabled interrupts or from a | ||
616 | * hardware interrupt handler or from a bottom half handler. | ||
617 | */ | ||
618 | int smp_call_function(void (*func) (void *info), void *info, int nonatomic, | ||
619 | int wait) | ||
620 | { | ||
621 | return smp_call_function_mask(cpu_online_map, func, info, wait); | ||
622 | } | ||
572 | EXPORT_SYMBOL(smp_call_function); | 623 | EXPORT_SYMBOL(smp_call_function); |
573 | 624 | ||
625 | /** | ||
626 | * smp_call_function_single - Run a function on another CPU | ||
627 | * @cpu: The target CPU. Cannot be the calling CPU. | ||
628 | * @func: The function to run. This must be fast and non-blocking. | ||
629 | * @info: An arbitrary pointer to pass to the function. | ||
630 | * @nonatomic: Unused. | ||
631 | * @wait: If true, wait until function has completed on other CPUs. | ||
632 | * | ||
633 | * Returns 0 on success, else a negative status code. | ||
634 | * | ||
635 | * If @wait is true, then returns once @func has returned; otherwise | ||
636 | * it returns just before the target cpu calls @func. | ||
637 | */ | ||
638 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
639 | int nonatomic, int wait) | ||
640 | { | ||
641 | /* prevent preemption and reschedule on another processor */ | ||
642 | int ret; | ||
643 | int me = get_cpu(); | ||
644 | if (cpu == me) { | ||
645 | WARN_ON(1); | ||
646 | put_cpu(); | ||
647 | return -EBUSY; | ||
648 | } | ||
649 | |||
650 | ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait); | ||
651 | |||
652 | put_cpu(); | ||
653 | return ret; | ||
654 | } | ||
655 | EXPORT_SYMBOL(smp_call_function_single); | ||
656 | |||
574 | static void stop_this_cpu (void * dummy) | 657 | static void stop_this_cpu (void * dummy) |
575 | { | 658 | { |
659 | local_irq_disable(); | ||
576 | /* | 660 | /* |
577 | * Remove this CPU: | 661 | * Remove this CPU: |
578 | */ | 662 | */ |
579 | cpu_clear(smp_processor_id(), cpu_online_map); | 663 | cpu_clear(smp_processor_id(), cpu_online_map); |
580 | local_irq_disable(); | ||
581 | disable_local_APIC(); | 664 | disable_local_APIC(); |
582 | if (cpu_data[smp_processor_id()].hlt_works_ok) | 665 | if (cpu_data[smp_processor_id()].hlt_works_ok) |
583 | for(;;) halt(); | 666 | for(;;) halt(); |
@@ -588,13 +671,18 @@ static void stop_this_cpu (void * dummy) | |||
588 | * this function calls the 'stop' function on all other CPUs in the system. | 671 | * this function calls the 'stop' function on all other CPUs in the system. |
589 | */ | 672 | */ |
590 | 673 | ||
591 | void smp_send_stop(void) | 674 | void native_smp_send_stop(void) |
592 | { | 675 | { |
593 | smp_call_function(stop_this_cpu, NULL, 1, 0); | 676 | /* Don't deadlock on the call lock in panic */ |
677 | int nolock = !spin_trylock(&call_lock); | ||
678 | unsigned long flags; | ||
594 | 679 | ||
595 | local_irq_disable(); | 680 | local_irq_save(flags); |
681 | __smp_call_function(stop_this_cpu, NULL, 0, 0); | ||
682 | if (!nolock) | ||
683 | spin_unlock(&call_lock); | ||
596 | disable_local_APIC(); | 684 | disable_local_APIC(); |
597 | local_irq_enable(); | 685 | local_irq_restore(flags); |
598 | } | 686 | } |
599 | 687 | ||
600 | /* | 688 | /* |
@@ -633,77 +721,6 @@ fastcall void smp_call_function_interrupt(struct pt_regs *regs) | |||
633 | } | 721 | } |
634 | } | 722 | } |
635 | 723 | ||
636 | /* | ||
637 | * this function sends a 'generic call function' IPI to one other CPU | ||
638 | * in the system. | ||
639 | * | ||
640 | * cpu is a standard Linux logical CPU number. | ||
641 | */ | ||
642 | static void | ||
643 | __smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
644 | int nonatomic, int wait) | ||
645 | { | ||
646 | struct call_data_struct data; | ||
647 | int cpus = 1; | ||
648 | |||
649 | data.func = func; | ||
650 | data.info = info; | ||
651 | atomic_set(&data.started, 0); | ||
652 | data.wait = wait; | ||
653 | if (wait) | ||
654 | atomic_set(&data.finished, 0); | ||
655 | |||
656 | call_data = &data; | ||
657 | wmb(); | ||
658 | /* Send a message to all other CPUs and wait for them to respond */ | ||
659 | send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_VECTOR); | ||
660 | |||
661 | /* Wait for response */ | ||
662 | while (atomic_read(&data.started) != cpus) | ||
663 | cpu_relax(); | ||
664 | |||
665 | if (!wait) | ||
666 | return; | ||
667 | |||
668 | while (atomic_read(&data.finished) != cpus) | ||
669 | cpu_relax(); | ||
670 | } | ||
671 | |||
672 | /* | ||
673 | * smp_call_function_single - Run a function on another CPU | ||
674 | * @func: The function to run. This must be fast and non-blocking. | ||
675 | * @info: An arbitrary pointer to pass to the function. | ||
676 | * @nonatomic: Currently unused. | ||
677 | * @wait: If true, wait until function has completed on other CPUs. | ||
678 | * | ||
679 | * Retrurns 0 on success, else a negative status code. | ||
680 | * | ||
681 | * Does not return until the remote CPU is nearly ready to execute <func> | ||
682 | * or is or has executed. | ||
683 | */ | ||
684 | |||
685 | int smp_call_function_single(int cpu, void (*func) (void *info), void *info, | ||
686 | int nonatomic, int wait) | ||
687 | { | ||
688 | /* prevent preemption and reschedule on another processor */ | ||
689 | int me = get_cpu(); | ||
690 | if (cpu == me) { | ||
691 | WARN_ON(1); | ||
692 | put_cpu(); | ||
693 | return -EBUSY; | ||
694 | } | ||
695 | |||
696 | /* Can deadlock when called with interrupts disabled */ | ||
697 | WARN_ON(irqs_disabled()); | ||
698 | |||
699 | spin_lock_bh(&call_lock); | ||
700 | __smp_call_function_single(cpu, func, info, nonatomic, wait); | ||
701 | spin_unlock_bh(&call_lock); | ||
702 | put_cpu(); | ||
703 | return 0; | ||
704 | } | ||
705 | EXPORT_SYMBOL(smp_call_function_single); | ||
706 | |||
707 | static int convert_apicid_to_cpu(int apic_id) | 724 | static int convert_apicid_to_cpu(int apic_id) |
708 | { | 725 | { |
709 | int i; | 726 | int i; |
@@ -730,3 +747,14 @@ int safe_smp_processor_id(void) | |||
730 | 747 | ||
731 | return cpuid >= 0 ? cpuid : 0; | 748 | return cpuid >= 0 ? cpuid : 0; |
732 | } | 749 | } |
750 | |||
751 | struct smp_ops smp_ops = { | ||
752 | .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu, | ||
753 | .smp_prepare_cpus = native_smp_prepare_cpus, | ||
754 | .cpu_up = native_cpu_up, | ||
755 | .smp_cpus_done = native_smp_cpus_done, | ||
756 | |||
757 | .smp_send_stop = native_smp_send_stop, | ||
758 | .smp_send_reschedule = native_smp_send_reschedule, | ||
759 | .smp_call_function_mask = native_smp_call_function_mask, | ||
760 | }; | ||
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 4ff55e675576..a4b7ad283f49 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -53,13 +53,12 @@ | |||
53 | #include <asm/desc.h> | 53 | #include <asm/desc.h> |
54 | #include <asm/arch_hooks.h> | 54 | #include <asm/arch_hooks.h> |
55 | #include <asm/nmi.h> | 55 | #include <asm/nmi.h> |
56 | #include <asm/pda.h> | ||
57 | #include <asm/genapic.h> | ||
58 | 56 | ||
59 | #include <mach_apic.h> | 57 | #include <mach_apic.h> |
60 | #include <mach_wakecpu.h> | 58 | #include <mach_wakecpu.h> |
61 | #include <smpboot_hooks.h> | 59 | #include <smpboot_hooks.h> |
62 | #include <asm/vmi.h> | 60 | #include <asm/vmi.h> |
61 | #include <asm/mtrr.h> | ||
63 | 62 | ||
64 | /* Set if we find a B stepping CPU */ | 63 | /* Set if we find a B stepping CPU */ |
65 | static int __devinitdata smp_b_stepping; | 64 | static int __devinitdata smp_b_stepping; |
@@ -100,6 +99,9 @@ EXPORT_SYMBOL(x86_cpu_to_apicid); | |||
100 | 99 | ||
101 | u8 apicid_2_node[MAX_APICID]; | 100 | u8 apicid_2_node[MAX_APICID]; |
102 | 101 | ||
102 | DEFINE_PER_CPU(unsigned long, this_cpu_off); | ||
103 | EXPORT_PER_CPU_SYMBOL(this_cpu_off); | ||
104 | |||
103 | /* | 105 | /* |
104 | * Trampoline 80x86 program as an array. | 106 | * Trampoline 80x86 program as an array. |
105 | */ | 107 | */ |
@@ -156,7 +158,7 @@ static void __cpuinit smp_store_cpu_info(int id) | |||
156 | 158 | ||
157 | *c = boot_cpu_data; | 159 | *c = boot_cpu_data; |
158 | if (id!=0) | 160 | if (id!=0) |
159 | identify_cpu(c); | 161 | identify_secondary_cpu(c); |
160 | /* | 162 | /* |
161 | * Mask B, Pentium, but not Pentium MMX | 163 | * Mask B, Pentium, but not Pentium MMX |
162 | */ | 164 | */ |
@@ -379,14 +381,14 @@ set_cpu_sibling_map(int cpu) | |||
379 | static void __cpuinit start_secondary(void *unused) | 381 | static void __cpuinit start_secondary(void *unused) |
380 | { | 382 | { |
381 | /* | 383 | /* |
382 | * Don't put *anything* before secondary_cpu_init(), SMP | 384 | * Don't put *anything* before cpu_init(), SMP booting is too |
383 | * booting is too fragile that we want to limit the | 385 | * fragile that we want to limit the things done here to the |
384 | * things done here to the most necessary things. | 386 | * most necessary things. |
385 | */ | 387 | */ |
386 | #ifdef CONFIG_VMI | 388 | #ifdef CONFIG_VMI |
387 | vmi_bringup(); | 389 | vmi_bringup(); |
388 | #endif | 390 | #endif |
389 | secondary_cpu_init(); | 391 | cpu_init(); |
390 | preempt_disable(); | 392 | preempt_disable(); |
391 | smp_callin(); | 393 | smp_callin(); |
392 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) | 394 | while (!cpu_isset(smp_processor_id(), smp_commenced_mask)) |
@@ -441,12 +443,6 @@ static void __cpuinit start_secondary(void *unused) | |||
441 | void __devinit initialize_secondary(void) | 443 | void __devinit initialize_secondary(void) |
442 | { | 444 | { |
443 | /* | 445 | /* |
444 | * switch to the per CPU GDT we already set up | ||
445 | * in do_boot_cpu() | ||
446 | */ | ||
447 | cpu_set_gdt(current_thread_info()->cpu); | ||
448 | |||
449 | /* | ||
450 | * We don't actually need to load the full TSS, | 446 | * We don't actually need to load the full TSS, |
451 | * basically just the stack pointer and the eip. | 447 | * basically just the stack pointer and the eip. |
452 | */ | 448 | */ |
@@ -463,7 +459,6 @@ extern struct { | |||
463 | void * esp; | 459 | void * esp; |
464 | unsigned short ss; | 460 | unsigned short ss; |
465 | } stack_start; | 461 | } stack_start; |
466 | extern struct i386_pda *start_pda; | ||
467 | 462 | ||
468 | #ifdef CONFIG_NUMA | 463 | #ifdef CONFIG_NUMA |
469 | 464 | ||
@@ -521,12 +516,12 @@ static void unmap_cpu_to_logical_apicid(int cpu) | |||
521 | unmap_cpu_to_node(cpu); | 516 | unmap_cpu_to_node(cpu); |
522 | } | 517 | } |
523 | 518 | ||
524 | #if APIC_DEBUG | ||
525 | static inline void __inquire_remote_apic(int apicid) | 519 | static inline void __inquire_remote_apic(int apicid) |
526 | { | 520 | { |
527 | int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; | 521 | int i, regs[] = { APIC_ID >> 4, APIC_LVR >> 4, APIC_SPIV >> 4 }; |
528 | char *names[] = { "ID", "VERSION", "SPIV" }; | 522 | char *names[] = { "ID", "VERSION", "SPIV" }; |
529 | int timeout, status; | 523 | int timeout; |
524 | unsigned long status; | ||
530 | 525 | ||
531 | printk("Inquiring remote APIC #%d...\n", apicid); | 526 | printk("Inquiring remote APIC #%d...\n", apicid); |
532 | 527 | ||
@@ -536,7 +531,9 @@ static inline void __inquire_remote_apic(int apicid) | |||
536 | /* | 531 | /* |
537 | * Wait for idle. | 532 | * Wait for idle. |
538 | */ | 533 | */ |
539 | apic_wait_icr_idle(); | 534 | status = safe_apic_wait_icr_idle(); |
535 | if (status) | ||
536 | printk("a previous APIC delivery may have failed\n"); | ||
540 | 537 | ||
541 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); | 538 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); |
542 | apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); | 539 | apic_write_around(APIC_ICR, APIC_DM_REMRD | regs[i]); |
@@ -550,14 +547,13 @@ static inline void __inquire_remote_apic(int apicid) | |||
550 | switch (status) { | 547 | switch (status) { |
551 | case APIC_ICR_RR_VALID: | 548 | case APIC_ICR_RR_VALID: |
552 | status = apic_read(APIC_RRR); | 549 | status = apic_read(APIC_RRR); |
553 | printk("%08x\n", status); | 550 | printk("%lx\n", status); |
554 | break; | 551 | break; |
555 | default: | 552 | default: |
556 | printk("failed\n"); | 553 | printk("failed\n"); |
557 | } | 554 | } |
558 | } | 555 | } |
559 | } | 556 | } |
560 | #endif | ||
561 | 557 | ||
562 | #ifdef WAKE_SECONDARY_VIA_NMI | 558 | #ifdef WAKE_SECONDARY_VIA_NMI |
563 | /* | 559 | /* |
@@ -568,8 +564,8 @@ static inline void __inquire_remote_apic(int apicid) | |||
568 | static int __devinit | 564 | static int __devinit |
569 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | 565 | wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) |
570 | { | 566 | { |
571 | unsigned long send_status = 0, accept_status = 0; | 567 | unsigned long send_status, accept_status = 0; |
572 | int timeout, maxlvt; | 568 | int maxlvt; |
573 | 569 | ||
574 | /* Target chip */ | 570 | /* Target chip */ |
575 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); | 571 | apic_write_around(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid)); |
@@ -579,12 +575,7 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
579 | apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); | 575 | apic_write_around(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); |
580 | 576 | ||
581 | Dprintk("Waiting for send to finish...\n"); | 577 | Dprintk("Waiting for send to finish...\n"); |
582 | timeout = 0; | 578 | send_status = safe_apic_wait_icr_idle(); |
583 | do { | ||
584 | Dprintk("+"); | ||
585 | udelay(100); | ||
586 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
587 | } while (send_status && (timeout++ < 1000)); | ||
588 | 579 | ||
589 | /* | 580 | /* |
590 | * Give the other CPU some time to accept the IPI. | 581 | * Give the other CPU some time to accept the IPI. |
@@ -614,8 +605,8 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip) | |||
614 | static int __devinit | 605 | static int __devinit |
615 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | 606 | wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) |
616 | { | 607 | { |
617 | unsigned long send_status = 0, accept_status = 0; | 608 | unsigned long send_status, accept_status = 0; |
618 | int maxlvt, timeout, num_starts, j; | 609 | int maxlvt, num_starts, j; |
619 | 610 | ||
620 | /* | 611 | /* |
621 | * Be paranoid about clearing APIC errors. | 612 | * Be paranoid about clearing APIC errors. |
@@ -640,12 +631,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
640 | | APIC_DM_INIT); | 631 | | APIC_DM_INIT); |
641 | 632 | ||
642 | Dprintk("Waiting for send to finish...\n"); | 633 | Dprintk("Waiting for send to finish...\n"); |
643 | timeout = 0; | 634 | send_status = safe_apic_wait_icr_idle(); |
644 | do { | ||
645 | Dprintk("+"); | ||
646 | udelay(100); | ||
647 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
648 | } while (send_status && (timeout++ < 1000)); | ||
649 | 635 | ||
650 | mdelay(10); | 636 | mdelay(10); |
651 | 637 | ||
@@ -658,12 +644,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
658 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); | 644 | apic_write_around(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); |
659 | 645 | ||
660 | Dprintk("Waiting for send to finish...\n"); | 646 | Dprintk("Waiting for send to finish...\n"); |
661 | timeout = 0; | 647 | send_status = safe_apic_wait_icr_idle(); |
662 | do { | ||
663 | Dprintk("+"); | ||
664 | udelay(100); | ||
665 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
666 | } while (send_status && (timeout++ < 1000)); | ||
667 | 648 | ||
668 | atomic_set(&init_deasserted, 1); | 649 | atomic_set(&init_deasserted, 1); |
669 | 650 | ||
@@ -719,12 +700,7 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) | |||
719 | Dprintk("Startup point 1.\n"); | 700 | Dprintk("Startup point 1.\n"); |
720 | 701 | ||
721 | Dprintk("Waiting for send to finish...\n"); | 702 | Dprintk("Waiting for send to finish...\n"); |
722 | timeout = 0; | 703 | send_status = safe_apic_wait_icr_idle(); |
723 | do { | ||
724 | Dprintk("+"); | ||
725 | udelay(100); | ||
726 | send_status = apic_read(APIC_ICR) & APIC_ICR_BUSY; | ||
727 | } while (send_status && (timeout++ < 1000)); | ||
728 | 704 | ||
729 | /* | 705 | /* |
730 | * Give the other CPU some time to accept the IPI. | 706 | * Give the other CPU some time to accept the IPI. |
@@ -788,6 +764,25 @@ static inline struct task_struct * alloc_idle_task(int cpu) | |||
788 | #define alloc_idle_task(cpu) fork_idle(cpu) | 764 | #define alloc_idle_task(cpu) fork_idle(cpu) |
789 | #endif | 765 | #endif |
790 | 766 | ||
767 | /* Initialize the CPU's GDT. This is either the boot CPU doing itself | ||
768 | (still using the master per-cpu area), or a CPU doing it for a | ||
769 | secondary which will soon come up. */ | ||
770 | static __cpuinit void init_gdt(int cpu) | ||
771 | { | ||
772 | struct desc_struct *gdt = get_cpu_gdt_table(cpu); | ||
773 | |||
774 | pack_descriptor((u32 *)&gdt[GDT_ENTRY_PERCPU].a, | ||
775 | (u32 *)&gdt[GDT_ENTRY_PERCPU].b, | ||
776 | __per_cpu_offset[cpu], 0xFFFFF, | ||
777 | 0x80 | DESCTYPE_S | 0x2, 0x8); | ||
778 | |||
779 | per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu]; | ||
780 | per_cpu(cpu_number, cpu) = cpu; | ||
781 | } | ||
782 | |||
783 | /* Defined in head.S */ | ||
784 | extern struct Xgt_desc_struct early_gdt_descr; | ||
785 | |||
791 | static int __cpuinit do_boot_cpu(int apicid, int cpu) | 786 | static int __cpuinit do_boot_cpu(int apicid, int cpu) |
792 | /* | 787 | /* |
793 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad | 788 | * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad |
@@ -802,6 +797,12 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
802 | unsigned short nmi_high = 0, nmi_low = 0; | 797 | unsigned short nmi_high = 0, nmi_low = 0; |
803 | 798 | ||
804 | /* | 799 | /* |
800 | * Save current MTRR state in case it was changed since early boot | ||
801 | * (e.g. by the ACPI SMI) to initialize new CPUs with MTRRs in sync: | ||
802 | */ | ||
803 | mtrr_save_state(); | ||
804 | |||
805 | /* | ||
805 | * We can't use kernel_thread since we must avoid to | 806 | * We can't use kernel_thread since we must avoid to |
806 | * reschedule the child. | 807 | * reschedule the child. |
807 | */ | 808 | */ |
@@ -809,13 +810,9 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) | |||
809 | if (IS_ERR(idle)) | 810 | if (IS_ERR(idle)) |
810 | panic("failed fork for CPU %d", cpu); | 811 | panic("failed fork for CPU %d", cpu); |
811 | 812 | ||
812 | /* Pre-allocate and initialize the CPU's GDT and PDA so it | 813 | init_gdt(cpu); |
813 | doesn't have to do any memory allocation during the | 814 | per_cpu(current_task, cpu) = idle; |
814 | delicate CPU-bringup phase. */ | 815 | early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); |
815 | if (!init_gdt(cpu, idle)) { | ||
816 | printk(KERN_INFO "Couldn't allocate GDT/PDA for CPU %d\n", cpu); | ||
817 | return -1; /* ? */ | ||
818 | } | ||
819 | 816 | ||
820 | idle->thread.eip = (unsigned long) start_secondary; | 817 | idle->thread.eip = (unsigned long) start_secondary; |
821 | /* start_eip had better be page-aligned! */ | 818 | /* start_eip had better be page-aligned! */ |
@@ -941,7 +938,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
941 | DECLARE_COMPLETION_ONSTACK(done); | 938 | DECLARE_COMPLETION_ONSTACK(done); |
942 | struct warm_boot_cpu_info info; | 939 | struct warm_boot_cpu_info info; |
943 | int apicid, ret; | 940 | int apicid, ret; |
944 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
945 | 941 | ||
946 | apicid = x86_cpu_to_apicid[cpu]; | 942 | apicid = x86_cpu_to_apicid[cpu]; |
947 | if (apicid == BAD_APICID) { | 943 | if (apicid == BAD_APICID) { |
@@ -949,18 +945,6 @@ static int __cpuinit __smp_prepare_cpu(int cpu) | |||
949 | goto exit; | 945 | goto exit; |
950 | } | 946 | } |
951 | 947 | ||
952 | /* | ||
953 | * the CPU isn't initialized at boot time, allocate gdt table here. | ||
954 | * cpu_init will initialize it | ||
955 | */ | ||
956 | if (!cpu_gdt_descr->address) { | ||
957 | cpu_gdt_descr->address = get_zeroed_page(GFP_KERNEL); | ||
958 | if (!cpu_gdt_descr->address) | ||
959 | printk(KERN_CRIT "CPU%d failed to allocate GDT\n", cpu); | ||
960 | ret = -ENOMEM; | ||
961 | goto exit; | ||
962 | } | ||
963 | |||
964 | info.complete = &done; | 948 | info.complete = &done; |
965 | info.apicid = apicid; | 949 | info.apicid = apicid; |
966 | info.cpu = cpu; | 950 | info.cpu = cpu; |
@@ -1173,7 +1157,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) | |||
1173 | 1157 | ||
1174 | /* These are wrappers to interface to the new boot process. Someone | 1158 | /* These are wrappers to interface to the new boot process. Someone |
1175 | who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ | 1159 | who understands all this stuff should rewrite it properly. --RR 15/Jul/02 */ |
1176 | void __init smp_prepare_cpus(unsigned int max_cpus) | 1160 | void __init native_smp_prepare_cpus(unsigned int max_cpus) |
1177 | { | 1161 | { |
1178 | smp_commenced_mask = cpumask_of_cpu(0); | 1162 | smp_commenced_mask = cpumask_of_cpu(0); |
1179 | cpu_callin_map = cpumask_of_cpu(0); | 1163 | cpu_callin_map = cpumask_of_cpu(0); |
@@ -1181,13 +1165,18 @@ void __init smp_prepare_cpus(unsigned int max_cpus) | |||
1181 | smp_boot_cpus(max_cpus); | 1165 | smp_boot_cpus(max_cpus); |
1182 | } | 1166 | } |
1183 | 1167 | ||
1184 | void __devinit smp_prepare_boot_cpu(void) | 1168 | void __init native_smp_prepare_boot_cpu(void) |
1185 | { | 1169 | { |
1186 | cpu_set(smp_processor_id(), cpu_online_map); | 1170 | unsigned int cpu = smp_processor_id(); |
1187 | cpu_set(smp_processor_id(), cpu_callout_map); | 1171 | |
1188 | cpu_set(smp_processor_id(), cpu_present_map); | 1172 | init_gdt(cpu); |
1189 | cpu_set(smp_processor_id(), cpu_possible_map); | 1173 | switch_to_new_gdt(); |
1190 | per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; | 1174 | |
1175 | cpu_set(cpu, cpu_online_map); | ||
1176 | cpu_set(cpu, cpu_callout_map); | ||
1177 | cpu_set(cpu, cpu_present_map); | ||
1178 | cpu_set(cpu, cpu_possible_map); | ||
1179 | __get_cpu_var(cpu_state) = CPU_ONLINE; | ||
1191 | } | 1180 | } |
1192 | 1181 | ||
1193 | #ifdef CONFIG_HOTPLUG_CPU | 1182 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1277,7 +1266,7 @@ void __cpu_die(unsigned int cpu) | |||
1277 | } | 1266 | } |
1278 | #endif /* CONFIG_HOTPLUG_CPU */ | 1267 | #endif /* CONFIG_HOTPLUG_CPU */ |
1279 | 1268 | ||
1280 | int __cpuinit __cpu_up(unsigned int cpu) | 1269 | int __cpuinit native_cpu_up(unsigned int cpu) |
1281 | { | 1270 | { |
1282 | unsigned long flags; | 1271 | unsigned long flags; |
1283 | #ifdef CONFIG_HOTPLUG_CPU | 1272 | #ifdef CONFIG_HOTPLUG_CPU |
@@ -1319,15 +1308,10 @@ int __cpuinit __cpu_up(unsigned int cpu) | |||
1319 | touch_nmi_watchdog(); | 1308 | touch_nmi_watchdog(); |
1320 | } | 1309 | } |
1321 | 1310 | ||
1322 | #ifdef CONFIG_X86_GENERICARCH | ||
1323 | if (num_online_cpus() > 8 && genapic == &apic_default) | ||
1324 | panic("Default flat APIC routing can't be used with > 8 cpus\n"); | ||
1325 | #endif | ||
1326 | |||
1327 | return 0; | 1311 | return 0; |
1328 | } | 1312 | } |
1329 | 1313 | ||
1330 | void __init smp_cpus_done(unsigned int max_cpus) | 1314 | void __init native_smp_cpus_done(unsigned int max_cpus) |
1331 | { | 1315 | { |
1332 | #ifdef CONFIG_X86_IO_APIC | 1316 | #ifdef CONFIG_X86_IO_APIC |
1333 | setup_ioapic_dest(); | 1317 | setup_ioapic_dest(); |
diff --git a/arch/i386/kernel/sysenter.c b/arch/i386/kernel/sysenter.c index 13ca54a85a1c..ff4ee6f3326b 100644 --- a/arch/i386/kernel/sysenter.c +++ b/arch/i386/kernel/sysenter.c | |||
@@ -22,16 +22,26 @@ | |||
22 | #include <asm/msr.h> | 22 | #include <asm/msr.h> |
23 | #include <asm/pgtable.h> | 23 | #include <asm/pgtable.h> |
24 | #include <asm/unistd.h> | 24 | #include <asm/unistd.h> |
25 | #include <asm/elf.h> | ||
26 | #include <asm/tlbflush.h> | ||
27 | |||
28 | enum { | ||
29 | VDSO_DISABLED = 0, | ||
30 | VDSO_ENABLED = 1, | ||
31 | VDSO_COMPAT = 2, | ||
32 | }; | ||
33 | |||
34 | #ifdef CONFIG_COMPAT_VDSO | ||
35 | #define VDSO_DEFAULT VDSO_COMPAT | ||
36 | #else | ||
37 | #define VDSO_DEFAULT VDSO_ENABLED | ||
38 | #endif | ||
25 | 39 | ||
26 | /* | 40 | /* |
27 | * Should the kernel map a VDSO page into processes and pass its | 41 | * Should the kernel map a VDSO page into processes and pass its |
28 | * address down to glibc upon exec()? | 42 | * address down to glibc upon exec()? |
29 | */ | 43 | */ |
30 | #ifdef CONFIG_PARAVIRT | 44 | unsigned int __read_mostly vdso_enabled = VDSO_DEFAULT; |
31 | unsigned int __read_mostly vdso_enabled = 0; | ||
32 | #else | ||
33 | unsigned int __read_mostly vdso_enabled = 1; | ||
34 | #endif | ||
35 | 45 | ||
36 | EXPORT_SYMBOL_GPL(vdso_enabled); | 46 | EXPORT_SYMBOL_GPL(vdso_enabled); |
37 | 47 | ||
@@ -46,6 +56,123 @@ __setup("vdso=", vdso_setup); | |||
46 | 56 | ||
47 | extern asmlinkage void sysenter_entry(void); | 57 | extern asmlinkage void sysenter_entry(void); |
48 | 58 | ||
59 | static __init void reloc_symtab(Elf32_Ehdr *ehdr, | ||
60 | unsigned offset, unsigned size) | ||
61 | { | ||
62 | Elf32_Sym *sym = (void *)ehdr + offset; | ||
63 | unsigned nsym = size / sizeof(*sym); | ||
64 | unsigned i; | ||
65 | |||
66 | for(i = 0; i < nsym; i++, sym++) { | ||
67 | if (sym->st_shndx == SHN_UNDEF || | ||
68 | sym->st_shndx == SHN_ABS) | ||
69 | continue; /* skip */ | ||
70 | |||
71 | if (sym->st_shndx > SHN_LORESERVE) { | ||
72 | printk(KERN_INFO "VDSO: unexpected st_shndx %x\n", | ||
73 | sym->st_shndx); | ||
74 | continue; | ||
75 | } | ||
76 | |||
77 | switch(ELF_ST_TYPE(sym->st_info)) { | ||
78 | case STT_OBJECT: | ||
79 | case STT_FUNC: | ||
80 | case STT_SECTION: | ||
81 | case STT_FILE: | ||
82 | sym->st_value += VDSO_HIGH_BASE; | ||
83 | } | ||
84 | } | ||
85 | } | ||
86 | |||
87 | static __init void reloc_dyn(Elf32_Ehdr *ehdr, unsigned offset) | ||
88 | { | ||
89 | Elf32_Dyn *dyn = (void *)ehdr + offset; | ||
90 | |||
91 | for(; dyn->d_tag != DT_NULL; dyn++) | ||
92 | switch(dyn->d_tag) { | ||
93 | case DT_PLTGOT: | ||
94 | case DT_HASH: | ||
95 | case DT_STRTAB: | ||
96 | case DT_SYMTAB: | ||
97 | case DT_RELA: | ||
98 | case DT_INIT: | ||
99 | case DT_FINI: | ||
100 | case DT_REL: | ||
101 | case DT_DEBUG: | ||
102 | case DT_JMPREL: | ||
103 | case DT_VERSYM: | ||
104 | case DT_VERDEF: | ||
105 | case DT_VERNEED: | ||
106 | case DT_ADDRRNGLO ... DT_ADDRRNGHI: | ||
107 | /* definitely pointers needing relocation */ | ||
108 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
109 | break; | ||
110 | |||
111 | case DT_ENCODING ... OLD_DT_LOOS-1: | ||
112 | case DT_LOOS ... DT_HIOS-1: | ||
113 | /* Tags above DT_ENCODING are pointers if | ||
114 | they're even */ | ||
115 | if (dyn->d_tag >= DT_ENCODING && | ||
116 | (dyn->d_tag & 1) == 0) | ||
117 | dyn->d_un.d_ptr += VDSO_HIGH_BASE; | ||
118 | break; | ||
119 | |||
120 | case DT_VERDEFNUM: | ||
121 | case DT_VERNEEDNUM: | ||
122 | case DT_FLAGS_1: | ||
123 | case DT_RELACOUNT: | ||
124 | case DT_RELCOUNT: | ||
125 | case DT_VALRNGLO ... DT_VALRNGHI: | ||
126 | /* definitely not pointers */ | ||
127 | break; | ||
128 | |||
129 | case OLD_DT_LOOS ... DT_LOOS-1: | ||
130 | case DT_HIOS ... DT_VALRNGLO-1: | ||
131 | default: | ||
132 | if (dyn->d_tag > DT_ENCODING) | ||
133 | printk(KERN_INFO "VDSO: unexpected DT_tag %x\n", | ||
134 | dyn->d_tag); | ||
135 | break; | ||
136 | } | ||
137 | } | ||
138 | |||
139 | static __init void relocate_vdso(Elf32_Ehdr *ehdr) | ||
140 | { | ||
141 | Elf32_Phdr *phdr; | ||
142 | Elf32_Shdr *shdr; | ||
143 | int i; | ||
144 | |||
145 | BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 || | ||
146 | !elf_check_arch(ehdr) || | ||
147 | ehdr->e_type != ET_DYN); | ||
148 | |||
149 | ehdr->e_entry += VDSO_HIGH_BASE; | ||
150 | |||
151 | /* rebase phdrs */ | ||
152 | phdr = (void *)ehdr + ehdr->e_phoff; | ||
153 | for (i = 0; i < ehdr->e_phnum; i++) { | ||
154 | phdr[i].p_vaddr += VDSO_HIGH_BASE; | ||
155 | |||
156 | /* relocate dynamic stuff */ | ||
157 | if (phdr[i].p_type == PT_DYNAMIC) | ||
158 | reloc_dyn(ehdr, phdr[i].p_offset); | ||
159 | } | ||
160 | |||
161 | /* rebase sections */ | ||
162 | shdr = (void *)ehdr + ehdr->e_shoff; | ||
163 | for(i = 0; i < ehdr->e_shnum; i++) { | ||
164 | if (!(shdr[i].sh_flags & SHF_ALLOC)) | ||
165 | continue; | ||
166 | |||
167 | shdr[i].sh_addr += VDSO_HIGH_BASE; | ||
168 | |||
169 | if (shdr[i].sh_type == SHT_SYMTAB || | ||
170 | shdr[i].sh_type == SHT_DYNSYM) | ||
171 | reloc_symtab(ehdr, shdr[i].sh_offset, | ||
172 | shdr[i].sh_size); | ||
173 | } | ||
174 | } | ||
175 | |||
49 | void enable_sep_cpu(void) | 176 | void enable_sep_cpu(void) |
50 | { | 177 | { |
51 | int cpu = get_cpu(); | 178 | int cpu = get_cpu(); |
@@ -56,14 +183,33 @@ void enable_sep_cpu(void) | |||
56 | return; | 183 | return; |
57 | } | 184 | } |
58 | 185 | ||
59 | tss->ss1 = __KERNEL_CS; | 186 | tss->x86_tss.ss1 = __KERNEL_CS; |
60 | tss->esp1 = sizeof(struct tss_struct) + (unsigned long) tss; | 187 | tss->x86_tss.esp1 = sizeof(struct tss_struct) + (unsigned long) tss; |
61 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); | 188 | wrmsr(MSR_IA32_SYSENTER_CS, __KERNEL_CS, 0); |
62 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->esp1, 0); | 189 | wrmsr(MSR_IA32_SYSENTER_ESP, tss->x86_tss.esp1, 0); |
63 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); | 190 | wrmsr(MSR_IA32_SYSENTER_EIP, (unsigned long) sysenter_entry, 0); |
64 | put_cpu(); | 191 | put_cpu(); |
65 | } | 192 | } |
66 | 193 | ||
194 | static struct vm_area_struct gate_vma; | ||
195 | |||
196 | static int __init gate_vma_init(void) | ||
197 | { | ||
198 | gate_vma.vm_mm = NULL; | ||
199 | gate_vma.vm_start = FIXADDR_USER_START; | ||
200 | gate_vma.vm_end = FIXADDR_USER_END; | ||
201 | gate_vma.vm_flags = VM_READ | VM_MAYREAD | VM_EXEC | VM_MAYEXEC; | ||
202 | gate_vma.vm_page_prot = __P101; | ||
203 | /* | ||
204 | * Make sure the vDSO gets into every core dump. | ||
205 | * Dumping its contents makes post-mortem fully interpretable later | ||
206 | * without matching up the same kernel and hardware config to see | ||
207 | * what PC values meant. | ||
208 | */ | ||
209 | gate_vma.vm_flags |= VM_ALWAYSDUMP; | ||
210 | return 0; | ||
211 | } | ||
212 | |||
67 | /* | 213 | /* |
68 | * These symbols are defined by vsyscall.o to mark the bounds | 214 | * These symbols are defined by vsyscall.o to mark the bounds |
69 | * of the ELF DSO images included therein. | 215 | * of the ELF DSO images included therein. |
@@ -72,31 +218,48 @@ extern const char vsyscall_int80_start, vsyscall_int80_end; | |||
72 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; | 218 | extern const char vsyscall_sysenter_start, vsyscall_sysenter_end; |
73 | static struct page *syscall_pages[1]; | 219 | static struct page *syscall_pages[1]; |
74 | 220 | ||
221 | static void map_compat_vdso(int map) | ||
222 | { | ||
223 | static int vdso_mapped; | ||
224 | |||
225 | if (map == vdso_mapped) | ||
226 | return; | ||
227 | |||
228 | vdso_mapped = map; | ||
229 | |||
230 | __set_fixmap(FIX_VDSO, page_to_pfn(syscall_pages[0]) << PAGE_SHIFT, | ||
231 | map ? PAGE_READONLY_EXEC : PAGE_NONE); | ||
232 | |||
233 | /* flush stray tlbs */ | ||
234 | flush_tlb_all(); | ||
235 | } | ||
236 | |||
75 | int __init sysenter_setup(void) | 237 | int __init sysenter_setup(void) |
76 | { | 238 | { |
77 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); | 239 | void *syscall_page = (void *)get_zeroed_page(GFP_ATOMIC); |
240 | const void *vsyscall; | ||
241 | size_t vsyscall_len; | ||
242 | |||
78 | syscall_pages[0] = virt_to_page(syscall_page); | 243 | syscall_pages[0] = virt_to_page(syscall_page); |
79 | 244 | ||
80 | #ifdef CONFIG_COMPAT_VDSO | 245 | gate_vma_init(); |
81 | __set_fixmap(FIX_VDSO, __pa(syscall_page), PAGE_READONLY_EXEC); | 246 | |
82 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); | 247 | printk("Compat vDSO mapped to %08lx.\n", __fix_to_virt(FIX_VDSO)); |
83 | #endif | ||
84 | 248 | ||
85 | if (!boot_cpu_has(X86_FEATURE_SEP)) { | 249 | if (!boot_cpu_has(X86_FEATURE_SEP)) { |
86 | memcpy(syscall_page, | 250 | vsyscall = &vsyscall_int80_start; |
87 | &vsyscall_int80_start, | 251 | vsyscall_len = &vsyscall_int80_end - &vsyscall_int80_start; |
88 | &vsyscall_int80_end - &vsyscall_int80_start); | 252 | } else { |
89 | return 0; | 253 | vsyscall = &vsyscall_sysenter_start; |
254 | vsyscall_len = &vsyscall_sysenter_end - &vsyscall_sysenter_start; | ||
90 | } | 255 | } |
91 | 256 | ||
92 | memcpy(syscall_page, | 257 | memcpy(syscall_page, vsyscall, vsyscall_len); |
93 | &vsyscall_sysenter_start, | 258 | relocate_vdso(syscall_page); |
94 | &vsyscall_sysenter_end - &vsyscall_sysenter_start); | ||
95 | 259 | ||
96 | return 0; | 260 | return 0; |
97 | } | 261 | } |
98 | 262 | ||
99 | #ifndef CONFIG_COMPAT_VDSO | ||
100 | /* Defined in vsyscall-sysenter.S */ | 263 | /* Defined in vsyscall-sysenter.S */ |
101 | extern void SYSENTER_RETURN; | 264 | extern void SYSENTER_RETURN; |
102 | 265 | ||
@@ -105,36 +268,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int exstack) | |||
105 | { | 268 | { |
106 | struct mm_struct *mm = current->mm; | 269 | struct mm_struct *mm = current->mm; |
107 | unsigned long addr; | 270 | unsigned long addr; |
108 | int ret; | 271 | int ret = 0; |
272 | bool compat; | ||
109 | 273 | ||
110 | down_write(&mm->mmap_sem); | 274 | down_write(&mm->mmap_sem); |
111 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); | ||
112 | if (IS_ERR_VALUE(addr)) { | ||
113 | ret = addr; | ||
114 | goto up_fail; | ||
115 | } | ||
116 | 275 | ||
117 | /* | 276 | /* Test compat mode once here, in case someone |
118 | * MAYWRITE to allow gdb to COW and set breakpoints | 277 | changes it via sysctl */ |
119 | * | 278 | compat = (vdso_enabled == VDSO_COMPAT); |
120 | * Make sure the vDSO gets into every core dump. | 279 | |
121 | * Dumping its contents makes post-mortem fully interpretable later | 280 | map_compat_vdso(compat); |
122 | * without matching up the same kernel and hardware config to see | 281 | |
123 | * what PC values meant. | 282 | if (compat) |
124 | */ | 283 | addr = VDSO_HIGH_BASE; |
125 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | 284 | else { |
126 | VM_READ|VM_EXEC| | 285 | addr = get_unmapped_area(NULL, 0, PAGE_SIZE, 0, 0); |
127 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | 286 | if (IS_ERR_VALUE(addr)) { |
128 | VM_ALWAYSDUMP, | 287 | ret = addr; |
129 | syscall_pages); | 288 | goto up_fail; |
130 | if (ret) | 289 | } |
131 | goto up_fail; | 290 | |
291 | /* | ||
292 | * MAYWRITE to allow gdb to COW and set breakpoints | ||
293 | * | ||
294 | * Make sure the vDSO gets into every core dump. | ||
295 | * Dumping its contents makes post-mortem fully | ||
296 | * interpretable later without matching up the same | ||
297 | * kernel and hardware config to see what PC values | ||
298 | * meant. | ||
299 | */ | ||
300 | ret = install_special_mapping(mm, addr, PAGE_SIZE, | ||
301 | VM_READ|VM_EXEC| | ||
302 | VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC| | ||
303 | VM_ALWAYSDUMP, | ||
304 | syscall_pages); | ||
305 | |||
306 | if (ret) | ||
307 | goto up_fail; | ||
308 | } | ||
132 | 309 | ||
133 | current->mm->context.vdso = (void *)addr; | 310 | current->mm->context.vdso = (void *)addr; |
134 | current_thread_info()->sysenter_return = | 311 | current_thread_info()->sysenter_return = |
135 | (void *)VDSO_SYM(&SYSENTER_RETURN); | 312 | (void *)VDSO_SYM(&SYSENTER_RETURN); |
136 | up_fail: | 313 | |
314 | up_fail: | ||
137 | up_write(&mm->mmap_sem); | 315 | up_write(&mm->mmap_sem); |
316 | |||
138 | return ret; | 317 | return ret; |
139 | } | 318 | } |
140 | 319 | ||
@@ -147,6 +326,11 @@ const char *arch_vma_name(struct vm_area_struct *vma) | |||
147 | 326 | ||
148 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) | 327 | struct vm_area_struct *get_gate_vma(struct task_struct *tsk) |
149 | { | 328 | { |
329 | struct mm_struct *mm = tsk->mm; | ||
330 | |||
331 | /* Check to see if this task was created in compat vdso mode */ | ||
332 | if (mm && mm->context.vdso == (void *)VDSO_HIGH_BASE) | ||
333 | return &gate_vma; | ||
150 | return NULL; | 334 | return NULL; |
151 | } | 335 | } |
152 | 336 | ||
@@ -159,4 +343,3 @@ int in_gate_area_no_task(unsigned long addr) | |||
159 | { | 343 | { |
160 | return 0; | 344 | return 0; |
161 | } | 345 | } |
162 | #endif | ||
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 94e5cb091104..a665df61f08c 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -70,8 +70,6 @@ | |||
70 | 70 | ||
71 | #include <asm/i8259.h> | 71 | #include <asm/i8259.h> |
72 | 72 | ||
73 | int pit_latch_buggy; /* extern */ | ||
74 | |||
75 | #include "do_timer.h" | 73 | #include "do_timer.h" |
76 | 74 | ||
77 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ | 75 | unsigned int cpu_khz; /* Detected as we calibrate the TSC */ |
diff --git a/arch/i386/kernel/trampoline.S b/arch/i386/kernel/trampoline.S index 2f1814c5cfd7..f62815f8d06a 100644 --- a/arch/i386/kernel/trampoline.S +++ b/arch/i386/kernel/trampoline.S | |||
@@ -29,7 +29,7 @@ | |||
29 | * | 29 | * |
30 | * TYPE VALUE | 30 | * TYPE VALUE |
31 | * R_386_32 startup_32_smp | 31 | * R_386_32 startup_32_smp |
32 | * R_386_32 boot_gdt_table | 32 | * R_386_32 boot_gdt |
33 | */ | 33 | */ |
34 | 34 | ||
35 | #include <linux/linkage.h> | 35 | #include <linux/linkage.h> |
@@ -62,8 +62,8 @@ r_base = . | |||
62 | * to 32 bit. | 62 | * to 32 bit. |
63 | */ | 63 | */ |
64 | 64 | ||
65 | lidtl boot_idt - r_base # load idt with 0, 0 | 65 | lidtl boot_idt_descr - r_base # load idt with 0, 0 |
66 | lgdtl boot_gdt - r_base # load gdt with whatever is appropriate | 66 | lgdtl boot_gdt_descr - r_base # load gdt with whatever is appropriate |
67 | 67 | ||
68 | xor %ax, %ax | 68 | xor %ax, %ax |
69 | inc %ax # protected mode (PE) bit | 69 | inc %ax # protected mode (PE) bit |
@@ -73,11 +73,11 @@ r_base = . | |||
73 | 73 | ||
74 | # These need to be in the same 64K segment as the above; | 74 | # These need to be in the same 64K segment as the above; |
75 | # hence we don't use the boot_gdt_descr defined in head.S | 75 | # hence we don't use the boot_gdt_descr defined in head.S |
76 | boot_gdt: | 76 | boot_gdt_descr: |
77 | .word __BOOT_DS + 7 # gdt limit | 77 | .word __BOOT_DS + 7 # gdt limit |
78 | .long boot_gdt_table-__PAGE_OFFSET # gdt base | 78 | .long boot_gdt - __PAGE_OFFSET # gdt base |
79 | 79 | ||
80 | boot_idt: | 80 | boot_idt_descr: |
81 | .word 0 # idt limit = 0 | 81 | .word 0 # idt limit = 0 |
82 | .long 0 # idt base = 0L | 82 | .long 0 # idt base = 0L |
83 | 83 | ||
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index af0d3f70a817..f21b41e7770c 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -476,8 +476,6 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, | |||
476 | siginfo_t *info) | 476 | siginfo_t *info) |
477 | { | 477 | { |
478 | struct task_struct *tsk = current; | 478 | struct task_struct *tsk = current; |
479 | tsk->thread.error_code = error_code; | ||
480 | tsk->thread.trap_no = trapnr; | ||
481 | 479 | ||
482 | if (regs->eflags & VM_MASK) { | 480 | if (regs->eflags & VM_MASK) { |
483 | if (vm86) | 481 | if (vm86) |
@@ -489,6 +487,18 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, | |||
489 | goto kernel_trap; | 487 | goto kernel_trap; |
490 | 488 | ||
491 | trap_signal: { | 489 | trap_signal: { |
490 | /* | ||
491 | * We want error_code and trap_no set for userspace faults and | ||
492 | * kernelspace faults which result in die(), but not | ||
493 | * kernelspace faults which are fixed up. die() gives the | ||
494 | * process no chance to handle the signal and notice the | ||
495 | * kernel fault information, so that won't result in polluting | ||
496 | * the information about previously queued, but not yet | ||
497 | * delivered, faults. See also do_general_protection below. | ||
498 | */ | ||
499 | tsk->thread.error_code = error_code; | ||
500 | tsk->thread.trap_no = trapnr; | ||
501 | |||
492 | if (info) | 502 | if (info) |
493 | force_sig_info(signr, info, tsk); | 503 | force_sig_info(signr, info, tsk); |
494 | else | 504 | else |
@@ -497,8 +507,11 @@ static void __kprobes do_trap(int trapnr, int signr, char *str, int vm86, | |||
497 | } | 507 | } |
498 | 508 | ||
499 | kernel_trap: { | 509 | kernel_trap: { |
500 | if (!fixup_exception(regs)) | 510 | if (!fixup_exception(regs)) { |
511 | tsk->thread.error_code = error_code; | ||
512 | tsk->thread.trap_no = trapnr; | ||
501 | die(str, regs, error_code); | 513 | die(str, regs, error_code); |
514 | } | ||
502 | return; | 515 | return; |
503 | } | 516 | } |
504 | 517 | ||
@@ -583,7 +596,7 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, | |||
583 | * and we set the offset field correctly. Then we let the CPU to | 596 | * and we set the offset field correctly. Then we let the CPU to |
584 | * restart the faulting instruction. | 597 | * restart the faulting instruction. |
585 | */ | 598 | */ |
586 | if (tss->io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && | 599 | if (tss->x86_tss.io_bitmap_base == INVALID_IO_BITMAP_OFFSET_LAZY && |
587 | thread->io_bitmap_ptr) { | 600 | thread->io_bitmap_ptr) { |
588 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, | 601 | memcpy(tss->io_bitmap, thread->io_bitmap_ptr, |
589 | thread->io_bitmap_max); | 602 | thread->io_bitmap_max); |
@@ -596,16 +609,13 @@ fastcall void __kprobes do_general_protection(struct pt_regs * regs, | |||
596 | thread->io_bitmap_max, 0xff, | 609 | thread->io_bitmap_max, 0xff, |
597 | tss->io_bitmap_max - thread->io_bitmap_max); | 610 | tss->io_bitmap_max - thread->io_bitmap_max); |
598 | tss->io_bitmap_max = thread->io_bitmap_max; | 611 | tss->io_bitmap_max = thread->io_bitmap_max; |
599 | tss->io_bitmap_base = IO_BITMAP_OFFSET; | 612 | tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET; |
600 | tss->io_bitmap_owner = thread; | 613 | tss->io_bitmap_owner = thread; |
601 | put_cpu(); | 614 | put_cpu(); |
602 | return; | 615 | return; |
603 | } | 616 | } |
604 | put_cpu(); | 617 | put_cpu(); |
605 | 618 | ||
606 | current->thread.error_code = error_code; | ||
607 | current->thread.trap_no = 13; | ||
608 | |||
609 | if (regs->eflags & VM_MASK) | 619 | if (regs->eflags & VM_MASK) |
610 | goto gp_in_vm86; | 620 | goto gp_in_vm86; |
611 | 621 | ||
@@ -624,6 +634,8 @@ gp_in_vm86: | |||
624 | 634 | ||
625 | gp_in_kernel: | 635 | gp_in_kernel: |
626 | if (!fixup_exception(regs)) { | 636 | if (!fixup_exception(regs)) { |
637 | current->thread.error_code = error_code; | ||
638 | current->thread.trap_no = 13; | ||
627 | if (notify_die(DIE_GPF, "general protection fault", regs, | 639 | if (notify_die(DIE_GPF, "general protection fault", regs, |
628 | error_code, 13, SIGSEGV) == NOTIFY_STOP) | 640 | error_code, 13, SIGSEGV) == NOTIFY_STOP) |
629 | return; | 641 | return; |
@@ -1018,9 +1030,7 @@ fastcall void do_spurious_interrupt_bug(struct pt_regs * regs, | |||
1018 | fastcall unsigned long patch_espfix_desc(unsigned long uesp, | 1030 | fastcall unsigned long patch_espfix_desc(unsigned long uesp, |
1019 | unsigned long kesp) | 1031 | unsigned long kesp) |
1020 | { | 1032 | { |
1021 | int cpu = smp_processor_id(); | 1033 | struct desc_struct *gdt = __get_cpu_var(gdt_page).gdt; |
1022 | struct Xgt_desc_struct *cpu_gdt_descr = &per_cpu(cpu_gdt_descr, cpu); | ||
1023 | struct desc_struct *gdt = (struct desc_struct *)cpu_gdt_descr->address; | ||
1024 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; | 1034 | unsigned long base = (kesp - uesp) & -THREAD_SIZE; |
1025 | unsigned long new_kesp = kesp - base; | 1035 | unsigned long new_kesp = kesp - base; |
1026 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; | 1036 | unsigned long lim_pages = (new_kesp | (THREAD_SIZE - 1)) >> PAGE_SHIFT; |
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 6cb8f5336732..f64b81f3033b 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -200,13 +200,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | |||
200 | { | 200 | { |
201 | struct cpufreq_freqs *freq = data; | 201 | struct cpufreq_freqs *freq = data; |
202 | 202 | ||
203 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
204 | write_seqlock_irq(&xtime_lock); | ||
205 | |||
206 | if (!ref_freq) { | 203 | if (!ref_freq) { |
207 | if (!freq->old){ | 204 | if (!freq->old){ |
208 | ref_freq = freq->new; | 205 | ref_freq = freq->new; |
209 | goto end; | 206 | return 0; |
210 | } | 207 | } |
211 | ref_freq = freq->old; | 208 | ref_freq = freq->old; |
212 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; | 209 | loops_per_jiffy_ref = cpu_data[freq->cpu].loops_per_jiffy; |
@@ -233,13 +230,10 @@ time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) | |||
233 | * TSC based sched_clock turns | 230 | * TSC based sched_clock turns |
234 | * to junk w/ cpufreq | 231 | * to junk w/ cpufreq |
235 | */ | 232 | */ |
236 | mark_tsc_unstable(); | 233 | mark_tsc_unstable("cpufreq changes"); |
237 | } | 234 | } |
238 | } | 235 | } |
239 | } | 236 | } |
240 | end: | ||
241 | if (val != CPUFREQ_RESUMECHANGE && val != CPUFREQ_SUSPENDCHANGE) | ||
242 | write_sequnlock_irq(&xtime_lock); | ||
243 | 237 | ||
244 | return 0; | 238 | return 0; |
245 | } | 239 | } |
@@ -281,11 +275,12 @@ static struct clocksource clocksource_tsc = { | |||
281 | CLOCK_SOURCE_MUST_VERIFY, | 275 | CLOCK_SOURCE_MUST_VERIFY, |
282 | }; | 276 | }; |
283 | 277 | ||
284 | void mark_tsc_unstable(void) | 278 | void mark_tsc_unstable(char *reason) |
285 | { | 279 | { |
286 | if (!tsc_unstable) { | 280 | if (!tsc_unstable) { |
287 | tsc_unstable = 1; | 281 | tsc_unstable = 1; |
288 | tsc_enabled = 0; | 282 | tsc_enabled = 0; |
283 | printk("Marking TSC unstable due to: %s.\n", reason); | ||
289 | /* Can be called before registration */ | 284 | /* Can be called before registration */ |
290 | if (clocksource_tsc.mult) | 285 | if (clocksource_tsc.mult) |
291 | clocksource_change_rating(&clocksource_tsc, 0); | 286 | clocksource_change_rating(&clocksource_tsc, 0); |
diff --git a/arch/i386/kernel/verify_cpu.S b/arch/i386/kernel/verify_cpu.S new file mode 100644 index 000000000000..e51a8695d54e --- /dev/null +++ b/arch/i386/kernel/verify_cpu.S | |||
@@ -0,0 +1,65 @@ | |||
1 | /* Check if CPU has some minimum CPUID bits | ||
2 | This runs in 16bit mode so that the caller can still use the BIOS | ||
3 | to output errors on the screen */ | ||
4 | #include <asm/cpufeature.h> | ||
5 | |||
6 | verify_cpu: | ||
7 | pushfl # Save caller passed flags | ||
8 | pushl $0 # Kill any dangerous flags | ||
9 | popfl | ||
10 | |||
11 | #if CONFIG_X86_MINIMUM_CPU_MODEL >= 4 | ||
12 | pushfl | ||
13 | orl $(1<<18),(%esp) # try setting AC | ||
14 | popfl | ||
15 | pushfl | ||
16 | popl %eax | ||
17 | testl $(1<<18),%eax | ||
18 | jz bad | ||
19 | #endif | ||
20 | #if REQUIRED_MASK1 != 0 | ||
21 | pushfl # standard way to check for cpuid | ||
22 | popl %eax | ||
23 | movl %eax,%ebx | ||
24 | xorl $0x200000,%eax | ||
25 | pushl %eax | ||
26 | popfl | ||
27 | pushfl | ||
28 | popl %eax | ||
29 | cmpl %eax,%ebx | ||
30 | pushfl # standard way to check for cpuid | ||
31 | popl %eax | ||
32 | movl %eax,%ebx | ||
33 | xorl $0x200000,%eax | ||
34 | pushl %eax | ||
35 | popfl | ||
36 | pushfl | ||
37 | popl %eax | ||
38 | cmpl %eax,%ebx | ||
39 | jz bad # REQUIRED_MASK1 != 0 requires CPUID | ||
40 | |||
41 | movl $0x0,%eax # See if cpuid 1 is implemented | ||
42 | cpuid | ||
43 | cmpl $0x1,%eax | ||
44 | jb bad # no cpuid 1 | ||
45 | |||
46 | movl $0x1,%eax # Does the cpu have what it takes | ||
47 | cpuid | ||
48 | |||
49 | #if CONFIG_X86_MINIMUM_CPU_MODEL > 4 | ||
50 | #error add proper model checking here | ||
51 | #endif | ||
52 | |||
53 | andl $REQUIRED_MASK1,%edx | ||
54 | xorl $REQUIRED_MASK1,%edx | ||
55 | jnz bad | ||
56 | #endif /* REQUIRED_MASK1 */ | ||
57 | |||
58 | popfl | ||
59 | xor %eax,%eax | ||
60 | ret | ||
61 | |||
62 | bad: | ||
63 | popfl | ||
64 | movl $1,%eax | ||
65 | ret | ||
diff --git a/arch/i386/kernel/vmi.c b/arch/i386/kernel/vmi.c index 697a70e8c0c9..c8726c424b35 100644 --- a/arch/i386/kernel/vmi.c +++ b/arch/i386/kernel/vmi.c | |||
@@ -26,6 +26,7 @@ | |||
26 | #include <linux/cpu.h> | 26 | #include <linux/cpu.h> |
27 | #include <linux/bootmem.h> | 27 | #include <linux/bootmem.h> |
28 | #include <linux/mm.h> | 28 | #include <linux/mm.h> |
29 | #include <linux/highmem.h> | ||
29 | #include <asm/vmi.h> | 30 | #include <asm/vmi.h> |
30 | #include <asm/io.h> | 31 | #include <asm/io.h> |
31 | #include <asm/fixmap.h> | 32 | #include <asm/fixmap.h> |
@@ -56,7 +57,7 @@ static int disable_noidle; | |||
56 | static int disable_vmi_timer; | 57 | static int disable_vmi_timer; |
57 | 58 | ||
58 | /* Cached VMI operations */ | 59 | /* Cached VMI operations */ |
59 | struct { | 60 | static struct { |
60 | void (*cpuid)(void /* non-c */); | 61 | void (*cpuid)(void /* non-c */); |
61 | void (*_set_ldt)(u32 selector); | 62 | void (*_set_ldt)(u32 selector); |
62 | void (*set_tr)(u32 selector); | 63 | void (*set_tr)(u32 selector); |
@@ -65,16 +66,15 @@ struct { | |||
65 | void (*release_page)(u32, u32); | 66 | void (*release_page)(u32, u32); |
66 | void (*set_pte)(pte_t, pte_t *, unsigned); | 67 | void (*set_pte)(pte_t, pte_t *, unsigned); |
67 | void (*update_pte)(pte_t *, unsigned); | 68 | void (*update_pte)(pte_t *, unsigned); |
68 | void (*set_linear_mapping)(int, u32, u32, u32); | 69 | void (*set_linear_mapping)(int, void *, u32, u32); |
69 | void (*flush_tlb)(int); | 70 | void (*_flush_tlb)(int); |
70 | void (*set_initial_ap_state)(int, int); | 71 | void (*set_initial_ap_state)(int, int); |
71 | void (*halt)(void); | 72 | void (*halt)(void); |
72 | void (*set_lazy_mode)(int mode); | 73 | void (*set_lazy_mode)(int mode); |
73 | } vmi_ops; | 74 | } vmi_ops; |
74 | 75 | ||
75 | /* XXX move this to alternative.h */ | 76 | /* Cached VMI operations */ |
76 | extern struct paravirt_patch __start_parainstructions[], | 77 | struct vmi_timer_ops vmi_timer_ops; |
77 | __stop_parainstructions[]; | ||
78 | 78 | ||
79 | /* | 79 | /* |
80 | * VMI patching routines. | 80 | * VMI patching routines. |
@@ -83,11 +83,6 @@ extern struct paravirt_patch __start_parainstructions[], | |||
83 | #define MNEM_JMP 0xe9 | 83 | #define MNEM_JMP 0xe9 |
84 | #define MNEM_RET 0xc3 | 84 | #define MNEM_RET 0xc3 |
85 | 85 | ||
86 | static char irq_save_disable_callout[] = { | ||
87 | MNEM_CALL, 0, 0, 0, 0, | ||
88 | MNEM_CALL, 0, 0, 0, 0, | ||
89 | MNEM_RET | ||
90 | }; | ||
91 | #define IRQ_PATCH_INT_MASK 0 | 86 | #define IRQ_PATCH_INT_MASK 0 |
92 | #define IRQ_PATCH_DISABLE 5 | 87 | #define IRQ_PATCH_DISABLE 5 |
93 | 88 | ||
@@ -135,33 +130,17 @@ static unsigned patch_internal(int call, unsigned len, void *insns) | |||
135 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) | 130 | static unsigned vmi_patch(u8 type, u16 clobbers, void *insns, unsigned len) |
136 | { | 131 | { |
137 | switch (type) { | 132 | switch (type) { |
138 | case PARAVIRT_IRQ_DISABLE: | 133 | case PARAVIRT_PATCH(irq_disable): |
139 | return patch_internal(VMI_CALL_DisableInterrupts, len, insns); | 134 | return patch_internal(VMI_CALL_DisableInterrupts, len, insns); |
140 | case PARAVIRT_IRQ_ENABLE: | 135 | case PARAVIRT_PATCH(irq_enable): |
141 | return patch_internal(VMI_CALL_EnableInterrupts, len, insns); | 136 | return patch_internal(VMI_CALL_EnableInterrupts, len, insns); |
142 | case PARAVIRT_RESTORE_FLAGS: | 137 | case PARAVIRT_PATCH(restore_fl): |
143 | return patch_internal(VMI_CALL_SetInterruptMask, len, insns); | 138 | return patch_internal(VMI_CALL_SetInterruptMask, len, insns); |
144 | case PARAVIRT_SAVE_FLAGS: | 139 | case PARAVIRT_PATCH(save_fl): |
145 | return patch_internal(VMI_CALL_GetInterruptMask, len, insns); | 140 | return patch_internal(VMI_CALL_GetInterruptMask, len, insns); |
146 | case PARAVIRT_SAVE_FLAGS_IRQ_DISABLE: | 141 | case PARAVIRT_PATCH(iret): |
147 | if (len >= 10) { | ||
148 | patch_internal(VMI_CALL_GetInterruptMask, len, insns); | ||
149 | patch_internal(VMI_CALL_DisableInterrupts, len-5, insns+5); | ||
150 | return 10; | ||
151 | } else { | ||
152 | /* | ||
153 | * You bastards didn't leave enough room to | ||
154 | * patch save_flags_irq_disable inline. Patch | ||
155 | * to a helper | ||
156 | */ | ||
157 | BUG_ON(len < 5); | ||
158 | *(char *)insns = MNEM_CALL; | ||
159 | patch_offset(insns, irq_save_disable_callout); | ||
160 | return 5; | ||
161 | } | ||
162 | case PARAVIRT_INTERRUPT_RETURN: | ||
163 | return patch_internal(VMI_CALL_IRET, len, insns); | 142 | return patch_internal(VMI_CALL_IRET, len, insns); |
164 | case PARAVIRT_STI_SYSEXIT: | 143 | case PARAVIRT_PATCH(irq_enable_sysexit): |
165 | return patch_internal(VMI_CALL_SYSEXIT, len, insns); | 144 | return patch_internal(VMI_CALL_SYSEXIT, len, insns); |
166 | default: | 145 | default: |
167 | break; | 146 | break; |
@@ -230,24 +209,24 @@ static void vmi_set_tr(void) | |||
230 | static void vmi_load_esp0(struct tss_struct *tss, | 209 | static void vmi_load_esp0(struct tss_struct *tss, |
231 | struct thread_struct *thread) | 210 | struct thread_struct *thread) |
232 | { | 211 | { |
233 | tss->esp0 = thread->esp0; | 212 | tss->x86_tss.esp0 = thread->esp0; |
234 | 213 | ||
235 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ | 214 | /* This can only happen when SEP is enabled, no need to test "SEP"arately */ |
236 | if (unlikely(tss->ss1 != thread->sysenter_cs)) { | 215 | if (unlikely(tss->x86_tss.ss1 != thread->sysenter_cs)) { |
237 | tss->ss1 = thread->sysenter_cs; | 216 | tss->x86_tss.ss1 = thread->sysenter_cs; |
238 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); | 217 | wrmsr(MSR_IA32_SYSENTER_CS, thread->sysenter_cs, 0); |
239 | } | 218 | } |
240 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->esp0); | 219 | vmi_ops.set_kernel_stack(__KERNEL_DS, tss->x86_tss.esp0); |
241 | } | 220 | } |
242 | 221 | ||
243 | static void vmi_flush_tlb_user(void) | 222 | static void vmi_flush_tlb_user(void) |
244 | { | 223 | { |
245 | vmi_ops.flush_tlb(VMI_FLUSH_TLB); | 224 | vmi_ops._flush_tlb(VMI_FLUSH_TLB); |
246 | } | 225 | } |
247 | 226 | ||
248 | static void vmi_flush_tlb_kernel(void) | 227 | static void vmi_flush_tlb_kernel(void) |
249 | { | 228 | { |
250 | vmi_ops.flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); | 229 | vmi_ops._flush_tlb(VMI_FLUSH_TLB | VMI_FLUSH_GLOBAL); |
251 | } | 230 | } |
252 | 231 | ||
253 | /* Stub to do nothing at all; used for delays and unimplemented calls */ | 232 | /* Stub to do nothing at all; used for delays and unimplemented calls */ |
@@ -255,18 +234,6 @@ static void vmi_nop(void) | |||
255 | { | 234 | { |
256 | } | 235 | } |
257 | 236 | ||
258 | /* For NO_IDLE_HZ, we stop the clock when halting the kernel */ | ||
259 | static fastcall void vmi_safe_halt(void) | ||
260 | { | ||
261 | int idle = vmi_stop_hz_timer(); | ||
262 | vmi_ops.halt(); | ||
263 | if (idle) { | ||
264 | local_irq_disable(); | ||
265 | vmi_account_time_restart_hz_timer(); | ||
266 | local_irq_enable(); | ||
267 | } | ||
268 | } | ||
269 | |||
270 | #ifdef CONFIG_DEBUG_PAGE_TYPE | 237 | #ifdef CONFIG_DEBUG_PAGE_TYPE |
271 | 238 | ||
272 | #ifdef CONFIG_X86_PAE | 239 | #ifdef CONFIG_X86_PAE |
@@ -370,8 +337,11 @@ static void vmi_check_page_type(u32 pfn, int type) | |||
370 | #define vmi_check_page_type(p,t) do { } while (0) | 337 | #define vmi_check_page_type(p,t) do { } while (0) |
371 | #endif | 338 | #endif |
372 | 339 | ||
373 | static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn) | 340 | #ifdef CONFIG_HIGHPTE |
341 | static void *vmi_kmap_atomic_pte(struct page *page, enum km_type type) | ||
374 | { | 342 | { |
343 | void *va = kmap_atomic(page, type); | ||
344 | |||
375 | /* | 345 | /* |
376 | * Internally, the VMI ROM must map virtual addresses to physical | 346 | * Internally, the VMI ROM must map virtual addresses to physical |
377 | * addresses for processing MMU updates. By the time MMU updates | 347 | * addresses for processing MMU updates. By the time MMU updates |
@@ -385,8 +355,11 @@ static void vmi_map_pt_hook(int type, pte_t *va, u32 pfn) | |||
385 | * args: SLOT VA COUNT PFN | 355 | * args: SLOT VA COUNT PFN |
386 | */ | 356 | */ |
387 | BUG_ON(type != KM_PTE0 && type != KM_PTE1); | 357 | BUG_ON(type != KM_PTE0 && type != KM_PTE1); |
388 | vmi_ops.set_linear_mapping((type - KM_PTE0)+1, (u32)va, 1, pfn); | 358 | vmi_ops.set_linear_mapping((type - KM_PTE0)+1, va, 1, page_to_pfn(page)); |
359 | |||
360 | return va; | ||
389 | } | 361 | } |
362 | #endif | ||
390 | 363 | ||
391 | static void vmi_allocate_pt(u32 pfn) | 364 | static void vmi_allocate_pt(u32 pfn) |
392 | { | 365 | { |
@@ -443,13 +416,13 @@ static void vmi_release_pd(u32 pfn) | |||
443 | ((level) | (is_current_as(mm, user) ? \ | 416 | ((level) | (is_current_as(mm, user) ? \ |
444 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) | 417 | (VMI_PAGE_DEFER | VMI_PAGE_CURRENT_AS | ((addr) & VMI_PAGE_VA_MASK)) : 0)) |
445 | 418 | ||
446 | static void vmi_update_pte(struct mm_struct *mm, u32 addr, pte_t *ptep) | 419 | static void vmi_update_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
447 | { | 420 | { |
448 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | 421 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); |
449 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 422 | vmi_ops.update_pte(ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
450 | } | 423 | } |
451 | 424 | ||
452 | static void vmi_update_pte_defer(struct mm_struct *mm, u32 addr, pte_t *ptep) | 425 | static void vmi_update_pte_defer(struct mm_struct *mm, unsigned long addr, pte_t *ptep) |
453 | { | 426 | { |
454 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | 427 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); |
455 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); | 428 | vmi_ops.update_pte(ptep, vmi_flags_addr_defer(mm, addr, VMI_PAGE_PT, 0)); |
@@ -462,7 +435,7 @@ static void vmi_set_pte(pte_t *ptep, pte_t pte) | |||
462 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); | 435 | vmi_ops.set_pte(pte, ptep, VMI_PAGE_PT); |
463 | } | 436 | } |
464 | 437 | ||
465 | static void vmi_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte) | 438 | static void vmi_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte) |
466 | { | 439 | { |
467 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); | 440 | vmi_check_page_type(__pa(ptep) >> PAGE_SHIFT, VMI_PAGE_PTE); |
468 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 441 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
@@ -516,7 +489,7 @@ static void vmi_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep) | |||
516 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); | 489 | vmi_ops.set_pte(pte, ptep, vmi_flags_addr(mm, addr, VMI_PAGE_PT, 0)); |
517 | } | 490 | } |
518 | 491 | ||
519 | void vmi_pmd_clear(pmd_t *pmd) | 492 | static void vmi_pmd_clear(pmd_t *pmd) |
520 | { | 493 | { |
521 | const pte_t pte = { 0 }; | 494 | const pte_t pte = { 0 }; |
522 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); | 495 | vmi_check_page_type(__pa(pmd) >> PAGE_SHIFT, VMI_PAGE_PMD); |
@@ -525,8 +498,6 @@ void vmi_pmd_clear(pmd_t *pmd) | |||
525 | #endif | 498 | #endif |
526 | 499 | ||
527 | #ifdef CONFIG_SMP | 500 | #ifdef CONFIG_SMP |
528 | extern void setup_pda(void); | ||
529 | |||
530 | static void __devinit | 501 | static void __devinit |
531 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | 502 | vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, |
532 | unsigned long start_esp) | 503 | unsigned long start_esp) |
@@ -551,13 +522,11 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
551 | 522 | ||
552 | ap.ds = __USER_DS; | 523 | ap.ds = __USER_DS; |
553 | ap.es = __USER_DS; | 524 | ap.es = __USER_DS; |
554 | ap.fs = __KERNEL_PDA; | 525 | ap.fs = __KERNEL_PERCPU; |
555 | ap.gs = 0; | 526 | ap.gs = 0; |
556 | 527 | ||
557 | ap.eflags = 0; | 528 | ap.eflags = 0; |
558 | 529 | ||
559 | setup_pda(); | ||
560 | |||
561 | #ifdef CONFIG_X86_PAE | 530 | #ifdef CONFIG_X86_PAE |
562 | /* efer should match BSP efer. */ | 531 | /* efer should match BSP efer. */ |
563 | if (cpu_has_nx) { | 532 | if (cpu_has_nx) { |
@@ -575,9 +544,9 @@ vmi_startup_ipi_hook(int phys_apicid, unsigned long start_eip, | |||
575 | } | 544 | } |
576 | #endif | 545 | #endif |
577 | 546 | ||
578 | static void vmi_set_lazy_mode(int mode) | 547 | static void vmi_set_lazy_mode(enum paravirt_lazy_mode mode) |
579 | { | 548 | { |
580 | static DEFINE_PER_CPU(int, lazy_mode); | 549 | static DEFINE_PER_CPU(enum paravirt_lazy_mode, lazy_mode); |
581 | 550 | ||
582 | if (!vmi_ops.set_lazy_mode) | 551 | if (!vmi_ops.set_lazy_mode) |
583 | return; | 552 | return; |
@@ -685,7 +654,7 @@ void vmi_bringup(void) | |||
685 | { | 654 | { |
686 | /* We must establish the lowmem mapping for MMU ops to work */ | 655 | /* We must establish the lowmem mapping for MMU ops to work */ |
687 | if (vmi_ops.set_linear_mapping) | 656 | if (vmi_ops.set_linear_mapping) |
688 | vmi_ops.set_linear_mapping(0, __PAGE_OFFSET, max_low_pfn, 0); | 657 | vmi_ops.set_linear_mapping(0, (void *)__PAGE_OFFSET, max_low_pfn, 0); |
689 | } | 658 | } |
690 | 659 | ||
691 | /* | 660 | /* |
@@ -740,7 +709,6 @@ do { \ | |||
740 | } \ | 709 | } \ |
741 | } while (0) | 710 | } while (0) |
742 | 711 | ||
743 | |||
744 | /* | 712 | /* |
745 | * Activate the VMI interface and switch into paravirtualized mode | 713 | * Activate the VMI interface and switch into paravirtualized mode |
746 | */ | 714 | */ |
@@ -796,12 +764,6 @@ static inline int __init activate_vmi(void) | |||
796 | para_fill(irq_disable, DisableInterrupts); | 764 | para_fill(irq_disable, DisableInterrupts); |
797 | para_fill(irq_enable, EnableInterrupts); | 765 | para_fill(irq_enable, EnableInterrupts); |
798 | 766 | ||
799 | /* irq_save_disable !!! sheer pain */ | ||
800 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_INT_MASK], | ||
801 | (char *)paravirt_ops.save_fl); | ||
802 | patch_offset(&irq_save_disable_callout[IRQ_PATCH_DISABLE], | ||
803 | (char *)paravirt_ops.irq_disable); | ||
804 | |||
805 | para_fill(wbinvd, WBINVD); | 767 | para_fill(wbinvd, WBINVD); |
806 | para_fill(read_tsc, RDTSC); | 768 | para_fill(read_tsc, RDTSC); |
807 | 769 | ||
@@ -831,8 +793,8 @@ static inline int __init activate_vmi(void) | |||
831 | para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); | 793 | para_wrap(set_lazy_mode, vmi_set_lazy_mode, set_lazy_mode, SetLazyMode); |
832 | 794 | ||
833 | /* user and kernel flush are just handled with different flags to FlushTLB */ | 795 | /* user and kernel flush are just handled with different flags to FlushTLB */ |
834 | para_wrap(flush_tlb_user, vmi_flush_tlb_user, flush_tlb, FlushTLB); | 796 | para_wrap(flush_tlb_user, vmi_flush_tlb_user, _flush_tlb, FlushTLB); |
835 | para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, flush_tlb, FlushTLB); | 797 | para_wrap(flush_tlb_kernel, vmi_flush_tlb_kernel, _flush_tlb, FlushTLB); |
836 | para_fill(flush_tlb_single, InvalPage); | 798 | para_fill(flush_tlb_single, InvalPage); |
837 | 799 | ||
838 | /* | 800 | /* |
@@ -878,8 +840,13 @@ static inline int __init activate_vmi(void) | |||
878 | paravirt_ops.release_pt = vmi_release_pt; | 840 | paravirt_ops.release_pt = vmi_release_pt; |
879 | paravirt_ops.release_pd = vmi_release_pd; | 841 | paravirt_ops.release_pd = vmi_release_pd; |
880 | } | 842 | } |
881 | para_wrap(map_pt_hook, vmi_map_pt_hook, set_linear_mapping, | 843 | |
882 | SetLinearMapping); | 844 | /* Set linear is needed in all cases */ |
845 | vmi_ops.set_linear_mapping = vmi_get_function(VMI_CALL_SetLinearMapping); | ||
846 | #ifdef CONFIG_HIGHPTE | ||
847 | if (vmi_ops.set_linear_mapping) | ||
848 | paravirt_ops.kmap_atomic_pte = vmi_kmap_atomic_pte; | ||
849 | #endif | ||
883 | 850 | ||
884 | /* | 851 | /* |
885 | * These MUST always be patched. Don't support indirect jumps | 852 | * These MUST always be patched. Don't support indirect jumps |
@@ -920,8 +887,8 @@ static inline int __init activate_vmi(void) | |||
920 | paravirt_ops.get_wallclock = vmi_get_wallclock; | 887 | paravirt_ops.get_wallclock = vmi_get_wallclock; |
921 | paravirt_ops.set_wallclock = vmi_set_wallclock; | 888 | paravirt_ops.set_wallclock = vmi_set_wallclock; |
922 | #ifdef CONFIG_X86_LOCAL_APIC | 889 | #ifdef CONFIG_X86_LOCAL_APIC |
923 | paravirt_ops.setup_boot_clock = vmi_timer_setup_boot_alarm; | 890 | paravirt_ops.setup_boot_clock = vmi_time_bsp_init; |
924 | paravirt_ops.setup_secondary_clock = vmi_timer_setup_secondary_alarm; | 891 | paravirt_ops.setup_secondary_clock = vmi_time_ap_init; |
925 | #endif | 892 | #endif |
926 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; | 893 | paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles; |
927 | paravirt_ops.get_cpu_khz = vmi_cpu_khz; | 894 | paravirt_ops.get_cpu_khz = vmi_cpu_khz; |
@@ -933,11 +900,7 @@ static inline int __init activate_vmi(void) | |||
933 | disable_vmi_timer = 1; | 900 | disable_vmi_timer = 1; |
934 | } | 901 | } |
935 | 902 | ||
936 | /* No idle HZ mode only works if VMI timer and no idle is enabled */ | 903 | para_fill(safe_halt, Halt); |
937 | if (disable_noidle || disable_vmi_timer) | ||
938 | para_fill(safe_halt, Halt); | ||
939 | else | ||
940 | para_wrap(safe_halt, vmi_safe_halt, halt, Halt); | ||
941 | 904 | ||
942 | /* | 905 | /* |
943 | * Alternative instruction rewriting doesn't happen soon enough | 906 | * Alternative instruction rewriting doesn't happen soon enough |
@@ -945,7 +908,7 @@ static inline int __init activate_vmi(void) | |||
945 | * to do this before IRQs get reenabled. Fortunately, it is | 908 | * to do this before IRQs get reenabled. Fortunately, it is |
946 | * idempotent. | 909 | * idempotent. |
947 | */ | 910 | */ |
948 | apply_paravirt(__start_parainstructions, __stop_parainstructions); | 911 | apply_paravirt(__parainstructions, __parainstructions_end); |
949 | 912 | ||
950 | vmi_bringup(); | 913 | vmi_bringup(); |
951 | 914 | ||
diff --git a/arch/i386/kernel/vmiclock.c b/arch/i386/kernel/vmiclock.c new file mode 100644 index 000000000000..26a37f8a8762 --- /dev/null +++ b/arch/i386/kernel/vmiclock.c | |||
@@ -0,0 +1,318 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2007, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | */ | ||
22 | |||
23 | #include <linux/smp.h> | ||
24 | #include <linux/interrupt.h> | ||
25 | #include <linux/cpumask.h> | ||
26 | #include <linux/clocksource.h> | ||
27 | #include <linux/clockchips.h> | ||
28 | |||
29 | #include <asm/vmi.h> | ||
30 | #include <asm/vmi_time.h> | ||
31 | #include <asm/arch_hooks.h> | ||
32 | #include <asm/apicdef.h> | ||
33 | #include <asm/apic.h> | ||
34 | #include <asm/timer.h> | ||
35 | |||
36 | #include <irq_vectors.h> | ||
37 | #include "io_ports.h" | ||
38 | |||
39 | #define VMI_ONESHOT (VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
40 | #define VMI_PERIODIC (VMI_ALARM_IS_PERIODIC | VMI_CYCLES_REAL | vmi_get_alarm_wiring()) | ||
41 | |||
42 | static DEFINE_PER_CPU(struct clock_event_device, local_events); | ||
43 | |||
44 | static inline u32 vmi_counter(u32 flags) | ||
45 | { | ||
46 | /* Given VMI_ONESHOT or VMI_PERIODIC, return the corresponding | ||
47 | * cycle counter. */ | ||
48 | return flags & VMI_ALARM_COUNTER_MASK; | ||
49 | } | ||
50 | |||
51 | /* paravirt_ops.get_wallclock = vmi_get_wallclock */ | ||
52 | unsigned long vmi_get_wallclock(void) | ||
53 | { | ||
54 | unsigned long long wallclock; | ||
55 | wallclock = vmi_timer_ops.get_wallclock(); // nsec | ||
56 | (void)do_div(wallclock, 1000000000); // sec | ||
57 | |||
58 | return wallclock; | ||
59 | } | ||
60 | |||
61 | /* paravirt_ops.set_wallclock = vmi_set_wallclock */ | ||
62 | int vmi_set_wallclock(unsigned long now) | ||
63 | { | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | /* paravirt_ops.get_scheduled_cycles = vmi_get_sched_cycles */ | ||
68 | unsigned long long vmi_get_sched_cycles(void) | ||
69 | { | ||
70 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | ||
71 | } | ||
72 | |||
73 | /* paravirt_ops.get_cpu_khz = vmi_cpu_khz */ | ||
74 | unsigned long vmi_cpu_khz(void) | ||
75 | { | ||
76 | unsigned long long khz; | ||
77 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
78 | (void)do_div(khz, 1000); | ||
79 | return khz; | ||
80 | } | ||
81 | |||
82 | static inline unsigned int vmi_get_timer_vector(void) | ||
83 | { | ||
84 | #ifdef CONFIG_X86_IO_APIC | ||
85 | return FIRST_DEVICE_VECTOR; | ||
86 | #else | ||
87 | return FIRST_EXTERNAL_VECTOR; | ||
88 | #endif | ||
89 | } | ||
90 | |||
91 | /** vmi clockchip */ | ||
92 | #ifdef CONFIG_X86_LOCAL_APIC | ||
93 | static unsigned int startup_timer_irq(unsigned int irq) | ||
94 | { | ||
95 | unsigned long val = apic_read(APIC_LVTT); | ||
96 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
97 | |||
98 | return (val & APIC_SEND_PENDING); | ||
99 | } | ||
100 | |||
101 | static void mask_timer_irq(unsigned int irq) | ||
102 | { | ||
103 | unsigned long val = apic_read(APIC_LVTT); | ||
104 | apic_write(APIC_LVTT, val | APIC_LVT_MASKED); | ||
105 | } | ||
106 | |||
107 | static void unmask_timer_irq(unsigned int irq) | ||
108 | { | ||
109 | unsigned long val = apic_read(APIC_LVTT); | ||
110 | apic_write(APIC_LVTT, val & ~APIC_LVT_MASKED); | ||
111 | } | ||
112 | |||
113 | static void ack_timer_irq(unsigned int irq) | ||
114 | { | ||
115 | ack_APIC_irq(); | ||
116 | } | ||
117 | |||
118 | static struct irq_chip vmi_chip __read_mostly = { | ||
119 | .name = "VMI-LOCAL", | ||
120 | .startup = startup_timer_irq, | ||
121 | .mask = mask_timer_irq, | ||
122 | .unmask = unmask_timer_irq, | ||
123 | .ack = ack_timer_irq | ||
124 | }; | ||
125 | #endif | ||
126 | |||
127 | /** vmi clockevent */ | ||
128 | #define VMI_ALARM_WIRED_IRQ0 0x00000000 | ||
129 | #define VMI_ALARM_WIRED_LVTT 0x00010000 | ||
130 | static int vmi_wiring = VMI_ALARM_WIRED_IRQ0; | ||
131 | |||
132 | static inline int vmi_get_alarm_wiring(void) | ||
133 | { | ||
134 | return vmi_wiring; | ||
135 | } | ||
136 | |||
137 | static void vmi_timer_set_mode(enum clock_event_mode mode, | ||
138 | struct clock_event_device *evt) | ||
139 | { | ||
140 | cycle_t now, cycles_per_hz; | ||
141 | BUG_ON(!irqs_disabled()); | ||
142 | |||
143 | switch (mode) { | ||
144 | case CLOCK_EVT_MODE_ONESHOT: | ||
145 | break; | ||
146 | case CLOCK_EVT_MODE_PERIODIC: | ||
147 | cycles_per_hz = vmi_timer_ops.get_cycle_frequency(); | ||
148 | (void)do_div(cycles_per_hz, HZ); | ||
149 | now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_PERIODIC)); | ||
150 | vmi_timer_ops.set_alarm(VMI_PERIODIC, now, cycles_per_hz); | ||
151 | break; | ||
152 | case CLOCK_EVT_MODE_UNUSED: | ||
153 | case CLOCK_EVT_MODE_SHUTDOWN: | ||
154 | switch (evt->mode) { | ||
155 | case CLOCK_EVT_MODE_ONESHOT: | ||
156 | vmi_timer_ops.cancel_alarm(VMI_ONESHOT); | ||
157 | break; | ||
158 | case CLOCK_EVT_MODE_PERIODIC: | ||
159 | vmi_timer_ops.cancel_alarm(VMI_PERIODIC); | ||
160 | break; | ||
161 | default: | ||
162 | break; | ||
163 | } | ||
164 | break; | ||
165 | default: | ||
166 | break; | ||
167 | } | ||
168 | } | ||
169 | |||
170 | static int vmi_timer_next_event(unsigned long delta, | ||
171 | struct clock_event_device *evt) | ||
172 | { | ||
173 | /* Unfortunately, set_next_event interface only passes relative | ||
174 | * expiry, but we want absolute expiry. It'd be better if we | ||
175 | * were passed an absolute expiry, since a bunch of time may | ||
176 | * have been stolen between the time the delta is computed and | ||
177 | * when we set the alarm below. */ | ||
178 | cycle_t now = vmi_timer_ops.get_cycle_counter(vmi_counter(VMI_ONESHOT)); | ||
179 | |||
180 | BUG_ON(evt->mode != CLOCK_EVT_MODE_ONESHOT); | ||
181 | vmi_timer_ops.set_alarm(VMI_ONESHOT, now + delta, 0); | ||
182 | return 0; | ||
183 | } | ||
184 | |||
/* Template clock event device. vmi_time_init_clockevent() copies it into the
 * current cpu's local_events slot and fills in mult, max_delta_ns,
 * min_delta_ns and cpumask on the copy before registering it. */
185 | static struct clock_event_device vmi_clockevent = { | ||
186 | .name = "vmi-timer", | ||
187 | .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT, | ||
188 | .shift = 22, | ||
189 | .set_mode = vmi_timer_set_mode, | ||
190 | .set_next_event = vmi_timer_next_event, | ||
191 | .rating = 1000, | ||
192 | .irq = 0, | ||
193 | }; | ||
194 | |||
/* Timer IRQ handler (see vmi_clock_action): invokes the event_handler of the
 * current cpu's local_events clock event device. `irq` and `dev_id` are
 * unused. Always returns IRQ_HANDLED. */
195 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
196 | { | ||
197 | struct clock_event_device *evt = &__get_cpu_var(local_events); | ||
198 | evt->event_handler(evt); | ||
199 | return IRQ_HANDLED; | ||
200 | } | ||
201 | |||
/* irqaction that vmi_time_init() installs on IRQ 0; dispatches to
 * vmi_timer_interrupt(). */
202 | static struct irqaction vmi_clock_action = { | ||
203 | .name = "vmi-timer", | ||
204 | .handler = vmi_timer_interrupt, | ||
205 | .flags = IRQF_DISABLED | IRQF_NOBALANCING, | ||
206 | .mask = CPU_MASK_ALL, | ||
207 | }; | ||
208 | |||
/* Set up and register the clock event device for the cpu this runs on:
 * copy the vmi_clockevent template into this cpu's local_events, derive
 * mult from the VMI cycle frequency, set the min/max delta bounds, restrict
 * the device to this cpu, then hand it to clockevents_register_device(). */
209 | static void __devinit vmi_time_init_clockevent(void) | ||
210 | { | ||
211 | cycle_t cycles_per_msec; | ||
212 | struct clock_event_device *evt; | ||
213 | |||
214 | int cpu = smp_processor_id(); | ||
215 | evt = &__get_cpu_var(local_events); | ||
216 | |||
217 | /* Use cycles_per_msec since div_sc params are 32-bits. */ | ||
218 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
219 | (void)do_div(cycles_per_msec, 1000); | ||
220 | |||
221 | memcpy(evt, &vmi_clockevent, sizeof(*evt)); | ||
222 | /* Must pick .shift such that .mult fits in 32-bits. Choosing | ||
223 | * .shift to be 22 allows 2^(32-22) cycles per nano-seconds | ||
224 | * before overflow. */ | ||
225 | evt->mult = div_sc(cycles_per_msec, NSEC_PER_MSEC, evt->shift); | ||
226 | /* Upper bound is clockevent's use of ulong for cycle deltas. */ | ||
227 | evt->max_delta_ns = clockevent_delta2ns(ULONG_MAX, evt); | ||
228 | evt->min_delta_ns = clockevent_delta2ns(1, evt); | ||
229 | evt->cpumask = cpumask_of_cpu(cpu); | ||
230 | |||
231 | printk(KERN_WARNING "vmi: registering clock event %s. mult=%lu shift=%u\n", | ||
232 | evt->name, evt->mult, evt->shift); | ||
233 | clockevents_register_device(evt); | ||
234 | } | ||
235 | |||
/* Boot-time VMI timer setup: writes mode byte 0x3a to PIT_MODE, registers
 * the clock event device for the cpu this runs on, and installs
 * vmi_clock_action on IRQ 0. */
236 | void __init vmi_time_init(void) | ||
237 | { | ||
238 | /* Disable PIT: BIOSes start PIT CH0 with 18.2hz periodic. */ | ||
239 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
240 | |||
241 | vmi_time_init_clockevent(); | ||
242 | setup_irq(0, &vmi_clock_action); | ||
243 | } | ||
244 | |||
245 | #ifdef CONFIG_X86_LOCAL_APIC | ||
/* Switch timer delivery over to the local APIC LVTT entry. The rewiring is
 * bracketed by CLOCK_EVT_NOTIFY_SUSPEND / CLOCK_EVT_NOTIFY_RESUME
 * notifications and done with local interrupts disabled: IRQ 0 gets the
 * vmi_chip irq chip and a new flow handler, vmi_wiring is set to
 * VMI_ALARM_WIRED_LVTT, and APIC_LVTT is written with the VMI timer vector.
 * NOTE(review): the name suggests this runs on the boot processor only --
 * confirm against the caller. */
246 | void __devinit vmi_time_bsp_init(void) | ||
247 | { | ||
248 | /* | ||
249 | * On APIC systems, we want local timers to fire on each cpu. We do | ||
250 | * this by programming LVTT to deliver timer events to the IRQ handler | ||
251 | * for IRQ-0, since we can't re-use the APIC local timer handler | ||
252 | * without interfering with that code. | ||
253 | */ | ||
254 | clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); | ||
255 | local_irq_disable(); | ||
256 | #ifdef CONFIG_X86_SMP | ||
257 | /* | ||
258 | * XXX handle_percpu_irq only defined for SMP; we need to switch over | ||
259 | * to using it, since this is a local interrupt, which each CPU must | ||
260 | * handle individually without locking out or dropping simultaneous | ||
261 | * local timers on other CPUs. We also don't want to trigger the | ||
262 | * quirk workaround code for interrupts which gets invoked from | ||
263 | * handle_percpu_irq via eoi, so we use our own IRQ chip. | ||
264 | */ | ||
265 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_percpu_irq, "lvtt"); | ||
266 | #else | ||
267 | set_irq_chip_and_handler_name(0, &vmi_chip, handle_edge_irq, "lvtt"); | ||
268 | #endif | ||
269 | vmi_wiring = VMI_ALARM_WIRED_LVTT; | ||
270 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
271 | local_irq_enable(); | ||
272 | clockevents_notify(CLOCK_EVT_NOTIFY_RESUME, NULL); | ||
273 | } | ||
274 | |||
/* Per-cpu timer bring-up: registers a clock event device for the cpu this
 * runs on and writes the VMI timer vector to that cpu's APIC_LVTT.
 * NOTE(review): presumably called on each application processor during SMP
 * bring-up -- confirm against the caller. */
275 | void __devinit vmi_time_ap_init(void) | ||
276 | { | ||
277 | vmi_time_init_clockevent(); | ||
278 | apic_write(APIC_LVTT, vmi_get_timer_vector()); | ||
279 | } | ||
280 | #endif | ||
281 | |||
282 | /** vmi clocksource */ | ||
283 | |||
/* Clocksource .read callback (see clocksource_vmi): returns the current
 * value of the VMI_CYCLES_REAL counter. */
284 | static cycle_t read_real_cycles(void) | ||
285 | { | ||
286 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
287 | } | ||
288 | |||
/* VMI clocksource descriptor. mult is left at 0 here and computed from the
 * VMI cycle frequency in init_vmi_clocksource() before registration. */
289 | static struct clocksource clocksource_vmi = { | ||
290 | .name = "vmi-timer", | ||
291 | .rating = 450, | ||
292 | .read = read_real_cycles, | ||
293 | .mask = CLOCKSOURCE_MASK(64), | ||
294 | .mult = 0, /* to be set */ | ||
295 | .shift = 22, | ||
296 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
297 | }; | ||
298 | |||
/* Initcall that registers clocksource_vmi.
 * Returns 0 without registering anything when
 * vmi_timer_ops.get_cycle_frequency is not set; otherwise returns the
 * result of clocksource_register(). */
299 | static int __init init_vmi_clocksource(void) | ||
300 | { | ||
301 | cycle_t cycles_per_msec; | ||
302 | |||
303 | if (!vmi_timer_ops.get_cycle_frequency) | ||
304 | return 0; | ||
305 | /* Use khz2mult rather than hz2mult since hz arg is only 32-bits. */ | ||
306 | cycles_per_msec = vmi_timer_ops.get_cycle_frequency(); | ||
307 | (void)do_div(cycles_per_msec, 1000); | ||
308 | |||
309 | /* Note that clocksource.{mult, shift} converts in the opposite direction | ||
310 | * as clockevents. */ | ||
311 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
312 | clocksource_vmi.shift); | ||
313 | |||
/* NOTE(review): cycles_per_msec is a cycle_t printed with %lld; if cycle_t
 * is unsigned, %llu would be the matching specifier -- confirm. */
314 | printk(KERN_WARNING "vmi: registering clock source khz=%lld\n", cycles_per_msec); | ||
315 | return clocksource_register(&clocksource_vmi); | ||
316 | |||
317 | } | ||
318 | module_init(init_vmi_clocksource); | ||
diff --git a/arch/i386/kernel/vmitime.c b/arch/i386/kernel/vmitime.c deleted file mode 100644 index 9dfb17739b67..000000000000 --- a/arch/i386/kernel/vmitime.c +++ /dev/null | |||
@@ -1,482 +0,0 @@ | |||
1 | /* | ||
2 | * VMI paravirtual timer support routines. | ||
3 | * | ||
4 | * Copyright (C) 2005, VMware, Inc. | ||
5 | * | ||
6 | * This program is free software; you can redistribute it and/or modify | ||
7 | * it under the terms of the GNU General Public License as published by | ||
8 | * the Free Software Foundation; either version 2 of the License, or | ||
9 | * (at your option) any later version. | ||
10 | * | ||
11 | * This program is distributed in the hope that it will be useful, but | ||
12 | * WITHOUT ANY WARRANTY; without even the implied warranty of | ||
13 | * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or | ||
14 | * NON INFRINGEMENT. See the GNU General Public License for more | ||
15 | * details. | ||
16 | * | ||
17 | * You should have received a copy of the GNU General Public License | ||
18 | * along with this program; if not, write to the Free Software | ||
19 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. | ||
20 | * | ||
21 | * Send feedback to dhecht@vmware.com | ||
22 | * | ||
23 | */ | ||
24 | |||
25 | /* | ||
26 | * Portions of this code from arch/i386/kernel/timers/timer_tsc.c. | ||
27 | * Portions of the CONFIG_NO_IDLE_HZ code from arch/s390/kernel/time.c. | ||
28 | * See comments there for proper credits. | ||
29 | */ | ||
30 | |||
31 | #include <linux/spinlock.h> | ||
32 | #include <linux/init.h> | ||
33 | #include <linux/errno.h> | ||
34 | #include <linux/jiffies.h> | ||
35 | #include <linux/interrupt.h> | ||
36 | #include <linux/kernel_stat.h> | ||
37 | #include <linux/rcupdate.h> | ||
38 | #include <linux/clocksource.h> | ||
39 | |||
40 | #include <asm/timer.h> | ||
41 | #include <asm/io.h> | ||
42 | #include <asm/apic.h> | ||
43 | #include <asm/div64.h> | ||
44 | #include <asm/timer.h> | ||
45 | #include <asm/desc.h> | ||
46 | |||
47 | #include <asm/vmi.h> | ||
48 | #include <asm/vmi_time.h> | ||
49 | |||
50 | #include <mach_timer.h> | ||
51 | #include <io_ports.h> | ||
52 | |||
/* Alarm wiring flag passed to set_alarm() by the CONFIG_NO_IDLE_HZ code:
 * LVTT when a local APIC is configured, IRQ0 otherwise. */
53 | #ifdef CONFIG_X86_LOCAL_APIC | ||
54 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_LVTT | ||
55 | #else | ||
56 | #define VMI_ALARM_WIRING VMI_ALARM_WIRED_IRQ0 | ||
57 | #endif | ||
58 | |||
59 | /* Cached VMI operations */ | ||
60 | struct vmi_timer_ops vmi_timer_ops; | ||
61 | |||
62 | #ifdef CONFIG_NO_IDLE_HZ | ||
63 | |||
64 | /* /proc/sys/kernel/hz_timer state. A non-zero value makes
 * vmi_stop_hz_timer() leave the periodic timer running. */ | ||
65 | int sysctl_hz_timer; | ||
66 | |||
67 | /* Some stats: updated in vmi_reenable_hz_timer(); idle_start_jiffies is
 * recorded in vmi_stop_hz_timer(). */ | ||
68 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_irqs); | ||
69 | static DEFINE_PER_CPU(unsigned long, vmi_idle_no_hz_jiffies); | ||
70 | static DEFINE_PER_CPU(unsigned long, idle_start_jiffies); | ||
71 | |||
72 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
73 | |||
74 | /* Number of alarms per second. By default this is CONFIG_VMI_ALARM_HZ;
 * may be overridden by the "vmi_timer_alarm_hz=" boot parameter. */ | ||
75 | static int alarm_hz = CONFIG_VMI_ALARM_HZ; | ||
76 | |||
77 | /* Cache of the value get_cycle_frequency / HZ. */ | ||
78 | static signed long long cycles_per_jiffy; | ||
79 | |||
80 | /* Cache of the value get_cycle_frequency / alarm_hz. */ | ||
81 | static signed long long cycles_per_alarm; | ||
82 | |||
83 | /* The number of cycles accounted for by the 'jiffies'/'xtime' count. | ||
84 | * Protected by xtime_lock. */ | ||
85 | static unsigned long long real_cycles_accounted_system; | ||
86 | |||
87 | /* The number of cycles accounted for by update_process_times(), per cpu. */ | ||
88 | static DEFINE_PER_CPU(unsigned long long, process_times_cycles_accounted_cpu); | ||
89 | |||
90 | /* The number of stolen cycles accounted, per cpu. */ | ||
91 | static DEFINE_PER_CPU(unsigned long long, stolen_cycles_accounted_cpu); | ||
92 | |||
93 | /* Clock source read callback: returns the VMI_CYCLES_REAL counter. */ | ||
94 | static cycle_t read_real_cycles(void) | ||
95 | { | ||
96 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); | ||
97 | } | ||
98 | |||
/* Returns the current value of the VMI_CYCLES_AVAILABLE counter. */
99 | static cycle_t read_available_cycles(void) | ||
100 | { | ||
101 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE); | ||
102 | } | ||
103 | |||
/* Compiled out (#if 0): reader for the VMI_CYCLES_STOLEN counter. */
104 | #if 0 | ||
105 | static cycle_t read_stolen_cycles(void) | ||
106 | { | ||
107 | return vmi_timer_ops.get_cycle_counter(VMI_CYCLES_STOLEN); | ||
108 | } | ||
109 | #endif /* 0 */ | ||
110 | |||
/* VMI clocksource descriptor; mult is computed and the source registered
 * in vmi_time_init(). */
111 | static struct clocksource clocksource_vmi = { | ||
112 | .name = "vmi-timer", | ||
113 | .rating = 450, | ||
114 | .read = read_real_cycles, | ||
115 | .mask = CLOCKSOURCE_MASK(64), | ||
116 | .mult = 0, /* to be set */ | ||
117 | .shift = 22, | ||
118 | .flags = CLOCK_SOURCE_IS_CONTINUOUS, | ||
119 | }; | ||
120 | |||
121 | |||
122 | /* Timer interrupt handler (forward declaration; defined later in this file). */ | ||
123 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id); | ||
124 | |||
/* irqaction installed on IRQ 0 by vmi_time_init(). */
125 | static struct irqaction vmi_timer_irq = { | ||
126 | .handler = vmi_timer_interrupt, | ||
127 | .flags = IRQF_DISABLED, | ||
128 | .mask = CPU_MASK_NONE, | ||
129 | .name = "VMI-alarm", | ||
130 | }; | ||
131 | |||
132 | /* Alarm rate: handler for the "vmi_timer_alarm_hz=" boot parameter.
 * When get_option() returns 1 and the parsed value is positive, it
 * replaces alarm_hz; any other input leaves alarm_hz unchanged.
 * Always returns 1. */ | ||
133 | static int __init vmi_timer_alarm_rate_setup(char* str) | ||
134 | { | ||
135 | int alarm_rate; | ||
136 | if (get_option(&str, &alarm_rate) == 1 && alarm_rate > 0) { | ||
137 | alarm_hz = alarm_rate; | ||
138 | printk(KERN_WARNING "VMI timer alarm HZ set to %d\n", alarm_hz); | ||
139 | } | ||
140 | return 1; | ||
141 | } | ||
142 | __setup("vmi_timer_alarm_hz=", vmi_timer_alarm_rate_setup); | ||
143 | |||
144 | |||
145 | /* Initialization */ | ||
/* Fill *ts from vmi_timer_ops.get_wallclock(): do_div() leaves the whole
 * seconds in `wallclock` and returns the nanosecond remainder. */
146 | static void vmi_get_wallclock_ts(struct timespec *ts) | ||
147 | { | ||
148 | unsigned long long wallclock; | ||
149 | wallclock = vmi_timer_ops.get_wallclock(); // nsec units | ||
150 | ts->tv_nsec = do_div(wallclock, 1000000000); | ||
151 | ts->tv_sec = wallclock; | ||
152 | } | ||
153 | |||
/* Returns the seconds part (tv_sec) of the VMI wallclock. */
154 | unsigned long vmi_get_wallclock(void) | ||
155 | { | ||
156 | struct timespec ts; | ||
157 | vmi_get_wallclock_ts(&ts); | ||
158 | return ts.tv_sec; | ||
159 | } | ||
160 | |||
/* Setting the wallclock is not implemented: `now` is ignored and -1 is
 * always returned. */
161 | int vmi_set_wallclock(unsigned long now) | ||
162 | { | ||
163 | return -1; | ||
164 | } | ||
165 | |||
/* Returns the VMI_CYCLES_AVAILABLE counter (via read_available_cycles()). */
166 | unsigned long long vmi_get_sched_cycles(void) | ||
167 | { | ||
168 | return read_available_cycles(); | ||
169 | } | ||
170 | |||
/* Returns vmi_timer_ops.get_cycle_frequency() divided by 1000, truncated to
 * unsigned long (kHz, presumably, if the frequency is reported in Hz). */
171 | unsigned long vmi_cpu_khz(void) | ||
172 | { | ||
173 | unsigned long long khz; | ||
174 | |||
175 | khz = vmi_timer_ops.get_cycle_frequency(); | ||
176 | (void)do_div(khz, 1000); | ||
177 | return khz; | ||
178 | } | ||
179 | |||
/* Boot-time VMI timer setup, run with local interrupts saved/disabled:
 * installs vmi_timer_irq on IRQ 0 (and the LOCAL_TIMER_VECTOR gate when a
 * local APIC is configured), snapshots the real and available cycle
 * counters as the accounting baselines, caches cycles per jiffy/alarm,
 * registers clocksource_vmi, reprograms the PIT, and starts the periodic
 * IRQ0-wired alarm on the available-cycles counter. */
180 | void __init vmi_time_init(void) | ||
181 | { | ||
182 | unsigned long long cycles_per_sec, cycles_per_msec; | ||
183 | unsigned long flags; | ||
184 | |||
185 | local_irq_save(flags); | ||
186 | setup_irq(0, &vmi_timer_irq); | ||
187 | #ifdef CONFIG_X86_LOCAL_APIC | ||
188 | set_intr_gate(LOCAL_TIMER_VECTOR, apic_vmi_timer_interrupt); | ||
189 | #endif | ||
190 | |||
191 | real_cycles_accounted_system = read_real_cycles(); | ||
192 | per_cpu(process_times_cycles_accounted_cpu, 0) = read_available_cycles(); | ||
193 | |||
194 | cycles_per_sec = vmi_timer_ops.get_cycle_frequency(); | ||
195 | cycles_per_jiffy = cycles_per_sec; | ||
196 | (void)do_div(cycles_per_jiffy, HZ); | ||
197 | cycles_per_alarm = cycles_per_sec; | ||
198 | (void)do_div(cycles_per_alarm, alarm_hz); | ||
199 | cycles_per_msec = cycles_per_sec; | ||
200 | (void)do_div(cycles_per_msec, 1000); | ||
201 | |||
202 | printk(KERN_WARNING "VMI timer cycles/sec = %llu ; cycles/jiffy = %llu ;" | ||
203 | "cycles/alarm = %llu\n", cycles_per_sec, cycles_per_jiffy, | ||
204 | cycles_per_alarm); | ||
205 | |||
206 | clocksource_vmi.mult = clocksource_khz2mult(cycles_per_msec, | ||
207 | clocksource_vmi.shift); | ||
208 | if (clocksource_register(&clocksource_vmi)) | ||
209 | printk(KERN_WARNING "Error registering VMITIME clocksource."); | ||
210 | |||
211 | /* Disable PIT. */ | ||
212 | outb_p(0x3a, PIT_MODE); /* binary, mode 5, LSB/MSB, ch 0 */ | ||
213 | |||
214 | /* Schedule the alarm. Do this in phase with process_times_cycles_accounted_cpu | ||
215 | * to reduce the latency of calling update_process_times. */ | ||
216 | vmi_timer_ops.set_alarm( | ||
217 | VMI_ALARM_WIRED_IRQ0 | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
218 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
219 | cycles_per_alarm); | ||
220 | |||
221 | local_irq_restore(flags); | ||
222 | } | ||
223 | |||
224 | #ifdef CONFIG_X86_LOCAL_APIC | ||
225 | |||
/* With local interrupts disabled, move cpu 0's periodic alarm from the IRQ0
 * wiring to the LVTT wiring: point APIC_LVTT at LOCAL_TIMER_VECTOR, cancel
 * the existing available-cycles alarm, and re-arm it as LVTT-wired. */
226 | void __init vmi_timer_setup_boot_alarm(void) | ||
227 | { | ||
228 | local_irq_disable(); | ||
229 | |||
230 | /* Route the interrupt to the correct vector. */ | ||
231 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
232 | |||
233 | /* Cancel the IRQ0 wired alarm, and setup the LVTT alarm. */ | ||
234 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
235 | vmi_timer_ops.set_alarm( | ||
236 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
237 | per_cpu(process_times_cycles_accounted_cpu, 0) + cycles_per_alarm, | ||
238 | cycles_per_alarm); | ||
239 | local_irq_enable(); | ||
240 | } | ||
241 | |||
242 | /* Initialize the time accounting variables for an AP on an SMP system. | ||
243 | * Also, set the local alarm for the AP: the current cpu's
 * process_times_cycles_accounted_cpu is reset to the available-cycles
 * counter and a periodic LVTT-wired alarm is armed one alarm period later. */ | ||
244 | void __devinit vmi_timer_setup_secondary_alarm(void) | ||
245 | { | ||
246 | int cpu = smp_processor_id(); | ||
247 | |||
248 | /* Route the interrupt to the correct vector. */ | ||
249 | apic_write_around(APIC_LVTT, LOCAL_TIMER_VECTOR); | ||
250 | |||
251 | per_cpu(process_times_cycles_accounted_cpu, cpu) = read_available_cycles(); | ||
252 | |||
253 | vmi_timer_ops.set_alarm( | ||
254 | VMI_ALARM_WIRED_LVTT | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
255 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
256 | cycles_per_alarm); | ||
257 | } | ||
258 | |||
259 | #endif | ||
260 | |||
261 | /* Update system wide (real) time accounting (e.g. jiffies, xtime).
 * Under xtime_lock, calls do_timer(1) once for every whole jiffy of real
 * cycles between real_cycles_accounted_system and cur_real_cycles, and
 * advances real_cycles_accounted_system by the same amount. */ | ||
262 | static void vmi_account_real_cycles(unsigned long long cur_real_cycles) | ||
263 | { | ||
264 | long long cycles_not_accounted; | ||
265 | |||
266 | write_seqlock(&xtime_lock); | ||
267 | |||
268 | cycles_not_accounted = cur_real_cycles - real_cycles_accounted_system; | ||
269 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
270 | /* system wide jiffies. */ | ||
271 | do_timer(1); | ||
272 | |||
273 | cycles_not_accounted -= cycles_per_jiffy; | ||
274 | real_cycles_accounted_system += cycles_per_jiffy; | ||
275 | } | ||
276 | |||
277 | write_sequnlock(&xtime_lock); | ||
278 | } | ||
279 | |||
280 | /* Update per-cpu process times: for every whole jiffy of available cycles
 * not yet accounted on `cpu`, calls update_process_times() and
 * profile_tick(), and advances process_times_cycles_accounted_cpu. */ | ||
281 | static void vmi_account_process_times_cycles(struct pt_regs *regs, int cpu, | ||
282 | unsigned long long cur_process_times_cycles) | ||
283 | { | ||
284 | long long cycles_not_accounted; | ||
285 | cycles_not_accounted = cur_process_times_cycles - | ||
286 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
287 | |||
288 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
289 | /* Account time to the current process. This includes | ||
290 | * calling into the scheduler to decrement the timeslice | ||
291 | * and possibly reschedule.*/ | ||
292 | update_process_times(user_mode(regs)); | ||
293 | /* XXX handle /proc/profile multiplier. */ | ||
294 | profile_tick(CPU_PROFILING); | ||
295 | |||
296 | cycles_not_accounted -= cycles_per_jiffy; | ||
297 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
298 | } | ||
299 | } | ||
300 | |||
301 | #ifdef CONFIG_NO_IDLE_HZ | ||
302 | /* Update per-cpu idle times. Used when a no-hz halt is ended.
 * Counts the whole jiffies of available cycles not yet accounted on `cpu`,
 * advances process_times_cycles_accounted_cpu past them, and passes the
 * count to account_steal_time() for the cpu's idle task in one call. */ | ||
303 | static void vmi_account_no_hz_idle_cycles(int cpu, | ||
304 | unsigned long long cur_process_times_cycles) | ||
305 | { | ||
306 | long long cycles_not_accounted; | ||
307 | unsigned long no_idle_hz_jiffies = 0; | ||
308 | |||
309 | cycles_not_accounted = cur_process_times_cycles - | ||
310 | per_cpu(process_times_cycles_accounted_cpu, cpu); | ||
311 | |||
312 | while (cycles_not_accounted >= cycles_per_jiffy) { | ||
313 | no_idle_hz_jiffies++; | ||
314 | cycles_not_accounted -= cycles_per_jiffy; | ||
315 | per_cpu(process_times_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
316 | } | ||
317 | /* Account time to the idle process. */ | ||
318 | account_steal_time(idle_task(cpu), jiffies_to_cputime(no_idle_hz_jiffies)); | ||
319 | } | ||
320 | #endif | ||
321 | |||
322 | /* Update per-cpu stolen time. Stolen cycles are computed as real minus
 * available cycles; whole jiffies of stolen time not yet recorded in
 * stolen_cycles_accounted_cpu are passed to account_steal_time().
 * Does nothing when cur_real_cycles < cur_avail_cycles. */ | ||
323 | static void vmi_account_stolen_cycles(int cpu, | ||
324 | unsigned long long cur_real_cycles, | ||
325 | unsigned long long cur_avail_cycles) | ||
326 | { | ||
327 | long long stolen_cycles_not_accounted; | ||
328 | unsigned long stolen_jiffies = 0; | ||
329 | |||
330 | if (cur_real_cycles < cur_avail_cycles) | ||
331 | return; | ||
332 | |||
333 | stolen_cycles_not_accounted = cur_real_cycles - cur_avail_cycles - | ||
334 | per_cpu(stolen_cycles_accounted_cpu, cpu); | ||
335 | |||
336 | while (stolen_cycles_not_accounted >= cycles_per_jiffy) { | ||
337 | stolen_jiffies++; | ||
338 | stolen_cycles_not_accounted -= cycles_per_jiffy; | ||
339 | per_cpu(stolen_cycles_accounted_cpu, cpu) += cycles_per_jiffy; | ||
340 | } | ||
341 | /* HACK: pass NULL to force time onto cpustat->steal. */ | ||
342 | account_steal_time(NULL, jiffies_to_cputime(stolen_jiffies)); | ||
343 | } | ||
344 | |||
345 | /* Body of either IRQ0 interrupt handler (UP no local-APIC) or | ||
346 | * local-APIC LVTT interrupt handler (UP & local-APIC or SMP).
 * Samples the real and available cycle counters once and feeds them to the
 * system-wide, per-cpu process-time and stolen-time accounting helpers. */ | ||
347 | static void vmi_local_timer_interrupt(int cpu) | ||
348 | { | ||
349 | unsigned long long cur_real_cycles, cur_process_times_cycles; | ||
350 | |||
351 | cur_real_cycles = read_real_cycles(); | ||
352 | cur_process_times_cycles = read_available_cycles(); | ||
353 | /* Update system wide (real) time state (xtime, jiffies). */ | ||
354 | vmi_account_real_cycles(cur_real_cycles); | ||
355 | /* Update per-cpu process times. */ | ||
356 | vmi_account_process_times_cycles(get_irq_regs(), cpu, cur_process_times_cycles); | ||
357 | /* Update time stolen from this cpu by the hypervisor. */ | ||
358 | vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); | ||
359 | } | ||
360 | |||
361 | #ifdef CONFIG_NO_IDLE_HZ | ||
362 | |||
363 | /* Must be called only from idle loop, with interrupts disabled.
 * Tries to replace this cpu's periodic alarm with a single one-shot alarm
 * at the next pending timer event.
 * Returns 1 when the periodic alarm was stopped, 0 when it was left running
 * (sysctl_hz_timer is non-zero, RCU or softirq work is pending, or the next
 * timer is due within HZ/CONFIG_VMI_ALARM_HZ jiffies). */ | ||
364 | int vmi_stop_hz_timer(void) | ||
365 | { | ||
366 | /* Note that cpu_set, cpu_clear are (SMP safe) atomic on x86. */ | ||
367 | |||
368 | unsigned long seq, next; | ||
369 | unsigned long long real_cycles_expiry; | ||
370 | int cpu = smp_processor_id(); | ||
371 | |||
372 | BUG_ON(!irqs_disabled()); | ||
373 | if (sysctl_hz_timer != 0) | ||
374 | return 0; | ||
375 | |||
376 | cpu_set(cpu, nohz_cpu_mask); | ||
377 | smp_mb(); | ||
378 | |||
379 | if (rcu_needs_cpu(cpu) || local_softirq_pending() || | ||
380 | (next = next_timer_interrupt(), | ||
381 | time_before_eq(next, jiffies + HZ/CONFIG_VMI_ALARM_HZ))) { | ||
382 | cpu_clear(cpu, nohz_cpu_mask); | ||
383 | return 0; | ||
384 | } | ||
385 | |||
386 | /* Convert jiffies to the real cycle counter. */ | ||
387 | do { | ||
388 | seq = read_seqbegin(&xtime_lock); | ||
389 | real_cycles_expiry = real_cycles_accounted_system + | ||
390 | (long)(next - jiffies) * cycles_per_jiffy; | ||
391 | } while (read_seqretry(&xtime_lock, seq)); | ||
392 | |||
393 | /* This cpu is going idle. Disable the periodic alarm. */ | ||
394 | vmi_timer_ops.cancel_alarm(VMI_CYCLES_AVAILABLE); | ||
395 | per_cpu(idle_start_jiffies, cpu) = jiffies; | ||
396 | /* Set the real time alarm to expire at the next event. */ | ||
397 | vmi_timer_ops.set_alarm( | ||
398 | VMI_ALARM_WIRING | VMI_ALARM_IS_ONESHOT | VMI_CYCLES_REAL, | ||
399 | real_cycles_expiry, 0); | ||
400 | return 1; | ||
401 | } | ||
402 | |||
/* Restart the periodic alarm on `cpu` after a no-hz idle period: updates
 * the idle statistics, re-arms the periodic available-cycles alarm one
 * alarm period past the accounted process-times cycles, and clears the cpu
 * from nohz_cpu_mask. */
403 | static void vmi_reenable_hz_timer(int cpu) | ||
404 | { | ||
405 | /* For /proc/vmi/info idle_hz stat. */ | ||
406 | per_cpu(vmi_idle_no_hz_jiffies, cpu) += jiffies - per_cpu(idle_start_jiffies, cpu); | ||
407 | per_cpu(vmi_idle_no_hz_irqs, cpu)++; | ||
408 | |||
409 | /* Don't bother explicitly cancelling the one-shot alarm -- at | ||
410 | * worst we will receive a spurious timer interrupt. */ | ||
411 | vmi_timer_ops.set_alarm( | ||
412 | VMI_ALARM_WIRING | VMI_ALARM_IS_PERIODIC | VMI_CYCLES_AVAILABLE, | ||
413 | per_cpu(process_times_cycles_accounted_cpu, cpu) + cycles_per_alarm, | ||
414 | cycles_per_alarm); | ||
415 | /* Indicate this cpu is no longer nohz idle. */ | ||
416 | cpu_clear(cpu, nohz_cpu_mask); | ||
417 | } | ||
418 | |||
419 | /* Called from interrupt handlers when (local) HZ timer is disabled.
 * Requires interrupts to be disabled (BUG otherwise). Accounts the real,
 * idle and stolen time that elapsed while the periodic alarm was off, then
 * restarts it via vmi_reenable_hz_timer(). */ | ||
420 | void vmi_account_time_restart_hz_timer(void) | ||
421 | { | ||
422 | unsigned long long cur_real_cycles, cur_process_times_cycles; | ||
423 | int cpu = smp_processor_id(); | ||
424 | |||
425 | BUG_ON(!irqs_disabled()); | ||
426 | /* Account the time during which the HZ timer was disabled. */ | ||
427 | cur_real_cycles = read_real_cycles(); | ||
428 | cur_process_times_cycles = read_available_cycles(); | ||
429 | /* Update system wide (real) time state (xtime, jiffies). */ | ||
430 | vmi_account_real_cycles(cur_real_cycles); | ||
431 | /* Update per-cpu idle times. */ | ||
432 | vmi_account_no_hz_idle_cycles(cpu, cur_process_times_cycles); | ||
433 | /* Update time stolen from this cpu by the hypervisor. */ | ||
434 | vmi_account_stolen_cycles(cpu, cur_real_cycles, cur_process_times_cycles); | ||
435 | /* Reenable the hz timer. */ | ||
436 | vmi_reenable_hz_timer(cpu); | ||
437 | } | ||
438 | |||
439 | #endif /* CONFIG_NO_IDLE_HZ */ | ||
440 | |||
441 | /* UP (and no local-APIC) VMI-timer alarm interrupt handler. | ||
442 | * Handler for IRQ0. Not used when SMP or X86_LOCAL_APIC after | ||
443 | * APIC setup and setup_boot_vmi_alarm() is called.
 * Runs the common timer body for the current cpu and always returns
 * IRQ_HANDLED; `irq` and `dev_id` are unused. */ | ||
444 | static irqreturn_t vmi_timer_interrupt(int irq, void *dev_id) | ||
445 | { | ||
446 | vmi_local_timer_interrupt(smp_processor_id()); | ||
447 | return IRQ_HANDLED; | ||
448 | } | ||
449 | |||
450 | #ifdef CONFIG_X86_LOCAL_APIC | ||
451 | |||
452 | /* SMP VMI-timer alarm interrupt handler. Handler for LVTT vector. | ||
453 | * Also used in UP when CONFIG_X86_LOCAL_APIC. | ||
454 | * The wrapper code is from arch/i386/kernel/apic.c#smp_apic_timer_interrupt.
 * Installs `regs` as the current irq regs for the duration of the handler
 * and restores the previous value before returning. */ | ||
455 | void smp_apic_vmi_timer_interrupt(struct pt_regs *regs) | ||
456 | { | ||
457 | struct pt_regs *old_regs = set_irq_regs(regs); | ||
458 | int cpu = smp_processor_id(); | ||
459 | |||
460 | /* | ||
461 | * the NMI deadlock-detector uses this. | ||
462 | */ | ||
463 | per_cpu(irq_stat,cpu).apic_timer_irqs++; | ||
464 | |||
465 | /* | ||
466 | * NOTE! We'd better ACK the irq immediately, | ||
467 | * because timer handling can be slow. | ||
468 | */ | ||
469 | ack_APIC_irq(); | ||
470 | |||
471 | /* | ||
472 | * update_process_times() expects us to have done irq_enter(). | ||
473 | * Besides, if we don't, timer interrupts ignore the global | ||
474 | * interrupt lock, which is the WrongThing (tm) to do. | ||
475 | */ | ||
476 | irq_enter(); | ||
477 | vmi_local_timer_interrupt(cpu); | ||
478 | irq_exit(); | ||
479 | set_irq_regs(old_regs); | ||
480 | } | ||
481 | |||
482 | #endif /* CONFIG_X86_LOCAL_APIC */ | ||
diff --git a/arch/i386/kernel/vmlinux.lds.S b/arch/i386/kernel/vmlinux.lds.S index 6f38f818380b..23e8614edeee 100644 --- a/arch/i386/kernel/vmlinux.lds.S +++ b/arch/i386/kernel/vmlinux.lds.S | |||
@@ -26,12 +26,11 @@ OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") | |||
26 | OUTPUT_ARCH(i386) | 26 | OUTPUT_ARCH(i386) |
27 | ENTRY(phys_startup_32) | 27 | ENTRY(phys_startup_32) |
28 | jiffies = jiffies_64; | 28 | jiffies = jiffies_64; |
29 | _proxy_pda = 1; | ||
30 | 29 | ||
31 | PHDRS { | 30 | PHDRS { |
32 | text PT_LOAD FLAGS(5); /* R_E */ | 31 | text PT_LOAD FLAGS(5); /* R_E */ |
33 | data PT_LOAD FLAGS(7); /* RWE */ | 32 | data PT_LOAD FLAGS(7); /* RWE */ |
34 | note PT_NOTE FLAGS(4); /* R__ */ | 33 | note PT_NOTE FLAGS(0); /* ___ */ |
35 | } | 34 | } |
36 | SECTIONS | 35 | SECTIONS |
37 | { | 36 | { |
@@ -61,8 +60,6 @@ SECTIONS | |||
61 | __stop___ex_table = .; | 60 | __stop___ex_table = .; |
62 | } | 61 | } |
63 | 62 | ||
64 | RODATA | ||
65 | |||
66 | BUG_TABLE | 63 | BUG_TABLE |
67 | 64 | ||
68 | . = ALIGN(4); | 65 | . = ALIGN(4); |
@@ -72,6 +69,8 @@ SECTIONS | |||
72 | __tracedata_end = .; | 69 | __tracedata_end = .; |
73 | } | 70 | } |
74 | 71 | ||
72 | RODATA | ||
73 | |||
75 | /* writeable */ | 74 | /* writeable */ |
76 | . = ALIGN(4096); | 75 | . = ALIGN(4096); |
77 | .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ | 76 | .data : AT(ADDR(.data) - LOAD_OFFSET) { /* Data */ |
@@ -117,22 +116,11 @@ SECTIONS | |||
117 | 116 | ||
118 | /* might get freed after init */ | 117 | /* might get freed after init */ |
119 | . = ALIGN(4096); | 118 | . = ALIGN(4096); |
120 | .smp_altinstructions : AT(ADDR(.smp_altinstructions) - LOAD_OFFSET) { | ||
121 | __smp_alt_begin = .; | ||
122 | __smp_alt_instructions = .; | ||
123 | *(.smp_altinstructions) | ||
124 | __smp_alt_instructions_end = .; | ||
125 | } | ||
126 | . = ALIGN(4); | ||
127 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { | 119 | .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) { |
128 | __smp_locks = .; | 120 | __smp_locks = .; |
129 | *(.smp_locks) | 121 | *(.smp_locks) |
130 | __smp_locks_end = .; | 122 | __smp_locks_end = .; |
131 | } | 123 | } |
132 | .smp_altinstr_replacement : AT(ADDR(.smp_altinstr_replacement) - LOAD_OFFSET) { | ||
133 | *(.smp_altinstr_replacement) | ||
134 | __smp_alt_end = .; | ||
135 | } | ||
136 | /* will be freed after init | 124 | /* will be freed after init |
137 | * Following ALIGN() is required to make sure no other data falls on the | 125 | * Following ALIGN() is required to make sure no other data falls on the |
138 | * same page where __smp_alt_end is pointing as that page might be freed | 126 | * same page where __smp_alt_end is pointing as that page might be freed |
@@ -178,9 +166,9 @@ SECTIONS | |||
178 | } | 166 | } |
179 | . = ALIGN(4); | 167 | . = ALIGN(4); |
180 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { | 168 | .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { |
181 | __start_parainstructions = .; | 169 | __parainstructions = .; |
182 | *(.parainstructions) | 170 | *(.parainstructions) |
183 | __stop_parainstructions = .; | 171 | __parainstructions_end = .; |
184 | } | 172 | } |
185 | /* .exit.text is discarded at runtime, not link time, to deal with references | 173 | /* .exit.text is discarded at runtime, not link time, to deal with references |
186 | from .altinstructions and .eh_frame */ | 174 | from .altinstructions and .eh_frame */ |
@@ -194,7 +182,7 @@ SECTIONS | |||
194 | __initramfs_end = .; | 182 | __initramfs_end = .; |
195 | } | 183 | } |
196 | #endif | 184 | #endif |
197 | . = ALIGN(L1_CACHE_BYTES); | 185 | . = ALIGN(4096); |
198 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { | 186 | .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { |
199 | __per_cpu_start = .; | 187 | __per_cpu_start = .; |
200 | *(.data.percpu) | 188 | *(.data.percpu) |
diff --git a/arch/i386/kernel/vsyscall.lds.S b/arch/i386/kernel/vsyscall.lds.S index f66cd11adb72..4a8b0ed9b8fb 100644 --- a/arch/i386/kernel/vsyscall.lds.S +++ b/arch/i386/kernel/vsyscall.lds.S | |||
@@ -7,7 +7,7 @@ | |||
7 | 7 | ||
8 | SECTIONS | 8 | SECTIONS |
9 | { | 9 | { |
10 | . = VDSO_PRELINK + SIZEOF_HEADERS; | 10 | . = VDSO_PRELINK_asm + SIZEOF_HEADERS; |
11 | 11 | ||
12 | .hash : { *(.hash) } :text | 12 | .hash : { *(.hash) } :text |
13 | .gnu.hash : { *(.gnu.hash) } | 13 | .gnu.hash : { *(.gnu.hash) } |
@@ -21,7 +21,7 @@ SECTIONS | |||
21 | For the layouts to match, we need to skip more than enough | 21 | For the layouts to match, we need to skip more than enough |
22 | space for the dynamic symbol table et al. If this amount | 22 | space for the dynamic symbol table et al. If this amount |
23 | is insufficient, ld -shared will barf. Just increase it here. */ | 23 | is insufficient, ld -shared will barf. Just increase it here. */ |
24 | . = VDSO_PRELINK + 0x400; | 24 | . = VDSO_PRELINK_asm + 0x400; |
25 | 25 | ||
26 | .text : { *(.text) } :text =0x90909090 | 26 | .text : { *(.text) } :text =0x90909090 |
27 | .note : { *(.note.*) } :text :note | 27 | .note : { *(.note.*) } :text :note |