diff options
Diffstat (limited to 'arch/i386/kernel')
40 files changed, 1865 insertions, 1246 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 5427a842e841..1a884b6e6e5c 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile | |||
@@ -4,7 +4,7 @@ | |||
4 | 4 | ||
5 | extra-y := head.o init_task.o vmlinux.lds | 5 | extra-y := head.o init_task.o vmlinux.lds |
6 | 6 | ||
7 | obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ | 7 | obj-y := process.o signal.o entry.o traps.o irq.o \ |
8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ | 8 | ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ |
9 | pci-dma.o i386_ksyms.o i387.o bootflag.o \ | 9 | pci-dma.o i386_ksyms.o i387.o bootflag.o \ |
10 | quirks.o i8237.o topology.o alternative.o i8253.o tsc.o | 10 | quirks.o i8237.o topology.o alternative.o i8253.o tsc.o |
@@ -81,4 +81,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ | |||
81 | $(call if_changed,syscall) | 81 | $(call if_changed,syscall) |
82 | 82 | ||
83 | k8-y += ../../x86_64/kernel/k8.o | 83 | k8-y += ../../x86_64/kernel/k8.o |
84 | stacktrace-y += ../../x86_64/kernel/stacktrace.o | ||
84 | 85 | ||
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile index 7e9ac99354f4..7f7be01f44e6 100644 --- a/arch/i386/kernel/acpi/Makefile +++ b/arch/i386/kernel/acpi/Makefile | |||
@@ -1,5 +1,7 @@ | |||
1 | obj-$(CONFIG_ACPI) += boot.o | 1 | obj-$(CONFIG_ACPI) += boot.o |
2 | ifneq ($(CONFIG_PCI),) | ||
2 | obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o | 3 | obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o |
4 | endif | ||
3 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o | 5 | obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o |
4 | 6 | ||
5 | ifneq ($(CONFIG_ACPI_PROCESSOR),) | 7 | ifneq ($(CONFIG_ACPI_PROCESSOR),) |
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c index ee003bc0e8b1..1aaea6ab8c46 100644 --- a/arch/i386/kernel/acpi/boot.c +++ b/arch/i386/kernel/acpi/boot.c | |||
@@ -26,9 +26,12 @@ | |||
26 | #include <linux/init.h> | 26 | #include <linux/init.h> |
27 | #include <linux/acpi.h> | 27 | #include <linux/acpi.h> |
28 | #include <linux/efi.h> | 28 | #include <linux/efi.h> |
29 | #include <linux/cpumask.h> | ||
29 | #include <linux/module.h> | 30 | #include <linux/module.h> |
30 | #include <linux/dmi.h> | 31 | #include <linux/dmi.h> |
31 | #include <linux/irq.h> | 32 | #include <linux/irq.h> |
33 | #include <linux/bootmem.h> | ||
34 | #include <linux/ioport.h> | ||
32 | 35 | ||
33 | #include <asm/pgtable.h> | 36 | #include <asm/pgtable.h> |
34 | #include <asm/io_apic.h> | 37 | #include <asm/io_apic.h> |
@@ -36,11 +39,17 @@ | |||
36 | #include <asm/io.h> | 39 | #include <asm/io.h> |
37 | #include <asm/mpspec.h> | 40 | #include <asm/mpspec.h> |
38 | 41 | ||
39 | #ifdef CONFIG_X86_64 | 42 | static int __initdata acpi_force = 0; |
40 | 43 | ||
41 | extern void __init clustered_apic_check(void); | 44 | #ifdef CONFIG_ACPI |
45 | int acpi_disabled = 0; | ||
46 | #else | ||
47 | int acpi_disabled = 1; | ||
48 | #endif | ||
49 | EXPORT_SYMBOL(acpi_disabled); | ||
50 | |||
51 | #ifdef CONFIG_X86_64 | ||
42 | 52 | ||
43 | extern int gsi_irq_sharing(int gsi); | ||
44 | #include <asm/proto.h> | 53 | #include <asm/proto.h> |
45 | 54 | ||
46 | static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } | 55 | static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } |
@@ -506,16 +515,76 @@ EXPORT_SYMBOL(acpi_register_gsi); | |||
506 | #ifdef CONFIG_ACPI_HOTPLUG_CPU | 515 | #ifdef CONFIG_ACPI_HOTPLUG_CPU |
507 | int acpi_map_lsapic(acpi_handle handle, int *pcpu) | 516 | int acpi_map_lsapic(acpi_handle handle, int *pcpu) |
508 | { | 517 | { |
509 | /* TBD */ | 518 | struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL }; |
510 | return -EINVAL; | 519 | union acpi_object *obj; |
520 | struct acpi_table_lapic *lapic; | ||
521 | cpumask_t tmp_map, new_map; | ||
522 | u8 physid; | ||
523 | int cpu; | ||
524 | |||
525 | if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer))) | ||
526 | return -EINVAL; | ||
527 | |||
528 | if (!buffer.length || !buffer.pointer) | ||
529 | return -EINVAL; | ||
530 | |||
531 | obj = buffer.pointer; | ||
532 | if (obj->type != ACPI_TYPE_BUFFER || | ||
533 | obj->buffer.length < sizeof(*lapic)) { | ||
534 | kfree(buffer.pointer); | ||
535 | return -EINVAL; | ||
536 | } | ||
537 | |||
538 | lapic = (struct acpi_table_lapic *)obj->buffer.pointer; | ||
539 | |||
540 | if ((lapic->header.type != ACPI_MADT_LAPIC) || | ||
541 | (!lapic->flags.enabled)) { | ||
542 | kfree(buffer.pointer); | ||
543 | return -EINVAL; | ||
544 | } | ||
545 | |||
546 | physid = lapic->id; | ||
547 | |||
548 | kfree(buffer.pointer); | ||
549 | buffer.length = ACPI_ALLOCATE_BUFFER; | ||
550 | buffer.pointer = NULL; | ||
551 | |||
552 | tmp_map = cpu_present_map; | ||
553 | mp_register_lapic(physid, lapic->flags.enabled); | ||
554 | |||
555 | /* | ||
556 | * If mp_register_lapic successfully generates a new logical cpu | ||
557 | * number, then the following will get us exactly what was mapped | ||
558 | */ | ||
559 | cpus_andnot(new_map, cpu_present_map, tmp_map); | ||
560 | if (cpus_empty(new_map)) { | ||
561 | printk ("Unable to map lapic to logical cpu number\n"); | ||
562 | return -EINVAL; | ||
563 | } | ||
564 | |||
565 | cpu = first_cpu(new_map); | ||
566 | |||
567 | *pcpu = cpu; | ||
568 | return 0; | ||
511 | } | 569 | } |
512 | 570 | ||
513 | EXPORT_SYMBOL(acpi_map_lsapic); | 571 | EXPORT_SYMBOL(acpi_map_lsapic); |
514 | 572 | ||
515 | int acpi_unmap_lsapic(int cpu) | 573 | int acpi_unmap_lsapic(int cpu) |
516 | { | 574 | { |
517 | /* TBD */ | 575 | int i; |
518 | return -EINVAL; | 576 | |
577 | for_each_possible_cpu(i) { | ||
578 | if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) { | ||
579 | x86_acpiid_to_apicid[i] = -1; | ||
580 | break; | ||
581 | } | ||
582 | } | ||
583 | x86_cpu_to_apicid[cpu] = -1; | ||
584 | cpu_clear(cpu, cpu_present_map); | ||
585 | num_processors--; | ||
586 | |||
587 | return (0); | ||
519 | } | 588 | } |
520 | 589 | ||
521 | EXPORT_SYMBOL(acpi_unmap_lsapic); | 590 | EXPORT_SYMBOL(acpi_unmap_lsapic); |
@@ -579,6 +648,8 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size) | |||
579 | static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) | 648 | static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) |
580 | { | 649 | { |
581 | struct acpi_table_hpet *hpet_tbl; | 650 | struct acpi_table_hpet *hpet_tbl; |
651 | struct resource *hpet_res; | ||
652 | resource_size_t res_start; | ||
582 | 653 | ||
583 | if (!phys || !size) | 654 | if (!phys || !size) |
584 | return -EINVAL; | 655 | return -EINVAL; |
@@ -594,12 +665,26 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) | |||
594 | "memory.\n"); | 665 | "memory.\n"); |
595 | return -1; | 666 | return -1; |
596 | } | 667 | } |
668 | |||
669 | #define HPET_RESOURCE_NAME_SIZE 9 | ||
670 | hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE); | ||
671 | if (hpet_res) { | ||
672 | memset(hpet_res, 0, sizeof(*hpet_res)); | ||
673 | hpet_res->name = (void *)&hpet_res[1]; | ||
674 | hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; | ||
675 | snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE, | ||
676 | "HPET %u", hpet_tbl->number); | ||
677 | hpet_res->end = (1 * 1024) - 1; | ||
678 | } | ||
679 | |||
597 | #ifdef CONFIG_X86_64 | 680 | #ifdef CONFIG_X86_64 |
598 | vxtime.hpet_address = hpet_tbl->addr.addrl | | 681 | vxtime.hpet_address = hpet_tbl->addr.addrl | |
599 | ((long)hpet_tbl->addr.addrh << 32); | 682 | ((long)hpet_tbl->addr.addrh << 32); |
600 | 683 | ||
601 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", | 684 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", |
602 | hpet_tbl->id, vxtime.hpet_address); | 685 | hpet_tbl->id, vxtime.hpet_address); |
686 | |||
687 | res_start = vxtime.hpet_address; | ||
603 | #else /* X86 */ | 688 | #else /* X86 */ |
604 | { | 689 | { |
605 | extern unsigned long hpet_address; | 690 | extern unsigned long hpet_address; |
@@ -607,9 +692,17 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) | |||
607 | hpet_address = hpet_tbl->addr.addrl; | 692 | hpet_address = hpet_tbl->addr.addrl; |
608 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", | 693 | printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", |
609 | hpet_tbl->id, hpet_address); | 694 | hpet_tbl->id, hpet_address); |
695 | |||
696 | res_start = hpet_address; | ||
610 | } | 697 | } |
611 | #endif /* X86 */ | 698 | #endif /* X86 */ |
612 | 699 | ||
700 | if (hpet_res) { | ||
701 | hpet_res->start = res_start; | ||
702 | hpet_res->end += res_start; | ||
703 | insert_resource(&iomem_resource, hpet_res); | ||
704 | } | ||
705 | |||
613 | return 0; | 706 | return 0; |
614 | } | 707 | } |
615 | #else | 708 | #else |
@@ -860,8 +953,6 @@ static void __init acpi_process_madt(void) | |||
860 | return; | 953 | return; |
861 | } | 954 | } |
862 | 955 | ||
863 | extern int acpi_force; | ||
864 | |||
865 | #ifdef __i386__ | 956 | #ifdef __i386__ |
866 | 957 | ||
867 | static int __init disable_acpi_irq(struct dmi_system_id *d) | 958 | static int __init disable_acpi_irq(struct dmi_system_id *d) |
@@ -1163,3 +1254,75 @@ int __init acpi_boot_init(void) | |||
1163 | 1254 | ||
1164 | return 0; | 1255 | return 0; |
1165 | } | 1256 | } |
1257 | |||
1258 | static int __init parse_acpi(char *arg) | ||
1259 | { | ||
1260 | if (!arg) | ||
1261 | return -EINVAL; | ||
1262 | |||
1263 | /* "acpi=off" disables both ACPI table parsing and interpreter */ | ||
1264 | if (strcmp(arg, "off") == 0) { | ||
1265 | disable_acpi(); | ||
1266 | } | ||
1267 | /* acpi=force to over-ride black-list */ | ||
1268 | else if (strcmp(arg, "force") == 0) { | ||
1269 | acpi_force = 1; | ||
1270 | acpi_ht = 1; | ||
1271 | acpi_disabled = 0; | ||
1272 | } | ||
1273 | /* acpi=strict disables out-of-spec workarounds */ | ||
1274 | else if (strcmp(arg, "strict") == 0) { | ||
1275 | acpi_strict = 1; | ||
1276 | } | ||
1277 | /* Limit ACPI just to boot-time to enable HT */ | ||
1278 | else if (strcmp(arg, "ht") == 0) { | ||
1279 | if (!acpi_force) | ||
1280 | disable_acpi(); | ||
1281 | acpi_ht = 1; | ||
1282 | } | ||
1283 | /* "acpi=noirq" disables ACPI interrupt routing */ | ||
1284 | else if (strcmp(arg, "noirq") == 0) { | ||
1285 | acpi_noirq_set(); | ||
1286 | } else { | ||
1287 | /* Core will printk when we return error. */ | ||
1288 | return -EINVAL; | ||
1289 | } | ||
1290 | return 0; | ||
1291 | } | ||
1292 | early_param("acpi", parse_acpi); | ||
1293 | |||
1294 | /* FIXME: Using pci= for an ACPI parameter is a travesty. */ | ||
1295 | static int __init parse_pci(char *arg) | ||
1296 | { | ||
1297 | if (arg && strcmp(arg, "noacpi") == 0) | ||
1298 | acpi_disable_pci(); | ||
1299 | return 0; | ||
1300 | } | ||
1301 | early_param("pci", parse_pci); | ||
1302 | |||
1303 | #ifdef CONFIG_X86_IO_APIC | ||
1304 | static int __init parse_acpi_skip_timer_override(char *arg) | ||
1305 | { | ||
1306 | acpi_skip_timer_override = 1; | ||
1307 | return 0; | ||
1308 | } | ||
1309 | early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override); | ||
1310 | #endif /* CONFIG_X86_IO_APIC */ | ||
1311 | |||
1312 | static int __init setup_acpi_sci(char *s) | ||
1313 | { | ||
1314 | if (!s) | ||
1315 | return -EINVAL; | ||
1316 | if (!strcmp(s, "edge")) | ||
1317 | acpi_sci_flags.trigger = 1; | ||
1318 | else if (!strcmp(s, "level")) | ||
1319 | acpi_sci_flags.trigger = 3; | ||
1320 | else if (!strcmp(s, "high")) | ||
1321 | acpi_sci_flags.polarity = 1; | ||
1322 | else if (!strcmp(s, "low")) | ||
1323 | acpi_sci_flags.polarity = 3; | ||
1324 | else | ||
1325 | return -EINVAL; | ||
1326 | return 0; | ||
1327 | } | ||
1328 | early_param("acpi_sci", setup_acpi_sci); | ||
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c index 1649a175a206..fe799b11ac0a 100644 --- a/arch/i386/kernel/acpi/earlyquirk.c +++ b/arch/i386/kernel/acpi/earlyquirk.c | |||
@@ -48,7 +48,11 @@ void __init check_acpi_pci(void) | |||
48 | int num, slot, func; | 48 | int num, slot, func; |
49 | 49 | ||
50 | /* Assume the machine supports type 1. If not it will | 50 | /* Assume the machine supports type 1. If not it will |
51 | always read ffffffff and should not have any side effect. */ | 51 | always read ffffffff and should not have any side effect. |
52 | Actually a few buggy systems can machine check. Allow the user | ||
53 | to disable it by command line option at least -AK */ | ||
54 | if (!early_pci_allowed()) | ||
55 | return; | ||
52 | 56 | ||
53 | /* Poor man's PCI discovery */ | 57 | /* Poor man's PCI discovery */ |
54 | for (num = 0; num < 32; num++) { | 58 | for (num = 0; num < 32; num++) { |
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c index 8c844d07862f..90faae5c5d30 100644 --- a/arch/i386/kernel/apic.c +++ b/arch/i386/kernel/apic.c | |||
@@ -52,7 +52,18 @@ static cpumask_t timer_bcast_ipi; | |||
52 | /* | 52 | /* |
53 | * Knob to control our willingness to enable the local APIC. | 53 | * Knob to control our willingness to enable the local APIC. |
54 | */ | 54 | */ |
55 | int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ | 55 | static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ |
56 | |||
57 | static inline void lapic_disable(void) | ||
58 | { | ||
59 | enable_local_apic = -1; | ||
60 | clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability); | ||
61 | } | ||
62 | |||
63 | static inline void lapic_enable(void) | ||
64 | { | ||
65 | enable_local_apic = 1; | ||
66 | } | ||
56 | 67 | ||
57 | /* | 68 | /* |
58 | * Debug level | 69 | * Debug level |
@@ -586,8 +597,7 @@ void __devinit setup_local_APIC(void) | |||
586 | printk("No ESR for 82489DX.\n"); | 597 | printk("No ESR for 82489DX.\n"); |
587 | } | 598 | } |
588 | 599 | ||
589 | if (nmi_watchdog == NMI_LOCAL_APIC) | 600 | setup_apic_nmi_watchdog(NULL); |
590 | setup_apic_nmi_watchdog(); | ||
591 | apic_pm_activate(); | 601 | apic_pm_activate(); |
592 | } | 602 | } |
593 | 603 | ||
@@ -1373,3 +1383,18 @@ int __init APIC_init_uniprocessor (void) | |||
1373 | 1383 | ||
1374 | return 0; | 1384 | return 0; |
1375 | } | 1385 | } |
1386 | |||
1387 | static int __init parse_lapic(char *arg) | ||
1388 | { | ||
1389 | lapic_enable(); | ||
1390 | return 0; | ||
1391 | } | ||
1392 | early_param("lapic", parse_lapic); | ||
1393 | |||
1394 | static int __init parse_nolapic(char *arg) | ||
1395 | { | ||
1396 | lapic_disable(); | ||
1397 | return 0; | ||
1398 | } | ||
1399 | early_param("nolapic", parse_nolapic); | ||
1400 | |||
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c index e6a2d6b80cda..e4758095d87a 100644 --- a/arch/i386/kernel/cpu/amd.c +++ b/arch/i386/kernel/cpu/amd.c | |||
@@ -22,7 +22,7 @@ | |||
22 | extern void vide(void); | 22 | extern void vide(void); |
23 | __asm__(".align 4\nvide: ret"); | 23 | __asm__(".align 4\nvide: ret"); |
24 | 24 | ||
25 | static void __init init_amd(struct cpuinfo_x86 *c) | 25 | static void __cpuinit init_amd(struct cpuinfo_x86 *c) |
26 | { | 26 | { |
27 | u32 l, h; | 27 | u32 l, h; |
28 | int mbytes = num_physpages >> (20-PAGE_SHIFT); | 28 | int mbytes = num_physpages >> (20-PAGE_SHIFT); |
@@ -246,7 +246,7 @@ static void __init init_amd(struct cpuinfo_x86 *c) | |||
246 | num_cache_leaves = 3; | 246 | num_cache_leaves = 3; |
247 | } | 247 | } |
248 | 248 | ||
249 | static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 249 | static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
250 | { | 250 | { |
251 | /* AMD errata T13 (order #21922) */ | 251 | /* AMD errata T13 (order #21922) */ |
252 | if ((c->x86 == 6)) { | 252 | if ((c->x86 == 6)) { |
@@ -259,7 +259,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) | |||
259 | return size; | 259 | return size; |
260 | } | 260 | } |
261 | 261 | ||
262 | static struct cpu_dev amd_cpu_dev __initdata = { | 262 | static struct cpu_dev amd_cpu_dev __cpuinitdata = { |
263 | .c_vendor = "AMD", | 263 | .c_vendor = "AMD", |
264 | .c_ident = { "AuthenticAMD" }, | 264 | .c_ident = { "AuthenticAMD" }, |
265 | .c_models = { | 265 | .c_models = { |
@@ -275,7 +275,6 @@ static struct cpu_dev amd_cpu_dev __initdata = { | |||
275 | }, | 275 | }, |
276 | }, | 276 | }, |
277 | .c_init = init_amd, | 277 | .c_init = init_amd, |
278 | .c_identify = generic_identify, | ||
279 | .c_size_cache = amd_size_cache, | 278 | .c_size_cache = amd_size_cache, |
280 | }; | 279 | }; |
281 | 280 | ||
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c index bd75629dd262..8c25047975c0 100644 --- a/arch/i386/kernel/cpu/centaur.c +++ b/arch/i386/kernel/cpu/centaur.c | |||
@@ -9,7 +9,7 @@ | |||
9 | 9 | ||
10 | #ifdef CONFIG_X86_OOSTORE | 10 | #ifdef CONFIG_X86_OOSTORE |
11 | 11 | ||
12 | static u32 __init power2(u32 x) | 12 | static u32 __cpuinit power2(u32 x) |
13 | { | 13 | { |
14 | u32 s=1; | 14 | u32 s=1; |
15 | while(s<=x) | 15 | while(s<=x) |
@@ -22,7 +22,7 @@ static u32 __init power2(u32 x) | |||
22 | * Set up an actual MCR | 22 | * Set up an actual MCR |
23 | */ | 23 | */ |
24 | 24 | ||
25 | static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) | 25 | static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key) |
26 | { | 26 | { |
27 | u32 lo, hi; | 27 | u32 lo, hi; |
28 | 28 | ||
@@ -40,7 +40,7 @@ static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) | |||
40 | * Shortcut: We know you can't put 4Gig of RAM on a winchip | 40 | * Shortcut: We know you can't put 4Gig of RAM on a winchip |
41 | */ | 41 | */ |
42 | 42 | ||
43 | static u32 __init ramtop(void) /* 16388 */ | 43 | static u32 __cpuinit ramtop(void) /* 16388 */ |
44 | { | 44 | { |
45 | int i; | 45 | int i; |
46 | u32 top = 0; | 46 | u32 top = 0; |
@@ -91,7 +91,7 @@ static u32 __init ramtop(void) /* 16388 */ | |||
91 | * Compute a set of MCR's to give maximum coverage | 91 | * Compute a set of MCR's to give maximum coverage |
92 | */ | 92 | */ |
93 | 93 | ||
94 | static int __init centaur_mcr_compute(int nr, int key) | 94 | static int __cpuinit centaur_mcr_compute(int nr, int key) |
95 | { | 95 | { |
96 | u32 mem = ramtop(); | 96 | u32 mem = ramtop(); |
97 | u32 root = power2(mem); | 97 | u32 root = power2(mem); |
@@ -166,7 +166,7 @@ static int __init centaur_mcr_compute(int nr, int key) | |||
166 | return ct; | 166 | return ct; |
167 | } | 167 | } |
168 | 168 | ||
169 | static void __init centaur_create_optimal_mcr(void) | 169 | static void __cpuinit centaur_create_optimal_mcr(void) |
170 | { | 170 | { |
171 | int i; | 171 | int i; |
172 | /* | 172 | /* |
@@ -189,7 +189,7 @@ static void __init centaur_create_optimal_mcr(void) | |||
189 | wrmsr(MSR_IDT_MCR0+i, 0, 0); | 189 | wrmsr(MSR_IDT_MCR0+i, 0, 0); |
190 | } | 190 | } |
191 | 191 | ||
192 | static void __init winchip2_create_optimal_mcr(void) | 192 | static void __cpuinit winchip2_create_optimal_mcr(void) |
193 | { | 193 | { |
194 | u32 lo, hi; | 194 | u32 lo, hi; |
195 | int i; | 195 | int i; |
@@ -227,7 +227,7 @@ static void __init winchip2_create_optimal_mcr(void) | |||
227 | * Handle the MCR key on the Winchip 2. | 227 | * Handle the MCR key on the Winchip 2. |
228 | */ | 228 | */ |
229 | 229 | ||
230 | static void __init winchip2_unprotect_mcr(void) | 230 | static void __cpuinit winchip2_unprotect_mcr(void) |
231 | { | 231 | { |
232 | u32 lo, hi; | 232 | u32 lo, hi; |
233 | u32 key; | 233 | u32 key; |
@@ -239,7 +239,7 @@ static void __init winchip2_unprotect_mcr(void) | |||
239 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); | 239 | wrmsr(MSR_IDT_MCR_CTRL, lo, hi); |
240 | } | 240 | } |
241 | 241 | ||
242 | static void __init winchip2_protect_mcr(void) | 242 | static void __cpuinit winchip2_protect_mcr(void) |
243 | { | 243 | { |
244 | u32 lo, hi; | 244 | u32 lo, hi; |
245 | 245 | ||
@@ -257,7 +257,7 @@ static void __init winchip2_protect_mcr(void) | |||
257 | #define RNG_ENABLED (1 << 3) | 257 | #define RNG_ENABLED (1 << 3) |
258 | #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ | 258 | #define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ |
259 | 259 | ||
260 | static void __init init_c3(struct cpuinfo_x86 *c) | 260 | static void __cpuinit init_c3(struct cpuinfo_x86 *c) |
261 | { | 261 | { |
262 | u32 lo, hi; | 262 | u32 lo, hi; |
263 | 263 | ||
@@ -303,7 +303,7 @@ static void __init init_c3(struct cpuinfo_x86 *c) | |||
303 | display_cacheinfo(c); | 303 | display_cacheinfo(c); |
304 | } | 304 | } |
305 | 305 | ||
306 | static void __init init_centaur(struct cpuinfo_x86 *c) | 306 | static void __cpuinit init_centaur(struct cpuinfo_x86 *c) |
307 | { | 307 | { |
308 | enum { | 308 | enum { |
309 | ECX8=1<<1, | 309 | ECX8=1<<1, |
@@ -442,7 +442,7 @@ static void __init init_centaur(struct cpuinfo_x86 *c) | |||
442 | } | 442 | } |
443 | } | 443 | } |
444 | 444 | ||
445 | static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 445 | static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
446 | { | 446 | { |
447 | /* VIA C3 CPUs (670-68F) need further shifting. */ | 447 | /* VIA C3 CPUs (670-68F) need further shifting. */ |
448 | if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) | 448 | if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) |
@@ -457,7 +457,7 @@ static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size | |||
457 | return size; | 457 | return size; |
458 | } | 458 | } |
459 | 459 | ||
460 | static struct cpu_dev centaur_cpu_dev __initdata = { | 460 | static struct cpu_dev centaur_cpu_dev __cpuinitdata = { |
461 | .c_vendor = "Centaur", | 461 | .c_vendor = "Centaur", |
462 | .c_ident = { "CentaurHauls" }, | 462 | .c_ident = { "CentaurHauls" }, |
463 | .c_init = init_centaur, | 463 | .c_init = init_centaur, |
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c index 70c87de582c7..2799baaadf45 100644 --- a/arch/i386/kernel/cpu/common.c +++ b/arch/i386/kernel/cpu/common.c | |||
@@ -36,7 +36,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; | |||
36 | 36 | ||
37 | extern int disable_pse; | 37 | extern int disable_pse; |
38 | 38 | ||
39 | static void default_init(struct cpuinfo_x86 * c) | 39 | static void __cpuinit default_init(struct cpuinfo_x86 * c) |
40 | { | 40 | { |
41 | /* Not much we can do here... */ | 41 | /* Not much we can do here... */ |
42 | /* Check if at least it has cpuid */ | 42 | /* Check if at least it has cpuid */ |
@@ -49,7 +49,7 @@ static void default_init(struct cpuinfo_x86 * c) | |||
49 | } | 49 | } |
50 | } | 50 | } |
51 | 51 | ||
52 | static struct cpu_dev default_cpu = { | 52 | static struct cpu_dev __cpuinitdata default_cpu = { |
53 | .c_init = default_init, | 53 | .c_init = default_init, |
54 | .c_vendor = "Unknown", | 54 | .c_vendor = "Unknown", |
55 | }; | 55 | }; |
@@ -265,7 +265,7 @@ static void __init early_cpu_detect(void) | |||
265 | } | 265 | } |
266 | } | 266 | } |
267 | 267 | ||
268 | void __cpuinit generic_identify(struct cpuinfo_x86 * c) | 268 | static void __cpuinit generic_identify(struct cpuinfo_x86 * c) |
269 | { | 269 | { |
270 | u32 tfms, xlvl; | 270 | u32 tfms, xlvl; |
271 | int ebx; | 271 | int ebx; |
@@ -675,7 +675,7 @@ old_gdt: | |||
675 | #endif | 675 | #endif |
676 | 676 | ||
677 | /* Clear %fs and %gs. */ | 677 | /* Clear %fs and %gs. */ |
678 | asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); | 678 | asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0)); |
679 | 679 | ||
680 | /* Clear all 6 debug registers: */ | 680 | /* Clear all 6 debug registers: */ |
681 | set_debugreg(0, 0); | 681 | set_debugreg(0, 0); |
diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h index 5a1d4f163e84..2f6432cef6ff 100644 --- a/arch/i386/kernel/cpu/cpu.h +++ b/arch/i386/kernel/cpu/cpu.h | |||
@@ -24,7 +24,5 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; | |||
24 | extern int get_model_name(struct cpuinfo_x86 *c); | 24 | extern int get_model_name(struct cpuinfo_x86 *c); |
25 | extern void display_cacheinfo(struct cpuinfo_x86 *c); | 25 | extern void display_cacheinfo(struct cpuinfo_x86 *c); |
26 | 26 | ||
27 | extern void generic_identify(struct cpuinfo_x86 * c); | ||
28 | |||
29 | extern void early_intel_workaround(struct cpuinfo_x86 *c); | 27 | extern void early_intel_workaround(struct cpuinfo_x86 *c); |
30 | 28 | ||
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c index f03b7f94c304..c0c3b59de32c 100644 --- a/arch/i386/kernel/cpu/cyrix.c +++ b/arch/i386/kernel/cpu/cyrix.c | |||
@@ -12,7 +12,7 @@ | |||
12 | /* | 12 | /* |
13 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU | 13 | * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU |
14 | */ | 14 | */ |
15 | static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | 15 | static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) |
16 | { | 16 | { |
17 | unsigned char ccr2, ccr3; | 17 | unsigned char ccr2, ccr3; |
18 | unsigned long flags; | 18 | unsigned long flags; |
@@ -52,25 +52,25 @@ static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) | |||
52 | * Actually since bugs.h doesn't even reference this perhaps someone should | 52 | * Actually since bugs.h doesn't even reference this perhaps someone should |
53 | * fix the documentation ??? | 53 | * fix the documentation ??? |
54 | */ | 54 | */ |
55 | static unsigned char Cx86_dir0_msb __initdata = 0; | 55 | static unsigned char Cx86_dir0_msb __cpuinitdata = 0; |
56 | 56 | ||
57 | static char Cx86_model[][9] __initdata = { | 57 | static char Cx86_model[][9] __cpuinitdata = { |
58 | "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", | 58 | "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", |
59 | "M II ", "Unknown" | 59 | "M II ", "Unknown" |
60 | }; | 60 | }; |
61 | static char Cx486_name[][5] __initdata = { | 61 | static char Cx486_name[][5] __cpuinitdata = { |
62 | "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", | 62 | "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", |
63 | "SRx2", "DRx2" | 63 | "SRx2", "DRx2" |
64 | }; | 64 | }; |
65 | static char Cx486S_name[][4] __initdata = { | 65 | static char Cx486S_name[][4] __cpuinitdata = { |
66 | "S", "S2", "Se", "S2e" | 66 | "S", "S2", "Se", "S2e" |
67 | }; | 67 | }; |
68 | static char Cx486D_name[][4] __initdata = { | 68 | static char Cx486D_name[][4] __cpuinitdata = { |
69 | "DX", "DX2", "?", "?", "?", "DX4" | 69 | "DX", "DX2", "?", "?", "?", "DX4" |
70 | }; | 70 | }; |
71 | static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock"; | 71 | static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock"; |
72 | static char cyrix_model_mult1[] __initdata = "12??43"; | 72 | static char cyrix_model_mult1[] __cpuinitdata = "12??43"; |
73 | static char cyrix_model_mult2[] __initdata = "12233445"; | 73 | static char cyrix_model_mult2[] __cpuinitdata = "12233445"; |
74 | 74 | ||
75 | /* | 75 | /* |
76 | * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old | 76 | * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old |
@@ -82,7 +82,7 @@ static char cyrix_model_mult2[] __initdata = "12233445"; | |||
82 | 82 | ||
83 | extern void calibrate_delay(void) __init; | 83 | extern void calibrate_delay(void) __init; |
84 | 84 | ||
85 | static void __init check_cx686_slop(struct cpuinfo_x86 *c) | 85 | static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c) |
86 | { | 86 | { |
87 | unsigned long flags; | 87 | unsigned long flags; |
88 | 88 | ||
@@ -107,7 +107,7 @@ static void __init check_cx686_slop(struct cpuinfo_x86 *c) | |||
107 | } | 107 | } |
108 | 108 | ||
109 | 109 | ||
110 | static void __init set_cx86_reorder(void) | 110 | static void __cpuinit set_cx86_reorder(void) |
111 | { | 111 | { |
112 | u8 ccr3; | 112 | u8 ccr3; |
113 | 113 | ||
@@ -122,7 +122,7 @@ static void __init set_cx86_reorder(void) | |||
122 | setCx86(CX86_CCR3, ccr3); | 122 | setCx86(CX86_CCR3, ccr3); |
123 | } | 123 | } |
124 | 124 | ||
125 | static void __init set_cx86_memwb(void) | 125 | static void __cpuinit set_cx86_memwb(void) |
126 | { | 126 | { |
127 | u32 cr0; | 127 | u32 cr0; |
128 | 128 | ||
@@ -137,7 +137,7 @@ static void __init set_cx86_memwb(void) | |||
137 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); | 137 | setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); |
138 | } | 138 | } |
139 | 139 | ||
140 | static void __init set_cx86_inc(void) | 140 | static void __cpuinit set_cx86_inc(void) |
141 | { | 141 | { |
142 | unsigned char ccr3; | 142 | unsigned char ccr3; |
143 | 143 | ||
@@ -158,7 +158,7 @@ static void __init set_cx86_inc(void) | |||
158 | * Configure later MediaGX and/or Geode processor. | 158 | * Configure later MediaGX and/or Geode processor. |
159 | */ | 159 | */ |
160 | 160 | ||
161 | static void __init geode_configure(void) | 161 | static void __cpuinit geode_configure(void) |
162 | { | 162 | { |
163 | unsigned long flags; | 163 | unsigned long flags; |
164 | u8 ccr3, ccr4; | 164 | u8 ccr3, ccr4; |
@@ -184,14 +184,14 @@ static void __init geode_configure(void) | |||
184 | 184 | ||
185 | 185 | ||
186 | #ifdef CONFIG_PCI | 186 | #ifdef CONFIG_PCI |
187 | static struct pci_device_id __initdata cyrix_55x0[] = { | 187 | static struct pci_device_id __cpuinitdata cyrix_55x0[] = { |
188 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, | 188 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, |
189 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, | 189 | { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, |
190 | { }, | 190 | { }, |
191 | }; | 191 | }; |
192 | #endif | 192 | #endif |
193 | 193 | ||
194 | static void __init init_cyrix(struct cpuinfo_x86 *c) | 194 | static void __cpuinit init_cyrix(struct cpuinfo_x86 *c) |
195 | { | 195 | { |
196 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; | 196 | unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; |
197 | char *buf = c->x86_model_id; | 197 | char *buf = c->x86_model_id; |
@@ -346,7 +346,7 @@ static void __init init_cyrix(struct cpuinfo_x86 *c) | |||
346 | /* | 346 | /* |
347 | * Handle National Semiconductor branded processors | 347 | * Handle National Semiconductor branded processors |
348 | */ | 348 | */ |
349 | static void __init init_nsc(struct cpuinfo_x86 *c) | 349 | static void __cpuinit init_nsc(struct cpuinfo_x86 *c) |
350 | { | 350 | { |
351 | /* There may be GX1 processors in the wild that are branded | 351 | /* There may be GX1 processors in the wild that are branded |
352 | * NSC and not Cyrix. | 352 | * NSC and not Cyrix. |
@@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void) | |||
394 | return (unsigned char) (test >> 8) == 0x02; | 394 | return (unsigned char) (test >> 8) == 0x02; |
395 | } | 395 | } |
396 | 396 | ||
397 | static void cyrix_identify(struct cpuinfo_x86 * c) | 397 | static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c) |
398 | { | 398 | { |
399 | /* Detect Cyrix with disabled CPUID */ | 399 | /* Detect Cyrix with disabled CPUID */ |
400 | if ( c->x86 == 4 && test_cyrix_52div() ) { | 400 | if ( c->x86 == 4 && test_cyrix_52div() ) { |
@@ -427,10 +427,9 @@ static void cyrix_identify(struct cpuinfo_x86 * c) | |||
427 | local_irq_restore(flags); | 427 | local_irq_restore(flags); |
428 | } | 428 | } |
429 | } | 429 | } |
430 | generic_identify(c); | ||
431 | } | 430 | } |
432 | 431 | ||
433 | static struct cpu_dev cyrix_cpu_dev __initdata = { | 432 | static struct cpu_dev cyrix_cpu_dev __cpuinitdata = { |
434 | .c_vendor = "Cyrix", | 433 | .c_vendor = "Cyrix", |
435 | .c_ident = { "CyrixInstead" }, | 434 | .c_ident = { "CyrixInstead" }, |
436 | .c_init = init_cyrix, | 435 | .c_init = init_cyrix, |
@@ -453,11 +452,10 @@ static int __init cyrix_exit_cpu(void) | |||
453 | 452 | ||
454 | late_initcall(cyrix_exit_cpu); | 453 | late_initcall(cyrix_exit_cpu); |
455 | 454 | ||
456 | static struct cpu_dev nsc_cpu_dev __initdata = { | 455 | static struct cpu_dev nsc_cpu_dev __cpuinitdata = { |
457 | .c_vendor = "NSC", | 456 | .c_vendor = "NSC", |
458 | .c_ident = { "Geode by NSC" }, | 457 | .c_ident = { "Geode by NSC" }, |
459 | .c_init = init_nsc, | 458 | .c_init = init_nsc, |
460 | .c_identify = generic_identify, | ||
461 | }; | 459 | }; |
462 | 460 | ||
463 | int __init nsc_init_cpu(void) | 461 | int __init nsc_init_cpu(void) |
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c index 5a2e270924b1..94a95aa5227e 100644 --- a/arch/i386/kernel/cpu/intel.c +++ b/arch/i386/kernel/cpu/intel.c | |||
@@ -198,7 +198,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) | |||
198 | } | 198 | } |
199 | 199 | ||
200 | 200 | ||
201 | static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) | 201 | static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) |
202 | { | 202 | { |
203 | /* Intel PIII Tualatin. This comes in two flavours. | 203 | /* Intel PIII Tualatin. This comes in two flavours. |
204 | * One has 256kb of cache, the other 512. We have no way | 204 | * One has 256kb of cache, the other 512. We have no way |
@@ -263,7 +263,6 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = { | |||
263 | }, | 263 | }, |
264 | }, | 264 | }, |
265 | .c_init = init_intel, | 265 | .c_init = init_intel, |
266 | .c_identify = generic_identify, | ||
267 | .c_size_cache = intel_size_cache, | 266 | .c_size_cache = intel_size_cache, |
268 | }; | 267 | }; |
269 | 268 | ||
diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile index 30808f3d6715..f1ebe1c1c17a 100644 --- a/arch/i386/kernel/cpu/mcheck/Makefile +++ b/arch/i386/kernel/cpu/mcheck/Makefile | |||
@@ -1,2 +1,2 @@ | |||
1 | obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o | 1 | obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o |
2 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o | 2 | obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o |
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index b95f1b3d53aa..504434a46011 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c | |||
@@ -13,6 +13,8 @@ | |||
13 | #include <asm/msr.h> | 13 | #include <asm/msr.h> |
14 | #include <asm/apic.h> | 14 | #include <asm/apic.h> |
15 | 15 | ||
16 | #include <asm/therm_throt.h> | ||
17 | |||
16 | #include "mce.h" | 18 | #include "mce.h" |
17 | 19 | ||
18 | /* as supported by the P4/Xeon family */ | 20 | /* as supported by the P4/Xeon family */ |
@@ -44,25 +46,12 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs) | |||
44 | /* P4/Xeon Thermal transition interrupt handler */ | 46 | /* P4/Xeon Thermal transition interrupt handler */ |
45 | static void intel_thermal_interrupt(struct pt_regs *regs) | 47 | static void intel_thermal_interrupt(struct pt_regs *regs) |
46 | { | 48 | { |
47 | u32 l, h; | 49 | __u64 msr_val; |
48 | unsigned int cpu = smp_processor_id(); | ||
49 | static unsigned long next[NR_CPUS]; | ||
50 | 50 | ||
51 | ack_APIC_irq(); | 51 | ack_APIC_irq(); |
52 | 52 | ||
53 | if (time_after(next[cpu], jiffies)) | 53 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
54 | return; | 54 | therm_throt_process(msr_val & 0x1); |
55 | |||
56 | next[cpu] = jiffies + HZ*5; | ||
57 | rdmsr(MSR_IA32_THERM_STATUS, l, h); | ||
58 | if (l & 0x1) { | ||
59 | printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); | ||
60 | printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", | ||
61 | cpu); | ||
62 | add_taint(TAINT_MACHINE_CHECK); | ||
63 | } else { | ||
64 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); | ||
65 | } | ||
66 | } | 55 | } |
67 | 56 | ||
68 | /* Thermal interrupt handler for this CPU setup */ | 57 | /* Thermal interrupt handler for this CPU setup */ |
@@ -122,10 +111,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
122 | 111 | ||
123 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); | 112 | rdmsr (MSR_IA32_MISC_ENABLE, l, h); |
124 | wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); | 113 | wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); |
125 | 114 | ||
126 | l = apic_read (APIC_LVTTHMR); | 115 | l = apic_read (APIC_LVTTHMR); |
127 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 116 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
128 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | 117 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); |
118 | |||
119 | /* enable thermal throttle processing */ | ||
120 | atomic_set(&therm_throt_en, 1); | ||
129 | return; | 121 | return; |
130 | } | 122 | } |
131 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | 123 | #endif /* CONFIG_X86_MCE_P4THERMAL */ |
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c new file mode 100644 index 000000000000..4f43047de406 --- /dev/null +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c | |||
@@ -0,0 +1,180 @@ | |||
1 | /* | ||
2 | * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c | ||
3 | * | ||
4 | * Thermal throttle event support code (such as syslog messaging and rate | ||
5 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). | ||
6 | * This allows consistent reporting of CPU thermal throttle events. | ||
7 | * | ||
8 | * Maintains a counter in /sys that keeps track of the number of thermal | ||
9 | * events, such that the user knows how bad the thermal problem might be | ||
10 | * (since the logging to syslog and mcelog is rate limited). | ||
11 | * | ||
12 | * Author: Dmitriy Zavin (dmitriyz@google.com) | ||
13 | * | ||
14 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | ||
15 | * Inspired by Ross Biro's and Al Borchers' counter code. | ||
16 | */ | ||
17 | |||
18 | #include <linux/percpu.h> | ||
19 | #include <linux/sysdev.h> | ||
20 | #include <linux/cpu.h> | ||
21 | #include <asm/cpu.h> | ||
22 | #include <linux/notifier.h> | ||
23 | #include <asm/therm_throt.h> | ||
24 | |||
25 | /* How long to wait between reporting thermal events */ | ||
26 | #define CHECK_INTERVAL (300 * HZ) | ||
27 | |||
28 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; | ||
29 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | ||
30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | ||
31 | |||
32 | #ifdef CONFIG_SYSFS | ||
33 | #define define_therm_throt_sysdev_one_ro(_name) \ | ||
34 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | ||
35 | |||
36 | #define define_therm_throt_sysdev_show_func(name) \ | ||
37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | ||
38 | char *buf) \ | ||
39 | { \ | ||
40 | unsigned int cpu = dev->id; \ | ||
41 | ssize_t ret; \ | ||
42 | \ | ||
43 | preempt_disable(); /* CPU hotplug */ \ | ||
44 | if (cpu_online(cpu)) \ | ||
45 | ret = sprintf(buf, "%lu\n", \ | ||
46 | per_cpu(thermal_throttle_##name, cpu)); \ | ||
47 | else \ | ||
48 | ret = 0; \ | ||
49 | preempt_enable(); \ | ||
50 | \ | ||
51 | return ret; \ | ||
52 | } | ||
53 | |||
54 | define_therm_throt_sysdev_show_func(count); | ||
55 | define_therm_throt_sysdev_one_ro(count); | ||
56 | |||
57 | static struct attribute *thermal_throttle_attrs[] = { | ||
58 | &attr_count.attr, | ||
59 | NULL | ||
60 | }; | ||
61 | |||
62 | static struct attribute_group thermal_throttle_attr_group = { | ||
63 | .attrs = thermal_throttle_attrs, | ||
64 | .name = "thermal_throttle" | ||
65 | }; | ||
66 | #endif /* CONFIG_SYSFS */ | ||
67 | |||
68 | /** | ||
69 | * therm_throt_process - Process thermal throttling event from interrupt | ||
70 | * @curr: Whether the condition is current or not (boolean), since the | ||
71 | * thermal interrupt normally gets called both when the thermal | ||
72 | * event begins and once the event has ended. | ||
73 | * | ||
74 | * This function is called by the thermal interrupt after the | ||
75 | * IRQ has been acknowledged. | ||
76 | * | ||
77 | * It will take care of rate limiting and printing messages to the syslog. | ||
78 | * | ||
79 | * Returns: 0 : Event should NOT be further logged, i.e. still in | ||
80 | * "timeout" from previous log message. | ||
81 | * 1 : Event should be logged further, and a message has been | ||
82 | * printed to the syslog. | ||
83 | */ | ||
84 | int therm_throt_process(int curr) | ||
85 | { | ||
86 | unsigned int cpu = smp_processor_id(); | ||
87 | __u64 tmp_jiffs = get_jiffies_64(); | ||
88 | |||
89 | if (curr) | ||
90 | __get_cpu_var(thermal_throttle_count)++; | ||
91 | |||
92 | if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) | ||
93 | return 0; | ||
94 | |||
95 | __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL; | ||
96 | |||
97 | /* if we just entered the thermal event */ | ||
98 | if (curr) { | ||
99 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " | ||
100 | "cpu clock throttled (total events = %lu)\n", cpu, | ||
101 | __get_cpu_var(thermal_throttle_count)); | ||
102 | |||
103 | add_taint(TAINT_MACHINE_CHECK); | ||
104 | } else { | ||
105 | printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); | ||
106 | } | ||
107 | |||
108 | return 1; | ||
109 | } | ||
110 | |||
111 | #ifdef CONFIG_SYSFS | ||
112 | /* Add/Remove thermal_throttle interface for CPU device */ | ||
113 | static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev) | ||
114 | { | ||
115 | sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); | ||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | #ifdef CONFIG_HOTPLUG_CPU | ||
120 | static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev) | ||
121 | { | ||
122 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | /* Mutex protecting device creation against CPU hotplug */ | ||
127 | static DEFINE_MUTEX(therm_cpu_lock); | ||
128 | |||
129 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
130 | static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, | ||
131 | unsigned long action, | ||
132 | void *hcpu) | ||
133 | { | ||
134 | unsigned int cpu = (unsigned long)hcpu; | ||
135 | struct sys_device *sys_dev; | ||
136 | |||
137 | sys_dev = get_cpu_sysdev(cpu); | ||
138 | mutex_lock(&therm_cpu_lock); | ||
139 | switch (action) { | ||
140 | case CPU_ONLINE: | ||
141 | thermal_throttle_add_dev(sys_dev); | ||
142 | break; | ||
143 | case CPU_DEAD: | ||
144 | thermal_throttle_remove_dev(sys_dev); | ||
145 | break; | ||
146 | } | ||
147 | mutex_unlock(&therm_cpu_lock); | ||
148 | return NOTIFY_OK; | ||
149 | } | ||
150 | |||
151 | static struct notifier_block thermal_throttle_cpu_notifier = | ||
152 | { | ||
153 | .notifier_call = thermal_throttle_cpu_callback, | ||
154 | }; | ||
155 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
156 | |||
157 | static __init int thermal_throttle_init_device(void) | ||
158 | { | ||
159 | unsigned int cpu = 0; | ||
160 | |||
161 | if (!atomic_read(&therm_throt_en)) | ||
162 | return 0; | ||
163 | |||
164 | register_hotcpu_notifier(&thermal_throttle_cpu_notifier); | ||
165 | |||
166 | #ifdef CONFIG_HOTPLUG_CPU | ||
167 | mutex_lock(&therm_cpu_lock); | ||
168 | #endif | ||
169 | /* connect live CPUs to sysfs */ | ||
170 | for_each_online_cpu(cpu) | ||
171 | thermal_throttle_add_dev(get_cpu_sysdev(cpu)); | ||
172 | #ifdef CONFIG_HOTPLUG_CPU | ||
173 | mutex_unlock(&therm_cpu_lock); | ||
174 | #endif | ||
175 | |||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | device_initcall(thermal_throttle_init_device); | ||
180 | #endif /* CONFIG_SYSFS */ | ||
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c index ad87fa58058d..8bf23cc80c63 100644 --- a/arch/i386/kernel/cpu/nexgen.c +++ b/arch/i386/kernel/cpu/nexgen.c | |||
@@ -10,7 +10,7 @@ | |||
10 | * to have CPUID. (Thanks to Herbert Oppmann) | 10 | * to have CPUID. (Thanks to Herbert Oppmann) |
11 | */ | 11 | */ |
12 | 12 | ||
13 | static int __init deep_magic_nexgen_probe(void) | 13 | static int __cpuinit deep_magic_nexgen_probe(void) |
14 | { | 14 | { |
15 | int ret; | 15 | int ret; |
16 | 16 | ||
@@ -27,21 +27,20 @@ static int __init deep_magic_nexgen_probe(void) | |||
27 | return ret; | 27 | return ret; |
28 | } | 28 | } |
29 | 29 | ||
30 | static void __init init_nexgen(struct cpuinfo_x86 * c) | 30 | static void __cpuinit init_nexgen(struct cpuinfo_x86 * c) |
31 | { | 31 | { |
32 | c->x86_cache_size = 256; /* A few had 1 MB... */ | 32 | c->x86_cache_size = 256; /* A few had 1 MB... */ |
33 | } | 33 | } |
34 | 34 | ||
35 | static void __init nexgen_identify(struct cpuinfo_x86 * c) | 35 | static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c) |
36 | { | 36 | { |
37 | /* Detect NexGen with old hypercode */ | 37 | /* Detect NexGen with old hypercode */ |
38 | if ( deep_magic_nexgen_probe() ) { | 38 | if ( deep_magic_nexgen_probe() ) { |
39 | strcpy(c->x86_vendor_id, "NexGenDriven"); | 39 | strcpy(c->x86_vendor_id, "NexGenDriven"); |
40 | } | 40 | } |
41 | generic_identify(c); | ||
42 | } | 41 | } |
43 | 42 | ||
44 | static struct cpu_dev nexgen_cpu_dev __initdata = { | 43 | static struct cpu_dev nexgen_cpu_dev __cpuinitdata = { |
45 | .c_vendor = "Nexgen", | 44 | .c_vendor = "Nexgen", |
46 | .c_ident = { "NexGenDriven" }, | 45 | .c_ident = { "NexGenDriven" }, |
47 | .c_models = { | 46 | .c_models = { |
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c index f54a15268ed7..76aac088a323 100644 --- a/arch/i386/kernel/cpu/proc.c +++ b/arch/i386/kernel/cpu/proc.c | |||
@@ -46,8 +46,8 @@ static int show_cpuinfo(struct seq_file *m, void *v) | |||
46 | 46 | ||
47 | /* Intel-defined (#2) */ | 47 | /* Intel-defined (#2) */ |
48 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", | 48 | "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", |
49 | "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, | 49 | "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL, |
50 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 50 | NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL, |
51 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, | 51 | NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, |
52 | 52 | ||
53 | /* VIA/Cyrix/Centaur-defined */ | 53 | /* VIA/Cyrix/Centaur-defined */ |
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c index d08d5a2811c8..9317f7414989 100644 --- a/arch/i386/kernel/cpu/rise.c +++ b/arch/i386/kernel/cpu/rise.c | |||
@@ -5,7 +5,7 @@ | |||
5 | 5 | ||
6 | #include "cpu.h" | 6 | #include "cpu.h" |
7 | 7 | ||
8 | static void __init init_rise(struct cpuinfo_x86 *c) | 8 | static void __cpuinit init_rise(struct cpuinfo_x86 *c) |
9 | { | 9 | { |
10 | printk("CPU: Rise iDragon"); | 10 | printk("CPU: Rise iDragon"); |
11 | if (c->x86_model > 2) | 11 | if (c->x86_model > 2) |
@@ -28,7 +28,7 @@ static void __init init_rise(struct cpuinfo_x86 *c) | |||
28 | set_bit(X86_FEATURE_CX8, c->x86_capability); | 28 | set_bit(X86_FEATURE_CX8, c->x86_capability); |
29 | } | 29 | } |
30 | 30 | ||
31 | static struct cpu_dev rise_cpu_dev __initdata = { | 31 | static struct cpu_dev rise_cpu_dev __cpuinitdata = { |
32 | .c_vendor = "Rise", | 32 | .c_vendor = "Rise", |
33 | .c_ident = { "RiseRiseRise" }, | 33 | .c_ident = { "RiseRiseRise" }, |
34 | .c_models = { | 34 | .c_models = { |
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c index 7214c9b577ab..4056fb7d2cdf 100644 --- a/arch/i386/kernel/cpu/transmeta.c +++ b/arch/i386/kernel/cpu/transmeta.c | |||
@@ -5,7 +5,7 @@ | |||
5 | #include <asm/msr.h> | 5 | #include <asm/msr.h> |
6 | #include "cpu.h" | 6 | #include "cpu.h" |
7 | 7 | ||
8 | static void __init init_transmeta(struct cpuinfo_x86 *c) | 8 | static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) |
9 | { | 9 | { |
10 | unsigned int cap_mask, uk, max, dummy; | 10 | unsigned int cap_mask, uk, max, dummy; |
11 | unsigned int cms_rev1, cms_rev2; | 11 | unsigned int cms_rev1, cms_rev2; |
@@ -85,10 +85,9 @@ static void __init init_transmeta(struct cpuinfo_x86 *c) | |||
85 | #endif | 85 | #endif |
86 | } | 86 | } |
87 | 87 | ||
88 | static void __init transmeta_identify(struct cpuinfo_x86 * c) | 88 | static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c) |
89 | { | 89 | { |
90 | u32 xlvl; | 90 | u32 xlvl; |
91 | generic_identify(c); | ||
92 | 91 | ||
93 | /* Transmeta-defined flags: level 0x80860001 */ | 92 | /* Transmeta-defined flags: level 0x80860001 */ |
94 | xlvl = cpuid_eax(0x80860000); | 93 | xlvl = cpuid_eax(0x80860000); |
@@ -98,7 +97,7 @@ static void __init transmeta_identify(struct cpuinfo_x86 * c) | |||
98 | } | 97 | } |
99 | } | 98 | } |
100 | 99 | ||
101 | static struct cpu_dev transmeta_cpu_dev __initdata = { | 100 | static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { |
102 | .c_vendor = "Transmeta", | 101 | .c_vendor = "Transmeta", |
103 | .c_ident = { "GenuineTMx86", "TransmetaCPU" }, | 102 | .c_ident = { "GenuineTMx86", "TransmetaCPU" }, |
104 | .c_init = init_transmeta, | 103 | .c_init = init_transmeta, |
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c index 2cd988f6dc55..1bf3f87e9c5b 100644 --- a/arch/i386/kernel/cpu/umc.c +++ b/arch/i386/kernel/cpu/umc.c | |||
@@ -5,12 +5,8 @@ | |||
5 | 5 | ||
6 | /* UMC chips appear to be only either 386 or 486, so no special init takes place. | 6 | /* UMC chips appear to be only either 386 or 486, so no special init takes place. |
7 | */ | 7 | */ |
8 | static void __init init_umc(struct cpuinfo_x86 * c) | ||
9 | { | ||
10 | |||
11 | } | ||
12 | 8 | ||
13 | static struct cpu_dev umc_cpu_dev __initdata = { | 9 | static struct cpu_dev umc_cpu_dev __cpuinitdata = { |
14 | .c_vendor = "UMC", | 10 | .c_vendor = "UMC", |
15 | .c_ident = { "UMC UMC UMC" }, | 11 | .c_ident = { "UMC UMC UMC" }, |
16 | .c_models = { | 12 | .c_models = { |
@@ -21,7 +17,6 @@ static struct cpu_dev umc_cpu_dev __initdata = { | |||
21 | } | 17 | } |
22 | }, | 18 | }, |
23 | }, | 19 | }, |
24 | .c_init = init_umc, | ||
25 | }; | 20 | }; |
26 | 21 | ||
27 | int __init umc_init_cpu(void) | 22 | int __init umc_init_cpu(void) |
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c index 5b96f038367f..67d297dc1003 100644 --- a/arch/i386/kernel/crash.c +++ b/arch/i386/kernel/crash.c | |||
@@ -22,6 +22,8 @@ | |||
22 | #include <asm/nmi.h> | 22 | #include <asm/nmi.h> |
23 | #include <asm/hw_irq.h> | 23 | #include <asm/hw_irq.h> |
24 | #include <asm/apic.h> | 24 | #include <asm/apic.h> |
25 | #include <asm/kdebug.h> | ||
26 | |||
25 | #include <mach_ipi.h> | 27 | #include <mach_ipi.h> |
26 | 28 | ||
27 | 29 | ||
@@ -93,16 +95,25 @@ static void crash_save_self(struct pt_regs *regs) | |||
93 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) | 95 | #if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) |
94 | static atomic_t waiting_for_crash_ipi; | 96 | static atomic_t waiting_for_crash_ipi; |
95 | 97 | ||
96 | static int crash_nmi_callback(struct pt_regs *regs, int cpu) | 98 | static int crash_nmi_callback(struct notifier_block *self, |
99 | unsigned long val, void *data) | ||
97 | { | 100 | { |
101 | struct pt_regs *regs; | ||
98 | struct pt_regs fixed_regs; | 102 | struct pt_regs fixed_regs; |
103 | int cpu; | ||
104 | |||
105 | if (val != DIE_NMI_IPI) | ||
106 | return NOTIFY_OK; | ||
107 | |||
108 | regs = ((struct die_args *)data)->regs; | ||
109 | cpu = raw_smp_processor_id(); | ||
99 | 110 | ||
100 | /* Don't do anything if this handler is invoked on crashing cpu. | 111 | /* Don't do anything if this handler is invoked on crashing cpu. |
101 | * Otherwise, system will completely hang. Crashing cpu can get | 112 | * Otherwise, system will completely hang. Crashing cpu can get |
102 | * an NMI if system was initially booted with nmi_watchdog parameter. | 113 | * an NMI if system was initially booted with nmi_watchdog parameter. |
103 | */ | 114 | */ |
104 | if (cpu == crashing_cpu) | 115 | if (cpu == crashing_cpu) |
105 | return 1; | 116 | return NOTIFY_STOP; |
106 | local_irq_disable(); | 117 | local_irq_disable(); |
107 | 118 | ||
108 | if (!user_mode_vm(regs)) { | 119 | if (!user_mode_vm(regs)) { |
@@ -125,13 +136,18 @@ static void smp_send_nmi_allbutself(void) | |||
125 | send_IPI_allbutself(NMI_VECTOR); | 136 | send_IPI_allbutself(NMI_VECTOR); |
126 | } | 137 | } |
127 | 138 | ||
139 | static struct notifier_block crash_nmi_nb = { | ||
140 | .notifier_call = crash_nmi_callback, | ||
141 | }; | ||
142 | |||
128 | static void nmi_shootdown_cpus(void) | 143 | static void nmi_shootdown_cpus(void) |
129 | { | 144 | { |
130 | unsigned long msecs; | 145 | unsigned long msecs; |
131 | 146 | ||
132 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); | 147 | atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); |
133 | /* Would it be better to replace the trap vector here? */ | 148 | /* Would it be better to replace the trap vector here? */ |
134 | set_nmi_callback(crash_nmi_callback); | 149 | if (register_die_notifier(&crash_nmi_nb)) |
150 | return; /* return what? */ | ||
135 | /* Ensure the new callback function is set before sending | 151 | /* Ensure the new callback function is set before sending |
136 | * out the NMI | 152 | * out the NMI |
137 | */ | 153 | */ |
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S index 87f9f60b803b..5a63d6fdb70e 100644 --- a/arch/i386/kernel/entry.S +++ b/arch/i386/kernel/entry.S | |||
@@ -76,8 +76,15 @@ DF_MASK = 0x00000400 | |||
76 | NT_MASK = 0x00004000 | 76 | NT_MASK = 0x00004000 |
77 | VM_MASK = 0x00020000 | 77 | VM_MASK = 0x00020000 |
78 | 78 | ||
79 | /* These are replaced for paravirtualization */ | ||
80 | #define DISABLE_INTERRUPTS cli | ||
81 | #define ENABLE_INTERRUPTS sti | ||
82 | #define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit | ||
83 | #define INTERRUPT_RETURN iret | ||
84 | #define GET_CR0_INTO_EAX movl %cr0, %eax | ||
85 | |||
79 | #ifdef CONFIG_PREEMPT | 86 | #ifdef CONFIG_PREEMPT |
80 | #define preempt_stop cli; TRACE_IRQS_OFF | 87 | #define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF |
81 | #else | 88 | #else |
82 | #define preempt_stop | 89 | #define preempt_stop |
83 | #define resume_kernel restore_nocheck | 90 | #define resume_kernel restore_nocheck |
@@ -176,18 +183,21 @@ VM_MASK = 0x00020000 | |||
176 | 183 | ||
177 | #define RING0_INT_FRAME \ | 184 | #define RING0_INT_FRAME \ |
178 | CFI_STARTPROC simple;\ | 185 | CFI_STARTPROC simple;\ |
186 | CFI_SIGNAL_FRAME;\ | ||
179 | CFI_DEF_CFA esp, 3*4;\ | 187 | CFI_DEF_CFA esp, 3*4;\ |
180 | /*CFI_OFFSET cs, -2*4;*/\ | 188 | /*CFI_OFFSET cs, -2*4;*/\ |
181 | CFI_OFFSET eip, -3*4 | 189 | CFI_OFFSET eip, -3*4 |
182 | 190 | ||
183 | #define RING0_EC_FRAME \ | 191 | #define RING0_EC_FRAME \ |
184 | CFI_STARTPROC simple;\ | 192 | CFI_STARTPROC simple;\ |
193 | CFI_SIGNAL_FRAME;\ | ||
185 | CFI_DEF_CFA esp, 4*4;\ | 194 | CFI_DEF_CFA esp, 4*4;\ |
186 | /*CFI_OFFSET cs, -2*4;*/\ | 195 | /*CFI_OFFSET cs, -2*4;*/\ |
187 | CFI_OFFSET eip, -3*4 | 196 | CFI_OFFSET eip, -3*4 |
188 | 197 | ||
189 | #define RING0_PTREGS_FRAME \ | 198 | #define RING0_PTREGS_FRAME \ |
190 | CFI_STARTPROC simple;\ | 199 | CFI_STARTPROC simple;\ |
200 | CFI_SIGNAL_FRAME;\ | ||
191 | CFI_DEF_CFA esp, OLDESP-EBX;\ | 201 | CFI_DEF_CFA esp, OLDESP-EBX;\ |
192 | /*CFI_OFFSET cs, CS-OLDESP;*/\ | 202 | /*CFI_OFFSET cs, CS-OLDESP;*/\ |
193 | CFI_OFFSET eip, EIP-OLDESP;\ | 203 | CFI_OFFSET eip, EIP-OLDESP;\ |
@@ -233,10 +243,11 @@ ret_from_intr: | |||
233 | check_userspace: | 243 | check_userspace: |
234 | movl EFLAGS(%esp), %eax # mix EFLAGS and CS | 244 | movl EFLAGS(%esp), %eax # mix EFLAGS and CS |
235 | movb CS(%esp), %al | 245 | movb CS(%esp), %al |
236 | testl $(VM_MASK | 3), %eax | 246 | andl $(VM_MASK | SEGMENT_RPL_MASK), %eax |
237 | jz resume_kernel | 247 | cmpl $USER_RPL, %eax |
248 | jb resume_kernel # not returning to v8086 or userspace | ||
238 | ENTRY(resume_userspace) | 249 | ENTRY(resume_userspace) |
239 | cli # make sure we don't miss an interrupt | 250 | DISABLE_INTERRUPTS # make sure we don't miss an interrupt |
240 | # setting need_resched or sigpending | 251 | # setting need_resched or sigpending |
241 | # between sampling and the iret | 252 | # between sampling and the iret |
242 | movl TI_flags(%ebp), %ecx | 253 | movl TI_flags(%ebp), %ecx |
@@ -247,7 +258,7 @@ ENTRY(resume_userspace) | |||
247 | 258 | ||
248 | #ifdef CONFIG_PREEMPT | 259 | #ifdef CONFIG_PREEMPT |
249 | ENTRY(resume_kernel) | 260 | ENTRY(resume_kernel) |
250 | cli | 261 | DISABLE_INTERRUPTS |
251 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? | 262 | cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? |
252 | jnz restore_nocheck | 263 | jnz restore_nocheck |
253 | need_resched: | 264 | need_resched: |
@@ -267,6 +278,7 @@ need_resched: | |||
267 | # sysenter call handler stub | 278 | # sysenter call handler stub |
268 | ENTRY(sysenter_entry) | 279 | ENTRY(sysenter_entry) |
269 | CFI_STARTPROC simple | 280 | CFI_STARTPROC simple |
281 | CFI_SIGNAL_FRAME | ||
270 | CFI_DEF_CFA esp, 0 | 282 | CFI_DEF_CFA esp, 0 |
271 | CFI_REGISTER esp, ebp | 283 | CFI_REGISTER esp, ebp |
272 | movl TSS_sysenter_esp0(%esp),%esp | 284 | movl TSS_sysenter_esp0(%esp),%esp |
@@ -275,7 +287,7 @@ sysenter_past_esp: | |||
275 | * No need to follow this irqs on/off section: the syscall | 287 | * No need to follow this irqs on/off section: the syscall |
276 | * disabled irqs and here we enable it straight after entry: | 288 | * disabled irqs and here we enable it straight after entry: |
277 | */ | 289 | */ |
278 | sti | 290 | ENABLE_INTERRUPTS |
279 | pushl $(__USER_DS) | 291 | pushl $(__USER_DS) |
280 | CFI_ADJUST_CFA_OFFSET 4 | 292 | CFI_ADJUST_CFA_OFFSET 4 |
281 | /*CFI_REL_OFFSET ss, 0*/ | 293 | /*CFI_REL_OFFSET ss, 0*/ |
@@ -320,7 +332,7 @@ sysenter_past_esp: | |||
320 | jae syscall_badsys | 332 | jae syscall_badsys |
321 | call *sys_call_table(,%eax,4) | 333 | call *sys_call_table(,%eax,4) |
322 | movl %eax,EAX(%esp) | 334 | movl %eax,EAX(%esp) |
323 | cli | 335 | DISABLE_INTERRUPTS |
324 | TRACE_IRQS_OFF | 336 | TRACE_IRQS_OFF |
325 | movl TI_flags(%ebp), %ecx | 337 | movl TI_flags(%ebp), %ecx |
326 | testw $_TIF_ALLWORK_MASK, %cx | 338 | testw $_TIF_ALLWORK_MASK, %cx |
@@ -330,8 +342,7 @@ sysenter_past_esp: | |||
330 | movl OLDESP(%esp), %ecx | 342 | movl OLDESP(%esp), %ecx |
331 | xorl %ebp,%ebp | 343 | xorl %ebp,%ebp |
332 | TRACE_IRQS_ON | 344 | TRACE_IRQS_ON |
333 | sti | 345 | ENABLE_INTERRUPTS_SYSEXIT |
334 | sysexit | ||
335 | CFI_ENDPROC | 346 | CFI_ENDPROC |
336 | 347 | ||
337 | 348 | ||
@@ -356,7 +367,7 @@ syscall_call: | |||
356 | call *sys_call_table(,%eax,4) | 367 | call *sys_call_table(,%eax,4) |
357 | movl %eax,EAX(%esp) # store the return value | 368 | movl %eax,EAX(%esp) # store the return value |
358 | syscall_exit: | 369 | syscall_exit: |
359 | cli # make sure we don't miss an interrupt | 370 | DISABLE_INTERRUPTS # make sure we don't miss an interrupt |
360 | # setting need_resched or sigpending | 371 | # setting need_resched or sigpending |
361 | # between sampling and the iret | 372 | # between sampling and the iret |
362 | TRACE_IRQS_OFF | 373 | TRACE_IRQS_OFF |
@@ -371,8 +382,8 @@ restore_all: | |||
371 | # See comments in process.c:copy_thread() for details. | 382 | # See comments in process.c:copy_thread() for details. |
372 | movb OLDSS(%esp), %ah | 383 | movb OLDSS(%esp), %ah |
373 | movb CS(%esp), %al | 384 | movb CS(%esp), %al |
374 | andl $(VM_MASK | (4 << 8) | 3), %eax | 385 | andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax |
375 | cmpl $((4 << 8) | 3), %eax | 386 | cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax |
376 | CFI_REMEMBER_STATE | 387 | CFI_REMEMBER_STATE |
377 | je ldt_ss # returning to user-space with LDT SS | 388 | je ldt_ss # returning to user-space with LDT SS |
378 | restore_nocheck: | 389 | restore_nocheck: |
@@ -381,11 +392,11 @@ restore_nocheck_notrace: | |||
381 | RESTORE_REGS | 392 | RESTORE_REGS |
382 | addl $4, %esp | 393 | addl $4, %esp |
383 | CFI_ADJUST_CFA_OFFSET -4 | 394 | CFI_ADJUST_CFA_OFFSET -4 |
384 | 1: iret | 395 | 1: INTERRUPT_RETURN |
385 | .section .fixup,"ax" | 396 | .section .fixup,"ax" |
386 | iret_exc: | 397 | iret_exc: |
387 | TRACE_IRQS_ON | 398 | TRACE_IRQS_ON |
388 | sti | 399 | ENABLE_INTERRUPTS |
389 | pushl $0 # no error code | 400 | pushl $0 # no error code |
390 | pushl $do_iret_error | 401 | pushl $do_iret_error |
391 | jmp error_code | 402 | jmp error_code |
@@ -409,7 +420,7 @@ ldt_ss: | |||
409 | * dosemu and wine happy. */ | 420 | * dosemu and wine happy. */ |
410 | subl $8, %esp # reserve space for switch16 pointer | 421 | subl $8, %esp # reserve space for switch16 pointer |
411 | CFI_ADJUST_CFA_OFFSET 8 | 422 | CFI_ADJUST_CFA_OFFSET 8 |
412 | cli | 423 | DISABLE_INTERRUPTS |
413 | TRACE_IRQS_OFF | 424 | TRACE_IRQS_OFF |
414 | movl %esp, %eax | 425 | movl %esp, %eax |
415 | /* Set up the 16bit stack frame with switch32 pointer on top, | 426 | /* Set up the 16bit stack frame with switch32 pointer on top, |
@@ -419,7 +430,7 @@ ldt_ss: | |||
419 | TRACE_IRQS_IRET | 430 | TRACE_IRQS_IRET |
420 | RESTORE_REGS | 431 | RESTORE_REGS |
421 | lss 20+4(%esp), %esp # switch to 16bit stack | 432 | lss 20+4(%esp), %esp # switch to 16bit stack |
422 | 1: iret | 433 | 1: INTERRUPT_RETURN |
423 | .section __ex_table,"a" | 434 | .section __ex_table,"a" |
424 | .align 4 | 435 | .align 4 |
425 | .long 1b,iret_exc | 436 | .long 1b,iret_exc |
@@ -434,7 +445,7 @@ work_pending: | |||
434 | jz work_notifysig | 445 | jz work_notifysig |
435 | work_resched: | 446 | work_resched: |
436 | call schedule | 447 | call schedule |
437 | cli # make sure we don't miss an interrupt | 448 | DISABLE_INTERRUPTS # make sure we don't miss an interrupt |
438 | # setting need_resched or sigpending | 449 | # setting need_resched or sigpending |
439 | # between sampling and the iret | 450 | # between sampling and the iret |
440 | TRACE_IRQS_OFF | 451 | TRACE_IRQS_OFF |
@@ -490,7 +501,7 @@ syscall_exit_work: | |||
490 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl | 501 | testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl |
491 | jz work_pending | 502 | jz work_pending |
492 | TRACE_IRQS_ON | 503 | TRACE_IRQS_ON |
493 | sti # could let do_syscall_trace() call | 504 | ENABLE_INTERRUPTS # could let do_syscall_trace() call |
494 | # schedule() instead | 505 | # schedule() instead |
495 | movl %esp, %eax | 506 | movl %esp, %eax |
496 | movl $1, %edx | 507 | movl $1, %edx |
@@ -591,11 +602,9 @@ ENTRY(name) \ | |||
591 | /* The include is where all of the SMP etc. interrupts come from */ | 602 | /* The include is where all of the SMP etc. interrupts come from */ |
592 | #include "entry_arch.h" | 603 | #include "entry_arch.h" |
593 | 604 | ||
594 | ENTRY(divide_error) | 605 | KPROBE_ENTRY(page_fault) |
595 | RING0_INT_FRAME | 606 | RING0_EC_FRAME |
596 | pushl $0 # no error code | 607 | pushl $do_page_fault |
597 | CFI_ADJUST_CFA_OFFSET 4 | ||
598 | pushl $do_divide_error | ||
599 | CFI_ADJUST_CFA_OFFSET 4 | 608 | CFI_ADJUST_CFA_OFFSET 4 |
600 | ALIGN | 609 | ALIGN |
601 | error_code: | 610 | error_code: |
@@ -645,6 +654,7 @@ error_code: | |||
645 | call *%edi | 654 | call *%edi |
646 | jmp ret_from_exception | 655 | jmp ret_from_exception |
647 | CFI_ENDPROC | 656 | CFI_ENDPROC |
657 | KPROBE_END(page_fault) | ||
648 | 658 | ||
649 | ENTRY(coprocessor_error) | 659 | ENTRY(coprocessor_error) |
650 | RING0_INT_FRAME | 660 | RING0_INT_FRAME |
@@ -669,7 +679,7 @@ ENTRY(device_not_available) | |||
669 | pushl $-1 # mark this as an int | 679 | pushl $-1 # mark this as an int |
670 | CFI_ADJUST_CFA_OFFSET 4 | 680 | CFI_ADJUST_CFA_OFFSET 4 |
671 | SAVE_ALL | 681 | SAVE_ALL |
672 | movl %cr0, %eax | 682 | GET_CR0_INTO_EAX |
673 | testl $0x4, %eax # EM (math emulation bit) | 683 | testl $0x4, %eax # EM (math emulation bit) |
674 | jne device_not_available_emulate | 684 | jne device_not_available_emulate |
675 | preempt_stop | 685 | preempt_stop |
@@ -702,9 +712,15 @@ device_not_available_emulate: | |||
702 | jne ok; \ | 712 | jne ok; \ |
703 | label: \ | 713 | label: \ |
704 | movl TSS_sysenter_esp0+offset(%esp),%esp; \ | 714 | movl TSS_sysenter_esp0+offset(%esp),%esp; \ |
715 | CFI_DEF_CFA esp, 0; \ | ||
716 | CFI_UNDEFINED eip; \ | ||
705 | pushfl; \ | 717 | pushfl; \ |
718 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
706 | pushl $__KERNEL_CS; \ | 719 | pushl $__KERNEL_CS; \ |
707 | pushl $sysenter_past_esp | 720 | CFI_ADJUST_CFA_OFFSET 4; \ |
721 | pushl $sysenter_past_esp; \ | ||
722 | CFI_ADJUST_CFA_OFFSET 4; \ | ||
723 | CFI_REL_OFFSET eip, 0 | ||
708 | 724 | ||
709 | KPROBE_ENTRY(debug) | 725 | KPROBE_ENTRY(debug) |
710 | RING0_INT_FRAME | 726 | RING0_INT_FRAME |
@@ -720,7 +736,8 @@ debug_stack_correct: | |||
720 | call do_debug | 736 | call do_debug |
721 | jmp ret_from_exception | 737 | jmp ret_from_exception |
722 | CFI_ENDPROC | 738 | CFI_ENDPROC |
723 | .previous .text | 739 | KPROBE_END(debug) |
740 | |||
724 | /* | 741 | /* |
725 | * NMI is doubly nasty. It can happen _while_ we're handling | 742 | * NMI is doubly nasty. It can happen _while_ we're handling |
726 | * a debug fault, and the debug fault hasn't yet been able to | 743 | * a debug fault, and the debug fault hasn't yet been able to |
@@ -729,7 +746,7 @@ debug_stack_correct: | |||
729 | * check whether we got an NMI on the debug path where the debug | 746 | * check whether we got an NMI on the debug path where the debug |
730 | * fault happened on the sysenter path. | 747 | * fault happened on the sysenter path. |
731 | */ | 748 | */ |
732 | ENTRY(nmi) | 749 | KPROBE_ENTRY(nmi) |
733 | RING0_INT_FRAME | 750 | RING0_INT_FRAME |
734 | pushl %eax | 751 | pushl %eax |
735 | CFI_ADJUST_CFA_OFFSET 4 | 752 | CFI_ADJUST_CFA_OFFSET 4 |
@@ -754,6 +771,7 @@ ENTRY(nmi) | |||
754 | cmpl $sysenter_entry,12(%esp) | 771 | cmpl $sysenter_entry,12(%esp) |
755 | je nmi_debug_stack_check | 772 | je nmi_debug_stack_check |
756 | nmi_stack_correct: | 773 | nmi_stack_correct: |
774 | /* We have a RING0_INT_FRAME here */ | ||
757 | pushl %eax | 775 | pushl %eax |
758 | CFI_ADJUST_CFA_OFFSET 4 | 776 | CFI_ADJUST_CFA_OFFSET 4 |
759 | SAVE_ALL | 777 | SAVE_ALL |
@@ -764,9 +782,12 @@ nmi_stack_correct: | |||
764 | CFI_ENDPROC | 782 | CFI_ENDPROC |
765 | 783 | ||
766 | nmi_stack_fixup: | 784 | nmi_stack_fixup: |
785 | RING0_INT_FRAME | ||
767 | FIX_STACK(12,nmi_stack_correct, 1) | 786 | FIX_STACK(12,nmi_stack_correct, 1) |
768 | jmp nmi_stack_correct | 787 | jmp nmi_stack_correct |
788 | |||
769 | nmi_debug_stack_check: | 789 | nmi_debug_stack_check: |
790 | /* We have a RING0_INT_FRAME here */ | ||
770 | cmpw $__KERNEL_CS,16(%esp) | 791 | cmpw $__KERNEL_CS,16(%esp) |
771 | jne nmi_stack_correct | 792 | jne nmi_stack_correct |
772 | cmpl $debug,(%esp) | 793 | cmpl $debug,(%esp) |
@@ -777,8 +798,10 @@ nmi_debug_stack_check: | |||
777 | jmp nmi_stack_correct | 798 | jmp nmi_stack_correct |
778 | 799 | ||
779 | nmi_16bit_stack: | 800 | nmi_16bit_stack: |
780 | RING0_INT_FRAME | 801 | /* We have a RING0_INT_FRAME here. |
781 | /* create the pointer to lss back */ | 802 | * |
803 | * create the pointer to lss back | ||
804 | */ | ||
782 | pushl %ss | 805 | pushl %ss |
783 | CFI_ADJUST_CFA_OFFSET 4 | 806 | CFI_ADJUST_CFA_OFFSET 4 |
784 | pushl %esp | 807 | pushl %esp |
@@ -799,12 +822,13 @@ nmi_16bit_stack: | |||
799 | call do_nmi | 822 | call do_nmi |
800 | RESTORE_REGS | 823 | RESTORE_REGS |
801 | lss 12+4(%esp), %esp # back to 16bit stack | 824 | lss 12+4(%esp), %esp # back to 16bit stack |
802 | 1: iret | 825 | 1: INTERRUPT_RETURN |
803 | CFI_ENDPROC | 826 | CFI_ENDPROC |
804 | .section __ex_table,"a" | 827 | .section __ex_table,"a" |
805 | .align 4 | 828 | .align 4 |
806 | .long 1b,iret_exc | 829 | .long 1b,iret_exc |
807 | .previous | 830 | .previous |
831 | KPROBE_END(nmi) | ||
808 | 832 | ||
809 | KPROBE_ENTRY(int3) | 833 | KPROBE_ENTRY(int3) |
810 | RING0_INT_FRAME | 834 | RING0_INT_FRAME |
@@ -816,7 +840,7 @@ KPROBE_ENTRY(int3) | |||
816 | call do_int3 | 840 | call do_int3 |
817 | jmp ret_from_exception | 841 | jmp ret_from_exception |
818 | CFI_ENDPROC | 842 | CFI_ENDPROC |
819 | .previous .text | 843 | KPROBE_END(int3) |
820 | 844 | ||
821 | ENTRY(overflow) | 845 | ENTRY(overflow) |
822 | RING0_INT_FRAME | 846 | RING0_INT_FRAME |
@@ -881,7 +905,7 @@ KPROBE_ENTRY(general_protection) | |||
881 | CFI_ADJUST_CFA_OFFSET 4 | 905 | CFI_ADJUST_CFA_OFFSET 4 |
882 | jmp error_code | 906 | jmp error_code |
883 | CFI_ENDPROC | 907 | CFI_ENDPROC |
884 | .previous .text | 908 | KPROBE_END(general_protection) |
885 | 909 | ||
886 | ENTRY(alignment_check) | 910 | ENTRY(alignment_check) |
887 | RING0_EC_FRAME | 911 | RING0_EC_FRAME |
@@ -890,13 +914,14 @@ ENTRY(alignment_check) | |||
890 | jmp error_code | 914 | jmp error_code |
891 | CFI_ENDPROC | 915 | CFI_ENDPROC |
892 | 916 | ||
893 | KPROBE_ENTRY(page_fault) | 917 | ENTRY(divide_error) |
894 | RING0_EC_FRAME | 918 | RING0_INT_FRAME |
895 | pushl $do_page_fault | 919 | pushl $0 # no error code |
920 | CFI_ADJUST_CFA_OFFSET 4 | ||
921 | pushl $do_divide_error | ||
896 | CFI_ADJUST_CFA_OFFSET 4 | 922 | CFI_ADJUST_CFA_OFFSET 4 |
897 | jmp error_code | 923 | jmp error_code |
898 | CFI_ENDPROC | 924 | CFI_ENDPROC |
899 | .previous .text | ||
900 | 925 | ||
901 | #ifdef CONFIG_X86_MCE | 926 | #ifdef CONFIG_X86_MCE |
902 | ENTRY(machine_check) | 927 | ENTRY(machine_check) |
@@ -949,6 +974,19 @@ ENTRY(arch_unwind_init_running) | |||
949 | ENDPROC(arch_unwind_init_running) | 974 | ENDPROC(arch_unwind_init_running) |
950 | #endif | 975 | #endif |
951 | 976 | ||
/*
 * kernel_thread_helper: pushes a zero word as a fake return address, copies
 * %edx into %eax and pushes it, calls through the pointer held in %ebx, then
 * pushes %eax as left by that call and calls do_exit.
 * NOTE(review): presumably %ebx holds the thread function and %edx its
 * argument, prepared by whoever creates the thread -- confirm against caller.
 */
977 | ENTRY(kernel_thread_helper) | ||
978 | pushl $0 # fake return address for unwinder | ||
/* The CFI region is opened only after the fake return address is on the stack. */
979 | CFI_STARTPROC | ||
/* The same value is made available both in %eax and on the stack. */
980 | movl %edx,%eax | ||
981 | push %edx | ||
982 | CFI_ADJUST_CFA_OFFSET 4 | ||
983 | call *%ebx | ||
/* %eax as left by the call above becomes the stacked argument for do_exit. */
984 | push %eax | ||
985 | CFI_ADJUST_CFA_OFFSET 4 | ||
986 | call do_exit | ||
987 | CFI_ENDPROC | ||
988 | ENDPROC(kernel_thread_helper) | ||
989 | |||
952 | .section .rodata,"a" | 990 | .section .rodata,"a" |
953 | #include "syscall_table.S" | 991 | #include "syscall_table.S" |
954 | 992 | ||
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index a6b8bd89aa27..be9d883c62ce 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S | |||
@@ -371,8 +371,65 @@ rp_sidt: | |||
371 | addl $8,%edi | 371 | addl $8,%edi |
372 | dec %ecx | 372 | dec %ecx |
373 | jne rp_sidt | 373 | jne rp_sidt |
374 | |||
/*
 * set_early_handler handler,trapno
 * Builds an 8-byte gate for \handler and stores it at idt_table + 8*\trapno.
 *   low dword  (%eax): __KERNEL_CS in bits 31..16, handler address bits 15..0
 *   high dword (%edx): handler address bits 31..16, 0x8E00 in bits 15..0
 * Overwrites %eax, %edx and %edi.
 */
375 | .macro set_early_handler handler,trapno | ||
376 | lea \handler,%edx | ||
377 | movl $(__KERNEL_CS << 16),%eax | ||
/* Low half of the handler address goes into the low half of %eax. */
378 | movw %dx,%ax | ||
/* Replaces only the low 16 bits of %edx; the handler's upper 16 bits stay. */
379 | movw $0x8E00,%dx /* interrupt gate - dpl=0, present */ | ||
380 | lea idt_table,%edi | ||
381 | movl %eax,8*\trapno(%edi) | ||
382 | movl %edx,8*\trapno+4(%edi) | ||
383 | .endm | ||
384 | |||
385 | set_early_handler handler=early_divide_err,trapno=0 | ||
386 | set_early_handler handler=early_illegal_opcode,trapno=6 | ||
387 | set_early_handler handler=early_protection_fault,trapno=13 | ||
388 | set_early_handler handler=early_page_fault,trapno=14 | ||
389 | |||
374 | ret | 390 | ret |
375 | 391 | ||
/*
 * Early exception stubs installed via set_early_handler for traps 0, 6, 13
 * and 14.  Each one leaves its trap number in %edx and jumps to early_fault.
 * The divide-error and illegal-opcode stubs also push a zero word as a fake
 * error code; the protection-fault and page-fault stubs push nothing.
 * NOTE(review): presumably the latter two rely on the CPU having already
 * pushed an error code, so all four reach early_fault with the same stack
 * layout -- confirm against the architecture manual.
 */
392 | early_divide_err: | ||
/* Trap number 0. */
393 | xor %edx,%edx | ||
394 | pushl $0 /* fake errcode */ | ||
395 | jmp early_fault | ||
396 | ||
397 | early_illegal_opcode: | ||
398 | movl $6,%edx | ||
399 | pushl $0 /* fake errcode */ | ||
400 | jmp early_fault | ||
401 | ||
402 | early_protection_fault: | ||
403 | movl $13,%edx | ||
404 | jmp early_fault | ||
405 | ||
406 | early_page_fault: | ||
407 | movl $14,%edx | ||
408 | jmp early_fault | ||
409 | |||
/*
 * early_fault: common tail of the early exception stubs; expects the trap
 * number in %edx.  With CONFIG_PRINTK it prints fault_msg once per fault (at
 * most twice in total, tracked by early_recursion_flag) and then halts
 * forever in hlt_loop.  Without CONFIG_PRINTK it only halts.
 */
410 | early_fault: | ||
411 | cld | ||
412 | #ifdef CONFIG_PRINTK | ||
/* Load the kernel data segment into %ds and %es before touching kernel data. */
413 | movl $(__KERNEL_DS),%eax | ||
414 | movl %eax,%ds | ||
415 | movl %eax,%es | ||
/* Stop reporting once two faults have already been counted. */
416 | cmpl $2,early_recursion_flag | ||
417 | je hlt_loop | ||
418 | incl early_recursion_flag | ||
/*
 * Only CR2, the trap number and the format string are pushed here.  fault_msg
 * names more values than that (err, EIP, CS, flags, eight stack words); those
 * are taken from whatever already lies on the stack above these three words.
 */
419 | movl %cr2,%eax | ||
420 | pushl %eax | ||
421 | pushl %edx /* trapno */ | ||
422 | pushl $fault_msg | ||
423 | #ifdef CONFIG_EARLY_PRINTK | ||
424 | call early_printk | ||
425 | #else | ||
426 | call printk | ||
427 | #endif | ||
428 | #endif | ||
/* Reached by fall-through after printing, or directly when printing is skipped. */
429 | hlt_loop: | ||
430 | hlt | ||
431 | jmp hlt_loop | ||
432 | |||
376 | /* This is the default interrupt "handler" :-) */ | 433 | /* This is the default interrupt "handler" :-) */ |
377 | ALIGN | 434 | ALIGN |
378 | ignore_int: | 435 | ignore_int: |
@@ -386,6 +443,9 @@ ignore_int: | |||
386 | movl $(__KERNEL_DS),%eax | 443 | movl $(__KERNEL_DS),%eax |
387 | movl %eax,%ds | 444 | movl %eax,%ds |
388 | movl %eax,%es | 445 | movl %eax,%es |
446 | cmpl $2,early_recursion_flag | ||
447 | je hlt_loop | ||
448 | incl early_recursion_flag | ||
389 | pushl 16(%esp) | 449 | pushl 16(%esp) |
390 | pushl 24(%esp) | 450 | pushl 24(%esp) |
391 | pushl 32(%esp) | 451 | pushl 32(%esp) |
@@ -431,9 +491,16 @@ ENTRY(stack_start) | |||
431 | 491 | ||
432 | ready: .byte 0 | 492 | ready: .byte 0 |
433 | 493 | ||
494 | early_recursion_flag: | ||
495 | .long 0 | ||
496 | |||
434 | int_msg: | 497 | int_msg: |
435 | .asciz "Unknown interrupt or fault at EIP %p %p %p\n" | 498 | .asciz "Unknown interrupt or fault at EIP %p %p %p\n" |
436 | 499 | ||
500 | fault_msg: | ||
501 | .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n" | ||
502 | .asciz "Stack: %p %p %p %p %p %p %p %p\n" | ||
503 | |||
437 | /* | 504 | /* |
438 | * The IDT and GDT 'descriptors' are a strange 48-bit object | 505 | * The IDT and GDT 'descriptors' are a strange 48-bit object |
439 | * only used by the lidt and lgdt instructions. They are not | 506 | * only used by the lidt and lgdt instructions. They are not |
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c index d4756d154f47..ea5f4e7958d8 100644 --- a/arch/i386/kernel/i8259.c +++ b/arch/i386/kernel/i8259.c | |||
@@ -45,6 +45,8 @@ static void end_8259A_irq (unsigned int irq) | |||
45 | 45 | ||
46 | #define shutdown_8259A_irq disable_8259A_irq | 46 | #define shutdown_8259A_irq disable_8259A_irq |
47 | 47 | ||
48 | static int i8259A_auto_eoi; | ||
49 | |||
48 | static void mask_and_ack_8259A(unsigned int); | 50 | static void mask_and_ack_8259A(unsigned int); |
49 | 51 | ||
50 | unsigned int startup_8259A_irq(unsigned int irq) | 52 | unsigned int startup_8259A_irq(unsigned int irq) |
@@ -253,7 +255,7 @@ static void save_ELCR(char *trigger) | |||
253 | 255 | ||
254 | static int i8259A_resume(struct sys_device *dev) | 256 | static int i8259A_resume(struct sys_device *dev) |
255 | { | 257 | { |
256 | init_8259A(0); | 258 | init_8259A(i8259A_auto_eoi); |
257 | restore_ELCR(irq_trigger); | 259 | restore_ELCR(irq_trigger); |
258 | return 0; | 260 | return 0; |
259 | } | 261 | } |
@@ -301,6 +303,8 @@ void init_8259A(int auto_eoi) | |||
301 | { | 303 | { |
302 | unsigned long flags; | 304 | unsigned long flags; |
303 | 305 | ||
306 | i8259A_auto_eoi = auto_eoi; | ||
307 | |||
304 | spin_lock_irqsave(&i8259A_lock, flags); | 308 | spin_lock_irqsave(&i8259A_lock, flags); |
305 | 309 | ||
306 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ | 310 | outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ |
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c index 4fb32c551fe0..fd0df75cfbda 100644 --- a/arch/i386/kernel/io_apic.c +++ b/arch/i386/kernel/io_apic.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include <asm/nmi.h> | 40 | #include <asm/nmi.h> |
41 | 41 | ||
42 | #include <mach_apic.h> | 42 | #include <mach_apic.h> |
43 | #include <mach_apicdef.h> | ||
43 | 44 | ||
44 | #include "io_ports.h" | 45 | #include "io_ports.h" |
45 | 46 | ||
@@ -65,7 +66,7 @@ int sis_apic_bug = -1; | |||
65 | */ | 66 | */ |
66 | int nr_ioapic_registers[MAX_IO_APICS]; | 67 | int nr_ioapic_registers[MAX_IO_APICS]; |
67 | 68 | ||
68 | int disable_timer_pin_1 __initdata; | 69 | static int disable_timer_pin_1 __initdata; |
69 | 70 | ||
70 | /* | 71 | /* |
71 | * Rough estimation of how many shared IRQs there are, can | 72 | * Rough estimation of how many shared IRQs there are, can |
@@ -93,6 +94,34 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1}; | |||
93 | #define vector_to_irq(vector) (vector) | 94 | #define vector_to_irq(vector) (vector) |
94 | #endif | 95 | #endif |
95 | 96 | ||
97 | |||
98 | union entry_union { | ||
99 | struct { u32 w1, w2; }; | ||
100 | struct IO_APIC_route_entry entry; | ||
101 | }; | ||
102 | |||
103 | static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin) | ||
104 | { | ||
105 | union entry_union eu; | ||
106 | unsigned long flags; | ||
107 | spin_lock_irqsave(&ioapic_lock, flags); | ||
108 | eu.w1 = io_apic_read(apic, 0x10 + 2 * pin); | ||
109 | eu.w2 = io_apic_read(apic, 0x11 + 2 * pin); | ||
110 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
111 | return eu.entry; | ||
112 | } | ||
113 | |||
114 | static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) | ||
115 | { | ||
116 | unsigned long flags; | ||
117 | union entry_union eu; | ||
118 | eu.entry = e; | ||
119 | spin_lock_irqsave(&ioapic_lock, flags); | ||
120 | io_apic_write(apic, 0x10 + 2*pin, eu.w1); | ||
121 | io_apic_write(apic, 0x11 + 2*pin, eu.w2); | ||
122 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
123 | } | ||
124 | |||
96 | /* | 125 | /* |
97 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are | 126 | * The common case is 1:1 IRQ<->pin mappings. Sometimes there are |
98 | * shared ISA-space IRQs, so we have to support them. We are super | 127 | * shared ISA-space IRQs, so we have to support them. We are super |
@@ -200,13 +229,9 @@ static void unmask_IO_APIC_irq (unsigned int irq) | |||
200 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | 229 | static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) |
201 | { | 230 | { |
202 | struct IO_APIC_route_entry entry; | 231 | struct IO_APIC_route_entry entry; |
203 | unsigned long flags; | ||
204 | 232 | ||
205 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ | 233 | /* Check delivery_mode to be sure we're not clearing an SMI pin */ |
206 | spin_lock_irqsave(&ioapic_lock, flags); | 234 | entry = ioapic_read_entry(apic, pin); |
207 | *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); | ||
208 | *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); | ||
209 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
210 | if (entry.delivery_mode == dest_SMI) | 235 | if (entry.delivery_mode == dest_SMI) |
211 | return; | 236 | return; |
212 | 237 | ||
@@ -215,10 +240,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) | |||
215 | */ | 240 | */ |
216 | memset(&entry, 0, sizeof(entry)); | 241 | memset(&entry, 0, sizeof(entry)); |
217 | entry.mask = 1; | 242 | entry.mask = 1; |
218 | spin_lock_irqsave(&ioapic_lock, flags); | 243 | ioapic_write_entry(apic, pin, entry); |
219 | io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0)); | ||
220 | io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1)); | ||
221 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
222 | } | 244 | } |
223 | 245 | ||
224 | static void clear_IO_APIC (void) | 246 | static void clear_IO_APIC (void) |
@@ -1283,9 +1305,8 @@ static void __init setup_IO_APIC_irqs(void) | |||
1283 | if (!apic && (irq < 16)) | 1305 | if (!apic && (irq < 16)) |
1284 | disable_8259A_irq(irq); | 1306 | disable_8259A_irq(irq); |
1285 | } | 1307 | } |
1308 | ioapic_write_entry(apic, pin, entry); | ||
1286 | spin_lock_irqsave(&ioapic_lock, flags); | 1309 | spin_lock_irqsave(&ioapic_lock, flags); |
1287 | io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); | ||
1288 | io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); | ||
1289 | set_native_irq_info(irq, TARGET_CPUS); | 1310 | set_native_irq_info(irq, TARGET_CPUS); |
1290 | spin_unlock_irqrestore(&ioapic_lock, flags); | 1311 | spin_unlock_irqrestore(&ioapic_lock, flags); |
1291 | } | 1312 | } |
@@ -1301,7 +1322,6 @@ static void __init setup_IO_APIC_irqs(void) | |||
1301 | static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) | 1322 | static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) |
1302 | { | 1323 | { |
1303 | struct IO_APIC_route_entry entry; | 1324 | struct IO_APIC_route_entry entry; |
1304 | unsigned long flags; | ||
1305 | 1325 | ||
1306 | memset(&entry,0,sizeof(entry)); | 1326 | memset(&entry,0,sizeof(entry)); |
1307 | 1327 | ||
@@ -1331,10 +1351,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in | |||
1331 | /* | 1351 | /* |
1332 | * Add it to the IO-APIC irq-routing table: | 1352 | * Add it to the IO-APIC irq-routing table: |
1333 | */ | 1353 | */ |
1334 | spin_lock_irqsave(&ioapic_lock, flags); | 1354 | ioapic_write_entry(apic, pin, entry); |
1335 | io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1)); | ||
1336 | io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0)); | ||
1337 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1338 | 1355 | ||
1339 | enable_8259A_irq(0); | 1356 | enable_8259A_irq(0); |
1340 | } | 1357 | } |
@@ -1444,10 +1461,7 @@ void __init print_IO_APIC(void) | |||
1444 | for (i = 0; i <= reg_01.bits.entries; i++) { | 1461 | for (i = 0; i <= reg_01.bits.entries; i++) { |
1445 | struct IO_APIC_route_entry entry; | 1462 | struct IO_APIC_route_entry entry; |
1446 | 1463 | ||
1447 | spin_lock_irqsave(&ioapic_lock, flags); | 1464 | entry = ioapic_read_entry(apic, i); |
1448 | *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2); | ||
1449 | *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2); | ||
1450 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1451 | 1465 | ||
1452 | printk(KERN_DEBUG " %02x %03X %02X ", | 1466 | printk(KERN_DEBUG " %02x %03X %02X ", |
1453 | i, | 1467 | i, |
@@ -1666,10 +1680,7 @@ static void __init enable_IO_APIC(void) | |||
1666 | /* See if any of the pins is in ExtINT mode */ | 1680 | /* See if any of the pins is in ExtINT mode */ |
1667 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { | 1681 | for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { |
1668 | struct IO_APIC_route_entry entry; | 1682 | struct IO_APIC_route_entry entry; |
1669 | spin_lock_irqsave(&ioapic_lock, flags); | 1683 | entry = ioapic_read_entry(apic, pin); |
1670 | *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin); | ||
1671 | *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin); | ||
1672 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1673 | 1684 | ||
1674 | 1685 | ||
1675 | /* If the interrupt line is enabled and in ExtInt mode | 1686 | /* If the interrupt line is enabled and in ExtInt mode |
@@ -1726,7 +1737,6 @@ void disable_IO_APIC(void) | |||
1726 | */ | 1737 | */ |
1727 | if (ioapic_i8259.pin != -1) { | 1738 | if (ioapic_i8259.pin != -1) { |
1728 | struct IO_APIC_route_entry entry; | 1739 | struct IO_APIC_route_entry entry; |
1729 | unsigned long flags; | ||
1730 | 1740 | ||
1731 | memset(&entry, 0, sizeof(entry)); | 1741 | memset(&entry, 0, sizeof(entry)); |
1732 | entry.mask = 0; /* Enabled */ | 1742 | entry.mask = 0; /* Enabled */ |
@@ -1743,12 +1753,7 @@ void disable_IO_APIC(void) | |||
1743 | /* | 1753 | /* |
1744 | * Add it to the IO-APIC irq-routing table: | 1754 | * Add it to the IO-APIC irq-routing table: |
1745 | */ | 1755 | */ |
1746 | spin_lock_irqsave(&ioapic_lock, flags); | 1756 | ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry); |
1747 | io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin, | ||
1748 | *(((int *)&entry)+1)); | ||
1749 | io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin, | ||
1750 | *(((int *)&entry)+0)); | ||
1751 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
1752 | } | 1757 | } |
1753 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); | 1758 | disconnect_bsp_APIC(ioapic_i8259.pin != -1); |
1754 | } | 1759 | } |
@@ -2213,17 +2218,13 @@ static inline void unlock_ExtINT_logic(void) | |||
2213 | int apic, pin, i; | 2218 | int apic, pin, i; |
2214 | struct IO_APIC_route_entry entry0, entry1; | 2219 | struct IO_APIC_route_entry entry0, entry1; |
2215 | unsigned char save_control, save_freq_select; | 2220 | unsigned char save_control, save_freq_select; |
2216 | unsigned long flags; | ||
2217 | 2221 | ||
2218 | pin = find_isa_irq_pin(8, mp_INT); | 2222 | pin = find_isa_irq_pin(8, mp_INT); |
2219 | apic = find_isa_irq_apic(8, mp_INT); | 2223 | apic = find_isa_irq_apic(8, mp_INT); |
2220 | if (pin == -1) | 2224 | if (pin == -1) |
2221 | return; | 2225 | return; |
2222 | 2226 | ||
2223 | spin_lock_irqsave(&ioapic_lock, flags); | 2227 | entry0 = ioapic_read_entry(apic, pin); |
2224 | *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin); | ||
2225 | *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin); | ||
2226 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2227 | clear_IO_APIC_pin(apic, pin); | 2228 | clear_IO_APIC_pin(apic, pin); |
2228 | 2229 | ||
2229 | memset(&entry1, 0, sizeof(entry1)); | 2230 | memset(&entry1, 0, sizeof(entry1)); |
@@ -2236,10 +2237,7 @@ static inline void unlock_ExtINT_logic(void) | |||
2236 | entry1.trigger = 0; | 2237 | entry1.trigger = 0; |
2237 | entry1.vector = 0; | 2238 | entry1.vector = 0; |
2238 | 2239 | ||
2239 | spin_lock_irqsave(&ioapic_lock, flags); | 2240 | ioapic_write_entry(apic, pin, entry1); |
2240 | io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1)); | ||
2241 | io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0)); | ||
2242 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2243 | 2241 | ||
2244 | save_control = CMOS_READ(RTC_CONTROL); | 2242 | save_control = CMOS_READ(RTC_CONTROL); |
2245 | save_freq_select = CMOS_READ(RTC_FREQ_SELECT); | 2243 | save_freq_select = CMOS_READ(RTC_FREQ_SELECT); |
@@ -2258,10 +2256,7 @@ static inline void unlock_ExtINT_logic(void) | |||
2258 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); | 2256 | CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); |
2259 | clear_IO_APIC_pin(apic, pin); | 2257 | clear_IO_APIC_pin(apic, pin); |
2260 | 2258 | ||
2261 | spin_lock_irqsave(&ioapic_lock, flags); | 2259 | ioapic_write_entry(apic, pin, entry0); |
2262 | io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1)); | ||
2263 | io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0)); | ||
2264 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2265 | } | 2260 | } |
2266 | 2261 | ||
2267 | int timer_uses_ioapic_pin_0; | 2262 | int timer_uses_ioapic_pin_0; |
@@ -2461,17 +2456,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state) | |||
2461 | { | 2456 | { |
2462 | struct IO_APIC_route_entry *entry; | 2457 | struct IO_APIC_route_entry *entry; |
2463 | struct sysfs_ioapic_data *data; | 2458 | struct sysfs_ioapic_data *data; |
2464 | unsigned long flags; | ||
2465 | int i; | 2459 | int i; |
2466 | 2460 | ||
2467 | data = container_of(dev, struct sysfs_ioapic_data, dev); | 2461 | data = container_of(dev, struct sysfs_ioapic_data, dev); |
2468 | entry = data->entry; | 2462 | entry = data->entry; |
2469 | spin_lock_irqsave(&ioapic_lock, flags); | 2463 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) |
2470 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { | 2464 | entry[i] = ioapic_read_entry(dev->id, i); |
2471 | *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i); | ||
2472 | *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i); | ||
2473 | } | ||
2474 | spin_unlock_irqrestore(&ioapic_lock, flags); | ||
2475 | 2465 | ||
2476 | return 0; | 2466 | return 0; |
2477 | } | 2467 | } |
@@ -2493,11 +2483,9 @@ static int ioapic_resume(struct sys_device *dev) | |||
2493 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; | 2483 | reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; |
2494 | io_apic_write(dev->id, 0, reg_00.raw); | 2484 | io_apic_write(dev->id, 0, reg_00.raw); |
2495 | } | 2485 | } |
2496 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { | ||
2497 | io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1)); | ||
2498 | io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0)); | ||
2499 | } | ||
2500 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2486 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2487 | for (i = 0; i < nr_ioapic_registers[dev->id]; i ++) | ||
2488 | ioapic_write_entry(dev->id, i, entry[i]); | ||
2501 | 2489 | ||
2502 | return 0; | 2490 | return 0; |
2503 | } | 2491 | } |
@@ -2694,9 +2682,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a | |||
2694 | if (!ioapic && (irq < 16)) | 2682 | if (!ioapic && (irq < 16)) |
2695 | disable_8259A_irq(irq); | 2683 | disable_8259A_irq(irq); |
2696 | 2684 | ||
2685 | ioapic_write_entry(ioapic, pin, entry); | ||
2697 | spin_lock_irqsave(&ioapic_lock, flags); | 2686 | spin_lock_irqsave(&ioapic_lock, flags); |
2698 | io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1)); | ||
2699 | io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0)); | ||
2700 | set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); | 2687 | set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); |
2701 | spin_unlock_irqrestore(&ioapic_lock, flags); | 2688 | spin_unlock_irqrestore(&ioapic_lock, flags); |
2702 | 2689 | ||
@@ -2704,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a | |||
2704 | } | 2691 | } |
2705 | 2692 | ||
2706 | #endif /* CONFIG_ACPI */ | 2693 | #endif /* CONFIG_ACPI */ |
2694 | |||
2695 | static int __init parse_disable_timer_pin_1(char *arg) | ||
2696 | { | ||
2697 | disable_timer_pin_1 = 1; | ||
2698 | return 0; | ||
2699 | } | ||
2700 | early_param("disable_timer_pin_1", parse_disable_timer_pin_1); | ||
2701 | |||
2702 | static int __init parse_enable_timer_pin_1(char *arg) | ||
2703 | { | ||
2704 | disable_timer_pin_1 = -1; | ||
2705 | return 0; | ||
2706 | } | ||
2707 | early_param("enable_timer_pin_1", parse_enable_timer_pin_1); | ||
2708 | |||
2709 | static int __init parse_noapic(char *arg) | ||
2710 | { | ||
2711 | /* disable IO-APIC */ | ||
2712 | disable_ioapic_setup(); | ||
2713 | return 0; | ||
2714 | } | ||
2715 | early_param("noapic", parse_noapic); | ||
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c index 6b1ae6ba76f0..91966bafb3dc 100644 --- a/arch/i386/kernel/machine_kexec.c +++ b/arch/i386/kernel/machine_kexec.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/mm.h> | 9 | #include <linux/mm.h> |
10 | #include <linux/kexec.h> | 10 | #include <linux/kexec.h> |
11 | #include <linux/delay.h> | 11 | #include <linux/delay.h> |
12 | #include <linux/init.h> | ||
12 | #include <asm/pgtable.h> | 13 | #include <asm/pgtable.h> |
13 | #include <asm/pgalloc.h> | 14 | #include <asm/pgalloc.h> |
14 | #include <asm/tlbflush.h> | 15 | #include <asm/tlbflush.h> |
@@ -20,70 +21,13 @@ | |||
20 | #include <asm/system.h> | 21 | #include <asm/system.h> |
21 | 22 | ||
22 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) | 23 | #define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) |
23 | 24 | static u32 kexec_pgd[1024] PAGE_ALIGNED; | |
24 | #define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 25 | #ifdef CONFIG_X86_PAE |
25 | #define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) | 26 | static u32 kexec_pmd0[1024] PAGE_ALIGNED; |
26 | #define L2_ATTR (_PAGE_PRESENT) | 27 | static u32 kexec_pmd1[1024] PAGE_ALIGNED; |
27 | |||
28 | #define LEVEL0_SIZE (1UL << 12UL) | ||
29 | |||
30 | #ifndef CONFIG_X86_PAE | ||
31 | #define LEVEL1_SIZE (1UL << 22UL) | ||
32 | static u32 pgtable_level1[1024] PAGE_ALIGNED; | ||
33 | |||
34 | static void identity_map_page(unsigned long address) | ||
35 | { | ||
36 | unsigned long level1_index, level2_index; | ||
37 | u32 *pgtable_level2; | ||
38 | |||
39 | /* Find the current page table */ | ||
40 | pgtable_level2 = __va(read_cr3()); | ||
41 | |||
42 | /* Find the indexes of the physical address to identity map */ | ||
43 | level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; | ||
44 | level2_index = address / LEVEL1_SIZE; | ||
45 | |||
46 | /* Identity map the page table entry */ | ||
47 | pgtable_level1[level1_index] = address | L0_ATTR; | ||
48 | pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; | ||
49 | |||
50 | /* Flush the tlb so the new mapping takes effect. | ||
51 | * Global tlb entries are not flushed but that is not an issue. | ||
52 | */ | ||
53 | load_cr3(pgtable_level2); | ||
54 | } | ||
55 | |||
56 | #else | ||
57 | #define LEVEL1_SIZE (1UL << 21UL) | ||
58 | #define LEVEL2_SIZE (1UL << 30UL) | ||
59 | static u64 pgtable_level1[512] PAGE_ALIGNED; | ||
60 | static u64 pgtable_level2[512] PAGE_ALIGNED; | ||
61 | |||
62 | static void identity_map_page(unsigned long address) | ||
63 | { | ||
64 | unsigned long level1_index, level2_index, level3_index; | ||
65 | u64 *pgtable_level3; | ||
66 | |||
67 | /* Find the current page table */ | ||
68 | pgtable_level3 = __va(read_cr3()); | ||
69 | |||
70 | /* Find the indexes of the physical address to identity map */ | ||
71 | level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE; | ||
72 | level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE; | ||
73 | level3_index = address / LEVEL2_SIZE; | ||
74 | |||
75 | /* Identity map the page table entry */ | ||
76 | pgtable_level1[level1_index] = address | L0_ATTR; | ||
77 | pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR; | ||
78 | set_64bit(&pgtable_level3[level3_index], | ||
79 | __pa(pgtable_level2) | L2_ATTR); | ||
80 | |||
81 | /* Flush the tlb so the new mapping takes effect. | ||
82 | * Global tlb entries are not flushed but that is not an issue. | ||
83 | */ | ||
84 | load_cr3(pgtable_level3); | ||
85 | } | ||
86 | #endif | 28 | #endif |
29 | static u32 kexec_pte0[1024] PAGE_ALIGNED; | ||
30 | static u32 kexec_pte1[1024] PAGE_ALIGNED; | ||
87 | 31 | ||
88 | static void set_idt(void *newidt, __u16 limit) | 32 | static void set_idt(void *newidt, __u16 limit) |
89 | { | 33 | { |
@@ -127,16 +71,6 @@ static void load_segments(void) | |||
127 | #undef __STR | 71 | #undef __STR |
128 | } | 72 | } |
129 | 73 | ||
130 | typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)( | ||
131 | unsigned long indirection_page, | ||
132 | unsigned long reboot_code_buffer, | ||
133 | unsigned long start_address, | ||
134 | unsigned int has_pae) ATTRIB_NORET; | ||
135 | |||
136 | extern const unsigned char relocate_new_kernel[]; | ||
137 | extern void relocate_new_kernel_end(void); | ||
138 | extern const unsigned int relocate_new_kernel_size; | ||
139 | |||
140 | /* | 74 | /* |
141 | * An architecture hook called to validate the | 75 | * An architecture hook called to validate the |
142 | * proposed image and prepare the control pages | 76 | * proposed image and prepare the control pages |
@@ -169,25 +103,29 @@ void machine_kexec_cleanup(struct kimage *image) | |||
169 | */ | 103 | */ |
170 | NORET_TYPE void machine_kexec(struct kimage *image) | 104 | NORET_TYPE void machine_kexec(struct kimage *image) |
171 | { | 105 | { |
172 | unsigned long page_list; | 106 | unsigned long page_list[PAGES_NR]; |
173 | unsigned long reboot_code_buffer; | 107 | void *control_page; |
174 | |||
175 | relocate_new_kernel_t rnk; | ||
176 | 108 | ||
177 | /* Interrupts aren't acceptable while we reboot */ | 109 | /* Interrupts aren't acceptable while we reboot */ |
178 | local_irq_disable(); | 110 | local_irq_disable(); |
179 | 111 | ||
180 | /* Compute some offsets */ | 112 | control_page = page_address(image->control_code_page); |
181 | reboot_code_buffer = page_to_pfn(image->control_code_page) | 113 | memcpy(control_page, relocate_kernel, PAGE_SIZE); |
182 | << PAGE_SHIFT; | 114 | |
183 | page_list = image->head; | 115 | page_list[PA_CONTROL_PAGE] = __pa(control_page); |
184 | 116 | page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel; | |
185 | /* Set up an identity mapping for the reboot_code_buffer */ | 117 | page_list[PA_PGD] = __pa(kexec_pgd); |
186 | identity_map_page(reboot_code_buffer); | 118 | page_list[VA_PGD] = (unsigned long)kexec_pgd; |
187 | 119 | #ifdef CONFIG_X86_PAE | |
188 | /* copy it out */ | 120 | page_list[PA_PMD_0] = __pa(kexec_pmd0); |
189 | memcpy((void *)reboot_code_buffer, relocate_new_kernel, | 121 | page_list[VA_PMD_0] = (unsigned long)kexec_pmd0; |
190 | relocate_new_kernel_size); | 122 | page_list[PA_PMD_1] = __pa(kexec_pmd1); |
123 | page_list[VA_PMD_1] = (unsigned long)kexec_pmd1; | ||
124 | #endif | ||
125 | page_list[PA_PTE_0] = __pa(kexec_pte0); | ||
126 | page_list[VA_PTE_0] = (unsigned long)kexec_pte0; | ||
127 | page_list[PA_PTE_1] = __pa(kexec_pte1); | ||
128 | page_list[VA_PTE_1] = (unsigned long)kexec_pte1; | ||
191 | 129 | ||
192 | /* The segment registers are funny things, they have both a | 130 | /* The segment registers are funny things, they have both a |
193 | * visible and an invisible part. Whenever the visible part is | 131 | * visible and an invisible part. Whenever the visible part is |
@@ -206,6 +144,28 @@ NORET_TYPE void machine_kexec(struct kimage *image) | |||
206 | set_idt(phys_to_virt(0),0); | 144 | set_idt(phys_to_virt(0),0); |
207 | 145 | ||
208 | /* now call it */ | 146 | /* now call it */ |
209 | rnk = (relocate_new_kernel_t) reboot_code_buffer; | 147 | relocate_kernel((unsigned long)image->head, (unsigned long)page_list, |
210 | (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); | 148 | image->start, cpu_has_pae); |
149 | } | ||
150 | |||
151 | /* crashkernel=size@addr specifies the location to reserve for | ||
152 | * a crash kernel. By reserving this memory we guarantee | ||
153 | * that linux never sets it up as a DMA target. | ||
154 | * Useful for holding code to do something appropriate | ||
155 | * after a kernel panic. | ||
156 | */ | ||
157 | static int __init parse_crashkernel(char *arg) | ||
158 | { | ||
159 | unsigned long size, base; | ||
160 | size = memparse(arg, &arg); | ||
161 | if (*arg == '@') { | ||
162 | base = memparse(arg+1, &arg); | ||
163 | /* FIXME: Do I want a sanity check | ||
164 | * to validate the memory range? | ||
165 | */ | ||
166 | crashk_res.start = base; | ||
167 | crashk_res.end = base + size - 1; | ||
168 | } | ||
169 | return 0; | ||
211 | } | 170 | } |
171 | early_param("crashkernel", parse_crashkernel); | ||
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c index cd5456f14af4..eb57a851789d 100644 --- a/arch/i386/kernel/mca.c +++ b/arch/i386/kernel/mca.c | |||
@@ -42,6 +42,7 @@ | |||
42 | #include <linux/errno.h> | 42 | #include <linux/errno.h> |
43 | #include <linux/kernel.h> | 43 | #include <linux/kernel.h> |
44 | #include <linux/mca.h> | 44 | #include <linux/mca.h> |
45 | #include <linux/kprobes.h> | ||
45 | #include <asm/system.h> | 46 | #include <asm/system.h> |
46 | #include <asm/io.h> | 47 | #include <asm/io.h> |
47 | #include <linux/proc_fs.h> | 48 | #include <linux/proc_fs.h> |
@@ -414,7 +415,8 @@ subsys_initcall(mca_init); | |||
414 | 415 | ||
415 | /*--------------------------------------------------------------------*/ | 416 | /*--------------------------------------------------------------------*/ |
416 | 417 | ||
417 | static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) | 418 | static __kprobes void |
419 | mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) | ||
418 | { | 420 | { |
419 | int slot = mca_dev->slot; | 421 | int slot = mca_dev->slot; |
420 | 422 | ||
@@ -444,7 +446,7 @@ static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) | |||
444 | 446 | ||
445 | /*--------------------------------------------------------------------*/ | 447 | /*--------------------------------------------------------------------*/ |
446 | 448 | ||
447 | static int mca_handle_nmi_callback(struct device *dev, void *data) | 449 | static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data) |
448 | { | 450 | { |
449 | struct mca_device *mca_dev = to_mca_device(dev); | 451 | struct mca_device *mca_dev = to_mca_device(dev); |
450 | unsigned char pos5; | 452 | unsigned char pos5; |
@@ -462,7 +464,7 @@ static int mca_handle_nmi_callback(struct device *dev, void *data) | |||
462 | return 0; | 464 | return 0; |
463 | } | 465 | } |
464 | 466 | ||
465 | void mca_handle_nmi(void) | 467 | void __kprobes mca_handle_nmi(void) |
466 | { | 468 | { |
467 | /* First try - scan the various adapters and see if a specific | 469 | /* First try - scan the various adapters and see if a specific |
468 | * adapter was responsible for the error. | 470 | * adapter was responsible for the error. |
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c index a70b5fa0ef06..442aaf8c77eb 100644 --- a/arch/i386/kernel/mpparse.c +++ b/arch/i386/kernel/mpparse.c | |||
@@ -30,6 +30,7 @@ | |||
30 | #include <asm/io_apic.h> | 30 | #include <asm/io_apic.h> |
31 | 31 | ||
32 | #include <mach_apic.h> | 32 | #include <mach_apic.h> |
33 | #include <mach_apicdef.h> | ||
33 | #include <mach_mpparse.h> | 34 | #include <mach_mpparse.h> |
34 | #include <bios_ebda.h> | 35 | #include <bios_ebda.h> |
35 | 36 | ||
@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0; | |||
68 | /* Processor that is doing the boot up */ | 69 | /* Processor that is doing the boot up */ |
69 | unsigned int boot_cpu_physical_apicid = -1U; | 70 | unsigned int boot_cpu_physical_apicid = -1U; |
70 | /* Internal processor count */ | 71 | /* Internal processor count */ |
71 | static unsigned int __devinitdata num_processors; | 72 | unsigned int __cpuinitdata num_processors; |
72 | 73 | ||
73 | /* Bitmask of physically existing CPUs */ | 74 | /* Bitmask of physically existing CPUs */ |
74 | physid_mask_t phys_cpu_present_map; | 75 | physid_mask_t phys_cpu_present_map; |
@@ -228,12 +229,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m) | |||
228 | 229 | ||
229 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); | 230 | mpc_oem_bus_info(m, str, translation_table[mpc_record]); |
230 | 231 | ||
232 | #if MAX_MP_BUSSES < 256 | ||
231 | if (m->mpc_busid >= MAX_MP_BUSSES) { | 233 | if (m->mpc_busid >= MAX_MP_BUSSES) { |
232 | printk(KERN_WARNING "MP table busid value (%d) for bustype %s " | 234 | printk(KERN_WARNING "MP table busid value (%d) for bustype %s " |
233 | " is too large, max. supported is %d\n", | 235 | " is too large, max. supported is %d\n", |
234 | m->mpc_busid, str, MAX_MP_BUSSES - 1); | 236 | m->mpc_busid, str, MAX_MP_BUSSES - 1); |
235 | return; | 237 | return; |
236 | } | 238 | } |
239 | #endif | ||
237 | 240 | ||
238 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { | 241 | if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { |
239 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; | 242 | mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; |
@@ -293,19 +296,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m) | |||
293 | m->mpc_irqtype, m->mpc_irqflag & 3, | 296 | m->mpc_irqtype, m->mpc_irqflag & 3, |
294 | (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, | 297 | (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, |
295 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); | 298 | m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); |
296 | /* | ||
297 | * Well it seems all SMP boards in existence | ||
298 | * use ExtINT/LVT1 == LINT0 and | ||
299 | * NMI/LVT2 == LINT1 - the following check | ||
300 | * will show us if this assumptions is false. | ||
301 | * Until then we do not have to add baggage. | ||
302 | */ | ||
303 | if ((m->mpc_irqtype == mp_ExtINT) && | ||
304 | (m->mpc_destapiclint != 0)) | ||
305 | BUG(); | ||
306 | if ((m->mpc_irqtype == mp_NMI) && | ||
307 | (m->mpc_destapiclint != 1)) | ||
308 | BUG(); | ||
309 | } | 299 | } |
310 | 300 | ||
311 | #ifdef CONFIG_X86_NUMAQ | 301 | #ifdef CONFIG_X86_NUMAQ |
@@ -822,8 +812,7 @@ int es7000_plat; | |||
822 | 812 | ||
823 | #ifdef CONFIG_ACPI | 813 | #ifdef CONFIG_ACPI |
824 | 814 | ||
825 | void __init mp_register_lapic_address ( | 815 | void __init mp_register_lapic_address(u64 address) |
826 | u64 address) | ||
827 | { | 816 | { |
828 | mp_lapic_addr = (unsigned long) address; | 817 | mp_lapic_addr = (unsigned long) address; |
829 | 818 | ||
@@ -835,13 +824,10 @@ void __init mp_register_lapic_address ( | |||
835 | Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); | 824 | Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); |
836 | } | 825 | } |
837 | 826 | ||
838 | 827 | void __devinit mp_register_lapic (u8 id, u8 enabled) | |
839 | void __devinit mp_register_lapic ( | ||
840 | u8 id, | ||
841 | u8 enabled) | ||
842 | { | 828 | { |
843 | struct mpc_config_processor processor; | 829 | struct mpc_config_processor processor; |
844 | int boot_cpu = 0; | 830 | int boot_cpu = 0; |
845 | 831 | ||
846 | if (MAX_APICS - id <= 0) { | 832 | if (MAX_APICS - id <= 0) { |
847 | printk(KERN_WARNING "Processor #%d invalid (max %d)\n", | 833 | printk(KERN_WARNING "Processor #%d invalid (max %d)\n", |
@@ -878,11 +864,9 @@ static struct mp_ioapic_routing { | |||
878 | u32 pin_programmed[4]; | 864 | u32 pin_programmed[4]; |
879 | } mp_ioapic_routing[MAX_IO_APICS]; | 865 | } mp_ioapic_routing[MAX_IO_APICS]; |
880 | 866 | ||
881 | 867 | static int mp_find_ioapic (int gsi) | |
882 | static int mp_find_ioapic ( | ||
883 | int gsi) | ||
884 | { | 868 | { |
885 | int i = 0; | 869 | int i = 0; |
886 | 870 | ||
887 | /* Find the IOAPIC that manages this GSI. */ | 871 | /* Find the IOAPIC that manages this GSI. */ |
888 | for (i = 0; i < nr_ioapics; i++) { | 872 | for (i = 0; i < nr_ioapics; i++) { |
@@ -895,15 +879,11 @@ static int mp_find_ioapic ( | |||
895 | 879 | ||
896 | return -1; | 880 | return -1; |
897 | } | 881 | } |
898 | |||
899 | 882 | ||
900 | void __init mp_register_ioapic ( | 883 | void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base) |
901 | u8 id, | ||
902 | u32 address, | ||
903 | u32 gsi_base) | ||
904 | { | 884 | { |
905 | int idx = 0; | 885 | int idx = 0; |
906 | int tmpid; | 886 | int tmpid; |
907 | 887 | ||
908 | if (nr_ioapics >= MAX_IO_APICS) { | 888 | if (nr_ioapics >= MAX_IO_APICS) { |
909 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " | 889 | printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " |
@@ -949,16 +929,10 @@ void __init mp_register_ioapic ( | |||
949 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, | 929 | mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, |
950 | mp_ioapic_routing[idx].gsi_base, | 930 | mp_ioapic_routing[idx].gsi_base, |
951 | mp_ioapic_routing[idx].gsi_end); | 931 | mp_ioapic_routing[idx].gsi_end); |
952 | |||
953 | return; | ||
954 | } | 932 | } |
955 | 933 | ||
956 | 934 | void __init | |
957 | void __init mp_override_legacy_irq ( | 935 | mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi) |
958 | u8 bus_irq, | ||
959 | u8 polarity, | ||
960 | u8 trigger, | ||
961 | u32 gsi) | ||
962 | { | 936 | { |
963 | struct mpc_config_intsrc intsrc; | 937 | struct mpc_config_intsrc intsrc; |
964 | int ioapic = -1; | 938 | int ioapic = -1; |
@@ -996,15 +970,13 @@ void __init mp_override_legacy_irq ( | |||
996 | mp_irqs[mp_irq_entries] = intsrc; | 970 | mp_irqs[mp_irq_entries] = intsrc; |
997 | if (++mp_irq_entries == MAX_IRQ_SOURCES) | 971 | if (++mp_irq_entries == MAX_IRQ_SOURCES) |
998 | panic("Max # of irq sources exceeded!\n"); | 972 | panic("Max # of irq sources exceeded!\n"); |
999 | |||
1000 | return; | ||
1001 | } | 973 | } |
1002 | 974 | ||
1003 | void __init mp_config_acpi_legacy_irqs (void) | 975 | void __init mp_config_acpi_legacy_irqs (void) |
1004 | { | 976 | { |
1005 | struct mpc_config_intsrc intsrc; | 977 | struct mpc_config_intsrc intsrc; |
1006 | int i = 0; | 978 | int i = 0; |
1007 | int ioapic = -1; | 979 | int ioapic = -1; |
1008 | 980 | ||
1009 | /* | 981 | /* |
1010 | * Fabricate the legacy ISA bus (bus #31). | 982 | * Fabricate the legacy ISA bus (bus #31). |
@@ -1073,12 +1045,12 @@ void __init mp_config_acpi_legacy_irqs (void) | |||
1073 | 1045 | ||
1074 | #define MAX_GSI_NUM 4096 | 1046 | #define MAX_GSI_NUM 4096 |
1075 | 1047 | ||
1076 | int mp_register_gsi (u32 gsi, int triggering, int polarity) | 1048 | int mp_register_gsi(u32 gsi, int triggering, int polarity) |
1077 | { | 1049 | { |
1078 | int ioapic = -1; | 1050 | int ioapic = -1; |
1079 | int ioapic_pin = 0; | 1051 | int ioapic_pin = 0; |
1080 | int idx, bit = 0; | 1052 | int idx, bit = 0; |
1081 | static int pci_irq = 16; | 1053 | static int pci_irq = 16; |
1082 | /* | 1054 | /* |
1083 | * Mapping between Global System Interrupts, which | 1055 | * Mapping between Global System Interrupts, which |
1084 | * represent all possible interrupts, and IRQs | 1056 | * represent all possible interrupts, and IRQs |
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c index acb351478e42..dbda706fdd14 100644 --- a/arch/i386/kernel/nmi.c +++ b/arch/i386/kernel/nmi.c | |||
@@ -21,83 +21,174 @@ | |||
21 | #include <linux/sysdev.h> | 21 | #include <linux/sysdev.h> |
22 | #include <linux/sysctl.h> | 22 | #include <linux/sysctl.h> |
23 | #include <linux/percpu.h> | 23 | #include <linux/percpu.h> |
24 | #include <linux/dmi.h> | ||
25 | #include <linux/kprobes.h> | ||
24 | 26 | ||
25 | #include <asm/smp.h> | 27 | #include <asm/smp.h> |
26 | #include <asm/nmi.h> | 28 | #include <asm/nmi.h> |
29 | #include <asm/kdebug.h> | ||
27 | #include <asm/intel_arch_perfmon.h> | 30 | #include <asm/intel_arch_perfmon.h> |
28 | 31 | ||
29 | #include "mach_traps.h" | 32 | #include "mach_traps.h" |
30 | 33 | ||
31 | unsigned int nmi_watchdog = NMI_NONE; | 34 | /* perfctr_nmi_owner tracks the ownership of the perfctr registers: |
32 | extern int unknown_nmi_panic; | 35 | * evntsel_nmi_owner tracks the ownership of the event selection |
33 | static unsigned int nmi_hz = HZ; | 36 | * - different performance counters/ event selection may be reserved for |
34 | static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ | 37 | * different subsystems this reservation system just tries to coordinate |
35 | static unsigned int nmi_p4_cccr_val; | 38 | * things a little |
36 | extern void show_registers(struct pt_regs *regs); | 39 | */ |
40 | static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner); | ||
41 | static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]); | ||
37 | 42 | ||
38 | /* | 43 | /* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and its |
39 | * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: | 44 | * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now) |
40 | * - it may be reserved by some other driver, or not | ||
41 | * - when not reserved by some other driver, it may be used for | ||
42 | * the NMI watchdog, or not | ||
43 | * | ||
44 | * This is maintained separately from nmi_active because the NMI | ||
45 | * watchdog may also be driven from the I/O APIC timer. | ||
46 | */ | 45 | */ |
47 | static DEFINE_SPINLOCK(lapic_nmi_owner_lock); | 46 | #define NMI_MAX_COUNTER_BITS 66 |
48 | static unsigned int lapic_nmi_owner; | ||
49 | #define LAPIC_NMI_WATCHDOG (1<<0) | ||
50 | #define LAPIC_NMI_RESERVED (1<<1) | ||
51 | 47 | ||
52 | /* nmi_active: | 48 | /* nmi_active: |
53 | * +1: the lapic NMI watchdog is active, but can be disabled | 49 | * >0: the lapic NMI watchdog is active, but can be disabled |
54 | * 0: the lapic NMI watchdog has not been set up, and cannot | 50 | * <0: the lapic NMI watchdog has not been set up, and cannot |
55 | * be enabled | 51 | * be enabled |
56 | * -1: the lapic NMI watchdog is disabled, but can be enabled | 52 | * 0: the lapic NMI watchdog is disabled, but can be enabled |
57 | */ | 53 | */ |
58 | int nmi_active; | 54 | atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ |
59 | 55 | ||
60 | #define K7_EVNTSEL_ENABLE (1 << 22) | 56 | unsigned int nmi_watchdog = NMI_DEFAULT; |
61 | #define K7_EVNTSEL_INT (1 << 20) | 57 | static unsigned int nmi_hz = HZ; |
62 | #define K7_EVNTSEL_OS (1 << 17) | ||
63 | #define K7_EVNTSEL_USR (1 << 16) | ||
64 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
65 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
66 | 58 | ||
67 | #define P6_EVNTSEL0_ENABLE (1 << 22) | 59 | struct nmi_watchdog_ctlblk { |
68 | #define P6_EVNTSEL_INT (1 << 20) | 60 | int enabled; |
69 | #define P6_EVNTSEL_OS (1 << 17) | 61 | u64 check_bit; |
70 | #define P6_EVNTSEL_USR (1 << 16) | 62 | unsigned int cccr_msr; |
71 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | 63 | unsigned int perfctr_msr; /* the MSR to reset in NMI handler */ |
72 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | 64 | unsigned int evntsel_msr; /* the MSR to select the events to handle */ |
65 | }; | ||
66 | static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk); | ||
73 | 67 | ||
74 | #define MSR_P4_MISC_ENABLE 0x1A0 | 68 | /* local prototypes */ |
75 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | 69 | static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu); |
76 | #define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12) | ||
77 | #define MSR_P4_PERFCTR0 0x300 | ||
78 | #define MSR_P4_CCCR0 0x360 | ||
79 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
80 | #define P4_ESCR_OS (1<<3) | ||
81 | #define P4_ESCR_USR (1<<2) | ||
82 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
83 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
84 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
85 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
86 | #define P4_CCCR_COMPARE (1<<18) | ||
87 | #define P4_CCCR_REQUIRED (3<<16) | ||
88 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
89 | #define P4_CCCR_ENABLE (1<<12) | ||
90 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
91 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
92 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
93 | #define MSR_P4_IQ_COUNTER0 0x30C | ||
94 | #define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR) | ||
95 | #define P4_NMI_IQ_CCCR0 \ | ||
96 | (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \ | ||
97 | P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE) | ||
98 | 70 | ||
99 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | 71 | extern void show_registers(struct pt_regs *regs); |
100 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | 72 | extern int unknown_nmi_panic; |
73 | |||
74 | /* converts an msr to an appropriate reservation bit */ | ||
75 | static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr) | ||
76 | { | ||
77 | /* returns the bit offset of the performance counter register */ | ||
78 | switch (boot_cpu_data.x86_vendor) { | ||
79 | case X86_VENDOR_AMD: | ||
80 | return (msr - MSR_K7_PERFCTR0); | ||
81 | case X86_VENDOR_INTEL: | ||
82 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
83 | return (msr - MSR_ARCH_PERFMON_PERFCTR0); | ||
84 | |||
85 | switch (boot_cpu_data.x86) { | ||
86 | case 6: | ||
87 | return (msr - MSR_P6_PERFCTR0); | ||
88 | case 15: | ||
89 | return (msr - MSR_P4_BPU_PERFCTR0); | ||
90 | } | ||
91 | } | ||
92 | return 0; | ||
93 | } | ||
94 | |||
95 | /* converts an msr to an appropriate reservation bit */ | ||
96 | static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr) | ||
97 | { | ||
98 | /* returns the bit offset of the event selection register */ | ||
99 | switch (boot_cpu_data.x86_vendor) { | ||
100 | case X86_VENDOR_AMD: | ||
101 | return (msr - MSR_K7_EVNTSEL0); | ||
102 | case X86_VENDOR_INTEL: | ||
103 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
104 | return (msr - MSR_ARCH_PERFMON_EVENTSEL0); | ||
105 | |||
106 | switch (boot_cpu_data.x86) { | ||
107 | case 6: | ||
108 | return (msr - MSR_P6_EVNTSEL0); | ||
109 | case 15: | ||
110 | return (msr - MSR_P4_BSU_ESCR0); | ||
111 | } | ||
112 | } | ||
113 | return 0; | ||
114 | } | ||
115 | |||
116 | /* checks for a bit availability (hack for oprofile) */ | ||
117 | int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) | ||
118 | { | ||
119 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
120 | |||
121 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); | ||
122 | } | ||
123 | |||
124 | /* checks an msr for availability */ | ||
125 | int avail_to_resrv_perfctr_nmi(unsigned int msr) | ||
126 | { | ||
127 | unsigned int counter; | ||
128 | |||
129 | counter = nmi_perfctr_msr_to_bit(msr); | ||
130 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
131 | |||
132 | return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner))); | ||
133 | } | ||
134 | |||
135 | int reserve_perfctr_nmi(unsigned int msr) | ||
136 | { | ||
137 | unsigned int counter; | ||
138 | |||
139 | counter = nmi_perfctr_msr_to_bit(msr); | ||
140 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
141 | |||
142 | if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner))) | ||
143 | return 1; | ||
144 | return 0; | ||
145 | } | ||
146 | |||
147 | void release_perfctr_nmi(unsigned int msr) | ||
148 | { | ||
149 | unsigned int counter; | ||
150 | |||
151 | counter = nmi_perfctr_msr_to_bit(msr); | ||
152 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
153 | |||
154 | clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner)); | ||
155 | } | ||
156 | |||
157 | int reserve_evntsel_nmi(unsigned int msr) | ||
158 | { | ||
159 | unsigned int counter; | ||
160 | |||
161 | counter = nmi_evntsel_msr_to_bit(msr); | ||
162 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
163 | |||
164 | if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0])) | ||
165 | return 1; | ||
166 | return 0; | ||
167 | } | ||
168 | |||
169 | void release_evntsel_nmi(unsigned int msr) | ||
170 | { | ||
171 | unsigned int counter; | ||
172 | |||
173 | counter = nmi_evntsel_msr_to_bit(msr); | ||
174 | BUG_ON(counter > NMI_MAX_COUNTER_BITS); | ||
175 | |||
176 | clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]); | ||
177 | } | ||
178 | |||
179 | static __cpuinit inline int nmi_known_cpu(void) | ||
180 | { | ||
181 | switch (boot_cpu_data.x86_vendor) { | ||
182 | case X86_VENDOR_AMD: | ||
183 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
184 | case X86_VENDOR_INTEL: | ||
185 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) | ||
186 | return 1; | ||
187 | else | ||
188 | return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6)); | ||
189 | } | ||
190 | return 0; | ||
191 | } | ||
101 | 192 | ||
102 | #ifdef CONFIG_SMP | 193 | #ifdef CONFIG_SMP |
103 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when | 194 | /* The performance counters used by NMI_LOCAL_APIC don't trigger when |
@@ -125,7 +216,18 @@ static int __init check_nmi_watchdog(void) | |||
125 | unsigned int *prev_nmi_count; | 216 | unsigned int *prev_nmi_count; |
126 | int cpu; | 217 | int cpu; |
127 | 218 | ||
128 | if (nmi_watchdog == NMI_NONE) | 219 | /* Enable NMI watchdog for newer systems. |
220 | Actually it should be safe for most systems before 2004 too except | ||
221 | for some IBM systems that corrupt registers when NMI happens | ||
222 | during SMM. Unfortunately we don't have more exact information | ||
223 | on these and use this coarse check. */ | ||
224 | if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004) | ||
225 | nmi_watchdog = NMI_LOCAL_APIC; | ||
226 | |||
227 | if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT)) | ||
228 | return 0; | ||
229 | |||
230 | if (!atomic_read(&nmi_active)) | ||
129 | return 0; | 231 | return 0; |
130 | 232 | ||
131 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); | 233 | prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); |
@@ -149,25 +251,45 @@ static int __init check_nmi_watchdog(void) | |||
149 | if (!cpu_isset(cpu, cpu_callin_map)) | 251 | if (!cpu_isset(cpu, cpu_callin_map)) |
150 | continue; | 252 | continue; |
151 | #endif | 253 | #endif |
254 | if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled) | ||
255 | continue; | ||
152 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { | 256 | if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { |
153 | endflag = 1; | ||
154 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", | 257 | printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", |
155 | cpu, | 258 | cpu, |
156 | prev_nmi_count[cpu], | 259 | prev_nmi_count[cpu], |
157 | nmi_count(cpu)); | 260 | nmi_count(cpu)); |
158 | nmi_active = 0; | 261 | per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0; |
159 | lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; | 262 | atomic_dec(&nmi_active); |
160 | kfree(prev_nmi_count); | ||
161 | return -1; | ||
162 | } | 263 | } |
163 | } | 264 | } |
265 | if (!atomic_read(&nmi_active)) { | ||
266 | kfree(prev_nmi_count); | ||
267 | atomic_set(&nmi_active, -1); | ||
268 | return -1; | ||
269 | } | ||
164 | endflag = 1; | 270 | endflag = 1; |
165 | printk("OK.\n"); | 271 | printk("OK.\n"); |
166 | 272 | ||
167 | /* now that we know it works we can reduce NMI frequency to | 273 | /* now that we know it works we can reduce NMI frequency to |
168 | something more reasonable; makes a difference in some configs */ | 274 | something more reasonable; makes a difference in some configs */ |
169 | if (nmi_watchdog == NMI_LOCAL_APIC) | 275 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
276 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
277 | |||
170 | nmi_hz = 1; | 278 | nmi_hz = 1; |
279 | /* | ||
280 | * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter | ||
281 | * are writable, with higher bits sign extending from bit 31. | ||
282 | * So, we can only program the counter with 31 bit values and | ||
283 | * 32nd bit should be 1, for 33.. to be 1. | ||
284 | * Find the appropriate nmi_hz | ||
285 | */ | ||
286 | if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 && | ||
287 | ((u64)cpu_khz * 1000) > 0x7fffffffULL) { | ||
288 | u64 count = (u64)cpu_khz * 1000; | ||
289 | do_div(count, 0x7fffffffUL); | ||
290 | nmi_hz = count + 1; | ||
291 | } | ||
292 | } | ||
171 | 293 | ||
172 | kfree(prev_nmi_count); | 294 | kfree(prev_nmi_count); |
173 | return 0; | 295 | return 0; |
@@ -181,124 +303,70 @@ static int __init setup_nmi_watchdog(char *str) | |||
181 | 303 | ||
182 | get_option(&str, &nmi); | 304 | get_option(&str, &nmi); |
183 | 305 | ||
184 | if (nmi >= NMI_INVALID) | 306 | if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE)) |
185 | return 0; | 307 | return 0; |
186 | if (nmi == NMI_NONE) | ||
187 | nmi_watchdog = nmi; | ||
188 | /* | 308 | /* |
189 | * If any other x86 CPU has a local APIC, then | 309 | * If any other x86 CPU has a local APIC, then |
190 | * please test the NMI stuff there and send me the | 310 | * please test the NMI stuff there and send me the |
191 | * missing bits. Right now Intel P6/P4 and AMD K7 only. | 311 | * missing bits. Right now Intel P6/P4 and AMD K7 only. |
192 | */ | 312 | */ |
193 | if ((nmi == NMI_LOCAL_APIC) && | 313 | if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0)) |
194 | (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && | 314 | return 0; /* no lapic support */ |
195 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | 315 | nmi_watchdog = nmi; |
196 | nmi_watchdog = nmi; | ||
197 | if ((nmi == NMI_LOCAL_APIC) && | ||
198 | (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && | ||
199 | (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) | ||
200 | nmi_watchdog = nmi; | ||
201 | /* | ||
202 | * We can enable the IO-APIC watchdog | ||
203 | * unconditionally. | ||
204 | */ | ||
205 | if (nmi == NMI_IO_APIC) { | ||
206 | nmi_active = 1; | ||
207 | nmi_watchdog = nmi; | ||
208 | } | ||
209 | return 1; | 316 | return 1; |
210 | } | 317 | } |
211 | 318 | ||
212 | __setup("nmi_watchdog=", setup_nmi_watchdog); | 319 | __setup("nmi_watchdog=", setup_nmi_watchdog); |
213 | 320 | ||
214 | static void disable_intel_arch_watchdog(void); | ||
215 | |||
216 | static void disable_lapic_nmi_watchdog(void) | 321 | static void disable_lapic_nmi_watchdog(void) |
217 | { | 322 | { |
218 | if (nmi_active <= 0) | 323 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
324 | |||
325 | if (atomic_read(&nmi_active) <= 0) | ||
219 | return; | 326 | return; |
220 | switch (boot_cpu_data.x86_vendor) { | ||
221 | case X86_VENDOR_AMD: | ||
222 | wrmsr(MSR_K7_EVNTSEL0, 0, 0); | ||
223 | break; | ||
224 | case X86_VENDOR_INTEL: | ||
225 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
226 | disable_intel_arch_watchdog(); | ||
227 | break; | ||
228 | } | ||
229 | switch (boot_cpu_data.x86) { | ||
230 | case 6: | ||
231 | if (boot_cpu_data.x86_model > 0xd) | ||
232 | break; | ||
233 | 327 | ||
234 | wrmsr(MSR_P6_EVNTSEL0, 0, 0); | 328 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
235 | break; | ||
236 | case 15: | ||
237 | if (boot_cpu_data.x86_model > 0x4) | ||
238 | break; | ||
239 | 329 | ||
240 | wrmsr(MSR_P4_IQ_CCCR0, 0, 0); | 330 | BUG_ON(atomic_read(&nmi_active) != 0); |
241 | wrmsr(MSR_P4_CRU_ESCR0, 0, 0); | ||
242 | break; | ||
243 | } | ||
244 | break; | ||
245 | } | ||
246 | nmi_active = -1; | ||
247 | /* tell do_nmi() and others that we're not active any more */ | ||
248 | nmi_watchdog = 0; | ||
249 | } | 331 | } |
250 | 332 | ||
251 | static void enable_lapic_nmi_watchdog(void) | 333 | static void enable_lapic_nmi_watchdog(void) |
252 | { | 334 | { |
253 | if (nmi_active < 0) { | 335 | BUG_ON(nmi_watchdog != NMI_LOCAL_APIC); |
254 | nmi_watchdog = NMI_LOCAL_APIC; | ||
255 | setup_apic_nmi_watchdog(); | ||
256 | } | ||
257 | } | ||
258 | 336 | ||
259 | int reserve_lapic_nmi(void) | 337 | /* are we already enabled */ |
260 | { | 338 | if (atomic_read(&nmi_active) != 0) |
261 | unsigned int old_owner; | 339 | return; |
262 | |||
263 | spin_lock(&lapic_nmi_owner_lock); | ||
264 | old_owner = lapic_nmi_owner; | ||
265 | lapic_nmi_owner |= LAPIC_NMI_RESERVED; | ||
266 | spin_unlock(&lapic_nmi_owner_lock); | ||
267 | if (old_owner & LAPIC_NMI_RESERVED) | ||
268 | return -EBUSY; | ||
269 | if (old_owner & LAPIC_NMI_WATCHDOG) | ||
270 | disable_lapic_nmi_watchdog(); | ||
271 | return 0; | ||
272 | } | ||
273 | 340 | ||
274 | void release_lapic_nmi(void) | 341 | /* are we lapic aware */ |
275 | { | 342 | if (nmi_known_cpu() <= 0) |
276 | unsigned int new_owner; | 343 | return; |
277 | 344 | ||
278 | spin_lock(&lapic_nmi_owner_lock); | 345 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
279 | new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; | 346 | touch_nmi_watchdog(); |
280 | lapic_nmi_owner = new_owner; | ||
281 | spin_unlock(&lapic_nmi_owner_lock); | ||
282 | if (new_owner & LAPIC_NMI_WATCHDOG) | ||
283 | enable_lapic_nmi_watchdog(); | ||
284 | } | 347 | } |
285 | 348 | ||
286 | void disable_timer_nmi_watchdog(void) | 349 | void disable_timer_nmi_watchdog(void) |
287 | { | 350 | { |
288 | if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) | 351 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
352 | |||
353 | if (atomic_read(&nmi_active) <= 0) | ||
289 | return; | 354 | return; |
290 | 355 | ||
291 | unset_nmi_callback(); | 356 | disable_irq(0); |
292 | nmi_active = -1; | 357 | on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); |
293 | nmi_watchdog = NMI_NONE; | 358 | |
359 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
294 | } | 360 | } |
295 | 361 | ||
296 | void enable_timer_nmi_watchdog(void) | 362 | void enable_timer_nmi_watchdog(void) |
297 | { | 363 | { |
298 | if (nmi_active < 0) { | 364 | BUG_ON(nmi_watchdog != NMI_IO_APIC); |
299 | nmi_watchdog = NMI_IO_APIC; | 365 | |
366 | if (atomic_read(&nmi_active) == 0) { | ||
300 | touch_nmi_watchdog(); | 367 | touch_nmi_watchdog(); |
301 | nmi_active = 1; | 368 | on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); |
369 | enable_irq(0); | ||
302 | } | 370 | } |
303 | } | 371 | } |
304 | 372 | ||
@@ -308,15 +376,20 @@ static int nmi_pm_active; /* nmi_active before suspend */ | |||
308 | 376 | ||
309 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) | 377 | static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) |
310 | { | 378 | { |
311 | nmi_pm_active = nmi_active; | 379 | /* only CPU0 goes here, other CPUs should be offline */ |
312 | disable_lapic_nmi_watchdog(); | 380 | nmi_pm_active = atomic_read(&nmi_active); |
381 | stop_apic_nmi_watchdog(NULL); | ||
382 | BUG_ON(atomic_read(&nmi_active) != 0); | ||
313 | return 0; | 383 | return 0; |
314 | } | 384 | } |
315 | 385 | ||
316 | static int lapic_nmi_resume(struct sys_device *dev) | 386 | static int lapic_nmi_resume(struct sys_device *dev) |
317 | { | 387 | { |
318 | if (nmi_pm_active > 0) | 388 | /* only CPU0 goes here, other CPUs should be offline */ |
319 | enable_lapic_nmi_watchdog(); | 389 | if (nmi_pm_active > 0) { |
390 | setup_apic_nmi_watchdog(NULL); | ||
391 | touch_nmi_watchdog(); | ||
392 | } | ||
320 | return 0; | 393 | return 0; |
321 | } | 394 | } |
322 | 395 | ||
@@ -336,7 +409,13 @@ static int __init init_lapic_nmi_sysfs(void) | |||
336 | { | 409 | { |
337 | int error; | 410 | int error; |
338 | 411 | ||
339 | if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) | 412 | /* should really be a BUG_ON but b/c this is an |
413 | * init call, it just doesn't work. -dcz | ||
414 | */ | ||
415 | if (nmi_watchdog != NMI_LOCAL_APIC) | ||
416 | return 0; | ||
417 | |||
418 | if ( atomic_read(&nmi_active) < 0 ) | ||
340 | return 0; | 419 | return 0; |
341 | 420 | ||
342 | error = sysdev_class_register(&nmi_sysclass); | 421 | error = sysdev_class_register(&nmi_sysclass); |
@@ -354,138 +433,269 @@ late_initcall(init_lapic_nmi_sysfs); | |||
354 | * Original code written by Keith Owens. | 433 | * Original code written by Keith Owens. |
355 | */ | 434 | */ |
356 | 435 | ||
357 | static void clear_msr_range(unsigned int base, unsigned int n) | 436 | static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr) |
358 | { | ||
359 | unsigned int i; | ||
360 | |||
361 | for(i = 0; i < n; ++i) | ||
362 | wrmsr(base+i, 0, 0); | ||
363 | } | ||
364 | |||
365 | static void write_watchdog_counter(const char *descr) | ||
366 | { | 437 | { |
367 | u64 count = (u64)cpu_khz * 1000; | 438 | u64 count = (u64)cpu_khz * 1000; |
368 | 439 | ||
369 | do_div(count, nmi_hz); | 440 | do_div(count, nmi_hz); |
370 | if(descr) | 441 | if(descr) |
371 | Dprintk("setting %s to -0x%08Lx\n", descr, count); | 442 | Dprintk("setting %s to -0x%08Lx\n", descr, count); |
372 | wrmsrl(nmi_perfctr_msr, 0 - count); | 443 | wrmsrl(perfctr_msr, 0 - count); |
373 | } | 444 | } |
374 | 445 | ||
375 | static void setup_k7_watchdog(void) | 446 | /* Note that these events don't tick when the CPU idles. This means |
447 | the frequency varies with CPU load. */ | ||
448 | |||
449 | #define K7_EVNTSEL_ENABLE (1 << 22) | ||
450 | #define K7_EVNTSEL_INT (1 << 20) | ||
451 | #define K7_EVNTSEL_OS (1 << 17) | ||
452 | #define K7_EVNTSEL_USR (1 << 16) | ||
453 | #define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76 | ||
454 | #define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING | ||
455 | |||
456 | static int setup_k7_watchdog(void) | ||
376 | { | 457 | { |
458 | unsigned int perfctr_msr, evntsel_msr; | ||
377 | unsigned int evntsel; | 459 | unsigned int evntsel; |
460 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
461 | |||
462 | perfctr_msr = MSR_K7_PERFCTR0; | ||
463 | evntsel_msr = MSR_K7_EVNTSEL0; | ||
464 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
465 | goto fail; | ||
378 | 466 | ||
379 | nmi_perfctr_msr = MSR_K7_PERFCTR0; | 467 | if (!reserve_evntsel_nmi(evntsel_msr)) |
468 | goto fail1; | ||
380 | 469 | ||
381 | clear_msr_range(MSR_K7_EVNTSEL0, 4); | 470 | wrmsrl(perfctr_msr, 0UL); |
382 | clear_msr_range(MSR_K7_PERFCTR0, 4); | ||
383 | 471 | ||
384 | evntsel = K7_EVNTSEL_INT | 472 | evntsel = K7_EVNTSEL_INT |
385 | | K7_EVNTSEL_OS | 473 | | K7_EVNTSEL_OS |
386 | | K7_EVNTSEL_USR | 474 | | K7_EVNTSEL_USR |
387 | | K7_NMI_EVENT; | 475 | | K7_NMI_EVENT; |
388 | 476 | ||
389 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 477 | /* setup the timer */ |
390 | write_watchdog_counter("K7_PERFCTR0"); | 478 | wrmsr(evntsel_msr, evntsel, 0); |
479 | write_watchdog_counter(perfctr_msr, "K7_PERFCTR0"); | ||
391 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 480 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
392 | evntsel |= K7_EVNTSEL_ENABLE; | 481 | evntsel |= K7_EVNTSEL_ENABLE; |
393 | wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); | 482 | wrmsr(evntsel_msr, evntsel, 0); |
483 | |||
484 | wd->perfctr_msr = perfctr_msr; | ||
485 | wd->evntsel_msr = evntsel_msr; | ||
486 | wd->cccr_msr = 0; //unused | ||
487 | wd->check_bit = 1ULL<<63; | ||
488 | return 1; | ||
489 | fail1: | ||
490 | release_perfctr_nmi(perfctr_msr); | ||
491 | fail: | ||
492 | return 0; | ||
493 | } | ||
494 | |||
495 | static void stop_k7_watchdog(void) | ||
496 | { | ||
497 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
498 | |||
499 | wrmsr(wd->evntsel_msr, 0, 0); | ||
500 | |||
501 | release_evntsel_nmi(wd->evntsel_msr); | ||
502 | release_perfctr_nmi(wd->perfctr_msr); | ||
394 | } | 503 | } |
395 | 504 | ||
396 | static void setup_p6_watchdog(void) | 505 | #define P6_EVNTSEL0_ENABLE (1 << 22) |
506 | #define P6_EVNTSEL_INT (1 << 20) | ||
507 | #define P6_EVNTSEL_OS (1 << 17) | ||
508 | #define P6_EVNTSEL_USR (1 << 16) | ||
509 | #define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 | ||
510 | #define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED | ||
511 | |||
512 | static int setup_p6_watchdog(void) | ||
397 | { | 513 | { |
514 | unsigned int perfctr_msr, evntsel_msr; | ||
398 | unsigned int evntsel; | 515 | unsigned int evntsel; |
516 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
517 | |||
518 | perfctr_msr = MSR_P6_PERFCTR0; | ||
519 | evntsel_msr = MSR_P6_EVNTSEL0; | ||
520 | if (!reserve_perfctr_nmi(perfctr_msr)) | ||
521 | goto fail; | ||
399 | 522 | ||
400 | nmi_perfctr_msr = MSR_P6_PERFCTR0; | 523 | if (!reserve_evntsel_nmi(evntsel_msr)) |
524 | goto fail1; | ||
401 | 525 | ||
402 | clear_msr_range(MSR_P6_EVNTSEL0, 2); | 526 | wrmsrl(perfctr_msr, 0UL); |
403 | clear_msr_range(MSR_P6_PERFCTR0, 2); | ||
404 | 527 | ||
405 | evntsel = P6_EVNTSEL_INT | 528 | evntsel = P6_EVNTSEL_INT |
406 | | P6_EVNTSEL_OS | 529 | | P6_EVNTSEL_OS |
407 | | P6_EVNTSEL_USR | 530 | | P6_EVNTSEL_USR |
408 | | P6_NMI_EVENT; | 531 | | P6_NMI_EVENT; |
409 | 532 | ||
410 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | 533 | /* setup the timer */ |
411 | write_watchdog_counter("P6_PERFCTR0"); | 534 | wrmsr(evntsel_msr, evntsel, 0); |
535 | write_watchdog_counter(perfctr_msr, "P6_PERFCTR0"); | ||
412 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 536 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
413 | evntsel |= P6_EVNTSEL0_ENABLE; | 537 | evntsel |= P6_EVNTSEL0_ENABLE; |
414 | wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); | 538 | wrmsr(evntsel_msr, evntsel, 0); |
539 | |||
540 | wd->perfctr_msr = perfctr_msr; | ||
541 | wd->evntsel_msr = evntsel_msr; | ||
542 | wd->cccr_msr = 0; //unused | ||
543 | wd->check_bit = 1ULL<<39; | ||
544 | return 1; | ||
545 | fail1: | ||
546 | release_perfctr_nmi(perfctr_msr); | ||
547 | fail: | ||
548 | return 0; | ||
549 | } | ||
550 | |||
551 | static void stop_p6_watchdog(void) | ||
552 | { | ||
553 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
554 | |||
555 | wrmsr(wd->evntsel_msr, 0, 0); | ||
556 | |||
557 | release_evntsel_nmi(wd->evntsel_msr); | ||
558 | release_perfctr_nmi(wd->perfctr_msr); | ||
415 | } | 559 | } |
416 | 560 | ||
561 | /* Note that these events don't tick when the CPU idles. This means | ||
562 | the frequency varies with CPU load. */ | ||
563 | |||
564 | #define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) | ||
565 | #define P4_ESCR_EVENT_SELECT(N) ((N)<<25) | ||
566 | #define P4_ESCR_OS (1<<3) | ||
567 | #define P4_ESCR_USR (1<<2) | ||
568 | #define P4_CCCR_OVF_PMI0 (1<<26) | ||
569 | #define P4_CCCR_OVF_PMI1 (1<<27) | ||
570 | #define P4_CCCR_THRESHOLD(N) ((N)<<20) | ||
571 | #define P4_CCCR_COMPLEMENT (1<<19) | ||
572 | #define P4_CCCR_COMPARE (1<<18) | ||
573 | #define P4_CCCR_REQUIRED (3<<16) | ||
574 | #define P4_CCCR_ESCR_SELECT(N) ((N)<<13) | ||
575 | #define P4_CCCR_ENABLE (1<<12) | ||
576 | #define P4_CCCR_OVF (1<<31) | ||
577 | /* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter | ||
578 | CRU_ESCR0 (with any non-null event selector) through a complemented | ||
579 | max threshold. [IA32-Vol3, Section 14.9.9] */ | ||
580 | |||
417 | static int setup_p4_watchdog(void) | 581 | static int setup_p4_watchdog(void) |
418 | { | 582 | { |
583 | unsigned int perfctr_msr, evntsel_msr, cccr_msr; | ||
584 | unsigned int evntsel, cccr_val; | ||
419 | unsigned int misc_enable, dummy; | 585 | unsigned int misc_enable, dummy; |
586 | unsigned int ht_num; | ||
587 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
420 | 588 | ||
421 | rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); | 589 | rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy); |
422 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) | 590 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) |
423 | return 0; | 591 | return 0; |
424 | 592 | ||
425 | nmi_perfctr_msr = MSR_P4_IQ_COUNTER0; | ||
426 | nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; | ||
427 | #ifdef CONFIG_SMP | 593 | #ifdef CONFIG_SMP |
428 | if (smp_num_siblings == 2) | 594 | /* detect which hyperthread we are on */ |
429 | nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; | 595 | if (smp_num_siblings == 2) { |
596 | unsigned int ebx, apicid; | ||
597 | |||
598 | ebx = cpuid_ebx(1); | ||
599 | apicid = (ebx >> 24) & 0xff; | ||
600 | ht_num = apicid & 1; | ||
601 | } else | ||
430 | #endif | 602 | #endif |
603 | ht_num = 0; | ||
431 | 604 | ||
432 | if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) | 605 | /* performance counters are shared resources |
433 | clear_msr_range(0x3F1, 2); | 606 | * assign each hyperthread its own set |
434 | /* MSR 0x3F0 seems to have a default value of 0xFC00, but current | 607 | * (re-use the ESCR0 register, seems safe |
435 | docs doesn't fully define it, so leave it alone for now. */ | 608 | * and keeps the cccr_val the same) |
436 | if (boot_cpu_data.x86_model >= 0x3) { | 609 | */ |
437 | /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ | 610 | if (!ht_num) { |
438 | clear_msr_range(0x3A0, 26); | 611 | /* logical cpu 0 */ |
439 | clear_msr_range(0x3BC, 3); | 612 | perfctr_msr = MSR_P4_IQ_PERFCTR0; |
613 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
614 | cccr_msr = MSR_P4_IQ_CCCR0; | ||
615 | cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4); | ||
440 | } else { | 616 | } else { |
441 | clear_msr_range(0x3A0, 31); | 617 | /* logical cpu 1 */ |
618 | perfctr_msr = MSR_P4_IQ_PERFCTR1; | ||
619 | evntsel_msr = MSR_P4_CRU_ESCR0; | ||
620 | cccr_msr = MSR_P4_IQ_CCCR1; | ||
621 | cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4); | ||
442 | } | 622 | } |
443 | clear_msr_range(0x3C0, 6); | 623 | |
444 | clear_msr_range(0x3C8, 6); | 624 | if (!reserve_perfctr_nmi(perfctr_msr)) |
445 | clear_msr_range(0x3E0, 2); | 625 | goto fail; |
446 | clear_msr_range(MSR_P4_CCCR0, 18); | 626 | |
447 | clear_msr_range(MSR_P4_PERFCTR0, 18); | 627 | if (!reserve_evntsel_nmi(evntsel_msr)) |
448 | 628 | goto fail1; | |
449 | wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); | 629 | |
450 | wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); | 630 | evntsel = P4_ESCR_EVENT_SELECT(0x3F) |
451 | write_watchdog_counter("P4_IQ_COUNTER0"); | 631 | | P4_ESCR_OS |
632 | | P4_ESCR_USR; | ||
633 | |||
634 | cccr_val |= P4_CCCR_THRESHOLD(15) | ||
635 | | P4_CCCR_COMPLEMENT | ||
636 | | P4_CCCR_COMPARE | ||
637 | | P4_CCCR_REQUIRED; | ||
638 | |||
639 | wrmsr(evntsel_msr, evntsel, 0); | ||
640 | wrmsr(cccr_msr, cccr_val, 0); | ||
641 | write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0"); | ||
452 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 642 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
453 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 643 | cccr_val |= P4_CCCR_ENABLE; |
644 | wrmsr(cccr_msr, cccr_val, 0); | ||
645 | wd->perfctr_msr = perfctr_msr; | ||
646 | wd->evntsel_msr = evntsel_msr; | ||
647 | wd->cccr_msr = cccr_msr; | ||
648 | wd->check_bit = 1ULL<<39; | ||
454 | return 1; | 649 | return 1; |
650 | fail1: | ||
651 | release_perfctr_nmi(perfctr_msr); | ||
652 | fail: | ||
653 | return 0; | ||
455 | } | 654 | } |
456 | 655 | ||
457 | static void disable_intel_arch_watchdog(void) | 656 | static void stop_p4_watchdog(void) |
458 | { | 657 | { |
459 | unsigned ebx; | 658 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
460 | 659 | ||
461 | /* | 660 | wrmsr(wd->cccr_msr, 0, 0); |
462 | * Check whether the Architectural PerfMon supports | 661 | wrmsr(wd->evntsel_msr, 0, 0); |
463 | * Unhalted Core Cycles Event or not. | 662 | |
464 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 663 | release_evntsel_nmi(wd->evntsel_msr); |
465 | */ | 664 | release_perfctr_nmi(wd->perfctr_msr); |
466 | ebx = cpuid_ebx(10); | ||
467 | if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
468 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0); | ||
469 | } | 665 | } |
470 | 666 | ||
667 | #define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL | ||
668 | #define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK | ||
669 | |||
471 | static int setup_intel_arch_watchdog(void) | 670 | static int setup_intel_arch_watchdog(void) |
472 | { | 671 | { |
672 | unsigned int ebx; | ||
673 | union cpuid10_eax eax; | ||
674 | unsigned int unused; | ||
675 | unsigned int perfctr_msr, evntsel_msr; | ||
473 | unsigned int evntsel; | 676 | unsigned int evntsel; |
474 | unsigned ebx; | 677 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
475 | 678 | ||
476 | /* | 679 | /* |
477 | * Check whether the Architectural PerfMon supports | 680 | * Check whether the Architectural PerfMon supports |
478 | * Unhalted Core Cycles Event or not. | 681 | * Unhalted Core Cycles Event or not. |
479 | * NOTE: Corresponding bit = 0 in ebp indicates event present. | 682 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
480 | */ | 683 | */ |
481 | ebx = cpuid_ebx(10); | 684 | cpuid(10, &(eax.full), &ebx, &unused, &unused); |
482 | if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | 685 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || |
483 | return 0; | 686 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) |
687 | goto fail; | ||
688 | |||
689 | perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | ||
690 | evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0; | ||
484 | 691 | ||
485 | nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; | 692 | if (!reserve_perfctr_nmi(perfctr_msr)) |
693 | goto fail; | ||
486 | 694 | ||
487 | clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); | 695 | if (!reserve_evntsel_nmi(evntsel_msr)) |
488 | clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); | 696 | goto fail1; |
697 | |||
698 | wrmsrl(perfctr_msr, 0UL); | ||
489 | 699 | ||
490 | evntsel = ARCH_PERFMON_EVENTSEL_INT | 700 | evntsel = ARCH_PERFMON_EVENTSEL_INT |
491 | | ARCH_PERFMON_EVENTSEL_OS | 701 | | ARCH_PERFMON_EVENTSEL_OS |
@@ -493,51 +703,145 @@ static int setup_intel_arch_watchdog(void) | |||
493 | | ARCH_PERFMON_NMI_EVENT_SEL | 703 | | ARCH_PERFMON_NMI_EVENT_SEL |
494 | | ARCH_PERFMON_NMI_EVENT_UMASK; | 704 | | ARCH_PERFMON_NMI_EVENT_UMASK; |
495 | 705 | ||
496 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 706 | /* setup the timer */ |
497 | write_watchdog_counter("INTEL_ARCH_PERFCTR0"); | 707 | wrmsr(evntsel_msr, evntsel, 0); |
708 | write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0"); | ||
498 | apic_write(APIC_LVTPC, APIC_DM_NMI); | 709 | apic_write(APIC_LVTPC, APIC_DM_NMI); |
499 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; | 710 | evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; |
500 | wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); | 711 | wrmsr(evntsel_msr, evntsel, 0); |
712 | |||
713 | wd->perfctr_msr = perfctr_msr; | ||
714 | wd->evntsel_msr = evntsel_msr; | ||
715 | wd->cccr_msr = 0; //unused | ||
716 | wd->check_bit = 1ULL << (eax.split.bit_width - 1); | ||
501 | return 1; | 717 | return 1; |
718 | fail1: | ||
719 | release_perfctr_nmi(perfctr_msr); | ||
720 | fail: | ||
721 | return 0; | ||
502 | } | 722 | } |
503 | 723 | ||
504 | void setup_apic_nmi_watchdog (void) | 724 | static void stop_intel_arch_watchdog(void) |
505 | { | 725 | { |
506 | switch (boot_cpu_data.x86_vendor) { | 726 | unsigned int ebx; |
507 | case X86_VENDOR_AMD: | 727 | union cpuid10_eax eax; |
508 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | 728 | unsigned int unused; |
509 | return; | 729 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); |
510 | setup_k7_watchdog(); | 730 | |
511 | break; | 731 | /* |
512 | case X86_VENDOR_INTEL: | 732 | * Check whether the Architectural PerfMon supports |
513 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | 733 | * Unhalted Core Cycles Event or not. |
514 | if (!setup_intel_arch_watchdog()) | 734 | * NOTE: Corresponding bit = 0 in ebx indicates event present. |
735 | */ | ||
736 | cpuid(10, &(eax.full), &ebx, &unused, &unused); | ||
737 | if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) || | ||
738 | (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) | ||
739 | return; | ||
740 | |||
741 | wrmsr(wd->evntsel_msr, 0, 0); | ||
742 | release_evntsel_nmi(wd->evntsel_msr); | ||
743 | release_perfctr_nmi(wd->perfctr_msr); | ||
744 | } | ||
745 | |||
746 | void setup_apic_nmi_watchdog (void *unused) | ||
747 | { | ||
748 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
749 | |||
750 | /* only support LOCAL and IO APICs for now */ | ||
751 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
752 | (nmi_watchdog != NMI_IO_APIC)) | ||
753 | return; | ||
754 | |||
755 | if (wd->enabled == 1) | ||
756 | return; | ||
757 | |||
758 | /* cheap hack to support suspend/resume */ | ||
759 | /* if cpu0 is not active neither should the other cpus */ | ||
760 | if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0)) | ||
761 | return; | ||
762 | |||
763 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
764 | switch (boot_cpu_data.x86_vendor) { | ||
765 | case X86_VENDOR_AMD: | ||
766 | if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) | ||
515 | return; | 767 | return; |
516 | break; | 768 | if (!setup_k7_watchdog()) |
517 | } | ||
518 | switch (boot_cpu_data.x86) { | ||
519 | case 6: | ||
520 | if (boot_cpu_data.x86_model > 0xd) | ||
521 | return; | 769 | return; |
522 | |||
523 | setup_p6_watchdog(); | ||
524 | break; | 770 | break; |
525 | case 15: | 771 | case X86_VENDOR_INTEL: |
526 | if (boot_cpu_data.x86_model > 0x4) | 772 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { |
527 | return; | 773 | if (!setup_intel_arch_watchdog()) |
774 | return; | ||
775 | break; | ||
776 | } | ||
777 | switch (boot_cpu_data.x86) { | ||
778 | case 6: | ||
779 | if (boot_cpu_data.x86_model > 0xd) | ||
780 | return; | ||
781 | |||
782 | if (!setup_p6_watchdog()) | ||
783 | return; | ||
784 | break; | ||
785 | case 15: | ||
786 | if (boot_cpu_data.x86_model > 0x4) | ||
787 | return; | ||
528 | 788 | ||
529 | if (!setup_p4_watchdog()) | 789 | if (!setup_p4_watchdog()) |
790 | return; | ||
791 | break; | ||
792 | default: | ||
530 | return; | 793 | return; |
794 | } | ||
531 | break; | 795 | break; |
532 | default: | 796 | default: |
533 | return; | 797 | return; |
534 | } | 798 | } |
535 | break; | 799 | } |
536 | default: | 800 | wd->enabled = 1; |
801 | atomic_inc(&nmi_active); | ||
802 | } | ||
803 | |||
804 | void stop_apic_nmi_watchdog(void *unused) | ||
805 | { | ||
806 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
807 | |||
808 | /* only support LOCAL and IO APICs for now */ | ||
809 | if ((nmi_watchdog != NMI_LOCAL_APIC) && | ||
810 | (nmi_watchdog != NMI_IO_APIC)) | ||
811 | return; | ||
812 | |||
813 | if (wd->enabled == 0) | ||
537 | return; | 814 | return; |
815 | |||
816 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
817 | switch (boot_cpu_data.x86_vendor) { | ||
818 | case X86_VENDOR_AMD: | ||
819 | stop_k7_watchdog(); | ||
820 | break; | ||
821 | case X86_VENDOR_INTEL: | ||
822 | if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { | ||
823 | stop_intel_arch_watchdog(); | ||
824 | break; | ||
825 | } | ||
826 | switch (boot_cpu_data.x86) { | ||
827 | case 6: | ||
828 | if (boot_cpu_data.x86_model > 0xd) | ||
829 | break; | ||
830 | stop_p6_watchdog(); | ||
831 | break; | ||
832 | case 15: | ||
833 | if (boot_cpu_data.x86_model > 0x4) | ||
834 | break; | ||
835 | stop_p4_watchdog(); | ||
836 | break; | ||
837 | } | ||
838 | break; | ||
839 | default: | ||
840 | return; | ||
841 | } | ||
538 | } | 842 | } |
539 | lapic_nmi_owner = LAPIC_NMI_WATCHDOG; | 843 | wd->enabled = 0; |
540 | nmi_active = 1; | 844 | atomic_dec(&nmi_active); |
541 | } | 845 | } |
542 | 846 | ||
543 | /* | 847 | /* |
@@ -579,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog); | |||
579 | 883 | ||
580 | extern void die_nmi(struct pt_regs *, const char *msg); | 884 | extern void die_nmi(struct pt_regs *, const char *msg); |
581 | 885 | ||
582 | void nmi_watchdog_tick (struct pt_regs * regs) | 886 | __kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) |
583 | { | 887 | { |
584 | 888 | ||
585 | /* | 889 | /* |
@@ -588,11 +892,23 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
588 | * smp_processor_id(). | 892 | * smp_processor_id(). |
589 | */ | 893 | */ |
590 | unsigned int sum; | 894 | unsigned int sum; |
895 | int touched = 0; | ||
591 | int cpu = smp_processor_id(); | 896 | int cpu = smp_processor_id(); |
897 | struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk); | ||
898 | u64 dummy; | ||
899 | int rc=0; | ||
900 | |||
901 | /* check for other users first */ | ||
902 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) | ||
903 | == NOTIFY_STOP) { | ||
904 | rc = 1; | ||
905 | touched = 1; | ||
906 | } | ||
592 | 907 | ||
593 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; | 908 | sum = per_cpu(irq_stat, cpu).apic_timer_irqs; |
594 | 909 | ||
595 | if (last_irq_sums[cpu] == sum) { | 910 | /* if the apic timer isn't firing, this cpu isn't doing much */ |
911 | if (!touched && last_irq_sums[cpu] == sum) { | ||
596 | /* | 912 | /* |
597 | * Ayiee, looks like this CPU is stuck ... | 913 | * Ayiee, looks like this CPU is stuck ... |
598 | * wait a few IRQs (5 seconds) before doing the oops ... | 914 | * wait a few IRQs (5 seconds) before doing the oops ... |
@@ -607,27 +923,59 @@ void nmi_watchdog_tick (struct pt_regs * regs) | |||
607 | last_irq_sums[cpu] = sum; | 923 | last_irq_sums[cpu] = sum; |
608 | alert_counter[cpu] = 0; | 924 | alert_counter[cpu] = 0; |
609 | } | 925 | } |
610 | if (nmi_perfctr_msr) { | 926 | /* see if the nmi watchdog went off */ |
611 | if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { | 927 | if (wd->enabled) { |
612 | /* | 928 | if (nmi_watchdog == NMI_LOCAL_APIC) { |
613 | * P4 quirks: | 929 | rdmsrl(wd->perfctr_msr, dummy); |
614 | * - An overflown perfctr will assert its interrupt | 930 | if (dummy & wd->check_bit){ |
615 | * until the OVF flag in its CCCR is cleared. | 931 | /* this wasn't a watchdog timer interrupt */ |
616 | * - LVTPC is masked on interrupt and must be | 932 | goto done; |
617 | * unmasked by the LVTPC handler. | 933 | } |
934 | |||
935 | /* only Intel P4 uses the cccr msr */ | ||
936 | if (wd->cccr_msr != 0) { | ||
937 | /* | ||
938 | * P4 quirks: | ||
939 | * - An overflown perfctr will assert its interrupt | ||
940 | * until the OVF flag in its CCCR is cleared. | ||
941 | * - LVTPC is masked on interrupt and must be | ||
942 | * unmasked by the LVTPC handler. | ||
943 | */ | ||
944 | rdmsrl(wd->cccr_msr, dummy); | ||
945 | dummy &= ~P4_CCCR_OVF; | ||
946 | wrmsrl(wd->cccr_msr, dummy); | ||
947 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
948 | } | ||
949 | else if (wd->perfctr_msr == MSR_P6_PERFCTR0 || | ||
950 | wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
951 | /* P6 based Pentium M need to re-unmask | ||
952 | * the apic vector but it doesn't hurt | ||
953 | * other P6 variant. | ||
954 | * ArchPerfom/Core Duo also needs this */ | ||
955 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
956 | } | ||
957 | /* start the cycle over again */ | ||
958 | write_watchdog_counter(wd->perfctr_msr, NULL); | ||
959 | rc = 1; | ||
960 | } else if (nmi_watchdog == NMI_IO_APIC) { | ||
961 | /* don't know how to accurately check for this. | ||
962 | * just assume it was a watchdog timer interrupt | ||
963 | * This matches the old behaviour. | ||
618 | */ | 964 | */ |
619 | wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); | 965 | rc = 1; |
620 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
621 | } | 966 | } |
622 | else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 || | ||
623 | nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) { | ||
624 | /* Only P6 based Pentium M need to re-unmask | ||
625 | * the apic vector but it doesn't hurt | ||
626 | * other P6 variant */ | ||
627 | apic_write(APIC_LVTPC, APIC_DM_NMI); | ||
628 | } | ||
629 | write_watchdog_counter(NULL); | ||
630 | } | 967 | } |
968 | done: | ||
969 | return rc; | ||
970 | } | ||
971 | |||
972 | int do_nmi_callback(struct pt_regs * regs, int cpu) | ||
973 | { | ||
974 | #ifdef CONFIG_SYSCTL | ||
975 | if (unknown_nmi_panic) | ||
976 | return unknown_nmi_panic_callback(regs, cpu); | ||
977 | #endif | ||
978 | return 0; | ||
631 | } | 979 | } |
632 | 980 | ||
633 | #ifdef CONFIG_SYSCTL | 981 | #ifdef CONFIG_SYSCTL |
@@ -637,36 +985,46 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) | |||
637 | unsigned char reason = get_nmi_reason(); | 985 | unsigned char reason = get_nmi_reason(); |
638 | char buf[64]; | 986 | char buf[64]; |
639 | 987 | ||
640 | if (!(reason & 0xc0)) { | 988 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); |
641 | sprintf(buf, "NMI received for unknown reason %02x\n", reason); | 989 | die_nmi(regs, buf); |
642 | die_nmi(regs, buf); | ||
643 | } | ||
644 | return 0; | 990 | return 0; |
645 | } | 991 | } |
646 | 992 | ||
647 | /* | 993 | /* |
648 | * proc handler for /proc/sys/kernel/unknown_nmi_panic | 994 | * proc handler for /proc/sys/kernel/nmi |
649 | */ | 995 | */ |
650 | int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, | 996 | int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file, |
651 | void __user *buffer, size_t *length, loff_t *ppos) | 997 | void __user *buffer, size_t *length, loff_t *ppos) |
652 | { | 998 | { |
653 | int old_state; | 999 | int old_state; |
654 | 1000 | ||
655 | old_state = unknown_nmi_panic; | 1001 | nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0; |
1002 | old_state = nmi_watchdog_enabled; | ||
656 | proc_dointvec(table, write, file, buffer, length, ppos); | 1003 | proc_dointvec(table, write, file, buffer, length, ppos); |
657 | if (!!old_state == !!unknown_nmi_panic) | 1004 | if (!!old_state == !!nmi_watchdog_enabled) |
658 | return 0; | 1005 | return 0; |
659 | 1006 | ||
660 | if (unknown_nmi_panic) { | 1007 | if (atomic_read(&nmi_active) < 0) { |
661 | if (reserve_lapic_nmi() < 0) { | 1008 | printk( KERN_WARNING "NMI watchdog is permanently disabled\n"); |
662 | unknown_nmi_panic = 0; | 1009 | return -EIO; |
663 | return -EBUSY; | 1010 | } |
664 | } else { | 1011 | |
665 | set_nmi_callback(unknown_nmi_panic_callback); | 1012 | if (nmi_watchdog == NMI_DEFAULT) { |
666 | } | 1013 | if (nmi_known_cpu() > 0) |
1014 | nmi_watchdog = NMI_LOCAL_APIC; | ||
1015 | else | ||
1016 | nmi_watchdog = NMI_IO_APIC; | ||
1017 | } | ||
1018 | |||
1019 | if (nmi_watchdog == NMI_LOCAL_APIC) { | ||
1020 | if (nmi_watchdog_enabled) | ||
1021 | enable_lapic_nmi_watchdog(); | ||
1022 | else | ||
1023 | disable_lapic_nmi_watchdog(); | ||
667 | } else { | 1024 | } else { |
668 | release_lapic_nmi(); | 1025 | printk( KERN_WARNING |
669 | unset_nmi_callback(); | 1026 | "NMI watchdog doesn't know what hardware to touch\n"); |
1027 | return -EIO; | ||
670 | } | 1028 | } |
671 | return 0; | 1029 | return 0; |
672 | } | 1030 | } |
@@ -675,7 +1033,11 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, | |||
675 | 1033 | ||
676 | EXPORT_SYMBOL(nmi_active); | 1034 | EXPORT_SYMBOL(nmi_active); |
677 | EXPORT_SYMBOL(nmi_watchdog); | 1035 | EXPORT_SYMBOL(nmi_watchdog); |
678 | EXPORT_SYMBOL(reserve_lapic_nmi); | 1036 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi); |
679 | EXPORT_SYMBOL(release_lapic_nmi); | 1037 | EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); |
1038 | EXPORT_SYMBOL(reserve_perfctr_nmi); | ||
1039 | EXPORT_SYMBOL(release_perfctr_nmi); | ||
1040 | EXPORT_SYMBOL(reserve_evntsel_nmi); | ||
1041 | EXPORT_SYMBOL(release_evntsel_nmi); | ||
680 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); | 1042 | EXPORT_SYMBOL(disable_timer_nmi_watchdog); |
681 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); | 1043 | EXPORT_SYMBOL(enable_timer_nmi_watchdog); |
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c index 8657c739656a..8c190ca7ae44 100644 --- a/arch/i386/kernel/process.c +++ b/arch/i386/kernel/process.c | |||
@@ -37,6 +37,7 @@ | |||
37 | #include <linux/kallsyms.h> | 37 | #include <linux/kallsyms.h> |
38 | #include <linux/ptrace.h> | 38 | #include <linux/ptrace.h> |
39 | #include <linux/random.h> | 39 | #include <linux/random.h> |
40 | #include <linux/personality.h> | ||
40 | 41 | ||
41 | #include <asm/uaccess.h> | 42 | #include <asm/uaccess.h> |
42 | #include <asm/pgtable.h> | 43 | #include <asm/pgtable.h> |
@@ -320,15 +321,6 @@ void show_regs(struct pt_regs * regs) | |||
320 | * the "args". | 321 | * the "args". |
321 | */ | 322 | */ |
322 | extern void kernel_thread_helper(void); | 323 | extern void kernel_thread_helper(void); |
323 | __asm__(".section .text\n" | ||
324 | ".align 4\n" | ||
325 | "kernel_thread_helper:\n\t" | ||
326 | "movl %edx,%eax\n\t" | ||
327 | "pushl %edx\n\t" | ||
328 | "call *%ebx\n\t" | ||
329 | "pushl %eax\n\t" | ||
330 | "call do_exit\n" | ||
331 | ".previous"); | ||
332 | 324 | ||
333 | /* | 325 | /* |
334 | * Create a kernel thread | 326 | * Create a kernel thread |
@@ -346,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags) | |||
346 | regs.xes = __USER_DS; | 338 | regs.xes = __USER_DS; |
347 | regs.orig_eax = -1; | 339 | regs.orig_eax = -1; |
348 | regs.eip = (unsigned long) kernel_thread_helper; | 340 | regs.eip = (unsigned long) kernel_thread_helper; |
349 | regs.xcs = __KERNEL_CS; | 341 | regs.xcs = __KERNEL_CS | get_kernel_rpl(); |
350 | regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; | 342 | regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; |
351 | 343 | ||
352 | /* Ok, create the new process.. */ | 344 | /* Ok, create the new process.. */ |
@@ -905,7 +897,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info) | |||
905 | 897 | ||
906 | unsigned long arch_align_stack(unsigned long sp) | 898 | unsigned long arch_align_stack(unsigned long sp) |
907 | { | 899 | { |
908 | if (randomize_va_space) | 900 | if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space) |
909 | sp -= get_random_int() % 8192; | 901 | sp -= get_random_int() % 8192; |
910 | return sp & ~0xf; | 902 | return sp & ~0xf; |
911 | } | 903 | } |
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c index d3db03f4085d..775f50e9395b 100644 --- a/arch/i386/kernel/ptrace.c +++ b/arch/i386/kernel/ptrace.c | |||
@@ -185,17 +185,17 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_ | |||
185 | return addr; | 185 | return addr; |
186 | } | 186 | } |
187 | 187 | ||
188 | static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs) | 188 | static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs) |
189 | { | 189 | { |
190 | int i, copied; | 190 | int i, copied; |
191 | unsigned char opcode[16]; | 191 | unsigned char opcode[15]; |
192 | unsigned long addr = convert_eip_to_linear(child, regs); | 192 | unsigned long addr = convert_eip_to_linear(child, regs); |
193 | 193 | ||
194 | copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); | 194 | copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); |
195 | for (i = 0; i < copied; i++) { | 195 | for (i = 0; i < copied; i++) { |
196 | switch (opcode[i]) { | 196 | switch (opcode[i]) { |
197 | /* popf */ | 197 | /* popf and iret */ |
198 | case 0x9d: | 198 | case 0x9d: case 0xcf: |
199 | return 1; | 199 | return 1; |
200 | /* opcode and address size prefixes */ | 200 | /* opcode and address size prefixes */ |
201 | case 0x66: case 0x67: | 201 | case 0x66: case 0x67: |
@@ -247,7 +247,7 @@ static void set_singlestep(struct task_struct *child) | |||
247 | * don't mark it as being "us" that set it, so that we | 247 | * don't mark it as being "us" that set it, so that we |
248 | * won't clear it by hand later. | 248 | * won't clear it by hand later. |
249 | */ | 249 | */ |
250 | if (is_at_popf(child, regs)) | 250 | if (is_setting_trap_flag(child, regs)) |
251 | return; | 251 | return; |
252 | 252 | ||
253 | child->ptrace |= PT_DTRACE; | 253 | child->ptrace |= PT_DTRACE; |
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S index d312616effa1..f151d6fae462 100644 --- a/arch/i386/kernel/relocate_kernel.S +++ b/arch/i386/kernel/relocate_kernel.S | |||
@@ -7,16 +7,138 @@ | |||
7 | */ | 7 | */ |
8 | 8 | ||
9 | #include <linux/linkage.h> | 9 | #include <linux/linkage.h> |
10 | #include <asm/page.h> | ||
11 | #include <asm/kexec.h> | ||
12 | |||
13 | /* | ||
14 | * Must be relocatable PIC code callable as a C function | ||
15 | */ | ||
16 | |||
17 | #define PTR(x) (x << 2) | ||
18 | #define PAGE_ALIGNED (1 << PAGE_SHIFT) | ||
19 | #define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */ | ||
20 | #define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */ | ||
21 | |||
22 | .text | ||
23 | .align PAGE_ALIGNED | ||
24 | .globl relocate_kernel | ||
25 | relocate_kernel: | ||
26 | movl 8(%esp), %ebp /* list of pages */ | ||
27 | |||
28 | #ifdef CONFIG_X86_PAE | ||
29 | /* map the control page at its virtual address */ | ||
30 | |||
31 | movl PTR(VA_PGD)(%ebp), %edi | ||
32 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
33 | andl $0xc0000000, %eax | ||
34 | shrl $27, %eax | ||
35 | addl %edi, %eax | ||
36 | |||
37 | movl PTR(PA_PMD_0)(%ebp), %edx | ||
38 | orl $PAE_PGD_ATTR, %edx | ||
39 | movl %edx, (%eax) | ||
40 | |||
41 | movl PTR(VA_PMD_0)(%ebp), %edi | ||
42 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
43 | andl $0x3fe00000, %eax | ||
44 | shrl $18, %eax | ||
45 | addl %edi, %eax | ||
46 | |||
47 | movl PTR(PA_PTE_0)(%ebp), %edx | ||
48 | orl $PAGE_ATTR, %edx | ||
49 | movl %edx, (%eax) | ||
50 | |||
51 | movl PTR(VA_PTE_0)(%ebp), %edi | ||
52 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
53 | andl $0x001ff000, %eax | ||
54 | shrl $9, %eax | ||
55 | addl %edi, %eax | ||
56 | |||
57 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
58 | orl $PAGE_ATTR, %edx | ||
59 | movl %edx, (%eax) | ||
60 | |||
61 | /* identity map the control page at its physical address */ | ||
62 | |||
63 | movl PTR(VA_PGD)(%ebp), %edi | ||
64 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
65 | andl $0xc0000000, %eax | ||
66 | shrl $27, %eax | ||
67 | addl %edi, %eax | ||
68 | |||
69 | movl PTR(PA_PMD_1)(%ebp), %edx | ||
70 | orl $PAE_PGD_ATTR, %edx | ||
71 | movl %edx, (%eax) | ||
72 | |||
73 | movl PTR(VA_PMD_1)(%ebp), %edi | ||
74 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
75 | andl $0x3fe00000, %eax | ||
76 | shrl $18, %eax | ||
77 | addl %edi, %eax | ||
78 | |||
79 | movl PTR(PA_PTE_1)(%ebp), %edx | ||
80 | orl $PAGE_ATTR, %edx | ||
81 | movl %edx, (%eax) | ||
82 | |||
83 | movl PTR(VA_PTE_1)(%ebp), %edi | ||
84 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
85 | andl $0x001ff000, %eax | ||
86 | shrl $9, %eax | ||
87 | addl %edi, %eax | ||
88 | |||
89 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
90 | orl $PAGE_ATTR, %edx | ||
91 | movl %edx, (%eax) | ||
92 | #else | ||
93 | /* map the control page at its virtual address */ | ||
94 | |||
95 | movl PTR(VA_PGD)(%ebp), %edi | ||
96 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
97 | andl $0xffc00000, %eax | ||
98 | shrl $20, %eax | ||
99 | addl %edi, %eax | ||
100 | |||
101 | movl PTR(PA_PTE_0)(%ebp), %edx | ||
102 | orl $PAGE_ATTR, %edx | ||
103 | movl %edx, (%eax) | ||
104 | |||
105 | movl PTR(VA_PTE_0)(%ebp), %edi | ||
106 | movl PTR(VA_CONTROL_PAGE)(%ebp), %eax | ||
107 | andl $0x003ff000, %eax | ||
108 | shrl $10, %eax | ||
109 | addl %edi, %eax | ||
110 | |||
111 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
112 | orl $PAGE_ATTR, %edx | ||
113 | movl %edx, (%eax) | ||
114 | |||
115 | /* identity map the control page at its physical address */ | ||
116 | |||
117 | movl PTR(VA_PGD)(%ebp), %edi | ||
118 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
119 | andl $0xffc00000, %eax | ||
120 | shrl $20, %eax | ||
121 | addl %edi, %eax | ||
122 | |||
123 | movl PTR(PA_PTE_1)(%ebp), %edx | ||
124 | orl $PAGE_ATTR, %edx | ||
125 | movl %edx, (%eax) | ||
126 | |||
127 | movl PTR(VA_PTE_1)(%ebp), %edi | ||
128 | movl PTR(PA_CONTROL_PAGE)(%ebp), %eax | ||
129 | andl $0x003ff000, %eax | ||
130 | shrl $10, %eax | ||
131 | addl %edi, %eax | ||
132 | |||
133 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edx | ||
134 | orl $PAGE_ATTR, %edx | ||
135 | movl %edx, (%eax) | ||
136 | #endif | ||
10 | 137 | ||
11 | /* | ||
12 | * Must be relocatable PIC code callable as a C function, that once | ||
13 | * it starts can not use the previous processes stack. | ||
14 | */ | ||
15 | .globl relocate_new_kernel | ||
16 | relocate_new_kernel: | 138 | relocate_new_kernel: |
17 | /* read the arguments and say goodbye to the stack */ | 139 | /* read the arguments and say goodbye to the stack */ |
18 | movl 4(%esp), %ebx /* page_list */ | 140 | movl 4(%esp), %ebx /* page_list */ |
19 | movl 8(%esp), %ebp /* reboot_code_buffer */ | 141 | movl 8(%esp), %ebp /* list of pages */ |
20 | movl 12(%esp), %edx /* start address */ | 142 | movl 12(%esp), %edx /* start address */ |
21 | movl 16(%esp), %ecx /* cpu_has_pae */ | 143 | movl 16(%esp), %ecx /* cpu_has_pae */ |
22 | 144 | ||
@@ -24,11 +146,26 @@ relocate_new_kernel: | |||
24 | pushl $0 | 146 | pushl $0 |
25 | popfl | 147 | popfl |
26 | 148 | ||
27 | /* set a new stack at the bottom of our page... */ | 149 | /* get physical address of control page now */ |
28 | lea 4096(%ebp), %esp | 150 | /* this is impossible after page table switch */ |
151 | movl PTR(PA_CONTROL_PAGE)(%ebp), %edi | ||
29 | 152 | ||
30 | /* store the parameters back on the stack */ | 153 | /* switch to new set of page tables */ |
31 | pushl %edx /* store the start address */ | 154 | movl PTR(PA_PGD)(%ebp), %eax |
155 | movl %eax, %cr3 | ||
156 | |||
157 | /* setup a new stack at the end of the physical control page */ | ||
158 | lea 4096(%edi), %esp | ||
159 | |||
160 | /* jump to identity mapped page */ | ||
161 | movl %edi, %eax | ||
162 | addl $(identity_mapped - relocate_kernel), %eax | ||
163 | pushl %eax | ||
164 | ret | ||
165 | |||
166 | identity_mapped: | ||
167 | /* store the start address on the stack */ | ||
168 | pushl %edx | ||
32 | 169 | ||
33 | /* Set cr0 to a known state: | 170 | /* Set cr0 to a known state: |
34 | * 31 0 == Paging disabled | 171 | * 31 0 == Paging disabled |
@@ -113,8 +250,3 @@ relocate_new_kernel: | |||
113 | xorl %edi, %edi | 250 | xorl %edi, %edi |
114 | xorl %ebp, %ebp | 251 | xorl %ebp, %ebp |
115 | ret | 252 | ret |
116 | relocate_new_kernel_end: | ||
117 | |||
118 | .globl relocate_new_kernel_size | ||
119 | relocate_new_kernel_size: | ||
120 | .long relocate_new_kernel_end - relocate_new_kernel | ||
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c deleted file mode 100644 index 98352c374c76..000000000000 --- a/arch/i386/kernel/semaphore.c +++ /dev/null | |||
@@ -1,134 +0,0 @@ | |||
1 | /* | ||
2 | * i386 semaphore implementation. | ||
3 | * | ||
4 | * (C) Copyright 1999 Linus Torvalds | ||
5 | * | ||
6 | * Portions Copyright 1999 Red Hat, Inc. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> | ||
14 | */ | ||
15 | #include <asm/semaphore.h> | ||
16 | |||
17 | /* | ||
18 | * The semaphore operations have a special calling sequence that | ||
19 | * allow us to do a simpler in-line version of them. These routines | ||
20 | * need to convert that sequence back into the C sequence when | ||
21 | * there is contention on the semaphore. | ||
22 | * | ||
23 | * %eax contains the semaphore pointer on entry. Save the C-clobbered | ||
24 | * registers (%eax, %edx and %ecx) except %eax whish is either a return | ||
25 | * value or just clobbered.. | ||
26 | */ | ||
27 | asm( | ||
28 | ".section .sched.text\n" | ||
29 | ".align 4\n" | ||
30 | ".globl __down_failed\n" | ||
31 | "__down_failed:\n\t" | ||
32 | #if defined(CONFIG_FRAME_POINTER) | ||
33 | "pushl %ebp\n\t" | ||
34 | "movl %esp,%ebp\n\t" | ||
35 | #endif | ||
36 | "pushl %edx\n\t" | ||
37 | "pushl %ecx\n\t" | ||
38 | "call __down\n\t" | ||
39 | "popl %ecx\n\t" | ||
40 | "popl %edx\n\t" | ||
41 | #if defined(CONFIG_FRAME_POINTER) | ||
42 | "movl %ebp,%esp\n\t" | ||
43 | "popl %ebp\n\t" | ||
44 | #endif | ||
45 | "ret" | ||
46 | ); | ||
47 | |||
48 | asm( | ||
49 | ".section .sched.text\n" | ||
50 | ".align 4\n" | ||
51 | ".globl __down_failed_interruptible\n" | ||
52 | "__down_failed_interruptible:\n\t" | ||
53 | #if defined(CONFIG_FRAME_POINTER) | ||
54 | "pushl %ebp\n\t" | ||
55 | "movl %esp,%ebp\n\t" | ||
56 | #endif | ||
57 | "pushl %edx\n\t" | ||
58 | "pushl %ecx\n\t" | ||
59 | "call __down_interruptible\n\t" | ||
60 | "popl %ecx\n\t" | ||
61 | "popl %edx\n\t" | ||
62 | #if defined(CONFIG_FRAME_POINTER) | ||
63 | "movl %ebp,%esp\n\t" | ||
64 | "popl %ebp\n\t" | ||
65 | #endif | ||
66 | "ret" | ||
67 | ); | ||
68 | |||
69 | asm( | ||
70 | ".section .sched.text\n" | ||
71 | ".align 4\n" | ||
72 | ".globl __down_failed_trylock\n" | ||
73 | "__down_failed_trylock:\n\t" | ||
74 | #if defined(CONFIG_FRAME_POINTER) | ||
75 | "pushl %ebp\n\t" | ||
76 | "movl %esp,%ebp\n\t" | ||
77 | #endif | ||
78 | "pushl %edx\n\t" | ||
79 | "pushl %ecx\n\t" | ||
80 | "call __down_trylock\n\t" | ||
81 | "popl %ecx\n\t" | ||
82 | "popl %edx\n\t" | ||
83 | #if defined(CONFIG_FRAME_POINTER) | ||
84 | "movl %ebp,%esp\n\t" | ||
85 | "popl %ebp\n\t" | ||
86 | #endif | ||
87 | "ret" | ||
88 | ); | ||
89 | |||
90 | asm( | ||
91 | ".section .sched.text\n" | ||
92 | ".align 4\n" | ||
93 | ".globl __up_wakeup\n" | ||
94 | "__up_wakeup:\n\t" | ||
95 | "pushl %edx\n\t" | ||
96 | "pushl %ecx\n\t" | ||
97 | "call __up\n\t" | ||
98 | "popl %ecx\n\t" | ||
99 | "popl %edx\n\t" | ||
100 | "ret" | ||
101 | ); | ||
102 | |||
103 | /* | ||
104 | * rw spinlock fallbacks | ||
105 | */ | ||
106 | #if defined(CONFIG_SMP) | ||
107 | asm( | ||
108 | ".section .sched.text\n" | ||
109 | ".align 4\n" | ||
110 | ".globl __write_lock_failed\n" | ||
111 | "__write_lock_failed:\n\t" | ||
112 | LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n" | ||
113 | "1: rep; nop\n\t" | ||
114 | "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" | ||
115 | "jne 1b\n\t" | ||
116 | LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t" | ||
117 | "jnz __write_lock_failed\n\t" | ||
118 | "ret" | ||
119 | ); | ||
120 | |||
121 | asm( | ||
122 | ".section .sched.text\n" | ||
123 | ".align 4\n" | ||
124 | ".globl __read_lock_failed\n" | ||
125 | "__read_lock_failed:\n\t" | ||
126 | LOCK_PREFIX "incl (%eax)\n" | ||
127 | "1: rep; nop\n\t" | ||
128 | "cmpl $1,(%eax)\n\t" | ||
129 | "js 1b\n\t" | ||
130 | LOCK_PREFIX "decl (%eax)\n\t" | ||
131 | "js __read_lock_failed\n\t" | ||
132 | "ret" | ||
133 | ); | ||
134 | #endif | ||
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 16d99444cf66..76a524b4c90f 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c | |||
@@ -90,18 +90,6 @@ EXPORT_SYMBOL(boot_cpu_data); | |||
90 | 90 | ||
91 | unsigned long mmu_cr4_features; | 91 | unsigned long mmu_cr4_features; |
92 | 92 | ||
93 | #ifdef CONFIG_ACPI | ||
94 | int acpi_disabled = 0; | ||
95 | #else | ||
96 | int acpi_disabled = 1; | ||
97 | #endif | ||
98 | EXPORT_SYMBOL(acpi_disabled); | ||
99 | |||
100 | #ifdef CONFIG_ACPI | ||
101 | int __initdata acpi_force = 0; | ||
102 | extern acpi_interrupt_flags acpi_sci_flags; | ||
103 | #endif | ||
104 | |||
105 | /* for MCA, but anyone else can use it if they want */ | 93 | /* for MCA, but anyone else can use it if they want */ |
106 | unsigned int machine_id; | 94 | unsigned int machine_id; |
107 | #ifdef CONFIG_MCA | 95 | #ifdef CONFIG_MCA |
@@ -149,7 +137,6 @@ EXPORT_SYMBOL(ist_info); | |||
149 | struct e820map e820; | 137 | struct e820map e820; |
150 | 138 | ||
151 | extern void early_cpu_init(void); | 139 | extern void early_cpu_init(void); |
152 | extern void generic_apic_probe(char *); | ||
153 | extern int root_mountflags; | 140 | extern int root_mountflags; |
154 | 141 | ||
155 | unsigned long saved_videomode; | 142 | unsigned long saved_videomode; |
@@ -701,238 +688,132 @@ static inline void copy_edd(void) | |||
701 | } | 688 | } |
702 | #endif | 689 | #endif |
703 | 690 | ||
704 | static void __init parse_cmdline_early (char ** cmdline_p) | 691 | static int __initdata user_defined_memmap = 0; |
705 | { | ||
706 | char c = ' ', *to = command_line, *from = saved_command_line; | ||
707 | int len = 0; | ||
708 | int userdef = 0; | ||
709 | 692 | ||
710 | /* Save unparsed command line copy for /proc/cmdline */ | 693 | /* |
711 | saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; | 694 | * "mem=nopentium" disables the 4MB page tables. |
695 | * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM | ||
696 | * to <mem>, overriding the bios size. | ||
697 | * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from | ||
698 | * <start> to <start>+<mem>, overriding the bios size. | ||
699 | * | ||
700 | * HPA tells me bootloaders need to parse mem=, so no new | ||
701 | * option should be mem= [also see Documentation/i386/boot.txt] | ||
702 | */ | ||
703 | static int __init parse_mem(char *arg) | ||
704 | { | ||
705 | if (!arg) | ||
706 | return -EINVAL; | ||
712 | 707 | ||
713 | for (;;) { | 708 | if (strcmp(arg, "nopentium") == 0) { |
714 | if (c != ' ') | 709 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); |
715 | goto next_char; | 710 | disable_pse = 1; |
716 | /* | 711 | } else { |
717 | * "mem=nopentium" disables the 4MB page tables. | 712 | /* If the user specifies memory size, we |
718 | * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM | 713 | * limit the BIOS-provided memory map to |
719 | * to <mem>, overriding the bios size. | 714 | * that size. exactmap can be used to specify |
720 | * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from | 715 | * the exact map. mem=number can be used to |
721 | * <start> to <start>+<mem>, overriding the bios size. | 716 | * trim the existing memory map. |
722 | * | ||
723 | * HPA tells me bootloaders need to parse mem=, so no new | ||
724 | * option should be mem= [also see Documentation/i386/boot.txt] | ||
725 | */ | 717 | */ |
726 | if (!memcmp(from, "mem=", 4)) { | 718 | unsigned long long mem_size; |
727 | if (to != command_line) | ||
728 | to--; | ||
729 | if (!memcmp(from+4, "nopentium", 9)) { | ||
730 | from += 9+4; | ||
731 | clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability); | ||
732 | disable_pse = 1; | ||
733 | } else { | ||
734 | /* If the user specifies memory size, we | ||
735 | * limit the BIOS-provided memory map to | ||
736 | * that size. exactmap can be used to specify | ||
737 | * the exact map. mem=number can be used to | ||
738 | * trim the existing memory map. | ||
739 | */ | ||
740 | unsigned long long mem_size; | ||
741 | 719 | ||
742 | mem_size = memparse(from+4, &from); | 720 | mem_size = memparse(arg, &arg); |
743 | limit_regions(mem_size); | 721 | limit_regions(mem_size); |
744 | userdef=1; | 722 | user_defined_memmap = 1; |
745 | } | 723 | } |
746 | } | 724 | return 0; |
747 | 725 | } | |
748 | else if (!memcmp(from, "memmap=", 7)) { | 726 | early_param("mem", parse_mem); |
749 | if (to != command_line) | ||
750 | to--; | ||
751 | if (!memcmp(from+7, "exactmap", 8)) { | ||
752 | #ifdef CONFIG_CRASH_DUMP | ||
753 | /* If we are doing a crash dump, we | ||
754 | * still need to know the real mem | ||
755 | * size before original memory map is | ||
756 | * reset. | ||
757 | */ | ||
758 | find_max_pfn(); | ||
759 | saved_max_pfn = max_pfn; | ||
760 | #endif | ||
761 | from += 8+7; | ||
762 | e820.nr_map = 0; | ||
763 | userdef = 1; | ||
764 | } else { | ||
765 | /* If the user specifies memory size, we | ||
766 | * limit the BIOS-provided memory map to | ||
767 | * that size. exactmap can be used to specify | ||
768 | * the exact map. mem=number can be used to | ||
769 | * trim the existing memory map. | ||
770 | */ | ||
771 | unsigned long long start_at, mem_size; | ||
772 | |||
773 | mem_size = memparse(from+7, &from); | ||
774 | if (*from == '@') { | ||
775 | start_at = memparse(from+1, &from); | ||
776 | add_memory_region(start_at, mem_size, E820_RAM); | ||
777 | } else if (*from == '#') { | ||
778 | start_at = memparse(from+1, &from); | ||
779 | add_memory_region(start_at, mem_size, E820_ACPI); | ||
780 | } else if (*from == '$') { | ||
781 | start_at = memparse(from+1, &from); | ||
782 | add_memory_region(start_at, mem_size, E820_RESERVED); | ||
783 | } else { | ||
784 | limit_regions(mem_size); | ||
785 | userdef=1; | ||
786 | } | ||
787 | } | ||
788 | } | ||
789 | |||
790 | else if (!memcmp(from, "noexec=", 7)) | ||
791 | noexec_setup(from + 7); | ||
792 | 727 | ||
728 | static int __init parse_memmap(char *arg) | ||
729 | { | ||
730 | if (!arg) | ||
731 | return -EINVAL; | ||
793 | 732 | ||
794 | #ifdef CONFIG_X86_SMP | 733 | if (strcmp(arg, "exactmap") == 0) { |
795 | /* | 734 | #ifdef CONFIG_CRASH_DUMP |
796 | * If the BIOS enumerates physical processors before logical, | 735 | /* If we are doing a crash dump, we |
797 | * maxcpus=N at enumeration-time can be used to disable HT. | 736 | * still need to know the real mem |
737 | * size before original memory map is | ||
738 | * reset. | ||
798 | */ | 739 | */ |
799 | else if (!memcmp(from, "maxcpus=", 8)) { | 740 | find_max_pfn(); |
800 | extern unsigned int maxcpus; | 741 | saved_max_pfn = max_pfn; |
801 | |||
802 | maxcpus = simple_strtoul(from + 8, NULL, 0); | ||
803 | } | ||
804 | #endif | 742 | #endif |
805 | 743 | e820.nr_map = 0; | |
806 | #ifdef CONFIG_ACPI | 744 | user_defined_memmap = 1; |
807 | /* "acpi=off" disables both ACPI table parsing and interpreter */ | 745 | } else { |
808 | else if (!memcmp(from, "acpi=off", 8)) { | 746 | /* If the user specifies memory size, we |
809 | disable_acpi(); | 747 | * limit the BIOS-provided memory map to |
810 | } | 748 | * that size. exactmap can be used to specify |
811 | 749 | * the exact map. mem=number can be used to | |
812 | /* acpi=force to over-ride black-list */ | 750 | * trim the existing memory map. |
813 | else if (!memcmp(from, "acpi=force", 10)) { | 751 | */ |
814 | acpi_force = 1; | 752 | unsigned long long start_at, mem_size; |
815 | acpi_ht = 1; | 753 | |
816 | acpi_disabled = 0; | 754 | mem_size = memparse(arg, &arg); |
817 | } | 755 | if (*arg == '@') { |
818 | 756 | start_at = memparse(arg+1, &arg); | |
819 | /* acpi=strict disables out-of-spec workarounds */ | 757 | add_memory_region(start_at, mem_size, E820_RAM); |
820 | else if (!memcmp(from, "acpi=strict", 11)) { | 758 | } else if (*arg == '#') { |
821 | acpi_strict = 1; | 759 | start_at = memparse(arg+1, &arg); |
822 | } | 760 | add_memory_region(start_at, mem_size, E820_ACPI); |
823 | 761 | } else if (*arg == '$') { | |
824 | /* Limit ACPI just to boot-time to enable HT */ | 762 | start_at = memparse(arg+1, &arg); |
825 | else if (!memcmp(from, "acpi=ht", 7)) { | 763 | add_memory_region(start_at, mem_size, E820_RESERVED); |
826 | if (!acpi_force) | 764 | } else { |
827 | disable_acpi(); | 765 | limit_regions(mem_size); |
828 | acpi_ht = 1; | 766 | user_defined_memmap = 1; |
829 | } | ||
830 | |||
831 | /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */ | ||
832 | else if (!memcmp(from, "pci=noacpi", 10)) { | ||
833 | acpi_disable_pci(); | ||
834 | } | ||
835 | /* "acpi=noirq" disables ACPI interrupt routing */ | ||
836 | else if (!memcmp(from, "acpi=noirq", 10)) { | ||
837 | acpi_noirq_set(); | ||
838 | } | 767 | } |
768 | } | ||
769 | return 0; | ||
770 | } | ||
771 | early_param("memmap", parse_memmap); | ||
839 | 772 | ||
840 | else if (!memcmp(from, "acpi_sci=edge", 13)) | 773 | #ifdef CONFIG_PROC_VMCORE |
841 | acpi_sci_flags.trigger = 1; | 774 | /* elfcorehdr= specifies the location of elf core header |
842 | 775 | * stored by the crashed kernel. | |
843 | else if (!memcmp(from, "acpi_sci=level", 14)) | 776 | */ |
844 | acpi_sci_flags.trigger = 3; | 777 | static int __init parse_elfcorehdr(char *arg) |
845 | 778 | { | |
846 | else if (!memcmp(from, "acpi_sci=high", 13)) | 779 | if (!arg) |
847 | acpi_sci_flags.polarity = 1; | 780 | return -EINVAL; |
848 | |||
849 | else if (!memcmp(from, "acpi_sci=low", 12)) | ||
850 | acpi_sci_flags.polarity = 3; | ||
851 | |||
852 | #ifdef CONFIG_X86_IO_APIC | ||
853 | else if (!memcmp(from, "acpi_skip_timer_override", 24)) | ||
854 | acpi_skip_timer_override = 1; | ||
855 | |||
856 | if (!memcmp(from, "disable_timer_pin_1", 19)) | ||
857 | disable_timer_pin_1 = 1; | ||
858 | if (!memcmp(from, "enable_timer_pin_1", 18)) | ||
859 | disable_timer_pin_1 = -1; | ||
860 | 781 | ||
861 | /* disable IO-APIC */ | 782 | elfcorehdr_addr = memparse(arg, &arg); |
862 | else if (!memcmp(from, "noapic", 6)) | 783 | return 0; |
863 | disable_ioapic_setup(); | 784 | } |
864 | #endif /* CONFIG_X86_IO_APIC */ | 785 | early_param("elfcorehdr", parse_elfcorehdr); |
865 | #endif /* CONFIG_ACPI */ | 786 | #endif /* CONFIG_PROC_VMCORE */ |
866 | 787 | ||
867 | #ifdef CONFIG_X86_LOCAL_APIC | 788 | /* |
868 | /* enable local APIC */ | 789 | * highmem=size forces highmem to be exactly 'size' bytes. |
869 | else if (!memcmp(from, "lapic", 5)) | 790 | * This works even on boxes that have no highmem otherwise. |
870 | lapic_enable(); | 791 | * This also works to reduce highmem size on bigger boxes. |
792 | */ | ||
793 | static int __init parse_highmem(char *arg) | ||
794 | { | ||
795 | if (!arg) | ||
796 | return -EINVAL; | ||
871 | 797 | ||
872 | /* disable local APIC */ | 798 | highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT; |
873 | else if (!memcmp(from, "nolapic", 6)) | 799 | return 0; |
874 | lapic_disable(); | 800 | } |
875 | #endif /* CONFIG_X86_LOCAL_APIC */ | 801 | early_param("highmem", parse_highmem); |
876 | 802 | ||
877 | #ifdef CONFIG_KEXEC | 803 | /* |
878 | /* crashkernel=size@addr specifies the location to reserve for | 804 | * vmalloc=size forces the vmalloc area to be exactly 'size' |
879 | * a crash kernel. By reserving this memory we guarantee | 805 | * bytes. This can be used to increase (or decrease) the |
880 | * that linux never set's it up as a DMA target. | 806 | * vmalloc area - the default is 128m. |
881 | * Useful for holding code to do something appropriate | 807 | */ |
882 | * after a kernel panic. | 808 | static int __init parse_vmalloc(char *arg) |
883 | */ | 809 | { |
884 | else if (!memcmp(from, "crashkernel=", 12)) { | 810 | if (!arg) |
885 | unsigned long size, base; | 811 | return -EINVAL; |
886 | size = memparse(from+12, &from); | ||
887 | if (*from == '@') { | ||
888 | base = memparse(from+1, &from); | ||
889 | /* FIXME: Do I want a sanity check | ||
890 | * to validate the memory range? | ||
891 | */ | ||
892 | crashk_res.start = base; | ||
893 | crashk_res.end = base + size - 1; | ||
894 | } | ||
895 | } | ||
896 | #endif | ||
897 | #ifdef CONFIG_PROC_VMCORE | ||
898 | /* elfcorehdr= specifies the location of elf core header | ||
899 | * stored by the crashed kernel. | ||
900 | */ | ||
901 | else if (!memcmp(from, "elfcorehdr=", 11)) | ||
902 | elfcorehdr_addr = memparse(from+11, &from); | ||
903 | #endif | ||
904 | 812 | ||
905 | /* | 813 | __VMALLOC_RESERVE = memparse(arg, &arg); |
906 | * highmem=size forces highmem to be exactly 'size' bytes. | 814 | return 0; |
907 | * This works even on boxes that have no highmem otherwise. | ||
908 | * This also works to reduce highmem size on bigger boxes. | ||
909 | */ | ||
910 | else if (!memcmp(from, "highmem=", 8)) | ||
911 | highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; | ||
912 | |||
913 | /* | ||
914 | * vmalloc=size forces the vmalloc area to be exactly 'size' | ||
915 | * bytes. This can be used to increase (or decrease) the | ||
916 | * vmalloc area - the default is 128m. | ||
917 | */ | ||
918 | else if (!memcmp(from, "vmalloc=", 8)) | ||
919 | __VMALLOC_RESERVE = memparse(from+8, &from); | ||
920 | |||
921 | next_char: | ||
922 | c = *(from++); | ||
923 | if (!c) | ||
924 | break; | ||
925 | if (COMMAND_LINE_SIZE <= ++len) | ||
926 | break; | ||
927 | *(to++) = c; | ||
928 | } | ||
929 | *to = '\0'; | ||
930 | *cmdline_p = command_line; | ||
931 | if (userdef) { | ||
932 | printk(KERN_INFO "user-defined physical RAM map:\n"); | ||
933 | print_memory_map("user"); | ||
934 | } | ||
935 | } | 815 | } |
816 | early_param("vmalloc", parse_vmalloc); | ||
936 | 817 | ||
937 | /* | 818 | /* |
938 | * reservetop=size reserves a hole at the top of the kernel address space which | 819 | * reservetop=size reserves a hole at the top of the kernel address space which |
@@ -1189,6 +1070,14 @@ static unsigned long __init setup_memory(void) | |||
1189 | } | 1070 | } |
1190 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", | 1071 | printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", |
1191 | pages_to_mb(highend_pfn - highstart_pfn)); | 1072 | pages_to_mb(highend_pfn - highstart_pfn)); |
1073 | num_physpages = highend_pfn; | ||
1074 | high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1; | ||
1075 | #else | ||
1076 | num_physpages = max_low_pfn; | ||
1077 | high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1; | ||
1078 | #endif | ||
1079 | #ifdef CONFIG_FLATMEM | ||
1080 | max_mapnr = num_physpages; | ||
1192 | #endif | 1081 | #endif |
1193 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", | 1082 | printk(KERN_NOTICE "%ldMB LOWMEM available.\n", |
1194 | pages_to_mb(max_low_pfn)); | 1083 | pages_to_mb(max_low_pfn)); |
@@ -1518,17 +1407,15 @@ void __init setup_arch(char **cmdline_p) | |||
1518 | data_resource.start = virt_to_phys(_etext); | 1407 | data_resource.start = virt_to_phys(_etext); |
1519 | data_resource.end = virt_to_phys(_edata)-1; | 1408 | data_resource.end = virt_to_phys(_edata)-1; |
1520 | 1409 | ||
1521 | parse_cmdline_early(cmdline_p); | 1410 | parse_early_param(); |
1522 | 1411 | ||
1523 | #ifdef CONFIG_EARLY_PRINTK | 1412 | if (user_defined_memmap) { |
1524 | { | 1413 | printk(KERN_INFO "user-defined physical RAM map:\n"); |
1525 | char *s = strstr(*cmdline_p, "earlyprintk="); | 1414 | print_memory_map("user"); |
1526 | if (s) { | ||
1527 | setup_early_printk(strchr(s, '=') + 1); | ||
1528 | printk("early console enabled\n"); | ||
1529 | } | ||
1530 | } | 1415 | } |
1531 | #endif | 1416 | |
1417 | strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE); | ||
1418 | *cmdline_p = command_line; | ||
1532 | 1419 | ||
1533 | max_low_pfn = setup_memory(); | 1420 | max_low_pfn = setup_memory(); |
1534 | 1421 | ||
@@ -1557,7 +1444,7 @@ void __init setup_arch(char **cmdline_p) | |||
1557 | dmi_scan_machine(); | 1444 | dmi_scan_machine(); |
1558 | 1445 | ||
1559 | #ifdef CONFIG_X86_GENERICARCH | 1446 | #ifdef CONFIG_X86_GENERICARCH |
1560 | generic_apic_probe(*cmdline_p); | 1447 | generic_apic_probe(); |
1561 | #endif | 1448 | #endif |
1562 | if (efi_enabled) | 1449 | if (efi_enabled) |
1563 | efi_map_memmap(); | 1450 | efi_map_memmap(); |
@@ -1569,9 +1456,11 @@ void __init setup_arch(char **cmdline_p) | |||
1569 | acpi_boot_table_init(); | 1456 | acpi_boot_table_init(); |
1570 | #endif | 1457 | #endif |
1571 | 1458 | ||
1459 | #ifdef CONFIG_PCI | ||
1572 | #ifdef CONFIG_X86_IO_APIC | 1460 | #ifdef CONFIG_X86_IO_APIC |
1573 | check_acpi_pci(); /* Checks more than just ACPI actually */ | 1461 | check_acpi_pci(); /* Checks more than just ACPI actually */ |
1574 | #endif | 1462 | #endif |
1463 | #endif | ||
1575 | 1464 | ||
1576 | #ifdef CONFIG_ACPI | 1465 | #ifdef CONFIG_ACPI |
1577 | acpi_boot_init(); | 1466 | acpi_boot_init(); |
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index efe07990e7fc..020d873b7d21 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c | |||
@@ -177,6 +177,9 @@ static void __devinit smp_store_cpu_info(int id) | |||
177 | */ | 177 | */ |
178 | if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { | 178 | if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { |
179 | 179 | ||
180 | if (num_possible_cpus() == 1) | ||
181 | goto valid_k7; | ||
182 | |||
180 | /* Athlon 660/661 is valid. */ | 183 | /* Athlon 660/661 is valid. */ |
181 | if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) | 184 | if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) |
182 | goto valid_k7; | 185 | goto valid_k7; |
@@ -1376,7 +1379,8 @@ int __cpu_disable(void) | |||
1376 | */ | 1379 | */ |
1377 | if (cpu == 0) | 1380 | if (cpu == 0) |
1378 | return -EBUSY; | 1381 | return -EBUSY; |
1379 | 1382 | if (nmi_watchdog == NMI_LOCAL_APIC) | |
1383 | stop_apic_nmi_watchdog(NULL); | ||
1380 | clear_local_APIC(); | 1384 | clear_local_APIC(); |
1381 | /* Allow any queued timer interrupts to get serviced */ | 1385 | /* Allow any queued timer interrupts to get serviced */ |
1382 | local_irq_enable(); | 1386 | local_irq_enable(); |
@@ -1490,3 +1494,16 @@ void __init smp_intr_init(void) | |||
1490 | /* IPI for generic function call */ | 1494 | /* IPI for generic function call */ |
1491 | set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); | 1495 | set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); |
1492 | } | 1496 | } |
1497 | |||
1498 | /* | ||
1499 | * If the BIOS enumerates physical processors before logical, | ||
1500 | * maxcpus=N at enumeration-time can be used to disable HT. | ||
1501 | */ | ||
1502 | static int __init parse_maxcpus(char *arg) | ||
1503 | { | ||
1504 | extern unsigned int maxcpus; | ||
1505 | |||
1506 | maxcpus = simple_strtoul(arg, NULL, 0); | ||
1507 | return 0; | ||
1508 | } | ||
1509 | early_param("maxcpus", parse_maxcpus); | ||
diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c deleted file mode 100644 index e62a037ab399..000000000000 --- a/arch/i386/kernel/stacktrace.c +++ /dev/null | |||
@@ -1,98 +0,0 @@ | |||
1 | /* | ||
2 | * arch/i386/kernel/stacktrace.c | ||
3 | * | ||
4 | * Stack trace management functions | ||
5 | * | ||
6 | * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> | ||
7 | */ | ||
8 | #include <linux/sched.h> | ||
9 | #include <linux/stacktrace.h> | ||
10 | |||
11 | static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) | ||
12 | { | ||
13 | return p > (void *)tinfo && | ||
14 | p < (void *)tinfo + THREAD_SIZE - 3; | ||
15 | } | ||
16 | |||
17 | /* | ||
18 | * Save stack-backtrace addresses into a stack_trace buffer: | ||
19 | */ | ||
20 | static inline unsigned long | ||
21 | save_context_stack(struct stack_trace *trace, unsigned int skip, | ||
22 | struct thread_info *tinfo, unsigned long *stack, | ||
23 | unsigned long ebp) | ||
24 | { | ||
25 | unsigned long addr; | ||
26 | |||
27 | #ifdef CONFIG_FRAME_POINTER | ||
28 | while (valid_stack_ptr(tinfo, (void *)ebp)) { | ||
29 | addr = *(unsigned long *)(ebp + 4); | ||
30 | if (!skip) | ||
31 | trace->entries[trace->nr_entries++] = addr; | ||
32 | else | ||
33 | skip--; | ||
34 | if (trace->nr_entries >= trace->max_entries) | ||
35 | break; | ||
36 | /* | ||
37 | * break out of recursive entries (such as | ||
38 | * end_of_stack_stop_unwind_function): | ||
39 | */ | ||
40 | if (ebp == *(unsigned long *)ebp) | ||
41 | break; | ||
42 | |||
43 | ebp = *(unsigned long *)ebp; | ||
44 | } | ||
45 | #else | ||
46 | while (valid_stack_ptr(tinfo, stack)) { | ||
47 | addr = *stack++; | ||
48 | if (__kernel_text_address(addr)) { | ||
49 | if (!skip) | ||
50 | trace->entries[trace->nr_entries++] = addr; | ||
51 | else | ||
52 | skip--; | ||
53 | if (trace->nr_entries >= trace->max_entries) | ||
54 | break; | ||
55 | } | ||
56 | } | ||
57 | #endif | ||
58 | |||
59 | return ebp; | ||
60 | } | ||
61 | |||
62 | /* | ||
63 | * Save stack-backtrace addresses into a stack_trace buffer. | ||
64 | * If all_contexts is set, all contexts (hardirq, softirq and process) | ||
65 | * are saved. If not set then only the current context is saved. | ||
66 | */ | ||
67 | void save_stack_trace(struct stack_trace *trace, | ||
68 | struct task_struct *task, int all_contexts, | ||
69 | unsigned int skip) | ||
70 | { | ||
71 | unsigned long ebp; | ||
72 | unsigned long *stack = &ebp; | ||
73 | |||
74 | WARN_ON(trace->nr_entries || !trace->max_entries); | ||
75 | |||
76 | if (!task || task == current) { | ||
77 | /* Grab ebp right from our regs: */ | ||
78 | asm ("movl %%ebp, %0" : "=r" (ebp)); | ||
79 | } else { | ||
80 | /* ebp is the last reg pushed by switch_to(): */ | ||
81 | ebp = *(unsigned long *) task->thread.esp; | ||
82 | } | ||
83 | |||
84 | while (1) { | ||
85 | struct thread_info *context = (struct thread_info *) | ||
86 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | ||
87 | |||
88 | ebp = save_context_stack(trace, skip, context, stack, ebp); | ||
89 | stack = (unsigned long *)context->previous_esp; | ||
90 | if (!all_contexts || !stack || | ||
91 | trace->nr_entries >= trace->max_entries) | ||
92 | break; | ||
93 | trace->entries[trace->nr_entries++] = ULONG_MAX; | ||
94 | if (trace->nr_entries >= trace->max_entries) | ||
95 | break; | ||
96 | } | ||
97 | } | ||
98 | |||
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S index dd63d4775398..7e639f78b0b9 100644 --- a/arch/i386/kernel/syscall_table.S +++ b/arch/i386/kernel/syscall_table.S | |||
@@ -317,3 +317,4 @@ ENTRY(sys_call_table) | |||
317 | .long sys_tee /* 315 */ | 317 | .long sys_tee /* 315 */ |
318 | .long sys_vmsplice | 318 | .long sys_vmsplice |
319 | .long sys_move_pages | 319 | .long sys_move_pages |
320 | .long sys_getcpu | ||
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c index 1302e4ab3c4f..86944acfb647 100644 --- a/arch/i386/kernel/time.c +++ b/arch/i386/kernel/time.c | |||
@@ -130,18 +130,33 @@ static int set_rtc_mmss(unsigned long nowtime) | |||
130 | 130 | ||
131 | int timer_ack; | 131 | int timer_ack; |
132 | 132 | ||
133 | #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) | ||
134 | unsigned long profile_pc(struct pt_regs *regs) | 133 | unsigned long profile_pc(struct pt_regs *regs) |
135 | { | 134 | { |
136 | unsigned long pc = instruction_pointer(regs); | 135 | unsigned long pc = instruction_pointer(regs); |
137 | 136 | ||
138 | if (!user_mode_vm(regs) && in_lock_functions(pc)) | 137 | #ifdef CONFIG_SMP |
138 | if (!user_mode_vm(regs) && in_lock_functions(pc)) { | ||
139 | #ifdef CONFIG_FRAME_POINTER | ||
139 | return *(unsigned long *)(regs->ebp + 4); | 140 | return *(unsigned long *)(regs->ebp + 4); |
140 | 141 | #else | |
142 | unsigned long *sp; | ||
143 | if ((regs->xcs & 3) == 0) | ||
144 | sp = (unsigned long *)&regs->esp; | ||
145 | else | ||
146 | sp = (unsigned long *)regs->esp; | ||
147 | /* Return address is either directly at stack pointer | ||
148 | or above a saved eflags. Eflags has bits 22-31 zero, | ||
149 | kernel addresses don't. */ | ||
150 | if (sp[0] >> 22) | ||
151 | return sp[0]; | ||
152 | if (sp[1] >> 22) | ||
153 | return sp[1]; | ||
154 | #endif | ||
155 | } | ||
156 | #endif | ||
141 | return pc; | 157 | return pc; |
142 | } | 158 | } |
143 | EXPORT_SYMBOL(profile_pc); | 159 | EXPORT_SYMBOL(profile_pc); |
144 | #endif | ||
145 | 160 | ||
146 | /* | 161 | /* |
147 | * This is the same as the above, except we _also_ save the current | 162 | * This is the same as the above, except we _also_ save the current |
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c index e2e281d4bcc8..07d6da36a825 100644 --- a/arch/i386/kernel/topology.c +++ b/arch/i386/kernel/topology.c | |||
@@ -28,6 +28,7 @@ | |||
28 | #include <linux/init.h> | 28 | #include <linux/init.h> |
29 | #include <linux/smp.h> | 29 | #include <linux/smp.h> |
30 | #include <linux/nodemask.h> | 30 | #include <linux/nodemask.h> |
31 | #include <linux/mmzone.h> | ||
31 | #include <asm/cpu.h> | 32 | #include <asm/cpu.h> |
32 | 33 | ||
33 | static struct i386_cpu cpu_devices[NR_CPUS]; | 34 | static struct i386_cpu cpu_devices[NR_CPUS]; |
@@ -55,34 +56,18 @@ EXPORT_SYMBOL(arch_register_cpu); | |||
55 | EXPORT_SYMBOL(arch_unregister_cpu); | 56 | EXPORT_SYMBOL(arch_unregister_cpu); |
56 | #endif /*CONFIG_HOTPLUG_CPU*/ | 57 | #endif /*CONFIG_HOTPLUG_CPU*/ |
57 | 58 | ||
58 | |||
59 | |||
60 | #ifdef CONFIG_NUMA | ||
61 | #include <linux/mmzone.h> | ||
62 | |||
63 | static int __init topology_init(void) | 59 | static int __init topology_init(void) |
64 | { | 60 | { |
65 | int i; | 61 | int i; |
66 | 62 | ||
63 | #ifdef CONFIG_NUMA | ||
67 | for_each_online_node(i) | 64 | for_each_online_node(i) |
68 | register_one_node(i); | 65 | register_one_node(i); |
66 | #endif /* CONFIG_NUMA */ | ||
69 | 67 | ||
70 | for_each_present_cpu(i) | 68 | for_each_present_cpu(i) |
71 | arch_register_cpu(i); | 69 | arch_register_cpu(i); |
72 | return 0; | 70 | return 0; |
73 | } | 71 | } |
74 | 72 | ||
75 | #else /* !CONFIG_NUMA */ | ||
76 | |||
77 | static int __init topology_init(void) | ||
78 | { | ||
79 | int i; | ||
80 | |||
81 | for_each_present_cpu(i) | ||
82 | arch_register_cpu(i); | ||
83 | return 0; | ||
84 | } | ||
85 | |||
86 | #endif /* CONFIG_NUMA */ | ||
87 | |||
88 | subsys_initcall(topology_init); | 73 | subsys_initcall(topology_init); |
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index 4fcc6690be99..21aa1cd57773 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c | |||
@@ -51,6 +51,7 @@ | |||
51 | #include <asm/smp.h> | 51 | #include <asm/smp.h> |
52 | #include <asm/arch_hooks.h> | 52 | #include <asm/arch_hooks.h> |
53 | #include <asm/kdebug.h> | 53 | #include <asm/kdebug.h> |
54 | #include <asm/stacktrace.h> | ||
54 | 55 | ||
55 | #include <linux/module.h> | 56 | #include <linux/module.h> |
56 | 57 | ||
@@ -118,26 +119,16 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p) | |||
118 | p < (void *)tinfo + THREAD_SIZE - 3; | 119 | p < (void *)tinfo + THREAD_SIZE - 3; |
119 | } | 120 | } |
120 | 121 | ||
121 | /* | ||
122 | * Print one address/symbol entries per line. | ||
123 | */ | ||
124 | static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl) | ||
125 | { | ||
126 | printk(" [<%08lx>] ", addr); | ||
127 | |||
128 | print_symbol("%s\n", addr); | ||
129 | } | ||
130 | |||
131 | static inline unsigned long print_context_stack(struct thread_info *tinfo, | 122 | static inline unsigned long print_context_stack(struct thread_info *tinfo, |
132 | unsigned long *stack, unsigned long ebp, | 123 | unsigned long *stack, unsigned long ebp, |
133 | char *log_lvl) | 124 | struct stacktrace_ops *ops, void *data) |
134 | { | 125 | { |
135 | unsigned long addr; | 126 | unsigned long addr; |
136 | 127 | ||
137 | #ifdef CONFIG_FRAME_POINTER | 128 | #ifdef CONFIG_FRAME_POINTER |
138 | while (valid_stack_ptr(tinfo, (void *)ebp)) { | 129 | while (valid_stack_ptr(tinfo, (void *)ebp)) { |
139 | addr = *(unsigned long *)(ebp + 4); | 130 | addr = *(unsigned long *)(ebp + 4); |
140 | print_addr_and_symbol(addr, log_lvl); | 131 | ops->address(data, addr); |
141 | /* | 132 | /* |
142 | * break out of recursive entries (such as | 133 | * break out of recursive entries (such as |
143 | * end_of_stack_stop_unwind_function): | 134 | * end_of_stack_stop_unwind_function): |
@@ -150,30 +141,37 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo, | |||
150 | while (valid_stack_ptr(tinfo, stack)) { | 141 | while (valid_stack_ptr(tinfo, stack)) { |
151 | addr = *stack++; | 142 | addr = *stack++; |
152 | if (__kernel_text_address(addr)) | 143 | if (__kernel_text_address(addr)) |
153 | print_addr_and_symbol(addr, log_lvl); | 144 | ops->address(data, addr); |
154 | } | 145 | } |
155 | #endif | 146 | #endif |
156 | return ebp; | 147 | return ebp; |
157 | } | 148 | } |
158 | 149 | ||
150 | struct ops_and_data { | ||
151 | struct stacktrace_ops *ops; | ||
152 | void *data; | ||
153 | }; | ||
154 | |||
159 | static asmlinkage int | 155 | static asmlinkage int |
160 | show_trace_unwind(struct unwind_frame_info *info, void *log_lvl) | 156 | dump_trace_unwind(struct unwind_frame_info *info, void *data) |
161 | { | 157 | { |
158 | struct ops_and_data *oad = (struct ops_and_data *)data; | ||
162 | int n = 0; | 159 | int n = 0; |
163 | 160 | ||
164 | while (unwind(info) == 0 && UNW_PC(info)) { | 161 | while (unwind(info) == 0 && UNW_PC(info)) { |
165 | n++; | 162 | n++; |
166 | print_addr_and_symbol(UNW_PC(info), log_lvl); | 163 | oad->ops->address(oad->data, UNW_PC(info)); |
167 | if (arch_unw_user_mode(info)) | 164 | if (arch_unw_user_mode(info)) |
168 | break; | 165 | break; |
169 | } | 166 | } |
170 | return n; | 167 | return n; |
171 | } | 168 | } |
172 | 169 | ||
173 | static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | 170 | void dump_trace(struct task_struct *task, struct pt_regs *regs, |
174 | unsigned long *stack, char *log_lvl) | 171 | unsigned long *stack, |
172 | struct stacktrace_ops *ops, void *data) | ||
175 | { | 173 | { |
176 | unsigned long ebp; | 174 | unsigned long ebp = 0; |
177 | 175 | ||
178 | if (!task) | 176 | if (!task) |
179 | task = current; | 177 | task = current; |
@@ -181,54 +179,116 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | |||
181 | if (call_trace >= 0) { | 179 | if (call_trace >= 0) { |
182 | int unw_ret = 0; | 180 | int unw_ret = 0; |
183 | struct unwind_frame_info info; | 181 | struct unwind_frame_info info; |
182 | struct ops_and_data oad = { .ops = ops, .data = data }; | ||
184 | 183 | ||
185 | if (regs) { | 184 | if (regs) { |
186 | if (unwind_init_frame_info(&info, task, regs) == 0) | 185 | if (unwind_init_frame_info(&info, task, regs) == 0) |
187 | unw_ret = show_trace_unwind(&info, log_lvl); | 186 | unw_ret = dump_trace_unwind(&info, &oad); |
188 | } else if (task == current) | 187 | } else if (task == current) |
189 | unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl); | 188 | unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad); |
190 | else { | 189 | else { |
191 | if (unwind_init_blocked(&info, task) == 0) | 190 | if (unwind_init_blocked(&info, task) == 0) |
192 | unw_ret = show_trace_unwind(&info, log_lvl); | 191 | unw_ret = dump_trace_unwind(&info, &oad); |
193 | } | 192 | } |
194 | if (unw_ret > 0) { | 193 | if (unw_ret > 0) { |
195 | if (call_trace == 1 && !arch_unw_user_mode(&info)) { | 194 | if (call_trace == 1 && !arch_unw_user_mode(&info)) { |
196 | print_symbol("DWARF2 unwinder stuck at %s\n", | 195 | ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n", |
197 | UNW_PC(&info)); | 196 | UNW_PC(&info)); |
198 | if (UNW_SP(&info) >= PAGE_OFFSET) { | 197 | if (UNW_SP(&info) >= PAGE_OFFSET) { |
199 | printk("Leftover inexact backtrace:\n"); | 198 | ops->warning(data, "Leftover inexact backtrace:\n"); |
200 | stack = (void *)UNW_SP(&info); | 199 | stack = (void *)UNW_SP(&info); |
200 | if (!stack) | ||
201 | return; | ||
202 | ebp = UNW_FP(&info); | ||
201 | } else | 203 | } else |
202 | printk("Full inexact backtrace again:\n"); | 204 | ops->warning(data, "Full inexact backtrace again:\n"); |
203 | } else if (call_trace >= 1) | 205 | } else if (call_trace >= 1) |
204 | return; | 206 | return; |
205 | else | 207 | else |
206 | printk("Full inexact backtrace again:\n"); | 208 | ops->warning(data, "Full inexact backtrace again:\n"); |
207 | } else | 209 | } else |
208 | printk("Inexact backtrace:\n"); | 210 | ops->warning(data, "Inexact backtrace:\n"); |
211 | } | ||
212 | if (!stack) { | ||
213 | unsigned long dummy; | ||
214 | stack = &dummy; | ||
215 | if (task && task != current) | ||
216 | stack = (unsigned long *)task->thread.esp; | ||
209 | } | 217 | } |
210 | 218 | ||
211 | if (task == current) { | 219 | #ifdef CONFIG_FRAME_POINTER |
212 | /* Grab ebp right from our regs */ | 220 | if (!ebp) { |
213 | asm ("movl %%ebp, %0" : "=r" (ebp) : ); | 221 | if (task == current) { |
214 | } else { | 222 | /* Grab ebp right from our regs */ |
215 | /* ebp is the last reg pushed by switch_to */ | 223 | asm ("movl %%ebp, %0" : "=r" (ebp) : ); |
216 | ebp = *(unsigned long *) task->thread.esp; | 224 | } else { |
225 | /* ebp is the last reg pushed by switch_to */ | ||
226 | ebp = *(unsigned long *) task->thread.esp; | ||
227 | } | ||
217 | } | 228 | } |
229 | #endif | ||
218 | 230 | ||
219 | while (1) { | 231 | while (1) { |
220 | struct thread_info *context; | 232 | struct thread_info *context; |
221 | context = (struct thread_info *) | 233 | context = (struct thread_info *) |
222 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); | 234 | ((unsigned long)stack & (~(THREAD_SIZE - 1))); |
223 | ebp = print_context_stack(context, stack, ebp, log_lvl); | 235 | ebp = print_context_stack(context, stack, ebp, ops, data); |
236 | /* Should be after the line below, but somewhere | ||
237 | in early boot context comes out corrupted and we | ||
238 | can't reference it -AK */ | ||
239 | if (ops->stack(data, "IRQ") < 0) | ||
240 | break; | ||
224 | stack = (unsigned long*)context->previous_esp; | 241 | stack = (unsigned long*)context->previous_esp; |
225 | if (!stack) | 242 | if (!stack) |
226 | break; | 243 | break; |
227 | printk("%s =======================\n", log_lvl); | ||
228 | } | 244 | } |
229 | } | 245 | } |
246 | EXPORT_SYMBOL(dump_trace); | ||
247 | |||
248 | static void | ||
249 | print_trace_warning_symbol(void *data, char *msg, unsigned long symbol) | ||
250 | { | ||
251 | printk(data); | ||
252 | print_symbol(msg, symbol); | ||
253 | printk("\n"); | ||
254 | } | ||
255 | |||
256 | static void print_trace_warning(void *data, char *msg) | ||
257 | { | ||
258 | printk("%s%s\n", (char *)data, msg); | ||
259 | } | ||
260 | |||
261 | static int print_trace_stack(void *data, char *name) | ||
262 | { | ||
263 | return 0; | ||
264 | } | ||
265 | |||
266 | /* | ||
267 | * Print one address/symbol entries per line. | ||
268 | */ | ||
269 | static void print_trace_address(void *data, unsigned long addr) | ||
270 | { | ||
271 | printk("%s [<%08lx>] ", (char *)data, addr); | ||
272 | print_symbol("%s\n", addr); | ||
273 | } | ||
274 | |||
275 | static struct stacktrace_ops print_trace_ops = { | ||
276 | .warning = print_trace_warning, | ||
277 | .warning_symbol = print_trace_warning_symbol, | ||
278 | .stack = print_trace_stack, | ||
279 | .address = print_trace_address, | ||
280 | }; | ||
281 | |||
282 | static void | ||
283 | show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs, | ||
284 | unsigned long * stack, char *log_lvl) | ||
285 | { | ||
286 | dump_trace(task, regs, stack, &print_trace_ops, log_lvl); | ||
287 | printk("%s =======================\n", log_lvl); | ||
288 | } | ||
230 | 289 | ||
231 | void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack) | 290 | void show_trace(struct task_struct *task, struct pt_regs *regs, |
291 | unsigned long * stack) | ||
232 | { | 292 | { |
233 | show_trace_log_lvl(task, regs, stack, ""); | 293 | show_trace_log_lvl(task, regs, stack, ""); |
234 | } | 294 | } |
@@ -291,8 +351,9 @@ void show_registers(struct pt_regs *regs) | |||
291 | ss = regs->xss & 0xffff; | 351 | ss = regs->xss & 0xffff; |
292 | } | 352 | } |
293 | print_modules(); | 353 | print_modules(); |
294 | printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n" | 354 | printk(KERN_EMERG "CPU: %d\n" |
295 | "EFLAGS: %08lx (%s %.*s) \n", | 355 | KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n" |
356 | KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n", | ||
296 | smp_processor_id(), 0xffff & regs->xcs, regs->eip, | 357 | smp_processor_id(), 0xffff & regs->xcs, regs->eip, |
297 | print_tainted(), regs->eflags, system_utsname.release, | 358 | print_tainted(), regs->eflags, system_utsname.release, |
298 | (int)strcspn(system_utsname.version, " "), | 359 | (int)strcspn(system_utsname.version, " "), |
@@ -634,18 +695,24 @@ gp_in_kernel: | |||
634 | } | 695 | } |
635 | } | 696 | } |
636 | 697 | ||
637 | static void mem_parity_error(unsigned char reason, struct pt_regs * regs) | 698 | static __kprobes void |
699 | mem_parity_error(unsigned char reason, struct pt_regs * regs) | ||
638 | { | 700 | { |
639 | printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying " | 701 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " |
640 | "to continue\n"); | 702 | "CPU %d.\n", reason, smp_processor_id()); |
641 | printk(KERN_EMERG "You probably have a hardware problem with your RAM " | 703 | printk(KERN_EMERG "You probably have a hardware problem with your RAM " |
642 | "chips\n"); | 704 | "chips\n"); |
705 | if (panic_on_unrecovered_nmi) | ||
706 | panic("NMI: Not continuing"); | ||
707 | |||
708 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
643 | 709 | ||
644 | /* Clear and disable the memory parity error line. */ | 710 | /* Clear and disable the memory parity error line. */ |
645 | clear_mem_error(reason); | 711 | clear_mem_error(reason); |
646 | } | 712 | } |
647 | 713 | ||
648 | static void io_check_error(unsigned char reason, struct pt_regs * regs) | 714 | static __kprobes void |
715 | io_check_error(unsigned char reason, struct pt_regs * regs) | ||
649 | { | 716 | { |
650 | unsigned long i; | 717 | unsigned long i; |
651 | 718 | ||
@@ -661,7 +728,8 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs) | |||
661 | outb(reason, 0x61); | 728 | outb(reason, 0x61); |
662 | } | 729 | } |
663 | 730 | ||
664 | static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | 731 | static __kprobes void |
732 | unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | ||
665 | { | 733 | { |
666 | #ifdef CONFIG_MCA | 734 | #ifdef CONFIG_MCA |
667 | /* Might actually be able to figure out what the guilty party | 735 | /* Might actually be able to figure out what the guilty party |
@@ -671,15 +739,18 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) | |||
671 | return; | 739 | return; |
672 | } | 740 | } |
673 | #endif | 741 | #endif |
674 | printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n", | 742 | printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on " |
675 | reason, smp_processor_id()); | 743 | "CPU %d.\n", reason, smp_processor_id()); |
676 | printk("Dazed and confused, but trying to continue\n"); | 744 | printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n"); |
677 | printk("Do you have a strange power saving mode enabled?\n"); | 745 | if (panic_on_unrecovered_nmi) |
746 | panic("NMI: Not continuing"); | ||
747 | |||
748 | printk(KERN_EMERG "Dazed and confused, but trying to continue\n"); | ||
678 | } | 749 | } |
679 | 750 | ||
680 | static DEFINE_SPINLOCK(nmi_print_lock); | 751 | static DEFINE_SPINLOCK(nmi_print_lock); |
681 | 752 | ||
682 | void die_nmi (struct pt_regs *regs, const char *msg) | 753 | void __kprobes die_nmi(struct pt_regs *regs, const char *msg) |
683 | { | 754 | { |
684 | if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == | 755 | if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == |
685 | NOTIFY_STOP) | 756 | NOTIFY_STOP) |
@@ -711,7 +782,7 @@ void die_nmi (struct pt_regs *regs, const char *msg) | |||
711 | do_exit(SIGSEGV); | 782 | do_exit(SIGSEGV); |
712 | } | 783 | } |
713 | 784 | ||
714 | static void default_do_nmi(struct pt_regs * regs) | 785 | static __kprobes void default_do_nmi(struct pt_regs * regs) |
715 | { | 786 | { |
716 | unsigned char reason = 0; | 787 | unsigned char reason = 0; |
717 | 788 | ||
@@ -728,12 +799,12 @@ static void default_do_nmi(struct pt_regs * regs) | |||
728 | * Ok, so this is none of the documented NMI sources, | 799 | * Ok, so this is none of the documented NMI sources, |
729 | * so it must be the NMI watchdog. | 800 | * so it must be the NMI watchdog. |
730 | */ | 801 | */ |
731 | if (nmi_watchdog) { | 802 | if (nmi_watchdog_tick(regs, reason)) |
732 | nmi_watchdog_tick(regs); | ||
733 | return; | 803 | return; |
734 | } | 804 | if (!do_nmi_callback(regs, smp_processor_id())) |
735 | #endif | 805 | #endif |
736 | unknown_nmi_error(reason, regs); | 806 | unknown_nmi_error(reason, regs); |
807 | |||
737 | return; | 808 | return; |
738 | } | 809 | } |
739 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) | 810 | if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) |
@@ -749,14 +820,7 @@ static void default_do_nmi(struct pt_regs * regs) | |||
749 | reassert_nmi(); | 820 | reassert_nmi(); |
750 | } | 821 | } |
751 | 822 | ||
752 | static int dummy_nmi_callback(struct pt_regs * regs, int cpu) | 823 | fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code) |
753 | { | ||
754 | return 0; | ||
755 | } | ||
756 | |||
757 | static nmi_callback_t nmi_callback = dummy_nmi_callback; | ||
758 | |||
759 | fastcall void do_nmi(struct pt_regs * regs, long error_code) | ||
760 | { | 824 | { |
761 | int cpu; | 825 | int cpu; |
762 | 826 | ||
@@ -766,25 +830,11 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code) | |||
766 | 830 | ||
767 | ++nmi_count(cpu); | 831 | ++nmi_count(cpu); |
768 | 832 | ||
769 | if (!rcu_dereference(nmi_callback)(regs, cpu)) | 833 | default_do_nmi(regs); |
770 | default_do_nmi(regs); | ||
771 | 834 | ||
772 | nmi_exit(); | 835 | nmi_exit(); |
773 | } | 836 | } |
774 | 837 | ||
775 | void set_nmi_callback(nmi_callback_t callback) | ||
776 | { | ||
777 | vmalloc_sync_all(); | ||
778 | rcu_assign_pointer(nmi_callback, callback); | ||
779 | } | ||
780 | EXPORT_SYMBOL_GPL(set_nmi_callback); | ||
781 | |||
782 | void unset_nmi_callback(void) | ||
783 | { | ||
784 | nmi_callback = dummy_nmi_callback; | ||
785 | } | ||
786 | EXPORT_SYMBOL_GPL(unset_nmi_callback); | ||
787 | |||
788 | #ifdef CONFIG_KPROBES | 838 | #ifdef CONFIG_KPROBES |
789 | fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) | 839 | fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code) |
790 | { | 840 | { |
@@ -1124,20 +1174,6 @@ void __init trap_init_f00f_bug(void) | |||
1124 | } | 1174 | } |
1125 | #endif | 1175 | #endif |
1126 | 1176 | ||
1127 | #define _set_gate(gate_addr,type,dpl,addr,seg) \ | ||
1128 | do { \ | ||
1129 | int __d0, __d1; \ | ||
1130 | __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \ | ||
1131 | "movw %4,%%dx\n\t" \ | ||
1132 | "movl %%eax,%0\n\t" \ | ||
1133 | "movl %%edx,%1" \ | ||
1134 | :"=m" (*((long *) (gate_addr))), \ | ||
1135 | "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \ | ||
1136 | :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \ | ||
1137 | "3" ((char *) (addr)),"2" ((seg) << 16)); \ | ||
1138 | } while (0) | ||
1139 | |||
1140 | |||
1141 | /* | 1177 | /* |
1142 | * This needs to use 'idt_table' rather than 'idt', and | 1178 | * This needs to use 'idt_table' rather than 'idt', and |
1143 | * thus use the _nonmapped_ version of the IDT, as the | 1179 | * thus use the _nonmapped_ version of the IDT, as the |
@@ -1146,7 +1182,7 @@ do { \ | |||
1146 | */ | 1182 | */ |
1147 | void set_intr_gate(unsigned int n, void *addr) | 1183 | void set_intr_gate(unsigned int n, void *addr) |
1148 | { | 1184 | { |
1149 | _set_gate(idt_table+n,14,0,addr,__KERNEL_CS); | 1185 | _set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS); |
1150 | } | 1186 | } |
1151 | 1187 | ||
1152 | /* | 1188 | /* |
@@ -1154,22 +1190,22 @@ void set_intr_gate(unsigned int n, void *addr) | |||
1154 | */ | 1190 | */ |
1155 | static inline void set_system_intr_gate(unsigned int n, void *addr) | 1191 | static inline void set_system_intr_gate(unsigned int n, void *addr) |
1156 | { | 1192 | { |
1157 | _set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS); | 1193 | _set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS); |
1158 | } | 1194 | } |
1159 | 1195 | ||
1160 | static void __init set_trap_gate(unsigned int n, void *addr) | 1196 | static void __init set_trap_gate(unsigned int n, void *addr) |
1161 | { | 1197 | { |
1162 | _set_gate(idt_table+n,15,0,addr,__KERNEL_CS); | 1198 | _set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS); |
1163 | } | 1199 | } |
1164 | 1200 | ||
1165 | static void __init set_system_gate(unsigned int n, void *addr) | 1201 | static void __init set_system_gate(unsigned int n, void *addr) |
1166 | { | 1202 | { |
1167 | _set_gate(idt_table+n,15,3,addr,__KERNEL_CS); | 1203 | _set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS); |
1168 | } | 1204 | } |
1169 | 1205 | ||
1170 | static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) | 1206 | static void __init set_task_gate(unsigned int n, unsigned int gdt_entry) |
1171 | { | 1207 | { |
1172 | _set_gate(idt_table+n,5,0,0,(gdt_entry<<3)); | 1208 | _set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3)); |
1173 | } | 1209 | } |
1174 | 1210 | ||
1175 | 1211 | ||
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c index 7e0d8dab2075..b8fa0a8b2e47 100644 --- a/arch/i386/kernel/tsc.c +++ b/arch/i386/kernel/tsc.c | |||
@@ -192,7 +192,7 @@ int recalibrate_cpu_khz(void) | |||
192 | 192 | ||
193 | EXPORT_SYMBOL(recalibrate_cpu_khz); | 193 | EXPORT_SYMBOL(recalibrate_cpu_khz); |
194 | 194 | ||
195 | void tsc_init(void) | 195 | void __init tsc_init(void) |
196 | { | 196 | { |
197 | if (!cpu_has_tsc || tsc_disable) | 197 | if (!cpu_has_tsc || tsc_disable) |
198 | return; | 198 | return; |