aboutsummaryrefslogtreecommitdiffstats
path: root/arch/i386/kernel
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@g5.osdl.org>2006-09-26 16:07:55 -0400
committerLinus Torvalds <torvalds@g5.osdl.org>2006-09-26 16:07:55 -0400
commitb278240839e20fa9384ea430df463b367b90e04e (patch)
treef99f0c8cdd4cc7f177cd75440e6bd181cded7fb3 /arch/i386/kernel
parentdd77a4ee0f3981693d4229aa1d57cea9e526ff47 (diff)
parent3f75f42d7733e73aca5c78326489efd4189e0111 (diff)
Merge branch 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6
* 'for-linus' of git://one.firstfloor.org/home/andi/git/linux-2.6: (225 commits) [PATCH] Don't set calgary iommu as default y [PATCH] i386/x86-64: New Intel feature flags [PATCH] x86: Add a cumulative thermal throttle event counter. [PATCH] i386: Make the jiffies compares use the 64bit safe macros. [PATCH] x86: Refactor thermal throttle processing [PATCH] Add 64bit jiffies compares (for use with get_jiffies_64) [PATCH] Fix unwinder warning in traps.c [PATCH] x86: Allow disabling early pci scans with pci=noearly or disallowing conf1 [PATCH] x86: Move direct PCI scanning functions out of line [PATCH] i386/x86-64: Make all early PCI scans dependent on CONFIG_PCI [PATCH] Don't leak NT bit into next task [PATCH] i386/x86-64: Work around gcc bug with noreturn functions in unwinder [PATCH] Fix some broken white space in ia32_signal.c [PATCH] Initialize argument registers for 32bit signal handlers. [PATCH] Remove all traces of signal number conversion [PATCH] Don't synchronize time reading on single core AMD systems [PATCH] Remove outdated comment in x86-64 mmconfig code [PATCH] Use string instructions for Core2 copy/clear [PATCH] x86: - restore i8259A eoi status on resume [PATCH] i386: Split multi-line printk in oops output. ...
Diffstat (limited to 'arch/i386/kernel')
-rw-r--r--arch/i386/kernel/Makefile3
-rw-r--r--arch/i386/kernel/acpi/Makefile2
-rw-r--r--arch/i386/kernel/acpi/boot.c181
-rw-r--r--arch/i386/kernel/acpi/earlyquirk.c6
-rw-r--r--arch/i386/kernel/apic.c31
-rw-r--r--arch/i386/kernel/cpu/amd.c7
-rw-r--r--arch/i386/kernel/cpu/centaur.c24
-rw-r--r--arch/i386/kernel/cpu/common.c8
-rw-r--r--arch/i386/kernel/cpu/cpu.h2
-rw-r--r--arch/i386/kernel/cpu/cyrix.c42
-rw-r--r--arch/i386/kernel/cpu/intel.c3
-rw-r--r--arch/i386/kernel/cpu/mcheck/Makefile2
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c26
-rw-r--r--arch/i386/kernel/cpu/mcheck/therm_throt.c180
-rw-r--r--arch/i386/kernel/cpu/nexgen.c9
-rw-r--r--arch/i386/kernel/cpu/proc.c4
-rw-r--r--arch/i386/kernel/cpu/rise.c4
-rw-r--r--arch/i386/kernel/cpu/transmeta.c7
-rw-r--r--arch/i386/kernel/cpu/umc.c7
-rw-r--r--arch/i386/kernel/crash.c22
-rw-r--r--arch/i386/kernel/entry.S110
-rw-r--r--arch/i386/kernel/head.S67
-rw-r--r--arch/i386/kernel/i8259.c6
-rw-r--r--arch/i386/kernel/io_apic.c125
-rw-r--r--arch/i386/kernel/machine_kexec.c140
-rw-r--r--arch/i386/kernel/mca.c8
-rw-r--r--arch/i386/kernel/mpparse.c70
-rw-r--r--arch/i386/kernel/nmi.c940
-rw-r--r--arch/i386/kernel/process.c14
-rw-r--r--arch/i386/kernel/ptrace.c10
-rw-r--r--arch/i386/kernel/relocate_kernel.S162
-rw-r--r--arch/i386/kernel/semaphore.c134
-rw-r--r--arch/i386/kernel/setup.c367
-rw-r--r--arch/i386/kernel/smpboot.c19
-rw-r--r--arch/i386/kernel/stacktrace.c98
-rw-r--r--arch/i386/kernel/syscall_table.S1
-rw-r--r--arch/i386/kernel/time.c23
-rw-r--r--arch/i386/kernel/topology.c21
-rw-r--r--arch/i386/kernel/traps.c224
-rw-r--r--arch/i386/kernel/tsc.c2
40 files changed, 1865 insertions, 1246 deletions
diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile
index 5427a842e841..1a884b6e6e5c 100644
--- a/arch/i386/kernel/Makefile
+++ b/arch/i386/kernel/Makefile
@@ -4,7 +4,7 @@
4 4
5extra-y := head.o init_task.o vmlinux.lds 5extra-y := head.o init_task.o vmlinux.lds
6 6
7obj-y := process.o semaphore.o signal.o entry.o traps.o irq.o \ 7obj-y := process.o signal.o entry.o traps.o irq.o \
8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \ 8 ptrace.o time.o ioport.o ldt.o setup.o i8259.o sys_i386.o \
9 pci-dma.o i386_ksyms.o i387.o bootflag.o \ 9 pci-dma.o i386_ksyms.o i387.o bootflag.o \
10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o 10 quirks.o i8237.o topology.o alternative.o i8253.o tsc.o
@@ -81,4 +81,5 @@ $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \
81 $(call if_changed,syscall) 81 $(call if_changed,syscall)
82 82
83k8-y += ../../x86_64/kernel/k8.o 83k8-y += ../../x86_64/kernel/k8.o
84stacktrace-y += ../../x86_64/kernel/stacktrace.o
84 85
diff --git a/arch/i386/kernel/acpi/Makefile b/arch/i386/kernel/acpi/Makefile
index 7e9ac99354f4..7f7be01f44e6 100644
--- a/arch/i386/kernel/acpi/Makefile
+++ b/arch/i386/kernel/acpi/Makefile
@@ -1,5 +1,7 @@
1obj-$(CONFIG_ACPI) += boot.o 1obj-$(CONFIG_ACPI) += boot.o
2ifneq ($(CONFIG_PCI),)
2obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o 3obj-$(CONFIG_X86_IO_APIC) += earlyquirk.o
4endif
3obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o 5obj-$(CONFIG_ACPI_SLEEP) += sleep.o wakeup.o
4 6
5ifneq ($(CONFIG_ACPI_PROCESSOR),) 7ifneq ($(CONFIG_ACPI_PROCESSOR),)
diff --git a/arch/i386/kernel/acpi/boot.c b/arch/i386/kernel/acpi/boot.c
index ee003bc0e8b1..1aaea6ab8c46 100644
--- a/arch/i386/kernel/acpi/boot.c
+++ b/arch/i386/kernel/acpi/boot.c
@@ -26,9 +26,12 @@
26#include <linux/init.h> 26#include <linux/init.h>
27#include <linux/acpi.h> 27#include <linux/acpi.h>
28#include <linux/efi.h> 28#include <linux/efi.h>
29#include <linux/cpumask.h>
29#include <linux/module.h> 30#include <linux/module.h>
30#include <linux/dmi.h> 31#include <linux/dmi.h>
31#include <linux/irq.h> 32#include <linux/irq.h>
33#include <linux/bootmem.h>
34#include <linux/ioport.h>
32 35
33#include <asm/pgtable.h> 36#include <asm/pgtable.h>
34#include <asm/io_apic.h> 37#include <asm/io_apic.h>
@@ -36,11 +39,17 @@
36#include <asm/io.h> 39#include <asm/io.h>
37#include <asm/mpspec.h> 40#include <asm/mpspec.h>
38 41
39#ifdef CONFIG_X86_64 42static int __initdata acpi_force = 0;
40 43
41extern void __init clustered_apic_check(void); 44#ifdef CONFIG_ACPI
45int acpi_disabled = 0;
46#else
47int acpi_disabled = 1;
48#endif
49EXPORT_SYMBOL(acpi_disabled);
50
51#ifdef CONFIG_X86_64
42 52
43extern int gsi_irq_sharing(int gsi);
44#include <asm/proto.h> 53#include <asm/proto.h>
45 54
46static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; } 55static inline int acpi_madt_oem_check(char *oem_id, char *oem_table_id) { return 0; }
@@ -506,16 +515,76 @@ EXPORT_SYMBOL(acpi_register_gsi);
506#ifdef CONFIG_ACPI_HOTPLUG_CPU 515#ifdef CONFIG_ACPI_HOTPLUG_CPU
507int acpi_map_lsapic(acpi_handle handle, int *pcpu) 516int acpi_map_lsapic(acpi_handle handle, int *pcpu)
508{ 517{
509 /* TBD */ 518 struct acpi_buffer buffer = { ACPI_ALLOCATE_BUFFER, NULL };
510 return -EINVAL; 519 union acpi_object *obj;
520 struct acpi_table_lapic *lapic;
521 cpumask_t tmp_map, new_map;
522 u8 physid;
523 int cpu;
524
525 if (ACPI_FAILURE(acpi_evaluate_object(handle, "_MAT", NULL, &buffer)))
526 return -EINVAL;
527
528 if (!buffer.length || !buffer.pointer)
529 return -EINVAL;
530
531 obj = buffer.pointer;
532 if (obj->type != ACPI_TYPE_BUFFER ||
533 obj->buffer.length < sizeof(*lapic)) {
534 kfree(buffer.pointer);
535 return -EINVAL;
536 }
537
538 lapic = (struct acpi_table_lapic *)obj->buffer.pointer;
539
540 if ((lapic->header.type != ACPI_MADT_LAPIC) ||
541 (!lapic->flags.enabled)) {
542 kfree(buffer.pointer);
543 return -EINVAL;
544 }
545
546 physid = lapic->id;
547
548 kfree(buffer.pointer);
549 buffer.length = ACPI_ALLOCATE_BUFFER;
550 buffer.pointer = NULL;
551
552 tmp_map = cpu_present_map;
553 mp_register_lapic(physid, lapic->flags.enabled);
554
555 /*
556 * If mp_register_lapic successfully generates a new logical cpu
557 * number, then the following will get us exactly what was mapped
558 */
559 cpus_andnot(new_map, cpu_present_map, tmp_map);
560 if (cpus_empty(new_map)) {
561 printk ("Unable to map lapic to logical cpu number\n");
562 return -EINVAL;
563 }
564
565 cpu = first_cpu(new_map);
566
567 *pcpu = cpu;
568 return 0;
511} 569}
512 570
513EXPORT_SYMBOL(acpi_map_lsapic); 571EXPORT_SYMBOL(acpi_map_lsapic);
514 572
515int acpi_unmap_lsapic(int cpu) 573int acpi_unmap_lsapic(int cpu)
516{ 574{
517 /* TBD */ 575 int i;
518 return -EINVAL; 576
577 for_each_possible_cpu(i) {
578 if (x86_acpiid_to_apicid[i] == x86_cpu_to_apicid[cpu]) {
579 x86_acpiid_to_apicid[i] = -1;
580 break;
581 }
582 }
583 x86_cpu_to_apicid[cpu] = -1;
584 cpu_clear(cpu, cpu_present_map);
585 num_processors--;
586
587 return (0);
519} 588}
520 589
521EXPORT_SYMBOL(acpi_unmap_lsapic); 590EXPORT_SYMBOL(acpi_unmap_lsapic);
@@ -579,6 +648,8 @@ static int __init acpi_parse_sbf(unsigned long phys_addr, unsigned long size)
579static int __init acpi_parse_hpet(unsigned long phys, unsigned long size) 648static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
580{ 649{
581 struct acpi_table_hpet *hpet_tbl; 650 struct acpi_table_hpet *hpet_tbl;
651 struct resource *hpet_res;
652 resource_size_t res_start;
582 653
583 if (!phys || !size) 654 if (!phys || !size)
584 return -EINVAL; 655 return -EINVAL;
@@ -594,12 +665,26 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
594 "memory.\n"); 665 "memory.\n");
595 return -1; 666 return -1;
596 } 667 }
668
669#define HPET_RESOURCE_NAME_SIZE 9
670 hpet_res = alloc_bootmem(sizeof(*hpet_res) + HPET_RESOURCE_NAME_SIZE);
671 if (hpet_res) {
672 memset(hpet_res, 0, sizeof(*hpet_res));
673 hpet_res->name = (void *)&hpet_res[1];
674 hpet_res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
675 snprintf((char *)hpet_res->name, HPET_RESOURCE_NAME_SIZE,
676 "HPET %u", hpet_tbl->number);
677 hpet_res->end = (1 * 1024) - 1;
678 }
679
597#ifdef CONFIG_X86_64 680#ifdef CONFIG_X86_64
598 vxtime.hpet_address = hpet_tbl->addr.addrl | 681 vxtime.hpet_address = hpet_tbl->addr.addrl |
599 ((long)hpet_tbl->addr.addrh << 32); 682 ((long)hpet_tbl->addr.addrh << 32);
600 683
601 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", 684 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
602 hpet_tbl->id, vxtime.hpet_address); 685 hpet_tbl->id, vxtime.hpet_address);
686
687 res_start = vxtime.hpet_address;
603#else /* X86 */ 688#else /* X86 */
604 { 689 {
605 extern unsigned long hpet_address; 690 extern unsigned long hpet_address;
@@ -607,9 +692,17 @@ static int __init acpi_parse_hpet(unsigned long phys, unsigned long size)
607 hpet_address = hpet_tbl->addr.addrl; 692 hpet_address = hpet_tbl->addr.addrl;
608 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n", 693 printk(KERN_INFO PREFIX "HPET id: %#x base: %#lx\n",
609 hpet_tbl->id, hpet_address); 694 hpet_tbl->id, hpet_address);
695
696 res_start = hpet_address;
610 } 697 }
611#endif /* X86 */ 698#endif /* X86 */
612 699
700 if (hpet_res) {
701 hpet_res->start = res_start;
702 hpet_res->end += res_start;
703 insert_resource(&iomem_resource, hpet_res);
704 }
705
613 return 0; 706 return 0;
614} 707}
615#else 708#else
@@ -860,8 +953,6 @@ static void __init acpi_process_madt(void)
860 return; 953 return;
861} 954}
862 955
863extern int acpi_force;
864
865#ifdef __i386__ 956#ifdef __i386__
866 957
867static int __init disable_acpi_irq(struct dmi_system_id *d) 958static int __init disable_acpi_irq(struct dmi_system_id *d)
@@ -1163,3 +1254,75 @@ int __init acpi_boot_init(void)
1163 1254
1164 return 0; 1255 return 0;
1165} 1256}
1257
1258static int __init parse_acpi(char *arg)
1259{
1260 if (!arg)
1261 return -EINVAL;
1262
1263 /* "acpi=off" disables both ACPI table parsing and interpreter */
1264 if (strcmp(arg, "off") == 0) {
1265 disable_acpi();
1266 }
1267 /* acpi=force to over-ride black-list */
1268 else if (strcmp(arg, "force") == 0) {
1269 acpi_force = 1;
1270 acpi_ht = 1;
1271 acpi_disabled = 0;
1272 }
1273 /* acpi=strict disables out-of-spec workarounds */
1274 else if (strcmp(arg, "strict") == 0) {
1275 acpi_strict = 1;
1276 }
1277 /* Limit ACPI just to boot-time to enable HT */
1278 else if (strcmp(arg, "ht") == 0) {
1279 if (!acpi_force)
1280 disable_acpi();
1281 acpi_ht = 1;
1282 }
1283 /* "acpi=noirq" disables ACPI interrupt routing */
1284 else if (strcmp(arg, "noirq") == 0) {
1285 acpi_noirq_set();
1286 } else {
1287 /* Core will printk when we return error. */
1288 return -EINVAL;
1289 }
1290 return 0;
1291}
1292early_param("acpi", parse_acpi);
1293
1294/* FIXME: Using pci= for an ACPI parameter is a travesty. */
1295static int __init parse_pci(char *arg)
1296{
1297 if (arg && strcmp(arg, "noacpi") == 0)
1298 acpi_disable_pci();
1299 return 0;
1300}
1301early_param("pci", parse_pci);
1302
1303#ifdef CONFIG_X86_IO_APIC
1304static int __init parse_acpi_skip_timer_override(char *arg)
1305{
1306 acpi_skip_timer_override = 1;
1307 return 0;
1308}
1309early_param("acpi_skip_timer_override", parse_acpi_skip_timer_override);
1310#endif /* CONFIG_X86_IO_APIC */
1311
1312static int __init setup_acpi_sci(char *s)
1313{
1314 if (!s)
1315 return -EINVAL;
1316 if (!strcmp(s, "edge"))
1317 acpi_sci_flags.trigger = 1;
1318 else if (!strcmp(s, "level"))
1319 acpi_sci_flags.trigger = 3;
1320 else if (!strcmp(s, "high"))
1321 acpi_sci_flags.polarity = 1;
1322 else if (!strcmp(s, "low"))
1323 acpi_sci_flags.polarity = 3;
1324 else
1325 return -EINVAL;
1326 return 0;
1327}
1328early_param("acpi_sci", setup_acpi_sci);
diff --git a/arch/i386/kernel/acpi/earlyquirk.c b/arch/i386/kernel/acpi/earlyquirk.c
index 1649a175a206..fe799b11ac0a 100644
--- a/arch/i386/kernel/acpi/earlyquirk.c
+++ b/arch/i386/kernel/acpi/earlyquirk.c
@@ -48,7 +48,11 @@ void __init check_acpi_pci(void)
48 int num, slot, func; 48 int num, slot, func;
49 49
50 /* Assume the machine supports type 1. If not it will 50 /* Assume the machine supports type 1. If not it will
51 always read ffffffff and should not have any side effect. */ 51 always read ffffffff and should not have any side effect.
52 Actually a few buggy systems can machine check. Allow the user
53 to disable it by command line option at least -AK */
54 if (!early_pci_allowed())
55 return;
52 56
53 /* Poor man's PCI discovery */ 57 /* Poor man's PCI discovery */
54 for (num = 0; num < 32; num++) { 58 for (num = 0; num < 32; num++) {
diff --git a/arch/i386/kernel/apic.c b/arch/i386/kernel/apic.c
index 8c844d07862f..90faae5c5d30 100644
--- a/arch/i386/kernel/apic.c
+++ b/arch/i386/kernel/apic.c
@@ -52,7 +52,18 @@ static cpumask_t timer_bcast_ipi;
52/* 52/*
53 * Knob to control our willingness to enable the local APIC. 53 * Knob to control our willingness to enable the local APIC.
54 */ 54 */
55int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */ 55static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
56
57static inline void lapic_disable(void)
58{
59 enable_local_apic = -1;
60 clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
61}
62
63static inline void lapic_enable(void)
64{
65 enable_local_apic = 1;
66}
56 67
57/* 68/*
58 * Debug level 69 * Debug level
@@ -586,8 +597,7 @@ void __devinit setup_local_APIC(void)
586 printk("No ESR for 82489DX.\n"); 597 printk("No ESR for 82489DX.\n");
587 } 598 }
588 599
589 if (nmi_watchdog == NMI_LOCAL_APIC) 600 setup_apic_nmi_watchdog(NULL);
590 setup_apic_nmi_watchdog();
591 apic_pm_activate(); 601 apic_pm_activate();
592} 602}
593 603
@@ -1373,3 +1383,18 @@ int __init APIC_init_uniprocessor (void)
1373 1383
1374 return 0; 1384 return 0;
1375} 1385}
1386
1387static int __init parse_lapic(char *arg)
1388{
1389 lapic_enable();
1390 return 0;
1391}
1392early_param("lapic", parse_lapic);
1393
1394static int __init parse_nolapic(char *arg)
1395{
1396 lapic_disable();
1397 return 0;
1398}
1399early_param("nolapic", parse_nolapic);
1400
diff --git a/arch/i386/kernel/cpu/amd.c b/arch/i386/kernel/cpu/amd.c
index e6a2d6b80cda..e4758095d87a 100644
--- a/arch/i386/kernel/cpu/amd.c
+++ b/arch/i386/kernel/cpu/amd.c
@@ -22,7 +22,7 @@
22extern void vide(void); 22extern void vide(void);
23__asm__(".align 4\nvide: ret"); 23__asm__(".align 4\nvide: ret");
24 24
25static void __init init_amd(struct cpuinfo_x86 *c) 25static void __cpuinit init_amd(struct cpuinfo_x86 *c)
26{ 26{
27 u32 l, h; 27 u32 l, h;
28 int mbytes = num_physpages >> (20-PAGE_SHIFT); 28 int mbytes = num_physpages >> (20-PAGE_SHIFT);
@@ -246,7 +246,7 @@ static void __init init_amd(struct cpuinfo_x86 *c)
246 num_cache_leaves = 3; 246 num_cache_leaves = 3;
247} 247}
248 248
249static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size) 249static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
250{ 250{
251 /* AMD errata T13 (order #21922) */ 251 /* AMD errata T13 (order #21922) */
252 if ((c->x86 == 6)) { 252 if ((c->x86 == 6)) {
@@ -259,7 +259,7 @@ static unsigned int amd_size_cache(struct cpuinfo_x86 * c, unsigned int size)
259 return size; 259 return size;
260} 260}
261 261
262static struct cpu_dev amd_cpu_dev __initdata = { 262static struct cpu_dev amd_cpu_dev __cpuinitdata = {
263 .c_vendor = "AMD", 263 .c_vendor = "AMD",
264 .c_ident = { "AuthenticAMD" }, 264 .c_ident = { "AuthenticAMD" },
265 .c_models = { 265 .c_models = {
@@ -275,7 +275,6 @@ static struct cpu_dev amd_cpu_dev __initdata = {
275 }, 275 },
276 }, 276 },
277 .c_init = init_amd, 277 .c_init = init_amd,
278 .c_identify = generic_identify,
279 .c_size_cache = amd_size_cache, 278 .c_size_cache = amd_size_cache,
280}; 279};
281 280
diff --git a/arch/i386/kernel/cpu/centaur.c b/arch/i386/kernel/cpu/centaur.c
index bd75629dd262..8c25047975c0 100644
--- a/arch/i386/kernel/cpu/centaur.c
+++ b/arch/i386/kernel/cpu/centaur.c
@@ -9,7 +9,7 @@
9 9
10#ifdef CONFIG_X86_OOSTORE 10#ifdef CONFIG_X86_OOSTORE
11 11
12static u32 __init power2(u32 x) 12static u32 __cpuinit power2(u32 x)
13{ 13{
14 u32 s=1; 14 u32 s=1;
15 while(s<=x) 15 while(s<=x)
@@ -22,7 +22,7 @@ static u32 __init power2(u32 x)
22 * Set up an actual MCR 22 * Set up an actual MCR
23 */ 23 */
24 24
25static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key) 25static void __cpuinit centaur_mcr_insert(int reg, u32 base, u32 size, int key)
26{ 26{
27 u32 lo, hi; 27 u32 lo, hi;
28 28
@@ -40,7 +40,7 @@ static void __init centaur_mcr_insert(int reg, u32 base, u32 size, int key)
40 * Shortcut: We know you can't put 4Gig of RAM on a winchip 40 * Shortcut: We know you can't put 4Gig of RAM on a winchip
41 */ 41 */
42 42
43static u32 __init ramtop(void) /* 16388 */ 43static u32 __cpuinit ramtop(void) /* 16388 */
44{ 44{
45 int i; 45 int i;
46 u32 top = 0; 46 u32 top = 0;
@@ -91,7 +91,7 @@ static u32 __init ramtop(void) /* 16388 */
91 * Compute a set of MCR's to give maximum coverage 91 * Compute a set of MCR's to give maximum coverage
92 */ 92 */
93 93
94static int __init centaur_mcr_compute(int nr, int key) 94static int __cpuinit centaur_mcr_compute(int nr, int key)
95{ 95{
96 u32 mem = ramtop(); 96 u32 mem = ramtop();
97 u32 root = power2(mem); 97 u32 root = power2(mem);
@@ -166,7 +166,7 @@ static int __init centaur_mcr_compute(int nr, int key)
166 return ct; 166 return ct;
167} 167}
168 168
169static void __init centaur_create_optimal_mcr(void) 169static void __cpuinit centaur_create_optimal_mcr(void)
170{ 170{
171 int i; 171 int i;
172 /* 172 /*
@@ -189,7 +189,7 @@ static void __init centaur_create_optimal_mcr(void)
189 wrmsr(MSR_IDT_MCR0+i, 0, 0); 189 wrmsr(MSR_IDT_MCR0+i, 0, 0);
190} 190}
191 191
192static void __init winchip2_create_optimal_mcr(void) 192static void __cpuinit winchip2_create_optimal_mcr(void)
193{ 193{
194 u32 lo, hi; 194 u32 lo, hi;
195 int i; 195 int i;
@@ -227,7 +227,7 @@ static void __init winchip2_create_optimal_mcr(void)
227 * Handle the MCR key on the Winchip 2. 227 * Handle the MCR key on the Winchip 2.
228 */ 228 */
229 229
230static void __init winchip2_unprotect_mcr(void) 230static void __cpuinit winchip2_unprotect_mcr(void)
231{ 231{
232 u32 lo, hi; 232 u32 lo, hi;
233 u32 key; 233 u32 key;
@@ -239,7 +239,7 @@ static void __init winchip2_unprotect_mcr(void)
239 wrmsr(MSR_IDT_MCR_CTRL, lo, hi); 239 wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
240} 240}
241 241
242static void __init winchip2_protect_mcr(void) 242static void __cpuinit winchip2_protect_mcr(void)
243{ 243{
244 u32 lo, hi; 244 u32 lo, hi;
245 245
@@ -257,7 +257,7 @@ static void __init winchip2_protect_mcr(void)
257#define RNG_ENABLED (1 << 3) 257#define RNG_ENABLED (1 << 3)
258#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */ 258#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
259 259
260static void __init init_c3(struct cpuinfo_x86 *c) 260static void __cpuinit init_c3(struct cpuinfo_x86 *c)
261{ 261{
262 u32 lo, hi; 262 u32 lo, hi;
263 263
@@ -303,7 +303,7 @@ static void __init init_c3(struct cpuinfo_x86 *c)
303 display_cacheinfo(c); 303 display_cacheinfo(c);
304} 304}
305 305
306static void __init init_centaur(struct cpuinfo_x86 *c) 306static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
307{ 307{
308 enum { 308 enum {
309 ECX8=1<<1, 309 ECX8=1<<1,
@@ -442,7 +442,7 @@ static void __init init_centaur(struct cpuinfo_x86 *c)
442 } 442 }
443} 443}
444 444
445static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size) 445static unsigned int __cpuinit centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size)
446{ 446{
447 /* VIA C3 CPUs (670-68F) need further shifting. */ 447 /* VIA C3 CPUs (670-68F) need further shifting. */
448 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8))) 448 if ((c->x86 == 6) && ((c->x86_model == 7) || (c->x86_model == 8)))
@@ -457,7 +457,7 @@ static unsigned int centaur_size_cache(struct cpuinfo_x86 * c, unsigned int size
457 return size; 457 return size;
458} 458}
459 459
460static struct cpu_dev centaur_cpu_dev __initdata = { 460static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
461 .c_vendor = "Centaur", 461 .c_vendor = "Centaur",
462 .c_ident = { "CentaurHauls" }, 462 .c_ident = { "CentaurHauls" },
463 .c_init = init_centaur, 463 .c_init = init_centaur,
diff --git a/arch/i386/kernel/cpu/common.c b/arch/i386/kernel/cpu/common.c
index 70c87de582c7..2799baaadf45 100644
--- a/arch/i386/kernel/cpu/common.c
+++ b/arch/i386/kernel/cpu/common.c
@@ -36,7 +36,7 @@ struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {};
36 36
37extern int disable_pse; 37extern int disable_pse;
38 38
39static void default_init(struct cpuinfo_x86 * c) 39static void __cpuinit default_init(struct cpuinfo_x86 * c)
40{ 40{
41 /* Not much we can do here... */ 41 /* Not much we can do here... */
42 /* Check if at least it has cpuid */ 42 /* Check if at least it has cpuid */
@@ -49,7 +49,7 @@ static void default_init(struct cpuinfo_x86 * c)
49 } 49 }
50} 50}
51 51
52static struct cpu_dev default_cpu = { 52static struct cpu_dev __cpuinitdata default_cpu = {
53 .c_init = default_init, 53 .c_init = default_init,
54 .c_vendor = "Unknown", 54 .c_vendor = "Unknown",
55}; 55};
@@ -265,7 +265,7 @@ static void __init early_cpu_detect(void)
265 } 265 }
266} 266}
267 267
268void __cpuinit generic_identify(struct cpuinfo_x86 * c) 268static void __cpuinit generic_identify(struct cpuinfo_x86 * c)
269{ 269{
270 u32 tfms, xlvl; 270 u32 tfms, xlvl;
271 int ebx; 271 int ebx;
@@ -675,7 +675,7 @@ old_gdt:
675#endif 675#endif
676 676
677 /* Clear %fs and %gs. */ 677 /* Clear %fs and %gs. */
678 asm volatile ("xorl %eax, %eax; movl %eax, %fs; movl %eax, %gs"); 678 asm volatile ("movl %0, %%fs; movl %0, %%gs" : : "r" (0));
679 679
680 /* Clear all 6 debug registers: */ 680 /* Clear all 6 debug registers: */
681 set_debugreg(0, 0); 681 set_debugreg(0, 0);
diff --git a/arch/i386/kernel/cpu/cpu.h b/arch/i386/kernel/cpu/cpu.h
index 5a1d4f163e84..2f6432cef6ff 100644
--- a/arch/i386/kernel/cpu/cpu.h
+++ b/arch/i386/kernel/cpu/cpu.h
@@ -24,7 +24,5 @@ extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM];
24extern int get_model_name(struct cpuinfo_x86 *c); 24extern int get_model_name(struct cpuinfo_x86 *c);
25extern void display_cacheinfo(struct cpuinfo_x86 *c); 25extern void display_cacheinfo(struct cpuinfo_x86 *c);
26 26
27extern void generic_identify(struct cpuinfo_x86 * c);
28
29extern void early_intel_workaround(struct cpuinfo_x86 *c); 27extern void early_intel_workaround(struct cpuinfo_x86 *c);
30 28
diff --git a/arch/i386/kernel/cpu/cyrix.c b/arch/i386/kernel/cpu/cyrix.c
index f03b7f94c304..c0c3b59de32c 100644
--- a/arch/i386/kernel/cpu/cyrix.c
+++ b/arch/i386/kernel/cpu/cyrix.c
@@ -12,7 +12,7 @@
12/* 12/*
13 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU 13 * Read NSC/Cyrix DEVID registers (DIR) to get more detailed info. about the CPU
14 */ 14 */
15static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1) 15static void __cpuinit do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
16{ 16{
17 unsigned char ccr2, ccr3; 17 unsigned char ccr2, ccr3;
18 unsigned long flags; 18 unsigned long flags;
@@ -52,25 +52,25 @@ static void __init do_cyrix_devid(unsigned char *dir0, unsigned char *dir1)
52 * Actually since bugs.h doesn't even reference this perhaps someone should 52 * Actually since bugs.h doesn't even reference this perhaps someone should
53 * fix the documentation ??? 53 * fix the documentation ???
54 */ 54 */
55static unsigned char Cx86_dir0_msb __initdata = 0; 55static unsigned char Cx86_dir0_msb __cpuinitdata = 0;
56 56
57static char Cx86_model[][9] __initdata = { 57static char Cx86_model[][9] __cpuinitdata = {
58 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ", 58 "Cx486", "Cx486", "5x86 ", "6x86", "MediaGX ", "6x86MX ",
59 "M II ", "Unknown" 59 "M II ", "Unknown"
60}; 60};
61static char Cx486_name[][5] __initdata = { 61static char Cx486_name[][5] __cpuinitdata = {
62 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx", 62 "SLC", "DLC", "SLC2", "DLC2", "SRx", "DRx",
63 "SRx2", "DRx2" 63 "SRx2", "DRx2"
64}; 64};
65static char Cx486S_name[][4] __initdata = { 65static char Cx486S_name[][4] __cpuinitdata = {
66 "S", "S2", "Se", "S2e" 66 "S", "S2", "Se", "S2e"
67}; 67};
68static char Cx486D_name[][4] __initdata = { 68static char Cx486D_name[][4] __cpuinitdata = {
69 "DX", "DX2", "?", "?", "?", "DX4" 69 "DX", "DX2", "?", "?", "?", "DX4"
70}; 70};
71static char Cx86_cb[] __initdata = "?.5x Core/Bus Clock"; 71static char Cx86_cb[] __cpuinitdata = "?.5x Core/Bus Clock";
72static char cyrix_model_mult1[] __initdata = "12??43"; 72static char cyrix_model_mult1[] __cpuinitdata = "12??43";
73static char cyrix_model_mult2[] __initdata = "12233445"; 73static char cyrix_model_mult2[] __cpuinitdata = "12233445";
74 74
75/* 75/*
76 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old 76 * Reset the slow-loop (SLOP) bit on the 686(L) which is set by some old
@@ -82,7 +82,7 @@ static char cyrix_model_mult2[] __initdata = "12233445";
82 82
83extern void calibrate_delay(void) __init; 83extern void calibrate_delay(void) __init;
84 84
85static void __init check_cx686_slop(struct cpuinfo_x86 *c) 85static void __cpuinit check_cx686_slop(struct cpuinfo_x86 *c)
86{ 86{
87 unsigned long flags; 87 unsigned long flags;
88 88
@@ -107,7 +107,7 @@ static void __init check_cx686_slop(struct cpuinfo_x86 *c)
107} 107}
108 108
109 109
110static void __init set_cx86_reorder(void) 110static void __cpuinit set_cx86_reorder(void)
111{ 111{
112 u8 ccr3; 112 u8 ccr3;
113 113
@@ -122,7 +122,7 @@ static void __init set_cx86_reorder(void)
122 setCx86(CX86_CCR3, ccr3); 122 setCx86(CX86_CCR3, ccr3);
123} 123}
124 124
125static void __init set_cx86_memwb(void) 125static void __cpuinit set_cx86_memwb(void)
126{ 126{
127 u32 cr0; 127 u32 cr0;
128 128
@@ -137,7 +137,7 @@ static void __init set_cx86_memwb(void)
137 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 ); 137 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14 );
138} 138}
139 139
140static void __init set_cx86_inc(void) 140static void __cpuinit set_cx86_inc(void)
141{ 141{
142 unsigned char ccr3; 142 unsigned char ccr3;
143 143
@@ -158,7 +158,7 @@ static void __init set_cx86_inc(void)
158 * Configure later MediaGX and/or Geode processor. 158 * Configure later MediaGX and/or Geode processor.
159 */ 159 */
160 160
161static void __init geode_configure(void) 161static void __cpuinit geode_configure(void)
162{ 162{
163 unsigned long flags; 163 unsigned long flags;
164 u8 ccr3, ccr4; 164 u8 ccr3, ccr4;
@@ -184,14 +184,14 @@ static void __init geode_configure(void)
184 184
185 185
186#ifdef CONFIG_PCI 186#ifdef CONFIG_PCI
187static struct pci_device_id __initdata cyrix_55x0[] = { 187static struct pci_device_id __cpuinitdata cyrix_55x0[] = {
188 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) }, 188 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5510) },
189 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) }, 189 { PCI_DEVICE(PCI_VENDOR_ID_CYRIX, PCI_DEVICE_ID_CYRIX_5520) },
190 { }, 190 { },
191}; 191};
192#endif 192#endif
193 193
194static void __init init_cyrix(struct cpuinfo_x86 *c) 194static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
195{ 195{
196 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0; 196 unsigned char dir0, dir0_msn, dir0_lsn, dir1 = 0;
197 char *buf = c->x86_model_id; 197 char *buf = c->x86_model_id;
@@ -346,7 +346,7 @@ static void __init init_cyrix(struct cpuinfo_x86 *c)
346/* 346/*
347 * Handle National Semiconductor branded processors 347 * Handle National Semiconductor branded processors
348 */ 348 */
349static void __init init_nsc(struct cpuinfo_x86 *c) 349static void __cpuinit init_nsc(struct cpuinfo_x86 *c)
350{ 350{
351 /* There may be GX1 processors in the wild that are branded 351 /* There may be GX1 processors in the wild that are branded
352 * NSC and not Cyrix. 352 * NSC and not Cyrix.
@@ -394,7 +394,7 @@ static inline int test_cyrix_52div(void)
394 return (unsigned char) (test >> 8) == 0x02; 394 return (unsigned char) (test >> 8) == 0x02;
395} 395}
396 396
397static void cyrix_identify(struct cpuinfo_x86 * c) 397static void __cpuinit cyrix_identify(struct cpuinfo_x86 * c)
398{ 398{
399 /* Detect Cyrix with disabled CPUID */ 399 /* Detect Cyrix with disabled CPUID */
400 if ( c->x86 == 4 && test_cyrix_52div() ) { 400 if ( c->x86 == 4 && test_cyrix_52div() ) {
@@ -427,10 +427,9 @@ static void cyrix_identify(struct cpuinfo_x86 * c)
427 local_irq_restore(flags); 427 local_irq_restore(flags);
428 } 428 }
429 } 429 }
430 generic_identify(c);
431} 430}
432 431
433static struct cpu_dev cyrix_cpu_dev __initdata = { 432static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
434 .c_vendor = "Cyrix", 433 .c_vendor = "Cyrix",
435 .c_ident = { "CyrixInstead" }, 434 .c_ident = { "CyrixInstead" },
436 .c_init = init_cyrix, 435 .c_init = init_cyrix,
@@ -453,11 +452,10 @@ static int __init cyrix_exit_cpu(void)
453 452
454late_initcall(cyrix_exit_cpu); 453late_initcall(cyrix_exit_cpu);
455 454
456static struct cpu_dev nsc_cpu_dev __initdata = { 455static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
457 .c_vendor = "NSC", 456 .c_vendor = "NSC",
458 .c_ident = { "Geode by NSC" }, 457 .c_ident = { "Geode by NSC" },
459 .c_init = init_nsc, 458 .c_init = init_nsc,
460 .c_identify = generic_identify,
461}; 459};
462 460
463int __init nsc_init_cpu(void) 461int __init nsc_init_cpu(void)
diff --git a/arch/i386/kernel/cpu/intel.c b/arch/i386/kernel/cpu/intel.c
index 5a2e270924b1..94a95aa5227e 100644
--- a/arch/i386/kernel/cpu/intel.c
+++ b/arch/i386/kernel/cpu/intel.c
@@ -198,7 +198,7 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
198} 198}
199 199
200 200
201static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) 201static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 * c, unsigned int size)
202{ 202{
203 /* Intel PIII Tualatin. This comes in two flavours. 203 /* Intel PIII Tualatin. This comes in two flavours.
204 * One has 256kb of cache, the other 512. We have no way 204 * One has 256kb of cache, the other 512. We have no way
@@ -263,7 +263,6 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
263 }, 263 },
264 }, 264 },
265 .c_init = init_intel, 265 .c_init = init_intel,
266 .c_identify = generic_identify,
267 .c_size_cache = intel_size_cache, 266 .c_size_cache = intel_size_cache,
268}; 267};
269 268
diff --git a/arch/i386/kernel/cpu/mcheck/Makefile b/arch/i386/kernel/cpu/mcheck/Makefile
index 30808f3d6715..f1ebe1c1c17a 100644
--- a/arch/i386/kernel/cpu/mcheck/Makefile
+++ b/arch/i386/kernel/cpu/mcheck/Makefile
@@ -1,2 +1,2 @@
1obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o 1obj-y = mce.o k7.o p4.o p5.o p6.o winchip.o therm_throt.o
2obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o 2obj-$(CONFIG_X86_MCE_NONFATAL) += non-fatal.o
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index b95f1b3d53aa..504434a46011 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -13,6 +13,8 @@
13#include <asm/msr.h> 13#include <asm/msr.h>
14#include <asm/apic.h> 14#include <asm/apic.h>
15 15
16#include <asm/therm_throt.h>
17
16#include "mce.h" 18#include "mce.h"
17 19
18/* as supported by the P4/Xeon family */ 20/* as supported by the P4/Xeon family */
@@ -44,25 +46,12 @@ static void unexpected_thermal_interrupt(struct pt_regs *regs)
44/* P4/Xeon Thermal transition interrupt handler */ 46/* P4/Xeon Thermal transition interrupt handler */
45static void intel_thermal_interrupt(struct pt_regs *regs) 47static void intel_thermal_interrupt(struct pt_regs *regs)
46{ 48{
47 u32 l, h; 49 __u64 msr_val;
48 unsigned int cpu = smp_processor_id();
49 static unsigned long next[NR_CPUS];
50 50
51 ack_APIC_irq(); 51 ack_APIC_irq();
52 52
53 if (time_after(next[cpu], jiffies)) 53 rdmsrl(MSR_IA32_THERM_STATUS, msr_val);
54 return; 54 therm_throt_process(msr_val & 0x1);
55
56 next[cpu] = jiffies + HZ*5;
57 rdmsr(MSR_IA32_THERM_STATUS, l, h);
58 if (l & 0x1) {
59 printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
60 printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
61 cpu);
62 add_taint(TAINT_MACHINE_CHECK);
63 } else {
64 printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
65 }
66} 55}
67 56
68/* Thermal interrupt handler for this CPU setup */ 57/* Thermal interrupt handler for this CPU setup */
@@ -122,10 +111,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
122 111
123 rdmsr (MSR_IA32_MISC_ENABLE, l, h); 112 rdmsr (MSR_IA32_MISC_ENABLE, l, h);
124 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); 113 wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
125 114
126 l = apic_read (APIC_LVTTHMR); 115 l = apic_read (APIC_LVTTHMR);
127 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 116 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
128 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); 117 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
118
119 /* enable thermal throttle processing */
120 atomic_set(&therm_throt_en, 1);
129 return; 121 return;
130} 122}
131#endif /* CONFIG_X86_MCE_P4THERMAL */ 123#endif /* CONFIG_X86_MCE_P4THERMAL */
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
new file mode 100644
index 000000000000..4f43047de406
--- /dev/null
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -0,0 +1,180 @@
1/*
2 * linux/arch/i386/kerne/cpu/mcheck/therm_throt.c
3 *
4 * Thermal throttle event support code (such as syslog messaging and rate
5 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
6 * This allows consistent reporting of CPU thermal throttle events.
7 *
8 * Maintains a counter in /sys that keeps track of the number of thermal
9 * events, such that the user knows how bad the thermal problem might be
10 * (since the logging to syslog and mcelog is rate limited).
11 *
12 * Author: Dmitriy Zavin (dmitriyz@google.com)
13 *
14 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
15 * Inspired by Ross Biro's and Al Borchers' counter code.
16 */
17
18#include <linux/percpu.h>
19#include <linux/sysdev.h>
20#include <linux/cpu.h>
21#include <asm/cpu.h>
22#include <linux/notifier.h>
23#include <asm/therm_throt.h>
24
25/* How long to wait between reporting thermal events */
26#define CHECK_INTERVAL (300 * HZ)
27
28static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
29static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
30atomic_t therm_throt_en = ATOMIC_INIT(0);
31
32#ifdef CONFIG_SYSFS
33#define define_therm_throt_sysdev_one_ro(_name) \
34 static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
35
36#define define_therm_throt_sysdev_show_func(name) \
37static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \
38 char *buf) \
39{ \
40 unsigned int cpu = dev->id; \
41 ssize_t ret; \
42 \
43 preempt_disable(); /* CPU hotplug */ \
44 if (cpu_online(cpu)) \
45 ret = sprintf(buf, "%lu\n", \
46 per_cpu(thermal_throttle_##name, cpu)); \
47 else \
48 ret = 0; \
49 preempt_enable(); \
50 \
51 return ret; \
52}
53
54define_therm_throt_sysdev_show_func(count);
55define_therm_throt_sysdev_one_ro(count);
56
57static struct attribute *thermal_throttle_attrs[] = {
58 &attr_count.attr,
59 NULL
60};
61
62static struct attribute_group thermal_throttle_attr_group = {
63 .attrs = thermal_throttle_attrs,
64 .name = "thermal_throttle"
65};
66#endif /* CONFIG_SYSFS */
67
68/***
69 * therm_throt_process - Process thermal throttling event from interrupt
70 * @curr: Whether the condition is current or not (boolean), since the
71 * thermal interrupt normally gets called both when the thermal
72 * event begins and once the event has ended.
73 *
74 * This function is called by the thermal interrupt after the
75 * IRQ has been acknowledged.
76 *
77 * It will take care of rate limiting and printing messages to the syslog.
78 *
79 * Returns: 0 : Event should NOT be further logged, i.e. still in
80 * "timeout" from previous log message.
81 * 1 : Event should be logged further, and a message has been
82 * printed to the syslog.
83 */
84int therm_throt_process(int curr)
85{
86 unsigned int cpu = smp_processor_id();
87 __u64 tmp_jiffs = get_jiffies_64();
88
89 if (curr)
90 __get_cpu_var(thermal_throttle_count)++;
91
92 if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
93 return 0;
94
95 __get_cpu_var(next_check) = tmp_jiffs + CHECK_INTERVAL;
96
97 /* if we just entered the thermal event */
98 if (curr) {
99 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
100 "cpu clock throttled (total events = %lu)\n", cpu,
101 __get_cpu_var(thermal_throttle_count));
102
103 add_taint(TAINT_MACHINE_CHECK);
104 } else {
105 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
106 }
107
108 return 1;
109}
110
111#ifdef CONFIG_SYSFS
112/* Add/Remove thermal_throttle interface for CPU device */
113static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev)
114{
115 sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
116 return 0;
117}
118
119#ifdef CONFIG_HOTPLUG_CPU
120static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev)
121{
122 sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
123 return 0;
124}
125
126/* Mutex protecting device creation against CPU hotplug */
127static DEFINE_MUTEX(therm_cpu_lock);
128
129/* Get notified when a cpu comes on/off. Be hotplug friendly. */
130static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
131 unsigned long action,
132 void *hcpu)
133{
134 unsigned int cpu = (unsigned long)hcpu;
135 struct sys_device *sys_dev;
136
137 sys_dev = get_cpu_sysdev(cpu);
138 mutex_lock(&therm_cpu_lock);
139 switch (action) {
140 case CPU_ONLINE:
141 thermal_throttle_add_dev(sys_dev);
142 break;
143 case CPU_DEAD:
144 thermal_throttle_remove_dev(sys_dev);
145 break;
146 }
147 mutex_unlock(&therm_cpu_lock);
148 return NOTIFY_OK;
149}
150
151static struct notifier_block thermal_throttle_cpu_notifier =
152{
153 .notifier_call = thermal_throttle_cpu_callback,
154};
155#endif /* CONFIG_HOTPLUG_CPU */
156
157static __init int thermal_throttle_init_device(void)
158{
159 unsigned int cpu = 0;
160
161 if (!atomic_read(&therm_throt_en))
162 return 0;
163
164 register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
165
166#ifdef CONFIG_HOTPLUG_CPU
167 mutex_lock(&therm_cpu_lock);
168#endif
169 /* connect live CPUs to sysfs */
170 for_each_online_cpu(cpu)
171 thermal_throttle_add_dev(get_cpu_sysdev(cpu));
172#ifdef CONFIG_HOTPLUG_CPU
173 mutex_unlock(&therm_cpu_lock);
174#endif
175
176 return 0;
177}
178
179device_initcall(thermal_throttle_init_device);
180#endif /* CONFIG_SYSFS */
diff --git a/arch/i386/kernel/cpu/nexgen.c b/arch/i386/kernel/cpu/nexgen.c
index ad87fa58058d..8bf23cc80c63 100644
--- a/arch/i386/kernel/cpu/nexgen.c
+++ b/arch/i386/kernel/cpu/nexgen.c
@@ -10,7 +10,7 @@
10 * to have CPUID. (Thanks to Herbert Oppmann) 10 * to have CPUID. (Thanks to Herbert Oppmann)
11 */ 11 */
12 12
13static int __init deep_magic_nexgen_probe(void) 13static int __cpuinit deep_magic_nexgen_probe(void)
14{ 14{
15 int ret; 15 int ret;
16 16
@@ -27,21 +27,20 @@ static int __init deep_magic_nexgen_probe(void)
27 return ret; 27 return ret;
28} 28}
29 29
30static void __init init_nexgen(struct cpuinfo_x86 * c) 30static void __cpuinit init_nexgen(struct cpuinfo_x86 * c)
31{ 31{
32 c->x86_cache_size = 256; /* A few had 1 MB... */ 32 c->x86_cache_size = 256; /* A few had 1 MB... */
33} 33}
34 34
35static void __init nexgen_identify(struct cpuinfo_x86 * c) 35static void __cpuinit nexgen_identify(struct cpuinfo_x86 * c)
36{ 36{
37 /* Detect NexGen with old hypercode */ 37 /* Detect NexGen with old hypercode */
38 if ( deep_magic_nexgen_probe() ) { 38 if ( deep_magic_nexgen_probe() ) {
39 strcpy(c->x86_vendor_id, "NexGenDriven"); 39 strcpy(c->x86_vendor_id, "NexGenDriven");
40 } 40 }
41 generic_identify(c);
42} 41}
43 42
44static struct cpu_dev nexgen_cpu_dev __initdata = { 43static struct cpu_dev nexgen_cpu_dev __cpuinitdata = {
45 .c_vendor = "Nexgen", 44 .c_vendor = "Nexgen",
46 .c_ident = { "NexGenDriven" }, 45 .c_ident = { "NexGenDriven" },
47 .c_models = { 46 .c_models = {
diff --git a/arch/i386/kernel/cpu/proc.c b/arch/i386/kernel/cpu/proc.c
index f54a15268ed7..76aac088a323 100644
--- a/arch/i386/kernel/cpu/proc.c
+++ b/arch/i386/kernel/cpu/proc.c
@@ -46,8 +46,8 @@ static int show_cpuinfo(struct seq_file *m, void *v)
46 46
47 /* Intel-defined (#2) */ 47 /* Intel-defined (#2) */
48 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est", 48 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
49 "tm2", NULL, "cid", NULL, NULL, "cx16", "xtpr", NULL, 49 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
50 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 50 NULL, NULL, "dca", NULL, NULL, NULL, NULL, NULL,
51 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 51 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
52 52
53 /* VIA/Cyrix/Centaur-defined */ 53 /* VIA/Cyrix/Centaur-defined */
diff --git a/arch/i386/kernel/cpu/rise.c b/arch/i386/kernel/cpu/rise.c
index d08d5a2811c8..9317f7414989 100644
--- a/arch/i386/kernel/cpu/rise.c
+++ b/arch/i386/kernel/cpu/rise.c
@@ -5,7 +5,7 @@
5 5
6#include "cpu.h" 6#include "cpu.h"
7 7
8static void __init init_rise(struct cpuinfo_x86 *c) 8static void __cpuinit init_rise(struct cpuinfo_x86 *c)
9{ 9{
10 printk("CPU: Rise iDragon"); 10 printk("CPU: Rise iDragon");
11 if (c->x86_model > 2) 11 if (c->x86_model > 2)
@@ -28,7 +28,7 @@ static void __init init_rise(struct cpuinfo_x86 *c)
28 set_bit(X86_FEATURE_CX8, c->x86_capability); 28 set_bit(X86_FEATURE_CX8, c->x86_capability);
29} 29}
30 30
31static struct cpu_dev rise_cpu_dev __initdata = { 31static struct cpu_dev rise_cpu_dev __cpuinitdata = {
32 .c_vendor = "Rise", 32 .c_vendor = "Rise",
33 .c_ident = { "RiseRiseRise" }, 33 .c_ident = { "RiseRiseRise" },
34 .c_models = { 34 .c_models = {
diff --git a/arch/i386/kernel/cpu/transmeta.c b/arch/i386/kernel/cpu/transmeta.c
index 7214c9b577ab..4056fb7d2cdf 100644
--- a/arch/i386/kernel/cpu/transmeta.c
+++ b/arch/i386/kernel/cpu/transmeta.c
@@ -5,7 +5,7 @@
5#include <asm/msr.h> 5#include <asm/msr.h>
6#include "cpu.h" 6#include "cpu.h"
7 7
8static void __init init_transmeta(struct cpuinfo_x86 *c) 8static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
9{ 9{
10 unsigned int cap_mask, uk, max, dummy; 10 unsigned int cap_mask, uk, max, dummy;
11 unsigned int cms_rev1, cms_rev2; 11 unsigned int cms_rev1, cms_rev2;
@@ -85,10 +85,9 @@ static void __init init_transmeta(struct cpuinfo_x86 *c)
85#endif 85#endif
86} 86}
87 87
88static void __init transmeta_identify(struct cpuinfo_x86 * c) 88static void __cpuinit transmeta_identify(struct cpuinfo_x86 * c)
89{ 89{
90 u32 xlvl; 90 u32 xlvl;
91 generic_identify(c);
92 91
93 /* Transmeta-defined flags: level 0x80860001 */ 92 /* Transmeta-defined flags: level 0x80860001 */
94 xlvl = cpuid_eax(0x80860000); 93 xlvl = cpuid_eax(0x80860000);
@@ -98,7 +97,7 @@ static void __init transmeta_identify(struct cpuinfo_x86 * c)
98 } 97 }
99} 98}
100 99
101static struct cpu_dev transmeta_cpu_dev __initdata = { 100static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
102 .c_vendor = "Transmeta", 101 .c_vendor = "Transmeta",
103 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 102 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_init = init_transmeta, 103 .c_init = init_transmeta,
diff --git a/arch/i386/kernel/cpu/umc.c b/arch/i386/kernel/cpu/umc.c
index 2cd988f6dc55..1bf3f87e9c5b 100644
--- a/arch/i386/kernel/cpu/umc.c
+++ b/arch/i386/kernel/cpu/umc.c
@@ -5,12 +5,8 @@
5 5
6/* UMC chips appear to be only either 386 or 486, so no special init takes place. 6/* UMC chips appear to be only either 386 or 486, so no special init takes place.
7 */ 7 */
8static void __init init_umc(struct cpuinfo_x86 * c)
9{
10
11}
12 8
13static struct cpu_dev umc_cpu_dev __initdata = { 9static struct cpu_dev umc_cpu_dev __cpuinitdata = {
14 .c_vendor = "UMC", 10 .c_vendor = "UMC",
15 .c_ident = { "UMC UMC UMC" }, 11 .c_ident = { "UMC UMC UMC" },
16 .c_models = { 12 .c_models = {
@@ -21,7 +17,6 @@ static struct cpu_dev umc_cpu_dev __initdata = {
21 } 17 }
22 }, 18 },
23 }, 19 },
24 .c_init = init_umc,
25}; 20};
26 21
27int __init umc_init_cpu(void) 22int __init umc_init_cpu(void)
diff --git a/arch/i386/kernel/crash.c b/arch/i386/kernel/crash.c
index 5b96f038367f..67d297dc1003 100644
--- a/arch/i386/kernel/crash.c
+++ b/arch/i386/kernel/crash.c
@@ -22,6 +22,8 @@
22#include <asm/nmi.h> 22#include <asm/nmi.h>
23#include <asm/hw_irq.h> 23#include <asm/hw_irq.h>
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/kdebug.h>
26
25#include <mach_ipi.h> 27#include <mach_ipi.h>
26 28
27 29
@@ -93,16 +95,25 @@ static void crash_save_self(struct pt_regs *regs)
93#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 95#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
94static atomic_t waiting_for_crash_ipi; 96static atomic_t waiting_for_crash_ipi;
95 97
96static int crash_nmi_callback(struct pt_regs *regs, int cpu) 98static int crash_nmi_callback(struct notifier_block *self,
99 unsigned long val, void *data)
97{ 100{
101 struct pt_regs *regs;
98 struct pt_regs fixed_regs; 102 struct pt_regs fixed_regs;
103 int cpu;
104
105 if (val != DIE_NMI_IPI)
106 return NOTIFY_OK;
107
108 regs = ((struct die_args *)data)->regs;
109 cpu = raw_smp_processor_id();
99 110
100 /* Don't do anything if this handler is invoked on crashing cpu. 111 /* Don't do anything if this handler is invoked on crashing cpu.
101 * Otherwise, system will completely hang. Crashing cpu can get 112 * Otherwise, system will completely hang. Crashing cpu can get
102 * an NMI if system was initially booted with nmi_watchdog parameter. 113 * an NMI if system was initially booted with nmi_watchdog parameter.
103 */ 114 */
104 if (cpu == crashing_cpu) 115 if (cpu == crashing_cpu)
105 return 1; 116 return NOTIFY_STOP;
106 local_irq_disable(); 117 local_irq_disable();
107 118
108 if (!user_mode_vm(regs)) { 119 if (!user_mode_vm(regs)) {
@@ -125,13 +136,18 @@ static void smp_send_nmi_allbutself(void)
125 send_IPI_allbutself(NMI_VECTOR); 136 send_IPI_allbutself(NMI_VECTOR);
126} 137}
127 138
139static struct notifier_block crash_nmi_nb = {
140 .notifier_call = crash_nmi_callback,
141};
142
128static void nmi_shootdown_cpus(void) 143static void nmi_shootdown_cpus(void)
129{ 144{
130 unsigned long msecs; 145 unsigned long msecs;
131 146
132 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); 147 atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1);
133 /* Would it be better to replace the trap vector here? */ 148 /* Would it be better to replace the trap vector here? */
134 set_nmi_callback(crash_nmi_callback); 149 if (register_die_notifier(&crash_nmi_nb))
150 return; /* return what? */
135 /* Ensure the new callback function is set before sending 151 /* Ensure the new callback function is set before sending
136 * out the NMI 152 * out the NMI
137 */ 153 */
diff --git a/arch/i386/kernel/entry.S b/arch/i386/kernel/entry.S
index 87f9f60b803b..5a63d6fdb70e 100644
--- a/arch/i386/kernel/entry.S
+++ b/arch/i386/kernel/entry.S
@@ -76,8 +76,15 @@ DF_MASK = 0x00000400
76NT_MASK = 0x00004000 76NT_MASK = 0x00004000
77VM_MASK = 0x00020000 77VM_MASK = 0x00020000
78 78
79/* These are replaces for paravirtualization */
80#define DISABLE_INTERRUPTS cli
81#define ENABLE_INTERRUPTS sti
82#define ENABLE_INTERRUPTS_SYSEXIT sti; sysexit
83#define INTERRUPT_RETURN iret
84#define GET_CR0_INTO_EAX movl %cr0, %eax
85
79#ifdef CONFIG_PREEMPT 86#ifdef CONFIG_PREEMPT
80#define preempt_stop cli; TRACE_IRQS_OFF 87#define preempt_stop DISABLE_INTERRUPTS; TRACE_IRQS_OFF
81#else 88#else
82#define preempt_stop 89#define preempt_stop
83#define resume_kernel restore_nocheck 90#define resume_kernel restore_nocheck
@@ -176,18 +183,21 @@ VM_MASK = 0x00020000
176 183
177#define RING0_INT_FRAME \ 184#define RING0_INT_FRAME \
178 CFI_STARTPROC simple;\ 185 CFI_STARTPROC simple;\
186 CFI_SIGNAL_FRAME;\
179 CFI_DEF_CFA esp, 3*4;\ 187 CFI_DEF_CFA esp, 3*4;\
180 /*CFI_OFFSET cs, -2*4;*/\ 188 /*CFI_OFFSET cs, -2*4;*/\
181 CFI_OFFSET eip, -3*4 189 CFI_OFFSET eip, -3*4
182 190
183#define RING0_EC_FRAME \ 191#define RING0_EC_FRAME \
184 CFI_STARTPROC simple;\ 192 CFI_STARTPROC simple;\
193 CFI_SIGNAL_FRAME;\
185 CFI_DEF_CFA esp, 4*4;\ 194 CFI_DEF_CFA esp, 4*4;\
186 /*CFI_OFFSET cs, -2*4;*/\ 195 /*CFI_OFFSET cs, -2*4;*/\
187 CFI_OFFSET eip, -3*4 196 CFI_OFFSET eip, -3*4
188 197
189#define RING0_PTREGS_FRAME \ 198#define RING0_PTREGS_FRAME \
190 CFI_STARTPROC simple;\ 199 CFI_STARTPROC simple;\
200 CFI_SIGNAL_FRAME;\
191 CFI_DEF_CFA esp, OLDESP-EBX;\ 201 CFI_DEF_CFA esp, OLDESP-EBX;\
192 /*CFI_OFFSET cs, CS-OLDESP;*/\ 202 /*CFI_OFFSET cs, CS-OLDESP;*/\
193 CFI_OFFSET eip, EIP-OLDESP;\ 203 CFI_OFFSET eip, EIP-OLDESP;\
@@ -233,10 +243,11 @@ ret_from_intr:
233check_userspace: 243check_userspace:
234 movl EFLAGS(%esp), %eax # mix EFLAGS and CS 244 movl EFLAGS(%esp), %eax # mix EFLAGS and CS
235 movb CS(%esp), %al 245 movb CS(%esp), %al
236 testl $(VM_MASK | 3), %eax 246 andl $(VM_MASK | SEGMENT_RPL_MASK), %eax
237 jz resume_kernel 247 cmpl $USER_RPL, %eax
248 jb resume_kernel # not returning to v8086 or userspace
238ENTRY(resume_userspace) 249ENTRY(resume_userspace)
239 cli # make sure we don't miss an interrupt 250 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
240 # setting need_resched or sigpending 251 # setting need_resched or sigpending
241 # between sampling and the iret 252 # between sampling and the iret
242 movl TI_flags(%ebp), %ecx 253 movl TI_flags(%ebp), %ecx
@@ -247,7 +258,7 @@ ENTRY(resume_userspace)
247 258
248#ifdef CONFIG_PREEMPT 259#ifdef CONFIG_PREEMPT
249ENTRY(resume_kernel) 260ENTRY(resume_kernel)
250 cli 261 DISABLE_INTERRUPTS
251 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? 262 cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ?
252 jnz restore_nocheck 263 jnz restore_nocheck
253need_resched: 264need_resched:
@@ -267,6 +278,7 @@ need_resched:
267 # sysenter call handler stub 278 # sysenter call handler stub
268ENTRY(sysenter_entry) 279ENTRY(sysenter_entry)
269 CFI_STARTPROC simple 280 CFI_STARTPROC simple
281 CFI_SIGNAL_FRAME
270 CFI_DEF_CFA esp, 0 282 CFI_DEF_CFA esp, 0
271 CFI_REGISTER esp, ebp 283 CFI_REGISTER esp, ebp
272 movl TSS_sysenter_esp0(%esp),%esp 284 movl TSS_sysenter_esp0(%esp),%esp
@@ -275,7 +287,7 @@ sysenter_past_esp:
275 * No need to follow this irqs on/off section: the syscall 287 * No need to follow this irqs on/off section: the syscall
276 * disabled irqs and here we enable it straight after entry: 288 * disabled irqs and here we enable it straight after entry:
277 */ 289 */
278 sti 290 ENABLE_INTERRUPTS
279 pushl $(__USER_DS) 291 pushl $(__USER_DS)
280 CFI_ADJUST_CFA_OFFSET 4 292 CFI_ADJUST_CFA_OFFSET 4
281 /*CFI_REL_OFFSET ss, 0*/ 293 /*CFI_REL_OFFSET ss, 0*/
@@ -320,7 +332,7 @@ sysenter_past_esp:
320 jae syscall_badsys 332 jae syscall_badsys
321 call *sys_call_table(,%eax,4) 333 call *sys_call_table(,%eax,4)
322 movl %eax,EAX(%esp) 334 movl %eax,EAX(%esp)
323 cli 335 DISABLE_INTERRUPTS
324 TRACE_IRQS_OFF 336 TRACE_IRQS_OFF
325 movl TI_flags(%ebp), %ecx 337 movl TI_flags(%ebp), %ecx
326 testw $_TIF_ALLWORK_MASK, %cx 338 testw $_TIF_ALLWORK_MASK, %cx
@@ -330,8 +342,7 @@ sysenter_past_esp:
330 movl OLDESP(%esp), %ecx 342 movl OLDESP(%esp), %ecx
331 xorl %ebp,%ebp 343 xorl %ebp,%ebp
332 TRACE_IRQS_ON 344 TRACE_IRQS_ON
333 sti 345 ENABLE_INTERRUPTS_SYSEXIT
334 sysexit
335 CFI_ENDPROC 346 CFI_ENDPROC
336 347
337 348
@@ -356,7 +367,7 @@ syscall_call:
356 call *sys_call_table(,%eax,4) 367 call *sys_call_table(,%eax,4)
357 movl %eax,EAX(%esp) # store the return value 368 movl %eax,EAX(%esp) # store the return value
358syscall_exit: 369syscall_exit:
359 cli # make sure we don't miss an interrupt 370 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
360 # setting need_resched or sigpending 371 # setting need_resched or sigpending
361 # between sampling and the iret 372 # between sampling and the iret
362 TRACE_IRQS_OFF 373 TRACE_IRQS_OFF
@@ -371,8 +382,8 @@ restore_all:
371 # See comments in process.c:copy_thread() for details. 382 # See comments in process.c:copy_thread() for details.
372 movb OLDSS(%esp), %ah 383 movb OLDSS(%esp), %ah
373 movb CS(%esp), %al 384 movb CS(%esp), %al
374 andl $(VM_MASK | (4 << 8) | 3), %eax 385 andl $(VM_MASK | (SEGMENT_TI_MASK << 8) | SEGMENT_RPL_MASK), %eax
375 cmpl $((4 << 8) | 3), %eax 386 cmpl $((SEGMENT_LDT << 8) | USER_RPL), %eax
376 CFI_REMEMBER_STATE 387 CFI_REMEMBER_STATE
377 je ldt_ss # returning to user-space with LDT SS 388 je ldt_ss # returning to user-space with LDT SS
378restore_nocheck: 389restore_nocheck:
@@ -381,11 +392,11 @@ restore_nocheck_notrace:
381 RESTORE_REGS 392 RESTORE_REGS
382 addl $4, %esp 393 addl $4, %esp
383 CFI_ADJUST_CFA_OFFSET -4 394 CFI_ADJUST_CFA_OFFSET -4
3841: iret 3951: INTERRUPT_RETURN
385.section .fixup,"ax" 396.section .fixup,"ax"
386iret_exc: 397iret_exc:
387 TRACE_IRQS_ON 398 TRACE_IRQS_ON
388 sti 399 ENABLE_INTERRUPTS
389 pushl $0 # no error code 400 pushl $0 # no error code
390 pushl $do_iret_error 401 pushl $do_iret_error
391 jmp error_code 402 jmp error_code
@@ -409,7 +420,7 @@ ldt_ss:
409 * dosemu and wine happy. */ 420 * dosemu and wine happy. */
410 subl $8, %esp # reserve space for switch16 pointer 421 subl $8, %esp # reserve space for switch16 pointer
411 CFI_ADJUST_CFA_OFFSET 8 422 CFI_ADJUST_CFA_OFFSET 8
412 cli 423 DISABLE_INTERRUPTS
413 TRACE_IRQS_OFF 424 TRACE_IRQS_OFF
414 movl %esp, %eax 425 movl %esp, %eax
415 /* Set up the 16bit stack frame with switch32 pointer on top, 426 /* Set up the 16bit stack frame with switch32 pointer on top,
@@ -419,7 +430,7 @@ ldt_ss:
419 TRACE_IRQS_IRET 430 TRACE_IRQS_IRET
420 RESTORE_REGS 431 RESTORE_REGS
421 lss 20+4(%esp), %esp # switch to 16bit stack 432 lss 20+4(%esp), %esp # switch to 16bit stack
4221: iret 4331: INTERRUPT_RETURN
423.section __ex_table,"a" 434.section __ex_table,"a"
424 .align 4 435 .align 4
425 .long 1b,iret_exc 436 .long 1b,iret_exc
@@ -434,7 +445,7 @@ work_pending:
434 jz work_notifysig 445 jz work_notifysig
435work_resched: 446work_resched:
436 call schedule 447 call schedule
437 cli # make sure we don't miss an interrupt 448 DISABLE_INTERRUPTS # make sure we don't miss an interrupt
438 # setting need_resched or sigpending 449 # setting need_resched or sigpending
439 # between sampling and the iret 450 # between sampling and the iret
440 TRACE_IRQS_OFF 451 TRACE_IRQS_OFF
@@ -490,7 +501,7 @@ syscall_exit_work:
490 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl 501 testb $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SINGLESTEP), %cl
491 jz work_pending 502 jz work_pending
492 TRACE_IRQS_ON 503 TRACE_IRQS_ON
493 sti # could let do_syscall_trace() call 504 ENABLE_INTERRUPTS # could let do_syscall_trace() call
494 # schedule() instead 505 # schedule() instead
495 movl %esp, %eax 506 movl %esp, %eax
496 movl $1, %edx 507 movl $1, %edx
@@ -591,11 +602,9 @@ ENTRY(name) \
591/* The include is where all of the SMP etc. interrupts come from */ 602/* The include is where all of the SMP etc. interrupts come from */
592#include "entry_arch.h" 603#include "entry_arch.h"
593 604
594ENTRY(divide_error) 605KPROBE_ENTRY(page_fault)
595 RING0_INT_FRAME 606 RING0_EC_FRAME
596 pushl $0 # no error code 607 pushl $do_page_fault
597 CFI_ADJUST_CFA_OFFSET 4
598 pushl $do_divide_error
599 CFI_ADJUST_CFA_OFFSET 4 608 CFI_ADJUST_CFA_OFFSET 4
600 ALIGN 609 ALIGN
601error_code: 610error_code:
@@ -645,6 +654,7 @@ error_code:
645 call *%edi 654 call *%edi
646 jmp ret_from_exception 655 jmp ret_from_exception
647 CFI_ENDPROC 656 CFI_ENDPROC
657KPROBE_END(page_fault)
648 658
649ENTRY(coprocessor_error) 659ENTRY(coprocessor_error)
650 RING0_INT_FRAME 660 RING0_INT_FRAME
@@ -669,7 +679,7 @@ ENTRY(device_not_available)
669 pushl $-1 # mark this as an int 679 pushl $-1 # mark this as an int
670 CFI_ADJUST_CFA_OFFSET 4 680 CFI_ADJUST_CFA_OFFSET 4
671 SAVE_ALL 681 SAVE_ALL
672 movl %cr0, %eax 682 GET_CR0_INTO_EAX
673 testl $0x4, %eax # EM (math emulation bit) 683 testl $0x4, %eax # EM (math emulation bit)
674 jne device_not_available_emulate 684 jne device_not_available_emulate
675 preempt_stop 685 preempt_stop
@@ -702,9 +712,15 @@ device_not_available_emulate:
702 jne ok; \ 712 jne ok; \
703label: \ 713label: \
704 movl TSS_sysenter_esp0+offset(%esp),%esp; \ 714 movl TSS_sysenter_esp0+offset(%esp),%esp; \
715 CFI_DEF_CFA esp, 0; \
716 CFI_UNDEFINED eip; \
705 pushfl; \ 717 pushfl; \
718 CFI_ADJUST_CFA_OFFSET 4; \
706 pushl $__KERNEL_CS; \ 719 pushl $__KERNEL_CS; \
707 pushl $sysenter_past_esp 720 CFI_ADJUST_CFA_OFFSET 4; \
721 pushl $sysenter_past_esp; \
722 CFI_ADJUST_CFA_OFFSET 4; \
723 CFI_REL_OFFSET eip, 0
708 724
709KPROBE_ENTRY(debug) 725KPROBE_ENTRY(debug)
710 RING0_INT_FRAME 726 RING0_INT_FRAME
@@ -720,7 +736,8 @@ debug_stack_correct:
720 call do_debug 736 call do_debug
721 jmp ret_from_exception 737 jmp ret_from_exception
722 CFI_ENDPROC 738 CFI_ENDPROC
723 .previous .text 739KPROBE_END(debug)
740
724/* 741/*
725 * NMI is doubly nasty. It can happen _while_ we're handling 742 * NMI is doubly nasty. It can happen _while_ we're handling
726 * a debug fault, and the debug fault hasn't yet been able to 743 * a debug fault, and the debug fault hasn't yet been able to
@@ -729,7 +746,7 @@ debug_stack_correct:
729 * check whether we got an NMI on the debug path where the debug 746 * check whether we got an NMI on the debug path where the debug
730 * fault happened on the sysenter path. 747 * fault happened on the sysenter path.
731 */ 748 */
732ENTRY(nmi) 749KPROBE_ENTRY(nmi)
733 RING0_INT_FRAME 750 RING0_INT_FRAME
734 pushl %eax 751 pushl %eax
735 CFI_ADJUST_CFA_OFFSET 4 752 CFI_ADJUST_CFA_OFFSET 4
@@ -754,6 +771,7 @@ ENTRY(nmi)
754 cmpl $sysenter_entry,12(%esp) 771 cmpl $sysenter_entry,12(%esp)
755 je nmi_debug_stack_check 772 je nmi_debug_stack_check
756nmi_stack_correct: 773nmi_stack_correct:
774 /* We have a RING0_INT_FRAME here */
757 pushl %eax 775 pushl %eax
758 CFI_ADJUST_CFA_OFFSET 4 776 CFI_ADJUST_CFA_OFFSET 4
759 SAVE_ALL 777 SAVE_ALL
@@ -764,9 +782,12 @@ nmi_stack_correct:
764 CFI_ENDPROC 782 CFI_ENDPROC
765 783
766nmi_stack_fixup: 784nmi_stack_fixup:
785 RING0_INT_FRAME
767 FIX_STACK(12,nmi_stack_correct, 1) 786 FIX_STACK(12,nmi_stack_correct, 1)
768 jmp nmi_stack_correct 787 jmp nmi_stack_correct
788
769nmi_debug_stack_check: 789nmi_debug_stack_check:
790 /* We have a RING0_INT_FRAME here */
770 cmpw $__KERNEL_CS,16(%esp) 791 cmpw $__KERNEL_CS,16(%esp)
771 jne nmi_stack_correct 792 jne nmi_stack_correct
772 cmpl $debug,(%esp) 793 cmpl $debug,(%esp)
@@ -777,8 +798,10 @@ nmi_debug_stack_check:
777 jmp nmi_stack_correct 798 jmp nmi_stack_correct
778 799
779nmi_16bit_stack: 800nmi_16bit_stack:
780 RING0_INT_FRAME 801 /* We have a RING0_INT_FRAME here.
781 /* create the pointer to lss back */ 802 *
803 * create the pointer to lss back
804 */
782 pushl %ss 805 pushl %ss
783 CFI_ADJUST_CFA_OFFSET 4 806 CFI_ADJUST_CFA_OFFSET 4
784 pushl %esp 807 pushl %esp
@@ -799,12 +822,13 @@ nmi_16bit_stack:
799 call do_nmi 822 call do_nmi
800 RESTORE_REGS 823 RESTORE_REGS
801 lss 12+4(%esp), %esp # back to 16bit stack 824 lss 12+4(%esp), %esp # back to 16bit stack
8021: iret 8251: INTERRUPT_RETURN
803 CFI_ENDPROC 826 CFI_ENDPROC
804.section __ex_table,"a" 827.section __ex_table,"a"
805 .align 4 828 .align 4
806 .long 1b,iret_exc 829 .long 1b,iret_exc
807.previous 830.previous
831KPROBE_END(nmi)
808 832
809KPROBE_ENTRY(int3) 833KPROBE_ENTRY(int3)
810 RING0_INT_FRAME 834 RING0_INT_FRAME
@@ -816,7 +840,7 @@ KPROBE_ENTRY(int3)
816 call do_int3 840 call do_int3
817 jmp ret_from_exception 841 jmp ret_from_exception
818 CFI_ENDPROC 842 CFI_ENDPROC
819 .previous .text 843KPROBE_END(int3)
820 844
821ENTRY(overflow) 845ENTRY(overflow)
822 RING0_INT_FRAME 846 RING0_INT_FRAME
@@ -881,7 +905,7 @@ KPROBE_ENTRY(general_protection)
881 CFI_ADJUST_CFA_OFFSET 4 905 CFI_ADJUST_CFA_OFFSET 4
882 jmp error_code 906 jmp error_code
883 CFI_ENDPROC 907 CFI_ENDPROC
884 .previous .text 908KPROBE_END(general_protection)
885 909
886ENTRY(alignment_check) 910ENTRY(alignment_check)
887 RING0_EC_FRAME 911 RING0_EC_FRAME
@@ -890,13 +914,14 @@ ENTRY(alignment_check)
890 jmp error_code 914 jmp error_code
891 CFI_ENDPROC 915 CFI_ENDPROC
892 916
893KPROBE_ENTRY(page_fault) 917ENTRY(divide_error)
894 RING0_EC_FRAME 918 RING0_INT_FRAME
895 pushl $do_page_fault 919 pushl $0 # no error code
920 CFI_ADJUST_CFA_OFFSET 4
921 pushl $do_divide_error
896 CFI_ADJUST_CFA_OFFSET 4 922 CFI_ADJUST_CFA_OFFSET 4
897 jmp error_code 923 jmp error_code
898 CFI_ENDPROC 924 CFI_ENDPROC
899 .previous .text
900 925
901#ifdef CONFIG_X86_MCE 926#ifdef CONFIG_X86_MCE
902ENTRY(machine_check) 927ENTRY(machine_check)
@@ -949,6 +974,19 @@ ENTRY(arch_unwind_init_running)
949ENDPROC(arch_unwind_init_running) 974ENDPROC(arch_unwind_init_running)
950#endif 975#endif
951 976
977ENTRY(kernel_thread_helper)
978 pushl $0 # fake return address for unwinder
979 CFI_STARTPROC
980 movl %edx,%eax
981 push %edx
982 CFI_ADJUST_CFA_OFFSET 4
983 call *%ebx
984 push %eax
985 CFI_ADJUST_CFA_OFFSET 4
986 call do_exit
987 CFI_ENDPROC
988ENDPROC(kernel_thread_helper)
989
952.section .rodata,"a" 990.section .rodata,"a"
953#include "syscall_table.S" 991#include "syscall_table.S"
954 992
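
Editor's note: the entry.S hunks above swap the literal cli, sti, iret and movl %cr0,%eax instructions for DISABLE_INTERRUPTS, ENABLE_INTERRUPTS, INTERRUPT_RETURN and GET_CR0_INTO_EAX. As a hedged sketch, the native definitions of those indirection macros would plausibly be nothing more than the instructions they replace; the header that carries them is not part of this diff, so treat the details as assumptions:

    /* Sketch only: plausible native expansions of the new entry.S
     * indirection macros. Any hypervisor-specific variants are outside
     * the scope of this commit.
     */
    #define DISABLE_INTERRUPTS      cli
    #define ENABLE_INTERRUPTS       sti
    #define INTERRUPT_RETURN        iret
    #define GET_CR0_INTO_EAX        movl %cr0, %eax

The KPROBE_ENTRY/KPROBE_END pairs added in the same hunks bracket the handlers in the .kprobes.text section, which the kprobes code refuses to probe.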
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index a6b8bd89aa27..be9d883c62ce 100644
--- a/arch/i386/kernel/head.S
+++ b/arch/i386/kernel/head.S
@@ -371,8 +371,65 @@ rp_sidt:
371 addl $8,%edi 371 addl $8,%edi
372 dec %ecx 372 dec %ecx
373 jne rp_sidt 373 jne rp_sidt
374
375.macro set_early_handler handler,trapno
376 lea \handler,%edx
377 movl $(__KERNEL_CS << 16),%eax
378 movw %dx,%ax
379 movw $0x8E00,%dx /* interrupt gate - dpl=0, present */
380 lea idt_table,%edi
381 movl %eax,8*\trapno(%edi)
382 movl %edx,8*\trapno+4(%edi)
383.endm
384
385 set_early_handler handler=early_divide_err,trapno=0
386 set_early_handler handler=early_illegal_opcode,trapno=6
387 set_early_handler handler=early_protection_fault,trapno=13
388 set_early_handler handler=early_page_fault,trapno=14
389
374 ret 390 ret
375 391
392early_divide_err:
393 xor %edx,%edx
394 pushl $0 /* fake errcode */
395 jmp early_fault
396
397early_illegal_opcode:
398 movl $6,%edx
399 pushl $0 /* fake errcode */
400 jmp early_fault
401
402early_protection_fault:
403 movl $13,%edx
404 jmp early_fault
405
406early_page_fault:
407 movl $14,%edx
408 jmp early_fault
409
410early_fault:
411 cld
412#ifdef CONFIG_PRINTK
413 movl $(__KERNEL_DS),%eax
414 movl %eax,%ds
415 movl %eax,%es
416 cmpl $2,early_recursion_flag
417 je hlt_loop
418 incl early_recursion_flag
419 movl %cr2,%eax
420 pushl %eax
421 pushl %edx /* trapno */
422 pushl $fault_msg
423#ifdef CONFIG_EARLY_PRINTK
424 call early_printk
425#else
426 call printk
427#endif
428#endif
429hlt_loop:
430 hlt
431 jmp hlt_loop
432
376/* This is the default interrupt "handler" :-) */ 433/* This is the default interrupt "handler" :-) */
377 ALIGN 434 ALIGN
378ignore_int: 435ignore_int:
@@ -386,6 +443,9 @@ ignore_int:
386 movl $(__KERNEL_DS),%eax 443 movl $(__KERNEL_DS),%eax
387 movl %eax,%ds 444 movl %eax,%ds
388 movl %eax,%es 445 movl %eax,%es
446 cmpl $2,early_recursion_flag
447 je hlt_loop
448 incl early_recursion_flag
389 pushl 16(%esp) 449 pushl 16(%esp)
390 pushl 24(%esp) 450 pushl 24(%esp)
391 pushl 32(%esp) 451 pushl 32(%esp)
@@ -431,9 +491,16 @@ ENTRY(stack_start)
431 491
432ready: .byte 0 492ready: .byte 0
433 493
494early_recursion_flag:
495 .long 0
496
434int_msg: 497int_msg:
435 .asciz "Unknown interrupt or fault at EIP %p %p %p\n" 498 .asciz "Unknown interrupt or fault at EIP %p %p %p\n"
436 499
500fault_msg:
501 .ascii "Int %d: CR2 %p err %p EIP %p CS %p flags %p\n"
502 .asciz "Stack: %p %p %p %p %p %p %p %p\n"
503
437/* 504/*
438 * The IDT and GDT 'descriptors' are a strange 48-bit object 505 * The IDT and GDT 'descriptors' are a strange 48-bit object
439 * only used by the lidt and lgdt instructions. They are not 506 * only used by the lidt and lgdt instructions. They are not
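
Editor's note: the set_early_handler macro above hand-assembles a 32-bit interrupt gate. A hedged C rendering of the same packing (the struct and function names here are hypothetical, introduced only for illustration):

    /* Equivalent of what set_early_handler stores at idt_table[trapno]:
     *   lo = segment selector in bits 31..16, handler offset bits 15..0
     *   hi = handler offset bits 31..16, attribute byte 0x8E (present,
     *        DPL 0, 32-bit interrupt gate) in bits 15..8
     */
    struct gate32 { unsigned int lo, hi; };

    static void pack_early_gate(struct gate32 *g, unsigned long handler,
                                unsigned short selector)
    {
            g->lo = ((unsigned int)selector << 16) | (handler & 0xffff);
            g->hi = (handler & 0xffff0000) | 0x8e00;
    }

The early_fault path these gates point at prints one diagnostic and parks the CPU in hlt_loop; early_recursion_flag makes a fault taken while printing give up quickly instead of recursing.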
diff --git a/arch/i386/kernel/i8259.c b/arch/i386/kernel/i8259.c
index d4756d154f47..ea5f4e7958d8 100644
--- a/arch/i386/kernel/i8259.c
+++ b/arch/i386/kernel/i8259.c
@@ -45,6 +45,8 @@ static void end_8259A_irq (unsigned int irq)
45 45
46#define shutdown_8259A_irq disable_8259A_irq 46#define shutdown_8259A_irq disable_8259A_irq
47 47
48static int i8259A_auto_eoi;
49
48static void mask_and_ack_8259A(unsigned int); 50static void mask_and_ack_8259A(unsigned int);
49 51
50unsigned int startup_8259A_irq(unsigned int irq) 52unsigned int startup_8259A_irq(unsigned int irq)
@@ -253,7 +255,7 @@ static void save_ELCR(char *trigger)
253 255
254static int i8259A_resume(struct sys_device *dev) 256static int i8259A_resume(struct sys_device *dev)
255{ 257{
256 init_8259A(0); 258 init_8259A(i8259A_auto_eoi);
257 restore_ELCR(irq_trigger); 259 restore_ELCR(irq_trigger);
258 return 0; 260 return 0;
259} 261}
@@ -301,6 +303,8 @@ void init_8259A(int auto_eoi)
301{ 303{
302 unsigned long flags; 304 unsigned long flags;
303 305
306 i8259A_auto_eoi = auto_eoi;
307
304 spin_lock_irqsave(&i8259A_lock, flags); 308 spin_lock_irqsave(&i8259A_lock, flags);
305 309
306 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */ 310 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
diff --git a/arch/i386/kernel/io_apic.c b/arch/i386/kernel/io_apic.c
index 4fb32c551fe0..fd0df75cfbda 100644
--- a/arch/i386/kernel/io_apic.c
+++ b/arch/i386/kernel/io_apic.c
@@ -40,6 +40,7 @@
40#include <asm/nmi.h> 40#include <asm/nmi.h>
41 41
42#include <mach_apic.h> 42#include <mach_apic.h>
43#include <mach_apicdef.h>
43 44
44#include "io_ports.h" 45#include "io_ports.h"
45 46
@@ -65,7 +66,7 @@ int sis_apic_bug = -1;
65 */ 66 */
66int nr_ioapic_registers[MAX_IO_APICS]; 67int nr_ioapic_registers[MAX_IO_APICS];
67 68
68int disable_timer_pin_1 __initdata; 69static int disable_timer_pin_1 __initdata;
69 70
70/* 71/*
71 * Rough estimation of how many shared IRQs there are, can 72 * Rough estimation of how many shared IRQs there are, can
@@ -93,6 +94,34 @@ int vector_irq[NR_VECTORS] __read_mostly = { [0 ... NR_VECTORS - 1] = -1};
93#define vector_to_irq(vector) (vector) 94#define vector_to_irq(vector) (vector)
94#endif 95#endif
95 96
97
98union entry_union {
99 struct { u32 w1, w2; };
100 struct IO_APIC_route_entry entry;
101};
102
103static struct IO_APIC_route_entry ioapic_read_entry(int apic, int pin)
104{
105 union entry_union eu;
106 unsigned long flags;
107 spin_lock_irqsave(&ioapic_lock, flags);
108 eu.w1 = io_apic_read(apic, 0x10 + 2 * pin);
109 eu.w2 = io_apic_read(apic, 0x11 + 2 * pin);
110 spin_unlock_irqrestore(&ioapic_lock, flags);
111 return eu.entry;
112}
113
114static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
115{
116 unsigned long flags;
117 union entry_union eu;
118 eu.entry = e;
119 spin_lock_irqsave(&ioapic_lock, flags);
120 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
121 io_apic_write(apic, 0x11 + 2*pin, eu.w2);
122 spin_unlock_irqrestore(&ioapic_lock, flags);
123}
124
96/* 125/*
97 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are 126 * The common case is 1:1 IRQ<->pin mappings. Sometimes there are
98 * shared ISA-space IRQs, so we have to support them. We are super 127 * shared ISA-space IRQs, so we have to support them. We are super
@@ -200,13 +229,9 @@ static void unmask_IO_APIC_irq (unsigned int irq)
200static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin) 229static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
201{ 230{
202 struct IO_APIC_route_entry entry; 231 struct IO_APIC_route_entry entry;
203 unsigned long flags;
204 232
205 /* Check delivery_mode to be sure we're not clearing an SMI pin */ 233 /* Check delivery_mode to be sure we're not clearing an SMI pin */
206 spin_lock_irqsave(&ioapic_lock, flags); 234 entry = ioapic_read_entry(apic, pin);
207 *(((int*)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
208 *(((int*)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
209 spin_unlock_irqrestore(&ioapic_lock, flags);
210 if (entry.delivery_mode == dest_SMI) 235 if (entry.delivery_mode == dest_SMI)
211 return; 236 return;
212 237
@@ -215,10 +240,7 @@ static void clear_IO_APIC_pin(unsigned int apic, unsigned int pin)
215 */ 240 */
216 memset(&entry, 0, sizeof(entry)); 241 memset(&entry, 0, sizeof(entry));
217 entry.mask = 1; 242 entry.mask = 1;
218 spin_lock_irqsave(&ioapic_lock, flags); 243 ioapic_write_entry(apic, pin, entry);
219 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry) + 0));
220 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry) + 1));
221 spin_unlock_irqrestore(&ioapic_lock, flags);
222} 244}
223 245
224static void clear_IO_APIC (void) 246static void clear_IO_APIC (void)
@@ -1283,9 +1305,8 @@ static void __init setup_IO_APIC_irqs(void)
1283 if (!apic && (irq < 16)) 1305 if (!apic && (irq < 16))
1284 disable_8259A_irq(irq); 1306 disable_8259A_irq(irq);
1285 } 1307 }
1308 ioapic_write_entry(apic, pin, entry);
1286 spin_lock_irqsave(&ioapic_lock, flags); 1309 spin_lock_irqsave(&ioapic_lock, flags);
1287 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1288 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1289 set_native_irq_info(irq, TARGET_CPUS); 1310 set_native_irq_info(irq, TARGET_CPUS);
1290 spin_unlock_irqrestore(&ioapic_lock, flags); 1311 spin_unlock_irqrestore(&ioapic_lock, flags);
1291 } 1312 }
@@ -1301,7 +1322,6 @@ static void __init setup_IO_APIC_irqs(void)
1301static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector) 1322static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, int vector)
1302{ 1323{
1303 struct IO_APIC_route_entry entry; 1324 struct IO_APIC_route_entry entry;
1304 unsigned long flags;
1305 1325
1306 memset(&entry,0,sizeof(entry)); 1326 memset(&entry,0,sizeof(entry));
1307 1327
@@ -1331,10 +1351,7 @@ static void __init setup_ExtINT_IRQ0_pin(unsigned int apic, unsigned int pin, in
1331 /* 1351 /*
1332 * Add it to the IO-APIC irq-routing table: 1352 * Add it to the IO-APIC irq-routing table:
1333 */ 1353 */
1334 spin_lock_irqsave(&ioapic_lock, flags); 1354 ioapic_write_entry(apic, pin, entry);
1335 io_apic_write(apic, 0x11+2*pin, *(((int *)&entry)+1));
1336 io_apic_write(apic, 0x10+2*pin, *(((int *)&entry)+0));
1337 spin_unlock_irqrestore(&ioapic_lock, flags);
1338 1355
1339 enable_8259A_irq(0); 1356 enable_8259A_irq(0);
1340} 1357}
@@ -1444,10 +1461,7 @@ void __init print_IO_APIC(void)
1444 for (i = 0; i <= reg_01.bits.entries; i++) { 1461 for (i = 0; i <= reg_01.bits.entries; i++) {
1445 struct IO_APIC_route_entry entry; 1462 struct IO_APIC_route_entry entry;
1446 1463
1447 spin_lock_irqsave(&ioapic_lock, flags); 1464 entry = ioapic_read_entry(apic, i);
1448 *(((int *)&entry)+0) = io_apic_read(apic, 0x10+i*2);
1449 *(((int *)&entry)+1) = io_apic_read(apic, 0x11+i*2);
1450 spin_unlock_irqrestore(&ioapic_lock, flags);
1451 1465
1452 printk(KERN_DEBUG " %02x %03X %02X ", 1466 printk(KERN_DEBUG " %02x %03X %02X ",
1453 i, 1467 i,
@@ -1666,10 +1680,7 @@ static void __init enable_IO_APIC(void)
1666 /* See if any of the pins is in ExtINT mode */ 1680 /* See if any of the pins is in ExtINT mode */
1667 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1681 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
1668 struct IO_APIC_route_entry entry; 1682 struct IO_APIC_route_entry entry;
1669 spin_lock_irqsave(&ioapic_lock, flags); 1683 entry = ioapic_read_entry(apic, pin);
1670 *(((int *)&entry) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
1671 *(((int *)&entry) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
1672 spin_unlock_irqrestore(&ioapic_lock, flags);
1673 1684
1674 1685
1675 /* If the interrupt line is enabled and in ExtInt mode 1686 /* If the interrupt line is enabled and in ExtInt mode
@@ -1726,7 +1737,6 @@ void disable_IO_APIC(void)
1726 */ 1737 */
1727 if (ioapic_i8259.pin != -1) { 1738 if (ioapic_i8259.pin != -1) {
1728 struct IO_APIC_route_entry entry; 1739 struct IO_APIC_route_entry entry;
1729 unsigned long flags;
1730 1740
1731 memset(&entry, 0, sizeof(entry)); 1741 memset(&entry, 0, sizeof(entry));
1732 entry.mask = 0; /* Enabled */ 1742 entry.mask = 0; /* Enabled */
@@ -1743,12 +1753,7 @@ void disable_IO_APIC(void)
1743 /* 1753 /*
1744 * Add it to the IO-APIC irq-routing table: 1754 * Add it to the IO-APIC irq-routing table:
1745 */ 1755 */
1746 spin_lock_irqsave(&ioapic_lock, flags); 1756 ioapic_write_entry(ioapic_i8259.apic, ioapic_i8259.pin, entry);
1747 io_apic_write(ioapic_i8259.apic, 0x11+2*ioapic_i8259.pin,
1748 *(((int *)&entry)+1));
1749 io_apic_write(ioapic_i8259.apic, 0x10+2*ioapic_i8259.pin,
1750 *(((int *)&entry)+0));
1751 spin_unlock_irqrestore(&ioapic_lock, flags);
1752 } 1757 }
1753 disconnect_bsp_APIC(ioapic_i8259.pin != -1); 1758 disconnect_bsp_APIC(ioapic_i8259.pin != -1);
1754} 1759}
@@ -2213,17 +2218,13 @@ static inline void unlock_ExtINT_logic(void)
2213 int apic, pin, i; 2218 int apic, pin, i;
2214 struct IO_APIC_route_entry entry0, entry1; 2219 struct IO_APIC_route_entry entry0, entry1;
2215 unsigned char save_control, save_freq_select; 2220 unsigned char save_control, save_freq_select;
2216 unsigned long flags;
2217 2221
2218 pin = find_isa_irq_pin(8, mp_INT); 2222 pin = find_isa_irq_pin(8, mp_INT);
2219 apic = find_isa_irq_apic(8, mp_INT); 2223 apic = find_isa_irq_apic(8, mp_INT);
2220 if (pin == -1) 2224 if (pin == -1)
2221 return; 2225 return;
2222 2226
2223 spin_lock_irqsave(&ioapic_lock, flags); 2227 entry0 = ioapic_read_entry(apic, pin);
2224 *(((int *)&entry0) + 1) = io_apic_read(apic, 0x11 + 2 * pin);
2225 *(((int *)&entry0) + 0) = io_apic_read(apic, 0x10 + 2 * pin);
2226 spin_unlock_irqrestore(&ioapic_lock, flags);
2227 clear_IO_APIC_pin(apic, pin); 2228 clear_IO_APIC_pin(apic, pin);
2228 2229
2229 memset(&entry1, 0, sizeof(entry1)); 2230 memset(&entry1, 0, sizeof(entry1));
@@ -2236,10 +2237,7 @@ static inline void unlock_ExtINT_logic(void)
2236 entry1.trigger = 0; 2237 entry1.trigger = 0;
2237 entry1.vector = 0; 2238 entry1.vector = 0;
2238 2239
2239 spin_lock_irqsave(&ioapic_lock, flags); 2240 ioapic_write_entry(apic, pin, entry1);
2240 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry1) + 1));
2241 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry1) + 0));
2242 spin_unlock_irqrestore(&ioapic_lock, flags);
2243 2241
2244 save_control = CMOS_READ(RTC_CONTROL); 2242 save_control = CMOS_READ(RTC_CONTROL);
2245 save_freq_select = CMOS_READ(RTC_FREQ_SELECT); 2243 save_freq_select = CMOS_READ(RTC_FREQ_SELECT);
@@ -2258,10 +2256,7 @@ static inline void unlock_ExtINT_logic(void)
2258 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT); 2256 CMOS_WRITE(save_freq_select, RTC_FREQ_SELECT);
2259 clear_IO_APIC_pin(apic, pin); 2257 clear_IO_APIC_pin(apic, pin);
2260 2258
2261 spin_lock_irqsave(&ioapic_lock, flags); 2259 ioapic_write_entry(apic, pin, entry0);
2262 io_apic_write(apic, 0x11 + 2 * pin, *(((int *)&entry0) + 1));
2263 io_apic_write(apic, 0x10 + 2 * pin, *(((int *)&entry0) + 0));
2264 spin_unlock_irqrestore(&ioapic_lock, flags);
2265} 2260}
2266 2261
2267int timer_uses_ioapic_pin_0; 2262int timer_uses_ioapic_pin_0;
@@ -2461,17 +2456,12 @@ static int ioapic_suspend(struct sys_device *dev, pm_message_t state)
2461{ 2456{
2462 struct IO_APIC_route_entry *entry; 2457 struct IO_APIC_route_entry *entry;
2463 struct sysfs_ioapic_data *data; 2458 struct sysfs_ioapic_data *data;
2464 unsigned long flags;
2465 int i; 2459 int i;
2466 2460
2467 data = container_of(dev, struct sysfs_ioapic_data, dev); 2461 data = container_of(dev, struct sysfs_ioapic_data, dev);
2468 entry = data->entry; 2462 entry = data->entry;
2469 spin_lock_irqsave(&ioapic_lock, flags); 2463 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2470 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) { 2464 entry[i] = ioapic_read_entry(dev->id, i);
2471 *(((int *)entry) + 1) = io_apic_read(dev->id, 0x11 + 2 * i);
2472 *(((int *)entry) + 0) = io_apic_read(dev->id, 0x10 + 2 * i);
2473 }
2474 spin_unlock_irqrestore(&ioapic_lock, flags);
2475 2465
2476 return 0; 2466 return 0;
2477} 2467}
@@ -2493,11 +2483,9 @@ static int ioapic_resume(struct sys_device *dev)
2493 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid; 2483 reg_00.bits.ID = mp_ioapics[dev->id].mpc_apicid;
2494 io_apic_write(dev->id, 0, reg_00.raw); 2484 io_apic_write(dev->id, 0, reg_00.raw);
2495 } 2485 }
2496 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++, entry ++ ) {
2497 io_apic_write(dev->id, 0x11+2*i, *(((int *)entry)+1));
2498 io_apic_write(dev->id, 0x10+2*i, *(((int *)entry)+0));
2499 }
2500 spin_unlock_irqrestore(&ioapic_lock, flags); 2486 spin_unlock_irqrestore(&ioapic_lock, flags);
2487 for (i = 0; i < nr_ioapic_registers[dev->id]; i ++)
2488 ioapic_write_entry(dev->id, i, entry[i]);
2501 2489
2502 return 0; 2490 return 0;
2503} 2491}
@@ -2694,9 +2682,8 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
2694 if (!ioapic && (irq < 16)) 2682 if (!ioapic && (irq < 16))
2695 disable_8259A_irq(irq); 2683 disable_8259A_irq(irq);
2696 2684
2685 ioapic_write_entry(ioapic, pin, entry);
2697 spin_lock_irqsave(&ioapic_lock, flags); 2686 spin_lock_irqsave(&ioapic_lock, flags);
2698 io_apic_write(ioapic, 0x11+2*pin, *(((int *)&entry)+1));
2699 io_apic_write(ioapic, 0x10+2*pin, *(((int *)&entry)+0));
2700 set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS); 2687 set_native_irq_info(use_pci_vector() ? entry.vector : irq, TARGET_CPUS);
2701 spin_unlock_irqrestore(&ioapic_lock, flags); 2688 spin_unlock_irqrestore(&ioapic_lock, flags);
2702 2689
@@ -2704,3 +2691,25 @@ int io_apic_set_pci_routing (int ioapic, int pin, int irq, int edge_level, int a
2704} 2691}
2705 2692
2706#endif /* CONFIG_ACPI */ 2693#endif /* CONFIG_ACPI */
2694
2695static int __init parse_disable_timer_pin_1(char *arg)
2696{
2697 disable_timer_pin_1 = 1;
2698 return 0;
2699}
2700early_param("disable_timer_pin_1", parse_disable_timer_pin_1);
2701
2702static int __init parse_enable_timer_pin_1(char *arg)
2703{
2704 disable_timer_pin_1 = -1;
2705 return 0;
2706}
2707early_param("enable_timer_pin_1", parse_enable_timer_pin_1);
2708
2709static int __init parse_noapic(char *arg)
2710{
2711 /* disable IO-APIC */
2712 disable_ioapic_setup();
2713 return 0;
2714}
2715early_param("noapic", parse_noapic);
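
Editor's note: the three handlers just added use early_param() rather than __setup(), so they run while the command line is scanned very early in boot. A hedged reminder of the pattern (the option name and flag below are hypothetical; under the conventions of this era the handler receives the text after '=', or NULL when the option is passed bare, and a nonzero return is reported as a malformed option):

    static int example_quirk __initdata;

    static int __init parse_example_quirk(char *arg)
    {
            example_quirk = 1;      /* value string ignored, as in parse_noapic above */
            return 0;
    }
    early_param("example_quirk", parse_example_quirk);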
diff --git a/arch/i386/kernel/machine_kexec.c b/arch/i386/kernel/machine_kexec.c
index 6b1ae6ba76f0..91966bafb3dc 100644
--- a/arch/i386/kernel/machine_kexec.c
+++ b/arch/i386/kernel/machine_kexec.c
@@ -9,6 +9,7 @@
9#include <linux/mm.h> 9#include <linux/mm.h>
10#include <linux/kexec.h> 10#include <linux/kexec.h>
11#include <linux/delay.h> 11#include <linux/delay.h>
12#include <linux/init.h>
12#include <asm/pgtable.h> 13#include <asm/pgtable.h>
13#include <asm/pgalloc.h> 14#include <asm/pgalloc.h>
14#include <asm/tlbflush.h> 15#include <asm/tlbflush.h>
@@ -20,70 +21,13 @@
20#include <asm/system.h> 21#include <asm/system.h>
21 22
22#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE))) 23#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
23 24static u32 kexec_pgd[1024] PAGE_ALIGNED;
24#define L0_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 25#ifdef CONFIG_X86_PAE
25#define L1_ATTR (_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED | _PAGE_DIRTY) 26static u32 kexec_pmd0[1024] PAGE_ALIGNED;
26#define L2_ATTR (_PAGE_PRESENT) 27static u32 kexec_pmd1[1024] PAGE_ALIGNED;
27
28#define LEVEL0_SIZE (1UL << 12UL)
29
30#ifndef CONFIG_X86_PAE
31#define LEVEL1_SIZE (1UL << 22UL)
32static u32 pgtable_level1[1024] PAGE_ALIGNED;
33
34static void identity_map_page(unsigned long address)
35{
36 unsigned long level1_index, level2_index;
37 u32 *pgtable_level2;
38
39 /* Find the current page table */
40 pgtable_level2 = __va(read_cr3());
41
42 /* Find the indexes of the physical address to identity map */
43 level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
44 level2_index = address / LEVEL1_SIZE;
45
46 /* Identity map the page table entry */
47 pgtable_level1[level1_index] = address | L0_ATTR;
48 pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
49
50 /* Flush the tlb so the new mapping takes effect.
51 * Global tlb entries are not flushed but that is not an issue.
52 */
53 load_cr3(pgtable_level2);
54}
55
56#else
57#define LEVEL1_SIZE (1UL << 21UL)
58#define LEVEL2_SIZE (1UL << 30UL)
59static u64 pgtable_level1[512] PAGE_ALIGNED;
60static u64 pgtable_level2[512] PAGE_ALIGNED;
61
62static void identity_map_page(unsigned long address)
63{
64 unsigned long level1_index, level2_index, level3_index;
65 u64 *pgtable_level3;
66
67 /* Find the current page table */
68 pgtable_level3 = __va(read_cr3());
69
70 /* Find the indexes of the physical address to identity map */
71 level1_index = (address % LEVEL1_SIZE)/LEVEL0_SIZE;
72 level2_index = (address % LEVEL2_SIZE)/LEVEL1_SIZE;
73 level3_index = address / LEVEL2_SIZE;
74
75 /* Identity map the page table entry */
76 pgtable_level1[level1_index] = address | L0_ATTR;
77 pgtable_level2[level2_index] = __pa(pgtable_level1) | L1_ATTR;
78 set_64bit(&pgtable_level3[level3_index],
79 __pa(pgtable_level2) | L2_ATTR);
80
81 /* Flush the tlb so the new mapping takes effect.
82 * Global tlb entries are not flushed but that is not an issue.
83 */
84 load_cr3(pgtable_level3);
85}
86#endif 28#endif
29static u32 kexec_pte0[1024] PAGE_ALIGNED;
30static u32 kexec_pte1[1024] PAGE_ALIGNED;
87 31
88static void set_idt(void *newidt, __u16 limit) 32static void set_idt(void *newidt, __u16 limit)
89{ 33{
@@ -127,16 +71,6 @@ static void load_segments(void)
127#undef __STR 71#undef __STR
128} 72}
129 73
130typedef asmlinkage NORET_TYPE void (*relocate_new_kernel_t)(
131 unsigned long indirection_page,
132 unsigned long reboot_code_buffer,
133 unsigned long start_address,
134 unsigned int has_pae) ATTRIB_NORET;
135
136extern const unsigned char relocate_new_kernel[];
137extern void relocate_new_kernel_end(void);
138extern const unsigned int relocate_new_kernel_size;
139
140/* 74/*
141 * A architecture hook called to validate the 75 * A architecture hook called to validate the
142 * proposed image and prepare the control pages 76 * proposed image and prepare the control pages
@@ -169,25 +103,29 @@ void machine_kexec_cleanup(struct kimage *image)
169 */ 103 */
170NORET_TYPE void machine_kexec(struct kimage *image) 104NORET_TYPE void machine_kexec(struct kimage *image)
171{ 105{
172 unsigned long page_list; 106 unsigned long page_list[PAGES_NR];
173 unsigned long reboot_code_buffer; 107 void *control_page;
174
175 relocate_new_kernel_t rnk;
176 108
177 /* Interrupts aren't acceptable while we reboot */ 109 /* Interrupts aren't acceptable while we reboot */
178 local_irq_disable(); 110 local_irq_disable();
179 111
180 /* Compute some offsets */ 112 control_page = page_address(image->control_code_page);
181 reboot_code_buffer = page_to_pfn(image->control_code_page) 113 memcpy(control_page, relocate_kernel, PAGE_SIZE);
182 << PAGE_SHIFT; 114
183 page_list = image->head; 115 page_list[PA_CONTROL_PAGE] = __pa(control_page);
184 116 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
185 /* Set up an identity mapping for the reboot_code_buffer */ 117 page_list[PA_PGD] = __pa(kexec_pgd);
186 identity_map_page(reboot_code_buffer); 118 page_list[VA_PGD] = (unsigned long)kexec_pgd;
187 119#ifdef CONFIG_X86_PAE
188 /* copy it out */ 120 page_list[PA_PMD_0] = __pa(kexec_pmd0);
189 memcpy((void *)reboot_code_buffer, relocate_new_kernel, 121 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
190 relocate_new_kernel_size); 122 page_list[PA_PMD_1] = __pa(kexec_pmd1);
123 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
124#endif
125 page_list[PA_PTE_0] = __pa(kexec_pte0);
126 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
127 page_list[PA_PTE_1] = __pa(kexec_pte1);
128 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
191 129
192 /* The segment registers are funny things, they have both a 130 /* The segment registers are funny things, they have both a
193 * visible and an invisible part. Whenever the visible part is 131 * visible and an invisible part. Whenever the visible part is
@@ -206,6 +144,28 @@ NORET_TYPE void machine_kexec(struct kimage *image)
206 set_idt(phys_to_virt(0),0); 144 set_idt(phys_to_virt(0),0);
207 145
208 /* now call it */ 146 /* now call it */
209 rnk = (relocate_new_kernel_t) reboot_code_buffer; 147 relocate_kernel((unsigned long)image->head, (unsigned long)page_list,
210 (*rnk)(page_list, reboot_code_buffer, image->start, cpu_has_pae); 148 image->start, cpu_has_pae);
149}
150
151/* crashkernel=size@addr specifies the location to reserve for
152 * a crash kernel. By reserving this memory we guarantee
153 * that linux never sets it up as a DMA target.
154 * Useful for holding code to do something appropriate
155 * after a kernel panic.
156 */
157static int __init parse_crashkernel(char *arg)
158{
159 unsigned long size, base;
160 size = memparse(arg, &arg);
161 if (*arg == '@') {
162 base = memparse(arg+1, &arg);
163 /* FIXME: Do I want a sanity check
164 * to validate the memory range?
165 */
166 crashk_res.start = base;
167 crashk_res.end = base + size - 1;
168 }
169 return 0;
211} 170}
171early_param("crashkernel", parse_crashkernel);
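
Editor's note: a worked example of the crashkernel= syntax handled by parse_crashkernel() above, with hypothetical values:

    /* crashkernel=64M@16M
     *
     * memparse("64M@16M", &p) returns 64 << 20 and leaves p pointing at
     * "@16M"; memparse(p + 1, &p) then returns 16 << 20, so the handler
     * ends up with
     *     crashk_res.start = 0x01000000;
     *     crashk_res.end   = 0x01000000 + (64 << 20) - 1 = 0x04ffffff;
     * and that range is reserved so the running kernel never hands it
     * out or uses it as a DMA target.
     */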
diff --git a/arch/i386/kernel/mca.c b/arch/i386/kernel/mca.c
index cd5456f14af4..eb57a851789d 100644
--- a/arch/i386/kernel/mca.c
+++ b/arch/i386/kernel/mca.c
@@ -42,6 +42,7 @@
42#include <linux/errno.h> 42#include <linux/errno.h>
43#include <linux/kernel.h> 43#include <linux/kernel.h>
44#include <linux/mca.h> 44#include <linux/mca.h>
45#include <linux/kprobes.h>
45#include <asm/system.h> 46#include <asm/system.h>
46#include <asm/io.h> 47#include <asm/io.h>
47#include <linux/proc_fs.h> 48#include <linux/proc_fs.h>
@@ -414,7 +415,8 @@ subsys_initcall(mca_init);
414 415
415/*--------------------------------------------------------------------*/ 416/*--------------------------------------------------------------------*/
416 417
417static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag) 418static __kprobes void
419mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
418{ 420{
419 int slot = mca_dev->slot; 421 int slot = mca_dev->slot;
420 422
@@ -444,7 +446,7 @@ static void mca_handle_nmi_device(struct mca_device *mca_dev, int check_flag)
444 446
445/*--------------------------------------------------------------------*/ 447/*--------------------------------------------------------------------*/
446 448
447static int mca_handle_nmi_callback(struct device *dev, void *data) 449static int __kprobes mca_handle_nmi_callback(struct device *dev, void *data)
448{ 450{
449 struct mca_device *mca_dev = to_mca_device(dev); 451 struct mca_device *mca_dev = to_mca_device(dev);
450 unsigned char pos5; 452 unsigned char pos5;
@@ -462,7 +464,7 @@ static int mca_handle_nmi_callback(struct device *dev, void *data)
462 return 0; 464 return 0;
463} 465}
464 466
465void mca_handle_nmi(void) 467void __kprobes mca_handle_nmi(void)
466{ 468{
467 /* First try - scan the various adapters and see if a specific 469 /* First try - scan the various adapters and see if a specific
468 * adapter was responsible for the error. 470 * adapter was responsible for the error.
diff --git a/arch/i386/kernel/mpparse.c b/arch/i386/kernel/mpparse.c
index a70b5fa0ef06..442aaf8c77eb 100644
--- a/arch/i386/kernel/mpparse.c
+++ b/arch/i386/kernel/mpparse.c
@@ -30,6 +30,7 @@
30#include <asm/io_apic.h> 30#include <asm/io_apic.h>
31 31
32#include <mach_apic.h> 32#include <mach_apic.h>
33#include <mach_apicdef.h>
33#include <mach_mpparse.h> 34#include <mach_mpparse.h>
34#include <bios_ebda.h> 35#include <bios_ebda.h>
35 36
@@ -68,7 +69,7 @@ unsigned int def_to_bigsmp = 0;
68/* Processor that is doing the boot up */ 69/* Processor that is doing the boot up */
69unsigned int boot_cpu_physical_apicid = -1U; 70unsigned int boot_cpu_physical_apicid = -1U;
70/* Internal processor count */ 71/* Internal processor count */
71static unsigned int __devinitdata num_processors; 72unsigned int __cpuinitdata num_processors;
72 73
73/* Bitmask of physically existing CPUs */ 74/* Bitmask of physically existing CPUs */
74physid_mask_t phys_cpu_present_map; 75physid_mask_t phys_cpu_present_map;
@@ -228,12 +229,14 @@ static void __init MP_bus_info (struct mpc_config_bus *m)
228 229
229 mpc_oem_bus_info(m, str, translation_table[mpc_record]); 230 mpc_oem_bus_info(m, str, translation_table[mpc_record]);
230 231
232#if MAX_MP_BUSSES < 256
231 if (m->mpc_busid >= MAX_MP_BUSSES) { 233 if (m->mpc_busid >= MAX_MP_BUSSES) {
232 printk(KERN_WARNING "MP table busid value (%d) for bustype %s " 234 printk(KERN_WARNING "MP table busid value (%d) for bustype %s "
233 " is too large, max. supported is %d\n", 235 " is too large, max. supported is %d\n",
234 m->mpc_busid, str, MAX_MP_BUSSES - 1); 236 m->mpc_busid, str, MAX_MP_BUSSES - 1);
235 return; 237 return;
236 } 238 }
239#endif
237 240
238 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) { 241 if (strncmp(str, BUSTYPE_ISA, sizeof(BUSTYPE_ISA)-1) == 0) {
239 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA; 242 mp_bus_id_to_type[m->mpc_busid] = MP_BUS_ISA;
@@ -293,19 +296,6 @@ static void __init MP_lintsrc_info (struct mpc_config_lintsrc *m)
293 m->mpc_irqtype, m->mpc_irqflag & 3, 296 m->mpc_irqtype, m->mpc_irqflag & 3,
294 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid, 297 (m->mpc_irqflag >> 2) &3, m->mpc_srcbusid,
295 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint); 298 m->mpc_srcbusirq, m->mpc_destapic, m->mpc_destapiclint);
296 /*
297 * Well it seems all SMP boards in existence
298 * use ExtINT/LVT1 == LINT0 and
299 * NMI/LVT2 == LINT1 - the following check
300 * will show us if this assumptions is false.
301 * Until then we do not have to add baggage.
302 */
303 if ((m->mpc_irqtype == mp_ExtINT) &&
304 (m->mpc_destapiclint != 0))
305 BUG();
306 if ((m->mpc_irqtype == mp_NMI) &&
307 (m->mpc_destapiclint != 1))
308 BUG();
309} 299}
310 300
311#ifdef CONFIG_X86_NUMAQ 301#ifdef CONFIG_X86_NUMAQ
@@ -822,8 +812,7 @@ int es7000_plat;
822 812
823#ifdef CONFIG_ACPI 813#ifdef CONFIG_ACPI
824 814
825void __init mp_register_lapic_address ( 815void __init mp_register_lapic_address(u64 address)
826 u64 address)
827{ 816{
828 mp_lapic_addr = (unsigned long) address; 817 mp_lapic_addr = (unsigned long) address;
829 818
@@ -835,13 +824,10 @@ void __init mp_register_lapic_address (
835 Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid); 824 Dprintk("Boot CPU = %d\n", boot_cpu_physical_apicid);
836} 825}
837 826
838 827void __devinit mp_register_lapic (u8 id, u8 enabled)
839void __devinit mp_register_lapic (
840 u8 id,
841 u8 enabled)
842{ 828{
843 struct mpc_config_processor processor; 829 struct mpc_config_processor processor;
844 int boot_cpu = 0; 830 int boot_cpu = 0;
845 831
846 if (MAX_APICS - id <= 0) { 832 if (MAX_APICS - id <= 0) {
847 printk(KERN_WARNING "Processor #%d invalid (max %d)\n", 833 printk(KERN_WARNING "Processor #%d invalid (max %d)\n",
@@ -878,11 +864,9 @@ static struct mp_ioapic_routing {
878 u32 pin_programmed[4]; 864 u32 pin_programmed[4];
879} mp_ioapic_routing[MAX_IO_APICS]; 865} mp_ioapic_routing[MAX_IO_APICS];
880 866
881 867static int mp_find_ioapic (int gsi)
882static int mp_find_ioapic (
883 int gsi)
884{ 868{
885 int i = 0; 869 int i = 0;
886 870
887 /* Find the IOAPIC that manages this GSI. */ 871 /* Find the IOAPIC that manages this GSI. */
888 for (i = 0; i < nr_ioapics; i++) { 872 for (i = 0; i < nr_ioapics; i++) {
@@ -895,15 +879,11 @@ static int mp_find_ioapic (
895 879
896 return -1; 880 return -1;
897} 881}
898
899 882
900void __init mp_register_ioapic ( 883void __init mp_register_ioapic(u8 id, u32 address, u32 gsi_base)
901 u8 id,
902 u32 address,
903 u32 gsi_base)
904{ 884{
905 int idx = 0; 885 int idx = 0;
906 int tmpid; 886 int tmpid;
907 887
908 if (nr_ioapics >= MAX_IO_APICS) { 888 if (nr_ioapics >= MAX_IO_APICS) {
909 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded " 889 printk(KERN_ERR "ERROR: Max # of I/O APICs (%d) exceeded "
@@ -949,16 +929,10 @@ void __init mp_register_ioapic (
949 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr, 929 mp_ioapics[idx].mpc_apicver, mp_ioapics[idx].mpc_apicaddr,
950 mp_ioapic_routing[idx].gsi_base, 930 mp_ioapic_routing[idx].gsi_base,
951 mp_ioapic_routing[idx].gsi_end); 931 mp_ioapic_routing[idx].gsi_end);
952
953 return;
954} 932}
955 933
956 934void __init
957void __init mp_override_legacy_irq ( 935mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
958 u8 bus_irq,
959 u8 polarity,
960 u8 trigger,
961 u32 gsi)
962{ 936{
963 struct mpc_config_intsrc intsrc; 937 struct mpc_config_intsrc intsrc;
964 int ioapic = -1; 938 int ioapic = -1;
@@ -996,15 +970,13 @@ void __init mp_override_legacy_irq (
996 mp_irqs[mp_irq_entries] = intsrc; 970 mp_irqs[mp_irq_entries] = intsrc;
997 if (++mp_irq_entries == MAX_IRQ_SOURCES) 971 if (++mp_irq_entries == MAX_IRQ_SOURCES)
998 panic("Max # of irq sources exceeded!\n"); 972 panic("Max # of irq sources exceeded!\n");
999
1000 return;
1001} 973}
1002 974
1003void __init mp_config_acpi_legacy_irqs (void) 975void __init mp_config_acpi_legacy_irqs (void)
1004{ 976{
1005 struct mpc_config_intsrc intsrc; 977 struct mpc_config_intsrc intsrc;
1006 int i = 0; 978 int i = 0;
1007 int ioapic = -1; 979 int ioapic = -1;
1008 980
1009 /* 981 /*
1010 * Fabricate the legacy ISA bus (bus #31). 982 * Fabricate the legacy ISA bus (bus #31).
@@ -1073,12 +1045,12 @@ void __init mp_config_acpi_legacy_irqs (void)
1073 1045
1074#define MAX_GSI_NUM 4096 1046#define MAX_GSI_NUM 4096
1075 1047
1076int mp_register_gsi (u32 gsi, int triggering, int polarity) 1048int mp_register_gsi(u32 gsi, int triggering, int polarity)
1077{ 1049{
1078 int ioapic = -1; 1050 int ioapic = -1;
1079 int ioapic_pin = 0; 1051 int ioapic_pin = 0;
1080 int idx, bit = 0; 1052 int idx, bit = 0;
1081 static int pci_irq = 16; 1053 static int pci_irq = 16;
1082 /* 1054 /*
1083 * Mapping between Global System Interrupts, which 1055
1084 * represent all possible interrupts, and IRQs 1056 * represent all possible interrupts, and IRQs
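
Editor's note: the #if MAX_MP_BUSSES < 256 guard added to MP_bus_info() is easy to misread. mpc_busid is an 8-bit field in the MP-table bus record, so on subarchitectures that define MAX_MP_BUSSES as 256 or more the range check can never fire and gcc warns that the comparison is always false; compiling it out removes the warning along with the dead code. (That rationale is inferred from the field width, not stated in this diff.) In sketch form, assuming a u8 busid:

    #if MAX_MP_BUSSES < 256
            if (m->mpc_busid >= MAX_MP_BUSSES)
                    return;         /* only reachable when the table is smaller than the u8 range */
    #endif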
diff --git a/arch/i386/kernel/nmi.c b/arch/i386/kernel/nmi.c
index acb351478e42..dbda706fdd14 100644
--- a/arch/i386/kernel/nmi.c
+++ b/arch/i386/kernel/nmi.c
@@ -21,83 +21,174 @@
21#include <linux/sysdev.h> 21#include <linux/sysdev.h>
22#include <linux/sysctl.h> 22#include <linux/sysctl.h>
23#include <linux/percpu.h> 23#include <linux/percpu.h>
24#include <linux/dmi.h>
25#include <linux/kprobes.h>
24 26
25#include <asm/smp.h> 27#include <asm/smp.h>
26#include <asm/nmi.h> 28#include <asm/nmi.h>
29#include <asm/kdebug.h>
27#include <asm/intel_arch_perfmon.h> 30#include <asm/intel_arch_perfmon.h>
28 31
29#include "mach_traps.h" 32#include "mach_traps.h"
30 33
31unsigned int nmi_watchdog = NMI_NONE; 34/* perfctr_nmi_owner tracks the ownership of the perfctr registers:
32extern int unknown_nmi_panic; 35 * evntsel_nmi_owner tracks the ownership of the event selection registers:
33static unsigned int nmi_hz = HZ; 36 * - different performance counters / event selectors may be reserved for
34static unsigned int nmi_perfctr_msr; /* the MSR to reset in NMI handler */ 37 * different subsystems; this reservation system just tries to coordinate
35static unsigned int nmi_p4_cccr_val; 38 * things a little
36extern void show_registers(struct pt_regs *regs); 39 */
40static DEFINE_PER_CPU(unsigned long, perfctr_nmi_owner);
41static DEFINE_PER_CPU(unsigned long, evntsel_nmi_owner[3]);
37 42
38/* 43/* this number is calculated from Intel's MSR_P4_CRU_ESCR5 register and it's
39 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware: 44 * offset from MSR_P4_BSU_ESCR0. It will be the max for all platforms (for now)
40 * - it may be reserved by some other driver, or not
41 * - when not reserved by some other driver, it may be used for
42 * the NMI watchdog, or not
43 *
44 * This is maintained separately from nmi_active because the NMI
45 * watchdog may also be driven from the I/O APIC timer.
46 */ 45 */
47static DEFINE_SPINLOCK(lapic_nmi_owner_lock); 46#define NMI_MAX_COUNTER_BITS 66
48static unsigned int lapic_nmi_owner;
49#define LAPIC_NMI_WATCHDOG (1<<0)
50#define LAPIC_NMI_RESERVED (1<<1)
51 47
52/* nmi_active: 48/* nmi_active:
53 * +1: the lapic NMI watchdog is active, but can be disabled 49 * >0: the lapic NMI watchdog is active, but can be disabled
54 * 0: the lapic NMI watchdog has not been set up, and cannot 50 * <0: the lapic NMI watchdog has not been set up, and cannot
55 * be enabled 51 * be enabled
56 * -1: the lapic NMI watchdog is disabled, but can be enabled 52 * 0: the lapic NMI watchdog is disabled, but can be enabled
57 */ 53 */
58int nmi_active; 54atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */
59 55
60#define K7_EVNTSEL_ENABLE (1 << 22) 56unsigned int nmi_watchdog = NMI_DEFAULT;
61#define K7_EVNTSEL_INT (1 << 20) 57static unsigned int nmi_hz = HZ;
62#define K7_EVNTSEL_OS (1 << 17)
63#define K7_EVNTSEL_USR (1 << 16)
64#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
65#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
66 58
67#define P6_EVNTSEL0_ENABLE (1 << 22) 59struct nmi_watchdog_ctlblk {
68#define P6_EVNTSEL_INT (1 << 20) 60 int enabled;
69#define P6_EVNTSEL_OS (1 << 17) 61 u64 check_bit;
70#define P6_EVNTSEL_USR (1 << 16) 62 unsigned int cccr_msr;
71#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79 63 unsigned int perfctr_msr; /* the MSR to reset in NMI handler */
72#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED 64 unsigned int evntsel_msr; /* the MSR to select the events to handle */
65};
66static DEFINE_PER_CPU(struct nmi_watchdog_ctlblk, nmi_watchdog_ctlblk);
73 67
74#define MSR_P4_MISC_ENABLE 0x1A0 68/* local prototypes */
75#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7) 69static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu);
76#define MSR_P4_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
77#define MSR_P4_PERFCTR0 0x300
78#define MSR_P4_CCCR0 0x360
79#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
80#define P4_ESCR_OS (1<<3)
81#define P4_ESCR_USR (1<<2)
82#define P4_CCCR_OVF_PMI0 (1<<26)
83#define P4_CCCR_OVF_PMI1 (1<<27)
84#define P4_CCCR_THRESHOLD(N) ((N)<<20)
85#define P4_CCCR_COMPLEMENT (1<<19)
86#define P4_CCCR_COMPARE (1<<18)
87#define P4_CCCR_REQUIRED (3<<16)
88#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
89#define P4_CCCR_ENABLE (1<<12)
90/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
91 CRU_ESCR0 (with any non-null event selector) through a complemented
92 max threshold. [IA32-Vol3, Section 14.9.9] */
93#define MSR_P4_IQ_COUNTER0 0x30C
94#define P4_NMI_CRU_ESCR0 (P4_ESCR_EVENT_SELECT(0x3F)|P4_ESCR_OS|P4_ESCR_USR)
95#define P4_NMI_IQ_CCCR0 \
96 (P4_CCCR_OVF_PMI0|P4_CCCR_THRESHOLD(15)|P4_CCCR_COMPLEMENT| \
97 P4_CCCR_COMPARE|P4_CCCR_REQUIRED|P4_CCCR_ESCR_SELECT(4)|P4_CCCR_ENABLE)
98 70
99#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 71extern void show_registers(struct pt_regs *regs);
100#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK 72extern int unknown_nmi_panic;
73
74/* converts an msr to an appropriate reservation bit */
75static inline unsigned int nmi_perfctr_msr_to_bit(unsigned int msr)
76{
77 /* returns the bit offset of the performance counter register */
78 switch (boot_cpu_data.x86_vendor) {
79 case X86_VENDOR_AMD:
80 return (msr - MSR_K7_PERFCTR0);
81 case X86_VENDOR_INTEL:
82 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
83 return (msr - MSR_ARCH_PERFMON_PERFCTR0);
84
85 switch (boot_cpu_data.x86) {
86 case 6:
87 return (msr - MSR_P6_PERFCTR0);
88 case 15:
89 return (msr - MSR_P4_BPU_PERFCTR0);
90 }
91 }
92 return 0;
93}
94
95/* converts an msr to an appropriate reservation bit */
96static inline unsigned int nmi_evntsel_msr_to_bit(unsigned int msr)
97{
98 /* returns the bit offset of the event selection register */
99 switch (boot_cpu_data.x86_vendor) {
100 case X86_VENDOR_AMD:
101 return (msr - MSR_K7_EVNTSEL0);
102 case X86_VENDOR_INTEL:
103 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
104 return (msr - MSR_ARCH_PERFMON_EVENTSEL0);
105
106 switch (boot_cpu_data.x86) {
107 case 6:
108 return (msr - MSR_P6_EVNTSEL0);
109 case 15:
110 return (msr - MSR_P4_BSU_ESCR0);
111 }
112 }
113 return 0;
114}
115
116/* checks whether a counter bit is available (hack for oprofile) */
117int avail_to_resrv_perfctr_nmi_bit(unsigned int counter)
118{
119 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
120
121 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
122}
123
124/* checks an msr for availability */
125int avail_to_resrv_perfctr_nmi(unsigned int msr)
126{
127 unsigned int counter;
128
129 counter = nmi_perfctr_msr_to_bit(msr);
130 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
131
132 return (!test_bit(counter, &__get_cpu_var(perfctr_nmi_owner)));
133}
134
135int reserve_perfctr_nmi(unsigned int msr)
136{
137 unsigned int counter;
138
139 counter = nmi_perfctr_msr_to_bit(msr);
140 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
141
142 if (!test_and_set_bit(counter, &__get_cpu_var(perfctr_nmi_owner)))
143 return 1;
144 return 0;
145}
146
147void release_perfctr_nmi(unsigned int msr)
148{
149 unsigned int counter;
150
151 counter = nmi_perfctr_msr_to_bit(msr);
152 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
153
154 clear_bit(counter, &__get_cpu_var(perfctr_nmi_owner));
155}
156
157int reserve_evntsel_nmi(unsigned int msr)
158{
159 unsigned int counter;
160
161 counter = nmi_evntsel_msr_to_bit(msr);
162 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
163
164 if (!test_and_set_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]))
165 return 1;
166 return 0;
167}
168
169void release_evntsel_nmi(unsigned int msr)
170{
171 unsigned int counter;
172
173 counter = nmi_evntsel_msr_to_bit(msr);
174 BUG_ON(counter > NMI_MAX_COUNTER_BITS);
175
176 clear_bit(counter, &__get_cpu_var(evntsel_nmi_owner)[0]);
177}
178
179static __cpuinit inline int nmi_known_cpu(void)
180{
181 switch (boot_cpu_data.x86_vendor) {
182 case X86_VENDOR_AMD:
183 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
184 case X86_VENDOR_INTEL:
185 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON))
186 return 1;
187 else
188 return ((boot_cpu_data.x86 == 15) || (boot_cpu_data.x86 == 6));
189 }
190 return 0;
191}
101 192
102#ifdef CONFIG_SMP 193#ifdef CONFIG_SMP
103/* The performance counters used by NMI_LOCAL_APIC don't trigger when 194/* The performance counters used by NMI_LOCAL_APIC don't trigger when
@@ -125,7 +216,18 @@ static int __init check_nmi_watchdog(void)
125 unsigned int *prev_nmi_count; 216 unsigned int *prev_nmi_count;
126 int cpu; 217 int cpu;
127 218
128 if (nmi_watchdog == NMI_NONE) 219 /* Enable NMI watchdog for newer systems.
220 Actually it should be safe for most systems before 2004 too except
221 for some IBM systems that corrupt registers when NMI happens
222 during SMM. Unfortunately we don't have more exact information
223 on these and use this coarse check. */
224 if (nmi_watchdog == NMI_DEFAULT && dmi_get_year(DMI_BIOS_DATE) >= 2004)
225 nmi_watchdog = NMI_LOCAL_APIC;
226
227 if ((nmi_watchdog == NMI_NONE) || (nmi_watchdog == NMI_DEFAULT))
228 return 0;
229
230 if (!atomic_read(&nmi_active))
129 return 0; 231 return 0;
130 232
131 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL); 233 prev_nmi_count = kmalloc(NR_CPUS * sizeof(int), GFP_KERNEL);
@@ -149,25 +251,45 @@ static int __init check_nmi_watchdog(void)
149 if (!cpu_isset(cpu, cpu_callin_map)) 251 if (!cpu_isset(cpu, cpu_callin_map))
150 continue; 252 continue;
151#endif 253#endif
254 if (!per_cpu(nmi_watchdog_ctlblk, cpu).enabled)
255 continue;
152 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) { 256 if (nmi_count(cpu) - prev_nmi_count[cpu] <= 5) {
153 endflag = 1;
154 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n", 257 printk("CPU#%d: NMI appears to be stuck (%d->%d)!\n",
155 cpu, 258 cpu,
156 prev_nmi_count[cpu], 259 prev_nmi_count[cpu],
157 nmi_count(cpu)); 260 nmi_count(cpu));
158 nmi_active = 0; 261 per_cpu(nmi_watchdog_ctlblk, cpu).enabled = 0;
159 lapic_nmi_owner &= ~LAPIC_NMI_WATCHDOG; 262 atomic_dec(&nmi_active);
160 kfree(prev_nmi_count);
161 return -1;
162 } 263 }
163 } 264 }
265 if (!atomic_read(&nmi_active)) {
266 kfree(prev_nmi_count);
267 atomic_set(&nmi_active, -1);
268 return -1;
269 }
164 endflag = 1; 270 endflag = 1;
165 printk("OK.\n"); 271 printk("OK.\n");
166 272
167 /* now that we know it works we can reduce NMI frequency to 273 /* now that we know it works we can reduce NMI frequency to
168 something more reasonable; makes a difference in some configs */ 274 something more reasonable; makes a difference in some configs */
169 if (nmi_watchdog == NMI_LOCAL_APIC) 275 if (nmi_watchdog == NMI_LOCAL_APIC) {
276 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
277
170 nmi_hz = 1; 278 nmi_hz = 1;
279 /*
280 * On Intel CPUs with ARCH_PERFMON only 32 bits in the counter
281 * are writable, with higher bits sign extending from bit 31.
282 * So, we can only program the counter with 31 bit values and
283 * 32nd bit should be 1, for 33.. to be 1.
284 * Find the appropriate nmi_hz
285 */
286 if (wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0 &&
287 ((u64)cpu_khz * 1000) > 0x7fffffffULL) {
288 u64 count = (u64)cpu_khz * 1000;
289 do_div(count, 0x7fffffffUL);
290 nmi_hz = count + 1;
291 }
292 }
171 293
172 kfree(prev_nmi_count); 294 kfree(prev_nmi_count);
173 return 0; 295 return 0;
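
Editor's note: the nmi_hz adjustment above only matters on ARCH_PERFMON CPUs, whose counters accept writes to the low 32 bits with sign extension from bit 31. A worked example with a hypothetical 3 GHz part (cpu_khz = 3000000):

    /* count = 3,000,000,000 cycles per second, which is > 0x7fffffff, so
     * do_div(count, 0x7fffffff) gives 1 and nmi_hz becomes 2.
     * write_watchdog_counter() then programs -(3,000,000,000 / 2) =
     * -1,500,000,000; the magnitude fits in 31 bits, and the watchdog
     * NMI fires roughly twice a second while the CPU is not halted.
     */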
@@ -181,124 +303,70 @@ static int __init setup_nmi_watchdog(char *str)
181 303
182 get_option(&str, &nmi); 304 get_option(&str, &nmi);
183 305
184 if (nmi >= NMI_INVALID) 306 if ((nmi >= NMI_INVALID) || (nmi < NMI_NONE))
185 return 0; 307 return 0;
186 if (nmi == NMI_NONE)
187 nmi_watchdog = nmi;
188 /* 308 /*
189 * If any other x86 CPU has a local APIC, then 309 * If any other x86 CPU has a local APIC, then
190 * please test the NMI stuff there and send me the 310 * please test the NMI stuff there and send me the
191 * missing bits. Right now Intel P6/P4 and AMD K7 only. 311 * missing bits. Right now Intel P6/P4 and AMD K7 only.
192 */ 312 */
193 if ((nmi == NMI_LOCAL_APIC) && 313 if ((nmi == NMI_LOCAL_APIC) && (nmi_known_cpu() == 0))
194 (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 314 return 0; /* no lapic support */
195 (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15)) 315 nmi_watchdog = nmi;
196 nmi_watchdog = nmi;
197 if ((nmi == NMI_LOCAL_APIC) &&
198 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
199 (boot_cpu_data.x86 == 6 || boot_cpu_data.x86 == 15))
200 nmi_watchdog = nmi;
201 /*
202 * We can enable the IO-APIC watchdog
203 * unconditionally.
204 */
205 if (nmi == NMI_IO_APIC) {
206 nmi_active = 1;
207 nmi_watchdog = nmi;
208 }
209 return 1; 316 return 1;
210} 317}
211 318
212__setup("nmi_watchdog=", setup_nmi_watchdog); 319__setup("nmi_watchdog=", setup_nmi_watchdog);
213 320
214static void disable_intel_arch_watchdog(void);
215
216static void disable_lapic_nmi_watchdog(void) 321static void disable_lapic_nmi_watchdog(void)
217{ 322{
218 if (nmi_active <= 0) 323 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
324
325 if (atomic_read(&nmi_active) <= 0)
219 return; 326 return;
220 switch (boot_cpu_data.x86_vendor) {
221 case X86_VENDOR_AMD:
222 wrmsr(MSR_K7_EVNTSEL0, 0, 0);
223 break;
224 case X86_VENDOR_INTEL:
225 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
226 disable_intel_arch_watchdog();
227 break;
228 }
229 switch (boot_cpu_data.x86) {
230 case 6:
231 if (boot_cpu_data.x86_model > 0xd)
232 break;
233 327
234 wrmsr(MSR_P6_EVNTSEL0, 0, 0); 328 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
235 break;
236 case 15:
237 if (boot_cpu_data.x86_model > 0x4)
238 break;
239 329
240 wrmsr(MSR_P4_IQ_CCCR0, 0, 0); 330 BUG_ON(atomic_read(&nmi_active) != 0);
241 wrmsr(MSR_P4_CRU_ESCR0, 0, 0);
242 break;
243 }
244 break;
245 }
246 nmi_active = -1;
247 /* tell do_nmi() and others that we're not active any more */
248 nmi_watchdog = 0;
249} 331}
250 332
251static void enable_lapic_nmi_watchdog(void) 333static void enable_lapic_nmi_watchdog(void)
252{ 334{
253 if (nmi_active < 0) { 335 BUG_ON(nmi_watchdog != NMI_LOCAL_APIC);
254 nmi_watchdog = NMI_LOCAL_APIC;
255 setup_apic_nmi_watchdog();
256 }
257}
258 336
259int reserve_lapic_nmi(void) 337 /* are we already enabled */
260{ 338 if (atomic_read(&nmi_active) != 0)
261 unsigned int old_owner; 339 return;
262
263 spin_lock(&lapic_nmi_owner_lock);
264 old_owner = lapic_nmi_owner;
265 lapic_nmi_owner |= LAPIC_NMI_RESERVED;
266 spin_unlock(&lapic_nmi_owner_lock);
267 if (old_owner & LAPIC_NMI_RESERVED)
268 return -EBUSY;
269 if (old_owner & LAPIC_NMI_WATCHDOG)
270 disable_lapic_nmi_watchdog();
271 return 0;
272}
273 340
274void release_lapic_nmi(void) 341 /* are we lapic aware */
275{ 342 if (nmi_known_cpu() <= 0)
276 unsigned int new_owner; 343 return;
277 344
278 spin_lock(&lapic_nmi_owner_lock); 345 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
279 new_owner = lapic_nmi_owner & ~LAPIC_NMI_RESERVED; 346 touch_nmi_watchdog();
280 lapic_nmi_owner = new_owner;
281 spin_unlock(&lapic_nmi_owner_lock);
282 if (new_owner & LAPIC_NMI_WATCHDOG)
283 enable_lapic_nmi_watchdog();
284} 347}
285 348
286void disable_timer_nmi_watchdog(void) 349void disable_timer_nmi_watchdog(void)
287{ 350{
288 if ((nmi_watchdog != NMI_IO_APIC) || (nmi_active <= 0)) 351 BUG_ON(nmi_watchdog != NMI_IO_APIC);
352
353 if (atomic_read(&nmi_active) <= 0)
289 return; 354 return;
290 355
291 unset_nmi_callback(); 356 disable_irq(0);
292 nmi_active = -1; 357 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1);
293 nmi_watchdog = NMI_NONE; 358
359 BUG_ON(atomic_read(&nmi_active) != 0);
294} 360}
295 361
296void enable_timer_nmi_watchdog(void) 362void enable_timer_nmi_watchdog(void)
297{ 363{
298 if (nmi_active < 0) { 364 BUG_ON(nmi_watchdog != NMI_IO_APIC);
299 nmi_watchdog = NMI_IO_APIC; 365
366 if (atomic_read(&nmi_active) == 0) {
300 touch_nmi_watchdog(); 367 touch_nmi_watchdog();
301 nmi_active = 1; 368 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1);
369 enable_irq(0);
302 } 370 }
303} 371}
304 372
@@ -308,15 +376,20 @@ static int nmi_pm_active; /* nmi_active before suspend */
308 376
309static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state) 377static int lapic_nmi_suspend(struct sys_device *dev, pm_message_t state)
310{ 378{
311 nmi_pm_active = nmi_active; 379 /* only CPU0 goes here, other CPUs should be offline */
312 disable_lapic_nmi_watchdog(); 380 nmi_pm_active = atomic_read(&nmi_active);
381 stop_apic_nmi_watchdog(NULL);
382 BUG_ON(atomic_read(&nmi_active) != 0);
313 return 0; 383 return 0;
314} 384}
315 385
316static int lapic_nmi_resume(struct sys_device *dev) 386static int lapic_nmi_resume(struct sys_device *dev)
317{ 387{
318 if (nmi_pm_active > 0) 388 /* only CPU0 goes here, other CPUs should be offline */
319 enable_lapic_nmi_watchdog(); 389 if (nmi_pm_active > 0) {
390 setup_apic_nmi_watchdog(NULL);
391 touch_nmi_watchdog();
392 }
320 return 0; 393 return 0;
321} 394}
322 395
@@ -336,7 +409,13 @@ static int __init init_lapic_nmi_sysfs(void)
336{ 409{
337 int error; 410 int error;
338 411
339 if (nmi_active == 0 || nmi_watchdog != NMI_LOCAL_APIC) 412 /* should really be a BUG_ON but b/c this is an
413 * init call, it just doesn't work. -dcz
414 */
415 if (nmi_watchdog != NMI_LOCAL_APIC)
416 return 0;
417
418 if ( atomic_read(&nmi_active) < 0 )
340 return 0; 419 return 0;
341 420
342 error = sysdev_class_register(&nmi_sysclass); 421 error = sysdev_class_register(&nmi_sysclass);
@@ -354,138 +433,269 @@ late_initcall(init_lapic_nmi_sysfs);
354 * Original code written by Keith Owens. 433 * Original code written by Keith Owens.
355 */ 434 */
356 435
357static void clear_msr_range(unsigned int base, unsigned int n) 436static void write_watchdog_counter(unsigned int perfctr_msr, const char *descr)
358{
359 unsigned int i;
360
361 for(i = 0; i < n; ++i)
362 wrmsr(base+i, 0, 0);
363}
364
365static void write_watchdog_counter(const char *descr)
366{ 437{
367 u64 count = (u64)cpu_khz * 1000; 438 u64 count = (u64)cpu_khz * 1000;
368 439
369 do_div(count, nmi_hz); 440 do_div(count, nmi_hz);
370 if(descr) 441 if(descr)
371 Dprintk("setting %s to -0x%08Lx\n", descr, count); 442 Dprintk("setting %s to -0x%08Lx\n", descr, count);
372 wrmsrl(nmi_perfctr_msr, 0 - count); 443 wrmsrl(perfctr_msr, 0 - count);
373} 444}
374 445
375static void setup_k7_watchdog(void) 446/* Note that these events don't tick when the CPU idles. This means
447 the frequency varies with CPU load. */
448
449#define K7_EVNTSEL_ENABLE (1 << 22)
450#define K7_EVNTSEL_INT (1 << 20)
451#define K7_EVNTSEL_OS (1 << 17)
452#define K7_EVNTSEL_USR (1 << 16)
453#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING 0x76
454#define K7_NMI_EVENT K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING
455
456static int setup_k7_watchdog(void)
376{ 457{
458 unsigned int perfctr_msr, evntsel_msr;
377 unsigned int evntsel; 459 unsigned int evntsel;
460 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
461
462 perfctr_msr = MSR_K7_PERFCTR0;
463 evntsel_msr = MSR_K7_EVNTSEL0;
464 if (!reserve_perfctr_nmi(perfctr_msr))
465 goto fail;
378 466
379 nmi_perfctr_msr = MSR_K7_PERFCTR0; 467 if (!reserve_evntsel_nmi(evntsel_msr))
468 goto fail1;
380 469
381 clear_msr_range(MSR_K7_EVNTSEL0, 4); 470 wrmsrl(perfctr_msr, 0UL);
382 clear_msr_range(MSR_K7_PERFCTR0, 4);
383 471
384 evntsel = K7_EVNTSEL_INT 472 evntsel = K7_EVNTSEL_INT
385 | K7_EVNTSEL_OS 473 | K7_EVNTSEL_OS
386 | K7_EVNTSEL_USR 474 | K7_EVNTSEL_USR
387 | K7_NMI_EVENT; 475 | K7_NMI_EVENT;
388 476
389 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 477 /* setup the timer */
390 write_watchdog_counter("K7_PERFCTR0"); 478 wrmsr(evntsel_msr, evntsel, 0);
479 write_watchdog_counter(perfctr_msr, "K7_PERFCTR0");
391 apic_write(APIC_LVTPC, APIC_DM_NMI); 480 apic_write(APIC_LVTPC, APIC_DM_NMI);
392 evntsel |= K7_EVNTSEL_ENABLE; 481 evntsel |= K7_EVNTSEL_ENABLE;
393 wrmsr(MSR_K7_EVNTSEL0, evntsel, 0); 482 wrmsr(evntsel_msr, evntsel, 0);
483
484 wd->perfctr_msr = perfctr_msr;
485 wd->evntsel_msr = evntsel_msr;
486 wd->cccr_msr = 0; //unused
487 wd->check_bit = 1ULL<<63;
488 return 1;
489fail1:
490 release_perfctr_nmi(perfctr_msr);
491fail:
492 return 0;
493}
494
495static void stop_k7_watchdog(void)
496{
497 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
498
499 wrmsr(wd->evntsel_msr, 0, 0);
500
501 release_evntsel_nmi(wd->evntsel_msr);
502 release_perfctr_nmi(wd->perfctr_msr);
394} 503}
395 504
396static void setup_p6_watchdog(void) 505#define P6_EVNTSEL0_ENABLE (1 << 22)
506#define P6_EVNTSEL_INT (1 << 20)
507#define P6_EVNTSEL_OS (1 << 17)
508#define P6_EVNTSEL_USR (1 << 16)
509#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
510#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
511
512static int setup_p6_watchdog(void)
397{ 513{
514 unsigned int perfctr_msr, evntsel_msr;
398 unsigned int evntsel; 515 unsigned int evntsel;
516 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
517
518 perfctr_msr = MSR_P6_PERFCTR0;
519 evntsel_msr = MSR_P6_EVNTSEL0;
520 if (!reserve_perfctr_nmi(perfctr_msr))
521 goto fail;
399 522
400 nmi_perfctr_msr = MSR_P6_PERFCTR0; 523 if (!reserve_evntsel_nmi(evntsel_msr))
524 goto fail1;
401 525
402 clear_msr_range(MSR_P6_EVNTSEL0, 2); 526 wrmsrl(perfctr_msr, 0UL);
403 clear_msr_range(MSR_P6_PERFCTR0, 2);
404 527
405 evntsel = P6_EVNTSEL_INT 528 evntsel = P6_EVNTSEL_INT
406 | P6_EVNTSEL_OS 529 | P6_EVNTSEL_OS
407 | P6_EVNTSEL_USR 530 | P6_EVNTSEL_USR
408 | P6_NMI_EVENT; 531 | P6_NMI_EVENT;
409 532
410 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); 533 /* setup the timer */
411 write_watchdog_counter("P6_PERFCTR0"); 534 wrmsr(evntsel_msr, evntsel, 0);
535 write_watchdog_counter(perfctr_msr, "P6_PERFCTR0");
412 apic_write(APIC_LVTPC, APIC_DM_NMI); 536 apic_write(APIC_LVTPC, APIC_DM_NMI);
413 evntsel |= P6_EVNTSEL0_ENABLE; 537 evntsel |= P6_EVNTSEL0_ENABLE;
414 wrmsr(MSR_P6_EVNTSEL0, evntsel, 0); 538 wrmsr(evntsel_msr, evntsel, 0);
539
540 wd->perfctr_msr = perfctr_msr;
541 wd->evntsel_msr = evntsel_msr;
542 wd->cccr_msr = 0; //unused
543 wd->check_bit = 1ULL<<39;
544 return 1;
545fail1:
546 release_perfctr_nmi(perfctr_msr);
547fail:
548 return 0;
549}
550
551static void stop_p6_watchdog(void)
552{
553 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
554
555 wrmsr(wd->evntsel_msr, 0, 0);
556
557 release_evntsel_nmi(wd->evntsel_msr);
558 release_perfctr_nmi(wd->perfctr_msr);
415} 559}
416 560
561/* Note that these events don't tick when the CPU idles. This means
562 the frequency varies with CPU load. */
563
564#define MSR_P4_MISC_ENABLE_PERF_AVAIL (1<<7)
565#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
566#define P4_ESCR_OS (1<<3)
567#define P4_ESCR_USR (1<<2)
568#define P4_CCCR_OVF_PMI0 (1<<26)
569#define P4_CCCR_OVF_PMI1 (1<<27)
570#define P4_CCCR_THRESHOLD(N) ((N)<<20)
571#define P4_CCCR_COMPLEMENT (1<<19)
572#define P4_CCCR_COMPARE (1<<18)
573#define P4_CCCR_REQUIRED (3<<16)
574#define P4_CCCR_ESCR_SELECT(N) ((N)<<13)
575#define P4_CCCR_ENABLE (1<<12)
576#define P4_CCCR_OVF (1<<31)
577/* Set up IQ_COUNTER0 to behave like a clock, by having IQ_CCCR0 filter
578 CRU_ESCR0 (with any non-null event selector) through a complemented
579 max threshold. [IA32-Vol3, Section 14.9.9] */
580
417static int setup_p4_watchdog(void) 581static int setup_p4_watchdog(void)
418{ 582{
583 unsigned int perfctr_msr, evntsel_msr, cccr_msr;
584 unsigned int evntsel, cccr_val;
419 unsigned int misc_enable, dummy; 585 unsigned int misc_enable, dummy;
586 unsigned int ht_num;
587 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
420 588
421 rdmsr(MSR_P4_MISC_ENABLE, misc_enable, dummy); 589 rdmsr(MSR_IA32_MISC_ENABLE, misc_enable, dummy);
422 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL)) 590 if (!(misc_enable & MSR_P4_MISC_ENABLE_PERF_AVAIL))
423 return 0; 591 return 0;
424 592
425 nmi_perfctr_msr = MSR_P4_IQ_COUNTER0;
426 nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
427#ifdef CONFIG_SMP 593#ifdef CONFIG_SMP
428 if (smp_num_siblings == 2) 594 /* detect which hyperthread we are on */
429 nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1; 595 if (smp_num_siblings == 2) {
596 unsigned int ebx, apicid;
597
598 ebx = cpuid_ebx(1);
599 apicid = (ebx >> 24) & 0xff;
600 ht_num = apicid & 1;
601 } else
430#endif 602#endif
603 ht_num = 0;
431 604
432 if (!(misc_enable & MSR_P4_MISC_ENABLE_PEBS_UNAVAIL)) 605 /* performance counters are shared resources
433 clear_msr_range(0x3F1, 2); 606 * assign each hyperthread its own set
434 /* MSR 0x3F0 seems to have a default value of 0xFC00, but current 607 * (re-use the ESCR0 register, seems safe
435 docs doesn't fully define it, so leave it alone for now. */ 608 * and keeps the cccr_val the same)
436 if (boot_cpu_data.x86_model >= 0x3) { 609 */
437 /* MSR_P4_IQ_ESCR0/1 (0x3ba/0x3bb) removed */ 610 if (!ht_num) {
438 clear_msr_range(0x3A0, 26); 611 /* logical cpu 0 */
439 clear_msr_range(0x3BC, 3); 612 perfctr_msr = MSR_P4_IQ_PERFCTR0;
613 evntsel_msr = MSR_P4_CRU_ESCR0;
614 cccr_msr = MSR_P4_IQ_CCCR0;
615 cccr_val = P4_CCCR_OVF_PMI0 | P4_CCCR_ESCR_SELECT(4);
440 } else { 616 } else {
441 clear_msr_range(0x3A0, 31); 617 /* logical cpu 1 */
618 perfctr_msr = MSR_P4_IQ_PERFCTR1;
619 evntsel_msr = MSR_P4_CRU_ESCR0;
620 cccr_msr = MSR_P4_IQ_CCCR1;
621 cccr_val = P4_CCCR_OVF_PMI1 | P4_CCCR_ESCR_SELECT(4);
442 } 622 }
443 clear_msr_range(0x3C0, 6); 623
444 clear_msr_range(0x3C8, 6); 624 if (!reserve_perfctr_nmi(perfctr_msr))
445 clear_msr_range(0x3E0, 2); 625 goto fail;
446 clear_msr_range(MSR_P4_CCCR0, 18); 626
447 clear_msr_range(MSR_P4_PERFCTR0, 18); 627 if (!reserve_evntsel_nmi(evntsel_msr))
448 628 goto fail1;
449 wrmsr(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0, 0); 629
450 wrmsr(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE, 0); 630 evntsel = P4_ESCR_EVENT_SELECT(0x3F)
451 write_watchdog_counter("P4_IQ_COUNTER0"); 631 | P4_ESCR_OS
632 | P4_ESCR_USR;
633
634 cccr_val |= P4_CCCR_THRESHOLD(15)
635 | P4_CCCR_COMPLEMENT
636 | P4_CCCR_COMPARE
637 | P4_CCCR_REQUIRED;
638
639 wrmsr(evntsel_msr, evntsel, 0);
640 wrmsr(cccr_msr, cccr_val, 0);
641 write_watchdog_counter(perfctr_msr, "P4_IQ_COUNTER0");
452 apic_write(APIC_LVTPC, APIC_DM_NMI); 642 apic_write(APIC_LVTPC, APIC_DM_NMI);
453 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 643 cccr_val |= P4_CCCR_ENABLE;
644 wrmsr(cccr_msr, cccr_val, 0);
645 wd->perfctr_msr = perfctr_msr;
646 wd->evntsel_msr = evntsel_msr;
647 wd->cccr_msr = cccr_msr;
648 wd->check_bit = 1ULL<<39;
454 return 1; 649 return 1;
650fail1:
651 release_perfctr_nmi(perfctr_msr);
652fail:
653 return 0;
455} 654}
456 655
457static void disable_intel_arch_watchdog(void) 656static void stop_p4_watchdog(void)
458{ 657{
459 unsigned ebx; 658 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
460 659
461 /* 660 wrmsr(wd->cccr_msr, 0, 0);
462 * Check whether the Architectural PerfMon supports 661 wrmsr(wd->evntsel_msr, 0, 0);
463 * Unhalted Core Cycles Event or not. 662
464 * NOTE: Corresponding bit = 0 in ebp indicates event present. 663 release_evntsel_nmi(wd->evntsel_msr);
465 */ 664 release_perfctr_nmi(wd->perfctr_msr);
466 ebx = cpuid_ebx(10);
467 if (!(ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
468 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, 0, 0);
469} 665}
470 666
667#define ARCH_PERFMON_NMI_EVENT_SEL ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL
668#define ARCH_PERFMON_NMI_EVENT_UMASK ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK
669
471static int setup_intel_arch_watchdog(void) 670static int setup_intel_arch_watchdog(void)
472{ 671{
672 unsigned int ebx;
673 union cpuid10_eax eax;
674 unsigned int unused;
675 unsigned int perfctr_msr, evntsel_msr;
473 unsigned int evntsel; 676 unsigned int evntsel;
474 unsigned ebx; 677 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
475 678
476 /* 679 /*
477 * Check whether the Architectural PerfMon supports 680 * Check whether the Architectural PerfMon supports
478 * Unhalted Core Cycles Event or not. 681 * Unhalted Core Cycles Event or not.
479 * NOTE: Corresponding bit = 0 in ebp indicates event present. 682 * NOTE: Corresponding bit = 0 in ebx indicates event present.
480 */ 683 */
481 ebx = cpuid_ebx(10); 684 cpuid(10, &(eax.full), &ebx, &unused, &unused);
482 if ((ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT)) 685 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
483 return 0; 686 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
687 goto fail;
688
689 perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0;
690 evntsel_msr = MSR_ARCH_PERFMON_EVENTSEL0;
484 691
485 nmi_perfctr_msr = MSR_ARCH_PERFMON_PERFCTR0; 692 if (!reserve_perfctr_nmi(perfctr_msr))
693 goto fail;
486 694
487 clear_msr_range(MSR_ARCH_PERFMON_EVENTSEL0, 2); 695 if (!reserve_evntsel_nmi(evntsel_msr))
488 clear_msr_range(MSR_ARCH_PERFMON_PERFCTR0, 2); 696 goto fail1;
697
698 wrmsrl(perfctr_msr, 0UL);
489 699
490 evntsel = ARCH_PERFMON_EVENTSEL_INT 700 evntsel = ARCH_PERFMON_EVENTSEL_INT
491 | ARCH_PERFMON_EVENTSEL_OS 701 | ARCH_PERFMON_EVENTSEL_OS
@@ -493,51 +703,145 @@ static int setup_intel_arch_watchdog(void)
493 | ARCH_PERFMON_NMI_EVENT_SEL 703 | ARCH_PERFMON_NMI_EVENT_SEL
494 | ARCH_PERFMON_NMI_EVENT_UMASK; 704 | ARCH_PERFMON_NMI_EVENT_UMASK;
495 705
496 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); 706 /* setup the timer */
497 write_watchdog_counter("INTEL_ARCH_PERFCTR0"); 707 wrmsr(evntsel_msr, evntsel, 0);
708 write_watchdog_counter(perfctr_msr, "INTEL_ARCH_PERFCTR0");
498 apic_write(APIC_LVTPC, APIC_DM_NMI); 709 apic_write(APIC_LVTPC, APIC_DM_NMI);
499 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; 710 evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE;
500 wrmsr(MSR_ARCH_PERFMON_EVENTSEL0, evntsel, 0); 711 wrmsr(evntsel_msr, evntsel, 0);
712
713 wd->perfctr_msr = perfctr_msr;
714 wd->evntsel_msr = evntsel_msr;
715 wd->cccr_msr = 0; //unused
716 wd->check_bit = 1ULL << (eax.split.bit_width - 1);
501 return 1; 717 return 1;
718fail1:
719 release_perfctr_nmi(perfctr_msr);
720fail:
721 return 0;
502} 722}
503 723
504void setup_apic_nmi_watchdog (void) 724static void stop_intel_arch_watchdog(void)
505{ 725{
506 switch (boot_cpu_data.x86_vendor) { 726 unsigned int ebx;
507 case X86_VENDOR_AMD: 727 union cpuid10_eax eax;
508 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15) 728 unsigned int unused;
509 return; 729 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
510 setup_k7_watchdog(); 730
511 break; 731 /*
512 case X86_VENDOR_INTEL: 732 * Check whether the Architectural PerfMon supports
513 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { 733 * Unhalted Core Cycles Event or not.
514 if (!setup_intel_arch_watchdog()) 734 * NOTE: Corresponding bit = 0 in ebx indicates event present.
735 */
736 cpuid(10, &(eax.full), &ebx, &unused, &unused);
737 if ((eax.split.mask_length < (ARCH_PERFMON_UNHALTED_CORE_CYCLES_INDEX+1)) ||
738 (ebx & ARCH_PERFMON_UNHALTED_CORE_CYCLES_PRESENT))
739 return;
740
741 wrmsr(wd->evntsel_msr, 0, 0);
742 release_evntsel_nmi(wd->evntsel_msr);
743 release_perfctr_nmi(wd->perfctr_msr);
744}
745
746void setup_apic_nmi_watchdog (void *unused)
747{
748 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
749
750 /* only support LOCAL and IO APICs for now */
751 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
752 (nmi_watchdog != NMI_IO_APIC))
753 return;
754
755 if (wd->enabled == 1)
756 return;
757
758 /* cheap hack to support suspend/resume */
759 /* if cpu0 is not active neither should the other cpus */
760 if ((smp_processor_id() != 0) && (atomic_read(&nmi_active) <= 0))
761 return;
762
763 if (nmi_watchdog == NMI_LOCAL_APIC) {
764 switch (boot_cpu_data.x86_vendor) {
765 case X86_VENDOR_AMD:
766 if (boot_cpu_data.x86 != 6 && boot_cpu_data.x86 != 15)
515 return; 767 return;
516 break; 768 if (!setup_k7_watchdog())
517 }
518 switch (boot_cpu_data.x86) {
519 case 6:
520 if (boot_cpu_data.x86_model > 0xd)
521 return; 769 return;
522
523 setup_p6_watchdog();
524 break; 770 break;
525 case 15: 771 case X86_VENDOR_INTEL:
526 if (boot_cpu_data.x86_model > 0x4) 772 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
527 return; 773 if (!setup_intel_arch_watchdog())
774 return;
775 break;
776 }
777 switch (boot_cpu_data.x86) {
778 case 6:
779 if (boot_cpu_data.x86_model > 0xd)
780 return;
781
782 if (!setup_p6_watchdog())
783 return;
784 break;
785 case 15:
786 if (boot_cpu_data.x86_model > 0x4)
787 return;
528 788
529 if (!setup_p4_watchdog()) 789 if (!setup_p4_watchdog())
790 return;
791 break;
792 default:
530 return; 793 return;
794 }
531 break; 795 break;
532 default: 796 default:
533 return; 797 return;
534 } 798 }
535 break; 799 }
536 default: 800 wd->enabled = 1;
801 atomic_inc(&nmi_active);
802}
803
804void stop_apic_nmi_watchdog(void *unused)
805{
806 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
807
808 /* only support LOCAL and IO APICs for now */
809 if ((nmi_watchdog != NMI_LOCAL_APIC) &&
810 (nmi_watchdog != NMI_IO_APIC))
811 return;
812
813 if (wd->enabled == 0)
537 return; 814 return;
815
816 if (nmi_watchdog == NMI_LOCAL_APIC) {
817 switch (boot_cpu_data.x86_vendor) {
818 case X86_VENDOR_AMD:
819 stop_k7_watchdog();
820 break;
821 case X86_VENDOR_INTEL:
822 if (cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) {
823 stop_intel_arch_watchdog();
824 break;
825 }
826 switch (boot_cpu_data.x86) {
827 case 6:
828 if (boot_cpu_data.x86_model > 0xd)
829 break;
830 stop_p6_watchdog();
831 break;
832 case 15:
833 if (boot_cpu_data.x86_model > 0x4)
834 break;
835 stop_p4_watchdog();
836 break;
837 }
838 break;
839 default:
840 return;
841 }
538 } 842 }
539 lapic_nmi_owner = LAPIC_NMI_WATCHDOG; 843 wd->enabled = 0;
540 nmi_active = 1; 844 atomic_dec(&nmi_active);
541} 845}
542 846
543/* 847/*
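
The reworked setup_p4_watchdog() above gives each hyperthread its own counter/CCCR pair by testing the low bit of the initial APIC ID, taken from bits 31..24 of CPUID leaf 1's EBX. A minimal user-space sketch of that bit arithmetic, assuming an x86 host and GCC's <cpuid.h> (the kernel itself uses cpuid_ebx(1) together with smp_num_siblings rather than this helper):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;
        unsigned int apicid, ht_num;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;

        apicid = (ebx >> 24) & 0xff;    /* initial APIC ID, CPUID.1:EBX[31:24] */
        ht_num = apicid & 1;            /* which of the two P4 siblings this is */

        printf("initial APIC id %u -> logical sibling %u\n", apicid, ht_num);
        return 0;
}
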
@@ -579,7 +883,7 @@ EXPORT_SYMBOL(touch_nmi_watchdog);
579 883
580extern void die_nmi(struct pt_regs *, const char *msg); 884extern void die_nmi(struct pt_regs *, const char *msg);
581 885
582void nmi_watchdog_tick (struct pt_regs * regs) 886__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason)
583{ 887{
584 888
585 /* 889 /*
@@ -588,11 +892,23 @@ void nmi_watchdog_tick (struct pt_regs * regs)
588 * smp_processor_id(). 892 * smp_processor_id().
589 */ 893 */
590 unsigned int sum; 894 unsigned int sum;
895 int touched = 0;
591 int cpu = smp_processor_id(); 896 int cpu = smp_processor_id();
897 struct nmi_watchdog_ctlblk *wd = &__get_cpu_var(nmi_watchdog_ctlblk);
898 u64 dummy;
899 int rc=0;
900
901 /* check for other users first */
902 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
903 == NOTIFY_STOP) {
904 rc = 1;
905 touched = 1;
906 }
592 907
593 sum = per_cpu(irq_stat, cpu).apic_timer_irqs; 908 sum = per_cpu(irq_stat, cpu).apic_timer_irqs;
594 909
595 if (last_irq_sums[cpu] == sum) { 910 /* if the apic timer isn't firing, this cpu isn't doing much */
911 if (!touched && last_irq_sums[cpu] == sum) {
596 /* 912 /*
597 * Ayiee, looks like this CPU is stuck ... 913 * Ayiee, looks like this CPU is stuck ...
598 * wait a few IRQs (5 seconds) before doing the oops ... 914 * wait a few IRQs (5 seconds) before doing the oops ...
@@ -607,27 +923,59 @@ void nmi_watchdog_tick (struct pt_regs * regs)
607 last_irq_sums[cpu] = sum; 923 last_irq_sums[cpu] = sum;
608 alert_counter[cpu] = 0; 924 alert_counter[cpu] = 0;
609 } 925 }
610 if (nmi_perfctr_msr) { 926 /* see if the nmi watchdog went off */
611 if (nmi_perfctr_msr == MSR_P4_IQ_COUNTER0) { 927 if (wd->enabled) {
612 /* 928 if (nmi_watchdog == NMI_LOCAL_APIC) {
613 * P4 quirks: 929 rdmsrl(wd->perfctr_msr, dummy);
614 * - An overflown perfctr will assert its interrupt 930 if (dummy & wd->check_bit){
615 * until the OVF flag in its CCCR is cleared. 931 /* this wasn't a watchdog timer interrupt */
616 * - LVTPC is masked on interrupt and must be 932 goto done;
617 * unmasked by the LVTPC handler. 933 }
934
935 /* only Intel P4 uses the cccr msr */
936 if (wd->cccr_msr != 0) {
937 /*
938 * P4 quirks:
939 * - An overflown perfctr will assert its interrupt
940 * until the OVF flag in its CCCR is cleared.
941 * - LVTPC is masked on interrupt and must be
942 * unmasked by the LVTPC handler.
943 */
944 rdmsrl(wd->cccr_msr, dummy);
945 dummy &= ~P4_CCCR_OVF;
946 wrmsrl(wd->cccr_msr, dummy);
947 apic_write(APIC_LVTPC, APIC_DM_NMI);
948 }
949 else if (wd->perfctr_msr == MSR_P6_PERFCTR0 ||
950 wd->perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
951 /* P6 based Pentium M need to re-unmask
952 * the apic vector but it doesn't hurt
 953 * other P6 variants.
 954 * ArchPerfmon/Core Duo also needs this */
955 apic_write(APIC_LVTPC, APIC_DM_NMI);
956 }
957 /* start the cycle over again */
958 write_watchdog_counter(wd->perfctr_msr, NULL);
959 rc = 1;
960 } else if (nmi_watchdog == NMI_IO_APIC) {
961 /* don't know how to accurately check for this.
962 * just assume it was a watchdog timer interrupt
963 * This matches the old behaviour.
618 */ 964 */
619 wrmsr(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val, 0); 965 rc = 1;
620 apic_write(APIC_LVTPC, APIC_DM_NMI);
621 } 966 }
622 else if (nmi_perfctr_msr == MSR_P6_PERFCTR0 ||
623 nmi_perfctr_msr == MSR_ARCH_PERFMON_PERFCTR0) {
624 /* Only P6 based Pentium M need to re-unmask
625 * the apic vector but it doesn't hurt
626 * other P6 variant */
627 apic_write(APIC_LVTPC, APIC_DM_NMI);
628 }
629 write_watchdog_counter(NULL);
630 } 967 }
968done:
969 return rc;
970}
971
972int do_nmi_callback(struct pt_regs * regs, int cpu)
973{
974#ifdef CONFIG_SYSCTL
975 if (unknown_nmi_panic)
976 return unknown_nmi_panic_callback(regs, cpu);
977#endif
978 return 0;
631} 979}
632 980
633#ifdef CONFIG_SYSCTL 981#ifdef CONFIG_SYSCTL
@@ -637,36 +985,46 @@ static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
637 unsigned char reason = get_nmi_reason(); 985 unsigned char reason = get_nmi_reason();
638 char buf[64]; 986 char buf[64];
639 987
640 if (!(reason & 0xc0)) { 988 sprintf(buf, "NMI received for unknown reason %02x\n", reason);
641 sprintf(buf, "NMI received for unknown reason %02x\n", reason); 989 die_nmi(regs, buf);
642 die_nmi(regs, buf);
643 }
644 return 0; 990 return 0;
645} 991}
646 992
647/* 993/*
648 * proc handler for /proc/sys/kernel/unknown_nmi_panic 994 * proc handler for /proc/sys/kernel/nmi
649 */ 995 */
650int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file, 996int proc_nmi_enabled(struct ctl_table *table, int write, struct file *file,
651 void __user *buffer, size_t *length, loff_t *ppos) 997 void __user *buffer, size_t *length, loff_t *ppos)
652{ 998{
653 int old_state; 999 int old_state;
654 1000
655 old_state = unknown_nmi_panic; 1001 nmi_watchdog_enabled = (atomic_read(&nmi_active) > 0) ? 1 : 0;
1002 old_state = nmi_watchdog_enabled;
656 proc_dointvec(table, write, file, buffer, length, ppos); 1003 proc_dointvec(table, write, file, buffer, length, ppos);
657 if (!!old_state == !!unknown_nmi_panic) 1004 if (!!old_state == !!nmi_watchdog_enabled)
658 return 0; 1005 return 0;
659 1006
660 if (unknown_nmi_panic) { 1007 if (atomic_read(&nmi_active) < 0) {
661 if (reserve_lapic_nmi() < 0) { 1008 printk( KERN_WARNING "NMI watchdog is permanently disabled\n");
662 unknown_nmi_panic = 0; 1009 return -EIO;
663 return -EBUSY; 1010 }
664 } else { 1011
665 set_nmi_callback(unknown_nmi_panic_callback); 1012 if (nmi_watchdog == NMI_DEFAULT) {
666 } 1013 if (nmi_known_cpu() > 0)
1014 nmi_watchdog = NMI_LOCAL_APIC;
1015 else
1016 nmi_watchdog = NMI_IO_APIC;
1017 }
1018
1019 if (nmi_watchdog == NMI_LOCAL_APIC) {
1020 if (nmi_watchdog_enabled)
1021 enable_lapic_nmi_watchdog();
1022 else
1023 disable_lapic_nmi_watchdog();
667 } else { 1024 } else {
668 release_lapic_nmi(); 1025 printk( KERN_WARNING
669 unset_nmi_callback(); 1026 "NMI watchdog doesn't know what hardware to touch\n");
1027 return -EIO;
670 } 1028 }
671 return 0; 1029 return 0;
672} 1030}
@@ -675,7 +1033,11 @@ int proc_unknown_nmi_panic(ctl_table *table, int write, struct file *file,
675 1033
676EXPORT_SYMBOL(nmi_active); 1034EXPORT_SYMBOL(nmi_active);
677EXPORT_SYMBOL(nmi_watchdog); 1035EXPORT_SYMBOL(nmi_watchdog);
678EXPORT_SYMBOL(reserve_lapic_nmi); 1036EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi);
679EXPORT_SYMBOL(release_lapic_nmi); 1037EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit);
1038EXPORT_SYMBOL(reserve_perfctr_nmi);
1039EXPORT_SYMBOL(release_perfctr_nmi);
1040EXPORT_SYMBOL(reserve_evntsel_nmi);
1041EXPORT_SYMBOL(release_evntsel_nmi);
680EXPORT_SYMBOL(disable_timer_nmi_watchdog); 1042EXPORT_SYMBOL(disable_timer_nmi_watchdog);
681EXPORT_SYMBOL(enable_timer_nmi_watchdog); 1043EXPORT_SYMBOL(enable_timer_nmi_watchdog);
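
All of the per-vendor setup routines in the nmi.c diff above arm their performance counter through write_watchdog_counter(), which loads the counter with the negative of cpu_khz * 1000 / nmi_hz so that it overflows, and fires the NMI, roughly nmi_hz times per second. A stand-alone sketch of that arithmetic with made-up numbers (a 2 GHz CPU and a 1 Hz watchdog rate):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
        uint64_t cpu_khz = 2000000;     /* assumed 2 GHz part, not a probed value */
        unsigned int nmi_hz = 1;        /* assumed watchdog frequency */

        uint64_t count = cpu_khz * 1000 / nmi_hz;       /* core cycles per watchdog period */
        uint64_t msr_val = 0 - count;                   /* what the kernel writes to the perfctr */

        printf("setting perfctr to -0x%08llx\n", (unsigned long long)count);
        printf("raw 64-bit counter value: 0x%016llx\n", (unsigned long long)msr_val);
        return 0;
}

Because the chosen events only count unhalted cycles, an idle CPU takes correspondingly longer to reach the overflow, which is what the "frequency varies with CPU load" comments in the diff refer to.
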
diff --git a/arch/i386/kernel/process.c b/arch/i386/kernel/process.c
index 8657c739656a..8c190ca7ae44 100644
--- a/arch/i386/kernel/process.c
+++ b/arch/i386/kernel/process.c
@@ -37,6 +37,7 @@
37#include <linux/kallsyms.h> 37#include <linux/kallsyms.h>
38#include <linux/ptrace.h> 38#include <linux/ptrace.h>
39#include <linux/random.h> 39#include <linux/random.h>
40#include <linux/personality.h>
40 41
41#include <asm/uaccess.h> 42#include <asm/uaccess.h>
42#include <asm/pgtable.h> 43#include <asm/pgtable.h>
@@ -320,15 +321,6 @@ void show_regs(struct pt_regs * regs)
320 * the "args". 321 * the "args".
321 */ 322 */
322extern void kernel_thread_helper(void); 323extern void kernel_thread_helper(void);
323__asm__(".section .text\n"
324 ".align 4\n"
325 "kernel_thread_helper:\n\t"
326 "movl %edx,%eax\n\t"
327 "pushl %edx\n\t"
328 "call *%ebx\n\t"
329 "pushl %eax\n\t"
330 "call do_exit\n"
331 ".previous");
332 324
333/* 325/*
334 * Create a kernel thread 326 * Create a kernel thread
@@ -346,7 +338,7 @@ int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
346 regs.xes = __USER_DS; 338 regs.xes = __USER_DS;
347 regs.orig_eax = -1; 339 regs.orig_eax = -1;
348 regs.eip = (unsigned long) kernel_thread_helper; 340 regs.eip = (unsigned long) kernel_thread_helper;
349 regs.xcs = __KERNEL_CS; 341 regs.xcs = __KERNEL_CS | get_kernel_rpl();
350 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2; 342 regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;
351 343
352 /* Ok, create the new process.. */ 344 /* Ok, create the new process.. */
@@ -905,7 +897,7 @@ asmlinkage int sys_get_thread_area(struct user_desc __user *u_info)
905 897
906unsigned long arch_align_stack(unsigned long sp) 898unsigned long arch_align_stack(unsigned long sp)
907{ 899{
908 if (randomize_va_space) 900 if (!(current->personality & ADDR_NO_RANDOMIZE) && randomize_va_space)
909 sp -= get_random_int() % 8192; 901 sp -= get_random_int() % 8192;
910 return sp & ~0xf; 902 return sp & ~0xf;
911} 903}
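
Besides removing the inline-asm definition of kernel_thread_helper(), the process.c hunk above makes stack randomization respect the ADDR_NO_RANDOMIZE personality bit. A small sketch of the arch_align_stack() arithmetic, with rand() standing in for the kernel's get_random_int() and an illustrative starting stack pointer:

#include <stdio.h>
#include <stdlib.h>

static unsigned long align_stack(unsigned long sp, int randomize)
{
        if (randomize)
                sp -= (unsigned long)(rand() % 8192);   /* up to 8 KB of slack */
        return sp & ~0xfUL;                             /* keep 16-byte alignment */
}

int main(void)
{
        unsigned long sp = 0xbffff000UL;        /* made-up i386 user stack top */

        srand(1);
        printf("randomized: %#lx\n", align_stack(sp, 1));
        printf("fixed:      %#lx\n", align_stack(sp, 0));
        return 0;
}
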
diff --git a/arch/i386/kernel/ptrace.c b/arch/i386/kernel/ptrace.c
index d3db03f4085d..775f50e9395b 100644
--- a/arch/i386/kernel/ptrace.c
+++ b/arch/i386/kernel/ptrace.c
@@ -185,17 +185,17 @@ static unsigned long convert_eip_to_linear(struct task_struct *child, struct pt_
185 return addr; 185 return addr;
186} 186}
187 187
188static inline int is_at_popf(struct task_struct *child, struct pt_regs *regs) 188static inline int is_setting_trap_flag(struct task_struct *child, struct pt_regs *regs)
189{ 189{
190 int i, copied; 190 int i, copied;
191 unsigned char opcode[16]; 191 unsigned char opcode[15];
192 unsigned long addr = convert_eip_to_linear(child, regs); 192 unsigned long addr = convert_eip_to_linear(child, regs);
193 193
194 copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0); 194 copied = access_process_vm(child, addr, opcode, sizeof(opcode), 0);
195 for (i = 0; i < copied; i++) { 195 for (i = 0; i < copied; i++) {
196 switch (opcode[i]) { 196 switch (opcode[i]) {
197 /* popf */ 197 /* popf and iret */
198 case 0x9d: 198 case 0x9d: case 0xcf:
199 return 1; 199 return 1;
200 /* opcode and address size prefixes */ 200 /* opcode and address size prefixes */
201 case 0x66: case 0x67: 201 case 0x66: case 0x67:
@@ -247,7 +247,7 @@ static void set_singlestep(struct task_struct *child)
247 * don't mark it as being "us" that set it, so that we 247 * don't mark it as being "us" that set it, so that we
248 * won't clear it by hand later. 248 * won't clear it by hand later.
249 */ 249 */
250 if (is_at_popf(child, regs)) 250 if (is_setting_trap_flag(child, regs))
251 return; 251 return;
252 252
253 child->ptrace |= PT_DTRACE; 253 child->ptrace |= PT_DTRACE;
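
The ptrace.c change renames is_at_popf() to is_setting_trap_flag() and additionally treats iret (opcode 0xcf) as an instruction that can set the trap flag, since both pop a new EFLAGS. A small sketch of that opcode scan over a made-up byte buffer; the real function reads up to 15 bytes from the child with access_process_vm() and handles more prefix bytes than shown here:

#include <stdio.h>

static int sets_trap_flag(const unsigned char *opcode, int len)
{
        int i;

        for (i = 0; i < len; i++) {
                switch (opcode[i]) {
                case 0x9d:                      /* popf */
                case 0xcf:                      /* iret */
                        return 1;
                case 0x66: case 0x67:           /* operand/address size prefixes */
                        continue;
                default:
                        return 0;
                }
        }
        return 0;
}

int main(void)
{
        unsigned char insn[] = { 0x66, 0xcf }; /* operand-size prefix, then iret */

        printf("sets TF on return: %d\n", sets_trap_flag(insn, (int)sizeof(insn)));
        return 0;
}
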
diff --git a/arch/i386/kernel/relocate_kernel.S b/arch/i386/kernel/relocate_kernel.S
index d312616effa1..f151d6fae462 100644
--- a/arch/i386/kernel/relocate_kernel.S
+++ b/arch/i386/kernel/relocate_kernel.S
@@ -7,16 +7,138 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h>
11#include <asm/kexec.h>
12
13/*
14 * Must be relocatable PIC code callable as a C function
15 */
16
17#define PTR(x) (x << 2)
18#define PAGE_ALIGNED (1 << PAGE_SHIFT)
19#define PAGE_ATTR 0x63 /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */
20#define PAE_PGD_ATTR 0x01 /* _PAGE_PRESENT */
21
22 .text
23 .align PAGE_ALIGNED
24 .globl relocate_kernel
25relocate_kernel:
26 movl 8(%esp), %ebp /* list of pages */
27
28#ifdef CONFIG_X86_PAE
29 /* map the control page at its virtual address */
30
31 movl PTR(VA_PGD)(%ebp), %edi
32 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
33 andl $0xc0000000, %eax
34 shrl $27, %eax
35 addl %edi, %eax
36
37 movl PTR(PA_PMD_0)(%ebp), %edx
38 orl $PAE_PGD_ATTR, %edx
39 movl %edx, (%eax)
40
41 movl PTR(VA_PMD_0)(%ebp), %edi
42 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
43 andl $0x3fe00000, %eax
44 shrl $18, %eax
45 addl %edi, %eax
46
47 movl PTR(PA_PTE_0)(%ebp), %edx
48 orl $PAGE_ATTR, %edx
49 movl %edx, (%eax)
50
51 movl PTR(VA_PTE_0)(%ebp), %edi
52 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
53 andl $0x001ff000, %eax
54 shrl $9, %eax
55 addl %edi, %eax
56
57 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
58 orl $PAGE_ATTR, %edx
59 movl %edx, (%eax)
60
61 /* identity map the control page at its physical address */
62
63 movl PTR(VA_PGD)(%ebp), %edi
64 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
65 andl $0xc0000000, %eax
66 shrl $27, %eax
67 addl %edi, %eax
68
69 movl PTR(PA_PMD_1)(%ebp), %edx
70 orl $PAE_PGD_ATTR, %edx
71 movl %edx, (%eax)
72
73 movl PTR(VA_PMD_1)(%ebp), %edi
74 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
75 andl $0x3fe00000, %eax
76 shrl $18, %eax
77 addl %edi, %eax
78
79 movl PTR(PA_PTE_1)(%ebp), %edx
80 orl $PAGE_ATTR, %edx
81 movl %edx, (%eax)
82
83 movl PTR(VA_PTE_1)(%ebp), %edi
84 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
85 andl $0x001ff000, %eax
86 shrl $9, %eax
87 addl %edi, %eax
88
89 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
90 orl $PAGE_ATTR, %edx
91 movl %edx, (%eax)
92#else
93 /* map the control page at its virtual address */
94
95 movl PTR(VA_PGD)(%ebp), %edi
96 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
97 andl $0xffc00000, %eax
98 shrl $20, %eax
99 addl %edi, %eax
100
101 movl PTR(PA_PTE_0)(%ebp), %edx
102 orl $PAGE_ATTR, %edx
103 movl %edx, (%eax)
104
105 movl PTR(VA_PTE_0)(%ebp), %edi
106 movl PTR(VA_CONTROL_PAGE)(%ebp), %eax
107 andl $0x003ff000, %eax
108 shrl $10, %eax
109 addl %edi, %eax
110
111 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
112 orl $PAGE_ATTR, %edx
113 movl %edx, (%eax)
114
115 /* identity map the control page at its physical address */
116
117 movl PTR(VA_PGD)(%ebp), %edi
118 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
119 andl $0xffc00000, %eax
120 shrl $20, %eax
121 addl %edi, %eax
122
123 movl PTR(PA_PTE_1)(%ebp), %edx
124 orl $PAGE_ATTR, %edx
125 movl %edx, (%eax)
126
127 movl PTR(VA_PTE_1)(%ebp), %edi
128 movl PTR(PA_CONTROL_PAGE)(%ebp), %eax
129 andl $0x003ff000, %eax
130 shrl $10, %eax
131 addl %edi, %eax
132
133 movl PTR(PA_CONTROL_PAGE)(%ebp), %edx
134 orl $PAGE_ATTR, %edx
135 movl %edx, (%eax)
136#endif
10 137
11 /*
12 * Must be relocatable PIC code callable as a C function, that once
13 * it starts can not use the previous processes stack.
14 */
15 .globl relocate_new_kernel
16relocate_new_kernel: 138relocate_new_kernel:
17 /* read the arguments and say goodbye to the stack */ 139 /* read the arguments and say goodbye to the stack */
18 movl 4(%esp), %ebx /* page_list */ 140 movl 4(%esp), %ebx /* page_list */
19 movl 8(%esp), %ebp /* reboot_code_buffer */ 141 movl 8(%esp), %ebp /* list of pages */
20 movl 12(%esp), %edx /* start address */ 142 movl 12(%esp), %edx /* start address */
21 movl 16(%esp), %ecx /* cpu_has_pae */ 143 movl 16(%esp), %ecx /* cpu_has_pae */
22 144
@@ -24,11 +146,26 @@ relocate_new_kernel:
24 pushl $0 146 pushl $0
25 popfl 147 popfl
26 148
27 /* set a new stack at the bottom of our page... */ 149 /* get physical address of control page now */
28 lea 4096(%ebp), %esp 150 /* this is impossible after page table switch */
151 movl PTR(PA_CONTROL_PAGE)(%ebp), %edi
29 152
30 /* store the parameters back on the stack */ 153 /* switch to new set of page tables */
31 pushl %edx /* store the start address */ 154 movl PTR(PA_PGD)(%ebp), %eax
155 movl %eax, %cr3
156
157 /* setup a new stack at the end of the physical control page */
158 lea 4096(%edi), %esp
159
160 /* jump to identity mapped page */
161 movl %edi, %eax
162 addl $(identity_mapped - relocate_kernel), %eax
163 pushl %eax
164 ret
165
166identity_mapped:
167 /* store the start address on the stack */
168 pushl %edx
32 169
33 /* Set cr0 to a known state: 170 /* Set cr0 to a known state:
34 * 31 0 == Paging disabled 171 * 31 0 == Paging disabled
@@ -113,8 +250,3 @@ relocate_new_kernel:
113 xorl %edi, %edi 250 xorl %edi, %edi
114 xorl %ebp, %ebp 251 xorl %ebp, %ebp
115 ret 252 ret
116relocate_new_kernel_end:
117
118 .globl relocate_new_kernel_size
119relocate_new_kernel_size:
120 .long relocate_new_kernel_end - relocate_new_kernel
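
The new relocate_kernel prologue maps the kexec control page twice, at its virtual address and at its physical (identity) address, by computing page-directory and page-table slots with plain mask-and-shift arithmetic before switching %cr3. A sketch of the non-PAE variant of that arithmetic (the address is invented; the masks, shifts and 0x63 attribute value are the ones in the diff):

#include <stdio.h>

int main(void)
{
        unsigned long control_page = 0x12345000UL;     /* made-up page-aligned address */
        unsigned long page_attr = 0x63UL;       /* _PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY */

        /* non-PAE: directory slot from bits 31..22, table slot from bits 21..12.
         * The assembly shifts by 20 and 10 instead, because each entry is four
         * bytes wide, so the shift lands directly on the byte offset. */
        unsigned long pgd_index = (control_page & 0xffc00000UL) >> 22;
        unsigned long pte_index = (control_page & 0x003ff000UL) >> 12;

        printf("pgd[%lu] -> page table, pte[%lu] = %#lx\n",
               pgd_index, pte_index, control_page | page_attr);
        return 0;
}

The PAE branch in the diff does the same one level deeper: bits 31..30 pick the page-directory-pointer entry, bits 29..21 the page-middle-directory entry and bits 20..12 the PTE, each entry being eight bytes wide.
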
diff --git a/arch/i386/kernel/semaphore.c b/arch/i386/kernel/semaphore.c
deleted file mode 100644
index 98352c374c76..000000000000
--- a/arch/i386/kernel/semaphore.c
+++ /dev/null
@@ -1,134 +0,0 @@
1/*
2 * i386 semaphore implementation.
3 *
4 * (C) Copyright 1999 Linus Torvalds
5 *
6 * Portions Copyright 1999 Red Hat, Inc.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
14 */
15#include <asm/semaphore.h>
16
17/*
18 * The semaphore operations have a special calling sequence that
19 * allow us to do a simpler in-line version of them. These routines
20 * need to convert that sequence back into the C sequence when
21 * there is contention on the semaphore.
22 *
23 * %eax contains the semaphore pointer on entry. Save the C-clobbered
24 * registers (%eax, %edx and %ecx) except %eax which is either a return
25 * value or just clobbered..
26 */
27asm(
28".section .sched.text\n"
29".align 4\n"
30".globl __down_failed\n"
31"__down_failed:\n\t"
32#if defined(CONFIG_FRAME_POINTER)
33 "pushl %ebp\n\t"
34 "movl %esp,%ebp\n\t"
35#endif
36 "pushl %edx\n\t"
37 "pushl %ecx\n\t"
38 "call __down\n\t"
39 "popl %ecx\n\t"
40 "popl %edx\n\t"
41#if defined(CONFIG_FRAME_POINTER)
42 "movl %ebp,%esp\n\t"
43 "popl %ebp\n\t"
44#endif
45 "ret"
46);
47
48asm(
49".section .sched.text\n"
50".align 4\n"
51".globl __down_failed_interruptible\n"
52"__down_failed_interruptible:\n\t"
53#if defined(CONFIG_FRAME_POINTER)
54 "pushl %ebp\n\t"
55 "movl %esp,%ebp\n\t"
56#endif
57 "pushl %edx\n\t"
58 "pushl %ecx\n\t"
59 "call __down_interruptible\n\t"
60 "popl %ecx\n\t"
61 "popl %edx\n\t"
62#if defined(CONFIG_FRAME_POINTER)
63 "movl %ebp,%esp\n\t"
64 "popl %ebp\n\t"
65#endif
66 "ret"
67);
68
69asm(
70".section .sched.text\n"
71".align 4\n"
72".globl __down_failed_trylock\n"
73"__down_failed_trylock:\n\t"
74#if defined(CONFIG_FRAME_POINTER)
75 "pushl %ebp\n\t"
76 "movl %esp,%ebp\n\t"
77#endif
78 "pushl %edx\n\t"
79 "pushl %ecx\n\t"
80 "call __down_trylock\n\t"
81 "popl %ecx\n\t"
82 "popl %edx\n\t"
83#if defined(CONFIG_FRAME_POINTER)
84 "movl %ebp,%esp\n\t"
85 "popl %ebp\n\t"
86#endif
87 "ret"
88);
89
90asm(
91".section .sched.text\n"
92".align 4\n"
93".globl __up_wakeup\n"
94"__up_wakeup:\n\t"
95 "pushl %edx\n\t"
96 "pushl %ecx\n\t"
97 "call __up\n\t"
98 "popl %ecx\n\t"
99 "popl %edx\n\t"
100 "ret"
101);
102
103/*
104 * rw spinlock fallbacks
105 */
106#if defined(CONFIG_SMP)
107asm(
108".section .sched.text\n"
109".align 4\n"
110".globl __write_lock_failed\n"
111"__write_lock_failed:\n\t"
112 LOCK_PREFIX "addl $" RW_LOCK_BIAS_STR ",(%eax)\n"
113"1: rep; nop\n\t"
114 "cmpl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
115 "jne 1b\n\t"
116 LOCK_PREFIX "subl $" RW_LOCK_BIAS_STR ",(%eax)\n\t"
117 "jnz __write_lock_failed\n\t"
118 "ret"
119);
120
121asm(
122".section .sched.text\n"
123".align 4\n"
124".globl __read_lock_failed\n"
125"__read_lock_failed:\n\t"
126 LOCK_PREFIX "incl (%eax)\n"
127"1: rep; nop\n\t"
128 "cmpl $1,(%eax)\n\t"
129 "js 1b\n\t"
130 LOCK_PREFIX "decl (%eax)\n\t"
131 "js __read_lock_failed\n\t"
132 "ret"
133);
134#endif
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 16d99444cf66..76a524b4c90f 100644
--- a/arch/i386/kernel/setup.c
+++ b/arch/i386/kernel/setup.c
@@ -90,18 +90,6 @@ EXPORT_SYMBOL(boot_cpu_data);
90 90
91unsigned long mmu_cr4_features; 91unsigned long mmu_cr4_features;
92 92
93#ifdef CONFIG_ACPI
94 int acpi_disabled = 0;
95#else
96 int acpi_disabled = 1;
97#endif
98EXPORT_SYMBOL(acpi_disabled);
99
100#ifdef CONFIG_ACPI
101int __initdata acpi_force = 0;
102extern acpi_interrupt_flags acpi_sci_flags;
103#endif
104
105/* for MCA, but anyone else can use it if they want */ 93/* for MCA, but anyone else can use it if they want */
106unsigned int machine_id; 94unsigned int machine_id;
107#ifdef CONFIG_MCA 95#ifdef CONFIG_MCA
@@ -149,7 +137,6 @@ EXPORT_SYMBOL(ist_info);
149struct e820map e820; 137struct e820map e820;
150 138
151extern void early_cpu_init(void); 139extern void early_cpu_init(void);
152extern void generic_apic_probe(char *);
153extern int root_mountflags; 140extern int root_mountflags;
154 141
155unsigned long saved_videomode; 142unsigned long saved_videomode;
@@ -701,238 +688,132 @@ static inline void copy_edd(void)
701} 688}
702#endif 689#endif
703 690
704static void __init parse_cmdline_early (char ** cmdline_p) 691static int __initdata user_defined_memmap = 0;
705{
706 char c = ' ', *to = command_line, *from = saved_command_line;
707 int len = 0;
708 int userdef = 0;
709 692
710 /* Save unparsed command line copy for /proc/cmdline */ 693/*
711 saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; 694 * "mem=nopentium" disables the 4MB page tables.
695 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM
696 * to <mem>, overriding the bios size.
697 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from
698 * <start> to <start>+<mem>, overriding the bios size.
699 *
700 * HPA tells me bootloaders need to parse mem=, so no new
701 * option should be mem= [also see Documentation/i386/boot.txt]
702 */
703static int __init parse_mem(char *arg)
704{
705 if (!arg)
706 return -EINVAL;
712 707
713 for (;;) { 708 if (strcmp(arg, "nopentium") == 0) {
714 if (c != ' ') 709 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
715 goto next_char; 710 disable_pse = 1;
716 /* 711 } else {
717 * "mem=nopentium" disables the 4MB page tables. 712 /* If the user specifies memory size, we
718 * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM 713 * limit the BIOS-provided memory map to
719 * to <mem>, overriding the bios size. 714 * that size. exactmap can be used to specify
720 * "memmap=XXX[KkmM]@XXX[KkmM]" defines a memory region from 715 * the exact map. mem=number can be used to
721 * <start> to <start>+<mem>, overriding the bios size. 716 * trim the existing memory map.
722 *
723 * HPA tells me bootloaders need to parse mem=, so no new
724 * option should be mem= [also see Documentation/i386/boot.txt]
725 */ 717 */
726 if (!memcmp(from, "mem=", 4)) { 718 unsigned long long mem_size;
727 if (to != command_line)
728 to--;
729 if (!memcmp(from+4, "nopentium", 9)) {
730 from += 9+4;
731 clear_bit(X86_FEATURE_PSE, boot_cpu_data.x86_capability);
732 disable_pse = 1;
733 } else {
734 /* If the user specifies memory size, we
735 * limit the BIOS-provided memory map to
736 * that size. exactmap can be used to specify
737 * the exact map. mem=number can be used to
738 * trim the existing memory map.
739 */
740 unsigned long long mem_size;
741 719
742 mem_size = memparse(from+4, &from); 720 mem_size = memparse(arg, &arg);
743 limit_regions(mem_size); 721 limit_regions(mem_size);
744 userdef=1; 722 user_defined_memmap = 1;
745 } 723 }
746 } 724 return 0;
747 725}
748 else if (!memcmp(from, "memmap=", 7)) { 726early_param("mem", parse_mem);
749 if (to != command_line)
750 to--;
751 if (!memcmp(from+7, "exactmap", 8)) {
752#ifdef CONFIG_CRASH_DUMP
753 /* If we are doing a crash dump, we
754 * still need to know the real mem
755 * size before original memory map is
756 * reset.
757 */
758 find_max_pfn();
759 saved_max_pfn = max_pfn;
760#endif
761 from += 8+7;
762 e820.nr_map = 0;
763 userdef = 1;
764 } else {
765 /* If the user specifies memory size, we
766 * limit the BIOS-provided memory map to
767 * that size. exactmap can be used to specify
768 * the exact map. mem=number can be used to
769 * trim the existing memory map.
770 */
771 unsigned long long start_at, mem_size;
772
773 mem_size = memparse(from+7, &from);
774 if (*from == '@') {
775 start_at = memparse(from+1, &from);
776 add_memory_region(start_at, mem_size, E820_RAM);
777 } else if (*from == '#') {
778 start_at = memparse(from+1, &from);
779 add_memory_region(start_at, mem_size, E820_ACPI);
780 } else if (*from == '$') {
781 start_at = memparse(from+1, &from);
782 add_memory_region(start_at, mem_size, E820_RESERVED);
783 } else {
784 limit_regions(mem_size);
785 userdef=1;
786 }
787 }
788 }
789
790 else if (!memcmp(from, "noexec=", 7))
791 noexec_setup(from + 7);
792 727
728static int __init parse_memmap(char *arg)
729{
730 if (!arg)
731 return -EINVAL;
793 732
794#ifdef CONFIG_X86_SMP 733 if (strcmp(arg, "exactmap") == 0) {
795 /* 734#ifdef CONFIG_CRASH_DUMP
796 * If the BIOS enumerates physical processors before logical, 735 /* If we are doing a crash dump, we
797 * maxcpus=N at enumeration-time can be used to disable HT. 736 * still need to know the real mem
737 * size before original memory map is
738 * reset.
798 */ 739 */
799 else if (!memcmp(from, "maxcpus=", 8)) { 740 find_max_pfn();
800 extern unsigned int maxcpus; 741 saved_max_pfn = max_pfn;
801
802 maxcpus = simple_strtoul(from + 8, NULL, 0);
803 }
804#endif 742#endif
805 743 e820.nr_map = 0;
806#ifdef CONFIG_ACPI 744 user_defined_memmap = 1;
807 /* "acpi=off" disables both ACPI table parsing and interpreter */ 745 } else {
808 else if (!memcmp(from, "acpi=off", 8)) { 746 /* If the user specifies memory size, we
809 disable_acpi(); 747 * limit the BIOS-provided memory map to
810 } 748 * that size. exactmap can be used to specify
811 749 * the exact map. mem=number can be used to
812 /* acpi=force to over-ride black-list */ 750 * trim the existing memory map.
813 else if (!memcmp(from, "acpi=force", 10)) { 751 */
814 acpi_force = 1; 752 unsigned long long start_at, mem_size;
815 acpi_ht = 1; 753
816 acpi_disabled = 0; 754 mem_size = memparse(arg, &arg);
817 } 755 if (*arg == '@') {
818 756 start_at = memparse(arg+1, &arg);
819 /* acpi=strict disables out-of-spec workarounds */ 757 add_memory_region(start_at, mem_size, E820_RAM);
820 else if (!memcmp(from, "acpi=strict", 11)) { 758 } else if (*arg == '#') {
821 acpi_strict = 1; 759 start_at = memparse(arg+1, &arg);
822 } 760 add_memory_region(start_at, mem_size, E820_ACPI);
823 761 } else if (*arg == '$') {
824 /* Limit ACPI just to boot-time to enable HT */ 762 start_at = memparse(arg+1, &arg);
825 else if (!memcmp(from, "acpi=ht", 7)) { 763 add_memory_region(start_at, mem_size, E820_RESERVED);
826 if (!acpi_force) 764 } else {
827 disable_acpi(); 765 limit_regions(mem_size);
828 acpi_ht = 1; 766 user_defined_memmap = 1;
829 }
830
831 /* "pci=noacpi" disable ACPI IRQ routing and PCI scan */
832 else if (!memcmp(from, "pci=noacpi", 10)) {
833 acpi_disable_pci();
834 }
835 /* "acpi=noirq" disables ACPI interrupt routing */
836 else if (!memcmp(from, "acpi=noirq", 10)) {
837 acpi_noirq_set();
838 } 767 }
768 }
769 return 0;
770}
771early_param("memmap", parse_memmap);
839 772
840 else if (!memcmp(from, "acpi_sci=edge", 13)) 773#ifdef CONFIG_PROC_VMCORE
841 acpi_sci_flags.trigger = 1; 774/* elfcorehdr= specifies the location of elf core header
842 775 * stored by the crashed kernel.
843 else if (!memcmp(from, "acpi_sci=level", 14)) 776 */
844 acpi_sci_flags.trigger = 3; 777static int __init parse_elfcorehdr(char *arg)
845 778{
846 else if (!memcmp(from, "acpi_sci=high", 13)) 779 if (!arg)
847 acpi_sci_flags.polarity = 1; 780 return -EINVAL;
848
849 else if (!memcmp(from, "acpi_sci=low", 12))
850 acpi_sci_flags.polarity = 3;
851
852#ifdef CONFIG_X86_IO_APIC
853 else if (!memcmp(from, "acpi_skip_timer_override", 24))
854 acpi_skip_timer_override = 1;
855
856 if (!memcmp(from, "disable_timer_pin_1", 19))
857 disable_timer_pin_1 = 1;
858 if (!memcmp(from, "enable_timer_pin_1", 18))
859 disable_timer_pin_1 = -1;
860 781
861 /* disable IO-APIC */ 782 elfcorehdr_addr = memparse(arg, &arg);
862 else if (!memcmp(from, "noapic", 6)) 783 return 0;
863 disable_ioapic_setup(); 784}
864#endif /* CONFIG_X86_IO_APIC */ 785early_param("elfcorehdr", parse_elfcorehdr);
865#endif /* CONFIG_ACPI */ 786#endif /* CONFIG_PROC_VMCORE */
866 787
867#ifdef CONFIG_X86_LOCAL_APIC 788/*
868 /* enable local APIC */ 789 * highmem=size forces highmem to be exactly 'size' bytes.
869 else if (!memcmp(from, "lapic", 5)) 790 * This works even on boxes that have no highmem otherwise.
870 lapic_enable(); 791 * This also works to reduce highmem size on bigger boxes.
792 */
793static int __init parse_highmem(char *arg)
794{
795 if (!arg)
796 return -EINVAL;
871 797
872 /* disable local APIC */ 798 highmem_pages = memparse(arg, &arg) >> PAGE_SHIFT;
873 else if (!memcmp(from, "nolapic", 6)) 799 return 0;
874 lapic_disable(); 800}
875#endif /* CONFIG_X86_LOCAL_APIC */ 801early_param("highmem", parse_highmem);
876 802
877#ifdef CONFIG_KEXEC 803/*
878 /* crashkernel=size@addr specifies the location to reserve for 804 * vmalloc=size forces the vmalloc area to be exactly 'size'
879 * a crash kernel. By reserving this memory we guarantee 805 * bytes. This can be used to increase (or decrease) the
 880 * that linux never sets it up as a DMA target. 806 * vmalloc area - the default is 128m.
881 * Useful for holding code to do something appropriate 807 */
882 * after a kernel panic. 808static int __init parse_vmalloc(char *arg)
883 */ 809{
884 else if (!memcmp(from, "crashkernel=", 12)) { 810 if (!arg)
885 unsigned long size, base; 811 return -EINVAL;
886 size = memparse(from+12, &from);
887 if (*from == '@') {
888 base = memparse(from+1, &from);
889 /* FIXME: Do I want a sanity check
890 * to validate the memory range?
891 */
892 crashk_res.start = base;
893 crashk_res.end = base + size - 1;
894 }
895 }
896#endif
897#ifdef CONFIG_PROC_VMCORE
898 /* elfcorehdr= specifies the location of elf core header
899 * stored by the crashed kernel.
900 */
901 else if (!memcmp(from, "elfcorehdr=", 11))
902 elfcorehdr_addr = memparse(from+11, &from);
903#endif
904 812
905 /* 813 __VMALLOC_RESERVE = memparse(arg, &arg);
906 * highmem=size forces highmem to be exactly 'size' bytes. 814 return 0;
907 * This works even on boxes that have no highmem otherwise.
908 * This also works to reduce highmem size on bigger boxes.
909 */
910 else if (!memcmp(from, "highmem=", 8))
911 highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT;
912
913 /*
914 * vmalloc=size forces the vmalloc area to be exactly 'size'
915 * bytes. This can be used to increase (or decrease) the
916 * vmalloc area - the default is 128m.
917 */
918 else if (!memcmp(from, "vmalloc=", 8))
919 __VMALLOC_RESERVE = memparse(from+8, &from);
920
921 next_char:
922 c = *(from++);
923 if (!c)
924 break;
925 if (COMMAND_LINE_SIZE <= ++len)
926 break;
927 *(to++) = c;
928 }
929 *to = '\0';
930 *cmdline_p = command_line;
931 if (userdef) {
932 printk(KERN_INFO "user-defined physical RAM map:\n");
933 print_memory_map("user");
934 }
935} 815}
816early_param("vmalloc", parse_vmalloc);
936 817
937/* 818/*
938 * reservetop=size reserves a hole at the top of the kernel address space which 819 * reservetop=size reserves a hole at the top of the kernel address space which
@@ -1189,6 +1070,14 @@ static unsigned long __init setup_memory(void)
1189 } 1070 }
1190 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", 1071 printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
1191 pages_to_mb(highend_pfn - highstart_pfn)); 1072 pages_to_mb(highend_pfn - highstart_pfn));
1073 num_physpages = highend_pfn;
1074 high_memory = (void *) __va(highstart_pfn * PAGE_SIZE - 1) + 1;
1075#else
1076 num_physpages = max_low_pfn;
1077 high_memory = (void *) __va(max_low_pfn * PAGE_SIZE - 1) + 1;
1078#endif
1079#ifdef CONFIG_FLATMEM
1080 max_mapnr = num_physpages;
1192#endif 1081#endif
1193 printk(KERN_NOTICE "%ldMB LOWMEM available.\n", 1082 printk(KERN_NOTICE "%ldMB LOWMEM available.\n",
1194 pages_to_mb(max_low_pfn)); 1083 pages_to_mb(max_low_pfn));
@@ -1518,17 +1407,15 @@ void __init setup_arch(char **cmdline_p)
1518 data_resource.start = virt_to_phys(_etext); 1407 data_resource.start = virt_to_phys(_etext);
1519 data_resource.end = virt_to_phys(_edata)-1; 1408 data_resource.end = virt_to_phys(_edata)-1;
1520 1409
1521 parse_cmdline_early(cmdline_p); 1410 parse_early_param();
1522 1411
1523#ifdef CONFIG_EARLY_PRINTK 1412 if (user_defined_memmap) {
1524 { 1413 printk(KERN_INFO "user-defined physical RAM map:\n");
1525 char *s = strstr(*cmdline_p, "earlyprintk="); 1414 print_memory_map("user");
1526 if (s) {
1527 setup_early_printk(strchr(s, '=') + 1);
1528 printk("early console enabled\n");
1529 }
1530 } 1415 }
1531#endif 1416
1417 strlcpy(command_line, saved_command_line, COMMAND_LINE_SIZE);
1418 *cmdline_p = command_line;
1532 1419
1533 max_low_pfn = setup_memory(); 1420 max_low_pfn = setup_memory();
1534 1421
@@ -1557,7 +1444,7 @@ void __init setup_arch(char **cmdline_p)
1557 dmi_scan_machine(); 1444 dmi_scan_machine();
1558 1445
1559#ifdef CONFIG_X86_GENERICARCH 1446#ifdef CONFIG_X86_GENERICARCH
1560 generic_apic_probe(*cmdline_p); 1447 generic_apic_probe();
1561#endif 1448#endif
1562 if (efi_enabled) 1449 if (efi_enabled)
1563 efi_map_memmap(); 1450 efi_map_memmap();
@@ -1569,9 +1456,11 @@ void __init setup_arch(char **cmdline_p)
1569 acpi_boot_table_init(); 1456 acpi_boot_table_init();
1570#endif 1457#endif
1571 1458
1459#ifdef CONFIG_PCI
1572#ifdef CONFIG_X86_IO_APIC 1460#ifdef CONFIG_X86_IO_APIC
1573 check_acpi_pci(); /* Checks more than just ACPI actually */ 1461 check_acpi_pci(); /* Checks more than just ACPI actually */
1574#endif 1462#endif
1463#endif
1575 1464
1576#ifdef CONFIG_ACPI 1465#ifdef CONFIG_ACPI
1577 acpi_boot_init(); 1466 acpi_boot_init();
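
The setup.c rework above retires the hand-rolled parse_cmdline_early() scanner in favour of one early_param() handler per option (mem=, memmap=, elfcorehdr=, highmem=, vmalloc=), all of which lean on memparse() for the size@start style syntax. A stand-alone sketch of what the new memmap= handler does with an argument like "64M@16M"; the memparse() below is a simplified stand-in for the kernel's (K/M/G suffixes only) and the values are only examples:

#include <stdio.h>
#include <stdlib.h>

static unsigned long long memparse(const char *s, const char **retptr)
{
        char *end;
        unsigned long long v = strtoull(s, &end, 0);

        switch (*end) {
        case 'G': case 'g':
                v <<= 10;
                /* fall through */
        case 'M': case 'm':
                v <<= 10;
                /* fall through */
        case 'K': case 'k':
                v <<= 10;
                end++;
        }
        *retptr = end;
        return v;
}

int main(void)
{
        const char *arg = "64M@16M";    /* as in booting with memmap=64M@16M */
        const char *rest;
        unsigned long long size = memparse(arg, &rest);

        if (*rest == '@') {
                unsigned long long start = memparse(rest + 1, &rest);
                printf("add_memory_region(%#llx, %#llx, E820_RAM)\n", start, size);
        } else {
                printf("limit_regions(%#llx)\n", size);
        }
        return 0;
}

Without an '@', '#' or '$' separator the handler falls back to limit_regions(), and memmap=exactmap clears the BIOS-provided e820 map first, exactly as the deleted open-coded parser did.
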
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index efe07990e7fc..020d873b7d21 100644
--- a/arch/i386/kernel/smpboot.c
+++ b/arch/i386/kernel/smpboot.c
@@ -177,6 +177,9 @@ static void __devinit smp_store_cpu_info(int id)
177 */ 177 */
178 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) { 178 if ((c->x86_vendor == X86_VENDOR_AMD) && (c->x86 == 6)) {
179 179
180 if (num_possible_cpus() == 1)
181 goto valid_k7;
182
180 /* Athlon 660/661 is valid. */ 183 /* Athlon 660/661 is valid. */
181 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1))) 184 if ((c->x86_model==6) && ((c->x86_mask==0) || (c->x86_mask==1)))
182 goto valid_k7; 185 goto valid_k7;
@@ -1376,7 +1379,8 @@ int __cpu_disable(void)
1376 */ 1379 */
1377 if (cpu == 0) 1380 if (cpu == 0)
1378 return -EBUSY; 1381 return -EBUSY;
1379 1382 if (nmi_watchdog == NMI_LOCAL_APIC)
1383 stop_apic_nmi_watchdog(NULL);
1380 clear_local_APIC(); 1384 clear_local_APIC();
1381 /* Allow any queued timer interrupts to get serviced */ 1385 /* Allow any queued timer interrupts to get serviced */
1382 local_irq_enable(); 1386 local_irq_enable();
@@ -1490,3 +1494,16 @@ void __init smp_intr_init(void)
1490 /* IPI for generic function call */ 1494 /* IPI for generic function call */
1491 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 1495 set_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1492} 1496}
1497
1498/*
1499 * If the BIOS enumerates physical processors before logical,
1500 * maxcpus=N at enumeration-time can be used to disable HT.
1501 */
1502static int __init parse_maxcpus(char *arg)
1503{
1504 extern unsigned int maxcpus;
1505
1506 maxcpus = simple_strtoul(arg, NULL, 0);
1507 return 0;
1508}
1509early_param("maxcpus", parse_maxcpus);
diff --git a/arch/i386/kernel/stacktrace.c b/arch/i386/kernel/stacktrace.c
deleted file mode 100644
index e62a037ab399..000000000000
--- a/arch/i386/kernel/stacktrace.c
+++ /dev/null
@@ -1,98 +0,0 @@
1/*
2 * arch/i386/kernel/stacktrace.c
3 *
4 * Stack trace management functions
5 *
6 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
7 */
8#include <linux/sched.h>
9#include <linux/stacktrace.h>
10
11static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
12{
13 return p > (void *)tinfo &&
14 p < (void *)tinfo + THREAD_SIZE - 3;
15}
16
17/*
18 * Save stack-backtrace addresses into a stack_trace buffer:
19 */
20static inline unsigned long
21save_context_stack(struct stack_trace *trace, unsigned int skip,
22 struct thread_info *tinfo, unsigned long *stack,
23 unsigned long ebp)
24{
25 unsigned long addr;
26
27#ifdef CONFIG_FRAME_POINTER
28 while (valid_stack_ptr(tinfo, (void *)ebp)) {
29 addr = *(unsigned long *)(ebp + 4);
30 if (!skip)
31 trace->entries[trace->nr_entries++] = addr;
32 else
33 skip--;
34 if (trace->nr_entries >= trace->max_entries)
35 break;
36 /*
37 * break out of recursive entries (such as
38 * end_of_stack_stop_unwind_function):
39 */
40 if (ebp == *(unsigned long *)ebp)
41 break;
42
43 ebp = *(unsigned long *)ebp;
44 }
45#else
46 while (valid_stack_ptr(tinfo, stack)) {
47 addr = *stack++;
48 if (__kernel_text_address(addr)) {
49 if (!skip)
50 trace->entries[trace->nr_entries++] = addr;
51 else
52 skip--;
53 if (trace->nr_entries >= trace->max_entries)
54 break;
55 }
56 }
57#endif
58
59 return ebp;
60}
61
62/*
63 * Save stack-backtrace addresses into a stack_trace buffer.
64 * If all_contexts is set, all contexts (hardirq, softirq and process)
65 * are saved. If not set then only the current context is saved.
66 */
67void save_stack_trace(struct stack_trace *trace,
68 struct task_struct *task, int all_contexts,
69 unsigned int skip)
70{
71 unsigned long ebp;
72 unsigned long *stack = &ebp;
73
74 WARN_ON(trace->nr_entries || !trace->max_entries);
75
76 if (!task || task == current) {
77 /* Grab ebp right from our regs: */
78 asm ("movl %%ebp, %0" : "=r" (ebp));
79 } else {
80 /* ebp is the last reg pushed by switch_to(): */
81 ebp = *(unsigned long *) task->thread.esp;
82 }
83
84 while (1) {
85 struct thread_info *context = (struct thread_info *)
86 ((unsigned long)stack & (~(THREAD_SIZE - 1)));
87
88 ebp = save_context_stack(trace, skip, context, stack, ebp);
89 stack = (unsigned long *)context->previous_esp;
90 if (!all_contexts || !stack ||
91 trace->nr_entries >= trace->max_entries)
92 break;
93 trace->entries[trace->nr_entries++] = ULONG_MAX;
94 if (trace->nr_entries >= trace->max_entries)
95 break;
96 }
97}
98
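
The deleted stacktrace.c walked the frame-pointer chain by hand: with CONFIG_FRAME_POINTER each frame keeps the caller's %ebp at [ebp] and the return address at [ebp + 4], and the walk stops on a self-referencing frame (the same walk now reports through the stacktrace_ops callbacks added to traps.c below). A user-space sketch of such a walk, assuming the compiler keeps frame pointers (build with -O0 or -fno-omit-frame-pointer); the depth limit and the monotonicity check are arbitrary safety nets:

#include <stdio.h>
#include <stdint.h>

static void walk_frames(void)
{
        uintptr_t *fp = (uintptr_t *)__builtin_frame_address(0);
        int depth;

        for (depth = 0; depth < 8 && fp; depth++) {
                uintptr_t ret = fp[1];  /* return address sits above the saved frame pointer */

                if (!ret)
                        break;
                printf("frame %d: return address %#lx\n", depth, (unsigned long)ret);
                if ((uintptr_t *)fp[0] <= fp)   /* stop on null, bogus or self-referencing frames */
                        break;
                fp = (uintptr_t *)fp[0];        /* follow the saved frame pointer */
        }
}

int main(void)
{
        walk_frames();
        return 0;
}
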
diff --git a/arch/i386/kernel/syscall_table.S b/arch/i386/kernel/syscall_table.S
index dd63d4775398..7e639f78b0b9 100644
--- a/arch/i386/kernel/syscall_table.S
+++ b/arch/i386/kernel/syscall_table.S
@@ -317,3 +317,4 @@ ENTRY(sys_call_table)
317 .long sys_tee /* 315 */ 317 .long sys_tee /* 315 */
318 .long sys_vmsplice 318 .long sys_vmsplice
319 .long sys_move_pages 319 .long sys_move_pages
320 .long sys_getcpu
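
The syscall table gains sys_getcpu as number 318 on i386. A minimal caller, assuming a kernel and libc whose headers already define SYS_getcpu (the third argument is an optional cache pointer and may be NULL):

#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>

int main(void)
{
        unsigned int cpu = 0, node = 0;

#ifdef SYS_getcpu
        if (syscall(SYS_getcpu, &cpu, &node, NULL) == 0)
                printf("running on cpu %u, node %u\n", cpu, node);
        else
                perror("getcpu");
#else
        printf("SYS_getcpu not defined by these headers\n");
#endif
        return 0;
}
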
diff --git a/arch/i386/kernel/time.c b/arch/i386/kernel/time.c
index 1302e4ab3c4f..86944acfb647 100644
--- a/arch/i386/kernel/time.c
+++ b/arch/i386/kernel/time.c
@@ -130,18 +130,33 @@ static int set_rtc_mmss(unsigned long nowtime)
130 130
131int timer_ack; 131int timer_ack;
132 132
133#if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER)
134unsigned long profile_pc(struct pt_regs *regs) 133unsigned long profile_pc(struct pt_regs *regs)
135{ 134{
136 unsigned long pc = instruction_pointer(regs); 135 unsigned long pc = instruction_pointer(regs);
137 136
138 if (!user_mode_vm(regs) && in_lock_functions(pc)) 137#ifdef CONFIG_SMP
138 if (!user_mode_vm(regs) && in_lock_functions(pc)) {
139#ifdef CONFIG_FRAME_POINTER
139 return *(unsigned long *)(regs->ebp + 4); 140 return *(unsigned long *)(regs->ebp + 4);
140 141#else
142 unsigned long *sp;
143 if ((regs->xcs & 3) == 0)
144 sp = (unsigned long *)&regs->esp;
145 else
146 sp = (unsigned long *)regs->esp;
147 /* Return address is either directly at stack pointer
148 or above a saved eflags. Eflags has bits 22-31 zero,
149 kernel addresses don't. */
150 if (sp[0] >> 22)
151 return sp[0];
152 if (sp[1] >> 22)
153 return sp[1];
154#endif
155 }
156#endif
141 return pc; 157 return pc;
142} 158}
143EXPORT_SYMBOL(profile_pc); 159EXPORT_SYMBOL(profile_pc);
144#endif
145 160
146/* 161/*
147 * This is the same as the above, except we _also_ save the current 162 * This is the same as the above, except we _also_ save the current
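
profile_pc() above now also copes with kernels built without frame pointers: in lock functions it inspects the stack directly, relying on the fact that an i386 kernel text address has some of bits 31..22 set while a saved EFLAGS word has those bits architecturally clear, so the return address is either right at the stack pointer or one word above a pushed EFLAGS. A tiny sketch of that heuristic on made-up stack contents:

#include <stdio.h>

static unsigned long pick_return_address(const unsigned long *sp)
{
        if (sp[0] >> 22)        /* looks like a kernel address */
                return sp[0];
        if (sp[1] >> 22)        /* sp[0] was EFLAGS, return address is above it */
                return sp[1];
        return 0;
}

int main(void)
{
        unsigned long stack_a[2] = { 0xc0123456UL, 0x00000246UL };      /* return address first */
        unsigned long stack_b[2] = { 0x00000246UL, 0xc0123456UL };      /* EFLAGS first */

        printf("%#lx / %#lx\n", pick_return_address(stack_a),
               pick_return_address(stack_b));
        return 0;
}
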
diff --git a/arch/i386/kernel/topology.c b/arch/i386/kernel/topology.c
index e2e281d4bcc8..07d6da36a825 100644
--- a/arch/i386/kernel/topology.c
+++ b/arch/i386/kernel/topology.c
@@ -28,6 +28,7 @@
28#include <linux/init.h> 28#include <linux/init.h>
29#include <linux/smp.h> 29#include <linux/smp.h>
30#include <linux/nodemask.h> 30#include <linux/nodemask.h>
31#include <linux/mmzone.h>
31#include <asm/cpu.h> 32#include <asm/cpu.h>
32 33
33static struct i386_cpu cpu_devices[NR_CPUS]; 34static struct i386_cpu cpu_devices[NR_CPUS];
@@ -55,34 +56,18 @@ EXPORT_SYMBOL(arch_register_cpu);
55EXPORT_SYMBOL(arch_unregister_cpu); 56EXPORT_SYMBOL(arch_unregister_cpu);
56#endif /*CONFIG_HOTPLUG_CPU*/ 57#endif /*CONFIG_HOTPLUG_CPU*/
57 58
58
59
60#ifdef CONFIG_NUMA
61#include <linux/mmzone.h>
62
63static int __init topology_init(void) 59static int __init topology_init(void)
64{ 60{
65 int i; 61 int i;
66 62
63#ifdef CONFIG_NUMA
67 for_each_online_node(i) 64 for_each_online_node(i)
68 register_one_node(i); 65 register_one_node(i);
66#endif /* CONFIG_NUMA */
69 67
70 for_each_present_cpu(i) 68 for_each_present_cpu(i)
71 arch_register_cpu(i); 69 arch_register_cpu(i);
72 return 0; 70 return 0;
73} 71}
74 72
75#else /* !CONFIG_NUMA */
76
77static int __init topology_init(void)
78{
79 int i;
80
81 for_each_present_cpu(i)
82 arch_register_cpu(i);
83 return 0;
84}
85
86#endif /* CONFIG_NUMA */
87
88subsys_initcall(topology_init); 73subsys_initcall(topology_init);
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index 4fcc6690be99..21aa1cd57773 100644
--- a/arch/i386/kernel/traps.c
+++ b/arch/i386/kernel/traps.c
@@ -51,6 +51,7 @@
 #include <asm/smp.h>
 #include <asm/arch_hooks.h>
 #include <asm/kdebug.h>
+#include <asm/stacktrace.h>
 
 #include <linux/module.h>
 
@@ -118,26 +119,16 @@ static inline int valid_stack_ptr(struct thread_info *tinfo, void *p)
 		p < (void *)tinfo + THREAD_SIZE - 3;
 }
 
-/*
- * Print one address/symbol entries per line.
- */
-static inline void print_addr_and_symbol(unsigned long addr, char *log_lvl)
-{
-	printk(" [<%08lx>] ", addr);
-
-	print_symbol("%s\n", addr);
-}
-
 static inline unsigned long print_context_stack(struct thread_info *tinfo,
 				unsigned long *stack, unsigned long ebp,
-				char *log_lvl)
+				struct stacktrace_ops *ops, void *data)
 {
 	unsigned long addr;
 
 #ifdef CONFIG_FRAME_POINTER
 	while (valid_stack_ptr(tinfo, (void *)ebp)) {
 		addr = *(unsigned long *)(ebp + 4);
-		print_addr_and_symbol(addr, log_lvl);
+		ops->address(data, addr);
 		/*
 		 * break out of recursive entries (such as
 		 * end_of_stack_stop_unwind_function):
@@ -150,30 +141,37 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
 	while (valid_stack_ptr(tinfo, stack)) {
 		addr = *stack++;
 		if (__kernel_text_address(addr))
-			print_addr_and_symbol(addr, log_lvl);
+			ops->address(data, addr);
 	}
 #endif
 	return ebp;
 }
 
+struct ops_and_data {
+	struct stacktrace_ops *ops;
+	void *data;
+};
+
 static asmlinkage int
-show_trace_unwind(struct unwind_frame_info *info, void *log_lvl)
+dump_trace_unwind(struct unwind_frame_info *info, void *data)
 {
+	struct ops_and_data *oad = (struct ops_and_data *)data;
 	int n = 0;
 
 	while (unwind(info) == 0 && UNW_PC(info)) {
 		n++;
-		print_addr_and_symbol(UNW_PC(info), log_lvl);
+		oad->ops->address(oad->data, UNW_PC(info));
 		if (arch_unw_user_mode(info))
 			break;
 	}
 	return n;
 }
 
-static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
-			       unsigned long *stack, char *log_lvl)
+void dump_trace(struct task_struct *task, struct pt_regs *regs,
+		unsigned long *stack,
+		struct stacktrace_ops *ops, void *data)
 {
-	unsigned long ebp;
+	unsigned long ebp = 0;
 
 	if (!task)
 		task = current;
@@ -181,54 +179,116 @@ static void show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
 	if (call_trace >= 0) {
 		int unw_ret = 0;
 		struct unwind_frame_info info;
+		struct ops_and_data oad = { .ops = ops, .data = data };
 
 		if (regs) {
 			if (unwind_init_frame_info(&info, task, regs) == 0)
-				unw_ret = show_trace_unwind(&info, log_lvl);
+				unw_ret = dump_trace_unwind(&info, &oad);
 		} else if (task == current)
-			unw_ret = unwind_init_running(&info, show_trace_unwind, log_lvl);
+			unw_ret = unwind_init_running(&info, dump_trace_unwind, &oad);
 		else {
 			if (unwind_init_blocked(&info, task) == 0)
-				unw_ret = show_trace_unwind(&info, log_lvl);
+				unw_ret = dump_trace_unwind(&info, &oad);
 		}
 		if (unw_ret > 0) {
 			if (call_trace == 1 && !arch_unw_user_mode(&info)) {
-				print_symbol("DWARF2 unwinder stuck at %s\n",
+				ops->warning_symbol(data, "DWARF2 unwinder stuck at %s\n",
 					     UNW_PC(&info));
 				if (UNW_SP(&info) >= PAGE_OFFSET) {
-					printk("Leftover inexact backtrace:\n");
+					ops->warning(data, "Leftover inexact backtrace:\n");
 					stack = (void *)UNW_SP(&info);
+					if (!stack)
+						return;
+					ebp = UNW_FP(&info);
 				} else
-					printk("Full inexact backtrace again:\n");
+					ops->warning(data, "Full inexact backtrace again:\n");
 			} else if (call_trace >= 1)
 				return;
 			else
-				printk("Full inexact backtrace again:\n");
+				ops->warning(data, "Full inexact backtrace again:\n");
 		} else
-			printk("Inexact backtrace:\n");
+			ops->warning(data, "Inexact backtrace:\n");
+	}
+	if (!stack) {
+		unsigned long dummy;
+		stack = &dummy;
+		if (task && task != current)
+			stack = (unsigned long *)task->thread.esp;
 	}
 
-	if (task == current) {
-		/* Grab ebp right from our regs */
-		asm ("movl %%ebp, %0" : "=r" (ebp) : );
-	} else {
-		/* ebp is the last reg pushed by switch_to */
-		ebp = *(unsigned long *) task->thread.esp;
+#ifdef CONFIG_FRAME_POINTER
+	if (!ebp) {
+		if (task == current) {
+			/* Grab ebp right from our regs */
+			asm ("movl %%ebp, %0" : "=r" (ebp) : );
+		} else {
+			/* ebp is the last reg pushed by switch_to */
+			ebp = *(unsigned long *) task->thread.esp;
+		}
 	}
+#endif
 
 	while (1) {
 		struct thread_info *context;
 		context = (struct thread_info *)
 			((unsigned long)stack & (~(THREAD_SIZE - 1)));
-		ebp = print_context_stack(context, stack, ebp, log_lvl);
+		ebp = print_context_stack(context, stack, ebp, ops, data);
+		/* Should be after the line below, but somewhere
+		   in early boot context comes out corrupted and we
+		   can't reference it -AK */
+		if (ops->stack(data, "IRQ") < 0)
+			break;
 		stack = (unsigned long*)context->previous_esp;
 		if (!stack)
 			break;
-		printk("%s =======================\n", log_lvl);
 	}
 }
+EXPORT_SYMBOL(dump_trace);
+
+static void
+print_trace_warning_symbol(void *data, char *msg, unsigned long symbol)
+{
+	printk(data);
+	print_symbol(msg, symbol);
+	printk("\n");
+}
+
+static void print_trace_warning(void *data, char *msg)
+{
+	printk("%s%s\n", (char *)data, msg);
+}
+
+static int print_trace_stack(void *data, char *name)
+{
+	return 0;
+}
+
+/*
+ * Print one address/symbol entries per line.
+ */
+static void print_trace_address(void *data, unsigned long addr)
+{
+	printk("%s [<%08lx>] ", (char *)data, addr);
+	print_symbol("%s\n", addr);
+}
+
+static struct stacktrace_ops print_trace_ops = {
+	.warning = print_trace_warning,
+	.warning_symbol = print_trace_warning_symbol,
+	.stack = print_trace_stack,
+	.address = print_trace_address,
+};
+
+static void
+show_trace_log_lvl(struct task_struct *task, struct pt_regs *regs,
+		   unsigned long * stack, char *log_lvl)
+{
+	dump_trace(task, regs, stack, &print_trace_ops, log_lvl);
+	printk("%s =======================\n", log_lvl);
+}
 
-void show_trace(struct task_struct *task, struct pt_regs *regs, unsigned long * stack)
+void show_trace(struct task_struct *task, struct pt_regs *regs,
+		unsigned long * stack)
 {
 	show_trace_log_lvl(task, regs, stack, "");
 }
@@ -291,8 +351,9 @@ void show_registers(struct pt_regs *regs)
 		ss = regs->xss & 0xffff;
 	}
 	print_modules();
-	printk(KERN_EMERG "CPU: %d\nEIP: %04x:[<%08lx>] %s VLI\n"
-		"EFLAGS: %08lx (%s %.*s) \n",
+	printk(KERN_EMERG "CPU: %d\n"
+		KERN_EMERG "EIP: %04x:[<%08lx>] %s VLI\n"
+		KERN_EMERG "EFLAGS: %08lx (%s %.*s)\n",
 		smp_processor_id(), 0xffff & regs->xcs, regs->eip,
 		print_tainted(), regs->eflags, system_utsname.release,
 		(int)strcspn(system_utsname.version, " "),
@@ -634,18 +695,24 @@ gp_in_kernel:
 	}
 }
 
-static void mem_parity_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+mem_parity_error(unsigned char reason, struct pt_regs * regs)
 {
-	printk(KERN_EMERG "Uhhuh. NMI received. Dazed and confused, but trying "
-		"to continue\n");
+	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+		"CPU %d.\n", reason, smp_processor_id());
 	printk(KERN_EMERG "You probably have a hardware problem with your RAM "
 		"chips\n");
+	if (panic_on_unrecovered_nmi)
+		panic("NMI: Not continuing");
+
+	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 
 	/* Clear and disable the memory parity error line. */
 	clear_mem_error(reason);
 }
 
-static void io_check_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+io_check_error(unsigned char reason, struct pt_regs * regs)
 {
 	unsigned long i;
 
@@ -661,7 +728,8 @@ static void io_check_error(unsigned char reason, struct pt_regs * regs)
 	outb(reason, 0x61);
 }
 
-static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
+static __kprobes void
+unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
 {
 #ifdef CONFIG_MCA
 	/* Might actually be able to figure out what the guilty party
@@ -671,15 +739,18 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
 		return;
 	}
 #endif
-	printk("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-		reason, smp_processor_id());
-	printk("Dazed and confused, but trying to continue\n");
-	printk("Do you have a strange power saving mode enabled?\n");
+	printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x on "
+		"CPU %d.\n", reason, smp_processor_id());
+	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+	if (panic_on_unrecovered_nmi)
+		panic("NMI: Not continuing");
+
+	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
 }
 
 static DEFINE_SPINLOCK(nmi_print_lock);
 
-void die_nmi (struct pt_regs *regs, const char *msg)
+void __kprobes die_nmi(struct pt_regs *regs, const char *msg)
 {
 	if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) ==
 			NOTIFY_STOP)
@@ -711,7 +782,7 @@ void die_nmi (struct pt_regs *regs, const char *msg)
 	do_exit(SIGSEGV);
 }
 
-static void default_do_nmi(struct pt_regs * regs)
+static __kprobes void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = 0;
 
@@ -728,12 +799,12 @@ static void default_do_nmi(struct pt_regs * regs)
 		 * Ok, so this is none of the documented NMI sources,
 		 * so it must be the NMI watchdog.
 		 */
-		if (nmi_watchdog) {
-			nmi_watchdog_tick(regs);
+		if (nmi_watchdog_tick(regs, reason))
 			return;
-		}
+		if (!do_nmi_callback(regs, smp_processor_id()))
 #endif
-		unknown_nmi_error(reason, regs);
+			unknown_nmi_error(reason, regs);
+
 		return;
 	}
 	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
@@ -749,14 +820,7 @@ static void default_do_nmi(struct pt_regs * regs)
 	reassert_nmi();
 }
 
-static int dummy_nmi_callback(struct pt_regs * regs, int cpu)
-{
-	return 0;
-}
-
-static nmi_callback_t nmi_callback = dummy_nmi_callback;
-
-fastcall void do_nmi(struct pt_regs * regs, long error_code)
+fastcall __kprobes void do_nmi(struct pt_regs * regs, long error_code)
 {
 	int cpu;
 
@@ -766,25 +830,11 @@ fastcall void do_nmi(struct pt_regs * regs, long error_code)
 
 	++nmi_count(cpu);
 
-	if (!rcu_dereference(nmi_callback)(regs, cpu))
-		default_do_nmi(regs);
+	default_do_nmi(regs);
 
 	nmi_exit();
 }
 
-void set_nmi_callback(nmi_callback_t callback)
-{
-	vmalloc_sync_all();
-	rcu_assign_pointer(nmi_callback, callback);
-}
-EXPORT_SYMBOL_GPL(set_nmi_callback);
-
-void unset_nmi_callback(void)
-{
-	nmi_callback = dummy_nmi_callback;
-}
-EXPORT_SYMBOL_GPL(unset_nmi_callback);
-
 #ifdef CONFIG_KPROBES
 fastcall void __kprobes do_int3(struct pt_regs *regs, long error_code)
 {
@@ -1124,20 +1174,6 @@ void __init trap_init_f00f_bug(void)
 }
 #endif
 
-#define _set_gate(gate_addr,type,dpl,addr,seg) \
-do { \
-	int __d0, __d1; \
-	__asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
-	"movw %4,%%dx\n\t" \
-	"movl %%eax,%0\n\t" \
-	"movl %%edx,%1" \
-	:"=m" (*((long *) (gate_addr))), \
-	 "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
-	:"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
-	 "3" ((char *) (addr)),"2" ((seg) << 16)); \
-} while (0)
-
-
 /*
  * This needs to use 'idt_table' rather than 'idt', and
  * thus use the _nonmapped_ version of the IDT, as the
@@ -1146,7 +1182,7 @@ do { \
  */
 void set_intr_gate(unsigned int n, void *addr)
 {
-	_set_gate(idt_table+n,14,0,addr,__KERNEL_CS);
+	_set_gate(n, DESCTYPE_INT, addr, __KERNEL_CS);
 }
 
 /*
@@ -1154,22 +1190,22 @@ void set_intr_gate(unsigned int n, void *addr)
  */
 static inline void set_system_intr_gate(unsigned int n, void *addr)
 {
-	_set_gate(idt_table+n, 14, 3, addr, __KERNEL_CS);
+	_set_gate(n, DESCTYPE_INT | DESCTYPE_DPL3, addr, __KERNEL_CS);
 }
 
 static void __init set_trap_gate(unsigned int n, void *addr)
 {
-	_set_gate(idt_table+n,15,0,addr,__KERNEL_CS);
+	_set_gate(n, DESCTYPE_TRAP, addr, __KERNEL_CS);
 }
 
 static void __init set_system_gate(unsigned int n, void *addr)
 {
-	_set_gate(idt_table+n,15,3,addr,__KERNEL_CS);
+	_set_gate(n, DESCTYPE_TRAP | DESCTYPE_DPL3, addr, __KERNEL_CS);
 }
 
 static void __init set_task_gate(unsigned int n, unsigned int gdt_entry)
 {
-	_set_gate(idt_table+n,5,0,0,(gdt_entry<<3));
+	_set_gate(n, DESCTYPE_TASK, (void *)0, (gdt_entry<<3));
 }
 
 
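The traps.c rework turns the old printk-only stack walker into the callback-driven dump_trace(): every frame is reported through a caller-supplied struct stacktrace_ops, and the old console output survives only as the print_trace_ops instance above. The sketch below shows how another in-kernel user might collect raw return addresses through the same interface. The ops field names and the dump_trace() signature mirror this hunk; the collector type, its names, and the assumption that <asm/stacktrace.h> declares both struct stacktrace_ops and dump_trace() are mine, not part of the patch.

/* Hedged sketch of a dump_trace() client modelled on the callbacks in the
 * hunk above.  It only stores raw text addresses into a fixed array; the
 * collector is hypothetical, the ops layout is the one the patch uses. */
#include <linux/kernel.h>
#include <linux/sched.h>
#include <asm/stacktrace.h>

struct addr_collector {
	unsigned long entries[16];
	unsigned int nr;
};

static void collect_warning(void *data, char *msg) { }
static void collect_warning_symbol(void *data, char *msg, unsigned long sym) { }
static int collect_stack(void *data, char *name) { return 0; }

static void collect_address(void *data, unsigned long addr)
{
	struct addr_collector *c = data;

	if (c->nr < ARRAY_SIZE(c->entries))
		c->entries[c->nr++] = addr;
}

static struct stacktrace_ops collect_ops = {
	.warning	= collect_warning,
	.warning_symbol	= collect_warning_symbol,
	.stack		= collect_stack,
	.address	= collect_address,
};

/* e.g. called with (current, NULL) to snapshot the current kernel stack;
 * passing a NULL stack pointer relies on the new "if (!stack)" fallback. */
static void snapshot_stack(struct task_struct *task, struct pt_regs *regs)
{
	struct addr_collector c = { .nr = 0 };

	dump_trace(task, regs, NULL, &collect_ops, &c);
	/* c.entries[0..c.nr-1] now holds the collected return addresses */
}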
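The _set_gate() conversion replaces magic type/dpl numbers with named DESCTYPE_* constants. The removed macro's encoding, 0x8000 + (dpl<<13) + (type<<8), makes the mapping easy to check; the host-side sketch below reproduces it. The DESCTYPE_* values are written out here as assumptions derived from that formula, not quoted from the new <asm/desc.h>.

/* Hedged sketch: reproduce the old _set_gate() high-word encoding and
 * compare it with the named descriptor types the patch switches to.
 * The DESCTYPE_* values are derived from 0x8000 + (dpl<<13) + (type<<8)
 * and assumed to match the header definitions. */
#include <stdio.h>

#define OLD_GATE_FLAGS(type, dpl)	(0x8000 + ((dpl) << 13) + ((type) << 8))

#define DESCTYPE_TASK	0x8500		/* present, type 5  (task gate)      */
#define DESCTYPE_INT	0x8e00		/* present, type 14 (interrupt gate) */
#define DESCTYPE_TRAP	0x8f00		/* present, type 15 (trap gate)      */
#define DESCTYPE_DPL3	0x6000		/* descriptor privilege level 3      */

int main(void)
{
	/* set_intr_gate: old (type 14, dpl 0) -> DESCTYPE_INT */
	printf("%#x %#x\n", OLD_GATE_FLAGS(14, 0), DESCTYPE_INT);
	/* set_system_intr_gate: old (14, 3) -> DESCTYPE_INT | DESCTYPE_DPL3 */
	printf("%#x %#x\n", OLD_GATE_FLAGS(14, 3), DESCTYPE_INT | DESCTYPE_DPL3);
	/* set_trap_gate: old (15, 0) -> DESCTYPE_TRAP */
	printf("%#x %#x\n", OLD_GATE_FLAGS(15, 0), DESCTYPE_TRAP);
	/* set_task_gate: old (5, 0) -> DESCTYPE_TASK */
	printf("%#x %#x\n", OLD_GATE_FLAGS(5, 0), DESCTYPE_TASK);
	return 0;
}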
diff --git a/arch/i386/kernel/tsc.c b/arch/i386/kernel/tsc.c
index 7e0d8dab2075..b8fa0a8b2e47 100644
--- a/arch/i386/kernel/tsc.c
+++ b/arch/i386/kernel/tsc.c
@@ -192,7 +192,7 @@ int recalibrate_cpu_khz(void)
 
 EXPORT_SYMBOL(recalibrate_cpu_khz);
 
-void tsc_init(void)
+void __init tsc_init(void)
 {
 	if (!cpu_has_tsc || tsc_disable)
 		return;