path: root/arch/x86/kernel
author     Ingo Molnar <mingo@elte.hu>   2008-07-18 13:31:12 -0400
committer  Ingo Molnar <mingo@elte.hu>   2008-07-18 13:31:12 -0400
commit     3e370b29d35fb01bfb92c2814d6f79bf6a2cb970 (patch)
tree       3b8fb467d60bfe6a34686f4abdc3a60050ba40a4 /arch/x86/kernel
parent     88d1dce3a74367291f65a757fbdcaf17f042f30c (diff)
parent     5b664cb235e97afbf34db9c4d77f08ebd725335e (diff)
Merge branch 'linus' into x86/pci-ioapic-boot-irq-quirks
Conflicts:
	drivers/pci/quirks.c

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- | arch/x86/kernel/Makefile | 12
-rw-r--r-- | arch/x86/kernel/acpi/boot.c | 57
-rw-r--r-- | arch/x86/kernel/acpi/processor.c | 6
-rw-r--r-- | arch/x86/kernel/acpi/sleep.c | 32
-rw-r--r-- | arch/x86/kernel/alternative.c | 22
-rw-r--r-- | arch/x86/kernel/apic_32.c | 29
-rw-r--r-- | arch/x86/kernel/apm_32.c | 12
-rw-r--r-- | arch/x86/kernel/asm-offsets_64.c | 2
-rw-r--r-- | arch/x86/kernel/cpu/amd_64.c | 11
-rw-r--r-- | arch/x86/kernel/cpu/centaur_64.c | 12
-rw-r--r-- | arch/x86/kernel/cpu/common_64.c | 17
-rw-r--r-- | arch/x86/kernel/cpu/intel.c | 4
-rw-r--r-- | arch/x86/kernel/cpu/intel_64.c | 12
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce_64.c | 10
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/non-fatal.c | 2
-rw-r--r-- | arch/x86/kernel/cpu/mtrr/main.c | 4
-rw-r--r-- | arch/x86/kernel/cpu/perfctr-watchdog.c | 4
-rw-r--r-- | arch/x86/kernel/cpuid.c | 27
-rw-r--r-- | arch/x86/kernel/e820.c | 161
-rw-r--r-- | arch/x86/kernel/early-quirks.c | 26
-rw-r--r-- | arch/x86/kernel/early_printk.c | 2
-rw-r--r-- | arch/x86/kernel/efi.c | 2
-rw-r--r-- | arch/x86/kernel/entry_32.S | 73
-rw-r--r-- | arch/x86/kernel/entry_64.S | 139
-rw-r--r-- | arch/x86/kernel/ftrace.c | 141
-rw-r--r-- | arch/x86/kernel/genx2apic_uv_x.c | 75
-rw-r--r-- | arch/x86/kernel/hpet.c | 20
-rw-r--r-- | arch/x86/kernel/i386_ksyms_32.c | 9
-rw-r--r-- | arch/x86/kernel/io_apic_32.c | 42
-rw-r--r-- | arch/x86/kernel/io_apic_64.c | 61
-rw-r--r-- | arch/x86/kernel/irqinit_64.c | 4
-rw-r--r-- | arch/x86/kernel/ldt.c | 2
-rw-r--r-- | arch/x86/kernel/machine_kexec_32.c | 4
-rw-r--r-- | arch/x86/kernel/machine_kexec_64.c | 4
-rw-r--r-- | arch/x86/kernel/microcode.c | 6
-rw-r--r-- | arch/x86/kernel/mpparse.c | 20
-rw-r--r-- | arch/x86/kernel/msr.c | 16
-rw-r--r-- | arch/x86/kernel/nmi.c | 9
-rw-r--r-- | arch/x86/kernel/numaq_32.c | 7
-rw-r--r-- | arch/x86/kernel/paravirt.c | 2
-rw-r--r-- | arch/x86/kernel/pci-gart_64.c | 8
-rw-r--r-- | arch/x86/kernel/process.c | 30
-rw-r--r-- | arch/x86/kernel/process_32.c | 3
-rw-r--r-- | arch/x86/kernel/process_64.c | 3
-rw-r--r-- | arch/x86/kernel/quirks.c | 2
-rw-r--r-- | arch/x86/kernel/setup.c | 73
-rw-r--r-- | arch/x86/kernel/setup_percpu.c | 8
-rw-r--r-- | arch/x86/kernel/smp.c | 158
-rw-r--r-- | arch/x86/kernel/smpboot.c | 8
-rw-r--r-- | arch/x86/kernel/smpcommon.c | 56
-rw-r--r-- | arch/x86/kernel/stacktrace.c | 2
-rw-r--r-- | arch/x86/kernel/time_32.c | 3
-rw-r--r-- | arch/x86/kernel/time_64.c | 18
-rw-r--r-- | arch/x86/kernel/tlb_32.c | 2
-rw-r--r-- | arch/x86/kernel/tlb_64.c | 2
-rw-r--r-- | arch/x86/kernel/traps_32.c | 177
-rw-r--r-- | arch/x86/kernel/traps_64.c | 512
-rw-r--r-- | arch/x86/kernel/tsc.c | 535
-rw-r--r-- | arch/x86/kernel/tsc_32.c | 455
-rw-r--r-- | arch/x86/kernel/tsc_64.c | 357
-rw-r--r-- | arch/x86/kernel/visws_quirks.c | 709
-rw-r--r-- | arch/x86/kernel/vmi_32.c | 2
-rw-r--r-- | arch/x86/kernel/vmiclock_32.c | 4
-rw-r--r-- | arch/x86/kernel/vmlinux_32.lds.S | 8
-rw-r--r-- | arch/x86/kernel/vmlinux_64.lds.S | 10
-rw-r--r-- | arch/x86/kernel/vsyscall_64.c | 7
-rw-r--r-- | arch/x86/kernel/x8664_ksyms_64.c | 11
67 files changed, 2581 insertions, 1682 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 54829e2b5160..da140611bb57 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -6,6 +6,12 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu
6 6
7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) 7CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
8 8
9ifdef CONFIG_FTRACE
10# Do not profile debug utilities
11CFLAGS_REMOVE_tsc.o = -pg
12CFLAGS_REMOVE_rtc.o = -pg
13endif
14
9# 15#
10# vsyscalls (which work on the user stack) should have 16# vsyscalls (which work on the user stack) should have
11# no stack-protector checks: 17# no stack-protector checks:
@@ -13,12 +19,13 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE)
13nostackp := $(call cc-option, -fno-stack-protector) 19nostackp := $(call cc-option, -fno-stack-protector)
14CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 20CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
15CFLAGS_hpet.o := $(nostackp) 21CFLAGS_hpet.o := $(nostackp)
16CFLAGS_tsc_64.o := $(nostackp) 22CFLAGS_tsc.o := $(nostackp)
17 23
18obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o 24obj-y := process_$(BITS).o signal_$(BITS).o entry_$(BITS).o
19obj-y += traps_$(BITS).o irq_$(BITS).o 25obj-y += traps_$(BITS).o irq_$(BITS).o
20obj-y += time_$(BITS).o ioport.o ldt.o 26obj-y += time_$(BITS).o ioport.o ldt.o
21obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 27obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o
28obj-$(CONFIG_X86_VISWS) += visws_quirks.o
22obj-$(CONFIG_X86_32) += probe_roms_32.o 29obj-$(CONFIG_X86_32) += probe_roms_32.o
23obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 30obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
24obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 31obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
@@ -26,7 +33,7 @@ obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
26obj-y += bootflag.o e820.o 33obj-y += bootflag.o e820.o
27obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o 34obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o
28obj-y += alternative.o i8253.o pci-nommu.o 35obj-y += alternative.o i8253.o pci-nommu.o
29obj-y += tsc_$(BITS).o io_delay.o rtc.o 36obj-y += tsc.o io_delay.o rtc.o
30 37
31obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 38obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
32obj-y += process.o 39obj-y += process.o
@@ -56,6 +63,7 @@ obj-$(CONFIG_X86_MPPARSE) += mpparse.o
56obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o 63obj-$(CONFIG_X86_LOCAL_APIC) += apic_$(BITS).o nmi.o
57obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o 64obj-$(CONFIG_X86_IO_APIC) += io_apic_$(BITS).o
58obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 65obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
66obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
59obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 67obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
60obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 68obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
61obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 69obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
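The CFLAGS_REMOVE_* lines above keep tsc.o and rtc.o out of -pg instrumentation, presumably because the tracer itself leans on these clock paths. A hypothetical userspace sketch (names and values are illustrative, not kernel code) of why profiling the tracer's own clock source would recurse:

```c
#include <stdio.h>

/* Sketch only: gcc's -pg makes every function call a profiling hook
 * (mcount) on entry.  If the TSC/RTC code the tracer uses for its own
 * timestamps were built with -pg, the hook would re-enter itself,
 * hence CFLAGS_REMOVE_tsc.o/rtc.o = -pg above. */
static void mcount(void)                 /* stand-in for the real hook */
{
        puts("mcount: tracer hook called");
}

static unsigned long read_clock(void)    /* imagine this is the TSC read path */
{
        mcount();                        /* what -pg would insert implicitly */
        return 42;                       /* placeholder timestamp */
}

int main(void)
{
        printf("timestamp = %lu\n", read_clock());
        return 0;
}
```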
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 5c0107602b62..f489d7a9be92 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -37,6 +37,7 @@
37#include <asm/pgtable.h> 37#include <asm/pgtable.h>
38#include <asm/io_apic.h> 38#include <asm/io_apic.h>
39#include <asm/apic.h> 39#include <asm/apic.h>
40#include <asm/genapic.h>
40#include <asm/io.h> 41#include <asm/io.h>
41#include <asm/mpspec.h> 42#include <asm/mpspec.h>
42#include <asm/smp.h> 43#include <asm/smp.h>
@@ -83,8 +84,6 @@ int acpi_lapic;
83int acpi_ioapic; 84int acpi_ioapic;
84int acpi_strict; 85int acpi_strict;
85 86
86static int disable_irq0_through_ioapic __initdata;
87
88u8 acpi_sci_flags __initdata; 87u8 acpi_sci_flags __initdata;
89int acpi_sci_override_gsi __initdata; 88int acpi_sci_override_gsi __initdata;
90int acpi_skip_timer_override __initdata; 89int acpi_skip_timer_override __initdata;
@@ -108,21 +107,6 @@ static u64 acpi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
108 */ 107 */
109enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC; 108enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
110 109
111#ifdef CONFIG_X86_64
112
113/* rely on all ACPI tables being in the direct mapping */
114char *__init __acpi_map_table(unsigned long phys_addr, unsigned long size)
115{
116 if (!phys_addr || !size)
117 return NULL;
118
119 if (phys_addr+size <= (max_pfn_mapped << PAGE_SHIFT) + PAGE_SIZE)
120 return __va(phys_addr);
121
122 return NULL;
123}
124
125#else
126 110
127/* 111/*
128 * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END, 112 * Temporarily use the virtual area starting from FIX_IO_APIC_BASE_END,
@@ -141,11 +125,15 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
141 unsigned long base, offset, mapped_size; 125 unsigned long base, offset, mapped_size;
142 int idx; 126 int idx;
143 127
144 if (phys + size < 8 * 1024 * 1024) 128 if (!phys || !size)
129 return NULL;
130
131 if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT))
145 return __va(phys); 132 return __va(phys);
146 133
147 offset = phys & (PAGE_SIZE - 1); 134 offset = phys & (PAGE_SIZE - 1);
148 mapped_size = PAGE_SIZE - offset; 135 mapped_size = PAGE_SIZE - offset;
136 clear_fixmap(FIX_ACPI_END);
149 set_fixmap(FIX_ACPI_END, phys); 137 set_fixmap(FIX_ACPI_END, phys);
150 base = fix_to_virt(FIX_ACPI_END); 138 base = fix_to_virt(FIX_ACPI_END);
151 139
@@ -157,13 +145,13 @@ char *__init __acpi_map_table(unsigned long phys, unsigned long size)
157 if (--idx < FIX_ACPI_BEGIN) 145 if (--idx < FIX_ACPI_BEGIN)
158 return NULL; /* cannot handle this */ 146 return NULL; /* cannot handle this */
159 phys += PAGE_SIZE; 147 phys += PAGE_SIZE;
148 clear_fixmap(idx);
160 set_fixmap(idx, phys); 149 set_fixmap(idx, phys);
161 mapped_size += PAGE_SIZE; 150 mapped_size += PAGE_SIZE;
162 } 151 }
163 152
164 return ((unsigned char *)base + offset); 153 return ((unsigned char *)base + offset);
165} 154}
166#endif
167 155
168#ifdef CONFIG_PCI_MMCONFIG 156#ifdef CONFIG_PCI_MMCONFIG
169/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */ 157/* The physical address of the MMCONFIG aperture. Set from ACPI tables. */
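The reworked __acpi_map_table() now rejects NULL/zero requests, returns a direct-mapping address when the table sits below max_low_pfn_mapped, and otherwise maps it through FIX_ACPI_* fixmap slots, clearing each slot before reuse. A small userspace sketch (4 KiB pages assumed, addresses invented) of the offset/page arithmetic the fixmap loop performs:

```c
#include <stdio.h>

#define PAGE_SIZE 4096UL

/* Minimal sketch of the arithmetic in __acpi_map_table(): split a
 * physical range into the in-page offset plus the number of fixmap
 * pages needed to cover it. */
int main(void)
{
        unsigned long phys = 0x7fe1234;     /* example ACPI table address */
        unsigned long size = 0x2000;        /* example table length */
        unsigned long offset = phys & (PAGE_SIZE - 1);
        unsigned long mapped = PAGE_SIZE - offset;
        unsigned int pages = 1;

        while (mapped < size) {             /* mirrors the set_fixmap() loop */
                mapped += PAGE_SIZE;
                pages++;
        }
        printf("offset %#lx, %u fixmap page(s)\n", offset, pages);
        return 0;
}
```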
@@ -992,10 +980,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
992 int pin; 980 int pin;
993 struct mp_config_intsrc mp_irq; 981 struct mp_config_intsrc mp_irq;
994 982
995 /* Skip the 8254 timer interrupt (IRQ 0) if requested. */
996 if (bus_irq == 0 && disable_irq0_through_ioapic)
997 return;
998
999 /* 983 /*
1000 * Convert 'gsi' to 'ioapic.pin'. 984 * Convert 'gsi' to 'ioapic.pin'.
1001 */ 985 */
@@ -1062,10 +1046,6 @@ void __init mp_config_acpi_legacy_irqs(void)
1062 for (i = 0; i < 16; i++) { 1046 for (i = 0; i < 16; i++) {
1063 int idx; 1047 int idx;
1064 1048
1065 /* Skip the 8254 timer interrupt (IRQ 0) if requested. */
1066 if (i == 0 && disable_irq0_through_ioapic)
1067 continue;
1068
1069 for (idx = 0; idx < mp_irq_entries; idx++) { 1049 for (idx = 0; idx < mp_irq_entries; idx++) {
1070 struct mp_config_intsrc *irq = mp_irqs + idx; 1050 struct mp_config_intsrc *irq = mp_irqs + idx;
1071 1051
@@ -1373,8 +1353,6 @@ static void __init acpi_process_madt(void)
1373 return; 1353 return;
1374} 1354}
1375 1355
1376#ifdef __i386__
1377
1378static int __init disable_acpi_irq(const struct dmi_system_id *d) 1356static int __init disable_acpi_irq(const struct dmi_system_id *d)
1379{ 1357{
1380 if (!acpi_force) { 1358 if (!acpi_force) {
@@ -1425,13 +1403,12 @@ static int __init force_acpi_ht(const struct dmi_system_id *d)
1425} 1403}
1426 1404
1427/* 1405/*
1428 * Don't register any I/O APIC entries for the 8254 timer IRQ. 1406 * Force ignoring BIOS IRQ0 pin2 override
1429 */ 1407 */
1430static int __init 1408static int __init dmi_ignore_irq0_timer_override(const struct dmi_system_id *d)
1431dmi_disable_irq0_through_ioapic(const struct dmi_system_id *d)
1432{ 1409{
1433 pr_notice("%s detected: disabling IRQ 0 through I/O APIC\n", d->ident); 1410 pr_notice("%s detected: Ignoring BIOS IRQ0 pin2 override\n", d->ident);
1434 disable_irq0_through_ioapic = 1; 1411 acpi_skip_timer_override = 1;
1435 return 0; 1412 return 0;
1436} 1413}
1437 1414
@@ -1609,11 +1586,11 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1609 * is enabled. This input is incorrectly designated the 1586 * is enabled. This input is incorrectly designated the
1610 * ISA IRQ 0 via an interrupt source override even though 1587 * ISA IRQ 0 via an interrupt source override even though
1611 * it is wired to the output of the master 8259A and INTIN0 1588 * it is wired to the output of the master 8259A and INTIN0
1612 * is not connected at all. Abandon any attempts to route 1589 * is not connected at all. Force ignoring BIOS IRQ0 pin2
1613 * IRQ 0 through the I/O APIC therefore. 1590 * override in that cases.
1614 */ 1591 */
1615 { 1592 {
1616 .callback = dmi_disable_irq0_through_ioapic, 1593 .callback = dmi_ignore_irq0_timer_override,
1617 .ident = "HP NX6125 laptop", 1594 .ident = "HP NX6125 laptop",
1618 .matches = { 1595 .matches = {
1619 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), 1596 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1621,7 +1598,7 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1621 }, 1598 },
1622 }, 1599 },
1623 { 1600 {
1624 .callback = dmi_disable_irq0_through_ioapic, 1601 .callback = dmi_ignore_irq0_timer_override,
1625 .ident = "HP NX6325 laptop", 1602 .ident = "HP NX6325 laptop",
1626 .matches = { 1603 .matches = {
1627 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"), 1604 DMI_MATCH(DMI_SYS_VENDOR, "Hewlett-Packard"),
@@ -1631,8 +1608,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = {
1631 {} 1608 {}
1632}; 1609};
1633 1610
1634#endif /* __i386__ */
1635
1636/* 1611/*
1637 * acpi_boot_table_init() and acpi_boot_init() 1612 * acpi_boot_table_init() and acpi_boot_init()
1638 * called from setup_arch(), always. 1613 * called from setup_arch(), always.
@@ -1660,9 +1635,7 @@ int __init acpi_boot_table_init(void)
1660{ 1635{
1661 int error; 1636 int error;
1662 1637
1663#ifdef __i386__
1664 dmi_check_system(acpi_dmi_table); 1638 dmi_check_system(acpi_dmi_table);
1665#endif
1666 1639
1667 /* 1640 /*
1668 * If acpi_disabled, bail out 1641 * If acpi_disabled, bail out
diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c
index de2d2e4ebad9..7c074eec39fb 100644
--- a/arch/x86/kernel/acpi/processor.c
+++ b/arch/x86/kernel/acpi/processor.c
@@ -56,6 +56,12 @@ static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c)
56 if (cpu_has(c, X86_FEATURE_ACPI)) 56 if (cpu_has(c, X86_FEATURE_ACPI))
57 buf[2] |= ACPI_PDC_T_FFH; 57 buf[2] |= ACPI_PDC_T_FFH;
58 58
59 /*
60 * If mwait/monitor is unsupported, C2/C3_FFH will be disabled
61 */
62 if (!cpu_has(c, X86_FEATURE_MWAIT))
63 buf[2] &= ~(ACPI_PDC_C_C2C3_FFH);
64
59 obj->type = ACPI_TYPE_BUFFER; 65 obj->type = ACPI_TYPE_BUFFER;
60 obj->buffer.length = 12; 66 obj->buffer.length = 12;
61 obj->buffer.pointer = (u8 *) buf; 67 obj->buffer.pointer = (u8 *) buf;
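The new hunk masks the C2/C3 FFH capability out of the _PDC buffer when MONITOR/MWAIT is absent. A minimal sketch of that masking; the bit value and the has_mwait flag are stand-ins for the real ACPI_PDC_* definition and cpu_has() test:

```c
#include <stdio.h>
#include <stdint.h>

#define ACPI_PDC_C_C2C3_FFH  (1u << 9)      /* assumed value, sketch only */

int main(void)
{
        uint32_t caps = 0xffff;             /* pretend capability word (buf[2]) */
        int has_mwait = 0;                  /* e.g. cpu_has(c, X86_FEATURE_MWAIT) */

        if (!has_mwait)
                caps &= ~ACPI_PDC_C_C2C3_FFH;

        printf("caps = %#x\n", caps);
        return 0;
}
```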
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index e6a4b564ccaa..868de3d5c39d 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -23,6 +23,15 @@ static unsigned long acpi_realmode;
23static char temp_stack[10240]; 23static char temp_stack[10240];
24#endif 24#endif
25 25
26/* XXX: this macro should move to asm-x86/segment.h and be shared with the
27 boot code... */
28#define GDT_ENTRY(flags, base, limit) \
29 (((u64)(base & 0xff000000) << 32) | \
30 ((u64)flags << 40) | \
31 ((u64)(limit & 0x00ff0000) << 32) | \
32 ((u64)(base & 0x00ffffff) << 16) | \
33 ((u64)(limit & 0x0000ffff)))
34
26/** 35/**
27 * acpi_save_state_mem - save kernel state 36 * acpi_save_state_mem - save kernel state
28 * 37 *
@@ -51,18 +60,27 @@ int acpi_save_state_mem(void)
51 header->video_mode = saved_video_mode; 60 header->video_mode = saved_video_mode;
52 61
53 header->wakeup_jmp_seg = acpi_wakeup_address >> 4; 62 header->wakeup_jmp_seg = acpi_wakeup_address >> 4;
63
64 /*
65 * Set up the wakeup GDT. We set these up as Big Real Mode,
66 * that is, with limits set to 4 GB. At least the Lenovo
67 * Thinkpad X61 is known to need this for the video BIOS
68 * initialization quirk to work; this is likely to also
69 * be the case for other laptops or integrated video devices.
70 */
71
54 /* GDT[0]: GDT self-pointer */ 72 /* GDT[0]: GDT self-pointer */
55 header->wakeup_gdt[0] = 73 header->wakeup_gdt[0] =
56 (u64)(sizeof(header->wakeup_gdt) - 1) + 74 (u64)(sizeof(header->wakeup_gdt) - 1) +
57 ((u64)(acpi_wakeup_address + 75 ((u64)(acpi_wakeup_address +
58 ((char *)&header->wakeup_gdt - (char *)acpi_realmode)) 76 ((char *)&header->wakeup_gdt - (char *)acpi_realmode))
59 << 16); 77 << 16);
60 /* GDT[1]: real-mode-like code segment */ 78 /* GDT[1]: big real mode-like code segment */
61 header->wakeup_gdt[1] = (0x009bULL << 40) + 79 header->wakeup_gdt[1] =
62 ((u64)acpi_wakeup_address << 16) + 0xffff; 80 GDT_ENTRY(0x809b, acpi_wakeup_address, 0xfffff);
63 /* GDT[2]: real-mode-like data segment */ 81 /* GDT[2]: big real mode-like data segment */
64 header->wakeup_gdt[2] = (0x0093ULL << 40) + 82 header->wakeup_gdt[2] =
65 ((u64)acpi_wakeup_address << 16) + 0xffff; 83 GDT_ENTRY(0x8093, acpi_wakeup_address, 0xfffff);
66 84
67#ifndef CONFIG_64BIT 85#ifndef CONFIG_64BIT
68 store_gdt((struct desc_ptr *)&header->pmode_gdt); 86 store_gdt((struct desc_ptr *)&header->pmode_gdt);
@@ -140,6 +158,8 @@ static int __init acpi_sleep_setup(char *str)
140 acpi_realmode_flags |= 2; 158 acpi_realmode_flags |= 2;
141 if (strncmp(str, "s3_beep", 7) == 0) 159 if (strncmp(str, "s3_beep", 7) == 0)
142 acpi_realmode_flags |= 4; 160 acpi_realmode_flags |= 4;
161 if (strncmp(str, "old_ordering", 12) == 0)
162 acpi_old_suspend_ordering();
143 str = strchr(str, ','); 163 str = strchr(str, ',');
144 if (str != NULL) 164 if (str != NULL)
145 str += strspn(str, ", \t"); 165 str += strspn(str, ", \t");
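The wakeup GDT entries are now "big real mode" descriptors (4 GB limits, granularity bit set) built with the temporary GDT_ENTRY() macro above. A standalone sketch that packs the same fields for an illustrative wakeup address (0x9a000 is made up) and prints the resulting descriptors:

```c
#include <stdio.h>
#include <stdint.h>

typedef uint64_t u64;

/* Same bit packing as the GDT_ENTRY() macro added above
 * (base[31:24], flags, limit[19:16], base[23:0], limit[15:0]). */
#define GDT_ENTRY(flags, base, limit)            \
        (((u64)((base) & 0xff000000) << 32) |    \
         ((u64)(flags) << 40)                |   \
         ((u64)((limit) & 0x00ff0000) << 32) |   \
         ((u64)((base) & 0x00ffffff) << 16)  |   \
         ((u64)((limit) & 0x0000ffff)))

int main(void)
{
        u64 code = GDT_ENTRY(0x809b, 0x9a000, 0xfffff); /* big real mode code seg */
        u64 data = GDT_ENTRY(0x8093, 0x9a000, 0xfffff); /* big real mode data seg */

        printf("code descriptor: %#018llx\n", (unsigned long long)code);
        printf("data descriptor: %#018llx\n", (unsigned long long)data);
        return 0;
}
```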
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 65c7857a90dd..2763cb37b553 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -1,6 +1,6 @@
1#include <linux/module.h> 1#include <linux/module.h>
2#include <linux/sched.h> 2#include <linux/sched.h>
3#include <linux/spinlock.h> 3#include <linux/mutex.h>
4#include <linux/list.h> 4#include <linux/list.h>
5#include <linux/kprobes.h> 5#include <linux/kprobes.h>
6#include <linux/mm.h> 6#include <linux/mm.h>
@@ -143,7 +143,7 @@ static const unsigned char *const p6_nops[ASM_NOP_MAX+1] = {
143#ifdef CONFIG_X86_64 143#ifdef CONFIG_X86_64
144 144
145extern char __vsyscall_0; 145extern char __vsyscall_0;
146static inline const unsigned char*const * find_nop_table(void) 146const unsigned char *const *find_nop_table(void)
147{ 147{
148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || 148 return boot_cpu_data.x86_vendor != X86_VENDOR_INTEL ||
149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops; 149 boot_cpu_data.x86 < 6 ? k8_nops : p6_nops;
@@ -162,7 +162,7 @@ static const struct nop {
162 { -1, NULL } 162 { -1, NULL }
163}; 163};
164 164
165static const unsigned char*const * find_nop_table(void) 165const unsigned char *const *find_nop_table(void)
166{ 166{
167 const unsigned char *const *noptable = intel_nops; 167 const unsigned char *const *noptable = intel_nops;
168 int i; 168 int i;
@@ -279,7 +279,7 @@ struct smp_alt_module {
279 struct list_head next; 279 struct list_head next;
280}; 280};
281static LIST_HEAD(smp_alt_modules); 281static LIST_HEAD(smp_alt_modules);
282static DEFINE_SPINLOCK(smp_alt); 282static DEFINE_MUTEX(smp_alt);
283static int smp_mode = 1; /* protected by smp_alt */ 283static int smp_mode = 1; /* protected by smp_alt */
284 284
285void alternatives_smp_module_add(struct module *mod, char *name, 285void alternatives_smp_module_add(struct module *mod, char *name,
@@ -312,12 +312,12 @@ void alternatives_smp_module_add(struct module *mod, char *name,
312 __func__, smp->locks, smp->locks_end, 312 __func__, smp->locks, smp->locks_end,
313 smp->text, smp->text_end, smp->name); 313 smp->text, smp->text_end, smp->name);
314 314
315 spin_lock(&smp_alt); 315 mutex_lock(&smp_alt);
316 list_add_tail(&smp->next, &smp_alt_modules); 316 list_add_tail(&smp->next, &smp_alt_modules);
317 if (boot_cpu_has(X86_FEATURE_UP)) 317 if (boot_cpu_has(X86_FEATURE_UP))
318 alternatives_smp_unlock(smp->locks, smp->locks_end, 318 alternatives_smp_unlock(smp->locks, smp->locks_end,
319 smp->text, smp->text_end); 319 smp->text, smp->text_end);
320 spin_unlock(&smp_alt); 320 mutex_unlock(&smp_alt);
321} 321}
322 322
323void alternatives_smp_module_del(struct module *mod) 323void alternatives_smp_module_del(struct module *mod)
@@ -327,17 +327,17 @@ void alternatives_smp_module_del(struct module *mod)
327 if (smp_alt_once || noreplace_smp) 327 if (smp_alt_once || noreplace_smp)
328 return; 328 return;
329 329
330 spin_lock(&smp_alt); 330 mutex_lock(&smp_alt);
331 list_for_each_entry(item, &smp_alt_modules, next) { 331 list_for_each_entry(item, &smp_alt_modules, next) {
332 if (mod != item->mod) 332 if (mod != item->mod)
333 continue; 333 continue;
334 list_del(&item->next); 334 list_del(&item->next);
335 spin_unlock(&smp_alt); 335 mutex_unlock(&smp_alt);
336 DPRINTK("%s: %s\n", __func__, item->name); 336 DPRINTK("%s: %s\n", __func__, item->name);
337 kfree(item); 337 kfree(item);
338 return; 338 return;
339 } 339 }
340 spin_unlock(&smp_alt); 340 mutex_unlock(&smp_alt);
341} 341}
342 342
343void alternatives_smp_switch(int smp) 343void alternatives_smp_switch(int smp)
@@ -359,7 +359,7 @@ void alternatives_smp_switch(int smp)
359 return; 359 return;
360 BUG_ON(!smp && (num_online_cpus() > 1)); 360 BUG_ON(!smp && (num_online_cpus() > 1));
361 361
362 spin_lock(&smp_alt); 362 mutex_lock(&smp_alt);
363 363
364 /* 364 /*
365 * Avoid unnecessary switches because it forces JIT based VMs to 365 * Avoid unnecessary switches because it forces JIT based VMs to
@@ -383,7 +383,7 @@ void alternatives_smp_switch(int smp)
383 mod->text, mod->text_end); 383 mod->text, mod->text_end);
384 } 384 }
385 smp_mode = smp; 385 smp_mode = smp;
386 spin_unlock(&smp_alt); 386 mutex_unlock(&smp_alt);
387} 387}
388 388
389#endif 389#endif
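alternative.c swaps the smp_alt spinlock for a mutex, presumably because the code patching it protects can sleep. A userspace analogue of the pattern, with a pthread mutex standing in for struct mutex:

```c
#include <pthread.h>
#include <stdio.h>

/* Sketch: replace a busy-waiting lock with a sleeping one when the
 * critical section itself may block. */
static pthread_mutex_t smp_alt = PTHREAD_MUTEX_INITIALIZER;
static int smp_mode = 1;                     /* protected by smp_alt */

static void alternatives_switch(int smp)
{
        pthread_mutex_lock(&smp_alt);        /* was spin_lock(&smp_alt) */
        if (smp_mode != smp) {
                /* ... patch text here; this step may sleep ... */
                smp_mode = smp;
        }
        pthread_mutex_unlock(&smp_alt);      /* was spin_unlock(&smp_alt) */
}

int main(void)
{
        alternatives_switch(0);
        printf("smp_mode = %d\n", smp_mode);
        return 0;
}
```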
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index 6dea8306d8c0..a437d027f20b 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -82,6 +82,11 @@ int pic_mode;
82/* Have we found an MP table */ 82/* Have we found an MP table */
83int smp_found_config; 83int smp_found_config;
84 84
85static struct resource lapic_resource = {
86 .name = "Local APIC",
87 .flags = IORESOURCE_MEM | IORESOURCE_BUSY,
88};
89
85static unsigned int calibration_result; 90static unsigned int calibration_result;
86 91
87static int lapic_next_event(unsigned long delta, 92static int lapic_next_event(unsigned long delta,
@@ -969,7 +974,7 @@ void __cpuinit setup_local_APIC(void)
969 * Double-check whether this APIC is really registered. 974 * Double-check whether this APIC is really registered.
970 */ 975 */
971 if (!apic_id_registered()) 976 if (!apic_id_registered())
972 BUG(); 977 WARN_ON_ONCE(1);
973 978
974 /* 979 /*
975 * Intel recommends to set DFR, LDR and TPR before enabling 980 * Intel recommends to set DFR, LDR and TPR before enabling
@@ -1335,6 +1340,10 @@ void __init smp_intr_init(void)
1335 1340
1336 /* IPI for generic function call */ 1341 /* IPI for generic function call */
1337 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 1342 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1343
1344 /* IPI for single call function */
1345 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
1346 call_function_single_interrupt);
1338} 1347}
1339#endif 1348#endif
1340 1349
@@ -1720,3 +1729,21 @@ static int __init apic_set_verbosity(char *str)
1720} 1729}
1721__setup("apic=", apic_set_verbosity); 1730__setup("apic=", apic_set_verbosity);
1722 1731
1732static int __init lapic_insert_resource(void)
1733{
1734 if (!apic_phys)
1735 return -1;
1736
1737 /* Put local APIC into the resource map. */
1738 lapic_resource.start = apic_phys;
1739 lapic_resource.end = lapic_resource.start + PAGE_SIZE - 1;
1740 insert_resource(&iomem_resource, &lapic_resource);
1741
1742 return 0;
1743}
1744
1745/*
1746 * need call insert after e820_reserve_resources()
1747 * that is using request_resource
1748 */
1749late_initcall(lapic_insert_resource);
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 00e6d1370954..bf9b441331e9 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -204,6 +204,7 @@
204#include <linux/module.h> 204#include <linux/module.h>
205 205
206#include <linux/poll.h> 206#include <linux/poll.h>
207#include <linux/smp_lock.h>
207#include <linux/types.h> 208#include <linux/types.h>
208#include <linux/stddef.h> 209#include <linux/stddef.h>
209#include <linux/timer.h> 210#include <linux/timer.h>
@@ -1212,9 +1213,9 @@ static int suspend(int vetoable)
1212 if (err != APM_SUCCESS) 1213 if (err != APM_SUCCESS)
1213 apm_error("suspend", err); 1214 apm_error("suspend", err);
1214 err = (err == APM_SUCCESS) ? 0 : -EIO; 1215 err = (err == APM_SUCCESS) ? 0 : -EIO;
1215 device_power_up(); 1216 device_power_up(PMSG_RESUME);
1216 local_irq_enable(); 1217 local_irq_enable();
1217 device_resume(); 1218 device_resume(PMSG_RESUME);
1218 queue_event(APM_NORMAL_RESUME, NULL); 1219 queue_event(APM_NORMAL_RESUME, NULL);
1219 spin_lock(&user_list_lock); 1220 spin_lock(&user_list_lock);
1220 for (as = user_list; as != NULL; as = as->next) { 1221 for (as = user_list; as != NULL; as = as->next) {
@@ -1239,7 +1240,7 @@ static void standby(void)
1239 apm_error("standby", err); 1240 apm_error("standby", err);
1240 1241
1241 local_irq_disable(); 1242 local_irq_disable();
1242 device_power_up(); 1243 device_power_up(PMSG_RESUME);
1243 local_irq_enable(); 1244 local_irq_enable();
1244} 1245}
1245 1246
@@ -1325,7 +1326,7 @@ static void check_events(void)
1325 ignore_bounce = 1; 1326 ignore_bounce = 1;
1326 if ((event != APM_NORMAL_RESUME) 1327 if ((event != APM_NORMAL_RESUME)
1327 || (ignore_normal_resume == 0)) { 1328 || (ignore_normal_resume == 0)) {
1328 device_resume(); 1329 device_resume(PMSG_RESUME);
1329 queue_event(event, NULL); 1330 queue_event(event, NULL);
1330 } 1331 }
1331 ignore_normal_resume = 0; 1332 ignore_normal_resume = 0;
@@ -1549,10 +1550,12 @@ static int do_open(struct inode *inode, struct file *filp)
1549{ 1550{
1550 struct apm_user *as; 1551 struct apm_user *as;
1551 1552
1553 lock_kernel();
1552 as = kmalloc(sizeof(*as), GFP_KERNEL); 1554 as = kmalloc(sizeof(*as), GFP_KERNEL);
1553 if (as == NULL) { 1555 if (as == NULL) {
1554 printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n", 1556 printk(KERN_ERR "apm: cannot allocate struct of size %d bytes\n",
1555 sizeof(*as)); 1557 sizeof(*as));
1558 unlock_kernel();
1556 return -ENOMEM; 1559 return -ENOMEM;
1557 } 1560 }
1558 as->magic = APM_BIOS_MAGIC; 1561 as->magic = APM_BIOS_MAGIC;
@@ -1574,6 +1577,7 @@ static int do_open(struct inode *inode, struct file *filp)
1574 user_list = as; 1577 user_list = as;
1575 spin_unlock(&user_list_lock); 1578 spin_unlock(&user_list_lock);
1576 filp->private_data = as; 1579 filp->private_data = as;
1580 unlock_kernel();
1577 return 0; 1581 return 0;
1578} 1582}
1579 1583
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 3295e7c08fe7..bacf5deeec2d 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -34,7 +34,7 @@ int main(void)
34 ENTRY(pid); 34 ENTRY(pid);
35 BLANK(); 35 BLANK();
36#undef ENTRY 36#undef ENTRY
37#define ENTRY(entry) DEFINE(threadinfo_ ## entry, offsetof(struct thread_info, entry)) 37#define ENTRY(entry) DEFINE(TI_ ## entry, offsetof(struct thread_info, entry))
38 ENTRY(flags); 38 ENTRY(flags);
39 ENTRY(addr_limit); 39 ENTRY(addr_limit);
40 ENTRY(preempt_count); 40 ENTRY(preempt_count);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
index 958526d6a74a..7c36fb8a28d4 100644
--- a/arch/x86/kernel/cpu/amd_64.c
+++ b/arch/x86/kernel/cpu/amd_64.c
@@ -199,10 +199,15 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
199 * Don't do it for gbpages because there seems very little 199 * Don't do it for gbpages because there seems very little
200 * benefit in doing so. 200 * benefit in doing so.
201 */ 201 */
202 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg) && 202 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
203 (tseg >> PMD_SHIFT) < 203 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
204 (max_pfn_mapped >> (PMD_SHIFT-PAGE_SHIFT))) 204 if ((tseg>>PMD_SHIFT) <
205 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
206 ((tseg>>PMD_SHIFT) <
207 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
208 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
205 set_memory_4k((unsigned long)__va(tseg), 1); 209 set_memory_4k((unsigned long)__va(tseg), 1);
210 }
206 } 211 }
207} 212}
208 213
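The extended TSEG check calls set_memory_4k() only when TSEG actually falls inside the direct mapping, either below max_low_pfn_mapped or in the above-4 GB part of the mapping. A sketch restating that condition with named quantities (all values illustrative):

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PMD_SHIFT  21

/* Readable restatement of the TSEG test above: work in 2 MB units and
 * check whether the TSEG base is covered by the low direct mapping or
 * by the >4 GB part of it. */
static int tseg_is_mapped(uint64_t tseg, uint64_t max_low_pfn_mapped,
                          uint64_t max_pfn_mapped)
{
        uint64_t unit      = tseg >> PMD_SHIFT;
        uint64_t low_units = max_low_pfn_mapped >> (PMD_SHIFT - PAGE_SHIFT);
        uint64_t all_units = max_pfn_mapped     >> (PMD_SHIFT - PAGE_SHIFT);
        uint64_t four_gb   = 1ULL << (32 - PMD_SHIFT);

        return unit < low_units || (unit < all_units && unit >= four_gb);
}

int main(void)
{
        /* example: TSEG at 0xcff00000 with 3.25 GB of low direct mapping */
        printf("%d\n", tseg_is_mapped(0xcff00000ULL, 0xd0000ULL, 0x100000ULL));
        return 0;
}
```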
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 13526fd5cce1..1d181c40e2e1 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -10,20 +10,12 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
10{ 10{
11 if (c->x86 == 0x6 && c->x86_model >= 0xf) 11 if (c->x86 == 0x6 && c->x86_model >= 0xf)
12 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 12 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
13
14 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
13} 15}
14 16
15static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
16{ 18{
17 /* Cache sizes */
18 unsigned n;
19
20 n = c->extended_cpuid_level;
21 if (n >= 0x80000008) {
22 unsigned eax = cpuid_eax(0x80000008);
23 c->x86_virt_bits = (eax >> 8) & 0xff;
24 c->x86_phys_bits = eax & 0xff;
25 }
26
27 if (c->x86 == 0x6 && c->x86_model >= 0xf) { 19 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
28 c->x86_cache_alignment = c->x86_clflush_size * 2; 20 c->x86_cache_alignment = c->x86_clflush_size * 2;
29 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 21 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
index 751850235291..7b8cc72feb40 100644
--- a/arch/x86/kernel/cpu/common_64.c
+++ b/arch/x86/kernel/cpu/common_64.c
@@ -98,7 +98,7 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
98 98
99void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) 99void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
100{ 100{
101 unsigned int n, dummy, eax, ebx, ecx, edx; 101 unsigned int n, dummy, ebx, ecx, edx;
102 102
103 n = c->extended_cpuid_level; 103 n = c->extended_cpuid_level;
104 104
@@ -121,11 +121,6 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
121 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", 121 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
122 c->x86_cache_size, ecx & 0xFF); 122 c->x86_cache_size, ecx & 0xFF);
123 } 123 }
124 if (n >= 0x80000008) {
125 cpuid(0x80000008, &eax, &dummy, &dummy, &dummy);
126 c->x86_virt_bits = (eax >> 8) & 0xff;
127 c->x86_phys_bits = eax & 0xff;
128 }
129} 124}
130 125
131void __cpuinit detect_ht(struct cpuinfo_x86 *c) 126void __cpuinit detect_ht(struct cpuinfo_x86 *c)
@@ -314,6 +309,16 @@ static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
314 if (c->extended_cpuid_level >= 0x80000007) 309 if (c->extended_cpuid_level >= 0x80000007)
315 c->x86_power = cpuid_edx(0x80000007); 310 c->x86_power = cpuid_edx(0x80000007);
316 311
312 if (c->extended_cpuid_level >= 0x80000008) {
313 u32 eax = cpuid_eax(0x80000008);
314
315 c->x86_virt_bits = (eax >> 8) & 0xff;
316 c->x86_phys_bits = eax & 0xff;
317 }
318
319 /* Assume all 64-bit CPUs support 32-bit syscall */
320 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
321
317 if (c->x86_vendor != X86_VENDOR_UNKNOWN && 322 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
318 cpu_devs[c->x86_vendor]->c_early_init) 323 cpu_devs[c->x86_vendor]->c_early_init)
319 cpu_devs[c->x86_vendor]->c_early_init(c); 324 cpu_devs[c->x86_vendor]->c_early_init(c);
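Reading CPUID leaf 0x80000008 moves into early_identify_cpu(), so the address sizes are known early and the per-vendor copies removed above become redundant. A sketch decoding a typical EAX value from that leaf (0x3028 is just an example):

```c
#include <stdio.h>
#include <stdint.h>

/* CPUID leaf 0x80000008: EAX[7:0] = physical address bits,
 * EAX[15:8] = virtual address bits. */
int main(void)
{
        uint32_t eax = 0x3028;               /* example: 48 virt / 40 phys bits */
        unsigned int phys_bits = eax & 0xff;
        unsigned int virt_bits = (eax >> 8) & 0xff;

        printf("physical address bits: %u\n", phys_bits);
        printf("virtual  address bits: %u\n", virt_bits);
        return 0;
}
```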
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index fe9224c51d37..70609efdf1da 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -226,6 +226,10 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
226 226
227 if (cpu_has_bts) 227 if (cpu_has_bts)
228 ds_init_intel(c); 228 ds_init_intel(c);
229
230#ifdef CONFIG_X86_NUMAQ
231 numaq_tsc_disable();
232#endif
229} 233}
230 234
231static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 235static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
index fcb1cc9d75ca..1019c58d39f0 100644
--- a/arch/x86/kernel/cpu/intel_64.c
+++ b/arch/x86/kernel/cpu/intel_64.c
@@ -12,6 +12,8 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
12 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 12 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
13 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 13 (c->x86 == 0x6 && c->x86_model >= 0x0e))
14 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 14 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15
16 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
15} 17}
16 18
17/* 19/*
@@ -52,9 +54,6 @@ static void __cpuinit srat_detect_node(void)
52 54
53static void __cpuinit init_intel(struct cpuinfo_x86 *c) 55static void __cpuinit init_intel(struct cpuinfo_x86 *c)
54{ 56{
55 /* Cache sizes */
56 unsigned n;
57
58 init_intel_cacheinfo(c); 57 init_intel_cacheinfo(c);
59 if (c->cpuid_level > 9) { 58 if (c->cpuid_level > 9) {
60 unsigned eax = cpuid_eax(10); 59 unsigned eax = cpuid_eax(10);
@@ -76,13 +75,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
76 if (cpu_has_bts) 75 if (cpu_has_bts)
77 ds_init_intel(c); 76 ds_init_intel(c);
78 77
79 n = c->extended_cpuid_level;
80 if (n >= 0x80000008) {
81 unsigned eax = cpuid_eax(0x80000008);
82 c->x86_virt_bits = (eax >> 8) & 0xff;
83 c->x86_phys_bits = eax & 0xff;
84 }
85
86 if (c->x86 == 15) 78 if (c->x86 == 15)
87 c->x86_cache_alignment = c->x86_clflush_size * 2; 79 c->x86_cache_alignment = c->x86_clflush_size * 2;
88 if (c->x86 == 6) 80 if (c->x86 == 6)
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 501ca1cea27d..c4a7ec31394c 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -9,6 +9,7 @@
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/kernel.h> 10#include <linux/kernel.h>
11#include <linux/sched.h> 11#include <linux/sched.h>
12#include <linux/smp_lock.h>
12#include <linux/string.h> 13#include <linux/string.h>
13#include <linux/rcupdate.h> 14#include <linux/rcupdate.h>
14#include <linux/kallsyms.h> 15#include <linux/kallsyms.h>
@@ -363,7 +364,7 @@ static void mcheck_check_cpu(void *info)
363 364
364static void mcheck_timer(struct work_struct *work) 365static void mcheck_timer(struct work_struct *work)
365{ 366{
366 on_each_cpu(mcheck_check_cpu, NULL, 1, 1); 367 on_each_cpu(mcheck_check_cpu, NULL, 1);
367 368
368 /* 369 /*
369 * Alert userspace if needed. If we logged an MCE, reduce the 370 * Alert userspace if needed. If we logged an MCE, reduce the
@@ -532,10 +533,12 @@ static int open_exclu; /* already open exclusive? */
532 533
533static int mce_open(struct inode *inode, struct file *file) 534static int mce_open(struct inode *inode, struct file *file)
534{ 535{
536 lock_kernel();
535 spin_lock(&mce_state_lock); 537 spin_lock(&mce_state_lock);
536 538
537 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) { 539 if (open_exclu || (open_count && (file->f_flags & O_EXCL))) {
538 spin_unlock(&mce_state_lock); 540 spin_unlock(&mce_state_lock);
541 unlock_kernel();
539 return -EBUSY; 542 return -EBUSY;
540 } 543 }
541 544
@@ -544,6 +547,7 @@ static int mce_open(struct inode *inode, struct file *file)
544 open_count++; 547 open_count++;
545 548
546 spin_unlock(&mce_state_lock); 549 spin_unlock(&mce_state_lock);
550 unlock_kernel();
547 551
548 return nonseekable_open(inode, file); 552 return nonseekable_open(inode, file);
549} 553}
@@ -617,7 +621,7 @@ static ssize_t mce_read(struct file *filp, char __user *ubuf, size_t usize,
617 * Collect entries that were still getting written before the 621 * Collect entries that were still getting written before the
618 * synchronize. 622 * synchronize.
619 */ 623 */
620 on_each_cpu(collect_tscs, cpu_tsc, 1, 1); 624 on_each_cpu(collect_tscs, cpu_tsc, 1);
621 for (i = next; i < MCE_LOG_LEN; i++) { 625 for (i = next; i < MCE_LOG_LEN; i++) {
622 if (mcelog.entry[i].finished && 626 if (mcelog.entry[i].finished &&
623 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) { 627 mcelog.entry[i].tsc < cpu_tsc[mcelog.entry[i].cpu]) {
@@ -742,7 +746,7 @@ static void mce_restart(void)
742 if (next_interval) 746 if (next_interval)
743 cancel_delayed_work(&mcheck_work); 747 cancel_delayed_work(&mcheck_work);
744 /* Timer race is harmless here */ 748 /* Timer race is harmless here */
745 on_each_cpu(mce_init, NULL, 1, 1); 749 on_each_cpu(mce_init, NULL, 1);
746 next_interval = check_interval * HZ; 750 next_interval = check_interval * HZ;
747 if (next_interval) 751 if (next_interval)
748 schedule_delayed_work(&mcheck_work, 752 schedule_delayed_work(&mcheck_work,
diff --git a/arch/x86/kernel/cpu/mcheck/non-fatal.c b/arch/x86/kernel/cpu/mcheck/non-fatal.c
index 00ccb6c14ec2..cc1fccdd31e0 100644
--- a/arch/x86/kernel/cpu/mcheck/non-fatal.c
+++ b/arch/x86/kernel/cpu/mcheck/non-fatal.c
@@ -59,7 +59,7 @@ static DECLARE_DELAYED_WORK(mce_work, mce_work_fn);
59 59
60static void mce_work_fn(struct work_struct *work) 60static void mce_work_fn(struct work_struct *work)
61{ 61{
62 on_each_cpu(mce_checkregs, NULL, 1, 1); 62 on_each_cpu(mce_checkregs, NULL, 1);
63 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE)); 63 schedule_delayed_work(&mce_work, round_jiffies_relative(MCE_RATE));
64} 64}
65 65
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c
index 105afe12beb0..6f23969c8faf 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/main.c
@@ -223,7 +223,7 @@ static void set_mtrr(unsigned int reg, unsigned long base,
223 atomic_set(&data.gate,0); 223 atomic_set(&data.gate,0);
224 224
225 /* Start the ball rolling on other CPUs */ 225 /* Start the ball rolling on other CPUs */
226 if (smp_call_function(ipi_handler, &data, 1, 0) != 0) 226 if (smp_call_function(ipi_handler, &data, 0) != 0)
227 panic("mtrr: timed out waiting for other CPUs\n"); 227 panic("mtrr: timed out waiting for other CPUs\n");
228 228
229 local_irq_save(flags); 229 local_irq_save(flags);
@@ -1682,7 +1682,7 @@ void mtrr_ap_init(void)
1682 */ 1682 */
1683void mtrr_save_state(void) 1683void mtrr_save_state(void)
1684{ 1684{
1685 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1, 1); 1685 smp_call_function_single(0, mtrr_save_fixed_ranges, NULL, 1);
1686} 1686}
1687 1687
1688static int __init mtrr_init_finialize(void) 1688static int __init mtrr_init_finialize(void)
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 2e9bef6e3aa3..6d4bdc02388a 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -189,7 +189,7 @@ void disable_lapic_nmi_watchdog(void)
189 if (atomic_read(&nmi_active) <= 0) 189 if (atomic_read(&nmi_active) <= 0)
190 return; 190 return;
191 191
192 on_each_cpu(stop_apic_nmi_watchdog, NULL, 0, 1); 192 on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
193 193
194 if (wd_ops) 194 if (wd_ops)
195 wd_ops->unreserve(); 195 wd_ops->unreserve();
@@ -213,7 +213,7 @@ void enable_lapic_nmi_watchdog(void)
213 return; 213 return;
214 } 214 }
215 215
216 on_each_cpu(setup_apic_nmi_watchdog, NULL, 0, 1); 216 on_each_cpu(setup_apic_nmi_watchdog, NULL, 1);
217 touch_nmi_watchdog(); 217 touch_nmi_watchdog();
218} 218}
219 219
diff --git a/arch/x86/kernel/cpuid.c b/arch/x86/kernel/cpuid.c
index daff52a62248..2de5fa2bbf77 100644
--- a/arch/x86/kernel/cpuid.c
+++ b/arch/x86/kernel/cpuid.c
@@ -33,6 +33,7 @@
33#include <linux/init.h> 33#include <linux/init.h>
34#include <linux/poll.h> 34#include <linux/poll.h>
35#include <linux/smp.h> 35#include <linux/smp.h>
36#include <linux/smp_lock.h>
36#include <linux/major.h> 37#include <linux/major.h>
37#include <linux/fs.h> 38#include <linux/fs.h>
38#include <linux/smp_lock.h> 39#include <linux/smp_lock.h>
@@ -95,7 +96,7 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
95 for (; count; count -= 16) { 96 for (; count; count -= 16) {
96 cmd.eax = pos; 97 cmd.eax = pos;
97 cmd.ecx = pos >> 32; 98 cmd.ecx = pos >> 32;
98 smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1, 1); 99 smp_call_function_single(cpu, cpuid_smp_cpuid, &cmd, 1);
99 if (copy_to_user(tmp, &cmd, 16)) 100 if (copy_to_user(tmp, &cmd, 16))
100 return -EFAULT; 101 return -EFAULT;
101 tmp += 16; 102 tmp += 16;
@@ -107,15 +108,23 @@ static ssize_t cpuid_read(struct file *file, char __user *buf,
107 108
108static int cpuid_open(struct inode *inode, struct file *file) 109static int cpuid_open(struct inode *inode, struct file *file)
109{ 110{
110 unsigned int cpu = iminor(file->f_path.dentry->d_inode); 111 unsigned int cpu;
111 struct cpuinfo_x86 *c = &cpu_data(cpu); 112 struct cpuinfo_x86 *c;
112 113 int ret = 0;
113 if (cpu >= NR_CPUS || !cpu_online(cpu)) 114
114 return -ENXIO; /* No such CPU */ 115 lock_kernel();
116
117 cpu = iminor(file->f_path.dentry->d_inode);
118 if (cpu >= NR_CPUS || !cpu_online(cpu)) {
119 ret = -ENXIO; /* No such CPU */
120 goto out;
121 }
122 c = &cpu_data(cpu);
115 if (c->cpuid_level < 0) 123 if (c->cpuid_level < 0)
116 return -EIO; /* CPUID not supported */ 124 ret = -EIO; /* CPUID not supported */
117 125out:
118 return 0; 126 unlock_kernel();
127 return ret;
119} 128}
120 129
121/* 130/*
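cpuid_open() now takes the big kernel lock itself and funnels all failures through a single exit path, apparently part of the wider BKL push-down into individual open() handlers. A userspace sketch of the single-unlock pattern (pthread mutex standing in for lock_kernel(), parameters invented):

```c
#include <errno.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t bkl = PTHREAD_MUTEX_INITIALIZER;

/* Sketch of the rework above: lock first, route every error through
 * "out" so the lock is always dropped exactly once. */
static int cpuid_open_sketch(unsigned int cpu, int cpu_online, int cpuid_level)
{
        int ret = 0;

        pthread_mutex_lock(&bkl);            /* lock_kernel() */
        if (cpu >= 8 || !cpu_online) {
                ret = -ENXIO;                /* no such CPU */
                goto out;
        }
        if (cpuid_level < 0)
                ret = -EIO;                  /* CPUID not supported */
out:
        pthread_mutex_unlock(&bkl);          /* unlock_kernel() */
        return ret;
}

int main(void)
{
        printf("open offline cpu -> %d\n", cpuid_open_sketch(1, 0, 10));
        return 0;
}
```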
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index d0335853ff52..28c29180b380 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -19,6 +19,7 @@
19#include <linux/mm.h> 19#include <linux/mm.h>
20#include <linux/pfn.h> 20#include <linux/pfn.h>
21#include <linux/suspend.h> 21#include <linux/suspend.h>
22#include <linux/firmware-map.h>
22 23
23#include <asm/pgtable.h> 24#include <asm/pgtable.h>
24#include <asm/page.h> 25#include <asm/page.h>
@@ -27,7 +28,22 @@
27#include <asm/setup.h> 28#include <asm/setup.h>
28#include <asm/trampoline.h> 29#include <asm/trampoline.h>
29 30
31/*
32 * The e820 map is the map that gets modified e.g. with command line parameters
33 * and that is also registered with modifications in the kernel resource tree
34 * with the iomem_resource as parent.
35 *
36 * The e820_saved is directly saved after the BIOS-provided memory map is
37 * copied. It doesn't get modified afterwards. It's registered for the
38 * /sys/firmware/memmap interface.
39 *
40 * That memory map is not modified and is used as base for kexec. The kexec'd
41 * kernel should get the same memory map as the firmware provides. Then the
42 * user can e.g. boot the original kernel with mem=1G while still booting the
43 * next kernel with full memory.
44 */
30struct e820map e820; 45struct e820map e820;
46struct e820map e820_saved;
31 47
32/* For PCI or other memory-mapped resources */ 48/* For PCI or other memory-mapped resources */
33unsigned long pci_mem_start = 0xaeedbabe; 49unsigned long pci_mem_start = 0xaeedbabe;
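The comment above introduces the second map: e820 is the working copy that "mem="/"memmap=" may trim, while e820_saved keeps the untouched firmware map for /sys/firmware/memmap and kexec. A toy model of that relationship (structures and sizes invented for illustration):

```c
#include <stdio.h>
#include <string.h>
#include <stdint.h>

struct entry { uint64_t addr, size; int type; };
struct map   { int nr; struct entry e[8]; };

static struct map e820, e820_saved;

int main(void)
{
        e820.nr = 1;
        e820.e[0] = (struct entry){ 0x100000, 0x7ff00000, 1 };   /* ~2 GB of RAM */

        memcpy(&e820_saved, &e820, sizeof(e820));  /* as in setup_memory_map() */

        e820.e[0].size = 0x3ff00000;               /* pretend "mem=1G" trimmed it */

        printf("working map: %#llx bytes, saved map: %#llx bytes\n",
               (unsigned long long)e820.e[0].size,
               (unsigned long long)e820_saved.e[0].size);
        return 0;
}
```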
@@ -398,8 +414,9 @@ static int __init append_e820_map(struct e820entry *biosmap, int nr_map)
398 return __append_e820_map(biosmap, nr_map); 414 return __append_e820_map(biosmap, nr_map);
399} 415}
400 416
401u64 __init e820_update_range(u64 start, u64 size, unsigned old_type, 417static u64 __init e820_update_range_map(struct e820map *e820x, u64 start,
402 unsigned new_type) 418 u64 size, unsigned old_type,
419 unsigned new_type)
403{ 420{
404 int i; 421 int i;
405 u64 real_updated_size = 0; 422 u64 real_updated_size = 0;
@@ -410,7 +427,7 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
410 size = ULLONG_MAX - start; 427 size = ULLONG_MAX - start;
411 428
412 for (i = 0; i < e820.nr_map; i++) { 429 for (i = 0; i < e820.nr_map; i++) {
413 struct e820entry *ei = &e820.map[i]; 430 struct e820entry *ei = &e820x->map[i];
414 u64 final_start, final_end; 431 u64 final_start, final_end;
415 if (ei->type != old_type) 432 if (ei->type != old_type)
416 continue; 433 continue;
@@ -438,6 +455,19 @@ u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
438 return real_updated_size; 455 return real_updated_size;
439} 456}
440 457
458u64 __init e820_update_range(u64 start, u64 size, unsigned old_type,
459 unsigned new_type)
460{
461 return e820_update_range_map(&e820, start, size, old_type, new_type);
462}
463
464static u64 __init e820_update_range_saved(u64 start, u64 size,
465 unsigned old_type, unsigned new_type)
466{
467 return e820_update_range_map(&e820_saved, start, size, old_type,
468 new_type);
469}
470
441/* make e820 not cover the range */ 471/* make e820 not cover the range */
442u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type, 472u64 __init e820_remove_range(u64 start, u64 size, unsigned old_type,
443 int checktype) 473 int checktype)
@@ -487,6 +517,15 @@ void __init update_e820(void)
487 printk(KERN_INFO "modified physical RAM map:\n"); 517 printk(KERN_INFO "modified physical RAM map:\n");
488 e820_print_map("modified"); 518 e820_print_map("modified");
489} 519}
520static void __init update_e820_saved(void)
521{
522 int nr_map;
523
524 nr_map = e820_saved.nr_map;
525 if (sanitize_e820_map(e820_saved.map, ARRAY_SIZE(e820_saved.map), &nr_map))
526 return;
527 e820_saved.nr_map = nr_map;
528}
490#define MAX_GAP_END 0x100000000ull 529#define MAX_GAP_END 0x100000000ull
491/* 530/*
492 * Search for a gap in the e820 memory space from start_addr to end_addr. 531 * Search for a gap in the e820 memory space from start_addr to end_addr.
@@ -991,8 +1030,10 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
991 1030
992 addr = round_down(start + size - sizet, align); 1031 addr = round_down(start + size - sizet, align);
993 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED); 1032 e820_update_range(addr, sizet, E820_RAM, E820_RESERVED);
1033 e820_update_range_saved(addr, sizet, E820_RAM, E820_RESERVED);
994 printk(KERN_INFO "update e820 for early_reserve_e820\n"); 1034 printk(KERN_INFO "update e820 for early_reserve_e820\n");
995 update_e820(); 1035 update_e820();
1036 update_e820_saved();
996 1037
997 return addr; 1038 return addr;
998} 1039}
@@ -1008,30 +1049,51 @@ u64 __init early_reserve_e820(u64 startt, u64 sizet, u64 align)
1008#endif 1049#endif
1009 1050
1010/* 1051/*
1011 * Last pfn which the user wants to use.
1012 */
1013unsigned long __initdata end_user_pfn = MAX_ARCH_PFN;
1014
1015/*
1016 * Find the highest page frame number we have available 1052 * Find the highest page frame number we have available
1017 */ 1053 */
1018unsigned long __init e820_end_of_ram(void) 1054static unsigned long __init e820_end_pfn(unsigned long limit_pfn, unsigned type)
1019{ 1055{
1020 unsigned long last_pfn; 1056 int i;
1057 unsigned long last_pfn = 0;
1021 unsigned long max_arch_pfn = MAX_ARCH_PFN; 1058 unsigned long max_arch_pfn = MAX_ARCH_PFN;
1022 1059
1023 last_pfn = find_max_pfn_with_active_regions(); 1060 for (i = 0; i < e820.nr_map; i++) {
1061 struct e820entry *ei = &e820.map[i];
1062 unsigned long start_pfn;
1063 unsigned long end_pfn;
1064
1065 if (ei->type != type)
1066 continue;
1067
1068 start_pfn = ei->addr >> PAGE_SHIFT;
1069 end_pfn = (ei->addr + ei->size) >> PAGE_SHIFT;
1070
1071 if (start_pfn >= limit_pfn)
1072 continue;
1073 if (end_pfn > limit_pfn) {
1074 last_pfn = limit_pfn;
1075 break;
1076 }
1077 if (end_pfn > last_pfn)
1078 last_pfn = end_pfn;
1079 }
1024 1080
1025 if (last_pfn > max_arch_pfn) 1081 if (last_pfn > max_arch_pfn)
1026 last_pfn = max_arch_pfn; 1082 last_pfn = max_arch_pfn;
1027 if (last_pfn > end_user_pfn)
1028 last_pfn = end_user_pfn;
1029 1083
1030 printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n", 1084 printk(KERN_INFO "last_pfn = %#lx max_arch_pfn = %#lx\n",
1031 last_pfn, max_arch_pfn); 1085 last_pfn, max_arch_pfn);
1032 return last_pfn; 1086 return last_pfn;
1033} 1087}
1088unsigned long __init e820_end_of_ram_pfn(void)
1089{
1090 return e820_end_pfn(MAX_ARCH_PFN, E820_RAM);
1091}
1034 1092
1093unsigned long __init e820_end_of_low_ram_pfn(void)
1094{
1095 return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM);
1096}
1035/* 1097/*
1036 * Finds an active region in the address range from start_pfn to last_pfn and 1098 * Finds an active region in the address range from start_pfn to last_pfn and
1037 * returns its range in ei_startpfn and ei_endpfn for the e820 entry. 1099 * returns its range in ei_startpfn and ei_endpfn for the e820 entry.
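e820_end_pfn() replaces the old end_user_pfn handling: it walks RAM entries, clips them against limit_pfn, and returns the highest page frame seen; e820_end_of_ram_pfn() and e820_end_of_low_ram_pfn() wrap it with different limits. A userspace rerun of the same loop on an invented three-entry map:

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define E820_RAM   1

struct e820entry { uint64_t addr, size; int type; };

/* Same scan as the new e820_end_pfn(): highest RAM end pfn below limit_pfn. */
static unsigned long end_pfn(const struct e820entry *map, int n,
                             unsigned long limit_pfn)
{
        unsigned long last_pfn = 0;

        for (int i = 0; i < n; i++) {
                unsigned long start = map[i].addr >> PAGE_SHIFT;
                unsigned long end   = (map[i].addr + map[i].size) >> PAGE_SHIFT;

                if (map[i].type != E820_RAM || start >= limit_pfn)
                        continue;
                if (end > limit_pfn) {
                        last_pfn = limit_pfn;
                        break;
                }
                if (end > last_pfn)
                        last_pfn = end;
        }
        return last_pfn;
}

int main(void)
{
        struct e820entry map[] = {
                { 0x0000000000ULL, 0x0009f000ULL, E820_RAM },
                { 0x0000100000ULL, 0xbff00000ULL, E820_RAM },   /* ~3 GB low RAM */
                { 0x0100000000ULL, 0x40000000ULL, E820_RAM },   /* 1 GB above 4 GB */
        };
        printf("e820_end_of_ram_pfn     ~ %#lx\n", end_pfn(map, 3, ~0UL));
        printf("e820_end_of_low_ram_pfn ~ %#lx\n",
               end_pfn(map, 3, 1UL << (32 - PAGE_SHIFT)));
        return 0;
}
```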
@@ -1062,12 +1124,6 @@ int __init e820_find_active_region(const struct e820entry *ei,
1062 if (*ei_endpfn > last_pfn) 1124 if (*ei_endpfn > last_pfn)
1063 *ei_endpfn = last_pfn; 1125 *ei_endpfn = last_pfn;
1064 1126
1065 /* Obey end_user_pfn to save on memmap */
1066 if (*ei_startpfn >= end_user_pfn)
1067 return 0;
1068 if (*ei_endpfn > end_user_pfn)
1069 *ei_endpfn = end_user_pfn;
1070
1071 return 1; 1127 return 1;
1072} 1128}
1073 1129
@@ -1113,6 +1169,8 @@ static void early_panic(char *msg)
1113 panic(msg); 1169 panic(msg);
1114} 1170}
1115 1171
1172static int userdef __initdata;
1173
1116/* "mem=nopentium" disables the 4MB page tables. */ 1174/* "mem=nopentium" disables the 4MB page tables. */
1117static int __init parse_memopt(char *p) 1175static int __init parse_memopt(char *p)
1118{ 1176{
@@ -1128,22 +1186,22 @@ static int __init parse_memopt(char *p)
1128 } 1186 }
1129#endif 1187#endif
1130 1188
1189 userdef = 1;
1131 mem_size = memparse(p, &p); 1190 mem_size = memparse(p, &p);
1132 end_user_pfn = mem_size>>PAGE_SHIFT; 1191 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
1133 e820_update_range(mem_size, ULLONG_MAX - mem_size,
1134 E820_RAM, E820_RESERVED);
1135 1192
1136 return 0; 1193 return 0;
1137} 1194}
1138early_param("mem", parse_memopt); 1195early_param("mem", parse_memopt);
1139 1196
1140static int userdef __initdata;
1141
1142static int __init parse_memmap_opt(char *p) 1197static int __init parse_memmap_opt(char *p)
1143{ 1198{
1144 char *oldp; 1199 char *oldp;
1145 u64 start_at, mem_size; 1200 u64 start_at, mem_size;
1146 1201
1202 if (!p)
1203 return -EINVAL;
1204
1147 if (!strcmp(p, "exactmap")) { 1205 if (!strcmp(p, "exactmap")) {
1148#ifdef CONFIG_CRASH_DUMP 1206#ifdef CONFIG_CRASH_DUMP
1149 /* 1207 /*
@@ -1151,9 +1209,7 @@ static int __init parse_memmap_opt(char *p)
1151 * the real mem size before original memory map is 1209 * the real mem size before original memory map is
1152 * reset. 1210 * reset.
1153 */ 1211 */
1154 e820_register_active_regions(0, 0, -1UL); 1212 saved_max_pfn = e820_end_of_ram_pfn();
1155 saved_max_pfn = e820_end_of_ram();
1156 remove_all_active_ranges();
1157#endif 1213#endif
1158 e820.nr_map = 0; 1214 e820.nr_map = 0;
1159 userdef = 1; 1215 userdef = 1;
@@ -1175,11 +1231,9 @@ static int __init parse_memmap_opt(char *p)
1175 } else if (*p == '$') { 1231 } else if (*p == '$') {
1176 start_at = memparse(p+1, &p); 1232 start_at = memparse(p+1, &p);
1177 e820_add_region(start_at, mem_size, E820_RESERVED); 1233 e820_add_region(start_at, mem_size, E820_RESERVED);
1178 } else { 1234 } else
1179 end_user_pfn = (mem_size >> PAGE_SHIFT); 1235 e820_remove_range(mem_size, ULLONG_MAX - mem_size, E820_RAM, 1);
1180 e820_update_range(mem_size, ULLONG_MAX - mem_size, 1236
1181 E820_RAM, E820_RESERVED);
1182 }
1183 return *p == '\0' ? 0 : -EINVAL; 1237 return *p == '\0' ? 0 : -EINVAL;
1184} 1238}
1185early_param("memmap", parse_memmap_opt); 1239early_param("memmap", parse_memmap_opt);
@@ -1198,6 +1252,17 @@ void __init finish_e820_parsing(void)
1198 } 1252 }
1199} 1253}
1200 1254
1255static inline const char *e820_type_to_string(int e820_type)
1256{
1257 switch (e820_type) {
1258 case E820_RESERVED_KERN:
1259 case E820_RAM: return "System RAM";
1260 case E820_ACPI: return "ACPI Tables";
1261 case E820_NVS: return "ACPI Non-volatile Storage";
1262 default: return "reserved";
1263 }
1264}
1265
1201/* 1266/*
1202 * Mark e820 reserved areas as busy for the resource manager. 1267 * Mark e820 reserved areas as busy for the resource manager.
1203 */ 1268 */
@@ -1209,13 +1274,6 @@ void __init e820_reserve_resources(void)
1209 1274
1210 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); 1275 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
1211 for (i = 0; i < e820.nr_map; i++) { 1276 for (i = 0; i < e820.nr_map; i++) {
1212 switch (e820.map[i].type) {
1213 case E820_RESERVED_KERN:
1214 case E820_RAM: res->name = "System RAM"; break;
1215 case E820_ACPI: res->name = "ACPI Tables"; break;
1216 case E820_NVS: res->name = "ACPI Non-volatile Storage"; break;
1217 default: res->name = "reserved";
1218 }
1219 end = e820.map[i].addr + e820.map[i].size - 1; 1277 end = e820.map[i].addr + e820.map[i].size - 1;
1220#ifndef CONFIG_RESOURCES_64BIT 1278#ifndef CONFIG_RESOURCES_64BIT
1221 if (end > 0x100000000ULL) { 1279 if (end > 0x100000000ULL) {
@@ -1223,6 +1281,7 @@ void __init e820_reserve_resources(void)
1223 continue; 1281 continue;
1224 } 1282 }
1225#endif 1283#endif
1284 res->name = e820_type_to_string(e820.map[i].type);
1226 res->start = e820.map[i].addr; 1285 res->start = e820.map[i].addr;
1227 res->end = end; 1286 res->end = end;
1228 1287
@@ -1230,8 +1289,20 @@ void __init e820_reserve_resources(void)
1230 insert_resource(&iomem_resource, res); 1289 insert_resource(&iomem_resource, res);
1231 res++; 1290 res++;
1232 } 1291 }
1292
1293 for (i = 0; i < e820_saved.nr_map; i++) {
1294 struct e820entry *entry = &e820_saved.map[i];
1295 firmware_map_add_early(entry->addr,
1296 entry->addr + entry->size - 1,
1297 e820_type_to_string(entry->type));
1298 }
1233} 1299}
1234 1300
1301/*
1302 * Non-standard memory setup can be specified via this quirk:
1303 */
1304char * (*arch_memory_setup_quirk)(void);
1305
1235char *__init default_machine_specific_memory_setup(void) 1306char *__init default_machine_specific_memory_setup(void)
1236{ 1307{
1237 char *who = "BIOS-e820"; 1308 char *who = "BIOS-e820";
@@ -1272,6 +1343,12 @@ char *__init default_machine_specific_memory_setup(void)
1272 1343
1273char *__init __attribute__((weak)) machine_specific_memory_setup(void) 1344char *__init __attribute__((weak)) machine_specific_memory_setup(void)
1274{ 1345{
1346 if (arch_memory_setup_quirk) {
1347 char *who = arch_memory_setup_quirk();
1348
1349 if (who)
1350 return who;
1351 }
1275 return default_machine_specific_memory_setup(); 1352 return default_machine_specific_memory_setup();
1276} 1353}
1277 1354
@@ -1283,8 +1360,12 @@ char * __init __attribute__((weak)) memory_setup(void)
1283 1360
1284void __init setup_memory_map(void) 1361void __init setup_memory_map(void)
1285{ 1362{
1363 char *who;
1364
1365 who = memory_setup();
1366 memcpy(&e820_saved, &e820, sizeof(struct e820map));
1286 printk(KERN_INFO "BIOS-provided physical RAM map:\n"); 1367 printk(KERN_INFO "BIOS-provided physical RAM map:\n");
1287 e820_print_map(memory_setup()); 1368 e820_print_map(who);
1288} 1369}
1289 1370
1290#ifdef CONFIG_X86_64 1371#ifdef CONFIG_X86_64
diff --git a/arch/x86/kernel/early-quirks.c b/arch/x86/kernel/early-quirks.c
index a4665f37cfc5..a0e11c0cc872 100644
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@@ -120,7 +120,18 @@ static struct chipset early_qrk[] __initdata = {
120 {} 120 {}
121}; 121};
122 122
123static void __init check_dev_quirk(int num, int slot, int func) 123/**
124 * check_dev_quirk - apply early quirks to a given PCI device
125 * @num: bus number
126 * @slot: slot number
127 * @func: PCI function
128 *
129 * Check the vendor & device ID against the early quirks table.
130 *
131 * If the device is single function, let early_quirks() know so we don't
132 * poke at this device again.
133 */
134static int __init check_dev_quirk(int num, int slot, int func)
124{ 135{
125 u16 class; 136 u16 class;
126 u16 vendor; 137 u16 vendor;
@@ -131,7 +142,7 @@ static void __init check_dev_quirk(int num, int slot, int func)
131 class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE); 142 class = read_pci_config_16(num, slot, func, PCI_CLASS_DEVICE);
132 143
133 if (class == 0xffff) 144 if (class == 0xffff)
134 return; 145 return -1; /* no class, treat as single function */
135 146
136 vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID); 147 vendor = read_pci_config_16(num, slot, func, PCI_VENDOR_ID);
137 148
@@ -154,7 +165,9 @@ static void __init check_dev_quirk(int num, int slot, int func)
154 type = read_pci_config_byte(num, slot, func, 165 type = read_pci_config_byte(num, slot, func,
155 PCI_HEADER_TYPE); 166 PCI_HEADER_TYPE);
156 if (!(type & 0x80)) 167 if (!(type & 0x80))
157 return; 168 return -1;
169
170 return 0;
158} 171}
159 172
160void __init early_quirks(void) 173void __init early_quirks(void)
@@ -167,6 +180,9 @@ void __init early_quirks(void)
167 /* Poor man's PCI discovery */ 180 /* Poor man's PCI discovery */
168 for (num = 0; num < 32; num++) 181 for (num = 0; num < 32; num++)
169 for (slot = 0; slot < 32; slot++) 182 for (slot = 0; slot < 32; slot++)
170 for (func = 0; func < 8; func++) 183 for (func = 0; func < 8; func++) {
171 check_dev_quirk(num, slot, func); 184 /* Only probe function 0 on single fn devices */
185 if (check_dev_quirk(num, slot, func))
186 break;
187 }
172} 188}
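check_dev_quirk() now reports back whether the scan may skip the remaining functions of a slot: a non-zero return means either no device answered or the header type marked the device as single-function. A small sketch of the header-type test it relies on, using the same early config accessor as above (the helper name is made up for illustration):

	static int __init slot_is_multifunction(int num, int slot)
	{
		/* bit 7 of the PCI header type register marks a multi-function device */
		u8 type = read_pci_config_byte(num, slot, 0, PCI_HEADER_TYPE);

		return type & 0x80;	/* zero: functions 1-7 need not be probed */
	}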
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 643fd861b724..ff9e7350da54 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -196,7 +196,7 @@ static struct console simnow_console = {
196static struct console *early_console = &early_vga_console; 196static struct console *early_console = &early_vga_console;
197static int early_console_initialized; 197static int early_console_initialized;
198 198
199void early_printk(const char *fmt, ...) 199asmlinkage void early_printk(const char *fmt, ...)
200{ 200{
201 char buf[512]; 201 char buf[512];
202 int n; 202 int n;
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 94382faeadb6..06cc8d4254b1 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -473,7 +473,7 @@ void __init efi_enter_virtual_mode(void)
473 size = md->num_pages << EFI_PAGE_SHIFT; 473 size = md->num_pages << EFI_PAGE_SHIFT;
474 end = md->phys_addr + size; 474 end = md->phys_addr + size;
475 475
476 if (PFN_UP(end) <= max_pfn_mapped) 476 if (PFN_UP(end) <= max_low_pfn_mapped)
477 va = __va(md->phys_addr); 477 va = __va(md->phys_addr);
478 else 478 else
479 va = efi_ioremap(md->phys_addr, size); 479 va = efi_ioremap(md->phys_addr, size);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 53393c306e11..6bc07f0f1202 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -51,6 +51,7 @@
51#include <asm/percpu.h> 51#include <asm/percpu.h>
52#include <asm/dwarf2.h> 52#include <asm/dwarf2.h>
53#include <asm/processor-flags.h> 53#include <asm/processor-flags.h>
54#include <asm/ftrace.h>
54#include <asm/irq_vectors.h> 55#include <asm/irq_vectors.h>
55 56
56/* 57/*
@@ -1024,6 +1025,7 @@ ENTRY(xen_sysenter_target)
1024 RING0_INT_FRAME 1025 RING0_INT_FRAME
1025 addl $5*4, %esp /* remove xen-provided frame */ 1026 addl $5*4, %esp /* remove xen-provided frame */
1026 jmp sysenter_past_esp 1027 jmp sysenter_past_esp
1028 CFI_ENDPROC
1027 1029
1028ENTRY(xen_hypervisor_callback) 1030ENTRY(xen_hypervisor_callback)
1029 CFI_STARTPROC 1031 CFI_STARTPROC
@@ -1110,6 +1112,77 @@ ENDPROC(xen_failsafe_callback)
1110 1112
1111#endif /* CONFIG_XEN */ 1113#endif /* CONFIG_XEN */
1112 1114
1115#ifdef CONFIG_FTRACE
1116#ifdef CONFIG_DYNAMIC_FTRACE
1117
1118ENTRY(mcount)
1119 pushl %eax
1120 pushl %ecx
1121 pushl %edx
1122 movl 0xc(%esp), %eax
1123 subl $MCOUNT_INSN_SIZE, %eax
1124
1125.globl mcount_call
1126mcount_call:
1127 call ftrace_stub
1128
1129 popl %edx
1130 popl %ecx
1131 popl %eax
1132
1133 ret
1134END(mcount)
1135
1136ENTRY(ftrace_caller)
1137 pushl %eax
1138 pushl %ecx
1139 pushl %edx
1140 movl 0xc(%esp), %eax
1141 movl 0x4(%ebp), %edx
1142 subl $MCOUNT_INSN_SIZE, %eax
1143
1144.globl ftrace_call
1145ftrace_call:
1146 call ftrace_stub
1147
1148 popl %edx
1149 popl %ecx
1150 popl %eax
1151
1152.globl ftrace_stub
1153ftrace_stub:
1154 ret
1155END(ftrace_caller)
1156
1157#else /* ! CONFIG_DYNAMIC_FTRACE */
1158
1159ENTRY(mcount)
1160 cmpl $ftrace_stub, ftrace_trace_function
1161 jnz trace
1162.globl ftrace_stub
1163ftrace_stub:
1164 ret
1165
1166 /* taken from glibc */
1167trace:
1168 pushl %eax
1169 pushl %ecx
1170 pushl %edx
1171 movl 0xc(%esp), %eax
1172 movl 0x4(%ebp), %edx
1173 subl $MCOUNT_INSN_SIZE, %eax
1174
1175 call *ftrace_trace_function
1176
1177 popl %edx
1178 popl %ecx
1179 popl %eax
1180
1181 jmp ftrace_stub
1182END(mcount)
1183#endif /* CONFIG_DYNAMIC_FTRACE */
1184#endif /* CONFIG_FTRACE */
1185
1113.section .rodata,"a" 1186.section .rodata,"a"
1114#include "syscall_table_32.S" 1187#include "syscall_table_32.S"
1115 1188
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 07d69f262337..ae63e584c340 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -51,9 +51,115 @@
51#include <asm/page.h> 51#include <asm/page.h>
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h>
54 55
55 .code64 56 .code64
56 57
58#ifdef CONFIG_FTRACE
59#ifdef CONFIG_DYNAMIC_FTRACE
60ENTRY(mcount)
61
62 subq $0x38, %rsp
63 movq %rax, (%rsp)
64 movq %rcx, 8(%rsp)
65 movq %rdx, 16(%rsp)
66 movq %rsi, 24(%rsp)
67 movq %rdi, 32(%rsp)
68 movq %r8, 40(%rsp)
69 movq %r9, 48(%rsp)
70
71 movq 0x38(%rsp), %rdi
72 subq $MCOUNT_INSN_SIZE, %rdi
73
74.globl mcount_call
75mcount_call:
76 call ftrace_stub
77
78 movq 48(%rsp), %r9
79 movq 40(%rsp), %r8
80 movq 32(%rsp), %rdi
81 movq 24(%rsp), %rsi
82 movq 16(%rsp), %rdx
83 movq 8(%rsp), %rcx
84 movq (%rsp), %rax
85 addq $0x38, %rsp
86
87 retq
88END(mcount)
89
90ENTRY(ftrace_caller)
91
92 /* taken from glibc */
93 subq $0x38, %rsp
94 movq %rax, (%rsp)
95 movq %rcx, 8(%rsp)
96 movq %rdx, 16(%rsp)
97 movq %rsi, 24(%rsp)
98 movq %rdi, 32(%rsp)
99 movq %r8, 40(%rsp)
100 movq %r9, 48(%rsp)
101
102 movq 0x38(%rsp), %rdi
103 movq 8(%rbp), %rsi
104 subq $MCOUNT_INSN_SIZE, %rdi
105
106.globl ftrace_call
107ftrace_call:
108 call ftrace_stub
109
110 movq 48(%rsp), %r9
111 movq 40(%rsp), %r8
112 movq 32(%rsp), %rdi
113 movq 24(%rsp), %rsi
114 movq 16(%rsp), %rdx
115 movq 8(%rsp), %rcx
116 movq (%rsp), %rax
117 addq $0x38, %rsp
118
119.globl ftrace_stub
120ftrace_stub:
121 retq
122END(ftrace_caller)
123
124#else /* ! CONFIG_DYNAMIC_FTRACE */
125ENTRY(mcount)
126 cmpq $ftrace_stub, ftrace_trace_function
127 jnz trace
128.globl ftrace_stub
129ftrace_stub:
130 retq
131
132trace:
133 /* taken from glibc */
134 subq $0x38, %rsp
135 movq %rax, (%rsp)
136 movq %rcx, 8(%rsp)
137 movq %rdx, 16(%rsp)
138 movq %rsi, 24(%rsp)
139 movq %rdi, 32(%rsp)
140 movq %r8, 40(%rsp)
141 movq %r9, 48(%rsp)
142
143 movq 0x38(%rsp), %rdi
144 movq 8(%rbp), %rsi
145 subq $MCOUNT_INSN_SIZE, %rdi
146
147 call *ftrace_trace_function
148
149 movq 48(%rsp), %r9
150 movq 40(%rsp), %r8
151 movq 32(%rsp), %rdi
152 movq 24(%rsp), %rsi
153 movq 16(%rsp), %rdx
154 movq 8(%rsp), %rcx
155 movq (%rsp), %rax
156 addq $0x38, %rsp
157
158 jmp ftrace_stub
159END(mcount)
160#endif /* CONFIG_DYNAMIC_FTRACE */
161#endif /* CONFIG_FTRACE */
162
57#ifndef CONFIG_PREEMPT 163#ifndef CONFIG_PREEMPT
58#define retint_kernel retint_restore_args 164#define retint_kernel retint_restore_args
59#endif 165#endif
@@ -168,13 +274,13 @@ ENTRY(ret_from_fork)
168 CFI_ADJUST_CFA_OFFSET -4 274 CFI_ADJUST_CFA_OFFSET -4
169 call schedule_tail 275 call schedule_tail
170 GET_THREAD_INFO(%rcx) 276 GET_THREAD_INFO(%rcx)
171 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),threadinfo_flags(%rcx) 277 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT),TI_flags(%rcx)
172 jnz rff_trace 278 jnz rff_trace
173rff_action: 279rff_action:
174 RESTORE_REST 280 RESTORE_REST
175 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread? 281 testl $3,CS-ARGOFFSET(%rsp) # from kernel_thread?
176 je int_ret_from_sys_call 282 je int_ret_from_sys_call
177 testl $_TIF_IA32,threadinfo_flags(%rcx) 283 testl $_TIF_IA32,TI_flags(%rcx)
178 jnz int_ret_from_sys_call 284 jnz int_ret_from_sys_call
179 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET 285 RESTORE_TOP_OF_STACK %rdi,ARGOFFSET
180 jmp ret_from_sys_call 286 jmp ret_from_sys_call
@@ -243,7 +349,8 @@ ENTRY(system_call_after_swapgs)
243 movq %rcx,RIP-ARGOFFSET(%rsp) 349 movq %rcx,RIP-ARGOFFSET(%rsp)
244 CFI_REL_OFFSET rip,RIP-ARGOFFSET 350 CFI_REL_OFFSET rip,RIP-ARGOFFSET
245 GET_THREAD_INFO(%rcx) 351 GET_THREAD_INFO(%rcx)
246 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP),threadinfo_flags(%rcx) 352 testl $(_TIF_SYSCALL_TRACE|_TIF_SYSCALL_AUDIT|_TIF_SECCOMP), \
353 TI_flags(%rcx)
247 jnz tracesys 354 jnz tracesys
248 cmpq $__NR_syscall_max,%rax 355 cmpq $__NR_syscall_max,%rax
249 ja badsys 356 ja badsys
@@ -262,7 +369,7 @@ sysret_check:
262 GET_THREAD_INFO(%rcx) 369 GET_THREAD_INFO(%rcx)
263 DISABLE_INTERRUPTS(CLBR_NONE) 370 DISABLE_INTERRUPTS(CLBR_NONE)
264 TRACE_IRQS_OFF 371 TRACE_IRQS_OFF
265 movl threadinfo_flags(%rcx),%edx 372 movl TI_flags(%rcx),%edx
266 andl %edi,%edx 373 andl %edi,%edx
267 jnz sysret_careful 374 jnz sysret_careful
268 CFI_REMEMBER_STATE 375 CFI_REMEMBER_STATE
@@ -305,7 +412,7 @@ sysret_signal:
305 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1 412 leaq -ARGOFFSET(%rsp),%rdi # &pt_regs -> arg1
306 xorl %esi,%esi # oldset -> arg2 413 xorl %esi,%esi # oldset -> arg2
307 call ptregscall_common 414 call ptregscall_common
3081: movl $_TIF_NEED_RESCHED,%edi 4151: movl $_TIF_WORK_MASK,%edi
309 /* Use IRET because user could have changed frame. This 416 /* Use IRET because user could have changed frame. This
310 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */ 417 works because ptregscall_common has called FIXUP_TOP_OF_STACK. */
311 DISABLE_INTERRUPTS(CLBR_NONE) 418 DISABLE_INTERRUPTS(CLBR_NONE)
@@ -347,10 +454,10 @@ int_ret_from_sys_call:
347int_with_check: 454int_with_check:
348 LOCKDEP_SYS_EXIT_IRQ 455 LOCKDEP_SYS_EXIT_IRQ
349 GET_THREAD_INFO(%rcx) 456 GET_THREAD_INFO(%rcx)
350 movl threadinfo_flags(%rcx),%edx 457 movl TI_flags(%rcx),%edx
351 andl %edi,%edx 458 andl %edi,%edx
352 jnz int_careful 459 jnz int_careful
353 andl $~TS_COMPAT,threadinfo_status(%rcx) 460 andl $~TS_COMPAT,TI_status(%rcx)
354 jmp retint_swapgs 461 jmp retint_swapgs
355 462
356 /* Either reschedule or signal or syscall exit tracking needed. */ 463 /* Either reschedule or signal or syscall exit tracking needed. */
@@ -393,7 +500,7 @@ int_signal:
393 movq %rsp,%rdi # &ptregs -> arg1 500 movq %rsp,%rdi # &ptregs -> arg1
394 xorl %esi,%esi # oldset -> arg2 501 xorl %esi,%esi # oldset -> arg2
395 call do_notify_resume 502 call do_notify_resume
3961: movl $_TIF_NEED_RESCHED,%edi 5031: movl $_TIF_WORK_MASK,%edi
397int_restore_rest: 504int_restore_rest:
398 RESTORE_REST 505 RESTORE_REST
399 DISABLE_INTERRUPTS(CLBR_NONE) 506 DISABLE_INTERRUPTS(CLBR_NONE)
@@ -558,7 +665,7 @@ retint_with_reschedule:
558 movl $_TIF_WORK_MASK,%edi 665 movl $_TIF_WORK_MASK,%edi
559retint_check: 666retint_check:
560 LOCKDEP_SYS_EXIT_IRQ 667 LOCKDEP_SYS_EXIT_IRQ
561 movl threadinfo_flags(%rcx),%edx 668 movl TI_flags(%rcx),%edx
562 andl %edi,%edx 669 andl %edi,%edx
563 CFI_REMEMBER_STATE 670 CFI_REMEMBER_STATE
564 jnz retint_careful 671 jnz retint_careful
@@ -646,17 +753,16 @@ retint_signal:
646 RESTORE_REST 753 RESTORE_REST
647 DISABLE_INTERRUPTS(CLBR_NONE) 754 DISABLE_INTERRUPTS(CLBR_NONE)
648 TRACE_IRQS_OFF 755 TRACE_IRQS_OFF
649 movl $_TIF_NEED_RESCHED,%edi
650 GET_THREAD_INFO(%rcx) 756 GET_THREAD_INFO(%rcx)
651 jmp retint_check 757 jmp retint_with_reschedule
652 758
653#ifdef CONFIG_PREEMPT 759#ifdef CONFIG_PREEMPT
654 /* Returning to kernel space. Check if we need preemption */ 760 /* Returning to kernel space. Check if we need preemption */
655 /* rcx: threadinfo. interrupts off. */ 761 /* rcx: threadinfo. interrupts off. */
656ENTRY(retint_kernel) 762ENTRY(retint_kernel)
657 cmpl $0,threadinfo_preempt_count(%rcx) 763 cmpl $0,TI_preempt_count(%rcx)
658 jnz retint_restore_args 764 jnz retint_restore_args
659 bt $TIF_NEED_RESCHED,threadinfo_flags(%rcx) 765 bt $TIF_NEED_RESCHED,TI_flags(%rcx)
660 jnc retint_restore_args 766 jnc retint_restore_args
661 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */ 767 bt $9,EFLAGS-ARGOFFSET(%rsp) /* interrupts off? */
662 jnc retint_restore_args 768 jnc retint_restore_args
@@ -710,6 +816,9 @@ END(invalidate_interrupt\num)
710ENTRY(call_function_interrupt) 816ENTRY(call_function_interrupt)
711 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt 817 apicinterrupt CALL_FUNCTION_VECTOR,smp_call_function_interrupt
712END(call_function_interrupt) 818END(call_function_interrupt)
819ENTRY(call_function_single_interrupt)
820 apicinterrupt CALL_FUNCTION_SINGLE_VECTOR,smp_call_function_single_interrupt
821END(call_function_single_interrupt)
713ENTRY(irq_move_cleanup_interrupt) 822ENTRY(irq_move_cleanup_interrupt)
714 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt 823 apicinterrupt IRQ_MOVE_CLEANUP_VECTOR,smp_irq_move_cleanup_interrupt
715END(irq_move_cleanup_interrupt) 824END(irq_move_cleanup_interrupt)
@@ -819,7 +928,7 @@ paranoid_restore\trace:
819 jmp irq_return 928 jmp irq_return
820paranoid_userspace\trace: 929paranoid_userspace\trace:
821 GET_THREAD_INFO(%rcx) 930 GET_THREAD_INFO(%rcx)
822 movl threadinfo_flags(%rcx),%ebx 931 movl TI_flags(%rcx),%ebx
823 andl $_TIF_WORK_MASK,%ebx 932 andl $_TIF_WORK_MASK,%ebx
824 jz paranoid_swapgs\trace 933 jz paranoid_swapgs\trace
825 movq %rsp,%rdi /* &pt_regs */ 934 movq %rsp,%rdi /* &pt_regs */
@@ -917,7 +1026,7 @@ error_exit:
917 testl %eax,%eax 1026 testl %eax,%eax
918 jne retint_kernel 1027 jne retint_kernel
919 LOCKDEP_SYS_EXIT_IRQ 1028 LOCKDEP_SYS_EXIT_IRQ
920 movl threadinfo_flags(%rcx),%edx 1029 movl TI_flags(%rcx),%edx
921 movl $_TIF_WORK_MASK,%edi 1030 movl $_TIF_WORK_MASK,%edi
922 andl %edi,%edx 1031 andl %edi,%edx
923 jnz retint_careful 1032 jnz retint_careful
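Worked check of the 0x38-byte frame used by the 64-bit mcount/ftrace_caller stubs above: seven caller-clobbered registers (rax, rcx, rdx, rsi, rdi, r8, r9) are spilled, and 7 * 8 = 56 = 0x38 bytes, so after the subq the spills occupy 0(%rsp) through 48(%rsp) and the instrumented function's return address sits at 0x38(%rsp), which is exactly what the "movq 0x38(%rsp), %rdi" loads as the first argument to the tracer.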
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
new file mode 100644
index 000000000000..ab115cd15fdf
--- /dev/null
+++ b/arch/x86/kernel/ftrace.c
@@ -0,0 +1,141 @@
1/*
2 * Code for replacing ftrace calls with jumps.
3 *
4 * Copyright (C) 2007-2008 Steven Rostedt <srostedt@redhat.com>
5 *
6 * Thanks goes to Ingo Molnar, for suggesting the idea.
7 * Mathieu Desnoyers, for suggesting postponing the modifications.
8 * Arjan van de Ven, for keeping me straight, and explaining to me
9 * the dangers of modifying code on the run.
10 */
11
12#include <linux/spinlock.h>
13#include <linux/hardirq.h>
14#include <linux/ftrace.h>
15#include <linux/percpu.h>
16#include <linux/init.h>
17#include <linux/list.h>
18
19#include <asm/alternative.h>
20#include <asm/ftrace.h>
21
22
23/* Long is fine, even if it is only 4 bytes ;-) */
24static long *ftrace_nop;
25
26union ftrace_code_union {
27 char code[MCOUNT_INSN_SIZE];
28 struct {
29 char e8;
30 int offset;
31 } __attribute__((packed));
32};
33
34
35static int notrace ftrace_calc_offset(long ip, long addr)
36{
37 return (int)(addr - ip);
38}
39
40notrace unsigned char *ftrace_nop_replace(void)
41{
42 return (char *)ftrace_nop;
43}
44
45notrace unsigned char *ftrace_call_replace(unsigned long ip, unsigned long addr)
46{
47 static union ftrace_code_union calc;
48
49 calc.e8 = 0xe8;
50 calc.offset = ftrace_calc_offset(ip + MCOUNT_INSN_SIZE, addr);
51
52 /*
53 * No locking needed, this must be called via kstop_machine
54 * which in essence is like running on a uniprocessor machine.
55 */
56 return calc.code;
57}
58
59notrace int
60ftrace_modify_code(unsigned long ip, unsigned char *old_code,
61 unsigned char *new_code)
62{
63 unsigned replaced;
64 unsigned old = *(unsigned *)old_code; /* 4 bytes */
65 unsigned new = *(unsigned *)new_code; /* 4 bytes */
66 unsigned char newch = new_code[4];
67 int faulted = 0;
68
69 /*
70 * Note: Due to modules and __init, code can
71 * disappear and change, we need to protect against faulting
72 * as well as code changing.
73 *
74 * No real locking needed, this code is run through
75 * kstop_machine.
76 */
77 asm volatile (
78 "1: lock\n"
79 " cmpxchg %3, (%2)\n"
80 " jnz 2f\n"
81 " movb %b4, 4(%2)\n"
82 "2:\n"
83 ".section .fixup, \"ax\"\n"
84 "3: movl $1, %0\n"
85 " jmp 2b\n"
86 ".previous\n"
87 _ASM_EXTABLE(1b, 3b)
88 : "=r"(faulted), "=a"(replaced)
89 : "r"(ip), "r"(new), "c"(newch),
90 "0"(faulted), "a"(old)
91 : "memory");
92 sync_core();
93
94 if (replaced != old && replaced != new)
95 faulted = 2;
96
97 return faulted;
98}
99
100notrace int ftrace_update_ftrace_func(ftrace_func_t func)
101{
102 unsigned long ip = (unsigned long)(&ftrace_call);
103 unsigned char old[MCOUNT_INSN_SIZE], *new;
104 int ret;
105
106 memcpy(old, &ftrace_call, MCOUNT_INSN_SIZE);
107 new = ftrace_call_replace(ip, (unsigned long)func);
108 ret = ftrace_modify_code(ip, old, new);
109
110 return ret;
111}
112
113notrace int ftrace_mcount_set(unsigned long *data)
114{
115 unsigned long ip = (long)(&mcount_call);
116 unsigned long *addr = data;
117 unsigned char old[MCOUNT_INSN_SIZE], *new;
118
119 /*
120 * Replace the mcount stub with a pointer to the
121 * ip recorder function.
122 */
123 memcpy(old, &mcount_call, MCOUNT_INSN_SIZE);
124 new = ftrace_call_replace(ip, *addr);
125 *addr = ftrace_modify_code(ip, old, new);
126
127 return 0;
128}
129
130int __init ftrace_dyn_arch_init(void *data)
131{
132 const unsigned char *const *noptable = find_nop_table();
133
134 /* This is running in kstop_machine */
135
136 ftrace_mcount_set(data);
137
138 ftrace_nop = (unsigned long *)noptable[MCOUNT_INSN_SIZE];
139
140 return 0;
141}
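ftrace_call_replace() above builds the 5-byte x86 near call that dynamic ftrace patches in at each mcount site: an 0xe8 opcode followed by a 32-bit displacement relative to the end of the instruction, hence the ip + MCOUNT_INSN_SIZE in ftrace_calc_offset(). A standalone userspace sketch of the same encoding, for illustration only (not kernel code):

	#include <stdint.h>
	#include <string.h>

	/* Encode "call target" placed at address ip as e8 <rel32>;
	 * MCOUNT_INSN_SIZE is 5 on x86: one opcode byte plus a rel32. */
	static void encode_near_call(uint8_t insn[5], unsigned long ip,
				     unsigned long target)
	{
		int32_t rel = (int32_t)(target - (ip + 5));	/* relative to next insn */

		insn[0] = 0xe8;
		memcpy(&insn[1], &rel, sizeof(rel));
	}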
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index 45e84acca8a9..711f11c30b06 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -8,6 +8,7 @@
8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved. 8 * Copyright (C) 2007-2008 Silicon Graphics, Inc. All rights reserved.
9 */ 9 */
10 10
11#include <linux/kernel.h>
11#include <linux/threads.h> 12#include <linux/threads.h>
12#include <linux/cpumask.h> 13#include <linux/cpumask.h>
13#include <linux/string.h> 14#include <linux/string.h>
@@ -20,6 +21,7 @@
20#include <asm/smp.h> 21#include <asm/smp.h>
21#include <asm/ipi.h> 22#include <asm/ipi.h>
22#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/pgtable.h>
23#include <asm/uv/uv_mmrs.h> 25#include <asm/uv/uv_mmrs.h>
24#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
25 27
@@ -208,14 +210,79 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size)
208 BUG(); 210 BUG();
209} 211}
210 212
213static __init void map_low_mmrs(void)
214{
215 init_extra_mapping_uc(UV_GLOBAL_MMR32_BASE, UV_GLOBAL_MMR32_SIZE);
216 init_extra_mapping_uc(UV_LOCAL_MMR_BASE, UV_LOCAL_MMR_SIZE);
217}
218
219enum map_type {map_wb, map_uc};
220
221static void map_high(char *id, unsigned long base, int shift, enum map_type map_type)
222{
223 unsigned long bytes, paddr;
224
225 paddr = base << shift;
226 bytes = (1UL << shift);
227 printk(KERN_INFO "UV: Map %s_HI 0x%lx - 0x%lx\n", id, paddr,
228 paddr + bytes);
229 if (map_type == map_uc)
230 init_extra_mapping_uc(paddr, bytes);
231 else
232 init_extra_mapping_wb(paddr, bytes);
233
234}
235static __init void map_gru_high(int max_pnode)
236{
237 union uvh_rh_gam_gru_overlay_config_mmr_u gru;
238 int shift = UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT;
239
240 gru.v = uv_read_local_mmr(UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR);
241 if (gru.s.enable)
242 map_high("GRU", gru.s.base, shift, map_wb);
243}
244
245static __init void map_config_high(int max_pnode)
246{
247 union uvh_rh_gam_cfg_overlay_config_mmr_u cfg;
248 int shift = UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR_BASE_SHFT;
249
250 cfg.v = uv_read_local_mmr(UVH_RH_GAM_CFG_OVERLAY_CONFIG_MMR);
251 if (cfg.s.enable)
252 map_high("CONFIG", cfg.s.base, shift, map_uc);
253}
254
255static __init void map_mmr_high(int max_pnode)
256{
257 union uvh_rh_gam_mmr_overlay_config_mmr_u mmr;
258 int shift = UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR_BASE_SHFT;
259
260 mmr.v = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR);
261 if (mmr.s.enable)
262 map_high("MMR", mmr.s.base, shift, map_uc);
263}
264
265static __init void map_mmioh_high(int max_pnode)
266{
267 union uvh_rh_gam_mmioh_overlay_config_mmr_u mmioh;
268 int shift = UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT;
269
270 mmioh.v = uv_read_local_mmr(UVH_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR);
271 if (mmioh.s.enable)
272 map_high("MMIOH", mmioh.s.base, shift, map_uc);
273}
274
211static __init void uv_system_init(void) 275static __init void uv_system_init(void)
212{ 276{
213 union uvh_si_addr_map_config_u m_n_config; 277 union uvh_si_addr_map_config_u m_n_config;
214 union uvh_node_id_u node_id; 278 union uvh_node_id_u node_id;
215 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size; 279 unsigned long gnode_upper, lowmem_redir_base, lowmem_redir_size;
216 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val; 280 int bytes, nid, cpu, lcpu, pnode, blade, i, j, m_val, n_val;
281 int max_pnode = 0;
217 unsigned long mmr_base, present; 282 unsigned long mmr_base, present;
218 283
284 map_low_mmrs();
285
219 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG); 286 m_n_config.v = uv_read_local_mmr(UVH_SI_ADDR_MAP_CONFIG);
220 m_val = m_n_config.s.m_skt; 287 m_val = m_n_config.s.m_skt;
221 n_val = m_n_config.s.n_skt; 288 n_val = m_n_config.s.n_skt;
@@ -281,12 +348,18 @@ static __init void uv_system_init(void)
281 uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */ 348 uv_cpu_hub_info(cpu)->coherency_domain_number = 0;/* ZZZ */
282 uv_node_to_blade[nid] = blade; 349 uv_node_to_blade[nid] = blade;
283 uv_cpu_to_blade[cpu] = blade; 350 uv_cpu_to_blade[cpu] = blade;
351 max_pnode = max(pnode, max_pnode);
284 352
285 printk(KERN_DEBUG "UV cpu %d, apicid 0x%x, pnode %d, nid %d, " 353 printk(KERN_DEBUG "UV: cpu %d, apicid 0x%x, pnode %d, nid %d, "
286 "lcpu %d, blade %d\n", 354 "lcpu %d, blade %d\n",
287 cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid, 355 cpu, per_cpu(x86_cpu_to_apicid, cpu), pnode, nid,
288 lcpu, blade); 356 lcpu, blade);
289 } 357 }
358
359 map_gru_high(max_pnode);
360 map_mmr_high(max_pnode);
361 map_config_high(max_pnode);
362 map_mmioh_high(max_pnode);
290} 363}
291 364
292/* 365/*
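For reference, the map_high() arithmetic above with made-up numbers (the real shift comes from the UVH_*_OVERLAY_CONFIG_MMR_BASE_SHFT constants): with shift = 28 and base = 0x5, paddr = 0x5 << 28 = 0x50000000 and bytes = 1UL << 28 = 0x10000000, so the printed range and the extra UC or WB mapping cover 0x50000000 - 0x60000000.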
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ea230ec69057..0ea6a19bfdfe 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -36,26 +36,15 @@ static inline void hpet_writel(unsigned long d, unsigned long a)
36} 36}
37 37
38#ifdef CONFIG_X86_64 38#ifdef CONFIG_X86_64
39
40#include <asm/pgtable.h> 39#include <asm/pgtable.h>
41 40#endif
42static inline void hpet_set_mapping(void)
43{
44 set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
45 __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
46 hpet_virt_address = (void __iomem *)fix_to_virt(FIX_HPET_BASE);
47}
48
49static inline void hpet_clear_mapping(void)
50{
51 hpet_virt_address = NULL;
52}
53
54#else
55 41
56static inline void hpet_set_mapping(void) 42static inline void hpet_set_mapping(void)
57{ 43{
58 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE); 44 hpet_virt_address = ioremap_nocache(hpet_address, HPET_MMAP_SIZE);
45#ifdef CONFIG_X86_64
46 __set_fixmap(VSYSCALL_HPET, hpet_address, PAGE_KERNEL_VSYSCALL_NOCACHE);
47#endif
59} 48}
60 49
61static inline void hpet_clear_mapping(void) 50static inline void hpet_clear_mapping(void)
@@ -63,7 +52,6 @@ static inline void hpet_clear_mapping(void)
63 iounmap(hpet_virt_address); 52 iounmap(hpet_virt_address);
64 hpet_virt_address = NULL; 53 hpet_virt_address = NULL;
65} 54}
66#endif
67 55
68/* 56/*
69 * HPET command line enable / disable 57 * HPET command line enable / disable
diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c
index deb43785e923..dd7ebee446af 100644
--- a/arch/x86/kernel/i386_ksyms_32.c
+++ b/arch/x86/kernel/i386_ksyms_32.c
@@ -1,7 +1,14 @@
1#include <linux/module.h> 1#include <linux/module.h>
2
2#include <asm/checksum.h> 3#include <asm/checksum.h>
3#include <asm/desc.h>
4#include <asm/pgtable.h> 4#include <asm/pgtable.h>
5#include <asm/desc.h>
6#include <asm/ftrace.h>
7
8#ifdef CONFIG_FTRACE
9/* mcount is defined in assembly */
10EXPORT_SYMBOL(mcount);
11#endif
5 12
6/* Networking helper routines. */ 13/* Networking helper routines. */
7EXPORT_SYMBOL(csum_partial_copy_generic); 14EXPORT_SYMBOL(csum_partial_copy_generic);
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 337ec3438a8f..558abf4c796a 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -1569,7 +1569,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1569 1569
1570void print_all_local_APICs(void) 1570void print_all_local_APICs(void)
1571{ 1571{
1572 on_each_cpu(print_local_APIC, NULL, 1, 1); 1572 on_each_cpu(print_local_APIC, NULL, 1);
1573} 1573}
1574 1574
1575void /*__init*/ print_PIC(void) 1575void /*__init*/ print_PIC(void)
@@ -2020,7 +2020,7 @@ static inline void init_IO_APIC_traps(void)
2020 * The local APIC irq-chip implementation: 2020 * The local APIC irq-chip implementation:
2021 */ 2021 */
2022 2022
2023static void ack_apic(unsigned int irq) 2023static void ack_lapic_irq(unsigned int irq)
2024{ 2024{
2025 ack_APIC_irq(); 2025 ack_APIC_irq();
2026} 2026}
@@ -2045,9 +2045,17 @@ static struct irq_chip lapic_chip __read_mostly = {
2045 .name = "local-APIC", 2045 .name = "local-APIC",
2046 .mask = mask_lapic_irq, 2046 .mask = mask_lapic_irq,
2047 .unmask = unmask_lapic_irq, 2047 .unmask = unmask_lapic_irq,
2048 .eoi = ack_apic, 2048 .ack = ack_lapic_irq,
2049}; 2049};
2050 2050
2051static void lapic_register_intr(int irq, int vector)
2052{
2053 irq_desc[irq].status &= ~IRQ_LEVEL;
2054 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
2055 "edge");
2056 set_intr_gate(vector, interrupt[irq]);
2057}
2058
2051static void __init setup_nmi(void) 2059static void __init setup_nmi(void)
2052{ 2060{
2053 /* 2061 /*
@@ -2247,8 +2255,7 @@ static inline void __init check_timer(void)
2247 2255
2248 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); 2256 printk(KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
2249 2257
2250 set_irq_chip_and_handler_name(0, &lapic_chip, handle_fasteoi_irq, 2258 lapic_register_intr(0, vector);
2251 "fasteoi");
2252 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */ 2259 apic_write_around(APIC_LVT0, APIC_DM_FIXED | vector); /* Fixed mode */
2253 enable_8259A_irq(0); 2260 enable_8259A_irq(0);
2254 2261
@@ -2280,11 +2287,21 @@ out:
2280} 2287}
2281 2288
2282/* 2289/*
2283 * 2290 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
2284 * IRQ's that are handled by the PIC in the MPS IOAPIC case. 2291 * to devices. However there may be an I/O APIC pin available for
2285 * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. 2292 * this interrupt regardless. The pin may be left unconnected, but
2286 * Linux doesn't really care, as it's not actually used 2293 * typically it will be reused as an ExtINT cascade interrupt for
2287 * for any interrupt handling anyway. 2294 * the master 8259A. In the MPS case such a pin will normally be
2295 * reported as an ExtINT interrupt in the MP table. With ACPI
2296 * there is no provision for ExtINT interrupts, and in the absence
2297 * of an override it would be treated as an ordinary ISA I/O APIC
2298 * interrupt, that is edge-triggered and unmasked by default. We
2299 * used to do this, but it caused problems on some systems because
2300 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
2301 * the same ExtINT cascade interrupt to drive the local APIC of the
2302 * bootstrap processor. Therefore we refrain from routing IRQ2 to
2303 * the I/O APIC in all cases now. No actual device should request
2304 * it anyway. --macro
2288 */ 2305 */
2289#define PIC_IRQS (1 << PIC_CASCADE_IR) 2306#define PIC_IRQS (1 << PIC_CASCADE_IR)
2290 2307
@@ -2298,10 +2315,7 @@ void __init setup_IO_APIC(void)
2298 2315
2299 enable_IO_APIC(); 2316 enable_IO_APIC();
2300 2317
2301 if (acpi_ioapic) 2318 io_apic_irqs = ~PIC_IRQS;
2302 io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
2303 else
2304 io_apic_irqs = ~PIC_IRQS;
2305 2319
2306 printk("ENABLING IO-APIC IRQs\n"); 2320 printk("ENABLING IO-APIC IRQs\n");
2307 2321
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 2b4c40bc12c9..6510cde36b35 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -1160,7 +1160,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1160 1160
1161void print_all_local_APICs (void) 1161void print_all_local_APICs (void)
1162{ 1162{
1163 on_each_cpu(print_local_APIC, NULL, 1, 1); 1163 on_each_cpu(print_local_APIC, NULL, 1);
1164} 1164}
1165 1165
1166void __apicdebuginit print_PIC(void) 1166void __apicdebuginit print_PIC(void)
@@ -1554,7 +1554,7 @@ static inline void init_IO_APIC_traps(void)
1554 } 1554 }
1555} 1555}
1556 1556
1557static void enable_lapic_irq (unsigned int irq) 1557static void unmask_lapic_irq(unsigned int irq)
1558{ 1558{
1559 unsigned long v; 1559 unsigned long v;
1560 1560
@@ -1562,7 +1562,7 @@ static void enable_lapic_irq (unsigned int irq)
1562 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED); 1562 apic_write(APIC_LVT0, v & ~APIC_LVT_MASKED);
1563} 1563}
1564 1564
1565static void disable_lapic_irq (unsigned int irq) 1565static void mask_lapic_irq(unsigned int irq)
1566{ 1566{
1567 unsigned long v; 1567 unsigned long v;
1568 1568
@@ -1575,19 +1575,20 @@ static void ack_lapic_irq (unsigned int irq)
1575 ack_APIC_irq(); 1575 ack_APIC_irq();
1576} 1576}
1577 1577
1578static void end_lapic_irq (unsigned int i) { /* nothing */ } 1578static struct irq_chip lapic_chip __read_mostly = {
1579 1579 .name = "local-APIC",
1580static struct hw_interrupt_type lapic_irq_type __read_mostly = { 1580 .mask = mask_lapic_irq,
1581 .name = "local-APIC", 1581 .unmask = unmask_lapic_irq,
1582 .typename = "local-APIC-edge", 1582 .ack = ack_lapic_irq,
1583 .startup = NULL, /* startup_irq() not used for IRQ0 */
1584 .shutdown = NULL, /* shutdown_irq() not used for IRQ0 */
1585 .enable = enable_lapic_irq,
1586 .disable = disable_lapic_irq,
1587 .ack = ack_lapic_irq,
1588 .end = end_lapic_irq,
1589}; 1583};
1590 1584
1585static void lapic_register_intr(int irq)
1586{
1587 irq_desc[irq].status &= ~IRQ_LEVEL;
1588 set_irq_chip_and_handler_name(irq, &lapic_chip, handle_edge_irq,
1589 "edge");
1590}
1591
1591static void __init setup_nmi(void) 1592static void __init setup_nmi(void)
1592{ 1593{
1593 /* 1594 /*
@@ -1714,11 +1715,6 @@ static inline void __init check_timer(void)
1714 apic2 = apic1; 1715 apic2 = apic1;
1715 } 1716 }
1716 1717
1717 replace_pin_at_irq(0, 0, 0, apic1, pin1);
1718 apic1 = 0;
1719 pin1 = 0;
1720 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
1721
1722 if (pin1 != -1) { 1718 if (pin1 != -1) {
1723 /* 1719 /*
1724 * Ok, does IRQ0 through the IOAPIC work? 1720 * Ok, does IRQ0 through the IOAPIC work?
@@ -1779,7 +1775,7 @@ static inline void __init check_timer(void)
1779 1775
1780 apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ..."); 1776 apic_printk(APIC_VERBOSE, KERN_INFO "...trying to set up timer as Virtual Wire IRQ...");
1781 1777
1782 irq_desc[0].chip = &lapic_irq_type; 1778 lapic_register_intr(0);
1783 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */ 1779 apic_write(APIC_LVT0, APIC_DM_FIXED | cfg->vector); /* Fixed mode */
1784 enable_8259A_irq(0); 1780 enable_8259A_irq(0);
1785 1781
@@ -1817,11 +1813,21 @@ static int __init notimercheck(char *s)
1817__setup("no_timer_check", notimercheck); 1813__setup("no_timer_check", notimercheck);
1818 1814
1819/* 1815/*
1820 * 1816 * Traditionally ISA IRQ2 is the cascade IRQ, and is not available
1821 * IRQs that are handled by the PIC in the MPS IOAPIC case. 1817 * to devices. However there may be an I/O APIC pin available for
1822 * - IRQ2 is the cascade IRQ, and cannot be a io-apic IRQ. 1818 * this interrupt regardless. The pin may be left unconnected, but
1823 * Linux doesn't really care, as it's not actually used 1819 * typically it will be reused as an ExtINT cascade interrupt for
1824 * for any interrupt handling anyway. 1820 * the master 8259A. In the MPS case such a pin will normally be
1821 * reported as an ExtINT interrupt in the MP table. With ACPI
1822 * there is no provision for ExtINT interrupts, and in the absence
1823 * of an override it would be treated as an ordinary ISA I/O APIC
1824 * interrupt, that is edge-triggered and unmasked by default. We
1825 * used to do this, but it caused problems on some systems because
1826 * of the NMI watchdog and sometimes IRQ0 of the 8254 timer using
1827 * the same ExtINT cascade interrupt to drive the local APIC of the
1828 * bootstrap processor. Therefore we refrain from routing IRQ2 to
1829 * the I/O APIC in all cases now. No actual device should request
1830 * it anyway. --macro
1825 */ 1831 */
1826#define PIC_IRQS (1<<2) 1832#define PIC_IRQS (1<<2)
1827 1833
@@ -1832,10 +1838,7 @@ void __init setup_IO_APIC(void)
1832 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 1838 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
1833 */ 1839 */
1834 1840
1835 if (acpi_ioapic) 1841 io_apic_irqs = ~PIC_IRQS;
1836 io_apic_irqs = ~0; /* all IRQs go through IOAPIC */
1837 else
1838 io_apic_irqs = ~PIC_IRQS;
1839 1842
1840 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n"); 1843 apic_printk(APIC_VERBOSE, "ENABLING IO-APIC IRQs\n");
1841 1844
diff --git a/arch/x86/kernel/irqinit_64.c b/arch/x86/kernel/irqinit_64.c
index 31f49e8f46a7..0373e88de95a 100644
--- a/arch/x86/kernel/irqinit_64.c
+++ b/arch/x86/kernel/irqinit_64.c
@@ -199,6 +199,10 @@ void __init native_init_IRQ(void)
199 /* IPI for generic function call */ 199 /* IPI for generic function call */
200 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 200 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
201 201
202 /* IPI for generic single function call */
203 alloc_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
204 call_function_single_interrupt);
205
202 /* Low priority IPI to cleanup after moving an irq */ 206 /* Low priority IPI to cleanup after moving an irq */
203 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt); 207 set_intr_gate(IRQ_MOVE_CLEANUP_VECTOR, irq_move_cleanup_interrupt);
204#endif 208#endif
diff --git a/arch/x86/kernel/ldt.c b/arch/x86/kernel/ldt.c
index 21f2bae98c15..a8449571858a 100644
--- a/arch/x86/kernel/ldt.c
+++ b/arch/x86/kernel/ldt.c
@@ -68,7 +68,7 @@ static int alloc_ldt(mm_context_t *pc, int mincount, int reload)
68 load_LDT(pc); 68 load_LDT(pc);
69 mask = cpumask_of_cpu(smp_processor_id()); 69 mask = cpumask_of_cpu(smp_processor_id());
70 if (!cpus_equal(current->mm->cpu_vm_mask, mask)) 70 if (!cpus_equal(current->mm->cpu_vm_mask, mask))
71 smp_call_function(flush_ldt, current->mm, 1, 1); 71 smp_call_function(flush_ldt, current->mm, 1);
72 preempt_enable(); 72 preempt_enable();
73#else 73#else
74 load_LDT(pc); 74 load_LDT(pc);
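This file, like several others in the merge (nmi.c, io_apic_*.c, process.c), is adjusted for the cross-CPU call helpers losing their unused nonatomic/retry argument; the trailing argument is still the wait flag. Before and after, as seen in the hunks:

	smp_call_function(flush_ldt, current->mm, 1, 1);	/* old: func, info, nonatomic, wait */
	smp_call_function(flush_ldt, current->mm, 1);		/* new: func, info, wait */

	on_each_cpu(print_local_APIC, NULL, 1, 1);		/* old */
	on_each_cpu(print_local_APIC, NULL, 1);			/* new */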
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index f4960171bc66..8864230d55af 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -11,6 +11,8 @@
11#include <linux/delay.h> 11#include <linux/delay.h>
12#include <linux/init.h> 12#include <linux/init.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/pgalloc.h> 17#include <asm/pgalloc.h>
16#include <asm/tlbflush.h> 18#include <asm/tlbflush.h>
@@ -107,6 +109,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
107 unsigned long page_list[PAGES_NR]; 109 unsigned long page_list[PAGES_NR];
108 void *control_page; 110 void *control_page;
109 111
112 tracer_disable();
113
110 /* Interrupts aren't acceptable while we reboot */ 114 /* Interrupts aren't acceptable while we reboot */
111 local_irq_disable(); 115 local_irq_disable();
112 116
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index 7830dc4a8380..9dd9262693a3 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -11,6 +11,8 @@
11#include <linux/string.h> 11#include <linux/string.h>
12#include <linux/reboot.h> 12#include <linux/reboot.h>
13#include <linux/numa.h> 13#include <linux/numa.h>
14#include <linux/ftrace.h>
15
14#include <asm/pgtable.h> 16#include <asm/pgtable.h>
15#include <asm/tlbflush.h> 17#include <asm/tlbflush.h>
16#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
@@ -184,6 +186,8 @@ NORET_TYPE void machine_kexec(struct kimage *image)
184 unsigned long page_list[PAGES_NR]; 186 unsigned long page_list[PAGES_NR];
185 void *control_page; 187 void *control_page;
186 188
189 tracer_disable();
190
187 /* Interrupts aren't acceptable while we reboot */ 191 /* Interrupts aren't acceptable while we reboot */
188 local_irq_disable(); 192 local_irq_disable();
189 193
diff --git a/arch/x86/kernel/microcode.c b/arch/x86/kernel/microcode.c
index 9758fea87c5b..56b933119a04 100644
--- a/arch/x86/kernel/microcode.c
+++ b/arch/x86/kernel/microcode.c
@@ -76,6 +76,7 @@
76#include <linux/kernel.h> 76#include <linux/kernel.h>
77#include <linux/init.h> 77#include <linux/init.h>
78#include <linux/sched.h> 78#include <linux/sched.h>
79#include <linux/smp_lock.h>
79#include <linux/cpumask.h> 80#include <linux/cpumask.h>
80#include <linux/module.h> 81#include <linux/module.h>
81#include <linux/slab.h> 82#include <linux/slab.h>
@@ -423,6 +424,7 @@ out:
423 424
424static int microcode_open (struct inode *unused1, struct file *unused2) 425static int microcode_open (struct inode *unused1, struct file *unused2)
425{ 426{
427 cycle_kernel_lock();
426 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM; 428 return capable(CAP_SYS_RAWIO) ? 0 : -EPERM;
427} 429}
428 430
@@ -489,7 +491,7 @@ MODULE_ALIAS_MISCDEV(MICROCODE_MINOR);
489#define microcode_dev_exit() do { } while(0) 491#define microcode_dev_exit() do { } while(0)
490#endif 492#endif
491 493
492static long get_next_ucode_from_buffer(void **mc, void *buf, 494static long get_next_ucode_from_buffer(void **mc, const u8 *buf,
493 unsigned long size, long offset) 495 unsigned long size, long offset)
494{ 496{
495 microcode_header_t *mc_header; 497 microcode_header_t *mc_header;
@@ -523,7 +525,7 @@ static int cpu_request_microcode(int cpu)
523 char name[30]; 525 char name[30];
524 struct cpuinfo_x86 *c = &cpu_data(cpu); 526 struct cpuinfo_x86 *c = &cpu_data(cpu);
525 const struct firmware *firmware; 527 const struct firmware *firmware;
526 void *buf; 528 const u8 *buf;
527 unsigned long size; 529 unsigned long size;
528 long offset = 0; 530 long offset = 0;
529 int error; 531 int error;
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index 8b6b1e05c306..3b25e49380c6 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -726,12 +726,22 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
726static struct intel_mp_floating *mpf_found; 726static struct intel_mp_floating *mpf_found;
727 727
728/* 728/*
729 * Machine specific quirk for finding the SMP config before other setup
730 * activities destroy the table:
731 */
732int (*mach_get_smp_config_quirk)(unsigned int early);
733
734/*
729 * Scan the memory blocks for an SMP configuration block. 735 * Scan the memory blocks for an SMP configuration block.
730 */ 736 */
731static void __init __get_smp_config(unsigned early) 737static void __init __get_smp_config(unsigned int early)
732{ 738{
733 struct intel_mp_floating *mpf = mpf_found; 739 struct intel_mp_floating *mpf = mpf_found;
734 740
741 if (mach_get_smp_config_quirk) {
742 if (mach_get_smp_config_quirk(early))
743 return;
744 }
735 if (acpi_lapic && early) 745 if (acpi_lapic && early)
736 return; 746 return;
737 /* 747 /*
@@ -889,10 +899,16 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
889 return 0; 899 return 0;
890} 900}
891 901
892static void __init __find_smp_config(unsigned reserve) 902int (*mach_find_smp_config_quirk)(unsigned int reserve);
903
904static void __init __find_smp_config(unsigned int reserve)
893{ 905{
894 unsigned int address; 906 unsigned int address;
895 907
908 if (mach_find_smp_config_quirk) {
909 if (mach_find_smp_config_quirk(reserve))
910 return;
911 }
896 /* 912 /*
897 * FIXME: Linux assumes you have 640K of base ram.. 913 * FIXME: Linux assumes you have 640K of base ram..
898 * this continues the error... 914 * this continues the error...
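mpparse.c grows two quirk hooks that a subarchitecture can install before MP-table scanning runs, mirroring the memory-setup quirk earlier in the merge. A hedged sketch of how a platform might wire them up; the platform and function names are invented, and a non-zero return tells the generic code the work has been handled:

	static int __init myplat_find_smp_config(unsigned int reserve)
	{
		/* no MP table on this platform; nothing to scan or reserve */
		return 1;
	}

	static int __init myplat_get_smp_config(unsigned int early)
	{
		/* nothing to parse either */
		return 1;
	}

	static void __init myplat_early_detect(void)
	{
		mach_find_smp_config_quirk = myplat_find_smp_config;
		mach_get_smp_config_quirk = myplat_get_smp_config;
	}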
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 1f3abe048e93..a153b3905f60 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -117,12 +117,20 @@ static int msr_open(struct inode *inode, struct file *file)
117{ 117{
118 unsigned int cpu = iminor(file->f_path.dentry->d_inode); 118 unsigned int cpu = iminor(file->f_path.dentry->d_inode);
119 struct cpuinfo_x86 *c = &cpu_data(cpu); 119 struct cpuinfo_x86 *c = &cpu_data(cpu);
120 int ret = 0;
120 121
121 if (cpu >= NR_CPUS || !cpu_online(cpu)) 122 lock_kernel();
122 return -ENXIO; /* No such CPU */ 123 cpu = iminor(file->f_path.dentry->d_inode);
123 if (!cpu_has(c, X86_FEATURE_MSR))
124 return -EIO; /* MSR not supported */
125 124
125 if (cpu >= NR_CPUS || !cpu_online(cpu)) {
126 ret = -ENXIO; /* No such CPU */
127 goto out;
128 }
129 c = &cpu_data(cpu);
130 if (!cpu_has(c, X86_FEATURE_MSR))
131 ret = -EIO; /* MSR not supported */
132out:
133 unlock_kernel();
126 return 0; 134 return 0;
127} 135}
128 136
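One thing worth noting in the msr_open() hunk above: the new lock_kernel()/unlock_kernel() path accumulates the error code in ret, but the function still ends with "return 0", so the -ENXIO and -EIO cases would be lost. Assuming the intent is to propagate the status, the handler would read roughly as follows (sketch only):

	static int msr_open(struct inode *inode, struct file *file)
	{
		unsigned int cpu;
		struct cpuinfo_x86 *c;
		int ret = 0;

		lock_kernel();
		cpu = iminor(file->f_path.dentry->d_inode);

		if (cpu >= NR_CPUS || !cpu_online(cpu)) {
			ret = -ENXIO;	/* No such CPU */
			goto out;
		}
		c = &cpu_data(cpu);
		if (!cpu_has(c, X86_FEATURE_MSR))
			ret = -EIO;	/* MSR not supported */
	out:
		unlock_kernel();
		return ret;
	}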
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 8dfe9db87a9e..ec024b3baad0 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -130,7 +130,7 @@ int __init check_nmi_watchdog(void)
130 130
131#ifdef CONFIG_SMP 131#ifdef CONFIG_SMP
132 if (nmi_watchdog == NMI_LOCAL_APIC) 132 if (nmi_watchdog == NMI_LOCAL_APIC)
133 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0, 0); 133 smp_call_function(nmi_cpu_busy, (void *)&endflag, 0);
134#endif 134#endif
135 135
136 for_each_possible_cpu(cpu) 136 for_each_possible_cpu(cpu)
@@ -171,6 +171,9 @@ int __init check_nmi_watchdog(void)
171error: 171error:
172 if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259) 172 if (nmi_watchdog == NMI_IO_APIC && !timer_through_8259)
173 disable_8259A_irq(0); 173 disable_8259A_irq(0);
174#ifdef CONFIG_X86_32
175 timer_ack = 0;
176#endif
174 return -1; 177 return -1;
175} 178}
176 179
@@ -269,7 +272,7 @@ static void __acpi_nmi_enable(void *__unused)
269void acpi_nmi_enable(void) 272void acpi_nmi_enable(void)
270{ 273{
271 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) 274 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
272 on_each_cpu(__acpi_nmi_enable, NULL, 0, 1); 275 on_each_cpu(__acpi_nmi_enable, NULL, 1);
273} 276}
274 277
275static void __acpi_nmi_disable(void *__unused) 278static void __acpi_nmi_disable(void *__unused)
@@ -283,7 +286,7 @@ static void __acpi_nmi_disable(void *__unused)
283void acpi_nmi_disable(void) 286void acpi_nmi_disable(void)
284{ 287{
285 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC) 288 if (atomic_read(&nmi_active) && nmi_watchdog == NMI_IO_APIC)
286 on_each_cpu(__acpi_nmi_disable, NULL, 0, 1); 289 on_each_cpu(__acpi_nmi_disable, NULL, 1);
287} 290}
288 291
289void setup_apic_nmi_watchdog(void *unused) 292void setup_apic_nmi_watchdog(void *unused)
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index f0f1de1c4a1d..a23e8233b9ac 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -93,12 +93,13 @@ int __init get_memcfg_numaq(void)
93 return 1; 93 return 1;
94} 94}
95 95
96static int __init numaq_tsc_disable(void) 96void __init numaq_tsc_disable(void)
97{ 97{
98 if (!found_numaq)
99 return;
100
98 if (num_online_nodes() > 1) { 101 if (num_online_nodes() > 1) {
99 printk(KERN_DEBUG "NUMAQ: disabling TSC\n"); 102 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
100 setup_clear_cpu_cap(X86_FEATURE_TSC); 103 setup_clear_cpu_cap(X86_FEATURE_TSC);
101 } 104 }
102 return 0;
103} 105}
104arch_initcall(numaq_tsc_disable);
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e7e5652f65bc..e0f571d58c19 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -285,7 +285,7 @@ struct pv_time_ops pv_time_ops = {
285 .get_wallclock = native_get_wallclock, 285 .get_wallclock = native_get_wallclock,
286 .set_wallclock = native_set_wallclock, 286 .set_wallclock = native_set_wallclock,
287 .sched_clock = native_sched_clock, 287 .sched_clock = native_sched_clock,
288 .get_cpu_khz = native_calculate_cpu_khz, 288 .get_tsc_khz = native_calibrate_tsc,
289}; 289};
290 290
291struct pv_irq_ops pv_irq_ops = { 291struct pv_irq_ops pv_irq_ops = {
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index d0d18db5d2a4..c3fe78406d18 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -630,6 +630,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
630 struct pci_dev *dev; 630 struct pci_dev *dev;
631 void *gatt; 631 void *gatt;
632 int i, error; 632 int i, error;
633 unsigned long start_pfn, end_pfn;
633 634
634 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); 635 printk(KERN_INFO "PCI-DMA: Disabling AGP.\n");
635 aper_size = aper_base = info->aper_size = 0; 636 aper_size = aper_base = info->aper_size = 0;
@@ -674,6 +675,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info)
674 675
675 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n", 676 printk(KERN_INFO "PCI-DMA: aperture base @ %x size %u KB\n",
676 aper_base, aper_size>>10); 677 aper_base, aper_size>>10);
678
679 /* need to map that range */
680 end_pfn = (aper_base>>PAGE_SHIFT) + (aper_size>>PAGE_SHIFT);
681 if (end_pfn > max_low_pfn_mapped) {
682 start_pfn = (aper_base>>PAGE_SHIFT);
683 init_memory_mapping(start_pfn<<PAGE_SHIFT, end_pfn<<PAGE_SHIFT);
684 }
677 return 0; 685 return 0;
678 686
679 nommu: 687 nommu:
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 4061d63aabe7..4d629c62f4f8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -7,6 +7,12 @@
7#include <linux/module.h> 7#include <linux/module.h>
8#include <linux/pm.h> 8#include <linux/pm.h>
9#include <linux/clockchips.h> 9#include <linux/clockchips.h>
10#include <asm/system.h>
11
12unsigned long idle_halt;
13EXPORT_SYMBOL(idle_halt);
14unsigned long idle_nomwait;
15EXPORT_SYMBOL(idle_nomwait);
10 16
11struct kmem_cache *task_xstate_cachep; 17struct kmem_cache *task_xstate_cachep;
12 18
@@ -132,7 +138,7 @@ void cpu_idle_wait(void)
132{ 138{
133 smp_mb(); 139 smp_mb();
134 /* kick all the CPUs so that they exit out of pm_idle */ 140 /* kick all the CPUs so that they exit out of pm_idle */
135 smp_call_function(do_nothing, NULL, 0, 1); 141 smp_call_function(do_nothing, NULL, 1);
136} 142}
137EXPORT_SYMBOL_GPL(cpu_idle_wait); 143EXPORT_SYMBOL_GPL(cpu_idle_wait);
138 144
@@ -325,7 +331,27 @@ static int __init idle_setup(char *str)
325 pm_idle = poll_idle; 331 pm_idle = poll_idle;
326 } else if (!strcmp(str, "mwait")) 332 } else if (!strcmp(str, "mwait"))
327 force_mwait = 1; 333 force_mwait = 1;
328 else 334 else if (!strcmp(str, "halt")) {
335 /*
336 * When the boot option of idle=halt is added, halt is
337 * forced to be used for CPU idle. In such case CPU C2/C3
338 * won't be used again.
339 * To continue to load the CPU idle driver, don't touch
340 * the boot_option_idle_override.
341 */
342 pm_idle = default_idle;
343 idle_halt = 1;
344 return 0;
345 } else if (!strcmp(str, "nomwait")) {
346 /*
347 * If the boot option of "idle=nomwait" is added,
348 * it means that mwait will be disabled for CPU C2/C3
349 * states. In such case it won't touch the variable
350 * of boot_option_idle_override.
351 */
352 idle_nomwait = 1;
353 return 0;
354 } else
329 return -1; 355 return -1;
330 356
331 boot_option_idle_override = 1; 357 boot_option_idle_override = 1;
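The idle_setup() additions above give the idle= boot parameter two new values alongside the existing ones; a quick usage summary of the command-line options handled here:

	idle=poll	busy-poll in the idle loop (pre-existing)
	idle=mwait	force MWAIT-based idle (pre-existing)
	idle=halt	force HLT; deeper C2/C3 states are not entered, and
			boot_option_idle_override is deliberately left untouched
	idle=nomwait	keep the CPU idle driver but never use MWAIT for C2/C3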
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 9a139f6c9df3..0c3927accb00 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -142,7 +142,10 @@ void cpu_idle(void)
142 142
143 local_irq_disable(); 143 local_irq_disable();
144 __get_cpu_var(irq_stat).idle_timestamp = jiffies; 144 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
145 /* Don't trace irqs off for idle */
146 stop_critical_timings();
145 pm_idle(); 147 pm_idle();
148 start_critical_timings();
146 } 149 }
147 tick_nohz_restart_sched_tick(); 150 tick_nohz_restart_sched_tick();
148 preempt_enable_no_resched(); 151 preempt_enable_no_resched();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index db5eb963e4df..a8e53626ac9a 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -134,7 +134,10 @@ void cpu_idle(void)
134 */ 134 */
135 local_irq_disable(); 135 local_irq_disable();
136 enter_idle(); 136 enter_idle();
137 /* Don't trace irqs off for idle */
138 stop_critical_timings();
137 pm_idle(); 139 pm_idle();
140 start_critical_timings();
138 /* In many cases the interrupt that ended idle 141 /* In many cases the interrupt that ended idle
139 has already called exit_idle. But some idle 142 has already called exit_idle. But some idle
140 loops can be woken up without interrupt. */ 143 loops can be woken up without interrupt. */
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 79bdcd11c66e..d13858818100 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -266,6 +266,8 @@ static void old_ich_force_enable_hpet_user(struct pci_dev *dev)
266 hpet_print_force_info(); 266 hpet_print_force_info();
267} 267}
268 268
269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_ESB_1,
270 old_ich_force_enable_hpet_user);
269DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0, 271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_0,
270 old_ich_force_enable_hpet_user); 272 old_ich_force_enable_hpet_user);
271DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12, 273DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82801CA_12,
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index cfcfbefee0b9..531b55b8e81a 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -394,11 +394,10 @@ static void __init parse_setup_data(void)
394 } 394 }
395} 395}
396 396
397static void __init reserve_setup_data(void) 397static void __init e820_reserve_setup_data(void)
398{ 398{
399 struct setup_data *data; 399 struct setup_data *data;
400 u64 pa_data; 400 u64 pa_data;
401 char buf[32];
402 int found = 0; 401 int found = 0;
403 402
404 if (boot_params.hdr.version < 0x0209) 403 if (boot_params.hdr.version < 0x0209)
@@ -406,8 +405,6 @@ static void __init reserve_setup_data(void)
406 pa_data = boot_params.hdr.setup_data; 405 pa_data = boot_params.hdr.setup_data;
407 while (pa_data) { 406 while (pa_data) {
408 data = early_ioremap(pa_data, sizeof(*data)); 407 data = early_ioremap(pa_data, sizeof(*data));
409 sprintf(buf, "setup data %x", data->type);
410 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
411 e820_update_range(pa_data, sizeof(*data)+data->len, 408 e820_update_range(pa_data, sizeof(*data)+data->len,
412 E820_RAM, E820_RESERVED_KERN); 409 E820_RAM, E820_RESERVED_KERN);
413 found = 1; 410 found = 1;
@@ -418,10 +415,29 @@ static void __init reserve_setup_data(void)
418 return; 415 return;
419 416
420 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map); 417 sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
418 memcpy(&e820_saved, &e820, sizeof(struct e820map));
421 printk(KERN_INFO "extended physical RAM map:\n"); 419 printk(KERN_INFO "extended physical RAM map:\n");
422 e820_print_map("reserve setup_data"); 420 e820_print_map("reserve setup_data");
423} 421}
424 422
423static void __init reserve_early_setup_data(void)
424{
425 struct setup_data *data;
426 u64 pa_data;
427 char buf[32];
428
429 if (boot_params.hdr.version < 0x0209)
430 return;
431 pa_data = boot_params.hdr.setup_data;
432 while (pa_data) {
433 data = early_ioremap(pa_data, sizeof(*data));
434 sprintf(buf, "setup data %x", data->type);
435 reserve_early(pa_data, pa_data+sizeof(*data)+data->len, buf);
436 pa_data = data->next;
437 early_iounmap(data, sizeof(*data));
438 }
439}
440
425/* 441/*
426 * --------- Crashkernel reservation ------------------------------ 442 * --------- Crashkernel reservation ------------------------------
427 */ 443 */
@@ -580,6 +596,7 @@ void __init setup_arch(char **cmdline_p)
580{ 596{
581#ifdef CONFIG_X86_32 597#ifdef CONFIG_X86_32
582 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 598 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
599 visws_early_detect();
583 pre_setup_arch_hook(); 600 pre_setup_arch_hook();
584 early_cpu_init(); 601 early_cpu_init();
585#else 602#else
@@ -626,6 +643,8 @@ void __init setup_arch(char **cmdline_p)
626 643
627 setup_memory_map(); 644 setup_memory_map();
628 parse_setup_data(); 645 parse_setup_data();
646 /* update the e820_saved too */
647 e820_reserve_setup_data();
629 648
630 copy_edd(); 649 copy_edd();
631 650
@@ -656,7 +675,7 @@ void __init setup_arch(char **cmdline_p)
656 parse_early_param(); 675 parse_early_param();
657 676
658 /* after early param, so could get panic from serial */ 677 /* after early param, so could get panic from serial */
659 reserve_setup_data(); 678 reserve_early_setup_data();
660 679
661 if (acpi_mps_check()) { 680 if (acpi_mps_check()) {
662#ifdef CONFIG_X86_LOCAL_APIC 681#ifdef CONFIG_X86_LOCAL_APIC
@@ -665,6 +684,11 @@ void __init setup_arch(char **cmdline_p)
665 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC); 684 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_APIC);
666 } 685 }
667 686
687#ifdef CONFIG_PCI
688 if (pci_early_dump_regs)
689 early_dump_pci_devices();
690#endif
691
668 finish_e820_parsing(); 692 finish_e820_parsing();
669 693
670#ifdef CONFIG_X86_32 694#ifdef CONFIG_X86_32
@@ -691,22 +715,18 @@ void __init setup_arch(char **cmdline_p)
691 early_gart_iommu_check(); 715 early_gart_iommu_check();
692#endif 716#endif
693 717
694 e820_register_active_regions(0, 0, -1UL);
695 /* 718 /*
696 * partially used pages are not usable - thus 719 * partially used pages are not usable - thus
697 * we are rounding upwards: 720 * we are rounding upwards:
698 */ 721 */
699 max_pfn = e820_end_of_ram(); 722 max_pfn = e820_end_of_ram_pfn();
700 723
701 /* preallocate 4k for mptable mpc */ 724 /* preallocate 4k for mptable mpc */
702 early_reserve_e820_mpc_new(); 725 early_reserve_e820_mpc_new();
703 /* update e820 for memory not covered by WB MTRRs */ 726 /* update e820 for memory not covered by WB MTRRs */
704 mtrr_bp_init(); 727 mtrr_bp_init();
705 if (mtrr_trim_uncached_memory(max_pfn)) { 728 if (mtrr_trim_uncached_memory(max_pfn))
706 remove_all_active_ranges(); 729 max_pfn = e820_end_of_ram_pfn();
707 e820_register_active_regions(0, 0, -1UL);
708 max_pfn = e820_end_of_ram();
709 }
710 730
711#ifdef CONFIG_X86_32 731#ifdef CONFIG_X86_32
712 /* max_low_pfn get updated here */ 732 /* max_low_pfn get updated here */
@@ -718,12 +738,26 @@ void __init setup_arch(char **cmdline_p)
718 738
719 /* How many end-of-memory variables you have, grandma! */ 739 /* How many end-of-memory variables you have, grandma! */
720 /* need this before calling reserve_initrd */ 740 /* need this before calling reserve_initrd */
721 max_low_pfn = max_pfn; 741 if (max_pfn > (1UL<<(32 - PAGE_SHIFT)))
742 max_low_pfn = e820_end_of_low_ram_pfn();
743 else
744 max_low_pfn = max_pfn;
745
722 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1; 746 high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
723#endif 747#endif
724 748
725 /* max_pfn_mapped is updated here */ 749 /* max_pfn_mapped is updated here */
726 max_pfn_mapped = init_memory_mapping(0, (max_low_pfn << PAGE_SHIFT)); 750 max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT);
751 max_pfn_mapped = max_low_pfn_mapped;
752
753#ifdef CONFIG_X86_64
754 if (max_pfn > max_low_pfn) {
755 max_pfn_mapped = init_memory_mapping(1UL<<32,
756 max_pfn<<PAGE_SHIFT);
 757		/* can we preserve max_low_pfn? */
758 max_low_pfn = max_pfn;
759 }
760#endif
727 761
728 /* 762 /*
729 * NOTE: On x86-32, only from this point on, fixmaps are ready for use. 763 * NOTE: On x86-32, only from this point on, fixmaps are ready for use.
@@ -749,9 +783,6 @@ void __init setup_arch(char **cmdline_p)
749 */ 783 */
750 acpi_boot_table_init(); 784 acpi_boot_table_init();
751 785
752 /* Remove active ranges so rediscovery with NUMA-awareness happens */
753 remove_all_active_ranges();
754
755#ifdef CONFIG_ACPI_NUMA 786#ifdef CONFIG_ACPI_NUMA
756 /* 787 /*
757 * Parse SRAT to discover nodes. 788 * Parse SRAT to discover nodes.
@@ -823,6 +854,14 @@ void __init setup_arch(char **cmdline_p)
823 init_cpu_to_node(); 854 init_cpu_to_node();
824#endif 855#endif
825 856
857#ifdef CONFIG_X86_NUMAQ
858 /*
859 * need to check online nodes num, call it
860 * here before time_init/tsc_init
861 */
862 numaq_tsc_disable();
863#endif
864
826 init_apic_mappings(); 865 init_apic_mappings();
827 ioapic_init_mappings(); 866 ioapic_init_mappings();
828 867
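The new max_low_pfn logic above hinges on the comparison max_pfn > (1UL << (32 - PAGE_SHIFT)): with the x86 PAGE_SHIFT of 12 that is 1UL << 20 = 0x100000 pages, i.e. exactly 4 GiB, so e820_end_of_low_ram_pfn() is only consulted when RAM extends past 4 GiB. Below that boundary max_low_pfn simply tracks max_pfn, and on 64-bit the region above 4 GiB gets its own init_memory_mapping() pass, with max_pfn_mapped updated to cover it while max_low_pfn_mapped keeps the below-4 GiB figure.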
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 5fc310f746fc..cac68430d31f 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -343,23 +343,23 @@ static const cpumask_t cpu_mask_none;
343/* 343/*
344 * Returns a pointer to the bitmask of CPUs on Node 'node'. 344 * Returns a pointer to the bitmask of CPUs on Node 'node'.
345 */ 345 */
346cpumask_t *_node_to_cpumask_ptr(int node) 346const cpumask_t *_node_to_cpumask_ptr(int node)
347{ 347{
348 if (node_to_cpumask_map == NULL) { 348 if (node_to_cpumask_map == NULL) {
349 printk(KERN_WARNING 349 printk(KERN_WARNING
350 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n", 350 "_node_to_cpumask_ptr(%d): no node_to_cpumask_map!\n",
351 node); 351 node);
352 dump_stack(); 352 dump_stack();
353 return &cpu_online_map; 353 return (const cpumask_t *)&cpu_online_map;
354 } 354 }
355 if (node >= nr_node_ids) { 355 if (node >= nr_node_ids) {
356 printk(KERN_WARNING 356 printk(KERN_WARNING
357 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n", 357 "_node_to_cpumask_ptr(%d): node > nr_node_ids(%d)\n",
358 node, nr_node_ids); 358 node, nr_node_ids);
359 dump_stack(); 359 dump_stack();
360 return (cpumask_t *)&cpu_mask_none; 360 return &cpu_mask_none;
361 } 361 }
362 return (cpumask_t *)&node_to_cpumask_map[node]; 362 return &node_to_cpumask_map[node];
363} 363}
364EXPORT_SYMBOL(_node_to_cpumask_ptr); 364EXPORT_SYMBOL(_node_to_cpumask_ptr);
365 365
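
The setup_percpu.c hunk is a const-correctness tightening: _node_to_cpumask_ptr() now returns a const cpumask_t *, so callers can read the per-node CPU mask but can no longer scribble on the shared node_to_cpumask_map through the returned pointer. A hedged caller-side fragment follows; the report_node_cpus() helper is hypothetical and the include list is approximate.

#include <linux/topology.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>

/* Hypothetical caller, for illustration only. */
static void report_node_cpus(int node)
{
	const cpumask_t *mask = _node_to_cpumask_ptr(node);
	int cpu;

	for_each_cpu_mask(cpu, *mask)
		pr_debug("node %d owns cpu %d\n", node, cpu);

	/* cpu_set(cpu, *mask); would now be rejected: the map is read-only */
}
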
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index 0cb7aadc87cd..361b7a4c640c 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -121,132 +121,23 @@ static void native_smp_send_reschedule(int cpu)
121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR); 121 send_IPI_mask(cpumask_of_cpu(cpu), RESCHEDULE_VECTOR);
122} 122}
123 123
124/* 124void native_send_call_func_single_ipi(int cpu)
125 * Structure and data for smp_call_function(). This is designed to minimise
126 * static memory requirements. It also looks cleaner.
127 */
128static DEFINE_SPINLOCK(call_lock);
129
130struct call_data_struct {
131 void (*func) (void *info);
132 void *info;
133 atomic_t started;
134 atomic_t finished;
135 int wait;
136};
137
138void lock_ipi_call_lock(void)
139{ 125{
140 spin_lock_irq(&call_lock); 126 send_IPI_mask(cpumask_of_cpu(cpu), CALL_FUNCTION_SINGLE_VECTOR);
141}
142
143void unlock_ipi_call_lock(void)
144{
145 spin_unlock_irq(&call_lock);
146}
147
148static struct call_data_struct *call_data;
149
150static void __smp_call_function(void (*func) (void *info), void *info,
151 int nonatomic, int wait)
152{
153 struct call_data_struct data;
154 int cpus = num_online_cpus() - 1;
155
156 if (!cpus)
157 return;
158
159 data.func = func;
160 data.info = info;
161 atomic_set(&data.started, 0);
162 data.wait = wait;
163 if (wait)
164 atomic_set(&data.finished, 0);
165
166 call_data = &data;
167 mb();
168
169 /* Send a message to all other CPUs and wait for them to respond */
170 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
171
172 /* Wait for response */
173 while (atomic_read(&data.started) != cpus)
174 cpu_relax();
175
176 if (wait)
177 while (atomic_read(&data.finished) != cpus)
178 cpu_relax();
179} 127}
180 128
181 129void native_send_call_func_ipi(cpumask_t mask)
182/**
183 * smp_call_function_mask(): Run a function on a set of other CPUs.
184 * @mask: The set of cpus to run on. Must not include the current cpu.
185 * @func: The function to run. This must be fast and non-blocking.
186 * @info: An arbitrary pointer to pass to the function.
187 * @wait: If true, wait (atomically) until function has completed on other CPUs.
188 *
189 * Returns 0 on success, else a negative status code.
190 *
191 * If @wait is true, then returns once @func has returned; otherwise
192 * it returns just before the target cpu calls @func.
193 *
194 * You must not call this function with disabled interrupts or from a
195 * hardware interrupt handler or from a bottom half handler.
196 */
197static int
198native_smp_call_function_mask(cpumask_t mask,
199 void (*func)(void *), void *info,
200 int wait)
201{ 130{
202 struct call_data_struct data;
203 cpumask_t allbutself; 131 cpumask_t allbutself;
204 int cpus;
205
206 /* Can deadlock when called with interrupts disabled */
207 WARN_ON(irqs_disabled());
208
209 /* Holding any lock stops cpus from going down. */
210 spin_lock(&call_lock);
211 132
212 allbutself = cpu_online_map; 133 allbutself = cpu_online_map;
213 cpu_clear(smp_processor_id(), allbutself); 134 cpu_clear(smp_processor_id(), allbutself);
214 135
215 cpus_and(mask, mask, allbutself);
216 cpus = cpus_weight(mask);
217
218 if (!cpus) {
219 spin_unlock(&call_lock);
220 return 0;
221 }
222
223 data.func = func;
224 data.info = info;
225 atomic_set(&data.started, 0);
226 data.wait = wait;
227 if (wait)
228 atomic_set(&data.finished, 0);
229
230 call_data = &data;
231 wmb();
232
233 /* Send a message to other CPUs */
234 if (cpus_equal(mask, allbutself) && 136 if (cpus_equal(mask, allbutself) &&
235 cpus_equal(cpu_online_map, cpu_callout_map)) 137 cpus_equal(cpu_online_map, cpu_callout_map))
236 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 138 send_IPI_allbutself(CALL_FUNCTION_VECTOR);
237 else 139 else
238 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 140 send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
239
240 /* Wait for response */
241 while (atomic_read(&data.started) != cpus)
242 cpu_relax();
243
244 if (wait)
245 while (atomic_read(&data.finished) != cpus)
246 cpu_relax();
247 spin_unlock(&call_lock);
248
249 return 0;
250} 141}
251 142
252static void stop_this_cpu(void *dummy) 143static void stop_this_cpu(void *dummy)
@@ -268,18 +159,13 @@ static void stop_this_cpu(void *dummy)
268 159
269static void native_smp_send_stop(void) 160static void native_smp_send_stop(void)
270{ 161{
271 int nolock;
272 unsigned long flags; 162 unsigned long flags;
273 163
274 if (reboot_force) 164 if (reboot_force)
275 return; 165 return;
276 166
277 /* Don't deadlock on the call lock in panic */ 167 smp_call_function(stop_this_cpu, NULL, 0);
278 nolock = !spin_trylock(&call_lock);
279 local_irq_save(flags); 168 local_irq_save(flags);
280 __smp_call_function(stop_this_cpu, NULL, 0, 0);
281 if (!nolock)
282 spin_unlock(&call_lock);
283 disable_local_APIC(); 169 disable_local_APIC();
284 local_irq_restore(flags); 170 local_irq_restore(flags);
285} 171}
@@ -301,33 +187,28 @@ void smp_reschedule_interrupt(struct pt_regs *regs)
301 187
302void smp_call_function_interrupt(struct pt_regs *regs) 188void smp_call_function_interrupt(struct pt_regs *regs)
303{ 189{
304 void (*func) (void *info) = call_data->func;
305 void *info = call_data->info;
306 int wait = call_data->wait;
307
308 ack_APIC_irq(); 190 ack_APIC_irq();
309 /*
310 * Notify initiating CPU that I've grabbed the data and am
311 * about to execute the function
312 */
313 mb();
314 atomic_inc(&call_data->started);
315 /*
316 * At this point the info structure may be out of scope unless wait==1
317 */
318 irq_enter(); 191 irq_enter();
319 (*func)(info); 192 generic_smp_call_function_interrupt();
320#ifdef CONFIG_X86_32 193#ifdef CONFIG_X86_32
321 __get_cpu_var(irq_stat).irq_call_count++; 194 __get_cpu_var(irq_stat).irq_call_count++;
322#else 195#else
323 add_pda(irq_call_count, 1); 196 add_pda(irq_call_count, 1);
324#endif 197#endif
325 irq_exit(); 198 irq_exit();
199}
326 200
327 if (wait) { 201void smp_call_function_single_interrupt(struct pt_regs *regs)
328 mb(); 202{
329 atomic_inc(&call_data->finished); 203 ack_APIC_irq();
330 } 204 irq_enter();
205 generic_smp_call_function_single_interrupt();
206#ifdef CONFIG_X86_32
207 __get_cpu_var(irq_stat).irq_call_count++;
208#else
209 add_pda(irq_call_count, 1);
210#endif
211 irq_exit();
331} 212}
332 213
333struct smp_ops smp_ops = { 214struct smp_ops smp_ops = {
@@ -338,7 +219,8 @@ struct smp_ops smp_ops = {
338 219
339 .smp_send_stop = native_smp_send_stop, 220 .smp_send_stop = native_smp_send_stop,
340 .smp_send_reschedule = native_smp_send_reschedule, 221 .smp_send_reschedule = native_smp_send_reschedule,
341 .smp_call_function_mask = native_smp_call_function_mask, 222
223 .send_call_func_ipi = native_send_call_func_ipi,
224 .send_call_func_single_ipi = native_send_call_func_single_ipi,
342}; 225};
343EXPORT_SYMBOL_GPL(smp_ops); 226EXPORT_SYMBOL_GPL(smp_ops);
344
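
With the hunk above, the arch-private cross-call plumbing (call_data_struct, call_lock, the started/finished counters) is gone: the architecture only supplies the two IPI senders through smp_ops, and the interrupt handlers simply ack the APIC and defer to generic_smp_call_function_interrupt() / generic_smp_call_function_single_interrupt() in the generic kernel/smp.c code. Callers see the slimmer three-argument signatures already visible in this diff (native_smp_send_stop() and flush_tlb_all()). A small hypothetical caller is sketched below; the drain_* names are made up for illustration.

#include <linux/smp.h>

/* Runs on each target CPU, in interrupt context, so it must be fast. */
static void drain_local_counters(void *info)
{
	/* per-cpu work would go here */
}

static void drain_all_counters(void)
{
	/* wait == 1: return only after every other CPU has run the callback */
	smp_call_function(drain_local_counters, NULL, 1);

	/* or include the calling CPU as well */
	on_each_cpu(drain_local_counters, NULL, 1);
}
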
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index e1200b202ed7..687376ab07e8 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -327,12 +327,12 @@ static void __cpuinit start_secondary(void *unused)
327 * lock helps us to not include this cpu in a currently in progress 327 * lock helps us to not include this cpu in a currently in progress
328 * smp_call_function(). 328 * smp_call_function().
329 */ 329 */
330 lock_ipi_call_lock(); 330 ipi_call_lock_irq();
331#ifdef CONFIG_X86_IO_APIC 331#ifdef CONFIG_X86_IO_APIC
332 setup_vector_irq(smp_processor_id()); 332 setup_vector_irq(smp_processor_id());
333#endif 333#endif
334 cpu_set(smp_processor_id(), cpu_online_map); 334 cpu_set(smp_processor_id(), cpu_online_map);
335 unlock_ipi_call_lock(); 335 ipi_call_unlock_irq();
336 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; 336 per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE;
337 337
338 setup_secondary_clock(); 338 setup_secondary_clock();
@@ -939,9 +939,9 @@ do_rest:
939 inquire_remote_apic(apicid); 939 inquire_remote_apic(apicid);
940 } 940 }
941 } 941 }
942 942#ifdef CONFIG_X86_64
943restore_state: 943restore_state:
944 944#endif
945 if (boot_error) { 945 if (boot_error) {
946 /* Try to put things back the way they were before ... */ 946 /* Try to put things back the way they were before ... */
947 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 947 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 3449064d141a..99941b37eca0 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -25,59 +25,3 @@ __cpuinit void init_gdt(int cpu)
25 per_cpu(cpu_number, cpu) = cpu; 25 per_cpu(cpu_number, cpu) = cpu;
26} 26}
27#endif 27#endif
28
29/**
30 * smp_call_function(): Run a function on all other CPUs.
31 * @func: The function to run. This must be fast and non-blocking.
32 * @info: An arbitrary pointer to pass to the function.
33 * @nonatomic: Unused.
34 * @wait: If true, wait (atomically) until function has completed on other CPUs.
35 *
36 * Returns 0 on success, else a negative status code.
37 *
38 * If @wait is true, then returns once @func has returned; otherwise
39 * it returns just before the target cpu calls @func.
40 *
41 * You must not call this function with disabled interrupts or from a
42 * hardware interrupt handler or from a bottom half handler.
43 */
44int smp_call_function(void (*func) (void *info), void *info, int nonatomic,
45 int wait)
46{
47 return smp_call_function_mask(cpu_online_map, func, info, wait);
48}
49EXPORT_SYMBOL(smp_call_function);
50
51/**
52 * smp_call_function_single - Run a function on a specific CPU
53 * @cpu: The target CPU. Cannot be the calling CPU.
54 * @func: The function to run. This must be fast and non-blocking.
55 * @info: An arbitrary pointer to pass to the function.
56 * @nonatomic: Unused.
57 * @wait: If true, wait until function has completed on other CPUs.
58 *
59 * Returns 0 on success, else a negative status code.
60 *
61 * If @wait is true, then returns once @func has returned; otherwise
62 * it returns just before the target cpu calls @func.
63 */
64int smp_call_function_single(int cpu, void (*func) (void *info), void *info,
65 int nonatomic, int wait)
66{
67 /* prevent preemption and reschedule on another processor */
68 int ret;
69 int me = get_cpu();
70 if (cpu == me) {
71 local_irq_disable();
72 func(info);
73 local_irq_enable();
74 put_cpu();
75 return 0;
76 }
77
78 ret = smp_call_function_mask(cpumask_of_cpu(cpu), func, info, wait);
79
80 put_cpu();
81 return ret;
82}
83EXPORT_SYMBOL(smp_call_function_single);
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index c28c342c162f..a03e7f6d90c3 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -74,6 +74,7 @@ void save_stack_trace(struct stack_trace *trace)
74 if (trace->nr_entries < trace->max_entries) 74 if (trace->nr_entries < trace->max_entries)
75 trace->entries[trace->nr_entries++] = ULONG_MAX; 75 trace->entries[trace->nr_entries++] = ULONG_MAX;
76} 76}
77EXPORT_SYMBOL_GPL(save_stack_trace);
77 78
78void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace) 79void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
79{ 80{
@@ -81,3 +82,4 @@ void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace)
81 if (trace->nr_entries < trace->max_entries) 82 if (trace->nr_entries < trace->max_entries)
82 trace->entries[trace->nr_entries++] = ULONG_MAX; 83 trace->entries[trace->nr_entries++] = ULONG_MAX;
83} 84}
85EXPORT_SYMBOL_GPL(save_stack_trace_tsk);
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 5f29f12da50c..059ca6ee59b4 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -39,9 +39,6 @@
39 39
40#include "do_timer.h" 40#include "do_timer.h"
41 41
42unsigned int cpu_khz; /* Detected as we calibrate the TSC */
43EXPORT_SYMBOL(cpu_khz);
44
45int timer_ack; 42int timer_ack;
46 43
47unsigned long profile_pc(struct pt_regs *regs) 44unsigned long profile_pc(struct pt_regs *regs)
diff --git a/arch/x86/kernel/time_64.c b/arch/x86/kernel/time_64.c
index 39ae8511a137..e3d49c553af2 100644
--- a/arch/x86/kernel/time_64.c
+++ b/arch/x86/kernel/time_64.c
@@ -56,7 +56,7 @@ static irqreturn_t timer_event_interrupt(int irq, void *dev_id)
56/* calibrate_cpu is used on systems with fixed rate TSCs to determine 56/* calibrate_cpu is used on systems with fixed rate TSCs to determine
57 * processor frequency */ 57 * processor frequency */
58#define TICK_COUNT 100000000 58#define TICK_COUNT 100000000
59unsigned long __init native_calculate_cpu_khz(void) 59unsigned long __init calibrate_cpu(void)
60{ 60{
61 int tsc_start, tsc_now; 61 int tsc_start, tsc_now;
62 int i, no_ctr_free; 62 int i, no_ctr_free;
@@ -116,25 +116,11 @@ void __init hpet_time_init(void)
116 116
117void __init time_init(void) 117void __init time_init(void)
118{ 118{
119 tsc_calibrate(); 119 tsc_init();
120
121 cpu_khz = tsc_khz;
122 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
123 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
124 cpu_khz = calculate_cpu_khz();
125
126 lpj_fine = ((unsigned long)tsc_khz * 1000)/HZ;
127
128 if (unsynchronized_tsc())
129 mark_tsc_unstable("TSCs unsynchronized");
130
131 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP)) 120 if (cpu_has(&boot_cpu_data, X86_FEATURE_RDTSCP))
132 vgetcpu_mode = VGETCPU_RDTSCP; 121 vgetcpu_mode = VGETCPU_RDTSCP;
133 else 122 else
134 vgetcpu_mode = VGETCPU_LSL; 123 vgetcpu_mode = VGETCPU_LSL;
135 124
136 printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
137 cpu_khz / 1000, cpu_khz % 1000);
138 init_tsc_clocksource();
139 late_time_init = choose_time_init(); 125 late_time_init = choose_time_init();
140} 126}
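
time_init() on 64-bit now reduces to a tsc_init() call; per the diffstat, this merge also replaces tsc_32.c/tsc_64.c with a unified tsc.c, which is where the deleted work is expected to live. For reference, the removed body amounted to roughly the following (a reconstruction from the lines deleted above, using the old helper names; it is not a quote of the new tsc.c):

	/* steps now assumed to be handled inside tsc_init() */
	tsc_calibrate();

	cpu_khz = tsc_khz;
	if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
	    (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
		cpu_khz = calculate_cpu_khz();	/* renamed calibrate_cpu() above */

	lpj_fine = ((unsigned long)tsc_khz * 1000) / HZ;

	if (unsynchronized_tsc())
		mark_tsc_unstable("TSCs unsynchronized");

	printk(KERN_INFO "time.c: Detected %d.%03d MHz processor.\n",
	       cpu_khz / 1000, cpu_khz % 1000);

	init_tsc_clocksource();
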
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index 9bb2363851af..fec1ecedc9b7 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -238,6 +238,6 @@ static void do_flush_tlb_all(void *info)
238 238
239void flush_tlb_all(void) 239void flush_tlb_all(void)
240{ 240{
241 on_each_cpu(do_flush_tlb_all, NULL, 1, 1); 241 on_each_cpu(do_flush_tlb_all, NULL, 1);
242} 242}
243 243
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index 5039d0f097a2..dcbf7a1159ea 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -275,5 +275,5 @@ static void do_flush_tlb_all(void *info)
275 275
276void flush_tlb_all(void) 276void flush_tlb_all(void)
277{ 277{
278 on_each_cpu(do_flush_tlb_all, NULL, 1, 1); 278 on_each_cpu(do_flush_tlb_all, NULL, 1);
279} 279}
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index d7cc292691ff..8a768973c4f0 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1,5 +1,6 @@
1/* 1/*
2 * Copyright (C) 1991, 1992 Linus Torvalds 2 * Copyright (C) 1991, 1992 Linus Torvalds
3 * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs
3 * 4 *
4 * Pentium III FXSR, SSE support 5 * Pentium III FXSR, SSE support
5 * Gareth Hughes <gareth@valinux.com>, May 2000 6 * Gareth Hughes <gareth@valinux.com>, May 2000
@@ -60,8 +61,6 @@
60 61
61#include "mach_traps.h" 62#include "mach_traps.h"
62 63
63int panic_on_unrecovered_nmi;
64
65DECLARE_BITMAP(used_vectors, NR_VECTORS); 64DECLARE_BITMAP(used_vectors, NR_VECTORS);
66EXPORT_SYMBOL_GPL(used_vectors); 65EXPORT_SYMBOL_GPL(used_vectors);
67 66
@@ -98,19 +97,22 @@ asmlinkage void alignment_check(void);
98asmlinkage void spurious_interrupt_bug(void); 97asmlinkage void spurious_interrupt_bug(void);
99asmlinkage void machine_check(void); 98asmlinkage void machine_check(void);
100 99
100int panic_on_unrecovered_nmi;
101int kstack_depth_to_print = 24; 101int kstack_depth_to_print = 24;
102static unsigned int code_bytes = 64; 102static unsigned int code_bytes = 64;
103static int ignore_nmis;
104static int die_counter;
103 105
104void printk_address(unsigned long address, int reliable) 106void printk_address(unsigned long address, int reliable)
105{ 107{
106#ifdef CONFIG_KALLSYMS 108#ifdef CONFIG_KALLSYMS
107 char namebuf[KSYM_NAME_LEN];
108 unsigned long offset = 0; 109 unsigned long offset = 0;
109 unsigned long symsize; 110 unsigned long symsize;
110 const char *symname; 111 const char *symname;
111 char reliab[4] = "";
112 char *delim = ":";
113 char *modname; 112 char *modname;
113 char *delim = ":";
114 char namebuf[KSYM_NAME_LEN];
115 char reliab[4] = "";
114 116
115 symname = kallsyms_lookup(address, &symsize, &offset, 117 symname = kallsyms_lookup(address, &symsize, &offset,
116 &modname, namebuf); 118 &modname, namebuf);
@@ -130,22 +132,23 @@ void printk_address(unsigned long address, int reliable)
130#endif 132#endif
131} 133}
132 134
133static inline int valid_stack_ptr(struct thread_info *tinfo, void *p, unsigned size) 135static inline int valid_stack_ptr(struct thread_info *tinfo,
136 void *p, unsigned int size)
134{ 137{
135 return p > (void *)tinfo && 138 void *t = tinfo;
136 p <= (void *)tinfo + THREAD_SIZE - size; 139 return p > t && p <= t + THREAD_SIZE - size;
137} 140}
138 141
139/* The form of the top of the frame on the stack */ 142/* The form of the top of the frame on the stack */
140struct stack_frame { 143struct stack_frame {
141 struct stack_frame *next_frame; 144 struct stack_frame *next_frame;
142 unsigned long return_address; 145 unsigned long return_address;
143}; 146};
144 147
145static inline unsigned long 148static inline unsigned long
146print_context_stack(struct thread_info *tinfo, 149print_context_stack(struct thread_info *tinfo,
147 unsigned long *stack, unsigned long bp, 150 unsigned long *stack, unsigned long bp,
148 const struct stacktrace_ops *ops, void *data) 151 const struct stacktrace_ops *ops, void *data)
149{ 152{
150 struct stack_frame *frame = (struct stack_frame *)bp; 153 struct stack_frame *frame = (struct stack_frame *)bp;
151 154
@@ -167,8 +170,6 @@ print_context_stack(struct thread_info *tinfo,
167 return bp; 170 return bp;
168} 171}
169 172
170#define MSG(msg) ops->warning(data, msg)
171
172void dump_trace(struct task_struct *task, struct pt_regs *regs, 173void dump_trace(struct task_struct *task, struct pt_regs *regs,
173 unsigned long *stack, unsigned long bp, 174 unsigned long *stack, unsigned long bp,
174 const struct stacktrace_ops *ops, void *data) 175 const struct stacktrace_ops *ops, void *data)
@@ -178,7 +179,6 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
178 179
179 if (!stack) { 180 if (!stack) {
180 unsigned long dummy; 181 unsigned long dummy;
181
182 stack = &dummy; 182 stack = &dummy;
183 if (task != current) 183 if (task != current)
184 stack = (unsigned long *)task->thread.sp; 184 stack = (unsigned long *)task->thread.sp;
@@ -196,7 +196,7 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
196 } 196 }
197#endif 197#endif
198 198
199 while (1) { 199 for (;;) {
200 struct thread_info *context; 200 struct thread_info *context;
201 201
202 context = (struct thread_info *) 202 context = (struct thread_info *)
@@ -248,10 +248,10 @@ static void print_trace_address(void *data, unsigned long addr, int reliable)
248} 248}
249 249
250static const struct stacktrace_ops print_trace_ops = { 250static const struct stacktrace_ops print_trace_ops = {
251 .warning = print_trace_warning, 251 .warning = print_trace_warning,
252 .warning_symbol = print_trace_warning_symbol, 252 .warning_symbol = print_trace_warning_symbol,
253 .stack = print_trace_stack, 253 .stack = print_trace_stack,
254 .address = print_trace_address, 254 .address = print_trace_address,
255}; 255};
256 256
257static void 257static void
@@ -351,15 +351,14 @@ void show_registers(struct pt_regs *regs)
351 printk(KERN_EMERG "Code: "); 351 printk(KERN_EMERG "Code: ");
352 352
353 ip = (u8 *)regs->ip - code_prologue; 353 ip = (u8 *)regs->ip - code_prologue;
354 if (ip < (u8 *)PAGE_OFFSET || 354 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
355 probe_kernel_address(ip, c)) {
356 /* try starting at EIP */ 355 /* try starting at EIP */
357 ip = (u8 *)regs->ip; 356 ip = (u8 *)regs->ip;
358 code_len = code_len - code_prologue + 1; 357 code_len = code_len - code_prologue + 1;
359 } 358 }
360 for (i = 0; i < code_len; i++, ip++) { 359 for (i = 0; i < code_len; i++, ip++) {
361 if (ip < (u8 *)PAGE_OFFSET || 360 if (ip < (u8 *)PAGE_OFFSET ||
362 probe_kernel_address(ip, c)) { 361 probe_kernel_address(ip, c)) {
363 printk(" Bad EIP value."); 362 printk(" Bad EIP value.");
364 break; 363 break;
365 } 364 }
@@ -384,8 +383,6 @@ int is_valid_bugaddr(unsigned long ip)
384 return ud2 == 0x0b0f; 383 return ud2 == 0x0b0f;
385} 384}
386 385
387static int die_counter;
388
389int __kprobes __die(const char *str, struct pt_regs *regs, long err) 386int __kprobes __die(const char *str, struct pt_regs *regs, long err)
390{ 387{
391 unsigned short ss; 388 unsigned short ss;
@@ -402,26 +399,22 @@ int __kprobes __die(const char *str, struct pt_regs *regs, long err)
402 printk("DEBUG_PAGEALLOC"); 399 printk("DEBUG_PAGEALLOC");
403#endif 400#endif
404 printk("\n"); 401 printk("\n");
405
406 if (notify_die(DIE_OOPS, str, regs, err, 402 if (notify_die(DIE_OOPS, str, regs, err,
407 current->thread.trap_no, SIGSEGV) != NOTIFY_STOP) { 403 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
408 404 return 1;
409 show_registers(regs);
410 /* Executive summary in case the oops scrolled away */
411 sp = (unsigned long) (&regs->sp);
412 savesegment(ss, ss);
413 if (user_mode(regs)) {
414 sp = regs->sp;
415 ss = regs->ss & 0xffff;
416 }
417 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
418 print_symbol("%s", regs->ip);
419 printk(" SS:ESP %04x:%08lx\n", ss, sp);
420 405
421 return 0; 406 show_registers(regs);
407 /* Executive summary in case the oops scrolled away */
408 sp = (unsigned long) (&regs->sp);
409 savesegment(ss, ss);
410 if (user_mode(regs)) {
411 sp = regs->sp;
412 ss = regs->ss & 0xffff;
422 } 413 }
423 414 printk(KERN_EMERG "EIP: [<%08lx>] ", regs->ip);
424 return 1; 415 print_symbol("%s", regs->ip);
416 printk(" SS:ESP %04x:%08lx\n", ss, sp);
417 return 0;
425} 418}
426 419
427/* 420/*
@@ -546,7 +539,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
546{ \ 539{ \
547 trace_hardirqs_fixup(); \ 540 trace_hardirqs_fixup(); \
548 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 541 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
549 == NOTIFY_STOP) \ 542 == NOTIFY_STOP) \
550 return; \ 543 return; \
551 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \ 544 do_trap(trapnr, signr, str, 0, regs, error_code, NULL); \
552} 545}
@@ -562,7 +555,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
562 info.si_code = sicode; \ 555 info.si_code = sicode; \
563 info.si_addr = (void __user *)siaddr; \ 556 info.si_addr = (void __user *)siaddr; \
564 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 557 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
565 == NOTIFY_STOP) \ 558 == NOTIFY_STOP) \
566 return; \ 559 return; \
567 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \ 560 do_trap(trapnr, signr, str, 0, regs, error_code, &info); \
568} 561}
@@ -571,7 +564,7 @@ void do_##name(struct pt_regs *regs, long error_code) \
571void do_##name(struct pt_regs *regs, long error_code) \ 564void do_##name(struct pt_regs *regs, long error_code) \
572{ \ 565{ \
573 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 566 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
574 == NOTIFY_STOP) \ 567 == NOTIFY_STOP) \
575 return; \ 568 return; \
576 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \ 569 do_trap(trapnr, signr, str, 1, regs, error_code, NULL); \
577} 570}
@@ -586,27 +579,29 @@ void do_##name(struct pt_regs *regs, long error_code) \
586 info.si_addr = (void __user *)siaddr; \ 579 info.si_addr = (void __user *)siaddr; \
587 trace_hardirqs_fixup(); \ 580 trace_hardirqs_fixup(); \
588 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 581 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
589 == NOTIFY_STOP) \ 582 == NOTIFY_STOP) \
590 return; \ 583 return; \
591 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \ 584 do_trap(trapnr, signr, str, 1, regs, error_code, &info); \
592} 585}
593 586
594DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) 587DO_VM86_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
595#ifndef CONFIG_KPROBES 588#ifndef CONFIG_KPROBES
596DO_VM86_ERROR(3, SIGTRAP, "int3", int3) 589DO_VM86_ERROR(3, SIGTRAP, "int3", int3)
597#endif 590#endif
598DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow) 591DO_VM86_ERROR(4, SIGSEGV, "overflow", overflow)
599DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds) 592DO_VM86_ERROR(5, SIGSEGV, "bounds", bounds)
600DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0) 593DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip, 0)
601DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 594DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
602DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 595DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
603DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 596DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
604DO_ERROR(12, SIGBUS, "stack segment", stack_segment) 597DO_ERROR(12, SIGBUS, "stack segment", stack_segment)
605DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0) 598DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0, 0)
606DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1) 599DO_ERROR_INFO(32, SIGILL, "iret exception", iret_error, ILL_BADSTK, 0, 1)
607 600
608void __kprobes do_general_protection(struct pt_regs *regs, long error_code) 601void __kprobes
602do_general_protection(struct pt_regs *regs, long error_code)
609{ 603{
604 struct task_struct *tsk;
610 struct thread_struct *thread; 605 struct thread_struct *thread;
611 struct tss_struct *tss; 606 struct tss_struct *tss;
612 int cpu; 607 int cpu;
@@ -647,23 +642,24 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code)
647 if (regs->flags & X86_VM_MASK) 642 if (regs->flags & X86_VM_MASK)
648 goto gp_in_vm86; 643 goto gp_in_vm86;
649 644
645 tsk = current;
650 if (!user_mode(regs)) 646 if (!user_mode(regs))
651 goto gp_in_kernel; 647 goto gp_in_kernel;
652 648
653 current->thread.error_code = error_code; 649 tsk->thread.error_code = error_code;
654 current->thread.trap_no = 13; 650 tsk->thread.trap_no = 13;
655 651
656 if (show_unhandled_signals && unhandled_signal(current, SIGSEGV) && 652 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
657 printk_ratelimit()) { 653 printk_ratelimit()) {
658 printk(KERN_INFO 654 printk(KERN_INFO
659 "%s[%d] general protection ip:%lx sp:%lx error:%lx", 655 "%s[%d] general protection ip:%lx sp:%lx error:%lx",
660 current->comm, task_pid_nr(current), 656 tsk->comm, task_pid_nr(tsk),
661 regs->ip, regs->sp, error_code); 657 regs->ip, regs->sp, error_code);
662 print_vma_addr(" in ", regs->ip); 658 print_vma_addr(" in ", regs->ip);
663 printk("\n"); 659 printk("\n");
664 } 660 }
665 661
666 force_sig(SIGSEGV, current); 662 force_sig(SIGSEGV, tsk);
667 return; 663 return;
668 664
669gp_in_vm86: 665gp_in_vm86:
@@ -672,14 +668,15 @@ gp_in_vm86:
672 return; 668 return;
673 669
674gp_in_kernel: 670gp_in_kernel:
675 if (!fixup_exception(regs)) { 671 if (fixup_exception(regs))
676 current->thread.error_code = error_code; 672 return;
677 current->thread.trap_no = 13; 673
678 if (notify_die(DIE_GPF, "general protection fault", regs, 674 tsk->thread.error_code = error_code;
675 tsk->thread.trap_no = 13;
676 if (notify_die(DIE_GPF, "general protection fault", regs,
679 error_code, 13, SIGSEGV) == NOTIFY_STOP) 677 error_code, 13, SIGSEGV) == NOTIFY_STOP)
680 return; 678 return;
681 die("general protection fault", regs, error_code); 679 die("general protection fault", regs, error_code);
682 }
683} 680}
684 681
685static notrace __kprobes void 682static notrace __kprobes void
@@ -792,14 +789,17 @@ void notrace __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic)
792static notrace __kprobes void default_do_nmi(struct pt_regs *regs) 789static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
793{ 790{
794 unsigned char reason = 0; 791 unsigned char reason = 0;
792 int cpu;
793
794 cpu = smp_processor_id();
795 795
796 /* Only the BSP gets external NMIs from the system: */ 796 /* Only the BSP gets external NMIs from the system. */
797 if (!smp_processor_id()) 797 if (!cpu)
798 reason = get_nmi_reason(); 798 reason = get_nmi_reason();
799 799
800 if (!(reason & 0xc0)) { 800 if (!(reason & 0xc0)) {
801 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) 801 if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
802 == NOTIFY_STOP) 802 == NOTIFY_STOP)
803 return; 803 return;
804#ifdef CONFIG_X86_LOCAL_APIC 804#ifdef CONFIG_X86_LOCAL_APIC
805 /* 805 /*
@@ -808,7 +808,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
808 */ 808 */
809 if (nmi_watchdog_tick(regs, reason)) 809 if (nmi_watchdog_tick(regs, reason))
810 return; 810 return;
811 if (!do_nmi_callback(regs, smp_processor_id())) 811 if (!do_nmi_callback(regs, cpu))
812 unknown_nmi_error(reason, regs); 812 unknown_nmi_error(reason, regs);
813#else 813#else
814 unknown_nmi_error(reason, regs); 814 unknown_nmi_error(reason, regs);
@@ -818,6 +818,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
818 } 818 }
819 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 819 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
820 return; 820 return;
821
822 /* AK: following checks seem to be broken on modern chipsets. FIXME */
821 if (reason & 0x80) 823 if (reason & 0x80)
822 mem_parity_error(reason, regs); 824 mem_parity_error(reason, regs);
823 if (reason & 0x40) 825 if (reason & 0x40)
@@ -829,8 +831,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
829 reassert_nmi(); 831 reassert_nmi();
830} 832}
831 833
832static int ignore_nmis;
833
834notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) 834notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code)
835{ 835{
836 int cpu; 836 int cpu;
@@ -915,7 +915,7 @@ void __kprobes do_debug(struct pt_regs *regs, long error_code)
915 tsk->thread.debugctlmsr = 0; 915 tsk->thread.debugctlmsr = 0;
916 916
917 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code, 917 if (notify_die(DIE_DEBUG, "debug", regs, condition, error_code,
918 SIGTRAP) == NOTIFY_STOP) 918 SIGTRAP) == NOTIFY_STOP)
919 return; 919 return;
920 /* It's safe to allow irq's after DR6 has been saved */ 920 /* It's safe to allow irq's after DR6 has been saved */
921 if (regs->flags & X86_EFLAGS_IF) 921 if (regs->flags & X86_EFLAGS_IF)
@@ -976,9 +976,8 @@ clear_TF_reenable:
976void math_error(void __user *ip) 976void math_error(void __user *ip)
977{ 977{
978 struct task_struct *task; 978 struct task_struct *task;
979 unsigned short cwd;
980 unsigned short swd;
981 siginfo_t info; 979 siginfo_t info;
980 unsigned short cwd, swd;
982 981
983 /* 982 /*
984 * Save the info for the exception handler and clear the error. 983 * Save the info for the exception handler and clear the error.
@@ -997,7 +996,7 @@ void math_error(void __user *ip)
997 * C1 reg you need in case of a stack fault, 0x040 is the stack 996 * C1 reg you need in case of a stack fault, 0x040 is the stack
998 * fault bit. We should only be taking one exception at a time, 997 * fault bit. We should only be taking one exception at a time,
999 * so if this combination doesn't produce any single exception, 998 * so if this combination doesn't produce any single exception,
1000 * then we have a bad program that isn't syncronizing its FPU usage 999 * then we have a bad program that isn't synchronizing its FPU usage
1001 * and it will suffer the consequences since we won't be able to 1000 * and it will suffer the consequences since we won't be able to
1002 * fully reproduce the context of the exception 1001 * fully reproduce the context of the exception
1003 */ 1002 */
@@ -1006,7 +1005,7 @@ void math_error(void __user *ip)
1006 switch (swd & ~cwd & 0x3f) { 1005 switch (swd & ~cwd & 0x3f) {
1007 case 0x000: /* No unmasked exception */ 1006 case 0x000: /* No unmasked exception */
1008 return; 1007 return;
1009 default: /* Multiple exceptions */ 1008 default: /* Multiple exceptions */
1010 break; 1009 break;
1011 case 0x001: /* Invalid Op */ 1010 case 0x001: /* Invalid Op */
1012 /* 1011 /*
@@ -1042,8 +1041,8 @@ void do_coprocessor_error(struct pt_regs *regs, long error_code)
1042static void simd_math_error(void __user *ip) 1041static void simd_math_error(void __user *ip)
1043{ 1042{
1044 struct task_struct *task; 1043 struct task_struct *task;
1045 unsigned short mxcsr;
1046 siginfo_t info; 1044 siginfo_t info;
1045 unsigned short mxcsr;
1047 1046
1048 /* 1047 /*
1049 * Save the info for the exception handler and clear the error. 1048 * Save the info for the exception handler and clear the error.
@@ -1198,16 +1197,16 @@ void __init trap_init(void)
1198 early_iounmap(p, 4); 1197 early_iounmap(p, 4);
1199#endif 1198#endif
1200 1199
1201 set_trap_gate(0, &divide_error); 1200 set_trap_gate(0, &divide_error);
1202 set_intr_gate(1, &debug); 1201 set_intr_gate(1, &debug);
1203 set_intr_gate(2, &nmi); 1202 set_intr_gate(2, &nmi);
1204 set_system_intr_gate(3, &int3); /* int3/4 can be called from all */ 1203 set_system_intr_gate(3, &int3); /* int3 can be called from all */
1205 set_system_gate(4, &overflow); 1204 set_system_gate(4, &overflow); /* int4 can be called from all */
1206 set_trap_gate(5, &bounds); 1205 set_trap_gate(5, &bounds);
1207 set_trap_gate(6, &invalid_op); 1206 set_trap_gate(6, &invalid_op);
1208 set_trap_gate(7, &device_not_available); 1207 set_trap_gate(7, &device_not_available);
1209 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS); 1208 set_task_gate(8, GDT_ENTRY_DOUBLEFAULT_TSS);
1210 set_trap_gate(9, &coprocessor_segment_overrun); 1209 set_trap_gate(9, &coprocessor_segment_overrun);
1211 set_trap_gate(10, &invalid_TSS); 1210 set_trap_gate(10, &invalid_TSS);
1212 set_trap_gate(11, &segment_not_present); 1211 set_trap_gate(11, &segment_not_present);
1213 set_trap_gate(12, &stack_segment); 1212 set_trap_gate(12, &stack_segment);
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 80ba6d37bfe0..2696a6837782 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -10,49 +10,49 @@
10 * 'Traps.c' handles hardware traps and faults after we have saved some 10 * 'Traps.c' handles hardware traps and faults after we have saved some
11 * state in 'entry.S'. 11 * state in 'entry.S'.
12 */ 12 */
13#include <linux/sched.h> 13#include <linux/moduleparam.h>
14#include <linux/interrupt.h>
15#include <linux/kallsyms.h>
16#include <linux/spinlock.h>
17#include <linux/kprobes.h>
18#include <linux/uaccess.h>
19#include <linux/utsname.h>
20#include <linux/kdebug.h>
14#include <linux/kernel.h> 21#include <linux/kernel.h>
22#include <linux/module.h>
23#include <linux/ptrace.h>
15#include <linux/string.h> 24#include <linux/string.h>
25#include <linux/unwind.h>
26#include <linux/delay.h>
16#include <linux/errno.h> 27#include <linux/errno.h>
17#include <linux/ptrace.h> 28#include <linux/kexec.h>
29#include <linux/sched.h>
18#include <linux/timer.h> 30#include <linux/timer.h>
19#include <linux/mm.h>
20#include <linux/init.h> 31#include <linux/init.h>
21#include <linux/delay.h>
22#include <linux/spinlock.h>
23#include <linux/interrupt.h>
24#include <linux/kallsyms.h>
25#include <linux/module.h>
26#include <linux/moduleparam.h>
27#include <linux/nmi.h>
28#include <linux/kprobes.h>
29#include <linux/kexec.h>
30#include <linux/unwind.h>
31#include <linux/uaccess.h>
32#include <linux/bug.h> 32#include <linux/bug.h>
33#include <linux/kdebug.h> 33#include <linux/nmi.h>
34#include <linux/utsname.h> 34#include <linux/mm.h>
35
36#include <mach_traps.h>
37 35
38#if defined(CONFIG_EDAC) 36#if defined(CONFIG_EDAC)
39#include <linux/edac.h> 37#include <linux/edac.h>
40#endif 38#endif
41 39
42#include <asm/system.h> 40#include <asm/stacktrace.h>
43#include <asm/io.h> 41#include <asm/processor.h>
44#include <asm/atomic.h>
45#include <asm/debugreg.h> 42#include <asm/debugreg.h>
43#include <asm/atomic.h>
44#include <asm/system.h>
45#include <asm/unwind.h>
46#include <asm/desc.h> 46#include <asm/desc.h>
47#include <asm/i387.h> 47#include <asm/i387.h>
48#include <asm/processor.h> 48#include <asm/nmi.h>
49#include <asm/unwind.h>
50#include <asm/smp.h> 49#include <asm/smp.h>
50#include <asm/io.h>
51#include <asm/pgalloc.h> 51#include <asm/pgalloc.h>
52#include <asm/pda.h>
53#include <asm/proto.h> 52#include <asm/proto.h>
54#include <asm/nmi.h> 53#include <asm/pda.h>
55#include <asm/stacktrace.h> 54
55#include <mach_traps.h>
56 56
57asmlinkage void divide_error(void); 57asmlinkage void divide_error(void);
58asmlinkage void debug(void); 58asmlinkage void debug(void);
@@ -72,12 +72,14 @@ asmlinkage void page_fault(void);
72asmlinkage void coprocessor_error(void); 72asmlinkage void coprocessor_error(void);
73asmlinkage void simd_coprocessor_error(void); 73asmlinkage void simd_coprocessor_error(void);
74asmlinkage void alignment_check(void); 74asmlinkage void alignment_check(void);
75asmlinkage void machine_check(void);
76asmlinkage void spurious_interrupt_bug(void); 75asmlinkage void spurious_interrupt_bug(void);
76asmlinkage void machine_check(void);
77 77
78int panic_on_unrecovered_nmi; 78int panic_on_unrecovered_nmi;
79int kstack_depth_to_print = 12;
79static unsigned int code_bytes = 64; 80static unsigned int code_bytes = 64;
80static unsigned ignore_nmis; 81static int ignore_nmis;
82static int die_counter;
81 83
82static inline void conditional_sti(struct pt_regs *regs) 84static inline void conditional_sti(struct pt_regs *regs)
83{ 85{
@@ -101,34 +103,9 @@ static inline void preempt_conditional_cli(struct pt_regs *regs)
101 dec_preempt_count(); 103 dec_preempt_count();
102} 104}
103 105
104int kstack_depth_to_print = 12;
105
106void printk_address(unsigned long address, int reliable) 106void printk_address(unsigned long address, int reliable)
107{ 107{
108#ifdef CONFIG_KALLSYMS 108 printk(" [<%016lx>] %s%pS\n", address, reliable ? "": "? ", (void *) address);
109 unsigned long offset = 0, symsize;
110 const char *symname;
111 char *modname;
112 char *delim = ":";
113 char namebuf[KSYM_NAME_LEN];
114 char reliab[4] = "";
115
116 symname = kallsyms_lookup(address, &symsize, &offset,
117 &modname, namebuf);
118 if (!symname) {
119 printk(" [<%016lx>]\n", address);
120 return;
121 }
122 if (!reliable)
123 strcpy(reliab, "? ");
124
125 if (!modname)
126 modname = delim = "";
127 printk(" [<%016lx>] %s%s%s%s%s+0x%lx/0x%lx\n",
128 address, reliab, delim, modname, delim, symname, offset, symsize);
129#else
130 printk(" [<%016lx>]\n", address);
131#endif
132} 109}
133 110
134static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, 111static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
@@ -205,8 +182,6 @@ static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack,
205 return NULL; 182 return NULL;
206} 183}
207 184
208#define MSG(txt) ops->warning(data, txt)
209
210/* 185/*
211 * x86-64 can have up to three kernel stacks: 186 * x86-64 can have up to three kernel stacks:
212 * process stack 187 * process stack
@@ -233,11 +208,11 @@ struct stack_frame {
233 unsigned long return_address; 208 unsigned long return_address;
234}; 209};
235 210
236 211static inline unsigned long
237static inline unsigned long print_context_stack(struct thread_info *tinfo, 212print_context_stack(struct thread_info *tinfo,
238 unsigned long *stack, unsigned long bp, 213 unsigned long *stack, unsigned long bp,
239 const struct stacktrace_ops *ops, void *data, 214 const struct stacktrace_ops *ops, void *data,
240 unsigned long *end) 215 unsigned long *end)
241{ 216{
242 struct stack_frame *frame = (struct stack_frame *)bp; 217 struct stack_frame *frame = (struct stack_frame *)bp;
243 218
@@ -259,7 +234,7 @@ static inline unsigned long print_context_stack(struct thread_info *tinfo,
259 return bp; 234 return bp;
260} 235}
261 236
262void dump_trace(struct task_struct *tsk, struct pt_regs *regs, 237void dump_trace(struct task_struct *task, struct pt_regs *regs,
263 unsigned long *stack, unsigned long bp, 238 unsigned long *stack, unsigned long bp,
264 const struct stacktrace_ops *ops, void *data) 239 const struct stacktrace_ops *ops, void *data)
265{ 240{
@@ -268,36 +243,34 @@ void dump_trace(struct task_struct *tsk, struct pt_regs *regs,
268 unsigned used = 0; 243 unsigned used = 0;
269 struct thread_info *tinfo; 244 struct thread_info *tinfo;
270 245
271 if (!tsk) 246 if (!task)
272 tsk = current; 247 task = current;
273 tinfo = task_thread_info(tsk);
274 248
275 if (!stack) { 249 if (!stack) {
276 unsigned long dummy; 250 unsigned long dummy;
277 stack = &dummy; 251 stack = &dummy;
278 if (tsk && tsk != current) 252 if (task && task != current)
279 stack = (unsigned long *)tsk->thread.sp; 253 stack = (unsigned long *)task->thread.sp;
280 } 254 }
281 255
282#ifdef CONFIG_FRAME_POINTER 256#ifdef CONFIG_FRAME_POINTER
283 if (!bp) { 257 if (!bp) {
284 if (tsk == current) { 258 if (task == current) {
285 /* Grab bp right from our regs */ 259 /* Grab bp right from our regs */
286 asm("movq %%rbp, %0" : "=r" (bp):); 260 asm("movq %%rbp, %0" : "=r" (bp) :);
287 } else { 261 } else {
288 /* bp is the last reg pushed by switch_to */ 262 /* bp is the last reg pushed by switch_to */
289 bp = *(unsigned long *) tsk->thread.sp; 263 bp = *(unsigned long *) task->thread.sp;
290 } 264 }
291 } 265 }
292#endif 266#endif
293 267
294
295
296 /* 268 /*
297 * Print function call entries in all stacks, starting at the 269 * Print function call entries in all stacks, starting at the
298 * current stack address. If the stacks consist of nested 270 * current stack address. If the stacks consist of nested
299 * exceptions 271 * exceptions
300 */ 272 */
273 tinfo = task_thread_info(task);
301 for (;;) { 274 for (;;) {
302 char *id; 275 char *id;
303 unsigned long *estack_end; 276 unsigned long *estack_end;
@@ -382,18 +355,17 @@ static const struct stacktrace_ops print_trace_ops = {
382 .address = print_trace_address, 355 .address = print_trace_address,
383}; 356};
384 357
385void 358void show_trace(struct task_struct *task, struct pt_regs *regs,
386show_trace(struct task_struct *tsk, struct pt_regs *regs, unsigned long *stack, 359 unsigned long *stack, unsigned long bp)
387 unsigned long bp)
388{ 360{
389 printk("\nCall Trace:\n"); 361 printk("\nCall Trace:\n");
390 dump_trace(tsk, regs, stack, bp, &print_trace_ops, NULL); 362 dump_trace(task, regs, stack, bp, &print_trace_ops, NULL);
391 printk("\n"); 363 printk("\n");
392} 364}
393 365
394static void 366static void
395_show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp, 367_show_stack(struct task_struct *task, struct pt_regs *regs,
396 unsigned long bp) 368 unsigned long *sp, unsigned long bp)
397{ 369{
398 unsigned long *stack; 370 unsigned long *stack;
399 int i; 371 int i;
@@ -405,14 +377,14 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
405 // back trace for this cpu. 377 // back trace for this cpu.
406 378
407 if (sp == NULL) { 379 if (sp == NULL) {
408 if (tsk) 380 if (task)
409 sp = (unsigned long *)tsk->thread.sp; 381 sp = (unsigned long *)task->thread.sp;
410 else 382 else
411 sp = (unsigned long *)&sp; 383 sp = (unsigned long *)&sp;
412 } 384 }
413 385
414 stack = sp; 386 stack = sp;
415 for(i=0; i < kstack_depth_to_print; i++) { 387 for (i = 0; i < kstack_depth_to_print; i++) {
416 if (stack >= irqstack && stack <= irqstack_end) { 388 if (stack >= irqstack && stack <= irqstack_end) {
417 if (stack == irqstack_end) { 389 if (stack == irqstack_end) {
418 stack = (unsigned long *) (irqstack_end[-1]); 390 stack = (unsigned long *) (irqstack_end[-1]);
@@ -427,12 +399,12 @@ _show_stack(struct task_struct *tsk, struct pt_regs *regs, unsigned long *sp,
427 printk(" %016lx", *stack++); 399 printk(" %016lx", *stack++);
428 touch_nmi_watchdog(); 400 touch_nmi_watchdog();
429 } 401 }
430 show_trace(tsk, regs, sp, bp); 402 show_trace(task, regs, sp, bp);
431} 403}
432 404
433void show_stack(struct task_struct *tsk, unsigned long * sp) 405void show_stack(struct task_struct *task, unsigned long *sp)
434{ 406{
435 _show_stack(tsk, NULL, sp, 0); 407 _show_stack(task, NULL, sp, 0);
436} 408}
437 409
438/* 410/*
@@ -440,8 +412,8 @@ void show_stack(struct task_struct *tsk, unsigned long * sp)
440 */ 412 */
441void dump_stack(void) 413void dump_stack(void)
442{ 414{
443 unsigned long dummy;
444 unsigned long bp = 0; 415 unsigned long bp = 0;
416 unsigned long stack;
445 417
446#ifdef CONFIG_FRAME_POINTER 418#ifdef CONFIG_FRAME_POINTER
447 if (!bp) 419 if (!bp)
@@ -453,7 +425,7 @@ void dump_stack(void)
453 init_utsname()->release, 425 init_utsname()->release,
454 (int)strcspn(init_utsname()->version, " "), 426 (int)strcspn(init_utsname()->version, " "),
455 init_utsname()->version); 427 init_utsname()->version);
456 show_trace(NULL, NULL, &dummy, bp); 428 show_trace(NULL, NULL, &stack, bp);
457} 429}
458 430
459EXPORT_SYMBOL(dump_stack); 431EXPORT_SYMBOL(dump_stack);
@@ -464,12 +436,8 @@ void show_registers(struct pt_regs *regs)
464 unsigned long sp; 436 unsigned long sp;
465 const int cpu = smp_processor_id(); 437 const int cpu = smp_processor_id();
466 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 438 struct task_struct *cur = cpu_pda(cpu)->pcurrent;
467 u8 *ip;
468 unsigned int code_prologue = code_bytes * 43 / 64;
469 unsigned int code_len = code_bytes;
470 439
471 sp = regs->sp; 440 sp = regs->sp;
472 ip = (u8 *) regs->ip - code_prologue;
473 printk("CPU %d ", cpu); 441 printk("CPU %d ", cpu);
474 __show_regs(regs); 442 __show_regs(regs);
475 printk("Process %s (pid: %d, threadinfo %p, task %p)\n", 443 printk("Process %s (pid: %d, threadinfo %p, task %p)\n",
@@ -480,15 +448,21 @@ void show_registers(struct pt_regs *regs)
480 * time of the fault.. 448 * time of the fault..
481 */ 449 */
482 if (!user_mode(regs)) { 450 if (!user_mode(regs)) {
451 unsigned int code_prologue = code_bytes * 43 / 64;
452 unsigned int code_len = code_bytes;
483 unsigned char c; 453 unsigned char c;
454 u8 *ip;
455
484 printk("Stack: "); 456 printk("Stack: ");
485 _show_stack(NULL, regs, (unsigned long *)sp, regs->bp); 457 _show_stack(NULL, regs, (unsigned long *)sp, regs->bp);
486 printk("\n"); 458 printk("\n");
487 459
488 printk(KERN_EMERG "Code: "); 460 printk(KERN_EMERG "Code: ");
461
462 ip = (u8 *)regs->ip - code_prologue;
489 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) { 463 if (ip < (u8 *)PAGE_OFFSET || probe_kernel_address(ip, c)) {
490 /* try starting at RIP */ 464 /* try starting at RIP */
491 ip = (u8 *) regs->ip; 465 ip = (u8 *)regs->ip;
492 code_len = code_len - code_prologue + 1; 466 code_len = code_len - code_prologue + 1;
493 } 467 }
494 for (i = 0; i < code_len; i++, ip++) { 468 for (i = 0; i < code_len; i++, ip++) {
@@ -504,7 +478,7 @@ void show_registers(struct pt_regs *regs)
504 } 478 }
505 } 479 }
506 printk("\n"); 480 printk("\n");
507} 481}
508 482
509int is_valid_bugaddr(unsigned long ip) 483int is_valid_bugaddr(unsigned long ip)
510{ 484{
@@ -562,10 +536,9 @@ void __kprobes oops_end(unsigned long flags, struct pt_regs *regs, int signr)
562 do_exit(signr); 536 do_exit(signr);
563} 537}
564 538
565int __kprobes __die(const char * str, struct pt_regs * regs, long err) 539int __kprobes __die(const char *str, struct pt_regs *regs, long err)
566{ 540{
567 static int die_counter; 541 printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff, ++die_counter);
568 printk(KERN_EMERG "%s: %04lx [%u] ", str, err & 0xffff,++die_counter);
569#ifdef CONFIG_PREEMPT 542#ifdef CONFIG_PREEMPT
570 printk("PREEMPT "); 543 printk("PREEMPT ");
571#endif 544#endif
@@ -576,8 +549,10 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
576 printk("DEBUG_PAGEALLOC"); 549 printk("DEBUG_PAGEALLOC");
577#endif 550#endif
578 printk("\n"); 551 printk("\n");
579 if (notify_die(DIE_OOPS, str, regs, err, current->thread.trap_no, SIGSEGV) == NOTIFY_STOP) 552 if (notify_die(DIE_OOPS, str, regs, err,
553 current->thread.trap_no, SIGSEGV) == NOTIFY_STOP)
580 return 1; 554 return 1;
555
581 show_registers(regs); 556 show_registers(regs);
582 add_taint(TAINT_DIE); 557 add_taint(TAINT_DIE);
583 /* Executive summary in case the oops scrolled away */ 558 /* Executive summary in case the oops scrolled away */
@@ -589,7 +564,7 @@ int __kprobes __die(const char * str, struct pt_regs * regs, long err)
589 return 0; 564 return 0;
590} 565}
591 566
592void die(const char * str, struct pt_regs * regs, long err) 567void die(const char *str, struct pt_regs *regs, long err)
593{ 568{
594 unsigned long flags = oops_begin(); 569 unsigned long flags = oops_begin();
595 570
@@ -606,8 +581,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
606{ 581{
607 unsigned long flags; 582 unsigned long flags;
608 583
609 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == 584 if (notify_die(DIE_NMIWATCHDOG, str, regs, 0, 2, SIGINT) == NOTIFY_STOP)
610 NOTIFY_STOP)
611 return; 585 return;
612 586
613 flags = oops_begin(); 587 flags = oops_begin();
@@ -629,44 +603,44 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
629 do_exit(SIGBUS); 603 do_exit(SIGBUS);
630} 604}
631 605
632static void __kprobes do_trap(int trapnr, int signr, char *str, 606static void __kprobes
633 struct pt_regs * regs, long error_code, 607do_trap(int trapnr, int signr, char *str, struct pt_regs *regs,
634 siginfo_t *info) 608 long error_code, siginfo_t *info)
635{ 609{
636 struct task_struct *tsk = current; 610 struct task_struct *tsk = current;
637 611
638 if (user_mode(regs)) { 612 if (!user_mode(regs))
639 /* 613 goto kernel_trap;
640 * We want error_code and trap_no set for userspace
641 * faults and kernelspace faults which result in
642 * die(), but not kernelspace faults which are fixed
643 * up. die() gives the process no chance to handle
644 * the signal and notice the kernel fault information,
645 * so that won't result in polluting the information
646 * about previously queued, but not yet delivered,
647 * faults. See also do_general_protection below.
648 */
649 tsk->thread.error_code = error_code;
650 tsk->thread.trap_no = trapnr;
651
652 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
653 printk_ratelimit()) {
654 printk(KERN_INFO
655 "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
656 tsk->comm, tsk->pid, str,
657 regs->ip, regs->sp, error_code);
658 print_vma_addr(" in ", regs->ip);
659 printk("\n");
660 }
661 614
662 if (info) 615 /*
663 force_sig_info(signr, info, tsk); 616 * We want error_code and trap_no set for userspace faults and
664 else 617 * kernelspace faults which result in die(), but not
665 force_sig(signr, tsk); 618 * kernelspace faults which are fixed up. die() gives the
666 return; 619 * process no chance to handle the signal and notice the
620 * kernel fault information, so that won't result in polluting
621 * the information about previously queued, but not yet
622 * delivered, faults. See also do_general_protection below.
623 */
624 tsk->thread.error_code = error_code;
625 tsk->thread.trap_no = trapnr;
626
627 if (show_unhandled_signals && unhandled_signal(tsk, signr) &&
628 printk_ratelimit()) {
629 printk(KERN_INFO
630 "%s[%d] trap %s ip:%lx sp:%lx error:%lx",
631 tsk->comm, tsk->pid, str,
632 regs->ip, regs->sp, error_code);
633 print_vma_addr(" in ", regs->ip);
634 printk("\n");
667 } 635 }
668 636
637 if (info)
638 force_sig_info(signr, info, tsk);
639 else
640 force_sig(signr, tsk);
641 return;
669 642
643kernel_trap:
670 if (!fixup_exception(regs)) { 644 if (!fixup_exception(regs)) {
671 tsk->thread.error_code = error_code; 645 tsk->thread.error_code = error_code;
672 tsk->thread.trap_no = trapnr; 646 tsk->thread.trap_no = trapnr;
@@ -676,38 +650,38 @@ static void __kprobes do_trap(int trapnr, int signr, char *str,
676} 650}
677 651
678#define DO_ERROR(trapnr, signr, str, name) \ 652#define DO_ERROR(trapnr, signr, str, name) \
679asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ 653asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
680{ \ 654{ \
681 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 655 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
682 == NOTIFY_STOP) \ 656 == NOTIFY_STOP) \
683 return; \ 657 return; \
684 conditional_sti(regs); \ 658 conditional_sti(regs); \
685 do_trap(trapnr, signr, str, regs, error_code, NULL); \ 659 do_trap(trapnr, signr, str, regs, error_code, NULL); \
686} 660}
687 661
688#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \ 662#define DO_ERROR_INFO(trapnr, signr, str, name, sicode, siaddr) \
689asmlinkage void do_##name(struct pt_regs * regs, long error_code) \ 663asmlinkage void do_##name(struct pt_regs * regs, long error_code) \
690{ \ 664{ \
691 siginfo_t info; \ 665 siginfo_t info; \
692 info.si_signo = signr; \ 666 info.si_signo = signr; \
693 info.si_errno = 0; \ 667 info.si_errno = 0; \
694 info.si_code = sicode; \ 668 info.si_code = sicode; \
695 info.si_addr = (void __user *)siaddr; \ 669 info.si_addr = (void __user *)siaddr; \
696 trace_hardirqs_fixup(); \ 670 trace_hardirqs_fixup(); \
697 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \ 671 if (notify_die(DIE_TRAP, str, regs, error_code, trapnr, signr) \
698 == NOTIFY_STOP) \ 672 == NOTIFY_STOP) \
699 return; \ 673 return; \
700 conditional_sti(regs); \ 674 conditional_sti(regs); \
701 do_trap(trapnr, signr, str, regs, error_code, &info); \ 675 do_trap(trapnr, signr, str, regs, error_code, &info); \
702} 676}
703 677
704DO_ERROR_INFO( 0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip) 678DO_ERROR_INFO(0, SIGFPE, "divide error", divide_error, FPE_INTDIV, regs->ip)
705DO_ERROR( 4, SIGSEGV, "overflow", overflow) 679DO_ERROR(4, SIGSEGV, "overflow", overflow)
706DO_ERROR( 5, SIGSEGV, "bounds", bounds) 680DO_ERROR(5, SIGSEGV, "bounds", bounds)
707DO_ERROR_INFO( 6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip) 681DO_ERROR_INFO(6, SIGILL, "invalid opcode", invalid_op, ILL_ILLOPN, regs->ip)
708DO_ERROR( 9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun) 682DO_ERROR(9, SIGFPE, "coprocessor segment overrun", coprocessor_segment_overrun)
709DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS) 683DO_ERROR(10, SIGSEGV, "invalid TSS", invalid_TSS)
710DO_ERROR(11, SIGBUS, "segment not present", segment_not_present) 684DO_ERROR(11, SIGBUS, "segment not present", segment_not_present)
711DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0) 685DO_ERROR_INFO(17, SIGBUS, "alignment check", alignment_check, BUS_ADRALN, 0)
712 686
713/* Runs on IST stack */ 687/* Runs on IST stack */
@@ -738,31 +712,34 @@ asmlinkage void do_double_fault(struct pt_regs * regs, long error_code)
738 die(str, regs, error_code); 712 die(str, regs, error_code);
739} 713}
740 714
741asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, 715asmlinkage void __kprobes
742 long error_code) 716do_general_protection(struct pt_regs *regs, long error_code)
743{ 717{
744 struct task_struct *tsk = current; 718 struct task_struct *tsk;
745 719
746 conditional_sti(regs); 720 conditional_sti(regs);
747 721
748 if (user_mode(regs)) { 722 tsk = current;
749 tsk->thread.error_code = error_code; 723 if (!user_mode(regs))
750 tsk->thread.trap_no = 13; 724 goto gp_in_kernel;
751
752 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
753 printk_ratelimit()) {
754 printk(KERN_INFO
755 "%s[%d] general protection ip:%lx sp:%lx error:%lx",
756 tsk->comm, tsk->pid,
757 regs->ip, regs->sp, error_code);
758 print_vma_addr(" in ", regs->ip);
759 printk("\n");
760 }
761 725
762 force_sig(SIGSEGV, tsk); 726 tsk->thread.error_code = error_code;
763 return; 727 tsk->thread.trap_no = 13;
764 }
765 728
729 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
730 printk_ratelimit()) {
731 printk(KERN_INFO
732 "%s[%d] general protection ip:%lx sp:%lx error:%lx",
733 tsk->comm, tsk->pid,
734 regs->ip, regs->sp, error_code);
735 print_vma_addr(" in ", regs->ip);
736 printk("\n");
737 }
738
739 force_sig(SIGSEGV, tsk);
740 return;
741
742gp_in_kernel:
766 if (fixup_exception(regs)) 743 if (fixup_exception(regs))
767 return; 744 return;
768 745
@@ -775,14 +752,14 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs,
775} 752}
776 753
777static notrace __kprobes void 754static notrace __kprobes void
778mem_parity_error(unsigned char reason, struct pt_regs * regs) 755mem_parity_error(unsigned char reason, struct pt_regs *regs)
779{ 756{
780 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", 757 printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n",
781 reason); 758 reason);
782 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n"); 759 printk(KERN_EMERG "You have some hardware problem, likely on the PCI bus.\n");
783 760
784#if defined(CONFIG_EDAC) 761#if defined(CONFIG_EDAC)
785 if(edac_handler_set()) { 762 if (edac_handler_set()) {
786 edac_atomic_assert_error(); 763 edac_atomic_assert_error();
787 return; 764 return;
788 } 765 }
@@ -799,7 +776,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs)
799} 776}
800 777
801static notrace __kprobes void 778static notrace __kprobes void
802io_check_error(unsigned char reason, struct pt_regs * regs) 779io_check_error(unsigned char reason, struct pt_regs *regs)
803{ 780{
804 printk("NMI: IOCK error (debug interrupt?)\n"); 781 printk("NMI: IOCK error (debug interrupt?)\n");
805 show_registers(regs); 782 show_registers(regs);
@@ -829,14 +806,14 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs)
829 806
830/* Runs on IST stack. This code must keep interrupts off all the time. 807/* Runs on IST stack. This code must keep interrupts off all the time.
831 Nested NMIs are prevented by the CPU. */ 808 Nested NMIs are prevented by the CPU. */
832asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) 809asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
833{ 810{
834 unsigned char reason = 0; 811 unsigned char reason = 0;
835 int cpu; 812 int cpu;
836 813
837 cpu = smp_processor_id(); 814 cpu = smp_processor_id();
838 815
839 /* Only the BSP gets external NMIs from the system. */ 816 /* Only the BSP gets external NMIs from the system. */
840 if (!cpu) 817 if (!cpu)
841 reason = get_nmi_reason(); 818 reason = get_nmi_reason();
842 819
@@ -848,18 +825,17 @@ asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs)
848 * Ok, so this is none of the documented NMI sources, 825 * Ok, so this is none of the documented NMI sources,
849 * so it must be the NMI watchdog. 826 * so it must be the NMI watchdog.
850 */ 827 */
851 if (nmi_watchdog_tick(regs,reason)) 828 if (nmi_watchdog_tick(regs, reason))
852 return; 829 return;
853 if (!do_nmi_callback(regs,cpu)) 830 if (!do_nmi_callback(regs, cpu))
854 unknown_nmi_error(reason, regs); 831 unknown_nmi_error(reason, regs);
855 832
856 return; 833 return;
857 } 834 }
858 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) 835 if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
859 return; 836 return;
860 837
861 /* AK: following checks seem to be broken on modern chipsets. FIXME */ 838 /* AK: following checks seem to be broken on modern chipsets. FIXME */
862
863 if (reason & 0x80) 839 if (reason & 0x80)
864 mem_parity_error(reason, regs); 840 mem_parity_error(reason, regs);
865 if (reason & 0x40) 841 if (reason & 0x40)
@@ -870,9 +846,12 @@ asmlinkage notrace __kprobes void
870do_nmi(struct pt_regs *regs, long error_code) 846do_nmi(struct pt_regs *regs, long error_code)
871{ 847{
872 nmi_enter(); 848 nmi_enter();
849
873 add_pda(__nmi_count, 1); 850 add_pda(__nmi_count, 1);
851
874 if (!ignore_nmis) 852 if (!ignore_nmis)
875 default_do_nmi(regs); 853 default_do_nmi(regs);
854
876 nmi_exit(); 855 nmi_exit();
877} 856}
878 857
@@ -889,13 +868,14 @@ void restart_nmi(void)
889} 868}
890 869
891/* runs on IST stack. */ 870/* runs on IST stack. */
892asmlinkage void __kprobes do_int3(struct pt_regs * regs, long error_code) 871asmlinkage void __kprobes do_int3(struct pt_regs *regs, long error_code)
893{ 872{
894 trace_hardirqs_fixup(); 873 trace_hardirqs_fixup();
895 874
896 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP) == NOTIFY_STOP) { 875 if (notify_die(DIE_INT3, "int3", regs, error_code, 3, SIGTRAP)
876 == NOTIFY_STOP)
897 return; 877 return;
898 } 878
899 preempt_conditional_sti(regs); 879 preempt_conditional_sti(regs);
900 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL); 880 do_trap(3, SIGTRAP, "int3", regs, error_code, NULL);
901 preempt_conditional_cli(regs); 881 preempt_conditional_cli(regs);
@@ -926,8 +906,8 @@ asmlinkage __kprobes struct pt_regs *sync_regs(struct pt_regs *eregs)
926asmlinkage void __kprobes do_debug(struct pt_regs * regs, 906asmlinkage void __kprobes do_debug(struct pt_regs * regs,
927 unsigned long error_code) 907 unsigned long error_code)
928{ 908{
929 unsigned long condition;
930 struct task_struct *tsk = current; 909 struct task_struct *tsk = current;
910 unsigned long condition;
931 siginfo_t info; 911 siginfo_t info;
932 912
933 trace_hardirqs_fixup(); 913 trace_hardirqs_fixup();
@@ -948,21 +928,19 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
948 928
949 /* Mask out spurious debug traps due to lazy DR7 setting */ 929 /* Mask out spurious debug traps due to lazy DR7 setting */
950 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) { 930 if (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) {
951 if (!tsk->thread.debugreg7) { 931 if (!tsk->thread.debugreg7)
952 goto clear_dr7; 932 goto clear_dr7;
953 }
954 } 933 }
955 934
956 tsk->thread.debugreg6 = condition; 935 tsk->thread.debugreg6 = condition;
957 936
958
959 /* 937 /*
960 * Single-stepping through TF: make sure we ignore any events in 938 * Single-stepping through TF: make sure we ignore any events in
961 * kernel space (but re-enable TF when returning to user mode). 939 * kernel space (but re-enable TF when returning to user mode).
962 */ 940 */
963 if (condition & DR_STEP) { 941 if (condition & DR_STEP) {
964 if (!user_mode(regs)) 942 if (!user_mode(regs))
965 goto clear_TF_reenable; 943 goto clear_TF_reenable;
966 } 944 }
967 945
968 /* Ok, finally something we can handle */ 946 /* Ok, finally something we can handle */
@@ -975,7 +953,7 @@ asmlinkage void __kprobes do_debug(struct pt_regs * regs,
975 force_sig_info(SIGTRAP, &info, tsk); 953 force_sig_info(SIGTRAP, &info, tsk);
976 954
977clear_dr7: 955clear_dr7:
978 set_debugreg(0UL, 7); 956 set_debugreg(0, 7);
979 preempt_conditional_cli(regs); 957 preempt_conditional_cli(regs);
980 return; 958 return;
981 959
@@ -983,6 +961,7 @@ clear_TF_reenable:
983 set_tsk_thread_flag(tsk, TIF_SINGLESTEP); 961 set_tsk_thread_flag(tsk, TIF_SINGLESTEP);
984 regs->flags &= ~X86_EFLAGS_TF; 962 regs->flags &= ~X86_EFLAGS_TF;
985 preempt_conditional_cli(regs); 963 preempt_conditional_cli(regs);
964 return;
986} 965}
987 966
988static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr) 967static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
@@ -1005,7 +984,7 @@ static int kernel_math_error(struct pt_regs *regs, const char *str, int trapnr)
1005asmlinkage void do_coprocessor_error(struct pt_regs *regs) 984asmlinkage void do_coprocessor_error(struct pt_regs *regs)
1006{ 985{
1007 void __user *ip = (void __user *)(regs->ip); 986 void __user *ip = (void __user *)(regs->ip);
1008 struct task_struct * task; 987 struct task_struct *task;
1009 siginfo_t info; 988 siginfo_t info;
1010 unsigned short cwd, swd; 989 unsigned short cwd, swd;
1011 990
@@ -1038,30 +1017,30 @@ asmlinkage void do_coprocessor_error(struct pt_regs *regs)
1038 cwd = get_fpu_cwd(task); 1017 cwd = get_fpu_cwd(task);
1039 swd = get_fpu_swd(task); 1018 swd = get_fpu_swd(task);
1040 switch (swd & ~cwd & 0x3f) { 1019 switch (swd & ~cwd & 0x3f) {
1041 case 0x000: 1020 case 0x000: /* No unmasked exception */
1042 default: 1021 default: /* Multiple exceptions */
1043 break; 1022 break;
1044 case 0x001: /* Invalid Op */ 1023 case 0x001: /* Invalid Op */
1045 /* 1024 /*
1046 * swd & 0x240 == 0x040: Stack Underflow 1025 * swd & 0x240 == 0x040: Stack Underflow
1047 * swd & 0x240 == 0x240: Stack Overflow 1026 * swd & 0x240 == 0x240: Stack Overflow
1048 * User must clear the SF bit (0x40) if set 1027 * User must clear the SF bit (0x40) if set
1049 */ 1028 */
1050 info.si_code = FPE_FLTINV; 1029 info.si_code = FPE_FLTINV;
1051 break; 1030 break;
1052 case 0x002: /* Denormalize */ 1031 case 0x002: /* Denormalize */
1053 case 0x010: /* Underflow */ 1032 case 0x010: /* Underflow */
1054 info.si_code = FPE_FLTUND; 1033 info.si_code = FPE_FLTUND;
1055 break; 1034 break;
1056 case 0x004: /* Zero Divide */ 1035 case 0x004: /* Zero Divide */
1057 info.si_code = FPE_FLTDIV; 1036 info.si_code = FPE_FLTDIV;
1058 break; 1037 break;
1059 case 0x008: /* Overflow */ 1038 case 0x008: /* Overflow */
1060 info.si_code = FPE_FLTOVF; 1039 info.si_code = FPE_FLTOVF;
1061 break; 1040 break;
1062 case 0x020: /* Precision */ 1041 case 0x020: /* Precision */
1063 info.si_code = FPE_FLTRES; 1042 info.si_code = FPE_FLTRES;
1064 break; 1043 break;
1065 } 1044 }
1066 force_sig_info(SIGFPE, &info, task); 1045 force_sig_info(SIGFPE, &info, task);
1067} 1046}
@@ -1074,7 +1053,7 @@ asmlinkage void bad_intr(void)
1074asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs) 1053asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
1075{ 1054{
1076 void __user *ip = (void __user *)(regs->ip); 1055 void __user *ip = (void __user *)(regs->ip);
1077 struct task_struct * task; 1056 struct task_struct *task;
1078 siginfo_t info; 1057 siginfo_t info;
1079 unsigned short mxcsr; 1058 unsigned short mxcsr;
1080 1059
@@ -1102,25 +1081,25 @@ asmlinkage void do_simd_coprocessor_error(struct pt_regs *regs)
1102 */ 1081 */
1103 mxcsr = get_fpu_mxcsr(task); 1082 mxcsr = get_fpu_mxcsr(task);
1104 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) { 1083 switch (~((mxcsr & 0x1f80) >> 7) & (mxcsr & 0x3f)) {
1105 case 0x000: 1084 case 0x000:
1106 default: 1085 default:
1107 break; 1086 break;
1108 case 0x001: /* Invalid Op */ 1087 case 0x001: /* Invalid Op */
1109 info.si_code = FPE_FLTINV; 1088 info.si_code = FPE_FLTINV;
1110 break; 1089 break;
1111 case 0x002: /* Denormalize */ 1090 case 0x002: /* Denormalize */
1112 case 0x010: /* Underflow */ 1091 case 0x010: /* Underflow */
1113 info.si_code = FPE_FLTUND; 1092 info.si_code = FPE_FLTUND;
1114 break; 1093 break;
1115 case 0x004: /* Zero Divide */ 1094 case 0x004: /* Zero Divide */
1116 info.si_code = FPE_FLTDIV; 1095 info.si_code = FPE_FLTDIV;
1117 break; 1096 break;
1118 case 0x008: /* Overflow */ 1097 case 0x008: /* Overflow */
1119 info.si_code = FPE_FLTOVF; 1098 info.si_code = FPE_FLTOVF;
1120 break; 1099 break;
1121 case 0x020: /* Precision */ 1100 case 0x020: /* Precision */
1122 info.si_code = FPE_FLTRES; 1101 info.si_code = FPE_FLTRES;
1123 break; 1102 break;
1124 } 1103 }
1125 force_sig_info(SIGFPE, &info, task); 1104 force_sig_info(SIGFPE, &info, task);
1126} 1105}
@@ -1138,7 +1117,7 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void)
1138} 1117}
1139 1118
1140/* 1119/*
1141 * 'math_state_restore()' saves the current math information in the 1120 * 'math_state_restore()' saves the current math information in the
1142 * old math state array, and gets the new ones from the current task 1121 * old math state array, and gets the new ones from the current task
1143 * 1122 *
1144 * Careful.. There are problems with IBM-designed IRQ13 behaviour. 1123 * Careful.. There are problems with IBM-designed IRQ13 behaviour.
@@ -1163,7 +1142,7 @@ asmlinkage void math_state_restore(void)
1163 local_irq_disable(); 1142 local_irq_disable();
1164 } 1143 }
1165 1144
1166 clts(); /* Allow maths ops (or we recurse) */ 1145 clts(); /* Allow maths ops (or we recurse) */
1167 restore_fpu_checking(&me->thread.xstate->fxsave); 1146 restore_fpu_checking(&me->thread.xstate->fxsave);
1168 task_thread_info(me)->status |= TS_USEDFPU; 1147 task_thread_info(me)->status |= TS_USEDFPU;
1169 me->fpu_counter++; 1148 me->fpu_counter++;
@@ -1172,64 +1151,61 @@ EXPORT_SYMBOL_GPL(math_state_restore);
1172 1151
1173void __init trap_init(void) 1152void __init trap_init(void)
1174{ 1153{
1175 set_intr_gate(0,&divide_error); 1154 set_intr_gate(0, &divide_error);
1176 set_intr_gate_ist(1,&debug,DEBUG_STACK); 1155 set_intr_gate_ist(1, &debug, DEBUG_STACK);
1177 set_intr_gate_ist(2,&nmi,NMI_STACK); 1156 set_intr_gate_ist(2, &nmi, NMI_STACK);
1178 set_system_gate_ist(3,&int3,DEBUG_STACK); /* int3 can be called from all */ 1157 set_system_gate_ist(3, &int3, DEBUG_STACK); /* int3 can be called from all */
1179 set_system_gate(4,&overflow); /* int4 can be called from all */ 1158 set_system_gate(4, &overflow); /* int4 can be called from all */
1180 set_intr_gate(5,&bounds); 1159 set_intr_gate(5, &bounds);
1181 set_intr_gate(6,&invalid_op); 1160 set_intr_gate(6, &invalid_op);
1182 set_intr_gate(7,&device_not_available); 1161 set_intr_gate(7, &device_not_available);
1183 set_intr_gate_ist(8,&double_fault, DOUBLEFAULT_STACK); 1162 set_intr_gate_ist(8, &double_fault, DOUBLEFAULT_STACK);
1184 set_intr_gate(9,&coprocessor_segment_overrun); 1163 set_intr_gate(9, &coprocessor_segment_overrun);
1185 set_intr_gate(10,&invalid_TSS); 1164 set_intr_gate(10, &invalid_TSS);
1186 set_intr_gate(11,&segment_not_present); 1165 set_intr_gate(11, &segment_not_present);
1187 set_intr_gate_ist(12,&stack_segment,STACKFAULT_STACK); 1166 set_intr_gate_ist(12, &stack_segment, STACKFAULT_STACK);
1188 set_intr_gate(13,&general_protection); 1167 set_intr_gate(13, &general_protection);
1189 set_intr_gate(14,&page_fault); 1168 set_intr_gate(14, &page_fault);
1190 set_intr_gate(15,&spurious_interrupt_bug); 1169 set_intr_gate(15, &spurious_interrupt_bug);
1191 set_intr_gate(16,&coprocessor_error); 1170 set_intr_gate(16, &coprocessor_error);
1192 set_intr_gate(17,&alignment_check); 1171 set_intr_gate(17, &alignment_check);
1193#ifdef CONFIG_X86_MCE 1172#ifdef CONFIG_X86_MCE
1194 set_intr_gate_ist(18,&machine_check, MCE_STACK); 1173 set_intr_gate_ist(18, &machine_check, MCE_STACK);
1195#endif 1174#endif
1196 set_intr_gate(19,&simd_coprocessor_error); 1175 set_intr_gate(19, &simd_coprocessor_error);
1197 1176
1198#ifdef CONFIG_IA32_EMULATION 1177#ifdef CONFIG_IA32_EMULATION
1199 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 1178 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1200#endif 1179#endif
1201
1202 /* 1180 /*
1203 * initialize the per thread extended state: 1181 * initialize the per thread extended state:
1204 */ 1182 */
1205 init_thread_xstate(); 1183 init_thread_xstate();
1206 /* 1184 /*
1207 * Should be a barrier for any external CPU state. 1185 * Should be a barrier for any external CPU state:
1208 */ 1186 */
1209 cpu_init(); 1187 cpu_init();
1210} 1188}
1211 1189
1212
1213static int __init oops_setup(char *s) 1190static int __init oops_setup(char *s)
1214{ 1191{
1215 if (!s) 1192 if (!s)
1216 return -EINVAL; 1193 return -EINVAL;
1217 if (!strcmp(s, "panic")) 1194 if (!strcmp(s, "panic"))
1218 panic_on_oops = 1; 1195 panic_on_oops = 1;
1219 return 0; 1196 return 0;
1220} 1197}
1221early_param("oops", oops_setup); 1198early_param("oops", oops_setup);
1222 1199
1223static int __init kstack_setup(char *s) 1200static int __init kstack_setup(char *s)
1224{ 1201{
1225 if (!s) 1202 if (!s)
1226 return -EINVAL; 1203 return -EINVAL;
1227 kstack_depth_to_print = simple_strtoul(s,NULL,0); 1204 kstack_depth_to_print = simple_strtoul(s, NULL, 0);
1228 return 0; 1205 return 0;
1229} 1206}
1230early_param("kstack", kstack_setup); 1207early_param("kstack", kstack_setup);
1231 1208
1232
1233static int __init code_bytes_setup(char *s) 1209static int __init code_bytes_setup(char *s)
1234{ 1210{
1235 code_bytes = simple_strtoul(s, NULL, 0); 1211 code_bytes = simple_strtoul(s, NULL, 0);
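
As a reading aid for the traps_64.c cleanup above: the DO_ERROR() macro shown in the hunk generates one trap handler per vector. A minimal, illustrative expansion for the "invalid TSS" case (vector 10) — not part of the patch itself, just the macro written out — looks roughly like this:

asmlinkage void do_invalid_TSS(struct pt_regs * regs, long error_code)
{
	/* give debuggers/notifiers a chance to swallow the trap first */
	if (notify_die(DIE_TRAP, "invalid TSS", regs, error_code, 10, SIGSEGV)
			== NOTIFY_STOP)
		return;
	conditional_sti(regs);
	do_trap(10, SIGSEGV, "invalid TSS", regs, error_code, NULL);
}

The DO_ERROR_INFO() variant differs only in filling a siginfo_t before calling do_trap().
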
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
new file mode 100644
index 000000000000..7603c0553909
--- /dev/null
+++ b/arch/x86/kernel/tsc.c
@@ -0,0 +1,535 @@
1#include <linux/kernel.h>
2#include <linux/sched.h>
3#include <linux/init.h>
4#include <linux/module.h>
5#include <linux/timer.h>
6#include <linux/acpi_pmtmr.h>
7#include <linux/cpufreq.h>
8#include <linux/dmi.h>
9#include <linux/delay.h>
10#include <linux/clocksource.h>
11#include <linux/percpu.h>
12
13#include <asm/hpet.h>
14#include <asm/timer.h>
15#include <asm/vgtod.h>
16#include <asm/time.h>
17#include <asm/delay.h>
18
19unsigned int cpu_khz; /* TSC clocks / usec, not used here */
20EXPORT_SYMBOL(cpu_khz);
21unsigned int tsc_khz;
22EXPORT_SYMBOL(tsc_khz);
23
24/*
25 * TSC can be unstable due to cpufreq or due to unsynced TSCs
26 */
27static int tsc_unstable;
28
29/* native_sched_clock() is called before tsc_init(), so
30 we must start with the TSC soft disabled to prevent
31 erroneous rdtsc usage on !cpu_has_tsc processors */
32static int tsc_disabled = -1;
33
34/*
35 * Scheduler clock - returns current time in nanosec units.
36 */
37u64 native_sched_clock(void)
38{
39 u64 this_offset;
40
41 /*
42 * Fall back to jiffies if there's no TSC available:
43 * ( But note that we still use it if the TSC is marked
44 * unstable. We do this because unlike Time Of Day,
45 * the scheduler clock tolerates small errors and it's
46 * very important for it to be as fast as the platform
47 * can achieve it. )
48 */
49 if (unlikely(tsc_disabled)) {
50 /* No locking but a rare wrong value is not a big deal: */
51 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
52 }
53
54 /* read the Time Stamp Counter: */
55 rdtscll(this_offset);
56
57 /* return the value in ns */
58 return cycles_2_ns(this_offset);
59}
60
61/* We need to define a real function for sched_clock, to override the
62 weak default version */
63#ifdef CONFIG_PARAVIRT
64unsigned long long sched_clock(void)
65{
66 return paravirt_sched_clock();
67}
68#else
69unsigned long long
70sched_clock(void) __attribute__((alias("native_sched_clock")));
71#endif
72
73int check_tsc_unstable(void)
74{
75 return tsc_unstable;
76}
77EXPORT_SYMBOL_GPL(check_tsc_unstable);
78
79#ifdef CONFIG_X86_TSC
80int __init notsc_setup(char *str)
81{
82 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
83 "cannot disable TSC completely.\n");
84 tsc_disabled = 1;
85 return 1;
86}
87#else
88/*
89 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
90 * in cpu/common.c
91 */
92int __init notsc_setup(char *str)
93{
94 setup_clear_cpu_cap(X86_FEATURE_TSC);
95 return 1;
96}
97#endif
98
99__setup("notsc", notsc_setup);
100
101#define MAX_RETRIES 5
102#define SMI_TRESHOLD 50000
103
104/*
105 * Read TSC and the reference counters. Take care of SMI disturbance
106 */
107static u64 __init tsc_read_refs(u64 *pm, u64 *hpet)
108{
109 u64 t1, t2;
110 int i;
111
112 for (i = 0; i < MAX_RETRIES; i++) {
113 t1 = get_cycles();
114 if (hpet)
115 *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
116 else
117 *pm = acpi_pm_read_early();
118 t2 = get_cycles();
119 if ((t2 - t1) < SMI_TRESHOLD)
120 return t2;
121 }
122 return ULLONG_MAX;
123}
124
125/**
126 * native_calibrate_tsc - calibrate the tsc on boot
127 */
128unsigned long native_calibrate_tsc(void)
129{
130 unsigned long flags;
131 u64 tsc1, tsc2, tr1, tr2, delta, pm1, pm2, hpet1, hpet2;
132 int hpet = is_hpet_enabled();
133 unsigned int tsc_khz_val = 0;
134
135 local_irq_save(flags);
136
137 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
138
139 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
140
141 outb(0xb0, 0x43);
142 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
143 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
144 tr1 = get_cycles();
145 while ((inb(0x61) & 0x20) == 0);
146 tr2 = get_cycles();
147
148 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
149
150 local_irq_restore(flags);
151
152 /*
153 * Preset the result with the raw and inaccurate PIT
154 * calibration value
155 */
156 delta = (tr2 - tr1);
157 do_div(delta, 50);
158 tsc_khz_val = delta;
159
160 /* hpet or pmtimer available ? */
161 if (!hpet && !pm1 && !pm2) {
162 printk(KERN_INFO "TSC calibrated against PIT\n");
163 goto out;
164 }
165
166 /* Check, whether the sampling was disturbed by an SMI */
167 if (tsc1 == ULLONG_MAX || tsc2 == ULLONG_MAX) {
168 printk(KERN_WARNING "TSC calibration disturbed by SMI, "
169 "using PIT calibration result\n");
170 goto out;
171 }
172
173 tsc2 = (tsc2 - tsc1) * 1000000LL;
174
175 if (hpet) {
176 printk(KERN_INFO "TSC calibrated against HPET\n");
177 if (hpet2 < hpet1)
178 hpet2 += 0x100000000ULL;
179 hpet2 -= hpet1;
180 tsc1 = ((u64)hpet2 * hpet_readl(HPET_PERIOD));
181 do_div(tsc1, 1000000);
182 } else {
183 printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
184 if (pm2 < pm1)
185 pm2 += (u64)ACPI_PM_OVRRUN;
186 pm2 -= pm1;
187 tsc1 = pm2 * 1000000000LL;
188 do_div(tsc1, PMTMR_TICKS_PER_SEC);
189 }
190
191 do_div(tsc2, tsc1);
192 tsc_khz_val = tsc2;
193
194out:
195 return tsc_khz_val;
196}
197
198
199#ifdef CONFIG_X86_32
200/* Only called from the Powernow K7 cpu freq driver */
201int recalibrate_cpu_khz(void)
202{
203#ifndef CONFIG_SMP
204 unsigned long cpu_khz_old = cpu_khz;
205
206 if (cpu_has_tsc) {
207 tsc_khz = calibrate_tsc();
208 cpu_khz = tsc_khz;
209 cpu_data(0).loops_per_jiffy =
210 cpufreq_scale(cpu_data(0).loops_per_jiffy,
211 cpu_khz_old, cpu_khz);
212 return 0;
213 } else
214 return -ENODEV;
215#else
216 return -ENODEV;
217#endif
218}
219
220EXPORT_SYMBOL(recalibrate_cpu_khz);
221
222#endif /* CONFIG_X86_32 */
223
224/* Accelerators for sched_clock()
225 * convert from cycles(64bits) => nanoseconds (64bits)
226 * basic equation:
227 * ns = cycles / (freq / ns_per_sec)
228 * ns = cycles * (ns_per_sec / freq)
229 * ns = cycles * (10^9 / (cpu_khz * 10^3))
230 * ns = cycles * (10^6 / cpu_khz)
231 *
232 * Then we use scaling math (suggested by george@mvista.com) to get:
233 * ns = cycles * (10^6 * SC / cpu_khz) / SC
234 * ns = cycles * cyc2ns_scale / SC
235 *
236 * And since SC is a constant power of two, we can convert the div
237 * into a shift.
238 *
239 * We can use khz divisor instead of mhz to keep a better precision, since
240 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
241 * (mathieu.desnoyers@polymtl.ca)
242 *
243 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
244 */
245
246DEFINE_PER_CPU(unsigned long, cyc2ns);
247
248static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
249{
250 unsigned long long tsc_now, ns_now;
251 unsigned long flags, *scale;
252
253 local_irq_save(flags);
254 sched_clock_idle_sleep_event();
255
256 scale = &per_cpu(cyc2ns, cpu);
257
258 rdtscll(tsc_now);
259 ns_now = __cycles_2_ns(tsc_now);
260
261 if (cpu_khz)
262 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
263
264 sched_clock_idle_wakeup_event(0);
265 local_irq_restore(flags);
266}
267
268#ifdef CONFIG_CPU_FREQ
269
270/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
271 * changes.
272 *
273 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
274 * not that important because current Opteron setups do not support
275 * scaling on SMP anyroads.
276 *
277 * Should fix up last_tsc too. Currently gettimeofday in the
278 * first tick after the change will be slightly wrong.
279 */
280
281static unsigned int ref_freq;
282static unsigned long loops_per_jiffy_ref;
283static unsigned long tsc_khz_ref;
284
285static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
286 void *data)
287{
288 struct cpufreq_freqs *freq = data;
289 unsigned long *lpj, dummy;
290
291 if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
292 return 0;
293
294 lpj = &dummy;
295 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
296#ifdef CONFIG_SMP
297 lpj = &cpu_data(freq->cpu).loops_per_jiffy;
298#else
299 lpj = &boot_cpu_data.loops_per_jiffy;
300#endif
301
302 if (!ref_freq) {
303 ref_freq = freq->old;
304 loops_per_jiffy_ref = *lpj;
305 tsc_khz_ref = tsc_khz;
306 }
307 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
308 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
309 (val == CPUFREQ_RESUMECHANGE)) {
310 *lpj = cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
311
312 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
313 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
314 mark_tsc_unstable("cpufreq changes");
315 }
316
317 set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
318
319 return 0;
320}
321
322static struct notifier_block time_cpufreq_notifier_block = {
323 .notifier_call = time_cpufreq_notifier
324};
325
326static int __init cpufreq_tsc(void)
327{
328 cpufreq_register_notifier(&time_cpufreq_notifier_block,
329 CPUFREQ_TRANSITION_NOTIFIER);
330 return 0;
331}
332
333core_initcall(cpufreq_tsc);
334
335#endif /* CONFIG_CPU_FREQ */
336
337/* clocksource code */
338
339static struct clocksource clocksource_tsc;
340
341/*
342 * We compare the TSC to the cycle_last value in the clocksource
343 * structure to avoid a nasty time-warp. This can be observed in a
344 * very small window right after one CPU updated cycle_last under
345 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
346 * is smaller than the cycle_last reference value due to a TSC which
347 * is slightly behind. This delta is nowhere else observable, but in
348 * that case it results in a forward time jump in the range of hours
349 * due to the unsigned delta calculation of the time keeping core
350 * code, which is necessary to support wrapping clocksources like pm
351 * timer.
352 */
353static cycle_t read_tsc(void)
354{
355 cycle_t ret = (cycle_t)get_cycles();
356
357 return ret >= clocksource_tsc.cycle_last ?
358 ret : clocksource_tsc.cycle_last;
359}
360
361#ifdef CONFIG_X86_64
362static cycle_t __vsyscall_fn vread_tsc(void)
363{
364 cycle_t ret = (cycle_t)vget_cycles();
365
366 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
367 ret : __vsyscall_gtod_data.clock.cycle_last;
368}
369#endif
370
371static struct clocksource clocksource_tsc = {
372 .name = "tsc",
373 .rating = 300,
374 .read = read_tsc,
375 .mask = CLOCKSOURCE_MASK(64),
376 .shift = 22,
377 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
378 CLOCK_SOURCE_MUST_VERIFY,
379#ifdef CONFIG_X86_64
380 .vread = vread_tsc,
381#endif
382};
383
384void mark_tsc_unstable(char *reason)
385{
386 if (!tsc_unstable) {
387 tsc_unstable = 1;
388 printk("Marking TSC unstable due to %s\n", reason);
389 /* Change only the rating, when not registered */
390 if (clocksource_tsc.mult)
391 clocksource_change_rating(&clocksource_tsc, 0);
392 else
393 clocksource_tsc.rating = 0;
394 }
395}
396
397EXPORT_SYMBOL_GPL(mark_tsc_unstable);
398
399static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
400{
401 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
402 d->ident);
403 tsc_unstable = 1;
404 return 0;
405}
406
407/* List of systems that have known TSC problems */
408static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
409 {
410 .callback = dmi_mark_tsc_unstable,
411 .ident = "IBM Thinkpad 380XD",
412 .matches = {
413 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
414 DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
415 },
416 },
417 {}
418};
419
420/*
421 * Geode_LX - the OLPC CPU has a possibly very reliable TSC
422 */
423#ifdef CONFIG_MGEODE_LX
424/* RTSC counts during suspend */
425#define RTSC_SUSP 0x100
426
427static void __init check_geode_tsc_reliable(void)
428{
429 unsigned long res_low, res_high;
430
431 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
432 if (res_low & RTSC_SUSP)
433 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
434}
435#else
436static inline void check_geode_tsc_reliable(void) { }
437#endif
438
439/*
440 * Make an educated guess if the TSC is trustworthy and synchronized
441 * over all CPUs.
442 */
443__cpuinit int unsynchronized_tsc(void)
444{
445 if (!cpu_has_tsc || tsc_unstable)
446 return 1;
447
448#ifdef CONFIG_SMP
449 if (apic_is_clustered_box())
450 return 1;
451#endif
452
453 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
454 return 0;
455 /*
456 * Intel systems are normally all synchronized.
457 * Exceptions must mark TSC as unstable:
458 */
459 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
460 /* assume multi socket systems are not synchronized: */
461 if (num_possible_cpus() > 1)
462 tsc_unstable = 1;
463 }
464
465 return tsc_unstable;
466}
467
468static void __init init_tsc_clocksource(void)
469{
470 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
471 clocksource_tsc.shift);
472 /* lower the rating if we already know it's unstable: */
473 if (check_tsc_unstable()) {
474 clocksource_tsc.rating = 0;
475 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
476 }
477 clocksource_register(&clocksource_tsc);
478}
479
480void __init tsc_init(void)
481{
482 u64 lpj;
483 int cpu;
484
485 if (!cpu_has_tsc)
486 return;
487
488 tsc_khz = calibrate_tsc();
489 cpu_khz = tsc_khz;
490
491 if (!tsc_khz) {
492 mark_tsc_unstable("could not calculate TSC khz");
493 return;
494 }
495
496#ifdef CONFIG_X86_64
497 if (cpu_has(&boot_cpu_data, X86_FEATURE_CONSTANT_TSC) &&
498 (boot_cpu_data.x86_vendor == X86_VENDOR_AMD))
499 cpu_khz = calibrate_cpu();
500#endif
501
502 lpj = ((u64)tsc_khz * 1000);
503 do_div(lpj, HZ);
504 lpj_fine = lpj;
505
506 printk("Detected %lu.%03lu MHz processor.\n",
507 (unsigned long)cpu_khz / 1000,
508 (unsigned long)cpu_khz % 1000);
509
510 /*
511 * Secondary CPUs do not run through tsc_init(), so set up
512 * all the scale factors for all CPUs, assuming the same
513 * speed as the bootup CPU. (cpufreq notifiers will fix this
514 * up if their speed diverges)
515 */
516 for_each_possible_cpu(cpu)
517 set_cyc2ns_scale(cpu_khz, cpu);
518
519 if (tsc_disabled > 0)
520 return;
521
522 /* now allow native_sched_clock() to use rdtsc */
523 tsc_disabled = 0;
524
525 use_tsc_delay();
526 /* Check and install the TSC clocksource */
527 dmi_check_system(bad_tsc_dmi_table);
528
529 if (unsynchronized_tsc())
530 mark_tsc_unstable("TSCs unsynchronized");
531
532 check_geode_tsc_reliable();
533 init_tsc_clocksource();
534}
535
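
The arithmetic in native_calibrate_tsc() above boils down to "TSC cycles elapsed across a known reference interval". A small standalone sketch of the same unit conversion (plain user-space C, not kernel code; the 2.4 GHz figure and the function name are made up for illustration):

#include <stdio.h>

/* cycles * 10^6 / interval_ns == cycles per millisecond == kHz,
 * mirroring the HPET/PM-timer refinement path in the code above */
static unsigned long long cycles_to_khz(unsigned long long tsc_cycles,
					unsigned long long interval_ns)
{
	return tsc_cycles * 1000000ULL / interval_ns;
}

int main(void)
{
	/* a 2.4 GHz TSC gated over 50 ms: 120,000,000 cycles in 50,000,000 ns */
	printf("%llu kHz\n", cycles_to_khz(120000000ULL, 50000000ULL));
	return 0;
}

The PIT-only preset in the code takes the same shortcut directly: cycles counted across a 50 ms gate, divided by 50, give kHz.
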
diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c
deleted file mode 100644
index 6240922e497c..000000000000
--- a/arch/x86/kernel/tsc_32.c
+++ /dev/null
@@ -1,455 +0,0 @@
1#include <linux/sched.h>
2#include <linux/clocksource.h>
3#include <linux/workqueue.h>
4#include <linux/delay.h>
5#include <linux/cpufreq.h>
6#include <linux/jiffies.h>
7#include <linux/init.h>
8#include <linux/dmi.h>
9#include <linux/percpu.h>
10
11#include <asm/delay.h>
12#include <asm/tsc.h>
13#include <asm/io.h>
14#include <asm/timer.h>
15
16#include "mach_timer.h"
17
18/* native_sched_clock() is called before tsc_init(), so
19 we must start with the TSC soft disabled to prevent
20 erroneous rdtsc usage on !cpu_has_tsc processors */
21static int tsc_disabled = -1;
22
23/*
24 * On some systems the TSC frequency does not
25 * change with the cpu frequency. So we need
26 * an extra value to store the TSC freq
27 */
28unsigned int tsc_khz;
29EXPORT_SYMBOL_GPL(tsc_khz);
30
31#ifdef CONFIG_X86_TSC
32static int __init tsc_setup(char *str)
33{
34 printk(KERN_WARNING "notsc: Kernel compiled with CONFIG_X86_TSC, "
35 "cannot disable TSC completely.\n");
36 tsc_disabled = 1;
37 return 1;
38}
39#else
40/*
41 * disable flag for tsc. Takes effect by clearing the TSC cpu flag
42 * in cpu/common.c
43 */
44static int __init tsc_setup(char *str)
45{
46 setup_clear_cpu_cap(X86_FEATURE_TSC);
47 return 1;
48}
49#endif
50
51__setup("notsc", tsc_setup);
52
53/*
54 * code to mark and check if the TSC is unstable
55 * due to cpufreq or due to unsynced TSCs
56 */
57static int tsc_unstable;
58
59int check_tsc_unstable(void)
60{
61 return tsc_unstable;
62}
63EXPORT_SYMBOL_GPL(check_tsc_unstable);
64
65/* Accelerators for sched_clock()
66 * convert from cycles(64bits) => nanoseconds (64bits)
67 * basic equation:
68 * ns = cycles / (freq / ns_per_sec)
69 * ns = cycles * (ns_per_sec / freq)
70 * ns = cycles * (10^9 / (cpu_khz * 10^3))
71 * ns = cycles * (10^6 / cpu_khz)
72 *
73 * Then we use scaling math (suggested by george@mvista.com) to get:
74 * ns = cycles * (10^6 * SC / cpu_khz) / SC
75 * ns = cycles * cyc2ns_scale / SC
76 *
77 * And since SC is a constant power of two, we can convert the div
78 * into a shift.
79 *
80 * We can use khz divisor instead of mhz to keep a better precision, since
81 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
82 * (mathieu.desnoyers@polymtl.ca)
83 *
84 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
85 */
86
87DEFINE_PER_CPU(unsigned long, cyc2ns);
88
89static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
90{
91 unsigned long long tsc_now, ns_now;
92 unsigned long flags, *scale;
93
94 local_irq_save(flags);
95 sched_clock_idle_sleep_event();
96
97 scale = &per_cpu(cyc2ns, cpu);
98
99 rdtscll(tsc_now);
100 ns_now = __cycles_2_ns(tsc_now);
101
102 if (cpu_khz)
103 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
104
105 /*
106 * Start smoothly with the new frequency:
107 */
108 sched_clock_idle_wakeup_event(0);
109 local_irq_restore(flags);
110}
111
112/*
113 * Scheduler clock - returns current time in nanosec units.
114 */
115unsigned long long native_sched_clock(void)
116{
117 unsigned long long this_offset;
118
119 /*
120 * Fall back to jiffies if there's no TSC available:
121 * ( But note that we still use it if the TSC is marked
122 * unstable. We do this because unlike Time Of Day,
123 * the scheduler clock tolerates small errors and it's
124 * very important for it to be as fast as the platform
125 * can achieve it. )
126 */
127 if (unlikely(tsc_disabled))
128 /* No locking but a rare wrong value is not a big deal: */
129 return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ);
130
131 /* read the Time Stamp Counter: */
132 rdtscll(this_offset);
133
134 /* return the value in ns */
135 return cycles_2_ns(this_offset);
136}
137
138/* We need to define a real function for sched_clock, to override the
139 weak default version */
140#ifdef CONFIG_PARAVIRT
141unsigned long long sched_clock(void)
142{
143 return paravirt_sched_clock();
144}
145#else
146unsigned long long sched_clock(void)
147 __attribute__((alias("native_sched_clock")));
148#endif
149
150unsigned long native_calculate_cpu_khz(void)
151{
152 unsigned long long start, end;
153 unsigned long count;
154 u64 delta64 = (u64)ULLONG_MAX;
155 int i;
156 unsigned long flags;
157
158 local_irq_save(flags);
159
160 /* run 3 times to ensure the cache is warm and to get an accurate reading */
161 for (i = 0; i < 3; i++) {
162 mach_prepare_counter();
163 rdtscll(start);
164 mach_countup(&count);
165 rdtscll(end);
166
167 /*
168 * Error: ECTCNEVERSET
169 * The CTC wasn't reliable: we got a hit on the very first read,
170 * or the CPU was so fast/slow that the quotient wouldn't fit in
171 * 32 bits..
172 */
173 if (count <= 1)
174 continue;
175
176 /* cpu freq too slow: */
177 if ((end - start) <= CALIBRATE_TIME_MSEC)
178 continue;
179
180 /*
181 * We want the minimum time of all runs in case one of them
182 * is inaccurate due to SMI or other delay
183 */
184 delta64 = min(delta64, (end - start));
185 }
186
187 /* cpu freq too fast (or every run was bad): */
188 if (delta64 > (1ULL<<32))
189 goto err;
190
191 delta64 += CALIBRATE_TIME_MSEC/2; /* round for do_div */
192 do_div(delta64,CALIBRATE_TIME_MSEC);
193
194 local_irq_restore(flags);
195 return (unsigned long)delta64;
196err:
197 local_irq_restore(flags);
198 return 0;
199}
200
201int recalibrate_cpu_khz(void)
202{
203#ifndef CONFIG_SMP
204 unsigned long cpu_khz_old = cpu_khz;
205
206 if (cpu_has_tsc) {
207 cpu_khz = calculate_cpu_khz();
208 tsc_khz = cpu_khz;
209 cpu_data(0).loops_per_jiffy =
210 cpufreq_scale(cpu_data(0).loops_per_jiffy,
211 cpu_khz_old, cpu_khz);
212 return 0;
213 } else
214 return -ENODEV;
215#else
216 return -ENODEV;
217#endif
218}
219
220EXPORT_SYMBOL(recalibrate_cpu_khz);
221
222#ifdef CONFIG_CPU_FREQ
223
224/*
225 * if the CPU frequency is scaled, TSC-based delays will need a different
226 * loops_per_jiffy value to function properly.
227 */
228static unsigned int ref_freq;
229static unsigned long loops_per_jiffy_ref;
230static unsigned long cpu_khz_ref;
231
232static int
233time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data)
234{
235 struct cpufreq_freqs *freq = data;
236
237 if (!ref_freq) {
238 if (!freq->old){
239 ref_freq = freq->new;
240 return 0;
241 }
242 ref_freq = freq->old;
243 loops_per_jiffy_ref = cpu_data(freq->cpu).loops_per_jiffy;
244 cpu_khz_ref = cpu_khz;
245 }
246
247 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
248 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
249 (val == CPUFREQ_RESUMECHANGE)) {
250 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
251 cpu_data(freq->cpu).loops_per_jiffy =
252 cpufreq_scale(loops_per_jiffy_ref,
253 ref_freq, freq->new);
254
255 if (cpu_khz) {
256
257 if (num_online_cpus() == 1)
258 cpu_khz = cpufreq_scale(cpu_khz_ref,
259 ref_freq, freq->new);
260 if (!(freq->flags & CPUFREQ_CONST_LOOPS)) {
261 tsc_khz = cpu_khz;
262 set_cyc2ns_scale(cpu_khz, freq->cpu);
263 /*
264 * TSC based sched_clock turns
265 * to junk w/ cpufreq
266 */
267 mark_tsc_unstable("cpufreq changes");
268 }
269 }
270 }
271
272 return 0;
273}
274
275static struct notifier_block time_cpufreq_notifier_block = {
276 .notifier_call = time_cpufreq_notifier
277};
278
279static int __init cpufreq_tsc(void)
280{
281 return cpufreq_register_notifier(&time_cpufreq_notifier_block,
282 CPUFREQ_TRANSITION_NOTIFIER);
283}
284core_initcall(cpufreq_tsc);
285
286#endif
287
288/* clock source code */
289
290static struct clocksource clocksource_tsc;
291
292/*
293 * We compare the TSC to the cycle_last value in the clocksource
294 * structure to avoid a nasty time-warp issue. This can be observed in
295 * a very small window right after one CPU updated cycle_last under
296 * xtime lock and the other CPU reads a TSC value which is smaller
297 * than the cycle_last reference value due to a TSC which is slightly
298 * behind. This delta is nowhere else observable, but in that case it
299 * results in a forward time jump in the range of hours due to the
300 * unsigned delta calculation of the time keeping core code, which is
301 * necessary to support wrapping clocksources like pm timer.
302 */
303static cycle_t read_tsc(void)
304{
305 cycle_t ret;
306
307 rdtscll(ret);
308
309 return ret >= clocksource_tsc.cycle_last ?
310 ret : clocksource_tsc.cycle_last;
311}
312
313static struct clocksource clocksource_tsc = {
314 .name = "tsc",
315 .rating = 300,
316 .read = read_tsc,
317 .mask = CLOCKSOURCE_MASK(64),
318 .mult = 0, /* to be set */
319 .shift = 22,
320 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
321 CLOCK_SOURCE_MUST_VERIFY,
322};
323
324void mark_tsc_unstable(char *reason)
325{
326 if (!tsc_unstable) {
327 tsc_unstable = 1;
328 printk("Marking TSC unstable due to: %s.\n", reason);
329 /* Can be called before registration */
330 if (clocksource_tsc.mult)
331 clocksource_change_rating(&clocksource_tsc, 0);
332 else
333 clocksource_tsc.rating = 0;
334 }
335}
336EXPORT_SYMBOL_GPL(mark_tsc_unstable);
337
338static int __init dmi_mark_tsc_unstable(const struct dmi_system_id *d)
339{
340 printk(KERN_NOTICE "%s detected: marking TSC unstable.\n",
341 d->ident);
342 tsc_unstable = 1;
343 return 0;
344}
345
346/* List of systems that have known TSC problems */
347static struct dmi_system_id __initdata bad_tsc_dmi_table[] = {
348 {
349 .callback = dmi_mark_tsc_unstable,
350 .ident = "IBM Thinkpad 380XD",
351 .matches = {
352 DMI_MATCH(DMI_BOARD_VENDOR, "IBM"),
353 DMI_MATCH(DMI_BOARD_NAME, "2635FA0"),
354 },
355 },
356 {}
357};
358
359/*
360 * Make an educated guess if the TSC is trustworthy and synchronized
361 * over all CPUs.
362 */
363__cpuinit int unsynchronized_tsc(void)
364{
365 if (!cpu_has_tsc || tsc_unstable)
366 return 1;
367
368 /* Anything with constant TSC should be synchronized */
369 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
370 return 0;
371
372 /*
373 * Intel systems are normally all synchronized.
374 * Exceptions must mark TSC as unstable:
375 */
376 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) {
377 /* assume multi socket systems are not synchronized: */
378 if (num_possible_cpus() > 1)
379 tsc_unstable = 1;
380 }
381 return tsc_unstable;
382}
383
384/*
385 * Geode_LX - the OLPC CPU has a possibly very reliable TSC
386 */
387#ifdef CONFIG_MGEODE_LX
388/* RTSC counts during suspend */
389#define RTSC_SUSP 0x100
390
391static void __init check_geode_tsc_reliable(void)
392{
393 unsigned long res_low, res_high;
394
395 rdmsr_safe(MSR_GEODE_BUSCONT_CONF0, &res_low, &res_high);
396 if (res_low & RTSC_SUSP)
397 clocksource_tsc.flags &= ~CLOCK_SOURCE_MUST_VERIFY;
398}
399#else
400static inline void check_geode_tsc_reliable(void) { }
401#endif
402
403
404void __init tsc_init(void)
405{
406 int cpu;
407 u64 lpj;
408
409 if (!cpu_has_tsc || tsc_disabled > 0)
410 return;
411
412 cpu_khz = calculate_cpu_khz();
413 tsc_khz = cpu_khz;
414
415 if (!cpu_khz) {
416 mark_tsc_unstable("could not calculate TSC khz");
417 return;
418 }
419
420 lpj = ((u64)tsc_khz * 1000);
421 do_div(lpj, HZ);
422 lpj_fine = lpj;
423
424 /* now allow native_sched_clock() to use rdtsc */
425 tsc_disabled = 0;
426
427 printk("Detected %lu.%03lu MHz processor.\n",
428 (unsigned long)cpu_khz / 1000,
429 (unsigned long)cpu_khz % 1000);
430
431 /*
432 * Secondary CPUs do not run through tsc_init(), so set up
433 * all the scale factors for all CPUs, assuming the same
434 * speed as the bootup CPU. (cpufreq notifiers will fix this
435 * up if their speed diverges)
436 */
437 for_each_possible_cpu(cpu)
438 set_cyc2ns_scale(cpu_khz, cpu);
439
440 use_tsc_delay();
441
442 /* Check and install the TSC clocksource */
443 dmi_check_system(bad_tsc_dmi_table);
444
445 unsynchronized_tsc();
446 check_geode_tsc_reliable();
447 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
448 clocksource_tsc.shift);
449 /* lower the rating if we already know it's unstable: */
450 if (check_tsc_unstable()) {
451 clocksource_tsc.rating = 0;
452 clocksource_tsc.flags &= ~CLOCK_SOURCE_IS_CONTINUOUS;
453 }
454 clocksource_register(&clocksource_tsc);
455}
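
The cyc2ns comment block repeated in the listings above describes a fixed-point trick: pre-compute scale = 10^6 * SC / cpu_khz once, so the per-call division turns into a multiply and a shift. A standalone sketch, assuming SC = 2^10 as that comment states (user-space C with illustrative values; SC_SHIFT is a local stand-in, not the kernel's constant name):

#include <stdio.h>

#define SC_SHIFT 10	/* SC = 2^10, per the comment above (assumption) */

int main(void)
{
	unsigned long long cpu_khz = 2400000ULL;	/* hypothetical 2.4 GHz CPU */
	unsigned long long scale = (1000000ULL << SC_SHIFT) / cpu_khz;
	unsigned long long cycles = 2400000000ULL;	/* about one second of cycles */
	unsigned long long ns = (cycles * scale) >> SC_SHIFT;

	/* expect roughly 10^9 ns; the small error comes from integer truncation */
	printf("scale=%llu ns=%llu\n", scale, ns);
	return 0;
}

Keeping the divisor in kHz (rather than MHz) is what lets the 32-bit scale value stay precise, as the comment notes.
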
diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c
deleted file mode 100644
index 9898fb01edfd..000000000000
--- a/arch/x86/kernel/tsc_64.c
+++ /dev/null
@@ -1,357 +0,0 @@
1#include <linux/kernel.h>
2#include <linux/sched.h>
3#include <linux/interrupt.h>
4#include <linux/init.h>
5#include <linux/clocksource.h>
6#include <linux/time.h>
7#include <linux/acpi.h>
8#include <linux/cpufreq.h>
9#include <linux/acpi_pmtmr.h>
10
11#include <asm/hpet.h>
12#include <asm/timex.h>
13#include <asm/timer.h>
14#include <asm/vgtod.h>
15
16static int notsc __initdata = 0;
17
18unsigned int cpu_khz; /* TSC clocks / usec, not used here */
19EXPORT_SYMBOL(cpu_khz);
20unsigned int tsc_khz;
21EXPORT_SYMBOL(tsc_khz);
22
23/* Accelerators for sched_clock()
24 * convert from cycles(64bits) => nanoseconds (64bits)
25 * basic equation:
26 * ns = cycles / (freq / ns_per_sec)
27 * ns = cycles * (ns_per_sec / freq)
28 * ns = cycles * (10^9 / (cpu_khz * 10^3))
29 * ns = cycles * (10^6 / cpu_khz)
30 *
31 * Then we use scaling math (suggested by george@mvista.com) to get:
32 * ns = cycles * (10^6 * SC / cpu_khz) / SC
33 * ns = cycles * cyc2ns_scale / SC
34 *
35 * And since SC is a constant power of two, we can convert the div
36 * into a shift.
37 *
38 * We can use khz divisor instead of mhz to keep a better precision, since
39 * cyc2ns_scale is limited to 10^6 * 2^10, which fits in 32 bits.
40 * (mathieu.desnoyers@polymtl.ca)
41 *
42 * -johnstul@us.ibm.com "math is hard, lets go shopping!"
43 */
44DEFINE_PER_CPU(unsigned long, cyc2ns);
45
46static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
47{
48 unsigned long long tsc_now, ns_now;
49 unsigned long flags, *scale;
50
51 local_irq_save(flags);
52 sched_clock_idle_sleep_event();
53
54 scale = &per_cpu(cyc2ns, cpu);
55
56 rdtscll(tsc_now);
57 ns_now = __cycles_2_ns(tsc_now);
58
59 if (cpu_khz)
60 *scale = (NSEC_PER_MSEC << CYC2NS_SCALE_FACTOR)/cpu_khz;
61
62 sched_clock_idle_wakeup_event(0);
63 local_irq_restore(flags);
64}
65
66unsigned long long native_sched_clock(void)
67{
68 unsigned long a = 0;
69
70 /* Could do CPU core sync here. Opteron can execute rdtsc speculatively,
71 * which means it is not completely exact and may not be monotonic
72 * between CPUs. But the errors should be too small to matter for
73 * scheduling purposes.
74 */
75
76 rdtscll(a);
77 return cycles_2_ns(a);
78}
79
80/* We need to define a real function for sched_clock, to override the
81 weak default version */
82#ifdef CONFIG_PARAVIRT
83unsigned long long sched_clock(void)
84{
85 return paravirt_sched_clock();
86}
87#else
88unsigned long long
89sched_clock(void) __attribute__((alias("native_sched_clock")));
90#endif
91
92
93static int tsc_unstable;
94
95int check_tsc_unstable(void)
96{
97 return tsc_unstable;
98}
99EXPORT_SYMBOL_GPL(check_tsc_unstable);
100
101#ifdef CONFIG_CPU_FREQ
102
103/* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
104 * changes.
105 *
106 * RED-PEN: On SMP we assume all CPUs run with the same frequency. It's
107 * not that important because current Opteron setups do not support
108 * scaling on SMP anyroads.
109 *
110 * Should fix up last_tsc too. Currently gettimeofday in the
111 * first tick after the change will be slightly wrong.
112 */
113
114static unsigned int ref_freq;
115static unsigned long loops_per_jiffy_ref;
116static unsigned long tsc_khz_ref;
117
118static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val,
119 void *data)
120{
121 struct cpufreq_freqs *freq = data;
122 unsigned long *lpj, dummy;
123
124 if (cpu_has(&cpu_data(freq->cpu), X86_FEATURE_CONSTANT_TSC))
125 return 0;
126
127 lpj = &dummy;
128 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
129#ifdef CONFIG_SMP
130 lpj = &cpu_data(freq->cpu).loops_per_jiffy;
131#else
132 lpj = &boot_cpu_data.loops_per_jiffy;
133#endif
134
135 if (!ref_freq) {
136 ref_freq = freq->old;
137 loops_per_jiffy_ref = *lpj;
138 tsc_khz_ref = tsc_khz;
139 }
140 if ((val == CPUFREQ_PRECHANGE && freq->old < freq->new) ||
141 (val == CPUFREQ_POSTCHANGE && freq->old > freq->new) ||
142 (val == CPUFREQ_RESUMECHANGE)) {
143 *lpj =
144 cpufreq_scale(loops_per_jiffy_ref, ref_freq, freq->new);
145
146 tsc_khz = cpufreq_scale(tsc_khz_ref, ref_freq, freq->new);
147 if (!(freq->flags & CPUFREQ_CONST_LOOPS))
148 mark_tsc_unstable("cpufreq changes");
149 }
150
151 set_cyc2ns_scale(tsc_khz_ref, freq->cpu);
152
153 return 0;
154}
155
156static struct notifier_block time_cpufreq_notifier_block = {
157 .notifier_call = time_cpufreq_notifier
158};
159
160static int __init cpufreq_tsc(void)
161{
162 cpufreq_register_notifier(&time_cpufreq_notifier_block,
163 CPUFREQ_TRANSITION_NOTIFIER);
164 return 0;
165}
166
167core_initcall(cpufreq_tsc);
168
169#endif
170
171#define MAX_RETRIES 5
172#define SMI_TRESHOLD 50000
173
174/*
175 * Read TSC and the reference counters. Take care of SMI disturbance
176 */
177static unsigned long __init tsc_read_refs(unsigned long *pm,
178 unsigned long *hpet)
179{
180 unsigned long t1, t2;
181 int i;
182
183 for (i = 0; i < MAX_RETRIES; i++) {
184 t1 = get_cycles();
185 if (hpet)
186 *hpet = hpet_readl(HPET_COUNTER) & 0xFFFFFFFF;
187 else
188 *pm = acpi_pm_read_early();
189 t2 = get_cycles();
190 if ((t2 - t1) < SMI_TRESHOLD)
191 return t2;
192 }
193 return ULONG_MAX;
194}
195
196/**
197 * tsc_calibrate - calibrate the tsc on boot
198 */
199void __init tsc_calibrate(void)
200{
201 unsigned long flags, tsc1, tsc2, tr1, tr2, pm1, pm2, hpet1, hpet2;
202 int hpet = is_hpet_enabled(), cpu;
203
204 local_irq_save(flags);
205
206 tsc1 = tsc_read_refs(&pm1, hpet ? &hpet1 : NULL);
207
208 outb((inb(0x61) & ~0x02) | 0x01, 0x61);
209
210 outb(0xb0, 0x43);
211 outb((CLOCK_TICK_RATE / (1000 / 50)) & 0xff, 0x42);
212 outb((CLOCK_TICK_RATE / (1000 / 50)) >> 8, 0x42);
213 tr1 = get_cycles();
214 while ((inb(0x61) & 0x20) == 0);
215 tr2 = get_cycles();
216
217 tsc2 = tsc_read_refs(&pm2, hpet ? &hpet2 : NULL);
218
219 local_irq_restore(flags);
220
221 /*
222 * Preset the result with the raw and inaccurate PIT
223 * calibration value
224 */
225 tsc_khz = (tr2 - tr1) / 50;
226
227 /* hpet or pmtimer available ? */
228 if (!hpet && !pm1 && !pm2) {
229 printk(KERN_INFO "TSC calibrated against PIT\n");
230 goto out;
231 }
232
233 /* Check, whether the sampling was disturbed by an SMI */
234 if (tsc1 == ULONG_MAX || tsc2 == ULONG_MAX) {
235 printk(KERN_WARNING "TSC calibration disturbed by SMI, "
236 "using PIT calibration result\n");
237 goto out;
238 }
239
240 tsc2 = (tsc2 - tsc1) * 1000000L;
241
242 if (hpet) {
243 printk(KERN_INFO "TSC calibrated against HPET\n");
244 if (hpet2 < hpet1)
245 hpet2 += 0x100000000UL;
246 hpet2 -= hpet1;
247 tsc1 = (hpet2 * hpet_readl(HPET_PERIOD)) / 1000000;
248 } else {
249 printk(KERN_INFO "TSC calibrated against PM_TIMER\n");
250 if (pm2 < pm1)
251 pm2 += ACPI_PM_OVRRUN;
252 pm2 -= pm1;
253 tsc1 = (pm2 * 1000000000) / PMTMR_TICKS_PER_SEC;
254 }
255
256 tsc_khz = tsc2 / tsc1;
257
258out:
259 for_each_possible_cpu(cpu)
260 set_cyc2ns_scale(tsc_khz, cpu);
261}
262
263/*
264 * Make an educated guess if the TSC is trustworthy and synchronized
265 * over all CPUs.
266 */
267__cpuinit int unsynchronized_tsc(void)
268{
269 if (tsc_unstable)
270 return 1;
271
272#ifdef CONFIG_SMP
273 if (apic_is_clustered_box())
274 return 1;
275#endif
276
277 if (boot_cpu_has(X86_FEATURE_CONSTANT_TSC))
278 return 0;
279
280 /* Assume multi socket systems are not synchronized */
281 return num_present_cpus() > 1;
282}
283
284int __init notsc_setup(char *s)
285{
286 notsc = 1;
287 return 1;
288}
289
290__setup("notsc", notsc_setup);
291
292static struct clocksource clocksource_tsc;
293
294/*
295 * We compare the TSC to the cycle_last value in the clocksource
296 * structure to avoid a nasty time-warp. This can be observed in a
297 * very small window right after one CPU updated cycle_last under
298 * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which
299 * is smaller than the cycle_last reference value due to a TSC which
300 * is slightly behind. This delta is nowhere else observable, but in
301 * that case it results in a forward time jump in the range of hours
302 * due to the unsigned delta calculation of the time keeping core
303 * code, which is necessary to support wrapping clocksources like pm
304 * timer.
305 */
306static cycle_t read_tsc(void)
307{
308 cycle_t ret = (cycle_t)get_cycles();
309
310 return ret >= clocksource_tsc.cycle_last ?
311 ret : clocksource_tsc.cycle_last;
312}
313
314static cycle_t __vsyscall_fn vread_tsc(void)
315{
316 cycle_t ret = (cycle_t)vget_cycles();
317
318 return ret >= __vsyscall_gtod_data.clock.cycle_last ?
319 ret : __vsyscall_gtod_data.clock.cycle_last;
320}
321
322static struct clocksource clocksource_tsc = {
323 .name = "tsc",
324 .rating = 300,
325 .read = read_tsc,
326 .mask = CLOCKSOURCE_MASK(64),
327 .shift = 22,
328 .flags = CLOCK_SOURCE_IS_CONTINUOUS |
329 CLOCK_SOURCE_MUST_VERIFY,
330 .vread = vread_tsc,
331};
332
333void mark_tsc_unstable(char *reason)
334{
335 if (!tsc_unstable) {
336 tsc_unstable = 1;
337 printk("Marking TSC unstable due to %s\n", reason);
338 /* Change only the rating, when not registered */
339 if (clocksource_tsc.mult)
340 clocksource_change_rating(&clocksource_tsc, 0);
341 else
342 clocksource_tsc.rating = 0;
343 }
344}
345EXPORT_SYMBOL_GPL(mark_tsc_unstable);
346
347void __init init_tsc_clocksource(void)
348{
349 if (!notsc) {
350 clocksource_tsc.mult = clocksource_khz2mult(tsc_khz,
351 clocksource_tsc.shift);
352 if (check_tsc_unstable())
353 clocksource_tsc.rating = 0;
354
355 clocksource_register(&clocksource_tsc);
356 }
357}
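
Both read_tsc() variants above clamp their return value to cycle_last. The reason, spelled out in the comment: the timekeeping core computes an unsigned delta, so a TSC read that lags cycle_last by even one cycle would otherwise wrap to an enormous positive delta and push the clock forward by hours. A minimal sketch of the clamp in isolation (illustrative, not the kernel's code):

typedef unsigned long long cycle_t;

/* never report a value behind the last one handed to the timekeeping core */
static cycle_t clamp_to_cycle_last(cycle_t now, cycle_t cycle_last)
{
	return now >= cycle_last ? now : cycle_last;
}

With the clamp, a slightly-behind reading costs at most a momentary stall of the clock instead of a forward jump.
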
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
new file mode 100644
index 000000000000..e94bdb6add1d
--- /dev/null
+++ b/arch/x86/kernel/visws_quirks.c
@@ -0,0 +1,709 @@
1/*
2 * SGI Visual Workstation support and quirks, unmaintained.
3 *
4 * Split out from setup.c by davej@suse.de
5 *
6 * Copyright (C) 1999 Bent Hagemark, Ingo Molnar
7 *
8 * SGI Visual Workstation interrupt controller
9 *
10 * The Cobalt system ASIC in the Visual Workstation contains a "Cobalt" APIC
11 * which serves as the main interrupt controller in the system. Non-legacy
12 * hardware in the system uses this controller directly. Legacy devices
13 * are connected to the PIIX4 which in turn has its 8259(s) connected to
14 * a of the Cobalt APIC entry.
15 *
16 * 09/02/2000 - Updated for 2.4 by jbarnes@sgi.com
17 *
18 * 25/11/2002 - Updated for 2.5 by Andrey Panin <pazke@orbita1.ru>
19 */
20#include <linux/interrupt.h>
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/smp.h>
24
25#include <asm/visws/cobalt.h>
26#include <asm/visws/piix4.h>
27#include <asm/arch_hooks.h>
28#include <asm/fixmap.h>
29#include <asm/reboot.h>
30#include <asm/setup.h>
31#include <asm/e820.h>
32#include <asm/smp.h>
33#include <asm/io.h>
34
35#include <mach_ipi.h>
36
37#include "mach_apic.h"
38
39#include <linux/init.h>
40#include <linux/smp.h>
41
42#include <linux/kernel_stat.h>
43#include <linux/interrupt.h>
44#include <linux/init.h>
45
46#include <asm/io.h>
47#include <asm/apic.h>
48#include <asm/i8259.h>
49#include <asm/irq_vectors.h>
50#include <asm/visws/cobalt.h>
51#include <asm/visws/lithium.h>
52#include <asm/visws/piix4.h>
53
54#include <linux/sched.h>
55#include <linux/kernel.h>
56#include <linux/init.h>
57#include <linux/pci.h>
58#include <linux/pci_ids.h>
59
60extern int no_broadcast;
61
62#include <asm/io.h>
63#include <asm/apic.h>
64#include <asm/arch_hooks.h>
65#include <asm/visws/cobalt.h>
66#include <asm/visws/lithium.h>
67
68char visws_board_type = -1;
69char visws_board_rev = -1;
70
71int is_visws_box(void)
72{
73 return visws_board_type >= 0;
74}
75
76static int __init visws_time_init_quirk(void)
77{
78 printk(KERN_INFO "Starting Cobalt Timer system clock\n");
79
80 /* Set the countdown value */
81 co_cpu_write(CO_CPU_TIMEVAL, CO_TIME_HZ/HZ);
82
83 /* Start the timer */
84 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) | CO_CTRL_TIMERUN);
85
86 /* Enable (unmask) the timer interrupt */
87 co_cpu_write(CO_CPU_CTRL, co_cpu_read(CO_CPU_CTRL) & ~CO_CTRL_TIMEMASK);
88
89 /*
90 * Zero return means the generic timer setup code will set up
91 * the standard vector:
92 */
93 return 0;
94}
95
96static int __init visws_pre_intr_init_quirk(void)
97{
98 init_VISWS_APIC_irqs();
99
100 /*
101 * We don't want ISA irqs to be set up by the generic code:
102 */
103 return 1;
104}
105
106/* Quirk for machine specific memory setup. */
107
108#define MB (1024 * 1024)
109
110unsigned long sgivwfb_mem_phys;
111unsigned long sgivwfb_mem_size;
112EXPORT_SYMBOL(sgivwfb_mem_phys);
113EXPORT_SYMBOL(sgivwfb_mem_size);
114
115long long mem_size __initdata = 0;
116
117static char * __init visws_memory_setup_quirk(void)
118{
119 long long gfx_mem_size = 8 * MB;
120
121 mem_size = boot_params.alt_mem_k;
122
123 if (!mem_size) {
124 printk(KERN_WARNING "Bootloader didn't set memory size, upgrade it !\n");
125 mem_size = 128 * MB;
126 }
127
128 /*
129 * this hardcodes the graphics memory to 8 MB
130 * it really should be sized dynamically (or at least
131 * set as a boot param)
132 */
133 if (!sgivwfb_mem_size) {
134 printk(KERN_WARNING "Defaulting to 8 MB framebuffer size\n");
135 sgivwfb_mem_size = 8 * MB;
136 }
137
138 /*
139 * Trim to nearest MB
140 */
141 sgivwfb_mem_size &= ~((1 << 20) - 1);
142 sgivwfb_mem_phys = mem_size - gfx_mem_size;
143
144 e820_add_region(0, LOWMEMSIZE(), E820_RAM);
145 e820_add_region(HIGH_MEMORY, mem_size - sgivwfb_mem_size - HIGH_MEMORY, E820_RAM);
146 e820_add_region(sgivwfb_mem_phys, sgivwfb_mem_size, E820_RESERVED);
147
148 return "PROM";
149}
150
151static void visws_machine_emergency_restart(void)
152{
153 /*
154 * Visual Workstations restart after this
155 * register is poked on the PIIX4
156 */
157 outb(PIIX4_RESET_VAL, PIIX4_RESET_PORT);
158}
159
160static void visws_machine_power_off(void)
161{
162 unsigned short pm_status;
163/* extern unsigned int pci_bus0; */
164
165 while ((pm_status = inw(PMSTS_PORT)) & 0x100)
166 outw(pm_status, PMSTS_PORT);
167
168 outw(PM_SUSPEND_ENABLE, PMCNTRL_PORT);
169
170 mdelay(10);
171
172#define PCI_CONF1_ADDRESS(bus, devfn, reg) \
173 (0x80000000 | (bus << 16) | (devfn << 8) | (reg & ~3))
174
175/* outl(PCI_CONF1_ADDRESS(pci_bus0, SPECIAL_DEV, SPECIAL_REG), 0xCF8); */
176 outl(PIIX_SPECIAL_STOP, 0xCFC);
177}
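/*
 * (Aside, not in the original file: PCI_CONF1_ADDRESS() builds a standard
 * type-1 configuration address. Bit 31 enables the config cycle, bits
 * 23-16 carry the bus number, bits 15-8 the devfn, and bits 7-2 the
 * dword-aligned register offset; the low two bits are forced to zero.
 * With illustrative values bus 0, devfn 0x48, reg 0x40 this yields
 * 0x80000000 | (0 << 16) | (0x48 << 8) | 0x40 = 0x80004840.)
 */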
178
179static int __init visws_get_smp_config_quirk(unsigned int early)
180{
181 /*
182 * Prevent MP-table parsing by the generic code:
183 */
184 return 1;
185}
186
187extern unsigned int __cpuinitdata maxcpus;
188
189/*
190 * The Visual Workstation is Intel MP compliant in the hardware
 191 * sense, but it doesn't have a BIOS (or an MP configuration table).
192 * No problem for Linux.
193 */
194
195static void __init MP_processor_info (struct mpc_config_processor *m)
196{
197 int ver, logical_apicid;
198 physid_mask_t apic_cpus;
199
200 if (!(m->mpc_cpuflag & CPU_ENABLED))
201 return;
202
203 logical_apicid = m->mpc_apicid;
204 printk(KERN_INFO "%sCPU #%d %u:%u APIC version %d\n",
205 m->mpc_cpuflag & CPU_BOOTPROCESSOR ? "Bootup " : "",
206 m->mpc_apicid,
207 (m->mpc_cpufeature & CPU_FAMILY_MASK) >> 8,
208 (m->mpc_cpufeature & CPU_MODEL_MASK) >> 4,
209 m->mpc_apicver);
210
211 if (m->mpc_cpuflag & CPU_BOOTPROCESSOR)
212 boot_cpu_physical_apicid = m->mpc_apicid;
213
214 ver = m->mpc_apicver;
215 if ((ver >= 0x14 && m->mpc_apicid >= 0xff) || m->mpc_apicid >= 0xf) {
216 printk(KERN_ERR "Processor #%d INVALID. (Max ID: %d).\n",
217 m->mpc_apicid, MAX_APICS);
218 return;
219 }
220
221 apic_cpus = apicid_to_cpu_present(m->mpc_apicid);
222 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
223 /*
224 * Validate version
225 */
226 if (ver == 0x0) {
227 printk(KERN_ERR "BIOS bug, APIC version is 0 for CPU#%d! "
228 "fixing up to 0x10. (tell your hw vendor)\n",
229 m->mpc_apicid);
230 ver = 0x10;
231 }
232 apic_version[m->mpc_apicid] = ver;
233}
234
235int __init visws_find_smp_config_quirk(unsigned int reserve)
236{
237 struct mpc_config_processor *mp = phys_to_virt(CO_CPU_TAB_PHYS);
238 unsigned short ncpus = readw(phys_to_virt(CO_CPU_NUM_PHYS));
239
240 if (ncpus > CO_CPU_MAX) {
241 printk(KERN_WARNING "find_visws_smp: got cpu count of %d at %p\n",
242 ncpus, mp);
243
244 ncpus = CO_CPU_MAX;
245 }
246
247 if (ncpus > maxcpus)
248 ncpus = maxcpus;
249
250#ifdef CONFIG_X86_LOCAL_APIC
251 smp_found_config = 1;
252#endif
253 while (ncpus--)
254 MP_processor_info(mp++);
255
256 mp_lapic_addr = APIC_DEFAULT_PHYS_BASE;
257
258 return 1;
259}
260
261extern int visws_trap_init_quirk(void);
262
263void __init visws_early_detect(void)
264{
265 int raw;
266
267 visws_board_type = (char)(inb_p(PIIX_GPI_BD_REG) & PIIX_GPI_BD_REG)
268 >> PIIX_GPI_BD_SHIFT;
269
270 if (visws_board_type < 0)
271 return;
272
273 /*
274 * Install special quirks for timer, interrupt and memory setup:
275 */
276 arch_time_init_quirk = visws_time_init_quirk;
277 arch_pre_intr_init_quirk = visws_pre_intr_init_quirk;
278 arch_memory_setup_quirk = visws_memory_setup_quirk;
279
280 /*
281 * Fall back to generic behavior for traps:
282 */
283 arch_intr_init_quirk = NULL;
284 arch_trap_init_quirk = visws_trap_init_quirk;
285
286 /*
287 * Install reboot quirks:
288 */
289 pm_power_off = visws_machine_power_off;
290 machine_ops.emergency_restart = visws_machine_emergency_restart;
291
292 /*
293 * Do not use broadcast IPIs:
294 */
295 no_broadcast = 0;
296
297 /*
298 * Override generic MP-table parsing:
299 */
300 mach_get_smp_config_quirk = visws_get_smp_config_quirk;
301 mach_find_smp_config_quirk = visws_find_smp_config_quirk;
302
303#ifdef CONFIG_X86_IO_APIC
304 /*
305 * Turn off IO-APIC detection and initialization:
306 */
307 skip_ioapic_setup = 1;
308#endif
309
310 /*
311 * Get Board rev.
312 * First, we have to initialize the 307 part to allow us access
313 * to the GPIO registers. Let's map them at 0x0fc0 which is right
314 * after the PIIX4 PM section.
315 */
316 outb_p(SIO_DEV_SEL, SIO_INDEX);
317 outb_p(SIO_GP_DEV, SIO_DATA); /* Talk to GPIO regs. */
318
319 outb_p(SIO_DEV_MSB, SIO_INDEX);
320 outb_p(SIO_GP_MSB, SIO_DATA); /* MSB of GPIO base address */
321
322 outb_p(SIO_DEV_LSB, SIO_INDEX);
323 outb_p(SIO_GP_LSB, SIO_DATA); /* LSB of GPIO base address */
324
325 outb_p(SIO_DEV_ENB, SIO_INDEX);
326 outb_p(1, SIO_DATA); /* Enable GPIO registers. */
327
328 /*
329 * Now, we have to map the power management section to write
330 * a bit which enables access to the GPIO registers.
331 * What lunatic came up with this shit?
332 */
333 outb_p(SIO_DEV_SEL, SIO_INDEX);
334 outb_p(SIO_PM_DEV, SIO_DATA); /* Talk to GPIO regs. */
335
336 outb_p(SIO_DEV_MSB, SIO_INDEX);
337 outb_p(SIO_PM_MSB, SIO_DATA); /* MSB of PM base address */
338
339 outb_p(SIO_DEV_LSB, SIO_INDEX);
340 outb_p(SIO_PM_LSB, SIO_DATA); /* LSB of PM base address */
341
342 outb_p(SIO_DEV_ENB, SIO_INDEX);
343 outb_p(1, SIO_DATA); /* Enable PM registers. */
344
345 /*
346 * Now, write the PM register which enables the GPIO registers.
347 */
348 outb_p(SIO_PM_FER2, SIO_PM_INDEX);
349 outb_p(SIO_PM_GP_EN, SIO_PM_DATA);
350
351 /*
352 * Now, initialize the GPIO registers.
353 * We want them all to be inputs which is the
354 * power on default, so let's leave them alone.
355 * So, let's just read the board rev!
356 */
357 raw = inb_p(SIO_GP_DATA1);
358 raw &= 0x7f; /* 7 bits of valid board revision ID. */
359
360 if (visws_board_type == VISWS_320) {
361 if (raw < 0x6) {
362 visws_board_rev = 4;
363 } else if (raw < 0xc) {
364 visws_board_rev = 5;
365 } else {
366 visws_board_rev = 6;
367 }
368 } else if (visws_board_type == VISWS_540) {
369 visws_board_rev = 2;
370 } else {
371 visws_board_rev = raw;
372 }
373
374 printk(KERN_INFO "Silicon Graphics Visual Workstation %s (rev %d) detected\n",
375 (visws_board_type == VISWS_320 ? "320" :
376 (visws_board_type == VISWS_540 ? "540" :
377 "unknown")), visws_board_rev);
378}
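/*
 * (Aside, not in the original file: for a 320 the 7-bit GPIO value read
 * above is bucketed into board revisions: below 0x6 is rev 4, below 0xc
 * is rev 5, anything else rev 6. A 540 always reports rev 2, and unknown
 * board types fall back to the raw GPIO value.)
 */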
379
380#define A01234 (LI_INTA_0 | LI_INTA_1 | LI_INTA_2 | LI_INTA_3 | LI_INTA_4)
381#define BCD (LI_INTB | LI_INTC | LI_INTD)
382#define ALLDEVS (A01234 | BCD)
383
384static __init void lithium_init(void)
385{
386 set_fixmap(FIX_LI_PCIA, LI_PCI_A_PHYS);
387 set_fixmap(FIX_LI_PCIB, LI_PCI_B_PHYS);
388
389 if ((li_pcia_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
390 (li_pcia_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
391 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'A');
392/* panic("This machine is not SGI Visual Workstation 320/540"); */
393 }
394
395 if ((li_pcib_read16(PCI_VENDOR_ID) != PCI_VENDOR_ID_SGI) ||
396 (li_pcib_read16(PCI_DEVICE_ID) != PCI_DEVICE_ID_SGI_LITHIUM)) {
397 printk(KERN_EMERG "Lithium hostbridge %c not found\n", 'B');
398/* panic("This machine is not SGI Visual Workstation 320/540"); */
399 }
400
401 li_pcia_write16(LI_PCI_INTEN, ALLDEVS);
402 li_pcib_write16(LI_PCI_INTEN, ALLDEVS);
403}
404
405static __init void cobalt_init(void)
406{
407 /*
408 * On normal SMP PC this is used only with SMP, but we have to
409 * use it and set it up here to start the Cobalt clock
410 */
411 set_fixmap(FIX_APIC_BASE, APIC_DEFAULT_PHYS_BASE);
412 setup_local_APIC();
413 printk(KERN_INFO "Local APIC Version %#x, ID %#x\n",
414 (unsigned int)apic_read(APIC_LVR),
415 (unsigned int)apic_read(APIC_ID));
416
417 set_fixmap(FIX_CO_CPU, CO_CPU_PHYS);
418 set_fixmap(FIX_CO_APIC, CO_APIC_PHYS);
419 printk(KERN_INFO "Cobalt Revision %#lx, APIC ID %#lx\n",
420 co_cpu_read(CO_CPU_REV), co_apic_read(CO_APIC_ID));
421
422 /* Enable Cobalt APIC being careful to NOT change the ID! */
423 co_apic_write(CO_APIC_ID, co_apic_read(CO_APIC_ID) | CO_APIC_ENABLE);
424
425 printk(KERN_INFO "Cobalt APIC enabled: ID reg %#lx\n",
426 co_apic_read(CO_APIC_ID));
427}
428
429int __init visws_trap_init_quirk(void)
430{
431 lithium_init();
432 cobalt_init();
433
434 return 1;
435}
436
437/*
438 * IRQ controller / APIC support:
439 */
440
441static DEFINE_SPINLOCK(cobalt_lock);
442
443/*
444 * Set the given Cobalt APIC Redirection Table entry to point
445 * to the given IDT vector/index.
446 */
447static inline void co_apic_set(int entry, int irq)
448{
449 co_apic_write(CO_APIC_LO(entry), CO_APIC_LEVEL | (irq + FIRST_EXTERNAL_VECTOR));
450 co_apic_write(CO_APIC_HI(entry), 0);
451}
452
453/*
454 * Cobalt (IO)-APIC functions to handle PCI devices.
455 */
456static inline int co_apic_ide0_hack(void)
457{
458 extern char visws_board_type;
459 extern char visws_board_rev;
460
461 if (visws_board_type == VISWS_320 && visws_board_rev == 5)
462 return 5;
463 return CO_APIC_IDE0;
464}
465
466static int is_co_apic(unsigned int irq)
467{
468 if (IS_CO_APIC(irq))
469 return CO_APIC(irq);
470
471 switch (irq) {
472 case 0: return CO_APIC_CPU;
473 case CO_IRQ_IDE0: return co_apic_ide0_hack();
474 case CO_IRQ_IDE1: return CO_APIC_IDE1;
475 default: return -1;
476 }
477}
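/*
 * (Aside, not in the original file: IRQ 0, the timer, is wired to the
 * Cobalt CPU entry, the two IDE channels get dedicated entries (with the
 * rev-5 320 exception handled above), and any IRQ for which IS_CO_APIC()
 * is true maps directly onto its redirection entry via CO_APIC(). Every
 * other IRQ, i.e. the legacy 8259 lines, has no Cobalt entry and yields
 * -1.)
 */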
478
479
480/*
481 * This is the SGI Cobalt (IO-)APIC:
482 */
483
484static void enable_cobalt_irq(unsigned int irq)
485{
486 co_apic_set(is_co_apic(irq), irq);
487}
488
489static void disable_cobalt_irq(unsigned int irq)
490{
491 int entry = is_co_apic(irq);
492
493 co_apic_write(CO_APIC_LO(entry), CO_APIC_MASK);
494 co_apic_read(CO_APIC_LO(entry));
495}
496
497/*
498 * "irq" really just serves to identify the device. Here is where we
499 * map this to the Cobalt APIC entry where it's physically wired.
500 * This is called via request_irq -> setup_irq -> irq_desc->startup()
501 */
502static unsigned int startup_cobalt_irq(unsigned int irq)
503{
504 unsigned long flags;
505
506 spin_lock_irqsave(&cobalt_lock, flags);
507 if ((irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING)))
508 irq_desc[irq].status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_WAITING);
509 enable_cobalt_irq(irq);
510 spin_unlock_irqrestore(&cobalt_lock, flags);
511 return 0;
512}
513
514static void ack_cobalt_irq(unsigned int irq)
515{
516 unsigned long flags;
517
518 spin_lock_irqsave(&cobalt_lock, flags);
519 disable_cobalt_irq(irq);
520 apic_write(APIC_EOI, APIC_EIO_ACK);
521 spin_unlock_irqrestore(&cobalt_lock, flags);
522}
523
524static void end_cobalt_irq(unsigned int irq)
525{
526 unsigned long flags;
527
528 spin_lock_irqsave(&cobalt_lock, flags);
529 if (!(irq_desc[irq].status & (IRQ_DISABLED | IRQ_INPROGRESS)))
530 enable_cobalt_irq(irq);
531 spin_unlock_irqrestore(&cobalt_lock, flags);
532}
533
534static struct irq_chip cobalt_irq_type = {
535 .typename = "Cobalt-APIC",
536 .startup = startup_cobalt_irq,
537 .shutdown = disable_cobalt_irq,
538 .enable = enable_cobalt_irq,
539 .disable = disable_cobalt_irq,
540 .ack = ack_cobalt_irq,
541 .end = end_cobalt_irq,
542};
543
544
545/*
546 * This is the PIIX4-based 8259 that is wired up indirectly to Cobalt
547 * -- not the manner expected by the code in i8259.c.
548 *
549 * there is a 'master' physical interrupt source that gets sent to
550 * the CPU. But in the chipset there are various 'virtual' interrupts
551 * waiting to be handled. We represent this to Linux through a 'master'
552 * interrupt controller type, and through a special virtual interrupt-
553 * controller. Device drivers only see the virtual interrupt sources.
554 */
555static unsigned int startup_piix4_master_irq(unsigned int irq)
556{
557 init_8259A(0);
558
559 return startup_cobalt_irq(irq);
560}
561
562static void end_piix4_master_irq(unsigned int irq)
563{
564 unsigned long flags;
565
566 spin_lock_irqsave(&cobalt_lock, flags);
567 enable_cobalt_irq(irq);
568 spin_unlock_irqrestore(&cobalt_lock, flags);
569}
570
571static struct irq_chip piix4_master_irq_type = {
572 .typename = "PIIX4-master",
573 .startup = startup_piix4_master_irq,
574 .ack = ack_cobalt_irq,
575 .end = end_piix4_master_irq,
576};
577
578
579static struct irq_chip piix4_virtual_irq_type = {
580 .typename = "PIIX4-virtual",
581 .shutdown = disable_8259A_irq,
582 .enable = enable_8259A_irq,
583 .disable = disable_8259A_irq,
584};
585
586
587/*
588 * PIIX4-8259 master/virtual functions to handle interrupt requests
589 * from legacy devices: floppy, parallel, serial, rtc.
590 *
591 * None of these get Cobalt APIC entries, neither do they have IDT
592 * entries. These interrupts are purely virtual and distributed from
593 * the 'master' interrupt source: CO_IRQ_8259.
594 *
 595 * When the 8259 interrupts, its handler figures out which of these
 596 * devices is interrupting and dispatches to that device's handler.
597 *
598 * CAREFUL: devices see the 'virtual' interrupt only. Thus disable/
599 * enable_irq gets the right irq. This 'master' irq is never directly
600 * manipulated by any driver.
601 */
602static irqreturn_t piix4_master_intr(int irq, void *dev_id)
603{
604 int realirq;
605 irq_desc_t *desc;
606 unsigned long flags;
607
608 spin_lock_irqsave(&i8259A_lock, flags);
609
610 /* Find out what's interrupting in the PIIX4 master 8259 */
611 outb(0x0c, 0x20); /* OCW3 Poll command */
612 realirq = inb(0x20);
613
614 /*
615 * Bit 7 == 0 means invalid/spurious
616 */
617 if (unlikely(!(realirq & 0x80)))
618 goto out_unlock;
619
620 realirq &= 7;
621
622 if (unlikely(realirq == 2)) {
623 outb(0x0c, 0xa0);
624 realirq = inb(0xa0);
625
626 if (unlikely(!(realirq & 0x80)))
627 goto out_unlock;
628
629 realirq = (realirq & 7) + 8;
630 }
631
632 /* mask and ack interrupt */
633 cached_irq_mask |= 1 << realirq;
634 if (unlikely(realirq > 7)) {
635 inb(0xa1);
636 outb(cached_slave_mask, 0xa1);
637 outb(0x60 + (realirq & 7), 0xa0);
638 outb(0x60 + 2, 0x20);
639 } else {
640 inb(0x21);
641 outb(cached_master_mask, 0x21);
642 outb(0x60 + realirq, 0x20);
643 }
644
645 spin_unlock_irqrestore(&i8259A_lock, flags);
646
647 desc = irq_desc + realirq;
648
649 /*
650 * handle this 'virtual interrupt' as a Cobalt one now.
651 */
652 kstat_cpu(smp_processor_id()).irqs[realirq]++;
653
654 if (likely(desc->action != NULL))
655 handle_IRQ_event(realirq, desc->action);
656
657 if (!(desc->status & IRQ_DISABLED))
658 enable_8259A_irq(realirq);
659
660 return IRQ_HANDLED;
661
662out_unlock:
663 spin_unlock_irqrestore(&i8259A_lock, flags);
664 return IRQ_NONE;
665}
666
667static struct irqaction master_action = {
668 .handler = piix4_master_intr,
669 .name = "PIIX4-8259",
670};
671
672static struct irqaction cascade_action = {
673 .handler = no_action,
674 .name = "cascade",
675};
676
677
678void init_VISWS_APIC_irqs(void)
679{
680 int i;
681
682 for (i = 0; i < CO_IRQ_APIC0 + CO_APIC_LAST + 1; i++) {
683 irq_desc[i].status = IRQ_DISABLED;
684 irq_desc[i].action = 0;
685 irq_desc[i].depth = 1;
686
687 if (i == 0) {
688 irq_desc[i].chip = &cobalt_irq_type;
689 }
690 else if (i == CO_IRQ_IDE0) {
691 irq_desc[i].chip = &cobalt_irq_type;
692 }
693 else if (i == CO_IRQ_IDE1) {
694 irq_desc[i].chip = &cobalt_irq_type;
695 }
696 else if (i == CO_IRQ_8259) {
697 irq_desc[i].chip = &piix4_master_irq_type;
698 }
699 else if (i < CO_IRQ_APIC0) {
700 irq_desc[i].chip = &piix4_virtual_irq_type;
701 }
702 else if (IS_CO_APIC(i)) {
703 irq_desc[i].chip = &cobalt_irq_type;
704 }
705 }
706
707 setup_irq(CO_IRQ_8259, &master_action);
708 setup_irq(2, &cascade_action);
709}
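
For illustration only, not part of the merged patch: below is a minimal, self-contained userspace C sketch of the i8259 poll-byte decoding that piix4_master_intr() in visws_quirks.c performs after issuing the OCW3 poll command. The inb()/outb() port accesses are replaced by plain function parameters; the decoding itself (bit 7 as the interrupt-pending flag, the low three bits as the IRQ level, and IRQ2 as the cascade to the slave controller) mirrors the logic in the patch above.

/*
 * Illustrative only, not from the patch: decode a pair of i8259 poll
 * responses (master, then slave) into a Linux IRQ number, the way
 * piix4_master_intr() does after issuing the OCW3 poll command.
 */
#include <stdio.h>

static int decode_poll(unsigned char master, unsigned char slave)
{
	int irq;

	/* Bit 7 clear means no interrupt is pending (spurious read). */
	if (!(master & 0x80))
		return -1;

	irq = master & 7;
	if (irq != 2)
		return irq;		/* IRQ 0..7, handled by the master */

	/* IRQ2 is the cascade input: the real source is on the slave. */
	if (!(slave & 0x80))
		return -1;

	return (slave & 7) + 8;		/* IRQ 8..15 */
}

int main(void)
{
	printf("%d\n", decode_poll(0x83, 0x00));	/*  3: master IRQ3        */
	printf("%d\n", decode_poll(0x82, 0x85));	/* 13: slave, via cascade */
	printf("%d\n", decode_poll(0x05, 0x00));	/* -1: spurious           */
	return 0;
}
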
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 946bf13b44ab..b15346092b7b 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -932,7 +932,7 @@ static inline int __init activate_vmi(void)
 	pv_apic_ops.setup_secondary_clock = vmi_time_ap_init;
 #endif
 	pv_time_ops.sched_clock = vmi_sched_clock;
-	pv_time_ops.get_cpu_khz = vmi_cpu_khz;
+	pv_time_ops.get_tsc_khz = vmi_tsc_khz;
 
 	/* We have true wallclock functions; disable CMOS clock sync */
 	no_sync_cmos_clock = 1;
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index ba7d19e102b1..6953859fe289 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -69,8 +69,8 @@ unsigned long long vmi_sched_clock(void)
 	return cycles_2_ns(vmi_timer_ops.get_cycle_counter(VMI_CYCLES_AVAILABLE));
 }
 
-/* paravirt_ops.get_cpu_khz = vmi_cpu_khz */
-unsigned long vmi_cpu_khz(void)
+/* paravirt_ops.get_tsc_khz = vmi_tsc_khz */
+unsigned long vmi_tsc_khz(void)
 {
 	unsigned long long khz;
 	khz = vmi_timer_ops.get_cycle_frequency();
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 2674f5796275..cdb2363697d2 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -49,16 +49,14 @@ SECTIONS
 	_etext = .;			/* End of text section */
   } :text = 0x9090
 
+  NOTES :text :note
+
   . = ALIGN(16);		/* Exception table */
   __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
   	__start___ex_table = .;
   	*(__ex_table)
   	__stop___ex_table = .;
-  }
-
-  NOTES :text :note
-
-  BUG_TABLE :text
+  } :text = 0x9090
 
   RODATA
 
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index fd246e22fe6b..63e5c1a22e88 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -19,7 +19,7 @@ PHDRS {
 	data PT_LOAD FLAGS(7);	/* RWE */
 	user PT_LOAD FLAGS(7);	/* RWE */
 	data.init PT_LOAD FLAGS(7);	/* RWE */
-	note PT_NOTE FLAGS(4);	/* R__ */
+	note PT_NOTE FLAGS(0);	/* ___ */
 }
 SECTIONS
 {
@@ -40,16 +40,14 @@ SECTIONS
 	_etext = .;			/* End of text section */
   } :text = 0x9090
 
+  NOTES :text :note
+
   . = ALIGN(16);		/* Exception table */
   __ex_table : AT(ADDR(__ex_table) - LOAD_OFFSET) {
   	__start___ex_table = .;
   	*(__ex_table)
   	__stop___ex_table = .;
-  }
-
-  NOTES :text :note
-
-  BUG_TABLE :text
+  } :text = 0x9090
 
   RODATA
 
diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
index c87cbd84c3e5..0b8b6690a86d 100644
--- a/arch/x86/kernel/vsyscall_64.c
+++ b/arch/x86/kernel/vsyscall_64.c
@@ -42,7 +42,8 @@
 #include <asm/topology.h>
 #include <asm/vgtod.h>
 
-#define __vsyscall(nr) __attribute__ ((unused,__section__(".vsyscall_" #nr)))
+#define __vsyscall(nr) \
+		__attribute__ ((unused, __section__(".vsyscall_" #nr))) notrace
 #define __syscall_clobber "r11","cx","memory"
 
 /*
@@ -278,7 +279,7 @@ cpu_vsyscall_notifier(struct notifier_block *n, unsigned long action, void *arg)
 {
 	long cpu = (long)arg;
 	if (action == CPU_ONLINE || action == CPU_ONLINE_FROZEN)
-		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 0, 1);
+		smp_call_function_single(cpu, cpu_vsyscall_init, NULL, 1);
 	return NOTIFY_DONE;
 }
 
284 285
@@ -301,7 +302,7 @@ static int __init vsyscall_init(void)
 #ifdef CONFIG_SYSCTL
 	register_sysctl_table(kernel_root_table2);
 #endif
-	on_each_cpu(cpu_vsyscall_init, NULL, 0, 1);
+	on_each_cpu(cpu_vsyscall_init, NULL, 1);
 	hotcpu_notifier(cpu_vsyscall_notifier, 0);
 	return 0;
 }
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 2f306a826897..b545f371b5f5 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -2,13 +2,20 @@
    All C exports should go in the respective C files. */
 
 #include <linux/module.h>
-#include <net/checksum.h>
 #include <linux/smp.h>
 
+#include <net/checksum.h>
+
 #include <asm/processor.h>
-#include <asm/uaccess.h>
 #include <asm/pgtable.h>
+#include <asm/uaccess.h>
 #include <asm/desc.h>
+#include <asm/ftrace.h>
+
+#ifdef CONFIG_FTRACE
+/* mcount is defined in assembly */
+EXPORT_SYMBOL(mcount);
+#endif
 
 EXPORT_SYMBOL(kernel_thread);
 