aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile19
-rw-r--r--arch/x86/kernel/acpi/boot.c74
-rw-r--r--arch/x86/kernel/apic.c177
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/bigsmp_32.c266
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c54
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c155
-rw-r--r--arch/x86/kernel/cpu/intel.c14
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/early_printk.c2
-rw-r--r--arch/x86/kernel/entry_32.S445
-rw-r--r--arch/x86/kernel/entry_64.S4
-rw-r--r--arch/x86/kernel/es7000_32.c477
-rw-r--r--arch/x86/kernel/genapic_64.c22
-rw-r--r--arch/x86/kernel/genapic_flat_64.c176
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c133
-rw-r--r--arch/x86/kernel/genx2apic_phys.c125
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c111
-rw-r--r--arch/x86/kernel/head_32.S34
-rw-r--r--arch/x86/kernel/head_64.S21
-rw-r--r--arch/x86/kernel/hpet.c2
-rw-r--r--arch/x86/kernel/io_apic.c306
-rw-r--r--arch/x86/kernel/ioport.c3
-rw-r--r--arch/x86/kernel/ipi.c176
-rw-r--r--arch/x86/kernel/irq.c38
-rw-r--r--arch/x86/kernel/irq_32.c31
-rw-r--r--arch/x86/kernel/irq_64.c34
-rw-r--r--arch/x86/kernel/irqinit_32.c12
-rw-r--r--arch/x86/kernel/kgdb.c4
-rw-r--r--arch/x86/kernel/machine_kexec_64.c82
-rw-r--r--arch/x86/kernel/mpparse.c36
-rw-r--r--arch/x86/kernel/nmi.c2
-rw-r--r--arch/x86/kernel/numaq_32.c307
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c10
-rw-r--r--arch/x86/kernel/paravirt.c81
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c12
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c15
-rw-r--r--arch/x86/kernel/probe_32.c411
-rw-r--r--arch/x86/kernel/probe_roms_32.c2
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/process_32.c53
-rw-r--r--arch/x86/kernel/process_64.c11
-rw-r--r--arch/x86/kernel/ptrace.c35
-rw-r--r--arch/x86/kernel/reboot.c4
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S125
-rw-r--r--arch/x86/kernel/setup.c30
-rw-r--r--arch/x86/kernel/setup_percpu.c380
-rw-r--r--arch/x86/kernel/signal.c346
-rw-r--r--arch/x86/kernel/smp.c15
-rw-r--r--arch/x86/kernel/smpboot.c49
-rw-r--r--arch/x86/kernel/smpcommon.c32
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--arch/x86/kernel/summit_32.c416
-rw-r--r--arch/x86/kernel/syscall_table_32.S20
-rw-r--r--arch/x86/kernel/time_32.c2
-rw-r--r--arch/x86/kernel/tlb_uv.c4
-rw-r--r--arch/x86/kernel/trampoline_64.S19
-rw-r--r--arch/x86/kernel/traps.c11
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/visws_quirks.c6
-rw-r--r--arch/x86/kernel/vm86_32.c20
-rw-r--r--arch/x86/kernel/vmi_32.c9
-rw-r--r--arch/x86/kernel/vmiclock_32.c2
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S13
-rw-r--r--arch/x86/kernel/vsmp_64.c12
68 files changed, 3651 insertions, 1851 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index a99437c965cc..24f357e7557a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -28,9 +28,9 @@ CFLAGS_paravirt.o := $(nostackp)
28obj-y := process_$(BITS).o signal.o entry_$(BITS).o 28obj-y := process_$(BITS).o signal.o entry_$(BITS).o
29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
30obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o 30obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
31obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 31obj-y += setup.o i8259.o irqinit_$(BITS).o
32obj-$(CONFIG_X86_VISWS) += visws_quirks.o 32obj-$(CONFIG_X86_VISWS) += visws_quirks.o
33obj-$(CONFIG_X86_32) += probe_roms_32.o 33obj-$(CONFIG_X86_32) += probe_32.o probe_roms_32.o
34obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 34obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
35obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 35obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
36obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 36obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
@@ -50,20 +50,20 @@ obj-y += step.o
50obj-$(CONFIG_STACKTRACE) += stacktrace.o 50obj-$(CONFIG_STACKTRACE) += stacktrace.o
51obj-y += cpu/ 51obj-y += cpu/
52obj-y += acpi/ 52obj-y += acpi/
53obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 53obj-y += reboot.o
54obj-$(CONFIG_MCA) += mca_32.o 54obj-$(CONFIG_MCA) += mca_32.o
55obj-$(CONFIG_X86_MSR) += msr.o 55obj-$(CONFIG_X86_MSR) += msr.o
56obj-$(CONFIG_X86_CPUID) += cpuid.o 56obj-$(CONFIG_X86_CPUID) += cpuid.o
57obj-$(CONFIG_PCI) += early-quirks.o 57obj-$(CONFIG_PCI) += early-quirks.o
58apm-y := apm_32.o 58apm-y := apm_32.o
59obj-$(CONFIG_APM) += apm.o 59obj-$(CONFIG_APM) += apm.o
60obj-$(CONFIG_X86_SMP) += smp.o 60obj-$(CONFIG_SMP) += smp.o
61obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o 61obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o ipi.o
62obj-$(CONFIG_X86_32_SMP) += smpcommon.o 62obj-$(CONFIG_SMP) += setup_percpu.o
63obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 63obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
64obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 64obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
65obj-$(CONFIG_X86_MPPARSE) += mpparse.o 65obj-$(CONFIG_X86_MPPARSE) += mpparse.o
66obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o 66obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o ipi.o
67obj-$(CONFIG_X86_IO_APIC) += io_apic.o 67obj-$(CONFIG_X86_IO_APIC) += io_apic.o
68obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 68obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
69obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 69obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
@@ -71,9 +71,10 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
71obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 71obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
72obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 72obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
73obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 73obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
74obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
74obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 75obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
75obj-$(CONFIG_X86_ES7000) += es7000_32.o 76obj-$(CONFIG_X86_ES7000) += es7000_32.o
76obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o 77obj-$(CONFIG_X86_SUMMIT) += summit_32.o
77obj-y += vsmp_64.o 78obj-y += vsmp_64.o
78obj-$(CONFIG_KPROBES) += kprobes.o 79obj-$(CONFIG_KPROBES) += kprobes.o
79obj-$(CONFIG_MODULES) += module_$(BITS).o 80obj-$(CONFIG_MODULES) += module_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index c193ec3c695e..956c1dee6fbe 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -42,10 +42,6 @@
42#include <asm/mpspec.h> 42#include <asm/mpspec.h>
43#include <asm/smp.h> 43#include <asm/smp.h>
44 44
45#ifdef CONFIG_X86_LOCAL_APIC
46# include <mach_apic.h>
47#endif
48
49static int __initdata acpi_force = 0; 45static int __initdata acpi_force = 0;
50u32 acpi_rsdt_forced; 46u32 acpi_rsdt_forced;
51#ifdef CONFIG_ACPI 47#ifdef CONFIG_ACPI
@@ -56,16 +52,7 @@ int acpi_disabled = 1;
56EXPORT_SYMBOL(acpi_disabled); 52EXPORT_SYMBOL(acpi_disabled);
57 53
58#ifdef CONFIG_X86_64 54#ifdef CONFIG_X86_64
59 55# include <asm/proto.h>
60#include <asm/proto.h>
61
62#else /* X86 */
63
64#ifdef CONFIG_X86_LOCAL_APIC
65#include <mach_apic.h>
66#include <mach_mpparse.h>
67#endif /* CONFIG_X86_LOCAL_APIC */
68
69#endif /* X86 */ 56#endif /* X86 */
70 57
71#define BAD_MADT_ENTRY(entry, end) ( \ 58#define BAD_MADT_ENTRY(entry, end) ( \
@@ -121,35 +108,18 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
121 */ 108 */
122char *__init __acpi_map_table(unsigned long phys, unsigned long size) 109char *__init __acpi_map_table(unsigned long phys, unsigned long size)
123{ 110{
124 unsigned long base, offset, mapped_size;
125 int idx;
126 111
127 if (!phys || !size) 112 if (!phys || !size)
128 return NULL; 113 return NULL;
129 114
130 if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) 115 return early_ioremap(phys, size);
131 return __va(phys); 116}
132 117void __init __acpi_unmap_table(char *map, unsigned long size)
133 offset = phys & (PAGE_SIZE - 1); 118{
134 mapped_size = PAGE_SIZE - offset; 119 if (!map || !size)
135 clear_fixmap(FIX_ACPI_END); 120 return;
136 set_fixmap(FIX_ACPI_END, phys);
137 base = fix_to_virt(FIX_ACPI_END);
138
139 /*
140 * Most cases can be covered by the below.
141 */
142 idx = FIX_ACPI_END;
143 while (mapped_size < size) {
144 if (--idx < FIX_ACPI_BEGIN)
145 return NULL; /* cannot handle this */
146 phys += PAGE_SIZE;
147 clear_fixmap(idx);
148 set_fixmap(idx, phys);
149 mapped_size += PAGE_SIZE;
150 }
151 121
152 return ((unsigned char *)base + offset); 122 early_iounmap(map, size);
153} 123}
154 124
155#ifdef CONFIG_PCI_MMCONFIG 125#ifdef CONFIG_PCI_MMCONFIG
@@ -239,7 +209,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
239 madt->address); 209 madt->address);
240 } 210 }
241 211
242 acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); 212 default_acpi_madt_oem_check(madt->header.oem_id,
213 madt->header.oem_table_id);
243 214
244 return 0; 215 return 0;
245} 216}
@@ -884,7 +855,7 @@ static struct {
884 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); 855 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
885} mp_ioapic_routing[MAX_IO_APICS]; 856} mp_ioapic_routing[MAX_IO_APICS];
886 857
887static int mp_find_ioapic(int gsi) 858int mp_find_ioapic(int gsi)
888{ 859{
889 int i = 0; 860 int i = 0;
890 861
@@ -899,6 +870,16 @@ static int mp_find_ioapic(int gsi)
899 return -1; 870 return -1;
900} 871}
901 872
873int mp_find_ioapic_pin(int ioapic, int gsi)
874{
875 if (WARN_ON(ioapic == -1))
876 return -1;
877 if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end))
878 return -1;
879
880 return gsi - mp_ioapic_routing[ioapic].gsi_base;
881}
882
902static u8 __init uniq_ioapic_id(u8 id) 883static u8 __init uniq_ioapic_id(u8 id)
903{ 884{
904#ifdef CONFIG_X86_32 885#ifdef CONFIG_X86_32
@@ -1034,7 +1015,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1034 ioapic = mp_find_ioapic(gsi); 1015 ioapic = mp_find_ioapic(gsi);
1035 if (ioapic < 0) 1016 if (ioapic < 0)
1036 return; 1017 return;
1037 pin = gsi - mp_ioapic_routing[ioapic].gsi_base; 1018 pin = mp_find_ioapic_pin(ioapic, gsi);
1038 1019
1039 /* 1020 /*
1040 * TBD: This check is for faulty timer entries, where the override 1021 * TBD: This check is for faulty timer entries, where the override
@@ -1154,7 +1135,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
1154 return gsi; 1135 return gsi;
1155 } 1136 }
1156 1137
1157 ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; 1138 ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
1158 1139
1159#ifdef CONFIG_X86_32 1140#ifdef CONFIG_X86_32
1160 if (ioapic_renumber_irq) 1141 if (ioapic_renumber_irq)
@@ -1243,7 +1224,7 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
1243 mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); 1224 mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
1244 ioapic = mp_find_ioapic(gsi); 1225 ioapic = mp_find_ioapic(gsi);
1245 mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id; 1226 mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
1246 mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; 1227 mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
1247 1228
1248 save_mp_irq(&mp_irq); 1229 save_mp_irq(&mp_irq);
1249#endif 1230#endif
@@ -1370,7 +1351,7 @@ static void __init acpi_process_madt(void)
1370 if (!error) { 1351 if (!error) {
1371 acpi_lapic = 1; 1352 acpi_lapic = 1;
1372 1353
1373#ifdef CONFIG_X86_GENERICARCH 1354#ifdef CONFIG_X86_BIGSMP
1374 generic_bigsmp_probe(); 1355 generic_bigsmp_probe();
1375#endif 1356#endif
1376 /* 1357 /*
@@ -1382,9 +1363,8 @@ static void __init acpi_process_madt(void)
1382 acpi_ioapic = 1; 1363 acpi_ioapic = 1;
1383 1364
1384 smp_found_config = 1; 1365 smp_found_config = 1;
1385#ifdef CONFIG_X86_32 1366 if (apic->setup_apic_routing)
1386 setup_apic_routing(); 1367 apic->setup_apic_routing();
1387#endif
1388 } 1368 }
1389 } 1369 }
1390 if (error == -EINVAL) { 1370 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index abfa0b641aea..c9aed4510585 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Local APIC handling, local APIC timers 2 * Local APIC handling, local APIC timers
3 * 3 *
4 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com> 4 * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
5 * 5 *
6 * Fixes 6 * Fixes
7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs; 7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
@@ -14,52 +14,72 @@
14 * Mikael Pettersson : PM converted to driver model. 14 * Mikael Pettersson : PM converted to driver model.
15 */ 15 */
16 16
17#include <linux/init.h>
18
19#include <linux/mm.h>
20#include <linux/delay.h>
21#include <linux/bootmem.h>
22#include <linux/interrupt.h>
23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
25#include <linux/sysdev.h> 18#include <linux/mc146818rtc.h>
26#include <linux/ioport.h>
27#include <linux/cpu.h>
28#include <linux/clockchips.h>
29#include <linux/acpi_pmtmr.h> 19#include <linux/acpi_pmtmr.h>
20#include <linux/clockchips.h>
21#include <linux/interrupt.h>
22#include <linux/bootmem.h>
23#include <linux/ftrace.h>
24#include <linux/ioport.h>
30#include <linux/module.h> 25#include <linux/module.h>
31#include <linux/dmi.h> 26#include <linux/sysdev.h>
27#include <linux/delay.h>
28#include <linux/timex.h>
32#include <linux/dmar.h> 29#include <linux/dmar.h>
33#include <linux/ftrace.h> 30#include <linux/init.h>
34#include <linux/smp.h> 31#include <linux/cpu.h>
32#include <linux/dmi.h>
35#include <linux/nmi.h> 33#include <linux/nmi.h>
36#include <linux/timex.h> 34#include <linux/smp.h>
35#include <linux/mm.h>
37 36
38#include <asm/perf_counter.h> 37#include <asm/perf_counter.h>
39#include <asm/atomic.h>
40#include <asm/mtrr.h>
41#include <asm/mpspec.h>
42#include <asm/desc.h>
43#include <asm/arch_hooks.h> 38#include <asm/arch_hooks.h>
44#include <asm/hpet.h>
45#include <asm/pgalloc.h> 39#include <asm/pgalloc.h>
40#include <asm/genapic.h>
41#include <asm/atomic.h>
42#include <asm/mpspec.h>
46#include <asm/i8253.h> 43#include <asm/i8253.h>
47#include <asm/idle.h> 44#include <asm/i8259.h>
48#include <asm/proto.h> 45#include <asm/proto.h>
49#include <asm/apic.h> 46#include <asm/apic.h>
50#include <asm/i8259.h> 47#include <asm/desc.h>
48#include <asm/hpet.h>
49#include <asm/idle.h>
50#include <asm/mtrr.h>
51#include <asm/smp.h> 51#include <asm/smp.h>
52 52
53#include <mach_apic.h> 53unsigned int num_processors;
54#include <mach_apicdef.h> 54
55#include <mach_ipi.h> 55unsigned disabled_cpus __cpuinitdata;
56
57/* Processor that is doing the boot up */
58unsigned int boot_cpu_physical_apicid = -1U;
56 59
57/* 60/*
58 * Sanity check 61 * The highest APIC ID seen during enumeration.
62 *
63 * This determines the messaging protocol we can use: if all APIC IDs
64 * are in the 0 ... 7 range, then we can use logical addressing which
65 * has some performance advantages (better broadcasting).
66 *
67 * If there's an APIC ID above 8, we use physical addressing.
59 */ 68 */
60#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) 69unsigned int max_physical_apicid;
61# error SPURIOUS_APIC_VECTOR definition error 70
62#endif 71/*
72 * Bitmask of physically existing CPUs:
73 */
74physid_mask_t phys_cpu_present_map;
75
76/*
77 * Map cpu index to physical APIC ID
78 */
79DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
80DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
81EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
82EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
63 83
64#ifdef CONFIG_X86_32 84#ifdef CONFIG_X86_32
65/* 85/*
@@ -458,7 +478,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
458static void lapic_timer_broadcast(const struct cpumask *mask) 478static void lapic_timer_broadcast(const struct cpumask *mask)
459{ 479{
460#ifdef CONFIG_SMP 480#ifdef CONFIG_SMP
461 send_IPI_mask(mask, LOCAL_TIMER_VECTOR); 481 apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
462#endif 482#endif
463} 483}
464 484
@@ -536,7 +556,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
536 } 556 }
537} 557}
538 558
539static int __init calibrate_by_pmtimer(long deltapm, long *delta) 559static int __init
560calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
540{ 561{
541 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; 562 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
542 const long pm_thresh = pm_100ms / 100; 563 const long pm_thresh = pm_100ms / 100;
@@ -547,7 +568,7 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
547 return -1; 568 return -1;
548#endif 569#endif
549 570
550 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); 571 apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
551 572
552 /* Check, if the PM timer is available */ 573 /* Check, if the PM timer is available */
553 if (!deltapm) 574 if (!deltapm)
@@ -557,19 +578,30 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
557 578
558 if (deltapm > (pm_100ms - pm_thresh) && 579 if (deltapm > (pm_100ms - pm_thresh) &&
559 deltapm < (pm_100ms + pm_thresh)) { 580 deltapm < (pm_100ms + pm_thresh)) {
560 apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); 581 apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
561 } else { 582 return 0;
562 res = (((u64)deltapm) * mult) >> 22; 583 }
563 do_div(res, 1000000); 584
564 pr_warning("APIC calibration not consistent " 585 res = (((u64)deltapm) * mult) >> 22;
565 "with PM Timer: %ldms instead of 100ms\n", 586 do_div(res, 1000000);
566 (long)res); 587 pr_warning("APIC calibration not consistent "
567 /* Correct the lapic counter value */ 588 "with PM-Timer: %ldms instead of 100ms\n",(long)res);
568 res = (((u64)(*delta)) * pm_100ms); 589
590 /* Correct the lapic counter value */
591 res = (((u64)(*delta)) * pm_100ms);
592 do_div(res, deltapm);
593 pr_info("APIC delta adjusted to PM-Timer: "
594 "%lu (%ld)\n", (unsigned long)res, *delta);
595 *delta = (long)res;
596
597 /* Correct the tsc counter value */
598 if (cpu_has_tsc) {
599 res = (((u64)(*deltatsc)) * pm_100ms);
569 do_div(res, deltapm); 600 do_div(res, deltapm);
570 pr_info("APIC delta adjusted to PM-Timer: " 601 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
571 "%lu (%ld)\n", (unsigned long)res, *delta); 602 "PM-Timer: %lu (%ld) \n",
572 *delta = (long)res; 603 (unsigned long)res, *deltatsc);
604 *deltatsc = (long)res;
573 } 605 }
574 606
575 return 0; 607 return 0;
@@ -580,7 +612,7 @@ static int __init calibrate_APIC_clock(void)
580 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 612 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
581 void (*real_handler)(struct clock_event_device *dev); 613 void (*real_handler)(struct clock_event_device *dev);
582 unsigned long deltaj; 614 unsigned long deltaj;
583 long delta; 615 long delta, deltatsc;
584 int pm_referenced = 0; 616 int pm_referenced = 0;
585 617
586 local_irq_disable(); 618 local_irq_disable();
@@ -610,9 +642,11 @@ static int __init calibrate_APIC_clock(void)
610 delta = lapic_cal_t1 - lapic_cal_t2; 642 delta = lapic_cal_t1 - lapic_cal_t2;
611 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); 643 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
612 644
645 deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
646
613 /* we trust the PM based calibration if possible */ 647 /* we trust the PM based calibration if possible */
614 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, 648 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
615 &delta); 649 &delta, &deltatsc);
616 650
617 /* Calculate the scaled math multiplication factor */ 651 /* Calculate the scaled math multiplication factor */
618 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 652 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -630,11 +664,10 @@ static int __init calibrate_APIC_clock(void)
630 calibration_result); 664 calibration_result);
631 665
632 if (cpu_has_tsc) { 666 if (cpu_has_tsc) {
633 delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
634 apic_printk(APIC_VERBOSE, "..... CPU clock speed is " 667 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
635 "%ld.%04ld MHz.\n", 668 "%ld.%04ld MHz.\n",
636 (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), 669 (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
637 (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); 670 (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
638 } 671 }
639 672
640 apic_printk(APIC_VERBOSE, "..... host bus clock speed is " 673 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
@@ -994,11 +1027,11 @@ int __init verify_local_APIC(void)
994 */ 1027 */
995 reg0 = apic_read(APIC_ID); 1028 reg0 = apic_read(APIC_ID);
996 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 1029 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
997 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 1030 apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
998 reg1 = apic_read(APIC_ID); 1031 reg1 = apic_read(APIC_ID);
999 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 1032 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
1000 apic_write(APIC_ID, reg0); 1033 apic_write(APIC_ID, reg0);
1001 if (reg1 != (reg0 ^ APIC_ID_MASK)) 1034 if (reg1 != (reg0 ^ apic->apic_id_mask))
1002 return 0; 1035 return 0;
1003 1036
1004 /* 1037 /*
@@ -1092,7 +1125,7 @@ static void __cpuinit lapic_setup_esr(void)
1092 return; 1125 return;
1093 } 1126 }
1094 1127
1095 if (esr_disable) { 1128 if (apic->disable_esr) {
1096 /* 1129 /*
1097 * Something untraceable is creating bad interrupts on 1130 * Something untraceable is creating bad interrupts on
1098 * secondary quads ... for the moment, just leave the 1131 * secondary quads ... for the moment, just leave the
@@ -1134,15 +1167,13 @@ void __cpuinit setup_local_APIC(void)
1134 int i, j; 1167 int i, j;
1135 1168
1136 if (disable_apic) { 1169 if (disable_apic) {
1137#ifdef CONFIG_X86_IO_APIC 1170 arch_disable_smp_support();
1138 disable_ioapic_setup();
1139#endif
1140 return; 1171 return;
1141 } 1172 }
1142 1173
1143#ifdef CONFIG_X86_32 1174#ifdef CONFIG_X86_32
1144 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1175 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1145 if (lapic_is_integrated() && esr_disable) { 1176 if (lapic_is_integrated() && apic->disable_esr) {
1146 apic_write(APIC_ESR, 0); 1177 apic_write(APIC_ESR, 0);
1147 apic_write(APIC_ESR, 0); 1178 apic_write(APIC_ESR, 0);
1148 apic_write(APIC_ESR, 0); 1179 apic_write(APIC_ESR, 0);
@@ -1157,7 +1188,7 @@ void __cpuinit setup_local_APIC(void)
1157 * Double-check whether this APIC is really registered. 1188 * Double-check whether this APIC is really registered.
1158 * This is meaningless in clustered apic mode, so we skip it. 1189 * This is meaningless in clustered apic mode, so we skip it.
1159 */ 1190 */
1160 if (!apic_id_registered()) 1191 if (!apic->apic_id_registered())
1161 BUG(); 1192 BUG();
1162 1193
1163 /* 1194 /*
@@ -1165,7 +1196,7 @@ void __cpuinit setup_local_APIC(void)
1165 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 1196 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
1166 * document number 292116). So here it goes... 1197 * document number 292116). So here it goes...
1167 */ 1198 */
1168 init_apic_ldr(); 1199 apic->init_apic_ldr();
1169 1200
1170 /* 1201 /*
1171 * Set Task Priority to 'accept all'. We never change this 1202 * Set Task Priority to 'accept all'. We never change this
@@ -1611,7 +1642,7 @@ int __init APIC_init_uniprocessor(void)
1611 enable_IR_x2apic(); 1642 enable_IR_x2apic();
1612#endif 1643#endif
1613#ifdef CONFIG_X86_64 1644#ifdef CONFIG_X86_64
1614 setup_apic_routing(); 1645 default_setup_apic_routing();
1615#endif 1646#endif
1616 1647
1617 verify_local_APIC(); 1648 verify_local_APIC();
@@ -1749,7 +1780,8 @@ void __init connect_bsp_APIC(void)
1749 outb(0x01, 0x23); 1780 outb(0x01, 0x23);
1750 } 1781 }
1751#endif 1782#endif
1752 enable_apic_mode(); 1783 if (apic->enable_apic_mode)
1784 apic->enable_apic_mode();
1753} 1785}
1754 1786
1755/** 1787/**
@@ -1887,7 +1919,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
1887 } 1919 }
1888#endif 1920#endif
1889 1921
1890#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) 1922#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
1891 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1923 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1892 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1924 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1893#endif 1925#endif
@@ -1896,11 +1928,30 @@ void __cpuinit generic_processor_info(int apicid, int version)
1896 set_cpu_present(cpu, true); 1928 set_cpu_present(cpu, true);
1897} 1929}
1898 1930
1899#ifdef CONFIG_X86_64
1900int hard_smp_processor_id(void) 1931int hard_smp_processor_id(void)
1901{ 1932{
1902 return read_apic_id(); 1933 return read_apic_id();
1903} 1934}
1935
1936void default_init_apic_ldr(void)
1937{
1938 unsigned long val;
1939
1940 apic_write(APIC_DFR, APIC_DFR_VALUE);
1941 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
1942 val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
1943 apic_write(APIC_LDR, val);
1944}
1945
1946#ifdef CONFIG_X86_32
1947int default_apicid_to_node(int logical_apicid)
1948{
1949#ifdef CONFIG_SMP
1950 return apicid_2_node[hard_smp_processor_id()];
1951#else
1952 return 0;
1953#endif
1954}
1904#endif 1955#endif
1905 1956
1906/* 1957/*
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 98807bb095ad..37ba5f85b718 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int);
301 */ 301 */
302#define APM_ZERO_SEGS 302#define APM_ZERO_SEGS
303 303
304#include "apm.h" 304#include <asm/apm.h>
305 305
306/* 306/*
307 * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. 307 * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ee4df08feee6..fbf2f33e3080 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -75,6 +75,7 @@ void foo(void)
75 OFFSET(PT_DS, pt_regs, ds); 75 OFFSET(PT_DS, pt_regs, ds);
76 OFFSET(PT_ES, pt_regs, es); 76 OFFSET(PT_ES, pt_regs, es);
77 OFFSET(PT_FS, pt_regs, fs); 77 OFFSET(PT_FS, pt_regs, fs);
78 OFFSET(PT_GS, pt_regs, gs);
78 OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); 79 OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
79 OFFSET(PT_EIP, pt_regs, ip); 80 OFFSET(PT_EIP, pt_regs, ip);
80 OFFSET(PT_CS, pt_regs, cs); 81 OFFSET(PT_CS, pt_regs, cs);
diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c
new file mode 100644
index 000000000000..47a62f46afdb
--- /dev/null
+++ b/arch/x86/kernel/bigsmp_32.c
@@ -0,0 +1,266 @@
1/*
2 * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs.
3 * Drives the local APIC in "clustered mode".
4 */
5#define APIC_DEFINITION 1
6#include <linux/threads.h>
7#include <linux/cpumask.h>
8#include <asm/mpspec.h>
9#include <asm/genapic.h>
10#include <asm/fixmap.h>
11#include <asm/apicdef.h>
12#include <asm/ipi.h>
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/dmi.h>
16#include <linux/smp.h>
17
18
19static inline unsigned bigsmp_get_apic_id(unsigned long x)
20{
21 return (x >> 24) & 0xFF;
22}
23
24#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu))
25
26static inline int bigsmp_apic_id_registered(void)
27{
28 return 1;
29}
30
31static inline const cpumask_t *bigsmp_target_cpus(void)
32{
33#ifdef CONFIG_SMP
34 return &cpu_online_map;
35#else
36 return &cpumask_of_cpu(0);
37#endif
38}
39
40#define APIC_DFR_VALUE (APIC_DFR_FLAT)
41
42static inline unsigned long
43bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
44{
45 return 0;
46}
47
48static inline unsigned long bigsmp_check_apicid_present(int bit)
49{
50 return 1;
51}
52
53static inline unsigned long calculate_ldr(int cpu)
54{
55 unsigned long val, id;
56 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
57 id = xapic_phys_to_log_apicid(cpu);
58 val |= SET_APIC_LOGICAL_ID(id);
59 return val;
60}
61
62/*
63 * Set up the logical destination ID.
64 *
65 * Intel recommends to set DFR, LDR and TPR before enabling
66 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
67 * document number 292116). So here it goes...
68 */
69static inline void bigsmp_init_apic_ldr(void)
70{
71 unsigned long val;
72 int cpu = smp_processor_id();
73
74 apic_write(APIC_DFR, APIC_DFR_VALUE);
75 val = calculate_ldr(cpu);
76 apic_write(APIC_LDR, val);
77}
78
79static inline void bigsmp_setup_apic_routing(void)
80{
81 printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
82 "Physflat", nr_ioapics);
83}
84
85static inline int bigsmp_apicid_to_node(int logical_apicid)
86{
87 return apicid_2_node[hard_smp_processor_id()];
88}
89
90static inline int bigsmp_cpu_present_to_apicid(int mps_cpu)
91{
92 if (mps_cpu < nr_cpu_ids)
93 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
94
95 return BAD_APICID;
96}
97
98static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
99{
100 return physid_mask_of_physid(phys_apicid);
101}
102
103extern u8 cpu_2_logical_apicid[];
104/* Mapping from cpu number to logical apicid */
105static inline int bigsmp_cpu_to_logical_apicid(int cpu)
106{
107 if (cpu >= nr_cpu_ids)
108 return BAD_APICID;
109 return cpu_physical_id(cpu);
110}
111
112static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
113{
114 /* For clustered we don't have a good way to do this yet - hack */
115 return physids_promote(0xFFL);
116}
117
118static inline void bigsmp_setup_portio_remap(void)
119{
120}
121
122static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
123{
124 return 1;
125}
126
127/* As we are using single CPU as destination, pick only one CPU here */
128static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
129{
130 return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask));
131}
132
133static inline unsigned int
134bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
135 const struct cpumask *andmask)
136{
137 int cpu;
138
139 /*
140 * We're using fixed IRQ delivery, can only return one phys APIC ID.
141 * May as well be the first.
142 */
143 for_each_cpu_and(cpu, cpumask, andmask) {
144 if (cpumask_test_cpu(cpu, cpu_online_mask))
145 break;
146 }
147 if (cpu < nr_cpu_ids)
148 return bigsmp_cpu_to_logical_apicid(cpu);
149
150 return BAD_APICID;
151}
152
153static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
154{
155 return cpuid_apic >> index_msb;
156}
157
158static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
159{
160 default_send_IPI_mask_sequence_phys(mask, vector);
161}
162
163static inline void bigsmp_send_IPI_allbutself(int vector)
164{
165 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
166}
167
168static inline void bigsmp_send_IPI_all(int vector)
169{
170 bigsmp_send_IPI_mask(cpu_online_mask, vector);
171}
172
173static int dmi_bigsmp; /* can be set by dmi scanners */
174
175static int hp_ht_bigsmp(const struct dmi_system_id *d)
176{
177 printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
178 dmi_bigsmp = 1;
179 return 0;
180}
181
182
183static const struct dmi_system_id bigsmp_dmi_table[] = {
184 { hp_ht_bigsmp, "HP ProLiant DL760 G2",
185 { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
186 DMI_MATCH(DMI_BIOS_VERSION, "P44-"),}
187 },
188
189 { hp_ht_bigsmp, "HP ProLiant DL740",
190 { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
191 DMI_MATCH(DMI_BIOS_VERSION, "P47-"),}
192 },
193 { }
194};
195
196static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask)
197{
198 cpus_clear(*retmask);
199 cpu_set(cpu, *retmask);
200}
201
202static int probe_bigsmp(void)
203{
204 if (def_to_bigsmp)
205 dmi_bigsmp = 1;
206 else
207 dmi_check_system(bigsmp_dmi_table);
208 return dmi_bigsmp;
209}
210
211struct genapic apic_bigsmp = {
212
213 .name = "bigsmp",
214 .probe = probe_bigsmp,
215 .acpi_madt_oem_check = NULL,
216 .apic_id_registered = bigsmp_apic_id_registered,
217
218 .irq_delivery_mode = dest_Fixed,
219 /* phys delivery to target CPU: */
220 .irq_dest_mode = 0,
221
222 .target_cpus = bigsmp_target_cpus,
223 .disable_esr = 1,
224 .dest_logical = 0,
225 .check_apicid_used = bigsmp_check_apicid_used,
226 .check_apicid_present = bigsmp_check_apicid_present,
227
228 .vector_allocation_domain = bigsmp_vector_allocation_domain,
229 .init_apic_ldr = bigsmp_init_apic_ldr,
230
231 .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
232 .setup_apic_routing = bigsmp_setup_apic_routing,
233 .multi_timer_check = NULL,
234 .apicid_to_node = bigsmp_apicid_to_node,
235 .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
236 .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
237 .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
238 .setup_portio_remap = NULL,
239 .check_phys_apicid_present = bigsmp_check_phys_apicid_present,
240 .enable_apic_mode = NULL,
241 .phys_pkg_id = bigsmp_phys_pkg_id,
242 .mps_oem_check = NULL,
243
244 .get_apic_id = bigsmp_get_apic_id,
245 .set_apic_id = NULL,
246 .apic_id_mask = 0xFF << 24,
247
248 .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid,
249 .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and,
250
251 .send_IPI_mask = bigsmp_send_IPI_mask,
252 .send_IPI_mask_allbutself = NULL,
253 .send_IPI_allbutself = bigsmp_send_IPI_allbutself,
254 .send_IPI_all = bigsmp_send_IPI_all,
255 .send_IPI_self = default_send_IPI_self,
256
257 .wakeup_cpu = NULL,
258 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
259 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
260
261 .wait_for_init_deassert = default_wait_for_init_deassert,
262
263 .smp_callin_clear_local_apic = NULL,
264 .store_NMI_vector = NULL,
265 .inquire_remote_apic = default_inquire_remote_apic,
266};
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2cf23634b6d9..e48640cfac0c 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,7 +7,7 @@
7#include <asm/pat.h> 7#include <asm/pat.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9 9
10#include <mach_apic.h> 10#include <asm/genapic.h>
11 11
12struct cpuid_bit { 12struct cpuid_bit {
13 u16 feature; 13 u16 feature;
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
69 */ 69 */
70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) 70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
71{ 71{
72#ifdef CONFIG_X86_SMP 72#ifdef CONFIG_SMP
73 unsigned int eax, ebx, ecx, edx, sub_index; 73 unsigned int eax, ebx, ecx, edx, sub_index;
74 unsigned int ht_mask_width, core_plus_mask_width; 74 unsigned int ht_mask_width, core_plus_mask_width;
75 unsigned int core_select_mask, core_level_siblings; 75 unsigned int core_select_mask, core_level_siblings;
@@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
116 116
117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; 117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
118 118
119#ifdef CONFIG_X86_32 119 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask; 120 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); 121 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123 /* 122 /*
124 * Reinit the apicid, now that we have extended initial_apicid. 123 * Reinit the apicid, now that we have extended initial_apicid.
125 */ 124 */
126 c->apicid = phys_pkg_id(c->initial_apicid, 0); 125 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
127#else 126
128 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
129 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
130 /*
131 * Reinit the apicid, now that we have extended initial_apicid.
132 */
133 c->apicid = phys_pkg_id(0);
134#endif
135 c->x86_max_cores = (core_level_siblings / smp_num_siblings); 127 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
136 128
137 129
@@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
143 return; 135 return;
144#endif 136#endif
145} 137}
146
147#ifdef CONFIG_X86_PAT
148void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
149{
150 if (!cpu_has_pat)
151 pat_disable("PAT not supported by CPU.");
152
153 switch (c->x86_vendor) {
154 case X86_VENDOR_INTEL:
155 /*
156 * There is a known erratum on Pentium III and Core Solo
157 * and Core Duo CPUs.
158 * " Page with PAT set to WC while associated MTRR is UC
159 * may consolidate to UC "
160 * Because of this erratum, it is better to stick with
161 * setting WC in MTRR rather than using PAT on these CPUs.
162 *
163 * Enable PAT WC only on P4, Core 2 or later CPUs.
164 */
165 if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
166 return;
167
168 pat_disable("PAT WC disabled due to known CPU erratum.");
169 return;
170
171 case X86_VENDOR_AMD:
172 case X86_VENDOR_CENTAUR:
173 case X86_VENDOR_TRANSMETA:
174 return;
175 }
176
177 pat_disable("PAT disabled. Not yet verified on this CPU type.");
178}
179#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c878f6aa919..ff4d7b9e32e4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -12,7 +12,7 @@
12# include <asm/cacheflush.h> 12# include <asm/cacheflush.h>
13#endif 13#endif
14 14
15#include <mach_apic.h> 15#include <asm/genapic.h>
16 16
17#include "cpu.h" 17#include "cpu.h"
18 18
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 6fd316689c47..3a9d45a14ad9 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -27,7 +27,7 @@
27#ifdef CONFIG_X86_LOCAL_APIC 27#ifdef CONFIG_X86_LOCAL_APIC
28#include <asm/mpspec.h> 28#include <asm/mpspec.h>
29#include <asm/apic.h> 29#include <asm/apic.h>
30#include <mach_apic.h> 30#include <asm/genapic.h>
31#include <asm/genapic.h> 31#include <asm/genapic.h>
32#include <asm/uv/uv.h> 32#include <asm/uv/uv.h>
33#endif 33#endif
@@ -40,6 +40,7 @@
40#include <asm/sections.h> 40#include <asm/sections.h>
41#include <asm/setup.h> 41#include <asm/setup.h>
42#include <asm/hypervisor.h> 42#include <asm/hypervisor.h>
43#include <asm/stackprotector.h>
43 44
44#include "cpu.h" 45#include "cpu.h"
45 46
@@ -53,6 +54,15 @@ cpumask_var_t cpu_initialized_mask;
53/* representing cpus for which sibling maps can be computed */ 54/* representing cpus for which sibling maps can be computed */
54cpumask_var_t cpu_sibling_setup_mask; 55cpumask_var_t cpu_sibling_setup_mask;
55 56
57/* correctly size the local cpu masks */
58void __init setup_cpu_local_masks(void)
59{
60 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
61 alloc_bootmem_cpumask_var(&cpu_callin_mask);
62 alloc_bootmem_cpumask_var(&cpu_callout_mask);
63 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
64}
65
56#else /* CONFIG_X86_32 */ 66#else /* CONFIG_X86_32 */
57 67
58cpumask_t cpu_callin_map; 68cpumask_t cpu_callin_map;
@@ -114,6 +124,7 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
114 124
115 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 125 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
116 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } }, 126 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
127 GDT_STACK_CANARY_INIT
117#endif 128#endif
118} }; 129} };
119EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 130EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
@@ -216,6 +227,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
216#endif 227#endif
217 228
218/* 229/*
230 * Some CPU features depend on higher CPUID levels, which may not always
231 * be available due to CPUID level capping or broken virtualization
232 * software. Add those features to this table to auto-disable them.
233 */
234struct cpuid_dependent_feature {
235 u32 feature;
236 u32 level;
237};
238static const struct cpuid_dependent_feature __cpuinitconst
239cpuid_dependent_features[] = {
240 { X86_FEATURE_MWAIT, 0x00000005 },
241 { X86_FEATURE_DCA, 0x00000009 },
242 { X86_FEATURE_XSAVE, 0x0000000d },
243 { 0, 0 }
244};
245
246static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
247{
248 const struct cpuid_dependent_feature *df;
249 for (df = cpuid_dependent_features; df->feature; df++) {
250 /*
251 * Note: cpuid_level is set to -1 if unavailable, but
252 * extended_extended_level is set to 0 if unavailable
253 * and the legitimate extended levels are all negative
254 * when signed; hence the weird messing around with
255 * signs here...
256 */
257 if (cpu_has(c, df->feature) &&
258 ((s32)df->feature < 0 ?
259 (u32)df->feature > (u32)c->extended_cpuid_level :
260 (s32)df->feature > (s32)c->cpuid_level)) {
261 clear_cpu_cap(c, df->feature);
262 if (warn)
263 printk(KERN_WARNING
264 "CPU: CPU feature %s disabled "
265 "due to lack of CPUID level 0x%x\n",
266 x86_cap_flags[df->feature],
267 df->level);
268 }
269 }
270}
271
272/*
219 * Naming convention should be: <Name> [(<Codename>)] 273 * Naming convention should be: <Name> [(<Codename>)]
220 * This table only is used unless init_<vendor>() below doesn't set it; 274 * This table only is used unless init_<vendor>() below doesn't set it;
221 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 275 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
@@ -245,18 +299,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
245 299
246__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; 300__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
247 301
302void load_percpu_segment(int cpu)
303{
304#ifdef CONFIG_X86_32
305 loadsegment(fs, __KERNEL_PERCPU);
306#else
307 loadsegment(gs, 0);
308 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
309#endif
310 load_stack_canary_segment();
311}
312
248/* Current gdt points %fs at the "master" per-cpu area: after this, 313/* Current gdt points %fs at the "master" per-cpu area: after this,
249 * it's on the real one. */ 314 * it's on the real one. */
250void switch_to_new_gdt(void) 315void switch_to_new_gdt(int cpu)
251{ 316{
252 struct desc_ptr gdt_descr; 317 struct desc_ptr gdt_descr;
253 318
254 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); 319 gdt_descr.address = (long)get_cpu_gdt_table(cpu);
255 gdt_descr.size = GDT_SIZE - 1; 320 gdt_descr.size = GDT_SIZE - 1;
256 load_gdt(&gdt_descr); 321 load_gdt(&gdt_descr);
257#ifdef CONFIG_X86_32 322 /* Reload the per-cpu base */
258 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); 323
259#endif 324 load_percpu_segment(cpu);
260} 325}
261 326
262static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 327static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -386,11 +451,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
386 } 451 }
387 452
388 index_msb = get_count_order(smp_num_siblings); 453 index_msb = get_count_order(smp_num_siblings);
389#ifdef CONFIG_X86_64 454 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
390 c->phys_proc_id = phys_pkg_id(index_msb);
391#else
392 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
393#endif
394 455
395 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 456 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
396 457
@@ -398,13 +459,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
398 459
399 core_bits = get_count_order(c->x86_max_cores); 460 core_bits = get_count_order(c->x86_max_cores);
400 461
401#ifdef CONFIG_X86_64 462 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
402 c->cpu_core_id = phys_pkg_id(index_msb) &
403 ((1 << core_bits) - 1);
404#else
405 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
406 ((1 << core_bits) - 1); 463 ((1 << core_bits) - 1);
407#endif
408 } 464 }
409 465
410out: 466out:
@@ -573,11 +629,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
573 if (this_cpu->c_early_init) 629 if (this_cpu->c_early_init)
574 this_cpu->c_early_init(c); 630 this_cpu->c_early_init(c);
575 631
576 validate_pat_support(c);
577
578#ifdef CONFIG_SMP 632#ifdef CONFIG_SMP
579 c->cpu_index = boot_cpu_id; 633 c->cpu_index = boot_cpu_id;
580#endif 634#endif
635 filter_cpuid_features(c, false);
581} 636}
582 637
583void __init early_cpu_init(void) 638void __init early_cpu_init(void)
@@ -640,7 +695,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
640 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; 695 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
641#ifdef CONFIG_X86_32 696#ifdef CONFIG_X86_32
642# ifdef CONFIG_X86_HT 697# ifdef CONFIG_X86_HT
643 c->apicid = phys_pkg_id(c->initial_apicid, 0); 698 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
644# else 699# else
645 c->apicid = c->initial_apicid; 700 c->apicid = c->initial_apicid;
646# endif 701# endif
@@ -687,7 +742,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
687 this_cpu->c_identify(c); 742 this_cpu->c_identify(c);
688 743
689#ifdef CONFIG_X86_64 744#ifdef CONFIG_X86_64
690 c->apicid = phys_pkg_id(0); 745 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
691#endif 746#endif
692 747
693 /* 748 /*
@@ -711,6 +766,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
711 * we do "generic changes." 766 * we do "generic changes."
712 */ 767 */
713 768
769 /* Filter out anything that depends on CPUID levels we don't have */
770 filter_cpuid_features(c, true);
771
714 /* If the model name is still unset, do table lookup. */ 772 /* If the model name is still unset, do table lookup. */
715 if (!c->x86_model_id[0]) { 773 if (!c->x86_model_id[0]) {
716 char *p; 774 char *p;
@@ -885,12 +943,8 @@ struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
885 943
886DEFINE_PER_CPU_FIRST(union irq_stack_union, 944DEFINE_PER_CPU_FIRST(union irq_stack_union,
887 irq_stack_union) __aligned(PAGE_SIZE); 945 irq_stack_union) __aligned(PAGE_SIZE);
888#ifdef CONFIG_SMP
889DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
890#else
891DEFINE_PER_CPU(char *, irq_stack_ptr) = 946DEFINE_PER_CPU(char *, irq_stack_ptr) =
892 per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64; 947 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
893#endif
894 948
895DEFINE_PER_CPU(unsigned long, kernel_stack) = 949DEFINE_PER_CPU(unsigned long, kernel_stack) =
896 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE; 950 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
@@ -933,16 +987,21 @@ unsigned long kernel_eflags;
933 */ 987 */
934DEFINE_PER_CPU(struct orig_ist, orig_ist); 988DEFINE_PER_CPU(struct orig_ist, orig_ist);
935 989
936#else 990#else /* x86_64 */
937 991
938/* Make sure %fs is initialized properly in idle threads */ 992#ifdef CONFIG_CC_STACKPROTECTOR
993DEFINE_PER_CPU(unsigned long, stack_canary);
994#endif
995
996/* Make sure %fs and %gs are initialized properly in idle threads */
939struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 997struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
940{ 998{
941 memset(regs, 0, sizeof(struct pt_regs)); 999 memset(regs, 0, sizeof(struct pt_regs));
942 regs->fs = __KERNEL_PERCPU; 1000 regs->fs = __KERNEL_PERCPU;
1001 regs->gs = __KERNEL_STACK_CANARY;
943 return regs; 1002 return regs;
944} 1003}
945#endif 1004#endif /* x86_64 */
946 1005
947/* 1006/*
948 * cpu_init() initializes state that is per-CPU. Some data is already 1007 * cpu_init() initializes state that is per-CPU. Some data is already
@@ -961,10 +1020,6 @@ void __cpuinit cpu_init(void)
961 struct task_struct *me; 1020 struct task_struct *me;
962 int i; 1021 int i;
963 1022
964 loadsegment(fs, 0);
965 loadsegment(gs, 0);
966 load_gs_base(cpu);
967
968#ifdef CONFIG_NUMA 1023#ifdef CONFIG_NUMA
969 if (cpu != 0 && percpu_read(node_number) == 0 && 1024 if (cpu != 0 && percpu_read(node_number) == 0 &&
970 cpu_to_node(cpu) != NUMA_NO_NODE) 1025 cpu_to_node(cpu) != NUMA_NO_NODE)
@@ -985,7 +1040,9 @@ void __cpuinit cpu_init(void)
985 * and set up the GDT descriptor: 1040 * and set up the GDT descriptor:
986 */ 1041 */
987 1042
988 switch_to_new_gdt(); 1043 switch_to_new_gdt(cpu);
1044 loadsegment(fs, 0);
1045
989 load_idt((const struct desc_ptr *)&idt_descr); 1046 load_idt((const struct desc_ptr *)&idt_descr);
990 1047
991 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1048 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1043,22 +1100,19 @@ void __cpuinit cpu_init(void)
1043 */ 1100 */
1044 if (kgdb_connected && arch_kgdb_ops.correct_hw_break) 1101 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1045 arch_kgdb_ops.correct_hw_break(); 1102 arch_kgdb_ops.correct_hw_break();
1046 else { 1103 else
1047#endif 1104#endif
1048 /* 1105 {
1049 * Clear all 6 debug registers: 1106 /*
1050 */ 1107 * Clear all 6 debug registers:
1051 1108 */
1052 set_debugreg(0UL, 0); 1109 set_debugreg(0UL, 0);
1053 set_debugreg(0UL, 1); 1110 set_debugreg(0UL, 1);
1054 set_debugreg(0UL, 2); 1111 set_debugreg(0UL, 2);
1055 set_debugreg(0UL, 3); 1112 set_debugreg(0UL, 3);
1056 set_debugreg(0UL, 6); 1113 set_debugreg(0UL, 6);
1057 set_debugreg(0UL, 7); 1114 set_debugreg(0UL, 7);
1058#ifdef CONFIG_KGDB
1059 /* If the kgdb is connected no debug regs should be altered. */
1060 } 1115 }
1061#endif
1062 1116
1063 fpu_init(); 1117 fpu_init();
1064 1118
@@ -1088,7 +1142,7 @@ void __cpuinit cpu_init(void)
1088 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1142 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1089 1143
1090 load_idt(&idt_descr); 1144 load_idt(&idt_descr);
1091 switch_to_new_gdt(); 1145 switch_to_new_gdt(cpu);
1092 1146
1093 /* 1147 /*
1094 * Set up and load the per-CPU TSS and LDT 1148 * Set up and load the per-CPU TSS and LDT
@@ -1109,9 +1163,6 @@ void __cpuinit cpu_init(void)
1109 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1163 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1110#endif 1164#endif
1111 1165
1112 /* Clear %gs. */
1113 asm volatile ("mov %0, %%gs" : : "r" (0));
1114
1115 /* Clear all 6 debug registers: */ 1166 /* Clear all 6 debug registers: */
1116 set_debugreg(0, 0); 1167 set_debugreg(0, 0);
1117 set_debugreg(0, 1); 1168 set_debugreg(0, 1);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ade..1f137a87d4bd 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -24,7 +24,7 @@
24#ifdef CONFIG_X86_LOCAL_APIC 24#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h> 25#include <asm/mpspec.h>
26#include <asm/apic.h> 26#include <asm/apic.h>
27#include <mach_apic.h> 27#include <asm/genapic.h>
28#endif 28#endif
29 29
30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@ -63,6 +63,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
64 } 64 }
65 65
66 /*
67 * There is a known erratum on Pentium III and Core Solo
68 * and Core Duo CPUs.
69 * " Page with PAT set to WC while associated MTRR is UC
70 * may consolidate to UC "
71 * Because of this erratum, it is better to stick with
72 * setting WC in MTRR rather than using PAT on these CPUs.
73 *
74 * Enable PAT WC only on P4, Core 2 or later CPUs.
75 */
76 if (c->x86 == 6 && c->x86_model < 15)
77 clear_cpu_cap(c, X86_FEATURE_PAT);
66} 78}
67 79
68#ifdef CONFIG_X86_32 80#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index 11b93cabdf78..ad7f2a696f4a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -28,7 +28,7 @@
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30 30
31#include <mach_ipi.h> 31#include <asm/genapic.h>
32 32
33 33
34#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 34#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6b1f6f6f8661..87d103ded1c3 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -99,7 +99,7 @@ print_context_stack(struct thread_info *tinfo,
99 frame = frame->next_frame; 99 frame = frame->next_frame;
100 bp = (unsigned long) frame; 100 bp = (unsigned long) frame;
101 } else { 101 } else {
102 ops->address(data, addr, bp == 0); 102 ops->address(data, addr, 0);
103 } 103 }
104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph); 104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
105 } 105 }
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 504ad198e4ad..639ad98238a2 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -13,8 +13,8 @@
13#include <asm/setup.h> 13#include <asm/setup.h>
14#include <xen/hvc-console.h> 14#include <xen/hvc-console.h>
15#include <asm/pci-direct.h> 15#include <asm/pci-direct.h>
16#include <asm/pgtable.h>
17#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/pgtable.h>
18#include <linux/usb/ehci_def.h> 18#include <linux/usb/ehci_def.h>
19 19
20/* Simple VGA output */ 20/* Simple VGA output */
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index a0b91aac72a1..e99206831459 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -30,12 +30,13 @@
30 * 1C(%esp) - %ds 30 * 1C(%esp) - %ds
31 * 20(%esp) - %es 31 * 20(%esp) - %es
32 * 24(%esp) - %fs 32 * 24(%esp) - %fs
33 * 28(%esp) - orig_eax 33 * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
34 * 2C(%esp) - %eip 34 * 2C(%esp) - orig_eax
35 * 30(%esp) - %cs 35 * 30(%esp) - %eip
36 * 34(%esp) - %eflags 36 * 34(%esp) - %cs
37 * 38(%esp) - %oldesp 37 * 38(%esp) - %eflags
38 * 3C(%esp) - %oldss 38 * 3C(%esp) - %oldesp
39 * 40(%esp) - %oldss
39 * 40 *
40 * "current" is in register %ebx during any slow entries. 41 * "current" is in register %ebx during any slow entries.
41 */ 42 */
@@ -101,121 +102,221 @@
101#define resume_userspace_sig resume_userspace 102#define resume_userspace_sig resume_userspace
102#endif 103#endif
103 104
104#define SAVE_ALL \ 105/*
105 cld; \ 106 * User gs save/restore
106 pushl %fs; \ 107 *
107 CFI_ADJUST_CFA_OFFSET 4;\ 108 * %gs is used for userland TLS and kernel only uses it for stack
108 /*CFI_REL_OFFSET fs, 0;*/\ 109 * canary which is required to be at %gs:20 by gcc. Read the comment
109 pushl %es; \ 110 * at the top of stackprotector.h for more info.
110 CFI_ADJUST_CFA_OFFSET 4;\ 111 *
111 /*CFI_REL_OFFSET es, 0;*/\ 112 * Local labels 98 and 99 are used.
112 pushl %ds; \ 113 */
113 CFI_ADJUST_CFA_OFFSET 4;\ 114#ifdef CONFIG_X86_32_LAZY_GS
114 /*CFI_REL_OFFSET ds, 0;*/\ 115
115 pushl %eax; \ 116 /* unfortunately push/pop can't be no-op */
116 CFI_ADJUST_CFA_OFFSET 4;\ 117.macro PUSH_GS
117 CFI_REL_OFFSET eax, 0;\ 118 pushl $0
118 pushl %ebp; \ 119 CFI_ADJUST_CFA_OFFSET 4
119 CFI_ADJUST_CFA_OFFSET 4;\ 120.endm
120 CFI_REL_OFFSET ebp, 0;\ 121.macro POP_GS pop=0
121 pushl %edi; \ 122 addl $(4 + \pop), %esp
122 CFI_ADJUST_CFA_OFFSET 4;\ 123 CFI_ADJUST_CFA_OFFSET -(4 + \pop)
123 CFI_REL_OFFSET edi, 0;\ 124.endm
124 pushl %esi; \ 125.macro POP_GS_EX
125 CFI_ADJUST_CFA_OFFSET 4;\ 126.endm
126 CFI_REL_OFFSET esi, 0;\ 127
127 pushl %edx; \ 128 /* all the rest are no-op */
128 CFI_ADJUST_CFA_OFFSET 4;\ 129.macro PTGS_TO_GS
129 CFI_REL_OFFSET edx, 0;\ 130.endm
130 pushl %ecx; \ 131.macro PTGS_TO_GS_EX
131 CFI_ADJUST_CFA_OFFSET 4;\ 132.endm
132 CFI_REL_OFFSET ecx, 0;\ 133.macro GS_TO_REG reg
133 pushl %ebx; \ 134.endm
134 CFI_ADJUST_CFA_OFFSET 4;\ 135.macro REG_TO_PTGS reg
135 CFI_REL_OFFSET ebx, 0;\ 136.endm
136 movl $(__USER_DS), %edx; \ 137.macro SET_KERNEL_GS reg
137 movl %edx, %ds; \ 138.endm
138 movl %edx, %es; \ 139
139 movl $(__KERNEL_PERCPU), %edx; \ 140#else /* CONFIG_X86_32_LAZY_GS */
141
142.macro PUSH_GS
143 pushl %gs
144 CFI_ADJUST_CFA_OFFSET 4
145 /*CFI_REL_OFFSET gs, 0*/
146.endm
147
148.macro POP_GS pop=0
14998: popl %gs
150 CFI_ADJUST_CFA_OFFSET -4
151 /*CFI_RESTORE gs*/
152 .if \pop <> 0
153 add $\pop, %esp
154 CFI_ADJUST_CFA_OFFSET -\pop
155 .endif
156.endm
157.macro POP_GS_EX
158.pushsection .fixup, "ax"
15999: movl $0, (%esp)
160 jmp 98b
161.section __ex_table, "a"
162 .align 4
163 .long 98b, 99b
164.popsection
165.endm
166
167.macro PTGS_TO_GS
16898: mov PT_GS(%esp), %gs
169.endm
170.macro PTGS_TO_GS_EX
171.pushsection .fixup, "ax"
17299: movl $0, PT_GS(%esp)
173 jmp 98b
174.section __ex_table, "a"
175 .align 4
176 .long 98b, 99b
177.popsection
178.endm
179
180.macro GS_TO_REG reg
181 movl %gs, \reg
182 /*CFI_REGISTER gs, \reg*/
183.endm
184.macro REG_TO_PTGS reg
185 movl \reg, PT_GS(%esp)
186 /*CFI_REL_OFFSET gs, PT_GS*/
187.endm
188.macro SET_KERNEL_GS reg
189 movl $(__KERNEL_STACK_CANARY), \reg
190 movl \reg, %gs
191.endm
192
193#endif /* CONFIG_X86_32_LAZY_GS */
194
195.macro SAVE_ALL
196 cld
197 PUSH_GS
198 pushl %fs
199 CFI_ADJUST_CFA_OFFSET 4
200 /*CFI_REL_OFFSET fs, 0;*/
201 pushl %es
202 CFI_ADJUST_CFA_OFFSET 4
203 /*CFI_REL_OFFSET es, 0;*/
204 pushl %ds
205 CFI_ADJUST_CFA_OFFSET 4
206 /*CFI_REL_OFFSET ds, 0;*/
207 pushl %eax
208 CFI_ADJUST_CFA_OFFSET 4
209 CFI_REL_OFFSET eax, 0
210 pushl %ebp
211 CFI_ADJUST_CFA_OFFSET 4
212 CFI_REL_OFFSET ebp, 0
213 pushl %edi
214 CFI_ADJUST_CFA_OFFSET 4
215 CFI_REL_OFFSET edi, 0
216 pushl %esi
217 CFI_ADJUST_CFA_OFFSET 4
218 CFI_REL_OFFSET esi, 0
219 pushl %edx
220 CFI_ADJUST_CFA_OFFSET 4
221 CFI_REL_OFFSET edx, 0
222 pushl %ecx
223 CFI_ADJUST_CFA_OFFSET 4
224 CFI_REL_OFFSET ecx, 0
225 pushl %ebx
226 CFI_ADJUST_CFA_OFFSET 4
227 CFI_REL_OFFSET ebx, 0
228 movl $(__USER_DS), %edx
229 movl %edx, %ds
230 movl %edx, %es
231 movl $(__KERNEL_PERCPU), %edx
140 movl %edx, %fs 232 movl %edx, %fs
233 SET_KERNEL_GS %edx
234.endm
141 235
142#define RESTORE_INT_REGS \ 236.macro RESTORE_INT_REGS
143 popl %ebx; \ 237 popl %ebx
144 CFI_ADJUST_CFA_OFFSET -4;\ 238 CFI_ADJUST_CFA_OFFSET -4
145 CFI_RESTORE ebx;\ 239 CFI_RESTORE ebx
146 popl %ecx; \ 240 popl %ecx
147 CFI_ADJUST_CFA_OFFSET -4;\ 241 CFI_ADJUST_CFA_OFFSET -4
148 CFI_RESTORE ecx;\ 242 CFI_RESTORE ecx
149 popl %edx; \ 243 popl %edx
150 CFI_ADJUST_CFA_OFFSET -4;\ 244 CFI_ADJUST_CFA_OFFSET -4
151 CFI_RESTORE edx;\ 245 CFI_RESTORE edx
152 popl %esi; \ 246 popl %esi
153 CFI_ADJUST_CFA_OFFSET -4;\ 247 CFI_ADJUST_CFA_OFFSET -4
154 CFI_RESTORE esi;\ 248 CFI_RESTORE esi
155 popl %edi; \ 249 popl %edi
156 CFI_ADJUST_CFA_OFFSET -4;\ 250 CFI_ADJUST_CFA_OFFSET -4
157 CFI_RESTORE edi;\ 251 CFI_RESTORE edi
158 popl %ebp; \ 252 popl %ebp
159 CFI_ADJUST_CFA_OFFSET -4;\ 253 CFI_ADJUST_CFA_OFFSET -4
160 CFI_RESTORE ebp;\ 254 CFI_RESTORE ebp
161 popl %eax; \ 255 popl %eax
162 CFI_ADJUST_CFA_OFFSET -4;\ 256 CFI_ADJUST_CFA_OFFSET -4
163 CFI_RESTORE eax 257 CFI_RESTORE eax
258.endm
164 259
165#define RESTORE_REGS \ 260.macro RESTORE_REGS pop=0
166 RESTORE_INT_REGS; \ 261 RESTORE_INT_REGS
1671: popl %ds; \ 2621: popl %ds
168 CFI_ADJUST_CFA_OFFSET -4;\ 263 CFI_ADJUST_CFA_OFFSET -4
169 /*CFI_RESTORE ds;*/\ 264 /*CFI_RESTORE ds;*/
1702: popl %es; \ 2652: popl %es
171 CFI_ADJUST_CFA_OFFSET -4;\ 266 CFI_ADJUST_CFA_OFFSET -4
172 /*CFI_RESTORE es;*/\ 267 /*CFI_RESTORE es;*/
1733: popl %fs; \ 2683: popl %fs
174 CFI_ADJUST_CFA_OFFSET -4;\ 269 CFI_ADJUST_CFA_OFFSET -4
175 /*CFI_RESTORE fs;*/\ 270 /*CFI_RESTORE fs;*/
176.pushsection .fixup,"ax"; \ 271 POP_GS \pop
1774: movl $0,(%esp); \ 272.pushsection .fixup, "ax"
178 jmp 1b; \ 2734: movl $0, (%esp)
1795: movl $0,(%esp); \ 274 jmp 1b
180 jmp 2b; \ 2755: movl $0, (%esp)
1816: movl $0,(%esp); \ 276 jmp 2b
182 jmp 3b; \ 2776: movl $0, (%esp)
183.section __ex_table,"a";\ 278 jmp 3b
184 .align 4; \ 279.section __ex_table, "a"
185 .long 1b,4b; \ 280 .align 4
186 .long 2b,5b; \ 281 .long 1b, 4b
187 .long 3b,6b; \ 282 .long 2b, 5b
283 .long 3b, 6b
188.popsection 284.popsection
285 POP_GS_EX
286.endm
189 287
190#define RING0_INT_FRAME \ 288.macro RING0_INT_FRAME
191 CFI_STARTPROC simple;\ 289 CFI_STARTPROC simple
192 CFI_SIGNAL_FRAME;\ 290 CFI_SIGNAL_FRAME
193 CFI_DEF_CFA esp, 3*4;\ 291 CFI_DEF_CFA esp, 3*4
194 /*CFI_OFFSET cs, -2*4;*/\ 292 /*CFI_OFFSET cs, -2*4;*/
195 CFI_OFFSET eip, -3*4 293 CFI_OFFSET eip, -3*4
294.endm
196 295
197#define RING0_EC_FRAME \ 296.macro RING0_EC_FRAME
198 CFI_STARTPROC simple;\ 297 CFI_STARTPROC simple
199 CFI_SIGNAL_FRAME;\ 298 CFI_SIGNAL_FRAME
200 CFI_DEF_CFA esp, 4*4;\ 299 CFI_DEF_CFA esp, 4*4
201 /*CFI_OFFSET cs, -2*4;*/\ 300 /*CFI_OFFSET cs, -2*4;*/
202 CFI_OFFSET eip, -3*4 301 CFI_OFFSET eip, -3*4
302.endm
203 303
204#define RING0_PTREGS_FRAME \ 304.macro RING0_PTREGS_FRAME
205 CFI_STARTPROC simple;\ 305 CFI_STARTPROC simple
206 CFI_SIGNAL_FRAME;\ 306 CFI_SIGNAL_FRAME
207 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ 307 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
208 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ 308 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
209 CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ 309 CFI_OFFSET eip, PT_EIP-PT_OLDESP
210 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ 310 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
211 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ 311 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
212 CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ 312 CFI_OFFSET eax, PT_EAX-PT_OLDESP
213 CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ 313 CFI_OFFSET ebp, PT_EBP-PT_OLDESP
214 CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ 314 CFI_OFFSET edi, PT_EDI-PT_OLDESP
215 CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ 315 CFI_OFFSET esi, PT_ESI-PT_OLDESP
216 CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ 316 CFI_OFFSET edx, PT_EDX-PT_OLDESP
217 CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ 317 CFI_OFFSET ecx, PT_ECX-PT_OLDESP
218 CFI_OFFSET ebx, PT_EBX-PT_OLDESP 318 CFI_OFFSET ebx, PT_EBX-PT_OLDESP
319.endm
219 320
220ENTRY(ret_from_fork) 321ENTRY(ret_from_fork)
221 CFI_STARTPROC 322 CFI_STARTPROC
@@ -362,6 +463,7 @@ sysenter_exit:
362 xorl %ebp,%ebp 463 xorl %ebp,%ebp
363 TRACE_IRQS_ON 464 TRACE_IRQS_ON
3641: mov PT_FS(%esp), %fs 4651: mov PT_FS(%esp), %fs
466 PTGS_TO_GS
365 ENABLE_INTERRUPTS_SYSEXIT 467 ENABLE_INTERRUPTS_SYSEXIT
366 468
367#ifdef CONFIG_AUDITSYSCALL 469#ifdef CONFIG_AUDITSYSCALL
@@ -410,6 +512,7 @@ sysexit_audit:
410 .align 4 512 .align 4
411 .long 1b,2b 513 .long 1b,2b
412.popsection 514.popsection
515 PTGS_TO_GS_EX
413ENDPROC(ia32_sysenter_target) 516ENDPROC(ia32_sysenter_target)
414 517
415 # system call handler stub 518 # system call handler stub
@@ -452,8 +555,7 @@ restore_all:
452restore_nocheck: 555restore_nocheck:
453 TRACE_IRQS_IRET 556 TRACE_IRQS_IRET
454restore_nocheck_notrace: 557restore_nocheck_notrace:
455 RESTORE_REGS 558 RESTORE_REGS 4 # skip orig_eax/error_code
456 addl $4, %esp # skip orig_eax/error_code
457 CFI_ADJUST_CFA_OFFSET -4 559 CFI_ADJUST_CFA_OFFSET -4
458irq_return: 560irq_return:
459 INTERRUPT_RETURN 561 INTERRUPT_RETURN
@@ -595,28 +697,50 @@ syscall_badsys:
595END(syscall_badsys) 697END(syscall_badsys)
596 CFI_ENDPROC 698 CFI_ENDPROC
597 699
598#define FIXUP_ESPFIX_STACK \ 700/*
599 /* since we are on a wrong stack, we cant make it a C code :( */ \ 701 * System calls that need a pt_regs pointer.
600 PER_CPU(gdt_page, %ebx); \ 702 */
601 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ 703#define PTREGSCALL(name) \
602 addl %esp, %eax; \ 704 ALIGN; \
603 pushl $__KERNEL_DS; \ 705ptregs_##name: \
604 CFI_ADJUST_CFA_OFFSET 4; \ 706 leal 4(%esp),%eax; \
605 pushl %eax; \ 707 jmp sys_##name;
606 CFI_ADJUST_CFA_OFFSET 4; \ 708
607 lss (%esp), %esp; \ 709PTREGSCALL(iopl)
608 CFI_ADJUST_CFA_OFFSET -8; 710PTREGSCALL(fork)
609#define UNWIND_ESPFIX_STACK \ 711PTREGSCALL(clone)
610 movl %ss, %eax; \ 712PTREGSCALL(vfork)
611 /* see if on espfix stack */ \ 713PTREGSCALL(execve)
612 cmpw $__ESPFIX_SS, %ax; \ 714PTREGSCALL(sigaltstack)
613 jne 27f; \ 715PTREGSCALL(sigreturn)
614 movl $__KERNEL_DS, %eax; \ 716PTREGSCALL(rt_sigreturn)
615 movl %eax, %ds; \ 717PTREGSCALL(vm86)
616 movl %eax, %es; \ 718PTREGSCALL(vm86old)
617 /* switch to normal stack */ \ 719
618 FIXUP_ESPFIX_STACK; \ 720.macro FIXUP_ESPFIX_STACK
61927:; 721 /* since we are on a wrong stack, we cant make it a C code :( */
722 PER_CPU(gdt_page, %ebx)
723 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
724 addl %esp, %eax
725 pushl $__KERNEL_DS
726 CFI_ADJUST_CFA_OFFSET 4
727 pushl %eax
728 CFI_ADJUST_CFA_OFFSET 4
729 lss (%esp), %esp
730 CFI_ADJUST_CFA_OFFSET -8
731.endm
732.macro UNWIND_ESPFIX_STACK
733 movl %ss, %eax
734 /* see if on espfix stack */
735 cmpw $__ESPFIX_SS, %ax
736 jne 27f
737 movl $__KERNEL_DS, %eax
738 movl %eax, %ds
739 movl %eax, %es
740 /* switch to normal stack */
741 FIXUP_ESPFIX_STACK
74227:
743.endm
620 744
621/* 745/*
622 * Build the entry stubs and pointer table with some assembler magic. 746 * Build the entry stubs and pointer table with some assembler magic.
@@ -688,7 +812,7 @@ ENDPROC(name)
688#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name) 812#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
689 813
690/* The include is where all of the SMP etc. interrupts come from */ 814/* The include is where all of the SMP etc. interrupts come from */
691#include "entry_arch.h" 815#include <asm/entry_arch.h>
692 816
693ENTRY(coprocessor_error) 817ENTRY(coprocessor_error)
694 RING0_INT_FRAME 818 RING0_INT_FRAME
@@ -1070,7 +1194,10 @@ ENTRY(page_fault)
1070 CFI_ADJUST_CFA_OFFSET 4 1194 CFI_ADJUST_CFA_OFFSET 4
1071 ALIGN 1195 ALIGN
1072error_code: 1196error_code:
1073 /* the function address is in %fs's slot on the stack */ 1197 /* the function address is in %gs's slot on the stack */
1198 pushl %fs
1199 CFI_ADJUST_CFA_OFFSET 4
1200 /*CFI_REL_OFFSET fs, 0*/
1074 pushl %es 1201 pushl %es
1075 CFI_ADJUST_CFA_OFFSET 4 1202 CFI_ADJUST_CFA_OFFSET 4
1076 /*CFI_REL_OFFSET es, 0*/ 1203 /*CFI_REL_OFFSET es, 0*/
@@ -1099,20 +1226,15 @@ error_code:
1099 CFI_ADJUST_CFA_OFFSET 4 1226 CFI_ADJUST_CFA_OFFSET 4
1100 CFI_REL_OFFSET ebx, 0 1227 CFI_REL_OFFSET ebx, 0
1101 cld 1228 cld
1102 pushl %fs
1103 CFI_ADJUST_CFA_OFFSET 4
1104 /*CFI_REL_OFFSET fs, 0*/
1105 movl $(__KERNEL_PERCPU), %ecx 1229 movl $(__KERNEL_PERCPU), %ecx
1106 movl %ecx, %fs 1230 movl %ecx, %fs
1107 UNWIND_ESPFIX_STACK 1231 UNWIND_ESPFIX_STACK
1108 popl %ecx 1232 GS_TO_REG %ecx
1109 CFI_ADJUST_CFA_OFFSET -4 1233 movl PT_GS(%esp), %edi # get the function address
1110 /*CFI_REGISTER es, ecx*/
1111 movl PT_FS(%esp), %edi # get the function address
1112 movl PT_ORIG_EAX(%esp), %edx # get the error code 1234 movl PT_ORIG_EAX(%esp), %edx # get the error code
1113 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart 1235 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1114 mov %ecx, PT_FS(%esp) 1236 REG_TO_PTGS %ecx
1115 /*CFI_REL_OFFSET fs, ES*/ 1237 SET_KERNEL_GS %ecx
1116 movl $(__USER_DS), %ecx 1238 movl $(__USER_DS), %ecx
1117 movl %ecx, %ds 1239 movl %ecx, %ds
1118 movl %ecx, %es 1240 movl %ecx, %es
@@ -1136,26 +1258,27 @@ END(page_fault)
1136 * by hand onto the new stack - while updating the return eip past 1258 * by hand onto the new stack - while updating the return eip past
1137 * the instruction that would have done it for sysenter. 1259 * the instruction that would have done it for sysenter.
1138 */ 1260 */
1139#define FIX_STACK(offset, ok, label) \ 1261.macro FIX_STACK offset ok label
1140 cmpw $__KERNEL_CS,4(%esp); \ 1262 cmpw $__KERNEL_CS, 4(%esp)
1141 jne ok; \ 1263 jne \ok
1142label: \ 1264\label:
1143 movl TSS_sysenter_sp0+offset(%esp),%esp; \ 1265 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1144 CFI_DEF_CFA esp, 0; \ 1266 CFI_DEF_CFA esp, 0
1145 CFI_UNDEFINED eip; \ 1267 CFI_UNDEFINED eip
1146 pushfl; \ 1268 pushfl
1147 CFI_ADJUST_CFA_OFFSET 4; \ 1269 CFI_ADJUST_CFA_OFFSET 4
1148 pushl $__KERNEL_CS; \ 1270 pushl $__KERNEL_CS
1149 CFI_ADJUST_CFA_OFFSET 4; \ 1271 CFI_ADJUST_CFA_OFFSET 4
1150 pushl $sysenter_past_esp; \ 1272 pushl $sysenter_past_esp
1151 CFI_ADJUST_CFA_OFFSET 4; \ 1273 CFI_ADJUST_CFA_OFFSET 4
1152 CFI_REL_OFFSET eip, 0 1274 CFI_REL_OFFSET eip, 0
1275.endm
1153 1276
1154ENTRY(debug) 1277ENTRY(debug)
1155 RING0_INT_FRAME 1278 RING0_INT_FRAME
1156 cmpl $ia32_sysenter_target,(%esp) 1279 cmpl $ia32_sysenter_target,(%esp)
1157 jne debug_stack_correct 1280 jne debug_stack_correct
1158 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) 1281 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1159debug_stack_correct: 1282debug_stack_correct:
1160 pushl $-1 # mark this as an int 1283 pushl $-1 # mark this as an int
1161 CFI_ADJUST_CFA_OFFSET 4 1284 CFI_ADJUST_CFA_OFFSET 4
@@ -1213,7 +1336,7 @@ nmi_stack_correct:
1213 1336
1214nmi_stack_fixup: 1337nmi_stack_fixup:
1215 RING0_INT_FRAME 1338 RING0_INT_FRAME
1216 FIX_STACK(12,nmi_stack_correct, 1) 1339 FIX_STACK 12, nmi_stack_correct, 1
1217 jmp nmi_stack_correct 1340 jmp nmi_stack_correct
1218 1341
1219nmi_debug_stack_check: 1342nmi_debug_stack_check:
@@ -1224,7 +1347,7 @@ nmi_debug_stack_check:
1224 jb nmi_stack_correct 1347 jb nmi_stack_correct
1225 cmpl $debug_esp_fix_insn,(%esp) 1348 cmpl $debug_esp_fix_insn,(%esp)
1226 ja nmi_stack_correct 1349 ja nmi_stack_correct
1227 FIX_STACK(24,nmi_stack_correct, 1) 1350 FIX_STACK 24, nmi_stack_correct, 1
1228 jmp nmi_stack_correct 1351 jmp nmi_stack_correct
1229 1352
1230nmi_espfix_stack: 1353nmi_espfix_stack:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 8f8f61a1fce8..860afce9660a 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -410,6 +410,8 @@ END(save_paranoid)
410ENTRY(ret_from_fork) 410ENTRY(ret_from_fork)
411 DEFAULT_FRAME 411 DEFAULT_FRAME
412 412
413 LOCK ; btr $TIF_FORK,TI_flags(%r8)
414
413 push kernel_eflags(%rip) 415 push kernel_eflags(%rip)
414 CFI_ADJUST_CFA_OFFSET 8 416 CFI_ADJUST_CFA_OFFSET 8
415 popf # reset kernel eflags 417 popf # reset kernel eflags
@@ -1146,7 +1148,7 @@ ENTRY(native_load_gs_index)
1146 CFI_STARTPROC 1148 CFI_STARTPROC
1147 pushf 1149 pushf
1148 CFI_ADJUST_CFA_OFFSET 8 1150 CFI_ADJUST_CFA_OFFSET 8
1149 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) 1151 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1150 SWAPGS 1152 SWAPGS
1151gs_change: 1153gs_change:
1152 movl %edi,%gs 1154 movl %edi,%gs
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 53699c931ad4..55515d73d9c2 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -40,7 +40,6 @@
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/atomic.h> 41#include <asm/atomic.h>
42#include <asm/apicdef.h> 42#include <asm/apicdef.h>
43#include <mach_mpparse.h>
44#include <asm/genapic.h> 43#include <asm/genapic.h>
45#include <asm/setup.h> 44#include <asm/setup.h>
46 45
@@ -182,20 +181,16 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
182 return 0; 181 return 0;
183} 182}
184 183
185static void noop_wait_for_deassert(atomic_t *deassert_not_used)
186{
187}
188
189static int __init es7000_update_genapic(void) 184static int __init es7000_update_genapic(void)
190{ 185{
191 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; 186 apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
192 187
193 /* MPENTIUMIII */ 188 /* MPENTIUMIII */
194 if (boot_cpu_data.x86 == 6 && 189 if (boot_cpu_data.x86 == 6 &&
195 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { 190 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
196 es7000_update_genapic_to_cluster(); 191 es7000_update_genapic_to_cluster();
197 genapic->wait_for_init_deassert = noop_wait_for_deassert; 192 apic->wait_for_init_deassert = NULL;
198 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; 193 apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
199 } 194 }
200 195
201 return 0; 196 return 0;
@@ -292,24 +287,31 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
292{ 287{
293 struct acpi_table_header *header = NULL; 288 struct acpi_table_header *header = NULL;
294 int i = 0; 289 int i = 0;
290 acpi_size tbl_size;
295 291
296 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { 292 while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
297 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { 293 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
298 struct oem_table *t = (struct oem_table *)header; 294 struct oem_table *t = (struct oem_table *)header;
299 295
300 oem_addrX = t->OEMTableAddr; 296 oem_addrX = t->OEMTableAddr;
301 oem_size = t->OEMTableSize; 297 oem_size = t->OEMTableSize;
298 early_acpi_os_unmap_memory(header, tbl_size);
302 299
303 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, 300 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
304 oem_size); 301 oem_size);
305 return 0; 302 return 0;
306 } 303 }
304 early_acpi_os_unmap_memory(header, tbl_size);
307 } 305 }
308 return -1; 306 return -1;
309} 307}
310 308
311void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) 309void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
312{ 310{
311 if (!oem_addr)
312 return;
313
314 __acpi_unmap_table((char *)oem_addr, oem_size);
313} 315}
314#endif 316#endif
315 317
@@ -359,20 +361,449 @@ es7000_mip_write(struct mip_reg *mip_reg)
359 return status; 361 return status;
360} 362}
361 363
362void __init 364void __init es7000_enable_apic_mode(void)
363es7000_sw_apic(void) 365{
364{ 366 struct mip_reg es7000_mip_reg;
365 if (es7000_plat) { 367 int mip_status;
366 int mip_status; 368
367 struct mip_reg es7000_mip_reg; 369 if (!es7000_plat)
368
369 printk("ES7000: Enabling APIC mode.\n");
370 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
371 es7000_mip_reg.off_0 = MIP_SW_APIC;
372 es7000_mip_reg.off_38 = (MIP_VALID);
373 while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
374 printk("es7000_sw_apic: command failed, status = %x\n",
375 mip_status);
376 return; 370 return;
371
372 printk("ES7000: Enabling APIC mode.\n");
373 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
374 es7000_mip_reg.off_0 = MIP_SW_APIC;
375 es7000_mip_reg.off_38 = MIP_VALID;
376
377 while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) {
378 printk("es7000_enable_apic_mode: command failed, status = %x\n",
379 mip_status);
380 }
381}
382
383/*
384 * APIC driver for the Unisys ES7000 chipset.
385 */
386#define APIC_DEFINITION 1
387#include <linux/threads.h>
388#include <linux/cpumask.h>
389#include <asm/mpspec.h>
390#include <asm/genapic.h>
391#include <asm/fixmap.h>
392#include <asm/apicdef.h>
393#include <linux/kernel.h>
394#include <linux/string.h>
395#include <linux/init.h>
396#include <linux/acpi.h>
397#include <linux/smp.h>
398#include <asm/ipi.h>
399
400#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
401#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
402#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
403
404#define APIC_DFR_VALUE (APIC_DFR_FLAT)
405
406extern void es7000_enable_apic_mode(void);
407extern int apic_version [MAX_APICS];
408extern u8 cpu_2_logical_apicid[];
409extern unsigned int boot_cpu_physical_apicid;
410
411extern int parse_unisys_oem (char *oemptr);
412extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
413extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
414extern void setup_unisys(void);
415
416#define apicid_cluster(apicid) (apicid & 0xF0)
417#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
418
419static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
420{
421 /* Careful. Some cpus do not strictly honor the set of cpus
422 * specified in the interrupt destination when using lowest
423 * priority interrupt delivery mode.
424 *
425 * In particular there was a hyperthreading cpu observed to
426 * deliver interrupts to the wrong hyperthread when only one
427 * hyperthread was specified in the interrupt desitination.
428 */
429 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
430}
431
432
433static void es7000_wait_for_init_deassert(atomic_t *deassert)
434{
435#ifndef CONFIG_ES7000_CLUSTERED_APIC
436 while (!atomic_read(deassert))
437 cpu_relax();
438#endif
439 return;
440}
441
442static unsigned int es7000_get_apic_id(unsigned long x)
443{
444 return (x >> 24) & 0xFF;
445}
446
447#ifdef CONFIG_ACPI
448static int es7000_check_dsdt(void)
449{
450 struct acpi_table_header header;
451
452 if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) &&
453 !strncmp(header.oem_id, "UNISYS", 6))
454 return 1;
455 return 0;
456}
457#endif
458
459static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
460{
461 default_send_IPI_mask_sequence_phys(mask, vector);
462}
463
464static void es7000_send_IPI_allbutself(int vector)
465{
466 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
467}
468
469static void es7000_send_IPI_all(int vector)
470{
471 es7000_send_IPI_mask(cpu_online_mask, vector);
472}
473
474static int es7000_apic_id_registered(void)
475{
476 return 1;
477}
478
479static const cpumask_t *target_cpus_cluster(void)
480{
481 return &CPU_MASK_ALL;
482}
483
484static const cpumask_t *es7000_target_cpus(void)
485{
486 return &cpumask_of_cpu(smp_processor_id());
487}
488
489static unsigned long
490es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
491{
492 return 0;
493}
494static unsigned long es7000_check_apicid_present(int bit)
495{
496 return physid_isset(bit, phys_cpu_present_map);
497}
498
499static unsigned long calculate_ldr(int cpu)
500{
501 unsigned long id = xapic_phys_to_log_apicid(cpu);
502
503 return (SET_APIC_LOGICAL_ID(id));
504}
505
506/*
507 * Set up the logical destination ID.
508 *
509 * Intel recommends to set DFR, LdR and TPR before enabling
510 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
511 * document number 292116). So here it goes...
512 */
513static void es7000_init_apic_ldr_cluster(void)
514{
515 unsigned long val;
516 int cpu = smp_processor_id();
517
518 apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
519 val = calculate_ldr(cpu);
520 apic_write(APIC_LDR, val);
521}
522
523static void es7000_init_apic_ldr(void)
524{
525 unsigned long val;
526 int cpu = smp_processor_id();
527
528 apic_write(APIC_DFR, APIC_DFR_VALUE);
529 val = calculate_ldr(cpu);
530 apic_write(APIC_LDR, val);
531}
532
533static void es7000_setup_apic_routing(void)
534{
535 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
536 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
537 (apic_version[apic] == 0x14) ?
538 "Physical Cluster" : "Logical Cluster",
539 nr_ioapics, cpus_addr(*es7000_target_cpus())[0]);
540}
541
542static int es7000_apicid_to_node(int logical_apicid)
543{
544 return 0;
545}
546
547
548static int es7000_cpu_present_to_apicid(int mps_cpu)
549{
550 if (!mps_cpu)
551 return boot_cpu_physical_apicid;
552 else if (mps_cpu < nr_cpu_ids)
553 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
554 else
555 return BAD_APICID;
556}
557
558static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
559{
560 static int id = 0;
561 physid_mask_t mask;
562
563 mask = physid_mask_of_physid(id);
564 ++id;
565
566 return mask;
567}
568
569/* Mapping from cpu number to logical apicid */
570static int es7000_cpu_to_logical_apicid(int cpu)
571{
572#ifdef CONFIG_SMP
573 if (cpu >= nr_cpu_ids)
574 return BAD_APICID;
575 return (int)cpu_2_logical_apicid[cpu];
576#else
577 return logical_smp_processor_id();
578#endif
579}
580
581static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
582{
583 /* For clustered we don't have a good way to do this yet - hack */
584 return physids_promote(0xff);
585}
586
587static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
588{
589 boot_cpu_physical_apicid = read_apic_id();
590 return (1);
591}
592
593static unsigned int
594es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
595{
596 int cpus_found = 0;
597 int num_bits_set;
598 int apicid;
599 int cpu;
600
601 num_bits_set = cpumask_weight(cpumask);
602 /* Return id to all */
603 if (num_bits_set == nr_cpu_ids)
604 return 0xFF;
605 /*
606 * The cpus in the mask must all be on the apic cluster. If are not
607 * on the same apicid cluster return default value of target_cpus():
608 */
609 cpu = cpumask_first(cpumask);
610 apicid = es7000_cpu_to_logical_apicid(cpu);
611
612 while (cpus_found < num_bits_set) {
613 if (cpumask_test_cpu(cpu, cpumask)) {
614 int new_apicid = es7000_cpu_to_logical_apicid(cpu);
615
616 if (apicid_cluster(apicid) !=
617 apicid_cluster(new_apicid)) {
618 printk ("%s: Not a valid mask!\n", __func__);
619
620 return 0xFF;
621 }
622 apicid = new_apicid;
623 cpus_found++;
624 }
625 cpu++;
626 }
627 return apicid;
628}
629
630static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask)
631{
632 int cpus_found = 0;
633 int num_bits_set;
634 int apicid;
635 int cpu;
636
637 num_bits_set = cpus_weight(*cpumask);
638 /* Return id to all */
639 if (num_bits_set == nr_cpu_ids)
640 return es7000_cpu_to_logical_apicid(0);
641 /*
642 * The cpus in the mask must all be on the apic cluster. If are not
643 * on the same apicid cluster return default value of target_cpus():
644 */
645 cpu = first_cpu(*cpumask);
646 apicid = es7000_cpu_to_logical_apicid(cpu);
647 while (cpus_found < num_bits_set) {
648 if (cpu_isset(cpu, *cpumask)) {
649 int new_apicid = es7000_cpu_to_logical_apicid(cpu);
650
651 if (apicid_cluster(apicid) !=
652 apicid_cluster(new_apicid)) {
653 printk ("%s: Not a valid mask!\n", __func__);
654
655 return es7000_cpu_to_logical_apicid(0);
656 }
657 apicid = new_apicid;
658 cpus_found++;
659 }
660 cpu++;
661 }
662 return apicid;
663}
664
665static unsigned int
666es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
667 const struct cpumask *andmask)
668{
669 int apicid = es7000_cpu_to_logical_apicid(0);
670 cpumask_var_t cpumask;
671
672 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
673 return apicid;
674
675 cpumask_and(cpumask, inmask, andmask);
676 cpumask_and(cpumask, cpumask, cpu_online_mask);
677 apicid = es7000_cpu_mask_to_apicid(cpumask);
678
679 free_cpumask_var(cpumask);
680
681 return apicid;
682}
683
684static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
685{
686 return cpuid_apic >> index_msb;
687}
688
689void __init es7000_update_genapic_to_cluster(void)
690{
691 apic->target_cpus = target_cpus_cluster;
692 apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
693 apic->irq_dest_mode = INT_DEST_MODE_CLUSTER;
694
695 apic->init_apic_ldr = es7000_init_apic_ldr_cluster;
696
697 apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster;
698}
699
700static int probe_es7000(void)
701{
702 /* probed later in mptable/ACPI hooks */
703 return 0;
704}
705
706static __init int
707es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
708{
709 if (mpc->oemptr) {
710 struct mpc_oemtable *oem_table =
711 (struct mpc_oemtable *)mpc->oemptr;
712
713 if (!strncmp(oem, "UNISYS", 6))
714 return parse_unisys_oem((char *)oem_table);
715 }
716 return 0;
717}
718
719#ifdef CONFIG_ACPI
720/* Hook from generic ACPI tables.c */
721static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
722{
723 unsigned long oem_addr = 0;
724 int check_dsdt;
725 int ret = 0;
726
727 /* check dsdt at first to avoid clear fix_map for oem_addr */
728 check_dsdt = es7000_check_dsdt();
729
730 if (!find_unisys_acpi_oem_table(&oem_addr)) {
731 if (check_dsdt)
732 ret = parse_unisys_oem((char *)oem_addr);
733 else {
734 setup_unisys();
735 ret = 1;
736 }
737 /*
738 * we need to unmap it
739 */
740 unmap_unisys_acpi_oem_table(oem_addr);
377 } 741 }
742 return ret;
743}
744#else
745static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
746{
747 return 0;
378} 748}
749#endif
750
751
752struct genapic apic_es7000 = {
753
754 .name = "es7000",
755 .probe = probe_es7000,
756 .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
757 .apic_id_registered = es7000_apic_id_registered,
758
759 .irq_delivery_mode = dest_Fixed,
760 /* phys delivery to target CPUs: */
761 .irq_dest_mode = 0,
762
763 .target_cpus = es7000_target_cpus,
764 .disable_esr = 1,
765 .dest_logical = 0,
766 .check_apicid_used = es7000_check_apicid_used,
767 .check_apicid_present = es7000_check_apicid_present,
768
769 .vector_allocation_domain = es7000_vector_allocation_domain,
770 .init_apic_ldr = es7000_init_apic_ldr,
771
772 .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
773 .setup_apic_routing = es7000_setup_apic_routing,
774 .multi_timer_check = NULL,
775 .apicid_to_node = es7000_apicid_to_node,
776 .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
777 .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
778 .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
779 .setup_portio_remap = NULL,
780 .check_phys_apicid_present = es7000_check_phys_apicid_present,
781 .enable_apic_mode = es7000_enable_apic_mode,
782 .phys_pkg_id = es7000_phys_pkg_id,
783 .mps_oem_check = es7000_mps_oem_check,
784
785 .get_apic_id = es7000_get_apic_id,
786 .set_apic_id = NULL,
787 .apic_id_mask = 0xFF << 24,
788
789 .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
790 .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
791
792 .send_IPI_mask = es7000_send_IPI_mask,
793 .send_IPI_mask_allbutself = NULL,
794 .send_IPI_allbutself = es7000_send_IPI_allbutself,
795 .send_IPI_all = es7000_send_IPI_all,
796 .send_IPI_self = default_send_IPI_self,
797
798 .wakeup_cpu = NULL,
799
800 .trampoline_phys_low = 0x467,
801 .trampoline_phys_high = 0x469,
802
803 .wait_for_init_deassert = es7000_wait_for_init_deassert,
804
805 /* Nothing to do for most platforms, since cleared by the INIT cycle: */
806 .smp_callin_clear_local_apic = NULL,
807 .store_NMI_vector = NULL,
808 .inquire_remote_apic = default_inquire_remote_apic,
809};
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index e656c2721154..820dea5d0ebe 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -29,7 +29,7 @@ extern struct genapic apic_x2xpic_uv_x;
29extern struct genapic apic_x2apic_phys; 29extern struct genapic apic_x2apic_phys;
30extern struct genapic apic_x2apic_cluster; 30extern struct genapic apic_x2apic_cluster;
31 31
32struct genapic __read_mostly *genapic = &apic_flat; 32struct genapic __read_mostly *apic = &apic_flat;
33 33
34static struct genapic *apic_probe[] __initdata = { 34static struct genapic *apic_probe[] __initdata = {
35#ifdef CONFIG_X86_UV 35#ifdef CONFIG_X86_UV
@@ -44,17 +44,17 @@ static struct genapic *apic_probe[] __initdata = {
44/* 44/*
45 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 45 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
46 */ 46 */
47void __init setup_apic_routing(void) 47void __init default_setup_apic_routing(void)
48{ 48{
49 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { 49 if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) {
50 if (!intr_remapping_enabled) 50 if (!intr_remapping_enabled)
51 genapic = &apic_flat; 51 apic = &apic_flat;
52 } 52 }
53 53
54 if (genapic == &apic_flat) { 54 if (apic == &apic_flat) {
55 if (max_physical_apicid >= 8) 55 if (max_physical_apicid >= 8)
56 genapic = &apic_physflat; 56 apic = &apic_physflat;
57 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 57 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
58 } 58 }
59 59
60 if (x86_quirks->update_genapic) 60 if (x86_quirks->update_genapic)
@@ -65,18 +65,18 @@ void __init setup_apic_routing(void)
65 65
66void apic_send_IPI_self(int vector) 66void apic_send_IPI_self(int vector)
67{ 67{
68 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 68 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
69} 69}
70 70
71int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 71int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
72{ 72{
73 int i; 73 int i;
74 74
75 for (i = 0; apic_probe[i]; ++i) { 75 for (i = 0; apic_probe[i]; ++i) {
76 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { 76 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
77 genapic = apic_probe[i]; 77 apic = apic_probe[i];
78 printk(KERN_INFO "Setting APIC routing to %s.\n", 78 printk(KERN_INFO "Setting APIC routing to %s.\n",
79 genapic->name); 79 apic->name);
80 return 1; 80 return 1;
81 } 81 }
82 } 82 }
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 34185488e4fb..249d2d3c034c 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -19,7 +19,6 @@
19#include <asm/smp.h> 19#include <asm/smp.h>
20#include <asm/ipi.h> 20#include <asm/ipi.h>
21#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23 22
24#ifdef CONFIG_ACPI 23#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h> 24#include <acpi/acpi_bus.h>
@@ -74,7 +73,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
74 unsigned long flags; 73 unsigned long flags;
75 74
76 local_irq_save(flags); 75 local_irq_save(flags);
77 __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); 76 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
78 local_irq_restore(flags); 77 local_irq_restore(flags);
79} 78}
80 79
@@ -85,14 +84,15 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
85 _flat_send_IPI_mask(mask, vector); 84 _flat_send_IPI_mask(mask, vector);
86} 85}
87 86
88static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, 87static void
89 int vector) 88 flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
90{ 89{
91 unsigned long mask = cpumask_bits(cpumask)[0]; 90 unsigned long mask = cpumask_bits(cpumask)[0];
92 int cpu = smp_processor_id(); 91 int cpu = smp_processor_id();
93 92
94 if (cpu < BITS_PER_LONG) 93 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask); 94 clear_bit(cpu, &mask);
95
96 _flat_send_IPI_mask(mask, vector); 96 _flat_send_IPI_mask(mask, vector);
97} 97}
98 98
@@ -114,23 +114,27 @@ static void flat_send_IPI_allbutself(int vector)
114 _flat_send_IPI_mask(mask, vector); 114 _flat_send_IPI_mask(mask, vector);
115 } 115 }
116 } else if (num_online_cpus() > 1) { 116 } else if (num_online_cpus() > 1) {
117 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); 117 __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
118 vector, apic->dest_logical);
118 } 119 }
119} 120}
120 121
121static void flat_send_IPI_all(int vector) 122static void flat_send_IPI_all(int vector)
122{ 123{
123 if (vector == NMI_VECTOR) 124 if (vector == NMI_VECTOR) {
124 flat_send_IPI_mask(cpu_online_mask, vector); 125 flat_send_IPI_mask(cpu_online_mask, vector);
125 else 126 } else {
126 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 127 __default_send_IPI_shortcut(APIC_DEST_ALLINC,
128 vector, apic->dest_logical);
129 }
127} 130}
128 131
129static unsigned int get_apic_id(unsigned long x) 132static unsigned int flat_get_apic_id(unsigned long x)
130{ 133{
131 unsigned int id; 134 unsigned int id;
132 135
133 id = (((x)>>24) & 0xFFu); 136 id = (((x)>>24) & 0xFFu);
137
134 return id; 138 return id;
135} 139}
136 140
@@ -146,7 +150,7 @@ static unsigned int read_xapic_id(void)
146{ 150{
147 unsigned int id; 151 unsigned int id;
148 152
149 id = get_apic_id(apic_read(APIC_ID)); 153 id = flat_get_apic_id(apic_read(APIC_ID));
150 return id; 154 return id;
151} 155}
152 156
@@ -169,31 +173,62 @@ static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
169 return mask1 & mask2; 173 return mask1 & mask2;
170} 174}
171 175
172static unsigned int phys_pkg_id(int index_msb) 176static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
173{ 177{
174 return hard_smp_processor_id() >> index_msb; 178 return hard_smp_processor_id() >> index_msb;
175} 179}
176 180
177struct genapic apic_flat = { 181struct genapic apic_flat = {
178 .name = "flat", 182 .name = "flat",
179 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 183 .probe = NULL,
180 .int_delivery_mode = dest_LowestPrio, 184 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
181 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 185 .apic_id_registered = flat_apic_id_registered,
182 .target_cpus = flat_target_cpus, 186
183 .vector_allocation_domain = flat_vector_allocation_domain, 187 .irq_delivery_mode = dest_LowestPrio,
184 .apic_id_registered = flat_apic_id_registered, 188 .irq_dest_mode = 1, /* logical */
185 .init_apic_ldr = flat_init_apic_ldr, 189
186 .send_IPI_all = flat_send_IPI_all, 190 .target_cpus = flat_target_cpus,
187 .send_IPI_allbutself = flat_send_IPI_allbutself, 191 .disable_esr = 0,
188 .send_IPI_mask = flat_send_IPI_mask, 192 .dest_logical = APIC_DEST_LOGICAL,
189 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, 193 .check_apicid_used = NULL,
190 .send_IPI_self = apic_send_IPI_self, 194 .check_apicid_present = NULL,
191 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 195
192 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, 196 .vector_allocation_domain = flat_vector_allocation_domain,
193 .phys_pkg_id = phys_pkg_id, 197 .init_apic_ldr = flat_init_apic_ldr,
194 .get_apic_id = get_apic_id, 198
195 .set_apic_id = set_apic_id, 199 .ioapic_phys_id_map = NULL,
196 .apic_id_mask = (0xFFu<<24), 200 .setup_apic_routing = NULL,
201 .multi_timer_check = NULL,
202 .apicid_to_node = NULL,
203 .cpu_to_logical_apicid = NULL,
204 .cpu_present_to_apicid = default_cpu_present_to_apicid,
205 .apicid_to_cpu_present = NULL,
206 .setup_portio_remap = NULL,
207 .check_phys_apicid_present = default_check_phys_apicid_present,
208 .enable_apic_mode = NULL,
209 .phys_pkg_id = flat_phys_pkg_id,
210 .mps_oem_check = NULL,
211
212 .get_apic_id = flat_get_apic_id,
213 .set_apic_id = set_apic_id,
214 .apic_id_mask = 0xFFu << 24,
215
216 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
217 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
218
219 .send_IPI_mask = flat_send_IPI_mask,
220 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
221 .send_IPI_allbutself = flat_send_IPI_allbutself,
222 .send_IPI_all = flat_send_IPI_all,
223 .send_IPI_self = apic_send_IPI_self,
224
225 .wakeup_cpu = NULL,
226 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
227 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
228 .wait_for_init_deassert = NULL,
229 .smp_callin_clear_local_apic = NULL,
230 .store_NMI_vector = NULL,
231 .inquire_remote_apic = NULL,
197}; 232};
198 233
199/* 234/*
@@ -232,18 +267,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
232 267
233static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) 268static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
234{ 269{
235 send_IPI_mask_sequence(cpumask, vector); 270 default_send_IPI_mask_sequence_phys(cpumask, vector);
236} 271}
237 272
238static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, 273static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
239 int vector) 274 int vector)
240{ 275{
241 send_IPI_mask_allbutself(cpumask, vector); 276 default_send_IPI_mask_allbutself_phys(cpumask, vector);
242} 277}
243 278
244static void physflat_send_IPI_allbutself(int vector) 279static void physflat_send_IPI_allbutself(int vector)
245{ 280{
246 send_IPI_mask_allbutself(cpu_online_mask, vector); 281 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
247} 282}
248 283
249static void physflat_send_IPI_all(int vector) 284static void physflat_send_IPI_all(int vector)
@@ -276,32 +311,67 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
276 * We're using fixed IRQ delivery, can only return one phys APIC ID. 311 * We're using fixed IRQ delivery, can only return one phys APIC ID.
277 * May as well be the first. 312 * May as well be the first.
278 */ 313 */
279 for_each_cpu_and(cpu, cpumask, andmask) 314 for_each_cpu_and(cpu, cpumask, andmask) {
280 if (cpumask_test_cpu(cpu, cpu_online_mask)) 315 if (cpumask_test_cpu(cpu, cpu_online_mask))
281 break; 316 break;
317 }
282 if (cpu < nr_cpu_ids) 318 if (cpu < nr_cpu_ids)
283 return per_cpu(x86_cpu_to_apicid, cpu); 319 return per_cpu(x86_cpu_to_apicid, cpu);
320
284 return BAD_APICID; 321 return BAD_APICID;
285} 322}
286 323
287struct genapic apic_physflat = { 324struct genapic apic_physflat = {
288 .name = "physical flat", 325
289 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 326 .name = "physical flat",
290 .int_delivery_mode = dest_Fixed, 327 .probe = NULL,
291 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 328 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
292 .target_cpus = physflat_target_cpus, 329 .apic_id_registered = flat_apic_id_registered,
293 .vector_allocation_domain = physflat_vector_allocation_domain, 330
294 .apic_id_registered = flat_apic_id_registered, 331 .irq_delivery_mode = dest_Fixed,
295 .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ 332 .irq_dest_mode = 0, /* physical */
296 .send_IPI_all = physflat_send_IPI_all, 333
297 .send_IPI_allbutself = physflat_send_IPI_allbutself, 334 .target_cpus = physflat_target_cpus,
298 .send_IPI_mask = physflat_send_IPI_mask, 335 .disable_esr = 0,
299 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, 336 .dest_logical = 0,
300 .send_IPI_self = apic_send_IPI_self, 337 .check_apicid_used = NULL,
301 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 338 .check_apicid_present = NULL,
302 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, 339
303 .phys_pkg_id = phys_pkg_id, 340 .vector_allocation_domain = physflat_vector_allocation_domain,
304 .get_apic_id = get_apic_id, 341 /* not needed, but shouldn't hurt: */
305 .set_apic_id = set_apic_id, 342 .init_apic_ldr = flat_init_apic_ldr,
306 .apic_id_mask = (0xFFu<<24), 343
344 .ioapic_phys_id_map = NULL,
345 .setup_apic_routing = NULL,
346 .multi_timer_check = NULL,
347 .apicid_to_node = NULL,
348 .cpu_to_logical_apicid = NULL,
349 .cpu_present_to_apicid = default_cpu_present_to_apicid,
350 .apicid_to_cpu_present = NULL,
351 .setup_portio_remap = NULL,
352 .check_phys_apicid_present = default_check_phys_apicid_present,
353 .enable_apic_mode = NULL,
354 .phys_pkg_id = flat_phys_pkg_id,
355 .mps_oem_check = NULL,
356
357 .get_apic_id = flat_get_apic_id,
358 .set_apic_id = set_apic_id,
359 .apic_id_mask = 0xFFu << 24,
360
361 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
362 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
363
364 .send_IPI_mask = physflat_send_IPI_mask,
365 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
366 .send_IPI_allbutself = physflat_send_IPI_allbutself,
367 .send_IPI_all = physflat_send_IPI_all,
368 .send_IPI_self = apic_send_IPI_self,
369
370 .wakeup_cpu = NULL,
371 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
372 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
373 .wait_for_init_deassert = NULL,
374 .smp_callin_clear_local_apic = NULL,
375 .store_NMI_vector = NULL,
376 .inquire_remote_apic = NULL,
307}; 377};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index 6ce497cc372d..7c87156b6411 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -36,8 +36,8 @@ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
36 cpumask_set_cpu(cpu, retmask); 36 cpumask_set_cpu(cpu, retmask);
37} 37}
38 38
39static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 39static void
40 unsigned int dest) 40 __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
41{ 41{
42 unsigned long cfg; 42 unsigned long cfg;
43 43
@@ -57,45 +57,50 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
57 */ 57 */
58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) 58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
59{ 59{
60 unsigned long flags;
61 unsigned long query_cpu; 60 unsigned long query_cpu;
61 unsigned long flags;
62 62
63 local_irq_save(flags); 63 local_irq_save(flags);
64 for_each_cpu(query_cpu, mask) 64 for_each_cpu(query_cpu, mask) {
65 __x2apic_send_IPI_dest( 65 __x2apic_send_IPI_dest(
66 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 66 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL); 67 vector, apic->dest_logical);
68 }
68 local_irq_restore(flags); 69 local_irq_restore(flags);
69} 70}
70 71
71static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, 72static void
72 int vector) 73 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
73{ 74{
74 unsigned long flags;
75 unsigned long query_cpu;
76 unsigned long this_cpu = smp_processor_id(); 75 unsigned long this_cpu = smp_processor_id();
76 unsigned long query_cpu;
77 unsigned long flags;
77 78
78 local_irq_save(flags); 79 local_irq_save(flags);
79 for_each_cpu(query_cpu, mask) 80 for_each_cpu(query_cpu, mask) {
80 if (query_cpu != this_cpu) 81 if (query_cpu == this_cpu)
81 __x2apic_send_IPI_dest( 82 continue;
83 __x2apic_send_IPI_dest(
82 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 84 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
83 vector, APIC_DEST_LOGICAL); 85 vector, apic->dest_logical);
86 }
84 local_irq_restore(flags); 87 local_irq_restore(flags);
85} 88}
86 89
87static void x2apic_send_IPI_allbutself(int vector) 90static void x2apic_send_IPI_allbutself(int vector)
88{ 91{
89 unsigned long flags;
90 unsigned long query_cpu;
91 unsigned long this_cpu = smp_processor_id(); 92 unsigned long this_cpu = smp_processor_id();
93 unsigned long query_cpu;
94 unsigned long flags;
92 95
93 local_irq_save(flags); 96 local_irq_save(flags);
94 for_each_online_cpu(query_cpu) 97 for_each_online_cpu(query_cpu) {
95 if (query_cpu != this_cpu) 98 if (query_cpu == this_cpu)
96 __x2apic_send_IPI_dest( 99 continue;
100 __x2apic_send_IPI_dest(
97 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 101 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
98 vector, APIC_DEST_LOGICAL); 102 vector, apic->dest_logical);
103 }
99 local_irq_restore(flags); 104 local_irq_restore(flags);
100} 105}
101 106
@@ -111,21 +116,21 @@ static int x2apic_apic_id_registered(void)
111 116
112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) 117static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
113{ 118{
114 int cpu;
115
116 /* 119 /*
117 * We're using fixed IRQ delivery, can only return one logical APIC ID. 120 * We're using fixed IRQ delivery, can only return one logical APIC ID.
118 * May as well be the first. 121 * May as well be the first.
119 */ 122 */
120 cpu = cpumask_first(cpumask); 123 int cpu = cpumask_first(cpumask);
124
121 if ((unsigned)cpu < nr_cpu_ids) 125 if ((unsigned)cpu < nr_cpu_ids)
122 return per_cpu(x86_cpu_to_logical_apicid, cpu); 126 return per_cpu(x86_cpu_to_logical_apicid, cpu);
123 else 127 else
124 return BAD_APICID; 128 return BAD_APICID;
125} 129}
126 130
127static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 131static unsigned int
128 const struct cpumask *andmask) 132x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 const struct cpumask *andmask)
129{ 134{
130 int cpu; 135 int cpu;
131 136
@@ -133,15 +138,18 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 * We're using fixed IRQ delivery, can only return one logical APIC ID. 138 * We're using fixed IRQ delivery, can only return one logical APIC ID.
134 * May as well be the first. 139 * May as well be the first.
135 */ 140 */
136 for_each_cpu_and(cpu, cpumask, andmask) 141 for_each_cpu_and(cpu, cpumask, andmask) {
137 if (cpumask_test_cpu(cpu, cpu_online_mask)) 142 if (cpumask_test_cpu(cpu, cpu_online_mask))
138 break; 143 break;
144 }
145
139 if (cpu < nr_cpu_ids) 146 if (cpu < nr_cpu_ids)
140 return per_cpu(x86_cpu_to_logical_apicid, cpu); 147 return per_cpu(x86_cpu_to_logical_apicid, cpu);
148
141 return BAD_APICID; 149 return BAD_APICID;
142} 150}
143 151
144static unsigned int get_apic_id(unsigned long x) 152static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
145{ 153{
146 unsigned int id; 154 unsigned int id;
147 155
@@ -157,7 +165,7 @@ static unsigned long set_apic_id(unsigned int id)
157 return x; 165 return x;
158} 166}
159 167
160static unsigned int phys_pkg_id(int index_msb) 168static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
161{ 169{
162 return current_cpu_data.initial_apicid >> index_msb; 170 return current_cpu_data.initial_apicid >> index_msb;
163} 171}
@@ -172,27 +180,58 @@ static void init_x2apic_ldr(void)
172 int cpu = smp_processor_id(); 180 int cpu = smp_processor_id();
173 181
174 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); 182 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
175 return;
176} 183}
177 184
178struct genapic apic_x2apic_cluster = { 185struct genapic apic_x2apic_cluster = {
179 .name = "cluster x2apic", 186
180 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 187 .name = "cluster x2apic",
181 .int_delivery_mode = dest_LowestPrio, 188 .probe = NULL,
182 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 189 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
183 .target_cpus = x2apic_target_cpus, 190 .apic_id_registered = x2apic_apic_id_registered,
184 .vector_allocation_domain = x2apic_vector_allocation_domain, 191
185 .apic_id_registered = x2apic_apic_id_registered, 192 .irq_delivery_mode = dest_LowestPrio,
186 .init_apic_ldr = init_x2apic_ldr, 193 .irq_dest_mode = 1, /* logical */
187 .send_IPI_all = x2apic_send_IPI_all, 194
188 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 195 .target_cpus = x2apic_target_cpus,
189 .send_IPI_mask = x2apic_send_IPI_mask, 196 .disable_esr = 0,
190 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, 197 .dest_logical = APIC_DEST_LOGICAL,
191 .send_IPI_self = x2apic_send_IPI_self, 198 .check_apicid_used = NULL,
192 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 199 .check_apicid_present = NULL,
193 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, 200
194 .phys_pkg_id = phys_pkg_id, 201 .vector_allocation_domain = x2apic_vector_allocation_domain,
195 .get_apic_id = get_apic_id, 202 .init_apic_ldr = init_x2apic_ldr,
196 .set_apic_id = set_apic_id, 203
197 .apic_id_mask = (0xFFFFFFFFu), 204 .ioapic_phys_id_map = NULL,
205 .setup_apic_routing = NULL,
206 .multi_timer_check = NULL,
207 .apicid_to_node = NULL,
208 .cpu_to_logical_apicid = NULL,
209 .cpu_present_to_apicid = default_cpu_present_to_apicid,
210 .apicid_to_cpu_present = NULL,
211 .setup_portio_remap = NULL,
212 .check_phys_apicid_present = default_check_phys_apicid_present,
213 .enable_apic_mode = NULL,
214 .phys_pkg_id = x2apic_cluster_phys_pkg_id,
215 .mps_oem_check = NULL,
216
217 .get_apic_id = x2apic_cluster_phys_get_apic_id,
218 .set_apic_id = set_apic_id,
219 .apic_id_mask = 0xFFFFFFFFu,
220
221 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
222 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
223
224 .send_IPI_mask = x2apic_send_IPI_mask,
225 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
226 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
227 .send_IPI_all = x2apic_send_IPI_all,
228 .send_IPI_self = x2apic_send_IPI_self,
229
230 .wakeup_cpu = NULL,
231 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
232 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
233 .wait_for_init_deassert = NULL,
234 .smp_callin_clear_local_apic = NULL,
235 .store_NMI_vector = NULL,
236 .inquire_remote_apic = NULL,
198}; 237};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index 21bcc0e098ba..5cbae8aa0408 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -55,8 +55,8 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
55 55
56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) 56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
57{ 57{
58 unsigned long flags;
59 unsigned long query_cpu; 58 unsigned long query_cpu;
59 unsigned long flags;
60 60
61 local_irq_save(flags); 61 local_irq_save(flags);
62 for_each_cpu(query_cpu, mask) { 62 for_each_cpu(query_cpu, mask) {
@@ -66,12 +66,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
66 local_irq_restore(flags); 66 local_irq_restore(flags);
67} 67}
68 68
69static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, 69static void
70 int vector) 70 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
71{ 71{
72 unsigned long flags;
73 unsigned long query_cpu;
74 unsigned long this_cpu = smp_processor_id(); 72 unsigned long this_cpu = smp_processor_id();
73 unsigned long query_cpu;
74 unsigned long flags;
75 75
76 local_irq_save(flags); 76 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) { 77 for_each_cpu(query_cpu, mask) {
@@ -85,16 +85,17 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
85 85
86static void x2apic_send_IPI_allbutself(int vector) 86static void x2apic_send_IPI_allbutself(int vector)
87{ 87{
88 unsigned long flags;
89 unsigned long query_cpu;
90 unsigned long this_cpu = smp_processor_id(); 88 unsigned long this_cpu = smp_processor_id();
89 unsigned long query_cpu;
90 unsigned long flags;
91 91
92 local_irq_save(flags); 92 local_irq_save(flags);
93 for_each_online_cpu(query_cpu) 93 for_each_online_cpu(query_cpu) {
94 if (query_cpu != this_cpu) 94 if (query_cpu == this_cpu)
95 __x2apic_send_IPI_dest( 95 continue;
96 per_cpu(x86_cpu_to_apicid, query_cpu), 96 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
97 vector, APIC_DEST_PHYSICAL); 97 vector, APIC_DEST_PHYSICAL);
98 }
98 local_irq_restore(flags); 99 local_irq_restore(flags);
99} 100}
100 101
@@ -110,21 +111,21 @@ static int x2apic_apic_id_registered(void)
110 111
111static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) 112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
112{ 113{
113 int cpu;
114
115 /* 114 /*
116 * We're using fixed IRQ delivery, can only return one phys APIC ID. 115 * We're using fixed IRQ delivery, can only return one phys APIC ID.
117 * May as well be the first. 116 * May as well be the first.
118 */ 117 */
119 cpu = cpumask_first(cpumask); 118 int cpu = cpumask_first(cpumask);
119
120 if ((unsigned)cpu < nr_cpu_ids) 120 if ((unsigned)cpu < nr_cpu_ids)
121 return per_cpu(x86_cpu_to_apicid, cpu); 121 return per_cpu(x86_cpu_to_apicid, cpu);
122 else 122 else
123 return BAD_APICID; 123 return BAD_APICID;
124} 124}
125 125
126static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 126static unsigned int
127 const struct cpumask *andmask) 127x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
128 const struct cpumask *andmask)
128{ 129{
129 int cpu; 130 int cpu;
130 131
@@ -132,31 +133,28 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
132 * We're using fixed IRQ delivery, can only return one phys APIC ID. 133 * We're using fixed IRQ delivery, can only return one phys APIC ID.
133 * May as well be the first. 134 * May as well be the first.
134 */ 135 */
135 for_each_cpu_and(cpu, cpumask, andmask) 136 for_each_cpu_and(cpu, cpumask, andmask) {
136 if (cpumask_test_cpu(cpu, cpu_online_mask)) 137 if (cpumask_test_cpu(cpu, cpu_online_mask))
137 break; 138 break;
139 }
140
138 if (cpu < nr_cpu_ids) 141 if (cpu < nr_cpu_ids)
139 return per_cpu(x86_cpu_to_apicid, cpu); 142 return per_cpu(x86_cpu_to_apicid, cpu);
143
140 return BAD_APICID; 144 return BAD_APICID;
141} 145}
142 146
143static unsigned int get_apic_id(unsigned long x) 147static unsigned int x2apic_phys_get_apic_id(unsigned long x)
144{ 148{
145 unsigned int id; 149 return x;
146
147 id = x;
148 return id;
149} 150}
150 151
151static unsigned long set_apic_id(unsigned int id) 152static unsigned long set_apic_id(unsigned int id)
152{ 153{
153 unsigned long x; 154 return id;
154
155 x = id;
156 return x;
157} 155}
158 156
159static unsigned int phys_pkg_id(int index_msb) 157static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
160{ 158{
161 return current_cpu_data.initial_apicid >> index_msb; 159 return current_cpu_data.initial_apicid >> index_msb;
162} 160}
@@ -168,27 +166,58 @@ static void x2apic_send_IPI_self(int vector)
168 166
169static void init_x2apic_ldr(void) 167static void init_x2apic_ldr(void)
170{ 168{
171 return;
172} 169}
173 170
174struct genapic apic_x2apic_phys = { 171struct genapic apic_x2apic_phys = {
175 .name = "physical x2apic", 172
176 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 173 .name = "physical x2apic",
177 .int_delivery_mode = dest_Fixed, 174 .probe = NULL,
178 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 175 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
179 .target_cpus = x2apic_target_cpus, 176 .apic_id_registered = x2apic_apic_id_registered,
180 .vector_allocation_domain = x2apic_vector_allocation_domain, 177
181 .apic_id_registered = x2apic_apic_id_registered, 178 .irq_delivery_mode = dest_Fixed,
182 .init_apic_ldr = init_x2apic_ldr, 179 .irq_dest_mode = 0, /* physical */
183 .send_IPI_all = x2apic_send_IPI_all, 180
184 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 181 .target_cpus = x2apic_target_cpus,
185 .send_IPI_mask = x2apic_send_IPI_mask, 182 .disable_esr = 0,
186 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, 183 .dest_logical = 0,
187 .send_IPI_self = x2apic_send_IPI_self, 184 .check_apicid_used = NULL,
188 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 185 .check_apicid_present = NULL,
189 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, 186
190 .phys_pkg_id = phys_pkg_id, 187 .vector_allocation_domain = x2apic_vector_allocation_domain,
191 .get_apic_id = get_apic_id, 188 .init_apic_ldr = init_x2apic_ldr,
192 .set_apic_id = set_apic_id, 189
193 .apic_id_mask = (0xFFFFFFFFu), 190 .ioapic_phys_id_map = NULL,
191 .setup_apic_routing = NULL,
192 .multi_timer_check = NULL,
193 .apicid_to_node = NULL,
194 .cpu_to_logical_apicid = NULL,
195 .cpu_present_to_apicid = default_cpu_present_to_apicid,
196 .apicid_to_cpu_present = NULL,
197 .setup_portio_remap = NULL,
198 .check_phys_apicid_present = default_check_phys_apicid_present,
199 .enable_apic_mode = NULL,
200 .phys_pkg_id = x2apic_phys_pkg_id,
201 .mps_oem_check = NULL,
202
203 .get_apic_id = x2apic_phys_get_apic_id,
204 .set_apic_id = set_apic_id,
205 .apic_id_mask = 0xFFFFFFFFu,
206
207 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
208 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
209
210 .send_IPI_mask = x2apic_send_IPI_mask,
211 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
212 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
213 .send_IPI_all = x2apic_send_IPI_all,
214 .send_IPI_self = x2apic_send_IPI_self,
215
216 .wakeup_cpu = NULL,
217 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
218 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
219 .wait_for_init_deassert = NULL,
220 .smp_callin_clear_local_apic = NULL,
221 .store_NMI_vector = NULL,
222 .inquire_remote_apic = NULL,
194}; 223};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfe36249145c..6adb5e6f4d92 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -118,12 +118,13 @@ static void uv_send_IPI_one(int cpu, int vector)
118 int pnode; 118 int pnode;
119 119
120 apicid = per_cpu(x86_cpu_to_apicid, cpu); 120 apicid = per_cpu(x86_cpu_to_apicid, cpu);
121 lapicid = apicid & 0x3f; /* ZZZ macro needed */ 121 lapicid = apicid & 0x3f; /* ZZZ macro needed */
122 pnode = uv_apicid_to_pnode(apicid); 122 pnode = uv_apicid_to_pnode(apicid);
123 val = 123
124 (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << 124 val = ( 1UL << UVH_IPI_INT_SEND_SHFT ) |
125 UVH_IPI_INT_APIC_ID_SHFT) | 125 ( lapicid << UVH_IPI_INT_APIC_ID_SHFT ) |
126 (vector << UVH_IPI_INT_VECTOR_SHFT); 126 ( vector << UVH_IPI_INT_VECTOR_SHFT );
127
127 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 128 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
128} 129}
129 130
@@ -137,22 +138,24 @@ static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
137 138
138static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) 139static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
139{ 140{
140 unsigned int cpu;
141 unsigned int this_cpu = smp_processor_id(); 141 unsigned int this_cpu = smp_processor_id();
142 unsigned int cpu;
142 143
143 for_each_cpu(cpu, mask) 144 for_each_cpu(cpu, mask) {
144 if (cpu != this_cpu) 145 if (cpu != this_cpu)
145 uv_send_IPI_one(cpu, vector); 146 uv_send_IPI_one(cpu, vector);
147 }
146} 148}
147 149
148static void uv_send_IPI_allbutself(int vector) 150static void uv_send_IPI_allbutself(int vector)
149{ 151{
150 unsigned int cpu;
151 unsigned int this_cpu = smp_processor_id(); 152 unsigned int this_cpu = smp_processor_id();
153 unsigned int cpu;
152 154
153 for_each_online_cpu(cpu) 155 for_each_online_cpu(cpu) {
154 if (cpu != this_cpu) 156 if (cpu != this_cpu)
155 uv_send_IPI_one(cpu, vector); 157 uv_send_IPI_one(cpu, vector);
158 }
156} 159}
157 160
158static void uv_send_IPI_all(int vector) 161static void uv_send_IPI_all(int vector)
@@ -171,21 +174,21 @@ static void uv_init_apic_ldr(void)
171 174
172static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) 175static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
173{ 176{
174 int cpu;
175
176 /* 177 /*
177 * We're using fixed IRQ delivery, can only return one phys APIC ID. 178 * We're using fixed IRQ delivery, can only return one phys APIC ID.
178 * May as well be the first. 179 * May as well be the first.
179 */ 180 */
180 cpu = cpumask_first(cpumask); 181 int cpu = cpumask_first(cpumask);
182
181 if ((unsigned)cpu < nr_cpu_ids) 183 if ((unsigned)cpu < nr_cpu_ids)
182 return per_cpu(x86_cpu_to_apicid, cpu); 184 return per_cpu(x86_cpu_to_apicid, cpu);
183 else 185 else
184 return BAD_APICID; 186 return BAD_APICID;
185} 187}
186 188
187static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 189static unsigned int
188 const struct cpumask *andmask) 190uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
191 const struct cpumask *andmask)
189{ 192{
190 int cpu; 193 int cpu;
191 194
@@ -193,15 +196,17 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
193 * We're using fixed IRQ delivery, can only return one phys APIC ID. 196 * We're using fixed IRQ delivery, can only return one phys APIC ID.
194 * May as well be the first. 197 * May as well be the first.
195 */ 198 */
196 for_each_cpu_and(cpu, cpumask, andmask) 199 for_each_cpu_and(cpu, cpumask, andmask) {
197 if (cpumask_test_cpu(cpu, cpu_online_mask)) 200 if (cpumask_test_cpu(cpu, cpu_online_mask))
198 break; 201 break;
202 }
199 if (cpu < nr_cpu_ids) 203 if (cpu < nr_cpu_ids)
200 return per_cpu(x86_cpu_to_apicid, cpu); 204 return per_cpu(x86_cpu_to_apicid, cpu);
205
201 return BAD_APICID; 206 return BAD_APICID;
202} 207}
203 208
204static unsigned int get_apic_id(unsigned long x) 209static unsigned int x2apic_get_apic_id(unsigned long x)
205{ 210{
206 unsigned int id; 211 unsigned int id;
207 212
@@ -223,10 +228,10 @@ static unsigned long set_apic_id(unsigned int id)
223static unsigned int uv_read_apic_id(void) 228static unsigned int uv_read_apic_id(void)
224{ 229{
225 230
226 return get_apic_id(apic_read(APIC_ID)); 231 return x2apic_get_apic_id(apic_read(APIC_ID));
227} 232}
228 233
229static unsigned int phys_pkg_id(int index_msb) 234static int uv_phys_pkg_id(int initial_apicid, int index_msb)
230{ 235{
231 return uv_read_apic_id() >> index_msb; 236 return uv_read_apic_id() >> index_msb;
232} 237}
@@ -237,25 +242,57 @@ static void uv_send_IPI_self(int vector)
237} 242}
238 243
239struct genapic apic_x2apic_uv_x = { 244struct genapic apic_x2apic_uv_x = {
240 .name = "UV large system", 245
241 .acpi_madt_oem_check = uv_acpi_madt_oem_check, 246 .name = "UV large system",
242 .int_delivery_mode = dest_Fixed, 247 .probe = NULL,
243 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 248 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
244 .target_cpus = uv_target_cpus, 249 .apic_id_registered = uv_apic_id_registered,
245 .vector_allocation_domain = uv_vector_allocation_domain, 250
246 .apic_id_registered = uv_apic_id_registered, 251 .irq_delivery_mode = dest_Fixed,
247 .init_apic_ldr = uv_init_apic_ldr, 252 .irq_dest_mode = 1, /* logical */
248 .send_IPI_all = uv_send_IPI_all, 253
249 .send_IPI_allbutself = uv_send_IPI_allbutself, 254 .target_cpus = uv_target_cpus,
250 .send_IPI_mask = uv_send_IPI_mask, 255 .disable_esr = 0,
251 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, 256 .dest_logical = APIC_DEST_LOGICAL,
252 .send_IPI_self = uv_send_IPI_self, 257 .check_apicid_used = NULL,
253 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 258 .check_apicid_present = NULL,
254 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, 259
255 .phys_pkg_id = phys_pkg_id, 260 .vector_allocation_domain = uv_vector_allocation_domain,
256 .get_apic_id = get_apic_id, 261 .init_apic_ldr = uv_init_apic_ldr,
257 .set_apic_id = set_apic_id, 262
258 .apic_id_mask = (0xFFFFFFFFu), 263 .ioapic_phys_id_map = NULL,
264 .setup_apic_routing = NULL,
265 .multi_timer_check = NULL,
266 .apicid_to_node = NULL,
267 .cpu_to_logical_apicid = NULL,
268 .cpu_present_to_apicid = default_cpu_present_to_apicid,
269 .apicid_to_cpu_present = NULL,
270 .setup_portio_remap = NULL,
271 .check_phys_apicid_present = default_check_phys_apicid_present,
272 .enable_apic_mode = NULL,
273 .phys_pkg_id = uv_phys_pkg_id,
274 .mps_oem_check = NULL,
275
276 .get_apic_id = x2apic_get_apic_id,
277 .set_apic_id = set_apic_id,
278 .apic_id_mask = 0xFFFFFFFFu,
279
280 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
281 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
282
283 .send_IPI_mask = uv_send_IPI_mask,
284 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
285 .send_IPI_allbutself = uv_send_IPI_allbutself,
286 .send_IPI_all = uv_send_IPI_all,
287 .send_IPI_self = uv_send_IPI_self,
288
289 .wakeup_cpu = NULL,
290 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
291 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
292 .wait_for_init_deassert = NULL,
293 .smp_callin_clear_local_apic = NULL,
294 .store_NMI_vector = NULL,
295 .inquire_remote_apic = NULL,
259}; 296};
260 297
261static __cpuinit void set_x2apic_extra_bits(int pnode) 298static __cpuinit void set_x2apic_extra_bits(int pnode)
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index 24c0e5cd71e3..2a0aad7718d5 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,7 @@
19#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
20#include <asm/setup.h> 20#include <asm/setup.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23/* Physical address */ 24/* Physical address */
24#define pa(X) ((X) - __PAGE_OFFSET) 25#define pa(X) ((X) - __PAGE_OFFSET)
@@ -437,8 +438,26 @@ is386: movl $2,%ecx # set MP
437 movl $(__KERNEL_PERCPU), %eax 438 movl $(__KERNEL_PERCPU), %eax
438 movl %eax,%fs # set this cpu's percpu 439 movl %eax,%fs # set this cpu's percpu
439 440
440 xorl %eax,%eax # Clear GS and LDT 441#ifdef CONFIG_CC_STACKPROTECTOR
442 /*
443 * The linker can't handle this by relocation. Manually set
444 * base address in stack canary segment descriptor.
445 */
446 cmpb $0,ready
447 jne 1f
448 movl $per_cpu__gdt_page,%eax
449 movl $per_cpu__stack_canary,%ecx
450 subl $20, %ecx
451 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
452 shrl $16, %ecx
453 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
454 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
4551:
456#endif
457 movl $(__KERNEL_STACK_CANARY),%eax
441 movl %eax,%gs 458 movl %eax,%gs
459
460 xorl %eax,%eax # Clear LDT
442 lldt %ax 461 lldt %ax
443 462
444 cld # gcc2 wants the direction flag cleared at all times 463 cld # gcc2 wants the direction flag cleared at all times
@@ -548,12 +567,8 @@ early_fault:
548 pushl %eax 567 pushl %eax
549 pushl %edx /* trapno */ 568 pushl %edx /* trapno */
550 pushl $fault_msg 569 pushl $fault_msg
551#ifdef CONFIG_EARLY_PRINTK
552 call early_printk
553#else
554 call printk 570 call printk
555#endif 571#endif
556#endif
557 call dump_stack 572 call dump_stack
558hlt_loop: 573hlt_loop:
559 hlt 574 hlt
@@ -580,11 +595,10 @@ ignore_int:
580 pushl 32(%esp) 595 pushl 32(%esp)
581 pushl 40(%esp) 596 pushl 40(%esp)
582 pushl $int_msg 597 pushl $int_msg
583#ifdef CONFIG_EARLY_PRINTK
584 call early_printk
585#else
586 call printk 598 call printk
587#endif 599
600 call dump_stack
601
588 addl $(5*4),%esp 602 addl $(5*4),%esp
589 popl %ds 603 popl %ds
590 popl %es 604 popl %es
@@ -660,7 +674,7 @@ early_recursion_flag:
660 .long 0 674 .long 0
661 675
662int_msg: 676int_msg:
663 .asciz "Unknown interrupt or fault at EIP %p %p %p\n" 677 .asciz "Unknown interrupt or fault at: %p %p %p\n"
664 678
665fault_msg: 679fault_msg:
666/* fault info: */ 680/* fault info: */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index a0a2b5ca9b7d..2e648e3a5ea4 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -205,19 +205,6 @@ ENTRY(secondary_startup_64)
205 pushq $0 205 pushq $0
206 popfq 206 popfq
207 207
208#ifdef CONFIG_SMP
209 /*
210 * Fix up static pointers that need __per_cpu_load added. The assembler
211 * is unable to do this directly. This is only needed for the boot cpu.
212 * These values are set up with the correct base addresses by C code for
213 * secondary cpus.
214 */
215 movq initial_gs(%rip), %rax
216 cmpl $0, per_cpu__cpu_number(%rax)
217 jne 1f
218 addq %rax, early_gdt_descr_base(%rip)
2191:
220#endif
221 /* 208 /*
222 * We must switch to a new descriptor in kernel space for the GDT 209 * We must switch to a new descriptor in kernel space for the GDT
223 * because soon the kernel won't have access anymore to the userspace 210 * because soon the kernel won't have access anymore to the userspace
@@ -275,11 +262,7 @@ ENTRY(secondary_startup_64)
275 ENTRY(initial_code) 262 ENTRY(initial_code)
276 .quad x86_64_start_kernel 263 .quad x86_64_start_kernel
277 ENTRY(initial_gs) 264 ENTRY(initial_gs)
278#ifdef CONFIG_SMP 265 .quad INIT_PER_CPU_VAR(irq_stack_union)
279 .quad __per_cpu_load
280#else
281 .quad PER_CPU_VAR(irq_stack_union)
282#endif
283 __FINITDATA 266 __FINITDATA
284 267
285 ENTRY(stack_start) 268 ENTRY(stack_start)
@@ -425,7 +408,7 @@ NEXT_PAGE(level2_spare_pgt)
425early_gdt_descr: 408early_gdt_descr:
426 .word GDT_ENTRIES*8-1 409 .word GDT_ENTRIES*8-1
427early_gdt_descr_base: 410early_gdt_descr_base:
428 .quad per_cpu__gdt_page 411 .quad INIT_PER_CPU_VAR(gdt_page)
429 412
430ENTRY(phys_base) 413ENTRY(phys_base)
431 /* This must match the first entry in level2_kernel_pgt */ 414 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 388254f69a2a..a00545fe5cdd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
269 now = hpet_readl(HPET_COUNTER); 269 now = hpet_readl(HPET_COUNTER);
270 cmp = now + (unsigned long) delta; 270 cmp = now + (unsigned long) delta;
271 cfg = hpet_readl(HPET_Tn_CFG(timer)); 271 cfg = hpet_readl(HPET_Tn_CFG(timer));
272 /* Make sure we use edge triggered interrupts */
273 cfg &= ~HPET_TN_LEVEL;
272 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 274 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
273 HPET_TN_SETVAL | HPET_TN_32BIT; 275 HPET_TN_SETVAL | HPET_TN_32BIT;
274 hpet_writel(cfg, HPET_Tn_CFG(timer)); 276 hpet_writel(cfg, HPET_Tn_CFG(timer));
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f61d945620b3..7248ca11bdcd 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Intel IO-APIC support for multi-Pentium hosts. 2 * Intel IO-APIC support for multi-Pentium hosts.
3 * 3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo 4 * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
5 * 5 *
6 * Many thanks to Stig Venaas for trying out countless experimental 6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently! 7 * patches and reporting/debugging problems patiently!
@@ -62,9 +62,7 @@
62#include <asm/uv/uv_hub.h> 62#include <asm/uv/uv_hub.h>
63#include <asm/uv/uv_irq.h> 63#include <asm/uv/uv_irq.h>
64 64
65#include <mach_ipi.h> 65#include <asm/genapic.h>
66#include <mach_apic.h>
67#include <mach_apicdef.h>
68 66
69#define __apicdebuginit(type) static type __init 67#define __apicdebuginit(type) static type __init
70 68
@@ -100,10 +98,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
100 98
101int skip_ioapic_setup; 99int skip_ioapic_setup;
102 100
101void arch_disable_smp_support(void)
102{
103#ifdef CONFIG_PCI
104 noioapicquirk = 1;
105 noioapicreroute = -1;
106#endif
107 skip_ioapic_setup = 1;
108}
109
103static int __init parse_noapic(char *str) 110static int __init parse_noapic(char *str)
104{ 111{
105 /* disable IO-APIC */ 112 /* disable IO-APIC */
106 disable_ioapic_setup(); 113 arch_disable_smp_support();
107 return 0; 114 return 0;
108} 115}
109early_param("noapic", parse_noapic); 116early_param("noapic", parse_noapic);
@@ -479,7 +486,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
479 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 486 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
480} 487}
481 488
482static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 489void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
483{ 490{
484 unsigned long flags; 491 unsigned long flags;
485 spin_lock_irqsave(&ioapic_lock, flags); 492 spin_lock_irqsave(&ioapic_lock, flags);
@@ -514,11 +521,11 @@ static void send_cleanup_vector(struct irq_cfg *cfg)
514 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) 521 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
515 cfg->move_cleanup_count++; 522 cfg->move_cleanup_count++;
516 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) 523 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
517 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); 524 apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
518 } else { 525 } else {
519 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); 526 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
520 cfg->move_cleanup_count = cpumask_weight(cleanup_mask); 527 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
521 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); 528 apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
522 free_cpumask_var(cleanup_mask); 529 free_cpumask_var(cleanup_mask);
523 } 530 }
524 cfg->move_in_progress = 0; 531 cfg->move_in_progress = 0;
@@ -563,8 +570,9 @@ static int
563assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); 570assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
564 571
565/* 572/*
566 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid 573 * Either sets desc->affinity to a valid value, and returns
567 * of that, or returns BAD_APICID and leaves desc->affinity untouched. 574 * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
575 * leaves desc->affinity untouched.
568 */ 576 */
569static unsigned int 577static unsigned int
570set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) 578set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
@@ -582,7 +590,8 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
582 590
583 cpumask_and(desc->affinity, cfg->domain, mask); 591 cpumask_and(desc->affinity, cfg->domain, mask);
584 set_extra_move_desc(desc, mask); 592 set_extra_move_desc(desc, mask);
585 return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask); 593
594 return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
586} 595}
587 596
588static void 597static void
@@ -797,23 +806,6 @@ static void clear_IO_APIC (void)
797 clear_IO_APIC_pin(apic, pin); 806 clear_IO_APIC_pin(apic, pin);
798} 807}
799 808
800#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
801void send_IPI_self(int vector)
802{
803 unsigned int cfg;
804
805 /*
806 * Wait for idle.
807 */
808 apic_wait_icr_idle();
809 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
810 /*
811 * Send the IPI. The write to APIC_ICR fires this off.
812 */
813 apic_write(APIC_ICR, cfg);
814}
815#endif /* !CONFIG_SMP && CONFIG_X86_32*/
816
817#ifdef CONFIG_X86_32 809#ifdef CONFIG_X86_32
818/* 810/*
819 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to 811 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
@@ -1316,7 +1308,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1316 int new_cpu; 1308 int new_cpu;
1317 int vector, offset; 1309 int vector, offset;
1318 1310
1319 vector_allocation_domain(cpu, tmp_mask); 1311 apic->vector_allocation_domain(cpu, tmp_mask);
1320 1312
1321 vector = current_vector; 1313 vector = current_vector;
1322 offset = current_offset; 1314 offset = current_offset;
@@ -1486,10 +1478,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1486 handle_edge_irq, "edge"); 1478 handle_edge_irq, "edge");
1487} 1479}
1488 1480
1489static int setup_ioapic_entry(int apic, int irq, 1481int setup_ioapic_entry(int apic_id, int irq,
1490 struct IO_APIC_route_entry *entry, 1482 struct IO_APIC_route_entry *entry,
1491 unsigned int destination, int trigger, 1483 unsigned int destination, int trigger,
1492 int polarity, int vector) 1484 int polarity, int vector)
1493{ 1485{
1494 /* 1486 /*
1495 * add it to the IO-APIC irq-routing table: 1487 * add it to the IO-APIC irq-routing table:
@@ -1498,25 +1490,25 @@ static int setup_ioapic_entry(int apic, int irq,
1498 1490
1499#ifdef CONFIG_INTR_REMAP 1491#ifdef CONFIG_INTR_REMAP
1500 if (intr_remapping_enabled) { 1492 if (intr_remapping_enabled) {
1501 struct intel_iommu *iommu = map_ioapic_to_ir(apic); 1493 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
1502 struct irte irte; 1494 struct irte irte;
1503 struct IR_IO_APIC_route_entry *ir_entry = 1495 struct IR_IO_APIC_route_entry *ir_entry =
1504 (struct IR_IO_APIC_route_entry *) entry; 1496 (struct IR_IO_APIC_route_entry *) entry;
1505 int index; 1497 int index;
1506 1498
1507 if (!iommu) 1499 if (!iommu)
1508 panic("No mapping iommu for ioapic %d\n", apic); 1500 panic("No mapping iommu for ioapic %d\n", apic_id);
1509 1501
1510 index = alloc_irte(iommu, irq, 1); 1502 index = alloc_irte(iommu, irq, 1);
1511 if (index < 0) 1503 if (index < 0)
1512 panic("Failed to allocate IRTE for ioapic %d\n", apic); 1504 panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
1513 1505
1514 memset(&irte, 0, sizeof(irte)); 1506 memset(&irte, 0, sizeof(irte));
1515 1507
1516 irte.present = 1; 1508 irte.present = 1;
1517 irte.dst_mode = INT_DEST_MODE; 1509 irte.dst_mode = apic->irq_dest_mode;
1518 irte.trigger_mode = trigger; 1510 irte.trigger_mode = trigger;
1519 irte.dlvry_mode = INT_DELIVERY_MODE; 1511 irte.dlvry_mode = apic->irq_delivery_mode;
1520 irte.vector = vector; 1512 irte.vector = vector;
1521 irte.dest_id = IRTE_DEST(destination); 1513 irte.dest_id = IRTE_DEST(destination);
1522 1514
@@ -1529,8 +1521,8 @@ static int setup_ioapic_entry(int apic, int irq,
1529 } else 1521 } else
1530#endif 1522#endif
1531 { 1523 {
1532 entry->delivery_mode = INT_DELIVERY_MODE; 1524 entry->delivery_mode = apic->irq_delivery_mode;
1533 entry->dest_mode = INT_DEST_MODE; 1525 entry->dest_mode = apic->irq_dest_mode;
1534 entry->dest = destination; 1526 entry->dest = destination;
1535 } 1527 }
1536 1528
@@ -1547,7 +1539,7 @@ static int setup_ioapic_entry(int apic, int irq,
1547 return 0; 1539 return 0;
1548} 1540}
1549 1541
1550static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, 1542static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
1551 int trigger, int polarity) 1543 int trigger, int polarity)
1552{ 1544{
1553 struct irq_cfg *cfg; 1545 struct irq_cfg *cfg;
@@ -1559,22 +1551,22 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1559 1551
1560 cfg = desc->chip_data; 1552 cfg = desc->chip_data;
1561 1553
1562 if (assign_irq_vector(irq, cfg, TARGET_CPUS)) 1554 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1563 return; 1555 return;
1564 1556
1565 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 1557 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
1566 1558
1567 apic_printk(APIC_VERBOSE,KERN_DEBUG 1559 apic_printk(APIC_VERBOSE,KERN_DEBUG
1568 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1560 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1569 "IRQ %d Mode:%i Active:%i)\n", 1561 "IRQ %d Mode:%i Active:%i)\n",
1570 apic, mp_ioapics[apic].apicid, pin, cfg->vector, 1562 apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
1571 irq, trigger, polarity); 1563 irq, trigger, polarity);
1572 1564
1573 1565
1574 if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry, 1566 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
1575 dest, trigger, polarity, cfg->vector)) { 1567 dest, trigger, polarity, cfg->vector)) {
1576 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1568 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1577 mp_ioapics[apic].apicid, pin); 1569 mp_ioapics[apic_id].apicid, pin);
1578 __clear_irq_vector(irq, cfg); 1570 __clear_irq_vector(irq, cfg);
1579 return; 1571 return;
1580 } 1572 }
@@ -1583,12 +1575,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1583 if (irq < NR_IRQS_LEGACY) 1575 if (irq < NR_IRQS_LEGACY)
1584 disable_8259A_irq(irq); 1576 disable_8259A_irq(irq);
1585 1577
1586 ioapic_write_entry(apic, pin, entry); 1578 ioapic_write_entry(apic_id, pin, entry);
1587} 1579}
1588 1580
1589static void __init setup_IO_APIC_irqs(void) 1581static void __init setup_IO_APIC_irqs(void)
1590{ 1582{
1591 int apic, pin, idx, irq; 1583 int apic_id, pin, idx, irq;
1592 int notcon = 0; 1584 int notcon = 0;
1593 struct irq_desc *desc; 1585 struct irq_desc *desc;
1594 struct irq_cfg *cfg; 1586 struct irq_cfg *cfg;
@@ -1596,19 +1588,19 @@ static void __init setup_IO_APIC_irqs(void)
1596 1588
1597 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1589 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1598 1590
1599 for (apic = 0; apic < nr_ioapics; apic++) { 1591 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
1600 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1592 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1601 1593
1602 idx = find_irq_entry(apic, pin, mp_INT); 1594 idx = find_irq_entry(apic_id, pin, mp_INT);
1603 if (idx == -1) { 1595 if (idx == -1) {
1604 if (!notcon) { 1596 if (!notcon) {
1605 notcon = 1; 1597 notcon = 1;
1606 apic_printk(APIC_VERBOSE, 1598 apic_printk(APIC_VERBOSE,
1607 KERN_DEBUG " %d-%d", 1599 KERN_DEBUG " %d-%d",
1608 mp_ioapics[apic].apicid, pin); 1600 mp_ioapics[apic_id].apicid, pin);
1609 } else 1601 } else
1610 apic_printk(APIC_VERBOSE, " %d-%d", 1602 apic_printk(APIC_VERBOSE, " %d-%d",
1611 mp_ioapics[apic].apicid, pin); 1603 mp_ioapics[apic_id].apicid, pin);
1612 continue; 1604 continue;
1613 } 1605 }
1614 if (notcon) { 1606 if (notcon) {
@@ -1617,20 +1609,25 @@ static void __init setup_IO_APIC_irqs(void)
1617 notcon = 0; 1609 notcon = 0;
1618 } 1610 }
1619 1611
1620 irq = pin_2_irq(idx, apic, pin); 1612 irq = pin_2_irq(idx, apic_id, pin);
1621#ifdef CONFIG_X86_32 1613
1622 if (multi_timer_check(apic, irq)) 1614 /*
1615 * Skip the timer IRQ if there's a quirk handler
1616 * installed and if it returns 1:
1617 */
1618 if (apic->multi_timer_check &&
1619 apic->multi_timer_check(apic_id, irq))
1623 continue; 1620 continue;
1624#endif 1621
1625 desc = irq_to_desc_alloc_cpu(irq, cpu); 1622 desc = irq_to_desc_alloc_cpu(irq, cpu);
1626 if (!desc) { 1623 if (!desc) {
1627 printk(KERN_INFO "can not get irq_desc for %d\n", irq); 1624 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1628 continue; 1625 continue;
1629 } 1626 }
1630 cfg = desc->chip_data; 1627 cfg = desc->chip_data;
1631 add_pin_to_irq_cpu(cfg, cpu, apic, pin); 1628 add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
1632 1629
1633 setup_IO_APIC_irq(apic, pin, irq, desc, 1630 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1634 irq_trigger(idx), irq_polarity(idx)); 1631 irq_trigger(idx), irq_polarity(idx));
1635 } 1632 }
1636 } 1633 }
@@ -1643,7 +1640,7 @@ static void __init setup_IO_APIC_irqs(void)
1643/* 1640/*
1644 * Set up the timer pin, possibly with the 8259A-master behind. 1641 * Set up the timer pin, possibly with the 8259A-master behind.
1645 */ 1642 */
1646static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, 1643static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1647 int vector) 1644 int vector)
1648{ 1645{
1649 struct IO_APIC_route_entry entry; 1646 struct IO_APIC_route_entry entry;
@@ -1659,10 +1656,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1659 * We use logical delivery to get the timer IRQ 1656 * We use logical delivery to get the timer IRQ
1660 * to the first CPU. 1657 * to the first CPU.
1661 */ 1658 */
1662 entry.dest_mode = INT_DEST_MODE; 1659 entry.dest_mode = apic->irq_dest_mode;
1663 entry.mask = 1; /* mask IRQ now */ 1660 entry.mask = 0; /* don't mask IRQ for edge */
1664 entry.dest = cpu_mask_to_apicid(TARGET_CPUS); 1661 entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
1665 entry.delivery_mode = INT_DELIVERY_MODE; 1662 entry.delivery_mode = apic->irq_delivery_mode;
1666 entry.polarity = 0; 1663 entry.polarity = 0;
1667 entry.trigger = 0; 1664 entry.trigger = 0;
1668 entry.vector = vector; 1665 entry.vector = vector;
@@ -1676,7 +1673,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1676 /* 1673 /*
1677 * Add it to the IO-APIC irq-routing table: 1674 * Add it to the IO-APIC irq-routing table:
1678 */ 1675 */
1679 ioapic_write_entry(apic, pin, entry); 1676 ioapic_write_entry(apic_id, pin, entry);
1680} 1677}
1681 1678
1682 1679
@@ -2089,7 +2086,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
2089{ 2086{
2090 union IO_APIC_reg_00 reg_00; 2087 union IO_APIC_reg_00 reg_00;
2091 physid_mask_t phys_id_present_map; 2088 physid_mask_t phys_id_present_map;
2092 int apic; 2089 int apic_id;
2093 int i; 2090 int i;
2094 unsigned char old_id; 2091 unsigned char old_id;
2095 unsigned long flags; 2092 unsigned long flags;
@@ -2108,26 +2105,26 @@ static void __init setup_ioapic_ids_from_mpc(void)
2108 * This is broken; anything with a real cpu count has to 2105 * This is broken; anything with a real cpu count has to
2109 * circumvent this idiocy regardless. 2106 * circumvent this idiocy regardless.
2110 */ 2107 */
2111 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); 2108 phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
2112 2109
2113 /* 2110 /*
2114 * Set the IOAPIC ID to the value stored in the MPC table. 2111 * Set the IOAPIC ID to the value stored in the MPC table.
2115 */ 2112 */
2116 for (apic = 0; apic < nr_ioapics; apic++) { 2113 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
2117 2114
2118 /* Read the register 0 value */ 2115 /* Read the register 0 value */
2119 spin_lock_irqsave(&ioapic_lock, flags); 2116 spin_lock_irqsave(&ioapic_lock, flags);
2120 reg_00.raw = io_apic_read(apic, 0); 2117 reg_00.raw = io_apic_read(apic_id, 0);
2121 spin_unlock_irqrestore(&ioapic_lock, flags); 2118 spin_unlock_irqrestore(&ioapic_lock, flags);
2122 2119
2123 old_id = mp_ioapics[apic].apicid; 2120 old_id = mp_ioapics[apic_id].apicid;
2124 2121
2125 if (mp_ioapics[apic].apicid >= get_physical_broadcast()) { 2122 if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
2126 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 2123 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2127 apic, mp_ioapics[apic].apicid); 2124 apic_id, mp_ioapics[apic_id].apicid);
2128 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2125 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2129 reg_00.bits.ID); 2126 reg_00.bits.ID);
2130 mp_ioapics[apic].apicid = reg_00.bits.ID; 2127 mp_ioapics[apic_id].apicid = reg_00.bits.ID;
2131 } 2128 }
2132 2129
2133 /* 2130 /*
@@ -2135,10 +2132,10 @@ static void __init setup_ioapic_ids_from_mpc(void)
2135 * system must have a unique ID or we get lots of nice 2132 * system must have a unique ID or we get lots of nice
2136 * 'stuck on smp_invalidate_needed IPI wait' messages. 2133 * 'stuck on smp_invalidate_needed IPI wait' messages.
2137 */ 2134 */
2138 if (check_apicid_used(phys_id_present_map, 2135 if (apic->check_apicid_used(phys_id_present_map,
2139 mp_ioapics[apic].apicid)) { 2136 mp_ioapics[apic_id].apicid)) {
2140 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 2137 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2141 apic, mp_ioapics[apic].apicid); 2138 apic_id, mp_ioapics[apic_id].apicid);
2142 for (i = 0; i < get_physical_broadcast(); i++) 2139 for (i = 0; i < get_physical_broadcast(); i++)
2143 if (!physid_isset(i, phys_id_present_map)) 2140 if (!physid_isset(i, phys_id_present_map))
2144 break; 2141 break;
@@ -2147,13 +2144,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
2147 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2144 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2148 i); 2145 i);
2149 physid_set(i, phys_id_present_map); 2146 physid_set(i, phys_id_present_map);
2150 mp_ioapics[apic].apicid = i; 2147 mp_ioapics[apic_id].apicid = i;
2151 } else { 2148 } else {
2152 physid_mask_t tmp; 2149 physid_mask_t tmp;
2153 tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid); 2150 tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
2154 apic_printk(APIC_VERBOSE, "Setting %d in the " 2151 apic_printk(APIC_VERBOSE, "Setting %d in the "
2155 "phys_id_present_map\n", 2152 "phys_id_present_map\n",
2156 mp_ioapics[apic].apicid); 2153 mp_ioapics[apic_id].apicid);
2157 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2154 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2158 } 2155 }
2159 2156
@@ -2162,11 +2159,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
2162 * We need to adjust the IRQ routing table 2159 * We need to adjust the IRQ routing table
2163 * if the ID changed. 2160 * if the ID changed.
2164 */ 2161 */
2165 if (old_id != mp_ioapics[apic].apicid) 2162 if (old_id != mp_ioapics[apic_id].apicid)
2166 for (i = 0; i < mp_irq_entries; i++) 2163 for (i = 0; i < mp_irq_entries; i++)
2167 if (mp_irqs[i].dstapic == old_id) 2164 if (mp_irqs[i].dstapic == old_id)
2168 mp_irqs[i].dstapic 2165 mp_irqs[i].dstapic
2169 = mp_ioapics[apic].apicid; 2166 = mp_ioapics[apic_id].apicid;
2170 2167
2171 /* 2168 /*
2172 * Read the right value from the MPC table and 2169 * Read the right value from the MPC table and
@@ -2174,20 +2171,20 @@ static void __init setup_ioapic_ids_from_mpc(void)
2174 */ 2171 */
2175 apic_printk(APIC_VERBOSE, KERN_INFO 2172 apic_printk(APIC_VERBOSE, KERN_INFO
2176 "...changing IO-APIC physical APIC ID to %d ...", 2173 "...changing IO-APIC physical APIC ID to %d ...",
2177 mp_ioapics[apic].apicid); 2174 mp_ioapics[apic_id].apicid);
2178 2175
2179 reg_00.bits.ID = mp_ioapics[apic].apicid; 2176 reg_00.bits.ID = mp_ioapics[apic_id].apicid;
2180 spin_lock_irqsave(&ioapic_lock, flags); 2177 spin_lock_irqsave(&ioapic_lock, flags);
2181 io_apic_write(apic, 0, reg_00.raw); 2178 io_apic_write(apic_id, 0, reg_00.raw);
2182 spin_unlock_irqrestore(&ioapic_lock, flags); 2179 spin_unlock_irqrestore(&ioapic_lock, flags);
2183 2180
2184 /* 2181 /*
2185 * Sanity check 2182 * Sanity check
2186 */ 2183 */
2187 spin_lock_irqsave(&ioapic_lock, flags); 2184 spin_lock_irqsave(&ioapic_lock, flags);
2188 reg_00.raw = io_apic_read(apic, 0); 2185 reg_00.raw = io_apic_read(apic_id, 0);
2189 spin_unlock_irqrestore(&ioapic_lock, flags); 2186 spin_unlock_irqrestore(&ioapic_lock, flags);
2190 if (reg_00.bits.ID != mp_ioapics[apic].apicid) 2187 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
2191 printk("could not set ID!\n"); 2188 printk("could not set ID!\n");
2192 else 2189 else
2193 apic_printk(APIC_VERBOSE, " ok.\n"); 2190 apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2290,7 +2287,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2290 unsigned long flags; 2287 unsigned long flags;
2291 2288
2292 spin_lock_irqsave(&vector_lock, flags); 2289 spin_lock_irqsave(&vector_lock, flags);
2293 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); 2290 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2294 spin_unlock_irqrestore(&vector_lock, flags); 2291 spin_unlock_irqrestore(&vector_lock, flags);
2295 2292
2296 return 1; 2293 return 1;
@@ -2298,7 +2295,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2298#else 2295#else
2299static int ioapic_retrigger_irq(unsigned int irq) 2296static int ioapic_retrigger_irq(unsigned int irq)
2300{ 2297{
2301 send_IPI_self(irq_cfg(irq)->vector); 2298 apic->send_IPI_self(irq_cfg(irq)->vector);
2302 2299
2303 return 1; 2300 return 1;
2304} 2301}
@@ -2362,7 +2359,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2362 2359
2363 set_extra_move_desc(desc, mask); 2360 set_extra_move_desc(desc, mask);
2364 2361
2365 dest = cpu_mask_to_apicid_and(cfg->domain, mask); 2362 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2366 2363
2367 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2364 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2368 if (modify_ioapic_rte) { 2365 if (modify_ioapic_rte) {
@@ -2866,19 +2863,15 @@ static inline void __init check_timer(void)
2866 int cpu = boot_cpu_id; 2863 int cpu = boot_cpu_id;
2867 int apic1, pin1, apic2, pin2; 2864 int apic1, pin1, apic2, pin2;
2868 unsigned long flags; 2865 unsigned long flags;
2869 unsigned int ver;
2870 int no_pin1 = 0; 2866 int no_pin1 = 0;
2871 2867
2872 local_irq_save(flags); 2868 local_irq_save(flags);
2873 2869
2874 ver = apic_read(APIC_LVR);
2875 ver = GET_APIC_VERSION(ver);
2876
2877 /* 2870 /*
2878 * get/set the timer IRQ vector: 2871 * get/set the timer IRQ vector:
2879 */ 2872 */
2880 disable_8259A_irq(0); 2873 disable_8259A_irq(0);
2881 assign_irq_vector(0, cfg, TARGET_CPUS); 2874 assign_irq_vector(0, cfg, apic->target_cpus());
2882 2875
2883 /* 2876 /*
2884 * As IRQ0 is to be enabled in the 8259A, the virtual 2877 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2892,7 +2885,13 @@ static inline void __init check_timer(void)
2892 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2885 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2893 init_8259A(1); 2886 init_8259A(1);
2894#ifdef CONFIG_X86_32 2887#ifdef CONFIG_X86_32
2895 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); 2888 {
2889 unsigned int ver;
2890
2891 ver = apic_read(APIC_LVR);
2892 ver = GET_APIC_VERSION(ver);
2893 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2894 }
2896#endif 2895#endif
2897 2896
2898 pin1 = find_isa_irq_pin(0, mp_INT); 2897 pin1 = find_isa_irq_pin(0, mp_INT);
@@ -2931,8 +2930,17 @@ static inline void __init check_timer(void)
2931 if (no_pin1) { 2930 if (no_pin1) {
2932 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); 2931 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
2933 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2932 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2933 } else {
2934 /* for edge trigger, setup_IO_APIC_irq already
2935 * leave it unmasked.
2936 * so only need to unmask if it is level-trigger
2937 * do we really have level trigger timer?
2938 */
2939 int idx;
2940 idx = find_irq_entry(apic1, pin1, mp_INT);
2941 if (idx != -1 && irq_trigger(idx))
2942 unmask_IO_APIC_irq_desc(desc);
2934 } 2943 }
2935 unmask_IO_APIC_irq_desc(desc);
2936 if (timer_irq_works()) { 2944 if (timer_irq_works()) {
2937 if (nmi_watchdog == NMI_IO_APIC) { 2945 if (nmi_watchdog == NMI_IO_APIC) {
2938 setup_nmi(); 2946 setup_nmi();
@@ -2946,6 +2954,7 @@ static inline void __init check_timer(void)
2946 if (intr_remapping_enabled) 2954 if (intr_remapping_enabled)
2947 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2955 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2948#endif 2956#endif
2957 local_irq_disable();
2949 clear_IO_APIC_pin(apic1, pin1); 2958 clear_IO_APIC_pin(apic1, pin1);
2950 if (!no_pin1) 2959 if (!no_pin1)
2951 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2960 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2960,7 +2969,6 @@ static inline void __init check_timer(void)
2960 */ 2969 */
2961 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); 2970 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
2962 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2971 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2963 unmask_IO_APIC_irq_desc(desc);
2964 enable_8259A_irq(0); 2972 enable_8259A_irq(0);
2965 if (timer_irq_works()) { 2973 if (timer_irq_works()) {
2966 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2974 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2975,6 +2983,7 @@ static inline void __init check_timer(void)
2975 /* 2983 /*
2976 * Cleanup, just in case ... 2984 * Cleanup, just in case ...
2977 */ 2985 */
2986 local_irq_disable();
2978 disable_8259A_irq(0); 2987 disable_8259A_irq(0);
2979 clear_IO_APIC_pin(apic2, pin2); 2988 clear_IO_APIC_pin(apic2, pin2);
2980 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2989 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -3000,6 +3009,7 @@ static inline void __init check_timer(void)
3000 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3009 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3001 goto out; 3010 goto out;
3002 } 3011 }
3012 local_irq_disable();
3003 disable_8259A_irq(0); 3013 disable_8259A_irq(0);
3004 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 3014 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3005 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 3015 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -3017,6 +3027,7 @@ static inline void __init check_timer(void)
3017 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3027 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3018 goto out; 3028 goto out;
3019 } 3029 }
3030 local_irq_disable();
3020 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); 3031 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
3021 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 3032 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
3022 "report. Then try booting with the 'noapic' option.\n"); 3033 "report. Then try booting with the 'noapic' option.\n");
@@ -3168,6 +3179,7 @@ static int __init ioapic_init_sysfs(void)
3168 3179
3169device_initcall(ioapic_init_sysfs); 3180device_initcall(ioapic_init_sysfs);
3170 3181
3182static int nr_irqs_gsi = NR_IRQS_LEGACY;
3171/* 3183/*
3172 * Dynamic irq allocate and deallocation 3184 * Dynamic irq allocate and deallocation
3173 */ 3185 */
@@ -3182,11 +3194,11 @@ unsigned int create_irq_nr(unsigned int irq_want)
3182 struct irq_desc *desc_new = NULL; 3194 struct irq_desc *desc_new = NULL;
3183 3195
3184 irq = 0; 3196 irq = 0;
3197 if (irq_want < nr_irqs_gsi)
3198 irq_want = nr_irqs_gsi;
3199
3185 spin_lock_irqsave(&vector_lock, flags); 3200 spin_lock_irqsave(&vector_lock, flags);
3186 for (new = irq_want; new < nr_irqs; new++) { 3201 for (new = irq_want; new < nr_irqs; new++) {
3187 if (platform_legacy_irq(new))
3188 continue;
3189
3190 desc_new = irq_to_desc_alloc_cpu(new, cpu); 3202 desc_new = irq_to_desc_alloc_cpu(new, cpu);
3191 if (!desc_new) { 3203 if (!desc_new) {
3192 printk(KERN_INFO "can not get irq_desc for %d\n", new); 3204 printk(KERN_INFO "can not get irq_desc for %d\n", new);
@@ -3196,7 +3208,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
3196 3208
3197 if (cfg_new->vector != 0) 3209 if (cfg_new->vector != 0)
3198 continue; 3210 continue;
3199 if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) 3211 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
3200 irq = new; 3212 irq = new;
3201 break; 3213 break;
3202 } 3214 }
@@ -3211,7 +3223,6 @@ unsigned int create_irq_nr(unsigned int irq_want)
3211 return irq; 3223 return irq;
3212} 3224}
3213 3225
3214static int nr_irqs_gsi = NR_IRQS_LEGACY;
3215int create_irq(void) 3226int create_irq(void)
3216{ 3227{
3217 unsigned int irq_want; 3228 unsigned int irq_want;
@@ -3262,11 +3273,11 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3262 return -ENXIO; 3273 return -ENXIO;
3263 3274
3264 cfg = irq_cfg(irq); 3275 cfg = irq_cfg(irq);
3265 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3276 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3266 if (err) 3277 if (err)
3267 return err; 3278 return err;
3268 3279
3269 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 3280 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3270 3281
3271#ifdef CONFIG_INTR_REMAP 3282#ifdef CONFIG_INTR_REMAP
3272 if (irq_remapped(irq)) { 3283 if (irq_remapped(irq)) {
@@ -3280,9 +3291,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3280 memset (&irte, 0, sizeof(irte)); 3291 memset (&irte, 0, sizeof(irte));
3281 3292
3282 irte.present = 1; 3293 irte.present = 1;
3283 irte.dst_mode = INT_DEST_MODE; 3294 irte.dst_mode = apic->irq_dest_mode;
3284 irte.trigger_mode = 0; /* edge */ 3295 irte.trigger_mode = 0; /* edge */
3285 irte.dlvry_mode = INT_DELIVERY_MODE; 3296 irte.dlvry_mode = apic->irq_delivery_mode;
3286 irte.vector = cfg->vector; 3297 irte.vector = cfg->vector;
3287 irte.dest_id = IRTE_DEST(dest); 3298 irte.dest_id = IRTE_DEST(dest);
3288 3299
@@ -3300,10 +3311,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3300 msg->address_hi = MSI_ADDR_BASE_HI; 3311 msg->address_hi = MSI_ADDR_BASE_HI;
3301 msg->address_lo = 3312 msg->address_lo =
3302 MSI_ADDR_BASE_LO | 3313 MSI_ADDR_BASE_LO |
3303 ((INT_DEST_MODE == 0) ? 3314 ((apic->irq_dest_mode == 0) ?
3304 MSI_ADDR_DEST_MODE_PHYSICAL: 3315 MSI_ADDR_DEST_MODE_PHYSICAL:
3305 MSI_ADDR_DEST_MODE_LOGICAL) | 3316 MSI_ADDR_DEST_MODE_LOGICAL) |
3306 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3317 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3307 MSI_ADDR_REDIRECTION_CPU: 3318 MSI_ADDR_REDIRECTION_CPU:
3308 MSI_ADDR_REDIRECTION_LOWPRI) | 3319 MSI_ADDR_REDIRECTION_LOWPRI) |
3309 MSI_ADDR_DEST_ID(dest); 3320 MSI_ADDR_DEST_ID(dest);
@@ -3311,7 +3322,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3311 msg->data = 3322 msg->data =
3312 MSI_DATA_TRIGGER_EDGE | 3323 MSI_DATA_TRIGGER_EDGE |
3313 MSI_DATA_LEVEL_ASSERT | 3324 MSI_DATA_LEVEL_ASSERT |
3314 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3325 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3315 MSI_DATA_DELIVERY_FIXED: 3326 MSI_DATA_DELIVERY_FIXED:
3316 MSI_DATA_DELIVERY_LOWPRI) | 3327 MSI_DATA_DELIVERY_LOWPRI) |
3317 MSI_DATA_VECTOR(cfg->vector); 3328 MSI_DATA_VECTOR(cfg->vector);
@@ -3466,40 +3477,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3466 return 0; 3477 return 0;
3467} 3478}
3468 3479
3469int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3470{
3471 unsigned int irq;
3472 int ret;
3473 unsigned int irq_want;
3474
3475 irq_want = nr_irqs_gsi;
3476 irq = create_irq_nr(irq_want);
3477 if (irq == 0)
3478 return -1;
3479
3480#ifdef CONFIG_INTR_REMAP
3481 if (!intr_remapping_enabled)
3482 goto no_ir;
3483
3484 ret = msi_alloc_irte(dev, irq, 1);
3485 if (ret < 0)
3486 goto error;
3487no_ir:
3488#endif
3489 ret = setup_msi_irq(dev, msidesc, irq);
3490 if (ret < 0) {
3491 destroy_irq(irq);
3492 return ret;
3493 }
3494 return 0;
3495
3496#ifdef CONFIG_INTR_REMAP
3497error:
3498 destroy_irq(irq);
3499 return ret;
3500#endif
3501}
3502
3503int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3480int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3504{ 3481{
3505 unsigned int irq; 3482 unsigned int irq;
@@ -3516,9 +3493,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3516 sub_handle = 0; 3493 sub_handle = 0;
3517 list_for_each_entry(msidesc, &dev->msi_list, list) { 3494 list_for_each_entry(msidesc, &dev->msi_list, list) {
3518 irq = create_irq_nr(irq_want); 3495 irq = create_irq_nr(irq_want);
3519 irq_want++;
3520 if (irq == 0) 3496 if (irq == 0)
3521 return -1; 3497 return -1;
3498 irq_want = irq + 1;
3522#ifdef CONFIG_INTR_REMAP 3499#ifdef CONFIG_INTR_REMAP
3523 if (!intr_remapping_enabled) 3500 if (!intr_remapping_enabled)
3524 goto no_ir; 3501 goto no_ir;
@@ -3733,12 +3710,13 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3733 return -ENXIO; 3710 return -ENXIO;
3734 3711
3735 cfg = irq_cfg(irq); 3712 cfg = irq_cfg(irq);
3736 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3713 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3737 if (!err) { 3714 if (!err) {
3738 struct ht_irq_msg msg; 3715 struct ht_irq_msg msg;
3739 unsigned dest; 3716 unsigned dest;
3740 3717
3741 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 3718 dest = apic->cpu_mask_to_apicid_and(cfg->domain,
3719 apic->target_cpus());
3742 3720
3743 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3721 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3744 3722
@@ -3746,11 +3724,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3746 HT_IRQ_LOW_BASE | 3724 HT_IRQ_LOW_BASE |
3747 HT_IRQ_LOW_DEST_ID(dest) | 3725 HT_IRQ_LOW_DEST_ID(dest) |
3748 HT_IRQ_LOW_VECTOR(cfg->vector) | 3726 HT_IRQ_LOW_VECTOR(cfg->vector) |
3749 ((INT_DEST_MODE == 0) ? 3727 ((apic->irq_dest_mode == 0) ?
3750 HT_IRQ_LOW_DM_PHYSICAL : 3728 HT_IRQ_LOW_DM_PHYSICAL :
3751 HT_IRQ_LOW_DM_LOGICAL) | 3729 HT_IRQ_LOW_DM_LOGICAL) |
3752 HT_IRQ_LOW_RQEOI_EDGE | 3730 HT_IRQ_LOW_RQEOI_EDGE |
3753 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3731 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3754 HT_IRQ_LOW_MT_FIXED : 3732 HT_IRQ_LOW_MT_FIXED :
3755 HT_IRQ_LOW_MT_ARBITRATED) | 3733 HT_IRQ_LOW_MT_ARBITRATED) |
3756 HT_IRQ_LOW_IRQ_MASKED; 3734 HT_IRQ_LOW_IRQ_MASKED;
@@ -3798,12 +3776,12 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3798 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3776 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3799 3777
3800 entry->vector = cfg->vector; 3778 entry->vector = cfg->vector;
3801 entry->delivery_mode = INT_DELIVERY_MODE; 3779 entry->delivery_mode = apic->irq_delivery_mode;
3802 entry->dest_mode = INT_DEST_MODE; 3780 entry->dest_mode = apic->irq_dest_mode;
3803 entry->polarity = 0; 3781 entry->polarity = 0;
3804 entry->trigger = 0; 3782 entry->trigger = 0;
3805 entry->mask = 0; 3783 entry->mask = 0;
3806 entry->dest = cpu_mask_to_apicid(eligible_cpu); 3784 entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
3807 3785
3808 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3786 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3809 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3787 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3871,11 +3849,17 @@ int __init arch_probe_nr_irqs(void)
3871{ 3849{
3872 int nr; 3850 int nr;
3873 3851
3874 nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ? 3852 if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
3875 (NR_VECTORS + (8 * nr_cpu_ids)) : 3853 nr_irqs = NR_VECTORS * nr_cpu_ids;
3876 (NR_VECTORS + (32 * nr_ioapics)));
3877 3854
3878 if (nr < nr_irqs && nr > nr_irqs_gsi) 3855 nr = nr_irqs_gsi + 8 * nr_cpu_ids;
3856#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
3857 /*
3858 * for MSI and HT dyn irq
3859 */
3860 nr += nr_irqs_gsi * 16;
3861#endif
3862 if (nr < nr_irqs)
3879 nr_irqs = nr; 3863 nr_irqs = nr;
3880 3864
3881 return 0; 3865 return 0;
@@ -3907,7 +3891,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3907 */ 3891 */
3908 3892
3909 if (physids_empty(apic_id_map)) 3893 if (physids_empty(apic_id_map))
3910 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); 3894 apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
3911 3895
3912 spin_lock_irqsave(&ioapic_lock, flags); 3896 spin_lock_irqsave(&ioapic_lock, flags);
3913 reg_00.raw = io_apic_read(ioapic, 0); 3897 reg_00.raw = io_apic_read(ioapic, 0);
@@ -3923,10 +3907,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3923 * Every APIC in a system must have a unique ID or we get lots of nice 3907 * Every APIC in a system must have a unique ID or we get lots of nice
3924 * 'stuck on smp_invalidate_needed IPI wait' messages. 3908 * 'stuck on smp_invalidate_needed IPI wait' messages.
3925 */ 3909 */
3926 if (check_apicid_used(apic_id_map, apic_id)) { 3910 if (apic->check_apicid_used(apic_id_map, apic_id)) {
3927 3911
3928 for (i = 0; i < get_physical_broadcast(); i++) { 3912 for (i = 0; i < get_physical_broadcast(); i++) {
3929 if (!check_apicid_used(apic_id_map, i)) 3913 if (!apic->check_apicid_used(apic_id_map, i))
3930 break; 3914 break;
3931 } 3915 }
3932 3916
@@ -3939,7 +3923,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3939 apic_id = i; 3923 apic_id = i;
3940 } 3924 }
3941 3925
3942 tmp = apicid_to_cpu_present(apic_id); 3926 tmp = apic->apicid_to_cpu_present(apic_id);
3943 physids_or(apic_id_map, apic_id_map, tmp); 3927 physids_or(apic_id_map, apic_id_map, tmp);
3944 3928
3945 if (reg_00.bits.ID != apic_id) { 3929 if (reg_00.bits.ID != apic_id) {
@@ -4032,7 +4016,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4032/* 4016/*
4033 * This function currently is only a helper for the i386 smp boot process where 4017 * This function currently is only a helper for the i386 smp boot process where
4034 * we need to reprogram the ioredtbls to cater for the cpus which have come online 4018 * we need to reprogram the ioredtbls to cater for the cpus which have come online
4035 * so mask in all cases should simply be TARGET_CPUS 4019 * so mask in all cases should simply be apic->target_cpus()
4036 */ 4020 */
4037#ifdef CONFIG_SMP 4021#ifdef CONFIG_SMP
4038void __init setup_ioapic_dest(void) 4022void __init setup_ioapic_dest(void)
@@ -4073,7 +4057,7 @@ void __init setup_ioapic_dest(void)
4073 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4057 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4074 mask = desc->affinity; 4058 mask = desc->affinity;
4075 else 4059 else
4076 mask = TARGET_CPUS; 4060 mask = apic->target_cpus();
4077 4061
4078#ifdef CONFIG_INTR_REMAP 4062#ifdef CONFIG_INTR_REMAP
4079 if (intr_remapping_enabled) 4063 if (intr_remapping_enabled)
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index b12208f4dfee..e41980a373ab 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
131} 131}
132 132
133#ifdef CONFIG_X86_32 133#ifdef CONFIG_X86_32
134asmlinkage long sys_iopl(unsigned long regsp) 134long sys_iopl(struct pt_regs *regs)
135{ 135{
136 struct pt_regs *regs = (struct pt_regs *)&regsp;
137 unsigned int level = regs->bx; 136 unsigned int level = regs->bx;
138 struct thread_struct *t = &current->thread; 137 struct thread_struct *t = &current->thread;
139 int rc; 138 int rc;
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 285bbf8831fa..dbf5445727a9 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -17,147 +17,121 @@
17#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
18#include <asm/apic.h> 18#include <asm/apic.h>
19#include <asm/proto.h> 19#include <asm/proto.h>
20#include <asm/ipi.h>
20 21
21#ifdef CONFIG_X86_32 22void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
22#include <mach_apic.h>
23#include <mach_ipi.h>
24
25/*
26 * the following functions deal with sending IPIs between CPUs.
27 *
28 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
29 */
30
31static inline int __prepare_ICR(unsigned int shortcut, int vector)
32{ 23{
33 unsigned int icr = shortcut | APIC_DEST_LOGICAL; 24 unsigned long query_cpu;
34 25 unsigned long flags;
35 switch (vector) { 26
36 default: 27 /*
37 icr |= APIC_DM_FIXED | vector; 28 * Hack. The clustered APIC addressing mode doesn't allow us to send
38 break; 29 * to an arbitrary mask, so I do a unicast to each CPU instead.
39 case NMI_VECTOR: 30 * - mbligh
40 icr |= APIC_DM_NMI; 31 */
41 break; 32 local_irq_save(flags);
33 for_each_cpu(query_cpu, mask) {
34 __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
35 query_cpu), vector, APIC_DEST_PHYSICAL);
42 } 36 }
43 return icr; 37 local_irq_restore(flags);
44} 38}
45 39
46static inline int __prepare_ICR2(unsigned int mask) 40void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
41 int vector)
47{ 42{
48 return SET_APIC_DEST_FIELD(mask); 43 unsigned int this_cpu = smp_processor_id();
49} 44 unsigned int query_cpu;
45 unsigned long flags;
50 46
51void __send_IPI_shortcut(unsigned int shortcut, int vector) 47 /* See Hack comment above */
52{
53 /*
54 * Subtle. In the case of the 'never do double writes' workaround
55 * we have to lock out interrupts to be safe. As we don't care
56 * of the value read we use an atomic rmw access to avoid costly
57 * cli/sti. Otherwise we use an even cheaper single atomic write
58 * to the APIC.
59 */
60 unsigned int cfg;
61 48
62 /* 49 local_irq_save(flags);
63 * Wait for idle. 50 for_each_cpu(query_cpu, mask) {
64 */ 51 if (query_cpu == this_cpu)
65 apic_wait_icr_idle(); 52 continue;
53 __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
54 query_cpu), vector, APIC_DEST_PHYSICAL);
55 }
56 local_irq_restore(flags);
57}
66 58
67 /* 59void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
68 * No need to touch the target chip field 60 int vector)
69 */ 61{
70 cfg = __prepare_ICR(shortcut, vector); 62 unsigned long flags;
63 unsigned int query_cpu;
71 64
72 /* 65 /*
73 * Send the IPI. The write to APIC_ICR fires this off. 66 * Hack. The clustered APIC addressing mode doesn't allow us to send
67 * to an arbitrary mask, so I do a unicasts to each CPU instead. This
68 * should be modified to do 1 message per cluster ID - mbligh
74 */ 69 */
75 apic_write(APIC_ICR, cfg);
76}
77 70
78void send_IPI_self(int vector) 71 local_irq_save(flags);
79{ 72 for_each_cpu(query_cpu, mask)
80 __send_IPI_shortcut(APIC_DEST_SELF, vector); 73 __default_send_IPI_dest_field(
74 apic->cpu_to_logical_apicid(query_cpu), vector,
75 apic->dest_logical);
76 local_irq_restore(flags);
81} 77}
82 78
83/* 79void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
84 * This is used to send an IPI with no shorthand notation (the destination is 80 int vector)
85 * specified in bits 56 to 63 of the ICR).
86 */
87static inline void __send_IPI_dest_field(unsigned long mask, int vector)
88{ 81{
89 unsigned long cfg; 82 unsigned long flags;
90 83 unsigned int query_cpu;
91 /* 84 unsigned int this_cpu = smp_processor_id();
92 * Wait for idle.
93 */
94 if (unlikely(vector == NMI_VECTOR))
95 safe_apic_wait_icr_idle();
96 else
97 apic_wait_icr_idle();
98
99 /*
100 * prepare target chip field
101 */
102 cfg = __prepare_ICR2(mask);
103 apic_write(APIC_ICR2, cfg);
104 85
105 /* 86 /* See Hack comment above */
106 * program the ICR
107 */
108 cfg = __prepare_ICR(0, vector);
109 87
110 /* 88 local_irq_save(flags);
111 * Send the IPI. The write to APIC_ICR fires this off. 89 for_each_cpu(query_cpu, mask) {
112 */ 90 if (query_cpu == this_cpu)
113 apic_write(APIC_ICR, cfg); 91 continue;
92 __default_send_IPI_dest_field(
93 apic->cpu_to_logical_apicid(query_cpu), vector,
94 apic->dest_logical);
95 }
96 local_irq_restore(flags);
114} 97}
115 98
99#ifdef CONFIG_X86_32
100
116/* 101/*
117 * This is only used on smaller machines. 102 * This is only used on smaller machines.
118 */ 103 */
119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) 104void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
120{ 105{
121 unsigned long mask = cpumask_bits(cpumask)[0]; 106 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags; 107 unsigned long flags;
123 108
124 local_irq_save(flags); 109 local_irq_save(flags);
125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); 110 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector); 111 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
127 local_irq_restore(flags); 112 local_irq_restore(flags);
128} 113}
129 114
130void send_IPI_mask_sequence(const struct cpumask *mask, int vector) 115void default_send_IPI_allbutself(int vector)
131{ 116{
132 unsigned long flags;
133 unsigned int query_cpu;
134
135 /* 117 /*
136 * Hack. The clustered APIC addressing mode doesn't allow us to send 118 * if there are no other CPUs in the system then we get an APIC send
137 * to an arbitrary mask, so I do a unicasts to each CPU instead. This 119 * error if we try to broadcast, thus avoid sending IPIs in this case.
138 * should be modified to do 1 message per cluster ID - mbligh
139 */ 120 */
121 if (!(num_online_cpus() > 1))
122 return;
140 123
141 local_irq_save(flags); 124 __default_local_send_IPI_allbutself(vector);
142 for_each_cpu(query_cpu, mask)
143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145} 125}
146 126
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) 127void default_send_IPI_all(int vector)
148{ 128{
149 unsigned long flags; 129 __default_local_send_IPI_all(vector);
150 unsigned int query_cpu; 130}
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154 131
155 local_irq_save(flags); 132void default_send_IPI_self(int vector)
156 for_each_cpu(query_cpu, mask) 133{
157 if (query_cpu != this_cpu) 134 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
159 vector);
160 local_irq_restore(flags);
161} 135}
162 136
163/* must come after the send_IPI functions above for inlining */ 137/* must come after the send_IPI functions above for inlining */
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index a6bca1d33a8a..7c95c8918a8f 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -6,10 +6,12 @@
6#include <linux/kernel_stat.h> 6#include <linux/kernel_stat.h>
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/smp.h> 8#include <linux/smp.h>
9#include <linux/ftrace.h>
9 10
10#include <asm/apic.h> 11#include <asm/apic.h>
11#include <asm/io_apic.h> 12#include <asm/io_apic.h>
12#include <asm/irq.h> 13#include <asm/irq.h>
14#include <asm/idle.h>
13 15
14atomic_t irq_err_count; 16atomic_t irq_err_count;
15 17
@@ -193,4 +195,40 @@ u64 arch_irq_stat(void)
193 return sum; 195 return sum;
194} 196}
195 197
198
199/*
200 * do_IRQ handles all normal device IRQ's (the special
201 * SMP cross-CPU interrupts have their own specific
202 * handlers).
203 */
204unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
205{
206 struct pt_regs *old_regs = set_irq_regs(regs);
207
208 /* high bit used in ret_from_ code */
209 unsigned vector = ~regs->orig_ax;
210 unsigned irq;
211
212 exit_idle();
213 irq_enter();
214
215 irq = __get_cpu_var(vector_irq)[vector];
216
217 if (!handle_irq(irq, regs)) {
218#ifdef CONFIG_X86_64
219 if (!disable_apic)
220 ack_APIC_irq();
221#endif
222
223 if (printk_ratelimit())
224 printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
225 __func__, smp_processor_id(), vector, irq);
226 }
227
228 irq_exit();
229
230 set_irq_regs(old_regs);
231 return 1;
232}
233
196EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); 234EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index e0f29be8ab0b..4beb9a13873d 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -191,33 +191,16 @@ static inline int
191execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } 191execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
192#endif 192#endif
193 193
194/* 194bool handle_irq(unsigned irq, struct pt_regs *regs)
195 * do_IRQ handles all normal device IRQ's (the special
196 * SMP cross-CPU interrupts have their own specific
197 * handlers).
198 */
199unsigned int do_IRQ(struct pt_regs *regs)
200{ 195{
201 struct pt_regs *old_regs;
202 /* high bit used in ret_from_ code */
203 int overflow;
204 unsigned vector = ~regs->orig_ax;
205 struct irq_desc *desc; 196 struct irq_desc *desc;
206 unsigned irq; 197 int overflow;
207
208
209 old_regs = set_irq_regs(regs);
210 irq_enter();
211 irq = __get_cpu_var(vector_irq)[vector];
212 198
213 overflow = check_stack_overflow(); 199 overflow = check_stack_overflow();
214 200
215 desc = irq_to_desc(irq); 201 desc = irq_to_desc(irq);
216 if (unlikely(!desc)) { 202 if (unlikely(!desc))
217 printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", 203 return false;
218 __func__, irq, vector, smp_processor_id());
219 BUG();
220 }
221 204
222 if (!execute_on_irq_stack(overflow, desc, irq)) { 205 if (!execute_on_irq_stack(overflow, desc, irq)) {
223 if (unlikely(overflow)) 206 if (unlikely(overflow))
@@ -225,13 +208,11 @@ unsigned int do_IRQ(struct pt_regs *regs)
225 desc->handle_irq(irq, desc); 208 desc->handle_irq(irq, desc);
226 } 209 }
227 210
228 irq_exit(); 211 return true;
229 set_irq_regs(old_regs);
230 return 1;
231} 212}
232 213
233#ifdef CONFIG_HOTPLUG_CPU 214#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h> 215#include <asm/genapic.h>
235 216
236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 217/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void) 218void fixup_irqs(void)
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 018963aa6ee3..977d8b43a0dd 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -48,42 +48,18 @@ static inline void stack_overflow_check(struct pt_regs *regs)
48#endif 48#endif
49} 49}
50 50
51/* 51bool handle_irq(unsigned irq, struct pt_regs *regs)
52 * do_IRQ handles all normal device IRQ's (the special
53 * SMP cross-CPU interrupts have their own specific
54 * handlers).
55 */
56asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
57{ 52{
58 struct pt_regs *old_regs = set_irq_regs(regs);
59 struct irq_desc *desc; 53 struct irq_desc *desc;
60 54
61 /* high bit used in ret_from_ code */
62 unsigned vector = ~regs->orig_ax;
63 unsigned irq;
64
65 exit_idle();
66 irq_enter();
67 irq = __get_cpu_var(vector_irq)[vector];
68
69 stack_overflow_check(regs); 55 stack_overflow_check(regs);
70 56
71 desc = irq_to_desc(irq); 57 desc = irq_to_desc(irq);
72 if (likely(desc)) 58 if (unlikely(!desc))
73 generic_handle_irq_desc(irq, desc); 59 return false;
74 else {
75 if (!disable_apic)
76 ack_APIC_irq();
77
78 if (printk_ratelimit())
79 printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
80 __func__, smp_processor_id(), vector);
81 }
82
83 irq_exit();
84 60
85 set_irq_regs(old_regs); 61 generic_handle_irq_desc(irq, desc);
86 return 1; 62 return true;
87} 63}
88 64
89#ifdef CONFIG_HOTPLUG_CPU 65#ifdef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index f6ff71cdaba8..520e6c1c5d22 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -78,6 +78,15 @@ void __init init_ISA_irqs(void)
78 } 78 }
79} 79}
80 80
81/*
82 * IRQ2 is cascade interrupt to second interrupt controller
83 */
84static struct irqaction irq2 = {
85 .handler = no_action,
86 .mask = CPU_MASK_NONE,
87 .name = "cascade",
88};
89
81DEFINE_PER_CPU(vector_irq_t, vector_irq) = { 90DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
82 [0 ... IRQ0_VECTOR - 1] = -1, 91 [0 ... IRQ0_VECTOR - 1] = -1,
83 [IRQ0_VECTOR] = 0, 92 [IRQ0_VECTOR] = 0,
@@ -189,6 +198,9 @@ void __init native_init_IRQ(void)
189 set_intr_gate(vector, interrupt[i]); 198 set_intr_gate(vector, interrupt[i]);
190 } 199 }
191 200
201 if (!acpi_ioapic)
202 setup_irq(2, &irq2);
203
192 /* setup after call gates are initialised (usually add in 204 /* setup after call gates are initialised (usually add in
193 * the architecture specific gates) 205 * the architecture specific gates)
194 */ 206 */
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 10435a120d22..5c4f55483849 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -46,7 +46,7 @@
46#include <asm/apicdef.h> 46#include <asm/apicdef.h>
47#include <asm/system.h> 47#include <asm/system.h>
48 48
49#include <mach_ipi.h> 49#include <asm/genapic.h>
50 50
51/* 51/*
52 * Put the error code here just in case the user cares: 52 * Put the error code here just in case the user cares:
@@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
347 */ 347 */
348void kgdb_roundup_cpus(unsigned long flags) 348void kgdb_roundup_cpus(unsigned long flags)
349{ 349{
350 send_IPI_allbutself(APIC_DM_NMI); 350 apic->send_IPI_allbutself(APIC_DM_NMI);
351} 351}
352#endif 352#endif
353 353
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c43caa3a91f3..6993d51b7fd8 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,15 +18,6 @@
18#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
19#include <asm/io.h> 19#include <asm/io.h>
20 20
21#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
22static u64 kexec_pgd[512] PAGE_ALIGNED;
23static u64 kexec_pud0[512] PAGE_ALIGNED;
24static u64 kexec_pmd0[512] PAGE_ALIGNED;
25static u64 kexec_pte0[512] PAGE_ALIGNED;
26static u64 kexec_pud1[512] PAGE_ALIGNED;
27static u64 kexec_pmd1[512] PAGE_ALIGNED;
28static u64 kexec_pte1[512] PAGE_ALIGNED;
29
30static void init_level2_page(pmd_t *level2p, unsigned long addr) 21static void init_level2_page(pmd_t *level2p, unsigned long addr)
31{ 22{
32 unsigned long end_addr; 23 unsigned long end_addr;
@@ -107,12 +98,65 @@ out:
107 return result; 98 return result;
108} 99}
109 100
101static void free_transition_pgtable(struct kimage *image)
102{
103 free_page((unsigned long)image->arch.pud);
104 free_page((unsigned long)image->arch.pmd);
105 free_page((unsigned long)image->arch.pte);
106}
107
108static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
109{
110 pud_t *pud;
111 pmd_t *pmd;
112 pte_t *pte;
113 unsigned long vaddr, paddr;
114 int result = -ENOMEM;
115
116 vaddr = (unsigned long)relocate_kernel;
117 paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
118 pgd += pgd_index(vaddr);
119 if (!pgd_present(*pgd)) {
120 pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
121 if (!pud)
122 goto err;
123 image->arch.pud = pud;
124 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
125 }
126 pud = pud_offset(pgd, vaddr);
127 if (!pud_present(*pud)) {
128 pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
129 if (!pmd)
130 goto err;
131 image->arch.pmd = pmd;
132 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
133 }
134 pmd = pmd_offset(pud, vaddr);
135 if (!pmd_present(*pmd)) {
136 pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
137 if (!pte)
138 goto err;
139 image->arch.pte = pte;
140 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
141 }
142 pte = pte_offset_kernel(pmd, vaddr);
143 set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
144 return 0;
145err:
146 free_transition_pgtable(image);
147 return result;
148}
149
110 150
111static int init_pgtable(struct kimage *image, unsigned long start_pgtable) 151static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
112{ 152{
113 pgd_t *level4p; 153 pgd_t *level4p;
154 int result;
114 level4p = (pgd_t *)__va(start_pgtable); 155 level4p = (pgd_t *)__va(start_pgtable);
115 return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); 156 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
157 if (result)
158 return result;
159 return init_transition_pgtable(image, level4p);
116} 160}
117 161
118static void set_idt(void *newidt, u16 limit) 162static void set_idt(void *newidt, u16 limit)
@@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image)
174 218
175void machine_kexec_cleanup(struct kimage *image) 219void machine_kexec_cleanup(struct kimage *image)
176{ 220{
177 return; 221 free_transition_pgtable(image);
178} 222}
179 223
180/* 224/*
@@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image)
195 memcpy(control_page, relocate_kernel, PAGE_SIZE); 239 memcpy(control_page, relocate_kernel, PAGE_SIZE);
196 240
197 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); 241 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
198 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
199 page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
200 page_list[VA_PGD] = (unsigned long)kexec_pgd;
201 page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
202 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
203 page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
204 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
205 page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
206 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
207 page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
208 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
209 page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
210 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
211 page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
212 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
213
214 page_list[PA_TABLE_PAGE] = 242 page_list[PA_TABLE_PAGE] =
215 (unsigned long)__pa(page_address(image->control_code_page)); 243 (unsigned long)__pa(page_address(image->control_code_page));
216 244
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index fa6bb263892e..200764453195 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -3,7 +3,7 @@
3 * compliant MP-table parsing routines. 3 * compliant MP-table parsing routines.
4 * 4 *
5 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 5 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
6 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> 6 * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
7 * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> 7 * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
8 */ 8 */
9 9
@@ -29,12 +29,7 @@
29#include <asm/setup.h> 29#include <asm/setup.h>
30#include <asm/smp.h> 30#include <asm/smp.h>
31 31
32#include <mach_apic.h> 32#include <asm/genapic.h>
33#ifdef CONFIG_X86_32
34#include <mach_apicdef.h>
35#include <mach_mpparse.h>
36#endif
37
38/* 33/*
39 * Checksum an MP configuration block. 34 * Checksum an MP configuration block.
40 */ 35 */
@@ -292,16 +287,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
292 return 0; 287 return 0;
293 288
294#ifdef CONFIG_X86_32 289#ifdef CONFIG_X86_32
295 /* 290 generic_mps_oem_check(mpc, oem, str);
296 * need to make sure summit and es7000's mps_oem_check is safe to be
297 * called early via genericarch 's mps_oem_check
298 */
299 if (early) {
300#ifdef CONFIG_X86_NUMAQ
301 numaq_mps_oem_check(mpc, oem, str);
302#endif
303 } else
304 mps_oem_check(mpc, oem, str);
305#endif 291#endif
306 /* save the local APIC address, it might be non-default */ 292 /* save the local APIC address, it might be non-default */
307 if (!acpi_lapic) 293 if (!acpi_lapic)
@@ -386,13 +372,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
386 (*x86_quirks->mpc_record)++; 372 (*x86_quirks->mpc_record)++;
387 } 373 }
388 374
389#ifdef CONFIG_X86_GENERICARCH 375#ifdef CONFIG_X86_BIGSMP
390 generic_bigsmp_probe(); 376 generic_bigsmp_probe();
391#endif 377#endif
392 378
393#ifdef CONFIG_X86_32 379 if (apic->setup_apic_routing)
394 setup_apic_routing(); 380 apic->setup_apic_routing();
395#endif 381
396 if (!num_processors) 382 if (!num_processors)
397 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 383 printk(KERN_ERR "MPTABLE: no processors registered!\n");
398 return num_processors; 384 return num_processors;
@@ -706,8 +692,8 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
706#endif 692#endif
707 mpf_found = mpf; 693 mpf_found = mpf;
708 694
709 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", 695 printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
710 mpf, virt_to_phys(mpf)); 696 mpf, (u64)virt_to_phys(mpf));
711 697
712 if (!reserve) 698 if (!reserve)
713 return 1; 699 return 1;
@@ -1025,7 +1011,7 @@ static int __init update_mp_table(void)
1025 if (!smp_check_mpc(mpc, oem, str)) 1011 if (!smp_check_mpc(mpc, oem, str))
1026 return 0; 1012 return 0;
1027 1013
1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); 1014 printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf));
1029 printk(KERN_INFO "physptr: %x\n", mpf->physptr); 1015 printk(KERN_INFO "physptr: %x\n", mpf->physptr);
1030 1016
1031 if (mpc_new_phys && mpc->length > mpc_new_length) { 1017 if (mpc_new_phys && mpc->length > mpc_new_length) {
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 23b6d9e6e4f5..bdfad80c3cf1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -34,7 +34,7 @@
34 34
35#include <asm/mce.h> 35#include <asm/mce.h>
36 36
37#include <mach_traps.h> 37#include <asm/mach_traps.h>
38 38
39int unknown_nmi_panic; 39int unknown_nmi_panic;
40int nmi_watchdog_enabled; 40int nmi_watchdog_enabled;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index f2191d4f2717..0cc41a1d2550 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (C) 2002, IBM Corp. 4 * Copyright (C) 2002, IBM Corp.
5 * 5 *
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by 9 * it under the terms of the GNU General Public License as published by
@@ -23,17 +23,18 @@
23 * Send feedback to <gone@us.ibm.com> 23 * Send feedback to <gone@us.ibm.com>
24 */ 24 */
25 25
26#include <linux/mm.h> 26#include <linux/nodemask.h>
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/mmzone.h> 28#include <linux/mmzone.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/nodemask.h> 30#include <linux/mm.h>
31#include <asm/numaq.h> 31
32#include <asm/topology.h>
33#include <asm/processor.h> 32#include <asm/processor.h>
33#include <asm/topology.h>
34#include <asm/genapic.h> 34#include <asm/genapic.h>
35#include <asm/e820.h> 35#include <asm/numaq.h>
36#include <asm/setup.h> 36#include <asm/setup.h>
37#include <asm/e820.h>
37 38
38#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) 39#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
39 40
@@ -91,19 +92,20 @@ static int __init numaq_pre_time_init(void)
91} 92}
92 93
93int found_numaq; 94int found_numaq;
95
94/* 96/*
95 * Have to match translation table entries to main table entries by counter 97 * Have to match translation table entries to main table entries by counter
96 * hence the mpc_record variable .... can't see a less disgusting way of 98 * hence the mpc_record variable .... can't see a less disgusting way of
97 * doing this .... 99 * doing this ....
98 */ 100 */
99struct mpc_config_translation { 101struct mpc_config_translation {
100 unsigned char mpc_type; 102 unsigned char mpc_type;
101 unsigned char trans_len; 103 unsigned char trans_len;
102 unsigned char trans_type; 104 unsigned char trans_type;
103 unsigned char trans_quad; 105 unsigned char trans_quad;
104 unsigned char trans_global; 106 unsigned char trans_global;
105 unsigned char trans_local; 107 unsigned char trans_local;
106 unsigned short trans_reserved; 108 unsigned short trans_reserved;
107}; 109};
108 110
109/* x86_quirks member */ 111/* x86_quirks member */
@@ -236,7 +238,7 @@ static int __init numaq_setup_ioapic_ids(void)
236 238
237static int __init numaq_update_genapic(void) 239static int __init numaq_update_genapic(void)
238{ 240{
239 genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; 241 apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
240 242
241 return 0; 243 return 0;
242} 244}
@@ -291,3 +293,280 @@ int __init get_memcfg_numaq(void)
291 smp_dump_qct(); 293 smp_dump_qct();
292 return 1; 294 return 1;
293} 295}
296
297/*
298 * APIC driver for the IBM NUMAQ chipset.
299 */
300#define APIC_DEFINITION 1
301#include <linux/threads.h>
302#include <linux/cpumask.h>
303#include <asm/mpspec.h>
304#include <asm/genapic.h>
305#include <asm/fixmap.h>
306#include <asm/apicdef.h>
307#include <asm/ipi.h>
308#include <linux/kernel.h>
309#include <linux/string.h>
310#include <linux/init.h>
311#include <linux/numa.h>
312#include <linux/smp.h>
313#include <asm/numaq.h>
314#include <asm/io.h>
315#include <linux/mmzone.h>
316#include <linux/nodemask.h>
317
318#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
319
320static inline unsigned int numaq_get_apic_id(unsigned long x)
321{
322 return (x >> 24) & 0x0F;
323}
324
325static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
326{
327 default_send_IPI_mask_sequence_logical(mask, vector);
328}
329
330static inline void numaq_send_IPI_allbutself(int vector)
331{
332 default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
333}
334
335static inline void numaq_send_IPI_all(int vector)
336{
337 numaq_send_IPI_mask(cpu_online_mask, vector);
338}
339
340extern void numaq_mps_oem_check(struct mpc_table *, char *, char *);
341
342#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8)
343#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa)
344
345/*
346 * Because we use NMIs rather than the INIT-STARTUP sequence to
347 * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
348 */
349static inline void numaq_smp_callin_clear_local_apic(void)
350{
351 clear_local_APIC();
352}
353
354static inline void
355numaq_store_NMI_vector(unsigned short *high, unsigned short *low)
356{
357 printk("Storing NMI vector\n");
358 *high =
359 *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH));
360 *low =
361 *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW));
362}
363
364static inline const cpumask_t *numaq_target_cpus(void)
365{
366 return &CPU_MASK_ALL;
367}
368
369static inline unsigned long
370numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
371{
372 return physid_isset(apicid, bitmap);
373}
374
375static inline unsigned long numaq_check_apicid_present(int bit)
376{
377 return physid_isset(bit, phys_cpu_present_map);
378}
379
380#define apicid_cluster(apicid) (apicid & 0xF0)
381
382static inline int numaq_apic_id_registered(void)
383{
384 return 1;
385}
386
387static inline void numaq_init_apic_ldr(void)
388{
389 /* Already done in NUMA-Q firmware */
390}
391
392static inline void numaq_setup_apic_routing(void)
393{
394 printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
395 "NUMA-Q", nr_ioapics);
396}
397
398/*
399 * Skip adding the timer int on secondary nodes, which causes
400 * a small but painful rift in the time-space continuum.
401 */
402static inline int numaq_multi_timer_check(int apic, int irq)
403{
404 return apic != 0 && irq == 0;
405}
406
407static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
408{
409 /* We don't have a good way to do this yet - hack */
410 return physids_promote(0xFUL);
411}
412
413/* Mapping from cpu number to logical apicid */
414extern u8 cpu_2_logical_apicid[];
415
416static inline int numaq_cpu_to_logical_apicid(int cpu)
417{
418 if (cpu >= nr_cpu_ids)
419 return BAD_APICID;
420 return (int)cpu_2_logical_apicid[cpu];
421}
422
423/*
424 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
425 * cpu to APIC ID relation to properly interact with the intelligent
426 * mode of the cluster controller.
427 */
428static inline int numaq_cpu_present_to_apicid(int mps_cpu)
429{
430 if (mps_cpu < 60)
431 return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
432 else
433 return BAD_APICID;
434}
435
436static inline int numaq_apicid_to_node(int logical_apicid)
437{
438 return logical_apicid >> 4;
439}
440
441static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
442{
443 int node = numaq_apicid_to_node(logical_apicid);
444 int cpu = __ffs(logical_apicid & 0xf);
445
446 return physid_mask_of_physid(cpu + 4*node);
447}
448
449/* Where the IO area was mapped on multiquad, always 0 otherwise */
450void *xquad_portio;
451
452static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
453{
454 return 1;
455}
456
457/*
458 * We use physical apicids here, not logical, so just return the default
459 * physical broadcast to stop people from breaking us
460 */
461static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask)
462{
463 return 0x0F;
464}
465
466static inline unsigned int
467numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
468 const struct cpumask *andmask)
469{
470 return 0x0F;
471}
472
473/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
474static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
475{
476 return cpuid_apic >> index_msb;
477}
478static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
479{
480 numaq_mps_oem_check(mpc, oem, productid);
481 return found_numaq;
482}
483
484static int probe_numaq(void)
485{
486 /* already know from get_memcfg_numaq() */
487 return found_numaq;
488}
489
490static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask)
491{
492 /* Careful. Some cpus do not strictly honor the set of cpus
493 * specified in the interrupt destination when using lowest
494 * priority interrupt delivery mode.
495 *
496 * In particular there was a hyperthreading cpu observed to
497 * deliver interrupts to the wrong hyperthread when only one
498 * hyperthread was specified in the interrupt destination.
499 */
500 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
501}
502
503static void numaq_setup_portio_remap(void)
504{
505 int num_quads = num_online_nodes();
506
507 if (num_quads <= 1)
508 return;
509
510 printk("Remapping cross-quad port I/O for %d quads\n", num_quads);
511 xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
512 printk("xquad_portio vaddr 0x%08lx, len %08lx\n",
513 (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
514}
515
516struct genapic apic_numaq = {
517
518 .name = "NUMAQ",
519 .probe = probe_numaq,
520 .acpi_madt_oem_check = NULL,
521 .apic_id_registered = numaq_apic_id_registered,
522
523 .irq_delivery_mode = dest_LowestPrio,
524 /* physical delivery on LOCAL quad: */
525 .irq_dest_mode = 0,
526
527 .target_cpus = numaq_target_cpus,
528 .disable_esr = 1,
529 .dest_logical = APIC_DEST_LOGICAL,
530 .check_apicid_used = numaq_check_apicid_used,
531 .check_apicid_present = numaq_check_apicid_present,
532
533 .vector_allocation_domain = numaq_vector_allocation_domain,
534 .init_apic_ldr = numaq_init_apic_ldr,
535
536 .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
537 .setup_apic_routing = numaq_setup_apic_routing,
538 .multi_timer_check = numaq_multi_timer_check,
539 .apicid_to_node = numaq_apicid_to_node,
540 .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
541 .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
542 .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
543 .setup_portio_remap = numaq_setup_portio_remap,
544 .check_phys_apicid_present = numaq_check_phys_apicid_present,
545 .enable_apic_mode = NULL,
546 .phys_pkg_id = numaq_phys_pkg_id,
547 .mps_oem_check = __numaq_mps_oem_check,
548
549 .get_apic_id = numaq_get_apic_id,
550 .set_apic_id = NULL,
551 .apic_id_mask = 0x0F << 24,
552
553 .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid,
554 .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
555
556 .send_IPI_mask = numaq_send_IPI_mask,
557 .send_IPI_mask_allbutself = NULL,
558 .send_IPI_allbutself = numaq_send_IPI_allbutself,
559 .send_IPI_all = numaq_send_IPI_all,
560 .send_IPI_self = default_send_IPI_self,
561
562 .wakeup_cpu = NULL,
563 .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
564 .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
565
566 /* We don't do anything here because we use NMIs to boot instead */
567 .wait_for_init_deassert = NULL,
568
569 .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic,
570 .store_NMI_vector = numaq_store_NMI_vector,
571 .inquire_remote_apic = NULL,
572};
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 95777b0faa73..3a7c5a44082e 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = {
26}; 26};
27EXPORT_SYMBOL(pv_lock_ops); 27EXPORT_SYMBOL(pv_lock_ops);
28 28
29void __init paravirt_use_bytelocks(void)
30{
31#ifdef CONFIG_SMP
32 pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
33 pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
34 pv_lock_ops.spin_lock = __byte_spin_lock;
35 pv_lock_ops.spin_trylock = __byte_spin_trylock;
36 pv_lock_ops.spin_unlock = __byte_spin_unlock;
37#endif
38}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..6dc4dca255e4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -44,6 +44,17 @@ void _paravirt_nop(void)
44{ 44{
45} 45}
46 46
47/* identity function, which can be inlined */
48u32 _paravirt_ident_32(u32 x)
49{
50 return x;
51}
52
53u64 _paravirt_ident_64(u64 x)
54{
55 return x;
56}
57
47static void __init default_banner(void) 58static void __init default_banner(void)
48{ 59{
49 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 60 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
138 if (opfunc == NULL) 149 if (opfunc == NULL)
139 /* If there's no function, patch it with a ud2a (BUG) */ 150 /* If there's no function, patch it with a ud2a (BUG) */
140 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); 151 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
141 else if (opfunc == paravirt_nop) 152 else if (opfunc == _paravirt_nop)
142 /* If the operation is a nop, then nop the callsite */ 153 /* If the operation is a nop, then nop the callsite */
143 ret = paravirt_patch_nop(); 154 ret = paravirt_patch_nop();
155
156 /* identity functions just return their single argument */
157 else if (opfunc == _paravirt_ident_32)
158 ret = paravirt_patch_ident_32(insnbuf, len);
159 else if (opfunc == _paravirt_ident_64)
160 ret = paravirt_patch_ident_64(insnbuf, len);
161
144 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || 162 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
145 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || 163 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
146 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || 164 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
@@ -268,6 +286,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
268 return __get_cpu_var(paravirt_lazy_mode); 286 return __get_cpu_var(paravirt_lazy_mode);
269} 287}
270 288
289void arch_flush_lazy_mmu_mode(void)
290{
291 preempt_disable();
292
293 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
294 WARN_ON(preempt_count() == 1);
295 arch_leave_lazy_mmu_mode();
296 arch_enter_lazy_mmu_mode();
297 }
298
299 preempt_enable();
300}
301
302void arch_flush_lazy_cpu_mode(void)
303{
304 preempt_disable();
305
306 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
307 WARN_ON(preempt_count() == 1);
308 arch_leave_lazy_cpu_mode();
309 arch_enter_lazy_cpu_mode();
310 }
311
312 preempt_enable();
313}
314
271struct pv_info pv_info = { 315struct pv_info pv_info = {
272 .name = "bare hardware", 316 .name = "bare hardware",
273 .paravirt_enabled = 0, 317 .paravirt_enabled = 0,
@@ -292,10 +336,10 @@ struct pv_time_ops pv_time_ops = {
292 336
293struct pv_irq_ops pv_irq_ops = { 337struct pv_irq_ops pv_irq_ops = {
294 .init_IRQ = native_init_IRQ, 338 .init_IRQ = native_init_IRQ,
295 .save_fl = native_save_fl, 339 .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
296 .restore_fl = native_restore_fl, 340 .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
297 .irq_disable = native_irq_disable, 341 .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
298 .irq_enable = native_irq_enable, 342 .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
299 .safe_halt = native_safe_halt, 343 .safe_halt = native_safe_halt,
300 .halt = native_halt, 344 .halt = native_halt,
301#ifdef CONFIG_X86_64 345#ifdef CONFIG_X86_64
@@ -373,6 +417,14 @@ struct pv_apic_ops pv_apic_ops = {
373#endif 417#endif
374}; 418};
375 419
420#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
421/* 32-bit pagetable entries */
422#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
423#else
424/* 64-bit pagetable entries */
425#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
426#endif
427
376struct pv_mmu_ops pv_mmu_ops = { 428struct pv_mmu_ops pv_mmu_ops = {
377#ifndef CONFIG_X86_64 429#ifndef CONFIG_X86_64
378 .pagetable_setup_start = native_pagetable_setup_start, 430 .pagetable_setup_start = native_pagetable_setup_start,
@@ -424,22 +476,23 @@ struct pv_mmu_ops pv_mmu_ops = {
424 .pmd_clear = native_pmd_clear, 476 .pmd_clear = native_pmd_clear,
425#endif 477#endif
426 .set_pud = native_set_pud, 478 .set_pud = native_set_pud,
427 .pmd_val = native_pmd_val, 479
428 .make_pmd = native_make_pmd, 480 .pmd_val = PTE_IDENT,
481 .make_pmd = PTE_IDENT,
429 482
430#if PAGETABLE_LEVELS == 4 483#if PAGETABLE_LEVELS == 4
431 .pud_val = native_pud_val, 484 .pud_val = PTE_IDENT,
432 .make_pud = native_make_pud, 485 .make_pud = PTE_IDENT,
486
433 .set_pgd = native_set_pgd, 487 .set_pgd = native_set_pgd,
434#endif 488#endif
435#endif /* PAGETABLE_LEVELS >= 3 */ 489#endif /* PAGETABLE_LEVELS >= 3 */
436 490
437 .pte_val = native_pte_val, 491 .pte_val = PTE_IDENT,
438 .pte_flags = native_pte_flags, 492 .pgd_val = PTE_IDENT,
439 .pgd_val = native_pgd_val,
440 493
441 .make_pte = native_make_pte, 494 .make_pte = PTE_IDENT,
442 .make_pgd = native_make_pgd, 495 .make_pgd = PTE_IDENT,
443 496
444 .dup_mmap = paravirt_nop, 497 .dup_mmap = paravirt_nop,
445 .exit_mmap = paravirt_nop, 498 .exit_mmap = paravirt_nop,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 9fe644f4861d..d9f32e6d6ab6 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
12DEF_NATIVE(pv_cpu_ops, clts, "clts"); 12DEF_NATIVE(pv_cpu_ops, clts, "clts");
13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); 13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
14 14
15unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
16{
17 /* arg in %eax, return in %eax */
18 return 0;
19}
20
21unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
22{
23 /* arg in %edx:%eax, return in %edx:%eax */
24 return 0;
25}
26
15unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 27unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
16 unsigned long addr, unsigned len) 28 unsigned long addr, unsigned len)
17{ 29{
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 061d01df9ae6..3f08f34f93eb 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
19DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); 19DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
20DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); 20DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
21 21
22DEF_NATIVE(, mov32, "mov %edi, %eax");
23DEF_NATIVE(, mov64, "mov %rdi, %rax");
24
25unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
26{
27 return paravirt_patch_insns(insnbuf, len,
28 start__mov32, end__mov32);
29}
30
31unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
32{
33 return paravirt_patch_insns(insnbuf, len,
34 start__mov64, end__mov64);
35}
36
22unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 37unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
23 unsigned long addr, unsigned len) 38 unsigned long addr, unsigned len)
24{ 39{
diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c
new file mode 100644
index 000000000000..22337b75de62
--- /dev/null
+++ b/arch/x86/kernel/probe_32.c
@@ -0,0 +1,411 @@
1/*
2 * Default generic APIC driver. This handles up to 8 CPUs.
3 *
4 * Copyright 2003 Andi Kleen, SuSE Labs.
5 * Subject to the GNU Public License, v.2
6 *
7 * Generic x86 APIC driver probe layer.
8 */
9#include <linux/threads.h>
10#include <linux/cpumask.h>
11#include <linux/string.h>
12#include <linux/kernel.h>
13#include <linux/ctype.h>
14#include <linux/init.h>
15#include <linux/errno.h>
16#include <asm/fixmap.h>
17#include <asm/mpspec.h>
18#include <asm/apicdef.h>
19#include <asm/genapic.h>
20#include <asm/setup.h>
21
22#include <linux/threads.h>
23#include <linux/cpumask.h>
24#include <asm/mpspec.h>
25#include <asm/genapic.h>
26#include <asm/fixmap.h>
27#include <asm/apicdef.h>
28#include <linux/kernel.h>
29#include <linux/string.h>
30#include <linux/smp.h>
31#include <linux/init.h>
32#include <asm/genapic.h>
33#include <asm/ipi.h>
34
35#include <linux/smp.h>
36#include <linux/init.h>
37#include <linux/interrupt.h>
38#include <asm/acpi.h>
39#include <asm/arch_hooks.h>
40#include <asm/e820.h>
41#include <asm/setup.h>
42
43#include <asm/genapic.h>
44
45#ifdef CONFIG_HOTPLUG_CPU
46#define DEFAULT_SEND_IPI (1)
47#else
48#define DEFAULT_SEND_IPI (0)
49#endif
50
51int no_broadcast = DEFAULT_SEND_IPI;
52
53#ifdef CONFIG_X86_LOCAL_APIC
54
55static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
56{
57 /*
58 * Careful. Some cpus do not strictly honor the set of cpus
59 * specified in the interrupt destination when using lowest
60 * priority interrupt delivery mode.
61 *
62 * In particular there was a hyperthreading cpu observed to
63 * deliver interrupts to the wrong hyperthread when only one
64 * hyperthread was specified in the interrupt destination.
65 */
66 *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
67}
68
69/* should be called last. */
70static int probe_default(void)
71{
72 return 1;
73}
74
75struct genapic apic_default = {
76
77 .name = "default",
78 .probe = probe_default,
79 .acpi_madt_oem_check = NULL,
80 .apic_id_registered = default_apic_id_registered,
81
82 .irq_delivery_mode = dest_LowestPrio,
83 /* logical delivery broadcast to all CPUs: */
84 .irq_dest_mode = 1,
85
86 .target_cpus = default_target_cpus,
87 .disable_esr = 0,
88 .dest_logical = APIC_DEST_LOGICAL,
89 .check_apicid_used = default_check_apicid_used,
90 .check_apicid_present = default_check_apicid_present,
91
92 .vector_allocation_domain = default_vector_allocation_domain,
93 .init_apic_ldr = default_init_apic_ldr,
94
95 .ioapic_phys_id_map = default_ioapic_phys_id_map,
96 .setup_apic_routing = default_setup_apic_routing,
97 .multi_timer_check = NULL,
98 .apicid_to_node = default_apicid_to_node,
99 .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
100 .cpu_present_to_apicid = default_cpu_present_to_apicid,
101 .apicid_to_cpu_present = default_apicid_to_cpu_present,
102 .setup_portio_remap = NULL,
103 .check_phys_apicid_present = default_check_phys_apicid_present,
104 .enable_apic_mode = NULL,
105 .phys_pkg_id = default_phys_pkg_id,
106 .mps_oem_check = NULL,
107
108 .get_apic_id = default_get_apic_id,
109 .set_apic_id = NULL,
110 .apic_id_mask = 0x0F << 24,
111
112 .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
113 .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
114
115 .send_IPI_mask = default_send_IPI_mask_logical,
116 .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
117 .send_IPI_allbutself = default_send_IPI_allbutself,
118 .send_IPI_all = default_send_IPI_all,
119 .send_IPI_self = default_send_IPI_self,
120
121 .wakeup_cpu = NULL,
122 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
123 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
124
125 .wait_for_init_deassert = default_wait_for_init_deassert,
126
127 .smp_callin_clear_local_apic = NULL,
128 .store_NMI_vector = NULL,
129 .inquire_remote_apic = default_inquire_remote_apic,
130};
131
132extern struct genapic apic_numaq;
133extern struct genapic apic_summit;
134extern struct genapic apic_bigsmp;
135extern struct genapic apic_es7000;
136extern struct genapic apic_default;
137
138struct genapic *apic = &apic_default;
139
140static struct genapic *apic_probe[] __initdata = {
141#ifdef CONFIG_X86_NUMAQ
142 &apic_numaq,
143#endif
144#ifdef CONFIG_X86_SUMMIT
145 &apic_summit,
146#endif
147#ifdef CONFIG_X86_BIGSMP
148 &apic_bigsmp,
149#endif
150#ifdef CONFIG_X86_ES7000
151 &apic_es7000,
152#endif
153 &apic_default, /* must be last */
154 NULL,
155};
156
157static int cmdline_apic __initdata;
158static int __init parse_apic(char *arg)
159{
160 int i;
161
162 if (!arg)
163 return -EINVAL;
164
165 for (i = 0; apic_probe[i]; i++) {
166 if (!strcmp(apic_probe[i]->name, arg)) {
167 apic = apic_probe[i];
168 cmdline_apic = 1;
169 return 0;
170 }
171 }
172
173 if (x86_quirks->update_genapic)
174 x86_quirks->update_genapic();
175
176 /* Parsed again by __setup for debug/verbose */
177 return 0;
178}
179early_param("apic", parse_apic);
180
181void __init generic_bigsmp_probe(void)
182{
183#ifdef CONFIG_X86_BIGSMP
184 /*
185 * This routine is used to switch to bigsmp mode when
186 * - There is no apic= option specified by the user
187 * - generic_apic_probe() has chosen apic_default as the sub_arch
188 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
189 */
190
191 if (!cmdline_apic && apic == &apic_default) {
192 if (apic_bigsmp.probe()) {
193 apic = &apic_bigsmp;
194 if (x86_quirks->update_genapic)
195 x86_quirks->update_genapic();
196 printk(KERN_INFO "Overriding APIC driver with %s\n",
197 apic->name);
198 }
199 }
200#endif
201}
202
203void __init generic_apic_probe(void)
204{
205 if (!cmdline_apic) {
206 int i;
207 for (i = 0; apic_probe[i]; i++) {
208 if (apic_probe[i]->probe()) {
209 apic = apic_probe[i];
210 break;
211 }
212 }
213 /* Not visible without early console */
214 if (!apic_probe[i])
215 panic("Didn't find an APIC driver");
216
217 if (x86_quirks->update_genapic)
218 x86_quirks->update_genapic();
219 }
220 printk(KERN_INFO "Using APIC driver %s\n", apic->name);
221}
222
223/* These functions can switch the APIC even after the initial ->probe() */
224
225int __init
226generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
227{
228 int i;
229
230 for (i = 0; apic_probe[i]; ++i) {
231 if (!apic_probe[i]->mps_oem_check)
232 continue;
233 if (!apic_probe[i]->mps_oem_check(mpc, oem, productid))
234 continue;
235
236 if (!cmdline_apic) {
237 apic = apic_probe[i];
238 if (x86_quirks->update_genapic)
239 x86_quirks->update_genapic();
240 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
241 apic->name);
242 }
243 return 1;
244 }
245 return 0;
246}
247
248int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
249{
250 int i;
251
252 for (i = 0; apic_probe[i]; ++i) {
253 if (!apic_probe[i]->acpi_madt_oem_check)
254 continue;
255 if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id))
256 continue;
257
258 if (!cmdline_apic) {
259 apic = apic_probe[i];
260 if (x86_quirks->update_genapic)
261 x86_quirks->update_genapic();
262 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
263 apic->name);
264 }
265 return 1;
266 }
267 return 0;
268}
269
270#endif /* CONFIG_X86_LOCAL_APIC */
271
272/**
273 * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
274 *
275 * Description:
276 * Perform any necessary interrupt initialisation prior to setting up
277 * the "ordinary" interrupt call gates. For legacy reasons, the ISA
278 * interrupts should be initialised here if the machine emulates a PC
279 * in any way.
280 **/
281void __init pre_intr_init_hook(void)
282{
283 if (x86_quirks->arch_pre_intr_init) {
284 if (x86_quirks->arch_pre_intr_init())
285 return;
286 }
287 init_ISA_irqs();
288}
289
290/**
291 * intr_init_hook - post gate setup interrupt initialisation
292 *
293 * Description:
294 * Fill in any interrupts that may have been left out by the general
295 * init_IRQ() routine. Interrupts having to do with the machine rather
296 * than the devices on the I/O bus (like APIC interrupts in intel MP
297 * systems) are started here.
298 **/
299void __init intr_init_hook(void)
300{
301 if (x86_quirks->arch_intr_init) {
302 if (x86_quirks->arch_intr_init())
303 return;
304 }
305}
306
307/**
308 * pre_setup_arch_hook - hook called prior to any setup_arch() execution
309 *
310 * Description:
311 * generally used to activate any machine specific identification
312 * routines that may be needed before setup_arch() runs. On Voyager
313 * this is used to get the board revision and type.
314 **/
315void __init pre_setup_arch_hook(void)
316{
317}
318
319/**
320 * trap_init_hook - initialise system specific traps
321 *
322 * Description:
323 * Called as the final act of trap_init(). Used in VISWS to initialise
324 * the various board specific APIC traps.
325 **/
326void __init trap_init_hook(void)
327{
328 if (x86_quirks->arch_trap_init) {
329 if (x86_quirks->arch_trap_init())
330 return;
331 }
332}
333
334static struct irqaction irq0 = {
335 .handler = timer_interrupt,
336 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL,
337 .mask = CPU_MASK_NONE,
338 .name = "timer"
339};
340
341/**
342 * pre_time_init_hook - do any specific initialisations before.
343 *
344 **/
345void __init pre_time_init_hook(void)
346{
347 if (x86_quirks->arch_pre_time_init)
348 x86_quirks->arch_pre_time_init();
349}
350
351/**
352 * time_init_hook - do any specific initialisations for the system timer.
353 *
354 * Description:
355 * Must plug the system timer interrupt source at HZ into the IRQ listed
356 * in irq_vectors.h:TIMER_IRQ
357 **/
358void __init time_init_hook(void)
359{
360 if (x86_quirks->arch_time_init) {
361 /*
362 * A nonzero return code does not mean failure, it means
363 * that the architecture quirk does not want any
364 * generic (timer) setup to be performed after this:
365 */
366 if (x86_quirks->arch_time_init())
367 return;
368 }
369
370 irq0.mask = cpumask_of_cpu(0);
371 setup_irq(0, &irq0);
372}
373
374#ifdef CONFIG_MCA
375/**
376 * mca_nmi_hook - hook into MCA specific NMI chain
377 *
378 * Description:
379 * The MCA (Microchannel Architecture) has an NMI chain for NMI sources
380 * along the MCA bus. Use this to hook into that chain if you will need
381 * it.
382 **/
383void mca_nmi_hook(void)
384{
385 /*
386 * If I recall correctly, there's a whole bunch of other things that
387 * we can do to check for NMI problems, but that's all I know about
388 * at the moment.
389 */
390 pr_warning("NMI generated from unknown source!\n");
391}
392#endif
393
394static __init int no_ipi_broadcast(char *str)
395{
396 get_option(&str, &no_broadcast);
397 pr_info("Using %s mode\n",
398 no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
399 return 1;
400}
401__setup("no_ipi_broadcast=", no_ipi_broadcast);
402
403static int __init print_ipi_mode(void)
404{
405 pr_info("Using IPI %s mode\n",
406 no_broadcast ? "No-Shortcut" : "Shortcut");
407 return 0;
408}
409
410late_initcall(print_ipi_mode);
411
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c
index 675a48c404a5..071e7fea42e5 100644
--- a/arch/x86/kernel/probe_roms_32.c
+++ b/arch/x86/kernel/probe_roms_32.c
@@ -18,7 +18,7 @@
18#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/sections.h> 19#include <asm/sections.h>
20#include <asm/io.h> 20#include <asm/io.h>
21#include <setup_arch.h> 21#include <asm/setup_arch.h>
22 22
23static struct resource system_rom_resource = { 23static struct resource system_rom_resource = {
24 .name = "System ROM", 24 .name = "System ROM",
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d12f7e37f8c..87b69d4fac16 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -350,7 +350,7 @@ static void c1e_idle(void)
350 350
351void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 351void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
352{ 352{
353#ifdef CONFIG_X86_SMP 353#ifdef CONFIG_SMP
354 if (pm_idle == poll_idle && smp_num_siblings > 1) { 354 if (pm_idle == poll_idle && smp_num_siblings > 1) {
355 printk(KERN_WARNING "WARNING: polling idle and HT enabled," 355 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
356 " performance may degrade.\n"); 356 " performance may degrade.\n");
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 1a1ae8edc40c..fec79ad85dc6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -11,6 +11,7 @@
11 11
12#include <stdarg.h> 12#include <stdarg.h>
13 13
14#include <linux/stackprotector.h>
14#include <linux/cpu.h> 15#include <linux/cpu.h>
15#include <linux/errno.h> 16#include <linux/errno.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
@@ -91,6 +92,15 @@ void cpu_idle(void)
91{ 92{
92 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
93 94
95 /*
96 * If we're the non-boot CPU, nothing set the stack canary up
97 * for us. CPU0 already has it initialized but no harm in
98 * doing it again. This is a good place for updating it, as
99 * we won't ever return from this function (so the invalid
100 * canaries already on the stack won't ever trigger).
101 */
102 boot_init_stack_canary();
103
94 current_thread_info()->status |= TS_POLLING; 104 current_thread_info()->status |= TS_POLLING;
95 105
96 /* endless idle loop with no priority at all */ 106 /* endless idle loop with no priority at all */
@@ -131,7 +141,7 @@ void __show_regs(struct pt_regs *regs, int all)
131 if (user_mode_vm(regs)) { 141 if (user_mode_vm(regs)) {
132 sp = regs->sp; 142 sp = regs->sp;
133 ss = regs->ss & 0xffff; 143 ss = regs->ss & 0xffff;
134 savesegment(gs, gs); 144 gs = get_user_gs(regs);
135 } else { 145 } else {
136 sp = (unsigned long) (&regs->sp); 146 sp = (unsigned long) (&regs->sp);
137 savesegment(ss, ss); 147 savesegment(ss, ss);
@@ -212,6 +222,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
212 regs.ds = __USER_DS; 222 regs.ds = __USER_DS;
213 regs.es = __USER_DS; 223 regs.es = __USER_DS;
214 regs.fs = __KERNEL_PERCPU; 224 regs.fs = __KERNEL_PERCPU;
225 regs.gs = __KERNEL_STACK_CANARY;
215 regs.orig_ax = -1; 226 regs.orig_ax = -1;
216 regs.ip = (unsigned long) kernel_thread_helper; 227 regs.ip = (unsigned long) kernel_thread_helper;
217 regs.cs = __KERNEL_CS | get_kernel_rpl(); 228 regs.cs = __KERNEL_CS | get_kernel_rpl();
@@ -304,7 +315,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
304 315
305 p->thread.ip = (unsigned long) ret_from_fork; 316 p->thread.ip = (unsigned long) ret_from_fork;
306 317
307 savesegment(gs, p->thread.gs); 318 task_user_gs(p) = get_user_gs(regs);
308 319
309 tsk = current; 320 tsk = current;
310 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 321 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -342,7 +353,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
342void 353void
343start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) 354start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
344{ 355{
345 __asm__("movl %0, %%gs" : : "r"(0)); 356 set_user_gs(regs, 0);
346 regs->fs = 0; 357 regs->fs = 0;
347 set_fs(USER_DS); 358 set_fs(USER_DS);
348 regs->ds = __USER_DS; 359 regs->ds = __USER_DS;
@@ -539,7 +550,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
539 * used %fs or %gs (it does not today), or if the kernel is 550 * used %fs or %gs (it does not today), or if the kernel is
540 * running inside of a hypervisor layer. 551 * running inside of a hypervisor layer.
541 */ 552 */
542 savesegment(gs, prev->gs); 553 lazy_save_gs(prev->gs);
543 554
544 /* 555 /*
545 * Load the per-thread Thread-Local Storage descriptor. 556 * Load the per-thread Thread-Local Storage descriptor.
@@ -585,31 +596,31 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
585 * Restore %gs if needed (which is common) 596 * Restore %gs if needed (which is common)
586 */ 597 */
587 if (prev->gs | next->gs) 598 if (prev->gs | next->gs)
588 loadsegment(gs, next->gs); 599 lazy_load_gs(next->gs);
589 600
590 percpu_write(current_task, next_p); 601 percpu_write(current_task, next_p);
591 602
592 return prev_p; 603 return prev_p;
593} 604}
594 605
595asmlinkage int sys_fork(struct pt_regs regs) 606int sys_fork(struct pt_regs *regs)
596{ 607{
597 return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL); 608 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
598} 609}
599 610
600asmlinkage int sys_clone(struct pt_regs regs) 611int sys_clone(struct pt_regs *regs)
601{ 612{
602 unsigned long clone_flags; 613 unsigned long clone_flags;
603 unsigned long newsp; 614 unsigned long newsp;
604 int __user *parent_tidptr, *child_tidptr; 615 int __user *parent_tidptr, *child_tidptr;
605 616
606 clone_flags = regs.bx; 617 clone_flags = regs->bx;
607 newsp = regs.cx; 618 newsp = regs->cx;
608 parent_tidptr = (int __user *)regs.dx; 619 parent_tidptr = (int __user *)regs->dx;
609 child_tidptr = (int __user *)regs.di; 620 child_tidptr = (int __user *)regs->di;
610 if (!newsp) 621 if (!newsp)
611 newsp = regs.sp; 622 newsp = regs->sp;
612 return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr); 623 return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
613} 624}
614 625
615/* 626/*
@@ -622,27 +633,27 @@ asmlinkage int sys_clone(struct pt_regs regs)
622 * do not have enough call-clobbered registers to hold all 633 * do not have enough call-clobbered registers to hold all
623 * the information you need. 634 * the information you need.
624 */ 635 */
625asmlinkage int sys_vfork(struct pt_regs regs) 636int sys_vfork(struct pt_regs *regs)
626{ 637{
627 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL); 638 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
628} 639}
629 640
630/* 641/*
631 * sys_execve() executes a new program. 642 * sys_execve() executes a new program.
632 */ 643 */
633asmlinkage int sys_execve(struct pt_regs regs) 644int sys_execve(struct pt_regs *regs)
634{ 645{
635 int error; 646 int error;
636 char *filename; 647 char *filename;
637 648
638 filename = getname((char __user *) regs.bx); 649 filename = getname((char __user *) regs->bx);
639 error = PTR_ERR(filename); 650 error = PTR_ERR(filename);
640 if (IS_ERR(filename)) 651 if (IS_ERR(filename))
641 goto out; 652 goto out;
642 error = do_execve(filename, 653 error = do_execve(filename,
643 (char __user * __user *) regs.cx, 654 (char __user * __user *) regs->cx,
644 (char __user * __user *) regs.dx, 655 (char __user * __user *) regs->dx,
645 &regs); 656 regs);
646 if (error == 0) { 657 if (error == 0) {
647 /* Make sure we don't return using sysenter.. */ 658 /* Make sure we don't return using sysenter.. */
648 set_thread_flag(TIF_IRET); 659 set_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 8eb169e45584..836ef6575f01 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -120,12 +120,11 @@ void cpu_idle(void)
120 current_thread_info()->status |= TS_POLLING; 120 current_thread_info()->status |= TS_POLLING;
121 121
122 /* 122 /*
123 * If we're the non-boot CPU, nothing set the PDA stack 123 * If we're the non-boot CPU, nothing set the stack canary up
124 * canary up for us - and if we are the boot CPU we have 124 * for us. CPU0 already has it initialized but no harm in
125 * a 0 stack canary. This is a good place for updating 125 * doing it again. This is a good place for updating it, as
126 * it, as we wont ever return from this function (so the 126 * we wont ever return from this function (so the invalid
127 * invalid canaries already on the stack wont ever 127 * canaries already on the stack wont ever trigger).
128 * trigger):
129 */ 128 */
130 boot_init_stack_canary(); 129 boot_init_stack_canary();
131 130
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb9..d2f7cd5b2c83 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
75static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 75static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
76{ 76{
77 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 77 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
78 regno >>= 2; 78 return &regs->bx + (regno >> 2);
79 if (regno > FS)
80 --regno;
81 return &regs->bx + regno;
82} 79}
83 80
84static u16 get_segment_reg(struct task_struct *task, unsigned long offset) 81static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
90 if (offset != offsetof(struct user_regs_struct, gs)) 87 if (offset != offsetof(struct user_regs_struct, gs))
91 retval = *pt_regs_access(task_pt_regs(task), offset); 88 retval = *pt_regs_access(task_pt_regs(task), offset);
92 else { 89 else {
93 retval = task->thread.gs;
94 if (task == current) 90 if (task == current)
95 savesegment(gs, retval); 91 retval = get_user_gs(task_pt_regs(task));
92 else
93 retval = task_user_gs(task);
96 } 94 }
97 return retval; 95 return retval;
98} 96}
@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task,
126 break; 124 break;
127 125
128 case offsetof(struct user_regs_struct, gs): 126 case offsetof(struct user_regs_struct, gs):
129 task->thread.gs = value;
130 if (task == current) 127 if (task == current)
131 /* 128 set_user_gs(task_pt_regs(task), value);
132 * The user-mode %gs is not affected by 129 else
133 * kernel entry, so we must update the CPU. 130 task_user_gs(task) = value;
134 */
135 loadsegment(gs, value);
136 } 131 }
137 132
138 return 0; 133 return 0;
@@ -810,12 +805,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
810 805
811static void ptrace_bts_detach(struct task_struct *child) 806static void ptrace_bts_detach(struct task_struct *child)
812{ 807{
813 if (unlikely(child->bts)) { 808 /*
814 ds_release_bts(child->bts); 809 * Ptrace_detach() races with ptrace_untrace() in case
815 child->bts = NULL; 810 * the child dies and is reaped by another thread.
816 811 *
817 ptrace_bts_free_buffer(child); 812 * We only do the memory accounting at this point and
818 } 813 * leave the buffer deallocation and the bts tracer
814 * release to ptrace_bts_untrace() which will be called
815 * later on with tasklist_lock held.
816 */
817 release_locked_buffer(child->bts_buffer, child->bts_size);
819} 818}
820#else 819#else
821static inline void ptrace_bts_fork(struct task_struct *tsk) {} 820static inline void ptrace_bts_fork(struct task_struct *tsk) {}
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index f8536fee5c12..32e8f0af292c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -24,7 +24,7 @@
24# include <asm/iommu.h> 24# include <asm/iommu.h>
25#endif 25#endif
26 26
27#include <mach_ipi.h> 27#include <asm/genapic.h>
28 28
29/* 29/*
30 * Power off function, if any 30 * Power off function, if any
@@ -651,7 +651,7 @@ static int crash_nmi_callback(struct notifier_block *self,
651 651
652static void smp_send_nmi_allbutself(void) 652static void smp_send_nmi_allbutself(void)
653{ 653{
654 send_IPI_allbutself(NMI_VECTOR); 654 apic->send_IPI_allbutself(NMI_VECTOR);
655} 655}
656 656
657static struct notifier_block crash_nmi_nb = { 657static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index f5afe665a82b..b0bbdd4829c9 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -29,122 +29,6 @@ relocate_kernel:
29 * %rdx start address 29 * %rdx start address
30 */ 30 */
31 31
32 /* map the control page at its virtual address */
33
34 movq $0x0000ff8000000000, %r10 /* mask */
35 mov $(39 - 3), %cl /* bits to shift */
36 movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
37
38 movq %r11, %r9
39 andq %r10, %r9
40 shrq %cl, %r9
41
42 movq PTR(VA_PGD)(%rsi), %r8
43 addq %r8, %r9
44 movq PTR(PA_PUD_0)(%rsi), %r8
45 orq $PAGE_ATTR, %r8
46 movq %r8, (%r9)
47
48 shrq $9, %r10
49 sub $9, %cl
50
51 movq %r11, %r9
52 andq %r10, %r9
53 shrq %cl, %r9
54
55 movq PTR(VA_PUD_0)(%rsi), %r8
56 addq %r8, %r9
57 movq PTR(PA_PMD_0)(%rsi), %r8
58 orq $PAGE_ATTR, %r8
59 movq %r8, (%r9)
60
61 shrq $9, %r10
62 sub $9, %cl
63
64 movq %r11, %r9
65 andq %r10, %r9
66 shrq %cl, %r9
67
68 movq PTR(VA_PMD_0)(%rsi), %r8
69 addq %r8, %r9
70 movq PTR(PA_PTE_0)(%rsi), %r8
71 orq $PAGE_ATTR, %r8
72 movq %r8, (%r9)
73
74 shrq $9, %r10
75 sub $9, %cl
76
77 movq %r11, %r9
78 andq %r10, %r9
79 shrq %cl, %r9
80
81 movq PTR(VA_PTE_0)(%rsi), %r8
82 addq %r8, %r9
83 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
84 orq $PAGE_ATTR, %r8
85 movq %r8, (%r9)
86
87 /* identity map the control page at its physical address */
88
89 movq $0x0000ff8000000000, %r10 /* mask */
90 mov $(39 - 3), %cl /* bits to shift */
91 movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
92
93 movq %r11, %r9
94 andq %r10, %r9
95 shrq %cl, %r9
96
97 movq PTR(VA_PGD)(%rsi), %r8
98 addq %r8, %r9
99 movq PTR(PA_PUD_1)(%rsi), %r8
100 orq $PAGE_ATTR, %r8
101 movq %r8, (%r9)
102
103 shrq $9, %r10
104 sub $9, %cl
105
106 movq %r11, %r9
107 andq %r10, %r9
108 shrq %cl, %r9
109
110 movq PTR(VA_PUD_1)(%rsi), %r8
111 addq %r8, %r9
112 movq PTR(PA_PMD_1)(%rsi), %r8
113 orq $PAGE_ATTR, %r8
114 movq %r8, (%r9)
115
116 shrq $9, %r10
117 sub $9, %cl
118
119 movq %r11, %r9
120 andq %r10, %r9
121 shrq %cl, %r9
122
123 movq PTR(VA_PMD_1)(%rsi), %r8
124 addq %r8, %r9
125 movq PTR(PA_PTE_1)(%rsi), %r8
126 orq $PAGE_ATTR, %r8
127 movq %r8, (%r9)
128
129 shrq $9, %r10
130 sub $9, %cl
131
132 movq %r11, %r9
133 andq %r10, %r9
134 shrq %cl, %r9
135
136 movq PTR(VA_PTE_1)(%rsi), %r8
137 addq %r8, %r9
138 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
139 orq $PAGE_ATTR, %r8
140 movq %r8, (%r9)
141
142relocate_new_kernel:
143 /* %rdi indirection_page
144 * %rsi page_list
145 * %rdx start address
146 */
147
148 /* zero out flags, and disable interrupts */ 32 /* zero out flags, and disable interrupts */
149 pushq $0 33 pushq $0
150 popfq 34 popfq
@@ -156,9 +40,8 @@ relocate_new_kernel:
156 /* get physical address of page table now too */ 40 /* get physical address of page table now too */
157 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx 41 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
158 42
159 /* switch to new set of page tables */ 43 /* Switch to the identity mapped page tables */
160 movq PTR(PA_PGD)(%rsi), %r9 44 movq %rcx, %cr3
161 movq %r9, %cr3
162 45
163 /* setup a new stack at the end of the physical control page */ 46 /* setup a new stack at the end of the physical control page */
164 lea PAGE_SIZE(%r8), %rsp 47 lea PAGE_SIZE(%r8), %rsp
@@ -194,9 +77,7 @@ identity_mapped:
194 jmp 1f 77 jmp 1f
1951: 781:
196 79
197 /* Switch to the identity mapped page tables, 80 /* Flush the TLB (needed?) */
198 * and flush the TLB.
199 */
200 movq %rcx, %cr3 81 movq %rcx, %cr3
201 82
202 /* Do the copies */ 83 /* Do the copies */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index d5d6693b706d..8fce6c714514 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -81,7 +81,7 @@
81#include <asm/io_apic.h> 81#include <asm/io_apic.h>
82#include <asm/ist.h> 82#include <asm/ist.h>
83#include <asm/vmi.h> 83#include <asm/vmi.h>
84#include <setup_arch.h> 84#include <asm/setup_arch.h>
85#include <asm/bios_ebda.h> 85#include <asm/bios_ebda.h>
86#include <asm/cacheflush.h> 86#include <asm/cacheflush.h>
87#include <asm/processor.h> 87#include <asm/processor.h>
@@ -97,7 +97,7 @@
97#include <asm/mmu_context.h> 97#include <asm/mmu_context.h>
98#include <asm/proto.h> 98#include <asm/proto.h>
99 99
100#include <mach_apic.h> 100#include <asm/genapic.h>
101#include <asm/paravirt.h> 101#include <asm/paravirt.h>
102#include <asm/hypervisor.h> 102#include <asm/hypervisor.h>
103 103
@@ -112,6 +112,20 @@
112#define ARCH_SETUP 112#define ARCH_SETUP
113#endif 113#endif
114 114
115unsigned int boot_cpu_id __read_mostly;
116
117#ifdef CONFIG_X86_64
118int default_cpu_present_to_apicid(int mps_cpu)
119{
120 return __default_cpu_present_to_apicid(mps_cpu);
121}
122
123int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
124{
125 return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
126}
127#endif
128
115#ifndef CONFIG_DEBUG_BOOT_PARAMS 129#ifndef CONFIG_DEBUG_BOOT_PARAMS
116struct boot_params __initdata boot_params; 130struct boot_params __initdata boot_params;
117#else 131#else
@@ -588,10 +602,9 @@ early_param("elfcorehdr", setup_elfcorehdr);
588 602
589static int __init default_update_genapic(void) 603static int __init default_update_genapic(void)
590{ 604{
591#ifdef CONFIG_X86_SMP 605#ifdef CONFIG_SMP
592# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) 606 if (!apic->wakeup_cpu)
593 genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; 607 apic->wakeup_cpu = wakeup_secondary_cpu_via_init;
594# endif
595#endif 608#endif
596 609
597 return 0; 610 return 0;
@@ -892,12 +905,11 @@ void __init setup_arch(char **cmdline_p)
892 */ 905 */
893 acpi_reserve_bootmem(); 906 acpi_reserve_bootmem();
894#endif 907#endif
895#ifdef CONFIG_X86_FIND_SMP_CONFIG
896 /* 908 /*
897 * Find and reserve possible boot-time SMP configuration: 909 * Find and reserve possible boot-time SMP configuration:
898 */ 910 */
899 find_smp_config(); 911 find_smp_config();
900#endif 912
901 reserve_crashkernel(); 913 reserve_crashkernel();
902 914
903#ifdef CONFIG_X86_64 915#ifdef CONFIG_X86_64
@@ -924,9 +936,7 @@ void __init setup_arch(char **cmdline_p)
924 map_vsyscall(); 936 map_vsyscall();
925#endif 937#endif
926 938
927#ifdef CONFIG_X86_GENERICARCH
928 generic_apic_probe(); 939 generic_apic_probe();
929#endif
930 940
931 early_quirks(); 941 early_quirks();
932 942
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index e553803cd2db..d992e6cff730 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -15,6 +15,8 @@
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h> 16#include <asm/proto.h>
17#include <asm/cpumask.h> 17#include <asm/cpumask.h>
18#include <asm/cpu.h>
19#include <asm/stackprotector.h>
18 20
19#ifdef CONFIG_DEBUG_PER_CPU_MAPS 21#ifdef CONFIG_DEBUG_PER_CPU_MAPS
20# define DBG(x...) printk(KERN_DEBUG x) 22# define DBG(x...) printk(KERN_DEBUG x)
@@ -22,118 +24,36 @@
22# define DBG(x...) 24# define DBG(x...)
23#endif 25#endif
24 26
25/*
26 * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
27 * voyager wants cpu_number too.
28 */
29#ifdef CONFIG_SMP
30DEFINE_PER_CPU(int, cpu_number); 27DEFINE_PER_CPU(int, cpu_number);
31EXPORT_PER_CPU_SYMBOL(cpu_number); 28EXPORT_PER_CPU_SYMBOL(cpu_number);
32#endif
33
34#ifdef CONFIG_X86_LOCAL_APIC
35unsigned int num_processors;
36unsigned disabled_cpus __cpuinitdata;
37/* Processor that is doing the boot up */
38unsigned int boot_cpu_physical_apicid = -1U;
39EXPORT_SYMBOL(boot_cpu_physical_apicid);
40unsigned int max_physical_apicid;
41
42/* Bitmask of physically existing CPUs */
43physid_mask_t phys_cpu_present_map;
44#endif
45
46/*
47 * Map cpu index to physical APIC ID
48 */
49DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
50DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
51EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
52EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
53
54#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
55#define X86_64_NUMA 1 /* (used later) */
56DEFINE_PER_CPU(int, node_number) = 0;
57EXPORT_PER_CPU_SYMBOL(node_number);
58
59/*
60 * Map cpu index to node index
61 */
62DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
63EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
64
65/*
66 * Which logical CPUs are on which nodes
67 */
68cpumask_t *node_to_cpumask_map;
69EXPORT_SYMBOL(node_to_cpumask_map);
70
71/*
72 * Setup node_to_cpumask_map
73 */
74static void __init setup_node_to_cpumask_map(void);
75 29
30#ifdef CONFIG_X86_64
31#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
76#else 32#else
77static inline void setup_node_to_cpumask_map(void) { } 33#define BOOT_PERCPU_OFFSET 0
78#endif 34#endif
79 35
80#ifdef CONFIG_X86_64 36DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
81 37EXPORT_PER_CPU_SYMBOL(this_cpu_off);
82/* correctly size the local cpu masks */
83static void __init setup_cpu_local_masks(void)
84{
85 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
86 alloc_bootmem_cpumask_var(&cpu_callin_mask);
87 alloc_bootmem_cpumask_var(&cpu_callout_mask);
88 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
89}
90
91#else /* CONFIG_X86_32 */
92
93static inline void setup_cpu_local_masks(void)
94{
95}
96 38
97#endif /* CONFIG_X86_32 */ 39unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
40 [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
41};
42EXPORT_SYMBOL(__per_cpu_offset);
98 43
99#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA 44static inline void setup_percpu_segment(int cpu)
100/*
101 * Copy data used in early init routines from the initial arrays to the
102 * per cpu data areas. These arrays then become expendable and the
103 * *_early_ptr's are zeroed indicating that the static arrays are gone.
104 */
105static void __init setup_per_cpu_maps(void)
106{ 45{
107 int cpu; 46#ifdef CONFIG_X86_32
108 47 struct desc_struct gdt;
109 for_each_possible_cpu(cpu) {
110 per_cpu(x86_cpu_to_apicid, cpu) =
111 early_per_cpu_map(x86_cpu_to_apicid, cpu);
112 per_cpu(x86_bios_cpu_apicid, cpu) =
113 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
114#ifdef X86_64_NUMA
115 per_cpu(x86_cpu_to_node_map, cpu) =
116 early_per_cpu_map(x86_cpu_to_node_map, cpu);
117#endif
118 }
119 48
120 /* indicate the early static arrays will soon be gone */ 49 pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
121 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 50 0x2 | DESCTYPE_S, 0x8);
122 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL; 51 gdt.s = 1;
123#ifdef X86_64_NUMA 52 write_gdt_entry(get_cpu_gdt_table(cpu),
124 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL; 53 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
125#endif 54#endif
126} 55}
127 56
128#ifdef CONFIG_X86_64
129unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
130 [0] = (unsigned long)__per_cpu_load,
131};
132#else
133unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
134#endif
135EXPORT_SYMBOL(__per_cpu_offset);
136
137/* 57/*
138 * Great future plan: 58 * Great future plan:
139 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 59 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -141,15 +61,12 @@ EXPORT_SYMBOL(__per_cpu_offset);
141 */ 61 */
142void __init setup_per_cpu_areas(void) 62void __init setup_per_cpu_areas(void)
143{ 63{
144 ssize_t size, old_size; 64 ssize_t size;
145 char *ptr; 65 char *ptr;
146 int cpu; 66 int cpu;
147 unsigned long align = 1;
148 67
149 /* Copy section for each CPU (we discard the original) */ 68 /* Copy section for each CPU (we discard the original) */
150 old_size = PERCPU_ENOUGH_ROOM; 69 size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
151 align = max_t(unsigned long, PAGE_SIZE, align);
152 size = roundup(old_size, align);
153 70
154 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", 71 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
155 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); 72 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
@@ -158,20 +75,17 @@ void __init setup_per_cpu_areas(void)
158 75
159 for_each_possible_cpu(cpu) { 76 for_each_possible_cpu(cpu) {
160#ifndef CONFIG_NEED_MULTIPLE_NODES 77#ifndef CONFIG_NEED_MULTIPLE_NODES
161 ptr = __alloc_bootmem(size, align, 78 ptr = alloc_bootmem_pages(size);
162 __pa(MAX_DMA_ADDRESS));
163#else 79#else
164 int node = early_cpu_to_node(cpu); 80 int node = early_cpu_to_node(cpu);
165 if (!node_online(node) || !NODE_DATA(node)) { 81 if (!node_online(node) || !NODE_DATA(node)) {
166 ptr = __alloc_bootmem(size, align, 82 ptr = alloc_bootmem_pages(size);
167 __pa(MAX_DMA_ADDRESS));
168 pr_info("cpu %d has no node %d or node-local memory\n", 83 pr_info("cpu %d has no node %d or node-local memory\n",
169 cpu, node); 84 cpu, node);
170 pr_debug("per cpu data for cpu%d at %016lx\n", 85 pr_debug("per cpu data for cpu%d at %016lx\n",
171 cpu, __pa(ptr)); 86 cpu, __pa(ptr));
172 } else { 87 } else {
173 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 88 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
174 __pa(MAX_DMA_ADDRESS));
175 pr_debug("per cpu data for cpu%d on node%d at %016lx\n", 89 pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
176 cpu, node, __pa(ptr)); 90 cpu, node, __pa(ptr));
177 } 91 }
@@ -181,22 +95,48 @@ void __init setup_per_cpu_areas(void)
181 per_cpu_offset(cpu) = ptr - __per_cpu_start; 95 per_cpu_offset(cpu) = ptr - __per_cpu_start;
182 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu); 96 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
183 per_cpu(cpu_number, cpu) = cpu; 97 per_cpu(cpu_number, cpu) = cpu;
98 setup_percpu_segment(cpu);
99 setup_stack_canary_segment(cpu);
100 /*
101 * Copy data used in early init routines from the
102 * initial arrays to the per cpu data areas. These
103 * arrays then become expendable and the *_early_ptr's
104 * are zeroed indicating that the static arrays are
105 * gone.
106 */
107#ifdef CONFIG_X86_LOCAL_APIC
108 per_cpu(x86_cpu_to_apicid, cpu) =
109 early_per_cpu_map(x86_cpu_to_apicid, cpu);
110 per_cpu(x86_bios_cpu_apicid, cpu) =
111 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
112#endif
184#ifdef CONFIG_X86_64 113#ifdef CONFIG_X86_64
185 per_cpu(irq_stack_ptr, cpu) = 114 per_cpu(irq_stack_ptr, cpu) =
186 per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64; 115 per_cpu(irq_stack_union.irq_stack, cpu) +
116 IRQ_STACK_SIZE - 64;
117#ifdef CONFIG_NUMA
118 per_cpu(x86_cpu_to_node_map, cpu) =
119 early_per_cpu_map(x86_cpu_to_node_map, cpu);
120#endif
121#endif
187 /* 122 /*
188 * Up to this point, CPU0 has been using .data.init 123 * Up to this point, the boot CPU has been using .data.init
189 * area. Reload %gs offset for CPU0. 124 * area. Reload any changed state for the boot CPU.
190 */ 125 */
191 if (cpu == 0) 126 if (cpu == boot_cpu_id)
192 load_gs_base(cpu); 127 switch_to_new_gdt(cpu);
193#endif
194 128
195 DBG("PERCPU: cpu %4d %p\n", cpu, ptr); 129 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
196 } 130 }
197 131
198 /* Setup percpu data maps */ 132 /* indicate the early static arrays will soon be gone */
199 setup_per_cpu_maps(); 133#ifdef CONFIG_X86_LOCAL_APIC
134 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
135 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
136#endif
137#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
138 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
139#endif
200 140
201 /* Setup node to cpumask map */ 141 /* Setup node to cpumask map */
202 setup_node_to_cpumask_map(); 142 setup_node_to_cpumask_map();
@@ -204,207 +144,3 @@ void __init setup_per_cpu_areas(void)
204 /* Setup cpu initialized, callin, callout masks */ 144 /* Setup cpu initialized, callin, callout masks */
205 setup_cpu_local_masks(); 145 setup_cpu_local_masks();
206} 146}
207
208#endif
209
210#ifdef X86_64_NUMA
211
212/*
213 * Allocate node_to_cpumask_map based on number of available nodes
214 * Requires node_possible_map to be valid.
215 *
216 * Note: node_to_cpumask() is not valid until after this is done.
217 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
218 */
219static void __init setup_node_to_cpumask_map(void)
220{
221 unsigned int node, num = 0;
222 cpumask_t *map;
223
224 /* setup nr_node_ids if not done yet */
225 if (nr_node_ids == MAX_NUMNODES) {
226 for_each_node_mask(node, node_possible_map)
227 num = node;
228 nr_node_ids = num + 1;
229 }
230
231 /* allocate the map */
232 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
233 DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
234
235 pr_debug("Node to cpumask map at %p for %d nodes\n",
236 map, nr_node_ids);
237
238 /* node_to_cpumask() will now work */
239 node_to_cpumask_map = map;
240}
241
242void __cpuinit numa_set_node(int cpu, int node)
243{
244 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
245
246 /* early setting, no percpu area yet */
247 if (cpu_to_node_map) {
248 cpu_to_node_map[cpu] = node;
249 return;
250 }
251
252#ifdef CONFIG_DEBUG_PER_CPU_MAPS
253 if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
254 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
255 dump_stack();
256 return;
257 }
258#endif
259 per_cpu(x86_cpu_to_node_map, cpu) = node;
260
261 if (node != NUMA_NO_NODE)
262 per_cpu(node_number, cpu) = node;
263}
264
265void __cpuinit numa_clear_node(int cpu)
266{
267 numa_set_node(cpu, NUMA_NO_NODE);
268}
269
270#ifndef CONFIG_DEBUG_PER_CPU_MAPS
271
272void __cpuinit numa_add_cpu(int cpu)
273{
274 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
275}
276
277void __cpuinit numa_remove_cpu(int cpu)
278{
279 cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
280}
281
282#else /* CONFIG_DEBUG_PER_CPU_MAPS */
283
284/*
285 * --------- debug versions of the numa functions ---------
286 */
287static void __cpuinit numa_set_cpumask(int cpu, int enable)
288{
289 int node = early_cpu_to_node(cpu);
290 cpumask_t *mask;
291 char buf[64];
292
293 if (node_to_cpumask_map == NULL) {
294 printk(KERN_ERR "node_to_cpumask_map NULL\n");
295 dump_stack();
296 return;
297 }
298
299 mask = &node_to_cpumask_map[node];
300 if (enable)
301 cpu_set(cpu, *mask);
302 else
303 cpu_clear(cpu, *mask);
304
305 cpulist_scnprintf(buf, sizeof(buf), mask);
306 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
307 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
308}
309
310void __cpuinit numa_add_cpu(int cpu)
311{
312 numa_set_cpumask(cpu, 1);
313}
314
315void __cpuinit numa_remove_cpu(int cpu)
316{
317 numa_set_cpumask(cpu, 0);
318}
319
320int cpu_to_node(int cpu)
321{
322 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
323 printk(KERN_WARNING
324 "cpu_to_node(%d): usage too early!\n", cpu);
325 dump_stack();
326 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
327 }
328 return per_cpu(x86_cpu_to_node_map, cpu);
329}
330EXPORT_SYMBOL(cpu_to_node);
331
332/*
333 * Same function as cpu_to_node() but used if called before the
334 * per_cpu areas are setup.
335 */
336int early_cpu_to_node(int cpu)
337{
338 if (early_per_cpu_ptr(x86_cpu_to_node_map))
339 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
340
341 if (!per_cpu_offset(cpu)) {
342 printk(KERN_WARNING
343 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
344 dump_stack();
345 return NUMA_NO_NODE;
346 }
347 return per_cpu(x86_cpu_to_node_map, cpu);
348}
349
350
351/* empty cpumask */
352static const cpumask_t cpu_mask_none;
353
354/*
355 * Returns a pointer to the bitmask of CPUs on Node 'node'.
356 */
357const cpumask_t *cpumask_of_node(int node)
358{
359 if (node_to_cpumask_map == NULL) {
360 printk(KERN_WARNING
361 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
362 node);
363 dump_stack();
364 return (const cpumask_t *)&cpu_online_map;
365 }
366 if (node >= nr_node_ids) {
367 printk(KERN_WARNING
368 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
369 node, nr_node_ids);
370 dump_stack();
371 return &cpu_mask_none;
372 }
373 return &node_to_cpumask_map[node];
374}
375EXPORT_SYMBOL(cpumask_of_node);
376
377/*
378 * Returns a bitmask of CPUs on Node 'node'.
379 *
380 * Side note: this function creates the returned cpumask on the stack
381 * so with a high NR_CPUS count, excessive stack space is used. The
382 * node_to_cpumask_ptr function should be used whenever possible.
383 */
384cpumask_t node_to_cpumask(int node)
385{
386 if (node_to_cpumask_map == NULL) {
387 printk(KERN_WARNING
388 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
389 dump_stack();
390 return cpu_online_map;
391 }
392 if (node >= nr_node_ids) {
393 printk(KERN_WARNING
394 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
395 node, nr_node_ids);
396 dump_stack();
397 return cpu_mask_none;
398 }
399 return node_to_cpumask_map[node];
400}
401EXPORT_SYMBOL(node_to_cpumask);
402
403/*
404 * --------- end of debug versions of the numa functions ---------
405 */
406
407#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
408
409#endif /* X86_64_NUMA */
410
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 0bc73d67acfb..4d3441018065 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -50,27 +50,23 @@
50# define FIX_EFLAGS __FIX_EFLAGS 50# define FIX_EFLAGS __FIX_EFLAGS
51#endif 51#endif
52 52
53#define COPY(x) { \ 53#define COPY(x) do { \
54 err |= __get_user(regs->x, &sc->x); \ 54 get_user_ex(regs->x, &sc->x); \
55} 55} while (0)
56 56
57#define COPY_SEG(seg) { \ 57#define GET_SEG(seg) ({ \
58 unsigned short tmp; \ 58 unsigned short tmp; \
59 err |= __get_user(tmp, &sc->seg); \ 59 get_user_ex(tmp, &sc->seg); \
60 regs->seg = tmp; \ 60 tmp; \
61} 61})
62 62
63#define COPY_SEG_CPL3(seg) { \ 63#define COPY_SEG(seg) do { \
64 unsigned short tmp; \ 64 regs->seg = GET_SEG(seg); \
65 err |= __get_user(tmp, &sc->seg); \ 65} while (0)
66 regs->seg = tmp | 3; \
67}
68 66
69#define GET_SEG(seg) { \ 67#define COPY_SEG_CPL3(seg) do { \
70 unsigned short tmp; \ 68 regs->seg = GET_SEG(seg) | 3; \
71 err |= __get_user(tmp, &sc->seg); \ 69} while (0)
72 loadsegment(seg, tmp); \
73}
74 70
75static int 71static int
76restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, 72restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
@@ -83,45 +79,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
83 /* Always make any pending restarted system calls return -EINTR */ 79 /* Always make any pending restarted system calls return -EINTR */
84 current_thread_info()->restart_block.fn = do_no_restart_syscall; 80 current_thread_info()->restart_block.fn = do_no_restart_syscall;
85 81
82 get_user_try {
83
86#ifdef CONFIG_X86_32 84#ifdef CONFIG_X86_32
87 GET_SEG(gs); 85 set_user_gs(regs, GET_SEG(gs));
88 COPY_SEG(fs); 86 COPY_SEG(fs);
89 COPY_SEG(es); 87 COPY_SEG(es);
90 COPY_SEG(ds); 88 COPY_SEG(ds);
91#endif /* CONFIG_X86_32 */ 89#endif /* CONFIG_X86_32 */
92 90
93 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); 91 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
94 COPY(dx); COPY(cx); COPY(ip); 92 COPY(dx); COPY(cx); COPY(ip);
95 93
96#ifdef CONFIG_X86_64 94#ifdef CONFIG_X86_64
97 COPY(r8); 95 COPY(r8);
98 COPY(r9); 96 COPY(r9);
99 COPY(r10); 97 COPY(r10);
100 COPY(r11); 98 COPY(r11);
101 COPY(r12); 99 COPY(r12);
102 COPY(r13); 100 COPY(r13);
103 COPY(r14); 101 COPY(r14);
104 COPY(r15); 102 COPY(r15);
105#endif /* CONFIG_X86_64 */ 103#endif /* CONFIG_X86_64 */
106 104
107#ifdef CONFIG_X86_32 105#ifdef CONFIG_X86_32
108 COPY_SEG_CPL3(cs); 106 COPY_SEG_CPL3(cs);
109 COPY_SEG_CPL3(ss); 107 COPY_SEG_CPL3(ss);
110#else /* !CONFIG_X86_32 */ 108#else /* !CONFIG_X86_32 */
111 /* Kernel saves and restores only the CS segment register on signals, 109 /* Kernel saves and restores only the CS segment register on signals,
112 * which is the bare minimum needed to allow mixed 32/64-bit code. 110 * which is the bare minimum needed to allow mixed 32/64-bit code.
113 * App's signal handler can save/restore other segments if needed. */ 111 * App's signal handler can save/restore other segments if needed. */
114 COPY_SEG_CPL3(cs); 112 COPY_SEG_CPL3(cs);
115#endif /* CONFIG_X86_32 */ 113#endif /* CONFIG_X86_32 */
116 114
117 err |= __get_user(tmpflags, &sc->flags); 115 get_user_ex(tmpflags, &sc->flags);
118 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); 116 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
119 regs->orig_ax = -1; /* disable syscall checks */ 117 regs->orig_ax = -1; /* disable syscall checks */
118
119 get_user_ex(buf, &sc->fpstate);
120 err |= restore_i387_xstate(buf);
120 121
121 err |= __get_user(buf, &sc->fpstate); 122 get_user_ex(*pax, &sc->ax);
122 err |= restore_i387_xstate(buf); 123 } get_user_catch(err);
123 124
124 err |= __get_user(*pax, &sc->ax);
125 return err; 125 return err;
126} 126}
127 127
@@ -131,57 +131,55 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
131{ 131{
132 int err = 0; 132 int err = 0;
133 133
134#ifdef CONFIG_X86_32 134 put_user_try {
135 {
136 unsigned int tmp;
137 135
138 savesegment(gs, tmp); 136#ifdef CONFIG_X86_32
139 err |= __put_user(tmp, (unsigned int __user *)&sc->gs); 137 put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
140 } 138 put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
141 err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); 139 put_user_ex(regs->es, (unsigned int __user *)&sc->es);
142 err |= __put_user(regs->es, (unsigned int __user *)&sc->es); 140 put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
143 err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
144#endif /* CONFIG_X86_32 */ 141#endif /* CONFIG_X86_32 */
145 142
146 err |= __put_user(regs->di, &sc->di); 143 put_user_ex(regs->di, &sc->di);
147 err |= __put_user(regs->si, &sc->si); 144 put_user_ex(regs->si, &sc->si);
148 err |= __put_user(regs->bp, &sc->bp); 145 put_user_ex(regs->bp, &sc->bp);
149 err |= __put_user(regs->sp, &sc->sp); 146 put_user_ex(regs->sp, &sc->sp);
150 err |= __put_user(regs->bx, &sc->bx); 147 put_user_ex(regs->bx, &sc->bx);
151 err |= __put_user(regs->dx, &sc->dx); 148 put_user_ex(regs->dx, &sc->dx);
152 err |= __put_user(regs->cx, &sc->cx); 149 put_user_ex(regs->cx, &sc->cx);
153 err |= __put_user(regs->ax, &sc->ax); 150 put_user_ex(regs->ax, &sc->ax);
154#ifdef CONFIG_X86_64 151#ifdef CONFIG_X86_64
155 err |= __put_user(regs->r8, &sc->r8); 152 put_user_ex(regs->r8, &sc->r8);
156 err |= __put_user(regs->r9, &sc->r9); 153 put_user_ex(regs->r9, &sc->r9);
157 err |= __put_user(regs->r10, &sc->r10); 154 put_user_ex(regs->r10, &sc->r10);
158 err |= __put_user(regs->r11, &sc->r11); 155 put_user_ex(regs->r11, &sc->r11);
159 err |= __put_user(regs->r12, &sc->r12); 156 put_user_ex(regs->r12, &sc->r12);
160 err |= __put_user(regs->r13, &sc->r13); 157 put_user_ex(regs->r13, &sc->r13);
161 err |= __put_user(regs->r14, &sc->r14); 158 put_user_ex(regs->r14, &sc->r14);
162 err |= __put_user(regs->r15, &sc->r15); 159 put_user_ex(regs->r15, &sc->r15);
163#endif /* CONFIG_X86_64 */ 160#endif /* CONFIG_X86_64 */
164 161
165 err |= __put_user(current->thread.trap_no, &sc->trapno); 162 put_user_ex(current->thread.trap_no, &sc->trapno);
166 err |= __put_user(current->thread.error_code, &sc->err); 163 put_user_ex(current->thread.error_code, &sc->err);
167 err |= __put_user(regs->ip, &sc->ip); 164 put_user_ex(regs->ip, &sc->ip);
168#ifdef CONFIG_X86_32 165#ifdef CONFIG_X86_32
169 err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); 166 put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
170 err |= __put_user(regs->flags, &sc->flags); 167 put_user_ex(regs->flags, &sc->flags);
171 err |= __put_user(regs->sp, &sc->sp_at_signal); 168 put_user_ex(regs->sp, &sc->sp_at_signal);
172 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); 169 put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
173#else /* !CONFIG_X86_32 */ 170#else /* !CONFIG_X86_32 */
174 err |= __put_user(regs->flags, &sc->flags); 171 put_user_ex(regs->flags, &sc->flags);
175 err |= __put_user(regs->cs, &sc->cs); 172 put_user_ex(regs->cs, &sc->cs);
176 err |= __put_user(0, &sc->gs); 173 put_user_ex(0, &sc->gs);
177 err |= __put_user(0, &sc->fs); 174 put_user_ex(0, &sc->fs);
178#endif /* CONFIG_X86_32 */ 175#endif /* CONFIG_X86_32 */
179 176
180 err |= __put_user(fpstate, &sc->fpstate); 177 put_user_ex(fpstate, &sc->fpstate);
181 178
182 /* non-iBCS2 extensions.. */ 179 /* non-iBCS2 extensions.. */
183 err |= __put_user(mask, &sc->oldmask); 180 put_user_ex(mask, &sc->oldmask);
184 err |= __put_user(current->thread.cr2, &sc->cr2); 181 put_user_ex(current->thread.cr2, &sc->cr2);
182 } put_user_catch(err);
185 183
186 return err; 184 return err;
187} 185}
@@ -336,43 +334,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
336 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 334 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
337 return -EFAULT; 335 return -EFAULT;
338 336
339 err |= __put_user(sig, &frame->sig); 337 put_user_try {
340 err |= __put_user(&frame->info, &frame->pinfo); 338 put_user_ex(sig, &frame->sig);
341 err |= __put_user(&frame->uc, &frame->puc); 339 put_user_ex(&frame->info, &frame->pinfo);
342 err |= copy_siginfo_to_user(&frame->info, info); 340 put_user_ex(&frame->uc, &frame->puc);
343 if (err) 341 err |= copy_siginfo_to_user(&frame->info, info);
344 return -EFAULT;
345
346 /* Create the ucontext. */
347 if (cpu_has_xsave)
348 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
349 else
350 err |= __put_user(0, &frame->uc.uc_flags);
351 err |= __put_user(0, &frame->uc.uc_link);
352 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
353 err |= __put_user(sas_ss_flags(regs->sp),
354 &frame->uc.uc_stack.ss_flags);
355 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
356 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
357 regs, set->sig[0]);
358 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
359 if (err)
360 return -EFAULT;
361 342
362 /* Set up to return from userspace. */ 343 /* Create the ucontext. */
363 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); 344 if (cpu_has_xsave)
364 if (ka->sa.sa_flags & SA_RESTORER) 345 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
365 restorer = ka->sa.sa_restorer; 346 else
366 err |= __put_user(restorer, &frame->pretcode); 347 put_user_ex(0, &frame->uc.uc_flags);
348 put_user_ex(0, &frame->uc.uc_link);
349 put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
350 put_user_ex(sas_ss_flags(regs->sp),
351 &frame->uc.uc_stack.ss_flags);
352 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
353 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
354 regs, set->sig[0]);
355 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
356
357 /* Set up to return from userspace. */
358 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
359 if (ka->sa.sa_flags & SA_RESTORER)
360 restorer = ka->sa.sa_restorer;
361 put_user_ex(restorer, &frame->pretcode);
367 362
368 /* 363 /*
369 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 364 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
370 * 365 *
371 * WE DO NOT USE IT ANY MORE! It's only left here for historical 366 * WE DO NOT USE IT ANY MORE! It's only left here for historical
372 * reasons and because gdb uses it as a signature to notice 367 * reasons and because gdb uses it as a signature to notice
373 * signal handler stack frames. 368 * signal handler stack frames.
374 */ 369 */
375 err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); 370 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
371 } put_user_catch(err);
376 372
377 if (err) 373 if (err)
378 return -EFAULT; 374 return -EFAULT;
@@ -436,28 +432,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
436 return -EFAULT; 432 return -EFAULT;
437 } 433 }
438 434
439 /* Create the ucontext. */ 435 put_user_try {
440 if (cpu_has_xsave) 436 /* Create the ucontext. */
441 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); 437 if (cpu_has_xsave)
442 else 438 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
443 err |= __put_user(0, &frame->uc.uc_flags); 439 else
444 err |= __put_user(0, &frame->uc.uc_link); 440 put_user_ex(0, &frame->uc.uc_flags);
445 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 441 put_user_ex(0, &frame->uc.uc_link);
446 err |= __put_user(sas_ss_flags(regs->sp), 442 put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
447 &frame->uc.uc_stack.ss_flags); 443 put_user_ex(sas_ss_flags(regs->sp),
448 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); 444 &frame->uc.uc_stack.ss_flags);
449 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); 445 put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
450 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 446 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
451 447 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
452 /* Set up to return from userspace. If provided, use a stub 448
453 already in userspace. */ 449 /* Set up to return from userspace. If provided, use a stub
454 /* x86-64 should always use SA_RESTORER. */ 450 already in userspace. */
455 if (ka->sa.sa_flags & SA_RESTORER) { 451 /* x86-64 should always use SA_RESTORER. */
456 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); 452 if (ka->sa.sa_flags & SA_RESTORER) {
457 } else { 453 put_user_ex(ka->sa.sa_restorer, &frame->pretcode);
458 /* could use a vstub here */ 454 } else {
459 return -EFAULT; 455 /* could use a vstub here */
460 } 456 err |= -EFAULT;
457 }
458 } put_user_catch(err);
461 459
462 if (err) 460 if (err)
463 return -EFAULT; 461 return -EFAULT;
@@ -509,31 +507,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
509 struct old_sigaction __user *oact) 507 struct old_sigaction __user *oact)
510{ 508{
511 struct k_sigaction new_ka, old_ka; 509 struct k_sigaction new_ka, old_ka;
512 int ret; 510 int ret = 0;
513 511
514 if (act) { 512 if (act) {
515 old_sigset_t mask; 513 old_sigset_t mask;
516 514
517 if (!access_ok(VERIFY_READ, act, sizeof(*act)) || 515 if (!access_ok(VERIFY_READ, act, sizeof(*act)))
518 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
519 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
520 return -EFAULT; 516 return -EFAULT;
521 517
522 __get_user(new_ka.sa.sa_flags, &act->sa_flags); 518 get_user_try {
523 __get_user(mask, &act->sa_mask); 519 get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
520 get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
521 get_user_ex(mask, &act->sa_mask);
522 get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
523 } get_user_catch(ret);
524
525 if (ret)
526 return -EFAULT;
524 siginitset(&new_ka.sa.sa_mask, mask); 527 siginitset(&new_ka.sa.sa_mask, mask);
525 } 528 }
526 529
527 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); 530 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
528 531
529 if (!ret && oact) { 532 if (!ret && oact) {
530 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || 533 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
531 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
532 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
533 return -EFAULT; 534 return -EFAULT;
534 535
535 __put_user(old_ka.sa.sa_flags, &oact->sa_flags); 536 put_user_try {
536 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); 537 put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
538 put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
539 put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
540 put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
541 } put_user_catch(ret);
542
543 if (ret)
544 return -EFAULT;
537 } 545 }
538 546
539 return ret; 547 return ret;
@@ -541,14 +549,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
541#endif /* CONFIG_X86_32 */ 549#endif /* CONFIG_X86_32 */
542 550
543#ifdef CONFIG_X86_32 551#ifdef CONFIG_X86_32
544asmlinkage int sys_sigaltstack(unsigned long bx) 552int sys_sigaltstack(struct pt_regs *regs)
545{ 553{
546 /* 554 const stack_t __user *uss = (const stack_t __user *)regs->bx;
547 * This is needed to make gcc realize it doesn't own the
548 * "struct pt_regs"
549 */
550 struct pt_regs *regs = (struct pt_regs *)&bx;
551 const stack_t __user *uss = (const stack_t __user *)bx;
552 stack_t __user *uoss = (stack_t __user *)regs->cx; 555 stack_t __user *uoss = (stack_t __user *)regs->cx;
553 556
554 return do_sigaltstack(uss, uoss, regs->sp); 557 return do_sigaltstack(uss, uoss, regs->sp);
@@ -566,14 +569,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
566 * Do a signal return; undo the signal stack. 569 * Do a signal return; undo the signal stack.
567 */ 570 */
568#ifdef CONFIG_X86_32 571#ifdef CONFIG_X86_32
569asmlinkage unsigned long sys_sigreturn(unsigned long __unused) 572unsigned long sys_sigreturn(struct pt_regs *regs)
570{ 573{
571 struct sigframe __user *frame; 574 struct sigframe __user *frame;
572 struct pt_regs *regs;
573 unsigned long ax; 575 unsigned long ax;
574 sigset_t set; 576 sigset_t set;
575 577
576 regs = (struct pt_regs *) &__unused;
577 frame = (struct sigframe __user *)(regs->sp - 8); 578 frame = (struct sigframe __user *)(regs->sp - 8);
578 579
579 if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) 580 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -600,7 +601,7 @@ badframe:
600} 601}
601#endif /* CONFIG_X86_32 */ 602#endif /* CONFIG_X86_32 */
602 603
603static long do_rt_sigreturn(struct pt_regs *regs) 604long sys_rt_sigreturn(struct pt_regs *regs)
604{ 605{
605 struct rt_sigframe __user *frame; 606 struct rt_sigframe __user *frame;
606 unsigned long ax; 607 unsigned long ax;
@@ -631,25 +632,6 @@ badframe:
631 return 0; 632 return 0;
632} 633}
633 634
634#ifdef CONFIG_X86_32
635/*
636 * Note: do not pass in pt_regs directly as with tail-call optimization
637 * GCC will incorrectly stomp on the caller's frame and corrupt user-space
638 * register state:
639 */
640asmlinkage int sys_rt_sigreturn(unsigned long __unused)
641{
642 struct pt_regs *regs = (struct pt_regs *)&__unused;
643
644 return do_rt_sigreturn(regs);
645}
646#else /* !CONFIG_X86_32 */
647asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
648{
649 return do_rt_sigreturn(regs);
650}
651#endif /* CONFIG_X86_32 */
652
653/* 635/*
654 * OK, we're invoking a handler: 636 * OK, we're invoking a handler:
655 */ 637 */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index e6faa3316bd2..eaaffae31cc0 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -2,7 +2,7 @@
2 * Intel SMP support routines. 2 * Intel SMP support routines.
3 * 3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
5 * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> 5 * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6 * (c) 2002,2003 Andi Kleen, SuSE Labs. 6 * (c) 2002,2003 Andi Kleen, SuSE Labs.
7 * 7 *
8 * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> 8 * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
@@ -26,8 +26,7 @@
26#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
27#include <asm/mmu_context.h> 27#include <asm/mmu_context.h>
28#include <asm/proto.h> 28#include <asm/proto.h>
29#include <mach_ipi.h> 29#include <asm/genapic.h>
30#include <mach_apic.h>
31/* 30/*
32 * Some notes on x86 processor bugs affecting SMP operation: 31 * Some notes on x86 processor bugs affecting SMP operation:
33 * 32 *
@@ -118,12 +117,12 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 117 WARN_ON(1);
119 return; 118 return;
120 } 119 }
121 send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); 120 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 121}
123 122
124void native_send_call_func_single_ipi(int cpu) 123void native_send_call_func_single_ipi(int cpu)
125{ 124{
126 send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); 125 apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
127} 126}
128 127
129void native_send_call_func_ipi(const struct cpumask *mask) 128void native_send_call_func_ipi(const struct cpumask *mask)
@@ -131,7 +130,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
131 cpumask_var_t allbutself; 130 cpumask_var_t allbutself;
132 131
133 if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { 132 if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
134 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 133 apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
135 return; 134 return;
136 } 135 }
137 136
@@ -140,9 +139,9 @@ void native_send_call_func_ipi(const struct cpumask *mask)
140 139
141 if (cpumask_equal(mask, allbutself) && 140 if (cpumask_equal(mask, allbutself) &&
142 cpumask_equal(cpu_online_mask, cpu_callout_mask)) 141 cpumask_equal(cpu_online_mask, cpu_callout_mask))
143 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 142 apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
144 else 143 else
145 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 144 apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
146 145
147 free_cpumask_var(allbutself); 146 free_cpumask_var(allbutself);
148} 147}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index def770b57b5a..af57f88186e7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2,7 +2,7 @@
2 * x86 SMP booting functions 2 * x86 SMP booting functions
3 * 3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> 5 * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6 * Copyright 2001 Andi Kleen, SuSE Labs. 6 * Copyright 2001 Andi Kleen, SuSE Labs.
7 * 7 *
8 * Much of the core SMP work is based on previous work by Thomas Radke, to 8 * Much of the core SMP work is based on previous work by Thomas Radke, to
@@ -65,9 +65,8 @@
65#include <asm/uv/uv.h> 65#include <asm/uv/uv.h>
66#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
67 67
68#include <mach_apic.h> 68#include <asm/genapic.h>
69#include <mach_wakecpu.h> 69#include <asm/smpboot_hooks.h>
70#include <smpboot_hooks.h>
71 70
72#ifdef CONFIG_X86_32 71#ifdef CONFIG_X86_32
73u8 apicid_2_node[MAX_APICID]; 72u8 apicid_2_node[MAX_APICID];
@@ -163,7 +162,7 @@ static void map_cpu_to_logical_apicid(void)
163{ 162{
164 int cpu = smp_processor_id(); 163 int cpu = smp_processor_id();
165 int apicid = logical_smp_processor_id(); 164 int apicid = logical_smp_processor_id();
166 int node = apicid_to_node(apicid); 165 int node = apic->apicid_to_node(apicid);
167 166
168 if (!node_online(node)) 167 if (!node_online(node))
169 node = first_online_node; 168 node = first_online_node;
@@ -196,7 +195,8 @@ static void __cpuinit smp_callin(void)
196 * our local APIC. We have to wait for the IPI or we'll 195 * our local APIC. We have to wait for the IPI or we'll
197 * lock up on an APIC access. 196 * lock up on an APIC access.
198 */ 197 */
199 wait_for_init_deassert(&init_deasserted); 198 if (apic->wait_for_init_deassert)
199 apic->wait_for_init_deassert(&init_deasserted);
200 200
201 /* 201 /*
202 * (This works even if the APIC is not enabled.) 202 * (This works even if the APIC is not enabled.)
@@ -243,7 +243,8 @@ static void __cpuinit smp_callin(void)
243 */ 243 */
244 244
245 pr_debug("CALLIN, before setup_local_APIC().\n"); 245 pr_debug("CALLIN, before setup_local_APIC().\n");
246 smp_callin_clear_local_apic(); 246 if (apic->smp_callin_clear_local_apic)
247 apic->smp_callin_clear_local_apic();
247 setup_local_APIC(); 248 setup_local_APIC();
248 end_local_APIC_setup(); 249 end_local_APIC_setup();
249 map_cpu_to_logical_apicid(); 250 map_cpu_to_logical_apicid();
@@ -583,7 +584,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
583 /* Target chip */ 584 /* Target chip */
584 /* Boot on the stack */ 585 /* Boot on the stack */
585 /* Kick the second */ 586 /* Kick the second */
586 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); 587 apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
587 588
588 pr_debug("Waiting for send to finish...\n"); 589 pr_debug("Waiting for send to finish...\n");
589 send_status = safe_apic_wait_icr_idle(); 590 send_status = safe_apic_wait_icr_idle();
@@ -749,7 +750,7 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
749/* 750/*
750 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 751 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
751 * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 752 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
752 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. 753 * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
753 */ 754 */
754{ 755{
755 unsigned long boot_error = 0; 756 unsigned long boot_error = 0;
@@ -793,7 +794,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
793do_rest: 794do_rest:
794 per_cpu(current_task, cpu) = c_idle.idle; 795 per_cpu(current_task, cpu) = c_idle.idle;
795#ifdef CONFIG_X86_32 796#ifdef CONFIG_X86_32
796 init_gdt(cpu);
797 /* Stack for startup_32 can be just as for start_secondary onwards */ 797 /* Stack for startup_32 can be just as for start_secondary onwards */
798 irq_ctx_init(cpu); 798 irq_ctx_init(cpu);
799#else 799#else
@@ -825,7 +825,8 @@ do_rest:
825 825
826 pr_debug("Setting warm reset code and vector.\n"); 826 pr_debug("Setting warm reset code and vector.\n");
827 827
828 store_NMI_vector(&nmi_high, &nmi_low); 828 if (apic->store_NMI_vector)
829 apic->store_NMI_vector(&nmi_high, &nmi_low);
829 830
830 smpboot_setup_warm_reset_vector(start_ip); 831 smpboot_setup_warm_reset_vector(start_ip);
831 /* 832 /*
@@ -840,7 +841,7 @@ do_rest:
840 /* 841 /*
841 * Starting actual IPI sequence... 842 * Starting actual IPI sequence...
842 */ 843 */
843 boot_error = wakeup_secondary_cpu(apicid, start_ip); 844 boot_error = apic->wakeup_cpu(apicid, start_ip);
844 845
845 if (!boot_error) { 846 if (!boot_error) {
846 /* 847 /*
@@ -874,8 +875,8 @@ do_rest:
874 else 875 else
875 /* trampoline code not run */ 876 /* trampoline code not run */
876 printk(KERN_ERR "Not responding.\n"); 877 printk(KERN_ERR "Not responding.\n");
877 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) 878 if (apic->inquire_remote_apic)
878 inquire_remote_apic(apicid); 879 apic->inquire_remote_apic(apicid);
879 } 880 }
880 } 881 }
881 882
@@ -906,7 +907,7 @@ do_rest:
906 907
907int __cpuinit native_cpu_up(unsigned int cpu) 908int __cpuinit native_cpu_up(unsigned int cpu)
908{ 909{
909 int apicid = cpu_present_to_apicid(cpu); 910 int apicid = apic->cpu_present_to_apicid(cpu);
910 unsigned long flags; 911 unsigned long flags;
911 int err; 912 int err;
912 913
@@ -999,14 +1000,14 @@ static int __init smp_sanity_check(unsigned max_cpus)
999{ 1000{
1000 preempt_disable(); 1001 preempt_disable();
1001 1002
1002#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) 1003#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
1003 if (def_to_bigsmp && nr_cpu_ids > 8) { 1004 if (def_to_bigsmp && nr_cpu_ids > 8) {
1004 unsigned int cpu; 1005 unsigned int cpu;
1005 unsigned nr; 1006 unsigned nr;
1006 1007
1007 printk(KERN_WARNING 1008 printk(KERN_WARNING
1008 "More than 8 CPUs detected - skipping them.\n" 1009 "More than 8 CPUs detected - skipping them.\n"
1009 "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); 1010 "Use CONFIG_X86_BIGSMP.\n");
1010 1011
1011 nr = 0; 1012 nr = 0;
1012 for_each_present_cpu(cpu) { 1013 for_each_present_cpu(cpu) {
@@ -1052,7 +1053,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1052 * Should not be necessary because the MP table should list the boot 1053 * Should not be necessary because the MP table should list the boot
1053 * CPU too, but we do it for the sake of robustness anyway. 1054 * CPU too, but we do it for the sake of robustness anyway.
1054 */ 1055 */
1055 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { 1056 if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
1056 printk(KERN_NOTICE 1057 printk(KERN_NOTICE
1057 "weird, boot CPU (#%d) not listed by the BIOS.\n", 1058 "weird, boot CPU (#%d) not listed by the BIOS.\n",
1058 boot_cpu_physical_apicid); 1059 boot_cpu_physical_apicid);
@@ -1070,7 +1071,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1070 printk(KERN_ERR "... forcing use of dummy APIC emulation." 1071 printk(KERN_ERR "... forcing use of dummy APIC emulation."
1071 "(tell your hw vendor)\n"); 1072 "(tell your hw vendor)\n");
1072 smpboot_clear_io_apic(); 1073 smpboot_clear_io_apic();
1073 disable_ioapic_setup(); 1074 arch_disable_smp_support();
1074 return -1; 1075 return -1;
1075 } 1076 }
1076 1077
@@ -1129,7 +1130,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1129 1130
1130#ifdef CONFIG_X86_64 1131#ifdef CONFIG_X86_64
1131 enable_IR_x2apic(); 1132 enable_IR_x2apic();
1132 setup_apic_routing(); 1133 default_setup_apic_routing();
1133#endif 1134#endif
1134 1135
1135 if (smp_sanity_check(max_cpus) < 0) { 1136 if (smp_sanity_check(max_cpus) < 0) {
@@ -1164,7 +1165,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1164 1165
1165 map_cpu_to_logical_apicid(); 1166 map_cpu_to_logical_apicid();
1166 1167
1167 setup_portio_remap(); 1168 if (apic->setup_portio_remap)
1169 apic->setup_portio_remap();
1168 1170
1169 smpboot_setup_io_apic(); 1171 smpboot_setup_io_apic();
1170 /* 1172 /*
@@ -1186,10 +1188,7 @@ out:
1186void __init native_smp_prepare_boot_cpu(void) 1188void __init native_smp_prepare_boot_cpu(void)
1187{ 1189{
1188 int me = smp_processor_id(); 1190 int me = smp_processor_id();
1189#ifdef CONFIG_X86_32 1191 switch_to_new_gdt(me);
1190 init_gdt(me);
1191#endif
1192 switch_to_new_gdt();
1193 /* already set me in cpu_online_mask in boot_cpu_init() */ 1192 /* already set me in cpu_online_mask in boot_cpu_init() */
1194 cpumask_set_cpu(me, cpu_callout_mask); 1193 cpumask_set_cpu(me, cpu_callout_mask);
1195 per_cpu(cpu_state, me) = CPU_ONLINE; 1194 per_cpu(cpu_state, me) = CPU_ONLINE;
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
deleted file mode 100644
index add36b4e37c9..000000000000
--- a/arch/x86/kernel/smpcommon.c
+++ /dev/null
@@ -1,32 +0,0 @@
1/*
2 * SMP stuff which is common to all sub-architectures.
3 */
4#include <linux/module.h>
5#include <asm/smp.h>
6#include <asm/sections.h>
7
8#ifdef CONFIG_X86_64
9DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load;
10#else
11DEFINE_PER_CPU(unsigned long, this_cpu_off);
12#endif
13EXPORT_PER_CPU_SYMBOL(this_cpu_off);
14
15#ifdef CONFIG_X86_32
16/*
17 * Initialize the CPU's GDT. This is either the boot CPU doing itself
18 * (still using the master per-cpu area), or a CPU doing it for a
19 * secondary which will soon come up.
20 */
21__cpuinit void init_gdt(int cpu)
22{
23 struct desc_struct gdt;
24
25 pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
26 0x2 | DESCTYPE_S, 0x8);
27 gdt.s = 1;
28
29 write_gdt_entry(get_cpu_gdt_table(cpu),
30 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
31}
32#endif
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 10786af95545..f7bddc2e37d1 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Stack trace management functions 2 * Stack trace management functions
3 * 3 *
4 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 4 * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
5 */ 5 */
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 7b987852e876..1e733eff9b33 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,8 +30,364 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h> 32#include <asm/bios_ebda.h>
33#include <asm/summit/mpparse.h>
34 33
34/*
35 * APIC driver for the IBM "Summit" chipset.
36 */
37#define APIC_DEFINITION 1
38#include <linux/threads.h>
39#include <linux/cpumask.h>
40#include <asm/mpspec.h>
41#include <asm/apic.h>
42#include <asm/smp.h>
43#include <asm/genapic.h>
44#include <asm/fixmap.h>
45#include <asm/apicdef.h>
46#include <asm/ipi.h>
47#include <linux/kernel.h>
48#include <linux/string.h>
49#include <linux/init.h>
50#include <linux/gfp.h>
51#include <linux/smp.h>
52
53static inline unsigned summit_get_apic_id(unsigned long x)
54{
55 return (x >> 24) & 0xFF;
56}
57
58static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector)
59{
60 default_send_IPI_mask_sequence_logical(mask, vector);
61}
62
63static inline void summit_send_IPI_allbutself(int vector)
64{
65 cpumask_t mask = cpu_online_map;
66 cpu_clear(smp_processor_id(), mask);
67
68 if (!cpus_empty(mask))
69 summit_send_IPI_mask(&mask, vector);
70}
71
72static inline void summit_send_IPI_all(int vector)
73{
74 summit_send_IPI_mask(&cpu_online_map, vector);
75}
76
77#include <asm/tsc.h>
78
79extern int use_cyclone;
80
81#ifdef CONFIG_X86_SUMMIT_NUMA
82extern void setup_summit(void);
83#else
84#define setup_summit() {}
85#endif
86
87static inline int
88summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
89{
90 if (!strncmp(oem, "IBM ENSW", 8) &&
91 (!strncmp(productid, "VIGIL SMP", 9)
92 || !strncmp(productid, "EXA", 3)
93 || !strncmp(productid, "RUTHLESS SMP", 12))){
94 mark_tsc_unstable("Summit based system");
95 use_cyclone = 1; /*enable cyclone-timer*/
96 setup_summit();
97 return 1;
98 }
99 return 0;
100}
101
102/* Hook from generic ACPI tables.c */
103static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
104{
105 if (!strncmp(oem_id, "IBM", 3) &&
106 (!strncmp(oem_table_id, "SERVIGIL", 8)
107 || !strncmp(oem_table_id, "EXA", 3))){
108 mark_tsc_unstable("Summit based system");
109 use_cyclone = 1; /*enable cyclone-timer*/
110 setup_summit();
111 return 1;
112 }
113 return 0;
114}
115
116struct rio_table_hdr {
117 unsigned char version; /* Version number of this data structure */
118 /* Version 3 adds chassis_num & WP_index */
119 unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
120 unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
121} __attribute__((packed));
122
123struct scal_detail {
124 unsigned char node_id; /* Scalability Node ID */
125 unsigned long CBAR; /* Address of 1MB register space */
126 unsigned char port0node; /* Node ID port connected to: 0xFF=None */
127 unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
128 unsigned char port1node; /* Node ID port connected to: 0xFF = None */
129 unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
130 unsigned char port2node; /* Node ID port connected to: 0xFF = None */
131 unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
132 unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
133} __attribute__((packed));
134
135struct rio_detail {
136 unsigned char node_id; /* RIO Node ID */
137 unsigned long BBAR; /* Address of 1MB register space */
138 unsigned char type; /* Type of device */
139 unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
140 /* For CYC: Node ID of Twister that owns this CYC */
141 unsigned char port0node; /* Node ID port connected to: 0xFF=None */
142 unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
143 unsigned char port1node; /* Node ID port connected to: 0xFF=None */
144 unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
145 unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
146 /* For CYC: 0 */
147 unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
148 /* = 0 : the XAPIC is not used, ie:*/
149 /* ints fwded to another XAPIC */
150 /* Bits1:7 Reserved */
151 /* For CYC: Bits0:7 Reserved */
152 unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
153 /* lower slot numbers/PCI bus numbers */
154 /* For CYC: No meaning */
155 unsigned char chassis_num; /* 1 based Chassis number */
156 /* For LookOut WPEGs this field indicates the */
157 /* Expansion Chassis #, enumerated from Boot */
158 /* Node WPEG external port, then Boot Node CYC */
159 /* external port, then Next Vigil chassis WPEG */
160 /* external port, etc. */
161 /* Shared Lookouts have only 1 chassis number (the */
162 /* first one assigned) */
163} __attribute__((packed));
164
165
166typedef enum {
167 CompatTwister = 0, /* Compatibility Twister */
168 AltTwister = 1, /* Alternate Twister of internal 8-way */
169 CompatCyclone = 2, /* Compatibility Cyclone */
170 AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
171 CompatWPEG = 4, /* Compatibility WPEG */
172 AltWPEG = 5, /* Second Planar WPEG */
173 LookOutAWPEG = 6, /* LookOut WPEG */
174 LookOutBWPEG = 7, /* LookOut WPEG */
175} node_type;
176
177static inline int is_WPEG(struct rio_detail *rio){
178 return (rio->type == CompatWPEG || rio->type == AltWPEG ||
179 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
180}
181
182
183/* In clustered mode, the high nibble of APIC ID is a cluster number.
184 * The low nibble is a 4-bit bitmap. */
185#define XAPIC_DEST_CPUS_SHIFT 4
186#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
187#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
188
189#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
190
191static inline const cpumask_t *summit_target_cpus(void)
192{
193 /* CPU_MASK_ALL (0xff) has undefined behaviour with
194 * dest_LowestPrio mode logical clustered apic interrupt routing.
195 * Just start on cpu 0. IRQ balancing will spread load.
196 */
197 return &cpumask_of_cpu(0);
198}
199
200static inline unsigned long
201summit_check_apicid_used(physid_mask_t bitmap, int apicid)
202{
203 return 0;
204}
205
206/* we don't use the phys_cpu_present_map to indicate apicid presence */
207static inline unsigned long summit_check_apicid_present(int bit)
208{
209 return 1;
210}
211
212#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK)
213
214extern u8 cpu_2_logical_apicid[];
215
216static inline void summit_init_apic_ldr(void)
217{
218 unsigned long val, id;
219 int count = 0;
220 u8 my_id = (u8)hard_smp_processor_id();
221 u8 my_cluster = (u8)apicid_cluster(my_id);
222#ifdef CONFIG_SMP
223 u8 lid;
224 int i;
225
226 /* Create logical APIC IDs by counting CPUs already in cluster. */
227 for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
228 lid = cpu_2_logical_apicid[i];
229 if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
230 ++count;
231 }
232#endif
233 /* We only have a 4 wide bitmap in cluster mode. If a deranged
234 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
235 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
236 id = my_cluster | (1UL << count);
237 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
238 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
239 val |= SET_APIC_LOGICAL_ID(id);
240 apic_write(APIC_LDR, val);
241}
242
243static inline int summit_apic_id_registered(void)
244{
245 return 1;
246}
247
248static inline void summit_setup_apic_routing(void)
249{
250 printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
251 nr_ioapics);
252}
253
254static inline int summit_apicid_to_node(int logical_apicid)
255{
256#ifdef CONFIG_SMP
257 return apicid_2_node[hard_smp_processor_id()];
258#else
259 return 0;
260#endif
261}
262
263/* Mapping from cpu number to logical apicid */
264static inline int summit_cpu_to_logical_apicid(int cpu)
265{
266#ifdef CONFIG_SMP
267 if (cpu >= nr_cpu_ids)
268 return BAD_APICID;
269 return (int)cpu_2_logical_apicid[cpu];
270#else
271 return logical_smp_processor_id();
272#endif
273}
274
275static inline int summit_cpu_present_to_apicid(int mps_cpu)
276{
277 if (mps_cpu < nr_cpu_ids)
278 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
279 else
280 return BAD_APICID;
281}
282
283static inline physid_mask_t
284summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
285{
286 /* For clustered we don't have a good way to do this yet - hack */
287 return physids_promote(0x0F);
288}
289
290static inline physid_mask_t summit_apicid_to_cpu_present(int apicid)
291{
292 return physid_mask_of_physid(0);
293}
294
295static inline void summit_setup_portio_remap(void)
296{
297}
298
299static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
300{
301 return 1;
302}
303
304static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
305{
306 int cpus_found = 0;
307 int num_bits_set;
308 int apicid;
309 int cpu;
310
311 num_bits_set = cpus_weight(*cpumask);
312 /* Return id to all */
313 if (num_bits_set >= nr_cpu_ids)
314 return 0xFF;
315 /*
316 * The cpus in the mask must all be on the same apic cluster. If they are
317 * not on the same apicid cluster, return the default value of target_cpus():
318 */
319 cpu = first_cpu(*cpumask);
320 apicid = summit_cpu_to_logical_apicid(cpu);
321
322 while (cpus_found < num_bits_set) {
323 if (cpu_isset(cpu, *cpumask)) {
324 int new_apicid = summit_cpu_to_logical_apicid(cpu);
325
326 if (apicid_cluster(apicid) !=
327 apicid_cluster(new_apicid)) {
328 printk ("%s: Not a valid mask!\n", __func__);
329
330 return 0xFF;
331 }
332 apicid = apicid | new_apicid;
333 cpus_found++;
334 }
335 cpu++;
336 }
337 return apicid;
338}
339
340static inline unsigned int
341summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
342 const struct cpumask *andmask)
343{
344 int apicid = summit_cpu_to_logical_apicid(0);
345 cpumask_var_t cpumask;
346
347 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
348 return apicid;
349
350 cpumask_and(cpumask, inmask, andmask);
351 cpumask_and(cpumask, cpumask, cpu_online_mask);
352 apicid = summit_cpu_mask_to_apicid(cpumask);
353
354 free_cpumask_var(cpumask);
355
356 return apicid;
357}
358
359/*
360 * cpuid returns the value latched in the HW at reset, not the APIC ID
361 * register's value. For any box whose BIOS changes APIC IDs, like
362 * clustered APIC systems, we must use hard_smp_processor_id.
363 *
364 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
365 */
366static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb)
367{
368 return hard_smp_processor_id() >> index_msb;
369}
370
371static int probe_summit(void)
372{
373 /* probed later in mptable/ACPI hooks */
374 return 0;
375}
376
377static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
378{
379 /* Careful. Some cpus do not strictly honor the set of cpus
380 * specified in the interrupt destination when using lowest
381 * priority interrupt delivery mode.
382 *
383 * In particular there was a hyperthreading cpu observed to
384 * deliver interrupts to the wrong hyperthread when only one
385 * hyperthread was specified in the interrupt destination.
386 */
387 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
388}
389
390#ifdef CONFIG_X86_SUMMIT_NUMA
35static struct rio_table_hdr *rio_table_hdr __initdata; 391static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 392static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
37static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; 393static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
@@ -186,3 +542,61 @@ void __init setup_summit(void)
186 next_wpeg = 0; 542 next_wpeg = 0;
187 } while (next_wpeg != 0); 543 } while (next_wpeg != 0);
188} 544}
545#endif
546
547struct genapic apic_summit = {
548
549 .name = "summit",
550 .probe = probe_summit,
551 .acpi_madt_oem_check = summit_acpi_madt_oem_check,
552 .apic_id_registered = summit_apic_id_registered,
553
554 .irq_delivery_mode = dest_LowestPrio,
555 /* logical delivery broadcast to all CPUs: */
556 .irq_dest_mode = 1,
557
558 .target_cpus = summit_target_cpus,
559 .disable_esr = 1,
560 .dest_logical = APIC_DEST_LOGICAL,
561 .check_apicid_used = summit_check_apicid_used,
562 .check_apicid_present = summit_check_apicid_present,
563
564 .vector_allocation_domain = summit_vector_allocation_domain,
565 .init_apic_ldr = summit_init_apic_ldr,
566
567 .ioapic_phys_id_map = summit_ioapic_phys_id_map,
568 .setup_apic_routing = summit_setup_apic_routing,
569 .multi_timer_check = NULL,
570 .apicid_to_node = summit_apicid_to_node,
571 .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
572 .cpu_present_to_apicid = summit_cpu_present_to_apicid,
573 .apicid_to_cpu_present = summit_apicid_to_cpu_present,
574 .setup_portio_remap = NULL,
575 .check_phys_apicid_present = summit_check_phys_apicid_present,
576 .enable_apic_mode = NULL,
577 .phys_pkg_id = summit_phys_pkg_id,
578 .mps_oem_check = summit_mps_oem_check,
579
580 .get_apic_id = summit_get_apic_id,
581 .set_apic_id = NULL,
582 .apic_id_mask = 0xFF << 24,
583
584 .cpu_mask_to_apicid = summit_cpu_mask_to_apicid,
585 .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
586
587 .send_IPI_mask = summit_send_IPI_mask,
588 .send_IPI_mask_allbutself = NULL,
589 .send_IPI_allbutself = summit_send_IPI_allbutself,
590 .send_IPI_all = summit_send_IPI_all,
591 .send_IPI_self = default_send_IPI_self,
592
593 .wakeup_cpu = NULL,
594 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
595 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
596
597 .wait_for_init_deassert = default_wait_for_init_deassert,
598
599 .smp_callin_clear_local_apic = NULL,
600 .store_NMI_vector = NULL,
601 .inquire_remote_apic = default_inquire_remote_apic,
602};
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index 0c4d601bc853..b7607c4f2042 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -1,7 +1,7 @@
1ENTRY(sys_call_table) 1ENTRY(sys_call_table)
2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ 2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
3 .long sys_exit 3 .long sys_exit
4 .long sys_fork 4 .long ptregs_fork
5 .long sys_read 5 .long sys_read
6 .long sys_write 6 .long sys_write
7 .long sys_open /* 5 */ 7 .long sys_open /* 5 */
@@ -10,7 +10,7 @@ ENTRY(sys_call_table)
10 .long sys_creat 10 .long sys_creat
11 .long sys_link 11 .long sys_link
12 .long sys_unlink /* 10 */ 12 .long sys_unlink /* 10 */
13 .long sys_execve 13 .long ptregs_execve
14 .long sys_chdir 14 .long sys_chdir
15 .long sys_time 15 .long sys_time
16 .long sys_mknod 16 .long sys_mknod
@@ -109,17 +109,17 @@ ENTRY(sys_call_table)
109 .long sys_newlstat 109 .long sys_newlstat
110 .long sys_newfstat 110 .long sys_newfstat
111 .long sys_uname 111 .long sys_uname
112 .long sys_iopl /* 110 */ 112 .long ptregs_iopl /* 110 */
113 .long sys_vhangup 113 .long sys_vhangup
114 .long sys_ni_syscall /* old "idle" system call */ 114 .long sys_ni_syscall /* old "idle" system call */
115 .long sys_vm86old 115 .long ptregs_vm86old
116 .long sys_wait4 116 .long sys_wait4
117 .long sys_swapoff /* 115 */ 117 .long sys_swapoff /* 115 */
118 .long sys_sysinfo 118 .long sys_sysinfo
119 .long sys_ipc 119 .long sys_ipc
120 .long sys_fsync 120 .long sys_fsync
121 .long sys_sigreturn 121 .long ptregs_sigreturn
122 .long sys_clone /* 120 */ 122 .long ptregs_clone /* 120 */
123 .long sys_setdomainname 123 .long sys_setdomainname
124 .long sys_newuname 124 .long sys_newuname
125 .long sys_modify_ldt 125 .long sys_modify_ldt
@@ -165,14 +165,14 @@ ENTRY(sys_call_table)
165 .long sys_mremap 165 .long sys_mremap
166 .long sys_setresuid16 166 .long sys_setresuid16
167 .long sys_getresuid16 /* 165 */ 167 .long sys_getresuid16 /* 165 */
168 .long sys_vm86 168 .long ptregs_vm86
169 .long sys_ni_syscall /* Old sys_query_module */ 169 .long sys_ni_syscall /* Old sys_query_module */
170 .long sys_poll 170 .long sys_poll
171 .long sys_nfsservctl 171 .long sys_nfsservctl
172 .long sys_setresgid16 /* 170 */ 172 .long sys_setresgid16 /* 170 */
173 .long sys_getresgid16 173 .long sys_getresgid16
174 .long sys_prctl 174 .long sys_prctl
175 .long sys_rt_sigreturn 175 .long ptregs_rt_sigreturn
176 .long sys_rt_sigaction 176 .long sys_rt_sigaction
177 .long sys_rt_sigprocmask /* 175 */ 177 .long sys_rt_sigprocmask /* 175 */
178 .long sys_rt_sigpending 178 .long sys_rt_sigpending
@@ -185,11 +185,11 @@ ENTRY(sys_call_table)
185 .long sys_getcwd 185 .long sys_getcwd
186 .long sys_capget 186 .long sys_capget
187 .long sys_capset /* 185 */ 187 .long sys_capset /* 185 */
188 .long sys_sigaltstack 188 .long ptregs_sigaltstack
189 .long sys_sendfile 189 .long sys_sendfile
190 .long sys_ni_syscall /* reserved for streams1 */ 190 .long sys_ni_syscall /* reserved for streams1 */
191 .long sys_ni_syscall /* reserved for streams2 */ 191 .long sys_ni_syscall /* reserved for streams2 */
192 .long sys_vfork /* 190 */ 192 .long ptregs_vfork /* 190 */
193 .long sys_getrlimit 193 .long sys_getrlimit
194 .long sys_mmap2 194 .long sys_mmap2
195 .long sys_truncate64 195 .long sys_truncate64
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 3985cac0ed47..764c74e871f2 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -38,7 +38,7 @@
38#include <asm/time.h> 38#include <asm/time.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
40 40
41#include "do_timer.h" 41#include <asm/do_timer.h>
42 42
43int timer_ack; 43int timer_ack;
44 44
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 89fce1b6d01f..f396e61bcb34 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -20,7 +20,7 @@
20#include <asm/tsc.h> 20#include <asm/tsc.h>
21#include <asm/irq_vectors.h> 21#include <asm/irq_vectors.h>
22 22
23#include <mach_apic.h> 23#include <asm/genapic.h>
24 24
25static struct bau_control **uv_bau_table_bases __read_mostly; 25static struct bau_control **uv_bau_table_bases __read_mostly;
26static int uv_bau_retry_limit __read_mostly; 26static int uv_bau_retry_limit __read_mostly;
@@ -259,7 +259,7 @@ const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
259 * the cpu's, all of which are still in the mask. 259 * the cpu's, all of which are still in the mask.
260 */ 260 */
261 __get_cpu_var(ptcstats).ptc_i++; 261 __get_cpu_var(ptcstats).ptc_i++;
262 return 0; 262 return flush_mask;
263 } 263 }
264 264
265 /* 265 /*
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 894293c598db..95a012a4664e 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -29,6 +29,7 @@
29#include <asm/page.h> 29#include <asm/page.h>
30#include <asm/msr.h> 30#include <asm/msr.h>
31#include <asm/segment.h> 31#include <asm/segment.h>
32#include <asm/processor-flags.h>
32 33
33.section .rodata, "a", @progbits 34.section .rodata, "a", @progbits
34 35
@@ -37,7 +38,7 @@
37ENTRY(trampoline_data) 38ENTRY(trampoline_data)
38r_base = . 39r_base = .
39 cli # We should be safe anyway 40 cli # We should be safe anyway
40 wbinvd 41 wbinvd
41 mov %cs, %ax # Code and data in the same place 42 mov %cs, %ax # Code and data in the same place
42 mov %ax, %ds 43 mov %ax, %ds
43 mov %ax, %es 44 mov %ax, %es
@@ -73,9 +74,8 @@ r_base = .
73 lidtl tidt - r_base # load idt with 0, 0 74 lidtl tidt - r_base # load idt with 0, 0
74 lgdtl tgdt - r_base # load gdt with whatever is appropriate 75 lgdtl tgdt - r_base # load gdt with whatever is appropriate
75 76
76 xor %ax, %ax 77 mov $X86_CR0_PE, %ax # protected mode (PE) bit
77 inc %ax # protected mode (PE) bit 78 lmsw %ax # into protected mode
78 lmsw %ax # into protected mode
79 79
80 # flush prefetch and jump to startup_32 80 # flush prefetch and jump to startup_32
81 ljmpl *(startup_32_vector - r_base) 81 ljmpl *(startup_32_vector - r_base)
@@ -86,9 +86,8 @@ startup_32:
86 movl $__KERNEL_DS, %eax # Initialize the %ds segment register 86 movl $__KERNEL_DS, %eax # Initialize the %ds segment register
87 movl %eax, %ds 87 movl %eax, %ds
88 88
89 xorl %eax, %eax 89 movl $X86_CR4_PAE, %eax
90 btsl $5, %eax # Enable PAE mode 90 movl %eax, %cr4 # Enable PAE mode
91 movl %eax, %cr4
92 91
93 # Setup trampoline 4 level pagetables 92 # Setup trampoline 4 level pagetables
94 leal (trampoline_level4_pgt - r_base)(%esi), %eax 93 leal (trampoline_level4_pgt - r_base)(%esi), %eax
@@ -99,9 +98,9 @@ startup_32:
99 xorl %edx, %edx 98 xorl %edx, %edx
100 wrmsr 99 wrmsr
101 100
102 xorl %eax, %eax 101 # Enable paging and in turn activate Long Mode
103 btsl $31, %eax # Enable paging and in turn activate Long Mode 102 # Enable protected mode
104 btsl $0, %eax # Enable protected mode 103 movl $(X86_CR0_PG | X86_CR0_PE), %eax
105 movl %eax, %cr0 104 movl %eax, %cr0
106 105
107 /* 106 /*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 17483fe98e9c..6cba7abf3518 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,7 +54,7 @@
54#include <asm/desc.h> 54#include <asm/desc.h>
55#include <asm/i387.h> 55#include <asm/i387.h>
56 56
57#include <mach_traps.h> 57#include <asm/mach_traps.h>
58 58
59#ifdef CONFIG_X86_64 59#ifdef CONFIG_X86_64
60#include <asm/pgalloc.h> 60#include <asm/pgalloc.h>
@@ -905,19 +905,20 @@ void math_emulate(struct math_emu_info *info)
905} 905}
906#endif /* CONFIG_MATH_EMULATION */ 906#endif /* CONFIG_MATH_EMULATION */
907 907
908dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) 908dotraplinkage void __kprobes
909do_device_not_available(struct pt_regs *regs, long error_code)
909{ 910{
910#ifdef CONFIG_X86_32 911#ifdef CONFIG_X86_32
911 if (read_cr0() & X86_CR0_EM) { 912 if (read_cr0() & X86_CR0_EM) {
912 struct math_emu_info info = { }; 913 struct math_emu_info info = { };
913 914
914 conditional_sti(&regs); 915 conditional_sti(regs);
915 916
916 info.regs = &regs; 917 info.regs = regs;
917 math_emulate(&info); 918 math_emulate(&info);
918 } else { 919 } else {
919 math_state_restore(); /* interrupts still off */ 920 math_state_restore(); /* interrupts still off */
920 conditional_sti(&regs); 921 conditional_sti(regs);
921 } 922 }
922#else 923#else
923 math_state_restore(); 924 math_state_restore();
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 599e58168631..83d53ce5d4c4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -773,7 +773,7 @@ __cpuinit int unsynchronized_tsc(void)
773 if (!cpu_has_tsc || tsc_unstable) 773 if (!cpu_has_tsc || tsc_unstable)
774 return 1; 774 return 1;
775 775
776#ifdef CONFIG_X86_SMP 776#ifdef CONFIG_SMP
777 if (apic_is_clustered_box()) 777 if (apic_is_clustered_box())
778 return 1; 778 return 1;
779#endif 779#endif
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index d801d06af068..4fd646e6dd43 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -32,9 +32,9 @@
32#include <asm/e820.h> 32#include <asm/e820.h>
33#include <asm/io.h> 33#include <asm/io.h>
34 34
35#include <mach_ipi.h> 35#include <asm/genapic.h>
36 36
37#include "mach_apic.h" 37#include <asm/genapic.h>
38 38
39#include <linux/kernel_stat.h> 39#include <linux/kernel_stat.h>
40 40
@@ -200,7 +200,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
200 return; 200 return;
201 } 201 }
202 202
203 apic_cpus = apicid_to_cpu_present(m->apicid); 203 apic_cpus = apic->apicid_to_cpu_present(m->apicid);
204 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); 204 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
205 /* 205 /*
206 * Validate version 206 * Validate version
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 4eeb5cf9720d..d7ac84e7fc1c 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
158 ret = KVM86->regs32; 158 ret = KVM86->regs32;
159 159
160 ret->fs = current->thread.saved_fs; 160 ret->fs = current->thread.saved_fs;
161 loadsegment(gs, current->thread.saved_gs); 161 set_user_gs(ret, current->thread.saved_gs);
162 162
163 return ret; 163 return ret;
164} 164}
@@ -197,9 +197,9 @@ out:
197static int do_vm86_irq_handling(int subfunction, int irqnumber); 197static int do_vm86_irq_handling(int subfunction, int irqnumber);
198static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); 198static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
199 199
200asmlinkage int sys_vm86old(struct pt_regs regs) 200int sys_vm86old(struct pt_regs *regs)
201{ 201{
202 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; 202 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
203 struct kernel_vm86_struct info; /* declare this _on top_, 203 struct kernel_vm86_struct info; /* declare this _on top_,
204 * this avoids wasting of stack space. 204 * this avoids wasting of stack space.
205 * This remains on the stack until we 205 * This remains on the stack until we
@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
218 if (tmp) 218 if (tmp)
219 goto out; 219 goto out;
220 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); 220 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
221 info.regs32 = &regs; 221 info.regs32 = regs;
222 tsk->thread.vm86_info = v86; 222 tsk->thread.vm86_info = v86;
223 do_sys_vm86(&info, tsk); 223 do_sys_vm86(&info, tsk);
224 ret = 0; /* we never return here */ 224 ret = 0; /* we never return here */
@@ -227,7 +227,7 @@ out:
227} 227}
228 228
229 229
230asmlinkage int sys_vm86(struct pt_regs regs) 230int sys_vm86(struct pt_regs *regs)
231{ 231{
232 struct kernel_vm86_struct info; /* declare this _on top_, 232 struct kernel_vm86_struct info; /* declare this _on top_,
233 * this avoids wasting of stack space. 233 * this avoids wasting of stack space.
@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
239 struct vm86plus_struct __user *v86; 239 struct vm86plus_struct __user *v86;
240 240
241 tsk = current; 241 tsk = current;
242 switch (regs.bx) { 242 switch (regs->bx) {
243 case VM86_REQUEST_IRQ: 243 case VM86_REQUEST_IRQ:
244 case VM86_FREE_IRQ: 244 case VM86_FREE_IRQ:
245 case VM86_GET_IRQ_BITS: 245 case VM86_GET_IRQ_BITS:
246 case VM86_GET_AND_RESET_IRQ: 246 case VM86_GET_AND_RESET_IRQ:
247 ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); 247 ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
248 goto out; 248 goto out;
249 case VM86_PLUS_INSTALL_CHECK: 249 case VM86_PLUS_INSTALL_CHECK:
250 /* 250 /*
@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs)
261 ret = -EPERM; 261 ret = -EPERM;
262 if (tsk->thread.saved_sp0) 262 if (tsk->thread.saved_sp0)
263 goto out; 263 goto out;
264 v86 = (struct vm86plus_struct __user *)regs.cx; 264 v86 = (struct vm86plus_struct __user *)regs->cx;
265 tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, 265 tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
266 offsetof(struct kernel_vm86_struct, regs32) - 266 offsetof(struct kernel_vm86_struct, regs32) -
267 sizeof(info.regs)); 267 sizeof(info.regs));
268 ret = -EFAULT; 268 ret = -EFAULT;
269 if (tmp) 269 if (tmp)
270 goto out; 270 goto out;
271 info.regs32 = &regs; 271 info.regs32 = regs;
272 info.vm86plus.is_vm86pus = 1; 272 info.vm86plus.is_vm86pus = 1;
273 tsk->thread.vm86_info = (struct vm86_struct __user *)v86; 273 tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
274 do_sys_vm86(&info, tsk); 274 do_sys_vm86(&info, tsk);
@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
323 info->regs32->ax = 0; 323 info->regs32->ax = 0;
324 tsk->thread.saved_sp0 = tsk->thread.sp0; 324 tsk->thread.saved_sp0 = tsk->thread.sp0;
325 tsk->thread.saved_fs = info->regs32->fs; 325 tsk->thread.saved_fs = info->regs32->fs;
326 savesegment(gs, tsk->thread.saved_gs); 326 tsk->thread.saved_gs = get_user_gs(info->regs32);
327 327
328 tss = &per_cpu(init_tss, get_cpu()); 328 tss = &per_cpu(init_tss, get_cpu());
329 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; 329 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index bef58b4982db..f052c84ecbe4 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -680,10 +680,11 @@ static inline int __init activate_vmi(void)
680 para_fill(pv_mmu_ops.write_cr2, SetCR2); 680 para_fill(pv_mmu_ops.write_cr2, SetCR2);
681 para_fill(pv_mmu_ops.write_cr3, SetCR3); 681 para_fill(pv_mmu_ops.write_cr3, SetCR3);
682 para_fill(pv_cpu_ops.write_cr4, SetCR4); 682 para_fill(pv_cpu_ops.write_cr4, SetCR4);
683 para_fill(pv_irq_ops.save_fl, GetInterruptMask); 683
684 para_fill(pv_irq_ops.restore_fl, SetInterruptMask); 684 para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
685 para_fill(pv_irq_ops.irq_disable, DisableInterrupts); 685 para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
686 para_fill(pv_irq_ops.irq_enable, EnableInterrupts); 686 para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
687 para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
687 688
688 para_fill(pv_cpu_ops.wbinvd, WBINVD); 689 para_fill(pv_cpu_ops.wbinvd, WBINVD);
689 para_fill(pv_cpu_ops.read_tsc, RDTSC); 690 para_fill(pv_cpu_ops.read_tsc, RDTSC);
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index c4c1f9e09402..a4791ef412d1 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -256,7 +256,7 @@ void __devinit vmi_time_bsp_init(void)
256 */ 256 */
257 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 257 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
258 local_irq_disable(); 258 local_irq_disable();
259#ifdef CONFIG_X86_SMP 259#ifdef CONFIG_SMP
260 /* 260 /*
261 * XXX handle_percpu_irq only defined for SMP; we need to switch over 261 * XXX handle_percpu_irq only defined for SMP; we need to switch over
262 * to using it, since this is a local interrupt, which each CPU must 262 * to using it, since this is a local interrupt, which each CPU must
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index c9740996430a..087a7f2c639b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -22,6 +22,7 @@ PHDRS {
22#ifdef CONFIG_SMP 22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */ 23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif 24#endif
25 data.init2 PT_LOAD FLAGS(7); /* RWE */
25 note PT_NOTE FLAGS(0); /* ___ */ 26 note PT_NOTE FLAGS(0); /* ___ */
26} 27}
27SECTIONS 28SECTIONS
@@ -215,7 +216,7 @@ SECTIONS
215 /* 216 /*
216 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the 217 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
217 * output PHDR, so the next output section - __data_nosave - should 218 * output PHDR, so the next output section - __data_nosave - should
218 * switch it back to data.init. Also, pda should be at the head of 219 * start another section data.init2. Also, pda should be at the head of
219 * percpu area. Preallocate it and define the percpu offset symbol 220 * percpu area. Preallocate it and define the percpu offset symbol
220 * so that it can be accessed as a percpu variable. 221 * so that it can be accessed as a percpu variable.
221 */ 222 */
@@ -232,7 +233,7 @@ SECTIONS
232 __nosave_begin = .; 233 __nosave_begin = .;
233 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { 234 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
234 *(.data.nosave) 235 *(.data.nosave)
235 } :data.init /* switch back to data.init, see PERCPU_VADDR() above */ 236 } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
236 . = ALIGN(PAGE_SIZE); 237 . = ALIGN(PAGE_SIZE);
237 __nosave_end = .; 238 __nosave_end = .;
238 239
@@ -256,6 +257,14 @@ SECTIONS
256 DWARF_DEBUG 257 DWARF_DEBUG
257} 258}
258 259
260 /*
261 * Per-cpu symbols which need to be offset from __per_cpu_load
262 * for the boot processor.
263 */
264#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
265INIT_PER_CPU(gdt_page);
266INIT_PER_CPU(irq_stack_union);
267
259/* 268/*
260 * Build-time check on the image size: 269 * Build-time check on the image size:
261 */ 270 */
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index a688f3bfaec2..c609205df594 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void)
37 flags &= ~X86_EFLAGS_IF; 37 flags &= ~X86_EFLAGS_IF;
38 return flags; 38 return flags;
39} 39}
40PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
40 41
41static void vsmp_restore_fl(unsigned long flags) 42static void vsmp_restore_fl(unsigned long flags)
42{ 43{
@@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags)
46 flags |= X86_EFLAGS_AC; 47 flags |= X86_EFLAGS_AC;
47 native_restore_fl(flags); 48 native_restore_fl(flags);
48} 49}
50PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
49 51
50static void vsmp_irq_disable(void) 52static void vsmp_irq_disable(void)
51{ 53{
@@ -53,6 +55,7 @@ static void vsmp_irq_disable(void)
53 55
54 native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); 56 native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
55} 57}
58PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
56 59
57static void vsmp_irq_enable(void) 60static void vsmp_irq_enable(void)
58{ 61{
@@ -60,6 +63,7 @@ static void vsmp_irq_enable(void)
60 63
61 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); 64 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
62} 65}
66PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
63 67
64static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, 68static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
65 unsigned long addr, unsigned len) 69 unsigned long addr, unsigned len)
@@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void)
90 cap, ctl); 94 cap, ctl);
91 if (cap & ctl & (1 << 4)) { 95 if (cap & ctl & (1 << 4)) {
92 /* Setup irq ops and turn on vSMP IRQ fastpath handling */ 96 /* Setup irq ops and turn on vSMP IRQ fastpath handling */
93 pv_irq_ops.irq_disable = vsmp_irq_disable; 97 pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
94 pv_irq_ops.irq_enable = vsmp_irq_enable; 98 pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
95 pv_irq_ops.save_fl = vsmp_save_fl; 99 pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
96 pv_irq_ops.restore_fl = vsmp_restore_fl; 100 pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
97 pv_init_ops.patch = vsmp_patch; 101 pv_init_ops.patch = vsmp_patch;
98 102
99 ctl &= ~(1 << 4); 103 ctl &= ~(1 << 4);