aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
authorH. Peter Anvin <hpa@zytor.com>2009-02-24 19:11:51 -0500
committerH. Peter Anvin <hpa@zytor.com>2009-02-24 19:11:51 -0500
commit638bee71c83a2837b48062fdc5b222163cf53d79 (patch)
tree9c1699c07a5aa547d769138317279b8ee1ba89e8 /arch/x86/kernel
parent2aaa822984b97efd894d10d1e1382206ef0291a7 (diff)
parenta852cbfaaf8122827602027b1614971cfd832304 (diff)
Merge branch 'x86/core' into x86/mce2
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--arch/x86/kernel/Makefile40
-rw-r--r--arch/x86/kernel/acpi/boot.c169
-rw-r--r--arch/x86/kernel/acpi/realmode/wakeup.S4
-rw-r--r--arch/x86/kernel/acpi/sleep.c1
-rw-r--r--arch/x86/kernel/acpi/wakeup_32.S2
-rw-r--r--arch/x86/kernel/acpi/wakeup_64.S4
-rw-r--r--arch/x86/kernel/apic/Makefile19
-rw-r--r--arch/x86/kernel/apic/apic.c (renamed from arch/x86/kernel/apic.c)299
-rw-r--r--arch/x86/kernel/apic/apic_flat_64.c (renamed from arch/x86/kernel/genapic_flat_64.c)194
-rw-r--r--arch/x86/kernel/apic/bigsmp_32.c274
-rw-r--r--arch/x86/kernel/apic/es7000_32.c757
-rw-r--r--arch/x86/kernel/apic/io_apic.c (renamed from arch/x86/kernel/io_apic.c)443
-rw-r--r--arch/x86/kernel/apic/ipi.c164
-rw-r--r--arch/x86/kernel/apic/nmi.c (renamed from arch/x86/kernel/nmi.c)12
-rw-r--r--arch/x86/kernel/apic/numaq_32.c565
-rw-r--r--arch/x86/kernel/apic/probe_32.c295
-rw-r--r--arch/x86/kernel/apic/probe_64.c96
-rw-r--r--arch/x86/kernel/apic/summit_32.c601
-rw-r--r--arch/x86/kernel/apic/x2apic_cluster.c (renamed from arch/x86/kernel/genx2apic_cluster.c)154
-rw-r--r--arch/x86/kernel/apic/x2apic_phys.c (renamed from arch/x86/kernel/genx2apic_phys.c)151
-rw-r--r--arch/x86/kernel/apic/x2apic_uv_x.c (renamed from arch/x86/kernel/genx2apic_uv_x.c)123
-rw-r--r--arch/x86/kernel/apm_32.c2
-rw-r--r--arch/x86/kernel/asm-offsets_32.c1
-rw-r--r--arch/x86/kernel/asm-offsets_64.c11
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c54
-rw-r--r--arch/x86/kernel/cpu/amd.c2
-rw-r--r--arch/x86/kernel/cpu/common.c257
-rw-r--r--arch/x86/kernel/cpu/cpufreq/e_powersaver.c6
-rw-r--r--arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c6
-rw-r--r--arch/x86/kernel/cpu/intel.c17
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c63
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c21
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c7
-rw-r--r--arch/x86/kernel/cpu/mcheck/p4.c4
-rw-r--r--arch/x86/kernel/cpu/perfctr-watchdog.c2
-rw-r--r--arch/x86/kernel/crash.c4
-rw-r--r--arch/x86/kernel/dumpstack.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c35
-rw-r--r--arch/x86/kernel/early_printk.c2
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/efi_64.c1
-rw-r--r--arch/x86/kernel/efi_stub_32.S3
-rw-r--r--arch/x86/kernel/efi_stub_64.S7
-rw-r--r--arch/x86/kernel/entry_32.S455
-rw-r--r--arch/x86/kernel/entry_64.S72
-rw-r--r--arch/x86/kernel/es7000_32.c378
-rw-r--r--arch/x86/kernel/genapic_64.c82
-rw-r--r--arch/x86/kernel/head64.c23
-rw-r--r--arch/x86/kernel/head_32.S44
-rw-r--r--arch/x86/kernel/head_64.S23
-rw-r--r--arch/x86/kernel/i8259.c1
-rw-r--r--arch/x86/kernel/ioport.c3
-rw-r--r--arch/x86/kernel/ipi.c190
-rw-r--r--arch/x86/kernel/irq.c44
-rw-r--r--arch/x86/kernel/irq_32.c32
-rw-r--r--arch/x86/kernel/irq_64.c43
-rw-r--r--arch/x86/kernel/irqinit_32.c36
-rw-r--r--arch/x86/kernel/kgdb.c4
-rw-r--r--arch/x86/kernel/kvmclock.c1
-rw-r--r--arch/x86/kernel/machine_kexec_32.c2
-rw-r--r--arch/x86/kernel/machine_kexec_64.c82
-rw-r--r--arch/x86/kernel/mca_32.c5
-rw-r--r--arch/x86/kernel/microcode_intel.c10
-rw-r--r--arch/x86/kernel/module_32.c6
-rw-r--r--arch/x86/kernel/module_64.c32
-rw-r--r--arch/x86/kernel/mpparse.c189
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/numaq_32.c293
-rw-r--r--arch/x86/kernel/paravirt-spinlocks.c10
-rw-r--r--arch/x86/kernel/paravirt.c56
-rw-r--r--arch/x86/kernel/paravirt_patch_32.c12
-rw-r--r--arch/x86/kernel/paravirt_patch_64.c15
-rw-r--r--arch/x86/kernel/probe_roms_32.c2
-rw-r--r--arch/x86/kernel/process.c2
-rw-r--r--arch/x86/kernel/process_32.c59
-rw-r--r--arch/x86/kernel/process_64.c42
-rw-r--r--arch/x86/kernel/ptrace.c21
-rw-r--r--arch/x86/kernel/reboot.c5
-rw-r--r--arch/x86/kernel/relocate_kernel_32.S2
-rw-r--r--arch/x86/kernel/relocate_kernel_64.S129
-rw-r--r--arch/x86/kernel/setup.c133
-rw-r--r--arch/x86/kernel/setup_percpu.c414
-rw-r--r--arch/x86/kernel/signal.c346
-rw-r--r--arch/x86/kernel/smp.c15
-rw-r--r--arch/x86/kernel/smpboot.c132
-rw-r--r--arch/x86/kernel/smpcommon.c30
-rw-r--r--arch/x86/kernel/stacktrace.c2
-rw-r--r--arch/x86/kernel/summit_32.c188
-rw-r--r--arch/x86/kernel/syscall_table_32.S20
-rw-r--r--arch/x86/kernel/time_32.c8
-rw-r--r--arch/x86/kernel/tlb_32.c256
-rw-r--r--arch/x86/kernel/tlb_64.c284
-rw-r--r--arch/x86/kernel/tlb_uv.c74
-rw-r--r--arch/x86/kernel/trampoline_32.S2
-rw-r--r--arch/x86/kernel/trampoline_64.S23
-rw-r--r--arch/x86/kernel/traps.c18
-rw-r--r--arch/x86/kernel/tsc.c2
-rw-r--r--arch/x86/kernel/visws_quirks.c10
-rw-r--r--arch/x86/kernel/vm86_32.c20
-rw-r--r--arch/x86/kernel/vmi_32.c13
-rw-r--r--arch/x86/kernel/vmiclock_32.c6
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S11
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S37
-rw-r--r--arch/x86/kernel/vsmp_64.c12
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c2
105 files changed, 5221 insertions, 4120 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d364df03c1d6..de5657c039e9 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,11 +23,12 @@ nostackp := $(call cc-option, -fno-stack-protector)
23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
24CFLAGS_hpet.o := $(nostackp) 24CFLAGS_hpet.o := $(nostackp)
25CFLAGS_tsc.o := $(nostackp) 25CFLAGS_tsc.o := $(nostackp)
26CFLAGS_paravirt.o := $(nostackp)
26 27
27obj-y := process_$(BITS).o signal.o entry_$(BITS).o 28obj-y := process_$(BITS).o signal.o entry_$(BITS).o
28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
29obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o 30obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
30obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 31obj-y += setup.o i8259.o irqinit_$(BITS).o
31obj-$(CONFIG_X86_VISWS) += visws_quirks.o 32obj-$(CONFIG_X86_VISWS) += visws_quirks.o
32obj-$(CONFIG_X86_32) += probe_roms_32.o 33obj-$(CONFIG_X86_32) += probe_roms_32.o
33obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 34obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -49,30 +50,26 @@ obj-y += step.o
49obj-$(CONFIG_STACKTRACE) += stacktrace.o 50obj-$(CONFIG_STACKTRACE) += stacktrace.o
50obj-y += cpu/ 51obj-y += cpu/
51obj-y += acpi/ 52obj-y += acpi/
52obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 53obj-y += reboot.o
53obj-$(CONFIG_MCA) += mca_32.o 54obj-$(CONFIG_MCA) += mca_32.o
54obj-$(CONFIG_X86_MSR) += msr.o 55obj-$(CONFIG_X86_MSR) += msr.o
55obj-$(CONFIG_X86_CPUID) += cpuid.o 56obj-$(CONFIG_X86_CPUID) += cpuid.o
56obj-$(CONFIG_PCI) += early-quirks.o 57obj-$(CONFIG_PCI) += early-quirks.o
57apm-y := apm_32.o 58apm-y := apm_32.o
58obj-$(CONFIG_APM) += apm.o 59obj-$(CONFIG_APM) += apm.o
59obj-$(CONFIG_X86_SMP) += smp.o 60obj-$(CONFIG_SMP) += smp.o
60obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o 61obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o
61obj-$(CONFIG_X86_32_SMP) += smpcommon.o 62obj-$(CONFIG_SMP) += setup_percpu.o
62obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 63obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
63obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 64obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
64obj-$(CONFIG_X86_MPPARSE) += mpparse.o 65obj-$(CONFIG_X86_MPPARSE) += mpparse.o
65obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o 66obj-y += apic/
66obj-$(CONFIG_X86_IO_APIC) += io_apic.o
67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
69obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o 69obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
70obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 70obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
71obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 71obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
72obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 72obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
73obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
74obj-$(CONFIG_X86_ES7000) += es7000_32.o
75obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
76obj-y += vsmp_64.o 73obj-y += vsmp_64.o
77obj-$(CONFIG_KPROBES) += kprobes.o 74obj-$(CONFIG_KPROBES) += kprobes.o
78obj-$(CONFIG_MODULES) += module_$(BITS).o 75obj-$(CONFIG_MODULES) += module_$(BITS).o
@@ -114,16 +111,13 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
114### 111###
115# 64 bit specific files 112# 64 bit specific files
116ifeq ($(CONFIG_X86_64),y) 113ifeq ($(CONFIG_X86_64),y)
117 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 114 obj-$(CONFIG_X86_UV) += tlb_uv.o bios_uv.o uv_irq.o uv_sysfs.o
118 obj-y += bios_uv.o uv_irq.o uv_sysfs.o 115 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
119 obj-y += genx2apic_cluster.o 116 obj-$(CONFIG_AUDIT) += audit_64.o
120 obj-y += genx2apic_phys.o 117
121 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 118 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o
122 obj-$(CONFIG_AUDIT) += audit_64.o 119 obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o
123 120 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
124 obj-$(CONFIG_GART_IOMMU) += pci-gart_64.o aperture_64.o 121
125 obj-$(CONFIG_CALGARY_IOMMU) += pci-calgary_64.o tce_64.o 122 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
126 obj-$(CONFIG_AMD_IOMMU) += amd_iommu_init.o amd_iommu.o
127
128 obj-$(CONFIG_PCI_MMCONFIG) += mmconf-fam10h_64.o
129endif 123endif
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7678f10c4568..a18eb7ce2236 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -37,15 +37,10 @@
37#include <asm/pgtable.h> 37#include <asm/pgtable.h>
38#include <asm/io_apic.h> 38#include <asm/io_apic.h>
39#include <asm/apic.h> 39#include <asm/apic.h>
40#include <asm/genapic.h>
41#include <asm/io.h> 40#include <asm/io.h>
42#include <asm/mpspec.h> 41#include <asm/mpspec.h>
43#include <asm/smp.h> 42#include <asm/smp.h>
44 43
45#ifdef CONFIG_X86_LOCAL_APIC
46# include <mach_apic.h>
47#endif
48
49static int __initdata acpi_force = 0; 44static int __initdata acpi_force = 0;
50u32 acpi_rsdt_forced; 45u32 acpi_rsdt_forced;
51#ifdef CONFIG_ACPI 46#ifdef CONFIG_ACPI
@@ -56,16 +51,7 @@ int acpi_disabled = 1;
56EXPORT_SYMBOL(acpi_disabled); 51EXPORT_SYMBOL(acpi_disabled);
57 52
58#ifdef CONFIG_X86_64 53#ifdef CONFIG_X86_64
59 54# include <asm/proto.h>
60#include <asm/proto.h>
61
62#else /* X86 */
63
64#ifdef CONFIG_X86_LOCAL_APIC
65#include <mach_apic.h>
66#include <mach_mpparse.h>
67#endif /* CONFIG_X86_LOCAL_APIC */
68
69#endif /* X86 */ 55#endif /* X86 */
70 56
71#define BAD_MADT_ENTRY(entry, end) ( \ 57#define BAD_MADT_ENTRY(entry, end) ( \
@@ -121,35 +107,18 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
121 */ 107 */
122char *__init __acpi_map_table(unsigned long phys, unsigned long size) 108char *__init __acpi_map_table(unsigned long phys, unsigned long size)
123{ 109{
124 unsigned long base, offset, mapped_size;
125 int idx;
126 110
127 if (!phys || !size) 111 if (!phys || !size)
128 return NULL; 112 return NULL;
129 113
130 if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) 114 return early_ioremap(phys, size);
131 return __va(phys); 115}
132 116void __init __acpi_unmap_table(char *map, unsigned long size)
133 offset = phys & (PAGE_SIZE - 1); 117{
134 mapped_size = PAGE_SIZE - offset; 118 if (!map || !size)
135 clear_fixmap(FIX_ACPI_END); 119 return;
136 set_fixmap(FIX_ACPI_END, phys);
137 base = fix_to_virt(FIX_ACPI_END);
138
139 /*
140 * Most cases can be covered by the below.
141 */
142 idx = FIX_ACPI_END;
143 while (mapped_size < size) {
144 if (--idx < FIX_ACPI_BEGIN)
145 return NULL; /* cannot handle this */
146 phys += PAGE_SIZE;
147 clear_fixmap(idx);
148 set_fixmap(idx, phys);
149 mapped_size += PAGE_SIZE;
150 }
151 120
152 return ((unsigned char *)base + offset); 121 early_iounmap(map, size);
153} 122}
154 123
155#ifdef CONFIG_PCI_MMCONFIG 124#ifdef CONFIG_PCI_MMCONFIG
@@ -239,7 +208,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
239 madt->address); 208 madt->address);
240 } 209 }
241 210
242 acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); 211 default_acpi_madt_oem_check(madt->header.oem_id,
212 madt->header.oem_table_id);
243 213
244 return 0; 214 return 0;
245} 215}
@@ -884,7 +854,7 @@ static struct {
884 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); 854 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
885} mp_ioapic_routing[MAX_IO_APICS]; 855} mp_ioapic_routing[MAX_IO_APICS];
886 856
887static int mp_find_ioapic(int gsi) 857int mp_find_ioapic(int gsi)
888{ 858{
889 int i = 0; 859 int i = 0;
890 860
@@ -899,6 +869,16 @@ static int mp_find_ioapic(int gsi)
899 return -1; 869 return -1;
900} 870}
901 871
872int mp_find_ioapic_pin(int ioapic, int gsi)
873{
874 if (WARN_ON(ioapic == -1))
875 return -1;
876 if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end))
877 return -1;
878
879 return gsi - mp_ioapic_routing[ioapic].gsi_base;
880}
881
902static u8 __init uniq_ioapic_id(u8 id) 882static u8 __init uniq_ioapic_id(u8 id)
903{ 883{
904#ifdef CONFIG_X86_32 884#ifdef CONFIG_X86_32
@@ -912,8 +892,8 @@ static u8 __init uniq_ioapic_id(u8 id)
912 DECLARE_BITMAP(used, 256); 892 DECLARE_BITMAP(used, 256);
913 bitmap_zero(used, 256); 893 bitmap_zero(used, 256);
914 for (i = 0; i < nr_ioapics; i++) { 894 for (i = 0; i < nr_ioapics; i++) {
915 struct mp_config_ioapic *ia = &mp_ioapics[i]; 895 struct mpc_ioapic *ia = &mp_ioapics[i];
916 __set_bit(ia->mp_apicid, used); 896 __set_bit(ia->apicid, used);
917 } 897 }
918 if (!test_bit(id, used)) 898 if (!test_bit(id, used))
919 return id; 899 return id;
@@ -945,29 +925,29 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
945 925
946 idx = nr_ioapics; 926 idx = nr_ioapics;
947 927
948 mp_ioapics[idx].mp_type = MP_IOAPIC; 928 mp_ioapics[idx].type = MP_IOAPIC;
949 mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; 929 mp_ioapics[idx].flags = MPC_APIC_USABLE;
950 mp_ioapics[idx].mp_apicaddr = address; 930 mp_ioapics[idx].apicaddr = address;
951 931
952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 932 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
953 mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); 933 mp_ioapics[idx].apicid = uniq_ioapic_id(id);
954#ifdef CONFIG_X86_32 934#ifdef CONFIG_X86_32
955 mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); 935 mp_ioapics[idx].apicver = io_apic_get_version(idx);
956#else 936#else
957 mp_ioapics[idx].mp_apicver = 0; 937 mp_ioapics[idx].apicver = 0;
958#endif 938#endif
959 /* 939 /*
960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 940 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 941 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
962 */ 942 */
963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; 943 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
964 mp_ioapic_routing[idx].gsi_base = gsi_base; 944 mp_ioapic_routing[idx].gsi_base = gsi_base;
965 mp_ioapic_routing[idx].gsi_end = gsi_base + 945 mp_ioapic_routing[idx].gsi_end = gsi_base +
966 io_apic_get_redir_entries(idx); 946 io_apic_get_redir_entries(idx);
967 947
968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " 948 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
969 "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, 949 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
970 mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, 950 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); 951 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
972 952
973 nr_ioapics++; 953 nr_ioapics++;
@@ -996,19 +976,19 @@ int __init acpi_probe_gsi(void)
996 return max_gsi + 1; 976 return max_gsi + 1;
997} 977}
998 978
999static void assign_to_mp_irq(struct mp_config_intsrc *m, 979static void assign_to_mp_irq(struct mpc_intsrc *m,
1000 struct mp_config_intsrc *mp_irq) 980 struct mpc_intsrc *mp_irq)
1001{ 981{
1002 memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); 982 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
1003} 983}
1004 984
1005static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, 985static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
1006 struct mp_config_intsrc *m) 986 struct mpc_intsrc *m)
1007{ 987{
1008 return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); 988 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
1009} 989}
1010 990
1011static void save_mp_irq(struct mp_config_intsrc *m) 991static void save_mp_irq(struct mpc_intsrc *m)
1012{ 992{
1013 int i; 993 int i;
1014 994
@@ -1026,7 +1006,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1026{ 1006{
1027 int ioapic; 1007 int ioapic;
1028 int pin; 1008 int pin;
1029 struct mp_config_intsrc mp_irq; 1009 struct mpc_intsrc mp_irq;
1030 1010
1031 /* 1011 /*
1032 * Convert 'gsi' to 'ioapic.pin'. 1012 * Convert 'gsi' to 'ioapic.pin'.
@@ -1034,7 +1014,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1034 ioapic = mp_find_ioapic(gsi); 1014 ioapic = mp_find_ioapic(gsi);
1035 if (ioapic < 0) 1015 if (ioapic < 0)
1036 return; 1016 return;
1037 pin = gsi - mp_ioapic_routing[ioapic].gsi_base; 1017 pin = mp_find_ioapic_pin(ioapic, gsi);
1038 1018
1039 /* 1019 /*
1040 * TBD: This check is for faulty timer entries, where the override 1020 * TBD: This check is for faulty timer entries, where the override
@@ -1044,13 +1024,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1044 if ((bus_irq == 0) && (trigger == 3)) 1024 if ((bus_irq == 0) && (trigger == 3))
1045 trigger = 1; 1025 trigger = 1;
1046 1026
1047 mp_irq.mp_type = MP_INTSRC; 1027 mp_irq.type = MP_INTSRC;
1048 mp_irq.mp_irqtype = mp_INT; 1028 mp_irq.irqtype = mp_INT;
1049 mp_irq.mp_irqflag = (trigger << 2) | polarity; 1029 mp_irq.irqflag = (trigger << 2) | polarity;
1050 mp_irq.mp_srcbus = MP_ISA_BUS; 1030 mp_irq.srcbus = MP_ISA_BUS;
1051 mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ 1031 mp_irq.srcbusirq = bus_irq; /* IRQ */
1052 mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ 1032 mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
1053 mp_irq.mp_dstirq = pin; /* INTIN# */ 1033 mp_irq.dstirq = pin; /* INTIN# */
1054 1034
1055 save_mp_irq(&mp_irq); 1035 save_mp_irq(&mp_irq);
1056} 1036}
@@ -1060,7 +1040,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1060 int i; 1040 int i;
1061 int ioapic; 1041 int ioapic;
1062 unsigned int dstapic; 1042 unsigned int dstapic;
1063 struct mp_config_intsrc mp_irq; 1043 struct mpc_intsrc mp_irq;
1064 1044
1065#if defined (CONFIG_MCA) || defined (CONFIG_EISA) 1045#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
1066 /* 1046 /*
@@ -1085,7 +1065,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1085 ioapic = mp_find_ioapic(0); 1065 ioapic = mp_find_ioapic(0);
1086 if (ioapic < 0) 1066 if (ioapic < 0)
1087 return; 1067 return;
1088 dstapic = mp_ioapics[ioapic].mp_apicid; 1068 dstapic = mp_ioapics[ioapic].apicid;
1089 1069
1090 /* 1070 /*
1091 * Use the default configuration for the IRQs 0-15. Unless 1071 * Use the default configuration for the IRQs 0-15. Unless
@@ -1095,16 +1075,14 @@ void __init mp_config_acpi_legacy_irqs(void)
1095 int idx; 1075 int idx;
1096 1076
1097 for (idx = 0; idx < mp_irq_entries; idx++) { 1077 for (idx = 0; idx < mp_irq_entries; idx++) {
1098 struct mp_config_intsrc *irq = mp_irqs + idx; 1078 struct mpc_intsrc *irq = mp_irqs + idx;
1099 1079
1100 /* Do we already have a mapping for this ISA IRQ? */ 1080 /* Do we already have a mapping for this ISA IRQ? */
1101 if (irq->mp_srcbus == MP_ISA_BUS 1081 if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
1102 && irq->mp_srcbusirq == i)
1103 break; 1082 break;
1104 1083
1105 /* Do we already have a mapping for this IOAPIC pin */ 1084 /* Do we already have a mapping for this IOAPIC pin */
1106 if (irq->mp_dstapic == dstapic && 1085 if (irq->dstapic == dstapic && irq->dstirq == i)
1107 irq->mp_dstirq == i)
1108 break; 1086 break;
1109 } 1087 }
1110 1088
@@ -1113,13 +1091,13 @@ void __init mp_config_acpi_legacy_irqs(void)
1113 continue; /* IRQ already used */ 1091 continue; /* IRQ already used */
1114 } 1092 }
1115 1093
1116 mp_irq.mp_type = MP_INTSRC; 1094 mp_irq.type = MP_INTSRC;
1117 mp_irq.mp_irqflag = 0; /* Conforming */ 1095 mp_irq.irqflag = 0; /* Conforming */
1118 mp_irq.mp_srcbus = MP_ISA_BUS; 1096 mp_irq.srcbus = MP_ISA_BUS;
1119 mp_irq.mp_dstapic = dstapic; 1097 mp_irq.dstapic = dstapic;
1120 mp_irq.mp_irqtype = mp_INT; 1098 mp_irq.irqtype = mp_INT;
1121 mp_irq.mp_srcbusirq = i; /* Identity mapped */ 1099 mp_irq.srcbusirq = i; /* Identity mapped */
1122 mp_irq.mp_dstirq = i; 1100 mp_irq.dstirq = i;
1123 1101
1124 save_mp_irq(&mp_irq); 1102 save_mp_irq(&mp_irq);
1125 } 1103 }
@@ -1156,7 +1134,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
1156 return gsi; 1134 return gsi;
1157 } 1135 }
1158 1136
1159 ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; 1137 ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
1160 1138
1161#ifdef CONFIG_X86_32 1139#ifdef CONFIG_X86_32
1162 if (ioapic_renumber_irq) 1140 if (ioapic_renumber_irq)
@@ -1230,22 +1208,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
1230 u32 gsi, int triggering, int polarity) 1208 u32 gsi, int triggering, int polarity)
1231{ 1209{
1232#ifdef CONFIG_X86_MPPARSE 1210#ifdef CONFIG_X86_MPPARSE
1233 struct mp_config_intsrc mp_irq; 1211 struct mpc_intsrc mp_irq;
1234 int ioapic; 1212 int ioapic;
1235 1213
1236 if (!acpi_ioapic) 1214 if (!acpi_ioapic)
1237 return 0; 1215 return 0;
1238 1216
1239 /* print the entry should happen on mptable identically */ 1217 /* print the entry should happen on mptable identically */
1240 mp_irq.mp_type = MP_INTSRC; 1218 mp_irq.type = MP_INTSRC;
1241 mp_irq.mp_irqtype = mp_INT; 1219 mp_irq.irqtype = mp_INT;
1242 mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | 1220 mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
1243 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); 1221 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
1244 mp_irq.mp_srcbus = number; 1222 mp_irq.srcbus = number;
1245 mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); 1223 mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
1246 ioapic = mp_find_ioapic(gsi); 1224 ioapic = mp_find_ioapic(gsi);
1247 mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; 1225 mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
1248 mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; 1226 mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
1249 1227
1250 save_mp_irq(&mp_irq); 1228 save_mp_irq(&mp_irq);
1251#endif 1229#endif
@@ -1372,7 +1350,7 @@ static void __init acpi_process_madt(void)
1372 if (!error) { 1350 if (!error) {
1373 acpi_lapic = 1; 1351 acpi_lapic = 1;
1374 1352
1375#ifdef CONFIG_X86_GENERICARCH 1353#ifdef CONFIG_X86_BIGSMP
1376 generic_bigsmp_probe(); 1354 generic_bigsmp_probe();
1377#endif 1355#endif
1378 /* 1356 /*
@@ -1384,9 +1362,8 @@ static void __init acpi_process_madt(void)
1384 acpi_ioapic = 1; 1362 acpi_ioapic = 1;
1385 1363
1386 smp_found_config = 1; 1364 smp_found_config = 1;
1387#ifdef CONFIG_X86_32 1365 if (apic->setup_apic_routing)
1388 setup_apic_routing(); 1366 apic->setup_apic_routing();
1389#endif
1390 } 1367 }
1391 } 1368 }
1392 if (error == -EINVAL) { 1369 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/acpi/realmode/wakeup.S b/arch/x86/kernel/acpi/realmode/wakeup.S
index 3355973b12ac..580b4e296010 100644
--- a/arch/x86/kernel/acpi/realmode/wakeup.S
+++ b/arch/x86/kernel/acpi/realmode/wakeup.S
@@ -3,8 +3,8 @@
3 */ 3 */
4#include <asm/segment.h> 4#include <asm/segment.h>
5#include <asm/msr-index.h> 5#include <asm/msr-index.h>
6#include <asm/page.h> 6#include <asm/page_types.h>
7#include <asm/pgtable.h> 7#include <asm/pgtable_types.h>
8#include <asm/processor-flags.h> 8#include <asm/processor-flags.h>
9 9
10 .code16 10 .code16
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index a60c1f3bcb87..7c243a2c5115 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
101 stack_start.sp = temp_stack + sizeof(temp_stack); 101 stack_start.sp = temp_stack + sizeof(temp_stack);
102 early_gdt_descr.address = 102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
104 initial_gs = per_cpu_offset(smp_processor_id());
104#endif 105#endif
105 initial_code = (unsigned long)wakeup_long64; 106 initial_code = (unsigned long)wakeup_long64;
106 saved_magic = 0x123456789abcdef0; 107 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/acpi/wakeup_32.S b/arch/x86/kernel/acpi/wakeup_32.S
index a12e6a9fb659..8ded418b0593 100644
--- a/arch/x86/kernel/acpi/wakeup_32.S
+++ b/arch/x86/kernel/acpi/wakeup_32.S
@@ -1,7 +1,7 @@
1 .section .text.page_aligned 1 .section .text.page_aligned
2#include <linux/linkage.h> 2#include <linux/linkage.h>
3#include <asm/segment.h> 3#include <asm/segment.h>
4#include <asm/page.h> 4#include <asm/page_types.h>
5 5
6# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2 6# Copyright 2003, 2008 Pavel Machek <pavel@suse.cz>, distribute under GPLv2
7 7
diff --git a/arch/x86/kernel/acpi/wakeup_64.S b/arch/x86/kernel/acpi/wakeup_64.S
index 96258d9dc974..8ea5164cbd04 100644
--- a/arch/x86/kernel/acpi/wakeup_64.S
+++ b/arch/x86/kernel/acpi/wakeup_64.S
@@ -1,8 +1,8 @@
1.text 1.text
2#include <linux/linkage.h> 2#include <linux/linkage.h>
3#include <asm/segment.h> 3#include <asm/segment.h>
4#include <asm/pgtable.h> 4#include <asm/pgtable_types.h>
5#include <asm/page.h> 5#include <asm/page_types.h>
6#include <asm/msr.h> 6#include <asm/msr.h>
7#include <asm/asm-offsets.h> 7#include <asm/asm-offsets.h>
8 8
diff --git a/arch/x86/kernel/apic/Makefile b/arch/x86/kernel/apic/Makefile
new file mode 100644
index 000000000000..da7b7b9f8bd8
--- /dev/null
+++ b/arch/x86/kernel/apic/Makefile
@@ -0,0 +1,19 @@
1#
2# Makefile for local APIC drivers and for the IO-APIC code
3#
4
5obj-$(CONFIG_X86_LOCAL_APIC) += apic.o probe_$(BITS).o ipi.o nmi.o
6obj-$(CONFIG_X86_IO_APIC) += io_apic.o
7obj-$(CONFIG_SMP) += ipi.o
8
9ifeq ($(CONFIG_X86_64),y)
10obj-y += apic_flat_64.o
11obj-$(CONFIG_X86_X2APIC) += x2apic_cluster.o
12obj-$(CONFIG_X86_X2APIC) += x2apic_phys.o
13obj-$(CONFIG_X86_UV) += x2apic_uv_x.o
14endif
15
16obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
17obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
18obj-$(CONFIG_X86_ES7000) += es7000_32.o
19obj-$(CONFIG_X86_SUMMIT) += summit_32.o
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic/apic.c
index 57b53774d986..30909a258d0f 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Local APIC handling, local APIC timers 2 * Local APIC handling, local APIC timers
3 * 3 *
4 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com> 4 * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
5 * 5 *
6 * Fixes 6 * Fixes
7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs; 7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
@@ -14,52 +14,70 @@
14 * Mikael Pettersson : PM converted to driver model. 14 * Mikael Pettersson : PM converted to driver model.
15 */ 15 */
16 16
17#include <linux/init.h>
18
19#include <linux/mm.h>
20#include <linux/delay.h>
21#include <linux/bootmem.h>
22#include <linux/interrupt.h>
23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
25#include <linux/sysdev.h> 18#include <linux/mc146818rtc.h>
26#include <linux/ioport.h>
27#include <linux/cpu.h>
28#include <linux/clockchips.h>
29#include <linux/acpi_pmtmr.h> 19#include <linux/acpi_pmtmr.h>
20#include <linux/clockchips.h>
21#include <linux/interrupt.h>
22#include <linux/bootmem.h>
23#include <linux/ftrace.h>
24#include <linux/ioport.h>
30#include <linux/module.h> 25#include <linux/module.h>
31#include <linux/dmi.h> 26#include <linux/sysdev.h>
27#include <linux/delay.h>
28#include <linux/timex.h>
32#include <linux/dmar.h> 29#include <linux/dmar.h>
33#include <linux/ftrace.h> 30#include <linux/init.h>
34#include <linux/smp.h> 31#include <linux/cpu.h>
32#include <linux/dmi.h>
35#include <linux/nmi.h> 33#include <linux/nmi.h>
36#include <linux/timex.h> 34#include <linux/smp.h>
35#include <linux/mm.h>
37 36
37#include <asm/pgalloc.h>
38#include <asm/atomic.h> 38#include <asm/atomic.h>
39#include <asm/mtrr.h>
40#include <asm/mpspec.h> 39#include <asm/mpspec.h>
41#include <asm/desc.h>
42#include <asm/arch_hooks.h>
43#include <asm/hpet.h>
44#include <asm/pgalloc.h>
45#include <asm/i8253.h> 40#include <asm/i8253.h>
46#include <asm/idle.h> 41#include <asm/i8259.h>
47#include <asm/proto.h> 42#include <asm/proto.h>
48#include <asm/apic.h> 43#include <asm/apic.h>
49#include <asm/i8259.h> 44#include <asm/desc.h>
45#include <asm/hpet.h>
46#include <asm/idle.h>
47#include <asm/mtrr.h>
50#include <asm/smp.h> 48#include <asm/smp.h>
51#include <asm/mce.h> 49#include <asm/mce.h>
52 50
53#include <mach_apic.h> 51unsigned int num_processors;
54#include <mach_apicdef.h> 52
55#include <mach_ipi.h> 53unsigned disabled_cpus __cpuinitdata;
54
55/* Processor that is doing the boot up */
56unsigned int boot_cpu_physical_apicid = -1U;
56 57
57/* 58/*
58 * Sanity check 59 * The highest APIC ID seen during enumeration.
60 *
61 * This determines the messaging protocol we can use: if all APIC IDs
62 * are in the 0 ... 7 range, then we can use logical addressing which
63 * has some performance advantages (better broadcasting).
64 *
65 * If there's an APIC ID above 8, we use physical addressing.
59 */ 66 */
60#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) 67unsigned int max_physical_apicid;
61# error SPURIOUS_APIC_VECTOR definition error 68
62#endif 69/*
70 * Bitmask of physically existing CPUs:
71 */
72physid_mask_t phys_cpu_present_map;
73
74/*
75 * Map cpu index to physical APIC ID
76 */
77DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
78DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
79EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
80EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
63 81
64#ifdef CONFIG_X86_32 82#ifdef CONFIG_X86_32
65/* 83/*
@@ -93,11 +111,7 @@ static __init int setup_apicpmtimer(char *s)
93__setup("apicpmtimer", setup_apicpmtimer); 111__setup("apicpmtimer", setup_apicpmtimer);
94#endif 112#endif
95 113
96#ifdef CONFIG_X86_64 114#ifdef CONFIG_X86_X2APIC
97#define HAVE_X2APIC
98#endif
99
100#ifdef HAVE_X2APIC
101int x2apic; 115int x2apic;
102/* x2apic enabled before OS handover */ 116/* x2apic enabled before OS handover */
103static int x2apic_preenabled; 117static int x2apic_preenabled;
@@ -195,18 +209,13 @@ static int modern_apic(void)
195 return lapic_get_version() >= 0x14; 209 return lapic_get_version() >= 0x14;
196} 210}
197 211
198/* 212void native_apic_wait_icr_idle(void)
199 * Paravirt kernels also might be using these below ops. So we still
200 * use generic apic_read()/apic_write(), which might be pointing to different
201 * ops in PARAVIRT case.
202 */
203void xapic_wait_icr_idle(void)
204{ 213{
205 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 214 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
206 cpu_relax(); 215 cpu_relax();
207} 216}
208 217
209u32 safe_xapic_wait_icr_idle(void) 218u32 native_safe_apic_wait_icr_idle(void)
210{ 219{
211 u32 send_status; 220 u32 send_status;
212 int timeout; 221 int timeout;
@@ -222,13 +231,13 @@ u32 safe_xapic_wait_icr_idle(void)
222 return send_status; 231 return send_status;
223} 232}
224 233
225void xapic_icr_write(u32 low, u32 id) 234void native_apic_icr_write(u32 low, u32 id)
226{ 235{
227 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id)); 236 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
228 apic_write(APIC_ICR, low); 237 apic_write(APIC_ICR, low);
229} 238}
230 239
231static u64 xapic_icr_read(void) 240u64 native_apic_icr_read(void)
232{ 241{
233 u32 icr1, icr2; 242 u32 icr1, icr2;
234 243
@@ -238,54 +247,6 @@ static u64 xapic_icr_read(void)
238 return icr1 | ((u64)icr2 << 32); 247 return icr1 | ((u64)icr2 << 32);
239} 248}
240 249
241static struct apic_ops xapic_ops = {
242 .read = native_apic_mem_read,
243 .write = native_apic_mem_write,
244 .icr_read = xapic_icr_read,
245 .icr_write = xapic_icr_write,
246 .wait_icr_idle = xapic_wait_icr_idle,
247 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
248};
249
250struct apic_ops __read_mostly *apic_ops = &xapic_ops;
251EXPORT_SYMBOL_GPL(apic_ops);
252
253#ifdef HAVE_X2APIC
254static void x2apic_wait_icr_idle(void)
255{
256 /* no need to wait for icr idle in x2apic */
257 return;
258}
259
260static u32 safe_x2apic_wait_icr_idle(void)
261{
262 /* no need to wait for icr idle in x2apic */
263 return 0;
264}
265
266void x2apic_icr_write(u32 low, u32 id)
267{
268 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
269}
270
271static u64 x2apic_icr_read(void)
272{
273 unsigned long val;
274
275 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
276 return val;
277}
278
279static struct apic_ops x2apic_ops = {
280 .read = native_apic_msr_read,
281 .write = native_apic_msr_write,
282 .icr_read = x2apic_icr_read,
283 .icr_write = x2apic_icr_write,
284 .wait_icr_idle = x2apic_wait_icr_idle,
285 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
286};
287#endif
288
289/** 250/**
290 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 251 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
291 */ 252 */
@@ -458,7 +419,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
458static void lapic_timer_broadcast(const struct cpumask *mask) 419static void lapic_timer_broadcast(const struct cpumask *mask)
459{ 420{
460#ifdef CONFIG_SMP 421#ifdef CONFIG_SMP
461 send_IPI_mask(mask, LOCAL_TIMER_VECTOR); 422 apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
462#endif 423#endif
463} 424}
464 425
@@ -536,7 +497,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
536 } 497 }
537} 498}
538 499
539static int __init calibrate_by_pmtimer(long deltapm, long *delta) 500static int __init
501calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
540{ 502{
541 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; 503 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
542 const long pm_thresh = pm_100ms / 100; 504 const long pm_thresh = pm_100ms / 100;
@@ -547,7 +509,7 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
547 return -1; 509 return -1;
548#endif 510#endif
549 511
550 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); 512 apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
551 513
552 /* Check, if the PM timer is available */ 514 /* Check, if the PM timer is available */
553 if (!deltapm) 515 if (!deltapm)
@@ -557,19 +519,30 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
557 519
558 if (deltapm > (pm_100ms - pm_thresh) && 520 if (deltapm > (pm_100ms - pm_thresh) &&
559 deltapm < (pm_100ms + pm_thresh)) { 521 deltapm < (pm_100ms + pm_thresh)) {
560 apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); 522 apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
561 } else { 523 return 0;
562 res = (((u64)deltapm) * mult) >> 22; 524 }
563 do_div(res, 1000000); 525
564 pr_warning("APIC calibration not consistent " 526 res = (((u64)deltapm) * mult) >> 22;
565 "with PM Timer: %ldms instead of 100ms\n", 527 do_div(res, 1000000);
566 (long)res); 528 pr_warning("APIC calibration not consistent "
567 /* Correct the lapic counter value */ 529 "with PM-Timer: %ldms instead of 100ms\n",(long)res);
568 res = (((u64)(*delta)) * pm_100ms); 530
531 /* Correct the lapic counter value */
532 res = (((u64)(*delta)) * pm_100ms);
533 do_div(res, deltapm);
534 pr_info("APIC delta adjusted to PM-Timer: "
535 "%lu (%ld)\n", (unsigned long)res, *delta);
536 *delta = (long)res;
537
538 /* Correct the tsc counter value */
539 if (cpu_has_tsc) {
540 res = (((u64)(*deltatsc)) * pm_100ms);
569 do_div(res, deltapm); 541 do_div(res, deltapm);
570 pr_info("APIC delta adjusted to PM-Timer: " 542 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
571 "%lu (%ld)\n", (unsigned long)res, *delta); 543 "PM-Timer: %lu (%ld) \n",
572 *delta = (long)res; 544 (unsigned long)res, *deltatsc);
545 *deltatsc = (long)res;
573 } 546 }
574 547
575 return 0; 548 return 0;
@@ -580,7 +553,7 @@ static int __init calibrate_APIC_clock(void)
580 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 553 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
581 void (*real_handler)(struct clock_event_device *dev); 554 void (*real_handler)(struct clock_event_device *dev);
582 unsigned long deltaj; 555 unsigned long deltaj;
583 long delta; 556 long delta, deltatsc;
584 int pm_referenced = 0; 557 int pm_referenced = 0;
585 558
586 local_irq_disable(); 559 local_irq_disable();
@@ -610,9 +583,11 @@ static int __init calibrate_APIC_clock(void)
610 delta = lapic_cal_t1 - lapic_cal_t2; 583 delta = lapic_cal_t1 - lapic_cal_t2;
611 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); 584 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
612 585
586 deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
587
613 /* we trust the PM based calibration if possible */ 588 /* we trust the PM based calibration if possible */
614 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, 589 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
615 &delta); 590 &delta, &deltatsc);
616 591
617 /* Calculate the scaled math multiplication factor */ 592 /* Calculate the scaled math multiplication factor */
618 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 593 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -630,11 +605,10 @@ static int __init calibrate_APIC_clock(void)
630 calibration_result); 605 calibration_result);
631 606
632 if (cpu_has_tsc) { 607 if (cpu_has_tsc) {
633 delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
634 apic_printk(APIC_VERBOSE, "..... CPU clock speed is " 608 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
635 "%ld.%04ld MHz.\n", 609 "%ld.%04ld MHz.\n",
636 (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), 610 (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
637 (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); 611 (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
638 } 612 }
639 613
640 apic_printk(APIC_VERBOSE, "..... host bus clock speed is " 614 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
@@ -1000,11 +974,11 @@ int __init verify_local_APIC(void)
1000 */ 974 */
1001 reg0 = apic_read(APIC_ID); 975 reg0 = apic_read(APIC_ID);
1002 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 976 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
1003 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 977 apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
1004 reg1 = apic_read(APIC_ID); 978 reg1 = apic_read(APIC_ID);
1005 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 979 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
1006 apic_write(APIC_ID, reg0); 980 apic_write(APIC_ID, reg0);
1007 if (reg1 != (reg0 ^ APIC_ID_MASK)) 981 if (reg1 != (reg0 ^ apic->apic_id_mask))
1008 return 0; 982 return 0;
1009 983
1010 /* 984 /*
@@ -1098,7 +1072,7 @@ static void __cpuinit lapic_setup_esr(void)
1098 return; 1072 return;
1099 } 1073 }
1100 1074
1101 if (esr_disable) { 1075 if (apic->disable_esr) {
1102 /* 1076 /*
1103 * Something untraceable is creating bad interrupts on 1077 * Something untraceable is creating bad interrupts on
1104 * secondary quads ... for the moment, just leave the 1078 * secondary quads ... for the moment, just leave the
@@ -1139,9 +1113,14 @@ void __cpuinit setup_local_APIC(void)
1139 unsigned int value; 1113 unsigned int value;
1140 int i, j; 1114 int i, j;
1141 1115
1116 if (disable_apic) {
1117 arch_disable_smp_support();
1118 return;
1119 }
1120
1142#ifdef CONFIG_X86_32 1121#ifdef CONFIG_X86_32
1143 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1122 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1144 if (lapic_is_integrated() && esr_disable) { 1123 if (lapic_is_integrated() && apic->disable_esr) {
1145 apic_write(APIC_ESR, 0); 1124 apic_write(APIC_ESR, 0);
1146 apic_write(APIC_ESR, 0); 1125 apic_write(APIC_ESR, 0);
1147 apic_write(APIC_ESR, 0); 1126 apic_write(APIC_ESR, 0);
@@ -1155,7 +1134,7 @@ void __cpuinit setup_local_APIC(void)
1155 * Double-check whether this APIC is really registered. 1134 * Double-check whether this APIC is really registered.
1156 * This is meaningless in clustered apic mode, so we skip it. 1135 * This is meaningless in clustered apic mode, so we skip it.
1157 */ 1136 */
1158 if (!apic_id_registered()) 1137 if (!apic->apic_id_registered())
1159 BUG(); 1138 BUG();
1160 1139
1161 /* 1140 /*
@@ -1163,7 +1142,7 @@ void __cpuinit setup_local_APIC(void)
1163 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 1142 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
1164 * document number 292116). So here it goes... 1143 * document number 292116). So here it goes...
1165 */ 1144 */
1166 init_apic_ldr(); 1145 apic->init_apic_ldr();
1167 1146
1168 /* 1147 /*
1169 * Set Task Priority to 'accept all'. We never change this 1148 * Set Task Priority to 'accept all'. We never change this
@@ -1297,17 +1276,12 @@ void __cpuinit end_local_APIC_setup(void)
1297 apic_pm_activate(); 1276 apic_pm_activate();
1298} 1277}
1299 1278
1300#ifdef HAVE_X2APIC 1279#ifdef CONFIG_X86_X2APIC
1301void check_x2apic(void) 1280void check_x2apic(void)
1302{ 1281{
1303 int msr, msr2; 1282 if (x2apic_enabled()) {
1304
1305 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1306
1307 if (msr & X2APIC_ENABLE) {
1308 pr_info("x2apic enabled by BIOS, switching to x2apic ops\n"); 1283 pr_info("x2apic enabled by BIOS, switching to x2apic ops\n");
1309 x2apic_preenabled = x2apic = 1; 1284 x2apic_preenabled = x2apic = 1;
1310 apic_ops = &x2apic_ops;
1311 } 1285 }
1312} 1286}
1313 1287
@@ -1315,6 +1289,9 @@ void enable_x2apic(void)
1315{ 1289{
1316 int msr, msr2; 1290 int msr, msr2;
1317 1291
1292 if (!x2apic)
1293 return;
1294
1318 rdmsr(MSR_IA32_APICBASE, msr, msr2); 1295 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1319 if (!(msr & X2APIC_ENABLE)) { 1296 if (!(msr & X2APIC_ENABLE)) {
1320 pr_info("Enabling x2apic\n"); 1297 pr_info("Enabling x2apic\n");
@@ -1378,7 +1355,6 @@ void __init enable_IR_x2apic(void)
1378 1355
1379 if (!x2apic) { 1356 if (!x2apic) {
1380 x2apic = 1; 1357 x2apic = 1;
1381 apic_ops = &x2apic_ops;
1382 enable_x2apic(); 1358 enable_x2apic();
1383 } 1359 }
1384 1360
@@ -1416,7 +1392,7 @@ end:
1416 1392
1417 return; 1393 return;
1418} 1394}
1419#endif /* HAVE_X2APIC */ 1395#endif /* CONFIG_X86_X2APIC */
1420 1396
1421#ifdef CONFIG_X86_64 1397#ifdef CONFIG_X86_64
1422/* 1398/*
@@ -1547,7 +1523,7 @@ void __init early_init_lapic_mapping(void)
1547 */ 1523 */
1548void __init init_apic_mappings(void) 1524void __init init_apic_mappings(void)
1549{ 1525{
1550#ifdef HAVE_X2APIC 1526#ifdef CONFIG_X86_X2APIC
1551 if (x2apic) { 1527 if (x2apic) {
1552 boot_cpu_physical_apicid = read_apic_id(); 1528 boot_cpu_physical_apicid = read_apic_id();
1553 return; 1529 return;
@@ -1585,11 +1561,11 @@ int apic_version[MAX_APICS];
1585 1561
1586int __init APIC_init_uniprocessor(void) 1562int __init APIC_init_uniprocessor(void)
1587{ 1563{
1588#ifdef CONFIG_X86_64
1589 if (disable_apic) { 1564 if (disable_apic) {
1590 pr_info("Apic disabled\n"); 1565 pr_info("Apic disabled\n");
1591 return -1; 1566 return -1;
1592 } 1567 }
1568#ifdef CONFIG_X86_64
1593 if (!cpu_has_apic) { 1569 if (!cpu_has_apic) {
1594 disable_apic = 1; 1570 disable_apic = 1;
1595 pr_info("Apic disabled by BIOS\n"); 1571 pr_info("Apic disabled by BIOS\n");
@@ -1611,11 +1587,9 @@ int __init APIC_init_uniprocessor(void)
1611 } 1587 }
1612#endif 1588#endif
1613 1589
1614#ifdef HAVE_X2APIC
1615 enable_IR_x2apic(); 1590 enable_IR_x2apic();
1616#endif
1617#ifdef CONFIG_X86_64 1591#ifdef CONFIG_X86_64
1618 setup_apic_routing(); 1592 default_setup_apic_routing();
1619#endif 1593#endif
1620 1594
1621 verify_local_APIC(); 1595 verify_local_APIC();
@@ -1636,35 +1610,31 @@ int __init APIC_init_uniprocessor(void)
1636 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1610 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1637 setup_local_APIC(); 1611 setup_local_APIC();
1638 1612
1639#ifdef CONFIG_X86_64 1613#ifdef CONFIG_X86_IO_APIC
1640 /* 1614 /*
1641 * Now enable IO-APICs, actually call clear_IO_APIC 1615 * Now enable IO-APICs, actually call clear_IO_APIC
1642 * We need clear_IO_APIC before enabling vector on BP 1616 * We need clear_IO_APIC before enabling error vector
1643 */ 1617 */
1644 if (!skip_ioapic_setup && nr_ioapics) 1618 if (!skip_ioapic_setup && nr_ioapics)
1645 enable_IO_APIC(); 1619 enable_IO_APIC();
1646#endif 1620#endif
1647 1621
1648#ifdef CONFIG_X86_IO_APIC
1649 if (!smp_found_config || skip_ioapic_setup || !nr_ioapics)
1650#endif
1651 localise_nmi_watchdog();
1652 end_local_APIC_setup(); 1622 end_local_APIC_setup();
1653 1623
1654#ifdef CONFIG_X86_IO_APIC 1624#ifdef CONFIG_X86_IO_APIC
1655 if (smp_found_config && !skip_ioapic_setup && nr_ioapics) 1625 if (smp_found_config && !skip_ioapic_setup && nr_ioapics)
1656 setup_IO_APIC(); 1626 setup_IO_APIC();
1657# ifdef CONFIG_X86_64 1627 else {
1658 else
1659 nr_ioapics = 0; 1628 nr_ioapics = 0;
1660# endif 1629 localise_nmi_watchdog();
1630 }
1631#else
1632 localise_nmi_watchdog();
1661#endif 1633#endif
1662 1634
1635 setup_boot_clock();
1663#ifdef CONFIG_X86_64 1636#ifdef CONFIG_X86_64
1664 setup_boot_APIC_clock();
1665 check_nmi_watchdog(); 1637 check_nmi_watchdog();
1666#else
1667 setup_boot_clock();
1668#endif 1638#endif
1669 1639
1670 return 0; 1640 return 0;
@@ -1753,7 +1723,8 @@ void __init connect_bsp_APIC(void)
1753 outb(0x01, 0x23); 1723 outb(0x01, 0x23);
1754 } 1724 }
1755#endif 1725#endif
1756 enable_apic_mode(); 1726 if (apic->enable_apic_mode)
1727 apic->enable_apic_mode();
1757} 1728}
1758 1729
1759/** 1730/**
@@ -1891,29 +1862,39 @@ void __cpuinit generic_processor_info(int apicid, int version)
1891 } 1862 }
1892#endif 1863#endif
1893 1864
1894#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) 1865#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
1895 /* are we being called early in kernel startup? */ 1866 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1896 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1867 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1897 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1898 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1899
1900 cpu_to_apicid[cpu] = apicid;
1901 bios_cpu_apicid[cpu] = apicid;
1902 } else {
1903 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1904 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1905 }
1906#endif 1868#endif
1907 1869
1908 set_cpu_possible(cpu, true); 1870 set_cpu_possible(cpu, true);
1909 set_cpu_present(cpu, true); 1871 set_cpu_present(cpu, true);
1910} 1872}
1911 1873
1912#ifdef CONFIG_X86_64
1913int hard_smp_processor_id(void) 1874int hard_smp_processor_id(void)
1914{ 1875{
1915 return read_apic_id(); 1876 return read_apic_id();
1916} 1877}
1878
1879void default_init_apic_ldr(void)
1880{
1881 unsigned long val;
1882
1883 apic_write(APIC_DFR, APIC_DFR_VALUE);
1884 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
1885 val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
1886 apic_write(APIC_LDR, val);
1887}
1888
1889#ifdef CONFIG_X86_32
1890int default_apicid_to_node(int logical_apicid)
1891{
1892#ifdef CONFIG_SMP
1893 return apicid_2_node[hard_smp_processor_id()];
1894#else
1895 return 0;
1896#endif
1897}
1917#endif 1898#endif
1918 1899
1919/* 1900/*
@@ -1991,7 +1972,7 @@ static int lapic_resume(struct sys_device *dev)
1991 1972
1992 local_irq_save(flags); 1973 local_irq_save(flags);
1993 1974
1994#ifdef HAVE_X2APIC 1975#ifdef CONFIG_X86_X2APIC
1995 if (x2apic) 1976 if (x2apic)
1996 enable_x2apic(); 1977 enable_x2apic();
1997 else 1978 else
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/apic/apic_flat_64.c
index 34185488e4fb..3b002995e145 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/apic/apic_flat_64.c
@@ -17,9 +17,8 @@
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h> 18#include <linux/hardirq.h>
19#include <asm/smp.h> 19#include <asm/smp.h>
20#include <asm/apic.h>
20#include <asm/ipi.h> 21#include <asm/ipi.h>
21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23 22
24#ifdef CONFIG_ACPI 23#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h> 24#include <acpi/acpi_bus.h>
@@ -74,7 +73,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
74 unsigned long flags; 73 unsigned long flags;
75 74
76 local_irq_save(flags); 75 local_irq_save(flags);
77 __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); 76 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
78 local_irq_restore(flags); 77 local_irq_restore(flags);
79} 78}
80 79
@@ -85,14 +84,15 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
85 _flat_send_IPI_mask(mask, vector); 84 _flat_send_IPI_mask(mask, vector);
86} 85}
87 86
88static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, 87static void
89 int vector) 88 flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
90{ 89{
91 unsigned long mask = cpumask_bits(cpumask)[0]; 90 unsigned long mask = cpumask_bits(cpumask)[0];
92 int cpu = smp_processor_id(); 91 int cpu = smp_processor_id();
93 92
94 if (cpu < BITS_PER_LONG) 93 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask); 94 clear_bit(cpu, &mask);
95
96 _flat_send_IPI_mask(mask, vector); 96 _flat_send_IPI_mask(mask, vector);
97} 97}
98 98
@@ -114,23 +114,27 @@ static void flat_send_IPI_allbutself(int vector)
114 _flat_send_IPI_mask(mask, vector); 114 _flat_send_IPI_mask(mask, vector);
115 } 115 }
116 } else if (num_online_cpus() > 1) { 116 } else if (num_online_cpus() > 1) {
117 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); 117 __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
118 vector, apic->dest_logical);
118 } 119 }
119} 120}
120 121
121static void flat_send_IPI_all(int vector) 122static void flat_send_IPI_all(int vector)
122{ 123{
123 if (vector == NMI_VECTOR) 124 if (vector == NMI_VECTOR) {
124 flat_send_IPI_mask(cpu_online_mask, vector); 125 flat_send_IPI_mask(cpu_online_mask, vector);
125 else 126 } else {
126 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 127 __default_send_IPI_shortcut(APIC_DEST_ALLINC,
128 vector, apic->dest_logical);
129 }
127} 130}
128 131
129static unsigned int get_apic_id(unsigned long x) 132static unsigned int flat_get_apic_id(unsigned long x)
130{ 133{
131 unsigned int id; 134 unsigned int id;
132 135
133 id = (((x)>>24) & 0xFFu); 136 id = (((x)>>24) & 0xFFu);
137
134 return id; 138 return id;
135} 139}
136 140
@@ -146,7 +150,7 @@ static unsigned int read_xapic_id(void)
146{ 150{
147 unsigned int id; 151 unsigned int id;
148 152
149 id = get_apic_id(apic_read(APIC_ID)); 153 id = flat_get_apic_id(apic_read(APIC_ID));
150 return id; 154 return id;
151} 155}
152 156
@@ -169,31 +173,68 @@ static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
169 return mask1 & mask2; 173 return mask1 & mask2;
170} 174}
171 175
172static unsigned int phys_pkg_id(int index_msb) 176static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
173{ 177{
174 return hard_smp_processor_id() >> index_msb; 178 return hard_smp_processor_id() >> index_msb;
175} 179}
176 180
177struct genapic apic_flat = { 181struct apic apic_flat = {
178 .name = "flat", 182 .name = "flat",
179 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 183 .probe = NULL,
180 .int_delivery_mode = dest_LowestPrio, 184 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
181 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 185 .apic_id_registered = flat_apic_id_registered,
182 .target_cpus = flat_target_cpus, 186
183 .vector_allocation_domain = flat_vector_allocation_domain, 187 .irq_delivery_mode = dest_LowestPrio,
184 .apic_id_registered = flat_apic_id_registered, 188 .irq_dest_mode = 1, /* logical */
185 .init_apic_ldr = flat_init_apic_ldr, 189
186 .send_IPI_all = flat_send_IPI_all, 190 .target_cpus = flat_target_cpus,
187 .send_IPI_allbutself = flat_send_IPI_allbutself, 191 .disable_esr = 0,
188 .send_IPI_mask = flat_send_IPI_mask, 192 .dest_logical = APIC_DEST_LOGICAL,
189 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, 193 .check_apicid_used = NULL,
190 .send_IPI_self = apic_send_IPI_self, 194 .check_apicid_present = NULL,
191 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 195
192 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, 196 .vector_allocation_domain = flat_vector_allocation_domain,
193 .phys_pkg_id = phys_pkg_id, 197 .init_apic_ldr = flat_init_apic_ldr,
194 .get_apic_id = get_apic_id, 198
195 .set_apic_id = set_apic_id, 199 .ioapic_phys_id_map = NULL,
196 .apic_id_mask = (0xFFu<<24), 200 .setup_apic_routing = NULL,
201 .multi_timer_check = NULL,
202 .apicid_to_node = NULL,
203 .cpu_to_logical_apicid = NULL,
204 .cpu_present_to_apicid = default_cpu_present_to_apicid,
205 .apicid_to_cpu_present = NULL,
206 .setup_portio_remap = NULL,
207 .check_phys_apicid_present = default_check_phys_apicid_present,
208 .enable_apic_mode = NULL,
209 .phys_pkg_id = flat_phys_pkg_id,
210 .mps_oem_check = NULL,
211
212 .get_apic_id = flat_get_apic_id,
213 .set_apic_id = set_apic_id,
214 .apic_id_mask = 0xFFu << 24,
215
216 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
217 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
218
219 .send_IPI_mask = flat_send_IPI_mask,
220 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
221 .send_IPI_allbutself = flat_send_IPI_allbutself,
222 .send_IPI_all = flat_send_IPI_all,
223 .send_IPI_self = apic_send_IPI_self,
224
225 .wakeup_cpu = NULL,
226 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
227 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
228 .wait_for_init_deassert = NULL,
229 .smp_callin_clear_local_apic = NULL,
230 .inquire_remote_apic = NULL,
231
232 .read = native_apic_mem_read,
233 .write = native_apic_mem_write,
234 .icr_read = native_apic_icr_read,
235 .icr_write = native_apic_icr_write,
236 .wait_icr_idle = native_apic_wait_icr_idle,
237 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
197}; 238};
198 239
199/* 240/*
@@ -232,18 +273,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
232 273
233static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) 274static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
234{ 275{
235 send_IPI_mask_sequence(cpumask, vector); 276 default_send_IPI_mask_sequence_phys(cpumask, vector);
236} 277}
237 278
238static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, 279static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
239 int vector) 280 int vector)
240{ 281{
241 send_IPI_mask_allbutself(cpumask, vector); 282 default_send_IPI_mask_allbutself_phys(cpumask, vector);
242} 283}
243 284
244static void physflat_send_IPI_allbutself(int vector) 285static void physflat_send_IPI_allbutself(int vector)
245{ 286{
246 send_IPI_mask_allbutself(cpu_online_mask, vector); 287 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
247} 288}
248 289
249static void physflat_send_IPI_all(int vector) 290static void physflat_send_IPI_all(int vector)
@@ -276,32 +317,73 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
276 * We're using fixed IRQ delivery, can only return one phys APIC ID. 317 * We're using fixed IRQ delivery, can only return one phys APIC ID.
277 * May as well be the first. 318 * May as well be the first.
278 */ 319 */
279 for_each_cpu_and(cpu, cpumask, andmask) 320 for_each_cpu_and(cpu, cpumask, andmask) {
280 if (cpumask_test_cpu(cpu, cpu_online_mask)) 321 if (cpumask_test_cpu(cpu, cpu_online_mask))
281 break; 322 break;
323 }
282 if (cpu < nr_cpu_ids) 324 if (cpu < nr_cpu_ids)
283 return per_cpu(x86_cpu_to_apicid, cpu); 325 return per_cpu(x86_cpu_to_apicid, cpu);
326
284 return BAD_APICID; 327 return BAD_APICID;
285} 328}
286 329
287struct genapic apic_physflat = { 330struct apic apic_physflat = {
288 .name = "physical flat", 331
289 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 332 .name = "physical flat",
290 .int_delivery_mode = dest_Fixed, 333 .probe = NULL,
291 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 334 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
292 .target_cpus = physflat_target_cpus, 335 .apic_id_registered = flat_apic_id_registered,
293 .vector_allocation_domain = physflat_vector_allocation_domain, 336
294 .apic_id_registered = flat_apic_id_registered, 337 .irq_delivery_mode = dest_Fixed,
295 .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ 338 .irq_dest_mode = 0, /* physical */
296 .send_IPI_all = physflat_send_IPI_all, 339
297 .send_IPI_allbutself = physflat_send_IPI_allbutself, 340 .target_cpus = physflat_target_cpus,
298 .send_IPI_mask = physflat_send_IPI_mask, 341 .disable_esr = 0,
299 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, 342 .dest_logical = 0,
300 .send_IPI_self = apic_send_IPI_self, 343 .check_apicid_used = NULL,
301 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 344 .check_apicid_present = NULL,
302 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, 345
303 .phys_pkg_id = phys_pkg_id, 346 .vector_allocation_domain = physflat_vector_allocation_domain,
304 .get_apic_id = get_apic_id, 347 /* not needed, but shouldn't hurt: */
305 .set_apic_id = set_apic_id, 348 .init_apic_ldr = flat_init_apic_ldr,
306 .apic_id_mask = (0xFFu<<24), 349
350 .ioapic_phys_id_map = NULL,
351 .setup_apic_routing = NULL,
352 .multi_timer_check = NULL,
353 .apicid_to_node = NULL,
354 .cpu_to_logical_apicid = NULL,
355 .cpu_present_to_apicid = default_cpu_present_to_apicid,
356 .apicid_to_cpu_present = NULL,
357 .setup_portio_remap = NULL,
358 .check_phys_apicid_present = default_check_phys_apicid_present,
359 .enable_apic_mode = NULL,
360 .phys_pkg_id = flat_phys_pkg_id,
361 .mps_oem_check = NULL,
362
363 .get_apic_id = flat_get_apic_id,
364 .set_apic_id = set_apic_id,
365 .apic_id_mask = 0xFFu << 24,
366
367 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
368 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
369
370 .send_IPI_mask = physflat_send_IPI_mask,
371 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
372 .send_IPI_allbutself = physflat_send_IPI_allbutself,
373 .send_IPI_all = physflat_send_IPI_all,
374 .send_IPI_self = apic_send_IPI_self,
375
376 .wakeup_cpu = NULL,
377 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
378 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
379 .wait_for_init_deassert = NULL,
380 .smp_callin_clear_local_apic = NULL,
381 .inquire_remote_apic = NULL,
382
383 .read = native_apic_mem_read,
384 .write = native_apic_mem_write,
385 .icr_read = native_apic_icr_read,
386 .icr_write = native_apic_icr_write,
387 .wait_icr_idle = native_apic_wait_icr_idle,
388 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
307}; 389};
diff --git a/arch/x86/kernel/apic/bigsmp_32.c b/arch/x86/kernel/apic/bigsmp_32.c
new file mode 100644
index 000000000000..0b1093394fdf
--- /dev/null
+++ b/arch/x86/kernel/apic/bigsmp_32.c
@@ -0,0 +1,274 @@
1/*
2 * APIC driver for "bigsmp" xAPIC machines with more than 8 virtual CPUs.
3 *
4 * Drives the local APIC in "clustered mode".
5 */
6#include <linux/threads.h>
7#include <linux/cpumask.h>
8#include <linux/kernel.h>
9#include <linux/init.h>
10#include <linux/dmi.h>
11#include <linux/smp.h>
12
13#include <asm/apicdef.h>
14#include <asm/fixmap.h>
15#include <asm/mpspec.h>
16#include <asm/apic.h>
17#include <asm/ipi.h>
18
19static inline unsigned bigsmp_get_apic_id(unsigned long x)
20{
21 return (x >> 24) & 0xFF;
22}
23
24static inline int bigsmp_apic_id_registered(void)
25{
26 return 1;
27}
28
29static inline const cpumask_t *bigsmp_target_cpus(void)
30{
31#ifdef CONFIG_SMP
32 return &cpu_online_map;
33#else
34 return &cpumask_of_cpu(0);
35#endif
36}
37
38static inline unsigned long
39bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
40{
41 return 0;
42}
43
44static inline unsigned long bigsmp_check_apicid_present(int bit)
45{
46 return 1;
47}
48
49static inline unsigned long calculate_ldr(int cpu)
50{
51 unsigned long val, id;
52
53 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
54 id = per_cpu(x86_bios_cpu_apicid, cpu);
55 val |= SET_APIC_LOGICAL_ID(id);
56
57 return val;
58}
59
60/*
61 * Set up the logical destination ID.
62 *
63 * Intel recommends to set DFR, LDR and TPR before enabling
64 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
65 * document number 292116). So here it goes...
66 */
67static inline void bigsmp_init_apic_ldr(void)
68{
69 unsigned long val;
70 int cpu = smp_processor_id();
71
72 apic_write(APIC_DFR, APIC_DFR_FLAT);
73 val = calculate_ldr(cpu);
74 apic_write(APIC_LDR, val);
75}
76
77static inline void bigsmp_setup_apic_routing(void)
78{
79 printk(KERN_INFO
80 "Enabling APIC mode: Physflat. Using %d I/O APICs\n",
81 nr_ioapics);
82}
83
84static inline int bigsmp_apicid_to_node(int logical_apicid)
85{
86 return apicid_2_node[hard_smp_processor_id()];
87}
88
89static inline int bigsmp_cpu_present_to_apicid(int mps_cpu)
90{
91 if (mps_cpu < nr_cpu_ids)
92 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
93
94 return BAD_APICID;
95}
96
97static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
98{
99 return physid_mask_of_physid(phys_apicid);
100}
101
102/* Mapping from cpu number to logical apicid */
103static inline int bigsmp_cpu_to_logical_apicid(int cpu)
104{
105 if (cpu >= nr_cpu_ids)
106 return BAD_APICID;
107 return cpu_physical_id(cpu);
108}
109
110static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
111{
112 /* For clustered we don't have a good way to do this yet - hack */
113 return physids_promote(0xFFL);
114}
115
116static inline void bigsmp_setup_portio_remap(void)
117{
118}
119
120static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
121{
122 return 1;
123}
124
125/* As we are using single CPU as destination, pick only one CPU here */
126static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
127{
128 return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask));
129}
130
131static inline unsigned int
132bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 const struct cpumask *andmask)
134{
135 int cpu;
136
137 /*
138 * We're using fixed IRQ delivery, can only return one phys APIC ID.
139 * May as well be the first.
140 */
141 for_each_cpu_and(cpu, cpumask, andmask) {
142 if (cpumask_test_cpu(cpu, cpu_online_mask))
143 break;
144 }
145 if (cpu < nr_cpu_ids)
146 return bigsmp_cpu_to_logical_apicid(cpu);
147
148 return BAD_APICID;
149}
150
151static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
152{
153 return cpuid_apic >> index_msb;
154}
155
156static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
157{
158 default_send_IPI_mask_sequence_phys(mask, vector);
159}
160
161static inline void bigsmp_send_IPI_allbutself(int vector)
162{
163 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
164}
165
166static inline void bigsmp_send_IPI_all(int vector)
167{
168 bigsmp_send_IPI_mask(cpu_online_mask, vector);
169}
170
171static int dmi_bigsmp; /* can be set by dmi scanners */
172
173static int hp_ht_bigsmp(const struct dmi_system_id *d)
174{
175 printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
176 dmi_bigsmp = 1;
177
178 return 0;
179}
180
181
182static const struct dmi_system_id bigsmp_dmi_table[] = {
183 { hp_ht_bigsmp, "HP ProLiant DL760 G2",
184 { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
185 DMI_MATCH(DMI_BIOS_VERSION, "P44-"),
186 }
187 },
188
189 { hp_ht_bigsmp, "HP ProLiant DL740",
190 { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
191 DMI_MATCH(DMI_BIOS_VERSION, "P47-"),
192 }
193 },
194 { } /* NULL entry stops DMI scanning */
195};
196
197static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask)
198{
199 cpus_clear(*retmask);
200 cpu_set(cpu, *retmask);
201}
202
203static int probe_bigsmp(void)
204{
205 if (def_to_bigsmp)
206 dmi_bigsmp = 1;
207 else
208 dmi_check_system(bigsmp_dmi_table);
209
210 return dmi_bigsmp;
211}
212
213struct apic apic_bigsmp = {
214
215 .name = "bigsmp",
216 .probe = probe_bigsmp,
217 .acpi_madt_oem_check = NULL,
218 .apic_id_registered = bigsmp_apic_id_registered,
219
220 .irq_delivery_mode = dest_Fixed,
221 /* phys delivery to target CPU: */
222 .irq_dest_mode = 0,
223
224 .target_cpus = bigsmp_target_cpus,
225 .disable_esr = 1,
226 .dest_logical = 0,
227 .check_apicid_used = bigsmp_check_apicid_used,
228 .check_apicid_present = bigsmp_check_apicid_present,
229
230 .vector_allocation_domain = bigsmp_vector_allocation_domain,
231 .init_apic_ldr = bigsmp_init_apic_ldr,
232
233 .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
234 .setup_apic_routing = bigsmp_setup_apic_routing,
235 .multi_timer_check = NULL,
236 .apicid_to_node = bigsmp_apicid_to_node,
237 .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
238 .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
239 .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
240 .setup_portio_remap = NULL,
241 .check_phys_apicid_present = bigsmp_check_phys_apicid_present,
242 .enable_apic_mode = NULL,
243 .phys_pkg_id = bigsmp_phys_pkg_id,
244 .mps_oem_check = NULL,
245
246 .get_apic_id = bigsmp_get_apic_id,
247 .set_apic_id = NULL,
248 .apic_id_mask = 0xFF << 24,
249
250 .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid,
251 .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and,
252
253 .send_IPI_mask = bigsmp_send_IPI_mask,
254 .send_IPI_mask_allbutself = NULL,
255 .send_IPI_allbutself = bigsmp_send_IPI_allbutself,
256 .send_IPI_all = bigsmp_send_IPI_all,
257 .send_IPI_self = default_send_IPI_self,
258
259 .wakeup_cpu = NULL,
260 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
261 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
262
263 .wait_for_init_deassert = default_wait_for_init_deassert,
264
265 .smp_callin_clear_local_apic = NULL,
266 .inquire_remote_apic = default_inquire_remote_apic,
267
268 .read = native_apic_mem_read,
269 .write = native_apic_mem_write,
270 .icr_read = native_apic_icr_read,
271 .icr_write = native_apic_icr_write,
272 .wait_icr_idle = native_apic_wait_icr_idle,
273 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
274};
diff --git a/arch/x86/kernel/apic/es7000_32.c b/arch/x86/kernel/apic/es7000_32.c
new file mode 100644
index 000000000000..320f2d2e4e54
--- /dev/null
+++ b/arch/x86/kernel/apic/es7000_32.c
@@ -0,0 +1,757 @@
1/*
2 * Written by: Garry Forsgren, Unisys Corporation
3 * Natalie Protasevich, Unisys Corporation
4 *
5 * This file contains the code to configure and interface
6 * with Unisys ES7000 series hardware system manager.
7 *
8 * Copyright (c) 2003 Unisys Corporation.
9 * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
10 *
11 * All Rights Reserved.
12 *
13 * This program is free software; you can redistribute it and/or modify it
14 * under the terms of version 2 of the GNU General Public License as
15 * published by the Free Software Foundation.
16 *
17 * This program is distributed in the hope that it would be useful, but
18 * WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
20 *
21 * You should have received a copy of the GNU General Public License along
22 * with this program; if not, write the Free Software Foundation, Inc., 59
23 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
24 *
25 * Contact information: Unisys Corporation, Township Line & Union Meeting
26 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
27 *
28 * http://www.unisys.com
29 */
30#include <linux/notifier.h>
31#include <linux/spinlock.h>
32#include <linux/cpumask.h>
33#include <linux/threads.h>
34#include <linux/kernel.h>
35#include <linux/module.h>
36#include <linux/reboot.h>
37#include <linux/string.h>
38#include <linux/types.h>
39#include <linux/errno.h>
40#include <linux/acpi.h>
41#include <linux/init.h>
42#include <linux/nmi.h>
43#include <linux/smp.h>
44#include <linux/io.h>
45
46#include <asm/apicdef.h>
47#include <asm/atomic.h>
48#include <asm/fixmap.h>
49#include <asm/mpspec.h>
50#include <asm/setup.h>
51#include <asm/apic.h>
52#include <asm/ipi.h>
53
54/*
55 * ES7000 chipsets
56 */
57
58#define NON_UNISYS 0
59#define ES7000_CLASSIC 1
60#define ES7000_ZORRO 2
61
62#define MIP_REG 1
63#define MIP_PSAI_REG 4
64
65#define MIP_BUSY 1
66#define MIP_SPIN 0xf0000
67#define MIP_VALID 0x0100000000000000ULL
68#define MIP_SW_APIC 0x1020b
69
70#define MIP_PORT(val) ((val >> 32) & 0xffff)
71
72#define MIP_RD_LO(val) (val & 0xffffffff)
73
74struct mip_reg {
75 unsigned long long off_0x00;
76 unsigned long long off_0x08;
77 unsigned long long off_0x10;
78 unsigned long long off_0x18;
79 unsigned long long off_0x20;
80 unsigned long long off_0x28;
81 unsigned long long off_0x30;
82 unsigned long long off_0x38;
83};
84
85struct mip_reg_info {
86 unsigned long long mip_info;
87 unsigned long long delivery_info;
88 unsigned long long host_reg;
89 unsigned long long mip_reg;
90};
91
92struct psai {
93 unsigned long long entry_type;
94 unsigned long long addr;
95 unsigned long long bep_addr;
96};
97
98#ifdef CONFIG_ACPI
99
100struct es7000_oem_table {
101 struct acpi_table_header Header;
102 u32 OEMTableAddr;
103 u32 OEMTableSize;
104};
105
106static unsigned long oem_addrX;
107static unsigned long oem_size;
108
109#endif
110
111/*
112 * ES7000 Globals
113 */
114
115static volatile unsigned long *psai;
116static struct mip_reg *mip_reg;
117static struct mip_reg *host_reg;
118static int mip_port;
119static unsigned long mip_addr;
120static unsigned long host_addr;
121
122int es7000_plat;
123
124/*
125 * GSI override for ES7000 platforms.
126 */
127
128static unsigned int base;
129
130static int
131es7000_rename_gsi(int ioapic, int gsi)
132{
133 if (es7000_plat == ES7000_ZORRO)
134 return gsi;
135
136 if (!base) {
137 int i;
138 for (i = 0; i < nr_ioapics; i++)
139 base += nr_ioapic_registers[i];
140 }
141
142 if (!ioapic && (gsi < 16))
143 gsi += base;
144
145 return gsi;
146}
147
148static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
149{
150 unsigned long vect = 0, psaival = 0;
151
152 if (psai == NULL)
153 return -1;
154
155 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
156 psaival = (0x1000000 | vect | cpu);
157
158 while (*psai & 0x1000000)
159 ;
160
161 *psai = psaival;
162
163 return 0;
164}
165
166static int __init es7000_update_apic(void)
167{
168 apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
169
170 /* MPENTIUMIII */
171 if (boot_cpu_data.x86 == 6 &&
172 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
173 es7000_update_apic_to_cluster();
174 apic->wait_for_init_deassert = NULL;
175 apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
176 }
177
178 return 0;
179}
180
181static void __init setup_unisys(void)
182{
183 /*
184 * Determine the generation of the ES7000 currently running.
185 *
186 * es7000_plat = 1 if the machine is a 5xx ES7000 box
187 * es7000_plat = 2 if the machine is a x86_64 ES7000 box
188 *
189 */
190 if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
191 es7000_plat = ES7000_ZORRO;
192 else
193 es7000_plat = ES7000_CLASSIC;
194 ioapic_renumber_irq = es7000_rename_gsi;
195
196 x86_quirks->update_apic = es7000_update_apic;
197}
198
199/*
200 * Parse the OEM Table:
201 */
202static int __init parse_unisys_oem(char *oemptr)
203{
204 int i;
205 int success = 0;
206 unsigned char type, size;
207 unsigned long val;
208 char *tp = NULL;
209 struct psai *psaip = NULL;
210 struct mip_reg_info *mi;
211 struct mip_reg *host, *mip;
212
213 tp = oemptr;
214
215 tp += 8;
216
217 for (i = 0; i <= 6; i++) {
218 type = *tp++;
219 size = *tp++;
220 tp -= 2;
221 switch (type) {
222 case MIP_REG:
223 mi = (struct mip_reg_info *)tp;
224 val = MIP_RD_LO(mi->host_reg);
225 host_addr = val;
226 host = (struct mip_reg *)val;
227 host_reg = __va(host);
228 val = MIP_RD_LO(mi->mip_reg);
229 mip_port = MIP_PORT(mi->mip_info);
230 mip_addr = val;
231 mip = (struct mip_reg *)val;
232 mip_reg = __va(mip);
233 pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
234 (unsigned long)host_reg);
235 pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
236 (unsigned long)mip_reg);
237 success++;
238 break;
239 case MIP_PSAI_REG:
240 psaip = (struct psai *)tp;
241 if (tp != NULL) {
242 if (psaip->addr)
243 psai = __va(psaip->addr);
244 else
245 psai = NULL;
246 success++;
247 }
248 break;
249 default:
250 break;
251 }
252 tp += size;
253 }
254
255 if (success < 2)
256 es7000_plat = NON_UNISYS;
257 else
258 setup_unisys();
259
260 return es7000_plat;
261}
262
263#ifdef CONFIG_ACPI
264static int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
265{
266 struct acpi_table_header *header = NULL;
267 struct es7000_oem_table *table;
268 acpi_size tbl_size;
269 acpi_status ret;
270 int i = 0;
271
272 for (;;) {
273 ret = acpi_get_table_with_size("OEM1", i++, &header, &tbl_size);
274 if (!ACPI_SUCCESS(ret))
275 return -1;
276
277 if (!memcmp((char *) &header->oem_id, "UNISYS", 6))
278 break;
279
280 early_acpi_os_unmap_memory(header, tbl_size);
281 }
282
283 table = (void *)header;
284
285 oem_addrX = table->OEMTableAddr;
286 oem_size = table->OEMTableSize;
287
288 early_acpi_os_unmap_memory(header, tbl_size);
289
290 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, oem_size);
291
292 return 0;
293}
294
295static void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
296{
297 if (!oem_addr)
298 return;
299
300 __acpi_unmap_table((char *)oem_addr, oem_size);
301}
302
303static int es7000_check_dsdt(void)
304{
305 struct acpi_table_header header;
306
307 if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) &&
308 !strncmp(header.oem_id, "UNISYS", 6))
309 return 1;
310 return 0;
311}
312
313/* Hook from generic ACPI tables.c */
314static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
315{
316 unsigned long oem_addr = 0;
317 int check_dsdt;
318 int ret = 0;
319
320 /* check dsdt at first to avoid clear fix_map for oem_addr */
321 check_dsdt = es7000_check_dsdt();
322
323 if (!find_unisys_acpi_oem_table(&oem_addr)) {
324 if (check_dsdt) {
325 ret = parse_unisys_oem((char *)oem_addr);
326 } else {
327 setup_unisys();
328 ret = 1;
329 }
330 /*
331 * we need to unmap it
332 */
333 unmap_unisys_acpi_oem_table(oem_addr);
334 }
335 return ret;
336}
337#else /* !CONFIG_ACPI: */
338static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
339{
340 return 0;
341}
342#endif /* !CONFIG_ACPI */
343
344static void es7000_spin(int n)
345{
346 int i = 0;
347
348 while (i++ < n)
349 rep_nop();
350}
351
352static int __init
353es7000_mip_write(struct mip_reg *mip_reg)
354{
355 int status = 0;
356 int spin;
357
358 spin = MIP_SPIN;
359 while ((host_reg->off_0x38 & MIP_VALID) != 0) {
360 if (--spin <= 0) {
361 WARN(1, "Timeout waiting for Host Valid Flag\n");
362 return -1;
363 }
364 es7000_spin(MIP_SPIN);
365 }
366
367 memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
368 outb(1, mip_port);
369
370 spin = MIP_SPIN;
371
372 while ((mip_reg->off_0x38 & MIP_VALID) == 0) {
373 if (--spin <= 0) {
374 WARN(1, "Timeout waiting for MIP Valid Flag\n");
375 return -1;
376 }
377 es7000_spin(MIP_SPIN);
378 }
379
380 status = (mip_reg->off_0x00 & 0xffff0000000000ULL) >> 48;
381 mip_reg->off_0x38 &= ~MIP_VALID;
382
383 return status;
384}
385
386static void __init es7000_enable_apic_mode(void)
387{
388 struct mip_reg es7000_mip_reg;
389 int mip_status;
390
391 if (!es7000_plat)
392 return;
393
394 printk(KERN_INFO "ES7000: Enabling APIC mode.\n");
395 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
396 es7000_mip_reg.off_0x00 = MIP_SW_APIC;
397 es7000_mip_reg.off_0x38 = MIP_VALID;
398
399 while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
400 WARN(1, "Command failed, status = %x\n", mip_status);
401}
402
403static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
404{
405 /* Careful. Some cpus do not strictly honor the set of cpus
406 * specified in the interrupt destination when using lowest
407 * priority interrupt delivery mode.
408 *
409 * In particular there was a hyperthreading cpu observed to
410 * deliver interrupts to the wrong hyperthread when only one
411 * hyperthread was specified in the interrupt desitination.
412 */
413 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
414}
415
416
417static void es7000_wait_for_init_deassert(atomic_t *deassert)
418{
419#ifndef CONFIG_ES7000_CLUSTERED_APIC
420 while (!atomic_read(deassert))
421 cpu_relax();
422#endif
423 return;
424}
425
426static unsigned int es7000_get_apic_id(unsigned long x)
427{
428 return (x >> 24) & 0xFF;
429}
430
431static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
432{
433 default_send_IPI_mask_sequence_phys(mask, vector);
434}
435
436static void es7000_send_IPI_allbutself(int vector)
437{
438 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
439}
440
441static void es7000_send_IPI_all(int vector)
442{
443 es7000_send_IPI_mask(cpu_online_mask, vector);
444}
445
446static int es7000_apic_id_registered(void)
447{
448 return 1;
449}
450
451static const cpumask_t *target_cpus_cluster(void)
452{
453 return &CPU_MASK_ALL;
454}
455
456static const cpumask_t *es7000_target_cpus(void)
457{
458 return &cpumask_of_cpu(smp_processor_id());
459}
460
461static unsigned long
462es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
463{
464 return 0;
465}
466static unsigned long es7000_check_apicid_present(int bit)
467{
468 return physid_isset(bit, phys_cpu_present_map);
469}
470
471static unsigned long calculate_ldr(int cpu)
472{
473 unsigned long id = per_cpu(x86_bios_cpu_apicid, cpu);
474
475 return SET_APIC_LOGICAL_ID(id);
476}
477
478/*
479 * Set up the logical destination ID.
480 *
481 * Intel recommends to set DFR, LdR and TPR before enabling
482 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
483 * document number 292116). So here it goes...
484 */
485static void es7000_init_apic_ldr_cluster(void)
486{
487 unsigned long val;
488 int cpu = smp_processor_id();
489
490 apic_write(APIC_DFR, APIC_DFR_CLUSTER);
491 val = calculate_ldr(cpu);
492 apic_write(APIC_LDR, val);
493}
494
495static void es7000_init_apic_ldr(void)
496{
497 unsigned long val;
498 int cpu = smp_processor_id();
499
500 apic_write(APIC_DFR, APIC_DFR_FLAT);
501 val = calculate_ldr(cpu);
502 apic_write(APIC_LDR, val);
503}
504
505static void es7000_setup_apic_routing(void)
506{
507 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
508
509 printk(KERN_INFO
510 "Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
511 (apic_version[apic] == 0x14) ?
512 "Physical Cluster" : "Logical Cluster",
513 nr_ioapics, cpus_addr(*es7000_target_cpus())[0]);
514}
515
516static int es7000_apicid_to_node(int logical_apicid)
517{
518 return 0;
519}
520
521
522static int es7000_cpu_present_to_apicid(int mps_cpu)
523{
524 if (!mps_cpu)
525 return boot_cpu_physical_apicid;
526 else if (mps_cpu < nr_cpu_ids)
527 return per_cpu(x86_bios_cpu_apicid, mps_cpu);
528 else
529 return BAD_APICID;
530}
531
532static int cpu_id;
533
534static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
535{
536 physid_mask_t mask;
537
538 mask = physid_mask_of_physid(cpu_id);
539 ++cpu_id;
540
541 return mask;
542}
543
544/* Mapping from cpu number to logical apicid */
545static int es7000_cpu_to_logical_apicid(int cpu)
546{
547#ifdef CONFIG_SMP
548 if (cpu >= nr_cpu_ids)
549 return BAD_APICID;
550 return cpu_2_logical_apicid[cpu];
551#else
552 return logical_smp_processor_id();
553#endif
554}
555
556static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
557{
558 /* For clustered we don't have a good way to do this yet - hack */
559 return physids_promote(0xff);
560}
561
562static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
563{
564 boot_cpu_physical_apicid = read_apic_id();
565 return 1;
566}
567
568static unsigned int
569es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
570{
571 int cpus_found = 0;
572 int num_bits_set;
573 int apicid;
574 int cpu;
575
576 num_bits_set = cpumask_weight(cpumask);
577 /* Return id to all */
578 if (num_bits_set == nr_cpu_ids)
579 return 0xFF;
580 /*
581 * The cpus in the mask must all be on the apic cluster. If are not
582 * on the same apicid cluster return default value of target_cpus():
583 */
584 cpu = cpumask_first(cpumask);
585 apicid = es7000_cpu_to_logical_apicid(cpu);
586
587 while (cpus_found < num_bits_set) {
588 if (cpumask_test_cpu(cpu, cpumask)) {
589 int new_apicid = es7000_cpu_to_logical_apicid(cpu);
590
591 if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
592 WARN(1, "Not a valid mask!");
593
594 return 0xFF;
595 }
596 apicid = new_apicid;
597 cpus_found++;
598 }
599 cpu++;
600 }
601 return apicid;
602}
603
604static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask)
605{
606 int cpus_found = 0;
607 int num_bits_set;
608 int apicid;
609 int cpu;
610
611 num_bits_set = cpus_weight(*cpumask);
612 /* Return id to all */
613 if (num_bits_set == nr_cpu_ids)
614 return es7000_cpu_to_logical_apicid(0);
615 /*
616 * The cpus in the mask must all be on the apic cluster. If are not
617 * on the same apicid cluster return default value of target_cpus():
618 */
619 cpu = first_cpu(*cpumask);
620 apicid = es7000_cpu_to_logical_apicid(cpu);
621 while (cpus_found < num_bits_set) {
622 if (cpu_isset(cpu, *cpumask)) {
623 int new_apicid = es7000_cpu_to_logical_apicid(cpu);
624
625 if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
626 printk("%s: Not a valid mask!\n", __func__);
627
628 return es7000_cpu_to_logical_apicid(0);
629 }
630 apicid = new_apicid;
631 cpus_found++;
632 }
633 cpu++;
634 }
635 return apicid;
636}
637
638static unsigned int
639es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
640 const struct cpumask *andmask)
641{
642 int apicid = es7000_cpu_to_logical_apicid(0);
643 cpumask_var_t cpumask;
644
645 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
646 return apicid;
647
648 cpumask_and(cpumask, inmask, andmask);
649 cpumask_and(cpumask, cpumask, cpu_online_mask);
650 apicid = es7000_cpu_mask_to_apicid(cpumask);
651
652 free_cpumask_var(cpumask);
653
654 return apicid;
655}
656
657static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
658{
659 return cpuid_apic >> index_msb;
660}
661
662void __init es7000_update_apic_to_cluster(void)
663{
664 apic->target_cpus = target_cpus_cluster;
665 apic->irq_delivery_mode = dest_LowestPrio;
666 /* logical delivery broadcast to all procs: */
667 apic->irq_dest_mode = 1;
668
669 apic->init_apic_ldr = es7000_init_apic_ldr_cluster;
670
671 apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster;
672}
673
674static int probe_es7000(void)
675{
676 /* probed later in mptable/ACPI hooks */
677 return 0;
678}
679
680static __init int
681es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
682{
683 if (mpc->oemptr) {
684 struct mpc_oemtable *oem_table =
685 (struct mpc_oemtable *)mpc->oemptr;
686
687 if (!strncmp(oem, "UNISYS", 6))
688 return parse_unisys_oem((char *)oem_table);
689 }
690 return 0;
691}
692
693
694struct apic apic_es7000 = {
695
696 .name = "es7000",
697 .probe = probe_es7000,
698 .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
699 .apic_id_registered = es7000_apic_id_registered,
700
701 .irq_delivery_mode = dest_Fixed,
702 /* phys delivery to target CPUs: */
703 .irq_dest_mode = 0,
704
705 .target_cpus = es7000_target_cpus,
706 .disable_esr = 1,
707 .dest_logical = 0,
708 .check_apicid_used = es7000_check_apicid_used,
709 .check_apicid_present = es7000_check_apicid_present,
710
711 .vector_allocation_domain = es7000_vector_allocation_domain,
712 .init_apic_ldr = es7000_init_apic_ldr,
713
714 .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
715 .setup_apic_routing = es7000_setup_apic_routing,
716 .multi_timer_check = NULL,
717 .apicid_to_node = es7000_apicid_to_node,
718 .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
719 .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
720 .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
721 .setup_portio_remap = NULL,
722 .check_phys_apicid_present = es7000_check_phys_apicid_present,
723 .enable_apic_mode = es7000_enable_apic_mode,
724 .phys_pkg_id = es7000_phys_pkg_id,
725 .mps_oem_check = es7000_mps_oem_check,
726
727 .get_apic_id = es7000_get_apic_id,
728 .set_apic_id = NULL,
729 .apic_id_mask = 0xFF << 24,
730
731 .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
732 .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
733
734 .send_IPI_mask = es7000_send_IPI_mask,
735 .send_IPI_mask_allbutself = NULL,
736 .send_IPI_allbutself = es7000_send_IPI_allbutself,
737 .send_IPI_all = es7000_send_IPI_all,
738 .send_IPI_self = default_send_IPI_self,
739
740 .wakeup_cpu = NULL,
741
742 .trampoline_phys_low = 0x467,
743 .trampoline_phys_high = 0x469,
744
745 .wait_for_init_deassert = es7000_wait_for_init_deassert,
746
747 /* Nothing to do for most platforms, since cleared by the INIT cycle: */
748 .smp_callin_clear_local_apic = NULL,
749 .inquire_remote_apic = default_inquire_remote_apic,
750
751 .read = native_apic_mem_read,
752 .write = native_apic_mem_write,
753 .icr_read = native_apic_icr_read,
754 .icr_write = native_apic_icr_write,
755 .wait_icr_idle = native_apic_wait_icr_idle,
756 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
757};
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index bc7ac4da90d7..00e6071cefc4 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Intel IO-APIC support for multi-Pentium hosts. 2 * Intel IO-APIC support for multi-Pentium hosts.
3 * 3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo 4 * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
5 * 5 *
6 * Many thanks to Stig Venaas for trying out countless experimental 6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently! 7 * patches and reporting/debugging problems patiently!
@@ -46,6 +46,7 @@
46#include <asm/idle.h> 46#include <asm/idle.h>
47#include <asm/io.h> 47#include <asm/io.h>
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/cpu.h>
49#include <asm/desc.h> 50#include <asm/desc.h>
50#include <asm/proto.h> 51#include <asm/proto.h>
51#include <asm/acpi.h> 52#include <asm/acpi.h>
@@ -61,9 +62,7 @@
61#include <asm/uv/uv_hub.h> 62#include <asm/uv/uv_hub.h>
62#include <asm/uv/uv_irq.h> 63#include <asm/uv/uv_irq.h>
63 64
64#include <mach_ipi.h> 65#include <asm/apic.h>
65#include <mach_apic.h>
66#include <mach_apicdef.h>
67 66
68#define __apicdebuginit(type) static type __init 67#define __apicdebuginit(type) static type __init
69 68
@@ -82,11 +81,11 @@ static DEFINE_SPINLOCK(vector_lock);
82int nr_ioapic_registers[MAX_IO_APICS]; 81int nr_ioapic_registers[MAX_IO_APICS];
83 82
84/* I/O APIC entries */ 83/* I/O APIC entries */
85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 84struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86int nr_ioapics; 85int nr_ioapics;
87 86
88/* MP IRQ source entries */ 87/* MP IRQ source entries */
89struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 88struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
90 89
91/* # of MP IRQ source entries */ 90/* # of MP IRQ source entries */
92int mp_irq_entries; 91int mp_irq_entries;
@@ -99,10 +98,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
99 98
100int skip_ioapic_setup; 99int skip_ioapic_setup;
101 100
101void arch_disable_smp_support(void)
102{
103#ifdef CONFIG_PCI
104 noioapicquirk = 1;
105 noioapicreroute = -1;
106#endif
107 skip_ioapic_setup = 1;
108}
109
102static int __init parse_noapic(char *str) 110static int __init parse_noapic(char *str)
103{ 111{
104 /* disable IO-APIC */ 112 /* disable IO-APIC */
105 disable_ioapic_setup(); 113 arch_disable_smp_support();
106 return 0; 114 return 0;
107} 115}
108early_param("noapic", parse_noapic); 116early_param("noapic", parse_noapic);
@@ -356,7 +364,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
356 364
357 if (!cfg->move_in_progress) { 365 if (!cfg->move_in_progress) {
358 /* it means that domain is not changed */ 366 /* it means that domain is not changed */
359 if (!cpumask_intersects(&desc->affinity, mask)) 367 if (!cpumask_intersects(desc->affinity, mask))
360 cfg->move_desc_pending = 1; 368 cfg->move_desc_pending = 1;
361 } 369 }
362} 370}
@@ -386,7 +394,7 @@ struct io_apic {
386static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 394static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
387{ 395{
388 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 396 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
389 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); 397 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
390} 398}
391 399
392static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 400static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -478,7 +486,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
478 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 486 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
479} 487}
480 488
481static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 489void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
482{ 490{
483 unsigned long flags; 491 unsigned long flags;
484 spin_lock_irqsave(&ioapic_lock, flags); 492 spin_lock_irqsave(&ioapic_lock, flags);
@@ -513,11 +521,11 @@ static void send_cleanup_vector(struct irq_cfg *cfg)
513 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) 521 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
514 cfg->move_cleanup_count++; 522 cfg->move_cleanup_count++;
515 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) 523 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
516 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); 524 apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
517 } else { 525 } else {
518 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); 526 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
519 cfg->move_cleanup_count = cpumask_weight(cleanup_mask); 527 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
520 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); 528 apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
521 free_cpumask_var(cleanup_mask); 529 free_cpumask_var(cleanup_mask);
522 } 530 }
523 cfg->move_in_progress = 0; 531 cfg->move_in_progress = 0;
@@ -562,8 +570,9 @@ static int
562assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); 570assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
563 571
564/* 572/*
565 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid 573 * Either sets desc->affinity to a valid value, and returns
566 * of that, or returns BAD_APICID and leaves desc->affinity untouched. 574 * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
575 * leaves desc->affinity untouched.
567 */ 576 */
568static unsigned int 577static unsigned int
569set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) 578set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
@@ -579,9 +588,10 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
579 if (assign_irq_vector(irq, cfg, mask)) 588 if (assign_irq_vector(irq, cfg, mask))
580 return BAD_APICID; 589 return BAD_APICID;
581 590
582 cpumask_and(&desc->affinity, cfg->domain, mask); 591 cpumask_and(desc->affinity, cfg->domain, mask);
583 set_extra_move_desc(desc, mask); 592 set_extra_move_desc(desc, mask);
584 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); 593
594 return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
585} 595}
586 596
587static void 597static void
@@ -796,23 +806,6 @@ static void clear_IO_APIC (void)
796 clear_IO_APIC_pin(apic, pin); 806 clear_IO_APIC_pin(apic, pin);
797} 807}
798 808
799#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
800void send_IPI_self(int vector)
801{
802 unsigned int cfg;
803
804 /*
805 * Wait for idle.
806 */
807 apic_wait_icr_idle();
808 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
809 /*
810 * Send the IPI. The write to APIC_ICR fires this off.
811 */
812 apic_write(APIC_ICR, cfg);
813}
814#endif /* !CONFIG_SMP && CONFIG_X86_32*/
815
816#ifdef CONFIG_X86_32 809#ifdef CONFIG_X86_32
817/* 810/*
818 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to 811 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
@@ -820,8 +813,9 @@ void send_IPI_self(int vector)
820 */ 813 */
821 814
822#define MAX_PIRQS 8 815#define MAX_PIRQS 8
823static int pirq_entries [MAX_PIRQS]; 816static int pirq_entries[MAX_PIRQS] = {
824static int pirqs_enabled; 817 [0 ... MAX_PIRQS - 1] = -1
818};
825 819
826static int __init ioapic_pirq_setup(char *str) 820static int __init ioapic_pirq_setup(char *str)
827{ 821{
@@ -830,10 +824,6 @@ static int __init ioapic_pirq_setup(char *str)
830 824
831 get_options(str, ARRAY_SIZE(ints), ints); 825 get_options(str, ARRAY_SIZE(ints), ints);
832 826
833 for (i = 0; i < MAX_PIRQS; i++)
834 pirq_entries[i] = -1;
835
836 pirqs_enabled = 1;
837 apic_printk(APIC_VERBOSE, KERN_INFO 827 apic_printk(APIC_VERBOSE, KERN_INFO
838 "PIRQ redirection, working around broken MP-BIOS.\n"); 828 "PIRQ redirection, working around broken MP-BIOS.\n");
839 max = MAX_PIRQS; 829 max = MAX_PIRQS;
@@ -944,10 +934,10 @@ static int find_irq_entry(int apic, int pin, int type)
944 int i; 934 int i;
945 935
946 for (i = 0; i < mp_irq_entries; i++) 936 for (i = 0; i < mp_irq_entries; i++)
947 if (mp_irqs[i].mp_irqtype == type && 937 if (mp_irqs[i].irqtype == type &&
948 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || 938 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
949 mp_irqs[i].mp_dstapic == MP_APIC_ALL) && 939 mp_irqs[i].dstapic == MP_APIC_ALL) &&
950 mp_irqs[i].mp_dstirq == pin) 940 mp_irqs[i].dstirq == pin)
951 return i; 941 return i;
952 942
953 return -1; 943 return -1;
@@ -961,13 +951,13 @@ static int __init find_isa_irq_pin(int irq, int type)
961 int i; 951 int i;
962 952
963 for (i = 0; i < mp_irq_entries; i++) { 953 for (i = 0; i < mp_irq_entries; i++) {
964 int lbus = mp_irqs[i].mp_srcbus; 954 int lbus = mp_irqs[i].srcbus;
965 955
966 if (test_bit(lbus, mp_bus_not_pci) && 956 if (test_bit(lbus, mp_bus_not_pci) &&
967 (mp_irqs[i].mp_irqtype == type) && 957 (mp_irqs[i].irqtype == type) &&
968 (mp_irqs[i].mp_srcbusirq == irq)) 958 (mp_irqs[i].srcbusirq == irq))
969 959
970 return mp_irqs[i].mp_dstirq; 960 return mp_irqs[i].dstirq;
971 } 961 }
972 return -1; 962 return -1;
973} 963}
@@ -977,17 +967,17 @@ static int __init find_isa_irq_apic(int irq, int type)
977 int i; 967 int i;
978 968
979 for (i = 0; i < mp_irq_entries; i++) { 969 for (i = 0; i < mp_irq_entries; i++) {
980 int lbus = mp_irqs[i].mp_srcbus; 970 int lbus = mp_irqs[i].srcbus;
981 971
982 if (test_bit(lbus, mp_bus_not_pci) && 972 if (test_bit(lbus, mp_bus_not_pci) &&
983 (mp_irqs[i].mp_irqtype == type) && 973 (mp_irqs[i].irqtype == type) &&
984 (mp_irqs[i].mp_srcbusirq == irq)) 974 (mp_irqs[i].srcbusirq == irq))
985 break; 975 break;
986 } 976 }
987 if (i < mp_irq_entries) { 977 if (i < mp_irq_entries) {
988 int apic; 978 int apic;
989 for(apic = 0; apic < nr_ioapics; apic++) { 979 for(apic = 0; apic < nr_ioapics; apic++) {
990 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) 980 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
991 return apic; 981 return apic;
992 } 982 }
993 } 983 }
@@ -1012,23 +1002,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1012 return -1; 1002 return -1;
1013 } 1003 }
1014 for (i = 0; i < mp_irq_entries; i++) { 1004 for (i = 0; i < mp_irq_entries; i++) {
1015 int lbus = mp_irqs[i].mp_srcbus; 1005 int lbus = mp_irqs[i].srcbus;
1016 1006
1017 for (apic = 0; apic < nr_ioapics; apic++) 1007 for (apic = 0; apic < nr_ioapics; apic++)
1018 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || 1008 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1019 mp_irqs[i].mp_dstapic == MP_APIC_ALL) 1009 mp_irqs[i].dstapic == MP_APIC_ALL)
1020 break; 1010 break;
1021 1011
1022 if (!test_bit(lbus, mp_bus_not_pci) && 1012 if (!test_bit(lbus, mp_bus_not_pci) &&
1023 !mp_irqs[i].mp_irqtype && 1013 !mp_irqs[i].irqtype &&
1024 (bus == lbus) && 1014 (bus == lbus) &&
1025 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { 1015 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1026 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); 1016 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1027 1017
1028 if (!(apic || IO_APIC_IRQ(irq))) 1018 if (!(apic || IO_APIC_IRQ(irq)))
1029 continue; 1019 continue;
1030 1020
1031 if (pin == (mp_irqs[i].mp_srcbusirq & 3)) 1021 if (pin == (mp_irqs[i].srcbusirq & 3))
1032 return irq; 1022 return irq;
1033 /* 1023 /*
1034 * Use the first all-but-pin matching entry as a 1024 * Use the first all-but-pin matching entry as a
@@ -1071,7 +1061,7 @@ static int EISA_ELCR(unsigned int irq)
1071 * EISA conforming in the MP table, that means its trigger type must 1061 * EISA conforming in the MP table, that means its trigger type must
1072 * be read in from the ELCR */ 1062 * be read in from the ELCR */
1073 1063
1074#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) 1064#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
1075#define default_EISA_polarity(idx) default_ISA_polarity(idx) 1065#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1076 1066
1077/* PCI interrupts are always polarity one level triggered, 1067/* PCI interrupts are always polarity one level triggered,
@@ -1088,13 +1078,13 @@ static int EISA_ELCR(unsigned int irq)
1088 1078
1089static int MPBIOS_polarity(int idx) 1079static int MPBIOS_polarity(int idx)
1090{ 1080{
1091 int bus = mp_irqs[idx].mp_srcbus; 1081 int bus = mp_irqs[idx].srcbus;
1092 int polarity; 1082 int polarity;
1093 1083
1094 /* 1084 /*
1095 * Determine IRQ line polarity (high active or low active): 1085 * Determine IRQ line polarity (high active or low active):
1096 */ 1086 */
1097 switch (mp_irqs[idx].mp_irqflag & 3) 1087 switch (mp_irqs[idx].irqflag & 3)
1098 { 1088 {
1099 case 0: /* conforms, ie. bus-type dependent polarity */ 1089 case 0: /* conforms, ie. bus-type dependent polarity */
1100 if (test_bit(bus, mp_bus_not_pci)) 1090 if (test_bit(bus, mp_bus_not_pci))
@@ -1130,13 +1120,13 @@ static int MPBIOS_polarity(int idx)
1130 1120
1131static int MPBIOS_trigger(int idx) 1121static int MPBIOS_trigger(int idx)
1132{ 1122{
1133 int bus = mp_irqs[idx].mp_srcbus; 1123 int bus = mp_irqs[idx].srcbus;
1134 int trigger; 1124 int trigger;
1135 1125
1136 /* 1126 /*
1137 * Determine IRQ trigger mode (edge or level sensitive): 1127 * Determine IRQ trigger mode (edge or level sensitive):
1138 */ 1128 */
1139 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) 1129 switch ((mp_irqs[idx].irqflag>>2) & 3)
1140 { 1130 {
1141 case 0: /* conforms, ie. bus-type dependent */ 1131 case 0: /* conforms, ie. bus-type dependent */
1142 if (test_bit(bus, mp_bus_not_pci)) 1132 if (test_bit(bus, mp_bus_not_pci))
@@ -1214,16 +1204,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq);
1214static int pin_2_irq(int idx, int apic, int pin) 1204static int pin_2_irq(int idx, int apic, int pin)
1215{ 1205{
1216 int irq, i; 1206 int irq, i;
1217 int bus = mp_irqs[idx].mp_srcbus; 1207 int bus = mp_irqs[idx].srcbus;
1218 1208
1219 /* 1209 /*
1220 * Debugging check, we are in big trouble if this message pops up! 1210 * Debugging check, we are in big trouble if this message pops up!
1221 */ 1211 */
1222 if (mp_irqs[idx].mp_dstirq != pin) 1212 if (mp_irqs[idx].dstirq != pin)
1223 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1213 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1224 1214
1225 if (test_bit(bus, mp_bus_not_pci)) { 1215 if (test_bit(bus, mp_bus_not_pci)) {
1226 irq = mp_irqs[idx].mp_srcbusirq; 1216 irq = mp_irqs[idx].srcbusirq;
1227 } else { 1217 } else {
1228 /* 1218 /*
1229 * PCI IRQs are mapped in order 1219 * PCI IRQs are mapped in order
@@ -1315,7 +1305,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1315 int new_cpu; 1305 int new_cpu;
1316 int vector, offset; 1306 int vector, offset;
1317 1307
1318 vector_allocation_domain(cpu, tmp_mask); 1308 apic->vector_allocation_domain(cpu, tmp_mask);
1319 1309
1320 vector = current_vector; 1310 vector = current_vector;
1321 offset = current_offset; 1311 offset = current_offset;
@@ -1485,10 +1475,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1485 handle_edge_irq, "edge"); 1475 handle_edge_irq, "edge");
1486} 1476}
1487 1477
1488static int setup_ioapic_entry(int apic, int irq, 1478int setup_ioapic_entry(int apic_id, int irq,
1489 struct IO_APIC_route_entry *entry, 1479 struct IO_APIC_route_entry *entry,
1490 unsigned int destination, int trigger, 1480 unsigned int destination, int trigger,
1491 int polarity, int vector) 1481 int polarity, int vector)
1492{ 1482{
1493 /* 1483 /*
1494 * add it to the IO-APIC irq-routing table: 1484 * add it to the IO-APIC irq-routing table:
@@ -1497,25 +1487,25 @@ static int setup_ioapic_entry(int apic, int irq,
1497 1487
1498#ifdef CONFIG_INTR_REMAP 1488#ifdef CONFIG_INTR_REMAP
1499 if (intr_remapping_enabled) { 1489 if (intr_remapping_enabled) {
1500 struct intel_iommu *iommu = map_ioapic_to_ir(apic); 1490 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
1501 struct irte irte; 1491 struct irte irte;
1502 struct IR_IO_APIC_route_entry *ir_entry = 1492 struct IR_IO_APIC_route_entry *ir_entry =
1503 (struct IR_IO_APIC_route_entry *) entry; 1493 (struct IR_IO_APIC_route_entry *) entry;
1504 int index; 1494 int index;
1505 1495
1506 if (!iommu) 1496 if (!iommu)
1507 panic("No mapping iommu for ioapic %d\n", apic); 1497 panic("No mapping iommu for ioapic %d\n", apic_id);
1508 1498
1509 index = alloc_irte(iommu, irq, 1); 1499 index = alloc_irte(iommu, irq, 1);
1510 if (index < 0) 1500 if (index < 0)
1511 panic("Failed to allocate IRTE for ioapic %d\n", apic); 1501 panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
1512 1502
1513 memset(&irte, 0, sizeof(irte)); 1503 memset(&irte, 0, sizeof(irte));
1514 1504
1515 irte.present = 1; 1505 irte.present = 1;
1516 irte.dst_mode = INT_DEST_MODE; 1506 irte.dst_mode = apic->irq_dest_mode;
1517 irte.trigger_mode = trigger; 1507 irte.trigger_mode = trigger;
1518 irte.dlvry_mode = INT_DELIVERY_MODE; 1508 irte.dlvry_mode = apic->irq_delivery_mode;
1519 irte.vector = vector; 1509 irte.vector = vector;
1520 irte.dest_id = IRTE_DEST(destination); 1510 irte.dest_id = IRTE_DEST(destination);
1521 1511
@@ -1528,8 +1518,8 @@ static int setup_ioapic_entry(int apic, int irq,
1528 } else 1518 } else
1529#endif 1519#endif
1530 { 1520 {
1531 entry->delivery_mode = INT_DELIVERY_MODE; 1521 entry->delivery_mode = apic->irq_delivery_mode;
1532 entry->dest_mode = INT_DEST_MODE; 1522 entry->dest_mode = apic->irq_dest_mode;
1533 entry->dest = destination; 1523 entry->dest = destination;
1534 } 1524 }
1535 1525
@@ -1546,7 +1536,7 @@ static int setup_ioapic_entry(int apic, int irq,
1546 return 0; 1536 return 0;
1547} 1537}
1548 1538
1549static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, 1539static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
1550 int trigger, int polarity) 1540 int trigger, int polarity)
1551{ 1541{
1552 struct irq_cfg *cfg; 1542 struct irq_cfg *cfg;
@@ -1558,22 +1548,22 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1558 1548
1559 cfg = desc->chip_data; 1549 cfg = desc->chip_data;
1560 1550
1561 if (assign_irq_vector(irq, cfg, TARGET_CPUS)) 1551 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1562 return; 1552 return;
1563 1553
1564 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 1554 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
1565 1555
1566 apic_printk(APIC_VERBOSE,KERN_DEBUG 1556 apic_printk(APIC_VERBOSE,KERN_DEBUG
1567 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1557 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1568 "IRQ %d Mode:%i Active:%i)\n", 1558 "IRQ %d Mode:%i Active:%i)\n",
1569 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1559 apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
1570 irq, trigger, polarity); 1560 irq, trigger, polarity);
1571 1561
1572 1562
1573 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1563 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
1574 dest, trigger, polarity, cfg->vector)) { 1564 dest, trigger, polarity, cfg->vector)) {
1575 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1565 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1576 mp_ioapics[apic].mp_apicid, pin); 1566 mp_ioapics[apic_id].apicid, pin);
1577 __clear_irq_vector(irq, cfg); 1567 __clear_irq_vector(irq, cfg);
1578 return; 1568 return;
1579 } 1569 }
@@ -1582,12 +1572,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1582 if (irq < NR_IRQS_LEGACY) 1572 if (irq < NR_IRQS_LEGACY)
1583 disable_8259A_irq(irq); 1573 disable_8259A_irq(irq);
1584 1574
1585 ioapic_write_entry(apic, pin, entry); 1575 ioapic_write_entry(apic_id, pin, entry);
1586} 1576}
1587 1577
1588static void __init setup_IO_APIC_irqs(void) 1578static void __init setup_IO_APIC_irqs(void)
1589{ 1579{
1590 int apic, pin, idx, irq; 1580 int apic_id, pin, idx, irq;
1591 int notcon = 0; 1581 int notcon = 0;
1592 struct irq_desc *desc; 1582 struct irq_desc *desc;
1593 struct irq_cfg *cfg; 1583 struct irq_cfg *cfg;
@@ -1595,21 +1585,19 @@ static void __init setup_IO_APIC_irqs(void)
1595 1585
1596 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1586 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1597 1587
1598 for (apic = 0; apic < nr_ioapics; apic++) { 1588 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
1599 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1589 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1600 1590
1601 idx = find_irq_entry(apic, pin, mp_INT); 1591 idx = find_irq_entry(apic_id, pin, mp_INT);
1602 if (idx == -1) { 1592 if (idx == -1) {
1603 if (!notcon) { 1593 if (!notcon) {
1604 notcon = 1; 1594 notcon = 1;
1605 apic_printk(APIC_VERBOSE, 1595 apic_printk(APIC_VERBOSE,
1606 KERN_DEBUG " %d-%d", 1596 KERN_DEBUG " %d-%d",
1607 mp_ioapics[apic].mp_apicid, 1597 mp_ioapics[apic_id].apicid, pin);
1608 pin);
1609 } else 1598 } else
1610 apic_printk(APIC_VERBOSE, " %d-%d", 1599 apic_printk(APIC_VERBOSE, " %d-%d",
1611 mp_ioapics[apic].mp_apicid, 1600 mp_ioapics[apic_id].apicid, pin);
1612 pin);
1613 continue; 1601 continue;
1614 } 1602 }
1615 if (notcon) { 1603 if (notcon) {
@@ -1618,20 +1606,25 @@ static void __init setup_IO_APIC_irqs(void)
1618 notcon = 0; 1606 notcon = 0;
1619 } 1607 }
1620 1608
1621 irq = pin_2_irq(idx, apic, pin); 1609 irq = pin_2_irq(idx, apic_id, pin);
1622#ifdef CONFIG_X86_32 1610
1623 if (multi_timer_check(apic, irq)) 1611 /*
1612 * Skip the timer IRQ if there's a quirk handler
1613 * installed and if it returns 1:
1614 */
1615 if (apic->multi_timer_check &&
1616 apic->multi_timer_check(apic_id, irq))
1624 continue; 1617 continue;
1625#endif 1618
1626 desc = irq_to_desc_alloc_cpu(irq, cpu); 1619 desc = irq_to_desc_alloc_cpu(irq, cpu);
1627 if (!desc) { 1620 if (!desc) {
1628 printk(KERN_INFO "can not get irq_desc for %d\n", irq); 1621 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1629 continue; 1622 continue;
1630 } 1623 }
1631 cfg = desc->chip_data; 1624 cfg = desc->chip_data;
1632 add_pin_to_irq_cpu(cfg, cpu, apic, pin); 1625 add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
1633 1626
1634 setup_IO_APIC_irq(apic, pin, irq, desc, 1627 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1635 irq_trigger(idx), irq_polarity(idx)); 1628 irq_trigger(idx), irq_polarity(idx));
1636 } 1629 }
1637 } 1630 }
@@ -1644,7 +1637,7 @@ static void __init setup_IO_APIC_irqs(void)
1644/* 1637/*
1645 * Set up the timer pin, possibly with the 8259A-master behind. 1638 * Set up the timer pin, possibly with the 8259A-master behind.
1646 */ 1639 */
1647static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, 1640static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1648 int vector) 1641 int vector)
1649{ 1642{
1650 struct IO_APIC_route_entry entry; 1643 struct IO_APIC_route_entry entry;
@@ -1660,10 +1653,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1660 * We use logical delivery to get the timer IRQ 1653 * We use logical delivery to get the timer IRQ
1661 * to the first CPU. 1654 * to the first CPU.
1662 */ 1655 */
1663 entry.dest_mode = INT_DEST_MODE; 1656 entry.dest_mode = apic->irq_dest_mode;
1664 entry.mask = 1; /* mask IRQ now */ 1657 entry.mask = 0; /* don't mask IRQ for edge */
1665 entry.dest = cpu_mask_to_apicid(TARGET_CPUS); 1658 entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
1666 entry.delivery_mode = INT_DELIVERY_MODE; 1659 entry.delivery_mode = apic->irq_delivery_mode;
1667 entry.polarity = 0; 1660 entry.polarity = 0;
1668 entry.trigger = 0; 1661 entry.trigger = 0;
1669 entry.vector = vector; 1662 entry.vector = vector;
@@ -1677,7 +1670,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1677 /* 1670 /*
1678 * Add it to the IO-APIC irq-routing table: 1671 * Add it to the IO-APIC irq-routing table:
1679 */ 1672 */
1680 ioapic_write_entry(apic, pin, entry); 1673 ioapic_write_entry(apic_id, pin, entry);
1681} 1674}
1682 1675
1683 1676
@@ -1699,7 +1692,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1699 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1692 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1700 for (i = 0; i < nr_ioapics; i++) 1693 for (i = 0; i < nr_ioapics; i++)
1701 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1694 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1702 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); 1695 mp_ioapics[i].apicid, nr_ioapic_registers[i]);
1703 1696
1704 /* 1697 /*
1705 * We are a bit conservative about what we expect. We have to 1698 * We are a bit conservative about what we expect. We have to
@@ -1719,7 +1712,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1719 spin_unlock_irqrestore(&ioapic_lock, flags); 1712 spin_unlock_irqrestore(&ioapic_lock, flags);
1720 1713
1721 printk("\n"); 1714 printk("\n");
1722 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1715 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
1723 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1716 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1724 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1717 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1725 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1718 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -1980,13 +1973,6 @@ void __init enable_IO_APIC(void)
1980 int apic; 1973 int apic;
1981 unsigned long flags; 1974 unsigned long flags;
1982 1975
1983#ifdef CONFIG_X86_32
1984 int i;
1985 if (!pirqs_enabled)
1986 for (i = 0; i < MAX_PIRQS; i++)
1987 pirq_entries[i] = -1;
1988#endif
1989
1990 /* 1976 /*
1991 * The number of IO-APIC IRQ registers (== #pins): 1977 * The number of IO-APIC IRQ registers (== #pins):
1992 */ 1978 */
@@ -2090,7 +2076,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
2090{ 2076{
2091 union IO_APIC_reg_00 reg_00; 2077 union IO_APIC_reg_00 reg_00;
2092 physid_mask_t phys_id_present_map; 2078 physid_mask_t phys_id_present_map;
2093 int apic; 2079 int apic_id;
2094 int i; 2080 int i;
2095 unsigned char old_id; 2081 unsigned char old_id;
2096 unsigned long flags; 2082 unsigned long flags;
@@ -2109,26 +2095,26 @@ static void __init setup_ioapic_ids_from_mpc(void)
2109 * This is broken; anything with a real cpu count has to 2095 * This is broken; anything with a real cpu count has to
2110 * circumvent this idiocy regardless. 2096 * circumvent this idiocy regardless.
2111 */ 2097 */
2112 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); 2098 phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
2113 2099
2114 /* 2100 /*
2115 * Set the IOAPIC ID to the value stored in the MPC table. 2101 * Set the IOAPIC ID to the value stored in the MPC table.
2116 */ 2102 */
2117 for (apic = 0; apic < nr_ioapics; apic++) { 2103 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
2118 2104
2119 /* Read the register 0 value */ 2105 /* Read the register 0 value */
2120 spin_lock_irqsave(&ioapic_lock, flags); 2106 spin_lock_irqsave(&ioapic_lock, flags);
2121 reg_00.raw = io_apic_read(apic, 0); 2107 reg_00.raw = io_apic_read(apic_id, 0);
2122 spin_unlock_irqrestore(&ioapic_lock, flags); 2108 spin_unlock_irqrestore(&ioapic_lock, flags);
2123 2109
2124 old_id = mp_ioapics[apic].mp_apicid; 2110 old_id = mp_ioapics[apic_id].apicid;
2125 2111
2126 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { 2112 if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
2127 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 2113 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2128 apic, mp_ioapics[apic].mp_apicid); 2114 apic_id, mp_ioapics[apic_id].apicid);
2129 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2115 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2130 reg_00.bits.ID); 2116 reg_00.bits.ID);
2131 mp_ioapics[apic].mp_apicid = reg_00.bits.ID; 2117 mp_ioapics[apic_id].apicid = reg_00.bits.ID;
2132 } 2118 }
2133 2119
2134 /* 2120 /*
@@ -2136,10 +2122,10 @@ static void __init setup_ioapic_ids_from_mpc(void)
2136 * system must have a unique ID or we get lots of nice 2122 * system must have a unique ID or we get lots of nice
2137 * 'stuck on smp_invalidate_needed IPI wait' messages. 2123 * 'stuck on smp_invalidate_needed IPI wait' messages.
2138 */ 2124 */
2139 if (check_apicid_used(phys_id_present_map, 2125 if (apic->check_apicid_used(phys_id_present_map,
2140 mp_ioapics[apic].mp_apicid)) { 2126 mp_ioapics[apic_id].apicid)) {
2141 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 2127 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2142 apic, mp_ioapics[apic].mp_apicid); 2128 apic_id, mp_ioapics[apic_id].apicid);
2143 for (i = 0; i < get_physical_broadcast(); i++) 2129 for (i = 0; i < get_physical_broadcast(); i++)
2144 if (!physid_isset(i, phys_id_present_map)) 2130 if (!physid_isset(i, phys_id_present_map))
2145 break; 2131 break;
@@ -2148,13 +2134,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
2148 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2134 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2149 i); 2135 i);
2150 physid_set(i, phys_id_present_map); 2136 physid_set(i, phys_id_present_map);
2151 mp_ioapics[apic].mp_apicid = i; 2137 mp_ioapics[apic_id].apicid = i;
2152 } else { 2138 } else {
2153 physid_mask_t tmp; 2139 physid_mask_t tmp;
2154 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); 2140 tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
2155 apic_printk(APIC_VERBOSE, "Setting %d in the " 2141 apic_printk(APIC_VERBOSE, "Setting %d in the "
2156 "phys_id_present_map\n", 2142 "phys_id_present_map\n",
2157 mp_ioapics[apic].mp_apicid); 2143 mp_ioapics[apic_id].apicid);
2158 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2144 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2159 } 2145 }
2160 2146
@@ -2163,11 +2149,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
2163 * We need to adjust the IRQ routing table 2149 * We need to adjust the IRQ routing table
2164 * if the ID changed. 2150 * if the ID changed.
2165 */ 2151 */
2166 if (old_id != mp_ioapics[apic].mp_apicid) 2152 if (old_id != mp_ioapics[apic_id].apicid)
2167 for (i = 0; i < mp_irq_entries; i++) 2153 for (i = 0; i < mp_irq_entries; i++)
2168 if (mp_irqs[i].mp_dstapic == old_id) 2154 if (mp_irqs[i].dstapic == old_id)
2169 mp_irqs[i].mp_dstapic 2155 mp_irqs[i].dstapic
2170 = mp_ioapics[apic].mp_apicid; 2156 = mp_ioapics[apic_id].apicid;
2171 2157
2172 /* 2158 /*
2173 * Read the right value from the MPC table and 2159 * Read the right value from the MPC table and
@@ -2175,20 +2161,20 @@ static void __init setup_ioapic_ids_from_mpc(void)
2175 */ 2161 */
2176 apic_printk(APIC_VERBOSE, KERN_INFO 2162 apic_printk(APIC_VERBOSE, KERN_INFO
2177 "...changing IO-APIC physical APIC ID to %d ...", 2163 "...changing IO-APIC physical APIC ID to %d ...",
2178 mp_ioapics[apic].mp_apicid); 2164 mp_ioapics[apic_id].apicid);
2179 2165
2180 reg_00.bits.ID = mp_ioapics[apic].mp_apicid; 2166 reg_00.bits.ID = mp_ioapics[apic_id].apicid;
2181 spin_lock_irqsave(&ioapic_lock, flags); 2167 spin_lock_irqsave(&ioapic_lock, flags);
2182 io_apic_write(apic, 0, reg_00.raw); 2168 io_apic_write(apic_id, 0, reg_00.raw);
2183 spin_unlock_irqrestore(&ioapic_lock, flags); 2169 spin_unlock_irqrestore(&ioapic_lock, flags);
2184 2170
2185 /* 2171 /*
2186 * Sanity check 2172 * Sanity check
2187 */ 2173 */
2188 spin_lock_irqsave(&ioapic_lock, flags); 2174 spin_lock_irqsave(&ioapic_lock, flags);
2189 reg_00.raw = io_apic_read(apic, 0); 2175 reg_00.raw = io_apic_read(apic_id, 0);
2190 spin_unlock_irqrestore(&ioapic_lock, flags); 2176 spin_unlock_irqrestore(&ioapic_lock, flags);
2191 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) 2177 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
2192 printk("could not set ID!\n"); 2178 printk("could not set ID!\n");
2193 else 2179 else
2194 apic_printk(APIC_VERBOSE, " ok.\n"); 2180 apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2291,7 +2277,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2291 unsigned long flags; 2277 unsigned long flags;
2292 2278
2293 spin_lock_irqsave(&vector_lock, flags); 2279 spin_lock_irqsave(&vector_lock, flags);
2294 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); 2280 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2295 spin_unlock_irqrestore(&vector_lock, flags); 2281 spin_unlock_irqrestore(&vector_lock, flags);
2296 2282
2297 return 1; 2283 return 1;
@@ -2299,7 +2285,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2299#else 2285#else
2300static int ioapic_retrigger_irq(unsigned int irq) 2286static int ioapic_retrigger_irq(unsigned int irq)
2301{ 2287{
2302 send_IPI_self(irq_cfg(irq)->vector); 2288 apic->send_IPI_self(irq_cfg(irq)->vector);
2303 2289
2304 return 1; 2290 return 1;
2305} 2291}
@@ -2363,7 +2349,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2363 2349
2364 set_extra_move_desc(desc, mask); 2350 set_extra_move_desc(desc, mask);
2365 2351
2366 dest = cpu_mask_to_apicid_and(cfg->domain, mask); 2352 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2367 2353
2368 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2354 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2369 if (modify_ioapic_rte) { 2355 if (modify_ioapic_rte) {
@@ -2383,7 +2369,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2383 if (cfg->move_in_progress) 2369 if (cfg->move_in_progress)
2384 send_cleanup_vector(cfg); 2370 send_cleanup_vector(cfg);
2385 2371
2386 cpumask_copy(&desc->affinity, mask); 2372 cpumask_copy(desc->affinity, mask);
2387} 2373}
2388 2374
2389static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2375static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2405,11 +2391,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2405 } 2391 }
2406 2392
2407 /* everthing is clear. we have right of way */ 2393 /* everthing is clear. we have right of way */
2408 migrate_ioapic_irq_desc(desc, &desc->pending_mask); 2394 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2409 2395
2410 ret = 0; 2396 ret = 0;
2411 desc->status &= ~IRQ_MOVE_PENDING; 2397 desc->status &= ~IRQ_MOVE_PENDING;
2412 cpumask_clear(&desc->pending_mask); 2398 cpumask_clear(desc->pending_mask);
2413 2399
2414unmask: 2400unmask:
2415 unmask_IO_APIC_irq_desc(desc); 2401 unmask_IO_APIC_irq_desc(desc);
@@ -2434,7 +2420,7 @@ static void ir_irq_migration(struct work_struct *work)
2434 continue; 2420 continue;
2435 } 2421 }
2436 2422
2437 desc->chip->set_affinity(irq, &desc->pending_mask); 2423 desc->chip->set_affinity(irq, desc->pending_mask);
2438 spin_unlock_irqrestore(&desc->lock, flags); 2424 spin_unlock_irqrestore(&desc->lock, flags);
2439 } 2425 }
2440 } 2426 }
@@ -2448,7 +2434,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2448{ 2434{
2449 if (desc->status & IRQ_LEVEL) { 2435 if (desc->status & IRQ_LEVEL) {
2450 desc->status |= IRQ_MOVE_PENDING; 2436 desc->status |= IRQ_MOVE_PENDING;
2451 cpumask_copy(&desc->pending_mask, mask); 2437 cpumask_copy(desc->pending_mask, mask);
2452 migrate_irq_remapped_level_desc(desc); 2438 migrate_irq_remapped_level_desc(desc);
2453 return; 2439 return;
2454 } 2440 }
@@ -2516,7 +2502,7 @@ static void irq_complete_move(struct irq_desc **descp)
2516 2502
2517 /* domain has not changed, but affinity did */ 2503 /* domain has not changed, but affinity did */
2518 me = smp_processor_id(); 2504 me = smp_processor_id();
2519 if (cpu_isset(me, desc->affinity)) { 2505 if (cpumask_test_cpu(me, desc->affinity)) {
2520 *descp = desc = move_irq_desc(desc, me); 2506 *descp = desc = move_irq_desc(desc, me);
2521 /* get the new one */ 2507 /* get the new one */
2522 cfg = desc->chip_data; 2508 cfg = desc->chip_data;
@@ -2867,19 +2853,15 @@ static inline void __init check_timer(void)
2867 int cpu = boot_cpu_id; 2853 int cpu = boot_cpu_id;
2868 int apic1, pin1, apic2, pin2; 2854 int apic1, pin1, apic2, pin2;
2869 unsigned long flags; 2855 unsigned long flags;
2870 unsigned int ver;
2871 int no_pin1 = 0; 2856 int no_pin1 = 0;
2872 2857
2873 local_irq_save(flags); 2858 local_irq_save(flags);
2874 2859
2875 ver = apic_read(APIC_LVR);
2876 ver = GET_APIC_VERSION(ver);
2877
2878 /* 2860 /*
2879 * get/set the timer IRQ vector: 2861 * get/set the timer IRQ vector:
2880 */ 2862 */
2881 disable_8259A_irq(0); 2863 disable_8259A_irq(0);
2882 assign_irq_vector(0, cfg, TARGET_CPUS); 2864 assign_irq_vector(0, cfg, apic->target_cpus());
2883 2865
2884 /* 2866 /*
2885 * As IRQ0 is to be enabled in the 8259A, the virtual 2867 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2893,7 +2875,13 @@ static inline void __init check_timer(void)
2893 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2875 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2894 init_8259A(1); 2876 init_8259A(1);
2895#ifdef CONFIG_X86_32 2877#ifdef CONFIG_X86_32
2896 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); 2878 {
2879 unsigned int ver;
2880
2881 ver = apic_read(APIC_LVR);
2882 ver = GET_APIC_VERSION(ver);
2883 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2884 }
2897#endif 2885#endif
2898 2886
2899 pin1 = find_isa_irq_pin(0, mp_INT); 2887 pin1 = find_isa_irq_pin(0, mp_INT);
@@ -2932,8 +2920,17 @@ static inline void __init check_timer(void)
2932 if (no_pin1) { 2920 if (no_pin1) {
2933 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); 2921 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
2934 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2922 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2923 } else {
2924 /* for edge trigger, setup_IO_APIC_irq already
2925 * leave it unmasked.
2926 * so only need to unmask if it is level-trigger
2927 * do we really have level trigger timer?
2928 */
2929 int idx;
2930 idx = find_irq_entry(apic1, pin1, mp_INT);
2931 if (idx != -1 && irq_trigger(idx))
2932 unmask_IO_APIC_irq_desc(desc);
2935 } 2933 }
2936 unmask_IO_APIC_irq_desc(desc);
2937 if (timer_irq_works()) { 2934 if (timer_irq_works()) {
2938 if (nmi_watchdog == NMI_IO_APIC) { 2935 if (nmi_watchdog == NMI_IO_APIC) {
2939 setup_nmi(); 2936 setup_nmi();
@@ -2947,6 +2944,7 @@ static inline void __init check_timer(void)
2947 if (intr_remapping_enabled) 2944 if (intr_remapping_enabled)
2948 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2945 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2949#endif 2946#endif
2947 local_irq_disable();
2950 clear_IO_APIC_pin(apic1, pin1); 2948 clear_IO_APIC_pin(apic1, pin1);
2951 if (!no_pin1) 2949 if (!no_pin1)
2952 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2950 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2961,7 +2959,6 @@ static inline void __init check_timer(void)
2961 */ 2959 */
2962 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); 2960 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
2963 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2961 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2964 unmask_IO_APIC_irq_desc(desc);
2965 enable_8259A_irq(0); 2962 enable_8259A_irq(0);
2966 if (timer_irq_works()) { 2963 if (timer_irq_works()) {
2967 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2964 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2976,6 +2973,7 @@ static inline void __init check_timer(void)
2976 /* 2973 /*
2977 * Cleanup, just in case ... 2974 * Cleanup, just in case ...
2978 */ 2975 */
2976 local_irq_disable();
2979 disable_8259A_irq(0); 2977 disable_8259A_irq(0);
2980 clear_IO_APIC_pin(apic2, pin2); 2978 clear_IO_APIC_pin(apic2, pin2);
2981 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2979 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -3001,6 +2999,7 @@ static inline void __init check_timer(void)
3001 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 2999 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3002 goto out; 3000 goto out;
3003 } 3001 }
3002 local_irq_disable();
3004 disable_8259A_irq(0); 3003 disable_8259A_irq(0);
3005 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 3004 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3006 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 3005 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -3018,6 +3017,7 @@ static inline void __init check_timer(void)
3018 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3017 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3019 goto out; 3018 goto out;
3020 } 3019 }
3020 local_irq_disable();
3021 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); 3021 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
3022 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 3022 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
3023 "report. Then try booting with the 'noapic' option.\n"); 3023 "report. Then try booting with the 'noapic' option.\n");
@@ -3047,13 +3047,9 @@ out:
3047void __init setup_IO_APIC(void) 3047void __init setup_IO_APIC(void)
3048{ 3048{
3049 3049
3050#ifdef CONFIG_X86_32
3051 enable_IO_APIC();
3052#else
3053 /* 3050 /*
3054 * calling enable_IO_APIC() is moved to setup_local_APIC for BP 3051 * calling enable_IO_APIC() is moved to setup_local_APIC for BP
3055 */ 3052 */
3056#endif
3057 3053
3058 io_apic_irqs = ~PIC_IRQS; 3054 io_apic_irqs = ~PIC_IRQS;
3059 3055
@@ -3118,8 +3114,8 @@ static int ioapic_resume(struct sys_device *dev)
3118 3114
3119 spin_lock_irqsave(&ioapic_lock, flags); 3115 spin_lock_irqsave(&ioapic_lock, flags);
3120 reg_00.raw = io_apic_read(dev->id, 0); 3116 reg_00.raw = io_apic_read(dev->id, 0);
3121 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { 3117 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3122 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; 3118 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3123 io_apic_write(dev->id, 0, reg_00.raw); 3119 io_apic_write(dev->id, 0, reg_00.raw);
3124 } 3120 }
3125 spin_unlock_irqrestore(&ioapic_lock, flags); 3121 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -3169,6 +3165,7 @@ static int __init ioapic_init_sysfs(void)
3169 3165
3170device_initcall(ioapic_init_sysfs); 3166device_initcall(ioapic_init_sysfs);
3171 3167
3168static int nr_irqs_gsi = NR_IRQS_LEGACY;
3172/* 3169/*
3173 * Dynamic irq allocate and deallocation 3170 * Dynamic irq allocate and deallocation
3174 */ 3171 */
@@ -3183,11 +3180,11 @@ unsigned int create_irq_nr(unsigned int irq_want)
3183 struct irq_desc *desc_new = NULL; 3180 struct irq_desc *desc_new = NULL;
3184 3181
3185 irq = 0; 3182 irq = 0;
3186 spin_lock_irqsave(&vector_lock, flags); 3183 if (irq_want < nr_irqs_gsi)
3187 for (new = irq_want; new < NR_IRQS; new++) { 3184 irq_want = nr_irqs_gsi;
3188 if (platform_legacy_irq(new))
3189 continue;
3190 3185
3186 spin_lock_irqsave(&vector_lock, flags);
3187 for (new = irq_want; new < nr_irqs; new++) {
3191 desc_new = irq_to_desc_alloc_cpu(new, cpu); 3188 desc_new = irq_to_desc_alloc_cpu(new, cpu);
3192 if (!desc_new) { 3189 if (!desc_new) {
3193 printk(KERN_INFO "can not get irq_desc for %d\n", new); 3190 printk(KERN_INFO "can not get irq_desc for %d\n", new);
@@ -3197,7 +3194,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
3197 3194
3198 if (cfg_new->vector != 0) 3195 if (cfg_new->vector != 0)
3199 continue; 3196 continue;
3200 if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) 3197 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
3201 irq = new; 3198 irq = new;
3202 break; 3199 break;
3203 } 3200 }
@@ -3212,7 +3209,6 @@ unsigned int create_irq_nr(unsigned int irq_want)
3212 return irq; 3209 return irq;
3213} 3210}
3214 3211
3215static int nr_irqs_gsi = NR_IRQS_LEGACY;
3216int create_irq(void) 3212int create_irq(void)
3217{ 3213{
3218 unsigned int irq_want; 3214 unsigned int irq_want;
@@ -3259,12 +3255,15 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3259 int err; 3255 int err;
3260 unsigned dest; 3256 unsigned dest;
3261 3257
3258 if (disable_apic)
3259 return -ENXIO;
3260
3262 cfg = irq_cfg(irq); 3261 cfg = irq_cfg(irq);
3263 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3262 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3264 if (err) 3263 if (err)
3265 return err; 3264 return err;
3266 3265
3267 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 3266 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3268 3267
3269#ifdef CONFIG_INTR_REMAP 3268#ifdef CONFIG_INTR_REMAP
3270 if (irq_remapped(irq)) { 3269 if (irq_remapped(irq)) {
@@ -3278,9 +3277,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3278 memset (&irte, 0, sizeof(irte)); 3277 memset (&irte, 0, sizeof(irte));
3279 3278
3280 irte.present = 1; 3279 irte.present = 1;
3281 irte.dst_mode = INT_DEST_MODE; 3280 irte.dst_mode = apic->irq_dest_mode;
3282 irte.trigger_mode = 0; /* edge */ 3281 irte.trigger_mode = 0; /* edge */
3283 irte.dlvry_mode = INT_DELIVERY_MODE; 3282 irte.dlvry_mode = apic->irq_delivery_mode;
3284 irte.vector = cfg->vector; 3283 irte.vector = cfg->vector;
3285 irte.dest_id = IRTE_DEST(dest); 3284 irte.dest_id = IRTE_DEST(dest);
3286 3285
@@ -3298,10 +3297,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3298 msg->address_hi = MSI_ADDR_BASE_HI; 3297 msg->address_hi = MSI_ADDR_BASE_HI;
3299 msg->address_lo = 3298 msg->address_lo =
3300 MSI_ADDR_BASE_LO | 3299 MSI_ADDR_BASE_LO |
3301 ((INT_DEST_MODE == 0) ? 3300 ((apic->irq_dest_mode == 0) ?
3302 MSI_ADDR_DEST_MODE_PHYSICAL: 3301 MSI_ADDR_DEST_MODE_PHYSICAL:
3303 MSI_ADDR_DEST_MODE_LOGICAL) | 3302 MSI_ADDR_DEST_MODE_LOGICAL) |
3304 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3303 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3305 MSI_ADDR_REDIRECTION_CPU: 3304 MSI_ADDR_REDIRECTION_CPU:
3306 MSI_ADDR_REDIRECTION_LOWPRI) | 3305 MSI_ADDR_REDIRECTION_LOWPRI) |
3307 MSI_ADDR_DEST_ID(dest); 3306 MSI_ADDR_DEST_ID(dest);
@@ -3309,7 +3308,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3309 msg->data = 3308 msg->data =
3310 MSI_DATA_TRIGGER_EDGE | 3309 MSI_DATA_TRIGGER_EDGE |
3311 MSI_DATA_LEVEL_ASSERT | 3310 MSI_DATA_LEVEL_ASSERT |
3312 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3311 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3313 MSI_DATA_DELIVERY_FIXED: 3312 MSI_DATA_DELIVERY_FIXED:
3314 MSI_DATA_DELIVERY_LOWPRI) | 3313 MSI_DATA_DELIVERY_LOWPRI) |
3315 MSI_DATA_VECTOR(cfg->vector); 3314 MSI_DATA_VECTOR(cfg->vector);
@@ -3464,40 +3463,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3464 return 0; 3463 return 0;
3465} 3464}
3466 3465
3467int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3468{
3469 unsigned int irq;
3470 int ret;
3471 unsigned int irq_want;
3472
3473 irq_want = nr_irqs_gsi;
3474 irq = create_irq_nr(irq_want);
3475 if (irq == 0)
3476 return -1;
3477
3478#ifdef CONFIG_INTR_REMAP
3479 if (!intr_remapping_enabled)
3480 goto no_ir;
3481
3482 ret = msi_alloc_irte(dev, irq, 1);
3483 if (ret < 0)
3484 goto error;
3485no_ir:
3486#endif
3487 ret = setup_msi_irq(dev, msidesc, irq);
3488 if (ret < 0) {
3489 destroy_irq(irq);
3490 return ret;
3491 }
3492 return 0;
3493
3494#ifdef CONFIG_INTR_REMAP
3495error:
3496 destroy_irq(irq);
3497 return ret;
3498#endif
3499}
3500
3501int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3466int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3502{ 3467{
3503 unsigned int irq; 3468 unsigned int irq;
@@ -3514,9 +3479,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3514 sub_handle = 0; 3479 sub_handle = 0;
3515 list_for_each_entry(msidesc, &dev->msi_list, list) { 3480 list_for_each_entry(msidesc, &dev->msi_list, list) {
3516 irq = create_irq_nr(irq_want); 3481 irq = create_irq_nr(irq_want);
3517 irq_want++;
3518 if (irq == 0) 3482 if (irq == 0)
3519 return -1; 3483 return -1;
3484 irq_want = irq + 1;
3520#ifdef CONFIG_INTR_REMAP 3485#ifdef CONFIG_INTR_REMAP
3521 if (!intr_remapping_enabled) 3486 if (!intr_remapping_enabled)
3522 goto no_ir; 3487 goto no_ir;
@@ -3727,13 +3692,17 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3727 struct irq_cfg *cfg; 3692 struct irq_cfg *cfg;
3728 int err; 3693 int err;
3729 3694
3695 if (disable_apic)
3696 return -ENXIO;
3697
3730 cfg = irq_cfg(irq); 3698 cfg = irq_cfg(irq);
3731 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3699 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3732 if (!err) { 3700 if (!err) {
3733 struct ht_irq_msg msg; 3701 struct ht_irq_msg msg;
3734 unsigned dest; 3702 unsigned dest;
3735 3703
3736 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 3704 dest = apic->cpu_mask_to_apicid_and(cfg->domain,
3705 apic->target_cpus());
3737 3706
3738 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3707 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3739 3708
@@ -3741,11 +3710,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3741 HT_IRQ_LOW_BASE | 3710 HT_IRQ_LOW_BASE |
3742 HT_IRQ_LOW_DEST_ID(dest) | 3711 HT_IRQ_LOW_DEST_ID(dest) |
3743 HT_IRQ_LOW_VECTOR(cfg->vector) | 3712 HT_IRQ_LOW_VECTOR(cfg->vector) |
3744 ((INT_DEST_MODE == 0) ? 3713 ((apic->irq_dest_mode == 0) ?
3745 HT_IRQ_LOW_DM_PHYSICAL : 3714 HT_IRQ_LOW_DM_PHYSICAL :
3746 HT_IRQ_LOW_DM_LOGICAL) | 3715 HT_IRQ_LOW_DM_LOGICAL) |
3747 HT_IRQ_LOW_RQEOI_EDGE | 3716 HT_IRQ_LOW_RQEOI_EDGE |
3748 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3717 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3749 HT_IRQ_LOW_MT_FIXED : 3718 HT_IRQ_LOW_MT_FIXED :
3750 HT_IRQ_LOW_MT_ARBITRATED) | 3719 HT_IRQ_LOW_MT_ARBITRATED) |
3751 HT_IRQ_LOW_IRQ_MASKED; 3720 HT_IRQ_LOW_IRQ_MASKED;
@@ -3761,7 +3730,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3761} 3730}
3762#endif /* CONFIG_HT_IRQ */ 3731#endif /* CONFIG_HT_IRQ */
3763 3732
3764#ifdef CONFIG_X86_64 3733#ifdef CONFIG_X86_UV
3765/* 3734/*
3766 * Re-target the irq to the specified CPU and enable the specified MMR located 3735 * Re-target the irq to the specified CPU and enable the specified MMR located
3767 * on the specified blade to allow the sending of MSIs to the specified CPU. 3736 * on the specified blade to allow the sending of MSIs to the specified CPU.
@@ -3793,12 +3762,12 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3793 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3762 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3794 3763
3795 entry->vector = cfg->vector; 3764 entry->vector = cfg->vector;
3796 entry->delivery_mode = INT_DELIVERY_MODE; 3765 entry->delivery_mode = apic->irq_delivery_mode;
3797 entry->dest_mode = INT_DEST_MODE; 3766 entry->dest_mode = apic->irq_dest_mode;
3798 entry->polarity = 0; 3767 entry->polarity = 0;
3799 entry->trigger = 0; 3768 entry->trigger = 0;
3800 entry->mask = 0; 3769 entry->mask = 0;
3801 entry->dest = cpu_mask_to_apicid(eligible_cpu); 3770 entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
3802 3771
3803 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3772 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3804 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3773 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3861,6 +3830,28 @@ void __init probe_nr_irqs_gsi(void)
3861 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi); 3830 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3862} 3831}
3863 3832
3833#ifdef CONFIG_SPARSE_IRQ
3834int __init arch_probe_nr_irqs(void)
3835{
3836 int nr;
3837
3838 if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
3839 nr_irqs = NR_VECTORS * nr_cpu_ids;
3840
3841 nr = nr_irqs_gsi + 8 * nr_cpu_ids;
3842#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
3843 /*
3844 * for MSI and HT dyn irq
3845 */
3846 nr += nr_irqs_gsi * 16;
3847#endif
3848 if (nr < nr_irqs)
3849 nr_irqs = nr;
3850
3851 return 0;
3852}
3853#endif
3854
3864/* -------------------------------------------------------------------------- 3855/* --------------------------------------------------------------------------
3865 ACPI-based IOAPIC Configuration 3856 ACPI-based IOAPIC Configuration
3866 -------------------------------------------------------------------------- */ 3857 -------------------------------------------------------------------------- */
@@ -3886,7 +3877,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3886 */ 3877 */
3887 3878
3888 if (physids_empty(apic_id_map)) 3879 if (physids_empty(apic_id_map))
3889 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); 3880 apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
3890 3881
3891 spin_lock_irqsave(&ioapic_lock, flags); 3882 spin_lock_irqsave(&ioapic_lock, flags);
3892 reg_00.raw = io_apic_read(ioapic, 0); 3883 reg_00.raw = io_apic_read(ioapic, 0);
@@ -3902,10 +3893,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3902 * Every APIC in a system must have a unique ID or we get lots of nice 3893 * Every APIC in a system must have a unique ID or we get lots of nice
3903 * 'stuck on smp_invalidate_needed IPI wait' messages. 3894 * 'stuck on smp_invalidate_needed IPI wait' messages.
3904 */ 3895 */
3905 if (check_apicid_used(apic_id_map, apic_id)) { 3896 if (apic->check_apicid_used(apic_id_map, apic_id)) {
3906 3897
3907 for (i = 0; i < get_physical_broadcast(); i++) { 3898 for (i = 0; i < get_physical_broadcast(); i++) {
3908 if (!check_apicid_used(apic_id_map, i)) 3899 if (!apic->check_apicid_used(apic_id_map, i))
3909 break; 3900 break;
3910 } 3901 }
3911 3902
@@ -3918,7 +3909,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3918 apic_id = i; 3909 apic_id = i;
3919 } 3910 }
3920 3911
3921 tmp = apicid_to_cpu_present(apic_id); 3912 tmp = apic->apicid_to_cpu_present(apic_id);
3922 physids_or(apic_id_map, apic_id_map, tmp); 3913 physids_or(apic_id_map, apic_id_map, tmp);
3923 3914
3924 if (reg_00.bits.ID != apic_id) { 3915 if (reg_00.bits.ID != apic_id) {
@@ -3995,8 +3986,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3995 return -1; 3986 return -1;
3996 3987
3997 for (i = 0; i < mp_irq_entries; i++) 3988 for (i = 0; i < mp_irq_entries; i++)
3998 if (mp_irqs[i].mp_irqtype == mp_INT && 3989 if (mp_irqs[i].irqtype == mp_INT &&
3999 mp_irqs[i].mp_srcbusirq == bus_irq) 3990 mp_irqs[i].srcbusirq == bus_irq)
4000 break; 3991 break;
4001 if (i >= mp_irq_entries) 3992 if (i >= mp_irq_entries)
4002 return -1; 3993 return -1;
@@ -4011,7 +4002,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4011/* 4002/*
4012 * This function currently is only a helper for the i386 smp boot process where 4003 * This function currently is only a helper for the i386 smp boot process where
4013 * we need to reprogram the ioredtbls to cater for the cpus which have come online 4004 * we need to reprogram the ioredtbls to cater for the cpus which have come online
4014 * so mask in all cases should simply be TARGET_CPUS 4005 * so mask in all cases should simply be apic->target_cpus()
4015 */ 4006 */
4016#ifdef CONFIG_SMP 4007#ifdef CONFIG_SMP
4017void __init setup_ioapic_dest(void) 4008void __init setup_ioapic_dest(void)
@@ -4050,9 +4041,9 @@ void __init setup_ioapic_dest(void)
4050 */ 4041 */
4051 if (desc->status & 4042 if (desc->status &
4052 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4043 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4053 mask = &desc->affinity; 4044 mask = desc->affinity;
4054 else 4045 else
4055 mask = TARGET_CPUS; 4046 mask = apic->target_cpus();
4056 4047
4057#ifdef CONFIG_INTR_REMAP 4048#ifdef CONFIG_INTR_REMAP
4058 if (intr_remapping_enabled) 4049 if (intr_remapping_enabled)
@@ -4111,7 +4102,7 @@ void __init ioapic_init_mappings(void)
4111 ioapic_res = ioapic_setup_resources(); 4102 ioapic_res = ioapic_setup_resources();
4112 for (i = 0; i < nr_ioapics; i++) { 4103 for (i = 0; i < nr_ioapics; i++) {
4113 if (smp_found_config) { 4104 if (smp_found_config) {
4114 ioapic_phys = mp_ioapics[i].mp_apicaddr; 4105 ioapic_phys = mp_ioapics[i].apicaddr;
4115#ifdef CONFIG_X86_32 4106#ifdef CONFIG_X86_32
4116 if (!ioapic_phys) { 4107 if (!ioapic_phys) {
4117 printk(KERN_ERR 4108 printk(KERN_ERR
diff --git a/arch/x86/kernel/apic/ipi.c b/arch/x86/kernel/apic/ipi.c
new file mode 100644
index 000000000000..dbf5445727a9
--- /dev/null
+++ b/arch/x86/kernel/apic/ipi.c
@@ -0,0 +1,164 @@
1#include <linux/cpumask.h>
2#include <linux/interrupt.h>
3#include <linux/init.h>
4
5#include <linux/mm.h>
6#include <linux/delay.h>
7#include <linux/spinlock.h>
8#include <linux/kernel_stat.h>
9#include <linux/mc146818rtc.h>
10#include <linux/cache.h>
11#include <linux/cpu.h>
12#include <linux/module.h>
13
14#include <asm/smp.h>
15#include <asm/mtrr.h>
16#include <asm/tlbflush.h>
17#include <asm/mmu_context.h>
18#include <asm/apic.h>
19#include <asm/proto.h>
20#include <asm/ipi.h>
21
22void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
23{
24 unsigned long query_cpu;
25 unsigned long flags;
26
27 /*
28 * Hack. The clustered APIC addressing mode doesn't allow us to send
29 * to an arbitrary mask, so I do a unicast to each CPU instead.
30 * - mbligh
31 */
32 local_irq_save(flags);
33 for_each_cpu(query_cpu, mask) {
34 __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
35 query_cpu), vector, APIC_DEST_PHYSICAL);
36 }
37 local_irq_restore(flags);
38}
39
40void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
41 int vector)
42{
43 unsigned int this_cpu = smp_processor_id();
44 unsigned int query_cpu;
45 unsigned long flags;
46
47 /* See Hack comment above */
48
49 local_irq_save(flags);
50 for_each_cpu(query_cpu, mask) {
51 if (query_cpu == this_cpu)
52 continue;
53 __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
54 query_cpu), vector, APIC_DEST_PHYSICAL);
55 }
56 local_irq_restore(flags);
57}
58
59void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
60 int vector)
61{
62 unsigned long flags;
63 unsigned int query_cpu;
64
65 /*
66 * Hack. The clustered APIC addressing mode doesn't allow us to send
67 * to an arbitrary mask, so I do a unicasts to each CPU instead. This
68 * should be modified to do 1 message per cluster ID - mbligh
69 */
70
71 local_irq_save(flags);
72 for_each_cpu(query_cpu, mask)
73 __default_send_IPI_dest_field(
74 apic->cpu_to_logical_apicid(query_cpu), vector,
75 apic->dest_logical);
76 local_irq_restore(flags);
77}
78
79void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
80 int vector)
81{
82 unsigned long flags;
83 unsigned int query_cpu;
84 unsigned int this_cpu = smp_processor_id();
85
86 /* See Hack comment above */
87
88 local_irq_save(flags);
89 for_each_cpu(query_cpu, mask) {
90 if (query_cpu == this_cpu)
91 continue;
92 __default_send_IPI_dest_field(
93 apic->cpu_to_logical_apicid(query_cpu), vector,
94 apic->dest_logical);
95 }
96 local_irq_restore(flags);
97}
98
99#ifdef CONFIG_X86_32
100
101/*
102 * This is only used on smaller machines.
103 */
104void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
105{
106 unsigned long mask = cpumask_bits(cpumask)[0];
107 unsigned long flags;
108
109 local_irq_save(flags);
110 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
111 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
112 local_irq_restore(flags);
113}
114
115void default_send_IPI_allbutself(int vector)
116{
117 /*
118 * if there are no other CPUs in the system then we get an APIC send
119 * error if we try to broadcast, thus avoid sending IPIs in this case.
120 */
121 if (!(num_online_cpus() > 1))
122 return;
123
124 __default_local_send_IPI_allbutself(vector);
125}
126
127void default_send_IPI_all(int vector)
128{
129 __default_local_send_IPI_all(vector);
130}
131
132void default_send_IPI_self(int vector)
133{
134 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
135}
136
137/* must come after the send_IPI functions above for inlining */
138static int convert_apicid_to_cpu(int apic_id)
139{
140 int i;
141
142 for_each_possible_cpu(i) {
143 if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
144 return i;
145 }
146 return -1;
147}
148
149int safe_smp_processor_id(void)
150{
151 int apicid, cpuid;
152
153 if (!boot_cpu_has(X86_FEATURE_APIC))
154 return 0;
155
156 apicid = hard_smp_processor_id();
157 if (apicid == BAD_APICID)
158 return 0;
159
160 cpuid = convert_apicid_to_cpu(apicid);
161
162 return cpuid >= 0 ? cpuid : 0;
163}
164#endif
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/apic/nmi.c
index 7228979f1e7f..bdfad80c3cf1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -34,7 +34,7 @@
34 34
35#include <asm/mce.h> 35#include <asm/mce.h>
36 36
37#include <mach_traps.h> 37#include <asm/mach_traps.h>
38 38
39int unknown_nmi_panic; 39int unknown_nmi_panic;
40int nmi_watchdog_enabled; 40int nmi_watchdog_enabled;
@@ -61,11 +61,7 @@ static int endflag __initdata;
61 61
62static inline unsigned int get_nmi_count(int cpu) 62static inline unsigned int get_nmi_count(int cpu)
63{ 63{
64#ifdef CONFIG_X86_64 64 return per_cpu(irq_stat, cpu).__nmi_count;
65 return cpu_pda(cpu)->__nmi_count;
66#else
67 return nmi_count(cpu);
68#endif
69} 65}
70 66
71static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
82 */ 78 */
83static inline unsigned int get_timer_irqs(int cpu) 79static inline unsigned int get_timer_irqs(int cpu)
84{ 80{
85#ifdef CONFIG_X86_64
86 return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
87#else
88 return per_cpu(irq_stat, cpu).apic_timer_irqs + 81 return per_cpu(irq_stat, cpu).apic_timer_irqs +
89 per_cpu(irq_stat, cpu).irq0_irqs; 82 per_cpu(irq_stat, cpu).irq0_irqs;
90#endif
91} 83}
92 84
93#ifdef CONFIG_SMP 85#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/apic/numaq_32.c b/arch/x86/kernel/apic/numaq_32.c
new file mode 100644
index 000000000000..d9d6d61eed82
--- /dev/null
+++ b/arch/x86/kernel/apic/numaq_32.c
@@ -0,0 +1,565 @@
1/*
2 * Written by: Patricia Gaughen, IBM Corporation
3 *
4 * Copyright (C) 2002, IBM Corp.
5 * Copyright (C) 2009, Red Hat, Inc., Ingo Molnar
6 *
7 * All rights reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * This program is distributed in the hope that it will be useful, but
15 * WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
17 * NON INFRINGEMENT. See the GNU General Public License for more
18 * details.
19 *
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 *
24 * Send feedback to <gone@us.ibm.com>
25 */
26#include <linux/nodemask.h>
27#include <linux/topology.h>
28#include <linux/bootmem.h>
29#include <linux/threads.h>
30#include <linux/cpumask.h>
31#include <linux/kernel.h>
32#include <linux/mmzone.h>
33#include <linux/module.h>
34#include <linux/string.h>
35#include <linux/init.h>
36#include <linux/numa.h>
37#include <linux/smp.h>
38#include <linux/io.h>
39#include <linux/mm.h>
40
41#include <asm/processor.h>
42#include <asm/fixmap.h>
43#include <asm/mpspec.h>
44#include <asm/numaq.h>
45#include <asm/setup.h>
46#include <asm/apic.h>
47#include <asm/e820.h>
48#include <asm/ipi.h>
49
50#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
51
52int found_numaq;
53
54/*
55 * Have to match translation table entries to main table entries by counter
56 * hence the mpc_record variable .... can't see a less disgusting way of
57 * doing this ....
58 */
59struct mpc_trans {
60 unsigned char mpc_type;
61 unsigned char trans_len;
62 unsigned char trans_type;
63 unsigned char trans_quad;
64 unsigned char trans_global;
65 unsigned char trans_local;
66 unsigned short trans_reserved;
67};
68
69/* x86_quirks member */
70static int mpc_record;
71
72static __cpuinitdata struct mpc_trans *translation_table[MAX_MPC_ENTRY];
73
74int mp_bus_id_to_node[MAX_MP_BUSSES];
75int mp_bus_id_to_local[MAX_MP_BUSSES];
76int quad_local_to_mp_bus_id[NR_CPUS/4][4];
77
78
79static inline void numaq_register_node(int node, struct sys_cfg_data *scd)
80{
81 struct eachquadmem *eq = scd->eq + node;
82
83 node_set_online(node);
84
85 /* Convert to pages */
86 node_start_pfn[node] =
87 MB_TO_PAGES(eq->hi_shrd_mem_start - eq->priv_mem_size);
88
89 node_end_pfn[node] =
90 MB_TO_PAGES(eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
91
92 e820_register_active_regions(node, node_start_pfn[node],
93 node_end_pfn[node]);
94
95 memory_present(node, node_start_pfn[node], node_end_pfn[node]);
96
97 node_remap_size[node] = node_memmap_size_bytes(node,
98 node_start_pfn[node],
99 node_end_pfn[node]);
100}
101
102/*
103 * Function: smp_dump_qct()
104 *
105 * Description: gets memory layout from the quad config table. This
106 * function also updates node_online_map with the nodes (quads) present.
107 */
108static void __init smp_dump_qct(void)
109{
110 struct sys_cfg_data *scd;
111 int node;
112
113 scd = (void *)__va(SYS_CFG_DATA_PRIV_ADDR);
114
115 nodes_clear(node_online_map);
116 for_each_node(node) {
117 if (scd->quads_present31_0 & (1 << node))
118 numaq_register_node(node, scd);
119 }
120}
121
122void __cpuinit numaq_tsc_disable(void)
123{
124 if (!found_numaq)
125 return;
126
127 if (num_online_nodes() > 1) {
128 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
129 setup_clear_cpu_cap(X86_FEATURE_TSC);
130 }
131}
132
133static int __init numaq_pre_time_init(void)
134{
135 numaq_tsc_disable();
136 return 0;
137}
138
139static inline int generate_logical_apicid(int quad, int phys_apicid)
140{
141 return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
142}
143
144/* x86_quirks member */
145static int mpc_apic_id(struct mpc_cpu *m)
146{
147 int quad = translation_table[mpc_record]->trans_quad;
148 int logical_apicid = generate_logical_apicid(quad, m->apicid);
149
150 printk(KERN_DEBUG
151 "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
152 m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
153 (m->cpufeature & CPU_MODEL_MASK) >> 4,
154 m->apicver, quad, logical_apicid);
155
156 return logical_apicid;
157}
158
159/* x86_quirks member */
160static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
161{
162 int quad = translation_table[mpc_record]->trans_quad;
163 int local = translation_table[mpc_record]->trans_local;
164
165 mp_bus_id_to_node[m->busid] = quad;
166 mp_bus_id_to_local[m->busid] = local;
167
168 printk(KERN_INFO "Bus #%d is %s (node %d)\n", m->busid, name, quad);
169}
170
171/* x86_quirks member */
172static void mpc_oem_pci_bus(struct mpc_bus *m)
173{
174 int quad = translation_table[mpc_record]->trans_quad;
175 int local = translation_table[mpc_record]->trans_local;
176
177 quad_local_to_mp_bus_id[quad][local] = m->busid;
178}
179
180static void __init MP_translation_info(struct mpc_trans *m)
181{
182 printk(KERN_INFO
183 "Translation: record %d, type %d, quad %d, global %d, local %d\n",
184 mpc_record, m->trans_type, m->trans_quad, m->trans_global,
185 m->trans_local);
186
187 if (mpc_record >= MAX_MPC_ENTRY)
188 printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
189 else
190 translation_table[mpc_record] = m; /* stash this for later */
191
192 if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
193 node_set_online(m->trans_quad);
194}
195
196static int __init mpf_checksum(unsigned char *mp, int len)
197{
198 int sum = 0;
199
200 while (len--)
201 sum += *mp++;
202
203 return sum & 0xFF;
204}
205
206/*
207 * Read/parse the MPC oem tables
208 */
209static void __init
210 smp_read_mpc_oem(struct mpc_oemtable *oemtable, unsigned short oemsize)
211{
212 int count = sizeof(*oemtable); /* the header size */
213 unsigned char *oemptr = ((unsigned char *)oemtable) + count;
214
215 mpc_record = 0;
216 printk(KERN_INFO
217 "Found an OEM MPC table at %8p - parsing it ... \n", oemtable);
218
219 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
220 printk(KERN_WARNING
221 "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
222 oemtable->signature[0], oemtable->signature[1],
223 oemtable->signature[2], oemtable->signature[3]);
224 return;
225 }
226
227 if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
228 printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
229 return;
230 }
231
232 while (count < oemtable->length) {
233 switch (*oemptr) {
234 case MP_TRANSLATION:
235 {
236 struct mpc_trans *m = (void *)oemptr;
237
238 MP_translation_info(m);
239 oemptr += sizeof(*m);
240 count += sizeof(*m);
241 ++mpc_record;
242 break;
243 }
244 default:
245 printk(KERN_WARNING
246 "Unrecognised OEM table entry type! - %d\n",
247 (int)*oemptr);
248 return;
249 }
250 }
251}
252
253static int __init numaq_setup_ioapic_ids(void)
254{
255 /* so can skip it */
256 return 1;
257}
258
259static int __init numaq_update_apic(void)
260{
261 apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
262
263 return 0;
264}
265
266static struct x86_quirks numaq_x86_quirks __initdata = {
267 .arch_pre_time_init = numaq_pre_time_init,
268 .arch_time_init = NULL,
269 .arch_pre_intr_init = NULL,
270 .arch_memory_setup = NULL,
271 .arch_intr_init = NULL,
272 .arch_trap_init = NULL,
273 .mach_get_smp_config = NULL,
274 .mach_find_smp_config = NULL,
275 .mpc_record = &mpc_record,
276 .mpc_apic_id = mpc_apic_id,
277 .mpc_oem_bus_info = mpc_oem_bus_info,
278 .mpc_oem_pci_bus = mpc_oem_pci_bus,
279 .smp_read_mpc_oem = smp_read_mpc_oem,
280 .setup_ioapic_ids = numaq_setup_ioapic_ids,
281 .update_apic = numaq_update_apic,
282};
283
284static __init void early_check_numaq(void)
285{
286 /*
287 * Find possible boot-time SMP configuration:
288 */
289 early_find_smp_config();
290
291 /*
292 * get boot-time SMP configuration:
293 */
294 if (smp_found_config)
295 early_get_smp_config();
296
297 if (found_numaq)
298 x86_quirks = &numaq_x86_quirks;
299}
300
301int __init get_memcfg_numaq(void)
302{
303 early_check_numaq();
304 if (!found_numaq)
305 return 0;
306 smp_dump_qct();
307
308 return 1;
309}
310
311#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
312
313static inline unsigned int numaq_get_apic_id(unsigned long x)
314{
315 return (x >> 24) & 0x0F;
316}
317
318static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
319{
320 default_send_IPI_mask_sequence_logical(mask, vector);
321}
322
323static inline void numaq_send_IPI_allbutself(int vector)
324{
325 default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
326}
327
328static inline void numaq_send_IPI_all(int vector)
329{
330 numaq_send_IPI_mask(cpu_online_mask, vector);
331}
332
333#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8)
334#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa)
335
336/*
337 * Because we use NMIs rather than the INIT-STARTUP sequence to
338 * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
339 */
340static inline void numaq_smp_callin_clear_local_apic(void)
341{
342 clear_local_APIC();
343}
344
345static inline const cpumask_t *numaq_target_cpus(void)
346{
347 return &CPU_MASK_ALL;
348}
349
350static inline unsigned long
351numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
352{
353 return physid_isset(apicid, bitmap);
354}
355
356static inline unsigned long numaq_check_apicid_present(int bit)
357{
358 return physid_isset(bit, phys_cpu_present_map);
359}
360
361static inline int numaq_apic_id_registered(void)
362{
363 return 1;
364}
365
366static inline void numaq_init_apic_ldr(void)
367{
368 /* Already done in NUMA-Q firmware */
369}
370
371static inline void numaq_setup_apic_routing(void)
372{
373 printk(KERN_INFO
374 "Enabling APIC mode: NUMA-Q. Using %d I/O APICs\n",
375 nr_ioapics);
376}
377
378/*
379 * Skip adding the timer int on secondary nodes, which causes
380 * a small but painful rift in the time-space continuum.
381 */
382static inline int numaq_multi_timer_check(int apic, int irq)
383{
384 return apic != 0 && irq == 0;
385}
386
387static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
388{
389 /* We don't have a good way to do this yet - hack */
390 return physids_promote(0xFUL);
391}
392
393static inline int numaq_cpu_to_logical_apicid(int cpu)
394{
395 if (cpu >= nr_cpu_ids)
396 return BAD_APICID;
397 return cpu_2_logical_apicid[cpu];
398}
399
400/*
401 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
402 * cpu to APIC ID relation to properly interact with the intelligent
403 * mode of the cluster controller.
404 */
405static inline int numaq_cpu_present_to_apicid(int mps_cpu)
406{
407 if (mps_cpu < 60)
408 return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
409 else
410 return BAD_APICID;
411}
412
413static inline int numaq_apicid_to_node(int logical_apicid)
414{
415 return logical_apicid >> 4;
416}
417
418static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
419{
420 int node = numaq_apicid_to_node(logical_apicid);
421 int cpu = __ffs(logical_apicid & 0xf);
422
423 return physid_mask_of_physid(cpu + 4*node);
424}
425
426/* Where the IO area was mapped on multiquad, always 0 otherwise */
427void *xquad_portio;
428
429static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
430{
431 return 1;
432}
433
434/*
435 * We use physical apicids here, not logical, so just return the default
436 * physical broadcast to stop people from breaking us
437 */
438static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask)
439{
440 return 0x0F;
441}
442
443static inline unsigned int
444numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
445 const struct cpumask *andmask)
446{
447 return 0x0F;
448}
449
450/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
451static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
452{
453 return cpuid_apic >> index_msb;
454}
455
456static int
457numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
458{
459 if (strncmp(oem, "IBM NUMA", 8))
460 printk(KERN_ERR "Warning! Not a NUMA-Q system!\n");
461 else
462 found_numaq = 1;
463
464 return found_numaq;
465}
466
467static int probe_numaq(void)
468{
469 /* already know from get_memcfg_numaq() */
470 return found_numaq;
471}
472
473static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask)
474{
475 /* Careful. Some cpus do not strictly honor the set of cpus
476 * specified in the interrupt destination when using lowest
477 * priority interrupt delivery mode.
478 *
479 * In particular there was a hyperthreading cpu observed to
480 * deliver interrupts to the wrong hyperthread when only one
481 * hyperthread was specified in the interrupt desitination.
482 */
483 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
484}
485
486static void numaq_setup_portio_remap(void)
487{
488 int num_quads = num_online_nodes();
489
490 if (num_quads <= 1)
491 return;
492
493 printk(KERN_INFO
494 "Remapping cross-quad port I/O for %d quads\n", num_quads);
495
496 xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
497
498 printk(KERN_INFO
499 "xquad_portio vaddr 0x%08lx, len %08lx\n",
500 (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
501}
502
503struct apic apic_numaq = {
504
505 .name = "NUMAQ",
506 .probe = probe_numaq,
507 .acpi_madt_oem_check = NULL,
508 .apic_id_registered = numaq_apic_id_registered,
509
510 .irq_delivery_mode = dest_LowestPrio,
511 /* physical delivery on LOCAL quad: */
512 .irq_dest_mode = 0,
513
514 .target_cpus = numaq_target_cpus,
515 .disable_esr = 1,
516 .dest_logical = APIC_DEST_LOGICAL,
517 .check_apicid_used = numaq_check_apicid_used,
518 .check_apicid_present = numaq_check_apicid_present,
519
520 .vector_allocation_domain = numaq_vector_allocation_domain,
521 .init_apic_ldr = numaq_init_apic_ldr,
522
523 .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
524 .setup_apic_routing = numaq_setup_apic_routing,
525 .multi_timer_check = numaq_multi_timer_check,
526 .apicid_to_node = numaq_apicid_to_node,
527 .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
528 .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
529 .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
530 .setup_portio_remap = numaq_setup_portio_remap,
531 .check_phys_apicid_present = numaq_check_phys_apicid_present,
532 .enable_apic_mode = NULL,
533 .phys_pkg_id = numaq_phys_pkg_id,
534 .mps_oem_check = numaq_mps_oem_check,
535
536 .get_apic_id = numaq_get_apic_id,
537 .set_apic_id = NULL,
538 .apic_id_mask = 0x0F << 24,
539
540 .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid,
541 .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
542
543 .send_IPI_mask = numaq_send_IPI_mask,
544 .send_IPI_mask_allbutself = NULL,
545 .send_IPI_allbutself = numaq_send_IPI_allbutself,
546 .send_IPI_all = numaq_send_IPI_all,
547 .send_IPI_self = default_send_IPI_self,
548
549 .wakeup_cpu = NULL,
550 .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
551 .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
552
553 /* We don't do anything here because we use NMI's to boot instead */
554 .wait_for_init_deassert = NULL,
555
556 .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic,
557 .inquire_remote_apic = NULL,
558
559 .read = native_apic_mem_read,
560 .write = native_apic_mem_write,
561 .icr_read = native_apic_icr_read,
562 .icr_write = native_apic_icr_write,
563 .wait_icr_idle = native_apic_wait_icr_idle,
564 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
565};
diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c
new file mode 100644
index 000000000000..3a730fa574bb
--- /dev/null
+++ b/arch/x86/kernel/apic/probe_32.c
@@ -0,0 +1,295 @@
1/*
2 * Default generic APIC driver. This handles up to 8 CPUs.
3 *
4 * Copyright 2003 Andi Kleen, SuSE Labs.
5 * Subject to the GNU Public License, v.2
6 *
7 * Generic x86 APIC driver probe layer.
8 */
9#include <linux/threads.h>
10#include <linux/cpumask.h>
11#include <linux/module.h>
12#include <linux/string.h>
13#include <linux/kernel.h>
14#include <linux/ctype.h>
15#include <linux/init.h>
16#include <linux/errno.h>
17#include <asm/fixmap.h>
18#include <asm/mpspec.h>
19#include <asm/apicdef.h>
20#include <asm/apic.h>
21#include <asm/setup.h>
22
23#include <linux/threads.h>
24#include <linux/cpumask.h>
25#include <asm/mpspec.h>
26#include <asm/fixmap.h>
27#include <asm/apicdef.h>
28#include <linux/kernel.h>
29#include <linux/string.h>
30#include <linux/smp.h>
31#include <linux/init.h>
32#include <asm/ipi.h>
33
34#include <linux/smp.h>
35#include <linux/init.h>
36#include <linux/interrupt.h>
37#include <asm/acpi.h>
38#include <asm/e820.h>
39#include <asm/setup.h>
40
41#ifdef CONFIG_HOTPLUG_CPU
42#define DEFAULT_SEND_IPI (1)
43#else
44#define DEFAULT_SEND_IPI (0)
45#endif
46
47int no_broadcast = DEFAULT_SEND_IPI;
48
49static __init int no_ipi_broadcast(char *str)
50{
51 get_option(&str, &no_broadcast);
52 pr_info("Using %s mode\n",
53 no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
54 return 1;
55}
56__setup("no_ipi_broadcast=", no_ipi_broadcast);
57
58static int __init print_ipi_mode(void)
59{
60 pr_info("Using IPI %s mode\n",
61 no_broadcast ? "No-Shortcut" : "Shortcut");
62 return 0;
63}
64late_initcall(print_ipi_mode);
65
66void default_setup_apic_routing(void)
67{
68#ifdef CONFIG_X86_IO_APIC
69 printk(KERN_INFO
70 "Enabling APIC mode: Flat. Using %d I/O APICs\n",
71 nr_ioapics);
72#endif
73}
74
75static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
76{
77 /*
78 * Careful. Some cpus do not strictly honor the set of cpus
79 * specified in the interrupt destination when using lowest
80 * priority interrupt delivery mode.
81 *
82 * In particular there was a hyperthreading cpu observed to
83 * deliver interrupts to the wrong hyperthread when only one
84 * hyperthread was specified in the interrupt desitination.
85 */
86 *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
87}
88
89/* should be called last. */
90static int probe_default(void)
91{
92 return 1;
93}
94
95struct apic apic_default = {
96
97 .name = "default",
98 .probe = probe_default,
99 .acpi_madt_oem_check = NULL,
100 .apic_id_registered = default_apic_id_registered,
101
102 .irq_delivery_mode = dest_LowestPrio,
103 /* logical delivery broadcast to all CPUs: */
104 .irq_dest_mode = 1,
105
106 .target_cpus = default_target_cpus,
107 .disable_esr = 0,
108 .dest_logical = APIC_DEST_LOGICAL,
109 .check_apicid_used = default_check_apicid_used,
110 .check_apicid_present = default_check_apicid_present,
111
112 .vector_allocation_domain = default_vector_allocation_domain,
113 .init_apic_ldr = default_init_apic_ldr,
114
115 .ioapic_phys_id_map = default_ioapic_phys_id_map,
116 .setup_apic_routing = default_setup_apic_routing,
117 .multi_timer_check = NULL,
118 .apicid_to_node = default_apicid_to_node,
119 .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
120 .cpu_present_to_apicid = default_cpu_present_to_apicid,
121 .apicid_to_cpu_present = default_apicid_to_cpu_present,
122 .setup_portio_remap = NULL,
123 .check_phys_apicid_present = default_check_phys_apicid_present,
124 .enable_apic_mode = NULL,
125 .phys_pkg_id = default_phys_pkg_id,
126 .mps_oem_check = NULL,
127
128 .get_apic_id = default_get_apic_id,
129 .set_apic_id = NULL,
130 .apic_id_mask = 0x0F << 24,
131
132 .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
133 .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
134
135 .send_IPI_mask = default_send_IPI_mask_logical,
136 .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
137 .send_IPI_allbutself = default_send_IPI_allbutself,
138 .send_IPI_all = default_send_IPI_all,
139 .send_IPI_self = default_send_IPI_self,
140
141 .wakeup_cpu = NULL,
142 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
143 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
144
145 .wait_for_init_deassert = default_wait_for_init_deassert,
146
147 .smp_callin_clear_local_apic = NULL,
148 .inquire_remote_apic = default_inquire_remote_apic,
149
150 .read = native_apic_mem_read,
151 .write = native_apic_mem_write,
152 .icr_read = native_apic_icr_read,
153 .icr_write = native_apic_icr_write,
154 .wait_icr_idle = native_apic_wait_icr_idle,
155 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
156};
157
158extern struct apic apic_numaq;
159extern struct apic apic_summit;
160extern struct apic apic_bigsmp;
161extern struct apic apic_es7000;
162extern struct apic apic_default;
163
164struct apic *apic = &apic_default;
165EXPORT_SYMBOL_GPL(apic);
166
167static struct apic *apic_probe[] __initdata = {
168#ifdef CONFIG_X86_NUMAQ
169 &apic_numaq,
170#endif
171#ifdef CONFIG_X86_SUMMIT
172 &apic_summit,
173#endif
174#ifdef CONFIG_X86_BIGSMP
175 &apic_bigsmp,
176#endif
177#ifdef CONFIG_X86_ES7000
178 &apic_es7000,
179#endif
180 &apic_default, /* must be last */
181 NULL,
182};
183
184static int cmdline_apic __initdata;
185static int __init parse_apic(char *arg)
186{
187 int i;
188
189 if (!arg)
190 return -EINVAL;
191
192 for (i = 0; apic_probe[i]; i++) {
193 if (!strcmp(apic_probe[i]->name, arg)) {
194 apic = apic_probe[i];
195 cmdline_apic = 1;
196 return 0;
197 }
198 }
199
200 if (x86_quirks->update_apic)
201 x86_quirks->update_apic();
202
203 /* Parsed again by __setup for debug/verbose */
204 return 0;
205}
206early_param("apic", parse_apic);
207
208void __init generic_bigsmp_probe(void)
209{
210#ifdef CONFIG_X86_BIGSMP
211 /*
212 * This routine is used to switch to bigsmp mode when
213 * - There is no apic= option specified by the user
214 * - generic_apic_probe() has chosen apic_default as the sub_arch
215 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
216 */
217
218 if (!cmdline_apic && apic == &apic_default) {
219 if (apic_bigsmp.probe()) {
220 apic = &apic_bigsmp;
221 if (x86_quirks->update_apic)
222 x86_quirks->update_apic();
223 printk(KERN_INFO "Overriding APIC driver with %s\n",
224 apic->name);
225 }
226 }
227#endif
228}
229
230void __init generic_apic_probe(void)
231{
232 if (!cmdline_apic) {
233 int i;
234 for (i = 0; apic_probe[i]; i++) {
235 if (apic_probe[i]->probe()) {
236 apic = apic_probe[i];
237 break;
238 }
239 }
240 /* Not visible without early console */
241 if (!apic_probe[i])
242 panic("Didn't find an APIC driver");
243
244 if (x86_quirks->update_apic)
245 x86_quirks->update_apic();
246 }
247 printk(KERN_INFO "Using APIC driver %s\n", apic->name);
248}
249
250/* These functions can switch the APIC even after the initial ->probe() */
251
252int __init
253generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
254{
255 int i;
256
257 for (i = 0; apic_probe[i]; ++i) {
258 if (!apic_probe[i]->mps_oem_check)
259 continue;
260 if (!apic_probe[i]->mps_oem_check(mpc, oem, productid))
261 continue;
262
263 if (!cmdline_apic) {
264 apic = apic_probe[i];
265 if (x86_quirks->update_apic)
266 x86_quirks->update_apic();
267 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
268 apic->name);
269 }
270 return 1;
271 }
272 return 0;
273}
274
275int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
276{
277 int i;
278
279 for (i = 0; apic_probe[i]; ++i) {
280 if (!apic_probe[i]->acpi_madt_oem_check)
281 continue;
282 if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id))
283 continue;
284
285 if (!cmdline_apic) {
286 apic = apic_probe[i];
287 if (x86_quirks->update_apic)
288 x86_quirks->update_apic();
289 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
290 apic->name);
291 }
292 return 1;
293 }
294 return 0;
295}
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
new file mode 100644
index 000000000000..e7c163661c77
--- /dev/null
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -0,0 +1,96 @@
1/*
2 * Copyright 2004 James Cleverdon, IBM.
3 * Subject to the GNU Public License, v.2
4 *
5 * Generic APIC sub-arch probe layer.
6 *
7 * Hacked for x86-64 by James Cleverdon from i386 architecture code by
8 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
9 * James Cleverdon.
10 */
11#include <linux/threads.h>
12#include <linux/cpumask.h>
13#include <linux/string.h>
14#include <linux/module.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h>
17#include <linux/init.h>
18#include <linux/hardirq.h>
19#include <linux/dmar.h>
20
21#include <asm/smp.h>
22#include <asm/apic.h>
23#include <asm/ipi.h>
24#include <asm/setup.h>
25
26extern struct apic apic_flat;
27extern struct apic apic_physflat;
28extern struct apic apic_x2xpic_uv_x;
29extern struct apic apic_x2apic_phys;
30extern struct apic apic_x2apic_cluster;
31
32struct apic __read_mostly *apic = &apic_flat;
33EXPORT_SYMBOL_GPL(apic);
34
35static struct apic *apic_probe[] __initdata = {
36#ifdef CONFIG_X86_UV
37 &apic_x2apic_uv_x,
38#endif
39#ifdef CONFIG_X86_X2APIC
40 &apic_x2apic_phys,
41 &apic_x2apic_cluster,
42#endif
43 &apic_physflat,
44 NULL,
45};
46
47/*
48 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
49 */
50void __init default_setup_apic_routing(void)
51{
52#ifdef CONFIG_X86_X2APIC
53 if (x2apic && (apic != &apic_x2apic_phys &&
54#ifdef CONFIG_X86_UV
55 apic != &apic_x2apic_uv_x &&
56#endif
57 apic != &apic_x2apic_cluster)) {
58 if (x2apic_phys)
59 apic = &apic_x2apic_phys;
60 else
61 apic = &apic_x2apic_cluster;
62 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
63 }
64#endif
65
66 if (apic == &apic_flat) {
67 if (max_physical_apicid >= 8)
68 apic = &apic_physflat;
69 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
70 }
71
72 if (x86_quirks->update_apic)
73 x86_quirks->update_apic();
74}
75
76/* Same for both flat and physical. */
77
78void apic_send_IPI_self(int vector)
79{
80 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
81}
82
83int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
84{
85 int i;
86
87 for (i = 0; apic_probe[i]; ++i) {
88 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
89 apic = apic_probe[i];
90 printk(KERN_INFO "Setting APIC routing to %s.\n",
91 apic->name);
92 return 1;
93 }
94 }
95 return 0;
96}
diff --git a/arch/x86/kernel/apic/summit_32.c b/arch/x86/kernel/apic/summit_32.c
new file mode 100644
index 000000000000..cfe7b09015d8
--- /dev/null
+++ b/arch/x86/kernel/apic/summit_32.c
@@ -0,0 +1,601 @@
1/*
2 * IBM Summit-Specific Code
3 *
4 * Written By: Matthew Dobson, IBM Corporation
5 *
6 * Copyright (c) 2003 IBM Corp.
7 *
8 * All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or (at
13 * your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
18 * NON INFRINGEMENT. See the GNU General Public License for more
19 * details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * Send feedback to <colpatch@us.ibm.com>
26 *
27 */
28
29#include <linux/mm.h>
30#include <linux/init.h>
31#include <asm/io.h>
32#include <asm/bios_ebda.h>
33
34/*
35 * APIC driver for the IBM "Summit" chipset.
36 */
37#include <linux/threads.h>
38#include <linux/cpumask.h>
39#include <asm/mpspec.h>
40#include <asm/apic.h>
41#include <asm/smp.h>
42#include <asm/fixmap.h>
43#include <asm/apicdef.h>
44#include <asm/ipi.h>
45#include <linux/kernel.h>
46#include <linux/string.h>
47#include <linux/init.h>
48#include <linux/gfp.h>
49#include <linux/smp.h>
50
51static inline unsigned summit_get_apic_id(unsigned long x)
52{
53 return (x >> 24) & 0xFF;
54}
55
56static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector)
57{
58 default_send_IPI_mask_sequence_logical(mask, vector);
59}
60
61static inline void summit_send_IPI_allbutself(int vector)
62{
63 cpumask_t mask = cpu_online_map;
64 cpu_clear(smp_processor_id(), mask);
65
66 if (!cpus_empty(mask))
67 summit_send_IPI_mask(&mask, vector);
68}
69
70static inline void summit_send_IPI_all(int vector)
71{
72 summit_send_IPI_mask(&cpu_online_map, vector);
73}
74
75#include <asm/tsc.h>
76
77extern int use_cyclone;
78
79#ifdef CONFIG_X86_SUMMIT_NUMA
80extern void setup_summit(void);
81#else
82#define setup_summit() {}
83#endif
84
85static inline int
86summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
87{
88 if (!strncmp(oem, "IBM ENSW", 8) &&
89 (!strncmp(productid, "VIGIL SMP", 9)
90 || !strncmp(productid, "EXA", 3)
91 || !strncmp(productid, "RUTHLESS SMP", 12))){
92 mark_tsc_unstable("Summit based system");
93 use_cyclone = 1; /*enable cyclone-timer*/
94 setup_summit();
95 return 1;
96 }
97 return 0;
98}
99
100/* Hook from generic ACPI tables.c */
101static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
102{
103 if (!strncmp(oem_id, "IBM", 3) &&
104 (!strncmp(oem_table_id, "SERVIGIL", 8)
105 || !strncmp(oem_table_id, "EXA", 3))){
106 mark_tsc_unstable("Summit based system");
107 use_cyclone = 1; /*enable cyclone-timer*/
108 setup_summit();
109 return 1;
110 }
111 return 0;
112}
113
114struct rio_table_hdr {
115 unsigned char version; /* Version number of this data structure */
116 /* Version 3 adds chassis_num & WP_index */
117 unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
118 unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
119} __attribute__((packed));
120
121struct scal_detail {
122 unsigned char node_id; /* Scalability Node ID */
123 unsigned long CBAR; /* Address of 1MB register space */
124 unsigned char port0node; /* Node ID port connected to: 0xFF=None */
125 unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
126 unsigned char port1node; /* Node ID port connected to: 0xFF = None */
127 unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
128 unsigned char port2node; /* Node ID port connected to: 0xFF = None */
129 unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
130 unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
131} __attribute__((packed));
132
133struct rio_detail {
134 unsigned char node_id; /* RIO Node ID */
135 unsigned long BBAR; /* Address of 1MB register space */
136 unsigned char type; /* Type of device */
137 unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
138 /* For CYC: Node ID of Twister that owns this CYC */
139 unsigned char port0node; /* Node ID port connected to: 0xFF=None */
140 unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
141 unsigned char port1node; /* Node ID port connected to: 0xFF=None */
142 unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
143 unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
144 /* For CYC: 0 */
145 unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
146 /* = 0 : the XAPIC is not used, ie:*/
147 /* ints fwded to another XAPIC */
148 /* Bits1:7 Reserved */
149 /* For CYC: Bits0:7 Reserved */
150 unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
151 /* lower slot numbers/PCI bus numbers */
152 /* For CYC: No meaning */
153 unsigned char chassis_num; /* 1 based Chassis number */
154 /* For LookOut WPEGs this field indicates the */
155 /* Expansion Chassis #, enumerated from Boot */
156 /* Node WPEG external port, then Boot Node CYC */
157 /* external port, then Next Vigil chassis WPEG */
158 /* external port, etc. */
159 /* Shared Lookouts have only 1 chassis number (the */
160 /* first one assigned) */
161} __attribute__((packed));
162
163
164typedef enum {
165 CompatTwister = 0, /* Compatibility Twister */
166 AltTwister = 1, /* Alternate Twister of internal 8-way */
167 CompatCyclone = 2, /* Compatibility Cyclone */
168 AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
169 CompatWPEG = 4, /* Compatibility WPEG */
170 AltWPEG = 5, /* Second Planar WPEG */
171 LookOutAWPEG = 6, /* LookOut WPEG */
172 LookOutBWPEG = 7, /* LookOut WPEG */
173} node_type;
174
175static inline int is_WPEG(struct rio_detail *rio){
176 return (rio->type == CompatWPEG || rio->type == AltWPEG ||
177 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
178}
179
180
181/* In clustered mode, the high nibble of APIC ID is a cluster number.
182 * The low nibble is a 4-bit bitmap. */
183#define XAPIC_DEST_CPUS_SHIFT 4
184#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
185#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
186
187#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
188
189static inline const cpumask_t *summit_target_cpus(void)
190{
191 /* CPU_MASK_ALL (0xff) has undefined behaviour with
192 * dest_LowestPrio mode logical clustered apic interrupt routing
193 * Just start on cpu 0. IRQ balancing will spread load
194 */
195 return &cpumask_of_cpu(0);
196}
197
198static inline unsigned long
199summit_check_apicid_used(physid_mask_t bitmap, int apicid)
200{
201 return 0;
202}
203
204/* we don't use the phys_cpu_present_map to indicate apicid presence */
205static inline unsigned long summit_check_apicid_present(int bit)
206{
207 return 1;
208}
209
210static inline void summit_init_apic_ldr(void)
211{
212 unsigned long val, id;
213 int count = 0;
214 u8 my_id = (u8)hard_smp_processor_id();
215 u8 my_cluster = APIC_CLUSTER(my_id);
216#ifdef CONFIG_SMP
217 u8 lid;
218 int i;
219
220 /* Create logical APIC IDs by counting CPUs already in cluster. */
221 for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
222 lid = cpu_2_logical_apicid[i];
223 if (lid != BAD_APICID && APIC_CLUSTER(lid) == my_cluster)
224 ++count;
225 }
226#endif
227 /* We only have a 4 wide bitmap in cluster mode. If a deranged
228 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
229 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
230 id = my_cluster | (1UL << count);
231 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
232 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
233 val |= SET_APIC_LOGICAL_ID(id);
234 apic_write(APIC_LDR, val);
235}
236
237static inline int summit_apic_id_registered(void)
238{
239 return 1;
240}
241
242static inline void summit_setup_apic_routing(void)
243{
244 printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
245 nr_ioapics);
246}
247
248static inline int summit_apicid_to_node(int logical_apicid)
249{
250#ifdef CONFIG_SMP
251 return apicid_2_node[hard_smp_processor_id()];
252#else
253 return 0;
254#endif
255}
256
257/* Mapping from cpu number to logical apicid */
258static inline int summit_cpu_to_logical_apicid(int cpu)
259{
260#ifdef CONFIG_SMP
261 if (cpu >= nr_cpu_ids)
262 return BAD_APICID;
263 return cpu_2_logical_apicid[cpu];
264#else
265 return logical_smp_processor_id();
266#endif
267}
268
269static inline int summit_cpu_present_to_apicid(int mps_cpu)
270{
271 if (mps_cpu < nr_cpu_ids)
272 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
273 else
274 return BAD_APICID;
275}
276
277static inline physid_mask_t
278summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
279{
280 /* For clustered we don't have a good way to do this yet - hack */
281 return physids_promote(0x0F);
282}
283
284static inline physid_mask_t summit_apicid_to_cpu_present(int apicid)
285{
286 return physid_mask_of_physid(0);
287}
288
289static inline void summit_setup_portio_remap(void)
290{
291}
292
293static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
294{
295 return 1;
296}
297
298static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
299{
300 int cpus_found = 0;
301 int num_bits_set;
302 int apicid;
303 int cpu;
304
305 num_bits_set = cpus_weight(*cpumask);
306 /* Return id to all */
307 if (num_bits_set >= nr_cpu_ids)
308 return 0xFF;
309 /*
310 * The cpus in the mask must all be on the apic cluster. If are not
311 * on the same apicid cluster return default value of target_cpus():
312 */
313 cpu = first_cpu(*cpumask);
314 apicid = summit_cpu_to_logical_apicid(cpu);
315
316 while (cpus_found < num_bits_set) {
317 if (cpu_isset(cpu, *cpumask)) {
318 int new_apicid = summit_cpu_to_logical_apicid(cpu);
319
320 if (APIC_CLUSTER(apicid) != APIC_CLUSTER(new_apicid)) {
321 printk ("%s: Not a valid mask!\n", __func__);
322
323 return 0xFF;
324 }
325 apicid = apicid | new_apicid;
326 cpus_found++;
327 }
328 cpu++;
329 }
330 return apicid;
331}
332
333static inline unsigned int
334summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
335 const struct cpumask *andmask)
336{
337 int apicid = summit_cpu_to_logical_apicid(0);
338 cpumask_var_t cpumask;
339
340 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
341 return apicid;
342
343 cpumask_and(cpumask, inmask, andmask);
344 cpumask_and(cpumask, cpumask, cpu_online_mask);
345 apicid = summit_cpu_mask_to_apicid(cpumask);
346
347 free_cpumask_var(cpumask);
348
349 return apicid;
350}
351
352/*
353 * cpuid returns the value latched in the HW at reset, not the APIC ID
354 * register's value. For any box whose BIOS changes APIC IDs, like
355 * clustered APIC systems, we must use hard_smp_processor_id.
356 *
357 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
358 */
359static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb)
360{
361 return hard_smp_processor_id() >> index_msb;
362}
363
364static int probe_summit(void)
365{
366 /* probed later in mptable/ACPI hooks */
367 return 0;
368}
369
370static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
371{
372 /* Careful. Some cpus do not strictly honor the set of cpus
373 * specified in the interrupt destination when using lowest
374 * priority interrupt delivery mode.
375 *
376 * In particular there was a hyperthreading cpu observed to
377 * deliver interrupts to the wrong hyperthread when only one
378 * hyperthread was specified in the interrupt desitination.
379 */
380 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
381}
382
383#ifdef CONFIG_X86_SUMMIT_NUMA
384static struct rio_table_hdr *rio_table_hdr __initdata;
385static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
386static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
387
388#ifndef CONFIG_X86_NUMAQ
389static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
390#endif
391
392static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
393{
394 int twister = 0, node = 0;
395 int i, bus, num_buses;
396
397 for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
398 if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
399 twister = rio_devs[i]->owner_id;
400 break;
401 }
402 }
403 if (i == rio_table_hdr->num_rio_dev) {
404 printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
405 return last_bus;
406 }
407
408 for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
409 if (scal_devs[i]->node_id == twister) {
410 node = scal_devs[i]->node_id;
411 break;
412 }
413 }
414 if (i == rio_table_hdr->num_scal_dev) {
415 printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
416 return last_bus;
417 }
418
419 switch (rio_devs[wpeg_num]->type) {
420 case CompatWPEG:
421 /*
422 * The Compatibility Winnipeg controls the 2 legacy buses,
423 * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
424 * a PCI-PCI bridge card is used in either slot: total 5 buses.
425 */
426 num_buses = 5;
427 break;
428 case AltWPEG:
429 /*
430 * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
431 * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
432 * the "extra" buses for each of those slots: total 7 buses.
433 */
434 num_buses = 7;
435 break;
436 case LookOutAWPEG:
437 case LookOutBWPEG:
438 /*
439 * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
440 * & the "extra" buses for each of those slots: total 9 buses.
441 */
442 num_buses = 9;
443 break;
444 default:
445 printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
446 return last_bus;
447 }
448
449 for (bus = last_bus; bus < last_bus + num_buses; bus++)
450 mp_bus_id_to_node[bus] = node;
451 return bus;
452}
453
454static int __init build_detail_arrays(void)
455{
456 unsigned long ptr;
457 int i, scal_detail_size, rio_detail_size;
458
459 if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
460 printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
461 return 0;
462 }
463
464 switch (rio_table_hdr->version) {
465 default:
466 printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
467 return 0;
468 case 2:
469 scal_detail_size = 11;
470 rio_detail_size = 13;
471 break;
472 case 3:
473 scal_detail_size = 12;
474 rio_detail_size = 15;
475 break;
476 }
477
478 ptr = (unsigned long)rio_table_hdr + 3;
479 for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
480 scal_devs[i] = (struct scal_detail *)ptr;
481
482 for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
483 rio_devs[i] = (struct rio_detail *)ptr;
484
485 return 1;
486}
487
488void __init setup_summit(void)
489{
490 unsigned long ptr;
491 unsigned short offset;
492 int i, next_wpeg, next_bus = 0;
493
494 /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
495 ptr = get_bios_ebda();
496 ptr = (unsigned long)phys_to_virt(ptr);
497
498 rio_table_hdr = NULL;
499 offset = 0x180;
500 while (offset) {
501 /* The block id is stored in the 2nd word */
502 if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
503 /* set the pointer past the offset & block id */
504 rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
505 break;
506 }
507 /* The next offset is stored in the 1st word. 0 means no more */
508 offset = *((unsigned short *)(ptr + offset));
509 }
510 if (!rio_table_hdr) {
511 printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
512 return;
513 }
514
515 if (!build_detail_arrays())
516 return;
517
518 /* The first Winnipeg we're looking for has an index of 0 */
519 next_wpeg = 0;
520 do {
521 for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
522 if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) {
523 /* It's the Winnipeg we're looking for! */
524 next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
525 next_wpeg++;
526 break;
527 }
528 }
529 /*
530 * If we go through all Rio devices and don't find one with
531 * the next index, it means we've found all the Winnipegs,
532 * and thus all the PCI buses.
533 */
534 if (i == rio_table_hdr->num_rio_dev)
535 next_wpeg = 0;
536 } while (next_wpeg != 0);
537}
538#endif
539
540struct apic apic_summit = {
541
542 .name = "summit",
543 .probe = probe_summit,
544 .acpi_madt_oem_check = summit_acpi_madt_oem_check,
545 .apic_id_registered = summit_apic_id_registered,
546
547 .irq_delivery_mode = dest_LowestPrio,
548 /* logical delivery broadcast to all CPUs: */
549 .irq_dest_mode = 1,
550
551 .target_cpus = summit_target_cpus,
552 .disable_esr = 1,
553 .dest_logical = APIC_DEST_LOGICAL,
554 .check_apicid_used = summit_check_apicid_used,
555 .check_apicid_present = summit_check_apicid_present,
556
557 .vector_allocation_domain = summit_vector_allocation_domain,
558 .init_apic_ldr = summit_init_apic_ldr,
559
560 .ioapic_phys_id_map = summit_ioapic_phys_id_map,
561 .setup_apic_routing = summit_setup_apic_routing,
562 .multi_timer_check = NULL,
563 .apicid_to_node = summit_apicid_to_node,
564 .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
565 .cpu_present_to_apicid = summit_cpu_present_to_apicid,
566 .apicid_to_cpu_present = summit_apicid_to_cpu_present,
567 .setup_portio_remap = NULL,
568 .check_phys_apicid_present = summit_check_phys_apicid_present,
569 .enable_apic_mode = NULL,
570 .phys_pkg_id = summit_phys_pkg_id,
571 .mps_oem_check = summit_mps_oem_check,
572
573 .get_apic_id = summit_get_apic_id,
574 .set_apic_id = NULL,
575 .apic_id_mask = 0xFF << 24,
576
577 .cpu_mask_to_apicid = summit_cpu_mask_to_apicid,
578 .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
579
580 .send_IPI_mask = summit_send_IPI_mask,
581 .send_IPI_mask_allbutself = NULL,
582 .send_IPI_allbutself = summit_send_IPI_allbutself,
583 .send_IPI_all = summit_send_IPI_all,
584 .send_IPI_self = default_send_IPI_self,
585
586 .wakeup_cpu = NULL,
587 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
588 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
589
590 .wait_for_init_deassert = default_wait_for_init_deassert,
591
592 .smp_callin_clear_local_apic = NULL,
593 .inquire_remote_apic = default_inquire_remote_apic,
594
595 .read = native_apic_mem_read,
596 .write = native_apic_mem_write,
597 .icr_read = native_apic_icr_read,
598 .icr_write = native_apic_icr_write,
599 .wait_icr_idle = native_apic_wait_icr_idle,
600 .safe_wait_icr_idle = native_safe_apic_wait_icr_idle,
601};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c
index 6ce497cc372d..354b9c45601d 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/apic/x2apic_cluster.c
@@ -7,17 +7,14 @@
7#include <linux/dmar.h> 7#include <linux/dmar.h>
8 8
9#include <asm/smp.h> 9#include <asm/smp.h>
10#include <asm/apic.h>
10#include <asm/ipi.h> 11#include <asm/ipi.h>
11#include <asm/genapic.h>
12 12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid); 13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14 14
15static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 15static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{ 16{
17 if (cpu_has_x2apic) 17 return x2apic_enabled();
18 return 1;
19
20 return 0;
21} 18}
22 19
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 20/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
@@ -36,8 +33,8 @@ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
36 cpumask_set_cpu(cpu, retmask); 33 cpumask_set_cpu(cpu, retmask);
37} 34}
38 35
39static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 36static void
40 unsigned int dest) 37 __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
41{ 38{
42 unsigned long cfg; 39 unsigned long cfg;
43 40
@@ -46,7 +43,7 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
46 /* 43 /*
47 * send the IPI. 44 * send the IPI.
48 */ 45 */
49 x2apic_icr_write(cfg, apicid); 46 native_x2apic_icr_write(cfg, apicid);
50} 47}
51 48
52/* 49/*
@@ -57,45 +54,50 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
57 */ 54 */
58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) 55static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
59{ 56{
60 unsigned long flags;
61 unsigned long query_cpu; 57 unsigned long query_cpu;
58 unsigned long flags;
62 59
63 local_irq_save(flags); 60 local_irq_save(flags);
64 for_each_cpu(query_cpu, mask) 61 for_each_cpu(query_cpu, mask) {
65 __x2apic_send_IPI_dest( 62 __x2apic_send_IPI_dest(
66 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 63 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL); 64 vector, apic->dest_logical);
65 }
68 local_irq_restore(flags); 66 local_irq_restore(flags);
69} 67}
70 68
71static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, 69static void
72 int vector) 70 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
73{ 71{
74 unsigned long flags;
75 unsigned long query_cpu;
76 unsigned long this_cpu = smp_processor_id(); 72 unsigned long this_cpu = smp_processor_id();
73 unsigned long query_cpu;
74 unsigned long flags;
77 75
78 local_irq_save(flags); 76 local_irq_save(flags);
79 for_each_cpu(query_cpu, mask) 77 for_each_cpu(query_cpu, mask) {
80 if (query_cpu != this_cpu) 78 if (query_cpu == this_cpu)
81 __x2apic_send_IPI_dest( 79 continue;
80 __x2apic_send_IPI_dest(
82 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 81 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
83 vector, APIC_DEST_LOGICAL); 82 vector, apic->dest_logical);
83 }
84 local_irq_restore(flags); 84 local_irq_restore(flags);
85} 85}
86 86
87static void x2apic_send_IPI_allbutself(int vector) 87static void x2apic_send_IPI_allbutself(int vector)
88{ 88{
89 unsigned long flags;
90 unsigned long query_cpu;
91 unsigned long this_cpu = smp_processor_id(); 89 unsigned long this_cpu = smp_processor_id();
90 unsigned long query_cpu;
91 unsigned long flags;
92 92
93 local_irq_save(flags); 93 local_irq_save(flags);
94 for_each_online_cpu(query_cpu) 94 for_each_online_cpu(query_cpu) {
95 if (query_cpu != this_cpu) 95 if (query_cpu == this_cpu)
96 __x2apic_send_IPI_dest( 96 continue;
97 __x2apic_send_IPI_dest(
97 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 98 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
98 vector, APIC_DEST_LOGICAL); 99 vector, apic->dest_logical);
100 }
99 local_irq_restore(flags); 101 local_irq_restore(flags);
100} 102}
101 103
@@ -111,21 +113,21 @@ static int x2apic_apic_id_registered(void)
111 113
112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) 114static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
113{ 115{
114 int cpu;
115
116 /* 116 /*
117 * We're using fixed IRQ delivery, can only return one logical APIC ID. 117 * We're using fixed IRQ delivery, can only return one logical APIC ID.
118 * May as well be the first. 118 * May as well be the first.
119 */ 119 */
120 cpu = cpumask_first(cpumask); 120 int cpu = cpumask_first(cpumask);
121
121 if ((unsigned)cpu < nr_cpu_ids) 122 if ((unsigned)cpu < nr_cpu_ids)
122 return per_cpu(x86_cpu_to_logical_apicid, cpu); 123 return per_cpu(x86_cpu_to_logical_apicid, cpu);
123 else 124 else
124 return BAD_APICID; 125 return BAD_APICID;
125} 126}
126 127
127static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 128static unsigned int
128 const struct cpumask *andmask) 129x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
130 const struct cpumask *andmask)
129{ 131{
130 int cpu; 132 int cpu;
131 133
@@ -133,15 +135,18 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 * We're using fixed IRQ delivery, can only return one logical APIC ID. 135 * We're using fixed IRQ delivery, can only return one logical APIC ID.
134 * May as well be the first. 136 * May as well be the first.
135 */ 137 */
136 for_each_cpu_and(cpu, cpumask, andmask) 138 for_each_cpu_and(cpu, cpumask, andmask) {
137 if (cpumask_test_cpu(cpu, cpu_online_mask)) 139 if (cpumask_test_cpu(cpu, cpu_online_mask))
138 break; 140 break;
141 }
142
139 if (cpu < nr_cpu_ids) 143 if (cpu < nr_cpu_ids)
140 return per_cpu(x86_cpu_to_logical_apicid, cpu); 144 return per_cpu(x86_cpu_to_logical_apicid, cpu);
145
141 return BAD_APICID; 146 return BAD_APICID;
142} 147}
143 148
144static unsigned int get_apic_id(unsigned long x) 149static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
145{ 150{
146 unsigned int id; 151 unsigned int id;
147 152
@@ -157,7 +162,7 @@ static unsigned long set_apic_id(unsigned int id)
157 return x; 162 return x;
158} 163}
159 164
160static unsigned int phys_pkg_id(int index_msb) 165static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
161{ 166{
162 return current_cpu_data.initial_apicid >> index_msb; 167 return current_cpu_data.initial_apicid >> index_msb;
163} 168}
@@ -172,27 +177,64 @@ static void init_x2apic_ldr(void)
172 int cpu = smp_processor_id(); 177 int cpu = smp_processor_id();
173 178
174 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); 179 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
175 return; 180}
176} 181
177 182struct apic apic_x2apic_cluster = {
178struct genapic apic_x2apic_cluster = { 183
179 .name = "cluster x2apic", 184 .name = "cluster x2apic",
180 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 185 .probe = NULL,
181 .int_delivery_mode = dest_LowestPrio, 186 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
182 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 187 .apic_id_registered = x2apic_apic_id_registered,
183 .target_cpus = x2apic_target_cpus, 188
184 .vector_allocation_domain = x2apic_vector_allocation_domain, 189 .irq_delivery_mode = dest_LowestPrio,
185 .apic_id_registered = x2apic_apic_id_registered, 190 .irq_dest_mode = 1, /* logical */
186 .init_apic_ldr = init_x2apic_ldr, 191
187 .send_IPI_all = x2apic_send_IPI_all, 192 .target_cpus = x2apic_target_cpus,
188 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 193 .disable_esr = 0,
189 .send_IPI_mask = x2apic_send_IPI_mask, 194 .dest_logical = APIC_DEST_LOGICAL,
190 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, 195 .check_apicid_used = NULL,
191 .send_IPI_self = x2apic_send_IPI_self, 196 .check_apicid_present = NULL,
192 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 197
193 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, 198 .vector_allocation_domain = x2apic_vector_allocation_domain,
194 .phys_pkg_id = phys_pkg_id, 199 .init_apic_ldr = init_x2apic_ldr,
195 .get_apic_id = get_apic_id, 200
196 .set_apic_id = set_apic_id, 201 .ioapic_phys_id_map = NULL,
197 .apic_id_mask = (0xFFFFFFFFu), 202 .setup_apic_routing = NULL,
203 .multi_timer_check = NULL,
204 .apicid_to_node = NULL,
205 .cpu_to_logical_apicid = NULL,
206 .cpu_present_to_apicid = default_cpu_present_to_apicid,
207 .apicid_to_cpu_present = NULL,
208 .setup_portio_remap = NULL,
209 .check_phys_apicid_present = default_check_phys_apicid_present,
210 .enable_apic_mode = NULL,
211 .phys_pkg_id = x2apic_cluster_phys_pkg_id,
212 .mps_oem_check = NULL,
213
214 .get_apic_id = x2apic_cluster_phys_get_apic_id,
215 .set_apic_id = set_apic_id,
216 .apic_id_mask = 0xFFFFFFFFu,
217
218 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
219 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
220
221 .send_IPI_mask = x2apic_send_IPI_mask,
222 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
223 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
224 .send_IPI_all = x2apic_send_IPI_all,
225 .send_IPI_self = x2apic_send_IPI_self,
226
227 .wakeup_cpu = NULL,
228 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
229 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
230 .wait_for_init_deassert = NULL,
231 .smp_callin_clear_local_apic = NULL,
232 .inquire_remote_apic = NULL,
233
234 .read = native_apic_msr_read,
235 .write = native_apic_msr_write,
236 .icr_read = native_x2apic_icr_read,
237 .icr_write = native_x2apic_icr_write,
238 .wait_icr_idle = native_x2apic_wait_icr_idle,
239 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
198}; 240};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/apic/x2apic_phys.c
index 21bcc0e098ba..5bcb174409bc 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/apic/x2apic_phys.c
@@ -7,10 +7,10 @@
7#include <linux/dmar.h> 7#include <linux/dmar.h>
8 8
9#include <asm/smp.h> 9#include <asm/smp.h>
10#include <asm/apic.h>
10#include <asm/ipi.h> 11#include <asm/ipi.h>
11#include <asm/genapic.h>
12 12
13static int x2apic_phys; 13int x2apic_phys;
14 14
15static int set_x2apic_phys_mode(char *arg) 15static int set_x2apic_phys_mode(char *arg)
16{ 16{
@@ -21,10 +21,10 @@ early_param("x2apic_phys", set_x2apic_phys_mode);
21 21
22static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id) 22static int x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{ 23{
24 if (cpu_has_x2apic && x2apic_phys) 24 if (x2apic_phys)
25 return 1; 25 return x2apic_enabled();
26 26 else
27 return 0; 27 return 0;
28} 28}
29 29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */ 30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
@@ -50,13 +50,13 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
50 /* 50 /*
51 * send the IPI. 51 * send the IPI.
52 */ 52 */
53 x2apic_icr_write(cfg, apicid); 53 native_x2apic_icr_write(cfg, apicid);
54} 54}
55 55
56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) 56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
57{ 57{
58 unsigned long flags;
59 unsigned long query_cpu; 58 unsigned long query_cpu;
59 unsigned long flags;
60 60
61 local_irq_save(flags); 61 local_irq_save(flags);
62 for_each_cpu(query_cpu, mask) { 62 for_each_cpu(query_cpu, mask) {
@@ -66,12 +66,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
66 local_irq_restore(flags); 66 local_irq_restore(flags);
67} 67}
68 68
69static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, 69static void
70 int vector) 70 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
71{ 71{
72 unsigned long flags;
73 unsigned long query_cpu;
74 unsigned long this_cpu = smp_processor_id(); 72 unsigned long this_cpu = smp_processor_id();
73 unsigned long query_cpu;
74 unsigned long flags;
75 75
76 local_irq_save(flags); 76 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) { 77 for_each_cpu(query_cpu, mask) {
@@ -85,16 +85,17 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
85 85
86static void x2apic_send_IPI_allbutself(int vector) 86static void x2apic_send_IPI_allbutself(int vector)
87{ 87{
88 unsigned long flags;
89 unsigned long query_cpu;
90 unsigned long this_cpu = smp_processor_id(); 88 unsigned long this_cpu = smp_processor_id();
89 unsigned long query_cpu;
90 unsigned long flags;
91 91
92 local_irq_save(flags); 92 local_irq_save(flags);
93 for_each_online_cpu(query_cpu) 93 for_each_online_cpu(query_cpu) {
94 if (query_cpu != this_cpu) 94 if (query_cpu == this_cpu)
95 __x2apic_send_IPI_dest( 95 continue;
96 per_cpu(x86_cpu_to_apicid, query_cpu), 96 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
97 vector, APIC_DEST_PHYSICAL); 97 vector, APIC_DEST_PHYSICAL);
98 }
98 local_irq_restore(flags); 99 local_irq_restore(flags);
99} 100}
100 101
@@ -110,21 +111,21 @@ static int x2apic_apic_id_registered(void)
110 111
111static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) 112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
112{ 113{
113 int cpu;
114
115 /* 114 /*
116 * We're using fixed IRQ delivery, can only return one phys APIC ID. 115 * We're using fixed IRQ delivery, can only return one phys APIC ID.
117 * May as well be the first. 116 * May as well be the first.
118 */ 117 */
119 cpu = cpumask_first(cpumask); 118 int cpu = cpumask_first(cpumask);
119
120 if ((unsigned)cpu < nr_cpu_ids) 120 if ((unsigned)cpu < nr_cpu_ids)
121 return per_cpu(x86_cpu_to_apicid, cpu); 121 return per_cpu(x86_cpu_to_apicid, cpu);
122 else 122 else
123 return BAD_APICID; 123 return BAD_APICID;
124} 124}
125 125
126static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 126static unsigned int
127 const struct cpumask *andmask) 127x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
128 const struct cpumask *andmask)
128{ 129{
129 int cpu; 130 int cpu;
130 131
@@ -132,31 +133,28 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
132 * We're using fixed IRQ delivery, can only return one phys APIC ID. 133 * We're using fixed IRQ delivery, can only return one phys APIC ID.
133 * May as well be the first. 134 * May as well be the first.
134 */ 135 */
135 for_each_cpu_and(cpu, cpumask, andmask) 136 for_each_cpu_and(cpu, cpumask, andmask) {
136 if (cpumask_test_cpu(cpu, cpu_online_mask)) 137 if (cpumask_test_cpu(cpu, cpu_online_mask))
137 break; 138 break;
139 }
140
138 if (cpu < nr_cpu_ids) 141 if (cpu < nr_cpu_ids)
139 return per_cpu(x86_cpu_to_apicid, cpu); 142 return per_cpu(x86_cpu_to_apicid, cpu);
143
140 return BAD_APICID; 144 return BAD_APICID;
141} 145}
142 146
143static unsigned int get_apic_id(unsigned long x) 147static unsigned int x2apic_phys_get_apic_id(unsigned long x)
144{ 148{
145 unsigned int id; 149 return x;
146
147 id = x;
148 return id;
149} 150}
150 151
151static unsigned long set_apic_id(unsigned int id) 152static unsigned long set_apic_id(unsigned int id)
152{ 153{
153 unsigned long x; 154 return id;
154
155 x = id;
156 return x;
157} 155}
158 156
159static unsigned int phys_pkg_id(int index_msb) 157static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
160{ 158{
161 return current_cpu_data.initial_apicid >> index_msb; 159 return current_cpu_data.initial_apicid >> index_msb;
162} 160}
@@ -168,27 +166,64 @@ static void x2apic_send_IPI_self(int vector)
168 166
169static void init_x2apic_ldr(void) 167static void init_x2apic_ldr(void)
170{ 168{
171 return; 169}
172} 170
173 171struct apic apic_x2apic_phys = {
174struct genapic apic_x2apic_phys = { 172
175 .name = "physical x2apic", 173 .name = "physical x2apic",
176 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 174 .probe = NULL,
177 .int_delivery_mode = dest_Fixed, 175 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
178 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 176 .apic_id_registered = x2apic_apic_id_registered,
179 .target_cpus = x2apic_target_cpus, 177
180 .vector_allocation_domain = x2apic_vector_allocation_domain, 178 .irq_delivery_mode = dest_Fixed,
181 .apic_id_registered = x2apic_apic_id_registered, 179 .irq_dest_mode = 0, /* physical */
182 .init_apic_ldr = init_x2apic_ldr, 180
183 .send_IPI_all = x2apic_send_IPI_all, 181 .target_cpus = x2apic_target_cpus,
184 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 182 .disable_esr = 0,
185 .send_IPI_mask = x2apic_send_IPI_mask, 183 .dest_logical = 0,
186 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, 184 .check_apicid_used = NULL,
187 .send_IPI_self = x2apic_send_IPI_self, 185 .check_apicid_present = NULL,
188 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 186
189 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, 187 .vector_allocation_domain = x2apic_vector_allocation_domain,
190 .phys_pkg_id = phys_pkg_id, 188 .init_apic_ldr = init_x2apic_ldr,
191 .get_apic_id = get_apic_id, 189
192 .set_apic_id = set_apic_id, 190 .ioapic_phys_id_map = NULL,
193 .apic_id_mask = (0xFFFFFFFFu), 191 .setup_apic_routing = NULL,
192 .multi_timer_check = NULL,
193 .apicid_to_node = NULL,
194 .cpu_to_logical_apicid = NULL,
195 .cpu_present_to_apicid = default_cpu_present_to_apicid,
196 .apicid_to_cpu_present = NULL,
197 .setup_portio_remap = NULL,
198 .check_phys_apicid_present = default_check_phys_apicid_present,
199 .enable_apic_mode = NULL,
200 .phys_pkg_id = x2apic_phys_pkg_id,
201 .mps_oem_check = NULL,
202
203 .get_apic_id = x2apic_phys_get_apic_id,
204 .set_apic_id = set_apic_id,
205 .apic_id_mask = 0xFFFFFFFFu,
206
207 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
208 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
209
210 .send_IPI_mask = x2apic_send_IPI_mask,
211 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
212 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
213 .send_IPI_all = x2apic_send_IPI_all,
214 .send_IPI_self = x2apic_send_IPI_self,
215
216 .wakeup_cpu = NULL,
217 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
218 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
219 .wait_for_init_deassert = NULL,
220 .smp_callin_clear_local_apic = NULL,
221 .inquire_remote_apic = NULL,
222
223 .read = native_apic_msr_read,
224 .write = native_apic_msr_write,
225 .icr_read = native_x2apic_icr_read,
226 .icr_write = native_x2apic_icr_write,
227 .wait_icr_idle = native_x2apic_wait_icr_idle,
228 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
194}; 229};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index b193e082f6ce..20b4ad07c3a1 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -22,9 +22,10 @@
22#include <linux/proc_fs.h> 22#include <linux/proc_fs.h>
23#include <asm/current.h> 23#include <asm/current.h>
24#include <asm/smp.h> 24#include <asm/smp.h>
25#include <asm/apic.h>
25#include <asm/ipi.h> 26#include <asm/ipi.h>
26#include <asm/genapic.h>
27#include <asm/pgtable.h> 27#include <asm/pgtable.h>
28#include <asm/uv/uv.h>
28#include <asm/uv/uv_mmrs.h> 29#include <asm/uv/uv_mmrs.h>
29#include <asm/uv/uv_hub.h> 30#include <asm/uv/uv_hub.h>
30#include <asm/uv/bios.h> 31#include <asm/uv/bios.h>
@@ -113,16 +114,16 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
113 114
114static void uv_send_IPI_one(int cpu, int vector) 115static void uv_send_IPI_one(int cpu, int vector)
115{ 116{
116 unsigned long val, apicid, lapicid; 117 unsigned long val, apicid;
117 int pnode; 118 int pnode;
118 119
119 apicid = per_cpu(x86_cpu_to_apicid, cpu); 120 apicid = per_cpu(x86_cpu_to_apicid, cpu);
120 lapicid = apicid & 0x3f; /* ZZZ macro needed */
121 pnode = uv_apicid_to_pnode(apicid); 121 pnode = uv_apicid_to_pnode(apicid);
122 val = 122
123 (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << 123 val = (1UL << UVH_IPI_INT_SEND_SHFT) |
124 UVH_IPI_INT_APIC_ID_SHFT) | 124 (apicid << UVH_IPI_INT_APIC_ID_SHFT) |
125 (vector << UVH_IPI_INT_VECTOR_SHFT); 125 (vector << UVH_IPI_INT_VECTOR_SHFT);
126
126 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 127 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
127} 128}
128 129
@@ -136,22 +137,24 @@ static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
136 137
137static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) 138static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
138{ 139{
139 unsigned int cpu;
140 unsigned int this_cpu = smp_processor_id(); 140 unsigned int this_cpu = smp_processor_id();
141 unsigned int cpu;
141 142
142 for_each_cpu(cpu, mask) 143 for_each_cpu(cpu, mask) {
143 if (cpu != this_cpu) 144 if (cpu != this_cpu)
144 uv_send_IPI_one(cpu, vector); 145 uv_send_IPI_one(cpu, vector);
146 }
145} 147}
146 148
147static void uv_send_IPI_allbutself(int vector) 149static void uv_send_IPI_allbutself(int vector)
148{ 150{
149 unsigned int cpu;
150 unsigned int this_cpu = smp_processor_id(); 151 unsigned int this_cpu = smp_processor_id();
152 unsigned int cpu;
151 153
152 for_each_online_cpu(cpu) 154 for_each_online_cpu(cpu) {
153 if (cpu != this_cpu) 155 if (cpu != this_cpu)
154 uv_send_IPI_one(cpu, vector); 156 uv_send_IPI_one(cpu, vector);
157 }
155} 158}
156 159
157static void uv_send_IPI_all(int vector) 160static void uv_send_IPI_all(int vector)
@@ -170,21 +173,21 @@ static void uv_init_apic_ldr(void)
170 173
171static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) 174static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
172{ 175{
173 int cpu;
174
175 /* 176 /*
176 * We're using fixed IRQ delivery, can only return one phys APIC ID. 177 * We're using fixed IRQ delivery, can only return one phys APIC ID.
177 * May as well be the first. 178 * May as well be the first.
178 */ 179 */
179 cpu = cpumask_first(cpumask); 180 int cpu = cpumask_first(cpumask);
181
180 if ((unsigned)cpu < nr_cpu_ids) 182 if ((unsigned)cpu < nr_cpu_ids)
181 return per_cpu(x86_cpu_to_apicid, cpu); 183 return per_cpu(x86_cpu_to_apicid, cpu);
182 else 184 else
183 return BAD_APICID; 185 return BAD_APICID;
184} 186}
185 187
186static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 188static unsigned int
187 const struct cpumask *andmask) 189uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
190 const struct cpumask *andmask)
188{ 191{
189 int cpu; 192 int cpu;
190 193
@@ -192,15 +195,17 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
192 * We're using fixed IRQ delivery, can only return one phys APIC ID. 195 * We're using fixed IRQ delivery, can only return one phys APIC ID.
193 * May as well be the first. 196 * May as well be the first.
194 */ 197 */
195 for_each_cpu_and(cpu, cpumask, andmask) 198 for_each_cpu_and(cpu, cpumask, andmask) {
196 if (cpumask_test_cpu(cpu, cpu_online_mask)) 199 if (cpumask_test_cpu(cpu, cpu_online_mask))
197 break; 200 break;
201 }
198 if (cpu < nr_cpu_ids) 202 if (cpu < nr_cpu_ids)
199 return per_cpu(x86_cpu_to_apicid, cpu); 203 return per_cpu(x86_cpu_to_apicid, cpu);
204
200 return BAD_APICID; 205 return BAD_APICID;
201} 206}
202 207
203static unsigned int get_apic_id(unsigned long x) 208static unsigned int x2apic_get_apic_id(unsigned long x)
204{ 209{
205 unsigned int id; 210 unsigned int id;
206 211
@@ -222,10 +227,10 @@ static unsigned long set_apic_id(unsigned int id)
222static unsigned int uv_read_apic_id(void) 227static unsigned int uv_read_apic_id(void)
223{ 228{
224 229
225 return get_apic_id(apic_read(APIC_ID)); 230 return x2apic_get_apic_id(apic_read(APIC_ID));
226} 231}
227 232
228static unsigned int phys_pkg_id(int index_msb) 233static int uv_phys_pkg_id(int initial_apicid, int index_msb)
229{ 234{
230 return uv_read_apic_id() >> index_msb; 235 return uv_read_apic_id() >> index_msb;
231} 236}
@@ -235,26 +240,64 @@ static void uv_send_IPI_self(int vector)
235 apic_write(APIC_SELF_IPI, vector); 240 apic_write(APIC_SELF_IPI, vector);
236} 241}
237 242
238struct genapic apic_x2apic_uv_x = { 243struct apic apic_x2apic_uv_x = {
239 .name = "UV large system", 244
240 .acpi_madt_oem_check = uv_acpi_madt_oem_check, 245 .name = "UV large system",
241 .int_delivery_mode = dest_Fixed, 246 .probe = NULL,
242 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 247 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
243 .target_cpus = uv_target_cpus, 248 .apic_id_registered = uv_apic_id_registered,
244 .vector_allocation_domain = uv_vector_allocation_domain, 249
245 .apic_id_registered = uv_apic_id_registered, 250 .irq_delivery_mode = dest_Fixed,
246 .init_apic_ldr = uv_init_apic_ldr, 251 .irq_dest_mode = 1, /* logical */
247 .send_IPI_all = uv_send_IPI_all, 252
248 .send_IPI_allbutself = uv_send_IPI_allbutself, 253 .target_cpus = uv_target_cpus,
249 .send_IPI_mask = uv_send_IPI_mask, 254 .disable_esr = 0,
250 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, 255 .dest_logical = APIC_DEST_LOGICAL,
251 .send_IPI_self = uv_send_IPI_self, 256 .check_apicid_used = NULL,
252 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 257 .check_apicid_present = NULL,
253 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, 258
254 .phys_pkg_id = phys_pkg_id, 259 .vector_allocation_domain = uv_vector_allocation_domain,
255 .get_apic_id = get_apic_id, 260 .init_apic_ldr = uv_init_apic_ldr,
256 .set_apic_id = set_apic_id, 261
257 .apic_id_mask = (0xFFFFFFFFu), 262 .ioapic_phys_id_map = NULL,
263 .setup_apic_routing = NULL,
264 .multi_timer_check = NULL,
265 .apicid_to_node = NULL,
266 .cpu_to_logical_apicid = NULL,
267 .cpu_present_to_apicid = default_cpu_present_to_apicid,
268 .apicid_to_cpu_present = NULL,
269 .setup_portio_remap = NULL,
270 .check_phys_apicid_present = default_check_phys_apicid_present,
271 .enable_apic_mode = NULL,
272 .phys_pkg_id = uv_phys_pkg_id,
273 .mps_oem_check = NULL,
274
275 .get_apic_id = x2apic_get_apic_id,
276 .set_apic_id = set_apic_id,
277 .apic_id_mask = 0xFFFFFFFFu,
278
279 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
280 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
281
282 .send_IPI_mask = uv_send_IPI_mask,
283 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
284 .send_IPI_allbutself = uv_send_IPI_allbutself,
285 .send_IPI_all = uv_send_IPI_all,
286 .send_IPI_self = uv_send_IPI_self,
287
288 .wakeup_cpu = NULL,
289 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
290 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
291 .wait_for_init_deassert = NULL,
292 .smp_callin_clear_local_apic = NULL,
293 .inquire_remote_apic = NULL,
294
295 .read = native_apic_msr_read,
296 .write = native_apic_msr_write,
297 .icr_read = native_x2apic_icr_read,
298 .icr_write = native_x2apic_icr_write,
299 .wait_icr_idle = native_x2apic_wait_icr_idle,
300 .safe_wait_icr_idle = native_safe_x2apic_wait_icr_idle,
258}; 301};
259 302
260static __cpuinit void set_x2apic_extra_bits(int pnode) 303static __cpuinit void set_x2apic_extra_bits(int pnode)
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 266ec6c18b6c..10033fe718e0 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int);
301 */ 301 */
302#define APM_ZERO_SEGS 302#define APM_ZERO_SEGS
303 303
304#include "apm.h" 304#include <asm/apm.h>
305 305
306/* 306/*
307 * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. 307 * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ee4df08feee6..fbf2f33e3080 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -75,6 +75,7 @@ void foo(void)
75 OFFSET(PT_DS, pt_regs, ds); 75 OFFSET(PT_DS, pt_regs, ds);
76 OFFSET(PT_ES, pt_regs, es); 76 OFFSET(PT_ES, pt_regs, es);
77 OFFSET(PT_FS, pt_regs, fs); 77 OFFSET(PT_FS, pt_regs, fs);
78 OFFSET(PT_GS, pt_regs, gs);
78 OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); 79 OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
79 OFFSET(PT_EIP, pt_regs, ip); 80 OFFSET(PT_EIP, pt_regs, ip);
80 OFFSET(PT_CS, pt_regs, cs); 81 OFFSET(PT_CS, pt_regs, cs);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edbc..8793ab33e2c1 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <linux/kbuild.h> 13#include <linux/kbuild.h>
14#include <asm/pda.h>
15#include <asm/processor.h> 14#include <asm/processor.h>
16#include <asm/segment.h> 15#include <asm/segment.h>
17#include <asm/thread_info.h> 16#include <asm/thread_info.h>
@@ -48,16 +47,6 @@ int main(void)
48#endif 47#endif
49 BLANK(); 48 BLANK();
50#undef ENTRY 49#undef ENTRY
51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
52 ENTRY(kernelstack);
53 ENTRY(oldrsp);
54 ENTRY(pcurrent);
55 ENTRY(irqcount);
56 ENTRY(cpunumber);
57 ENTRY(irqstackptr);
58 ENTRY(data_offset);
59 BLANK();
60#undef ENTRY
61#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
62 BLANK(); 51 BLANK();
63 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); 52 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2cf23634b6d9..6882a735d9c0 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,7 +7,7 @@
7#include <asm/pat.h> 7#include <asm/pat.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9 9
10#include <mach_apic.h> 10#include <asm/apic.h>
11 11
12struct cpuid_bit { 12struct cpuid_bit {
13 u16 feature; 13 u16 feature;
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
69 */ 69 */
70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) 70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
71{ 71{
72#ifdef CONFIG_X86_SMP 72#ifdef CONFIG_SMP
73 unsigned int eax, ebx, ecx, edx, sub_index; 73 unsigned int eax, ebx, ecx, edx, sub_index;
74 unsigned int ht_mask_width, core_plus_mask_width; 74 unsigned int ht_mask_width, core_plus_mask_width;
75 unsigned int core_select_mask, core_level_siblings; 75 unsigned int core_select_mask, core_level_siblings;
@@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
116 116
117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; 117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
118 118
119#ifdef CONFIG_X86_32 119 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask; 120 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); 121 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123 /* 122 /*
124 * Reinit the apicid, now that we have extended initial_apicid. 123 * Reinit the apicid, now that we have extended initial_apicid.
125 */ 124 */
126 c->apicid = phys_pkg_id(c->initial_apicid, 0); 125 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
127#else 126
128 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
129 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
130 /*
131 * Reinit the apicid, now that we have extended initial_apicid.
132 */
133 c->apicid = phys_pkg_id(0);
134#endif
135 c->x86_max_cores = (core_level_siblings / smp_num_siblings); 127 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
136 128
137 129
@@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
143 return; 135 return;
144#endif 136#endif
145} 137}
146
147#ifdef CONFIG_X86_PAT
148void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
149{
150 if (!cpu_has_pat)
151 pat_disable("PAT not supported by CPU.");
152
153 switch (c->x86_vendor) {
154 case X86_VENDOR_INTEL:
155 /*
156 * There is a known erratum on Pentium III and Core Solo
157 * and Core Duo CPUs.
158 * " Page with PAT set to WC while associated MTRR is UC
159 * may consolidate to UC "
160 * Because of this erratum, it is better to stick with
161 * setting WC in MTRR rather than using PAT on these CPUs.
162 *
163 * Enable PAT WC only on P4, Core 2 or later CPUs.
164 */
165 if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
166 return;
167
168 pat_disable("PAT WC disabled due to known CPU erratum.");
169 return;
170
171 case X86_VENDOR_AMD:
172 case X86_VENDOR_CENTAUR:
173 case X86_VENDOR_TRANSMETA:
174 return;
175 }
176
177 pat_disable("PAT disabled. Not yet verified on this CPU type.");
178}
179#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c878f6aa919..25423a5b80ed 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -12,8 +12,6 @@
12# include <asm/cacheflush.h> 12# include <asm/cacheflush.h>
13#endif 13#endif
14 14
15#include <mach_apic.h>
16
17#include "cpu.h" 15#include "cpu.h"
18 16
19#ifdef CONFIG_X86_32 17#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1f93b1..826d5c876278 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,14 +21,14 @@
21#include <asm/asm.h> 21#include <asm/asm.h>
22#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/smp.h> 23#include <asm/smp.h>
24#ifdef CONFIG_X86_LOCAL_APIC 24#include <asm/cpu.h>
25#include <asm/mpspec.h> 25#include <asm/cpumask.h>
26#include <asm/apic.h> 26#include <asm/apic.h>
27#include <mach_apic.h> 27
28#include <asm/genapic.h> 28#ifdef CONFIG_X86_LOCAL_APIC
29#include <asm/uv/uv.h>
29#endif 30#endif
30 31
31#include <asm/pda.h>
32#include <asm/pgtable.h> 32#include <asm/pgtable.h>
33#include <asm/processor.h> 33#include <asm/processor.h>
34#include <asm/desc.h> 34#include <asm/desc.h>
@@ -37,6 +37,7 @@
37#include <asm/sections.h> 37#include <asm/sections.h>
38#include <asm/setup.h> 38#include <asm/setup.h>
39#include <asm/hypervisor.h> 39#include <asm/hypervisor.h>
40#include <asm/stackprotector.h>
40 41
41#include "cpu.h" 42#include "cpu.h"
42 43
@@ -50,6 +51,15 @@ cpumask_var_t cpu_initialized_mask;
50/* representing cpus for which sibling maps can be computed */ 51/* representing cpus for which sibling maps can be computed */
51cpumask_var_t cpu_sibling_setup_mask; 52cpumask_var_t cpu_sibling_setup_mask;
52 53
54/* correctly size the local cpu masks */
55void __init setup_cpu_local_masks(void)
56{
57 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
58 alloc_bootmem_cpumask_var(&cpu_callin_mask);
59 alloc_bootmem_cpumask_var(&cpu_callout_mask);
60 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
61}
62
53#else /* CONFIG_X86_32 */ 63#else /* CONFIG_X86_32 */
54 64
55cpumask_t cpu_callin_map; 65cpumask_t cpu_callin_map;
@@ -62,23 +72,23 @@ cpumask_t cpu_sibling_setup_map;
62 72
63static struct cpu_dev *this_cpu __cpuinitdata; 73static struct cpu_dev *this_cpu __cpuinitdata;
64 74
75DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 76#ifdef CONFIG_X86_64
66/* We need valid kernel segments for data and code in long mode too 77 /*
67 * IRET will check the segment types kkeil 2000/10/28 78 * We need valid kernel segments for data and code in long mode too
68 * Also sysret mandates a special GDT layout 79 * IRET will check the segment types kkeil 2000/10/28
69 */ 80 * Also sysret mandates a special GDT layout
70/* The TLS descriptors are currently at a different place compared to i386. 81 *
71 Hopefully nobody expects them at a fixed place (Wine?) */ 82 * The TLS descriptors are currently at a different place compared to i386.
72DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 83 * Hopefully nobody expects them at a fixed place (Wine?)
84 */
73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 85 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 86 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 87 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 88 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 89 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 90 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
79} };
80#else 91#else
81DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
82 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 92 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
83 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 93 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
84 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 94 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -110,9 +120,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
110 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 120 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
111 121
112 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 122 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
113 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 123 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
114} }; 124 GDT_STACK_CANARY_INIT
115#endif 125#endif
126} };
116EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 127EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
117 128
118#ifdef CONFIG_X86_32 129#ifdef CONFIG_X86_32
@@ -213,6 +224,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
213#endif 224#endif
214 225
215/* 226/*
227 * Some CPU features depend on higher CPUID levels, which may not always
228 * be available due to CPUID level capping or broken virtualization
229 * software. Add those features to this table to auto-disable them.
230 */
231struct cpuid_dependent_feature {
232 u32 feature;
233 u32 level;
234};
235static const struct cpuid_dependent_feature __cpuinitconst
236cpuid_dependent_features[] = {
237 { X86_FEATURE_MWAIT, 0x00000005 },
238 { X86_FEATURE_DCA, 0x00000009 },
239 { X86_FEATURE_XSAVE, 0x0000000d },
240 { 0, 0 }
241};
242
243static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
244{
245 const struct cpuid_dependent_feature *df;
246 for (df = cpuid_dependent_features; df->feature; df++) {
247 /*
248 * Note: cpuid_level is set to -1 if unavailable, but
249 * extended_extended_level is set to 0 if unavailable
250 * and the legitimate extended levels are all negative
251 * when signed; hence the weird messing around with
252 * signs here...
253 */
254 if (cpu_has(c, df->feature) &&
255 ((s32)df->level < 0 ?
256 (u32)df->level > (u32)c->extended_cpuid_level :
257 (s32)df->level > (s32)c->cpuid_level)) {
258 clear_cpu_cap(c, df->feature);
259 if (warn)
260 printk(KERN_WARNING
261 "CPU: CPU feature %s disabled "
262 "due to lack of CPUID level 0x%x\n",
263 x86_cap_flags[df->feature],
264 df->level);
265 }
266 }
267}
268
269/*
216 * Naming convention should be: <Name> [(<Codename>)] 270 * Naming convention should be: <Name> [(<Codename>)]
217 * This table only is used unless init_<vendor>() below doesn't set it; 271 * This table only is used unless init_<vendor>() below doesn't set it;
218 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 272 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
@@ -242,18 +296,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
242 296
243__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; 297__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
244 298
299void load_percpu_segment(int cpu)
300{
301#ifdef CONFIG_X86_32
302 loadsegment(fs, __KERNEL_PERCPU);
303#else
304 loadsegment(gs, 0);
305 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
306#endif
307 load_stack_canary_segment();
308}
309
245/* Current gdt points %fs at the "master" per-cpu area: after this, 310/* Current gdt points %fs at the "master" per-cpu area: after this,
246 * it's on the real one. */ 311 * it's on the real one. */
247void switch_to_new_gdt(void) 312void switch_to_new_gdt(int cpu)
248{ 313{
249 struct desc_ptr gdt_descr; 314 struct desc_ptr gdt_descr;
250 315
251 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); 316 gdt_descr.address = (long)get_cpu_gdt_table(cpu);
252 gdt_descr.size = GDT_SIZE - 1; 317 gdt_descr.size = GDT_SIZE - 1;
253 load_gdt(&gdt_descr); 318 load_gdt(&gdt_descr);
254#ifdef CONFIG_X86_32 319 /* Reload the per-cpu base */
255 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); 320
256#endif 321 load_percpu_segment(cpu);
257} 322}
258 323
259static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 324static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -383,11 +448,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
383 } 448 }
384 449
385 index_msb = get_count_order(smp_num_siblings); 450 index_msb = get_count_order(smp_num_siblings);
386#ifdef CONFIG_X86_64 451 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
387 c->phys_proc_id = phys_pkg_id(index_msb);
388#else
389 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
390#endif
391 452
392 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 453 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
393 454
@@ -395,13 +456,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
395 456
396 core_bits = get_count_order(c->x86_max_cores); 457 core_bits = get_count_order(c->x86_max_cores);
397 458
398#ifdef CONFIG_X86_64 459 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
399 c->cpu_core_id = phys_pkg_id(index_msb) &
400 ((1 << core_bits) - 1); 460 ((1 << core_bits) - 1);
401#else
402 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
403 ((1 << core_bits) - 1);
404#endif
405 } 461 }
406 462
407out: 463out:
@@ -570,11 +626,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
570 if (this_cpu->c_early_init) 626 if (this_cpu->c_early_init)
571 this_cpu->c_early_init(c); 627 this_cpu->c_early_init(c);
572 628
573 validate_pat_support(c);
574
575#ifdef CONFIG_SMP 629#ifdef CONFIG_SMP
576 c->cpu_index = boot_cpu_id; 630 c->cpu_index = boot_cpu_id;
577#endif 631#endif
632 filter_cpuid_features(c, false);
578} 633}
579 634
580void __init early_cpu_init(void) 635void __init early_cpu_init(void)
@@ -637,7 +692,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
637 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; 692 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
638#ifdef CONFIG_X86_32 693#ifdef CONFIG_X86_32
639# ifdef CONFIG_X86_HT 694# ifdef CONFIG_X86_HT
640 c->apicid = phys_pkg_id(c->initial_apicid, 0); 695 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
641# else 696# else
642 c->apicid = c->initial_apicid; 697 c->apicid = c->initial_apicid;
643# endif 698# endif
@@ -684,7 +739,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
684 this_cpu->c_identify(c); 739 this_cpu->c_identify(c);
685 740
686#ifdef CONFIG_X86_64 741#ifdef CONFIG_X86_64
687 c->apicid = phys_pkg_id(0); 742 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
688#endif 743#endif
689 744
690 /* 745 /*
@@ -708,6 +763,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
708 * we do "generic changes." 763 * we do "generic changes."
709 */ 764 */
710 765
766 /* Filter out anything that depends on CPUID levels we don't have */
767 filter_cpuid_features(c, true);
768
711 /* If the model name is still unset, do table lookup. */ 769 /* If the model name is still unset, do table lookup. */
712 if (!c->x86_model_id[0]) { 770 if (!c->x86_model_id[0]) {
713 char *p; 771 char *p;
@@ -877,54 +935,22 @@ static __init int setup_disablecpuid(char *arg)
877__setup("clearcpuid=", setup_disablecpuid); 935__setup("clearcpuid=", setup_disablecpuid);
878 936
879#ifdef CONFIG_X86_64 937#ifdef CONFIG_X86_64
880struct x8664_pda **_cpu_pda __read_mostly;
881EXPORT_SYMBOL(_cpu_pda);
882
883struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 938struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
884 939
885static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 940DEFINE_PER_CPU_FIRST(union irq_stack_union,
941 irq_stack_union) __aligned(PAGE_SIZE);
942DEFINE_PER_CPU(char *, irq_stack_ptr) =
943 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
886 944
887void __cpuinit pda_init(int cpu) 945DEFINE_PER_CPU(unsigned long, kernel_stack) =
888{ 946 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
889 struct x8664_pda *pda = cpu_pda(cpu); 947EXPORT_PER_CPU_SYMBOL(kernel_stack);
890 948
891 /* Setup up data that may be needed in __get_free_pages early */ 949DEFINE_PER_CPU(unsigned int, irq_count) = -1;
892 loadsegment(fs, 0);
893 loadsegment(gs, 0);
894 /* Memory clobbers used to order PDA accessed */
895 mb();
896 wrmsrl(MSR_GS_BASE, pda);
897 mb();
898
899 pda->cpunumber = cpu;
900 pda->irqcount = -1;
901 pda->kernelstack = (unsigned long)stack_thread_info() -
902 PDA_STACKOFFSET + THREAD_SIZE;
903 pda->active_mm = &init_mm;
904 pda->mmu_state = 0;
905
906 if (cpu == 0) {
907 /* others are initialized in smpboot.c */
908 pda->pcurrent = &init_task;
909 pda->irqstackptr = boot_cpu_stack;
910 pda->irqstackptr += IRQSTACKSIZE - 64;
911 } else {
912 if (!pda->irqstackptr) {
913 pda->irqstackptr = (char *)
914 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
915 if (!pda->irqstackptr)
916 panic("cannot allocate irqstack for cpu %d",
917 cpu);
918 pda->irqstackptr += IRQSTACKSIZE - 64;
919 }
920 950
921 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 951static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
922 pda->nodenumber = cpu_to_node(cpu); 952 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
923 } 953 __aligned(PAGE_SIZE);
924}
925
926static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
927 DEBUG_STKSZ] __page_aligned_bss;
928 954
929extern asmlinkage void ignore_sysret(void); 955extern asmlinkage void ignore_sysret(void);
930 956
@@ -957,16 +983,21 @@ unsigned long kernel_eflags;
957 */ 983 */
958DEFINE_PER_CPU(struct orig_ist, orig_ist); 984DEFINE_PER_CPU(struct orig_ist, orig_ist);
959 985
960#else 986#else /* x86_64 */
961 987
962/* Make sure %fs is initialized properly in idle threads */ 988#ifdef CONFIG_CC_STACKPROTECTOR
989DEFINE_PER_CPU(unsigned long, stack_canary);
990#endif
991
992/* Make sure %fs and %gs are initialized properly in idle threads */
963struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 993struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
964{ 994{
965 memset(regs, 0, sizeof(struct pt_regs)); 995 memset(regs, 0, sizeof(struct pt_regs));
966 regs->fs = __KERNEL_PERCPU; 996 regs->fs = __KERNEL_PERCPU;
997 regs->gs = __KERNEL_STACK_CANARY;
967 return regs; 998 return regs;
968} 999}
969#endif 1000#endif /* x86_64 */
970 1001
971/* 1002/*
972 * cpu_init() initializes state that is per-CPU. Some data is already 1003 * cpu_init() initializes state that is per-CPU. Some data is already
@@ -982,15 +1013,14 @@ void __cpuinit cpu_init(void)
982 struct tss_struct *t = &per_cpu(init_tss, cpu); 1013 struct tss_struct *t = &per_cpu(init_tss, cpu);
983 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); 1014 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
984 unsigned long v; 1015 unsigned long v;
985 char *estacks = NULL;
986 struct task_struct *me; 1016 struct task_struct *me;
987 int i; 1017 int i;
988 1018
989 /* CPU 0 is initialised in head64.c */ 1019#ifdef CONFIG_NUMA
990 if (cpu != 0) 1020 if (cpu != 0 && percpu_read(node_number) == 0 &&
991 pda_init(cpu); 1021 cpu_to_node(cpu) != NUMA_NO_NODE)
992 else 1022 percpu_write(node_number, cpu_to_node(cpu));
993 estacks = boot_exception_stacks; 1023#endif
994 1024
995 me = current; 1025 me = current;
996 1026
@@ -1006,7 +1036,9 @@ void __cpuinit cpu_init(void)
1006 * and set up the GDT descriptor: 1036 * and set up the GDT descriptor:
1007 */ 1037 */
1008 1038
1009 switch_to_new_gdt(); 1039 switch_to_new_gdt(cpu);
1040 loadsegment(fs, 0);
1041
1010 load_idt((const struct desc_ptr *)&idt_descr); 1042 load_idt((const struct desc_ptr *)&idt_descr);
1011 1043
1012 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1044 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1017,25 +1049,20 @@ void __cpuinit cpu_init(void)
1017 barrier(); 1049 barrier();
1018 1050
1019 check_efer(); 1051 check_efer();
1020 if (cpu != 0 && x2apic) 1052 if (cpu != 0)
1021 enable_x2apic(); 1053 enable_x2apic();
1022 1054
1023 /* 1055 /*
1024 * set up and load the per-CPU TSS 1056 * set up and load the per-CPU TSS
1025 */ 1057 */
1026 if (!orig_ist->ist[0]) { 1058 if (!orig_ist->ist[0]) {
1027 static const unsigned int order[N_EXCEPTION_STACKS] = { 1059 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1028 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1060 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1029 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 1061 [DEBUG_STACK - 1] = DEBUG_STKSZ
1030 }; 1062 };
1063 char *estacks = per_cpu(exception_stacks, cpu);
1031 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1064 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1032 if (cpu) { 1065 estacks += sizes[v];
1033 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1034 if (!estacks)
1035 panic("Cannot allocate exception "
1036 "stack %ld %d\n", v, cpu);
1037 }
1038 estacks += PAGE_SIZE << order[v];
1039 orig_ist->ist[v] = t->x86_tss.ist[v] = 1066 orig_ist->ist[v] = t->x86_tss.ist[v] =
1040 (unsigned long)estacks; 1067 (unsigned long)estacks;
1041 } 1068 }
@@ -1069,22 +1096,19 @@ void __cpuinit cpu_init(void)
1069 */ 1096 */
1070 if (kgdb_connected && arch_kgdb_ops.correct_hw_break) 1097 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1071 arch_kgdb_ops.correct_hw_break(); 1098 arch_kgdb_ops.correct_hw_break();
1072 else { 1099 else
1073#endif 1100#endif
1074 /* 1101 {
1075 * Clear all 6 debug registers: 1102 /*
1076 */ 1103 * Clear all 6 debug registers:
1077 1104 */
1078 set_debugreg(0UL, 0); 1105 set_debugreg(0UL, 0);
1079 set_debugreg(0UL, 1); 1106 set_debugreg(0UL, 1);
1080 set_debugreg(0UL, 2); 1107 set_debugreg(0UL, 2);
1081 set_debugreg(0UL, 3); 1108 set_debugreg(0UL, 3);
1082 set_debugreg(0UL, 6); 1109 set_debugreg(0UL, 6);
1083 set_debugreg(0UL, 7); 1110 set_debugreg(0UL, 7);
1084#ifdef CONFIG_KGDB
1085 /* If the kgdb is connected no debug regs should be altered. */
1086 } 1111 }
1087#endif
1088 1112
1089 fpu_init(); 1113 fpu_init();
1090 1114
@@ -1114,7 +1138,7 @@ void __cpuinit cpu_init(void)
1114 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1138 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1115 1139
1116 load_idt(&idt_descr); 1140 load_idt(&idt_descr);
1117 switch_to_new_gdt(); 1141 switch_to_new_gdt(cpu);
1118 1142
1119 /* 1143 /*
1120 * Set up and load the per-CPU TSS and LDT 1144 * Set up and load the per-CPU TSS and LDT
@@ -1135,9 +1159,6 @@ void __cpuinit cpu_init(void)
1135 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1159 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1136#endif 1160#endif
1137 1161
1138 /* Clear %gs. */
1139 asm volatile ("mov %0, %%gs" : : "r" (0));
1140
1141 /* Clear all 6 debug registers: */ 1162 /* Clear all 6 debug registers: */
1142 set_debugreg(0, 0); 1163 set_debugreg(0, 0);
1143 set_debugreg(0, 1); 1164 set_debugreg(0, 1);
diff --git a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
index c2f930d86640..41ab3f064cb1 100644
--- a/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
+++ b/arch/x86/kernel/cpu/cpufreq/e_powersaver.c
@@ -204,12 +204,12 @@ static int eps_cpu_init(struct cpufreq_policy *policy)
204 } 204 }
205 /* Enable Enhanced PowerSaver */ 205 /* Enable Enhanced PowerSaver */
206 rdmsrl(MSR_IA32_MISC_ENABLE, val); 206 rdmsrl(MSR_IA32_MISC_ENABLE, val);
207 if (!(val & 1 << 16)) { 207 if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
208 val |= 1 << 16; 208 val |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
209 wrmsrl(MSR_IA32_MISC_ENABLE, val); 209 wrmsrl(MSR_IA32_MISC_ENABLE, val);
210 /* Can be locked at 0 */ 210 /* Can be locked at 0 */
211 rdmsrl(MSR_IA32_MISC_ENABLE, val); 211 rdmsrl(MSR_IA32_MISC_ENABLE, val);
212 if (!(val & 1 << 16)) { 212 if (!(val & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
213 printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n"); 213 printk(KERN_INFO "eps: Can't enable Enhanced PowerSaver\n");
214 return -ENODEV; 214 return -ENODEV;
215 } 215 }
diff --git a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
index f08998278a3a..c9f1fdc02830 100644
--- a/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
+++ b/arch/x86/kernel/cpu/cpufreq/speedstep-centrino.c
@@ -390,14 +390,14 @@ static int centrino_cpu_init(struct cpufreq_policy *policy)
390 enable it if not. */ 390 enable it if not. */
391 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 391 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
392 392
393 if (!(l & (1<<16))) { 393 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
394 l |= (1<<16); 394 l |= MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP;
395 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l); 395 dprintk("trying to enable Enhanced SpeedStep (%x)\n", l);
396 wrmsr(MSR_IA32_MISC_ENABLE, l, h); 396 wrmsr(MSR_IA32_MISC_ENABLE, l, h);
397 397
398 /* check to see if it stuck */ 398 /* check to see if it stuck */
399 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 399 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
400 if (!(l & (1<<16))) { 400 if (!(l & MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP)) {
401 printk(KERN_INFO PFX 401 printk(KERN_INFO PFX
402 "couldn't enable Enhanced SpeedStep\n"); 402 "couldn't enable Enhanced SpeedStep\n");
403 return -ENODEV; 403 return -ENODEV;
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 24ff26a38ade..25c559ba8d54 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -24,7 +24,6 @@
24#ifdef CONFIG_X86_LOCAL_APIC 24#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h> 25#include <asm/mpspec.h>
26#include <asm/apic.h> 26#include <asm/apic.h>
27#include <mach_apic.h>
28#endif 27#endif
29 28
30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 29static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@ -63,6 +62,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 62 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
64 } 63 }
65 64
65 /*
66 * There is a known erratum on Pentium III and Core Solo
67 * and Core Duo CPUs.
68 * " Page with PAT set to WC while associated MTRR is UC
69 * may consolidate to UC "
70 * Because of this erratum, it is better to stick with
71 * setting WC in MTRR rather than using PAT on these CPUs.
72 *
73 * Enable PAT WC only on P4, Core 2 or later CPUs.
74 */
75 if (c->x86 == 6 && c->x86_model < 15)
76 clear_cpu_cap(c, X86_FEATURE_PAT);
66} 77}
67 78
68#ifdef CONFIG_X86_32 79#ifdef CONFIG_X86_32
@@ -135,10 +146,10 @@ static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
135 */ 146 */
136 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { 147 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
137 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); 148 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
138 if ((lo & (1<<9)) == 0) { 149 if ((lo & MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE) == 0) {
139 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n"); 150 printk (KERN_INFO "CPU: C0 stepping P4 Xeon detected.\n");
140 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n"); 151 printk (KERN_INFO "CPU: Disabling hardware prefetching (Errata 037)\n");
141 lo |= (1<<9); /* Disable hw prefetching */ 152 lo |= MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE;
142 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 153 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
143 } 154 }
144 } 155 }
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index da299eb85fc0..7293508d8f5c 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -147,7 +147,16 @@ struct _cpuid4_info {
147 union _cpuid4_leaf_ecx ecx; 147 union _cpuid4_leaf_ecx ecx;
148 unsigned long size; 148 unsigned long size;
149 unsigned long can_disable; 149 unsigned long can_disable;
150 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 150 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
151};
152
153/* subset of above _cpuid4_info w/o shared_cpu_map */
154struct _cpuid4_info_regs {
155 union _cpuid4_leaf_eax eax;
156 union _cpuid4_leaf_ebx ebx;
157 union _cpuid4_leaf_ecx ecx;
158 unsigned long size;
159 unsigned long can_disable;
151}; 160};
152 161
153#ifdef CONFIG_PCI 162#ifdef CONFIG_PCI
@@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
278} 287}
279 288
280static void __cpuinit 289static void __cpuinit
281amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) 290amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
282{ 291{
283 if (index < 3) 292 if (index < 3)
284 return; 293 return;
@@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
286} 295}
287 296
288static int 297static int
289__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 298__cpuinit cpuid4_cache_lookup_regs(int index,
299 struct _cpuid4_info_regs *this_leaf)
290{ 300{
291 union _cpuid4_leaf_eax eax; 301 union _cpuid4_leaf_eax eax;
292 union _cpuid4_leaf_ebx ebx; 302 union _cpuid4_leaf_ebx ebx;
@@ -314,6 +324,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
314 return 0; 324 return 0;
315} 325}
316 326
327static int
328__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
329{
330 struct _cpuid4_info_regs *leaf_regs =
331 (struct _cpuid4_info_regs *)this_leaf;
332
333 return cpuid4_cache_lookup_regs(index, leaf_regs);
334}
335
317static int __cpuinit find_num_cache_leaves(void) 336static int __cpuinit find_num_cache_leaves(void)
318{ 337{
319 unsigned int eax, ebx, ecx, edx; 338 unsigned int eax, ebx, ecx, edx;
@@ -353,11 +372,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
353 * parameters cpuid leaf to find the cache details 372 * parameters cpuid leaf to find the cache details
354 */ 373 */
355 for (i = 0; i < num_cache_leaves; i++) { 374 for (i = 0; i < num_cache_leaves; i++) {
356 struct _cpuid4_info this_leaf; 375 struct _cpuid4_info_regs this_leaf;
357
358 int retval; 376 int retval;
359 377
360 retval = cpuid4_cache_lookup(i, &this_leaf); 378 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
361 if (retval >= 0) { 379 if (retval >= 0) {
362 switch(this_leaf.eax.split.level) { 380 switch(this_leaf.eax.split.level) {
363 case 1: 381 case 1:
@@ -506,17 +524,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
506 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 524 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
507 525
508 if (num_threads_sharing == 1) 526 if (num_threads_sharing == 1)
509 cpu_set(cpu, this_leaf->shared_cpu_map); 527 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
510 else { 528 else {
511 index_msb = get_count_order(num_threads_sharing); 529 index_msb = get_count_order(num_threads_sharing);
512 530
513 for_each_online_cpu(i) { 531 for_each_online_cpu(i) {
514 if (cpu_data(i).apicid >> index_msb == 532 if (cpu_data(i).apicid >> index_msb ==
515 c->apicid >> index_msb) { 533 c->apicid >> index_msb) {
516 cpu_set(i, this_leaf->shared_cpu_map); 534 cpumask_set_cpu(i,
535 to_cpumask(this_leaf->shared_cpu_map));
517 if (i != cpu && per_cpu(cpuid4_info, i)) { 536 if (i != cpu && per_cpu(cpuid4_info, i)) {
518 sibling_leaf = CPUID4_INFO_IDX(i, index); 537 sibling_leaf =
519 cpu_set(cpu, sibling_leaf->shared_cpu_map); 538 CPUID4_INFO_IDX(i, index);
539 cpumask_set_cpu(cpu, to_cpumask(
540 sibling_leaf->shared_cpu_map));
520 } 541 }
521 } 542 }
522 } 543 }
@@ -528,9 +549,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
528 int sibling; 549 int sibling;
529 550
530 this_leaf = CPUID4_INFO_IDX(cpu, index); 551 this_leaf = CPUID4_INFO_IDX(cpu, index);
531 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 552 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
532 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 553 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
533 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 554 cpumask_clear_cpu(cpu,
555 to_cpumask(sibling_leaf->shared_cpu_map));
534 } 556 }
535} 557}
536#else 558#else
@@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
635 int n = 0; 657 int n = 0;
636 658
637 if (len > 1) { 659 if (len > 1) {
638 cpumask_t *mask = &this_leaf->shared_cpu_map; 660 const struct cpumask *mask;
639 661
662 mask = to_cpumask(this_leaf->shared_cpu_map);
640 n = type? 663 n = type?
641 cpulist_scnprintf(buf, len-2, mask) : 664 cpulist_scnprintf(buf, len-2, mask) :
642 cpumask_scnprintf(buf, len-2, mask); 665 cpumask_scnprintf(buf, len-2, mask);
@@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node)
699 722
700static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) 723static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
701{ 724{
702 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 725 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
726 int node = cpu_to_node(cpumask_first(mask));
703 struct pci_dev *dev = NULL; 727 struct pci_dev *dev = NULL;
704 ssize_t ret = 0; 728 ssize_t ret = 0;
705 int i; 729 int i;
@@ -733,7 +757,8 @@ static ssize_t
733store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, 757store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
734 size_t count) 758 size_t count)
735{ 759{
736 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 760 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
761 int node = cpu_to_node(cpumask_first(mask));
737 struct pci_dev *dev = NULL; 762 struct pci_dev *dev = NULL;
738 unsigned int ret, index, val; 763 unsigned int ret, index, val;
739 764
@@ -878,7 +903,7 @@ err_out:
878 return -ENOMEM; 903 return -ENOMEM;
879} 904}
880 905
881static cpumask_t cache_dev_map = CPU_MASK_NONE; 906static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
882 907
883/* Add/Remove cache interface for CPU device */ 908/* Add/Remove cache interface for CPU device */
884static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 909static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
918 } 943 }
919 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 944 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
920 } 945 }
921 cpu_set(cpu, cache_dev_map); 946 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
922 947
923 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 948 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
924 return 0; 949 return 0;
@@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
931 956
932 if (per_cpu(cpuid4_info, cpu) == NULL) 957 if (per_cpu(cpuid4_info, cpu) == NULL)
933 return; 958 return;
934 if (!cpu_isset(cpu, cache_dev_map)) 959 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
935 return; 960 return;
936 cpu_clear(cpu, cache_dev_map); 961 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
937 962
938 for (i = 0; i < num_cache_leaves; i++) 963 for (i = 0; i < num_cache_leaves; i++)
939 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 964 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index ee8bfcd3aa32..c5a32f92d07e 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -477,7 +477,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
477 477
478#ifdef CONFIG_SMP 478#ifdef CONFIG_SMP
479 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 479 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
480 i = first_cpu(per_cpu(cpu_core_map, cpu)); 480 i = cpumask_first(&per_cpu(cpu_core_map, cpu));
481 481
482 /* first core not up yet */ 482 /* first core not up yet */
483 if (cpu_data(i).cpu_core_id) 483 if (cpu_data(i).cpu_core_id)
@@ -497,7 +497,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
497 if (err) 497 if (err)
498 goto out; 498 goto out;
499 499
500 b->cpus = per_cpu(cpu_core_map, cpu); 500 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
501 per_cpu(threshold_banks, cpu)[bank] = b; 501 per_cpu(threshold_banks, cpu)[bank] = b;
502 goto out; 502 goto out;
503 } 503 }
@@ -508,15 +508,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
508 err = -ENOMEM; 508 err = -ENOMEM;
509 goto out; 509 goto out;
510 } 510 }
511 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
512 kfree(b);
513 err = -ENOMEM;
514 goto out;
515 }
511 516
512 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 517 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
513 if (!b->kobj) 518 if (!b->kobj)
514 goto out_free; 519 goto out_free;
515 520
516#ifndef CONFIG_SMP 521#ifndef CONFIG_SMP
517 b->cpus = CPU_MASK_ALL; 522 cpumask_setall(b->cpus);
518#else 523#else
519 b->cpus = per_cpu(cpu_core_map, cpu); 524 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
520#endif 525#endif
521 526
522 per_cpu(threshold_banks, cpu)[bank] = b; 527 per_cpu(threshold_banks, cpu)[bank] = b;
@@ -525,7 +530,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
525 if (err) 530 if (err)
526 goto out_free; 531 goto out_free;
527 532
528 for_each_cpu_mask_nr(i, b->cpus) { 533 for_each_cpu(i, b->cpus) {
529 if (i == cpu) 534 if (i == cpu)
530 continue; 535 continue;
531 536
@@ -541,6 +546,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
541 546
542out_free: 547out_free:
543 per_cpu(threshold_banks, cpu)[bank] = NULL; 548 per_cpu(threshold_banks, cpu)[bank] = NULL;
549 free_cpumask_var(b->cpus);
544 kfree(b); 550 kfree(b);
545out: 551out:
546 return err; 552 return err;
@@ -615,7 +621,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
615#endif 621#endif
616 622
617 /* remove all sibling symlinks before unregistering */ 623 /* remove all sibling symlinks before unregistering */
618 for_each_cpu_mask_nr(i, b->cpus) { 624 for_each_cpu(i, b->cpus) {
619 if (i == cpu) 625 if (i == cpu)
620 continue; 626 continue;
621 627
@@ -628,6 +634,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
628free_out: 634free_out:
629 kobject_del(b->kobj); 635 kobject_del(b->kobj);
630 kobject_put(b->kobj); 636 kobject_put(b->kobj);
637 free_cpumask_var(b->cpus);
631 kfree(b); 638 kfree(b);
632 per_cpu(threshold_banks, cpu)[bank] = NULL; 639 per_cpu(threshold_banks, cpu)[bank] = NULL;
633} 640}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 7a2e10fcfa34..aaa7d9730938 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -9,6 +9,7 @@
9#include <linux/interrupt.h> 9#include <linux/interrupt.h>
10#include <linux/percpu.h> 10#include <linux/percpu.h>
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <asm/apic.h>
12#include <asm/msr.h> 13#include <asm/msr.h>
13#include <asm/mce.h> 14#include <asm/mce.h>
14#include <asm/hw_irq.h> 15#include <asm/hw_irq.h>
@@ -51,13 +52,13 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
51 */ 52 */
52 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 53 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
53 h = apic_read(APIC_LVTTHMR); 54 h = apic_read(APIC_LVTTHMR);
54 if ((l & (1 << 3)) && (h & APIC_DM_SMI)) { 55 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
55 printk(KERN_DEBUG 56 printk(KERN_DEBUG
56 "CPU%d: Thermal monitoring handled by SMI\n", cpu); 57 "CPU%d: Thermal monitoring handled by SMI\n", cpu);
57 return; 58 return;
58 } 59 }
59 60
60 if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13))) 61 if (cpu_has(c, X86_FEATURE_TM2) && (l & MSR_IA32_MISC_ENABLE_TM2))
61 tm2 = 1; 62 tm2 = 1;
62 63
63 if (h & APIC_VECTOR_MASK) { 64 if (h & APIC_VECTOR_MASK) {
@@ -75,7 +76,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
75 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h); 76 wrmsr(MSR_IA32_THERM_INTERRUPT, l | 0x03, h);
76 77
77 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 78 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
78 wrmsr(MSR_IA32_MISC_ENABLE, l | (1 << 3), h); 79 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
79 80
80 l = apic_read(APIC_LVTTHMR); 81 l = apic_read(APIC_LVTTHMR);
81 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 82 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/mcheck/p4.c b/arch/x86/kernel/cpu/mcheck/p4.c
index 9b60fce09f75..f53bdcbaf382 100644
--- a/arch/x86/kernel/cpu/mcheck/p4.c
+++ b/arch/x86/kernel/cpu/mcheck/p4.c
@@ -85,7 +85,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
85 */ 85 */
86 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 86 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
87 h = apic_read(APIC_LVTTHMR); 87 h = apic_read(APIC_LVTTHMR);
88 if ((l & (1<<3)) && (h & APIC_DM_SMI)) { 88 if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", 89 printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",
90 cpu); 90 cpu);
91 return; /* -EBUSY */ 91 return; /* -EBUSY */
@@ -111,7 +111,7 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
111 vendor_thermal_interrupt = intel_thermal_interrupt; 111 vendor_thermal_interrupt = intel_thermal_interrupt;
112 112
113 rdmsr(MSR_IA32_MISC_ENABLE, l, h); 113 rdmsr(MSR_IA32_MISC_ENABLE, l, h);
114 wrmsr(MSR_IA32_MISC_ENABLE, l | (1<<3), h); 114 wrmsr(MSR_IA32_MISC_ENABLE, l | MSR_IA32_MISC_ENABLE_TM1, h);
115 115
116 l = apic_read(APIC_LVTTHMR); 116 l = apic_read(APIC_LVTTHMR);
117 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 117 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c
index 9abd48b22674..f6c70a164e32 100644
--- a/arch/x86/kernel/cpu/perfctr-watchdog.c
+++ b/arch/x86/kernel/cpu/perfctr-watchdog.c
@@ -19,7 +19,7 @@
19#include <linux/nmi.h> 19#include <linux/nmi.h>
20#include <linux/kprobes.h> 20#include <linux/kprobes.h>
21 21
22#include <asm/apic.h> 22#include <asm/genapic.h>
23#include <asm/intel_arch_perfmon.h> 23#include <asm/intel_arch_perfmon.h>
24 24
25struct nmi_watchdog_ctlblk { 25struct nmi_watchdog_ctlblk {
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19e35ab..ff958248e61d 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,12 +24,10 @@
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/hpet.h> 25#include <asm/hpet.h>
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30 30
31#include <mach_ipi.h>
32
33 31
34#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 32#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
35 33
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6b1f6f6f8661..87d103ded1c3 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -99,7 +99,7 @@ print_context_stack(struct thread_info *tinfo,
99 frame = frame->next_frame; 99 frame = frame->next_frame;
100 bp = (unsigned long) frame; 100 bp = (unsigned long) frame;
101 } else { 101 } else {
102 ops->address(data, addr, bp == 0); 102 ops->address(data, addr, 0);
103 } 103 }
104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph); 104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
105 } 105 }
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d0707048..d35db5993fd6 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
106 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
107{ 107{
108 const unsigned cpu = get_cpu(); 108 const unsigned cpu = get_cpu();
109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irq_stack_end =
110 (unsigned long *)per_cpu(irq_stack_ptr, cpu);
110 unsigned used = 0; 111 unsigned used = 0;
111 struct thread_info *tinfo; 112 struct thread_info *tinfo;
112 int graph = 0; 113 int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *) estack_end[-2]; 161 stack = (unsigned long *) estack_end[-2];
161 continue; 162 continue;
162 } 163 }
163 if (irqstack_end) { 164 if (irq_stack_end) {
164 unsigned long *irqstack; 165 unsigned long *irq_stack;
165 irqstack = irqstack_end - 166 irq_stack = irq_stack_end -
166 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 167 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
167 168
168 if (stack >= irqstack && stack < irqstack_end) { 169 if (stack >= irq_stack && stack < irq_stack_end) {
169 if (ops->stack(data, "IRQ") < 0) 170 if (ops->stack(data, "IRQ") < 0)
170 break; 171 break;
171 bp = print_context_stack(tinfo, stack, bp, 172 bp = print_context_stack(tinfo, stack, bp,
172 ops, data, irqstack_end, &graph); 173 ops, data, irq_stack_end, &graph);
173 /* 174 /*
174 * We link to the next stack (which would be 175 * We link to the next stack (which would be
175 * the process stack normally) the last 176 * the process stack normally) the last
176 * pointer (index -1 to end) in the IRQ stack: 177 * pointer (index -1 to end) in the IRQ stack:
177 */ 178 */
178 stack = (unsigned long *) (irqstack_end[-1]); 179 stack = (unsigned long *) (irq_stack_end[-1]);
179 irqstack_end = NULL; 180 irq_stack_end = NULL;
180 ops->stack(data, "EOI"); 181 ops->stack(data, "EOI");
181 continue; 182 continue;
182 } 183 }
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
199 unsigned long *stack; 200 unsigned long *stack;
200 int i; 201 int i;
201 const int cpu = smp_processor_id(); 202 const int cpu = smp_processor_id();
202 unsigned long *irqstack_end = 203 unsigned long *irq_stack_end =
203 (unsigned long *) (cpu_pda(cpu)->irqstackptr); 204 (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
204 unsigned long *irqstack = 205 unsigned long *irq_stack =
205 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 206 (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
206 207
207 /* 208 /*
208 * debugging aid: "show_stack(NULL, NULL);" prints the 209 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
218 219
219 stack = sp; 220 stack = sp;
220 for (i = 0; i < kstack_depth_to_print; i++) { 221 for (i = 0; i < kstack_depth_to_print; i++) {
221 if (stack >= irqstack && stack <= irqstack_end) { 222 if (stack >= irq_stack && stack <= irq_stack_end) {
222 if (stack == irqstack_end) { 223 if (stack == irq_stack_end) {
223 stack = (unsigned long *) (irqstack_end[-1]); 224 stack = (unsigned long *) (irq_stack_end[-1]);
224 printk(" <EOI> "); 225 printk(" <EOI> ");
225 } 226 }
226 } else { 227 } else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
241 int i; 242 int i;
242 unsigned long sp; 243 unsigned long sp;
243 const int cpu = smp_processor_id(); 244 const int cpu = smp_processor_id();
244 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 245 struct task_struct *cur = current;
245 246
246 sp = regs->sp; 247 sp = regs->sp;
247 printk("CPU %d ", cpu); 248 printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/early_printk.c b/arch/x86/kernel/early_printk.c
index 504ad198e4ad..639ad98238a2 100644
--- a/arch/x86/kernel/early_printk.c
+++ b/arch/x86/kernel/early_printk.c
@@ -13,8 +13,8 @@
13#include <asm/setup.h> 13#include <asm/setup.h>
14#include <xen/hvc-console.h> 14#include <xen/hvc-console.h>
15#include <asm/pci-direct.h> 15#include <asm/pci-direct.h>
16#include <asm/pgtable.h>
17#include <asm/fixmap.h> 16#include <asm/fixmap.h>
17#include <asm/pgtable.h>
18#include <linux/usb/ehci_def.h> 18#include <linux/usb/ehci_def.h>
19 19
20/* Simple VGA output */ 20/* Simple VGA output */
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe11..b205272ad394 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,10 +366,12 @@ void __init efi_init(void)
366 SMBIOS_TABLE_GUID)) { 366 SMBIOS_TABLE_GUID)) {
367 efi.smbios = config_tables[i].table; 367 efi.smbios = config_tables[i].table;
368 printk(" SMBIOS=0x%lx ", config_tables[i].table); 368 printk(" SMBIOS=0x%lx ", config_tables[i].table);
369#ifdef CONFIG_X86_UV
369 } else if (!efi_guidcmp(config_tables[i].guid, 370 } else if (!efi_guidcmp(config_tables[i].guid,
370 UV_SYSTEM_TABLE_GUID)) { 371 UV_SYSTEM_TABLE_GUID)) {
371 efi.uv_systab = config_tables[i].table; 372 efi.uv_systab = config_tables[i].table;
372 printk(" UVsystab=0x%lx ", config_tables[i].table); 373 printk(" UVsystab=0x%lx ", config_tables[i].table);
374#endif
373 } else if (!efi_guidcmp(config_tables[i].guid, 375 } else if (!efi_guidcmp(config_tables[i].guid,
374 HCDP_TABLE_GUID)) { 376 HCDP_TABLE_GUID)) {
375 efi.hcdp = config_tables[i].table; 377 efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215f..a4ee29127fdf 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -36,6 +36,7 @@
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm/efi.h> 37#include <asm/efi.h>
38#include <asm/cacheflush.h> 38#include <asm/cacheflush.h>
39#include <asm/fixmap.h>
39 40
40static pgd_t save_pgd __initdata; 41static pgd_t save_pgd __initdata;
41static unsigned long efi_flags __initdata; 42static unsigned long efi_flags __initdata;
diff --git a/arch/x86/kernel/efi_stub_32.S b/arch/x86/kernel/efi_stub_32.S
index ef00bb77d7e4..fbe66e626c09 100644
--- a/arch/x86/kernel/efi_stub_32.S
+++ b/arch/x86/kernel/efi_stub_32.S
@@ -6,7 +6,7 @@
6 */ 6 */
7 7
8#include <linux/linkage.h> 8#include <linux/linkage.h>
9#include <asm/page.h> 9#include <asm/page_types.h>
10 10
11/* 11/*
12 * efi_call_phys(void *, ...) is a function with variable parameters. 12 * efi_call_phys(void *, ...) is a function with variable parameters.
@@ -113,6 +113,7 @@ ENTRY(efi_call_phys)
113 movl (%edx), %ecx 113 movl (%edx), %ecx
114 pushl %ecx 114 pushl %ecx
115 ret 115 ret
116ENDPROC(efi_call_phys)
116.previous 117.previous
117 118
118.data 119.data
diff --git a/arch/x86/kernel/efi_stub_64.S b/arch/x86/kernel/efi_stub_64.S
index 99b47d48c9f4..4c07ccab8146 100644
--- a/arch/x86/kernel/efi_stub_64.S
+++ b/arch/x86/kernel/efi_stub_64.S
@@ -41,6 +41,7 @@ ENTRY(efi_call0)
41 addq $32, %rsp 41 addq $32, %rsp
42 RESTORE_XMM 42 RESTORE_XMM
43 ret 43 ret
44ENDPROC(efi_call0)
44 45
45ENTRY(efi_call1) 46ENTRY(efi_call1)
46 SAVE_XMM 47 SAVE_XMM
@@ -50,6 +51,7 @@ ENTRY(efi_call1)
50 addq $32, %rsp 51 addq $32, %rsp
51 RESTORE_XMM 52 RESTORE_XMM
52 ret 53 ret
54ENDPROC(efi_call1)
53 55
54ENTRY(efi_call2) 56ENTRY(efi_call2)
55 SAVE_XMM 57 SAVE_XMM
@@ -59,6 +61,7 @@ ENTRY(efi_call2)
59 addq $32, %rsp 61 addq $32, %rsp
60 RESTORE_XMM 62 RESTORE_XMM
61 ret 63 ret
64ENDPROC(efi_call2)
62 65
63ENTRY(efi_call3) 66ENTRY(efi_call3)
64 SAVE_XMM 67 SAVE_XMM
@@ -69,6 +72,7 @@ ENTRY(efi_call3)
69 addq $32, %rsp 72 addq $32, %rsp
70 RESTORE_XMM 73 RESTORE_XMM
71 ret 74 ret
75ENDPROC(efi_call3)
72 76
73ENTRY(efi_call4) 77ENTRY(efi_call4)
74 SAVE_XMM 78 SAVE_XMM
@@ -80,6 +84,7 @@ ENTRY(efi_call4)
80 addq $32, %rsp 84 addq $32, %rsp
81 RESTORE_XMM 85 RESTORE_XMM
82 ret 86 ret
87ENDPROC(efi_call4)
83 88
84ENTRY(efi_call5) 89ENTRY(efi_call5)
85 SAVE_XMM 90 SAVE_XMM
@@ -92,6 +97,7 @@ ENTRY(efi_call5)
92 addq $48, %rsp 97 addq $48, %rsp
93 RESTORE_XMM 98 RESTORE_XMM
94 ret 99 ret
100ENDPROC(efi_call5)
95 101
96ENTRY(efi_call6) 102ENTRY(efi_call6)
97 SAVE_XMM 103 SAVE_XMM
@@ -107,3 +113,4 @@ ENTRY(efi_call6)
107 addq $48, %rsp 113 addq $48, %rsp
108 RESTORE_XMM 114 RESTORE_XMM
109 ret 115 ret
116ENDPROC(efi_call6)
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..899e8938e79f 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -30,12 +30,13 @@
30 * 1C(%esp) - %ds 30 * 1C(%esp) - %ds
31 * 20(%esp) - %es 31 * 20(%esp) - %es
32 * 24(%esp) - %fs 32 * 24(%esp) - %fs
33 * 28(%esp) - orig_eax 33 * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
34 * 2C(%esp) - %eip 34 * 2C(%esp) - orig_eax
35 * 30(%esp) - %cs 35 * 30(%esp) - %eip
36 * 34(%esp) - %eflags 36 * 34(%esp) - %cs
37 * 38(%esp) - %oldesp 37 * 38(%esp) - %eflags
38 * 3C(%esp) - %oldss 38 * 3C(%esp) - %oldesp
39 * 40(%esp) - %oldss
39 * 40 *
40 * "current" is in register %ebx during any slow entries. 41 * "current" is in register %ebx during any slow entries.
41 */ 42 */
@@ -46,7 +47,7 @@
46#include <asm/errno.h> 47#include <asm/errno.h>
47#include <asm/segment.h> 48#include <asm/segment.h>
48#include <asm/smp.h> 49#include <asm/smp.h>
49#include <asm/page.h> 50#include <asm/page_types.h>
50#include <asm/desc.h> 51#include <asm/desc.h>
51#include <asm/percpu.h> 52#include <asm/percpu.h>
52#include <asm/dwarf2.h> 53#include <asm/dwarf2.h>
@@ -101,121 +102,221 @@
101#define resume_userspace_sig resume_userspace 102#define resume_userspace_sig resume_userspace
102#endif 103#endif
103 104
104#define SAVE_ALL \ 105/*
105 cld; \ 106 * User gs save/restore
106 pushl %fs; \ 107 *
107 CFI_ADJUST_CFA_OFFSET 4;\ 108 * %gs is used for userland TLS and kernel only uses it for stack
108 /*CFI_REL_OFFSET fs, 0;*/\ 109 * canary which is required to be at %gs:20 by gcc. Read the comment
109 pushl %es; \ 110 * at the top of stackprotector.h for more info.
110 CFI_ADJUST_CFA_OFFSET 4;\ 111 *
111 /*CFI_REL_OFFSET es, 0;*/\ 112 * Local labels 98 and 99 are used.
112 pushl %ds; \ 113 */
113 CFI_ADJUST_CFA_OFFSET 4;\ 114#ifdef CONFIG_X86_32_LAZY_GS
114 /*CFI_REL_OFFSET ds, 0;*/\ 115
115 pushl %eax; \ 116 /* unfortunately push/pop can't be no-op */
116 CFI_ADJUST_CFA_OFFSET 4;\ 117.macro PUSH_GS
117 CFI_REL_OFFSET eax, 0;\ 118 pushl $0
118 pushl %ebp; \ 119 CFI_ADJUST_CFA_OFFSET 4
119 CFI_ADJUST_CFA_OFFSET 4;\ 120.endm
120 CFI_REL_OFFSET ebp, 0;\ 121.macro POP_GS pop=0
121 pushl %edi; \ 122 addl $(4 + \pop), %esp
122 CFI_ADJUST_CFA_OFFSET 4;\ 123 CFI_ADJUST_CFA_OFFSET -(4 + \pop)
123 CFI_REL_OFFSET edi, 0;\ 124.endm
124 pushl %esi; \ 125.macro POP_GS_EX
125 CFI_ADJUST_CFA_OFFSET 4;\ 126.endm
126 CFI_REL_OFFSET esi, 0;\ 127
127 pushl %edx; \ 128 /* all the rest are no-op */
128 CFI_ADJUST_CFA_OFFSET 4;\ 129.macro PTGS_TO_GS
129 CFI_REL_OFFSET edx, 0;\ 130.endm
130 pushl %ecx; \ 131.macro PTGS_TO_GS_EX
131 CFI_ADJUST_CFA_OFFSET 4;\ 132.endm
132 CFI_REL_OFFSET ecx, 0;\ 133.macro GS_TO_REG reg
133 pushl %ebx; \ 134.endm
134 CFI_ADJUST_CFA_OFFSET 4;\ 135.macro REG_TO_PTGS reg
135 CFI_REL_OFFSET ebx, 0;\ 136.endm
136 movl $(__USER_DS), %edx; \ 137.macro SET_KERNEL_GS reg
137 movl %edx, %ds; \ 138.endm
138 movl %edx, %es; \ 139
139 movl $(__KERNEL_PERCPU), %edx; \ 140#else /* CONFIG_X86_32_LAZY_GS */
141
142.macro PUSH_GS
143 pushl %gs
144 CFI_ADJUST_CFA_OFFSET 4
145 /*CFI_REL_OFFSET gs, 0*/
146.endm
147
148.macro POP_GS pop=0
14998: popl %gs
150 CFI_ADJUST_CFA_OFFSET -4
151 /*CFI_RESTORE gs*/
152 .if \pop <> 0
153 add $\pop, %esp
154 CFI_ADJUST_CFA_OFFSET -\pop
155 .endif
156.endm
157.macro POP_GS_EX
158.pushsection .fixup, "ax"
15999: movl $0, (%esp)
160 jmp 98b
161.section __ex_table, "a"
162 .align 4
163 .long 98b, 99b
164.popsection
165.endm
166
167.macro PTGS_TO_GS
16898: mov PT_GS(%esp), %gs
169.endm
170.macro PTGS_TO_GS_EX
171.pushsection .fixup, "ax"
17299: movl $0, PT_GS(%esp)
173 jmp 98b
174.section __ex_table, "a"
175 .align 4
176 .long 98b, 99b
177.popsection
178.endm
179
180.macro GS_TO_REG reg
181 movl %gs, \reg
182 /*CFI_REGISTER gs, \reg*/
183.endm
184.macro REG_TO_PTGS reg
185 movl \reg, PT_GS(%esp)
186 /*CFI_REL_OFFSET gs, PT_GS*/
187.endm
188.macro SET_KERNEL_GS reg
189 movl $(__KERNEL_STACK_CANARY), \reg
190 movl \reg, %gs
191.endm
192
193#endif /* CONFIG_X86_32_LAZY_GS */
194
195.macro SAVE_ALL
196 cld
197 PUSH_GS
198 pushl %fs
199 CFI_ADJUST_CFA_OFFSET 4
200 /*CFI_REL_OFFSET fs, 0;*/
201 pushl %es
202 CFI_ADJUST_CFA_OFFSET 4
203 /*CFI_REL_OFFSET es, 0;*/
204 pushl %ds
205 CFI_ADJUST_CFA_OFFSET 4
206 /*CFI_REL_OFFSET ds, 0;*/
207 pushl %eax
208 CFI_ADJUST_CFA_OFFSET 4
209 CFI_REL_OFFSET eax, 0
210 pushl %ebp
211 CFI_ADJUST_CFA_OFFSET 4
212 CFI_REL_OFFSET ebp, 0
213 pushl %edi
214 CFI_ADJUST_CFA_OFFSET 4
215 CFI_REL_OFFSET edi, 0
216 pushl %esi
217 CFI_ADJUST_CFA_OFFSET 4
218 CFI_REL_OFFSET esi, 0
219 pushl %edx
220 CFI_ADJUST_CFA_OFFSET 4
221 CFI_REL_OFFSET edx, 0
222 pushl %ecx
223 CFI_ADJUST_CFA_OFFSET 4
224 CFI_REL_OFFSET ecx, 0
225 pushl %ebx
226 CFI_ADJUST_CFA_OFFSET 4
227 CFI_REL_OFFSET ebx, 0
228 movl $(__USER_DS), %edx
229 movl %edx, %ds
230 movl %edx, %es
231 movl $(__KERNEL_PERCPU), %edx
140 movl %edx, %fs 232 movl %edx, %fs
233 SET_KERNEL_GS %edx
234.endm
141 235
142#define RESTORE_INT_REGS \ 236.macro RESTORE_INT_REGS
143 popl %ebx; \ 237 popl %ebx
144 CFI_ADJUST_CFA_OFFSET -4;\ 238 CFI_ADJUST_CFA_OFFSET -4
145 CFI_RESTORE ebx;\ 239 CFI_RESTORE ebx
146 popl %ecx; \ 240 popl %ecx
147 CFI_ADJUST_CFA_OFFSET -4;\ 241 CFI_ADJUST_CFA_OFFSET -4
148 CFI_RESTORE ecx;\ 242 CFI_RESTORE ecx
149 popl %edx; \ 243 popl %edx
150 CFI_ADJUST_CFA_OFFSET -4;\ 244 CFI_ADJUST_CFA_OFFSET -4
151 CFI_RESTORE edx;\ 245 CFI_RESTORE edx
152 popl %esi; \ 246 popl %esi
153 CFI_ADJUST_CFA_OFFSET -4;\ 247 CFI_ADJUST_CFA_OFFSET -4
154 CFI_RESTORE esi;\ 248 CFI_RESTORE esi
155 popl %edi; \ 249 popl %edi
156 CFI_ADJUST_CFA_OFFSET -4;\ 250 CFI_ADJUST_CFA_OFFSET -4
157 CFI_RESTORE edi;\ 251 CFI_RESTORE edi
158 popl %ebp; \ 252 popl %ebp
159 CFI_ADJUST_CFA_OFFSET -4;\ 253 CFI_ADJUST_CFA_OFFSET -4
160 CFI_RESTORE ebp;\ 254 CFI_RESTORE ebp
161 popl %eax; \ 255 popl %eax
162 CFI_ADJUST_CFA_OFFSET -4;\ 256 CFI_ADJUST_CFA_OFFSET -4
163 CFI_RESTORE eax 257 CFI_RESTORE eax
258.endm
164 259
165#define RESTORE_REGS \ 260.macro RESTORE_REGS pop=0
166 RESTORE_INT_REGS; \ 261 RESTORE_INT_REGS
1671: popl %ds; \ 2621: popl %ds
168 CFI_ADJUST_CFA_OFFSET -4;\ 263 CFI_ADJUST_CFA_OFFSET -4
169 /*CFI_RESTORE ds;*/\ 264 /*CFI_RESTORE ds;*/
1702: popl %es; \ 2652: popl %es
171 CFI_ADJUST_CFA_OFFSET -4;\ 266 CFI_ADJUST_CFA_OFFSET -4
172 /*CFI_RESTORE es;*/\ 267 /*CFI_RESTORE es;*/
1733: popl %fs; \ 2683: popl %fs
174 CFI_ADJUST_CFA_OFFSET -4;\ 269 CFI_ADJUST_CFA_OFFSET -4
175 /*CFI_RESTORE fs;*/\ 270 /*CFI_RESTORE fs;*/
176.pushsection .fixup,"ax"; \ 271 POP_GS \pop
1774: movl $0,(%esp); \ 272.pushsection .fixup, "ax"
178 jmp 1b; \ 2734: movl $0, (%esp)
1795: movl $0,(%esp); \ 274 jmp 1b
180 jmp 2b; \ 2755: movl $0, (%esp)
1816: movl $0,(%esp); \ 276 jmp 2b
182 jmp 3b; \ 2776: movl $0, (%esp)
183.section __ex_table,"a";\ 278 jmp 3b
184 .align 4; \ 279.section __ex_table, "a"
185 .long 1b,4b; \ 280 .align 4
186 .long 2b,5b; \ 281 .long 1b, 4b
187 .long 3b,6b; \ 282 .long 2b, 5b
283 .long 3b, 6b
188.popsection 284.popsection
285 POP_GS_EX
286.endm
189 287
190#define RING0_INT_FRAME \ 288.macro RING0_INT_FRAME
191 CFI_STARTPROC simple;\ 289 CFI_STARTPROC simple
192 CFI_SIGNAL_FRAME;\ 290 CFI_SIGNAL_FRAME
193 CFI_DEF_CFA esp, 3*4;\ 291 CFI_DEF_CFA esp, 3*4
194 /*CFI_OFFSET cs, -2*4;*/\ 292 /*CFI_OFFSET cs, -2*4;*/
195 CFI_OFFSET eip, -3*4 293 CFI_OFFSET eip, -3*4
294.endm
196 295
197#define RING0_EC_FRAME \ 296.macro RING0_EC_FRAME
198 CFI_STARTPROC simple;\ 297 CFI_STARTPROC simple
199 CFI_SIGNAL_FRAME;\ 298 CFI_SIGNAL_FRAME
200 CFI_DEF_CFA esp, 4*4;\ 299 CFI_DEF_CFA esp, 4*4
201 /*CFI_OFFSET cs, -2*4;*/\ 300 /*CFI_OFFSET cs, -2*4;*/
202 CFI_OFFSET eip, -3*4 301 CFI_OFFSET eip, -3*4
302.endm
203 303
204#define RING0_PTREGS_FRAME \ 304.macro RING0_PTREGS_FRAME
205 CFI_STARTPROC simple;\ 305 CFI_STARTPROC simple
206 CFI_SIGNAL_FRAME;\ 306 CFI_SIGNAL_FRAME
207 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ 307 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
208 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ 308 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
209 CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ 309 CFI_OFFSET eip, PT_EIP-PT_OLDESP
210 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ 310 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
211 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ 311 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
212 CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ 312 CFI_OFFSET eax, PT_EAX-PT_OLDESP
213 CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ 313 CFI_OFFSET ebp, PT_EBP-PT_OLDESP
214 CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ 314 CFI_OFFSET edi, PT_EDI-PT_OLDESP
215 CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ 315 CFI_OFFSET esi, PT_ESI-PT_OLDESP
216 CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ 316 CFI_OFFSET edx, PT_EDX-PT_OLDESP
217 CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ 317 CFI_OFFSET ecx, PT_ECX-PT_OLDESP
218 CFI_OFFSET ebx, PT_EBX-PT_OLDESP 318 CFI_OFFSET ebx, PT_EBX-PT_OLDESP
319.endm
219 320
220ENTRY(ret_from_fork) 321ENTRY(ret_from_fork)
221 CFI_STARTPROC 322 CFI_STARTPROC
@@ -362,6 +463,7 @@ sysenter_exit:
362 xorl %ebp,%ebp 463 xorl %ebp,%ebp
363 TRACE_IRQS_ON 464 TRACE_IRQS_ON
3641: mov PT_FS(%esp), %fs 4651: mov PT_FS(%esp), %fs
466 PTGS_TO_GS
365 ENABLE_INTERRUPTS_SYSEXIT 467 ENABLE_INTERRUPTS_SYSEXIT
366 468
367#ifdef CONFIG_AUDITSYSCALL 469#ifdef CONFIG_AUDITSYSCALL
@@ -410,6 +512,7 @@ sysexit_audit:
410 .align 4 512 .align 4
411 .long 1b,2b 513 .long 1b,2b
412.popsection 514.popsection
515 PTGS_TO_GS_EX
413ENDPROC(ia32_sysenter_target) 516ENDPROC(ia32_sysenter_target)
414 517
415 # system call handler stub 518 # system call handler stub
@@ -452,8 +555,7 @@ restore_all:
452restore_nocheck: 555restore_nocheck:
453 TRACE_IRQS_IRET 556 TRACE_IRQS_IRET
454restore_nocheck_notrace: 557restore_nocheck_notrace:
455 RESTORE_REGS 558 RESTORE_REGS 4 # skip orig_eax/error_code
456 addl $4, %esp # skip orig_eax/error_code
457 CFI_ADJUST_CFA_OFFSET -4 559 CFI_ADJUST_CFA_OFFSET -4
458irq_return: 560irq_return:
459 INTERRUPT_RETURN 561 INTERRUPT_RETURN
@@ -595,28 +697,50 @@ syscall_badsys:
595END(syscall_badsys) 697END(syscall_badsys)
596 CFI_ENDPROC 698 CFI_ENDPROC
597 699
598#define FIXUP_ESPFIX_STACK \ 700/*
599 /* since we are on a wrong stack, we cant make it a C code :( */ \ 701 * System calls that need a pt_regs pointer.
600 PER_CPU(gdt_page, %ebx); \ 702 */
601 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ 703#define PTREGSCALL(name) \
602 addl %esp, %eax; \ 704 ALIGN; \
603 pushl $__KERNEL_DS; \ 705ptregs_##name: \
604 CFI_ADJUST_CFA_OFFSET 4; \ 706 leal 4(%esp),%eax; \
605 pushl %eax; \ 707 jmp sys_##name;
606 CFI_ADJUST_CFA_OFFSET 4; \ 708
607 lss (%esp), %esp; \ 709PTREGSCALL(iopl)
608 CFI_ADJUST_CFA_OFFSET -8; 710PTREGSCALL(fork)
609#define UNWIND_ESPFIX_STACK \ 711PTREGSCALL(clone)
610 movl %ss, %eax; \ 712PTREGSCALL(vfork)
611 /* see if on espfix stack */ \ 713PTREGSCALL(execve)
612 cmpw $__ESPFIX_SS, %ax; \ 714PTREGSCALL(sigaltstack)
613 jne 27f; \ 715PTREGSCALL(sigreturn)
614 movl $__KERNEL_DS, %eax; \ 716PTREGSCALL(rt_sigreturn)
615 movl %eax, %ds; \ 717PTREGSCALL(vm86)
616 movl %eax, %es; \ 718PTREGSCALL(vm86old)
617 /* switch to normal stack */ \ 719
618 FIXUP_ESPFIX_STACK; \ 720.macro FIXUP_ESPFIX_STACK
61927:; 721 /* since we are on a wrong stack, we cant make it a C code :( */
722 PER_CPU(gdt_page, %ebx)
723 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
724 addl %esp, %eax
725 pushl $__KERNEL_DS
726 CFI_ADJUST_CFA_OFFSET 4
727 pushl %eax
728 CFI_ADJUST_CFA_OFFSET 4
729 lss (%esp), %esp
730 CFI_ADJUST_CFA_OFFSET -8
731.endm
732.macro UNWIND_ESPFIX_STACK
733 movl %ss, %eax
734 /* see if on espfix stack */
735 cmpw $__ESPFIX_SS, %ax
736 jne 27f
737 movl $__KERNEL_DS, %eax
738 movl %eax, %ds
739 movl %eax, %es
740 /* switch to normal stack */
741 FIXUP_ESPFIX_STACK
74227:
743.endm
620 744
621/* 745/*
622 * Build the entry stubs and pointer table with some assembler magic. 746 * Build the entry stubs and pointer table with some assembler magic.
@@ -672,7 +796,7 @@ common_interrupt:
672ENDPROC(common_interrupt) 796ENDPROC(common_interrupt)
673 CFI_ENDPROC 797 CFI_ENDPROC
674 798
675#define BUILD_INTERRUPT(name, nr) \ 799#define BUILD_INTERRUPT3(name, nr, fn) \
676ENTRY(name) \ 800ENTRY(name) \
677 RING0_INT_FRAME; \ 801 RING0_INT_FRAME; \
678 pushl $~(nr); \ 802 pushl $~(nr); \
@@ -680,13 +804,15 @@ ENTRY(name) \
680 SAVE_ALL; \ 804 SAVE_ALL; \
681 TRACE_IRQS_OFF \ 805 TRACE_IRQS_OFF \
682 movl %esp,%eax; \ 806 movl %esp,%eax; \
683 call smp_##name; \ 807 call fn; \
684 jmp ret_from_intr; \ 808 jmp ret_from_intr; \
685 CFI_ENDPROC; \ 809 CFI_ENDPROC; \
686ENDPROC(name) 810ENDPROC(name)
687 811
812#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
813
688/* The include is where all of the SMP etc. interrupts come from */ 814/* The include is where all of the SMP etc. interrupts come from */
689#include "entry_arch.h" 815#include <asm/entry_arch.h>
690 816
691ENTRY(coprocessor_error) 817ENTRY(coprocessor_error)
692 RING0_INT_FRAME 818 RING0_INT_FRAME
@@ -1068,7 +1194,10 @@ ENTRY(page_fault)
1068 CFI_ADJUST_CFA_OFFSET 4 1194 CFI_ADJUST_CFA_OFFSET 4
1069 ALIGN 1195 ALIGN
1070error_code: 1196error_code:
1071 /* the function address is in %fs's slot on the stack */ 1197 /* the function address is in %gs's slot on the stack */
1198 pushl %fs
1199 CFI_ADJUST_CFA_OFFSET 4
1200 /*CFI_REL_OFFSET fs, 0*/
1072 pushl %es 1201 pushl %es
1073 CFI_ADJUST_CFA_OFFSET 4 1202 CFI_ADJUST_CFA_OFFSET 4
1074 /*CFI_REL_OFFSET es, 0*/ 1203 /*CFI_REL_OFFSET es, 0*/
@@ -1097,20 +1226,15 @@ error_code:
1097 CFI_ADJUST_CFA_OFFSET 4 1226 CFI_ADJUST_CFA_OFFSET 4
1098 CFI_REL_OFFSET ebx, 0 1227 CFI_REL_OFFSET ebx, 0
1099 cld 1228 cld
1100 pushl %fs
1101 CFI_ADJUST_CFA_OFFSET 4
1102 /*CFI_REL_OFFSET fs, 0*/
1103 movl $(__KERNEL_PERCPU), %ecx 1229 movl $(__KERNEL_PERCPU), %ecx
1104 movl %ecx, %fs 1230 movl %ecx, %fs
1105 UNWIND_ESPFIX_STACK 1231 UNWIND_ESPFIX_STACK
1106 popl %ecx 1232 GS_TO_REG %ecx
1107 CFI_ADJUST_CFA_OFFSET -4 1233 movl PT_GS(%esp), %edi # get the function address
1108 /*CFI_REGISTER es, ecx*/
1109 movl PT_FS(%esp), %edi # get the function address
1110 movl PT_ORIG_EAX(%esp), %edx # get the error code 1234 movl PT_ORIG_EAX(%esp), %edx # get the error code
1111 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart 1235 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1112 mov %ecx, PT_FS(%esp) 1236 REG_TO_PTGS %ecx
1113 /*CFI_REL_OFFSET fs, ES*/ 1237 SET_KERNEL_GS %ecx
1114 movl $(__USER_DS), %ecx 1238 movl $(__USER_DS), %ecx
1115 movl %ecx, %ds 1239 movl %ecx, %ds
1116 movl %ecx, %es 1240 movl %ecx, %es
@@ -1134,26 +1258,27 @@ END(page_fault)
1134 * by hand onto the new stack - while updating the return eip past 1258 * by hand onto the new stack - while updating the return eip past
1135 * the instruction that would have done it for sysenter. 1259 * the instruction that would have done it for sysenter.
1136 */ 1260 */
1137#define FIX_STACK(offset, ok, label) \ 1261.macro FIX_STACK offset ok label
1138 cmpw $__KERNEL_CS,4(%esp); \ 1262 cmpw $__KERNEL_CS, 4(%esp)
1139 jne ok; \ 1263 jne \ok
1140label: \ 1264\label:
1141 movl TSS_sysenter_sp0+offset(%esp),%esp; \ 1265 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1142 CFI_DEF_CFA esp, 0; \ 1266 CFI_DEF_CFA esp, 0
1143 CFI_UNDEFINED eip; \ 1267 CFI_UNDEFINED eip
1144 pushfl; \ 1268 pushfl
1145 CFI_ADJUST_CFA_OFFSET 4; \ 1269 CFI_ADJUST_CFA_OFFSET 4
1146 pushl $__KERNEL_CS; \ 1270 pushl $__KERNEL_CS
1147 CFI_ADJUST_CFA_OFFSET 4; \ 1271 CFI_ADJUST_CFA_OFFSET 4
1148 pushl $sysenter_past_esp; \ 1272 pushl $sysenter_past_esp
1149 CFI_ADJUST_CFA_OFFSET 4; \ 1273 CFI_ADJUST_CFA_OFFSET 4
1150 CFI_REL_OFFSET eip, 0 1274 CFI_REL_OFFSET eip, 0
1275.endm
1151 1276
1152ENTRY(debug) 1277ENTRY(debug)
1153 RING0_INT_FRAME 1278 RING0_INT_FRAME
1154 cmpl $ia32_sysenter_target,(%esp) 1279 cmpl $ia32_sysenter_target,(%esp)
1155 jne debug_stack_correct 1280 jne debug_stack_correct
1156 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) 1281 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1157debug_stack_correct: 1282debug_stack_correct:
1158 pushl $-1 # mark this as an int 1283 pushl $-1 # mark this as an int
1159 CFI_ADJUST_CFA_OFFSET 4 1284 CFI_ADJUST_CFA_OFFSET 4
@@ -1211,7 +1336,7 @@ nmi_stack_correct:
1211 1336
1212nmi_stack_fixup: 1337nmi_stack_fixup:
1213 RING0_INT_FRAME 1338 RING0_INT_FRAME
1214 FIX_STACK(12,nmi_stack_correct, 1) 1339 FIX_STACK 12, nmi_stack_correct, 1
1215 jmp nmi_stack_correct 1340 jmp nmi_stack_correct
1216 1341
1217nmi_debug_stack_check: 1342nmi_debug_stack_check:
@@ -1222,7 +1347,7 @@ nmi_debug_stack_check:
1222 jb nmi_stack_correct 1347 jb nmi_stack_correct
1223 cmpl $debug_esp_fix_insn,(%esp) 1348 cmpl $debug_esp_fix_insn,(%esp)
1224 ja nmi_stack_correct 1349 ja nmi_stack_correct
1225 FIX_STACK(24,nmi_stack_correct, 1) 1350 FIX_STACK 24, nmi_stack_correct, 1
1226 jmp nmi_stack_correct 1351 jmp nmi_stack_correct
1227 1352
1228nmi_espfix_stack: 1353nmi_espfix_stack:
@@ -1234,7 +1359,7 @@ nmi_espfix_stack:
1234 CFI_ADJUST_CFA_OFFSET 4 1359 CFI_ADJUST_CFA_OFFSET 4
1235 pushl %esp 1360 pushl %esp
1236 CFI_ADJUST_CFA_OFFSET 4 1361 CFI_ADJUST_CFA_OFFSET 4
1237 addw $4, (%esp) 1362 addl $4, (%esp)
1238 /* copy the iret frame of 12 bytes */ 1363 /* copy the iret frame of 12 bytes */
1239 .rept 3 1364 .rept 3
1240 pushl 16(%esp) 1365 pushl 16(%esp)
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a1346217e43c..83d1836b9467 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -48,10 +48,11 @@
48#include <asm/unistd.h> 48#include <asm/unistd.h>
49#include <asm/thread_info.h> 49#include <asm/thread_info.h>
50#include <asm/hw_irq.h> 50#include <asm/hw_irq.h>
51#include <asm/page.h> 51#include <asm/page_types.h>
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/percpu.h>
55 56
56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h> 58#include <linux/elf-em.h>
@@ -76,20 +77,17 @@ ENTRY(ftrace_caller)
76 movq 8(%rbp), %rsi 77 movq 8(%rbp), %rsi
77 subq $MCOUNT_INSN_SIZE, %rdi 78 subq $MCOUNT_INSN_SIZE, %rdi
78 79
79.globl ftrace_call 80GLOBAL(ftrace_call)
80ftrace_call:
81 call ftrace_stub 81 call ftrace_stub
82 82
83 MCOUNT_RESTORE_FRAME 83 MCOUNT_RESTORE_FRAME
84 84
85#ifdef CONFIG_FUNCTION_GRAPH_TRACER 85#ifdef CONFIG_FUNCTION_GRAPH_TRACER
86.globl ftrace_graph_call 86GLOBAL(ftrace_graph_call)
87ftrace_graph_call:
88 jmp ftrace_stub 87 jmp ftrace_stub
89#endif 88#endif
90 89
91.globl ftrace_stub 90GLOBAL(ftrace_stub)
92ftrace_stub:
93 retq 91 retq
94END(ftrace_caller) 92END(ftrace_caller)
95 93
@@ -109,8 +107,7 @@ ENTRY(mcount)
109 jnz ftrace_graph_caller 107 jnz ftrace_graph_caller
110#endif 108#endif
111 109
112.globl ftrace_stub 110GLOBAL(ftrace_stub)
113ftrace_stub:
114 retq 111 retq
115 112
116trace: 113trace:
@@ -147,9 +144,7 @@ ENTRY(ftrace_graph_caller)
147 retq 144 retq
148END(ftrace_graph_caller) 145END(ftrace_graph_caller)
149 146
150 147GLOBAL(return_to_handler)
151.globl return_to_handler
152return_to_handler:
153 subq $80, %rsp 148 subq $80, %rsp
154 149
155 movq %rax, (%rsp) 150 movq %rax, (%rsp)
@@ -187,6 +182,7 @@ return_to_handler:
187ENTRY(native_usergs_sysret64) 182ENTRY(native_usergs_sysret64)
188 swapgs 183 swapgs
189 sysretq 184 sysretq
185ENDPROC(native_usergs_sysret64)
190#endif /* CONFIG_PARAVIRT */ 186#endif /* CONFIG_PARAVIRT */
191 187
192 188
@@ -209,7 +205,7 @@ ENTRY(native_usergs_sysret64)
209 205
210 /* %rsp:at FRAMEEND */ 206 /* %rsp:at FRAMEEND */
211 .macro FIXUP_TOP_OF_STACK tmp offset=0 207 .macro FIXUP_TOP_OF_STACK tmp offset=0
212 movq %gs:pda_oldrsp,\tmp 208 movq PER_CPU_VAR(old_rsp),\tmp
213 movq \tmp,RSP+\offset(%rsp) 209 movq \tmp,RSP+\offset(%rsp)
214 movq $__USER_DS,SS+\offset(%rsp) 210 movq $__USER_DS,SS+\offset(%rsp)
215 movq $__USER_CS,CS+\offset(%rsp) 211 movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +216,7 @@ ENTRY(native_usergs_sysret64)
220 216
221 .macro RESTORE_TOP_OF_STACK tmp offset=0 217 .macro RESTORE_TOP_OF_STACK tmp offset=0
222 movq RSP+\offset(%rsp),\tmp 218 movq RSP+\offset(%rsp),\tmp
223 movq \tmp,%gs:pda_oldrsp 219 movq \tmp,PER_CPU_VAR(old_rsp)
224 movq EFLAGS+\offset(%rsp),\tmp 220 movq EFLAGS+\offset(%rsp),\tmp
225 movq \tmp,R11+\offset(%rsp) 221 movq \tmp,R11+\offset(%rsp)
226 .endm 222 .endm
@@ -336,15 +332,15 @@ ENTRY(save_args)
336 je 1f 332 je 1f
337 SWAPGS 333 SWAPGS
338 /* 334 /*
339 * irqcount is used to check if a CPU is already on an interrupt stack 335 * irq_count is used to check if a CPU is already on an interrupt stack
340 * or not. While this is essentially redundant with preempt_count it is 336 * or not. While this is essentially redundant with preempt_count it is
341 * a little cheaper to use a separate counter in the PDA (short of 337 * a little cheaper to use a separate counter in the PDA (short of
342 * moving irq_enter into assembly, which would be too much work) 338 * moving irq_enter into assembly, which would be too much work)
343 */ 339 */
3441: incl %gs:pda_irqcount 3401: incl PER_CPU_VAR(irq_count)
345 jne 2f 341 jne 2f
346 popq_cfi %rax /* move return address... */ 342 popq_cfi %rax /* move return address... */
347 mov %gs:pda_irqstackptr,%rsp 343 mov PER_CPU_VAR(irq_stack_ptr),%rsp
348 EMPTY_FRAME 0 344 EMPTY_FRAME 0
349 pushq_cfi %rbp /* backlink for unwinder */ 345 pushq_cfi %rbp /* backlink for unwinder */
350 pushq_cfi %rax /* ... to the new stack */ 346 pushq_cfi %rax /* ... to the new stack */
@@ -409,6 +405,8 @@ END(save_paranoid)
409ENTRY(ret_from_fork) 405ENTRY(ret_from_fork)
410 DEFAULT_FRAME 406 DEFAULT_FRAME
411 407
408 LOCK ; btr $TIF_FORK,TI_flags(%r8)
409
412 push kernel_eflags(%rip) 410 push kernel_eflags(%rip)
413 CFI_ADJUST_CFA_OFFSET 8 411 CFI_ADJUST_CFA_OFFSET 8
414 popf # reset kernel eflags 412 popf # reset kernel eflags
@@ -468,7 +466,7 @@ END(ret_from_fork)
468ENTRY(system_call) 466ENTRY(system_call)
469 CFI_STARTPROC simple 467 CFI_STARTPROC simple
470 CFI_SIGNAL_FRAME 468 CFI_SIGNAL_FRAME
471 CFI_DEF_CFA rsp,PDA_STACKOFFSET 469 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
472 CFI_REGISTER rip,rcx 470 CFI_REGISTER rip,rcx
473 /*CFI_REGISTER rflags,r11*/ 471 /*CFI_REGISTER rflags,r11*/
474 SWAPGS_UNSAFE_STACK 472 SWAPGS_UNSAFE_STACK
@@ -479,8 +477,8 @@ ENTRY(system_call)
479 */ 477 */
480ENTRY(system_call_after_swapgs) 478ENTRY(system_call_after_swapgs)
481 479
482 movq %rsp,%gs:pda_oldrsp 480 movq %rsp,PER_CPU_VAR(old_rsp)
483 movq %gs:pda_kernelstack,%rsp 481 movq PER_CPU_VAR(kernel_stack),%rsp
484 /* 482 /*
485 * No need to follow this irqs off/on section - it's straight 483 * No need to follow this irqs off/on section - it's straight
486 * and short: 484 * and short:
@@ -523,7 +521,7 @@ sysret_check:
523 CFI_REGISTER rip,rcx 521 CFI_REGISTER rip,rcx
524 RESTORE_ARGS 0,-ARG_SKIP,1 522 RESTORE_ARGS 0,-ARG_SKIP,1
525 /*CFI_REGISTER rflags,r11*/ 523 /*CFI_REGISTER rflags,r11*/
526 movq %gs:pda_oldrsp, %rsp 524 movq PER_CPU_VAR(old_rsp), %rsp
527 USERGS_SYSRET64 525 USERGS_SYSRET64
528 526
529 CFI_RESTORE_STATE 527 CFI_RESTORE_STATE
@@ -630,16 +628,14 @@ tracesys:
630 * Syscall return path ending with IRET. 628 * Syscall return path ending with IRET.
631 * Has correct top of stack, but partial stack frame. 629 * Has correct top of stack, but partial stack frame.
632 */ 630 */
633 .globl int_ret_from_sys_call 631GLOBAL(int_ret_from_sys_call)
634 .globl int_with_check
635int_ret_from_sys_call:
636 DISABLE_INTERRUPTS(CLBR_NONE) 632 DISABLE_INTERRUPTS(CLBR_NONE)
637 TRACE_IRQS_OFF 633 TRACE_IRQS_OFF
638 testl $3,CS-ARGOFFSET(%rsp) 634 testl $3,CS-ARGOFFSET(%rsp)
639 je retint_restore_args 635 je retint_restore_args
640 movl $_TIF_ALLWORK_MASK,%edi 636 movl $_TIF_ALLWORK_MASK,%edi
641 /* edi: mask to check */ 637 /* edi: mask to check */
642int_with_check: 638GLOBAL(int_with_check)
643 LOCKDEP_SYS_EXIT_IRQ 639 LOCKDEP_SYS_EXIT_IRQ
644 GET_THREAD_INFO(%rcx) 640 GET_THREAD_INFO(%rcx)
645 movl TI_flags(%rcx),%edx 641 movl TI_flags(%rcx),%edx
@@ -833,11 +829,11 @@ common_interrupt:
833 XCPT_FRAME 829 XCPT_FRAME
834 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 830 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
835 interrupt do_IRQ 831 interrupt do_IRQ
836 /* 0(%rsp): oldrsp-ARGOFFSET */ 832 /* 0(%rsp): old_rsp-ARGOFFSET */
837ret_from_intr: 833ret_from_intr:
838 DISABLE_INTERRUPTS(CLBR_NONE) 834 DISABLE_INTERRUPTS(CLBR_NONE)
839 TRACE_IRQS_OFF 835 TRACE_IRQS_OFF
840 decl %gs:pda_irqcount 836 decl PER_CPU_VAR(irq_count)
841 leaveq 837 leaveq
842 CFI_DEF_CFA_REGISTER rsp 838 CFI_DEF_CFA_REGISTER rsp
843 CFI_ADJUST_CFA_OFFSET -8 839 CFI_ADJUST_CFA_OFFSET -8
@@ -982,8 +978,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
982 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 978 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
983#endif 979#endif
984 980
981#ifdef CONFIG_X86_UV
985apicinterrupt UV_BAU_MESSAGE \ 982apicinterrupt UV_BAU_MESSAGE \
986 uv_bau_message_intr1 uv_bau_message_interrupt 983 uv_bau_message_intr1 uv_bau_message_interrupt
984#endif
987apicinterrupt LOCAL_TIMER_VECTOR \ 985apicinterrupt LOCAL_TIMER_VECTOR \
988 apic_timer_interrupt smp_apic_timer_interrupt 986 apic_timer_interrupt smp_apic_timer_interrupt
989 987
@@ -1073,10 +1071,10 @@ ENTRY(\sym)
1073 TRACE_IRQS_OFF 1071 TRACE_IRQS_OFF
1074 movq %rsp,%rdi /* pt_regs pointer */ 1072 movq %rsp,%rdi /* pt_regs pointer */
1075 xorl %esi,%esi /* no error code */ 1073 xorl %esi,%esi /* no error code */
1076 movq %gs:pda_data_offset, %rbp 1074 PER_CPU(init_tss, %rbp)
1077 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1075 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1078 call \do_sym 1076 call \do_sym
1079 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1077 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1080 jmp paranoid_exit /* %ebx: no swapgs flag */ 1078 jmp paranoid_exit /* %ebx: no swapgs flag */
1081 CFI_ENDPROC 1079 CFI_ENDPROC
1082END(\sym) 1080END(\sym)
@@ -1138,7 +1136,7 @@ ENTRY(native_load_gs_index)
1138 CFI_STARTPROC 1136 CFI_STARTPROC
1139 pushf 1137 pushf
1140 CFI_ADJUST_CFA_OFFSET 8 1138 CFI_ADJUST_CFA_OFFSET 8
1141 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) 1139 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1142 SWAPGS 1140 SWAPGS
1143gs_change: 1141gs_change:
1144 movl %edi,%gs 1142 movl %edi,%gs
@@ -1260,14 +1258,14 @@ ENTRY(call_softirq)
1260 CFI_REL_OFFSET rbp,0 1258 CFI_REL_OFFSET rbp,0
1261 mov %rsp,%rbp 1259 mov %rsp,%rbp
1262 CFI_DEF_CFA_REGISTER rbp 1260 CFI_DEF_CFA_REGISTER rbp
1263 incl %gs:pda_irqcount 1261 incl PER_CPU_VAR(irq_count)
1264 cmove %gs:pda_irqstackptr,%rsp 1262 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1265 push %rbp # backlink for old unwinder 1263 push %rbp # backlink for old unwinder
1266 call __do_softirq 1264 call __do_softirq
1267 leaveq 1265 leaveq
1268 CFI_DEF_CFA_REGISTER rsp 1266 CFI_DEF_CFA_REGISTER rsp
1269 CFI_ADJUST_CFA_OFFSET -8 1267 CFI_ADJUST_CFA_OFFSET -8
1270 decl %gs:pda_irqcount 1268 decl PER_CPU_VAR(irq_count)
1271 ret 1269 ret
1272 CFI_ENDPROC 1270 CFI_ENDPROC
1273END(call_softirq) 1271END(call_softirq)
@@ -1297,15 +1295,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1297 movq %rdi, %rsp # we don't return, adjust the stack frame 1295 movq %rdi, %rsp # we don't return, adjust the stack frame
1298 CFI_ENDPROC 1296 CFI_ENDPROC
1299 DEFAULT_FRAME 1297 DEFAULT_FRAME
130011: incl %gs:pda_irqcount 129811: incl PER_CPU_VAR(irq_count)
1301 movq %rsp,%rbp 1299 movq %rsp,%rbp
1302 CFI_DEF_CFA_REGISTER rbp 1300 CFI_DEF_CFA_REGISTER rbp
1303 cmovzq %gs:pda_irqstackptr,%rsp 1301 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1304 pushq %rbp # backlink for old unwinder 1302 pushq %rbp # backlink for old unwinder
1305 call xen_evtchn_do_upcall 1303 call xen_evtchn_do_upcall
1306 popq %rsp 1304 popq %rsp
1307 CFI_DEF_CFA_REGISTER rsp 1305 CFI_DEF_CFA_REGISTER rsp
1308 decl %gs:pda_irqcount 1306 decl PER_CPU_VAR(irq_count)
1309 jmp error_exit 1307 jmp error_exit
1310 CFI_ENDPROC 1308 CFI_ENDPROC
1311END(do_hypervisor_callback) 1309END(do_hypervisor_callback)
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
deleted file mode 100644
index 53699c931ad4..000000000000
--- a/arch/x86/kernel/es7000_32.c
+++ /dev/null
@@ -1,378 +0,0 @@
1/*
2 * Written by: Garry Forsgren, Unisys Corporation
3 * Natalie Protasevich, Unisys Corporation
4 * This file contains the code to configure and interface
5 * with Unisys ES7000 series hardware system manager.
6 *
7 * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it would be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write the Free Software Foundation, Inc., 59
19 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
20 *
21 * Contact information: Unisys Corporation, Township Line & Union Meeting
22 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
23 *
24 * http://www.unisys.com
25 */
26
27#include <linux/module.h>
28#include <linux/types.h>
29#include <linux/kernel.h>
30#include <linux/smp.h>
31#include <linux/string.h>
32#include <linux/spinlock.h>
33#include <linux/errno.h>
34#include <linux/notifier.h>
35#include <linux/reboot.h>
36#include <linux/init.h>
37#include <linux/acpi.h>
38#include <asm/io.h>
39#include <asm/nmi.h>
40#include <asm/smp.h>
41#include <asm/atomic.h>
42#include <asm/apicdef.h>
43#include <mach_mpparse.h>
44#include <asm/genapic.h>
45#include <asm/setup.h>
46
47/*
48 * ES7000 chipsets
49 */
50
51#define NON_UNISYS 0
52#define ES7000_CLASSIC 1
53#define ES7000_ZORRO 2
54
55
56#define MIP_REG 1
57#define MIP_PSAI_REG 4
58
59#define MIP_BUSY 1
60#define MIP_SPIN 0xf0000
61#define MIP_VALID 0x0100000000000000ULL
62#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
63
64#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
65
66struct mip_reg_info {
67 unsigned long long mip_info;
68 unsigned long long delivery_info;
69 unsigned long long host_reg;
70 unsigned long long mip_reg;
71};
72
73struct part_info {
74 unsigned char type;
75 unsigned char length;
76 unsigned char part_id;
77 unsigned char apic_mode;
78 unsigned long snum;
79 char ptype[16];
80 char sname[64];
81 char pname[64];
82};
83
84struct psai {
85 unsigned long long entry_type;
86 unsigned long long addr;
87 unsigned long long bep_addr;
88};
89
90struct es7000_mem_info {
91 unsigned char type;
92 unsigned char length;
93 unsigned char resv[6];
94 unsigned long long start;
95 unsigned long long size;
96};
97
98struct es7000_oem_table {
99 unsigned long long hdr;
100 struct mip_reg_info mip;
101 struct part_info pif;
102 struct es7000_mem_info shm;
103 struct psai psai;
104};
105
106#ifdef CONFIG_ACPI
107
108struct oem_table {
109 struct acpi_table_header Header;
110 u32 OEMTableAddr;
111 u32 OEMTableSize;
112};
113
114extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
115extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
116#endif
117
118struct mip_reg {
119 unsigned long long off_0;
120 unsigned long long off_8;
121 unsigned long long off_10;
122 unsigned long long off_18;
123 unsigned long long off_20;
124 unsigned long long off_28;
125 unsigned long long off_30;
126 unsigned long long off_38;
127};
128
129#define MIP_SW_APIC 0x1020b
130#define MIP_FUNC(VALUE) (VALUE & 0xff)
131
132/*
133 * ES7000 Globals
134 */
135
136static volatile unsigned long *psai = NULL;
137static struct mip_reg *mip_reg;
138static struct mip_reg *host_reg;
139static int mip_port;
140static unsigned long mip_addr, host_addr;
141
142int es7000_plat;
143
144/*
145 * GSI override for ES7000 platforms.
146 */
147
148static unsigned int base;
149
150static int
151es7000_rename_gsi(int ioapic, int gsi)
152{
153 if (es7000_plat == ES7000_ZORRO)
154 return gsi;
155
156 if (!base) {
157 int i;
158 for (i = 0; i < nr_ioapics; i++)
159 base += nr_ioapic_registers[i];
160 }
161
162 if (!ioapic && (gsi < 16))
163 gsi += base;
164 return gsi;
165}
166
167static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
168{
169 unsigned long vect = 0, psaival = 0;
170
171 if (psai == NULL)
172 return -1;
173
174 vect = ((unsigned long)__pa(eip)/0x1000) << 16;
175 psaival = (0x1000000 | vect | cpu);
176
177 while (*psai & 0x1000000)
178 ;
179
180 *psai = psaival;
181
182 return 0;
183}
184
185static void noop_wait_for_deassert(atomic_t *deassert_not_used)
186{
187}
188
189static int __init es7000_update_genapic(void)
190{
191 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
192
193 /* MPENTIUMIII */
194 if (boot_cpu_data.x86 == 6 &&
195 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
196 es7000_update_genapic_to_cluster();
197 genapic->wait_for_init_deassert = noop_wait_for_deassert;
198 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
199 }
200
201 return 0;
202}
203
204void __init
205setup_unisys(void)
206{
207 /*
208 * Determine the generation of the ES7000 currently running.
209 *
210 * es7000_plat = 1 if the machine is a 5xx ES7000 box
211 * es7000_plat = 2 if the machine is a x86_64 ES7000 box
212 *
213 */
214 if (!(boot_cpu_data.x86 <= 15 && boot_cpu_data.x86_model <= 2))
215 es7000_plat = ES7000_ZORRO;
216 else
217 es7000_plat = ES7000_CLASSIC;
218 ioapic_renumber_irq = es7000_rename_gsi;
219
220 x86_quirks->update_genapic = es7000_update_genapic;
221}
222
223/*
224 * Parse the OEM Table
225 */
226
227int __init
228parse_unisys_oem (char *oemptr)
229{
230 int i;
231 int success = 0;
232 unsigned char type, size;
233 unsigned long val;
234 char *tp = NULL;
235 struct psai *psaip = NULL;
236 struct mip_reg_info *mi;
237 struct mip_reg *host, *mip;
238
239 tp = oemptr;
240
241 tp += 8;
242
243 for (i=0; i <= 6; i++) {
244 type = *tp++;
245 size = *tp++;
246 tp -= 2;
247 switch (type) {
248 case MIP_REG:
249 mi = (struct mip_reg_info *)tp;
250 val = MIP_RD_LO(mi->host_reg);
251 host_addr = val;
252 host = (struct mip_reg *)val;
253 host_reg = __va(host);
254 val = MIP_RD_LO(mi->mip_reg);
255 mip_port = MIP_PORT(mi->mip_info);
256 mip_addr = val;
257 mip = (struct mip_reg *)val;
258 mip_reg = __va(mip);
259 pr_debug("es7000_mipcfg: host_reg = 0x%lx \n",
260 (unsigned long)host_reg);
261 pr_debug("es7000_mipcfg: mip_reg = 0x%lx \n",
262 (unsigned long)mip_reg);
263 success++;
264 break;
265 case MIP_PSAI_REG:
266 psaip = (struct psai *)tp;
267 if (tp != NULL) {
268 if (psaip->addr)
269 psai = __va(psaip->addr);
270 else
271 psai = NULL;
272 success++;
273 }
274 break;
275 default:
276 break;
277 }
278 tp += size;
279 }
280
281 if (success < 2) {
282 es7000_plat = NON_UNISYS;
283 } else
284 setup_unisys();
285 return es7000_plat;
286}
287
288#ifdef CONFIG_ACPI
289static unsigned long oem_addrX;
290static unsigned long oem_size;
291int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
292{
293 struct acpi_table_header *header = NULL;
294 int i = 0;
295
296 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) {
297 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
298 struct oem_table *t = (struct oem_table *)header;
299
300 oem_addrX = t->OEMTableAddr;
301 oem_size = t->OEMTableSize;
302
303 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
304 oem_size);
305 return 0;
306 }
307 }
308 return -1;
309}
310
311void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
312{
313}
314#endif
315
316static void
317es7000_spin(int n)
318{
319 int i = 0;
320
321 while (i++ < n)
322 rep_nop();
323}
324
325static int __init
326es7000_mip_write(struct mip_reg *mip_reg)
327{
328 int status = 0;
329 int spin;
330
331 spin = MIP_SPIN;
332 while (((unsigned long long)host_reg->off_38 &
333 (unsigned long long)MIP_VALID) != 0) {
334 if (--spin <= 0) {
335 printk("es7000_mip_write: Timeout waiting for Host Valid Flag");
336 return -1;
337 }
338 es7000_spin(MIP_SPIN);
339 }
340
341 memcpy(host_reg, mip_reg, sizeof(struct mip_reg));
342 outb(1, mip_port);
343
344 spin = MIP_SPIN;
345
346 while (((unsigned long long)mip_reg->off_38 &
347 (unsigned long long)MIP_VALID) == 0) {
348 if (--spin <= 0) {
349 printk("es7000_mip_write: Timeout waiting for MIP Valid Flag");
350 return -1;
351 }
352 es7000_spin(MIP_SPIN);
353 }
354
355 status = ((unsigned long long)mip_reg->off_0 &
356 (unsigned long long)0xffff0000000000ULL) >> 48;
357 mip_reg->off_38 = ((unsigned long long)mip_reg->off_38 &
358 (unsigned long long)~MIP_VALID);
359 return status;
360}
361
362void __init
363es7000_sw_apic(void)
364{
365 if (es7000_plat) {
366 int mip_status;
367 struct mip_reg es7000_mip_reg;
368
369 printk("ES7000: Enabling APIC mode.\n");
370 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
371 es7000_mip_reg.off_0 = MIP_SW_APIC;
372 es7000_mip_reg.off_38 = (MIP_VALID);
373 while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
374 printk("es7000_sw_apic: command failed, status = %x\n",
375 mip_status);
376 return;
377 }
378}
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
deleted file mode 100644
index 2bced78b0b8e..000000000000
--- a/arch/x86/kernel/genapic_64.c
+++ /dev/null
@@ -1,82 +0,0 @@
1/*
2 * Copyright 2004 James Cleverdon, IBM.
3 * Subject to the GNU Public License, v.2
4 *
5 * Generic APIC sub-arch probe layer.
6 *
7 * Hacked for x86-64 by James Cleverdon from i386 architecture code by
8 * Martin Bligh, Andi Kleen, James Bottomley, John Stultz, and
9 * James Cleverdon.
10 */
11#include <linux/threads.h>
12#include <linux/cpumask.h>
13#include <linux/string.h>
14#include <linux/module.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h>
17#include <linux/init.h>
18#include <linux/hardirq.h>
19#include <linux/dmar.h>
20
21#include <asm/smp.h>
22#include <asm/ipi.h>
23#include <asm/genapic.h>
24#include <asm/setup.h>
25
26extern struct genapic apic_flat;
27extern struct genapic apic_physflat;
28extern struct genapic apic_x2xpic_uv_x;
29extern struct genapic apic_x2apic_phys;
30extern struct genapic apic_x2apic_cluster;
31
32struct genapic __read_mostly *genapic = &apic_flat;
33
34static struct genapic *apic_probe[] __initdata = {
35 &apic_x2apic_uv_x,
36 &apic_x2apic_phys,
37 &apic_x2apic_cluster,
38 &apic_physflat,
39 NULL,
40};
41
42/*
43 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
44 */
45void __init setup_apic_routing(void)
46{
47 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
48 if (!intr_remapping_enabled)
49 genapic = &apic_flat;
50 }
51
52 if (genapic == &apic_flat) {
53 if (max_physical_apicid >= 8)
54 genapic = &apic_physflat;
55 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
56 }
57
58 if (x86_quirks->update_genapic)
59 x86_quirks->update_genapic();
60}
61
62/* Same for both flat and physical. */
63
64void apic_send_IPI_self(int vector)
65{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67}
68
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{
71 int i;
72
73 for (i = 0; apic_probe[i]; ++i) {
74 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 genapic = apic_probe[i];
76 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 genapic->name);
78 return 1;
79 }
80 }
81 return 0;
82}
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c4b935..f5b272247690 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,27 +26,6 @@
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda;
31
32#ifdef CONFIG_SMP
33/*
34 * We install an empty cpu_pda pointer table to indicate to early users
35 * (numa_set_node) that the cpu_pda pointer table for cpus other than
36 * the boot cpu is not yet setup.
37 */
38static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
39#else
40static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
41#endif
42
43void __init x86_64_init_pda(void)
44{
45 _cpu_pda = __cpu_pda;
46 cpu_pda(0) = &_boot_cpu_pda;
47 pda_init(0);
48}
49
50static void __init zap_identity_mappings(void) 29static void __init zap_identity_mappings(void)
51{ 30{
52 pgd_t *pgd = pgd_offset_k(0UL); 31 pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
112 if (console_loglevel == 10) 91 if (console_loglevel == 10)
113 early_printk("Kernel alive\n"); 92 early_printk("Kernel alive\n");
114 93
115 x86_64_init_pda();
116
117 x86_64_start_reservations(real_mode_data); 94 x86_64_start_reservations(real_mode_data);
118} 95}
119 96
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4eea70b..c32ca19d591a 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -11,14 +11,15 @@
11#include <linux/init.h> 11#include <linux/init.h>
12#include <linux/linkage.h> 12#include <linux/linkage.h>
13#include <asm/segment.h> 13#include <asm/segment.h>
14#include <asm/page.h> 14#include <asm/page_types.h>
15#include <asm/pgtable.h> 15#include <asm/pgtable_types.h>
16#include <asm/desc.h> 16#include <asm/desc.h>
17#include <asm/cache.h> 17#include <asm/cache.h>
18#include <asm/thread_info.h> 18#include <asm/thread_info.h>
19#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
20#include <asm/setup.h> 20#include <asm/setup.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23/* Physical address */ 24/* Physical address */
24#define pa(X) ((X) - __PAGE_OFFSET) 25#define pa(X) ((X) - __PAGE_OFFSET)
@@ -429,14 +430,34 @@ is386: movl $2,%ecx # set MP
429 ljmp $(__KERNEL_CS),$1f 430 ljmp $(__KERNEL_CS),$1f
4301: movl $(__KERNEL_DS),%eax # reload all the segment registers 4311: movl $(__KERNEL_DS),%eax # reload all the segment registers
431 movl %eax,%ss # after changing gdt. 432 movl %eax,%ss # after changing gdt.
432 movl %eax,%fs # gets reset once there's real percpu
433 433
434 movl $(__USER_DS),%eax # DS/ES contains default USER segment 434 movl $(__USER_DS),%eax # DS/ES contains default USER segment
435 movl %eax,%ds 435 movl %eax,%ds
436 movl %eax,%es 436 movl %eax,%es
437 437
438 xorl %eax,%eax # Clear GS and LDT 438 movl $(__KERNEL_PERCPU), %eax
439 movl %eax,%fs # set this cpu's percpu
440
441#ifdef CONFIG_CC_STACKPROTECTOR
442 /*
443 * The linker can't handle this by relocation. Manually set
444 * base address in stack canary segment descriptor.
445 */
446 cmpb $0,ready
447 jne 1f
448 movl $per_cpu__gdt_page,%eax
449 movl $per_cpu__stack_canary,%ecx
450 subl $20, %ecx
451 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
452 shrl $16, %ecx
453 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
454 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
4551:
456#endif
457 movl $(__KERNEL_STACK_CANARY),%eax
439 movl %eax,%gs 458 movl %eax,%gs
459
460 xorl %eax,%eax # Clear LDT
440 lldt %ax 461 lldt %ax
441 462
442 cld # gcc2 wants the direction flag cleared at all times 463 cld # gcc2 wants the direction flag cleared at all times
@@ -446,8 +467,6 @@ is386: movl $2,%ecx # set MP
446 movb $1, ready 467 movb $1, ready
447 cmpb $0,%cl # the first CPU calls start_kernel 468 cmpb $0,%cl # the first CPU calls start_kernel
448 je 1f 469 je 1f
449 movl $(__KERNEL_PERCPU), %eax
450 movl %eax,%fs # set this cpu's percpu
451 movl (stack_start), %esp 470 movl (stack_start), %esp
4521: 4711:
453#endif /* CONFIG_SMP */ 472#endif /* CONFIG_SMP */
@@ -548,12 +567,8 @@ early_fault:
548 pushl %eax 567 pushl %eax
549 pushl %edx /* trapno */ 568 pushl %edx /* trapno */
550 pushl $fault_msg 569 pushl $fault_msg
551#ifdef CONFIG_EARLY_PRINTK
552 call early_printk
553#else
554 call printk 570 call printk
555#endif 571#endif
556#endif
557 call dump_stack 572 call dump_stack
558hlt_loop: 573hlt_loop:
559 hlt 574 hlt
@@ -580,11 +595,10 @@ ignore_int:
580 pushl 32(%esp) 595 pushl 32(%esp)
581 pushl 40(%esp) 596 pushl 40(%esp)
582 pushl $int_msg 597 pushl $int_msg
583#ifdef CONFIG_EARLY_PRINTK
584 call early_printk
585#else
586 call printk 598 call printk
587#endif 599
600 call dump_stack
601
588 addl $(5*4),%esp 602 addl $(5*4),%esp
589 popl %ds 603 popl %ds
590 popl %es 604 popl %es
@@ -660,7 +674,7 @@ early_recursion_flag:
660 .long 0 674 .long 0
661 675
662int_msg: 676int_msg:
663 .asciz "Unknown interrupt or fault at EIP %p %p %p\n" 677 .asciz "Unknown interrupt or fault at: %p %p %p\n"
664 678
665fault_msg: 679fault_msg:
666/* fault info: */ 680/* fault info: */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d495563..54b29bb24e71 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
@@ -226,12 +227,15 @@ ENTRY(secondary_startup_64)
226 movl %eax,%fs 227 movl %eax,%fs
227 movl %eax,%gs 228 movl %eax,%gs
228 229
229 /* 230 /* Set up %gs.
230 * Setup up a dummy PDA. this is just for some early bootup code 231 *
231 * that does in_interrupt() 232 * The base of %gs always points to the bottom of the irqstack
232 */ 233 * union. If the stack protector canary is enabled, it is
234 * located at %gs:40. Note that, on SMP, the boot cpu uses
235 * init data section till per cpu areas are set up.
236 */
233 movl $MSR_GS_BASE,%ecx 237 movl $MSR_GS_BASE,%ecx
234 movq $empty_zero_page,%rax 238 movq initial_gs(%rip),%rax
235 movq %rax,%rdx 239 movq %rax,%rdx
236 shrq $32,%rdx 240 shrq $32,%rdx
237 wrmsr 241 wrmsr
@@ -257,6 +261,8 @@ ENTRY(secondary_startup_64)
257 .align 8 261 .align 8
258 ENTRY(initial_code) 262 ENTRY(initial_code)
259 .quad x86_64_start_kernel 263 .quad x86_64_start_kernel
264 ENTRY(initial_gs)
265 .quad INIT_PER_CPU_VAR(irq_stack_union)
260 __FINITDATA 266 __FINITDATA
261 267
262 ENTRY(stack_start) 268 ENTRY(stack_start)
@@ -323,8 +329,6 @@ early_idt_ripmsg:
323#endif /* CONFIG_EARLY_PRINTK */ 329#endif /* CONFIG_EARLY_PRINTK */
324 .previous 330 .previous
325 331
326.balign PAGE_SIZE
327
328#define NEXT_PAGE(name) \ 332#define NEXT_PAGE(name) \
329 .balign PAGE_SIZE; \ 333 .balign PAGE_SIZE; \
330ENTRY(name) 334ENTRY(name)
@@ -401,7 +405,8 @@ NEXT_PAGE(level2_spare_pgt)
401 .globl early_gdt_descr 405 .globl early_gdt_descr
402early_gdt_descr: 406early_gdt_descr:
403 .word GDT_ENTRIES*8-1 407 .word GDT_ENTRIES*8-1
404 .quad per_cpu__gdt_page 408early_gdt_descr_base:
409 .quad INIT_PER_CPU_VAR(gdt_page)
405 410
406ENTRY(phys_base) 411ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 412 /* This must match the first entry in level2_kernel_pgt */
@@ -412,7 +417,7 @@ ENTRY(phys_base)
412 .section .bss, "aw", @nobits 417 .section .bss, "aw", @nobits
413 .align L1_CACHE_BYTES 418 .align L1_CACHE_BYTES
414ENTRY(idt_table) 419ENTRY(idt_table)
415 .skip 256 * 16 420 .skip IDT_ENTRIES * 16
416 421
417 .section .bss.page_aligned, "aw", @nobits 422 .section .bss.page_aligned, "aw", @nobits
418 .align PAGE_SIZE 423 .align PAGE_SIZE
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index 11d5093eb281..df89102bef80 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -22,7 +22,6 @@
22#include <asm/pgtable.h> 22#include <asm/pgtable.h>
23#include <asm/desc.h> 23#include <asm/desc.h>
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/arch_hooks.h>
26#include <asm/i8259.h> 25#include <asm/i8259.h>
27 26
28/* 27/*
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index b12208f4dfee..e41980a373ab 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
131} 131}
132 132
133#ifdef CONFIG_X86_32 133#ifdef CONFIG_X86_32
134asmlinkage long sys_iopl(unsigned long regsp) 134long sys_iopl(struct pt_regs *regs)
135{ 135{
136 struct pt_regs *regs = (struct pt_regs *)&regsp;
137 unsigned int level = regs->bx; 136 unsigned int level = regs->bx;
138 struct thread_struct *t = &current->thread; 137 struct thread_struct *t = &current->thread;
139 int rc; 138 int rc;
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
deleted file mode 100644
index 285bbf8831fa..000000000000
--- a/arch/x86/kernel/ipi.c
+++ /dev/null
@@ -1,190 +0,0 @@
1#include <linux/cpumask.h>
2#include <linux/interrupt.h>
3#include <linux/init.h>
4
5#include <linux/mm.h>
6#include <linux/delay.h>
7#include <linux/spinlock.h>
8#include <linux/kernel_stat.h>
9#include <linux/mc146818rtc.h>
10#include <linux/cache.h>
11#include <linux/cpu.h>
12#include <linux/module.h>
13
14#include <asm/smp.h>
15#include <asm/mtrr.h>
16#include <asm/tlbflush.h>
17#include <asm/mmu_context.h>
18#include <asm/apic.h>
19#include <asm/proto.h>
20
21#ifdef CONFIG_X86_32
22#include <mach_apic.h>
23#include <mach_ipi.h>
24
25/*
26 * the following functions deal with sending IPIs between CPUs.
27 *
28 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
29 */
30
31static inline int __prepare_ICR(unsigned int shortcut, int vector)
32{
33 unsigned int icr = shortcut | APIC_DEST_LOGICAL;
34
35 switch (vector) {
36 default:
37 icr |= APIC_DM_FIXED | vector;
38 break;
39 case NMI_VECTOR:
40 icr |= APIC_DM_NMI;
41 break;
42 }
43 return icr;
44}
45
46static inline int __prepare_ICR2(unsigned int mask)
47{
48 return SET_APIC_DEST_FIELD(mask);
49}
50
51void __send_IPI_shortcut(unsigned int shortcut, int vector)
52{
53 /*
54 * Subtle. In the case of the 'never do double writes' workaround
55 * we have to lock out interrupts to be safe. As we don't care
56 * of the value read we use an atomic rmw access to avoid costly
57 * cli/sti. Otherwise we use an even cheaper single atomic write
58 * to the APIC.
59 */
60 unsigned int cfg;
61
62 /*
63 * Wait for idle.
64 */
65 apic_wait_icr_idle();
66
67 /*
68 * No need to touch the target chip field
69 */
70 cfg = __prepare_ICR(shortcut, vector);
71
72 /*
73 * Send the IPI. The write to APIC_ICR fires this off.
74 */
75 apic_write(APIC_ICR, cfg);
76}
77
78void send_IPI_self(int vector)
79{
80 __send_IPI_shortcut(APIC_DEST_SELF, vector);
81}
82
83/*
84 * This is used to send an IPI with no shorthand notation (the destination is
85 * specified in bits 56 to 63 of the ICR).
86 */
87static inline void __send_IPI_dest_field(unsigned long mask, int vector)
88{
89 unsigned long cfg;
90
91 /*
92 * Wait for idle.
93 */
94 if (unlikely(vector == NMI_VECTOR))
95 safe_apic_wait_icr_idle();
96 else
97 apic_wait_icr_idle();
98
99 /*
100 * prepare target chip field
101 */
102 cfg = __prepare_ICR2(mask);
103 apic_write(APIC_ICR2, cfg);
104
105 /*
106 * program the ICR
107 */
108 cfg = __prepare_ICR(0, vector);
109
110 /*
111 * Send the IPI. The write to APIC_ICR fires this off.
112 */
113 apic_write(APIC_ICR, cfg);
114}
115
116/*
117 * This is only used on smaller machines.
118 */
119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector)
120{
121 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags;
123
124 local_irq_save(flags);
125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector);
127 local_irq_restore(flags);
128}
129
130void send_IPI_mask_sequence(const struct cpumask *mask, int vector)
131{
132 unsigned long flags;
133 unsigned int query_cpu;
134
135 /*
136 * Hack. The clustered APIC addressing mode doesn't allow us to send
137 * to an arbitrary mask, so I do a unicasts to each CPU instead. This
138 * should be modified to do 1 message per cluster ID - mbligh
139 */
140
141 local_irq_save(flags);
142 for_each_cpu(query_cpu, mask)
143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145}
146
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
148{
149 unsigned long flags;
150 unsigned int query_cpu;
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154
155 local_irq_save(flags);
156 for_each_cpu(query_cpu, mask)
157 if (query_cpu != this_cpu)
158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
159 vector);
160 local_irq_restore(flags);
161}
162
163/* must come after the send_IPI functions above for inlining */
164static int convert_apicid_to_cpu(int apic_id)
165{
166 int i;
167
168 for_each_possible_cpu(i) {
169 if (per_cpu(x86_cpu_to_apicid, i) == apic_id)
170 return i;
171 }
172 return -1;
173}
174
175int safe_smp_processor_id(void)
176{
177 int apicid, cpuid;
178
179 if (!boot_cpu_has(X86_FEATURE_APIC))
180 return 0;
181
182 apicid = hard_smp_processor_id();
183 if (apicid == BAD_APICID)
184 return 0;
185
186 cpuid = convert_apicid_to_cpu(apicid);
187
188 return cpuid >= 0 ? cpuid : 0;
189}
190#endif
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3973e2df7f87..f13ca1650aaf 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -6,10 +6,12 @@
6#include <linux/kernel_stat.h> 6#include <linux/kernel_stat.h>
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/smp.h> 8#include <linux/smp.h>
9#include <linux/ftrace.h>
9 10
10#include <asm/apic.h> 11#include <asm/apic.h>
11#include <asm/io_apic.h> 12#include <asm/io_apic.h>
12#include <asm/irq.h> 13#include <asm/irq.h>
14#include <asm/idle.h>
13 15
14atomic_t irq_err_count; 16atomic_t irq_err_count;
15 17
@@ -36,11 +38,7 @@ void ack_bad_irq(unsigned int irq)
36#endif 38#endif
37} 39}
38 40
39#ifdef CONFIG_X86_32 41#define irq_stats(x) (&per_cpu(irq_stat, x))
40# define irq_stats(x) (&per_cpu(irq_stat, x))
41#else
42# define irq_stats(x) cpu_pda(x)
43#endif
44/* 42/*
45 * /proc/interrupts printing: 43 * /proc/interrupts printing:
46 */ 44 */
@@ -192,4 +190,40 @@ u64 arch_irq_stat(void)
192 return sum; 190 return sum;
193} 191}
194 192
193
194/*
195 * do_IRQ handles all normal device IRQ's (the special
196 * SMP cross-CPU interrupts have their own specific
197 * handlers).
198 */
199unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
200{
201 struct pt_regs *old_regs = set_irq_regs(regs);
202
203 /* high bit used in ret_from_ code */
204 unsigned vector = ~regs->orig_ax;
205 unsigned irq;
206
207 exit_idle();
208 irq_enter();
209
210 irq = __get_cpu_var(vector_irq)[vector];
211
212 if (!handle_irq(irq, regs)) {
213#ifdef CONFIG_X86_64
214 if (!disable_apic)
215 ack_APIC_irq();
216#endif
217
218 if (printk_ratelimit())
219 printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
220 __func__, smp_processor_id(), vector, irq);
221 }
222
223 irq_exit();
224
225 set_irq_regs(old_regs);
226 return 1;
227}
228
195EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); 229EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 74b9ff7341e9..9dc6b2b24275 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -191,33 +191,16 @@ static inline int
191execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } 191execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
192#endif 192#endif
193 193
194/* 194bool handle_irq(unsigned irq, struct pt_regs *regs)
195 * do_IRQ handles all normal device IRQ's (the special
196 * SMP cross-CPU interrupts have their own specific
197 * handlers).
198 */
199unsigned int do_IRQ(struct pt_regs *regs)
200{ 195{
201 struct pt_regs *old_regs;
202 /* high bit used in ret_from_ code */
203 int overflow;
204 unsigned vector = ~regs->orig_ax;
205 struct irq_desc *desc; 196 struct irq_desc *desc;
206 unsigned irq; 197 int overflow;
207
208
209 old_regs = set_irq_regs(regs);
210 irq_enter();
211 irq = __get_cpu_var(vector_irq)[vector];
212 198
213 overflow = check_stack_overflow(); 199 overflow = check_stack_overflow();
214 200
215 desc = irq_to_desc(irq); 201 desc = irq_to_desc(irq);
216 if (unlikely(!desc)) { 202 if (unlikely(!desc))
217 printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", 203 return false;
218 __func__, irq, vector, smp_processor_id());
219 BUG();
220 }
221 204
222 if (!execute_on_irq_stack(overflow, desc, irq)) { 205 if (!execute_on_irq_stack(overflow, desc, irq)) {
223 if (unlikely(overflow)) 206 if (unlikely(overflow))
@@ -225,13 +208,10 @@ unsigned int do_IRQ(struct pt_regs *regs)
225 desc->handle_irq(irq, desc); 208 desc->handle_irq(irq, desc);
226 } 209 }
227 210
228 irq_exit(); 211 return true;
229 set_irq_regs(old_regs);
230 return 1;
231} 212}
232 213
233#ifdef CONFIG_HOTPLUG_CPU 214#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h>
235 215
236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 216/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void) 217void fixup_irqs(void)
@@ -248,7 +228,7 @@ void fixup_irqs(void)
248 if (irq == 2) 228 if (irq == 2)
249 continue; 229 continue;
250 230
251 affinity = &desc->affinity; 231 affinity = desc->affinity;
252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { 232 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
253 printk("Breaking affinity for irq %i\n", irq); 233 printk("Breaking affinity for irq %i\n", irq);
254 affinity = cpu_all_mask; 234 affinity = cpu_all_mask;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 63c88e6ec025..977d8b43a0dd 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,6 +18,13 @@
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/idle.h> 20#include <asm/idle.h>
21#include <asm/apic.h>
22
23DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
24EXPORT_PER_CPU_SYMBOL(irq_stat);
25
26DEFINE_PER_CPU(struct pt_regs *, irq_regs);
27EXPORT_PER_CPU_SYMBOL(irq_regs);
21 28
22/* 29/*
23 * Probabilistic stack overflow check: 30 * Probabilistic stack overflow check:
@@ -41,42 +48,18 @@ static inline void stack_overflow_check(struct pt_regs *regs)
41#endif 48#endif
42} 49}
43 50
44/* 51bool handle_irq(unsigned irq, struct pt_regs *regs)
45 * do_IRQ handles all normal device IRQ's (the special
46 * SMP cross-CPU interrupts have their own specific
47 * handlers).
48 */
49asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
50{ 52{
51 struct pt_regs *old_regs = set_irq_regs(regs);
52 struct irq_desc *desc; 53 struct irq_desc *desc;
53 54
54 /* high bit used in ret_from_ code */
55 unsigned vector = ~regs->orig_ax;
56 unsigned irq;
57
58 exit_idle();
59 irq_enter();
60 irq = __get_cpu_var(vector_irq)[vector];
61
62 stack_overflow_check(regs); 55 stack_overflow_check(regs);
63 56
64 desc = irq_to_desc(irq); 57 desc = irq_to_desc(irq);
65 if (likely(desc)) 58 if (unlikely(!desc))
66 generic_handle_irq_desc(irq, desc); 59 return false;
67 else {
68 if (!disable_apic)
69 ack_APIC_irq();
70
71 if (printk_ratelimit())
72 printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
73 __func__, smp_processor_id(), vector);
74 }
75
76 irq_exit();
77 60
78 set_irq_regs(old_regs); 61 generic_handle_irq_desc(irq, desc);
79 return 1; 62 return true;
80} 63}
81 64
82#ifdef CONFIG_HOTPLUG_CPU 65#ifdef CONFIG_HOTPLUG_CPU
@@ -100,7 +83,7 @@ void fixup_irqs(void)
100 /* interrupt's are disabled at this point */ 83 /* interrupt's are disabled at this point */
101 spin_lock(&desc->lock); 84 spin_lock(&desc->lock);
102 85
103 affinity = &desc->affinity; 86 affinity = desc->affinity;
104 if (!irq_has_action(irq) || 87 if (!irq_has_action(irq) ||
105 cpumask_equal(affinity, cpu_online_mask)) { 88 cpumask_equal(affinity, cpu_online_mask)) {
106 spin_unlock(&desc->lock); 89 spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 10a09c2f1828..50b8c3a3006c 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -18,7 +18,7 @@
18#include <asm/pgtable.h> 18#include <asm/pgtable.h>
19#include <asm/desc.h> 19#include <asm/desc.h>
20#include <asm/apic.h> 20#include <asm/apic.h>
21#include <asm/arch_hooks.h> 21#include <asm/setup.h>
22#include <asm/i8259.h> 22#include <asm/i8259.h>
23#include <asm/traps.h> 23#include <asm/traps.h>
24 24
@@ -78,6 +78,15 @@ void __init init_ISA_irqs(void)
78 } 78 }
79} 79}
80 80
81/*
82 * IRQ2 is cascade interrupt to second interrupt controller
83 */
84static struct irqaction irq2 = {
85 .handler = no_action,
86 .mask = CPU_MASK_NONE,
87 .name = "cascade",
88};
89
81DEFINE_PER_CPU(vector_irq_t, vector_irq) = { 90DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
82 [0 ... IRQ0_VECTOR - 1] = -1, 91 [0 ... IRQ0_VECTOR - 1] = -1,
83 [IRQ0_VECTOR] = 0, 92 [IRQ0_VECTOR] = 0,
@@ -118,8 +127,8 @@ void __init native_init_IRQ(void)
118{ 127{
119 int i; 128 int i;
120 129
121 /* all the set up before the call gates are initialised */ 130 /* Execute any quirks before the call gates are initialised: */
122 pre_intr_init_hook(); 131 x86_quirk_pre_intr_init();
123 132
124 /* 133 /*
125 * Cover the whole vector space, no vector can escape 134 * Cover the whole vector space, no vector can escape
@@ -140,8 +149,15 @@ void __init native_init_IRQ(void)
140 */ 149 */
141 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
142 151
143 /* IPI for invalidation */ 152 /* IPIs for invalidation */
144 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); 153 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
154 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
155 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
156 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
157 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
158 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
159 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
160 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
145 161
146 /* IPI for generic function call */ 162 /* IPI for generic function call */
147 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 163 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -169,10 +185,14 @@ void __init native_init_IRQ(void)
169 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 185 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
170#endif 186#endif
171 187
172 /* setup after call gates are initialised (usually add in 188 if (!acpi_ioapic)
173 * the architecture specific gates) 189 setup_irq(2, &irq2);
190
191 /*
192 * Call quirks after call gates are initialised (usually add in
193 * the architecture specific gates):
174 */ 194 */
175 intr_init_hook(); 195 x86_quirk_intr_init();
176 196
177 /* 197 /*
178 * External FPU? Set up irq13 if so, for 198 * External FPU? Set up irq13 if so, for
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 10435a120d22..eedfaebe1063 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -46,7 +46,7 @@
46#include <asm/apicdef.h> 46#include <asm/apicdef.h>
47#include <asm/system.h> 47#include <asm/system.h>
48 48
49#include <mach_ipi.h> 49#include <asm/apic.h>
50 50
51/* 51/*
52 * Put the error code here just in case the user cares: 52 * Put the error code here just in case the user cares:
@@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
347 */ 347 */
348void kgdb_roundup_cpus(unsigned long flags) 348void kgdb_roundup_cpus(unsigned long flags)
349{ 349{
350 send_IPI_allbutself(APIC_DM_NMI); 350 apic->send_IPI_allbutself(APIC_DM_NMI);
351} 351}
352#endif 352#endif
353 353
diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
index 652fce6d2cce..137f2e8132df 100644
--- a/arch/x86/kernel/kvmclock.c
+++ b/arch/x86/kernel/kvmclock.c
@@ -19,7 +19,6 @@
19#include <linux/clocksource.h> 19#include <linux/clocksource.h>
20#include <linux/kvm_para.h> 20#include <linux/kvm_para.h>
21#include <asm/pvclock.h> 21#include <asm/pvclock.h>
22#include <asm/arch_hooks.h>
23#include <asm/msr.h> 22#include <asm/msr.h>
24#include <asm/apic.h> 23#include <asm/apic.h>
25#include <linux/percpu.h> 24#include <linux/percpu.h>
diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c
index 37f420018a41..f5fc8c781a62 100644
--- a/arch/x86/kernel/machine_kexec_32.c
+++ b/arch/x86/kernel/machine_kexec_32.c
@@ -121,7 +121,7 @@ static void machine_kexec_page_table_set_one(
121static void machine_kexec_prepare_page_tables(struct kimage *image) 121static void machine_kexec_prepare_page_tables(struct kimage *image)
122{ 122{
123 void *control_page; 123 void *control_page;
124 pmd_t *pmd = 0; 124 pmd_t *pmd = NULL;
125 125
126 control_page = page_address(image->control_code_page); 126 control_page = page_address(image->control_code_page);
127#ifdef CONFIG_X86_PAE 127#ifdef CONFIG_X86_PAE
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c43caa3a91f3..6993d51b7fd8 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,15 +18,6 @@
18#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
19#include <asm/io.h> 19#include <asm/io.h>
20 20
21#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
22static u64 kexec_pgd[512] PAGE_ALIGNED;
23static u64 kexec_pud0[512] PAGE_ALIGNED;
24static u64 kexec_pmd0[512] PAGE_ALIGNED;
25static u64 kexec_pte0[512] PAGE_ALIGNED;
26static u64 kexec_pud1[512] PAGE_ALIGNED;
27static u64 kexec_pmd1[512] PAGE_ALIGNED;
28static u64 kexec_pte1[512] PAGE_ALIGNED;
29
30static void init_level2_page(pmd_t *level2p, unsigned long addr) 21static void init_level2_page(pmd_t *level2p, unsigned long addr)
31{ 22{
32 unsigned long end_addr; 23 unsigned long end_addr;
@@ -107,12 +98,65 @@ out:
107 return result; 98 return result;
108} 99}
109 100
101static void free_transition_pgtable(struct kimage *image)
102{
103 free_page((unsigned long)image->arch.pud);
104 free_page((unsigned long)image->arch.pmd);
105 free_page((unsigned long)image->arch.pte);
106}
107
108static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
109{
110 pud_t *pud;
111 pmd_t *pmd;
112 pte_t *pte;
113 unsigned long vaddr, paddr;
114 int result = -ENOMEM;
115
116 vaddr = (unsigned long)relocate_kernel;
117 paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
118 pgd += pgd_index(vaddr);
119 if (!pgd_present(*pgd)) {
120 pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
121 if (!pud)
122 goto err;
123 image->arch.pud = pud;
124 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
125 }
126 pud = pud_offset(pgd, vaddr);
127 if (!pud_present(*pud)) {
128 pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
129 if (!pmd)
130 goto err;
131 image->arch.pmd = pmd;
132 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
133 }
134 pmd = pmd_offset(pud, vaddr);
135 if (!pmd_present(*pmd)) {
136 pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
137 if (!pte)
138 goto err;
139 image->arch.pte = pte;
140 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
141 }
142 pte = pte_offset_kernel(pmd, vaddr);
143 set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
144 return 0;
145err:
146 free_transition_pgtable(image);
147 return result;
148}
149
110 150
111static int init_pgtable(struct kimage *image, unsigned long start_pgtable) 151static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
112{ 152{
113 pgd_t *level4p; 153 pgd_t *level4p;
154 int result;
114 level4p = (pgd_t *)__va(start_pgtable); 155 level4p = (pgd_t *)__va(start_pgtable);
115 return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); 156 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
157 if (result)
158 return result;
159 return init_transition_pgtable(image, level4p);
116} 160}
117 161
118static void set_idt(void *newidt, u16 limit) 162static void set_idt(void *newidt, u16 limit)
@@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image)
174 218
175void machine_kexec_cleanup(struct kimage *image) 219void machine_kexec_cleanup(struct kimage *image)
176{ 220{
177 return; 221 free_transition_pgtable(image);
178} 222}
179 223
180/* 224/*
@@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image)
195 memcpy(control_page, relocate_kernel, PAGE_SIZE); 239 memcpy(control_page, relocate_kernel, PAGE_SIZE);
196 240
197 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); 241 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
198 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
199 page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
200 page_list[VA_PGD] = (unsigned long)kexec_pgd;
201 page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
202 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
203 page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
204 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
205 page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
206 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
207 page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
208 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
209 page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
210 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
211 page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
212 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
213
214 page_list[PA_TABLE_PAGE] = 242 page_list[PA_TABLE_PAGE] =
215 (unsigned long)__pa(page_address(image->control_code_page)); 243 (unsigned long)__pa(page_address(image->control_code_page));
216 244
diff --git a/arch/x86/kernel/mca_32.c b/arch/x86/kernel/mca_32.c
index 2dc183758be3..845d80ce1ef1 100644
--- a/arch/x86/kernel/mca_32.c
+++ b/arch/x86/kernel/mca_32.c
@@ -51,7 +51,6 @@
51#include <linux/ioport.h> 51#include <linux/ioport.h>
52#include <asm/uaccess.h> 52#include <asm/uaccess.h>
53#include <linux/init.h> 53#include <linux/init.h>
54#include <asm/arch_hooks.h>
55 54
56static unsigned char which_scsi; 55static unsigned char which_scsi;
57 56
@@ -474,6 +473,4 @@ void __kprobes mca_handle_nmi(void)
474 * adapter was responsible for the error. 473 * adapter was responsible for the error.
475 */ 474 */
476 bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback); 475 bus_for_each_dev(&mca_bus_type, NULL, NULL, mca_handle_nmi_callback);
477 476}
478 mca_nmi_hook();
479} /* mca_handle_nmi */
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index b7f4c929e615..5e9f4fc51385 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -87,9 +87,9 @@
87#include <linux/cpu.h> 87#include <linux/cpu.h>
88#include <linux/firmware.h> 88#include <linux/firmware.h>
89#include <linux/platform_device.h> 89#include <linux/platform_device.h>
90#include <linux/uaccess.h>
90 91
91#include <asm/msr.h> 92#include <asm/msr.h>
92#include <asm/uaccess.h>
93#include <asm/processor.h> 93#include <asm/processor.h>
94#include <asm/microcode.h> 94#include <asm/microcode.h>
95 95
@@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; 196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
197} 197}
198 198
199static inline int 199static inline int
200update_match_revision(struct microcode_header_intel *mc_header, int rev) 200update_match_revision(struct microcode_header_intel *mc_header, int rev)
201{ 201{
202 return (mc_header->rev <= rev) ? 0 : 1; 202 return (mc_header->rev <= rev) ? 0 : 1;
@@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device)
442 return ret; 442 return ret;
443 } 443 }
444 444
445 ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, 445 ret = generic_load_microcode(cpu, (void *)firmware->data,
446 &get_ucode_fw); 446 firmware->size, &get_ucode_fw);
447 447
448 release_firmware(firmware); 448 release_firmware(firmware);
449 449
@@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size)
460 /* We should bind the task to the CPU */ 460 /* We should bind the task to the CPU */
461 BUG_ON(cpu != raw_smp_processor_id()); 461 BUG_ON(cpu != raw_smp_processor_id());
462 462
463 return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); 463 return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
464} 464}
465 465
466static void microcode_fini_cpu(int cpu) 466static void microcode_fini_cpu(int cpu)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
index 3db0a5442eb1..0edd819050e7 100644
--- a/arch/x86/kernel/module_32.c
+++ b/arch/x86/kernel/module_32.c
@@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region)
42{ 42{
43 vfree(module_region); 43 vfree(module_region);
44 /* FIXME: If module_region == mod->init_region, trim exception 44 /* FIXME: If module_region == mod->init_region, trim exception
45 table entries. */ 45 table entries. */
46} 46}
47 47
48/* We don't need anything special. */ 48/* We don't need anything special. */
@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr,
113 *para = NULL; 113 *para = NULL;
114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
115 115
116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
117 if (!strcmp(".text", secstrings + s->sh_name)) 117 if (!strcmp(".text", secstrings + s->sh_name))
118 text = s; 118 text = s;
119 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 119 if (!strcmp(".altinstructions", secstrings + s->sh_name))
120 alt = s; 120 alt = s;
121 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 121 if (!strcmp(".smp_locks", secstrings + s->sh_name))
122 locks= s; 122 locks = s;
123 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 123 if (!strcmp(".parainstructions", secstrings + s->sh_name))
124 para = s; 124 para = s;
125 } 125 }
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 6ba87830d4b1..c23880b90b5c 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -30,14 +30,14 @@
30#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/pgtable.h> 31#include <asm/pgtable.h>
32 32
33#define DEBUGP(fmt...) 33#define DEBUGP(fmt...)
34 34
35#ifndef CONFIG_UML 35#ifndef CONFIG_UML
36void module_free(struct module *mod, void *module_region) 36void module_free(struct module *mod, void *module_region)
37{ 37{
38 vfree(module_region); 38 vfree(module_region);
39 /* FIXME: If module_region == mod->init_region, trim exception 39 /* FIXME: If module_region == mod->init_region, trim exception
40 table entries. */ 40 table entries. */
41} 41}
42 42
43void *module_alloc(unsigned long size) 43void *module_alloc(unsigned long size)
@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; 77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
78 Elf64_Sym *sym; 78 Elf64_Sym *sym;
79 void *loc; 79 void *loc;
80 u64 val; 80 u64 val;
81 81
82 DEBUGP("Applying relocate section %u to %u\n", relsec, 82 DEBUGP("Applying relocate section %u to %u\n", relsec,
83 sechdrs[relsec].sh_info); 83 sechdrs[relsec].sh_info);
@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr 91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
92 + ELF64_R_SYM(rel[i].r_info); 92 + ELF64_R_SYM(rel[i].r_info);
93 93
94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", 94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
95 (int)ELF64_R_TYPE(rel[i].r_info), 95 (int)ELF64_R_TYPE(rel[i].r_info),
96 sym->st_value, rel[i].r_addend, (u64)loc); 96 sym->st_value, rel[i].r_addend, (u64)loc);
97 97
98 val = sym->st_value + rel[i].r_addend; 98 val = sym->st_value + rel[i].r_addend;
99 99
100 switch (ELF64_R_TYPE(rel[i].r_info)) { 100 switch (ELF64_R_TYPE(rel[i].r_info)) {
101 case R_X86_64_NONE: 101 case R_X86_64_NONE:
@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
113 if ((s64)val != *(s32 *)loc) 113 if ((s64)val != *(s32 *)loc)
114 goto overflow; 114 goto overflow;
115 break; 115 break;
116 case R_X86_64_PC32: 116 case R_X86_64_PC32:
117 val -= (u64)loc; 117 val -= (u64)loc;
118 *(u32 *)loc = val; 118 *(u32 *)loc = val;
119#if 0 119#if 0
120 if ((s64)val != *(s32 *)loc) 120 if ((s64)val != *(s32 *)loc)
121 goto overflow; 121 goto overflow;
122#endif 122#endif
123 break; 123 break;
124 default: 124 default:
125 printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", 125 printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
126 me->name, ELF64_R_TYPE(rel[i].r_info)); 126 me->name, ELF64_R_TYPE(rel[i].r_info));
127 return -ENOEXEC; 127 return -ENOEXEC;
128 } 128 }
@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
130 return 0; 130 return 0;
131 131
132overflow: 132overflow:
133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n", 133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
134 (int)ELF64_R_TYPE(rel[i].r_info), val); 134 (int)ELF64_R_TYPE(rel[i].r_info), val);
135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", 135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
136 me->name); 136 me->name);
@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs,
143 unsigned int relsec, 143 unsigned int relsec,
144 struct module *me) 144 struct module *me)
145{ 145{
146 printk("non add relocation not supported\n"); 146 printk(KERN_ERR "non add relocation not supported\n");
147 return -ENOSYS; 147 return -ENOSYS;
148} 148}
149 149
150int module_finalize(const Elf_Ehdr *hdr, 150int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 151 const Elf_Shdr *sechdrs,
152 struct module *me) 152 struct module *me)
153{ 153{
154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, 154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
155 *para = NULL; 155 *para = NULL;
@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr,
161 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 161 if (!strcmp(".altinstructions", secstrings + s->sh_name))
162 alt = s; 162 alt = s;
163 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 163 if (!strcmp(".smp_locks", secstrings + s->sh_name))
164 locks= s; 164 locks = s;
165 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 165 if (!strcmp(".parainstructions", secstrings + s->sh_name))
166 para = s; 166 para = s;
167 } 167 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a649a4ccad43..37cb1bda1baf 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -3,7 +3,7 @@
3 * compliant MP-table parsing routines. 3 * compliant MP-table parsing routines.
4 * 4 *
5 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 5 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
6 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> 6 * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
7 * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> 7 * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
8 */ 8 */
9 9
@@ -29,12 +29,7 @@
29#include <asm/setup.h> 29#include <asm/setup.h>
30#include <asm/smp.h> 30#include <asm/smp.h>
31 31
32#include <mach_apic.h> 32#include <asm/apic.h>
33#ifdef CONFIG_X86_32
34#include <mach_apicdef.h>
35#include <mach_mpparse.h>
36#endif
37
38/* 33/*
39 * Checksum an MP configuration block. 34 * Checksum an MP configuration block.
40 */ 35 */
@@ -144,11 +139,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
144 if (bad_ioapic(m->apicaddr)) 139 if (bad_ioapic(m->apicaddr))
145 return; 140 return;
146 141
147 mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; 142 mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
148 mp_ioapics[nr_ioapics].mp_apicid = m->apicid; 143 mp_ioapics[nr_ioapics].apicid = m->apicid;
149 mp_ioapics[nr_ioapics].mp_type = m->type; 144 mp_ioapics[nr_ioapics].type = m->type;
150 mp_ioapics[nr_ioapics].mp_apicver = m->apicver; 145 mp_ioapics[nr_ioapics].apicver = m->apicver;
151 mp_ioapics[nr_ioapics].mp_flags = m->flags; 146 mp_ioapics[nr_ioapics].flags = m->flags;
152 nr_ioapics++; 147 nr_ioapics++;
153} 148}
154 149
@@ -160,55 +155,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
160 m->srcbusirq, m->dstapic, m->dstirq); 155 m->srcbusirq, m->dstapic, m->dstirq);
161} 156}
162 157
163static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 158static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
164{ 159{
165 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," 160 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
166 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 161 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
167 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 162 mp_irq->irqtype, mp_irq->irqflag & 3,
168 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 163 (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
169 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); 164 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
170} 165}
171 166
172static void __init assign_to_mp_irq(struct mpc_intsrc *m, 167static void __init assign_to_mp_irq(struct mpc_intsrc *m,
173 struct mp_config_intsrc *mp_irq) 168 struct mpc_intsrc *mp_irq)
174{ 169{
175 mp_irq->mp_dstapic = m->dstapic; 170 mp_irq->dstapic = m->dstapic;
176 mp_irq->mp_type = m->type; 171 mp_irq->type = m->type;
177 mp_irq->mp_irqtype = m->irqtype; 172 mp_irq->irqtype = m->irqtype;
178 mp_irq->mp_irqflag = m->irqflag; 173 mp_irq->irqflag = m->irqflag;
179 mp_irq->mp_srcbus = m->srcbus; 174 mp_irq->srcbus = m->srcbus;
180 mp_irq->mp_srcbusirq = m->srcbusirq; 175 mp_irq->srcbusirq = m->srcbusirq;
181 mp_irq->mp_dstirq = m->dstirq; 176 mp_irq->dstirq = m->dstirq;
182} 177}
183 178
184static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, 179static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
185 struct mpc_intsrc *m) 180 struct mpc_intsrc *m)
186{ 181{
187 m->dstapic = mp_irq->mp_dstapic; 182 m->dstapic = mp_irq->dstapic;
188 m->type = mp_irq->mp_type; 183 m->type = mp_irq->type;
189 m->irqtype = mp_irq->mp_irqtype; 184 m->irqtype = mp_irq->irqtype;
190 m->irqflag = mp_irq->mp_irqflag; 185 m->irqflag = mp_irq->irqflag;
191 m->srcbus = mp_irq->mp_srcbus; 186 m->srcbus = mp_irq->srcbus;
192 m->srcbusirq = mp_irq->mp_srcbusirq; 187 m->srcbusirq = mp_irq->srcbusirq;
193 m->dstirq = mp_irq->mp_dstirq; 188 m->dstirq = mp_irq->dstirq;
194} 189}
195 190
196static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, 191static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
197 struct mpc_intsrc *m) 192 struct mpc_intsrc *m)
198{ 193{
199 if (mp_irq->mp_dstapic != m->dstapic) 194 if (mp_irq->dstapic != m->dstapic)
200 return 1; 195 return 1;
201 if (mp_irq->mp_type != m->type) 196 if (mp_irq->type != m->type)
202 return 2; 197 return 2;
203 if (mp_irq->mp_irqtype != m->irqtype) 198 if (mp_irq->irqtype != m->irqtype)
204 return 3; 199 return 3;
205 if (mp_irq->mp_irqflag != m->irqflag) 200 if (mp_irq->irqflag != m->irqflag)
206 return 4; 201 return 4;
207 if (mp_irq->mp_srcbus != m->srcbus) 202 if (mp_irq->srcbus != m->srcbus)
208 return 5; 203 return 5;
209 if (mp_irq->mp_srcbusirq != m->srcbusirq) 204 if (mp_irq->srcbusirq != m->srcbusirq)
210 return 6; 205 return 6;
211 if (mp_irq->mp_dstirq != m->dstirq) 206 if (mp_irq->dstirq != m->dstirq)
212 return 7; 207 return 7;
213 208
214 return 0; 209 return 0;
@@ -292,16 +287,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
292 return 0; 287 return 0;
293 288
294#ifdef CONFIG_X86_32 289#ifdef CONFIG_X86_32
295 /* 290 generic_mps_oem_check(mpc, oem, str);
296 * need to make sure summit and es7000's mps_oem_check is safe to be
297 * called early via genericarch 's mps_oem_check
298 */
299 if (early) {
300#ifdef CONFIG_X86_NUMAQ
301 numaq_mps_oem_check(mpc, oem, str);
302#endif
303 } else
304 mps_oem_check(mpc, oem, str);
305#endif 291#endif
306 /* save the local APIC address, it might be non-default */ 292 /* save the local APIC address, it might be non-default */
307 if (!acpi_lapic) 293 if (!acpi_lapic)
@@ -386,13 +372,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
386 (*x86_quirks->mpc_record)++; 372 (*x86_quirks->mpc_record)++;
387 } 373 }
388 374
389#ifdef CONFIG_X86_GENERICARCH 375#ifdef CONFIG_X86_BIGSMP
390 generic_bigsmp_probe(); 376 generic_bigsmp_probe();
391#endif 377#endif
392 378
393#ifdef CONFIG_X86_32 379 if (apic->setup_apic_routing)
394 setup_apic_routing(); 380 apic->setup_apic_routing();
395#endif 381
396 if (!num_processors) 382 if (!num_processors)
397 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 383 printk(KERN_ERR "MPTABLE: no processors registered!\n");
398 return num_processors; 384 return num_processors;
@@ -417,7 +403,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
417 intsrc.type = MP_INTSRC; 403 intsrc.type = MP_INTSRC;
418 intsrc.irqflag = 0; /* conforming */ 404 intsrc.irqflag = 0; /* conforming */
419 intsrc.srcbus = 0; 405 intsrc.srcbus = 0;
420 intsrc.dstapic = mp_ioapics[0].mp_apicid; 406 intsrc.dstapic = mp_ioapics[0].apicid;
421 407
422 intsrc.irqtype = mp_INT; 408 intsrc.irqtype = mp_INT;
423 409
@@ -570,14 +556,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
570 } 556 }
571} 557}
572 558
573static struct intel_mp_floating *mpf_found; 559static struct mpf_intel *mpf_found;
574 560
575/* 561/*
576 * Scan the memory blocks for an SMP configuration block. 562 * Scan the memory blocks for an SMP configuration block.
577 */ 563 */
578static void __init __get_smp_config(unsigned int early) 564static void __init __get_smp_config(unsigned int early)
579{ 565{
580 struct intel_mp_floating *mpf = mpf_found; 566 struct mpf_intel *mpf = mpf_found;
581 567
582 if (!mpf) 568 if (!mpf)
583 return; 569 return;
@@ -598,9 +584,9 @@ static void __init __get_smp_config(unsigned int early)
598 } 584 }
599 585
600 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 586 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
601 mpf->mpf_specification); 587 mpf->specification);
602#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 588#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
603 if (mpf->mpf_feature2 & (1 << 7)) { 589 if (mpf->feature2 & (1 << 7)) {
604 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 590 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
605 pic_mode = 1; 591 pic_mode = 1;
606 } else { 592 } else {
@@ -611,7 +597,7 @@ static void __init __get_smp_config(unsigned int early)
611 /* 597 /*
612 * Now see if we need to read further. 598 * Now see if we need to read further.
613 */ 599 */
614 if (mpf->mpf_feature1 != 0) { 600 if (mpf->feature1 != 0) {
615 if (early) { 601 if (early) {
616 /* 602 /*
617 * local APIC has default address 603 * local APIC has default address
@@ -621,16 +607,16 @@ static void __init __get_smp_config(unsigned int early)
621 } 607 }
622 608
623 printk(KERN_INFO "Default MP configuration #%d\n", 609 printk(KERN_INFO "Default MP configuration #%d\n",
624 mpf->mpf_feature1); 610 mpf->feature1);
625 construct_default_ISA_mptable(mpf->mpf_feature1); 611 construct_default_ISA_mptable(mpf->feature1);
626 612
627 } else if (mpf->mpf_physptr) { 613 } else if (mpf->physptr) {
628 614
629 /* 615 /*
630 * Read the physical hardware table. Anything here will 616 * Read the physical hardware table. Anything here will
631 * override the defaults. 617 * override the defaults.
632 */ 618 */
633 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { 619 if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
634#ifdef CONFIG_X86_LOCAL_APIC 620#ifdef CONFIG_X86_LOCAL_APIC
635 smp_found_config = 0; 621 smp_found_config = 0;
636#endif 622#endif
@@ -688,32 +674,32 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
688 unsigned reserve) 674 unsigned reserve)
689{ 675{
690 unsigned int *bp = phys_to_virt(base); 676 unsigned int *bp = phys_to_virt(base);
691 struct intel_mp_floating *mpf; 677 struct mpf_intel *mpf;
692 678
693 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", 679 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
694 bp, length); 680 bp, length);
695 BUILD_BUG_ON(sizeof(*mpf) != 16); 681 BUILD_BUG_ON(sizeof(*mpf) != 16);
696 682
697 while (length > 0) { 683 while (length > 0) {
698 mpf = (struct intel_mp_floating *)bp; 684 mpf = (struct mpf_intel *)bp;
699 if ((*bp == SMP_MAGIC_IDENT) && 685 if ((*bp == SMP_MAGIC_IDENT) &&
700 (mpf->mpf_length == 1) && 686 (mpf->length == 1) &&
701 !mpf_checksum((unsigned char *)bp, 16) && 687 !mpf_checksum((unsigned char *)bp, 16) &&
702 ((mpf->mpf_specification == 1) 688 ((mpf->specification == 1)
703 || (mpf->mpf_specification == 4))) { 689 || (mpf->specification == 4))) {
704#ifdef CONFIG_X86_LOCAL_APIC 690#ifdef CONFIG_X86_LOCAL_APIC
705 smp_found_config = 1; 691 smp_found_config = 1;
706#endif 692#endif
707 mpf_found = mpf; 693 mpf_found = mpf;
708 694
709 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", 695 printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
710 mpf, virt_to_phys(mpf)); 696 mpf, (u64)virt_to_phys(mpf));
711 697
712 if (!reserve) 698 if (!reserve)
713 return 1; 699 return 1;
714 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, 700 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
715 BOOTMEM_DEFAULT); 701 BOOTMEM_DEFAULT);
716 if (mpf->mpf_physptr) { 702 if (mpf->physptr) {
717 unsigned long size = PAGE_SIZE; 703 unsigned long size = PAGE_SIZE;
718#ifdef CONFIG_X86_32 704#ifdef CONFIG_X86_32
719 /* 705 /*
@@ -722,15 +708,24 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
722 * the bottom is mapped now. 708 * the bottom is mapped now.
723 * PC-9800's MPC table places on the very last 709 * PC-9800's MPC table places on the very last
724 * of physical memory; so that simply reserving 710 * of physical memory; so that simply reserving
725 * PAGE_SIZE from mpg->mpf_physptr yields BUG() 711 * PAGE_SIZE from mpf->physptr yields BUG()
726 * in reserve_bootmem. 712 * in reserve_bootmem.
713 * also need to make sure physptr is below than
714 * max_low_pfn
715 * we don't need reserve the area above max_low_pfn
727 */ 716 */
728 unsigned long end = max_low_pfn * PAGE_SIZE; 717 unsigned long end = max_low_pfn * PAGE_SIZE;
729 if (mpf->mpf_physptr + size > end) 718
730 size = end - mpf->mpf_physptr; 719 if (mpf->physptr < end) {
731#endif 720 if (mpf->physptr + size > end)
732 reserve_bootmem_generic(mpf->mpf_physptr, size, 721 size = end - mpf->physptr;
722 reserve_bootmem_generic(mpf->physptr, size,
723 BOOTMEM_DEFAULT);
724 }
725#else
726 reserve_bootmem_generic(mpf->physptr, size,
733 BOOTMEM_DEFAULT); 727 BOOTMEM_DEFAULT);
728#endif
734 } 729 }
735 730
736 return 1; 731 return 1;
@@ -809,15 +804,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
809 /* not legacy */ 804 /* not legacy */
810 805
811 for (i = 0; i < mp_irq_entries; i++) { 806 for (i = 0; i < mp_irq_entries; i++) {
812 if (mp_irqs[i].mp_irqtype != mp_INT) 807 if (mp_irqs[i].irqtype != mp_INT)
813 continue; 808 continue;
814 809
815 if (mp_irqs[i].mp_irqflag != 0x0f) 810 if (mp_irqs[i].irqflag != 0x0f)
816 continue; 811 continue;
817 812
818 if (mp_irqs[i].mp_srcbus != m->srcbus) 813 if (mp_irqs[i].srcbus != m->srcbus)
819 continue; 814 continue;
820 if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) 815 if (mp_irqs[i].srcbusirq != m->srcbusirq)
821 continue; 816 continue;
822 if (irq_used[i]) { 817 if (irq_used[i]) {
823 /* already claimed */ 818 /* already claimed */
@@ -922,10 +917,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
922 if (irq_used[i]) 917 if (irq_used[i])
923 continue; 918 continue;
924 919
925 if (mp_irqs[i].mp_irqtype != mp_INT) 920 if (mp_irqs[i].irqtype != mp_INT)
926 continue; 921 continue;
927 922
928 if (mp_irqs[i].mp_irqflag != 0x0f) 923 if (mp_irqs[i].irqflag != 0x0f)
929 continue; 924 continue;
930 925
931 if (nr_m_spare > 0) { 926 if (nr_m_spare > 0) {
@@ -1001,7 +996,7 @@ static int __init update_mp_table(void)
1001{ 996{
1002 char str[16]; 997 char str[16];
1003 char oem[10]; 998 char oem[10];
1004 struct intel_mp_floating *mpf; 999 struct mpf_intel *mpf;
1005 struct mpc_table *mpc, *mpc_new; 1000 struct mpc_table *mpc, *mpc_new;
1006 1001
1007 if (!enable_update_mptable) 1002 if (!enable_update_mptable)
@@ -1014,19 +1009,19 @@ static int __init update_mp_table(void)
1014 /* 1009 /*
1015 * Now see if we need to go further. 1010 * Now see if we need to go further.
1016 */ 1011 */
1017 if (mpf->mpf_feature1 != 0) 1012 if (mpf->feature1 != 0)
1018 return 0; 1013 return 0;
1019 1014
1020 if (!mpf->mpf_physptr) 1015 if (!mpf->physptr)
1021 return 0; 1016 return 0;
1022 1017
1023 mpc = phys_to_virt(mpf->mpf_physptr); 1018 mpc = phys_to_virt(mpf->physptr);
1024 1019
1025 if (!smp_check_mpc(mpc, oem, str)) 1020 if (!smp_check_mpc(mpc, oem, str))
1026 return 0; 1021 return 0;
1027 1022
1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); 1023 printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf));
1029 printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); 1024 printk(KERN_INFO "physptr: %x\n", mpf->physptr);
1030 1025
1031 if (mpc_new_phys && mpc->length > mpc_new_length) { 1026 if (mpc_new_phys && mpc->length > mpc_new_length) {
1032 mpc_new_phys = 0; 1027 mpc_new_phys = 0;
@@ -1047,23 +1042,23 @@ static int __init update_mp_table(void)
1047 } 1042 }
1048 printk(KERN_INFO "use in-positon replacing\n"); 1043 printk(KERN_INFO "use in-positon replacing\n");
1049 } else { 1044 } else {
1050 mpf->mpf_physptr = mpc_new_phys; 1045 mpf->physptr = mpc_new_phys;
1051 mpc_new = phys_to_virt(mpc_new_phys); 1046 mpc_new = phys_to_virt(mpc_new_phys);
1052 memcpy(mpc_new, mpc, mpc->length); 1047 memcpy(mpc_new, mpc, mpc->length);
1053 mpc = mpc_new; 1048 mpc = mpc_new;
1054 /* check if we can modify that */ 1049 /* check if we can modify that */
1055 if (mpc_new_phys - mpf->mpf_physptr) { 1050 if (mpc_new_phys - mpf->physptr) {
1056 struct intel_mp_floating *mpf_new; 1051 struct mpf_intel *mpf_new;
1057 /* steal 16 bytes from [0, 1k) */ 1052 /* steal 16 bytes from [0, 1k) */
1058 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); 1053 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
1059 mpf_new = phys_to_virt(0x400 - 16); 1054 mpf_new = phys_to_virt(0x400 - 16);
1060 memcpy(mpf_new, mpf, 16); 1055 memcpy(mpf_new, mpf, 16);
1061 mpf = mpf_new; 1056 mpf = mpf_new;
1062 mpf->mpf_physptr = mpc_new_phys; 1057 mpf->physptr = mpc_new_phys;
1063 } 1058 }
1064 mpf->mpf_checksum = 0; 1059 mpf->checksum = 0;
1065 mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); 1060 mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
1066 printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); 1061 printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
1067 } 1062 }
1068 1063
1069 /* 1064 /*
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 726266695b2c..3cf3413ec626 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -35,10 +35,10 @@
35#include <linux/device.h> 35#include <linux/device.h>
36#include <linux/cpu.h> 36#include <linux/cpu.h>
37#include <linux/notifier.h> 37#include <linux/notifier.h>
38#include <linux/uaccess.h>
38 39
39#include <asm/processor.h> 40#include <asm/processor.h>
40#include <asm/msr.h> 41#include <asm/msr.h>
41#include <asm/uaccess.h>
42#include <asm/system.h> 42#include <asm/system.h>
43 43
44static struct class *msr_class; 44static struct class *msr_class;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
deleted file mode 100644
index f2191d4f2717..000000000000
--- a/arch/x86/kernel/numaq_32.c
+++ /dev/null
@@ -1,293 +0,0 @@
1/*
2 * Written by: Patricia Gaughen, IBM Corporation
3 *
4 * Copyright (C) 2002, IBM Corp.
5 *
6 * All rights reserved.
7 *
8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2 of the License, or
11 * (at your option) any later version.
12 *
13 * This program is distributed in the hope that it will be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16 * NON INFRINGEMENT. See the GNU General Public License for more
17 * details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22 *
23 * Send feedback to <gone@us.ibm.com>
24 */
25
26#include <linux/mm.h>
27#include <linux/bootmem.h>
28#include <linux/mmzone.h>
29#include <linux/module.h>
30#include <linux/nodemask.h>
31#include <asm/numaq.h>
32#include <asm/topology.h>
33#include <asm/processor.h>
34#include <asm/genapic.h>
35#include <asm/e820.h>
36#include <asm/setup.h>
37
38#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
39
40/*
41 * Function: smp_dump_qct()
42 *
43 * Description: gets memory layout from the quad config table. This
44 * function also updates node_online_map with the nodes (quads) present.
45 */
46static void __init smp_dump_qct(void)
47{
48 int node;
49 struct eachquadmem *eq;
50 struct sys_cfg_data *scd =
51 (struct sys_cfg_data *)__va(SYS_CFG_DATA_PRIV_ADDR);
52
53 nodes_clear(node_online_map);
54 for_each_node(node) {
55 if (scd->quads_present31_0 & (1 << node)) {
56 node_set_online(node);
57 eq = &scd->eq[node];
58 /* Convert to pages */
59 node_start_pfn[node] = MB_TO_PAGES(
60 eq->hi_shrd_mem_start - eq->priv_mem_size);
61 node_end_pfn[node] = MB_TO_PAGES(
62 eq->hi_shrd_mem_start + eq->hi_shrd_mem_size);
63
64 e820_register_active_regions(node, node_start_pfn[node],
65 node_end_pfn[node]);
66 memory_present(node,
67 node_start_pfn[node], node_end_pfn[node]);
68 node_remap_size[node] = node_memmap_size_bytes(node,
69 node_start_pfn[node],
70 node_end_pfn[node]);
71 }
72 }
73}
74
75
76void __cpuinit numaq_tsc_disable(void)
77{
78 if (!found_numaq)
79 return;
80
81 if (num_online_nodes() > 1) {
82 printk(KERN_DEBUG "NUMAQ: disabling TSC\n");
83 setup_clear_cpu_cap(X86_FEATURE_TSC);
84 }
85}
86
87static int __init numaq_pre_time_init(void)
88{
89 numaq_tsc_disable();
90 return 0;
91}
92
93int found_numaq;
94/*
95 * Have to match translation table entries to main table entries by counter
96 * hence the mpc_record variable .... can't see a less disgusting way of
97 * doing this ....
98 */
99struct mpc_config_translation {
100 unsigned char mpc_type;
101 unsigned char trans_len;
102 unsigned char trans_type;
103 unsigned char trans_quad;
104 unsigned char trans_global;
105 unsigned char trans_local;
106 unsigned short trans_reserved;
107};
108
109/* x86_quirks member */
110static int mpc_record;
111static struct mpc_config_translation *translation_table[MAX_MPC_ENTRY]
112 __cpuinitdata;
113
114static inline int generate_logical_apicid(int quad, int phys_apicid)
115{
116 return (quad << 4) + (phys_apicid ? phys_apicid << 1 : 1);
117}
118
119/* x86_quirks member */
120static int mpc_apic_id(struct mpc_cpu *m)
121{
122 int quad = translation_table[mpc_record]->trans_quad;
123 int logical_apicid = generate_logical_apicid(quad, m->apicid);
124
125 printk(KERN_DEBUG "Processor #%d %u:%u APIC version %d (quad %d, apic %d)\n",
126 m->apicid, (m->cpufeature & CPU_FAMILY_MASK) >> 8,
127 (m->cpufeature & CPU_MODEL_MASK) >> 4,
128 m->apicver, quad, logical_apicid);
129 return logical_apicid;
130}
131
132int mp_bus_id_to_node[MAX_MP_BUSSES];
133
134int mp_bus_id_to_local[MAX_MP_BUSSES];
135
136/* x86_quirks member */
137static void mpc_oem_bus_info(struct mpc_bus *m, char *name)
138{
139 int quad = translation_table[mpc_record]->trans_quad;
140 int local = translation_table[mpc_record]->trans_local;
141
142 mp_bus_id_to_node[m->busid] = quad;
143 mp_bus_id_to_local[m->busid] = local;
144 printk(KERN_INFO "Bus #%d is %s (node %d)\n",
145 m->busid, name, quad);
146}
147
148int quad_local_to_mp_bus_id [NR_CPUS/4][4];
149
150/* x86_quirks member */
151static void mpc_oem_pci_bus(struct mpc_bus *m)
152{
153 int quad = translation_table[mpc_record]->trans_quad;
154 int local = translation_table[mpc_record]->trans_local;
155
156 quad_local_to_mp_bus_id[quad][local] = m->busid;
157}
158
159static void __init MP_translation_info(struct mpc_config_translation *m)
160{
161 printk(KERN_INFO
162 "Translation: record %d, type %d, quad %d, global %d, local %d\n",
163 mpc_record, m->trans_type, m->trans_quad, m->trans_global,
164 m->trans_local);
165
166 if (mpc_record >= MAX_MPC_ENTRY)
167 printk(KERN_ERR "MAX_MPC_ENTRY exceeded!\n");
168 else
169 translation_table[mpc_record] = m; /* stash this for later */
170 if (m->trans_quad < MAX_NUMNODES && !node_online(m->trans_quad))
171 node_set_online(m->trans_quad);
172}
173
174static int __init mpf_checksum(unsigned char *mp, int len)
175{
176 int sum = 0;
177
178 while (len--)
179 sum += *mp++;
180
181 return sum & 0xFF;
182}
183
184/*
185 * Read/parse the MPC oem tables
186 */
187
188static void __init smp_read_mpc_oem(struct mpc_oemtable *oemtable,
189 unsigned short oemsize)
190{
191 int count = sizeof(*oemtable); /* the header size */
192 unsigned char *oemptr = ((unsigned char *)oemtable) + count;
193
194 mpc_record = 0;
195 printk(KERN_INFO "Found an OEM MPC table at %8p - parsing it ... \n",
196 oemtable);
197 if (memcmp(oemtable->signature, MPC_OEM_SIGNATURE, 4)) {
198 printk(KERN_WARNING
199 "SMP mpc oemtable: bad signature [%c%c%c%c]!\n",
200 oemtable->signature[0], oemtable->signature[1],
201 oemtable->signature[2], oemtable->signature[3]);
202 return;
203 }
204 if (mpf_checksum((unsigned char *)oemtable, oemtable->length)) {
205 printk(KERN_WARNING "SMP oem mptable: checksum error!\n");
206 return;
207 }
208 while (count < oemtable->length) {
209 switch (*oemptr) {
210 case MP_TRANSLATION:
211 {
212 struct mpc_config_translation *m =
213 (struct mpc_config_translation *)oemptr;
214 MP_translation_info(m);
215 oemptr += sizeof(*m);
216 count += sizeof(*m);
217 ++mpc_record;
218 break;
219 }
220 default:
221 {
222 printk(KERN_WARNING
223 "Unrecognised OEM table entry type! - %d\n",
224 (int)*oemptr);
225 return;
226 }
227 }
228 }
229}
230
231static int __init numaq_setup_ioapic_ids(void)
232{
233 /* so can skip it */
234 return 1;
235}
236
237static int __init numaq_update_genapic(void)
238{
239 genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
240
241 return 0;
242}
243
244static struct x86_quirks numaq_x86_quirks __initdata = {
245 .arch_pre_time_init = numaq_pre_time_init,
246 .arch_time_init = NULL,
247 .arch_pre_intr_init = NULL,
248 .arch_memory_setup = NULL,
249 .arch_intr_init = NULL,
250 .arch_trap_init = NULL,
251 .mach_get_smp_config = NULL,
252 .mach_find_smp_config = NULL,
253 .mpc_record = &mpc_record,
254 .mpc_apic_id = mpc_apic_id,
255 .mpc_oem_bus_info = mpc_oem_bus_info,
256 .mpc_oem_pci_bus = mpc_oem_pci_bus,
257 .smp_read_mpc_oem = smp_read_mpc_oem,
258 .setup_ioapic_ids = numaq_setup_ioapic_ids,
259 .update_genapic = numaq_update_genapic,
260};
261
262void numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
263{
264 if (strncmp(oem, "IBM NUMA", 8))
265 printk("Warning! Not a NUMA-Q system!\n");
266 else
267 found_numaq = 1;
268}
269
270static __init void early_check_numaq(void)
271{
272 /*
273 * Find possible boot-time SMP configuration:
274 */
275 early_find_smp_config();
276 /*
277 * get boot-time SMP configuration:
278 */
279 if (smp_found_config)
280 early_get_smp_config();
281
282 if (found_numaq)
283 x86_quirks = &numaq_x86_quirks;
284}
285
286int __init get_memcfg_numaq(void)
287{
288 early_check_numaq();
289 if (!found_numaq)
290 return 0;
291 smp_dump_qct();
292 return 1;
293}
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 95777b0faa73..3a7c5a44082e 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = {
26}; 26};
27EXPORT_SYMBOL(pv_lock_ops); 27EXPORT_SYMBOL(pv_lock_ops);
28 28
29void __init paravirt_use_bytelocks(void)
30{
31#ifdef CONFIG_SMP
32 pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
33 pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
34 pv_lock_ops.spin_lock = __byte_spin_lock;
35 pv_lock_ops.spin_trylock = __byte_spin_trylock;
36 pv_lock_ops.spin_unlock = __byte_spin_unlock;
37#endif
38}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index c6520a4e85d4..63dd358d8ee1 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -28,7 +28,6 @@
28#include <asm/paravirt.h> 28#include <asm/paravirt.h>
29#include <asm/desc.h> 29#include <asm/desc.h>
30#include <asm/setup.h> 30#include <asm/setup.h>
31#include <asm/arch_hooks.h>
32#include <asm/pgtable.h> 31#include <asm/pgtable.h>
33#include <asm/time.h> 32#include <asm/time.h>
34#include <asm/pgalloc.h> 33#include <asm/pgalloc.h>
@@ -44,6 +43,17 @@ void _paravirt_nop(void)
44{ 43{
45} 44}
46 45
46/* identity function, which can be inlined */
47u32 _paravirt_ident_32(u32 x)
48{
49 return x;
50}
51
52u64 _paravirt_ident_64(u64 x)
53{
54 return x;
55}
56
47static void __init default_banner(void) 57static void __init default_banner(void)
48{ 58{
49 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 59 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -138,9 +148,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
138 if (opfunc == NULL) 148 if (opfunc == NULL)
139 /* If there's no function, patch it with a ud2a (BUG) */ 149 /* If there's no function, patch it with a ud2a (BUG) */
140 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); 150 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
141 else if (opfunc == paravirt_nop) 151 else if (opfunc == _paravirt_nop)
142 /* If the operation is a nop, then nop the callsite */ 152 /* If the operation is a nop, then nop the callsite */
143 ret = paravirt_patch_nop(); 153 ret = paravirt_patch_nop();
154
155 /* identity functions just return their single argument */
156 else if (opfunc == _paravirt_ident_32)
157 ret = paravirt_patch_ident_32(insnbuf, len);
158 else if (opfunc == _paravirt_ident_64)
159 ret = paravirt_patch_ident_64(insnbuf, len);
160
144 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || 161 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
145 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || 162 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
146 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || 163 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
@@ -318,10 +335,10 @@ struct pv_time_ops pv_time_ops = {
318 335
319struct pv_irq_ops pv_irq_ops = { 336struct pv_irq_ops pv_irq_ops = {
320 .init_IRQ = native_init_IRQ, 337 .init_IRQ = native_init_IRQ,
321 .save_fl = native_save_fl, 338 .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
322 .restore_fl = native_restore_fl, 339 .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
323 .irq_disable = native_irq_disable, 340 .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
324 .irq_enable = native_irq_enable, 341 .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
325 .safe_halt = native_safe_halt, 342 .safe_halt = native_safe_halt,
326 .halt = native_halt, 343 .halt = native_halt,
327#ifdef CONFIG_X86_64 344#ifdef CONFIG_X86_64
@@ -399,6 +416,14 @@ struct pv_apic_ops pv_apic_ops = {
399#endif 416#endif
400}; 417};
401 418
419#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
420/* 32-bit pagetable entries */
421#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
422#else
423/* 64-bit pagetable entries */
424#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
425#endif
426
402struct pv_mmu_ops pv_mmu_ops = { 427struct pv_mmu_ops pv_mmu_ops = {
403#ifndef CONFIG_X86_64 428#ifndef CONFIG_X86_64
404 .pagetable_setup_start = native_pagetable_setup_start, 429 .pagetable_setup_start = native_pagetable_setup_start,
@@ -450,22 +475,23 @@ struct pv_mmu_ops pv_mmu_ops = {
450 .pmd_clear = native_pmd_clear, 475 .pmd_clear = native_pmd_clear,
451#endif 476#endif
452 .set_pud = native_set_pud, 477 .set_pud = native_set_pud,
453 .pmd_val = native_pmd_val, 478
454 .make_pmd = native_make_pmd, 479 .pmd_val = PTE_IDENT,
480 .make_pmd = PTE_IDENT,
455 481
456#if PAGETABLE_LEVELS == 4 482#if PAGETABLE_LEVELS == 4
457 .pud_val = native_pud_val, 483 .pud_val = PTE_IDENT,
458 .make_pud = native_make_pud, 484 .make_pud = PTE_IDENT,
485
459 .set_pgd = native_set_pgd, 486 .set_pgd = native_set_pgd,
460#endif 487#endif
461#endif /* PAGETABLE_LEVELS >= 3 */ 488#endif /* PAGETABLE_LEVELS >= 3 */
462 489
463 .pte_val = native_pte_val, 490 .pte_val = PTE_IDENT,
464 .pte_flags = native_pte_flags, 491 .pgd_val = PTE_IDENT,
465 .pgd_val = native_pgd_val,
466 492
467 .make_pte = native_make_pte, 493 .make_pte = PTE_IDENT,
468 .make_pgd = native_make_pgd, 494 .make_pgd = PTE_IDENT,
469 495
470 .dup_mmap = paravirt_nop, 496 .dup_mmap = paravirt_nop,
471 .exit_mmap = paravirt_nop, 497 .exit_mmap = paravirt_nop,
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 9fe644f4861d..d9f32e6d6ab6 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
12DEF_NATIVE(pv_cpu_ops, clts, "clts"); 12DEF_NATIVE(pv_cpu_ops, clts, "clts");
13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); 13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
14 14
15unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
16{
17 /* arg in %eax, return in %eax */
18 return 0;
19}
20
21unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
22{
23 /* arg in %edx:%eax, return in %edx:%eax */
24 return 0;
25}
26
15unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 27unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
16 unsigned long addr, unsigned len) 28 unsigned long addr, unsigned len)
17{ 29{
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 061d01df9ae6..3f08f34f93eb 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
19DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); 19DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
20DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); 20DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
21 21
22DEF_NATIVE(, mov32, "mov %edi, %eax");
23DEF_NATIVE(, mov64, "mov %rdi, %rax");
24
25unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
26{
27 return paravirt_patch_insns(insnbuf, len,
28 start__mov32, end__mov32);
29}
30
31unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
32{
33 return paravirt_patch_insns(insnbuf, len,
34 start__mov64, end__mov64);
35}
36
22unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 37unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
23 unsigned long addr, unsigned len) 38 unsigned long addr, unsigned len)
24{ 39{
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c
index 675a48c404a5..071e7fea42e5 100644
--- a/arch/x86/kernel/probe_roms_32.c
+++ b/arch/x86/kernel/probe_roms_32.c
@@ -18,7 +18,7 @@
18#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/sections.h> 19#include <asm/sections.h>
20#include <asm/io.h> 20#include <asm/io.h>
21#include <setup_arch.h> 21#include <asm/setup_arch.h>
22 22
23static struct resource system_rom_resource = { 23static struct resource system_rom_resource = {
24 .name = "System ROM", 24 .name = "System ROM",
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index 6d12f7e37f8c..87b69d4fac16 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -350,7 +350,7 @@ static void c1e_idle(void)
350 350
351void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 351void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
352{ 352{
353#ifdef CONFIG_X86_SMP 353#ifdef CONFIG_SMP
354 if (pm_idle == poll_idle && smp_num_siblings > 1) { 354 if (pm_idle == poll_idle && smp_num_siblings > 1) {
355 printk(KERN_WARNING "WARNING: polling idle and HT enabled," 355 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
356 " performance may degrade.\n"); 356 " performance may degrade.\n");
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index bd4da2af08ae..646da41a620a 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -11,6 +11,7 @@
11 11
12#include <stdarg.h> 12#include <stdarg.h>
13 13
14#include <linux/stackprotector.h>
14#include <linux/cpu.h> 15#include <linux/cpu.h>
15#include <linux/errno.h> 16#include <linux/errno.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
@@ -66,9 +67,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 67DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 68EXPORT_PER_CPU_SYMBOL(current_task);
68 69
69DEFINE_PER_CPU(int, cpu_number);
70EXPORT_PER_CPU_SYMBOL(cpu_number);
71
72/* 70/*
73 * Return saved PC of a blocked thread. 71 * Return saved PC of a blocked thread.
74 */ 72 */
@@ -94,6 +92,15 @@ void cpu_idle(void)
94{ 92{
95 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
96 94
95 /*
96 * If we're the non-boot CPU, nothing set the stack canary up
97 * for us. CPU0 already has it initialized but no harm in
98 * doing it again. This is a good place for updating it, as
99 * we wont ever return from this function (so the invalid
100 * canaries already on the stack wont ever trigger).
101 */
102 boot_init_stack_canary();
103
97 current_thread_info()->status |= TS_POLLING; 104 current_thread_info()->status |= TS_POLLING;
98 105
99 /* endless idle loop with no priority at all */ 106 /* endless idle loop with no priority at all */
@@ -108,7 +115,6 @@ void cpu_idle(void)
108 play_dead(); 115 play_dead();
109 116
110 local_irq_disable(); 117 local_irq_disable();
111 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
112 /* Don't trace irqs off for idle */ 118 /* Don't trace irqs off for idle */
113 stop_critical_timings(); 119 stop_critical_timings();
114 pm_idle(); 120 pm_idle();
@@ -132,7 +138,7 @@ void __show_regs(struct pt_regs *regs, int all)
132 if (user_mode_vm(regs)) { 138 if (user_mode_vm(regs)) {
133 sp = regs->sp; 139 sp = regs->sp;
134 ss = regs->ss & 0xffff; 140 ss = regs->ss & 0xffff;
135 savesegment(gs, gs); 141 gs = get_user_gs(regs);
136 } else { 142 } else {
137 sp = (unsigned long) (&regs->sp); 143 sp = (unsigned long) (&regs->sp);
138 savesegment(ss, ss); 144 savesegment(ss, ss);
@@ -213,6 +219,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
213 regs.ds = __USER_DS; 219 regs.ds = __USER_DS;
214 regs.es = __USER_DS; 220 regs.es = __USER_DS;
215 regs.fs = __KERNEL_PERCPU; 221 regs.fs = __KERNEL_PERCPU;
222 regs.gs = __KERNEL_STACK_CANARY;
216 regs.orig_ax = -1; 223 regs.orig_ax = -1;
217 regs.ip = (unsigned long) kernel_thread_helper; 224 regs.ip = (unsigned long) kernel_thread_helper;
218 regs.cs = __KERNEL_CS | get_kernel_rpl(); 225 regs.cs = __KERNEL_CS | get_kernel_rpl();
@@ -305,7 +312,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
305 312
306 p->thread.ip = (unsigned long) ret_from_fork; 313 p->thread.ip = (unsigned long) ret_from_fork;
307 314
308 savesegment(gs, p->thread.gs); 315 task_user_gs(p) = get_user_gs(regs);
309 316
310 tsk = current; 317 tsk = current;
311 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 318 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -343,7 +350,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
343void 350void
344start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) 351start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
345{ 352{
346 __asm__("movl %0, %%gs" : : "r"(0)); 353 set_user_gs(regs, 0);
347 regs->fs = 0; 354 regs->fs = 0;
348 set_fs(USER_DS); 355 set_fs(USER_DS);
349 regs->ds = __USER_DS; 356 regs->ds = __USER_DS;
@@ -540,7 +547,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
540 * used %fs or %gs (it does not today), or if the kernel is 547 * used %fs or %gs (it does not today), or if the kernel is
541 * running inside of a hypervisor layer. 548 * running inside of a hypervisor layer.
542 */ 549 */
543 savesegment(gs, prev->gs); 550 lazy_save_gs(prev->gs);
544 551
545 /* 552 /*
546 * Load the per-thread Thread-Local Storage descriptor. 553 * Load the per-thread Thread-Local Storage descriptor.
@@ -586,31 +593,31 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
586 * Restore %gs if needed (which is common) 593 * Restore %gs if needed (which is common)
587 */ 594 */
588 if (prev->gs | next->gs) 595 if (prev->gs | next->gs)
589 loadsegment(gs, next->gs); 596 lazy_load_gs(next->gs);
590 597
591 x86_write_percpu(current_task, next_p); 598 percpu_write(current_task, next_p);
592 599
593 return prev_p; 600 return prev_p;
594} 601}
595 602
596asmlinkage int sys_fork(struct pt_regs regs) 603int sys_fork(struct pt_regs *regs)
597{ 604{
598 return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL); 605 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
599} 606}
600 607
601asmlinkage int sys_clone(struct pt_regs regs) 608int sys_clone(struct pt_regs *regs)
602{ 609{
603 unsigned long clone_flags; 610 unsigned long clone_flags;
604 unsigned long newsp; 611 unsigned long newsp;
605 int __user *parent_tidptr, *child_tidptr; 612 int __user *parent_tidptr, *child_tidptr;
606 613
607 clone_flags = regs.bx; 614 clone_flags = regs->bx;
608 newsp = regs.cx; 615 newsp = regs->cx;
609 parent_tidptr = (int __user *)regs.dx; 616 parent_tidptr = (int __user *)regs->dx;
610 child_tidptr = (int __user *)regs.di; 617 child_tidptr = (int __user *)regs->di;
611 if (!newsp) 618 if (!newsp)
612 newsp = regs.sp; 619 newsp = regs->sp;
613 return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr); 620 return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
614} 621}
615 622
616/* 623/*
@@ -623,27 +630,27 @@ asmlinkage int sys_clone(struct pt_regs regs)
623 * do not have enough call-clobbered registers to hold all 630 * do not have enough call-clobbered registers to hold all
624 * the information you need. 631 * the information you need.
625 */ 632 */
626asmlinkage int sys_vfork(struct pt_regs regs) 633int sys_vfork(struct pt_regs *regs)
627{ 634{
628 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL); 635 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
629} 636}
630 637
631/* 638/*
632 * sys_execve() executes a new program. 639 * sys_execve() executes a new program.
633 */ 640 */
634asmlinkage int sys_execve(struct pt_regs regs) 641int sys_execve(struct pt_regs *regs)
635{ 642{
636 int error; 643 int error;
637 char *filename; 644 char *filename;
638 645
639 filename = getname((char __user *) regs.bx); 646 filename = getname((char __user *) regs->bx);
640 error = PTR_ERR(filename); 647 error = PTR_ERR(filename);
641 if (IS_ERR(filename)) 648 if (IS_ERR(filename))
642 goto out; 649 goto out;
643 error = do_execve(filename, 650 error = do_execve(filename,
644 (char __user * __user *) regs.cx, 651 (char __user * __user *) regs->cx,
645 (char __user * __user *) regs.dx, 652 (char __user * __user *) regs->dx,
646 &regs); 653 regs);
647 if (error == 0) { 654 if (error == 0) {
648 /* Make sure we don't return using sysenter.. */ 655 /* Make sure we don't return using sysenter.. */
649 set_thread_flag(TIF_IRET); 656 set_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 85b4cb5c1980..836ef6575f01 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -16,6 +16,7 @@
16 16
17#include <stdarg.h> 17#include <stdarg.h>
18 18
19#include <linux/stackprotector.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/errno.h> 21#include <linux/errno.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
@@ -47,7 +48,6 @@
47#include <asm/processor.h> 48#include <asm/processor.h>
48#include <asm/i387.h> 49#include <asm/i387.h>
49#include <asm/mmu_context.h> 50#include <asm/mmu_context.h>
50#include <asm/pda.h>
51#include <asm/prctl.h> 51#include <asm/prctl.h>
52#include <asm/desc.h> 52#include <asm/desc.h>
53#include <asm/proto.h> 53#include <asm/proto.h>
@@ -58,6 +58,12 @@
58 58
59asmlinkage extern void ret_from_fork(void); 59asmlinkage extern void ret_from_fork(void);
60 60
61DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
62EXPORT_PER_CPU_SYMBOL(current_task);
63
64DEFINE_PER_CPU(unsigned long, old_rsp);
65static DEFINE_PER_CPU(unsigned char, is_idle);
66
61unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 67unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
62 68
63static ATOMIC_NOTIFIER_HEAD(idle_notifier); 69static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -76,13 +82,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
76 82
77void enter_idle(void) 83void enter_idle(void)
78{ 84{
79 write_pda(isidle, 1); 85 percpu_write(is_idle, 1);
80 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 86 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
81} 87}
82 88
83static void __exit_idle(void) 89static void __exit_idle(void)
84{ 90{
85 if (test_and_clear_bit_pda(0, isidle) == 0) 91 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
86 return; 92 return;
87 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 93 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
88} 94}
@@ -112,6 +118,16 @@ static inline void play_dead(void)
112void cpu_idle(void) 118void cpu_idle(void)
113{ 119{
114 current_thread_info()->status |= TS_POLLING; 120 current_thread_info()->status |= TS_POLLING;
121
122 /*
123 * If we're the non-boot CPU, nothing set the stack canary up
124 * for us. CPU0 already has it initialized but no harm in
125 * doing it again. This is a good place for updating it, as
126 * we wont ever return from this function (so the invalid
127 * canaries already on the stack wont ever trigger).
128 */
129 boot_init_stack_canary();
130
115 /* endless idle loop with no priority at all */ 131 /* endless idle loop with no priority at all */
116 while (1) { 132 while (1) {
117 tick_nohz_stop_sched_tick(1); 133 tick_nohz_stop_sched_tick(1);
@@ -397,7 +413,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
397 load_gs_index(0); 413 load_gs_index(0);
398 regs->ip = new_ip; 414 regs->ip = new_ip;
399 regs->sp = new_sp; 415 regs->sp = new_sp;
400 write_pda(oldrsp, new_sp); 416 percpu_write(old_rsp, new_sp);
401 regs->cs = __USER_CS; 417 regs->cs = __USER_CS;
402 regs->ss = __USER_DS; 418 regs->ss = __USER_DS;
403 regs->flags = 0x200; 419 regs->flags = 0x200;
@@ -618,21 +634,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
618 /* 634 /*
619 * Switch the PDA and FPU contexts. 635 * Switch the PDA and FPU contexts.
620 */ 636 */
621 prev->usersp = read_pda(oldrsp); 637 prev->usersp = percpu_read(old_rsp);
622 write_pda(oldrsp, next->usersp); 638 percpu_write(old_rsp, next->usersp);
623 write_pda(pcurrent, next_p); 639 percpu_write(current_task, next_p);
624 640
625 write_pda(kernelstack, 641 percpu_write(kernel_stack,
626 (unsigned long)task_stack_page(next_p) + 642 (unsigned long)task_stack_page(next_p) +
627 THREAD_SIZE - PDA_STACKOFFSET); 643 THREAD_SIZE - KERNEL_STACK_OFFSET);
628#ifdef CONFIG_CC_STACKPROTECTOR
629 write_pda(stack_canary, next_p->stack_canary);
630 /*
631 * Build time only check to make sure the stack_canary is at
632 * offset 40 in the pda; this is a gcc ABI requirement
633 */
634 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
635#endif
636 644
637 /* 645 /*
638 * Now maybe reload the debug registers and handle I/O bitmaps 646 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 5a4c23d89892..fb2159a5c817 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
75static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 75static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
76{ 76{
77 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 77 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
78 regno >>= 2; 78 return &regs->bx + (regno >> 2);
79 if (regno > FS)
80 --regno;
81 return &regs->bx + regno;
82} 79}
83 80
84static u16 get_segment_reg(struct task_struct *task, unsigned long offset) 81static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
90 if (offset != offsetof(struct user_regs_struct, gs)) 87 if (offset != offsetof(struct user_regs_struct, gs))
91 retval = *pt_regs_access(task_pt_regs(task), offset); 88 retval = *pt_regs_access(task_pt_regs(task), offset);
92 else { 89 else {
93 retval = task->thread.gs;
94 if (task == current) 90 if (task == current)
95 savesegment(gs, retval); 91 retval = get_user_gs(task_pt_regs(task));
92 else
93 retval = task_user_gs(task);
96 } 94 }
97 return retval; 95 return retval;
98} 96}
@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task,
126 break; 124 break;
127 125
128 case offsetof(struct user_regs_struct, gs): 126 case offsetof(struct user_regs_struct, gs):
129 task->thread.gs = value;
130 if (task == current) 127 if (task == current)
131 /* 128 set_user_gs(task_pt_regs(task), value);
132 * The user-mode %gs is not affected by 129 else
133 * kernel entry, so we must update the CPU. 130 task_user_gs(task) = value;
134 */
135 loadsegment(gs, value);
136 } 131 }
137 132
138 return 0; 133 return 0;
@@ -273,7 +268,7 @@ static unsigned long debugreg_addr_limit(struct task_struct *task)
273 if (test_tsk_thread_flag(task, TIF_IA32)) 268 if (test_tsk_thread_flag(task, TIF_IA32))
274 return IA32_PAGE_OFFSET - 3; 269 return IA32_PAGE_OFFSET - 3;
275#endif 270#endif
276 return TASK_SIZE64 - 7; 271 return TASK_SIZE_MAX - 7;
277} 272}
278 273
279#endif /* CONFIG_X86_32 */ 274#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643b..1cc18d439bbb 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -14,6 +14,7 @@
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h> 15#include <asm/pci_x86.h>
16#include <asm/virtext.h> 16#include <asm/virtext.h>
17#include <asm/cpu.h>
17 18
18#ifdef CONFIG_X86_32 19#ifdef CONFIG_X86_32
19# include <linux/dmi.h> 20# include <linux/dmi.h>
@@ -23,8 +24,6 @@
23# include <asm/iommu.h> 24# include <asm/iommu.h>
24#endif 25#endif
25 26
26#include <mach_ipi.h>
27
28/* 27/*
29 * Power off function, if any 28 * Power off function, if any
30 */ 29 */
@@ -650,7 +649,7 @@ static int crash_nmi_callback(struct notifier_block *self,
650 649
651static void smp_send_nmi_allbutself(void) 650static void smp_send_nmi_allbutself(void)
652{ 651{
653 send_IPI_allbutself(NMI_VECTOR); 652 apic->send_IPI_allbutself(NMI_VECTOR);
654} 653}
655 654
656static struct notifier_block crash_nmi_nb = { 655static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/relocate_kernel_32.S b/arch/x86/kernel/relocate_kernel_32.S
index a160f3119725..2064d0aa8d28 100644
--- a/arch/x86/kernel/relocate_kernel_32.S
+++ b/arch/x86/kernel/relocate_kernel_32.S
@@ -7,7 +7,7 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h> 10#include <asm/page_types.h>
11#include <asm/kexec.h> 11#include <asm/kexec.h>
12#include <asm/processor-flags.h> 12#include <asm/processor-flags.h>
13 13
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index f5afe665a82b..d32cfb27a479 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -7,10 +7,10 @@
7 */ 7 */
8 8
9#include <linux/linkage.h> 9#include <linux/linkage.h>
10#include <asm/page.h> 10#include <asm/page_types.h>
11#include <asm/kexec.h> 11#include <asm/kexec.h>
12#include <asm/processor-flags.h> 12#include <asm/processor-flags.h>
13#include <asm/pgtable.h> 13#include <asm/pgtable_types.h>
14 14
15/* 15/*
16 * Must be relocatable PIC code callable as a C function 16 * Must be relocatable PIC code callable as a C function
@@ -29,122 +29,6 @@ relocate_kernel:
29 * %rdx start address 29 * %rdx start address
30 */ 30 */
31 31
32 /* map the control page at its virtual address */
33
34 movq $0x0000ff8000000000, %r10 /* mask */
35 mov $(39 - 3), %cl /* bits to shift */
36 movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
37
38 movq %r11, %r9
39 andq %r10, %r9
40 shrq %cl, %r9
41
42 movq PTR(VA_PGD)(%rsi), %r8
43 addq %r8, %r9
44 movq PTR(PA_PUD_0)(%rsi), %r8
45 orq $PAGE_ATTR, %r8
46 movq %r8, (%r9)
47
48 shrq $9, %r10
49 sub $9, %cl
50
51 movq %r11, %r9
52 andq %r10, %r9
53 shrq %cl, %r9
54
55 movq PTR(VA_PUD_0)(%rsi), %r8
56 addq %r8, %r9
57 movq PTR(PA_PMD_0)(%rsi), %r8
58 orq $PAGE_ATTR, %r8
59 movq %r8, (%r9)
60
61 shrq $9, %r10
62 sub $9, %cl
63
64 movq %r11, %r9
65 andq %r10, %r9
66 shrq %cl, %r9
67
68 movq PTR(VA_PMD_0)(%rsi), %r8
69 addq %r8, %r9
70 movq PTR(PA_PTE_0)(%rsi), %r8
71 orq $PAGE_ATTR, %r8
72 movq %r8, (%r9)
73
74 shrq $9, %r10
75 sub $9, %cl
76
77 movq %r11, %r9
78 andq %r10, %r9
79 shrq %cl, %r9
80
81 movq PTR(VA_PTE_0)(%rsi), %r8
82 addq %r8, %r9
83 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
84 orq $PAGE_ATTR, %r8
85 movq %r8, (%r9)
86
87 /* identity map the control page at its physical address */
88
89 movq $0x0000ff8000000000, %r10 /* mask */
90 mov $(39 - 3), %cl /* bits to shift */
91 movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
92
93 movq %r11, %r9
94 andq %r10, %r9
95 shrq %cl, %r9
96
97 movq PTR(VA_PGD)(%rsi), %r8
98 addq %r8, %r9
99 movq PTR(PA_PUD_1)(%rsi), %r8
100 orq $PAGE_ATTR, %r8
101 movq %r8, (%r9)
102
103 shrq $9, %r10
104 sub $9, %cl
105
106 movq %r11, %r9
107 andq %r10, %r9
108 shrq %cl, %r9
109
110 movq PTR(VA_PUD_1)(%rsi), %r8
111 addq %r8, %r9
112 movq PTR(PA_PMD_1)(%rsi), %r8
113 orq $PAGE_ATTR, %r8
114 movq %r8, (%r9)
115
116 shrq $9, %r10
117 sub $9, %cl
118
119 movq %r11, %r9
120 andq %r10, %r9
121 shrq %cl, %r9
122
123 movq PTR(VA_PMD_1)(%rsi), %r8
124 addq %r8, %r9
125 movq PTR(PA_PTE_1)(%rsi), %r8
126 orq $PAGE_ATTR, %r8
127 movq %r8, (%r9)
128
129 shrq $9, %r10
130 sub $9, %cl
131
132 movq %r11, %r9
133 andq %r10, %r9
134 shrq %cl, %r9
135
136 movq PTR(VA_PTE_1)(%rsi), %r8
137 addq %r8, %r9
138 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
139 orq $PAGE_ATTR, %r8
140 movq %r8, (%r9)
141
142relocate_new_kernel:
143 /* %rdi indirection_page
144 * %rsi page_list
145 * %rdx start address
146 */
147
148 /* zero out flags, and disable interrupts */ 32 /* zero out flags, and disable interrupts */
149 pushq $0 33 pushq $0
150 popfq 34 popfq
@@ -156,9 +40,8 @@ relocate_new_kernel:
156 /* get physical address of page table now too */ 40 /* get physical address of page table now too */
157 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx 41 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
158 42
159 /* switch to new set of page tables */ 43 /* Switch to the identity mapped page tables */
160 movq PTR(PA_PGD)(%rsi), %r9 44 movq %rcx, %cr3
161 movq %r9, %cr3
162 45
163 /* setup a new stack at the end of the physical control page */ 46 /* setup a new stack at the end of the physical control page */
164 lea PAGE_SIZE(%r8), %rsp 47 lea PAGE_SIZE(%r8), %rsp
@@ -194,9 +77,7 @@ identity_mapped:
194 jmp 1f 77 jmp 1f
1951: 781:
196 79
197 /* Switch to the identity mapped page tables, 80 /* Flush the TLB (needed?) */
198 * and flush the TLB.
199 */
200 movq %rcx, %cr3 81 movq %rcx, %cr3
201 82
202 /* Do the copies */ 83 /* Do the copies */
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index c461f6d69074..5b85759e7972 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -74,14 +74,15 @@
74#include <asm/e820.h> 74#include <asm/e820.h>
75#include <asm/mpspec.h> 75#include <asm/mpspec.h>
76#include <asm/setup.h> 76#include <asm/setup.h>
77#include <asm/arch_hooks.h>
78#include <asm/efi.h> 77#include <asm/efi.h>
78#include <asm/timer.h>
79#include <asm/i8259.h>
79#include <asm/sections.h> 80#include <asm/sections.h>
80#include <asm/dmi.h> 81#include <asm/dmi.h>
81#include <asm/io_apic.h> 82#include <asm/io_apic.h>
82#include <asm/ist.h> 83#include <asm/ist.h>
83#include <asm/vmi.h> 84#include <asm/vmi.h>
84#include <setup_arch.h> 85#include <asm/setup_arch.h>
85#include <asm/bios_ebda.h> 86#include <asm/bios_ebda.h>
86#include <asm/cacheflush.h> 87#include <asm/cacheflush.h>
87#include <asm/processor.h> 88#include <asm/processor.h>
@@ -89,7 +90,7 @@
89 90
90#include <asm/system.h> 91#include <asm/system.h>
91#include <asm/vsyscall.h> 92#include <asm/vsyscall.h>
92#include <asm/smp.h> 93#include <asm/cpu.h>
93#include <asm/desc.h> 94#include <asm/desc.h>
94#include <asm/dma.h> 95#include <asm/dma.h>
95#include <asm/iommu.h> 96#include <asm/iommu.h>
@@ -97,7 +98,6 @@
97#include <asm/mmu_context.h> 98#include <asm/mmu_context.h>
98#include <asm/proto.h> 99#include <asm/proto.h>
99 100
100#include <mach_apic.h>
101#include <asm/paravirt.h> 101#include <asm/paravirt.h>
102#include <asm/hypervisor.h> 102#include <asm/hypervisor.h>
103 103
@@ -112,6 +112,20 @@
112#define ARCH_SETUP 112#define ARCH_SETUP
113#endif 113#endif
114 114
115unsigned int boot_cpu_id __read_mostly;
116
117#ifdef CONFIG_X86_64
118int default_cpu_present_to_apicid(int mps_cpu)
119{
120 return __default_cpu_present_to_apicid(mps_cpu);
121}
122
123int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
124{
125 return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
126}
127#endif
128
115#ifndef CONFIG_DEBUG_BOOT_PARAMS 129#ifndef CONFIG_DEBUG_BOOT_PARAMS
116struct boot_params __initdata boot_params; 130struct boot_params __initdata boot_params;
117#else 131#else
@@ -586,19 +600,18 @@ static int __init setup_elfcorehdr(char *arg)
586early_param("elfcorehdr", setup_elfcorehdr); 600early_param("elfcorehdr", setup_elfcorehdr);
587#endif 601#endif
588 602
589static int __init default_update_genapic(void) 603static int __init default_update_apic(void)
590{ 604{
591#ifdef CONFIG_X86_SMP 605#ifdef CONFIG_SMP
592# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) 606 if (!apic->wakeup_cpu)
593 genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; 607 apic->wakeup_cpu = wakeup_secondary_cpu_via_init;
594# endif
595#endif 608#endif
596 609
597 return 0; 610 return 0;
598} 611}
599 612
600static struct x86_quirks default_x86_quirks __initdata = { 613static struct x86_quirks default_x86_quirks __initdata = {
601 .update_genapic = default_update_genapic, 614 .update_apic = default_update_apic,
602}; 615};
603 616
604struct x86_quirks *x86_quirks __initdata = &default_x86_quirks; 617struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
@@ -656,7 +669,6 @@ void __init setup_arch(char **cmdline_p)
656#ifdef CONFIG_X86_32 669#ifdef CONFIG_X86_32
657 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data)); 670 memcpy(&boot_cpu_data, &new_cpu_data, sizeof(new_cpu_data));
658 visws_early_detect(); 671 visws_early_detect();
659 pre_setup_arch_hook();
660#else 672#else
661 printk(KERN_INFO "Command line: %s\n", boot_command_line); 673 printk(KERN_INFO "Command line: %s\n", boot_command_line);
662#endif 674#endif
@@ -823,8 +835,7 @@ void __init setup_arch(char **cmdline_p)
823#else 835#else
824 num_physpages = max_pfn; 836 num_physpages = max_pfn;
825 837
826 if (cpu_has_x2apic) 838 check_x2apic();
827 check_x2apic();
828 839
829 /* How many end-of-memory variables you have, grandma! */ 840 /* How many end-of-memory variables you have, grandma! */
830 /* need this before calling reserve_initrd */ 841 /* need this before calling reserve_initrd */
@@ -892,12 +903,11 @@ void __init setup_arch(char **cmdline_p)
892 */ 903 */
893 acpi_reserve_bootmem(); 904 acpi_reserve_bootmem();
894#endif 905#endif
895#ifdef CONFIG_X86_FIND_SMP_CONFIG
896 /* 906 /*
897 * Find and reserve possible boot-time SMP configuration: 907 * Find and reserve possible boot-time SMP configuration:
898 */ 908 */
899 find_smp_config(); 909 find_smp_config();
900#endif 910
901 reserve_crashkernel(); 911 reserve_crashkernel();
902 912
903#ifdef CONFIG_X86_64 913#ifdef CONFIG_X86_64
@@ -924,9 +934,7 @@ void __init setup_arch(char **cmdline_p)
924 map_vsyscall(); 934 map_vsyscall();
925#endif 935#endif
926 936
927#ifdef CONFIG_X86_GENERICARCH
928 generic_apic_probe(); 937 generic_apic_probe();
929#endif
930 938
931 early_quirks(); 939 early_quirks();
932 940
@@ -977,4 +985,95 @@ void __init setup_arch(char **cmdline_p)
977#endif 985#endif
978} 986}
979 987
988#ifdef CONFIG_X86_32
989
990/**
991 * x86_quirk_pre_intr_init - initialisation prior to setting up interrupt vectors
992 *
993 * Description:
994 * Perform any necessary interrupt initialisation prior to setting up
995 * the "ordinary" interrupt call gates. For legacy reasons, the ISA
996 * interrupts should be initialised here if the machine emulates a PC
997 * in any way.
998 **/
999void __init x86_quirk_pre_intr_init(void)
1000{
1001 if (x86_quirks->arch_pre_intr_init) {
1002 if (x86_quirks->arch_pre_intr_init())
1003 return;
1004 }
1005 init_ISA_irqs();
1006}
1007
1008/**
1009 * x86_quirk_intr_init - post gate setup interrupt initialisation
1010 *
1011 * Description:
1012 * Fill in any interrupts that may have been left out by the general
1013 * init_IRQ() routine. interrupts having to do with the machine rather
1014 * than the devices on the I/O bus (like APIC interrupts in intel MP
1015 * systems) are started here.
1016 **/
1017void __init x86_quirk_intr_init(void)
1018{
1019 if (x86_quirks->arch_intr_init) {
1020 if (x86_quirks->arch_intr_init())
1021 return;
1022 }
1023}
1024
1025/**
1026 * x86_quirk_trap_init - initialise system specific traps
1027 *
1028 * Description:
1029 * Called as the final act of trap_init(). Used in VISWS to initialise
1030 * the various board specific APIC traps.
1031 **/
1032void __init x86_quirk_trap_init(void)
1033{
1034 if (x86_quirks->arch_trap_init) {
1035 if (x86_quirks->arch_trap_init())
1036 return;
1037 }
1038}
1039
1040static struct irqaction irq0 = {
1041 .handler = timer_interrupt,
1042 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL | IRQF_TIMER,
1043 .mask = CPU_MASK_NONE,
1044 .name = "timer"
1045};
980 1046
1047/**
1048 * x86_quirk_pre_time_init - do any specific initialisations before.
1049 *
1050 **/
1051void __init x86_quirk_pre_time_init(void)
1052{
1053 if (x86_quirks->arch_pre_time_init)
1054 x86_quirks->arch_pre_time_init();
1055}
1056
1057/**
1058 * x86_quirk_time_init - do any specific initialisations for the system timer.
1059 *
1060 * Description:
1061 * Must plug the system timer interrupt source at HZ into the IRQ listed
1062 * in irq_vectors.h:TIMER_IRQ
1063 **/
1064void __init x86_quirk_time_init(void)
1065{
1066 if (x86_quirks->arch_time_init) {
1067 /*
1068 * A nonzero return code does not mean failure, it means
1069 * that the architecture quirk does not want any
1070 * generic (timer) setup to be performed after this:
1071 */
1072 if (x86_quirks->arch_time_init())
1073 return;
1074 }
1075
1076 irq0.mask = cpumask_of_cpu(0);
1077 setup_irq(0, &irq0);
1078}
1079#endif /* CONFIG_X86_32 */
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 01161077a49c..d992e6cff730 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,145 +13,47 @@
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/apicdef.h> 14#include <asm/apicdef.h>
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h>
17#include <asm/cpumask.h>
18#include <asm/cpu.h>
19#include <asm/stackprotector.h>
16 20
17#ifdef CONFIG_X86_LOCAL_APIC 21#ifdef CONFIG_DEBUG_PER_CPU_MAPS
18unsigned int num_processors; 22# define DBG(x...) printk(KERN_DEBUG x)
19unsigned disabled_cpus __cpuinitdata;
20/* Processor that is doing the boot up */
21unsigned int boot_cpu_physical_apicid = -1U;
22EXPORT_SYMBOL(boot_cpu_physical_apicid);
23unsigned int max_physical_apicid;
24
25/* Bitmask of physically existing CPUs */
26physid_mask_t phys_cpu_present_map;
27#endif
28
29/* map cpu index to physical APIC ID */
30DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
31DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
32EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
33EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
34
35#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
36#define X86_64_NUMA 1
37
38/* map cpu index to node index */
39DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
40EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
41
42/* which logical CPUs are on which nodes */
43cpumask_t *node_to_cpumask_map;
44EXPORT_SYMBOL(node_to_cpumask_map);
45
46/* setup node_to_cpumask_map */
47static void __init setup_node_to_cpumask_map(void);
48
49#else 23#else
50static inline void setup_node_to_cpumask_map(void) { } 24# define DBG(x...)
51#endif 25#endif
52 26
53#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 27DEFINE_PER_CPU(int, cpu_number);
54/* 28EXPORT_PER_CPU_SYMBOL(cpu_number);
55 * Copy data used in early init routines from the initial arrays to the
56 * per cpu data areas. These arrays then become expendable and the
57 * *_early_ptr's are zeroed indicating that the static arrays are gone.
58 */
59static void __init setup_per_cpu_maps(void)
60{
61 int cpu;
62 29
63 for_each_possible_cpu(cpu) { 30#ifdef CONFIG_X86_64
64 per_cpu(x86_cpu_to_apicid, cpu) = 31#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
65 early_per_cpu_map(x86_cpu_to_apicid, cpu); 32#else
66 per_cpu(x86_bios_cpu_apicid, cpu) = 33#define BOOT_PERCPU_OFFSET 0
67 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
68#ifdef X86_64_NUMA
69 per_cpu(x86_cpu_to_node_map, cpu) =
70 early_per_cpu_map(x86_cpu_to_node_map, cpu);
71#endif 34#endif
72 }
73 35
74 /* indicate the early static arrays will soon be gone */ 36DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
75 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 37EXPORT_PER_CPU_SYMBOL(this_cpu_off);
76 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
77#ifdef X86_64_NUMA
78 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
79#endif
80}
81 38
82#ifdef CONFIG_X86_32 39unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
83/* 40 [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
84 * Great future not-so-futuristic plan: make i386 and x86_64 do it 41};
85 * the same way
86 */
87unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
88EXPORT_SYMBOL(__per_cpu_offset); 42EXPORT_SYMBOL(__per_cpu_offset);
89static inline void setup_cpu_pda_map(void) { }
90
91#elif !defined(CONFIG_SMP)
92static inline void setup_cpu_pda_map(void) { }
93
94#else /* CONFIG_SMP && CONFIG_X86_64 */
95
96/*
97 * Allocate cpu_pda pointer table and array via alloc_bootmem.
98 */
99static void __init setup_cpu_pda_map(void)
100{
101 char *pda;
102 struct x8664_pda **new_cpu_pda;
103 unsigned long size;
104 int cpu;
105
106 size = roundup(sizeof(struct x8664_pda), cache_line_size());
107
108 /* allocate cpu_pda array and pointer table */
109 {
110 unsigned long tsize = nr_cpu_ids * sizeof(void *);
111 unsigned long asize = size * (nr_cpu_ids - 1);
112 43
113 tsize = roundup(tsize, cache_line_size()); 44static inline void setup_percpu_segment(int cpu)
114 new_cpu_pda = alloc_bootmem(tsize + asize);
115 pda = (char *)new_cpu_pda + tsize;
116 }
117
118 /* initialize pointer table to static pda's */
119 for_each_possible_cpu(cpu) {
120 if (cpu == 0) {
121 /* leave boot cpu pda in place */
122 new_cpu_pda[0] = cpu_pda(0);
123 continue;
124 }
125 new_cpu_pda[cpu] = (struct x8664_pda *)pda;
126 new_cpu_pda[cpu]->in_bootmem = 1;
127 pda += size;
128 }
129
130 /* point to new pointer table */
131 _cpu_pda = new_cpu_pda;
132}
133
134#endif /* CONFIG_SMP && CONFIG_X86_64 */
135
136#ifdef CONFIG_X86_64
137
138/* correctly size the local cpu masks */
139static void __init setup_cpu_local_masks(void)
140{ 45{
141 alloc_bootmem_cpumask_var(&cpu_initialized_mask); 46#ifdef CONFIG_X86_32
142 alloc_bootmem_cpumask_var(&cpu_callin_mask); 47 struct desc_struct gdt;
143 alloc_bootmem_cpumask_var(&cpu_callout_mask);
144 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
145}
146
147#else /* CONFIG_X86_32 */
148 48
149static inline void setup_cpu_local_masks(void) 49 pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
150{ 50 0x2 | DESCTYPE_S, 0x8);
51 gdt.s = 1;
52 write_gdt_entry(get_cpu_gdt_table(cpu),
53 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
54#endif
151} 55}
152 56
153#endif /* CONFIG_X86_32 */
154
155/* 57/*
156 * Great future plan: 58 * Great future plan:
157 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 59 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -159,18 +61,12 @@ static inline void setup_cpu_local_masks(void)
159 */ 61 */
160void __init setup_per_cpu_areas(void) 62void __init setup_per_cpu_areas(void)
161{ 63{
162 ssize_t size, old_size; 64 ssize_t size;
163 char *ptr; 65 char *ptr;
164 int cpu; 66 int cpu;
165 unsigned long align = 1;
166
167 /* Setup cpu_pda map */
168 setup_cpu_pda_map();
169 67
170 /* Copy section for each CPU (we discard the original) */ 68 /* Copy section for each CPU (we discard the original) */
171 old_size = PERCPU_ENOUGH_ROOM; 69 size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
172 align = max_t(unsigned long, PAGE_SIZE, align);
173 size = roundup(old_size, align);
174 70
175 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", 71 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
176 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); 72 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
@@ -179,30 +75,68 @@ void __init setup_per_cpu_areas(void)
179 75
180 for_each_possible_cpu(cpu) { 76 for_each_possible_cpu(cpu) {
181#ifndef CONFIG_NEED_MULTIPLE_NODES 77#ifndef CONFIG_NEED_MULTIPLE_NODES
182 ptr = __alloc_bootmem(size, align, 78 ptr = alloc_bootmem_pages(size);
183 __pa(MAX_DMA_ADDRESS));
184#else 79#else
185 int node = early_cpu_to_node(cpu); 80 int node = early_cpu_to_node(cpu);
186 if (!node_online(node) || !NODE_DATA(node)) { 81 if (!node_online(node) || !NODE_DATA(node)) {
187 ptr = __alloc_bootmem(size, align, 82 ptr = alloc_bootmem_pages(size);
188 __pa(MAX_DMA_ADDRESS));
189 pr_info("cpu %d has no node %d or node-local memory\n", 83 pr_info("cpu %d has no node %d or node-local memory\n",
190 cpu, node); 84 cpu, node);
191 pr_debug("per cpu data for cpu%d at %016lx\n", 85 pr_debug("per cpu data for cpu%d at %016lx\n",
192 cpu, __pa(ptr)); 86 cpu, __pa(ptr));
193 } else { 87 } else {
194 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 88 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
195 __pa(MAX_DMA_ADDRESS));
196 pr_debug("per cpu data for cpu%d on node%d at %016lx\n", 89 pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
197 cpu, node, __pa(ptr)); 90 cpu, node, __pa(ptr));
198 } 91 }
199#endif 92#endif
93
94 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
200 per_cpu_offset(cpu) = ptr - __per_cpu_start; 95 per_cpu_offset(cpu) = ptr - __per_cpu_start;
201 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 96 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
97 per_cpu(cpu_number, cpu) = cpu;
98 setup_percpu_segment(cpu);
99 setup_stack_canary_segment(cpu);
100 /*
101 * Copy data used in early init routines from the
102 * initial arrays to the per cpu data areas. These
103 * arrays then become expendable and the *_early_ptr's
104 * are zeroed indicating that the static arrays are
105 * gone.
106 */
107#ifdef CONFIG_X86_LOCAL_APIC
108 per_cpu(x86_cpu_to_apicid, cpu) =
109 early_per_cpu_map(x86_cpu_to_apicid, cpu);
110 per_cpu(x86_bios_cpu_apicid, cpu) =
111 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
112#endif
113#ifdef CONFIG_X86_64
114 per_cpu(irq_stack_ptr, cpu) =
115 per_cpu(irq_stack_union.irq_stack, cpu) +
116 IRQ_STACK_SIZE - 64;
117#ifdef CONFIG_NUMA
118 per_cpu(x86_cpu_to_node_map, cpu) =
119 early_per_cpu_map(x86_cpu_to_node_map, cpu);
120#endif
121#endif
122 /*
123 * Up to this point, the boot CPU has been using .data.init
124 * area. Reload any changed state for the boot CPU.
125 */
126 if (cpu == boot_cpu_id)
127 switch_to_new_gdt(cpu);
128
129 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
202 } 130 }
203 131
204 /* Setup percpu data maps */ 132 /* indicate the early static arrays will soon be gone */
205 setup_per_cpu_maps(); 133#ifdef CONFIG_X86_LOCAL_APIC
134 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
135 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
136#endif
137#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
138 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
139#endif
206 140
207 /* Setup node to cpumask map */ 141 /* Setup node to cpumask map */
208 setup_node_to_cpumask_map(); 142 setup_node_to_cpumask_map();
@@ -210,199 +144,3 @@ void __init setup_per_cpu_areas(void)
210 /* Setup cpu initialized, callin, callout masks */ 144 /* Setup cpu initialized, callin, callout masks */
211 setup_cpu_local_masks(); 145 setup_cpu_local_masks();
212} 146}
213
214#endif
215
216#ifdef X86_64_NUMA
217
218/*
219 * Allocate node_to_cpumask_map based on number of available nodes
220 * Requires node_possible_map to be valid.
221 *
222 * Note: node_to_cpumask() is not valid until after this is done.
223 */
224static void __init setup_node_to_cpumask_map(void)
225{
226 unsigned int node, num = 0;
227 cpumask_t *map;
228
229 /* setup nr_node_ids if not done yet */
230 if (nr_node_ids == MAX_NUMNODES) {
231 for_each_node_mask(node, node_possible_map)
232 num = node;
233 nr_node_ids = num + 1;
234 }
235
236 /* allocate the map */
237 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
238
239 pr_debug("Node to cpumask map at %p for %d nodes\n",
240 map, nr_node_ids);
241
242 /* node_to_cpumask() will now work */
243 node_to_cpumask_map = map;
244}
245
246void __cpuinit numa_set_node(int cpu, int node)
247{
248 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
249
250 if (cpu_pda(cpu) && node != NUMA_NO_NODE)
251 cpu_pda(cpu)->nodenumber = node;
252
253 if (cpu_to_node_map)
254 cpu_to_node_map[cpu] = node;
255
256 else if (per_cpu_offset(cpu))
257 per_cpu(x86_cpu_to_node_map, cpu) = node;
258
259 else
260 pr_debug("Setting node for non-present cpu %d\n", cpu);
261}
262
263void __cpuinit numa_clear_node(int cpu)
264{
265 numa_set_node(cpu, NUMA_NO_NODE);
266}
267
268#ifndef CONFIG_DEBUG_PER_CPU_MAPS
269
270void __cpuinit numa_add_cpu(int cpu)
271{
272 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
273}
274
275void __cpuinit numa_remove_cpu(int cpu)
276{
277 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
278}
279
280#else /* CONFIG_DEBUG_PER_CPU_MAPS */
281
282/*
283 * --------- debug versions of the numa functions ---------
284 */
285static void __cpuinit numa_set_cpumask(int cpu, int enable)
286{
287 int node = cpu_to_node(cpu);
288 cpumask_t *mask;
289 char buf[64];
290
291 if (node_to_cpumask_map == NULL) {
292 printk(KERN_ERR "node_to_cpumask_map NULL\n");
293 dump_stack();
294 return;
295 }
296
297 mask = &node_to_cpumask_map[node];
298 if (enable)
299 cpu_set(cpu, *mask);
300 else
301 cpu_clear(cpu, *mask);
302
303 cpulist_scnprintf(buf, sizeof(buf), mask);
304 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
305 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
306}
307
308void __cpuinit numa_add_cpu(int cpu)
309{
310 numa_set_cpumask(cpu, 1);
311}
312
313void __cpuinit numa_remove_cpu(int cpu)
314{
315 numa_set_cpumask(cpu, 0);
316}
317
318int cpu_to_node(int cpu)
319{
320 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
321 printk(KERN_WARNING
322 "cpu_to_node(%d): usage too early!\n", cpu);
323 dump_stack();
324 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
325 }
326 return per_cpu(x86_cpu_to_node_map, cpu);
327}
328EXPORT_SYMBOL(cpu_to_node);
329
330/*
331 * Same function as cpu_to_node() but used if called before the
332 * per_cpu areas are setup.
333 */
334int early_cpu_to_node(int cpu)
335{
336 if (early_per_cpu_ptr(x86_cpu_to_node_map))
337 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
338
339 if (!per_cpu_offset(cpu)) {
340 printk(KERN_WARNING
341 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
342 dump_stack();
343 return NUMA_NO_NODE;
344 }
345 return per_cpu(x86_cpu_to_node_map, cpu);
346}
347
348
349/* empty cpumask */
350static const cpumask_t cpu_mask_none;
351
352/*
353 * Returns a pointer to the bitmask of CPUs on Node 'node'.
354 */
355const cpumask_t *cpumask_of_node(int node)
356{
357 if (node_to_cpumask_map == NULL) {
358 printk(KERN_WARNING
359 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
360 node);
361 dump_stack();
362 return (const cpumask_t *)&cpu_online_map;
363 }
364 if (node >= nr_node_ids) {
365 printk(KERN_WARNING
366 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
367 node, nr_node_ids);
368 dump_stack();
369 return &cpu_mask_none;
370 }
371 return &node_to_cpumask_map[node];
372}
373EXPORT_SYMBOL(cpumask_of_node);
374
375/*
376 * Returns a bitmask of CPUs on Node 'node'.
377 *
378 * Side note: this function creates the returned cpumask on the stack
379 * so with a high NR_CPUS count, excessive stack space is used. The
380 * node_to_cpumask_ptr function should be used whenever possible.
381 */
382cpumask_t node_to_cpumask(int node)
383{
384 if (node_to_cpumask_map == NULL) {
385 printk(KERN_WARNING
386 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
387 dump_stack();
388 return cpu_online_map;
389 }
390 if (node >= nr_node_ids) {
391 printk(KERN_WARNING
392 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
393 node, nr_node_ids);
394 dump_stack();
395 return cpu_mask_none;
396 }
397 return node_to_cpumask_map[node];
398}
399EXPORT_SYMBOL(node_to_cpumask);
400
401/*
402 * --------- end of debug versions of the numa functions ---------
403 */
404
405#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
406
407#endif /* X86_64_NUMA */
408
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index df0587f24c54..7cdcd16885ed 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -50,27 +50,23 @@
50# define FIX_EFLAGS __FIX_EFLAGS 50# define FIX_EFLAGS __FIX_EFLAGS
51#endif 51#endif
52 52
53#define COPY(x) { \ 53#define COPY(x) do { \
54 err |= __get_user(regs->x, &sc->x); \ 54 get_user_ex(regs->x, &sc->x); \
55} 55} while (0)
56 56
57#define COPY_SEG(seg) { \ 57#define GET_SEG(seg) ({ \
58 unsigned short tmp; \ 58 unsigned short tmp; \
59 err |= __get_user(tmp, &sc->seg); \ 59 get_user_ex(tmp, &sc->seg); \
60 regs->seg = tmp; \ 60 tmp; \
61} 61})
62 62
63#define COPY_SEG_CPL3(seg) { \ 63#define COPY_SEG(seg) do { \
64 unsigned short tmp; \ 64 regs->seg = GET_SEG(seg); \
65 err |= __get_user(tmp, &sc->seg); \ 65} while (0)
66 regs->seg = tmp | 3; \
67}
68 66
69#define GET_SEG(seg) { \ 67#define COPY_SEG_CPL3(seg) do { \
70 unsigned short tmp; \ 68 regs->seg = GET_SEG(seg) | 3; \
71 err |= __get_user(tmp, &sc->seg); \ 69} while (0)
72 loadsegment(seg, tmp); \
73}
74 70
75static int 71static int
76restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, 72restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
@@ -83,45 +79,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
83 /* Always make any pending restarted system calls return -EINTR */ 79 /* Always make any pending restarted system calls return -EINTR */
84 current_thread_info()->restart_block.fn = do_no_restart_syscall; 80 current_thread_info()->restart_block.fn = do_no_restart_syscall;
85 81
82 get_user_try {
83
86#ifdef CONFIG_X86_32 84#ifdef CONFIG_X86_32
87 GET_SEG(gs); 85 set_user_gs(regs, GET_SEG(gs));
88 COPY_SEG(fs); 86 COPY_SEG(fs);
89 COPY_SEG(es); 87 COPY_SEG(es);
90 COPY_SEG(ds); 88 COPY_SEG(ds);
91#endif /* CONFIG_X86_32 */ 89#endif /* CONFIG_X86_32 */
92 90
93 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); 91 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
94 COPY(dx); COPY(cx); COPY(ip); 92 COPY(dx); COPY(cx); COPY(ip);
95 93
96#ifdef CONFIG_X86_64 94#ifdef CONFIG_X86_64
97 COPY(r8); 95 COPY(r8);
98 COPY(r9); 96 COPY(r9);
99 COPY(r10); 97 COPY(r10);
100 COPY(r11); 98 COPY(r11);
101 COPY(r12); 99 COPY(r12);
102 COPY(r13); 100 COPY(r13);
103 COPY(r14); 101 COPY(r14);
104 COPY(r15); 102 COPY(r15);
105#endif /* CONFIG_X86_64 */ 103#endif /* CONFIG_X86_64 */
106 104
107#ifdef CONFIG_X86_32 105#ifdef CONFIG_X86_32
108 COPY_SEG_CPL3(cs); 106 COPY_SEG_CPL3(cs);
109 COPY_SEG_CPL3(ss); 107 COPY_SEG_CPL3(ss);
110#else /* !CONFIG_X86_32 */ 108#else /* !CONFIG_X86_32 */
111 /* Kernel saves and restores only the CS segment register on signals, 109 /* Kernel saves and restores only the CS segment register on signals,
112 * which is the bare minimum needed to allow mixed 32/64-bit code. 110 * which is the bare minimum needed to allow mixed 32/64-bit code.
113 * App's signal handler can save/restore other segments if needed. */ 111 * App's signal handler can save/restore other segments if needed. */
114 COPY_SEG_CPL3(cs); 112 COPY_SEG_CPL3(cs);
115#endif /* CONFIG_X86_32 */ 113#endif /* CONFIG_X86_32 */
116 114
117 err |= __get_user(tmpflags, &sc->flags); 115 get_user_ex(tmpflags, &sc->flags);
118 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); 116 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
119 regs->orig_ax = -1; /* disable syscall checks */ 117 regs->orig_ax = -1; /* disable syscall checks */
118
119 get_user_ex(buf, &sc->fpstate);
120 err |= restore_i387_xstate(buf);
120 121
121 err |= __get_user(buf, &sc->fpstate); 122 get_user_ex(*pax, &sc->ax);
122 err |= restore_i387_xstate(buf); 123 } get_user_catch(err);
123 124
124 err |= __get_user(*pax, &sc->ax);
125 return err; 125 return err;
126} 126}
127 127
@@ -131,57 +131,55 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
131{ 131{
132 int err = 0; 132 int err = 0;
133 133
134#ifdef CONFIG_X86_32 134 put_user_try {
135 {
136 unsigned int tmp;
137 135
138 savesegment(gs, tmp); 136#ifdef CONFIG_X86_32
139 err |= __put_user(tmp, (unsigned int __user *)&sc->gs); 137 put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
140 } 138 put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
141 err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); 139 put_user_ex(regs->es, (unsigned int __user *)&sc->es);
142 err |= __put_user(regs->es, (unsigned int __user *)&sc->es); 140 put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
143 err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
144#endif /* CONFIG_X86_32 */ 141#endif /* CONFIG_X86_32 */
145 142
146 err |= __put_user(regs->di, &sc->di); 143 put_user_ex(regs->di, &sc->di);
147 err |= __put_user(regs->si, &sc->si); 144 put_user_ex(regs->si, &sc->si);
148 err |= __put_user(regs->bp, &sc->bp); 145 put_user_ex(regs->bp, &sc->bp);
149 err |= __put_user(regs->sp, &sc->sp); 146 put_user_ex(regs->sp, &sc->sp);
150 err |= __put_user(regs->bx, &sc->bx); 147 put_user_ex(regs->bx, &sc->bx);
151 err |= __put_user(regs->dx, &sc->dx); 148 put_user_ex(regs->dx, &sc->dx);
152 err |= __put_user(regs->cx, &sc->cx); 149 put_user_ex(regs->cx, &sc->cx);
153 err |= __put_user(regs->ax, &sc->ax); 150 put_user_ex(regs->ax, &sc->ax);
154#ifdef CONFIG_X86_64 151#ifdef CONFIG_X86_64
155 err |= __put_user(regs->r8, &sc->r8); 152 put_user_ex(regs->r8, &sc->r8);
156 err |= __put_user(regs->r9, &sc->r9); 153 put_user_ex(regs->r9, &sc->r9);
157 err |= __put_user(regs->r10, &sc->r10); 154 put_user_ex(regs->r10, &sc->r10);
158 err |= __put_user(regs->r11, &sc->r11); 155 put_user_ex(regs->r11, &sc->r11);
159 err |= __put_user(regs->r12, &sc->r12); 156 put_user_ex(regs->r12, &sc->r12);
160 err |= __put_user(regs->r13, &sc->r13); 157 put_user_ex(regs->r13, &sc->r13);
161 err |= __put_user(regs->r14, &sc->r14); 158 put_user_ex(regs->r14, &sc->r14);
162 err |= __put_user(regs->r15, &sc->r15); 159 put_user_ex(regs->r15, &sc->r15);
163#endif /* CONFIG_X86_64 */ 160#endif /* CONFIG_X86_64 */
164 161
165 err |= __put_user(current->thread.trap_no, &sc->trapno); 162 put_user_ex(current->thread.trap_no, &sc->trapno);
166 err |= __put_user(current->thread.error_code, &sc->err); 163 put_user_ex(current->thread.error_code, &sc->err);
167 err |= __put_user(regs->ip, &sc->ip); 164 put_user_ex(regs->ip, &sc->ip);
168#ifdef CONFIG_X86_32 165#ifdef CONFIG_X86_32
169 err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); 166 put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
170 err |= __put_user(regs->flags, &sc->flags); 167 put_user_ex(regs->flags, &sc->flags);
171 err |= __put_user(regs->sp, &sc->sp_at_signal); 168 put_user_ex(regs->sp, &sc->sp_at_signal);
172 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); 169 put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
173#else /* !CONFIG_X86_32 */ 170#else /* !CONFIG_X86_32 */
174 err |= __put_user(regs->flags, &sc->flags); 171 put_user_ex(regs->flags, &sc->flags);
175 err |= __put_user(regs->cs, &sc->cs); 172 put_user_ex(regs->cs, &sc->cs);
176 err |= __put_user(0, &sc->gs); 173 put_user_ex(0, &sc->gs);
177 err |= __put_user(0, &sc->fs); 174 put_user_ex(0, &sc->fs);
178#endif /* CONFIG_X86_32 */ 175#endif /* CONFIG_X86_32 */
179 176
180 err |= __put_user(fpstate, &sc->fpstate); 177 put_user_ex(fpstate, &sc->fpstate);
181 178
182 /* non-iBCS2 extensions.. */ 179 /* non-iBCS2 extensions.. */
183 err |= __put_user(mask, &sc->oldmask); 180 put_user_ex(mask, &sc->oldmask);
184 err |= __put_user(current->thread.cr2, &sc->cr2); 181 put_user_ex(current->thread.cr2, &sc->cr2);
182 } put_user_catch(err);
185 183
186 return err; 184 return err;
187} 185}
@@ -336,43 +334,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
336 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 334 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
337 return -EFAULT; 335 return -EFAULT;
338 336
339 err |= __put_user(sig, &frame->sig); 337 put_user_try {
340 err |= __put_user(&frame->info, &frame->pinfo); 338 put_user_ex(sig, &frame->sig);
341 err |= __put_user(&frame->uc, &frame->puc); 339 put_user_ex(&frame->info, &frame->pinfo);
342 err |= copy_siginfo_to_user(&frame->info, info); 340 put_user_ex(&frame->uc, &frame->puc);
343 if (err) 341 err |= copy_siginfo_to_user(&frame->info, info);
344 return -EFAULT;
345
346 /* Create the ucontext. */
347 if (cpu_has_xsave)
348 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
349 else
350 err |= __put_user(0, &frame->uc.uc_flags);
351 err |= __put_user(0, &frame->uc.uc_link);
352 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
353 err |= __put_user(sas_ss_flags(regs->sp),
354 &frame->uc.uc_stack.ss_flags);
355 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
356 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
357 regs, set->sig[0]);
358 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
359 if (err)
360 return -EFAULT;
361 342
362 /* Set up to return from userspace. */ 343 /* Create the ucontext. */
363 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); 344 if (cpu_has_xsave)
364 if (ka->sa.sa_flags & SA_RESTORER) 345 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
365 restorer = ka->sa.sa_restorer; 346 else
366 err |= __put_user(restorer, &frame->pretcode); 347 put_user_ex(0, &frame->uc.uc_flags);
348 put_user_ex(0, &frame->uc.uc_link);
349 put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
350 put_user_ex(sas_ss_flags(regs->sp),
351 &frame->uc.uc_stack.ss_flags);
352 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
353 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
354 regs, set->sig[0]);
355 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
356
357 /* Set up to return from userspace. */
358 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
359 if (ka->sa.sa_flags & SA_RESTORER)
360 restorer = ka->sa.sa_restorer;
361 put_user_ex(restorer, &frame->pretcode);
367 362
368 /* 363 /*
369 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 364 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
370 * 365 *
371 * WE DO NOT USE IT ANY MORE! It's only left here for historical 366 * WE DO NOT USE IT ANY MORE! It's only left here for historical
372 * reasons and because gdb uses it as a signature to notice 367 * reasons and because gdb uses it as a signature to notice
373 * signal handler stack frames. 368 * signal handler stack frames.
374 */ 369 */
375 err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); 370 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
371 } put_user_catch(err);
376 372
377 if (err) 373 if (err)
378 return -EFAULT; 374 return -EFAULT;
@@ -436,28 +432,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
436 return -EFAULT; 432 return -EFAULT;
437 } 433 }
438 434
439 /* Create the ucontext. */ 435 put_user_try {
440 if (cpu_has_xsave) 436 /* Create the ucontext. */
441 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); 437 if (cpu_has_xsave)
442 else 438 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
443 err |= __put_user(0, &frame->uc.uc_flags); 439 else
444 err |= __put_user(0, &frame->uc.uc_link); 440 put_user_ex(0, &frame->uc.uc_flags);
445 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 441 put_user_ex(0, &frame->uc.uc_link);
446 err |= __put_user(sas_ss_flags(regs->sp), 442 put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
447 &frame->uc.uc_stack.ss_flags); 443 put_user_ex(sas_ss_flags(regs->sp),
448 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); 444 &frame->uc.uc_stack.ss_flags);
449 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); 445 put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
450 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 446 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
451 447 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
452 /* Set up to return from userspace. If provided, use a stub 448
453 already in userspace. */ 449 /* Set up to return from userspace. If provided, use a stub
454 /* x86-64 should always use SA_RESTORER. */ 450 already in userspace. */
455 if (ka->sa.sa_flags & SA_RESTORER) { 451 /* x86-64 should always use SA_RESTORER. */
456 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); 452 if (ka->sa.sa_flags & SA_RESTORER) {
457 } else { 453 put_user_ex(ka->sa.sa_restorer, &frame->pretcode);
458 /* could use a vstub here */ 454 } else {
459 return -EFAULT; 455 /* could use a vstub here */
460 } 456 err |= -EFAULT;
457 }
458 } put_user_catch(err);
461 459
462 if (err) 460 if (err)
463 return -EFAULT; 461 return -EFAULT;
@@ -509,31 +507,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
509 struct old_sigaction __user *oact) 507 struct old_sigaction __user *oact)
510{ 508{
511 struct k_sigaction new_ka, old_ka; 509 struct k_sigaction new_ka, old_ka;
512 int ret; 510 int ret = 0;
513 511
514 if (act) { 512 if (act) {
515 old_sigset_t mask; 513 old_sigset_t mask;
516 514
517 if (!access_ok(VERIFY_READ, act, sizeof(*act)) || 515 if (!access_ok(VERIFY_READ, act, sizeof(*act)))
518 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
519 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
520 return -EFAULT; 516 return -EFAULT;
521 517
522 __get_user(new_ka.sa.sa_flags, &act->sa_flags); 518 get_user_try {
523 __get_user(mask, &act->sa_mask); 519 get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
520 get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
521 get_user_ex(mask, &act->sa_mask);
522 get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
523 } get_user_catch(ret);
524
525 if (ret)
526 return -EFAULT;
524 siginitset(&new_ka.sa.sa_mask, mask); 527 siginitset(&new_ka.sa.sa_mask, mask);
525 } 528 }
526 529
527 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); 530 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
528 531
529 if (!ret && oact) { 532 if (!ret && oact) {
530 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || 533 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
531 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
532 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
533 return -EFAULT; 534 return -EFAULT;
534 535
535 __put_user(old_ka.sa.sa_flags, &oact->sa_flags); 536 put_user_try {
536 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); 537 put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
538 put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
539 put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
540 put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
541 } put_user_catch(ret);
542
543 if (ret)
544 return -EFAULT;
537 } 545 }
538 546
539 return ret; 547 return ret;
@@ -541,14 +549,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
541#endif /* CONFIG_X86_32 */ 549#endif /* CONFIG_X86_32 */
542 550
543#ifdef CONFIG_X86_32 551#ifdef CONFIG_X86_32
544asmlinkage int sys_sigaltstack(unsigned long bx) 552int sys_sigaltstack(struct pt_regs *regs)
545{ 553{
546 /* 554 const stack_t __user *uss = (const stack_t __user *)regs->bx;
547 * This is needed to make gcc realize it doesn't own the
548 * "struct pt_regs"
549 */
550 struct pt_regs *regs = (struct pt_regs *)&bx;
551 const stack_t __user *uss = (const stack_t __user *)bx;
552 stack_t __user *uoss = (stack_t __user *)regs->cx; 555 stack_t __user *uoss = (stack_t __user *)regs->cx;
553 556
554 return do_sigaltstack(uss, uoss, regs->sp); 557 return do_sigaltstack(uss, uoss, regs->sp);
@@ -566,14 +569,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
566 * Do a signal return; undo the signal stack. 569 * Do a signal return; undo the signal stack.
567 */ 570 */
568#ifdef CONFIG_X86_32 571#ifdef CONFIG_X86_32
569asmlinkage unsigned long sys_sigreturn(unsigned long __unused) 572unsigned long sys_sigreturn(struct pt_regs *regs)
570{ 573{
571 struct sigframe __user *frame; 574 struct sigframe __user *frame;
572 struct pt_regs *regs;
573 unsigned long ax; 575 unsigned long ax;
574 sigset_t set; 576 sigset_t set;
575 577
576 regs = (struct pt_regs *) &__unused;
577 frame = (struct sigframe __user *)(regs->sp - 8); 578 frame = (struct sigframe __user *)(regs->sp - 8);
578 579
579 if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) 580 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -600,7 +601,7 @@ badframe:
600} 601}
601#endif /* CONFIG_X86_32 */ 602#endif /* CONFIG_X86_32 */
602 603
603static long do_rt_sigreturn(struct pt_regs *regs) 604long sys_rt_sigreturn(struct pt_regs *regs)
604{ 605{
605 struct rt_sigframe __user *frame; 606 struct rt_sigframe __user *frame;
606 unsigned long ax; 607 unsigned long ax;
@@ -631,25 +632,6 @@ badframe:
631 return 0; 632 return 0;
632} 633}
633 634
634#ifdef CONFIG_X86_32
635/*
636 * Note: do not pass in pt_regs directly as with tail-call optimization
637 * GCC will incorrectly stomp on the caller's frame and corrupt user-space
638 * register state:
639 */
640asmlinkage int sys_rt_sigreturn(unsigned long __unused)
641{
642 struct pt_regs *regs = (struct pt_regs *)&__unused;
643
644 return do_rt_sigreturn(regs);
645}
646#else /* !CONFIG_X86_32 */
647asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
648{
649 return do_rt_sigreturn(regs);
650}
651#endif /* CONFIG_X86_32 */
652
653/* 635/*
654 * OK, we're invoking a handler: 636 * OK, we're invoking a handler:
655 */ 637 */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index e6faa3316bd2..13f33ea8ccaa 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -2,7 +2,7 @@
2 * Intel SMP support routines. 2 * Intel SMP support routines.
3 * 3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
5 * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> 5 * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6 * (c) 2002,2003 Andi Kleen, SuSE Labs. 6 * (c) 2002,2003 Andi Kleen, SuSE Labs.
7 * 7 *
8 * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> 8 * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
@@ -26,8 +26,7 @@
26#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
27#include <asm/mmu_context.h> 27#include <asm/mmu_context.h>
28#include <asm/proto.h> 28#include <asm/proto.h>
29#include <mach_ipi.h> 29#include <asm/apic.h>
30#include <mach_apic.h>
31/* 30/*
32 * Some notes on x86 processor bugs affecting SMP operation: 31 * Some notes on x86 processor bugs affecting SMP operation:
33 * 32 *
@@ -118,12 +117,12 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 117 WARN_ON(1);
119 return; 118 return;
120 } 119 }
121 send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); 120 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 121}
123 122
124void native_send_call_func_single_ipi(int cpu) 123void native_send_call_func_single_ipi(int cpu)
125{ 124{
126 send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); 125 apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
127} 126}
128 127
129void native_send_call_func_ipi(const struct cpumask *mask) 128void native_send_call_func_ipi(const struct cpumask *mask)
@@ -131,7 +130,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
131 cpumask_var_t allbutself; 130 cpumask_var_t allbutself;
132 131
133 if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { 132 if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
134 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 133 apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
135 return; 134 return;
136 } 135 }
137 136
@@ -140,9 +139,9 @@ void native_send_call_func_ipi(const struct cpumask *mask)
140 139
141 if (cpumask_equal(mask, allbutself) && 140 if (cpumask_equal(mask, allbutself) &&
142 cpumask_equal(cpu_online_mask, cpu_callout_mask)) 141 cpumask_equal(cpu_online_mask, cpu_callout_mask))
143 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 142 apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
144 else 143 else
145 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 144 apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
146 145
147 free_cpumask_var(allbutself); 146 free_cpumask_var(allbutself);
148} 147}
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bb1a3b1fc87f..9ce666387f37 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2,7 +2,7 @@
2 * x86 SMP booting functions 2 * x86 SMP booting functions
3 * 3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> 5 * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6 * Copyright 2001 Andi Kleen, SuSE Labs. 6 * Copyright 2001 Andi Kleen, SuSE Labs.
7 * 7 *
8 * Much of the core SMP work is based on previous work by Thomas Radke, to 8 * Much of the core SMP work is based on previous work by Thomas Radke, to
@@ -53,7 +53,6 @@
53#include <asm/nmi.h> 53#include <asm/nmi.h>
54#include <asm/irq.h> 54#include <asm/irq.h>
55#include <asm/idle.h> 55#include <asm/idle.h>
56#include <asm/smp.h>
57#include <asm/trampoline.h> 56#include <asm/trampoline.h>
58#include <asm/cpu.h> 57#include <asm/cpu.h>
59#include <asm/numa.h> 58#include <asm/numa.h>
@@ -61,13 +60,12 @@
61#include <asm/tlbflush.h> 60#include <asm/tlbflush.h>
62#include <asm/mtrr.h> 61#include <asm/mtrr.h>
63#include <asm/vmi.h> 62#include <asm/vmi.h>
64#include <asm/genapic.h> 63#include <asm/apic.h>
65#include <asm/setup.h> 64#include <asm/setup.h>
65#include <asm/uv/uv.h>
66#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
67 67
68#include <mach_apic.h> 68#include <asm/smpboot_hooks.h>
69#include <mach_wakecpu.h>
70#include <smpboot_hooks.h>
71 69
72#ifdef CONFIG_X86_32 70#ifdef CONFIG_X86_32
73u8 apicid_2_node[MAX_APICID]; 71u8 apicid_2_node[MAX_APICID];
@@ -163,7 +161,7 @@ static void map_cpu_to_logical_apicid(void)
163{ 161{
164 int cpu = smp_processor_id(); 162 int cpu = smp_processor_id();
165 int apicid = logical_smp_processor_id(); 163 int apicid = logical_smp_processor_id();
166 int node = apicid_to_node(apicid); 164 int node = apic->apicid_to_node(apicid);
167 165
168 if (!node_online(node)) 166 if (!node_online(node))
169 node = first_online_node; 167 node = first_online_node;
@@ -196,7 +194,8 @@ static void __cpuinit smp_callin(void)
196 * our local APIC. We have to wait for the IPI or we'll 194 * our local APIC. We have to wait for the IPI or we'll
197 * lock up on an APIC access. 195 * lock up on an APIC access.
198 */ 196 */
199 wait_for_init_deassert(&init_deasserted); 197 if (apic->wait_for_init_deassert)
198 apic->wait_for_init_deassert(&init_deasserted);
200 199
201 /* 200 /*
202 * (This works even if the APIC is not enabled.) 201 * (This works even if the APIC is not enabled.)
@@ -243,7 +242,8 @@ static void __cpuinit smp_callin(void)
243 */ 242 */
244 243
245 pr_debug("CALLIN, before setup_local_APIC().\n"); 244 pr_debug("CALLIN, before setup_local_APIC().\n");
246 smp_callin_clear_local_apic(); 245 if (apic->smp_callin_clear_local_apic)
246 apic->smp_callin_clear_local_apic();
247 setup_local_APIC(); 247 setup_local_APIC();
248 end_local_APIC_setup(); 248 end_local_APIC_setup();
249 map_cpu_to_logical_apicid(); 249 map_cpu_to_logical_apicid();
@@ -583,7 +583,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
583 /* Target chip */ 583 /* Target chip */
584 /* Boot on the stack */ 584 /* Boot on the stack */
585 /* Kick the second */ 585 /* Kick the second */
586 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); 586 apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
587 587
588 pr_debug("Waiting for send to finish...\n"); 588 pr_debug("Waiting for send to finish...\n");
589 send_status = safe_apic_wait_icr_idle(); 589 send_status = safe_apic_wait_icr_idle();
@@ -745,78 +745,22 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
745 complete(&c_idle->done); 745 complete(&c_idle->done);
746} 746}
747 747
748#ifdef CONFIG_X86_64
749
750/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
751static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
752{
753 if (!after_bootmem)
754 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
755}
756
757/*
758 * Allocate node local memory for the AP pda.
759 *
760 * Must be called after the _cpu_pda pointer table is initialized.
761 */
762int __cpuinit get_local_pda(int cpu)
763{
764 struct x8664_pda *oldpda, *newpda;
765 unsigned long size = sizeof(struct x8664_pda);
766 int node = cpu_to_node(cpu);
767
768 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
769 return 0;
770
771 oldpda = cpu_pda(cpu);
772 newpda = kmalloc_node(size, GFP_ATOMIC, node);
773 if (!newpda) {
774 printk(KERN_ERR "Could not allocate node local PDA "
775 "for CPU %d on node %d\n", cpu, node);
776
777 if (oldpda)
778 return 0; /* have a usable pda */
779 else
780 return -1;
781 }
782
783 if (oldpda) {
784 memcpy(newpda, oldpda, size);
785 free_bootmem_pda(oldpda);
786 }
787
788 newpda->in_bootmem = 0;
789 cpu_pda(cpu) = newpda;
790 return 0;
791}
792#endif /* CONFIG_X86_64 */
793
794static int __cpuinit do_boot_cpu(int apicid, int cpu)
795/* 748/*
796 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 749 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
797 * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 750 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
798 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. 751 * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
799 */ 752 */
753static int __cpuinit do_boot_cpu(int apicid, int cpu)
800{ 754{
801 unsigned long boot_error = 0; 755 unsigned long boot_error = 0;
802 int timeout;
803 unsigned long start_ip; 756 unsigned long start_ip;
804 unsigned short nmi_high = 0, nmi_low = 0; 757 int timeout;
805 struct create_idle c_idle = { 758 struct create_idle c_idle = {
806 .cpu = cpu, 759 .cpu = cpu,
807 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done), 760 .done = COMPLETION_INITIALIZER_ONSTACK(c_idle.done),
808 }; 761 };
809 INIT_WORK(&c_idle.work, do_fork_idle);
810 762
811#ifdef CONFIG_X86_64 763 INIT_WORK(&c_idle.work, do_fork_idle);
812 /* Allocate node local memory for AP pdas */
813 if (cpu > 0) {
814 boot_error = get_local_pda(cpu);
815 if (boot_error)
816 goto restore_state;
817 /* if can't get pda memory, can't start cpu */
818 }
819#endif
820 764
821 alternatives_smp_switch(1); 765 alternatives_smp_switch(1);
822 766
@@ -847,14 +791,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
847 791
848 set_idle_for_cpu(cpu, c_idle.idle); 792 set_idle_for_cpu(cpu, c_idle.idle);
849do_rest: 793do_rest:
850#ifdef CONFIG_X86_32
851 per_cpu(current_task, cpu) = c_idle.idle; 794 per_cpu(current_task, cpu) = c_idle.idle;
852 init_gdt(cpu); 795#ifdef CONFIG_X86_32
853 /* Stack for startup_32 can be just as for start_secondary onwards */ 796 /* Stack for startup_32 can be just as for start_secondary onwards */
854 irq_ctx_init(cpu); 797 irq_ctx_init(cpu);
855#else 798#else
856 cpu_pda(cpu)->pcurrent = c_idle.idle;
857 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 799 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
800 initial_gs = per_cpu_offset(cpu);
801 per_cpu(kernel_stack, cpu) =
802 (unsigned long)task_stack_page(c_idle.idle) -
803 KERNEL_STACK_OFFSET + THREAD_SIZE;
858#endif 804#endif
859 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 805 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
860 initial_code = (unsigned long)start_secondary; 806 initial_code = (unsigned long)start_secondary;
@@ -878,8 +824,6 @@ do_rest:
878 824
879 pr_debug("Setting warm reset code and vector.\n"); 825 pr_debug("Setting warm reset code and vector.\n");
880 826
881 store_NMI_vector(&nmi_high, &nmi_low);
882
883 smpboot_setup_warm_reset_vector(start_ip); 827 smpboot_setup_warm_reset_vector(start_ip);
884 /* 828 /*
885 * Be paranoid about clearing APIC errors. 829 * Be paranoid about clearing APIC errors.
@@ -893,7 +837,7 @@ do_rest:
893 /* 837 /*
894 * Starting actual IPI sequence... 838 * Starting actual IPI sequence...
895 */ 839 */
896 boot_error = wakeup_secondary_cpu(apicid, start_ip); 840 boot_error = apic->wakeup_cpu(apicid, start_ip);
897 841
898 if (!boot_error) { 842 if (!boot_error) {
899 /* 843 /*
@@ -927,13 +871,11 @@ do_rest:
927 else 871 else
928 /* trampoline code not run */ 872 /* trampoline code not run */
929 printk(KERN_ERR "Not responding.\n"); 873 printk(KERN_ERR "Not responding.\n");
930 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) 874 if (apic->inquire_remote_apic)
931 inquire_remote_apic(apicid); 875 apic->inquire_remote_apic(apicid);
932 } 876 }
933 } 877 }
934#ifdef CONFIG_X86_64 878
935restore_state:
936#endif
937 if (boot_error) { 879 if (boot_error) {
938 /* Try to put things back the way they were before ... */ 880 /* Try to put things back the way they were before ... */
939 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 881 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -961,7 +903,7 @@ restore_state:
961 903
962int __cpuinit native_cpu_up(unsigned int cpu) 904int __cpuinit native_cpu_up(unsigned int cpu)
963{ 905{
964 int apicid = cpu_present_to_apicid(cpu); 906 int apicid = apic->cpu_present_to_apicid(cpu);
965 unsigned long flags; 907 unsigned long flags;
966 int err; 908 int err;
967 909
@@ -1054,14 +996,14 @@ static int __init smp_sanity_check(unsigned max_cpus)
1054{ 996{
1055 preempt_disable(); 997 preempt_disable();
1056 998
1057#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) 999#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
1058 if (def_to_bigsmp && nr_cpu_ids > 8) { 1000 if (def_to_bigsmp && nr_cpu_ids > 8) {
1059 unsigned int cpu; 1001 unsigned int cpu;
1060 unsigned nr; 1002 unsigned nr;
1061 1003
1062 printk(KERN_WARNING 1004 printk(KERN_WARNING
1063 "More than 8 CPUs detected - skipping them.\n" 1005 "More than 8 CPUs detected - skipping them.\n"
1064 "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); 1006 "Use CONFIG_X86_BIGSMP.\n");
1065 1007
1066 nr = 0; 1008 nr = 0;
1067 for_each_present_cpu(cpu) { 1009 for_each_present_cpu(cpu) {
@@ -1107,7 +1049,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1107 * Should not be necessary because the MP table should list the boot 1049 * Should not be necessary because the MP table should list the boot
1108 * CPU too, but we do it for the sake of robustness anyway. 1050 * CPU too, but we do it for the sake of robustness anyway.
1109 */ 1051 */
1110 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { 1052 if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
1111 printk(KERN_NOTICE 1053 printk(KERN_NOTICE
1112 "weird, boot CPU (#%d) not listed by the BIOS.\n", 1054 "weird, boot CPU (#%d) not listed by the BIOS.\n",
1113 boot_cpu_physical_apicid); 1055 boot_cpu_physical_apicid);
@@ -1125,6 +1067,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1125 printk(KERN_ERR "... forcing use of dummy APIC emulation." 1067 printk(KERN_ERR "... forcing use of dummy APIC emulation."
1126 "(tell your hw vendor)\n"); 1068 "(tell your hw vendor)\n");
1127 smpboot_clear_io_apic(); 1069 smpboot_clear_io_apic();
1070 arch_disable_smp_support();
1128 return -1; 1071 return -1;
1129 } 1072 }
1130 1073
@@ -1181,9 +1124,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1181 current_thread_info()->cpu = 0; /* needed? */ 1124 current_thread_info()->cpu = 0; /* needed? */
1182 set_cpu_sibling_map(0); 1125 set_cpu_sibling_map(0);
1183 1126
1184#ifdef CONFIG_X86_64
1185 enable_IR_x2apic(); 1127 enable_IR_x2apic();
1186 setup_apic_routing(); 1128#ifdef CONFIG_X86_64
1129 default_setup_apic_routing();
1187#endif 1130#endif
1188 1131
1189 if (smp_sanity_check(max_cpus) < 0) { 1132 if (smp_sanity_check(max_cpus) < 0) {
@@ -1207,18 +1150,18 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1207 */ 1150 */
1208 setup_local_APIC(); 1151 setup_local_APIC();
1209 1152
1210#ifdef CONFIG_X86_64
1211 /* 1153 /*
1212 * Enable IO APIC before setting up error vector 1154 * Enable IO APIC before setting up error vector
1213 */ 1155 */
1214 if (!skip_ioapic_setup && nr_ioapics) 1156 if (!skip_ioapic_setup && nr_ioapics)
1215 enable_IO_APIC(); 1157 enable_IO_APIC();
1216#endif 1158
1217 end_local_APIC_setup(); 1159 end_local_APIC_setup();
1218 1160
1219 map_cpu_to_logical_apicid(); 1161 map_cpu_to_logical_apicid();
1220 1162
1221 setup_portio_remap(); 1163 if (apic->setup_portio_remap)
1164 apic->setup_portio_remap();
1222 1165
1223 smpboot_setup_io_apic(); 1166 smpboot_setup_io_apic();
1224 /* 1167 /*
@@ -1240,10 +1183,7 @@ out:
1240void __init native_smp_prepare_boot_cpu(void) 1183void __init native_smp_prepare_boot_cpu(void)
1241{ 1184{
1242 int me = smp_processor_id(); 1185 int me = smp_processor_id();
1243#ifdef CONFIG_X86_32 1186 switch_to_new_gdt(me);
1244 init_gdt(me);
1245#endif
1246 switch_to_new_gdt();
1247 /* already set me in cpu_online_mask in boot_cpu_init() */ 1187 /* already set me in cpu_online_mask in boot_cpu_init() */
1248 cpumask_set_cpu(me, cpu_callout_mask); 1188 cpumask_set_cpu(me, cpu_callout_mask);
1249 per_cpu(cpu_state, me) = CPU_ONLINE; 1189 per_cpu(cpu_state, me) = CPU_ONLINE;
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
deleted file mode 100644
index 397e309839dd..000000000000
--- a/arch/x86/kernel/smpcommon.c
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * SMP stuff which is common to all sub-architectures.
3 */
4#include <linux/module.h>
5#include <asm/smp.h>
6
7#ifdef CONFIG_X86_32
8DEFINE_PER_CPU(unsigned long, this_cpu_off);
9EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10
11/*
12 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 * (still using the master per-cpu area), or a CPU doing it for a
14 * secondary which will soon come up.
15 */
16__cpuinit void init_gdt(int cpu)
17{
18 struct desc_struct gdt;
19
20 pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
21 0x2 | DESCTYPE_S, 0x8);
22 gdt.s = 1;
23
24 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
26
27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
28 per_cpu(cpu_number, cpu) = cpu;
29}
30#endif
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 10786af95545..f7bddc2e37d1 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Stack trace management functions 2 * Stack trace management functions
3 * 3 *
4 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 4 * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
5 */ 5 */
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
deleted file mode 100644
index 7b987852e876..000000000000
--- a/arch/x86/kernel/summit_32.c
+++ /dev/null
@@ -1,188 +0,0 @@
1/*
2 * IBM Summit-Specific Code
3 *
4 * Written By: Matthew Dobson, IBM Corporation
5 *
6 * Copyright (c) 2003 IBM Corp.
7 *
8 * All rights reserved.
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License as published by
12 * the Free Software Foundation; either version 2 of the License, or (at
13 * your option) any later version.
14 *
15 * This program is distributed in the hope that it will be useful, but
16 * WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
18 * NON INFRINGEMENT. See the GNU General Public License for more
19 * details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 * Send feedback to <colpatch@us.ibm.com>
26 *
27 */
28
29#include <linux/mm.h>
30#include <linux/init.h>
31#include <asm/io.h>
32#include <asm/bios_ebda.h>
33#include <asm/summit/mpparse.h>
34
35static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
37static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
38
39#ifndef CONFIG_X86_NUMAQ
40static int mp_bus_id_to_node[MAX_MP_BUSSES] __initdata;
41#endif
42
43static int __init setup_pci_node_map_for_wpeg(int wpeg_num, int last_bus)
44{
45 int twister = 0, node = 0;
46 int i, bus, num_buses;
47
48 for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
49 if (rio_devs[i]->node_id == rio_devs[wpeg_num]->owner_id) {
50 twister = rio_devs[i]->owner_id;
51 break;
52 }
53 }
54 if (i == rio_table_hdr->num_rio_dev) {
55 printk(KERN_ERR "%s: Couldn't find owner Cyclone for Winnipeg!\n", __func__);
56 return last_bus;
57 }
58
59 for (i = 0; i < rio_table_hdr->num_scal_dev; i++) {
60 if (scal_devs[i]->node_id == twister) {
61 node = scal_devs[i]->node_id;
62 break;
63 }
64 }
65 if (i == rio_table_hdr->num_scal_dev) {
66 printk(KERN_ERR "%s: Couldn't find owner Twister for Cyclone!\n", __func__);
67 return last_bus;
68 }
69
70 switch (rio_devs[wpeg_num]->type) {
71 case CompatWPEG:
72 /*
73 * The Compatibility Winnipeg controls the 2 legacy buses,
74 * the 66MHz PCI bus [2 slots] and the 2 "extra" buses in case
75 * a PCI-PCI bridge card is used in either slot: total 5 buses.
76 */
77 num_buses = 5;
78 break;
79 case AltWPEG:
80 /*
81 * The Alternate Winnipeg controls the 2 133MHz buses [1 slot
82 * each], their 2 "extra" buses, the 100MHz bus [2 slots] and
83 * the "extra" buses for each of those slots: total 7 buses.
84 */
85 num_buses = 7;
86 break;
87 case LookOutAWPEG:
88 case LookOutBWPEG:
89 /*
90 * A Lookout Winnipeg controls 3 100MHz buses [2 slots each]
91 * & the "extra" buses for each of those slots: total 9 buses.
92 */
93 num_buses = 9;
94 break;
95 default:
96 printk(KERN_INFO "%s: Unsupported Winnipeg type!\n", __func__);
97 return last_bus;
98 }
99
100 for (bus = last_bus; bus < last_bus + num_buses; bus++)
101 mp_bus_id_to_node[bus] = node;
102 return bus;
103}
104
105static int __init build_detail_arrays(void)
106{
107 unsigned long ptr;
108 int i, scal_detail_size, rio_detail_size;
109
110 if (rio_table_hdr->num_scal_dev > MAX_NUMNODES) {
111 printk(KERN_WARNING "%s: MAX_NUMNODES too low! Defined as %d, but system has %d nodes.\n", __func__, MAX_NUMNODES, rio_table_hdr->num_scal_dev);
112 return 0;
113 }
114
115 switch (rio_table_hdr->version) {
116 default:
117 printk(KERN_WARNING "%s: Invalid Rio Grande Table Version: %d\n", __func__, rio_table_hdr->version);
118 return 0;
119 case 2:
120 scal_detail_size = 11;
121 rio_detail_size = 13;
122 break;
123 case 3:
124 scal_detail_size = 12;
125 rio_detail_size = 15;
126 break;
127 }
128
129 ptr = (unsigned long)rio_table_hdr + 3;
130 for (i = 0; i < rio_table_hdr->num_scal_dev; i++, ptr += scal_detail_size)
131 scal_devs[i] = (struct scal_detail *)ptr;
132
133 for (i = 0; i < rio_table_hdr->num_rio_dev; i++, ptr += rio_detail_size)
134 rio_devs[i] = (struct rio_detail *)ptr;
135
136 return 1;
137}
138
139void __init setup_summit(void)
140{
141 unsigned long ptr;
142 unsigned short offset;
143 int i, next_wpeg, next_bus = 0;
144
145 /* The pointer to the EBDA is stored in the word @ phys 0x40E(40:0E) */
146 ptr = get_bios_ebda();
147 ptr = (unsigned long)phys_to_virt(ptr);
148
149 rio_table_hdr = NULL;
150 offset = 0x180;
151 while (offset) {
152 /* The block id is stored in the 2nd word */
153 if (*((unsigned short *)(ptr + offset + 2)) == 0x4752) {
154 /* set the pointer past the offset & block id */
155 rio_table_hdr = (struct rio_table_hdr *)(ptr + offset + 4);
156 break;
157 }
158 /* The next offset is stored in the 1st word. 0 means no more */
159 offset = *((unsigned short *)(ptr + offset));
160 }
161 if (!rio_table_hdr) {
162 printk(KERN_ERR "%s: Unable to locate Rio Grande Table in EBDA - bailing!\n", __func__);
163 return;
164 }
165
166 if (!build_detail_arrays())
167 return;
168
169 /* The first Winnipeg we're looking for has an index of 0 */
170 next_wpeg = 0;
171 do {
172 for (i = 0; i < rio_table_hdr->num_rio_dev; i++) {
173 if (is_WPEG(rio_devs[i]) && rio_devs[i]->WP_index == next_wpeg) {
174 /* It's the Winnipeg we're looking for! */
175 next_bus = setup_pci_node_map_for_wpeg(i, next_bus);
176 next_wpeg++;
177 break;
178 }
179 }
180 /*
181 * If we go through all Rio devices and don't find one with
182 * the next index, it means we've found all the Winnipegs,
183 * and thus all the PCI buses.
184 */
185 if (i == rio_table_hdr->num_rio_dev)
186 next_wpeg = 0;
187 } while (next_wpeg != 0);
188}
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index e2e86a08f31d..3bdb64829b82 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -1,7 +1,7 @@
1ENTRY(sys_call_table) 1ENTRY(sys_call_table)
2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ 2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
3 .long sys_exit 3 .long sys_exit
4 .long sys_fork 4 .long ptregs_fork
5 .long sys_read 5 .long sys_read
6 .long sys_write 6 .long sys_write
7 .long sys_open /* 5 */ 7 .long sys_open /* 5 */
@@ -10,7 +10,7 @@ ENTRY(sys_call_table)
10 .long sys_creat 10 .long sys_creat
11 .long sys_link 11 .long sys_link
12 .long sys_unlink /* 10 */ 12 .long sys_unlink /* 10 */
13 .long sys_execve 13 .long ptregs_execve
14 .long sys_chdir 14 .long sys_chdir
15 .long sys_time 15 .long sys_time
16 .long sys_mknod 16 .long sys_mknod
@@ -109,17 +109,17 @@ ENTRY(sys_call_table)
109 .long sys_newlstat 109 .long sys_newlstat
110 .long sys_newfstat 110 .long sys_newfstat
111 .long sys_uname 111 .long sys_uname
112 .long sys_iopl /* 110 */ 112 .long ptregs_iopl /* 110 */
113 .long sys_vhangup 113 .long sys_vhangup
114 .long sys_ni_syscall /* old "idle" system call */ 114 .long sys_ni_syscall /* old "idle" system call */
115 .long sys_vm86old 115 .long ptregs_vm86old
116 .long sys_wait4 116 .long sys_wait4
117 .long sys_swapoff /* 115 */ 117 .long sys_swapoff /* 115 */
118 .long sys_sysinfo 118 .long sys_sysinfo
119 .long sys_ipc 119 .long sys_ipc
120 .long sys_fsync 120 .long sys_fsync
121 .long sys_sigreturn 121 .long ptregs_sigreturn
122 .long sys_clone /* 120 */ 122 .long ptregs_clone /* 120 */
123 .long sys_setdomainname 123 .long sys_setdomainname
124 .long sys_newuname 124 .long sys_newuname
125 .long sys_modify_ldt 125 .long sys_modify_ldt
@@ -165,14 +165,14 @@ ENTRY(sys_call_table)
165 .long sys_mremap 165 .long sys_mremap
166 .long sys_setresuid16 166 .long sys_setresuid16
167 .long sys_getresuid16 /* 165 */ 167 .long sys_getresuid16 /* 165 */
168 .long sys_vm86 168 .long ptregs_vm86
169 .long sys_ni_syscall /* Old sys_query_module */ 169 .long sys_ni_syscall /* Old sys_query_module */
170 .long sys_poll 170 .long sys_poll
171 .long sys_nfsservctl 171 .long sys_nfsservctl
172 .long sys_setresgid16 /* 170 */ 172 .long sys_setresgid16 /* 170 */
173 .long sys_getresgid16 173 .long sys_getresgid16
174 .long sys_prctl 174 .long sys_prctl
175 .long sys_rt_sigreturn 175 .long ptregs_rt_sigreturn
176 .long sys_rt_sigaction 176 .long sys_rt_sigaction
177 .long sys_rt_sigprocmask /* 175 */ 177 .long sys_rt_sigprocmask /* 175 */
178 .long sys_rt_sigpending 178 .long sys_rt_sigpending
@@ -185,11 +185,11 @@ ENTRY(sys_call_table)
185 .long sys_getcwd 185 .long sys_getcwd
186 .long sys_capget 186 .long sys_capget
187 .long sys_capset /* 185 */ 187 .long sys_capset /* 185 */
188 .long sys_sigaltstack 188 .long ptregs_sigaltstack
189 .long sys_sendfile 189 .long sys_sendfile
190 .long sys_ni_syscall /* reserved for streams1 */ 190 .long sys_ni_syscall /* reserved for streams1 */
191 .long sys_ni_syscall /* reserved for streams2 */ 191 .long sys_ni_syscall /* reserved for streams2 */
192 .long sys_vfork /* 190 */ 192 .long ptregs_vfork /* 190 */
193 .long sys_getrlimit 193 .long sys_getrlimit
194 .long sys_mmap2 194 .long sys_mmap2
195 .long sys_truncate64 195 .long sys_truncate64
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 3985cac0ed47..5c5d87f0b2e1 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -33,12 +33,12 @@
33#include <linux/time.h> 33#include <linux/time.h>
34#include <linux/mca.h> 34#include <linux/mca.h>
35 35
36#include <asm/arch_hooks.h> 36#include <asm/setup.h>
37#include <asm/hpet.h> 37#include <asm/hpet.h>
38#include <asm/time.h> 38#include <asm/time.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
40 40
41#include "do_timer.h" 41#include <asm/do_timer.h>
42 42
43int timer_ack; 43int timer_ack;
44 44
@@ -118,7 +118,7 @@ void __init hpet_time_init(void)
118{ 118{
119 if (!hpet_enable()) 119 if (!hpet_enable())
120 setup_pit_timer(); 120 setup_pit_timer();
121 time_init_hook(); 121 x86_quirk_time_init();
122} 122}
123 123
124/* 124/*
@@ -131,7 +131,7 @@ void __init hpet_time_init(void)
131 */ 131 */
132void __init time_init(void) 132void __init time_init(void)
133{ 133{
134 pre_time_init_hook(); 134 x86_quirk_pre_time_init();
135 tsc_init(); 135 tsc_init();
136 late_time_init = choose_time_init(); 136 late_time_init = choose_time_init();
137} 137}
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index ce5054642247..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,256 +0,0 @@
1#include <linux/spinlock.h>
2#include <linux/cpu.h>
3#include <linux/interrupt.h>
4
5#include <asm/tlbflush.h>
6
7DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
8 ____cacheline_aligned = { &init_mm, 0, };
9
10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h>
12
13/*
14 * Smarter SMP flushing macros.
15 * c/o Linus Torvalds.
16 *
17 * These mean you can really definitely utterly forget about
18 * writing to user space from interrupts. (Its not allowed anyway).
19 *
20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */
22
23static cpumask_t flush_cpumask;
24static struct mm_struct *flush_mm;
25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock);
27
28/*
29 * We cannot call mmdrop() because we are in interrupt context,
30 * instead update mm->cpu_vm_mask.
31 *
32 * We need to reload %cr3 since the page tables may be going
33 * away from under us..
34 */
35void leave_mm(int cpu)
36{
37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir);
40}
41EXPORT_SYMBOL_GPL(leave_mm);
42
43/*
44 *
45 * The flush IPI assumes that a thread switch happens in this order:
46 * [cpu0: the cpu that switches]
47 * 1) switch_mm() either 1a) or 1b)
48 * 1a) thread switch to a different mm
49 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
50 * Stop ipi delivery for the old mm. This is not synchronized with
51 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
52 * for the wrong mm, and in the worst case we perform a superfluous
53 * tlb flush.
54 * 1a2) set cpu_tlbstate to TLBSTATE_OK
55 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
56 * was in lazy tlb mode.
57 * 1a3) update cpu_tlbstate[].active_mm
58 * Now cpu0 accepts tlb flushes for the new mm.
59 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
60 * Now the other cpus will send tlb flush ipis.
61 * 1a4) change cr3.
62 * 1b) thread switch without mm change
63 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
64 * flush ipis.
65 * 1b1) set cpu_tlbstate to TLBSTATE_OK
66 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
67 * Atomically set the bit [other cpus will start sending flush ipis],
68 * and test the bit.
69 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
70 * 2) switch %%esp, ie current
71 *
72 * The interrupt must handle 2 special cases:
73 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
74 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
75 * runs in kernel space, the cpu could load tlb entries for user space
76 * pages.
77 *
78 * The good news is that cpu_tlbstate is local to each cpu, no
79 * write/read ordering problems.
80 */
81
82/*
83 * TLB flush IPI:
84 *
85 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
86 * 2) Leave the mm if we are in the lazy tlb mode.
87 */
88
89void smp_invalidate_interrupt(struct pt_regs *regs)
90{
91 unsigned long cpu;
92
93 cpu = get_cpu();
94
95 if (!cpu_isset(cpu, flush_cpumask))
96 goto out;
97 /*
98 * This was a BUG() but until someone can quote me the
99 * line from the intel manual that guarantees an IPI to
100 * multiple CPUs is retried _only_ on the erroring CPUs
101 * its staying as a return
102 *
103 * BUG();
104 */
105
106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb();
110 else
111 __flush_tlb_one(flush_va);
112 } else
113 leave_mm(cpu);
114 }
115 ack_APIC_irq();
116 smp_mb__before_clear_bit();
117 cpu_clear(cpu, flush_cpumask);
118 smp_mb__after_clear_bit();
119out:
120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count);
122}
123
124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
125 unsigned long va)
126{
127 cpumask_t cpumask = *cpumaskp;
128
129 /*
130 * A couple of (to be removed) sanity checks:
131 *
132 * - current CPU must not be in mask
133 * - mask must exist :)
134 */
135 BUG_ON(cpus_empty(cpumask));
136 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
137 BUG_ON(!mm);
138
139#ifdef CONFIG_HOTPLUG_CPU
140 /* If a CPU which we ran on has gone down, OK. */
141 cpus_and(cpumask, cpumask, cpu_online_map);
142 if (unlikely(cpus_empty(cpumask)))
143 return;
144#endif
145
146 /*
147 * i'm not happy about this global shared spinlock in the
148 * MM hot path, but we'll see how contended it is.
149 * AK: x86-64 has a faster method that could be ported.
150 */
151 spin_lock(&tlbstate_lock);
152
153 flush_mm = mm;
154 flush_va = va;
155 cpus_or(flush_cpumask, cpumask, flush_cpumask);
156
157 /*
158 * Make the above memory operations globally visible before
159 * sending the IPI.
160 */
161 smp_mb();
162 /*
163 * We have to send the IPI only to
164 * CPUs affected.
165 */
166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
167
168 while (!cpus_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */
170 cpu_relax();
171
172 flush_mm = NULL;
173 flush_va = 0;
174 spin_unlock(&tlbstate_lock);
175}
176
177void flush_tlb_current_task(void)
178{
179 struct mm_struct *mm = current->mm;
180 cpumask_t cpu_mask;
181
182 preempt_disable();
183 cpu_mask = mm->cpu_vm_mask;
184 cpu_clear(smp_processor_id(), cpu_mask);
185
186 local_flush_tlb();
187 if (!cpus_empty(cpu_mask))
188 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
189 preempt_enable();
190}
191
192void flush_tlb_mm(struct mm_struct *mm)
193{
194 cpumask_t cpu_mask;
195
196 preempt_disable();
197 cpu_mask = mm->cpu_vm_mask;
198 cpu_clear(smp_processor_id(), cpu_mask);
199
200 if (current->active_mm == mm) {
201 if (current->mm)
202 local_flush_tlb();
203 else
204 leave_mm(smp_processor_id());
205 }
206 if (!cpus_empty(cpu_mask))
207 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
208
209 preempt_enable();
210}
211
212void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
213{
214 struct mm_struct *mm = vma->vm_mm;
215 cpumask_t cpu_mask;
216
217 preempt_disable();
218 cpu_mask = mm->cpu_vm_mask;
219 cpu_clear(smp_processor_id(), cpu_mask);
220
221 if (current->active_mm == mm) {
222 if (current->mm)
223 __flush_tlb_one(va);
224 else
225 leave_mm(smp_processor_id());
226 }
227
228 if (!cpus_empty(cpu_mask))
229 flush_tlb_others(cpu_mask, mm, va);
230
231 preempt_enable();
232}
233EXPORT_SYMBOL(flush_tlb_page);
234
235static void do_flush_tlb_all(void *info)
236{
237 unsigned long cpu = smp_processor_id();
238
239 __flush_tlb_all();
240 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
241 leave_mm(cpu);
242}
243
244void flush_tlb_all(void)
245{
246 on_each_cpu(do_flush_tlb_all, NULL, 1);
247}
248
249void reset_lazy_tlbstate(void)
250{
251 int cpu = raw_smp_processor_id();
252
253 per_cpu(cpu_tlbstate, cpu).state = 0;
254 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
255}
256
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
deleted file mode 100644
index f8be6f1d2e48..000000000000
--- a/arch/x86/kernel/tlb_64.c
+++ /dev/null
@@ -1,284 +0,0 @@
1#include <linux/init.h>
2
3#include <linux/mm.h>
4#include <linux/delay.h>
5#include <linux/spinlock.h>
6#include <linux/smp.h>
7#include <linux/kernel_stat.h>
8#include <linux/mc146818rtc.h>
9#include <linux/interrupt.h>
10
11#include <asm/mtrr.h>
12#include <asm/pgalloc.h>
13#include <asm/tlbflush.h>
14#include <asm/mmu_context.h>
15#include <asm/proto.h>
16#include <asm/apicdef.h>
17#include <asm/idle.h>
18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h>
20
21#include <mach_ipi.h>
22/*
23 * Smarter SMP flushing macros.
24 * c/o Linus Torvalds.
25 *
26 * These mean you can really definitely utterly forget about
27 * writing to user space from interrupts. (Its not allowed anyway).
28 *
29 * Optimizations Manfred Spraul <manfred@colorfullife.com>
30 *
31 * More scalable flush, from Andi Kleen
32 *
33 * To avoid global state use 8 different call vectors.
34 * Each CPU uses a specific vector to trigger flushes on other
35 * CPUs. Depending on the received vector the target CPUs look into
36 * the right per cpu variable for the flush data.
37 *
38 * With more than 8 CPUs they are hashed to the 8 available
39 * vectors. The limited global vector space forces us to this right now.
40 * In future when interrupts are split into per CPU domains this could be
41 * fixed, at the cost of triggering multiple IPIs in some cases.
42 */
43
44union smp_flush_state {
45 struct {
46 cpumask_t flush_cpumask;
47 struct mm_struct *flush_mm;
48 unsigned long flush_va;
49 spinlock_t tlbstate_lock;
50 };
51 char pad[SMP_CACHE_BYTES];
52} ____cacheline_aligned;
53
54/* State is put into the per CPU data section, but padded
55 to a full cache line because other CPUs can access it and we don't
56 want false sharing in the per cpu data segment. */
57static DEFINE_PER_CPU(union smp_flush_state, flush_state);
58
59/*
60 * We cannot call mmdrop() because we are in interrupt context,
61 * instead update mm->cpu_vm_mask.
62 */
63void leave_mm(int cpu)
64{
65 if (read_pda(mmu_state) == TLBSTATE_OK)
66 BUG();
67 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
68 load_cr3(swapper_pg_dir);
69}
70EXPORT_SYMBOL_GPL(leave_mm);
71
72/*
73 *
74 * The flush IPI assumes that a thread switch happens in this order:
75 * [cpu0: the cpu that switches]
76 * 1) switch_mm() either 1a) or 1b)
77 * 1a) thread switch to a different mm
78 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
79 * Stop ipi delivery for the old mm. This is not synchronized with
80 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
81 * for the wrong mm, and in the worst case we perform a superfluous
82 * tlb flush.
83 * 1a2) set cpu mmu_state to TLBSTATE_OK
84 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
85 * was in lazy tlb mode.
86 * 1a3) update cpu active_mm
87 * Now cpu0 accepts tlb flushes for the new mm.
88 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
89 * Now the other cpus will send tlb flush ipis.
90 * 1a4) change cr3.
91 * 1b) thread switch without mm change
92 * cpu active_mm is correct, cpu0 already handles
93 * flush ipis.
94 * 1b1) set cpu mmu_state to TLBSTATE_OK
95 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
96 * Atomically set the bit [other cpus will start sending flush ipis],
97 * and test the bit.
98 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
99 * 2) switch %%esp, ie current
100 *
101 * The interrupt must handle 2 special cases:
102 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
103 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
104 * runs in kernel space, the cpu could load tlb entries for user space
105 * pages.
106 *
107 * The good news is that cpu mmu_state is local to each cpu, no
108 * write/read ordering problems.
109 */
110
111/*
112 * TLB flush IPI:
113 *
114 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
115 * 2) Leave the mm if we are in the lazy tlb mode.
116 *
117 * Interrupts are disabled.
118 */
119
120asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
121{
122 int cpu;
123 int sender;
124 union smp_flush_state *f;
125
126 cpu = smp_processor_id();
127 /*
128 * orig_rax contains the negated interrupt vector.
129 * Use that to determine where the sender put the data.
130 */
131 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
132 f = &per_cpu(flush_state, sender);
133
134 if (!cpu_isset(cpu, f->flush_cpumask))
135 goto out;
136 /*
137 * This was a BUG() but until someone can quote me the
138 * line from the intel manual that guarantees an IPI to
139 * multiple CPUs is retried _only_ on the erroring CPUs
140 * its staying as a return
141 *
142 * BUG();
143 */
144
145 if (f->flush_mm == read_pda(active_mm)) {
146 if (read_pda(mmu_state) == TLBSTATE_OK) {
147 if (f->flush_va == TLB_FLUSH_ALL)
148 local_flush_tlb();
149 else
150 __flush_tlb_one(f->flush_va);
151 } else
152 leave_mm(cpu);
153 }
154out:
155 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask);
157 inc_irq_stat(irq_tlb_count);
158}
159
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
161 unsigned long va)
162{
163 int sender;
164 union smp_flush_state *f;
165 cpumask_t cpumask = *cpumaskp;
166
167 if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
168 return;
169
170 /* Caller has disabled preemption */
171 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
172 f = &per_cpu(flush_state, sender);
173
174 /*
175 * Could avoid this lock when
176 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
177 * probably not worth checking this for a cache-hot lock.
178 */
179 spin_lock(&f->tlbstate_lock);
180
181 f->flush_mm = mm;
182 f->flush_va = va;
183 cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
184
185 /*
186 * Make the above memory operations globally visible before
187 * sending the IPI.
188 */
189 smp_mb();
190 /*
191 * We have to send the IPI only to
192 * CPUs affected.
193 */
194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
195
196 while (!cpus_empty(f->flush_cpumask))
197 cpu_relax();
198
199 f->flush_mm = NULL;
200 f->flush_va = 0;
201 spin_unlock(&f->tlbstate_lock);
202}
203
204static int __cpuinit init_smp_flush(void)
205{
206 int i;
207
208 for_each_possible_cpu(i)
209 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
210
211 return 0;
212}
213core_initcall(init_smp_flush);
214
215void flush_tlb_current_task(void)
216{
217 struct mm_struct *mm = current->mm;
218 cpumask_t cpu_mask;
219
220 preempt_disable();
221 cpu_mask = mm->cpu_vm_mask;
222 cpu_clear(smp_processor_id(), cpu_mask);
223
224 local_flush_tlb();
225 if (!cpus_empty(cpu_mask))
226 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
227 preempt_enable();
228}
229
230void flush_tlb_mm(struct mm_struct *mm)
231{
232 cpumask_t cpu_mask;
233
234 preempt_disable();
235 cpu_mask = mm->cpu_vm_mask;
236 cpu_clear(smp_processor_id(), cpu_mask);
237
238 if (current->active_mm == mm) {
239 if (current->mm)
240 local_flush_tlb();
241 else
242 leave_mm(smp_processor_id());
243 }
244 if (!cpus_empty(cpu_mask))
245 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
246
247 preempt_enable();
248}
249
250void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
251{
252 struct mm_struct *mm = vma->vm_mm;
253 cpumask_t cpu_mask;
254
255 preempt_disable();
256 cpu_mask = mm->cpu_vm_mask;
257 cpu_clear(smp_processor_id(), cpu_mask);
258
259 if (current->active_mm == mm) {
260 if (current->mm)
261 __flush_tlb_one(va);
262 else
263 leave_mm(smp_processor_id());
264 }
265
266 if (!cpus_empty(cpu_mask))
267 flush_tlb_others(cpu_mask, mm, va);
268
269 preempt_enable();
270}
271
272static void do_flush_tlb_all(void *info)
273{
274 unsigned long cpu = smp_processor_id();
275
276 __flush_tlb_all();
277 if (read_pda(mmu_state) == TLBSTATE_LAZY)
278 leave_mm(cpu);
279}
280
281void flush_tlb_all(void)
282{
283 on_each_cpu(do_flush_tlb_all, NULL, 1);
284}
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6812b829ed83..f04549afcfe9 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,16 +11,15 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12 12
13#include <asm/mmu_context.h> 13#include <asm/mmu_context.h>
14#include <asm/uv/uv.h>
14#include <asm/uv/uv_mmrs.h> 15#include <asm/uv/uv_mmrs.h>
15#include <asm/uv/uv_hub.h> 16#include <asm/uv/uv_hub.h>
16#include <asm/uv/uv_bau.h> 17#include <asm/uv/uv_bau.h>
17#include <asm/genapic.h> 18#include <asm/apic.h>
18#include <asm/idle.h> 19#include <asm/idle.h>
19#include <asm/tsc.h> 20#include <asm/tsc.h>
20#include <asm/irq_vectors.h> 21#include <asm/irq_vectors.h>
21 22
22#include <mach_apic.h>
23
24static struct bau_control **uv_bau_table_bases __read_mostly; 23static struct bau_control **uv_bau_table_bases __read_mostly;
25static int uv_bau_retry_limit __read_mostly; 24static int uv_bau_retry_limit __read_mostly;
26 25
@@ -210,14 +209,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
210 * 209 *
211 * Send a broadcast and wait for a broadcast message to complete. 210 * Send a broadcast and wait for a broadcast message to complete.
212 * 211 *
213 * The cpumaskp mask contains the cpus the broadcast was sent to. 212 * The flush_mask contains the cpus the broadcast was sent to.
214 * 213 *
215 * Returns 1 if all remote flushing was done. The mask is zeroed. 214 * Returns NULL if all remote flushing was done. The mask is zeroed.
216 * Returns 0 if some remote flushing remains to be done. The mask is left 215 * Returns @flush_mask if some remote flushing remains to be done. The
217 * unchanged. 216 * mask will have some bits still set.
218 */ 217 */
219int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, 218const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
220 cpumask_t *cpumaskp) 219 struct bau_desc *bau_desc,
220 struct cpumask *flush_mask)
221{ 221{
222 int completion_status = 0; 222 int completion_status = 0;
223 int right_shift; 223 int right_shift;
@@ -257,66 +257,76 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
257 * the cpu's, all of which are still in the mask. 257 * the cpu's, all of which are still in the mask.
258 */ 258 */
259 __get_cpu_var(ptcstats).ptc_i++; 259 __get_cpu_var(ptcstats).ptc_i++;
260 return 0; 260 return flush_mask;
261 } 261 }
262 262
263 /* 263 /*
264 * Success, so clear the remote cpu's from the mask so we don't 264 * Success, so clear the remote cpu's from the mask so we don't
265 * use the IPI method of shootdown on them. 265 * use the IPI method of shootdown on them.
266 */ 266 */
267 for_each_cpu_mask(bit, *cpumaskp) { 267 for_each_cpu(bit, flush_mask) {
268 blade = uv_cpu_to_blade_id(bit); 268 blade = uv_cpu_to_blade_id(bit);
269 if (blade == this_blade) 269 if (blade == this_blade)
270 continue; 270 continue;
271 cpu_clear(bit, *cpumaskp); 271 cpumask_clear_cpu(bit, flush_mask);
272 } 272 }
273 if (!cpus_empty(*cpumaskp)) 273 if (!cpumask_empty(flush_mask))
274 return 0; 274 return flush_mask;
275 return 1; 275 return NULL;
276} 276}
277 277
278/** 278/**
279 * uv_flush_tlb_others - globally purge translation cache of a virtual 279 * uv_flush_tlb_others - globally purge translation cache of a virtual
280 * address or all TLB's 280 * address or all TLB's
281 * @cpumaskp: mask of all cpu's in which the address is to be removed 281 * @cpumask: mask of all cpu's in which the address is to be removed
282 * @mm: mm_struct containing virtual address range 282 * @mm: mm_struct containing virtual address range
283 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) 283 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
284 * @cpu: the current cpu
284 * 285 *
285 * This is the entry point for initiating any UV global TLB shootdown. 286 * This is the entry point for initiating any UV global TLB shootdown.
286 * 287 *
287 * Purges the translation caches of all specified processors of the given 288 * Purges the translation caches of all specified processors of the given
288 * virtual address, or purges all TLB's on specified processors. 289 * virtual address, or purges all TLB's on specified processors.
289 * 290 *
290 * The caller has derived the cpumaskp from the mm_struct and has subtracted 291 * The caller has derived the cpumask from the mm_struct. This function
291 * the local cpu from the mask. This function is called only if there 292 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
292 * are bits set in the mask. (e.g. flush_tlb_page())
293 * 293 *
294 * The cpumaskp is converted into a nodemask of the nodes containing 294 * The cpumask is converted into a nodemask of the nodes containing
295 * the cpus. 295 * the cpus.
296 * 296 *
297 * Returns 1 if all remote flushing was done. 297 * Note that this function should be called with preemption disabled.
298 * Returns 0 if some remote flushing remains to be done. 298 *
299 * Returns NULL if all remote flushing was done.
300 * Returns pointer to cpumask if some remote flushing remains to be
301 * done. The returned pointer is valid till preemption is re-enabled.
299 */ 302 */
300int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, 303const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
301 unsigned long va) 304 struct mm_struct *mm,
305 unsigned long va, unsigned int cpu)
302{ 306{
307 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
308 struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
303 int i; 309 int i;
304 int bit; 310 int bit;
305 int blade; 311 int blade;
306 int cpu; 312 int uv_cpu;
307 int this_blade; 313 int this_blade;
308 int locals = 0; 314 int locals = 0;
309 struct bau_desc *bau_desc; 315 struct bau_desc *bau_desc;
310 316
311 cpu = uv_blade_processor_id(); 317 WARN_ON(!in_atomic());
318
319 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
320
321 uv_cpu = uv_blade_processor_id();
312 this_blade = uv_numa_blade_id(); 322 this_blade = uv_numa_blade_id();
313 bau_desc = __get_cpu_var(bau_control).descriptor_base; 323 bau_desc = __get_cpu_var(bau_control).descriptor_base;
314 bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; 324 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
315 325
316 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 326 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
317 327
318 i = 0; 328 i = 0;
319 for_each_cpu_mask(bit, *cpumaskp) { 329 for_each_cpu(bit, flush_mask) {
320 blade = uv_cpu_to_blade_id(bit); 330 blade = uv_cpu_to_blade_id(bit);
321 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); 331 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
322 if (blade == this_blade) { 332 if (blade == this_blade) {
@@ -331,17 +341,17 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
331 * no off_node flushing; return status for local node 341 * no off_node flushing; return status for local node
332 */ 342 */
333 if (locals) 343 if (locals)
334 return 0; 344 return flush_mask;
335 else 345 else
336 return 1; 346 return NULL;
337 } 347 }
338 __get_cpu_var(ptcstats).requestor++; 348 __get_cpu_var(ptcstats).requestor++;
339 __get_cpu_var(ptcstats).ntargeted += i; 349 __get_cpu_var(ptcstats).ntargeted += i;
340 350
341 bau_desc->payload.address = va; 351 bau_desc->payload.address = va;
342 bau_desc->payload.sending_cpu = smp_processor_id(); 352 bau_desc->payload.sending_cpu = cpu;
343 353
344 return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); 354 return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
345} 355}
346 356
347/* 357/*
diff --git a/arch/x86/kernel/trampoline_32.S b/arch/x86/kernel/trampoline_32.S
index d8ccc3c6552f..66d874e5404c 100644
--- a/arch/x86/kernel/trampoline_32.S
+++ b/arch/x86/kernel/trampoline_32.S
@@ -29,7 +29,7 @@
29 29
30#include <linux/linkage.h> 30#include <linux/linkage.h>
31#include <asm/segment.h> 31#include <asm/segment.h>
32#include <asm/page.h> 32#include <asm/page_types.h>
33 33
34/* We can free up trampoline after bootup if cpu hotplug is not supported. */ 34/* We can free up trampoline after bootup if cpu hotplug is not supported. */
35#ifndef CONFIG_HOTPLUG_CPU 35#ifndef CONFIG_HOTPLUG_CPU
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 894293c598db..cddfb8d386b9 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -25,10 +25,11 @@
25 */ 25 */
26 26
27#include <linux/linkage.h> 27#include <linux/linkage.h>
28#include <asm/pgtable.h> 28#include <asm/pgtable_types.h>
29#include <asm/page.h> 29#include <asm/page_types.h>
30#include <asm/msr.h> 30#include <asm/msr.h>
31#include <asm/segment.h> 31#include <asm/segment.h>
32#include <asm/processor-flags.h>
32 33
33.section .rodata, "a", @progbits 34.section .rodata, "a", @progbits
34 35
@@ -37,7 +38,7 @@
37ENTRY(trampoline_data) 38ENTRY(trampoline_data)
38r_base = . 39r_base = .
39 cli # We should be safe anyway 40 cli # We should be safe anyway
40 wbinvd 41 wbinvd
41 mov %cs, %ax # Code and data in the same place 42 mov %cs, %ax # Code and data in the same place
42 mov %ax, %ds 43 mov %ax, %ds
43 mov %ax, %es 44 mov %ax, %es
@@ -73,9 +74,8 @@ r_base = .
73 lidtl tidt - r_base # load idt with 0, 0 74 lidtl tidt - r_base # load idt with 0, 0
74 lgdtl tgdt - r_base # load gdt with whatever is appropriate 75 lgdtl tgdt - r_base # load gdt with whatever is appropriate
75 76
76 xor %ax, %ax 77 mov $X86_CR0_PE, %ax # protected mode (PE) bit
77 inc %ax # protected mode (PE) bit 78 lmsw %ax # into protected mode
78 lmsw %ax # into protected mode
79 79
80 # flush prefetch and jump to startup_32 80 # flush prefetch and jump to startup_32
81 ljmpl *(startup_32_vector - r_base) 81 ljmpl *(startup_32_vector - r_base)
@@ -86,9 +86,8 @@ startup_32:
86 movl $__KERNEL_DS, %eax # Initialize the %ds segment register 86 movl $__KERNEL_DS, %eax # Initialize the %ds segment register
87 movl %eax, %ds 87 movl %eax, %ds
88 88
89 xorl %eax, %eax 89 movl $X86_CR4_PAE, %eax
90 btsl $5, %eax # Enable PAE mode 90 movl %eax, %cr4 # Enable PAE mode
91 movl %eax, %cr4
92 91
93 # Setup trampoline 4 level pagetables 92 # Setup trampoline 4 level pagetables
94 leal (trampoline_level4_pgt - r_base)(%esi), %eax 93 leal (trampoline_level4_pgt - r_base)(%esi), %eax
@@ -99,9 +98,9 @@ startup_32:
99 xorl %edx, %edx 98 xorl %edx, %edx
100 wrmsr 99 wrmsr
101 100
102 xorl %eax, %eax 101 # Enable paging and in turn activate Long Mode
103 btsl $31, %eax # Enable paging and in turn activate Long Mode 102 # Enable protected mode
104 btsl $0, %eax # Enable protected mode 103 movl $(X86_CR0_PG | X86_CR0_PE), %eax
105 movl %eax, %cr0 104 movl %eax, %cr0
106 105
107 /* 106 /*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index a9e7548e1790..c05430ac1b44 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,15 +54,14 @@
54#include <asm/desc.h> 54#include <asm/desc.h>
55#include <asm/i387.h> 55#include <asm/i387.h>
56 56
57#include <mach_traps.h> 57#include <asm/mach_traps.h>
58 58
59#ifdef CONFIG_X86_64 59#ifdef CONFIG_X86_64
60#include <asm/pgalloc.h> 60#include <asm/pgalloc.h>
61#include <asm/proto.h> 61#include <asm/proto.h>
62#include <asm/pda.h>
63#else 62#else
64#include <asm/processor-flags.h> 63#include <asm/processor-flags.h>
65#include <asm/arch_hooks.h> 64#include <asm/setup.h>
66#include <asm/traps.h> 65#include <asm/traps.h>
67 66
68#include "cpu/mcheck/mce.h" 67#include "cpu/mcheck/mce.h"
@@ -914,19 +913,20 @@ void math_emulate(struct math_emu_info *info)
914} 913}
915#endif /* CONFIG_MATH_EMULATION */ 914#endif /* CONFIG_MATH_EMULATION */
916 915
917dotraplinkage void __kprobes do_device_not_available(struct pt_regs regs) 916dotraplinkage void __kprobes
917do_device_not_available(struct pt_regs *regs, long error_code)
918{ 918{
919#ifdef CONFIG_X86_32 919#ifdef CONFIG_X86_32
920 if (read_cr0() & X86_CR0_EM) { 920 if (read_cr0() & X86_CR0_EM) {
921 struct math_emu_info info = { }; 921 struct math_emu_info info = { };
922 922
923 conditional_sti(&regs); 923 conditional_sti(regs);
924 924
925 info.regs = &regs; 925 info.regs = regs;
926 math_emulate(&info); 926 math_emulate(&info);
927 } else { 927 } else {
928 math_state_restore(); /* interrupts still off */ 928 math_state_restore(); /* interrupts still off */
929 conditional_sti(&regs); 929 conditional_sti(regs);
930 } 930 }
931#else 931#else
932 math_state_restore(); 932 math_state_restore();
@@ -942,7 +942,7 @@ dotraplinkage void do_iret_error(struct pt_regs *regs, long error_code)
942 info.si_signo = SIGILL; 942 info.si_signo = SIGILL;
943 info.si_errno = 0; 943 info.si_errno = 0;
944 info.si_code = ILL_BADSTK; 944 info.si_code = ILL_BADSTK;
945 info.si_addr = 0; 945 info.si_addr = NULL;
946 if (notify_die(DIE_TRAP, "iret exception", 946 if (notify_die(DIE_TRAP, "iret exception",
947 regs, error_code, 32, SIGILL) == NOTIFY_STOP) 947 regs, error_code, 32, SIGILL) == NOTIFY_STOP)
948 return; 948 return;
@@ -1026,6 +1026,6 @@ void __init trap_init(void)
1026 cpu_init(); 1026 cpu_init();
1027 1027
1028#ifdef CONFIG_X86_32 1028#ifdef CONFIG_X86_32
1029 trap_init_hook(); 1029 x86_quirk_trap_init();
1030#endif 1030#endif
1031} 1031}
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 599e58168631..83d53ce5d4c4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -773,7 +773,7 @@ __cpuinit int unsynchronized_tsc(void)
773 if (!cpu_has_tsc || tsc_unstable) 773 if (!cpu_has_tsc || tsc_unstable)
774 return 1; 774 return 1;
775 775
776#ifdef CONFIG_X86_SMP 776#ifdef CONFIG_SMP
777 if (apic_is_clustered_box()) 777 if (apic_is_clustered_box())
778 return 1; 778 return 1;
779#endif 779#endif
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index d801d06af068..191a876e9e87 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -24,18 +24,14 @@
24 24
25#include <asm/visws/cobalt.h> 25#include <asm/visws/cobalt.h>
26#include <asm/visws/piix4.h> 26#include <asm/visws/piix4.h>
27#include <asm/arch_hooks.h>
28#include <asm/io_apic.h> 27#include <asm/io_apic.h>
29#include <asm/fixmap.h> 28#include <asm/fixmap.h>
30#include <asm/reboot.h> 29#include <asm/reboot.h>
31#include <asm/setup.h> 30#include <asm/setup.h>
31#include <asm/apic.h>
32#include <asm/e820.h> 32#include <asm/e820.h>
33#include <asm/io.h> 33#include <asm/io.h>
34 34
35#include <mach_ipi.h>
36
37#include "mach_apic.h"
38
39#include <linux/kernel_stat.h> 35#include <linux/kernel_stat.h>
40 36
41#include <asm/i8259.h> 37#include <asm/i8259.h>
@@ -49,8 +45,6 @@
49 45
50extern int no_broadcast; 46extern int no_broadcast;
51 47
52#include <asm/apic.h>
53
54char visws_board_type = -1; 48char visws_board_type = -1;
55char visws_board_rev = -1; 49char visws_board_rev = -1;
56 50
@@ -200,7 +194,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
200 return; 194 return;
201 } 195 }
202 196
203 apic_cpus = apicid_to_cpu_present(m->apicid); 197 apic_cpus = apic->apicid_to_cpu_present(m->apicid);
204 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); 198 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
205 /* 199 /*
206 * Validate version 200 * Validate version
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 4eeb5cf9720d..d7ac84e7fc1c 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
158 ret = KVM86->regs32; 158 ret = KVM86->regs32;
159 159
160 ret->fs = current->thread.saved_fs; 160 ret->fs = current->thread.saved_fs;
161 loadsegment(gs, current->thread.saved_gs); 161 set_user_gs(ret, current->thread.saved_gs);
162 162
163 return ret; 163 return ret;
164} 164}
@@ -197,9 +197,9 @@ out:
197static int do_vm86_irq_handling(int subfunction, int irqnumber); 197static int do_vm86_irq_handling(int subfunction, int irqnumber);
198static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); 198static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
199 199
200asmlinkage int sys_vm86old(struct pt_regs regs) 200int sys_vm86old(struct pt_regs *regs)
201{ 201{
202 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; 202 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
203 struct kernel_vm86_struct info; /* declare this _on top_, 203 struct kernel_vm86_struct info; /* declare this _on top_,
204 * this avoids wasting of stack space. 204 * this avoids wasting of stack space.
205 * This remains on the stack until we 205 * This remains on the stack until we
@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
218 if (tmp) 218 if (tmp)
219 goto out; 219 goto out;
220 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); 220 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
221 info.regs32 = &regs; 221 info.regs32 = regs;
222 tsk->thread.vm86_info = v86; 222 tsk->thread.vm86_info = v86;
223 do_sys_vm86(&info, tsk); 223 do_sys_vm86(&info, tsk);
224 ret = 0; /* we never return here */ 224 ret = 0; /* we never return here */
@@ -227,7 +227,7 @@ out:
227} 227}
228 228
229 229
230asmlinkage int sys_vm86(struct pt_regs regs) 230int sys_vm86(struct pt_regs *regs)
231{ 231{
232 struct kernel_vm86_struct info; /* declare this _on top_, 232 struct kernel_vm86_struct info; /* declare this _on top_,
233 * this avoids wasting of stack space. 233 * this avoids wasting of stack space.
@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
239 struct vm86plus_struct __user *v86; 239 struct vm86plus_struct __user *v86;
240 240
241 tsk = current; 241 tsk = current;
242 switch (regs.bx) { 242 switch (regs->bx) {
243 case VM86_REQUEST_IRQ: 243 case VM86_REQUEST_IRQ:
244 case VM86_FREE_IRQ: 244 case VM86_FREE_IRQ:
245 case VM86_GET_IRQ_BITS: 245 case VM86_GET_IRQ_BITS:
246 case VM86_GET_AND_RESET_IRQ: 246 case VM86_GET_AND_RESET_IRQ:
247 ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); 247 ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
248 goto out; 248 goto out;
249 case VM86_PLUS_INSTALL_CHECK: 249 case VM86_PLUS_INSTALL_CHECK:
250 /* 250 /*
@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs)
261 ret = -EPERM; 261 ret = -EPERM;
262 if (tsk->thread.saved_sp0) 262 if (tsk->thread.saved_sp0)
263 goto out; 263 goto out;
264 v86 = (struct vm86plus_struct __user *)regs.cx; 264 v86 = (struct vm86plus_struct __user *)regs->cx;
265 tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, 265 tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
266 offsetof(struct kernel_vm86_struct, regs32) - 266 offsetof(struct kernel_vm86_struct, regs32) -
267 sizeof(info.regs)); 267 sizeof(info.regs));
268 ret = -EFAULT; 268 ret = -EFAULT;
269 if (tmp) 269 if (tmp)
270 goto out; 270 goto out;
271 info.regs32 = &regs; 271 info.regs32 = regs;
272 info.vm86plus.is_vm86pus = 1; 272 info.vm86plus.is_vm86pus = 1;
273 tsk->thread.vm86_info = (struct vm86_struct __user *)v86; 273 tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
274 do_sys_vm86(&info, tsk); 274 do_sys_vm86(&info, tsk);
@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
323 info->regs32->ax = 0; 323 info->regs32->ax = 0;
324 tsk->thread.saved_sp0 = tsk->thread.sp0; 324 tsk->thread.saved_sp0 = tsk->thread.sp0;
325 tsk->thread.saved_fs = info->regs32->fs; 325 tsk->thread.saved_fs = info->regs32->fs;
326 savesegment(gs, tsk->thread.saved_gs); 326 tsk->thread.saved_gs = get_user_gs(info->regs32);
327 327
328 tss = &per_cpu(init_tss, get_cpu()); 328 tss = &per_cpu(init_tss, get_cpu());
329 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; 329 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index bef58b4982db..2cc4a90e2cb3 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -680,10 +680,11 @@ static inline int __init activate_vmi(void)
680 para_fill(pv_mmu_ops.write_cr2, SetCR2); 680 para_fill(pv_mmu_ops.write_cr2, SetCR2);
681 para_fill(pv_mmu_ops.write_cr3, SetCR3); 681 para_fill(pv_mmu_ops.write_cr3, SetCR3);
682 para_fill(pv_cpu_ops.write_cr4, SetCR4); 682 para_fill(pv_cpu_ops.write_cr4, SetCR4);
683 para_fill(pv_irq_ops.save_fl, GetInterruptMask); 683
684 para_fill(pv_irq_ops.restore_fl, SetInterruptMask); 684 para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
685 para_fill(pv_irq_ops.irq_disable, DisableInterrupts); 685 para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
686 para_fill(pv_irq_ops.irq_enable, EnableInterrupts); 686 para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
687 para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
687 688
688 para_fill(pv_cpu_ops.wbinvd, WBINVD); 689 para_fill(pv_cpu_ops.wbinvd, WBINVD);
689 para_fill(pv_cpu_ops.read_tsc, RDTSC); 690 para_fill(pv_cpu_ops.read_tsc, RDTSC);
@@ -797,8 +798,8 @@ static inline int __init activate_vmi(void)
797#endif 798#endif
798 799
799#ifdef CONFIG_X86_LOCAL_APIC 800#ifdef CONFIG_X86_LOCAL_APIC
800 para_fill(apic_ops->read, APICRead); 801 para_fill(apic->read, APICRead);
801 para_fill(apic_ops->write, APICWrite); 802 para_fill(apic->write, APICWrite);
802#endif 803#endif
803 804
804 /* 805 /*
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index e5b088fffa40..33a788d5879c 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -28,7 +28,6 @@
28 28
29#include <asm/vmi.h> 29#include <asm/vmi.h>
30#include <asm/vmi_time.h> 30#include <asm/vmi_time.h>
31#include <asm/arch_hooks.h>
32#include <asm/apicdef.h> 31#include <asm/apicdef.h>
33#include <asm/apic.h> 32#include <asm/apic.h>
34#include <asm/timer.h> 33#include <asm/timer.h>
@@ -256,7 +255,7 @@ void __devinit vmi_time_bsp_init(void)
256 */ 255 */
257 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 256 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
258 local_irq_disable(); 257 local_irq_disable();
259#ifdef CONFIG_X86_SMP 258#ifdef CONFIG_SMP
260 /* 259 /*
261 * XXX handle_percpu_irq only defined for SMP; we need to switch over 260 * XXX handle_percpu_irq only defined for SMP; we need to switch over
262 * to using it, since this is a local interrupt, which each CPU must 261 * to using it, since this is a local interrupt, which each CPU must
@@ -288,8 +287,7 @@ static struct clocksource clocksource_vmi;
288static cycle_t read_real_cycles(void) 287static cycle_t read_real_cycles(void)
289{ 288{
290 cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL); 289 cycle_t ret = (cycle_t)vmi_timer_ops.get_cycle_counter(VMI_CYCLES_REAL);
291 return ret >= clocksource_vmi.cycle_last ? 290 return max(ret, clocksource_vmi.cycle_last);
292 ret : clocksource_vmi.cycle_last;
293} 291}
294 292
295static struct clocksource clocksource_vmi = { 293static struct clocksource clocksource_vmi = {
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 82c67559dde7..0d860963f268 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -12,7 +12,7 @@
12 12
13#include <asm-generic/vmlinux.lds.h> 13#include <asm-generic/vmlinux.lds.h>
14#include <asm/thread_info.h> 14#include <asm/thread_info.h>
15#include <asm/page.h> 15#include <asm/page_types.h>
16#include <asm/cache.h> 16#include <asm/cache.h>
17#include <asm/boot.h> 17#include <asm/boot.h>
18 18
@@ -178,14 +178,7 @@ SECTIONS
178 __initramfs_end = .; 178 __initramfs_end = .;
179 } 179 }
180#endif 180#endif
181 . = ALIGN(PAGE_SIZE); 181 PERCPU(PAGE_SIZE)
182 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
183 __per_cpu_start = .;
184 *(.data.percpu.page_aligned)
185 *(.data.percpu)
186 *(.data.percpu.shared_aligned)
187 __per_cpu_end = .;
188 }
189 . = ALIGN(PAGE_SIZE); 182 . = ALIGN(PAGE_SIZE);
190 /* freed after init ends here */ 183 /* freed after init ends here */
191 184
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0e6bef..fbfced6f6800 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,7 +5,8 @@
5#define LOAD_OFFSET __START_KERNEL_map 5#define LOAD_OFFSET __START_KERNEL_map
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/page.h> 8#include <asm/asm-offsets.h>
9#include <asm/page_types.h>
9 10
10#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
11 12
@@ -13,12 +14,15 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 14OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 15ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 16jiffies_64 = jiffies;
16_proxy_pda = 1;
17PHDRS { 17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
25 data.init2 PT_LOAD FLAGS(7); /* RWE */
22 note PT_NOTE FLAGS(0); /* ___ */ 26 note PT_NOTE FLAGS(0); /* ___ */
23} 27}
24SECTIONS 28SECTIONS
@@ -208,14 +212,28 @@ SECTIONS
208 __initramfs_end = .; 212 __initramfs_end = .;
209#endif 213#endif
210 214
215#ifdef CONFIG_SMP
216 /*
217 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
218 * output PHDR, so the next output section - __data_nosave - should
219 * start another section data.init2. Also, pda should be at the head of
220 * percpu area. Preallocate it and define the percpu offset symbol
221 * so that it can be accessed as a percpu variable.
222 */
223 . = ALIGN(PAGE_SIZE);
224 PERCPU_VADDR(0, :percpu)
225#else
211 PERCPU(PAGE_SIZE) 226 PERCPU(PAGE_SIZE)
227#endif
212 228
213 . = ALIGN(PAGE_SIZE); 229 . = ALIGN(PAGE_SIZE);
214 __init_end = .; 230 __init_end = .;
215 231
216 . = ALIGN(PAGE_SIZE); 232 . = ALIGN(PAGE_SIZE);
217 __nosave_begin = .; 233 __nosave_begin = .;
218 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } 234 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
235 *(.data.nosave)
236 } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
219 . = ALIGN(PAGE_SIZE); 237 . = ALIGN(PAGE_SIZE);
220 __nosave_end = .; 238 __nosave_end = .;
221 239
@@ -239,8 +257,21 @@ SECTIONS
239 DWARF_DEBUG 257 DWARF_DEBUG
240} 258}
241 259
260 /*
261 * Per-cpu symbols which need to be offset from __per_cpu_load
262 * for the boot processor.
263 */
264#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
265INIT_PER_CPU(gdt_page);
266INIT_PER_CPU(irq_stack_union);
267
242/* 268/*
243 * Build-time check on the image size: 269 * Build-time check on the image size:
244 */ 270 */
245ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 271ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
246 "kernel image bigger than KERNEL_IMAGE_SIZE") 272 "kernel image bigger than KERNEL_IMAGE_SIZE")
273
274#ifdef CONFIG_SMP
275ASSERT((per_cpu__irq_stack_union == 0),
276 "irq_stack_union is not at start of per-cpu area");
277#endif
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index a688f3bfaec2..c609205df594 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void)
37 flags &= ~X86_EFLAGS_IF; 37 flags &= ~X86_EFLAGS_IF;
38 return flags; 38 return flags;
39} 39}
40PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
40 41
41static void vsmp_restore_fl(unsigned long flags) 42static void vsmp_restore_fl(unsigned long flags)
42{ 43{
@@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags)
46 flags |= X86_EFLAGS_AC; 47 flags |= X86_EFLAGS_AC;
47 native_restore_fl(flags); 48 native_restore_fl(flags);
48} 49}
50PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
49 51
50static void vsmp_irq_disable(void) 52static void vsmp_irq_disable(void)
51{ 53{
@@ -53,6 +55,7 @@ static void vsmp_irq_disable(void)
53 55
54 native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); 56 native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
55} 57}
58PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
56 59
57static void vsmp_irq_enable(void) 60static void vsmp_irq_enable(void)
58{ 61{
@@ -60,6 +63,7 @@ static void vsmp_irq_enable(void)
60 63
61 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); 64 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
62} 65}
66PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
63 67
64static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, 68static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
65 unsigned long addr, unsigned len) 69 unsigned long addr, unsigned len)
@@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void)
90 cap, ctl); 94 cap, ctl);
91 if (cap & ctl & (1 << 4)) { 95 if (cap & ctl & (1 << 4)) {
92 /* Setup irq ops and turn on vSMP IRQ fastpath handling */ 96 /* Setup irq ops and turn on vSMP IRQ fastpath handling */
93 pv_irq_ops.irq_disable = vsmp_irq_disable; 97 pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
94 pv_irq_ops.irq_enable = vsmp_irq_enable; 98 pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
95 pv_irq_ops.save_fl = vsmp_save_fl; 99 pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
96 pv_irq_ops.restore_fl = vsmp_restore_fl; 100 pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
97 pv_init_ops.patch = vsmp_patch; 101 pv_init_ops.patch = vsmp_patch;
98 102
99 ctl &= ~(1 << 4); 103 ctl &= ~(1 << 4);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426aa354..3909e3ba5ce3 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy);
58EXPORT_SYMBOL(empty_zero_page); 58EXPORT_SYMBOL(empty_zero_page);
59EXPORT_SYMBOL(init_level4_pgt); 59EXPORT_SYMBOL(init_level4_pgt);
60EXPORT_SYMBOL(load_gs_index); 60EXPORT_SYMBOL(load_gs_index);
61
62EXPORT_SYMBOL(_proxy_pda);