author     Ingo Molnar <mingo@elte.hu>  2009-02-13 07:05:57 -0500
committer  Ingo Molnar <mingo@elte.hu>  2009-02-13 07:05:57 -0500
commit     d040c1614c24162adc3fe106b182596999264e26 (patch)
tree       5af02052e633bcde0e33f77cac52ab4685ad07f1 /arch/x86/kernel
parent     d88316c243e5458a1888edbe0353c4dec6e61c73 (diff)
parent     7032e8696726354d6180d8a2d17191f958cd93ae (diff)
Merge branch 'x86/core' into x86/headers
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile | 25
-rw-r--r--  arch/x86/kernel/acpi/boot.c | 191
-rw-r--r--  arch/x86/kernel/acpi/sleep.c | 1
-rw-r--r--  arch/x86/kernel/apic.c | 193
-rw-r--r--  arch/x86/kernel/apm_32.c | 2
-rw-r--r--  arch/x86/kernel/asm-offsets_32.c | 1
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c | 11
-rw-r--r--  arch/x86/kernel/bigsmp_32.c | 266
-rw-r--r--  arch/x86/kernel/cpu/addon_cpuid_features.c | 54
-rw-r--r--  arch/x86/kernel/cpu/amd.c | 2
-rw-r--r--  arch/x86/kernel/cpu/common.c | 251
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 28
-rw-r--r--  arch/x86/kernel/cpu/intel.c | 17
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c | 63
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c | 21
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c | 1
-rw-r--r--  arch/x86/kernel/crash.c | 4
-rw-r--r--  arch/x86/kernel/dumpstack.c | 2
-rw-r--r--  arch/x86/kernel/dumpstack_64.c | 35
-rw-r--r--  arch/x86/kernel/efi.c | 2
-rw-r--r--  arch/x86/kernel/efi_64.c | 1
-rw-r--r--  arch/x86/kernel/entry_32.S | 451
-rw-r--r--  arch/x86/kernel/entry_64.S | 47
-rw-r--r--  arch/x86/kernel/es7000_32.c | 477
-rw-r--r--  arch/x86/kernel/ftrace.c | 17
-rw-r--r--  arch/x86/kernel/genapic_64.c | 24
-rw-r--r--  arch/x86/kernel/genapic_flat_64.c | 176
-rw-r--r--  arch/x86/kernel/genx2apic_cluster.c | 133
-rw-r--r--  arch/x86/kernel/genx2apic_phys.c | 125
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c | 112
-rw-r--r--  arch/x86/kernel/head64.c | 23
-rw-r--r--  arch/x86/kernel/head_32.S | 40
-rw-r--r--  arch/x86/kernel/head_64.S | 19
-rw-r--r--  arch/x86/kernel/hpet.c | 14
-rw-r--r--  arch/x86/kernel/i8237.c | 17
-rw-r--r--  arch/x86/kernel/io_apic.c | 443
-rw-r--r--  arch/x86/kernel/ioport.c | 3
-rw-r--r--  arch/x86/kernel/ipi.c | 176
-rw-r--r--  arch/x86/kernel/irq.c | 44
-rw-r--r--  arch/x86/kernel/irq_32.c | 33
-rw-r--r--  arch/x86/kernel/irq_64.c | 43
-rw-r--r--  arch/x86/kernel/irqinit_32.c | 23
-rw-r--r--  arch/x86/kernel/kgdb.c | 4
-rw-r--r--  arch/x86/kernel/machine_kexec_64.c | 82
-rw-r--r--  arch/x86/kernel/microcode_intel.c | 10
-rw-r--r--  arch/x86/kernel/module_32.c | 6
-rw-r--r--  arch/x86/kernel/module_64.c | 32
-rw-r--r--  arch/x86/kernel/mpparse.c | 178
-rw-r--r--  arch/x86/kernel/msr.c | 2
-rw-r--r--  arch/x86/kernel/nmi.c | 12
-rw-r--r--  arch/x86/kernel/numaq_32.c | 307
-rw-r--r--  arch/x86/kernel/paravirt-spinlocks.c | 10
-rw-r--r--  arch/x86/kernel/paravirt.c | 81
-rw-r--r--  arch/x86/kernel/paravirt_patch_32.c | 12
-rw-r--r--  arch/x86/kernel/paravirt_patch_64.c | 15
-rw-r--r--  arch/x86/kernel/probe_32.c | 411
-rw-r--r--  arch/x86/kernel/probe_roms_32.c | 2
-rw-r--r--  arch/x86/kernel/process.c | 8
-rw-r--r--  arch/x86/kernel/process_32.c | 59
-rw-r--r--  arch/x86/kernel/process_64.c | 51
-rw-r--r--  arch/x86/kernel/ptrace.c | 35
-rw-r--r--  arch/x86/kernel/reboot.c | 5
-rw-r--r--  arch/x86/kernel/relocate_kernel_64.S | 125
-rw-r--r--  arch/x86/kernel/setup.c | 34
-rw-r--r--  arch/x86/kernel/setup_percpu.c | 414
-rw-r--r--  arch/x86/kernel/signal.c | 346
-rw-r--r--  arch/x86/kernel/smp.c | 15
-rw-r--r--  arch/x86/kernel/smpboot.c | 117
-rw-r--r--  arch/x86/kernel/smpcommon.c | 30
-rw-r--r--  arch/x86/kernel/stacktrace.c | 2
-rw-r--r--  arch/x86/kernel/summit_32.c | 416
-rw-r--r--  arch/x86/kernel/syscall_table_32.S | 20
-rw-r--r--  arch/x86/kernel/time_32.c | 2
-rw-r--r--  arch/x86/kernel/tlb_32.c | 256
-rw-r--r--  arch/x86/kernel/tlb_64.c | 284
-rw-r--r--  arch/x86/kernel/tlb_uv.c | 72
-rw-r--r--  arch/x86/kernel/trampoline_64.S | 19
-rw-r--r--  arch/x86/kernel/traps.c | 13
-rw-r--r--  arch/x86/kernel/tsc.c | 2
-rw-r--r--  arch/x86/kernel/visws_quirks.c | 6
-rw-r--r--  arch/x86/kernel/vm86_32.c | 20
-rw-r--r--  arch/x86/kernel/vmi_32.c | 20
-rw-r--r--  arch/x86/kernel/vmiclock_32.c | 2
-rw-r--r--  arch/x86/kernel/vmlinux_32.lds.S | 9
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S | 35
-rw-r--r--  arch/x86/kernel/vsmp_64.c | 12
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c | 2
87 files changed, 4280 insertions(+), 2926 deletions(-)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d364df03c1d6..24f357e7557a 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,13 +23,14 @@ nostackp := $(call cc-option, -fno-stack-protector)
23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
24CFLAGS_hpet.o := $(nostackp) 24CFLAGS_hpet.o := $(nostackp)
25CFLAGS_tsc.o := $(nostackp) 25CFLAGS_tsc.o := $(nostackp)
26CFLAGS_paravirt.o := $(nostackp)
26 27
27obj-y := process_$(BITS).o signal.o entry_$(BITS).o 28obj-y := process_$(BITS).o signal.o entry_$(BITS).o
28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
29obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o 30obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
30obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 31obj-y += setup.o i8259.o irqinit_$(BITS).o
31obj-$(CONFIG_X86_VISWS) += visws_quirks.o 32obj-$(CONFIG_X86_VISWS) += visws_quirks.o
32obj-$(CONFIG_X86_32) += probe_roms_32.o 33obj-$(CONFIG_X86_32) += probe_32.o probe_roms_32.o
33obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 34obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
34obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o 35obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o
35obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o 36obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o
@@ -49,20 +50,20 @@ obj-y += step.o
49obj-$(CONFIG_STACKTRACE) += stacktrace.o 50obj-$(CONFIG_STACKTRACE) += stacktrace.o
50obj-y += cpu/ 51obj-y += cpu/
51obj-y += acpi/ 52obj-y += acpi/
52obj-$(CONFIG_X86_BIOS_REBOOT) += reboot.o 53obj-y += reboot.o
53obj-$(CONFIG_MCA) += mca_32.o 54obj-$(CONFIG_MCA) += mca_32.o
54obj-$(CONFIG_X86_MSR) += msr.o 55obj-$(CONFIG_X86_MSR) += msr.o
55obj-$(CONFIG_X86_CPUID) += cpuid.o 56obj-$(CONFIG_X86_CPUID) += cpuid.o
56obj-$(CONFIG_PCI) += early-quirks.o 57obj-$(CONFIG_PCI) += early-quirks.o
57apm-y := apm_32.o 58apm-y := apm_32.o
58obj-$(CONFIG_APM) += apm.o 59obj-$(CONFIG_APM) += apm.o
59obj-$(CONFIG_X86_SMP) += smp.o 60obj-$(CONFIG_SMP) += smp.o
60obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o 61obj-$(CONFIG_SMP) += smpboot.o tsc_sync.o ipi.o
61obj-$(CONFIG_X86_32_SMP) += smpcommon.o 62obj-$(CONFIG_SMP) += setup_percpu.o
62obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 63obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
63obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 64obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
64obj-$(CONFIG_X86_MPPARSE) += mpparse.o 65obj-$(CONFIG_X86_MPPARSE) += mpparse.o
65obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o 66obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o ipi.o
66obj-$(CONFIG_X86_IO_APIC) += io_apic.o 67obj-$(CONFIG_X86_IO_APIC) += io_apic.o
67obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o 68obj-$(CONFIG_X86_REBOOTFIXUPS) += reboot_fixups_32.o
68obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o 69obj-$(CONFIG_DYNAMIC_FTRACE) += ftrace.o
@@ -70,9 +71,10 @@ obj-$(CONFIG_FUNCTION_GRAPH_TRACER) += ftrace.o
70obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o 71obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
71obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 72obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
72obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 73obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
74obj-$(CONFIG_X86_BIGSMP) += bigsmp_32.o
73obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 75obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
74obj-$(CONFIG_X86_ES7000) += es7000_32.o 76obj-$(CONFIG_X86_ES7000) += es7000_32.o
75obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o 77obj-$(CONFIG_X86_SUMMIT) += summit_32.o
76obj-y += vsmp_64.o 78obj-y += vsmp_64.o
77obj-$(CONFIG_KPROBES) += kprobes.o 79obj-$(CONFIG_KPROBES) += kprobes.o
78obj-$(CONFIG_MODULES) += module_$(BITS).o 80obj-$(CONFIG_MODULES) += module_$(BITS).o
@@ -114,10 +116,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
114### 116###
115# 64 bit specific files 117# 64 bit specific files
116ifeq ($(CONFIG_X86_64),y) 118ifeq ($(CONFIG_X86_64),y)
117 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 119 obj-y += genapic_64.o genapic_flat_64.o
118 obj-y += bios_uv.o uv_irq.o uv_sysfs.o
119 obj-y += genx2apic_cluster.o 120 obj-y += genx2apic_cluster.o
120 obj-y += genx2apic_phys.o 121 obj-y += genx2apic_phys.o
122 obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o
123 obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o
121 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 124 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
122 obj-$(CONFIG_AUDIT) += audit_64.o 125 obj-$(CONFIG_AUDIT) += audit_64.o
123 126
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f438..956c1dee6fbe 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -42,10 +42,6 @@
42#include <asm/mpspec.h> 42#include <asm/mpspec.h>
43#include <asm/smp.h> 43#include <asm/smp.h>
44 44
45#ifdef CONFIG_X86_LOCAL_APIC
46# include <mach_apic.h>
47#endif
48
49static int __initdata acpi_force = 0; 45static int __initdata acpi_force = 0;
50u32 acpi_rsdt_forced; 46u32 acpi_rsdt_forced;
51#ifdef CONFIG_ACPI 47#ifdef CONFIG_ACPI
@@ -56,16 +52,7 @@ int acpi_disabled = 1;
56EXPORT_SYMBOL(acpi_disabled); 52EXPORT_SYMBOL(acpi_disabled);
57 53
58#ifdef CONFIG_X86_64 54#ifdef CONFIG_X86_64
59 55# include <asm/proto.h>
60#include <asm/proto.h>
61
62#else /* X86 */
63
64#ifdef CONFIG_X86_LOCAL_APIC
65#include <mach_apic.h>
66#include <mach_mpparse.h>
67#endif /* CONFIG_X86_LOCAL_APIC */
68
69#endif /* X86 */ 56#endif /* X86 */
70 57
71#define BAD_MADT_ENTRY(entry, end) ( \ 58#define BAD_MADT_ENTRY(entry, end) ( \
@@ -121,35 +108,18 @@ enum acpi_irq_model_id acpi_irq_model = ACPI_IRQ_MODEL_PIC;
121 */ 108 */
122char *__init __acpi_map_table(unsigned long phys, unsigned long size) 109char *__init __acpi_map_table(unsigned long phys, unsigned long size)
123{ 110{
124 unsigned long base, offset, mapped_size;
125 int idx;
126 111
127 if (!phys || !size) 112 if (!phys || !size)
128 return NULL; 113 return NULL;
129 114
130 if (phys+size <= (max_low_pfn_mapped << PAGE_SHIFT)) 115 return early_ioremap(phys, size);
131 return __va(phys); 116}
132 117void __init __acpi_unmap_table(char *map, unsigned long size)
133 offset = phys & (PAGE_SIZE - 1); 118{
134 mapped_size = PAGE_SIZE - offset; 119 if (!map || !size)
135 clear_fixmap(FIX_ACPI_END); 120 return;
136 set_fixmap(FIX_ACPI_END, phys);
137 base = fix_to_virt(FIX_ACPI_END);
138
139 /*
140 * Most cases can be covered by the below.
141 */
142 idx = FIX_ACPI_END;
143 while (mapped_size < size) {
144 if (--idx < FIX_ACPI_BEGIN)
145 return NULL; /* cannot handle this */
146 phys += PAGE_SIZE;
147 clear_fixmap(idx);
148 set_fixmap(idx, phys);
149 mapped_size += PAGE_SIZE;
150 }
151 121
152 return ((unsigned char *)base + offset); 122 early_iounmap(map, size);
153} 123}
154 124
155#ifdef CONFIG_PCI_MMCONFIG 125#ifdef CONFIG_PCI_MMCONFIG
@@ -239,7 +209,8 @@ static int __init acpi_parse_madt(struct acpi_table_header *table)
239 madt->address); 209 madt->address);
240 } 210 }
241 211
242 acpi_madt_oem_check(madt->header.oem_id, madt->header.oem_table_id); 212 default_acpi_madt_oem_check(madt->header.oem_id,
213 madt->header.oem_table_id);
243 214
244 return 0; 215 return 0;
245} 216}
@@ -884,7 +855,7 @@ static struct {
884 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1); 855 DECLARE_BITMAP(pin_programmed, MP_MAX_IOAPIC_PIN + 1);
885} mp_ioapic_routing[MAX_IO_APICS]; 856} mp_ioapic_routing[MAX_IO_APICS];
886 857
887static int mp_find_ioapic(int gsi) 858int mp_find_ioapic(int gsi)
888{ 859{
889 int i = 0; 860 int i = 0;
890 861
@@ -899,6 +870,16 @@ static int mp_find_ioapic(int gsi)
899 return -1; 870 return -1;
900} 871}
901 872
873int mp_find_ioapic_pin(int ioapic, int gsi)
874{
875 if (WARN_ON(ioapic == -1))
876 return -1;
877 if (WARN_ON(gsi > mp_ioapic_routing[ioapic].gsi_end))
878 return -1;
879
880 return gsi - mp_ioapic_routing[ioapic].gsi_base;
881}
882
902static u8 __init uniq_ioapic_id(u8 id) 883static u8 __init uniq_ioapic_id(u8 id)
903{ 884{
904#ifdef CONFIG_X86_32 885#ifdef CONFIG_X86_32
@@ -912,8 +893,8 @@ static u8 __init uniq_ioapic_id(u8 id)
912 DECLARE_BITMAP(used, 256); 893 DECLARE_BITMAP(used, 256);
913 bitmap_zero(used, 256); 894 bitmap_zero(used, 256);
914 for (i = 0; i < nr_ioapics; i++) { 895 for (i = 0; i < nr_ioapics; i++) {
915 struct mp_config_ioapic *ia = &mp_ioapics[i]; 896 struct mpc_ioapic *ia = &mp_ioapics[i];
916 __set_bit(ia->mp_apicid, used); 897 __set_bit(ia->apicid, used);
917 } 898 }
918 if (!test_bit(id, used)) 899 if (!test_bit(id, used))
919 return id; 900 return id;
@@ -945,47 +926,70 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
945 926
946 idx = nr_ioapics; 927 idx = nr_ioapics;
947 928
948 mp_ioapics[idx].mp_type = MP_IOAPIC; 929 mp_ioapics[idx].type = MP_IOAPIC;
949 mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; 930 mp_ioapics[idx].flags = MPC_APIC_USABLE;
950 mp_ioapics[idx].mp_apicaddr = address; 931 mp_ioapics[idx].apicaddr = address;
951 932
952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 933 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
953 mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); 934 mp_ioapics[idx].apicid = uniq_ioapic_id(id);
954#ifdef CONFIG_X86_32 935#ifdef CONFIG_X86_32
955 mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); 936 mp_ioapics[idx].apicver = io_apic_get_version(idx);
956#else 937#else
957 mp_ioapics[idx].mp_apicver = 0; 938 mp_ioapics[idx].apicver = 0;
958#endif 939#endif
959 /* 940 /*
960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 941 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 942 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
962 */ 943 */
963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; 944 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
964 mp_ioapic_routing[idx].gsi_base = gsi_base; 945 mp_ioapic_routing[idx].gsi_base = gsi_base;
965 mp_ioapic_routing[idx].gsi_end = gsi_base + 946 mp_ioapic_routing[idx].gsi_end = gsi_base +
966 io_apic_get_redir_entries(idx); 947 io_apic_get_redir_entries(idx);
967 948
968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " 949 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
969 "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, 950 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
970 mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, 951 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); 952 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
972 953
973 nr_ioapics++; 954 nr_ioapics++;
974} 955}
975 956
976static void assign_to_mp_irq(struct mp_config_intsrc *m, 957int __init acpi_probe_gsi(void)
977 struct mp_config_intsrc *mp_irq)
978{ 958{
979 memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); 959 int idx;
960 int gsi;
961 int max_gsi = 0;
962
963 if (acpi_disabled)
964 return 0;
965
966 if (!acpi_ioapic)
967 return 0;
968
969 max_gsi = 0;
970 for (idx = 0; idx < nr_ioapics; idx++) {
971 gsi = mp_ioapic_routing[idx].gsi_end;
972
973 if (gsi > max_gsi)
974 max_gsi = gsi;
975 }
976
977 return max_gsi + 1;
980} 978}
981 979
982static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, 980static void assign_to_mp_irq(struct mpc_intsrc *m,
983 struct mp_config_intsrc *m) 981 struct mpc_intsrc *mp_irq)
984{ 982{
985 return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); 983 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
986} 984}
987 985
988static void save_mp_irq(struct mp_config_intsrc *m) 986static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
987 struct mpc_intsrc *m)
988{
989 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
990}
991
992static void save_mp_irq(struct mpc_intsrc *m)
989{ 993{
990 int i; 994 int i;
991 995
@@ -1003,7 +1007,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1003{ 1007{
1004 int ioapic; 1008 int ioapic;
1005 int pin; 1009 int pin;
1006 struct mp_config_intsrc mp_irq; 1010 struct mpc_intsrc mp_irq;
1007 1011
1008 /* 1012 /*
1009 * Convert 'gsi' to 'ioapic.pin'. 1013 * Convert 'gsi' to 'ioapic.pin'.
@@ -1011,7 +1015,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1011 ioapic = mp_find_ioapic(gsi); 1015 ioapic = mp_find_ioapic(gsi);
1012 if (ioapic < 0) 1016 if (ioapic < 0)
1013 return; 1017 return;
1014 pin = gsi - mp_ioapic_routing[ioapic].gsi_base; 1018 pin = mp_find_ioapic_pin(ioapic, gsi);
1015 1019
1016 /* 1020 /*
1017 * TBD: This check is for faulty timer entries, where the override 1021 * TBD: This check is for faulty timer entries, where the override
@@ -1021,13 +1025,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1021 if ((bus_irq == 0) && (trigger == 3)) 1025 if ((bus_irq == 0) && (trigger == 3))
1022 trigger = 1; 1026 trigger = 1;
1023 1027
1024 mp_irq.mp_type = MP_INTSRC; 1028 mp_irq.type = MP_INTSRC;
1025 mp_irq.mp_irqtype = mp_INT; 1029 mp_irq.irqtype = mp_INT;
1026 mp_irq.mp_irqflag = (trigger << 2) | polarity; 1030 mp_irq.irqflag = (trigger << 2) | polarity;
1027 mp_irq.mp_srcbus = MP_ISA_BUS; 1031 mp_irq.srcbus = MP_ISA_BUS;
1028 mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ 1032 mp_irq.srcbusirq = bus_irq; /* IRQ */
1029 mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ 1033 mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
1030 mp_irq.mp_dstirq = pin; /* INTIN# */ 1034 mp_irq.dstirq = pin; /* INTIN# */
1031 1035
1032 save_mp_irq(&mp_irq); 1036 save_mp_irq(&mp_irq);
1033} 1037}
@@ -1037,7 +1041,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1037 int i; 1041 int i;
1038 int ioapic; 1042 int ioapic;
1039 unsigned int dstapic; 1043 unsigned int dstapic;
1040 struct mp_config_intsrc mp_irq; 1044 struct mpc_intsrc mp_irq;
1041 1045
1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA) 1046#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
1043 /* 1047 /*
@@ -1062,7 +1066,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1062 ioapic = mp_find_ioapic(0); 1066 ioapic = mp_find_ioapic(0);
1063 if (ioapic < 0) 1067 if (ioapic < 0)
1064 return; 1068 return;
1065 dstapic = mp_ioapics[ioapic].mp_apicid; 1069 dstapic = mp_ioapics[ioapic].apicid;
1066 1070
1067 /* 1071 /*
1068 * Use the default configuration for the IRQs 0-15. Unless 1072 * Use the default configuration for the IRQs 0-15. Unless
@@ -1072,16 +1076,14 @@ void __init mp_config_acpi_legacy_irqs(void)
1072 int idx; 1076 int idx;
1073 1077
1074 for (idx = 0; idx < mp_irq_entries; idx++) { 1078 for (idx = 0; idx < mp_irq_entries; idx++) {
1075 struct mp_config_intsrc *irq = mp_irqs + idx; 1079 struct mpc_intsrc *irq = mp_irqs + idx;
1076 1080
1077 /* Do we already have a mapping for this ISA IRQ? */ 1081 /* Do we already have a mapping for this ISA IRQ? */
1078 if (irq->mp_srcbus == MP_ISA_BUS 1082 if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
1079 && irq->mp_srcbusirq == i)
1080 break; 1083 break;
1081 1084
1082 /* Do we already have a mapping for this IOAPIC pin */ 1085 /* Do we already have a mapping for this IOAPIC pin */
1083 if (irq->mp_dstapic == dstapic && 1086 if (irq->dstapic == dstapic && irq->dstirq == i)
1084 irq->mp_dstirq == i)
1085 break; 1087 break;
1086 } 1088 }
1087 1089
@@ -1090,13 +1092,13 @@ void __init mp_config_acpi_legacy_irqs(void)
1090 continue; /* IRQ already used */ 1092 continue; /* IRQ already used */
1091 } 1093 }
1092 1094
1093 mp_irq.mp_type = MP_INTSRC; 1095 mp_irq.type = MP_INTSRC;
1094 mp_irq.mp_irqflag = 0; /* Conforming */ 1096 mp_irq.irqflag = 0; /* Conforming */
1095 mp_irq.mp_srcbus = MP_ISA_BUS; 1097 mp_irq.srcbus = MP_ISA_BUS;
1096 mp_irq.mp_dstapic = dstapic; 1098 mp_irq.dstapic = dstapic;
1097 mp_irq.mp_irqtype = mp_INT; 1099 mp_irq.irqtype = mp_INT;
1098 mp_irq.mp_srcbusirq = i; /* Identity mapped */ 1100 mp_irq.srcbusirq = i; /* Identity mapped */
1099 mp_irq.mp_dstirq = i; 1101 mp_irq.dstirq = i;
1100 1102
1101 save_mp_irq(&mp_irq); 1103 save_mp_irq(&mp_irq);
1102 } 1104 }
@@ -1133,7 +1135,7 @@ int mp_register_gsi(u32 gsi, int triggering, int polarity)
1133 return gsi; 1135 return gsi;
1134 } 1136 }
1135 1137
1136 ioapic_pin = gsi - mp_ioapic_routing[ioapic].gsi_base; 1138 ioapic_pin = mp_find_ioapic_pin(ioapic, gsi);
1137 1139
1138#ifdef CONFIG_X86_32 1140#ifdef CONFIG_X86_32
1139 if (ioapic_renumber_irq) 1141 if (ioapic_renumber_irq)
@@ -1207,22 +1209,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
1207 u32 gsi, int triggering, int polarity) 1209 u32 gsi, int triggering, int polarity)
1208{ 1210{
1209#ifdef CONFIG_X86_MPPARSE 1211#ifdef CONFIG_X86_MPPARSE
1210 struct mp_config_intsrc mp_irq; 1212 struct mpc_intsrc mp_irq;
1211 int ioapic; 1213 int ioapic;
1212 1214
1213 if (!acpi_ioapic) 1215 if (!acpi_ioapic)
1214 return 0; 1216 return 0;
1215 1217
1216 /* print the entry should happen on mptable identically */ 1218 /* print the entry should happen on mptable identically */
1217 mp_irq.mp_type = MP_INTSRC; 1219 mp_irq.type = MP_INTSRC;
1218 mp_irq.mp_irqtype = mp_INT; 1220 mp_irq.irqtype = mp_INT;
1219 mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | 1221 mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
1220 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); 1222 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
1221 mp_irq.mp_srcbus = number; 1223 mp_irq.srcbus = number;
1222 mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); 1224 mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
1223 ioapic = mp_find_ioapic(gsi); 1225 ioapic = mp_find_ioapic(gsi);
1224 mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; 1226 mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
1225 mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; 1227 mp_irq.dstirq = mp_find_ioapic_pin(ioapic, gsi);
1226 1228
1227 save_mp_irq(&mp_irq); 1229 save_mp_irq(&mp_irq);
1228#endif 1230#endif
@@ -1349,7 +1351,7 @@ static void __init acpi_process_madt(void)
1349 if (!error) { 1351 if (!error) {
1350 acpi_lapic = 1; 1352 acpi_lapic = 1;
1351 1353
1352#ifdef CONFIG_X86_GENERICARCH 1354#ifdef CONFIG_X86_BIGSMP
1353 generic_bigsmp_probe(); 1355 generic_bigsmp_probe();
1354#endif 1356#endif
1355 /* 1357 /*
@@ -1361,9 +1363,8 @@ static void __init acpi_process_madt(void)
1361 acpi_ioapic = 1; 1363 acpi_ioapic = 1;
1362 1364
1363 smp_found_config = 1; 1365 smp_found_config = 1;
1364#ifdef CONFIG_X86_32 1366 if (apic->setup_apic_routing)
1365 setup_apic_routing(); 1367 apic->setup_apic_routing();
1366#endif
1367 } 1368 }
1368 } 1369 }
1369 if (error == -EINVAL) { 1370 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index a60c1f3bcb87..7c243a2c5115 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
101 stack_start.sp = temp_stack + sizeof(temp_stack); 101 stack_start.sp = temp_stack + sizeof(temp_stack);
102 early_gdt_descr.address = 102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
104 initial_gs = per_cpu_offset(smp_processor_id());
104#endif 105#endif
105 initial_code = (unsigned long)wakeup_long64; 106 initial_code = (unsigned long)wakeup_long64;
106 saved_magic = 0x123456789abcdef0; 107 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 115449f869ee..cf2ca19e62da 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Local APIC handling, local APIC timers 2 * Local APIC handling, local APIC timers
3 * 3 *
4 * (c) 1999, 2000 Ingo Molnar <mingo@redhat.com> 4 * (c) 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
5 * 5 *
6 * Fixes 6 * Fixes
7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs; 7 * Maciej W. Rozycki : Bits for genuine 82489DX APICs;
@@ -14,51 +14,71 @@
14 * Mikael Pettersson : PM converted to driver model. 14 * Mikael Pettersson : PM converted to driver model.
15 */ 15 */
16 16
17#include <linux/init.h>
18
19#include <linux/mm.h>
20#include <linux/delay.h>
21#include <linux/bootmem.h>
22#include <linux/interrupt.h>
23#include <linux/mc146818rtc.h>
24#include <linux/kernel_stat.h> 17#include <linux/kernel_stat.h>
25#include <linux/sysdev.h> 18#include <linux/mc146818rtc.h>
26#include <linux/ioport.h>
27#include <linux/cpu.h>
28#include <linux/clockchips.h>
29#include <linux/acpi_pmtmr.h> 19#include <linux/acpi_pmtmr.h>
20#include <linux/clockchips.h>
21#include <linux/interrupt.h>
22#include <linux/bootmem.h>
23#include <linux/ftrace.h>
24#include <linux/ioport.h>
30#include <linux/module.h> 25#include <linux/module.h>
31#include <linux/dmi.h> 26#include <linux/sysdev.h>
27#include <linux/delay.h>
28#include <linux/timex.h>
32#include <linux/dmar.h> 29#include <linux/dmar.h>
33#include <linux/ftrace.h> 30#include <linux/init.h>
34#include <linux/smp.h> 31#include <linux/cpu.h>
32#include <linux/dmi.h>
35#include <linux/nmi.h> 33#include <linux/nmi.h>
36#include <linux/timex.h> 34#include <linux/smp.h>
35#include <linux/mm.h>
37 36
38#include <asm/atomic.h>
39#include <asm/mtrr.h>
40#include <asm/mpspec.h>
41#include <asm/desc.h>
42#include <asm/arch_hooks.h> 37#include <asm/arch_hooks.h>
43#include <asm/hpet.h>
44#include <asm/pgalloc.h> 38#include <asm/pgalloc.h>
39#include <asm/genapic.h>
40#include <asm/atomic.h>
41#include <asm/mpspec.h>
45#include <asm/i8253.h> 42#include <asm/i8253.h>
46#include <asm/idle.h> 43#include <asm/i8259.h>
47#include <asm/proto.h> 44#include <asm/proto.h>
48#include <asm/apic.h> 45#include <asm/apic.h>
49#include <asm/i8259.h> 46#include <asm/desc.h>
47#include <asm/hpet.h>
48#include <asm/idle.h>
49#include <asm/mtrr.h>
50#include <asm/smp.h> 50#include <asm/smp.h>
51 51
52#include <mach_apic.h> 52unsigned int num_processors;
53#include <mach_apicdef.h> 53
54#include <mach_ipi.h> 54unsigned disabled_cpus __cpuinitdata;
55
56/* Processor that is doing the boot up */
57unsigned int boot_cpu_physical_apicid = -1U;
55 58
56/* 59/*
57 * Sanity check 60 * The highest APIC ID seen during enumeration.
61 *
62 * This determines the messaging protocol we can use: if all APIC IDs
63 * are in the 0 ... 7 range, then we can use logical addressing which
64 * has some performance advantages (better broadcasting).
65 *
66 * If there's an APIC ID above 8, we use physical addressing.
58 */ 67 */
59#if ((SPURIOUS_APIC_VECTOR & 0x0F) != 0x0F) 68unsigned int max_physical_apicid;
60# error SPURIOUS_APIC_VECTOR definition error 69
61#endif 70/*
71 * Bitmask of physically existing CPUs:
72 */
73physid_mask_t phys_cpu_present_map;
74
75/*
76 * Map cpu index to physical APIC ID
77 */
78DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
79DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
80EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
81EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
62 82
63#ifdef CONFIG_X86_32 83#ifdef CONFIG_X86_32
64/* 84/*
@@ -457,7 +477,7 @@ static void lapic_timer_setup(enum clock_event_mode mode,
457static void lapic_timer_broadcast(const struct cpumask *mask) 477static void lapic_timer_broadcast(const struct cpumask *mask)
458{ 478{
459#ifdef CONFIG_SMP 479#ifdef CONFIG_SMP
460 send_IPI_mask(mask, LOCAL_TIMER_VECTOR); 480 apic->send_IPI_mask(mask, LOCAL_TIMER_VECTOR);
461#endif 481#endif
462} 482}
463 483
@@ -535,7 +555,8 @@ static void __init lapic_cal_handler(struct clock_event_device *dev)
535 } 555 }
536} 556}
537 557
538static int __init calibrate_by_pmtimer(long deltapm, long *delta) 558static int __init
559calibrate_by_pmtimer(long deltapm, long *delta, long *deltatsc)
539{ 560{
540 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10; 561 const long pm_100ms = PMTMR_TICKS_PER_SEC / 10;
541 const long pm_thresh = pm_100ms / 100; 562 const long pm_thresh = pm_100ms / 100;
@@ -546,7 +567,7 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
546 return -1; 567 return -1;
547#endif 568#endif
548 569
549 apic_printk(APIC_VERBOSE, "... PM timer delta = %ld\n", deltapm); 570 apic_printk(APIC_VERBOSE, "... PM-Timer delta = %ld\n", deltapm);
550 571
551 /* Check, if the PM timer is available */ 572 /* Check, if the PM timer is available */
552 if (!deltapm) 573 if (!deltapm)
@@ -556,19 +577,30 @@ static int __init calibrate_by_pmtimer(long deltapm, long *delta)
556 577
557 if (deltapm > (pm_100ms - pm_thresh) && 578 if (deltapm > (pm_100ms - pm_thresh) &&
558 deltapm < (pm_100ms + pm_thresh)) { 579 deltapm < (pm_100ms + pm_thresh)) {
559 apic_printk(APIC_VERBOSE, "... PM timer result ok\n"); 580 apic_printk(APIC_VERBOSE, "... PM-Timer result ok\n");
560 } else { 581 return 0;
561 res = (((u64)deltapm) * mult) >> 22; 582 }
562 do_div(res, 1000000); 583
563 pr_warning("APIC calibration not consistent " 584 res = (((u64)deltapm) * mult) >> 22;
564 "with PM Timer: %ldms instead of 100ms\n", 585 do_div(res, 1000000);
565 (long)res); 586 pr_warning("APIC calibration not consistent "
566 /* Correct the lapic counter value */ 587 "with PM-Timer: %ldms instead of 100ms\n",(long)res);
567 res = (((u64)(*delta)) * pm_100ms); 588
589 /* Correct the lapic counter value */
590 res = (((u64)(*delta)) * pm_100ms);
591 do_div(res, deltapm);
592 pr_info("APIC delta adjusted to PM-Timer: "
593 "%lu (%ld)\n", (unsigned long)res, *delta);
594 *delta = (long)res;
595
596 /* Correct the tsc counter value */
597 if (cpu_has_tsc) {
598 res = (((u64)(*deltatsc)) * pm_100ms);
568 do_div(res, deltapm); 599 do_div(res, deltapm);
569 pr_info("APIC delta adjusted to PM-Timer: " 600 apic_printk(APIC_VERBOSE, "TSC delta adjusted to "
570 "%lu (%ld)\n", (unsigned long)res, *delta); 601 "PM-Timer: %lu (%ld) \n",
571 *delta = (long)res; 602 (unsigned long)res, *deltatsc);
603 *deltatsc = (long)res;
572 } 604 }
573 605
574 return 0; 606 return 0;
@@ -579,7 +611,7 @@ static int __init calibrate_APIC_clock(void)
579 struct clock_event_device *levt = &__get_cpu_var(lapic_events); 611 struct clock_event_device *levt = &__get_cpu_var(lapic_events);
580 void (*real_handler)(struct clock_event_device *dev); 612 void (*real_handler)(struct clock_event_device *dev);
581 unsigned long deltaj; 613 unsigned long deltaj;
582 long delta; 614 long delta, deltatsc;
583 int pm_referenced = 0; 615 int pm_referenced = 0;
584 616
585 local_irq_disable(); 617 local_irq_disable();
@@ -609,9 +641,11 @@ static int __init calibrate_APIC_clock(void)
609 delta = lapic_cal_t1 - lapic_cal_t2; 641 delta = lapic_cal_t1 - lapic_cal_t2;
610 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta); 642 apic_printk(APIC_VERBOSE, "... lapic delta = %ld\n", delta);
611 643
644 deltatsc = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
645
612 /* we trust the PM based calibration if possible */ 646 /* we trust the PM based calibration if possible */
613 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1, 647 pm_referenced = !calibrate_by_pmtimer(lapic_cal_pm2 - lapic_cal_pm1,
614 &delta); 648 &delta, &deltatsc);
615 649
616 /* Calculate the scaled math multiplication factor */ 650 /* Calculate the scaled math multiplication factor */
617 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS, 651 lapic_clockevent.mult = div_sc(delta, TICK_NSEC * LAPIC_CAL_LOOPS,
@@ -629,11 +663,10 @@ static int __init calibrate_APIC_clock(void)
629 calibration_result); 663 calibration_result);
630 664
631 if (cpu_has_tsc) { 665 if (cpu_has_tsc) {
632 delta = (long)(lapic_cal_tsc2 - lapic_cal_tsc1);
633 apic_printk(APIC_VERBOSE, "..... CPU clock speed is " 666 apic_printk(APIC_VERBOSE, "..... CPU clock speed is "
634 "%ld.%04ld MHz.\n", 667 "%ld.%04ld MHz.\n",
635 (delta / LAPIC_CAL_LOOPS) / (1000000 / HZ), 668 (deltatsc / LAPIC_CAL_LOOPS) / (1000000 / HZ),
636 (delta / LAPIC_CAL_LOOPS) % (1000000 / HZ)); 669 (deltatsc / LAPIC_CAL_LOOPS) % (1000000 / HZ));
637 } 670 }
638 671
639 apic_printk(APIC_VERBOSE, "..... host bus clock speed is " 672 apic_printk(APIC_VERBOSE, "..... host bus clock speed is "
@@ -991,11 +1024,11 @@ int __init verify_local_APIC(void)
991 */ 1024 */
992 reg0 = apic_read(APIC_ID); 1025 reg0 = apic_read(APIC_ID);
993 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 1026 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
994 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 1027 apic_write(APIC_ID, reg0 ^ apic->apic_id_mask);
995 reg1 = apic_read(APIC_ID); 1028 reg1 = apic_read(APIC_ID);
996 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 1029 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
997 apic_write(APIC_ID, reg0); 1030 apic_write(APIC_ID, reg0);
998 if (reg1 != (reg0 ^ APIC_ID_MASK)) 1031 if (reg1 != (reg0 ^ apic->apic_id_mask))
999 return 0; 1032 return 0;
1000 1033
1001 /* 1034 /*
@@ -1089,7 +1122,7 @@ static void __cpuinit lapic_setup_esr(void)
1089 return; 1122 return;
1090 } 1123 }
1091 1124
1092 if (esr_disable) { 1125 if (apic->disable_esr) {
1093 /* 1126 /*
1094 * Something untraceable is creating bad interrupts on 1127 * Something untraceable is creating bad interrupts on
1095 * secondary quads ... for the moment, just leave the 1128 * secondary quads ... for the moment, just leave the
@@ -1130,9 +1163,14 @@ void __cpuinit setup_local_APIC(void)
1130 unsigned int value; 1163 unsigned int value;
1131 int i, j; 1164 int i, j;
1132 1165
1166 if (disable_apic) {
1167 arch_disable_smp_support();
1168 return;
1169 }
1170
1133#ifdef CONFIG_X86_32 1171#ifdef CONFIG_X86_32
1134 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1172 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1135 if (lapic_is_integrated() && esr_disable) { 1173 if (lapic_is_integrated() && apic->disable_esr) {
1136 apic_write(APIC_ESR, 0); 1174 apic_write(APIC_ESR, 0);
1137 apic_write(APIC_ESR, 0); 1175 apic_write(APIC_ESR, 0);
1138 apic_write(APIC_ESR, 0); 1176 apic_write(APIC_ESR, 0);
@@ -1146,7 +1184,7 @@ void __cpuinit setup_local_APIC(void)
1146 * Double-check whether this APIC is really registered. 1184 * Double-check whether this APIC is really registered.
1147 * This is meaningless in clustered apic mode, so we skip it. 1185 * This is meaningless in clustered apic mode, so we skip it.
1148 */ 1186 */
1149 if (!apic_id_registered()) 1187 if (!apic->apic_id_registered())
1150 BUG(); 1188 BUG();
1151 1189
1152 /* 1190 /*
@@ -1154,7 +1192,7 @@ void __cpuinit setup_local_APIC(void)
1154 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel 1192 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
1155 * document number 292116). So here it goes... 1193 * document number 292116). So here it goes...
1156 */ 1194 */
1157 init_apic_ldr(); 1195 apic->init_apic_ldr();
1158 1196
1159 /* 1197 /*
1160 * Set Task Priority to 'accept all'. We never change this 1198 * Set Task Priority to 'accept all'. We never change this
@@ -1570,11 +1608,11 @@ int apic_version[MAX_APICS];
1570 1608
1571int __init APIC_init_uniprocessor(void) 1609int __init APIC_init_uniprocessor(void)
1572{ 1610{
1573#ifdef CONFIG_X86_64
1574 if (disable_apic) { 1611 if (disable_apic) {
1575 pr_info("Apic disabled\n"); 1612 pr_info("Apic disabled\n");
1576 return -1; 1613 return -1;
1577 } 1614 }
1615#ifdef CONFIG_X86_64
1578 if (!cpu_has_apic) { 1616 if (!cpu_has_apic) {
1579 disable_apic = 1; 1617 disable_apic = 1;
1580 pr_info("Apic disabled by BIOS\n"); 1618 pr_info("Apic disabled by BIOS\n");
@@ -1600,7 +1638,7 @@ int __init APIC_init_uniprocessor(void)
1600 enable_IR_x2apic(); 1638 enable_IR_x2apic();
1601#endif 1639#endif
1602#ifdef CONFIG_X86_64 1640#ifdef CONFIG_X86_64
1603 setup_apic_routing(); 1641 default_setup_apic_routing();
1604#endif 1642#endif
1605 1643
1606 verify_local_APIC(); 1644 verify_local_APIC();
@@ -1738,7 +1776,8 @@ void __init connect_bsp_APIC(void)
1738 outb(0x01, 0x23); 1776 outb(0x01, 0x23);
1739 } 1777 }
1740#endif 1778#endif
1741 enable_apic_mode(); 1779 if (apic->enable_apic_mode)
1780 apic->enable_apic_mode();
1742} 1781}
1743 1782
1744/** 1783/**
@@ -1876,29 +1915,39 @@ void __cpuinit generic_processor_info(int apicid, int version)
1876 } 1915 }
1877#endif 1916#endif
1878 1917
1879#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) 1918#if defined(CONFIG_SMP) || defined(CONFIG_X86_64)
1880 /* are we being called early in kernel startup? */ 1919 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1881 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1920 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1882 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1883 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1884
1885 cpu_to_apicid[cpu] = apicid;
1886 bios_cpu_apicid[cpu] = apicid;
1887 } else {
1888 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1889 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1890 }
1891#endif 1921#endif
1892 1922
1893 set_cpu_possible(cpu, true); 1923 set_cpu_possible(cpu, true);
1894 set_cpu_present(cpu, true); 1924 set_cpu_present(cpu, true);
1895} 1925}
1896 1926
1897#ifdef CONFIG_X86_64
1898int hard_smp_processor_id(void) 1927int hard_smp_processor_id(void)
1899{ 1928{
1900 return read_apic_id(); 1929 return read_apic_id();
1901} 1930}
1931
1932void default_init_apic_ldr(void)
1933{
1934 unsigned long val;
1935
1936 apic_write(APIC_DFR, APIC_DFR_VALUE);
1937 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
1938 val |= SET_APIC_LOGICAL_ID(1UL << smp_processor_id());
1939 apic_write(APIC_LDR, val);
1940}
1941
1942#ifdef CONFIG_X86_32
1943int default_apicid_to_node(int logical_apicid)
1944{
1945#ifdef CONFIG_SMP
1946 return apicid_2_node[hard_smp_processor_id()];
1947#else
1948 return 0;
1949#endif
1950}
1902#endif 1951#endif
1903 1952
1904/* 1953/*
diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c
index 98807bb095ad..37ba5f85b718 100644
--- a/arch/x86/kernel/apm_32.c
+++ b/arch/x86/kernel/apm_32.c
@@ -301,7 +301,7 @@ extern int (*console_blank_hook)(int);
301 */ 301 */
302#define APM_ZERO_SEGS 302#define APM_ZERO_SEGS
303 303
304#include "apm.h" 304#include <asm/apm.h>
305 305
306/* 306/*
307 * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend. 307 * Define to re-initialize the interrupt 0 timer to 100 Hz after a suspend.
diff --git a/arch/x86/kernel/asm-offsets_32.c b/arch/x86/kernel/asm-offsets_32.c
index ee4df08feee6..fbf2f33e3080 100644
--- a/arch/x86/kernel/asm-offsets_32.c
+++ b/arch/x86/kernel/asm-offsets_32.c
@@ -75,6 +75,7 @@ void foo(void)
75 OFFSET(PT_DS, pt_regs, ds); 75 OFFSET(PT_DS, pt_regs, ds);
76 OFFSET(PT_ES, pt_regs, es); 76 OFFSET(PT_ES, pt_regs, es);
77 OFFSET(PT_FS, pt_regs, fs); 77 OFFSET(PT_FS, pt_regs, fs);
78 OFFSET(PT_GS, pt_regs, gs);
78 OFFSET(PT_ORIG_EAX, pt_regs, orig_ax); 79 OFFSET(PT_ORIG_EAX, pt_regs, orig_ax);
79 OFFSET(PT_EIP, pt_regs, ip); 80 OFFSET(PT_EIP, pt_regs, ip);
80 OFFSET(PT_CS, pt_regs, cs); 81 OFFSET(PT_CS, pt_regs, cs);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edbc..8793ab33e2c1 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <linux/kbuild.h> 13#include <linux/kbuild.h>
14#include <asm/pda.h>
15#include <asm/processor.h> 14#include <asm/processor.h>
16#include <asm/segment.h> 15#include <asm/segment.h>
17#include <asm/thread_info.h> 16#include <asm/thread_info.h>
@@ -48,16 +47,6 @@ int main(void)
48#endif 47#endif
49 BLANK(); 48 BLANK();
50#undef ENTRY 49#undef ENTRY
51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
52 ENTRY(kernelstack);
53 ENTRY(oldrsp);
54 ENTRY(pcurrent);
55 ENTRY(irqcount);
56 ENTRY(cpunumber);
57 ENTRY(irqstackptr);
58 ENTRY(data_offset);
59 BLANK();
60#undef ENTRY
61#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
62 BLANK(); 51 BLANK();
63 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); 52 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/bigsmp_32.c b/arch/x86/kernel/bigsmp_32.c
new file mode 100644
index 000000000000..47a62f46afdb
--- /dev/null
+++ b/arch/x86/kernel/bigsmp_32.c
@@ -0,0 +1,266 @@
1/*
2 * APIC driver for "bigsmp" XAPIC machines with more than 8 virtual CPUs.
3 * Drives the local APIC in "clustered mode".
4 */
5#define APIC_DEFINITION 1
6#include <linux/threads.h>
7#include <linux/cpumask.h>
8#include <asm/mpspec.h>
9#include <asm/genapic.h>
10#include <asm/fixmap.h>
11#include <asm/apicdef.h>
12#include <asm/ipi.h>
13#include <linux/kernel.h>
14#include <linux/init.h>
15#include <linux/dmi.h>
16#include <linux/smp.h>
17
18
19static inline unsigned bigsmp_get_apic_id(unsigned long x)
20{
21 return (x >> 24) & 0xFF;
22}
23
24#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu))
25
26static inline int bigsmp_apic_id_registered(void)
27{
28 return 1;
29}
30
31static inline const cpumask_t *bigsmp_target_cpus(void)
32{
33#ifdef CONFIG_SMP
34 return &cpu_online_map;
35#else
36 return &cpumask_of_cpu(0);
37#endif
38}
39
40#define APIC_DFR_VALUE (APIC_DFR_FLAT)
41
42static inline unsigned long
43bigsmp_check_apicid_used(physid_mask_t bitmap, int apicid)
44{
45 return 0;
46}
47
48static inline unsigned long bigsmp_check_apicid_present(int bit)
49{
50 return 1;
51}
52
53static inline unsigned long calculate_ldr(int cpu)
54{
55 unsigned long val, id;
56 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
57 id = xapic_phys_to_log_apicid(cpu);
58 val |= SET_APIC_LOGICAL_ID(id);
59 return val;
60}
61
62/*
63 * Set up the logical destination ID.
64 *
65 * Intel recommends to set DFR, LDR and TPR before enabling
66 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
67 * document number 292116). So here it goes...
68 */
69static inline void bigsmp_init_apic_ldr(void)
70{
71 unsigned long val;
72 int cpu = smp_processor_id();
73
74 apic_write(APIC_DFR, APIC_DFR_VALUE);
75 val = calculate_ldr(cpu);
76 apic_write(APIC_LDR, val);
77}
78
79static inline void bigsmp_setup_apic_routing(void)
80{
81 printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
82 "Physflat", nr_ioapics);
83}
84
85static inline int bigsmp_apicid_to_node(int logical_apicid)
86{
87 return apicid_2_node[hard_smp_processor_id()];
88}
89
90static inline int bigsmp_cpu_present_to_apicid(int mps_cpu)
91{
92 if (mps_cpu < nr_cpu_ids)
93 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
94
95 return BAD_APICID;
96}
97
98static inline physid_mask_t bigsmp_apicid_to_cpu_present(int phys_apicid)
99{
100 return physid_mask_of_physid(phys_apicid);
101}
102
103extern u8 cpu_2_logical_apicid[];
104/* Mapping from cpu number to logical apicid */
105static inline int bigsmp_cpu_to_logical_apicid(int cpu)
106{
107 if (cpu >= nr_cpu_ids)
108 return BAD_APICID;
109 return cpu_physical_id(cpu);
110}
111
112static inline physid_mask_t bigsmp_ioapic_phys_id_map(physid_mask_t phys_map)
113{
114 /* For clustered we don't have a good way to do this yet - hack */
115 return physids_promote(0xFFL);
116}
117
118static inline void bigsmp_setup_portio_remap(void)
119{
120}
121
122static inline int bigsmp_check_phys_apicid_present(int boot_cpu_physical_apicid)
123{
124 return 1;
125}
126
127/* As we are using single CPU as destination, pick only one CPU here */
128static inline unsigned int bigsmp_cpu_mask_to_apicid(const cpumask_t *cpumask)
129{
130 return bigsmp_cpu_to_logical_apicid(first_cpu(*cpumask));
131}
132
133static inline unsigned int
134bigsmp_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
135 const struct cpumask *andmask)
136{
137 int cpu;
138
139 /*
140 * We're using fixed IRQ delivery, can only return one phys APIC ID.
141 * May as well be the first.
142 */
143 for_each_cpu_and(cpu, cpumask, andmask) {
144 if (cpumask_test_cpu(cpu, cpu_online_mask))
145 break;
146 }
147 if (cpu < nr_cpu_ids)
148 return bigsmp_cpu_to_logical_apicid(cpu);
149
150 return BAD_APICID;
151}
152
153static inline int bigsmp_phys_pkg_id(int cpuid_apic, int index_msb)
154{
155 return cpuid_apic >> index_msb;
156}
157
158static inline void bigsmp_send_IPI_mask(const struct cpumask *mask, int vector)
159{
160 default_send_IPI_mask_sequence_phys(mask, vector);
161}
162
163static inline void bigsmp_send_IPI_allbutself(int vector)
164{
165 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
166}
167
168static inline void bigsmp_send_IPI_all(int vector)
169{
170 bigsmp_send_IPI_mask(cpu_online_mask, vector);
171}
172
173static int dmi_bigsmp; /* can be set by dmi scanners */
174
175static int hp_ht_bigsmp(const struct dmi_system_id *d)
176{
177 printk(KERN_NOTICE "%s detected: force use of apic=bigsmp\n", d->ident);
178 dmi_bigsmp = 1;
179 return 0;
180}
181
182
183static const struct dmi_system_id bigsmp_dmi_table[] = {
184 { hp_ht_bigsmp, "HP ProLiant DL760 G2",
185 { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
186 DMI_MATCH(DMI_BIOS_VERSION, "P44-"),}
187 },
188
189 { hp_ht_bigsmp, "HP ProLiant DL740",
190 { DMI_MATCH(DMI_BIOS_VENDOR, "HP"),
191 DMI_MATCH(DMI_BIOS_VERSION, "P47-"),}
192 },
193 { }
194};
195
196static void bigsmp_vector_allocation_domain(int cpu, cpumask_t *retmask)
197{
198 cpus_clear(*retmask);
199 cpu_set(cpu, *retmask);
200}
201
202static int probe_bigsmp(void)
203{
204 if (def_to_bigsmp)
205 dmi_bigsmp = 1;
206 else
207 dmi_check_system(bigsmp_dmi_table);
208 return dmi_bigsmp;
209}
210
211struct genapic apic_bigsmp = {
212
213 .name = "bigsmp",
214 .probe = probe_bigsmp,
215 .acpi_madt_oem_check = NULL,
216 .apic_id_registered = bigsmp_apic_id_registered,
217
218 .irq_delivery_mode = dest_Fixed,
219 /* phys delivery to target CPU: */
220 .irq_dest_mode = 0,
221
222 .target_cpus = bigsmp_target_cpus,
223 .disable_esr = 1,
224 .dest_logical = 0,
225 .check_apicid_used = bigsmp_check_apicid_used,
226 .check_apicid_present = bigsmp_check_apicid_present,
227
228 .vector_allocation_domain = bigsmp_vector_allocation_domain,
229 .init_apic_ldr = bigsmp_init_apic_ldr,
230
231 .ioapic_phys_id_map = bigsmp_ioapic_phys_id_map,
232 .setup_apic_routing = bigsmp_setup_apic_routing,
233 .multi_timer_check = NULL,
234 .apicid_to_node = bigsmp_apicid_to_node,
235 .cpu_to_logical_apicid = bigsmp_cpu_to_logical_apicid,
236 .cpu_present_to_apicid = bigsmp_cpu_present_to_apicid,
237 .apicid_to_cpu_present = bigsmp_apicid_to_cpu_present,
238 .setup_portio_remap = NULL,
239 .check_phys_apicid_present = bigsmp_check_phys_apicid_present,
240 .enable_apic_mode = NULL,
241 .phys_pkg_id = bigsmp_phys_pkg_id,
242 .mps_oem_check = NULL,
243
244 .get_apic_id = bigsmp_get_apic_id,
245 .set_apic_id = NULL,
246 .apic_id_mask = 0xFF << 24,
247
248 .cpu_mask_to_apicid = bigsmp_cpu_mask_to_apicid,
249 .cpu_mask_to_apicid_and = bigsmp_cpu_mask_to_apicid_and,
250
251 .send_IPI_mask = bigsmp_send_IPI_mask,
252 .send_IPI_mask_allbutself = NULL,
253 .send_IPI_allbutself = bigsmp_send_IPI_allbutself,
254 .send_IPI_all = bigsmp_send_IPI_all,
255 .send_IPI_self = default_send_IPI_self,
256
257 .wakeup_cpu = NULL,
258 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
259 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
260
261 .wait_for_init_deassert = default_wait_for_init_deassert,
262
263 .smp_callin_clear_local_apic = NULL,
264 .store_NMI_vector = NULL,
265 .inquire_remote_apic = default_inquire_remote_apic,
266};
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2cf23634b6d9..e48640cfac0c 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,7 +7,7 @@
7#include <asm/pat.h> 7#include <asm/pat.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9 9
10#include <mach_apic.h> 10#include <asm/genapic.h>
11 11
12struct cpuid_bit { 12struct cpuid_bit {
13 u16 feature; 13 u16 feature;
@@ -69,7 +69,7 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
69 */ 69 */
70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c) 70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
71{ 71{
72#ifdef CONFIG_X86_SMP 72#ifdef CONFIG_SMP
73 unsigned int eax, ebx, ecx, edx, sub_index; 73 unsigned int eax, ebx, ecx, edx, sub_index;
74 unsigned int ht_mask_width, core_plus_mask_width; 74 unsigned int ht_mask_width, core_plus_mask_width;
75 unsigned int core_select_mask, core_level_siblings; 75 unsigned int core_select_mask, core_level_siblings;
@@ -116,22 +116,14 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
116 116
117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width; 117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
118 118
119#ifdef CONFIG_X86_32 119 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, ht_mask_width)
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask; 120 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width); 121 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123 /* 122 /*
124 * Reinit the apicid, now that we have extended initial_apicid. 123 * Reinit the apicid, now that we have extended initial_apicid.
125 */ 124 */
126 c->apicid = phys_pkg_id(c->initial_apicid, 0); 125 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
127#else 126
128 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
129 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
130 /*
131 * Reinit the apicid, now that we have extended initial_apicid.
132 */
133 c->apicid = phys_pkg_id(0);
134#endif
135 c->x86_max_cores = (core_level_siblings / smp_num_siblings); 127 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
136 128
137 129
@@ -143,37 +135,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
143 return; 135 return;
144#endif 136#endif
145} 137}
146
147#ifdef CONFIG_X86_PAT
148void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
149{
150 if (!cpu_has_pat)
151 pat_disable("PAT not supported by CPU.");
152
153 switch (c->x86_vendor) {
154 case X86_VENDOR_INTEL:
155 /*
156 * There is a known erratum on Pentium III and Core Solo
157 * and Core Duo CPUs.
158 * " Page with PAT set to WC while associated MTRR is UC
159 * may consolidate to UC "
160 * Because of this erratum, it is better to stick with
161 * setting WC in MTRR rather than using PAT on these CPUs.
162 *
163 * Enable PAT WC only on P4, Core 2 or later CPUs.
164 */
165 if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
166 return;
167
168 pat_disable("PAT WC disabled due to known CPU erratum.");
169 return;
170
171 case X86_VENDOR_AMD:
172 case X86_VENDOR_CENTAUR:
173 case X86_VENDOR_TRANSMETA:
174 return;
175 }
176
177 pat_disable("PAT disabled. Not yet verified on this CPU type.");
178}
179#endif
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 7c878f6aa919..ff4d7b9e32e4 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -12,7 +12,7 @@
12# include <asm/cacheflush.h> 12# include <asm/cacheflush.h>
13#endif 13#endif
14 14
15#include <mach_apic.h> 15#include <asm/genapic.h>
16 16
17#include "cpu.h" 17#include "cpu.h"
18 18
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1f93b1..e8f4a386bd9d 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,14 +21,16 @@
21#include <asm/asm.h> 21#include <asm/asm.h>
22#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/smp.h> 23#include <asm/smp.h>
24#include <asm/cpu.h>
25#include <asm/cpumask.h>
24#ifdef CONFIG_X86_LOCAL_APIC 26#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h> 27#include <asm/mpspec.h>
26#include <asm/apic.h> 28#include <asm/apic.h>
27#include <mach_apic.h>
28#include <asm/genapic.h> 29#include <asm/genapic.h>
30#include <asm/genapic.h>
31#include <asm/uv/uv.h>
29#endif 32#endif
30 33
31#include <asm/pda.h>
32#include <asm/pgtable.h> 34#include <asm/pgtable.h>
33#include <asm/processor.h> 35#include <asm/processor.h>
34#include <asm/desc.h> 36#include <asm/desc.h>
@@ -37,6 +39,7 @@
37#include <asm/sections.h> 39#include <asm/sections.h>
38#include <asm/setup.h> 40#include <asm/setup.h>
39#include <asm/hypervisor.h> 41#include <asm/hypervisor.h>
42#include <asm/stackprotector.h>
40 43
41#include "cpu.h" 44#include "cpu.h"
42 45
@@ -50,6 +53,15 @@ cpumask_var_t cpu_initialized_mask;
50/* representing cpus for which sibling maps can be computed */ 53/* representing cpus for which sibling maps can be computed */
51cpumask_var_t cpu_sibling_setup_mask; 54cpumask_var_t cpu_sibling_setup_mask;
52 55
56/* correctly size the local cpu masks */
57void __init setup_cpu_local_masks(void)
58{
59 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
60 alloc_bootmem_cpumask_var(&cpu_callin_mask);
61 alloc_bootmem_cpumask_var(&cpu_callout_mask);
62 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
63}
64
53#else /* CONFIG_X86_32 */ 65#else /* CONFIG_X86_32 */
54 66
55cpumask_t cpu_callin_map; 67cpumask_t cpu_callin_map;
@@ -62,23 +74,23 @@ cpumask_t cpu_sibling_setup_map;
62 74
63static struct cpu_dev *this_cpu __cpuinitdata; 75static struct cpu_dev *this_cpu __cpuinitdata;
64 76
77DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 78#ifdef CONFIG_X86_64
66/* We need valid kernel segments for data and code in long mode too 79 /*
67 * IRET will check the segment types kkeil 2000/10/28 80 * We need valid kernel segments for data and code in long mode too
68 * Also sysret mandates a special GDT layout 81 * IRET will check the segment types kkeil 2000/10/28
69 */ 82 * Also sysret mandates a special GDT layout
70/* The TLS descriptors are currently at a different place compared to i386. 83 *
71 Hopefully nobody expects them at a fixed place (Wine?) */ 84 * The TLS descriptors are currently at a different place compared to i386.
72DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 85 * Hopefully nobody expects them at a fixed place (Wine?)
86 */
73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 87 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 88 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 89 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 90 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 91 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 92 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
79} };
80#else 93#else
81DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
82 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 94 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
83 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 95 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
84 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 96 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -110,9 +122,10 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
110 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 122 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
111 123
112 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 124 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
113 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 125 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
114} }; 126 GDT_STACK_CANARY_INIT
115#endif 127#endif
128} };
116EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 129EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
117 130
118#ifdef CONFIG_X86_32 131#ifdef CONFIG_X86_32
@@ -213,6 +226,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
213#endif 226#endif
214 227
215/* 228/*
229 * Some CPU features depend on higher CPUID levels, which may not always
230 * be available due to CPUID level capping or broken virtualization
231 * software. Add those features to this table to auto-disable them.
232 */
233struct cpuid_dependent_feature {
234 u32 feature;
235 u32 level;
236};
237static const struct cpuid_dependent_feature __cpuinitconst
238cpuid_dependent_features[] = {
239 { X86_FEATURE_MWAIT, 0x00000005 },
240 { X86_FEATURE_DCA, 0x00000009 },
241 { X86_FEATURE_XSAVE, 0x0000000d },
242 { 0, 0 }
243};
244
245static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
246{
247 const struct cpuid_dependent_feature *df;
248 for (df = cpuid_dependent_features; df->feature; df++) {
249 /*
250 * Note: cpuid_level is set to -1 if unavailable, but
 251 * extended_cpuid_level is set to 0 if unavailable
252 * and the legitimate extended levels are all negative
253 * when signed; hence the weird messing around with
254 * signs here...
255 */
256 if (cpu_has(c, df->feature) &&
 257 ((s32)df->level < 0 ?
 258 (u32)df->level > (u32)c->extended_cpuid_level :
 259 (s32)df->level > (s32)c->cpuid_level)) {
260 clear_cpu_cap(c, df->feature);
261 if (warn)
262 printk(KERN_WARNING
263 "CPU: CPU feature %s disabled "
264 "due to lack of CPUID level 0x%x\n",
265 x86_cap_flags[df->feature],
266 df->level);
267 }
268 }
269}
270
271/*
216 * Naming convention should be: <Name> [(<Codename>)] 272 * Naming convention should be: <Name> [(<Codename>)]
217 * This table is only used if init_<vendor>() below doesn't set it; 273 * This table is only used if init_<vendor>() below doesn't set it;
218 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 274 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
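The sign trick in filter_cpuid_features() added above relies on extended CPUID leaves (0x8000xxxx) being negative as s32: those are compared unsigned against extended_cpuid_level, while basic leaves are compared signed against cpuid_level, which is -1 when CPUID is entirely absent. A minimal stand-alone sketch of that test, with made-up level values:

#include <stdio.h>
#include <stdint.h>

/* Example values only; a real caller would read these via CPUID. */
static int level_available(uint32_t level, int32_t cpuid_level,
                           uint32_t extended_cpuid_level)
{
        if ((int32_t)level < 0)                         /* 0x8000xxxx leaf */
                return level <= extended_cpuid_level;   /* unsigned compare */
        return (int32_t)level <= cpuid_level;           /* -1 means no CPUID */
}

int main(void)
{
        int32_t  cpuid_level = 0x0000000b;      /* basic leaves up to 0x0b */
        uint32_t ext_level   = 0x80000008;      /* extended leaves up to ...08 */

        printf("MWAIT needs 0x05: %d\n", level_available(0x05, cpuid_level, ext_level));
        printf("XSAVE needs 0x0d: %d\n", level_available(0x0d, cpuid_level, ext_level));
        printf("leaf 0x8000000a : %d\n", level_available(0x8000000a, cpuid_level, ext_level));
        return 0;
}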
@@ -242,18 +298,29 @@ static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
242 298
243__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; 299__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
244 300
301void load_percpu_segment(int cpu)
302{
303#ifdef CONFIG_X86_32
304 loadsegment(fs, __KERNEL_PERCPU);
305#else
306 loadsegment(gs, 0);
307 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
308#endif
309 load_stack_canary_segment();
310}
311
245/* Current gdt points %fs at the "master" per-cpu area: after this, 312/* Current gdt points %fs at the "master" per-cpu area: after this,
246 * it's on the real one. */ 313 * it's on the real one. */
247void switch_to_new_gdt(void) 314void switch_to_new_gdt(int cpu)
248{ 315{
249 struct desc_ptr gdt_descr; 316 struct desc_ptr gdt_descr;
250 317
251 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); 318 gdt_descr.address = (long)get_cpu_gdt_table(cpu);
252 gdt_descr.size = GDT_SIZE - 1; 319 gdt_descr.size = GDT_SIZE - 1;
253 load_gdt(&gdt_descr); 320 load_gdt(&gdt_descr);
254#ifdef CONFIG_X86_32 321 /* Reload the per-cpu base */
255 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); 322
256#endif 323 load_percpu_segment(cpu);
257} 324}
258 325
259static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 326static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
@@ -383,11 +450,7 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
383 } 450 }
384 451
385 index_msb = get_count_order(smp_num_siblings); 452 index_msb = get_count_order(smp_num_siblings);
386#ifdef CONFIG_X86_64 453 c->phys_proc_id = apic->phys_pkg_id(c->initial_apicid, index_msb);
387 c->phys_proc_id = phys_pkg_id(index_msb);
388#else
389 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
390#endif
391 454
392 smp_num_siblings = smp_num_siblings / c->x86_max_cores; 455 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
393 456
@@ -395,13 +458,8 @@ void __cpuinit detect_ht(struct cpuinfo_x86 *c)
395 458
396 core_bits = get_count_order(c->x86_max_cores); 459 core_bits = get_count_order(c->x86_max_cores);
397 460
398#ifdef CONFIG_X86_64 461 c->cpu_core_id = apic->phys_pkg_id(c->initial_apicid, index_msb) &
399 c->cpu_core_id = phys_pkg_id(index_msb) &
400 ((1 << core_bits) - 1);
401#else
402 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
403 ((1 << core_bits) - 1); 462 ((1 << core_bits) - 1);
404#endif
405 } 463 }
406 464
407out: 465out:
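detect_ht() above now always routes through apic->phys_pkg_id(), which is essentially a shift of the initial APIC ID: the package ID is the APIC ID shifted right by index_msb (enough bits to cover every sibling in a package), and the core ID is the bits just above the SMT bits, masked to core_bits. A stand-alone sketch of the arithmetic with a made-up topology:

#include <stdio.h>

/* ceil(log2(n)), the same value get_count_order() produces */
static int count_order(unsigned int n)
{
        int order = 0;

        while ((1u << order) < n)
                order++;
        return order;
}

int main(void)
{
        unsigned int siblings_per_pkg = 8;      /* 4 cores x 2 threads (made up) */
        unsigned int max_cores = 4;
        unsigned int apicid = 0xb;              /* binary 1011: pkg 1, core 1, thread 1 */

        int index_msb = count_order(siblings_per_pkg);              /* 3 */
        int core_bits = count_order(max_cores);                     /* 2 */
        int smt_bits  = count_order(siblings_per_pkg / max_cores);  /* 1 */

        unsigned int phys_proc_id = apicid >> index_msb;
        unsigned int cpu_core_id  = (apicid >> smt_bits) & ((1 << core_bits) - 1);

        printf("apicid %#x -> package %u, core %u\n", apicid, phys_proc_id, cpu_core_id);
        return 0;
}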
@@ -570,11 +628,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
570 if (this_cpu->c_early_init) 628 if (this_cpu->c_early_init)
571 this_cpu->c_early_init(c); 629 this_cpu->c_early_init(c);
572 630
573 validate_pat_support(c);
574
575#ifdef CONFIG_SMP 631#ifdef CONFIG_SMP
576 c->cpu_index = boot_cpu_id; 632 c->cpu_index = boot_cpu_id;
577#endif 633#endif
634 filter_cpuid_features(c, false);
578} 635}
579 636
580void __init early_cpu_init(void) 637void __init early_cpu_init(void)
@@ -637,7 +694,7 @@ static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
637 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF; 694 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
638#ifdef CONFIG_X86_32 695#ifdef CONFIG_X86_32
639# ifdef CONFIG_X86_HT 696# ifdef CONFIG_X86_HT
640 c->apicid = phys_pkg_id(c->initial_apicid, 0); 697 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
641# else 698# else
642 c->apicid = c->initial_apicid; 699 c->apicid = c->initial_apicid;
643# endif 700# endif
@@ -684,7 +741,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
684 this_cpu->c_identify(c); 741 this_cpu->c_identify(c);
685 742
686#ifdef CONFIG_X86_64 743#ifdef CONFIG_X86_64
687 c->apicid = phys_pkg_id(0); 744 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0);
688#endif 745#endif
689 746
690 /* 747 /*
@@ -708,6 +765,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
708 * we do "generic changes." 765 * we do "generic changes."
709 */ 766 */
710 767
768 /* Filter out anything that depends on CPUID levels we don't have */
769 filter_cpuid_features(c, true);
770
711 /* If the model name is still unset, do table lookup. */ 771 /* If the model name is still unset, do table lookup. */
712 if (!c->x86_model_id[0]) { 772 if (!c->x86_model_id[0]) {
713 char *p; 773 char *p;
@@ -877,54 +937,22 @@ static __init int setup_disablecpuid(char *arg)
877__setup("clearcpuid=", setup_disablecpuid); 937__setup("clearcpuid=", setup_disablecpuid);
878 938
879#ifdef CONFIG_X86_64 939#ifdef CONFIG_X86_64
880struct x8664_pda **_cpu_pda __read_mostly;
881EXPORT_SYMBOL(_cpu_pda);
882
883struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 940struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
884 941
885static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 942DEFINE_PER_CPU_FIRST(union irq_stack_union,
886 943 irq_stack_union) __aligned(PAGE_SIZE);
887void __cpuinit pda_init(int cpu) 944DEFINE_PER_CPU(char *, irq_stack_ptr) =
888{ 945 init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
889 struct x8664_pda *pda = cpu_pda(cpu);
890 946
891 /* Setup up data that may be needed in __get_free_pages early */ 947DEFINE_PER_CPU(unsigned long, kernel_stack) =
892 loadsegment(fs, 0); 948 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
893 loadsegment(gs, 0); 949EXPORT_PER_CPU_SYMBOL(kernel_stack);
894 /* Memory clobbers used to order PDA accessed */
895 mb();
896 wrmsrl(MSR_GS_BASE, pda);
897 mb();
898
899 pda->cpunumber = cpu;
900 pda->irqcount = -1;
901 pda->kernelstack = (unsigned long)stack_thread_info() -
902 PDA_STACKOFFSET + THREAD_SIZE;
903 pda->active_mm = &init_mm;
904 pda->mmu_state = 0;
905
906 if (cpu == 0) {
907 /* others are initialized in smpboot.c */
908 pda->pcurrent = &init_task;
909 pda->irqstackptr = boot_cpu_stack;
910 pda->irqstackptr += IRQSTACKSIZE - 64;
911 } else {
912 if (!pda->irqstackptr) {
913 pda->irqstackptr = (char *)
914 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
915 if (!pda->irqstackptr)
916 panic("cannot allocate irqstack for cpu %d",
917 cpu);
918 pda->irqstackptr += IRQSTACKSIZE - 64;
919 }
920 950
921 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 951DEFINE_PER_CPU(unsigned int, irq_count) = -1;
922 pda->nodenumber = cpu_to_node(cpu);
923 }
924}
925 952
926static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 953static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
927 DEBUG_STKSZ] __page_aligned_bss; 954 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
955 __aligned(PAGE_SIZE);
928 956
929extern asmlinkage void ignore_sysret(void); 957extern asmlinkage void ignore_sysret(void);
930 958
@@ -957,16 +985,21 @@ unsigned long kernel_eflags;
957 */ 985 */
958DEFINE_PER_CPU(struct orig_ist, orig_ist); 986DEFINE_PER_CPU(struct orig_ist, orig_ist);
959 987
960#else 988#else /* x86_64 */
989
990#ifdef CONFIG_CC_STACKPROTECTOR
991DEFINE_PER_CPU(unsigned long, stack_canary);
992#endif
961 993
962/* Make sure %fs is initialized properly in idle threads */ 994/* Make sure %fs and %gs are initialized properly in idle threads */
963struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 995struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
964{ 996{
965 memset(regs, 0, sizeof(struct pt_regs)); 997 memset(regs, 0, sizeof(struct pt_regs));
966 regs->fs = __KERNEL_PERCPU; 998 regs->fs = __KERNEL_PERCPU;
999 regs->gs = __KERNEL_STACK_CANARY;
967 return regs; 1000 return regs;
968} 1001}
969#endif 1002#endif /* x86_64 */
970 1003
971/* 1004/*
972 * cpu_init() initializes state that is per-CPU. Some data is already 1005 * cpu_init() initializes state that is per-CPU. Some data is already
@@ -982,15 +1015,14 @@ void __cpuinit cpu_init(void)
982 struct tss_struct *t = &per_cpu(init_tss, cpu); 1015 struct tss_struct *t = &per_cpu(init_tss, cpu);
983 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); 1016 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
984 unsigned long v; 1017 unsigned long v;
985 char *estacks = NULL;
986 struct task_struct *me; 1018 struct task_struct *me;
987 int i; 1019 int i;
988 1020
989 /* CPU 0 is initialised in head64.c */ 1021#ifdef CONFIG_NUMA
990 if (cpu != 0) 1022 if (cpu != 0 && percpu_read(node_number) == 0 &&
991 pda_init(cpu); 1023 cpu_to_node(cpu) != NUMA_NO_NODE)
992 else 1024 percpu_write(node_number, cpu_to_node(cpu));
993 estacks = boot_exception_stacks; 1025#endif
994 1026
995 me = current; 1027 me = current;
996 1028
@@ -1006,7 +1038,9 @@ void __cpuinit cpu_init(void)
1006 * and set up the GDT descriptor: 1038 * and set up the GDT descriptor:
1007 */ 1039 */
1008 1040
1009 switch_to_new_gdt(); 1041 switch_to_new_gdt(cpu);
1042 loadsegment(fs, 0);
1043
1010 load_idt((const struct desc_ptr *)&idt_descr); 1044 load_idt((const struct desc_ptr *)&idt_descr);
1011 1045
1012 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1046 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1024,18 +1058,13 @@ void __cpuinit cpu_init(void)
1024 * set up and load the per-CPU TSS 1058 * set up and load the per-CPU TSS
1025 */ 1059 */
1026 if (!orig_ist->ist[0]) { 1060 if (!orig_ist->ist[0]) {
1027 static const unsigned int order[N_EXCEPTION_STACKS] = { 1061 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1028 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1062 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1029 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 1063 [DEBUG_STACK - 1] = DEBUG_STKSZ
1030 }; 1064 };
1065 char *estacks = per_cpu(exception_stacks, cpu);
1031 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1066 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1032 if (cpu) { 1067 estacks += sizes[v];
1033 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1034 if (!estacks)
1035 panic("Cannot allocate exception "
1036 "stack %ld %d\n", v, cpu);
1037 }
1038 estacks += PAGE_SIZE << order[v];
1039 orig_ist->ist[v] = t->x86_tss.ist[v] = 1068 orig_ist->ist[v] = t->x86_tss.ist[v] =
1040 (unsigned long)estacks; 1069 (unsigned long)estacks;
1041 } 1070 }
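The new loop above carves the statically allocated per-CPU exception_stacks buffer into the IST entries by walking a size table, rather than calling __get_free_pages() per stack as the removed code did. Roughly the same slicing in plain C, with illustrative sizes instead of the kernel's EXCEPTION_STKSZ/DEBUG_STKSZ:

#include <stdio.h>

#define N_EXCEPTION_STACKS 4
#define DEBUG_STACK        4    /* 1-based index of the larger debug stack */

int main(void)
{
        /* illustrative sizes: 4 KiB stacks, with an 8 KiB debug stack */
        static const unsigned int sizes[N_EXCEPTION_STACKS] = {
                [0 ... N_EXCEPTION_STACKS - 1] = 4096,
                [DEBUG_STACK - 1] = 8192,
        };
        static char exception_stacks[3 * 4096 + 8192];
        unsigned long ist[N_EXCEPTION_STACKS];
        char *estacks = exception_stacks;
        int v;

        for (v = 0; v < N_EXCEPTION_STACKS; v++) {
                estacks += sizes[v];
                ist[v] = (unsigned long)estacks;    /* stack grows down from here */
                printf("IST%d top at offset %lu\n", v + 1,
                       ist[v] - (unsigned long)exception_stacks);
        }
        return 0;
}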
@@ -1069,22 +1098,19 @@ void __cpuinit cpu_init(void)
1069 */ 1098 */
1070 if (kgdb_connected && arch_kgdb_ops.correct_hw_break) 1099 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1071 arch_kgdb_ops.correct_hw_break(); 1100 arch_kgdb_ops.correct_hw_break();
1072 else { 1101 else
1073#endif 1102#endif
1074 /* 1103 {
1075 * Clear all 6 debug registers: 1104 /*
1076 */ 1105 * Clear all 6 debug registers:
1077 1106 */
1078 set_debugreg(0UL, 0); 1107 set_debugreg(0UL, 0);
1079 set_debugreg(0UL, 1); 1108 set_debugreg(0UL, 1);
1080 set_debugreg(0UL, 2); 1109 set_debugreg(0UL, 2);
1081 set_debugreg(0UL, 3); 1110 set_debugreg(0UL, 3);
1082 set_debugreg(0UL, 6); 1111 set_debugreg(0UL, 6);
1083 set_debugreg(0UL, 7); 1112 set_debugreg(0UL, 7);
1084#ifdef CONFIG_KGDB
1085 /* If the kgdb is connected no debug regs should be altered. */
1086 } 1113 }
1087#endif
1088 1114
1089 fpu_init(); 1115 fpu_init();
1090 1116
@@ -1114,7 +1140,7 @@ void __cpuinit cpu_init(void)
1114 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE); 1140 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
1115 1141
1116 load_idt(&idt_descr); 1142 load_idt(&idt_descr);
1117 switch_to_new_gdt(); 1143 switch_to_new_gdt(cpu);
1118 1144
1119 /* 1145 /*
1120 * Set up and load the per-CPU TSS and LDT 1146 * Set up and load the per-CPU TSS and LDT
@@ -1135,9 +1161,6 @@ void __cpuinit cpu_init(void)
1135 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss); 1161 __set_tss_desc(cpu, GDT_ENTRY_DOUBLEFAULT_TSS, &doublefault_tss);
1136#endif 1162#endif
1137 1163
1138 /* Clear %gs. */
1139 asm volatile ("mov %0, %%gs" : : "r" (0));
1140
1141 /* Clear all 6 debug registers: */ 1164 /* Clear all 6 debug registers: */
1142 set_debugreg(0, 0); 1165 set_debugreg(0, 0);
1143 set_debugreg(0, 1); 1166 set_debugreg(0, 1);
diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
index 5c28b37dea11..fb039cd345d8 100644
--- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
+++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c
@@ -939,10 +939,25 @@ static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data)
939 free_cpumask_var(data->acpi_data.shared_cpu_map); 939 free_cpumask_var(data->acpi_data.shared_cpu_map);
940} 940}
941 941
942static int get_transition_latency(struct powernow_k8_data *data)
943{
944 int max_latency = 0;
945 int i;
946 for (i = 0; i < data->acpi_data.state_count; i++) {
947 int cur_latency = data->acpi_data.states[i].transition_latency
948 + data->acpi_data.states[i].bus_master_latency;
949 if (cur_latency > max_latency)
950 max_latency = cur_latency;
951 }
952 /* value in usecs, needs to be in nanoseconds */
953 return 1000 * max_latency;
954}
955
942#else 956#else
943static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; } 957static int powernow_k8_cpu_init_acpi(struct powernow_k8_data *data) { return -ENODEV; }
944static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; } 958static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { return; }
945static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; } 959static void powernow_k8_acpi_pst_values(struct powernow_k8_data *data, unsigned int index) { return; }
960static int get_transition_latency(struct powernow_k8_data *data) { return 0; }
946#endif /* CONFIG_X86_POWERNOW_K8_ACPI */ 961#endif /* CONFIG_X86_POWERNOW_K8_ACPI */
947 962
948/* Take a frequency, and issue the fid/vid transition command */ 963/* Take a frequency, and issue the fid/vid transition command */
@@ -1173,7 +1188,13 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1173 if (rc) { 1188 if (rc) {
1174 goto err_out; 1189 goto err_out;
1175 } 1190 }
1176 } 1191 /* Take a crude guess here.
1192 * That guess was in microseconds, so multiply by 1000 */
1193 pol->cpuinfo.transition_latency = (
1194 ((data->rvo + 8) * data->vstable * VST_UNITS_20US) +
1195 ((1 << data->irt) * 30)) * 1000;
1196 } else /* ACPI _PSS objects available */
1197 pol->cpuinfo.transition_latency = get_transition_latency(data);
1177 1198
1178 /* only run on specific CPU from here on */ 1199 /* only run on specific CPU from here on */
1179 oldmask = current->cpus_allowed; 1200 oldmask = current->cpus_allowed;
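get_transition_latency() above takes the worst-case _PSS latency (state-transition plus bus-master latency, both in microseconds) and scales it to nanoseconds for cpufreq; only when ACPI data is unavailable does the driver keep the old crude guess. The same max-and-convert logic over mock data (struct and field names here are illustrative, not the real ACPI processor structures):

#include <stdio.h>

struct pstate {                 /* mock of the relevant _PSS fields, in usecs */
        int transition_latency;
        int bus_master_latency;
};

static int max_transition_latency_ns(const struct pstate *st, int count)
{
        int max_us = 0;
        int i;

        for (i = 0; i < count; i++) {
                int cur = st[i].transition_latency + st[i].bus_master_latency;

                if (cur > max_us)
                        max_us = cur;
        }
        return max_us * 1000;   /* cpufreq wants nanoseconds */
}

int main(void)
{
        struct pstate states[] = { { 100, 10 }, { 80, 40 }, { 60, 5 } };

        printf("transition latency: %d ns\n",
               max_transition_latency_ns(states, 3));   /* 120 us -> 120000 ns */
        return 0;
}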
@@ -1204,11 +1225,6 @@ static int __cpuinit powernowk8_cpu_init(struct cpufreq_policy *pol)
1204 cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu)); 1225 cpumask_copy(pol->cpus, &per_cpu(cpu_core_map, pol->cpu));
1205 data->available_cores = pol->cpus; 1226 data->available_cores = pol->cpus;
1206 1227
1207 /* Take a crude guess here.
1208 * That guess was in microseconds, so multiply with 1000 */
1209 pol->cpuinfo.transition_latency = (((data->rvo + 8) * data->vstable * VST_UNITS_20US)
1210 + (3 * (1 << data->irt) * 10)) * 1000;
1211
1212 if (cpu_family == CPU_HW_PSTATE) 1228 if (cpu_family == CPU_HW_PSTATE)
1213 pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate); 1229 pol->cur = find_khz_freq_from_pstate(data->powernow_table, data->currpstate);
1214 else 1230 else
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 430e5c38a544..1f137a87d4bd 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -24,7 +24,7 @@
24#ifdef CONFIG_X86_LOCAL_APIC 24#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h> 25#include <asm/mpspec.h>
26#include <asm/apic.h> 26#include <asm/apic.h>
27#include <mach_apic.h> 27#include <asm/genapic.h>
28#endif 28#endif
29 29
30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
@@ -63,6 +63,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
64 } 64 }
65 65
66 /*
67 * There is a known erratum on Pentium III and Core Solo
68 * and Core Duo CPUs.
69 * " Page with PAT set to WC while associated MTRR is UC
70 * may consolidate to UC "
71 * Because of this erratum, it is better to stick with
72 * setting WC in MTRR rather than using PAT on these CPUs.
73 *
74 * Enable PAT WC only on P4, Core 2 or later CPUs.
75 */
76 if (c->x86 == 6 && c->x86_model < 15)
77 clear_cpu_cap(c, X86_FEATURE_PAT);
66} 78}
67 79
68#ifdef CONFIG_X86_32 80#ifdef CONFIG_X86_32
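The new guard keys off the CPUID family/model signature from leaf 1: family 6 with a model below 15 (anything older than Core 2) gets PAT cleared so WC stays in the MTRRs. A user-space sketch using GCC's <cpuid.h> to read the same signature and apply the same threshold (the decode follows the standard leaf-1 layout; this is not kernel code):

#include <stdio.h>
#include <cpuid.h>

int main(void)
{
        unsigned int eax, ebx, ecx, edx;
        unsigned int family, model;

        if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
                return 1;

        family = (eax >> 8) & 0xf;
        model  = (eax >> 4) & 0xf;
        if (family == 0xf)
                family += (eax >> 20) & 0xff;           /* extended family */
        if (family >= 6)
                model += ((eax >> 16) & 0xf) << 4;      /* extended model */

        printf("family %u model %u -> %s\n", family, model,
               family == 6 && model < 15 ? "stick with MTRR WC (erratum)" : "PAT WC ok");
        return 0;
}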
@@ -291,6 +303,9 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
291 ds_init_intel(c); 303 ds_init_intel(c);
292 } 304 }
293 305
306 if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush)
307 set_cpu_cap(c, X86_FEATURE_CLFLUSH_MONITOR);
308
294#ifdef CONFIG_X86_64 309#ifdef CONFIG_X86_64
295 if (c->x86 == 15) 310 if (c->x86 == 15)
296 c->x86_cache_alignment = c->x86_clflush_size * 2; 311 c->x86_cache_alignment = c->x86_clflush_size * 2;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index da299eb85fc0..7293508d8f5c 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -147,7 +147,16 @@ struct _cpuid4_info {
147 union _cpuid4_leaf_ecx ecx; 147 union _cpuid4_leaf_ecx ecx;
148 unsigned long size; 148 unsigned long size;
149 unsigned long can_disable; 149 unsigned long can_disable;
150 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 150 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
151};
152
153/* subset of above _cpuid4_info w/o shared_cpu_map */
154struct _cpuid4_info_regs {
155 union _cpuid4_leaf_eax eax;
156 union _cpuid4_leaf_ebx ebx;
157 union _cpuid4_leaf_ecx ecx;
158 unsigned long size;
159 unsigned long can_disable;
151}; 160};
152 161
153#ifdef CONFIG_PCI 162#ifdef CONFIG_PCI
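Turning shared_cpu_map into a DECLARE_BITMAP keeps struct _cpuid4_info from embedding a full cpumask_t, and the call sites below wrap the raw array with to_cpumask() before handing it to the cpumask helpers. In plain C the bitmap is simply an array of unsigned longs sized to hold NR_CPUS bits; a rough sketch with hypothetical helpers:

#include <stdio.h>
#include <limits.h>

#define NR_CPUS        64
#define BITS_PER_LONG  (sizeof(unsigned long) * CHAR_BIT)
#define BITMAP_LONGS   ((NR_CPUS + BITS_PER_LONG - 1) / BITS_PER_LONG)

struct cache_leaf {
        /* roughly what DECLARE_BITMAP(shared_cpu_map, NR_CPUS) expands to */
        unsigned long shared_cpu_map[BITMAP_LONGS];
};

static void set_cpu(unsigned long *map, unsigned int cpu)
{
        map[cpu / BITS_PER_LONG] |= 1UL << (cpu % BITS_PER_LONG);
}

static int test_cpu(const unsigned long *map, unsigned int cpu)
{
        return (map[cpu / BITS_PER_LONG] >> (cpu % BITS_PER_LONG)) & 1;
}

int main(void)
{
        struct cache_leaf leaf = { { 0 } };

        set_cpu(leaf.shared_cpu_map, 3);        /* ~ cpumask_set_cpu(3, ...) */
        printf("cpu3 shares: %d, cpu4 shares: %d\n",
               test_cpu(leaf.shared_cpu_map, 3),
               test_cpu(leaf.shared_cpu_map, 4));
        return 0;
}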
@@ -278,7 +287,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
278} 287}
279 288
280static void __cpuinit 289static void __cpuinit
281amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) 290amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
282{ 291{
283 if (index < 3) 292 if (index < 3)
284 return; 293 return;
@@ -286,7 +295,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
286} 295}
287 296
288static int 297static int
289__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 298__cpuinit cpuid4_cache_lookup_regs(int index,
299 struct _cpuid4_info_regs *this_leaf)
290{ 300{
291 union _cpuid4_leaf_eax eax; 301 union _cpuid4_leaf_eax eax;
292 union _cpuid4_leaf_ebx ebx; 302 union _cpuid4_leaf_ebx ebx;
@@ -314,6 +324,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
314 return 0; 324 return 0;
315} 325}
316 326
327static int
328__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
329{
330 struct _cpuid4_info_regs *leaf_regs =
331 (struct _cpuid4_info_regs *)this_leaf;
332
333 return cpuid4_cache_lookup_regs(index, leaf_regs);
334}
335
317static int __cpuinit find_num_cache_leaves(void) 336static int __cpuinit find_num_cache_leaves(void)
318{ 337{
319 unsigned int eax, ebx, ecx, edx; 338 unsigned int eax, ebx, ecx, edx;
@@ -353,11 +372,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
353 * parameters cpuid leaf to find the cache details 372 * parameters cpuid leaf to find the cache details
354 */ 373 */
355 for (i = 0; i < num_cache_leaves; i++) { 374 for (i = 0; i < num_cache_leaves; i++) {
356 struct _cpuid4_info this_leaf; 375 struct _cpuid4_info_regs this_leaf;
357
358 int retval; 376 int retval;
359 377
360 retval = cpuid4_cache_lookup(i, &this_leaf); 378 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
361 if (retval >= 0) { 379 if (retval >= 0) {
362 switch(this_leaf.eax.split.level) { 380 switch(this_leaf.eax.split.level) {
363 case 1: 381 case 1:
@@ -506,17 +524,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
506 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 524 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
507 525
508 if (num_threads_sharing == 1) 526 if (num_threads_sharing == 1)
509 cpu_set(cpu, this_leaf->shared_cpu_map); 527 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
510 else { 528 else {
511 index_msb = get_count_order(num_threads_sharing); 529 index_msb = get_count_order(num_threads_sharing);
512 530
513 for_each_online_cpu(i) { 531 for_each_online_cpu(i) {
514 if (cpu_data(i).apicid >> index_msb == 532 if (cpu_data(i).apicid >> index_msb ==
515 c->apicid >> index_msb) { 533 c->apicid >> index_msb) {
516 cpu_set(i, this_leaf->shared_cpu_map); 534 cpumask_set_cpu(i,
535 to_cpumask(this_leaf->shared_cpu_map));
517 if (i != cpu && per_cpu(cpuid4_info, i)) { 536 if (i != cpu && per_cpu(cpuid4_info, i)) {
518 sibling_leaf = CPUID4_INFO_IDX(i, index); 537 sibling_leaf =
519 cpu_set(cpu, sibling_leaf->shared_cpu_map); 538 CPUID4_INFO_IDX(i, index);
539 cpumask_set_cpu(cpu, to_cpumask(
540 sibling_leaf->shared_cpu_map));
520 } 541 }
521 } 542 }
522 } 543 }
@@ -528,9 +549,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
528 int sibling; 549 int sibling;
529 550
530 this_leaf = CPUID4_INFO_IDX(cpu, index); 551 this_leaf = CPUID4_INFO_IDX(cpu, index);
531 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 552 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
532 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 553 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
533 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 554 cpumask_clear_cpu(cpu,
555 to_cpumask(sibling_leaf->shared_cpu_map));
534 } 556 }
535} 557}
536#else 558#else
@@ -635,8 +657,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
635 int n = 0; 657 int n = 0;
636 658
637 if (len > 1) { 659 if (len > 1) {
638 cpumask_t *mask = &this_leaf->shared_cpu_map; 660 const struct cpumask *mask;
639 661
662 mask = to_cpumask(this_leaf->shared_cpu_map);
640 n = type? 663 n = type?
641 cpulist_scnprintf(buf, len-2, mask) : 664 cpulist_scnprintf(buf, len-2, mask) :
642 cpumask_scnprintf(buf, len-2, mask); 665 cpumask_scnprintf(buf, len-2, mask);
@@ -699,7 +722,8 @@ static struct pci_dev *get_k8_northbridge(int node)
699 722
700static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) 723static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
701{ 724{
702 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 725 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
726 int node = cpu_to_node(cpumask_first(mask));
703 struct pci_dev *dev = NULL; 727 struct pci_dev *dev = NULL;
704 ssize_t ret = 0; 728 ssize_t ret = 0;
705 int i; 729 int i;
@@ -733,7 +757,8 @@ static ssize_t
733store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, 757store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
734 size_t count) 758 size_t count)
735{ 759{
736 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 760 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
761 int node = cpu_to_node(cpumask_first(mask));
737 struct pci_dev *dev = NULL; 762 struct pci_dev *dev = NULL;
738 unsigned int ret, index, val; 763 unsigned int ret, index, val;
739 764
@@ -878,7 +903,7 @@ err_out:
878 return -ENOMEM; 903 return -ENOMEM;
879} 904}
880 905
881static cpumask_t cache_dev_map = CPU_MASK_NONE; 906static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
882 907
883/* Add/Remove cache interface for CPU device */ 908/* Add/Remove cache interface for CPU device */
884static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 909static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -918,7 +943,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
918 } 943 }
919 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 944 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
920 } 945 }
921 cpu_set(cpu, cache_dev_map); 946 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
922 947
923 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 948 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
924 return 0; 949 return 0;
@@ -931,9 +956,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
931 956
932 if (per_cpu(cpuid4_info, cpu) == NULL) 957 if (per_cpu(cpuid4_info, cpu) == NULL)
933 return; 958 return;
934 if (!cpu_isset(cpu, cache_dev_map)) 959 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
935 return; 960 return;
936 cpu_clear(cpu, cache_dev_map); 961 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
937 962
938 for (i = 0; i < num_cache_leaves; i++) 963 for (i = 0; i < num_cache_leaves; i++)
939 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 964 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 8ae8c4ff094d..4772e91e8246 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
481 481
482#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
484 i = first_cpu(per_cpu(cpu_core_map, cpu)); 484 i = cpumask_first(&per_cpu(cpu_core_map, cpu));
485 485
486 /* first core not up yet */ 486 /* first core not up yet */
487 if (cpu_data(i).cpu_core_id) 487 if (cpu_data(i).cpu_core_id)
@@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
501 if (err) 501 if (err)
502 goto out; 502 goto out;
503 503
504 b->cpus = per_cpu(cpu_core_map, cpu); 504 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
505 per_cpu(threshold_banks, cpu)[bank] = b; 505 per_cpu(threshold_banks, cpu)[bank] = b;
506 goto out; 506 goto out;
507 } 507 }
@@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
512 err = -ENOMEM; 512 err = -ENOMEM;
513 goto out; 513 goto out;
514 } 514 }
515 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
516 kfree(b);
517 err = -ENOMEM;
518 goto out;
519 }
515 520
516 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 521 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
517 if (!b->kobj) 522 if (!b->kobj)
518 goto out_free; 523 goto out_free;
519 524
520#ifndef CONFIG_SMP 525#ifndef CONFIG_SMP
521 b->cpus = CPU_MASK_ALL; 526 cpumask_setall(b->cpus);
522#else 527#else
523 b->cpus = per_cpu(cpu_core_map, cpu); 528 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
524#endif 529#endif
525 530
526 per_cpu(threshold_banks, cpu)[bank] = b; 531 per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
529 if (err) 534 if (err)
530 goto out_free; 535 goto out_free;
531 536
532 for_each_cpu_mask_nr(i, b->cpus) { 537 for_each_cpu(i, b->cpus) {
533 if (i == cpu) 538 if (i == cpu)
534 continue; 539 continue;
535 540
@@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
545 550
546out_free: 551out_free:
547 per_cpu(threshold_banks, cpu)[bank] = NULL; 552 per_cpu(threshold_banks, cpu)[bank] = NULL;
553 free_cpumask_var(b->cpus);
548 kfree(b); 554 kfree(b);
549out: 555out:
550 return err; 556 return err;
@@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
619#endif 625#endif
620 626
621 /* remove all sibling symlinks before unregistering */ 627 /* remove all sibling symlinks before unregistering */
622 for_each_cpu_mask_nr(i, b->cpus) { 628 for_each_cpu(i, b->cpus) {
623 if (i == cpu) 629 if (i == cpu)
624 continue; 630 continue;
625 631
@@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
632free_out: 638free_out:
633 kobject_del(b->kobj); 639 kobject_del(b->kobj);
634 kobject_put(b->kobj); 640 kobject_put(b->kobj);
641 free_cpumask_var(b->cpus);
635 kfree(b); 642 kfree(b);
636 per_cpu(threshold_banks, cpu)[bank] = NULL; 643 per_cpu(threshold_banks, cpu)[bank] = NULL;
637} 644}
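threshold_bank switches from an embedded cpumask_t to cpumask_var_t so the structure stays small when NR_CPUS is large: the mask is now obtained with alloc_cpumask_var(), filled with cpumask_copy() or cpumask_setall(), and released with free_cpumask_var() on every exit path. The same shape in plain C, with a heap-allocated byte mask standing in for the cpumask (names are illustrative):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS     4096        /* at this size, embedding a mask per bank hurts */
#define MASK_BYTES  (NR_CPUS / 8)

struct threshold_bank {
        unsigned char *cpus;    /* was: a full embedded mask */
};

static struct threshold_bank *bank_create(const unsigned char *core_map)
{
        struct threshold_bank *b = calloc(1, sizeof(*b));

        if (!b)
                return NULL;
        b->cpus = malloc(MASK_BYTES);           /* ~ alloc_cpumask_var() */
        if (!b->cpus) {
                free(b);
                return NULL;
        }
        memcpy(b->cpus, core_map, MASK_BYTES);  /* ~ cpumask_copy() */
        return b;
}

static void bank_destroy(struct threshold_bank *b)
{
        if (!b)
                return;
        free(b->cpus);                          /* ~ free_cpumask_var() */
        free(b);
}

int main(void)
{
        unsigned char core_map[MASK_BYTES] = { 0x0f }; /* CPUs 0-3 share the bank */
        struct threshold_bank *b = bank_create(core_map);

        printf("bank %screated\n", b ? "" : "not ");
        bank_destroy(b);
        return 0;
}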
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd39..5e8c79e748a6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -7,6 +7,7 @@
7#include <linux/interrupt.h> 7#include <linux/interrupt.h>
8#include <linux/percpu.h> 8#include <linux/percpu.h>
9#include <asm/processor.h> 9#include <asm/processor.h>
10#include <asm/apic.h>
10#include <asm/msr.h> 11#include <asm/msr.h>
11#include <asm/mce.h> 12#include <asm/mce.h>
12#include <asm/hw_irq.h> 13#include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19e35ab..ad7f2a696f4a 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,11 +24,11 @@
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/hpet.h> 25#include <asm/hpet.h>
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30 30
31#include <mach_ipi.h> 31#include <asm/genapic.h>
32 32
33 33
34#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC) 34#if defined(CONFIG_SMP) && defined(CONFIG_X86_LOCAL_APIC)
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
index 6b1f6f6f8661..87d103ded1c3 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
@@ -99,7 +99,7 @@ print_context_stack(struct thread_info *tinfo,
99 frame = frame->next_frame; 99 frame = frame->next_frame;
100 bp = (unsigned long) frame; 100 bp = (unsigned long) frame;
101 } else { 101 } else {
102 ops->address(data, addr, bp == 0); 102 ops->address(data, addr, 0);
103 } 103 }
104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph); 104 print_ftrace_graph_addr(addr, data, ops, tinfo, graph);
105 } 105 }
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d0707048..d35db5993fd6 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
106 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
107{ 107{
108 const unsigned cpu = get_cpu(); 108 const unsigned cpu = get_cpu();
109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irq_stack_end =
110 (unsigned long *)per_cpu(irq_stack_ptr, cpu);
110 unsigned used = 0; 111 unsigned used = 0;
111 struct thread_info *tinfo; 112 struct thread_info *tinfo;
112 int graph = 0; 113 int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *) estack_end[-2]; 161 stack = (unsigned long *) estack_end[-2];
161 continue; 162 continue;
162 } 163 }
163 if (irqstack_end) { 164 if (irq_stack_end) {
164 unsigned long *irqstack; 165 unsigned long *irq_stack;
165 irqstack = irqstack_end - 166 irq_stack = irq_stack_end -
166 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 167 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
167 168
168 if (stack >= irqstack && stack < irqstack_end) { 169 if (stack >= irq_stack && stack < irq_stack_end) {
169 if (ops->stack(data, "IRQ") < 0) 170 if (ops->stack(data, "IRQ") < 0)
170 break; 171 break;
171 bp = print_context_stack(tinfo, stack, bp, 172 bp = print_context_stack(tinfo, stack, bp,
172 ops, data, irqstack_end, &graph); 173 ops, data, irq_stack_end, &graph);
173 /* 174 /*
174 * We link to the next stack (which would be 175 * We link to the next stack (which would be
175 * the process stack normally) the last 176 * the process stack normally) the last
176 * pointer (index -1 to end) in the IRQ stack: 177 * pointer (index -1 to end) in the IRQ stack:
177 */ 178 */
178 stack = (unsigned long *) (irqstack_end[-1]); 179 stack = (unsigned long *) (irq_stack_end[-1]);
179 irqstack_end = NULL; 180 irq_stack_end = NULL;
180 ops->stack(data, "EOI"); 181 ops->stack(data, "EOI");
181 continue; 182 continue;
182 } 183 }
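dump_trace() above decides whether a frame sits on the per-CPU IRQ stack by testing it against [irq_stack, irq_stack_end), where irq_stack_end now comes from the irq_stack_ptr per-cpu variable and the usable region is IRQ_STACK_SIZE minus the 64-byte scratch area kept at the top. A tiny sketch of that range test with an illustrative size:

#include <stdio.h>
#include <stdlib.h>

#define IRQ_STACK_SIZE 16384    /* illustrative, not the kernel's constant */

/* Mirror of the range test in dump_trace(): is sp inside the IRQ stack? */
static int on_irq_stack(const unsigned long *sp, const unsigned long *irq_stack_end)
{
        const unsigned long *irq_stack =
                irq_stack_end - (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);

        return sp >= irq_stack && sp < irq_stack_end;
}

int main(void)
{
        unsigned long *buf = malloc(IRQ_STACK_SIZE);
        unsigned long *irq_stack_end;

        if (!buf)
                return 1;
        /* stand-in for per_cpu(irq_stack_ptr, cpu) */
        irq_stack_end = buf + IRQ_STACK_SIZE / sizeof(*buf);

        printf("near top: %d, at buffer base: %d\n",
               on_irq_stack(irq_stack_end - 16, irq_stack_end),
               on_irq_stack(buf, irq_stack_end));
        free(buf);
        return 0;
}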
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
199 unsigned long *stack; 200 unsigned long *stack;
200 int i; 201 int i;
201 const int cpu = smp_processor_id(); 202 const int cpu = smp_processor_id();
202 unsigned long *irqstack_end = 203 unsigned long *irq_stack_end =
203 (unsigned long *) (cpu_pda(cpu)->irqstackptr); 204 (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
204 unsigned long *irqstack = 205 unsigned long *irq_stack =
205 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 206 (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
206 207
207 /* 208 /*
208 * debugging aid: "show_stack(NULL, NULL);" prints the 209 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
218 219
219 stack = sp; 220 stack = sp;
220 for (i = 0; i < kstack_depth_to_print; i++) { 221 for (i = 0; i < kstack_depth_to_print; i++) {
221 if (stack >= irqstack && stack <= irqstack_end) { 222 if (stack >= irq_stack && stack <= irq_stack_end) {
222 if (stack == irqstack_end) { 223 if (stack == irq_stack_end) {
223 stack = (unsigned long *) (irqstack_end[-1]); 224 stack = (unsigned long *) (irq_stack_end[-1]);
224 printk(" <EOI> "); 225 printk(" <EOI> ");
225 } 226 }
226 } else { 227 } else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
241 int i; 242 int i;
242 unsigned long sp; 243 unsigned long sp;
243 const int cpu = smp_processor_id(); 244 const int cpu = smp_processor_id();
244 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 245 struct task_struct *cur = current;
245 246
246 sp = regs->sp; 247 sp = regs->sp;
247 printk("CPU %d ", cpu); 248 printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe11..b205272ad394 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,10 +366,12 @@ void __init efi_init(void)
366 SMBIOS_TABLE_GUID)) { 366 SMBIOS_TABLE_GUID)) {
367 efi.smbios = config_tables[i].table; 367 efi.smbios = config_tables[i].table;
368 printk(" SMBIOS=0x%lx ", config_tables[i].table); 368 printk(" SMBIOS=0x%lx ", config_tables[i].table);
369#ifdef CONFIG_X86_UV
369 } else if (!efi_guidcmp(config_tables[i].guid, 370 } else if (!efi_guidcmp(config_tables[i].guid,
370 UV_SYSTEM_TABLE_GUID)) { 371 UV_SYSTEM_TABLE_GUID)) {
371 efi.uv_systab = config_tables[i].table; 372 efi.uv_systab = config_tables[i].table;
372 printk(" UVsystab=0x%lx ", config_tables[i].table); 373 printk(" UVsystab=0x%lx ", config_tables[i].table);
374#endif
373 } else if (!efi_guidcmp(config_tables[i].guid, 375 } else if (!efi_guidcmp(config_tables[i].guid,
374 HCDP_TABLE_GUID)) { 376 HCDP_TABLE_GUID)) {
375 efi.hcdp = config_tables[i].table; 377 efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215f..a4ee29127fdf 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -36,6 +36,7 @@
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm/efi.h> 37#include <asm/efi.h>
38#include <asm/cacheflush.h> 38#include <asm/cacheflush.h>
39#include <asm/fixmap.h>
39 40
40static pgd_t save_pgd __initdata; 41static pgd_t save_pgd __initdata;
41static unsigned long efi_flags __initdata; 42static unsigned long efi_flags __initdata;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..e99206831459 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -30,12 +30,13 @@
30 * 1C(%esp) - %ds 30 * 1C(%esp) - %ds
31 * 20(%esp) - %es 31 * 20(%esp) - %es
32 * 24(%esp) - %fs 32 * 24(%esp) - %fs
33 * 28(%esp) - orig_eax 33 * 28(%esp) - %gs saved iff !CONFIG_X86_32_LAZY_GS
34 * 2C(%esp) - %eip 34 * 2C(%esp) - orig_eax
35 * 30(%esp) - %cs 35 * 30(%esp) - %eip
36 * 34(%esp) - %eflags 36 * 34(%esp) - %cs
37 * 38(%esp) - %oldesp 37 * 38(%esp) - %eflags
38 * 3C(%esp) - %oldss 38 * 3C(%esp) - %oldesp
39 * 40(%esp) - %oldss
39 * 40 *
40 * "current" is in register %ebx during any slow entries. 41 * "current" is in register %ebx during any slow entries.
41 */ 42 */
@@ -101,121 +102,221 @@
101#define resume_userspace_sig resume_userspace 102#define resume_userspace_sig resume_userspace
102#endif 103#endif
103 104
104#define SAVE_ALL \ 105/*
105 cld; \ 106 * User gs save/restore
106 pushl %fs; \ 107 *
107 CFI_ADJUST_CFA_OFFSET 4;\ 108 * %gs is used for userland TLS and kernel only uses it for stack
108 /*CFI_REL_OFFSET fs, 0;*/\ 109 * canary which is required to be at %gs:20 by gcc. Read the comment
109 pushl %es; \ 110 * at the top of stackprotector.h for more info.
110 CFI_ADJUST_CFA_OFFSET 4;\ 111 *
111 /*CFI_REL_OFFSET es, 0;*/\ 112 * Local labels 98 and 99 are used.
112 pushl %ds; \ 113 */
113 CFI_ADJUST_CFA_OFFSET 4;\ 114#ifdef CONFIG_X86_32_LAZY_GS
114 /*CFI_REL_OFFSET ds, 0;*/\ 115
115 pushl %eax; \ 116 /* unfortunately push/pop can't be no-op */
116 CFI_ADJUST_CFA_OFFSET 4;\ 117.macro PUSH_GS
117 CFI_REL_OFFSET eax, 0;\ 118 pushl $0
118 pushl %ebp; \ 119 CFI_ADJUST_CFA_OFFSET 4
119 CFI_ADJUST_CFA_OFFSET 4;\ 120.endm
120 CFI_REL_OFFSET ebp, 0;\ 121.macro POP_GS pop=0
121 pushl %edi; \ 122 addl $(4 + \pop), %esp
122 CFI_ADJUST_CFA_OFFSET 4;\ 123 CFI_ADJUST_CFA_OFFSET -(4 + \pop)
123 CFI_REL_OFFSET edi, 0;\ 124.endm
124 pushl %esi; \ 125.macro POP_GS_EX
125 CFI_ADJUST_CFA_OFFSET 4;\ 126.endm
126 CFI_REL_OFFSET esi, 0;\ 127
127 pushl %edx; \ 128 /* all the rest are no-op */
128 CFI_ADJUST_CFA_OFFSET 4;\ 129.macro PTGS_TO_GS
129 CFI_REL_OFFSET edx, 0;\ 130.endm
130 pushl %ecx; \ 131.macro PTGS_TO_GS_EX
131 CFI_ADJUST_CFA_OFFSET 4;\ 132.endm
132 CFI_REL_OFFSET ecx, 0;\ 133.macro GS_TO_REG reg
133 pushl %ebx; \ 134.endm
134 CFI_ADJUST_CFA_OFFSET 4;\ 135.macro REG_TO_PTGS reg
135 CFI_REL_OFFSET ebx, 0;\ 136.endm
136 movl $(__USER_DS), %edx; \ 137.macro SET_KERNEL_GS reg
137 movl %edx, %ds; \ 138.endm
138 movl %edx, %es; \ 139
139 movl $(__KERNEL_PERCPU), %edx; \ 140#else /* CONFIG_X86_32_LAZY_GS */
141
142.macro PUSH_GS
143 pushl %gs
144 CFI_ADJUST_CFA_OFFSET 4
145 /*CFI_REL_OFFSET gs, 0*/
146.endm
147
148.macro POP_GS pop=0
14998: popl %gs
150 CFI_ADJUST_CFA_OFFSET -4
151 /*CFI_RESTORE gs*/
152 .if \pop <> 0
153 add $\pop, %esp
154 CFI_ADJUST_CFA_OFFSET -\pop
155 .endif
156.endm
157.macro POP_GS_EX
158.pushsection .fixup, "ax"
15999: movl $0, (%esp)
160 jmp 98b
161.section __ex_table, "a"
162 .align 4
163 .long 98b, 99b
164.popsection
165.endm
166
167.macro PTGS_TO_GS
16898: mov PT_GS(%esp), %gs
169.endm
170.macro PTGS_TO_GS_EX
171.pushsection .fixup, "ax"
17299: movl $0, PT_GS(%esp)
173 jmp 98b
174.section __ex_table, "a"
175 .align 4
176 .long 98b, 99b
177.popsection
178.endm
179
180.macro GS_TO_REG reg
181 movl %gs, \reg
182 /*CFI_REGISTER gs, \reg*/
183.endm
184.macro REG_TO_PTGS reg
185 movl \reg, PT_GS(%esp)
186 /*CFI_REL_OFFSET gs, PT_GS*/
187.endm
188.macro SET_KERNEL_GS reg
189 movl $(__KERNEL_STACK_CANARY), \reg
190 movl \reg, %gs
191.endm
192
193#endif /* CONFIG_X86_32_LAZY_GS */
194
195.macro SAVE_ALL
196 cld
197 PUSH_GS
198 pushl %fs
199 CFI_ADJUST_CFA_OFFSET 4
200 /*CFI_REL_OFFSET fs, 0;*/
201 pushl %es
202 CFI_ADJUST_CFA_OFFSET 4
203 /*CFI_REL_OFFSET es, 0;*/
204 pushl %ds
205 CFI_ADJUST_CFA_OFFSET 4
206 /*CFI_REL_OFFSET ds, 0;*/
207 pushl %eax
208 CFI_ADJUST_CFA_OFFSET 4
209 CFI_REL_OFFSET eax, 0
210 pushl %ebp
211 CFI_ADJUST_CFA_OFFSET 4
212 CFI_REL_OFFSET ebp, 0
213 pushl %edi
214 CFI_ADJUST_CFA_OFFSET 4
215 CFI_REL_OFFSET edi, 0
216 pushl %esi
217 CFI_ADJUST_CFA_OFFSET 4
218 CFI_REL_OFFSET esi, 0
219 pushl %edx
220 CFI_ADJUST_CFA_OFFSET 4
221 CFI_REL_OFFSET edx, 0
222 pushl %ecx
223 CFI_ADJUST_CFA_OFFSET 4
224 CFI_REL_OFFSET ecx, 0
225 pushl %ebx
226 CFI_ADJUST_CFA_OFFSET 4
227 CFI_REL_OFFSET ebx, 0
228 movl $(__USER_DS), %edx
229 movl %edx, %ds
230 movl %edx, %es
231 movl $(__KERNEL_PERCPU), %edx
140 movl %edx, %fs 232 movl %edx, %fs
233 SET_KERNEL_GS %edx
234.endm
141 235
142#define RESTORE_INT_REGS \ 236.macro RESTORE_INT_REGS
143 popl %ebx; \ 237 popl %ebx
144 CFI_ADJUST_CFA_OFFSET -4;\ 238 CFI_ADJUST_CFA_OFFSET -4
145 CFI_RESTORE ebx;\ 239 CFI_RESTORE ebx
146 popl %ecx; \ 240 popl %ecx
147 CFI_ADJUST_CFA_OFFSET -4;\ 241 CFI_ADJUST_CFA_OFFSET -4
148 CFI_RESTORE ecx;\ 242 CFI_RESTORE ecx
149 popl %edx; \ 243 popl %edx
150 CFI_ADJUST_CFA_OFFSET -4;\ 244 CFI_ADJUST_CFA_OFFSET -4
151 CFI_RESTORE edx;\ 245 CFI_RESTORE edx
152 popl %esi; \ 246 popl %esi
153 CFI_ADJUST_CFA_OFFSET -4;\ 247 CFI_ADJUST_CFA_OFFSET -4
154 CFI_RESTORE esi;\ 248 CFI_RESTORE esi
155 popl %edi; \ 249 popl %edi
156 CFI_ADJUST_CFA_OFFSET -4;\ 250 CFI_ADJUST_CFA_OFFSET -4
157 CFI_RESTORE edi;\ 251 CFI_RESTORE edi
158 popl %ebp; \ 252 popl %ebp
159 CFI_ADJUST_CFA_OFFSET -4;\ 253 CFI_ADJUST_CFA_OFFSET -4
160 CFI_RESTORE ebp;\ 254 CFI_RESTORE ebp
161 popl %eax; \ 255 popl %eax
162 CFI_ADJUST_CFA_OFFSET -4;\ 256 CFI_ADJUST_CFA_OFFSET -4
163 CFI_RESTORE eax 257 CFI_RESTORE eax
258.endm
164 259
165#define RESTORE_REGS \ 260.macro RESTORE_REGS pop=0
166 RESTORE_INT_REGS; \ 261 RESTORE_INT_REGS
1671: popl %ds; \ 2621: popl %ds
168 CFI_ADJUST_CFA_OFFSET -4;\ 263 CFI_ADJUST_CFA_OFFSET -4
169 /*CFI_RESTORE ds;*/\ 264 /*CFI_RESTORE ds;*/
1702: popl %es; \ 2652: popl %es
171 CFI_ADJUST_CFA_OFFSET -4;\ 266 CFI_ADJUST_CFA_OFFSET -4
172 /*CFI_RESTORE es;*/\ 267 /*CFI_RESTORE es;*/
1733: popl %fs; \ 2683: popl %fs
174 CFI_ADJUST_CFA_OFFSET -4;\ 269 CFI_ADJUST_CFA_OFFSET -4
175 /*CFI_RESTORE fs;*/\ 270 /*CFI_RESTORE fs;*/
176.pushsection .fixup,"ax"; \ 271 POP_GS \pop
1774: movl $0,(%esp); \ 272.pushsection .fixup, "ax"
178 jmp 1b; \ 2734: movl $0, (%esp)
1795: movl $0,(%esp); \ 274 jmp 1b
180 jmp 2b; \ 2755: movl $0, (%esp)
1816: movl $0,(%esp); \ 276 jmp 2b
182 jmp 3b; \ 2776: movl $0, (%esp)
183.section __ex_table,"a";\ 278 jmp 3b
184 .align 4; \ 279.section __ex_table, "a"
185 .long 1b,4b; \ 280 .align 4
186 .long 2b,5b; \ 281 .long 1b, 4b
187 .long 3b,6b; \ 282 .long 2b, 5b
283 .long 3b, 6b
188.popsection 284.popsection
285 POP_GS_EX
286.endm
189 287
190#define RING0_INT_FRAME \ 288.macro RING0_INT_FRAME
191 CFI_STARTPROC simple;\ 289 CFI_STARTPROC simple
192 CFI_SIGNAL_FRAME;\ 290 CFI_SIGNAL_FRAME
193 CFI_DEF_CFA esp, 3*4;\ 291 CFI_DEF_CFA esp, 3*4
194 /*CFI_OFFSET cs, -2*4;*/\ 292 /*CFI_OFFSET cs, -2*4;*/
195 CFI_OFFSET eip, -3*4 293 CFI_OFFSET eip, -3*4
294.endm
196 295
197#define RING0_EC_FRAME \ 296.macro RING0_EC_FRAME
198 CFI_STARTPROC simple;\ 297 CFI_STARTPROC simple
199 CFI_SIGNAL_FRAME;\ 298 CFI_SIGNAL_FRAME
200 CFI_DEF_CFA esp, 4*4;\ 299 CFI_DEF_CFA esp, 4*4
201 /*CFI_OFFSET cs, -2*4;*/\ 300 /*CFI_OFFSET cs, -2*4;*/
202 CFI_OFFSET eip, -3*4 301 CFI_OFFSET eip, -3*4
302.endm
203 303
204#define RING0_PTREGS_FRAME \ 304.macro RING0_PTREGS_FRAME
205 CFI_STARTPROC simple;\ 305 CFI_STARTPROC simple
206 CFI_SIGNAL_FRAME;\ 306 CFI_SIGNAL_FRAME
207 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX;\ 307 CFI_DEF_CFA esp, PT_OLDESP-PT_EBX
208 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/\ 308 /*CFI_OFFSET cs, PT_CS-PT_OLDESP;*/
209 CFI_OFFSET eip, PT_EIP-PT_OLDESP;\ 309 CFI_OFFSET eip, PT_EIP-PT_OLDESP
210 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/\ 310 /*CFI_OFFSET es, PT_ES-PT_OLDESP;*/
211 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/\ 311 /*CFI_OFFSET ds, PT_DS-PT_OLDESP;*/
212 CFI_OFFSET eax, PT_EAX-PT_OLDESP;\ 312 CFI_OFFSET eax, PT_EAX-PT_OLDESP
213 CFI_OFFSET ebp, PT_EBP-PT_OLDESP;\ 313 CFI_OFFSET ebp, PT_EBP-PT_OLDESP
214 CFI_OFFSET edi, PT_EDI-PT_OLDESP;\ 314 CFI_OFFSET edi, PT_EDI-PT_OLDESP
215 CFI_OFFSET esi, PT_ESI-PT_OLDESP;\ 315 CFI_OFFSET esi, PT_ESI-PT_OLDESP
216 CFI_OFFSET edx, PT_EDX-PT_OLDESP;\ 316 CFI_OFFSET edx, PT_EDX-PT_OLDESP
217 CFI_OFFSET ecx, PT_ECX-PT_OLDESP;\ 317 CFI_OFFSET ecx, PT_ECX-PT_OLDESP
218 CFI_OFFSET ebx, PT_EBX-PT_OLDESP 318 CFI_OFFSET ebx, PT_EBX-PT_OLDESP
319.endm
219 320
220ENTRY(ret_from_fork) 321ENTRY(ret_from_fork)
221 CFI_STARTPROC 322 CFI_STARTPROC
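Most of the %gs plumbing above exists so gcc's stack protector can find its canary at %gs:20 on 32-bit kernels. The mechanism itself is small: a reference canary is copied onto the stack in the prologue and checked in the epilogue, aborting on mismatch. A hand-rolled user-space imitation of that idea (real code gets this from -fstack-protector and __stack_chk_fail(); nothing below is gcc's or the kernel's actual implementation):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static unsigned long stack_canary;     /* the kernel keeps this at %gs:20 per task */

static void copy_name(const char *src)
{
        unsigned long canary = stack_canary;    /* prologue: stash the guard */
        char buf[16];

        strncpy(buf, src, sizeof(buf) - 1);
        buf[sizeof(buf) - 1] = '\0';
        printf("hello %s\n", buf);

        if (canary != stack_canary)             /* epilogue: check before return */
                abort();                        /* ~ __stack_chk_fail() */
}

int main(void)
{
        stack_canary = 0x9a3c0ffeUL;            /* normally randomized at boot */
        copy_name("world");
        return 0;
}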
@@ -362,6 +463,7 @@ sysenter_exit:
362 xorl %ebp,%ebp 463 xorl %ebp,%ebp
363 TRACE_IRQS_ON 464 TRACE_IRQS_ON
3641: mov PT_FS(%esp), %fs 4651: mov PT_FS(%esp), %fs
466 PTGS_TO_GS
365 ENABLE_INTERRUPTS_SYSEXIT 467 ENABLE_INTERRUPTS_SYSEXIT
366 468
367#ifdef CONFIG_AUDITSYSCALL 469#ifdef CONFIG_AUDITSYSCALL
@@ -410,6 +512,7 @@ sysexit_audit:
410 .align 4 512 .align 4
411 .long 1b,2b 513 .long 1b,2b
412.popsection 514.popsection
515 PTGS_TO_GS_EX
413ENDPROC(ia32_sysenter_target) 516ENDPROC(ia32_sysenter_target)
414 517
415 # system call handler stub 518 # system call handler stub
@@ -452,8 +555,7 @@ restore_all:
452restore_nocheck: 555restore_nocheck:
453 TRACE_IRQS_IRET 556 TRACE_IRQS_IRET
454restore_nocheck_notrace: 557restore_nocheck_notrace:
455 RESTORE_REGS 558 RESTORE_REGS 4 # skip orig_eax/error_code
456 addl $4, %esp # skip orig_eax/error_code
457 CFI_ADJUST_CFA_OFFSET -4 559 CFI_ADJUST_CFA_OFFSET -4
458irq_return: 560irq_return:
459 INTERRUPT_RETURN 561 INTERRUPT_RETURN
@@ -595,28 +697,50 @@ syscall_badsys:
595END(syscall_badsys) 697END(syscall_badsys)
596 CFI_ENDPROC 698 CFI_ENDPROC
597 699
598#define FIXUP_ESPFIX_STACK \ 700/*
599 /* since we are on a wrong stack, we cant make it a C code :( */ \ 701 * System calls that need a pt_regs pointer.
600 PER_CPU(gdt_page, %ebx); \ 702 */
601 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah); \ 703#define PTREGSCALL(name) \
602 addl %esp, %eax; \ 704 ALIGN; \
603 pushl $__KERNEL_DS; \ 705ptregs_##name: \
604 CFI_ADJUST_CFA_OFFSET 4; \ 706 leal 4(%esp),%eax; \
605 pushl %eax; \ 707 jmp sys_##name;
606 CFI_ADJUST_CFA_OFFSET 4; \ 708
607 lss (%esp), %esp; \ 709PTREGSCALL(iopl)
608 CFI_ADJUST_CFA_OFFSET -8; 710PTREGSCALL(fork)
609#define UNWIND_ESPFIX_STACK \ 711PTREGSCALL(clone)
610 movl %ss, %eax; \ 712PTREGSCALL(vfork)
611 /* see if on espfix stack */ \ 713PTREGSCALL(execve)
612 cmpw $__ESPFIX_SS, %ax; \ 714PTREGSCALL(sigaltstack)
613 jne 27f; \ 715PTREGSCALL(sigreturn)
614 movl $__KERNEL_DS, %eax; \ 716PTREGSCALL(rt_sigreturn)
615 movl %eax, %ds; \ 717PTREGSCALL(vm86)
616 movl %eax, %es; \ 718PTREGSCALL(vm86old)
617 /* switch to normal stack */ \ 719
618 FIXUP_ESPFIX_STACK; \ 720.macro FIXUP_ESPFIX_STACK
61927:; 721 /* since we are on the wrong stack, we can't do this in C :( */
722 PER_CPU(gdt_page, %ebx)
723 GET_DESC_BASE(GDT_ENTRY_ESPFIX_SS, %ebx, %eax, %ax, %al, %ah)
724 addl %esp, %eax
725 pushl $__KERNEL_DS
726 CFI_ADJUST_CFA_OFFSET 4
727 pushl %eax
728 CFI_ADJUST_CFA_OFFSET 4
729 lss (%esp), %esp
730 CFI_ADJUST_CFA_OFFSET -8
731.endm
732.macro UNWIND_ESPFIX_STACK
733 movl %ss, %eax
734 /* see if on espfix stack */
735 cmpw $__ESPFIX_SS, %ax
736 jne 27f
737 movl $__KERNEL_DS, %eax
738 movl %eax, %ds
739 movl %eax, %es
740 /* switch to normal stack */
741 FIXUP_ESPFIX_STACK
74227:
743.endm
620 744
621/* 745/*
622 * Build the entry stubs and pointer table with some assembler magic. 746 * Build the entry stubs and pointer table with some assembler magic.
@@ -672,7 +796,7 @@ common_interrupt:
672ENDPROC(common_interrupt) 796ENDPROC(common_interrupt)
673 CFI_ENDPROC 797 CFI_ENDPROC
674 798
675#define BUILD_INTERRUPT(name, nr) \ 799#define BUILD_INTERRUPT3(name, nr, fn) \
676ENTRY(name) \ 800ENTRY(name) \
677 RING0_INT_FRAME; \ 801 RING0_INT_FRAME; \
678 pushl $~(nr); \ 802 pushl $~(nr); \
@@ -680,13 +804,15 @@ ENTRY(name) \
680 SAVE_ALL; \ 804 SAVE_ALL; \
681 TRACE_IRQS_OFF \ 805 TRACE_IRQS_OFF \
682 movl %esp,%eax; \ 806 movl %esp,%eax; \
683 call smp_##name; \ 807 call fn; \
684 jmp ret_from_intr; \ 808 jmp ret_from_intr; \
685 CFI_ENDPROC; \ 809 CFI_ENDPROC; \
686ENDPROC(name) 810ENDPROC(name)
687 811
812#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
813
688/* The include is where all of the SMP etc. interrupts come from */ 814/* The include is where all of the SMP etc. interrupts come from */
689#include "entry_arch.h" 815#include <asm/entry_arch.h>
690 816
691ENTRY(coprocessor_error) 817ENTRY(coprocessor_error)
692 RING0_INT_FRAME 818 RING0_INT_FRAME
@@ -1068,7 +1194,10 @@ ENTRY(page_fault)
1068 CFI_ADJUST_CFA_OFFSET 4 1194 CFI_ADJUST_CFA_OFFSET 4
1069 ALIGN 1195 ALIGN
1070error_code: 1196error_code:
1071 /* the function address is in %fs's slot on the stack */ 1197 /* the function address is in %gs's slot on the stack */
1198 pushl %fs
1199 CFI_ADJUST_CFA_OFFSET 4
1200 /*CFI_REL_OFFSET fs, 0*/
1072 pushl %es 1201 pushl %es
1073 CFI_ADJUST_CFA_OFFSET 4 1202 CFI_ADJUST_CFA_OFFSET 4
1074 /*CFI_REL_OFFSET es, 0*/ 1203 /*CFI_REL_OFFSET es, 0*/
@@ -1097,20 +1226,15 @@ error_code:
1097 CFI_ADJUST_CFA_OFFSET 4 1226 CFI_ADJUST_CFA_OFFSET 4
1098 CFI_REL_OFFSET ebx, 0 1227 CFI_REL_OFFSET ebx, 0
1099 cld 1228 cld
1100 pushl %fs
1101 CFI_ADJUST_CFA_OFFSET 4
1102 /*CFI_REL_OFFSET fs, 0*/
1103 movl $(__KERNEL_PERCPU), %ecx 1229 movl $(__KERNEL_PERCPU), %ecx
1104 movl %ecx, %fs 1230 movl %ecx, %fs
1105 UNWIND_ESPFIX_STACK 1231 UNWIND_ESPFIX_STACK
1106 popl %ecx 1232 GS_TO_REG %ecx
1107 CFI_ADJUST_CFA_OFFSET -4 1233 movl PT_GS(%esp), %edi # get the function address
1108 /*CFI_REGISTER es, ecx*/
1109 movl PT_FS(%esp), %edi # get the function address
1110 movl PT_ORIG_EAX(%esp), %edx # get the error code 1234 movl PT_ORIG_EAX(%esp), %edx # get the error code
1111 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart 1235 movl $-1, PT_ORIG_EAX(%esp) # no syscall to restart
1112 mov %ecx, PT_FS(%esp) 1236 REG_TO_PTGS %ecx
1113 /*CFI_REL_OFFSET fs, ES*/ 1237 SET_KERNEL_GS %ecx
1114 movl $(__USER_DS), %ecx 1238 movl $(__USER_DS), %ecx
1115 movl %ecx, %ds 1239 movl %ecx, %ds
1116 movl %ecx, %es 1240 movl %ecx, %es
@@ -1134,26 +1258,27 @@ END(page_fault)
1134 * by hand onto the new stack - while updating the return eip past 1258 * by hand onto the new stack - while updating the return eip past
1135 * the instruction that would have done it for sysenter. 1259 * the instruction that would have done it for sysenter.
1136 */ 1260 */
1137#define FIX_STACK(offset, ok, label) \ 1261.macro FIX_STACK offset ok label
1138 cmpw $__KERNEL_CS,4(%esp); \ 1262 cmpw $__KERNEL_CS, 4(%esp)
1139 jne ok; \ 1263 jne \ok
1140label: \ 1264\label:
1141 movl TSS_sysenter_sp0+offset(%esp),%esp; \ 1265 movl TSS_sysenter_sp0 + \offset(%esp), %esp
1142 CFI_DEF_CFA esp, 0; \ 1266 CFI_DEF_CFA esp, 0
1143 CFI_UNDEFINED eip; \ 1267 CFI_UNDEFINED eip
1144 pushfl; \ 1268 pushfl
1145 CFI_ADJUST_CFA_OFFSET 4; \ 1269 CFI_ADJUST_CFA_OFFSET 4
1146 pushl $__KERNEL_CS; \ 1270 pushl $__KERNEL_CS
1147 CFI_ADJUST_CFA_OFFSET 4; \ 1271 CFI_ADJUST_CFA_OFFSET 4
1148 pushl $sysenter_past_esp; \ 1272 pushl $sysenter_past_esp
1149 CFI_ADJUST_CFA_OFFSET 4; \ 1273 CFI_ADJUST_CFA_OFFSET 4
1150 CFI_REL_OFFSET eip, 0 1274 CFI_REL_OFFSET eip, 0
1275.endm
1151 1276
1152ENTRY(debug) 1277ENTRY(debug)
1153 RING0_INT_FRAME 1278 RING0_INT_FRAME
1154 cmpl $ia32_sysenter_target,(%esp) 1279 cmpl $ia32_sysenter_target,(%esp)
1155 jne debug_stack_correct 1280 jne debug_stack_correct
1156 FIX_STACK(12, debug_stack_correct, debug_esp_fix_insn) 1281 FIX_STACK 12, debug_stack_correct, debug_esp_fix_insn
1157debug_stack_correct: 1282debug_stack_correct:
1158 pushl $-1 # mark this as an int 1283 pushl $-1 # mark this as an int
1159 CFI_ADJUST_CFA_OFFSET 4 1284 CFI_ADJUST_CFA_OFFSET 4
@@ -1211,7 +1336,7 @@ nmi_stack_correct:
1211 1336
1212nmi_stack_fixup: 1337nmi_stack_fixup:
1213 RING0_INT_FRAME 1338 RING0_INT_FRAME
1214 FIX_STACK(12,nmi_stack_correct, 1) 1339 FIX_STACK 12, nmi_stack_correct, 1
1215 jmp nmi_stack_correct 1340 jmp nmi_stack_correct
1216 1341
1217nmi_debug_stack_check: 1342nmi_debug_stack_check:
@@ -1222,7 +1347,7 @@ nmi_debug_stack_check:
1222 jb nmi_stack_correct 1347 jb nmi_stack_correct
1223 cmpl $debug_esp_fix_insn,(%esp) 1348 cmpl $debug_esp_fix_insn,(%esp)
1224 ja nmi_stack_correct 1349 ja nmi_stack_correct
1225 FIX_STACK(24,nmi_stack_correct, 1) 1350 FIX_STACK 24, nmi_stack_correct, 1
1226 jmp nmi_stack_correct 1351 jmp nmi_stack_correct
1227 1352
1228nmi_espfix_stack: 1353nmi_espfix_stack:
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index a1346217e43c..fbcf96b295ff 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -52,6 +52,7 @@
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/percpu.h>
55 56
56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h> 58#include <linux/elf-em.h>
@@ -209,7 +210,7 @@ ENTRY(native_usergs_sysret64)
209 210
210 /* %rsp:at FRAMEEND */ 211 /* %rsp:at FRAMEEND */
211 .macro FIXUP_TOP_OF_STACK tmp offset=0 212 .macro FIXUP_TOP_OF_STACK tmp offset=0
212 movq %gs:pda_oldrsp,\tmp 213 movq PER_CPU_VAR(old_rsp),\tmp
213 movq \tmp,RSP+\offset(%rsp) 214 movq \tmp,RSP+\offset(%rsp)
214 movq $__USER_DS,SS+\offset(%rsp) 215 movq $__USER_DS,SS+\offset(%rsp)
215 movq $__USER_CS,CS+\offset(%rsp) 216 movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +221,7 @@ ENTRY(native_usergs_sysret64)
220 221
221 .macro RESTORE_TOP_OF_STACK tmp offset=0 222 .macro RESTORE_TOP_OF_STACK tmp offset=0
222 movq RSP+\offset(%rsp),\tmp 223 movq RSP+\offset(%rsp),\tmp
223 movq \tmp,%gs:pda_oldrsp 224 movq \tmp,PER_CPU_VAR(old_rsp)
224 movq EFLAGS+\offset(%rsp),\tmp 225 movq EFLAGS+\offset(%rsp),\tmp
225 movq \tmp,R11+\offset(%rsp) 226 movq \tmp,R11+\offset(%rsp)
226 .endm 227 .endm
@@ -336,15 +337,15 @@ ENTRY(save_args)
336 je 1f 337 je 1f
337 SWAPGS 338 SWAPGS
338 /* 339 /*
339 * irqcount is used to check if a CPU is already on an interrupt stack 340 * irq_count is used to check if a CPU is already on an interrupt stack
340 * or not. While this is essentially redundant with preempt_count it is 341 * or not. While this is essentially redundant with preempt_count it is
341 * a little cheaper to use a separate counter in the PDA (short of 342 * a little cheaper to use a separate counter in the PDA (short of
342 * moving irq_enter into assembly, which would be too much work) 343 * moving irq_enter into assembly, which would be too much work)
343 */ 344 */
3441: incl %gs:pda_irqcount 3451: incl PER_CPU_VAR(irq_count)
345 jne 2f 346 jne 2f
346 popq_cfi %rax /* move return address... */ 347 popq_cfi %rax /* move return address... */
347 mov %gs:pda_irqstackptr,%rsp 348 mov PER_CPU_VAR(irq_stack_ptr),%rsp
348 EMPTY_FRAME 0 349 EMPTY_FRAME 0
349 pushq_cfi %rbp /* backlink for unwinder */ 350 pushq_cfi %rbp /* backlink for unwinder */
350 pushq_cfi %rax /* ... to the new stack */ 351 pushq_cfi %rax /* ... to the new stack */
@@ -409,6 +410,8 @@ END(save_paranoid)
409ENTRY(ret_from_fork) 410ENTRY(ret_from_fork)
410 DEFAULT_FRAME 411 DEFAULT_FRAME
411 412
413 LOCK ; btr $TIF_FORK,TI_flags(%r8)
414
412 push kernel_eflags(%rip) 415 push kernel_eflags(%rip)
413 CFI_ADJUST_CFA_OFFSET 8 416 CFI_ADJUST_CFA_OFFSET 8
414 popf # reset kernel eflags 417 popf # reset kernel eflags
@@ -468,7 +471,7 @@ END(ret_from_fork)
468ENTRY(system_call) 471ENTRY(system_call)
469 CFI_STARTPROC simple 472 CFI_STARTPROC simple
470 CFI_SIGNAL_FRAME 473 CFI_SIGNAL_FRAME
471 CFI_DEF_CFA rsp,PDA_STACKOFFSET 474 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
472 CFI_REGISTER rip,rcx 475 CFI_REGISTER rip,rcx
473 /*CFI_REGISTER rflags,r11*/ 476 /*CFI_REGISTER rflags,r11*/
474 SWAPGS_UNSAFE_STACK 477 SWAPGS_UNSAFE_STACK
@@ -479,8 +482,8 @@ ENTRY(system_call)
479 */ 482 */
480ENTRY(system_call_after_swapgs) 483ENTRY(system_call_after_swapgs)
481 484
482 movq %rsp,%gs:pda_oldrsp 485 movq %rsp,PER_CPU_VAR(old_rsp)
483 movq %gs:pda_kernelstack,%rsp 486 movq PER_CPU_VAR(kernel_stack),%rsp
484 /* 487 /*
485 * No need to follow this irqs off/on section - it's straight 488 * No need to follow this irqs off/on section - it's straight
486 * and short: 489 * and short:
@@ -523,7 +526,7 @@ sysret_check:
523 CFI_REGISTER rip,rcx 526 CFI_REGISTER rip,rcx
524 RESTORE_ARGS 0,-ARG_SKIP,1 527 RESTORE_ARGS 0,-ARG_SKIP,1
525 /*CFI_REGISTER rflags,r11*/ 528 /*CFI_REGISTER rflags,r11*/
526 movq %gs:pda_oldrsp, %rsp 529 movq PER_CPU_VAR(old_rsp), %rsp
527 USERGS_SYSRET64 530 USERGS_SYSRET64
528 531
529 CFI_RESTORE_STATE 532 CFI_RESTORE_STATE
@@ -833,11 +836,11 @@ common_interrupt:
833 XCPT_FRAME 836 XCPT_FRAME
834 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 837 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
835 interrupt do_IRQ 838 interrupt do_IRQ
836 /* 0(%rsp): oldrsp-ARGOFFSET */ 839 /* 0(%rsp): old_rsp-ARGOFFSET */
837ret_from_intr: 840ret_from_intr:
838 DISABLE_INTERRUPTS(CLBR_NONE) 841 DISABLE_INTERRUPTS(CLBR_NONE)
839 TRACE_IRQS_OFF 842 TRACE_IRQS_OFF
840 decl %gs:pda_irqcount 843 decl PER_CPU_VAR(irq_count)
841 leaveq 844 leaveq
842 CFI_DEF_CFA_REGISTER rsp 845 CFI_DEF_CFA_REGISTER rsp
843 CFI_ADJUST_CFA_OFFSET -8 846 CFI_ADJUST_CFA_OFFSET -8
@@ -982,8 +985,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
982 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 985 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
983#endif 986#endif
984 987
988#ifdef CONFIG_X86_UV
985apicinterrupt UV_BAU_MESSAGE \ 989apicinterrupt UV_BAU_MESSAGE \
986 uv_bau_message_intr1 uv_bau_message_interrupt 990 uv_bau_message_intr1 uv_bau_message_interrupt
991#endif
987apicinterrupt LOCAL_TIMER_VECTOR \ 992apicinterrupt LOCAL_TIMER_VECTOR \
988 apic_timer_interrupt smp_apic_timer_interrupt 993 apic_timer_interrupt smp_apic_timer_interrupt
989 994
@@ -1073,10 +1078,10 @@ ENTRY(\sym)
1073 TRACE_IRQS_OFF 1078 TRACE_IRQS_OFF
1074 movq %rsp,%rdi /* pt_regs pointer */ 1079 movq %rsp,%rdi /* pt_regs pointer */
1075 xorl %esi,%esi /* no error code */ 1080 xorl %esi,%esi /* no error code */
1076 movq %gs:pda_data_offset, %rbp 1081 PER_CPU(init_tss, %rbp)
1077 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1082 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1078 call \do_sym 1083 call \do_sym
1079 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1084 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1080 jmp paranoid_exit /* %ebx: no swapgs flag */ 1085 jmp paranoid_exit /* %ebx: no swapgs flag */
1081 CFI_ENDPROC 1086 CFI_ENDPROC
1082END(\sym) 1087END(\sym)
@@ -1138,7 +1143,7 @@ ENTRY(native_load_gs_index)
1138 CFI_STARTPROC 1143 CFI_STARTPROC
1139 pushf 1144 pushf
1140 CFI_ADJUST_CFA_OFFSET 8 1145 CFI_ADJUST_CFA_OFFSET 8
1141 DISABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) 1146 DISABLE_INTERRUPTS(CLBR_ANY & ~CLBR_RDI)
1142 SWAPGS 1147 SWAPGS
1143gs_change: 1148gs_change:
1144 movl %edi,%gs 1149 movl %edi,%gs
@@ -1260,14 +1265,14 @@ ENTRY(call_softirq)
1260 CFI_REL_OFFSET rbp,0 1265 CFI_REL_OFFSET rbp,0
1261 mov %rsp,%rbp 1266 mov %rsp,%rbp
1262 CFI_DEF_CFA_REGISTER rbp 1267 CFI_DEF_CFA_REGISTER rbp
1263 incl %gs:pda_irqcount 1268 incl PER_CPU_VAR(irq_count)
1264 cmove %gs:pda_irqstackptr,%rsp 1269 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1265 push %rbp # backlink for old unwinder 1270 push %rbp # backlink for old unwinder
1266 call __do_softirq 1271 call __do_softirq
1267 leaveq 1272 leaveq
1268 CFI_DEF_CFA_REGISTER rsp 1273 CFI_DEF_CFA_REGISTER rsp
1269 CFI_ADJUST_CFA_OFFSET -8 1274 CFI_ADJUST_CFA_OFFSET -8
1270 decl %gs:pda_irqcount 1275 decl PER_CPU_VAR(irq_count)
1271 ret 1276 ret
1272 CFI_ENDPROC 1277 CFI_ENDPROC
1273END(call_softirq) 1278END(call_softirq)
@@ -1297,15 +1302,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1297 movq %rdi, %rsp # we don't return, adjust the stack frame 1302 movq %rdi, %rsp # we don't return, adjust the stack frame
1298 CFI_ENDPROC 1303 CFI_ENDPROC
1299 DEFAULT_FRAME 1304 DEFAULT_FRAME
130011: incl %gs:pda_irqcount 130511: incl PER_CPU_VAR(irq_count)
1301 movq %rsp,%rbp 1306 movq %rsp,%rbp
1302 CFI_DEF_CFA_REGISTER rbp 1307 CFI_DEF_CFA_REGISTER rbp
1303 cmovzq %gs:pda_irqstackptr,%rsp 1308 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1304 pushq %rbp # backlink for old unwinder 1309 pushq %rbp # backlink for old unwinder
1305 call xen_evtchn_do_upcall 1310 call xen_evtchn_do_upcall
1306 popq %rsp 1311 popq %rsp
1307 CFI_DEF_CFA_REGISTER rsp 1312 CFI_DEF_CFA_REGISTER rsp
1308 decl %gs:pda_irqcount 1313 decl PER_CPU_VAR(irq_count)
1309 jmp error_exit 1314 jmp error_exit
1310 CFI_ENDPROC 1315 CFI_ENDPROC
1311END(do_hypervisor_callback) 1316END(do_hypervisor_callback)
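The entry_64.S hunks above replace the old %gs:pda_irqcount / %gs:pda_irqstackptr PDA fields with ordinary per-cpu variables accessed through PER_CPU_VAR(). The nesting logic itself is unchanged: only the outermost interrupt level switches onto the per-CPU IRQ stack, deeper levels stay put. A minimal userspace sketch of that bookkeeping (an illustrative model only, not kernel code; the kernel's counter start value and flag conventions differ in detail):

#include <stdio.h>

struct cpu_state {
        int irq_count;            /* stands in for PER_CPU_VAR(irq_count)      */
        const char *stack;        /* which stack this model pretends to run on */
};

static void irq_enter_model(struct cpu_state *cpu)
{
        /* only the outermost entry switches to the dedicated IRQ stack */
        if (cpu->irq_count++ == 0)
                cpu->stack = "irq_stack";
}

static void irq_exit_model(struct cpu_state *cpu)
{
        /* leaving the outermost level returns to the interrupted stack */
        if (--cpu->irq_count == 0)
                cpu->stack = "task_stack";
}

int main(void)
{
        struct cpu_state cpu = { 0, "task_stack" };

        irq_enter_model(&cpu);              /* outer interrupt: stack switch */
        irq_enter_model(&cpu);              /* nested interrupt: no switch   */
        printf("nested: count=%d on %s\n", cpu.irq_count, cpu.stack);
        irq_exit_model(&cpu);
        irq_exit_model(&cpu);
        printf("idle:   count=%d on %s\n", cpu.irq_count, cpu.stack);
        return 0;
}

Keeping the counter as a plain per-cpu variable rather than a PDA field is what lets the assembly use the generic PER_CPU_VAR() accessor instead of hard-coded pda offsets.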
diff --git a/arch/x86/kernel/es7000_32.c b/arch/x86/kernel/es7000_32.c
index 53699c931ad4..55515d73d9c2 100644
--- a/arch/x86/kernel/es7000_32.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -40,7 +40,6 @@
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/atomic.h> 41#include <asm/atomic.h>
42#include <asm/apicdef.h> 42#include <asm/apicdef.h>
43#include <mach_mpparse.h>
44#include <asm/genapic.h> 43#include <asm/genapic.h>
45#include <asm/setup.h> 44#include <asm/setup.h>
46 45
@@ -182,20 +181,16 @@ static int wakeup_secondary_cpu_via_mip(int cpu, unsigned long eip)
182 return 0; 181 return 0;
183} 182}
184 183
185static void noop_wait_for_deassert(atomic_t *deassert_not_used)
186{
187}
188
189static int __init es7000_update_genapic(void) 184static int __init es7000_update_genapic(void)
190{ 185{
191 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; 186 apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
192 187
193 /* MPENTIUMIII */ 188 /* MPENTIUMIII */
194 if (boot_cpu_data.x86 == 6 && 189 if (boot_cpu_data.x86 == 6 &&
195 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) { 190 (boot_cpu_data.x86_model >= 7 || boot_cpu_data.x86_model <= 11)) {
196 es7000_update_genapic_to_cluster(); 191 es7000_update_genapic_to_cluster();
197 genapic->wait_for_init_deassert = noop_wait_for_deassert; 192 apic->wait_for_init_deassert = NULL;
198 genapic->wakeup_cpu = wakeup_secondary_cpu_via_mip; 193 apic->wakeup_cpu = wakeup_secondary_cpu_via_mip;
199 } 194 }
200 195
201 return 0; 196 return 0;
@@ -292,24 +287,31 @@ int __init find_unisys_acpi_oem_table(unsigned long *oem_addr)
292{ 287{
293 struct acpi_table_header *header = NULL; 288 struct acpi_table_header *header = NULL;
294 int i = 0; 289 int i = 0;
290 acpi_size tbl_size;
295 291
296 while (ACPI_SUCCESS(acpi_get_table("OEM1", i++, &header))) { 292 while (ACPI_SUCCESS(acpi_get_table_with_size("OEM1", i++, &header, &tbl_size))) {
297 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) { 293 if (!memcmp((char *) &header->oem_id, "UNISYS", 6)) {
298 struct oem_table *t = (struct oem_table *)header; 294 struct oem_table *t = (struct oem_table *)header;
299 295
300 oem_addrX = t->OEMTableAddr; 296 oem_addrX = t->OEMTableAddr;
301 oem_size = t->OEMTableSize; 297 oem_size = t->OEMTableSize;
298 early_acpi_os_unmap_memory(header, tbl_size);
302 299
303 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX, 300 *oem_addr = (unsigned long)__acpi_map_table(oem_addrX,
304 oem_size); 301 oem_size);
305 return 0; 302 return 0;
306 } 303 }
304 early_acpi_os_unmap_memory(header, tbl_size);
307 } 305 }
308 return -1; 306 return -1;
309} 307}
310 308
311void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr) 309void __init unmap_unisys_acpi_oem_table(unsigned long oem_addr)
312{ 310{
311 if (!oem_addr)
312 return;
313
314 __acpi_unmap_table((char *)oem_addr, oem_size);
313} 315}
314#endif 316#endif
315 317
@@ -359,20 +361,449 @@ es7000_mip_write(struct mip_reg *mip_reg)
359 return status; 361 return status;
360} 362}
361 363
362void __init 364void __init es7000_enable_apic_mode(void)
363es7000_sw_apic(void) 365{
364{ 366 struct mip_reg es7000_mip_reg;
365 if (es7000_plat) { 367 int mip_status;
366 int mip_status; 368
367 struct mip_reg es7000_mip_reg; 369 if (!es7000_plat)
368
369 printk("ES7000: Enabling APIC mode.\n");
370 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
371 es7000_mip_reg.off_0 = MIP_SW_APIC;
372 es7000_mip_reg.off_38 = (MIP_VALID);
373 while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0)
374 printk("es7000_sw_apic: command failed, status = %x\n",
375 mip_status);
376 return; 370 return;
371
372 printk("ES7000: Enabling APIC mode.\n");
373 memset(&es7000_mip_reg, 0, sizeof(struct mip_reg));
374 es7000_mip_reg.off_0 = MIP_SW_APIC;
375 es7000_mip_reg.off_38 = MIP_VALID;
376
377 while ((mip_status = es7000_mip_write(&es7000_mip_reg)) != 0) {
378 printk("es7000_enable_apic_mode: command failed, status = %x\n",
379 mip_status);
380 }
381}
382
383/*
384 * APIC driver for the Unisys ES7000 chipset.
385 */
386#define APIC_DEFINITION 1
387#include <linux/threads.h>
388#include <linux/cpumask.h>
389#include <asm/mpspec.h>
390#include <asm/genapic.h>
391#include <asm/fixmap.h>
392#include <asm/apicdef.h>
393#include <linux/kernel.h>
394#include <linux/string.h>
395#include <linux/init.h>
396#include <linux/acpi.h>
397#include <linux/smp.h>
398#include <asm/ipi.h>
399
400#define APIC_DFR_VALUE_CLUSTER (APIC_DFR_CLUSTER)
401#define INT_DELIVERY_MODE_CLUSTER (dest_LowestPrio)
402#define INT_DEST_MODE_CLUSTER (1) /* logical delivery broadcast to all procs */
403
404#define APIC_DFR_VALUE (APIC_DFR_FLAT)
405
406extern void es7000_enable_apic_mode(void);
407extern int apic_version [MAX_APICS];
408extern u8 cpu_2_logical_apicid[];
409extern unsigned int boot_cpu_physical_apicid;
410
411extern int parse_unisys_oem (char *oemptr);
412extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
413extern void unmap_unisys_acpi_oem_table(unsigned long oem_addr);
414extern void setup_unisys(void);
415
416#define apicid_cluster(apicid) (apicid & 0xF0)
417#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
418
419static void es7000_vector_allocation_domain(int cpu, cpumask_t *retmask)
420{
421 /* Careful. Some cpus do not strictly honor the set of cpus
422 * specified in the interrupt destination when using lowest
423 * priority interrupt delivery mode.
424 *
425 * In particular there was a hyperthreading cpu observed to
426 * deliver interrupts to the wrong hyperthread when only one
427 * hyperthread was specified in the interrupt desitination.
428 */
429 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
430}
431
432
433static void es7000_wait_for_init_deassert(atomic_t *deassert)
434{
435#ifndef CONFIG_ES7000_CLUSTERED_APIC
436 while (!atomic_read(deassert))
437 cpu_relax();
438#endif
439 return;
440}
441
442static unsigned int es7000_get_apic_id(unsigned long x)
443{
444 return (x >> 24) & 0xFF;
445}
446
447#ifdef CONFIG_ACPI
448static int es7000_check_dsdt(void)
449{
450 struct acpi_table_header header;
451
452 if (ACPI_SUCCESS(acpi_get_table_header(ACPI_SIG_DSDT, 0, &header)) &&
453 !strncmp(header.oem_id, "UNISYS", 6))
454 return 1;
455 return 0;
456}
457#endif
458
459static void es7000_send_IPI_mask(const struct cpumask *mask, int vector)
460{
461 default_send_IPI_mask_sequence_phys(mask, vector);
462}
463
464static void es7000_send_IPI_allbutself(int vector)
465{
466 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
467}
468
469static void es7000_send_IPI_all(int vector)
470{
471 es7000_send_IPI_mask(cpu_online_mask, vector);
472}
473
474static int es7000_apic_id_registered(void)
475{
476 return 1;
477}
478
479static const cpumask_t *target_cpus_cluster(void)
480{
481 return &CPU_MASK_ALL;
482}
483
484static const cpumask_t *es7000_target_cpus(void)
485{
486 return &cpumask_of_cpu(smp_processor_id());
487}
488
489static unsigned long
490es7000_check_apicid_used(physid_mask_t bitmap, int apicid)
491{
492 return 0;
493}
494static unsigned long es7000_check_apicid_present(int bit)
495{
496 return physid_isset(bit, phys_cpu_present_map);
497}
498
499static unsigned long calculate_ldr(int cpu)
500{
501 unsigned long id = xapic_phys_to_log_apicid(cpu);
502
503 return (SET_APIC_LOGICAL_ID(id));
504}
505
506/*
507 * Set up the logical destination ID.
508 *
509 * Intel recommends to set DFR, LdR and TPR before enabling
510 * an APIC. See e.g. "AP-388 82489DX User's Manual" (Intel
511 * document number 292116). So here it goes...
512 */
513static void es7000_init_apic_ldr_cluster(void)
514{
515 unsigned long val;
516 int cpu = smp_processor_id();
517
518 apic_write(APIC_DFR, APIC_DFR_VALUE_CLUSTER);
519 val = calculate_ldr(cpu);
520 apic_write(APIC_LDR, val);
521}
522
523static void es7000_init_apic_ldr(void)
524{
525 unsigned long val;
526 int cpu = smp_processor_id();
527
528 apic_write(APIC_DFR, APIC_DFR_VALUE);
529 val = calculate_ldr(cpu);
530 apic_write(APIC_LDR, val);
531}
532
533static void es7000_setup_apic_routing(void)
534{
535 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
536 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
537 (apic_version[apic] == 0x14) ?
538 "Physical Cluster" : "Logical Cluster",
539 nr_ioapics, cpus_addr(*es7000_target_cpus())[0]);
540}
541
542static int es7000_apicid_to_node(int logical_apicid)
543{
544 return 0;
545}
546
547
548static int es7000_cpu_present_to_apicid(int mps_cpu)
549{
550 if (!mps_cpu)
551 return boot_cpu_physical_apicid;
552 else if (mps_cpu < nr_cpu_ids)
553 return (int) per_cpu(x86_bios_cpu_apicid, mps_cpu);
554 else
555 return BAD_APICID;
556}
557
558static physid_mask_t es7000_apicid_to_cpu_present(int phys_apicid)
559{
560 static int id = 0;
561 physid_mask_t mask;
562
563 mask = physid_mask_of_physid(id);
564 ++id;
565
566 return mask;
567}
568
569/* Mapping from cpu number to logical apicid */
570static int es7000_cpu_to_logical_apicid(int cpu)
571{
572#ifdef CONFIG_SMP
573 if (cpu >= nr_cpu_ids)
574 return BAD_APICID;
575 return (int)cpu_2_logical_apicid[cpu];
576#else
577 return logical_smp_processor_id();
578#endif
579}
580
581static physid_mask_t es7000_ioapic_phys_id_map(physid_mask_t phys_map)
582{
583 /* For clustered we don't have a good way to do this yet - hack */
584 return physids_promote(0xff);
585}
586
587static int es7000_check_phys_apicid_present(int cpu_physical_apicid)
588{
589 boot_cpu_physical_apicid = read_apic_id();
590 return (1);
591}
592
593static unsigned int
594es7000_cpu_mask_to_apicid_cluster(const struct cpumask *cpumask)
595{
596 int cpus_found = 0;
597 int num_bits_set;
598 int apicid;
599 int cpu;
600
601 num_bits_set = cpumask_weight(cpumask);
602 /* Return id to all */
603 if (num_bits_set == nr_cpu_ids)
604 return 0xFF;
605 /*
606 * The cpus in the mask must all be on the apic cluster. If are not
607 * on the same apicid cluster return default value of target_cpus():
608 */
609 cpu = cpumask_first(cpumask);
610 apicid = es7000_cpu_to_logical_apicid(cpu);
611
612 while (cpus_found < num_bits_set) {
613 if (cpumask_test_cpu(cpu, cpumask)) {
614 int new_apicid = es7000_cpu_to_logical_apicid(cpu);
615
616 if (apicid_cluster(apicid) !=
617 apicid_cluster(new_apicid)) {
618 printk ("%s: Not a valid mask!\n", __func__);
619
620 return 0xFF;
621 }
622 apicid = new_apicid;
623 cpus_found++;
624 }
625 cpu++;
626 }
627 return apicid;
628}
629
630static unsigned int es7000_cpu_mask_to_apicid(const cpumask_t *cpumask)
631{
632 int cpus_found = 0;
633 int num_bits_set;
634 int apicid;
635 int cpu;
636
637 num_bits_set = cpus_weight(*cpumask);
638 /* Return id to all */
639 if (num_bits_set == nr_cpu_ids)
640 return es7000_cpu_to_logical_apicid(0);
641 /*
642 * The cpus in the mask must all be on the apic cluster. If are not
643 * on the same apicid cluster return default value of target_cpus():
644 */
645 cpu = first_cpu(*cpumask);
646 apicid = es7000_cpu_to_logical_apicid(cpu);
647 while (cpus_found < num_bits_set) {
648 if (cpu_isset(cpu, *cpumask)) {
649 int new_apicid = es7000_cpu_to_logical_apicid(cpu);
650
651 if (apicid_cluster(apicid) !=
652 apicid_cluster(new_apicid)) {
653 printk ("%s: Not a valid mask!\n", __func__);
654
655 return es7000_cpu_to_logical_apicid(0);
656 }
657 apicid = new_apicid;
658 cpus_found++;
659 }
660 cpu++;
661 }
662 return apicid;
663}
664
665static unsigned int
666es7000_cpu_mask_to_apicid_and(const struct cpumask *inmask,
667 const struct cpumask *andmask)
668{
669 int apicid = es7000_cpu_to_logical_apicid(0);
670 cpumask_var_t cpumask;
671
672 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
673 return apicid;
674
675 cpumask_and(cpumask, inmask, andmask);
676 cpumask_and(cpumask, cpumask, cpu_online_mask);
677 apicid = es7000_cpu_mask_to_apicid(cpumask);
678
679 free_cpumask_var(cpumask);
680
681 return apicid;
682}
683
684static int es7000_phys_pkg_id(int cpuid_apic, int index_msb)
685{
686 return cpuid_apic >> index_msb;
687}
688
689void __init es7000_update_genapic_to_cluster(void)
690{
691 apic->target_cpus = target_cpus_cluster;
692 apic->irq_delivery_mode = INT_DELIVERY_MODE_CLUSTER;
693 apic->irq_dest_mode = INT_DEST_MODE_CLUSTER;
694
695 apic->init_apic_ldr = es7000_init_apic_ldr_cluster;
696
697 apic->cpu_mask_to_apicid = es7000_cpu_mask_to_apicid_cluster;
698}
699
700static int probe_es7000(void)
701{
702 /* probed later in mptable/ACPI hooks */
703 return 0;
704}
705
706static __init int
707es7000_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
708{
709 if (mpc->oemptr) {
710 struct mpc_oemtable *oem_table =
711 (struct mpc_oemtable *)mpc->oemptr;
712
713 if (!strncmp(oem, "UNISYS", 6))
714 return parse_unisys_oem((char *)oem_table);
715 }
716 return 0;
717}
718
719#ifdef CONFIG_ACPI
720/* Hook from generic ACPI tables.c */
721static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
722{
723 unsigned long oem_addr = 0;
724 int check_dsdt;
725 int ret = 0;
726
727 /* check dsdt at first to avoid clear fix_map for oem_addr */
728 check_dsdt = es7000_check_dsdt();
729
730 if (!find_unisys_acpi_oem_table(&oem_addr)) {
731 if (check_dsdt)
732 ret = parse_unisys_oem((char *)oem_addr);
733 else {
734 setup_unisys();
735 ret = 1;
736 }
737 /*
738 * we need to unmap it
739 */
740 unmap_unisys_acpi_oem_table(oem_addr);
377 } 741 }
742 return ret;
743}
744#else
745static int __init es7000_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
746{
747 return 0;
378} 748}
749#endif
750
751
752struct genapic apic_es7000 = {
753
754 .name = "es7000",
755 .probe = probe_es7000,
756 .acpi_madt_oem_check = es7000_acpi_madt_oem_check,
757 .apic_id_registered = es7000_apic_id_registered,
758
759 .irq_delivery_mode = dest_Fixed,
760 /* phys delivery to target CPUs: */
761 .irq_dest_mode = 0,
762
763 .target_cpus = es7000_target_cpus,
764 .disable_esr = 1,
765 .dest_logical = 0,
766 .check_apicid_used = es7000_check_apicid_used,
767 .check_apicid_present = es7000_check_apicid_present,
768
769 .vector_allocation_domain = es7000_vector_allocation_domain,
770 .init_apic_ldr = es7000_init_apic_ldr,
771
772 .ioapic_phys_id_map = es7000_ioapic_phys_id_map,
773 .setup_apic_routing = es7000_setup_apic_routing,
774 .multi_timer_check = NULL,
775 .apicid_to_node = es7000_apicid_to_node,
776 .cpu_to_logical_apicid = es7000_cpu_to_logical_apicid,
777 .cpu_present_to_apicid = es7000_cpu_present_to_apicid,
778 .apicid_to_cpu_present = es7000_apicid_to_cpu_present,
779 .setup_portio_remap = NULL,
780 .check_phys_apicid_present = es7000_check_phys_apicid_present,
781 .enable_apic_mode = es7000_enable_apic_mode,
782 .phys_pkg_id = es7000_phys_pkg_id,
783 .mps_oem_check = es7000_mps_oem_check,
784
785 .get_apic_id = es7000_get_apic_id,
786 .set_apic_id = NULL,
787 .apic_id_mask = 0xFF << 24,
788
789 .cpu_mask_to_apicid = es7000_cpu_mask_to_apicid,
790 .cpu_mask_to_apicid_and = es7000_cpu_mask_to_apicid_and,
791
792 .send_IPI_mask = es7000_send_IPI_mask,
793 .send_IPI_mask_allbutself = NULL,
794 .send_IPI_allbutself = es7000_send_IPI_allbutself,
795 .send_IPI_all = es7000_send_IPI_all,
796 .send_IPI_self = default_send_IPI_self,
797
798 .wakeup_cpu = NULL,
799
800 .trampoline_phys_low = 0x467,
801 .trampoline_phys_high = 0x469,
802
803 .wait_for_init_deassert = es7000_wait_for_init_deassert,
804
805 /* Nothing to do for most platforms, since cleared by the INIT cycle: */
806 .smp_callin_clear_local_apic = NULL,
807 .store_NMI_vector = NULL,
808 .inquire_remote_apic = default_inquire_remote_apic,
809};
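The consolidated es7000 driver above routes everything through the new struct genapic apic_es7000 method table. The trickiest helper is es7000_cpu_mask_to_apicid(): with lowest-priority delivery the destination can only encode CPUs that share one APIC cluster (apicid & 0xF0), so the mask is validated cluster by cluster and a fallback ID is returned on a mismatch. A simplified stand-alone sketch of that check (plain arrays instead of cpumasks; the helper names and sample IDs are this sketch's own):

#include <stdio.h>

#define APICID_CLUSTER(id)   ((id) & 0xF0)
#define FALLBACK_APICID      0xFF        /* what the driver falls back to */

static int mask_to_apicid(const unsigned char *logical_apicid,
                          const unsigned char *in_mask, int ncpus)
{
        int apicid = -1;

        for (int cpu = 0; cpu < ncpus; cpu++) {
                if (!in_mask[cpu])
                        continue;
                if (apicid >= 0 &&
                    APICID_CLUSTER(logical_apicid[cpu]) != APICID_CLUSTER(apicid))
                        return FALLBACK_APICID;   /* CPUs span two clusters */
                apicid = logical_apicid[cpu];
        }
        return apicid < 0 ? FALLBACK_APICID : apicid;
}

int main(void)
{
        unsigned char ids[4]  = { 0x11, 0x12, 0x21, 0x22 };
        unsigned char same[4] = { 1, 1, 0, 0 };   /* both in cluster 0x10      */
        unsigned char span[4] = { 0, 1, 1, 0 };   /* clusters 0x10 and 0x20    */

        printf("same cluster: 0x%02x\n", mask_to_apicid(ids, same, 4));
        printf("mixed:        0x%02x\n", mask_to_apicid(ids, span, 4));
        return 0;
}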
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 1b43086b097a..231bdd3c5b1c 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -488,20 +488,21 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr)
488 * ignore such a protection. 488 * ignore such a protection.
489 */ 489 */
490 asm volatile( 490 asm volatile(
491 "1: " _ASM_MOV " (%[parent_old]), %[old]\n" 491 "1: " _ASM_MOV " (%[parent]), %[old]\n"
492 "2: " _ASM_MOV " %[return_hooker], (%[parent_replaced])\n" 492 "2: " _ASM_MOV " %[return_hooker], (%[parent])\n"
493 " movl $0, %[faulted]\n" 493 " movl $0, %[faulted]\n"
494 "3:\n"
494 495
495 ".section .fixup, \"ax\"\n" 496 ".section .fixup, \"ax\"\n"
496 "3: movl $1, %[faulted]\n" 497 "4: movl $1, %[faulted]\n"
498 " jmp 3b\n"
497 ".previous\n" 499 ".previous\n"
498 500
499 _ASM_EXTABLE(1b, 3b) 501 _ASM_EXTABLE(1b, 4b)
500 _ASM_EXTABLE(2b, 3b) 502 _ASM_EXTABLE(2b, 4b)
501 503
502 : [parent_replaced] "=r" (parent), [old] "=r" (old), 504 : [old] "=r" (old), [faulted] "=r" (faulted)
503 [faulted] "=r" (faulted) 505 : [parent] "r" (parent), [return_hooker] "r" (return_hooker)
504 : [parent_old] "0" (parent), [return_hooker] "r" (return_hooker)
505 : "memory" 506 : "memory"
506 ); 507 );
507 508
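The ftrace.c hunk restructures the fault-tolerant access to the saved return address: both the load of the old value and the store of the return hooker are covered by exception-table entries, and the shared fixup path just sets faulted and resumes after the asm block. A rough userspace analogue of that "probe, and flag instead of crash" pattern, using a SIGSEGV handler plus sigsetjmp where the kernel uses _ASM_EXTABLE (illustrative only, not how the kernel implements it):

#include <setjmp.h>
#include <signal.h>
#include <stdio.h>

static sigjmp_buf fixup;

static void segv_handler(int sig)
{
        (void)sig;
        siglongjmp(fixup, 1);                /* jump to the "fixup" path */
}

static int probe_and_replace(unsigned long *parent, unsigned long hook,
                             unsigned long *old)
{
        int faulted = 0;

        if (sigsetjmp(fixup, 1) == 0) {
                *old = *parent;              /* 1: load the old return address */
                *parent = hook;              /* 2: install the return hooker   */
        } else {
                faulted = 1;                 /* fixup path: flag, don't crash  */
        }
        return faulted;
}

int main(void)
{
        struct sigaction sa = { 0 };
        unsigned long slot = 0x1234, old = 0;

        sa.sa_handler = segv_handler;
        sigemptyset(&sa.sa_mask);
        sigaction(SIGSEGV, &sa, NULL);

        printf("valid slot:  faulted=%d (old=0x%lx)\n",
               probe_and_replace(&slot, 0xdead, &old), old);
        /* an address that is almost certainly unmapped, to force the fault path */
        printf("bad pointer: faulted=%d\n",
               probe_and_replace((unsigned long *)8, 0xdead, &old));
        return 0;
}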
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 2bced78b0b8e..820dea5d0ebe 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -29,10 +29,12 @@ extern struct genapic apic_x2xpic_uv_x;
29extern struct genapic apic_x2apic_phys; 29extern struct genapic apic_x2apic_phys;
30extern struct genapic apic_x2apic_cluster; 30extern struct genapic apic_x2apic_cluster;
31 31
32struct genapic __read_mostly *genapic = &apic_flat; 32struct genapic __read_mostly *apic = &apic_flat;
33 33
34static struct genapic *apic_probe[] __initdata = { 34static struct genapic *apic_probe[] __initdata = {
35#ifdef CONFIG_X86_UV
35 &apic_x2apic_uv_x, 36 &apic_x2apic_uv_x,
37#endif
36 &apic_x2apic_phys, 38 &apic_x2apic_phys,
37 &apic_x2apic_cluster, 39 &apic_x2apic_cluster,
38 &apic_physflat, 40 &apic_physflat,
@@ -42,17 +44,17 @@ static struct genapic *apic_probe[] __initdata = {
42/* 44/*
43 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 45 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
44 */ 46 */
45void __init setup_apic_routing(void) 47void __init default_setup_apic_routing(void)
46{ 48{
47 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) { 49 if (apic == &apic_x2apic_phys || apic == &apic_x2apic_cluster) {
48 if (!intr_remapping_enabled) 50 if (!intr_remapping_enabled)
49 genapic = &apic_flat; 51 apic = &apic_flat;
50 } 52 }
51 53
52 if (genapic == &apic_flat) { 54 if (apic == &apic_flat) {
53 if (max_physical_apicid >= 8) 55 if (max_physical_apicid >= 8)
54 genapic = &apic_physflat; 56 apic = &apic_physflat;
55 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 57 printk(KERN_INFO "Setting APIC routing to %s\n", apic->name);
56 } 58 }
57 59
58 if (x86_quirks->update_genapic) 60 if (x86_quirks->update_genapic)
@@ -63,18 +65,18 @@ void __init setup_apic_routing(void)
63 65
64void apic_send_IPI_self(int vector) 66void apic_send_IPI_self(int vector)
65{ 67{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 68 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 69}
68 70
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 71int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 72{
71 int i; 73 int i;
72 74
73 for (i = 0; apic_probe[i]; ++i) { 75 for (i = 0; apic_probe[i]; ++i) {
74 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) { 76 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 genapic = apic_probe[i]; 77 apic = apic_probe[i];
76 printk(KERN_INFO "Setting APIC routing to %s.\n", 78 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 genapic->name); 79 apic->name);
78 return 1; 80 return 1;
79 } 81 }
80 } 82 }
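genapic_64.c keeps the same probing model; the global pointer is simply renamed to apic and the UV entry is now compiled in only under CONFIG_X86_UV. The pattern is a NULL-terminated table of candidate drivers whose acpi_madt_oem_check() callbacks are tried in order, with the first match installed. A compact illustration (the driver names and the "SGI" OEM match are simplifications belonging to this sketch, not a statement about the real callbacks):

#include <stdio.h>
#include <string.h>

struct apic_driver {
        const char *name;
        int (*acpi_madt_oem_check)(const char *oem_id, const char *oem_table_id);
};

static int uv_check(const char *oem_id, const char *oem_table_id)
{
        (void)oem_table_id;
        return strcmp(oem_id, "SGI") == 0;      /* simplified OEM match */
}

static int flat_check(const char *oem_id, const char *oem_table_id)
{
        (void)oem_id; (void)oem_table_id;
        return 1;                               /* catch-all fallback */
}

static struct apic_driver apic_uv   = { "UV large system", uv_check };
static struct apic_driver apic_flat = { "flat",            flat_check };

static struct apic_driver *apic_probe[] = { &apic_uv, &apic_flat, NULL };
static struct apic_driver *apic;                /* the driver in use */

static int pick_apic(const char *oem_id, const char *oem_table_id)
{
        for (int i = 0; apic_probe[i]; i++) {
                if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
                        apic = apic_probe[i];
                        printf("Setting APIC routing to %s.\n", apic->name);
                        return 1;
                }
        }
        return 0;
}

int main(void)
{
        pick_apic("SGI", "table");              /* selects the UV entry here */
        return 0;
}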
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 34185488e4fb..249d2d3c034c 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -19,7 +19,6 @@
19#include <asm/smp.h> 19#include <asm/smp.h>
20#include <asm/ipi.h> 20#include <asm/ipi.h>
21#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23 22
24#ifdef CONFIG_ACPI 23#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h> 24#include <acpi/acpi_bus.h>
@@ -74,7 +73,7 @@ static inline void _flat_send_IPI_mask(unsigned long mask, int vector)
74 unsigned long flags; 73 unsigned long flags;
75 74
76 local_irq_save(flags); 75 local_irq_save(flags);
77 __send_IPI_dest_field(mask, vector, APIC_DEST_LOGICAL); 76 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
78 local_irq_restore(flags); 77 local_irq_restore(flags);
79} 78}
80 79
@@ -85,14 +84,15 @@ static void flat_send_IPI_mask(const struct cpumask *cpumask, int vector)
85 _flat_send_IPI_mask(mask, vector); 84 _flat_send_IPI_mask(mask, vector);
86} 85}
87 86
88static void flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, 87static void
89 int vector) 88 flat_send_IPI_mask_allbutself(const struct cpumask *cpumask, int vector)
90{ 89{
91 unsigned long mask = cpumask_bits(cpumask)[0]; 90 unsigned long mask = cpumask_bits(cpumask)[0];
92 int cpu = smp_processor_id(); 91 int cpu = smp_processor_id();
93 92
94 if (cpu < BITS_PER_LONG) 93 if (cpu < BITS_PER_LONG)
95 clear_bit(cpu, &mask); 94 clear_bit(cpu, &mask);
95
96 _flat_send_IPI_mask(mask, vector); 96 _flat_send_IPI_mask(mask, vector);
97} 97}
98 98
@@ -114,23 +114,27 @@ static void flat_send_IPI_allbutself(int vector)
114 _flat_send_IPI_mask(mask, vector); 114 _flat_send_IPI_mask(mask, vector);
115 } 115 }
116 } else if (num_online_cpus() > 1) { 116 } else if (num_online_cpus() > 1) {
117 __send_IPI_shortcut(APIC_DEST_ALLBUT, vector,APIC_DEST_LOGICAL); 117 __default_send_IPI_shortcut(APIC_DEST_ALLBUT,
118 vector, apic->dest_logical);
118 } 119 }
119} 120}
120 121
121static void flat_send_IPI_all(int vector) 122static void flat_send_IPI_all(int vector)
122{ 123{
123 if (vector == NMI_VECTOR) 124 if (vector == NMI_VECTOR) {
124 flat_send_IPI_mask(cpu_online_mask, vector); 125 flat_send_IPI_mask(cpu_online_mask, vector);
125 else 126 } else {
126 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 127 __default_send_IPI_shortcut(APIC_DEST_ALLINC,
128 vector, apic->dest_logical);
129 }
127} 130}
128 131
129static unsigned int get_apic_id(unsigned long x) 132static unsigned int flat_get_apic_id(unsigned long x)
130{ 133{
131 unsigned int id; 134 unsigned int id;
132 135
133 id = (((x)>>24) & 0xFFu); 136 id = (((x)>>24) & 0xFFu);
137
134 return id; 138 return id;
135} 139}
136 140
@@ -146,7 +150,7 @@ static unsigned int read_xapic_id(void)
146{ 150{
147 unsigned int id; 151 unsigned int id;
148 152
149 id = get_apic_id(apic_read(APIC_ID)); 153 id = flat_get_apic_id(apic_read(APIC_ID));
150 return id; 154 return id;
151} 155}
152 156
@@ -169,31 +173,62 @@ static unsigned int flat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
169 return mask1 & mask2; 173 return mask1 & mask2;
170} 174}
171 175
172static unsigned int phys_pkg_id(int index_msb) 176static int flat_phys_pkg_id(int initial_apic_id, int index_msb)
173{ 177{
174 return hard_smp_processor_id() >> index_msb; 178 return hard_smp_processor_id() >> index_msb;
175} 179}
176 180
177struct genapic apic_flat = { 181struct genapic apic_flat = {
178 .name = "flat", 182 .name = "flat",
179 .acpi_madt_oem_check = flat_acpi_madt_oem_check, 183 .probe = NULL,
180 .int_delivery_mode = dest_LowestPrio, 184 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
181 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 185 .apic_id_registered = flat_apic_id_registered,
182 .target_cpus = flat_target_cpus, 186
183 .vector_allocation_domain = flat_vector_allocation_domain, 187 .irq_delivery_mode = dest_LowestPrio,
184 .apic_id_registered = flat_apic_id_registered, 188 .irq_dest_mode = 1, /* logical */
185 .init_apic_ldr = flat_init_apic_ldr, 189
186 .send_IPI_all = flat_send_IPI_all, 190 .target_cpus = flat_target_cpus,
187 .send_IPI_allbutself = flat_send_IPI_allbutself, 191 .disable_esr = 0,
188 .send_IPI_mask = flat_send_IPI_mask, 192 .dest_logical = APIC_DEST_LOGICAL,
189 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself, 193 .check_apicid_used = NULL,
190 .send_IPI_self = apic_send_IPI_self, 194 .check_apicid_present = NULL,
191 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 195
192 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and, 196 .vector_allocation_domain = flat_vector_allocation_domain,
193 .phys_pkg_id = phys_pkg_id, 197 .init_apic_ldr = flat_init_apic_ldr,
194 .get_apic_id = get_apic_id, 198
195 .set_apic_id = set_apic_id, 199 .ioapic_phys_id_map = NULL,
196 .apic_id_mask = (0xFFu<<24), 200 .setup_apic_routing = NULL,
201 .multi_timer_check = NULL,
202 .apicid_to_node = NULL,
203 .cpu_to_logical_apicid = NULL,
204 .cpu_present_to_apicid = default_cpu_present_to_apicid,
205 .apicid_to_cpu_present = NULL,
206 .setup_portio_remap = NULL,
207 .check_phys_apicid_present = default_check_phys_apicid_present,
208 .enable_apic_mode = NULL,
209 .phys_pkg_id = flat_phys_pkg_id,
210 .mps_oem_check = NULL,
211
212 .get_apic_id = flat_get_apic_id,
213 .set_apic_id = set_apic_id,
214 .apic_id_mask = 0xFFu << 24,
215
216 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
217 .cpu_mask_to_apicid_and = flat_cpu_mask_to_apicid_and,
218
219 .send_IPI_mask = flat_send_IPI_mask,
220 .send_IPI_mask_allbutself = flat_send_IPI_mask_allbutself,
221 .send_IPI_allbutself = flat_send_IPI_allbutself,
222 .send_IPI_all = flat_send_IPI_all,
223 .send_IPI_self = apic_send_IPI_self,
224
225 .wakeup_cpu = NULL,
226 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
227 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
228 .wait_for_init_deassert = NULL,
229 .smp_callin_clear_local_apic = NULL,
230 .store_NMI_vector = NULL,
231 .inquire_remote_apic = NULL,
197}; 232};
198 233
199/* 234/*
@@ -232,18 +267,18 @@ static void physflat_vector_allocation_domain(int cpu, struct cpumask *retmask)
232 267
233static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector) 268static void physflat_send_IPI_mask(const struct cpumask *cpumask, int vector)
234{ 269{
235 send_IPI_mask_sequence(cpumask, vector); 270 default_send_IPI_mask_sequence_phys(cpumask, vector);
236} 271}
237 272
238static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask, 273static void physflat_send_IPI_mask_allbutself(const struct cpumask *cpumask,
239 int vector) 274 int vector)
240{ 275{
241 send_IPI_mask_allbutself(cpumask, vector); 276 default_send_IPI_mask_allbutself_phys(cpumask, vector);
242} 277}
243 278
244static void physflat_send_IPI_allbutself(int vector) 279static void physflat_send_IPI_allbutself(int vector)
245{ 280{
246 send_IPI_mask_allbutself(cpu_online_mask, vector); 281 default_send_IPI_mask_allbutself_phys(cpu_online_mask, vector);
247} 282}
248 283
249static void physflat_send_IPI_all(int vector) 284static void physflat_send_IPI_all(int vector)
@@ -276,32 +311,67 @@ physflat_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
276 * We're using fixed IRQ delivery, can only return one phys APIC ID. 311 * We're using fixed IRQ delivery, can only return one phys APIC ID.
277 * May as well be the first. 312 * May as well be the first.
278 */ 313 */
279 for_each_cpu_and(cpu, cpumask, andmask) 314 for_each_cpu_and(cpu, cpumask, andmask) {
280 if (cpumask_test_cpu(cpu, cpu_online_mask)) 315 if (cpumask_test_cpu(cpu, cpu_online_mask))
281 break; 316 break;
317 }
282 if (cpu < nr_cpu_ids) 318 if (cpu < nr_cpu_ids)
283 return per_cpu(x86_cpu_to_apicid, cpu); 319 return per_cpu(x86_cpu_to_apicid, cpu);
320
284 return BAD_APICID; 321 return BAD_APICID;
285} 322}
286 323
287struct genapic apic_physflat = { 324struct genapic apic_physflat = {
288 .name = "physical flat", 325
289 .acpi_madt_oem_check = physflat_acpi_madt_oem_check, 326 .name = "physical flat",
290 .int_delivery_mode = dest_Fixed, 327 .probe = NULL,
291 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 328 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
292 .target_cpus = physflat_target_cpus, 329 .apic_id_registered = flat_apic_id_registered,
293 .vector_allocation_domain = physflat_vector_allocation_domain, 330
294 .apic_id_registered = flat_apic_id_registered, 331 .irq_delivery_mode = dest_Fixed,
295 .init_apic_ldr = flat_init_apic_ldr,/*not needed, but shouldn't hurt*/ 332 .irq_dest_mode = 0, /* physical */
296 .send_IPI_all = physflat_send_IPI_all, 333
297 .send_IPI_allbutself = physflat_send_IPI_allbutself, 334 .target_cpus = physflat_target_cpus,
298 .send_IPI_mask = physflat_send_IPI_mask, 335 .disable_esr = 0,
299 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself, 336 .dest_logical = 0,
300 .send_IPI_self = apic_send_IPI_self, 337 .check_apicid_used = NULL,
301 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 338 .check_apicid_present = NULL,
302 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and, 339
303 .phys_pkg_id = phys_pkg_id, 340 .vector_allocation_domain = physflat_vector_allocation_domain,
304 .get_apic_id = get_apic_id, 341 /* not needed, but shouldn't hurt: */
305 .set_apic_id = set_apic_id, 342 .init_apic_ldr = flat_init_apic_ldr,
306 .apic_id_mask = (0xFFu<<24), 343
344 .ioapic_phys_id_map = NULL,
345 .setup_apic_routing = NULL,
346 .multi_timer_check = NULL,
347 .apicid_to_node = NULL,
348 .cpu_to_logical_apicid = NULL,
349 .cpu_present_to_apicid = default_cpu_present_to_apicid,
350 .apicid_to_cpu_present = NULL,
351 .setup_portio_remap = NULL,
352 .check_phys_apicid_present = default_check_phys_apicid_present,
353 .enable_apic_mode = NULL,
354 .phys_pkg_id = flat_phys_pkg_id,
355 .mps_oem_check = NULL,
356
357 .get_apic_id = flat_get_apic_id,
358 .set_apic_id = set_apic_id,
359 .apic_id_mask = 0xFFu << 24,
360
361 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
362 .cpu_mask_to_apicid_and = physflat_cpu_mask_to_apicid_and,
363
364 .send_IPI_mask = physflat_send_IPI_mask,
365 .send_IPI_mask_allbutself = physflat_send_IPI_mask_allbutself,
366 .send_IPI_allbutself = physflat_send_IPI_allbutself,
367 .send_IPI_all = physflat_send_IPI_all,
368 .send_IPI_self = apic_send_IPI_self,
369
370 .wakeup_cpu = NULL,
371 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
372 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
373 .wait_for_init_deassert = NULL,
374 .smp_callin_clear_local_apic = NULL,
375 .store_NMI_vector = NULL,
376 .inquire_remote_apic = NULL,
307}; 377};
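In flat logical mode the IPI destination is literally one bit per CPU, which is why flat_send_IPI_mask_allbutself() can take word 0 of the cpumask, clear the sender's own bit and hand the result straight to _flat_send_IPI_mask(). A tiny model of that masking step (the printf stands in for the ICR write; names are this sketch's own):

#include <stdio.h>

/* one bit per CPU, which is exactly how flat logical mode encodes targets */
static void send_ipi_mask(unsigned long mask, int vector)
{
        printf("IPI vector 0x%x -> destination mask 0x%02lx\n", vector, mask);
}

static void send_ipi_mask_allbutself(unsigned long mask, int this_cpu, int vector)
{
        if (this_cpu < (int)(8 * sizeof(mask)))
                mask &= ~(1UL << this_cpu);     /* clear_bit(cpu, &mask) */
        send_ipi_mask(mask, vector);
}

int main(void)
{
        /* CPUs 0-3 targeted, caller is CPU 2: its bit is dropped */
        send_ipi_mask_allbutself(0x0F, 2, 0xfd);
        return 0;
}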
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
index 6ce497cc372d..7c87156b6411 100644
--- a/arch/x86/kernel/genx2apic_cluster.c
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -36,8 +36,8 @@ static void x2apic_vector_allocation_domain(int cpu, struct cpumask *retmask)
36 cpumask_set_cpu(cpu, retmask); 36 cpumask_set_cpu(cpu, retmask);
37} 37}
38 38
39static void __x2apic_send_IPI_dest(unsigned int apicid, int vector, 39static void
40 unsigned int dest) 40 __x2apic_send_IPI_dest(unsigned int apicid, int vector, unsigned int dest)
41{ 41{
42 unsigned long cfg; 42 unsigned long cfg;
43 43
@@ -57,45 +57,50 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
57 */ 57 */
58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) 58static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
59{ 59{
60 unsigned long flags;
61 unsigned long query_cpu; 60 unsigned long query_cpu;
61 unsigned long flags;
62 62
63 local_irq_save(flags); 63 local_irq_save(flags);
64 for_each_cpu(query_cpu, mask) 64 for_each_cpu(query_cpu, mask) {
65 __x2apic_send_IPI_dest( 65 __x2apic_send_IPI_dest(
66 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 66 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL); 67 vector, apic->dest_logical);
68 }
68 local_irq_restore(flags); 69 local_irq_restore(flags);
69} 70}
70 71
71static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, 72static void
72 int vector) 73 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
73{ 74{
74 unsigned long flags;
75 unsigned long query_cpu;
76 unsigned long this_cpu = smp_processor_id(); 75 unsigned long this_cpu = smp_processor_id();
76 unsigned long query_cpu;
77 unsigned long flags;
77 78
78 local_irq_save(flags); 79 local_irq_save(flags);
79 for_each_cpu(query_cpu, mask) 80 for_each_cpu(query_cpu, mask) {
80 if (query_cpu != this_cpu) 81 if (query_cpu == this_cpu)
81 __x2apic_send_IPI_dest( 82 continue;
83 __x2apic_send_IPI_dest(
82 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 84 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
83 vector, APIC_DEST_LOGICAL); 85 vector, apic->dest_logical);
86 }
84 local_irq_restore(flags); 87 local_irq_restore(flags);
85} 88}
86 89
87static void x2apic_send_IPI_allbutself(int vector) 90static void x2apic_send_IPI_allbutself(int vector)
88{ 91{
89 unsigned long flags;
90 unsigned long query_cpu;
91 unsigned long this_cpu = smp_processor_id(); 92 unsigned long this_cpu = smp_processor_id();
93 unsigned long query_cpu;
94 unsigned long flags;
92 95
93 local_irq_save(flags); 96 local_irq_save(flags);
94 for_each_online_cpu(query_cpu) 97 for_each_online_cpu(query_cpu) {
95 if (query_cpu != this_cpu) 98 if (query_cpu == this_cpu)
96 __x2apic_send_IPI_dest( 99 continue;
100 __x2apic_send_IPI_dest(
97 per_cpu(x86_cpu_to_logical_apicid, query_cpu), 101 per_cpu(x86_cpu_to_logical_apicid, query_cpu),
98 vector, APIC_DEST_LOGICAL); 102 vector, apic->dest_logical);
103 }
99 local_irq_restore(flags); 104 local_irq_restore(flags);
100} 105}
101 106
@@ -111,21 +116,21 @@ static int x2apic_apic_id_registered(void)
111 116
112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) 117static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
113{ 118{
114 int cpu;
115
116 /* 119 /*
117 * We're using fixed IRQ delivery, can only return one logical APIC ID. 120 * We're using fixed IRQ delivery, can only return one logical APIC ID.
118 * May as well be the first. 121 * May as well be the first.
119 */ 122 */
120 cpu = cpumask_first(cpumask); 123 int cpu = cpumask_first(cpumask);
124
121 if ((unsigned)cpu < nr_cpu_ids) 125 if ((unsigned)cpu < nr_cpu_ids)
122 return per_cpu(x86_cpu_to_logical_apicid, cpu); 126 return per_cpu(x86_cpu_to_logical_apicid, cpu);
123 else 127 else
124 return BAD_APICID; 128 return BAD_APICID;
125} 129}
126 130
127static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 131static unsigned int
128 const struct cpumask *andmask) 132x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 const struct cpumask *andmask)
129{ 134{
130 int cpu; 135 int cpu;
131 136
@@ -133,15 +138,18 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
133 * We're using fixed IRQ delivery, can only return one logical APIC ID. 138 * We're using fixed IRQ delivery, can only return one logical APIC ID.
134 * May as well be the first. 139 * May as well be the first.
135 */ 140 */
136 for_each_cpu_and(cpu, cpumask, andmask) 141 for_each_cpu_and(cpu, cpumask, andmask) {
137 if (cpumask_test_cpu(cpu, cpu_online_mask)) 142 if (cpumask_test_cpu(cpu, cpu_online_mask))
138 break; 143 break;
144 }
145
139 if (cpu < nr_cpu_ids) 146 if (cpu < nr_cpu_ids)
140 return per_cpu(x86_cpu_to_logical_apicid, cpu); 147 return per_cpu(x86_cpu_to_logical_apicid, cpu);
148
141 return BAD_APICID; 149 return BAD_APICID;
142} 150}
143 151
144static unsigned int get_apic_id(unsigned long x) 152static unsigned int x2apic_cluster_phys_get_apic_id(unsigned long x)
145{ 153{
146 unsigned int id; 154 unsigned int id;
147 155
@@ -157,7 +165,7 @@ static unsigned long set_apic_id(unsigned int id)
157 return x; 165 return x;
158} 166}
159 167
160static unsigned int phys_pkg_id(int index_msb) 168static int x2apic_cluster_phys_pkg_id(int initial_apicid, int index_msb)
161{ 169{
162 return current_cpu_data.initial_apicid >> index_msb; 170 return current_cpu_data.initial_apicid >> index_msb;
163} 171}
@@ -172,27 +180,58 @@ static void init_x2apic_ldr(void)
172 int cpu = smp_processor_id(); 180 int cpu = smp_processor_id();
173 181
174 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR); 182 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
175 return;
176} 183}
177 184
178struct genapic apic_x2apic_cluster = { 185struct genapic apic_x2apic_cluster = {
179 .name = "cluster x2apic", 186
180 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 187 .name = "cluster x2apic",
181 .int_delivery_mode = dest_LowestPrio, 188 .probe = NULL,
182 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 189 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
183 .target_cpus = x2apic_target_cpus, 190 .apic_id_registered = x2apic_apic_id_registered,
184 .vector_allocation_domain = x2apic_vector_allocation_domain, 191
185 .apic_id_registered = x2apic_apic_id_registered, 192 .irq_delivery_mode = dest_LowestPrio,
186 .init_apic_ldr = init_x2apic_ldr, 193 .irq_dest_mode = 1, /* logical */
187 .send_IPI_all = x2apic_send_IPI_all, 194
188 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 195 .target_cpus = x2apic_target_cpus,
189 .send_IPI_mask = x2apic_send_IPI_mask, 196 .disable_esr = 0,
190 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, 197 .dest_logical = APIC_DEST_LOGICAL,
191 .send_IPI_self = x2apic_send_IPI_self, 198 .check_apicid_used = NULL,
192 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 199 .check_apicid_present = NULL,
193 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, 200
194 .phys_pkg_id = phys_pkg_id, 201 .vector_allocation_domain = x2apic_vector_allocation_domain,
195 .get_apic_id = get_apic_id, 202 .init_apic_ldr = init_x2apic_ldr,
196 .set_apic_id = set_apic_id, 203
197 .apic_id_mask = (0xFFFFFFFFu), 204 .ioapic_phys_id_map = NULL,
205 .setup_apic_routing = NULL,
206 .multi_timer_check = NULL,
207 .apicid_to_node = NULL,
208 .cpu_to_logical_apicid = NULL,
209 .cpu_present_to_apicid = default_cpu_present_to_apicid,
210 .apicid_to_cpu_present = NULL,
211 .setup_portio_remap = NULL,
212 .check_phys_apicid_present = default_check_phys_apicid_present,
213 .enable_apic_mode = NULL,
214 .phys_pkg_id = x2apic_cluster_phys_pkg_id,
215 .mps_oem_check = NULL,
216
217 .get_apic_id = x2apic_cluster_phys_get_apic_id,
218 .set_apic_id = set_apic_id,
219 .apic_id_mask = 0xFFFFFFFFu,
220
221 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
222 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
223
224 .send_IPI_mask = x2apic_send_IPI_mask,
225 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
226 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
227 .send_IPI_all = x2apic_send_IPI_all,
228 .send_IPI_self = x2apic_send_IPI_self,
229
230 .wakeup_cpu = NULL,
231 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
232 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
233 .wait_for_init_deassert = NULL,
234 .smp_callin_clear_local_apic = NULL,
235 .store_NMI_vector = NULL,
236 .inquire_remote_apic = NULL,
198}; 237};
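The x2apic cluster driver has no cheap "all but self" shorthand here, so both the mask and allbutself variants loop over the target CPUs and issue one ICR write per destination, skipping the local CPU with the early continue the cleanup introduces. A sketch of that loop with the per-cpu data reduced to plain arrays (the IDs and helper names are this sketch's own):

#include <stdio.h>

#define NCPUS 4

static unsigned int logical_apicid[NCPUS] = { 0x1, 0x2, 0x4, 0x8 };

static void x2apic_send_ipi_dest(unsigned int apicid, int vector)
{
        /* stands in for the ICR write done by __x2apic_send_IPI_dest() */
        printf("ICR write: dest apicid 0x%x, vector 0x%x\n", apicid, vector);
}

static void send_ipi_allbutself(int this_cpu, int vector)
{
        for (int cpu = 0; cpu < NCPUS; cpu++) {
                if (cpu == this_cpu)
                        continue;               /* skip the sender */
                x2apic_send_ipi_dest(logical_apicid[cpu], vector);
        }
}

int main(void)
{
        send_ipi_allbutself(1, 0xfd);           /* CPU 1 interrupts the rest */
        return 0;
}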
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
index 21bcc0e098ba..5cbae8aa0408 100644
--- a/arch/x86/kernel/genx2apic_phys.c
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -55,8 +55,8 @@ static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
55 55
56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector) 56static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
57{ 57{
58 unsigned long flags;
59 unsigned long query_cpu; 58 unsigned long query_cpu;
59 unsigned long flags;
60 60
61 local_irq_save(flags); 61 local_irq_save(flags);
62 for_each_cpu(query_cpu, mask) { 62 for_each_cpu(query_cpu, mask) {
@@ -66,12 +66,12 @@ static void x2apic_send_IPI_mask(const struct cpumask *mask, int vector)
66 local_irq_restore(flags); 66 local_irq_restore(flags);
67} 67}
68 68
69static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, 69static void
70 int vector) 70 x2apic_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
71{ 71{
72 unsigned long flags;
73 unsigned long query_cpu;
74 unsigned long this_cpu = smp_processor_id(); 72 unsigned long this_cpu = smp_processor_id();
73 unsigned long query_cpu;
74 unsigned long flags;
75 75
76 local_irq_save(flags); 76 local_irq_save(flags);
77 for_each_cpu(query_cpu, mask) { 77 for_each_cpu(query_cpu, mask) {
@@ -85,16 +85,17 @@ static void x2apic_send_IPI_mask_allbutself(const struct cpumask *mask,
85 85
86static void x2apic_send_IPI_allbutself(int vector) 86static void x2apic_send_IPI_allbutself(int vector)
87{ 87{
88 unsigned long flags;
89 unsigned long query_cpu;
90 unsigned long this_cpu = smp_processor_id(); 88 unsigned long this_cpu = smp_processor_id();
89 unsigned long query_cpu;
90 unsigned long flags;
91 91
92 local_irq_save(flags); 92 local_irq_save(flags);
93 for_each_online_cpu(query_cpu) 93 for_each_online_cpu(query_cpu) {
94 if (query_cpu != this_cpu) 94 if (query_cpu == this_cpu)
95 __x2apic_send_IPI_dest( 95 continue;
96 per_cpu(x86_cpu_to_apicid, query_cpu), 96 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
97 vector, APIC_DEST_PHYSICAL); 97 vector, APIC_DEST_PHYSICAL);
98 }
98 local_irq_restore(flags); 99 local_irq_restore(flags);
99} 100}
100 101
@@ -110,21 +111,21 @@ static int x2apic_apic_id_registered(void)
110 111
111static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask) 112static unsigned int x2apic_cpu_mask_to_apicid(const struct cpumask *cpumask)
112{ 113{
113 int cpu;
114
115 /* 114 /*
116 * We're using fixed IRQ delivery, can only return one phys APIC ID. 115 * We're using fixed IRQ delivery, can only return one phys APIC ID.
117 * May as well be the first. 116 * May as well be the first.
118 */ 117 */
119 cpu = cpumask_first(cpumask); 118 int cpu = cpumask_first(cpumask);
119
120 if ((unsigned)cpu < nr_cpu_ids) 120 if ((unsigned)cpu < nr_cpu_ids)
121 return per_cpu(x86_cpu_to_apicid, cpu); 121 return per_cpu(x86_cpu_to_apicid, cpu);
122 else 122 else
123 return BAD_APICID; 123 return BAD_APICID;
124} 124}
125 125
126static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 126static unsigned int
127 const struct cpumask *andmask) 127x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
128 const struct cpumask *andmask)
128{ 129{
129 int cpu; 130 int cpu;
130 131
@@ -132,31 +133,28 @@ static unsigned int x2apic_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
132 * We're using fixed IRQ delivery, can only return one phys APIC ID. 133 * We're using fixed IRQ delivery, can only return one phys APIC ID.
133 * May as well be the first. 134 * May as well be the first.
134 */ 135 */
135 for_each_cpu_and(cpu, cpumask, andmask) 136 for_each_cpu_and(cpu, cpumask, andmask) {
136 if (cpumask_test_cpu(cpu, cpu_online_mask)) 137 if (cpumask_test_cpu(cpu, cpu_online_mask))
137 break; 138 break;
139 }
140
138 if (cpu < nr_cpu_ids) 141 if (cpu < nr_cpu_ids)
139 return per_cpu(x86_cpu_to_apicid, cpu); 142 return per_cpu(x86_cpu_to_apicid, cpu);
143
140 return BAD_APICID; 144 return BAD_APICID;
141} 145}
142 146
143static unsigned int get_apic_id(unsigned long x) 147static unsigned int x2apic_phys_get_apic_id(unsigned long x)
144{ 148{
145 unsigned int id; 149 return x;
146
147 id = x;
148 return id;
149} 150}
150 151
151static unsigned long set_apic_id(unsigned int id) 152static unsigned long set_apic_id(unsigned int id)
152{ 153{
153 unsigned long x; 154 return id;
154
155 x = id;
156 return x;
157} 155}
158 156
159static unsigned int phys_pkg_id(int index_msb) 157static int x2apic_phys_pkg_id(int initial_apicid, int index_msb)
160{ 158{
161 return current_cpu_data.initial_apicid >> index_msb; 159 return current_cpu_data.initial_apicid >> index_msb;
162} 160}
@@ -168,27 +166,58 @@ static void x2apic_send_IPI_self(int vector)
168 166
169static void init_x2apic_ldr(void) 167static void init_x2apic_ldr(void)
170{ 168{
171 return;
172} 169}
173 170
174struct genapic apic_x2apic_phys = { 171struct genapic apic_x2apic_phys = {
175 .name = "physical x2apic", 172
176 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check, 173 .name = "physical x2apic",
177 .int_delivery_mode = dest_Fixed, 174 .probe = NULL,
178 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 175 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
179 .target_cpus = x2apic_target_cpus, 176 .apic_id_registered = x2apic_apic_id_registered,
180 .vector_allocation_domain = x2apic_vector_allocation_domain, 177
181 .apic_id_registered = x2apic_apic_id_registered, 178 .irq_delivery_mode = dest_Fixed,
182 .init_apic_ldr = init_x2apic_ldr, 179 .irq_dest_mode = 0, /* physical */
183 .send_IPI_all = x2apic_send_IPI_all, 180
184 .send_IPI_allbutself = x2apic_send_IPI_allbutself, 181 .target_cpus = x2apic_target_cpus,
185 .send_IPI_mask = x2apic_send_IPI_mask, 182 .disable_esr = 0,
186 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself, 183 .dest_logical = 0,
187 .send_IPI_self = x2apic_send_IPI_self, 184 .check_apicid_used = NULL,
188 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid, 185 .check_apicid_present = NULL,
189 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and, 186
190 .phys_pkg_id = phys_pkg_id, 187 .vector_allocation_domain = x2apic_vector_allocation_domain,
191 .get_apic_id = get_apic_id, 188 .init_apic_ldr = init_x2apic_ldr,
192 .set_apic_id = set_apic_id, 189
193 .apic_id_mask = (0xFFFFFFFFu), 190 .ioapic_phys_id_map = NULL,
191 .setup_apic_routing = NULL,
192 .multi_timer_check = NULL,
193 .apicid_to_node = NULL,
194 .cpu_to_logical_apicid = NULL,
195 .cpu_present_to_apicid = default_cpu_present_to_apicid,
196 .apicid_to_cpu_present = NULL,
197 .setup_portio_remap = NULL,
198 .check_phys_apicid_present = default_check_phys_apicid_present,
199 .enable_apic_mode = NULL,
200 .phys_pkg_id = x2apic_phys_pkg_id,
201 .mps_oem_check = NULL,
202
203 .get_apic_id = x2apic_phys_get_apic_id,
204 .set_apic_id = set_apic_id,
205 .apic_id_mask = 0xFFFFFFFFu,
206
207 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
208 .cpu_mask_to_apicid_and = x2apic_cpu_mask_to_apicid_and,
209
210 .send_IPI_mask = x2apic_send_IPI_mask,
211 .send_IPI_mask_allbutself = x2apic_send_IPI_mask_allbutself,
212 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
213 .send_IPI_all = x2apic_send_IPI_all,
214 .send_IPI_self = x2apic_send_IPI_self,
215
216 .wakeup_cpu = NULL,
217 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
218 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
219 .wait_for_init_deassert = NULL,
220 .smp_callin_clear_local_apic = NULL,
221 .store_NMI_vector = NULL,
222 .inquire_remote_apic = NULL,
194}; 223};
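The cpu_mask_to_apicid_and() helpers in these drivers all follow one rule: fixed delivery can target only a single APIC ID, so the first online CPU present in both masks wins and an empty intersection yields BAD_APICID. Modelled here with plain bitmasks (a simplification of this sketch; the kernel uses cpumask_t and per-cpu translation tables):

#include <stdio.h>

#define BAD_APICID 0xFFFFu

static unsigned int apicid_of[8] = { 0, 2, 4, 6, 8, 10, 12, 14 };

static unsigned int mask_to_apicid_and(unsigned long mask, unsigned long andmask,
                                       unsigned long online)
{
        unsigned long candidates = mask & andmask & online;

        for (int cpu = 0; cpu < 8; cpu++) {
                if (candidates & (1UL << cpu))
                        return apicid_of[cpu];  /* "may as well be the first" */
        }
        return BAD_APICID;                      /* empty intersection */
}

int main(void)
{
        /* cpus {2,3} AND {1,3} AND online -> cpu 3 -> apic id 6 */
        printf("0x%x\n", mask_to_apicid_and(0x0C, 0x0A, 0xFF));
        return 0;
}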
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b193e082f6ce..6adb5e6f4d92 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -25,6 +25,7 @@
25#include <asm/ipi.h> 25#include <asm/ipi.h>
26#include <asm/genapic.h> 26#include <asm/genapic.h>
27#include <asm/pgtable.h> 27#include <asm/pgtable.h>
28#include <asm/uv/uv.h>
28#include <asm/uv/uv_mmrs.h> 29#include <asm/uv/uv_mmrs.h>
29#include <asm/uv/uv_hub.h> 30#include <asm/uv/uv_hub.h>
30#include <asm/uv/bios.h> 31#include <asm/uv/bios.h>
@@ -117,12 +118,13 @@ static void uv_send_IPI_one(int cpu, int vector)
117 int pnode; 118 int pnode;
118 119
119 apicid = per_cpu(x86_cpu_to_apicid, cpu); 120 apicid = per_cpu(x86_cpu_to_apicid, cpu);
120 lapicid = apicid & 0x3f; /* ZZZ macro needed */ 121 lapicid = apicid & 0x3f; /* ZZZ macro needed */
121 pnode = uv_apicid_to_pnode(apicid); 122 pnode = uv_apicid_to_pnode(apicid);
122 val = 123
123 (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << 124 val = ( 1UL << UVH_IPI_INT_SEND_SHFT ) |
124 UVH_IPI_INT_APIC_ID_SHFT) | 125 ( lapicid << UVH_IPI_INT_APIC_ID_SHFT ) |
125 (vector << UVH_IPI_INT_VECTOR_SHFT); 126 ( vector << UVH_IPI_INT_VECTOR_SHFT );
127
126 uv_write_global_mmr64(pnode, UVH_IPI_INT, val); 128 uv_write_global_mmr64(pnode, UVH_IPI_INT, val);
127} 129}
128 130
@@ -136,22 +138,24 @@ static void uv_send_IPI_mask(const struct cpumask *mask, int vector)
136 138
137static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector) 139static void uv_send_IPI_mask_allbutself(const struct cpumask *mask, int vector)
138{ 140{
139 unsigned int cpu;
140 unsigned int this_cpu = smp_processor_id(); 141 unsigned int this_cpu = smp_processor_id();
142 unsigned int cpu;
141 143
142 for_each_cpu(cpu, mask) 144 for_each_cpu(cpu, mask) {
143 if (cpu != this_cpu) 145 if (cpu != this_cpu)
144 uv_send_IPI_one(cpu, vector); 146 uv_send_IPI_one(cpu, vector);
147 }
145} 148}
146 149
147static void uv_send_IPI_allbutself(int vector) 150static void uv_send_IPI_allbutself(int vector)
148{ 151{
149 unsigned int cpu;
150 unsigned int this_cpu = smp_processor_id(); 152 unsigned int this_cpu = smp_processor_id();
153 unsigned int cpu;
151 154
152 for_each_online_cpu(cpu) 155 for_each_online_cpu(cpu) {
153 if (cpu != this_cpu) 156 if (cpu != this_cpu)
154 uv_send_IPI_one(cpu, vector); 157 uv_send_IPI_one(cpu, vector);
158 }
155} 159}
156 160
157static void uv_send_IPI_all(int vector) 161static void uv_send_IPI_all(int vector)
@@ -170,21 +174,21 @@ static void uv_init_apic_ldr(void)
170 174
171static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask) 175static unsigned int uv_cpu_mask_to_apicid(const struct cpumask *cpumask)
172{ 176{
173 int cpu;
174
175 /* 177 /*
176 * We're using fixed IRQ delivery, can only return one phys APIC ID. 178 * We're using fixed IRQ delivery, can only return one phys APIC ID.
177 * May as well be the first. 179 * May as well be the first.
178 */ 180 */
179 cpu = cpumask_first(cpumask); 181 int cpu = cpumask_first(cpumask);
182
180 if ((unsigned)cpu < nr_cpu_ids) 183 if ((unsigned)cpu < nr_cpu_ids)
181 return per_cpu(x86_cpu_to_apicid, cpu); 184 return per_cpu(x86_cpu_to_apicid, cpu);
182 else 185 else
183 return BAD_APICID; 186 return BAD_APICID;
184} 187}
185 188
186static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask, 189static unsigned int
187 const struct cpumask *andmask) 190uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
191 const struct cpumask *andmask)
188{ 192{
189 int cpu; 193 int cpu;
190 194
@@ -192,15 +196,17 @@ static unsigned int uv_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
192 * We're using fixed IRQ delivery, can only return one phys APIC ID. 196 * We're using fixed IRQ delivery, can only return one phys APIC ID.
193 * May as well be the first. 197 * May as well be the first.
194 */ 198 */
195 for_each_cpu_and(cpu, cpumask, andmask) 199 for_each_cpu_and(cpu, cpumask, andmask) {
196 if (cpumask_test_cpu(cpu, cpu_online_mask)) 200 if (cpumask_test_cpu(cpu, cpu_online_mask))
197 break; 201 break;
202 }
198 if (cpu < nr_cpu_ids) 203 if (cpu < nr_cpu_ids)
199 return per_cpu(x86_cpu_to_apicid, cpu); 204 return per_cpu(x86_cpu_to_apicid, cpu);
205
200 return BAD_APICID; 206 return BAD_APICID;
201} 207}
202 208
203static unsigned int get_apic_id(unsigned long x) 209static unsigned int x2apic_get_apic_id(unsigned long x)
204{ 210{
205 unsigned int id; 211 unsigned int id;
206 212
@@ -222,10 +228,10 @@ static unsigned long set_apic_id(unsigned int id)
222static unsigned int uv_read_apic_id(void) 228static unsigned int uv_read_apic_id(void)
223{ 229{
224 230
225 return get_apic_id(apic_read(APIC_ID)); 231 return x2apic_get_apic_id(apic_read(APIC_ID));
226} 232}
227 233
228static unsigned int phys_pkg_id(int index_msb) 234static int uv_phys_pkg_id(int initial_apicid, int index_msb)
229{ 235{
230 return uv_read_apic_id() >> index_msb; 236 return uv_read_apic_id() >> index_msb;
231} 237}
@@ -236,25 +242,57 @@ static void uv_send_IPI_self(int vector)
236} 242}
237 243
238struct genapic apic_x2apic_uv_x = { 244struct genapic apic_x2apic_uv_x = {
239 .name = "UV large system", 245
240 .acpi_madt_oem_check = uv_acpi_madt_oem_check, 246 .name = "UV large system",
241 .int_delivery_mode = dest_Fixed, 247 .probe = NULL,
242 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 248 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
243 .target_cpus = uv_target_cpus, 249 .apic_id_registered = uv_apic_id_registered,
244 .vector_allocation_domain = uv_vector_allocation_domain, 250
245 .apic_id_registered = uv_apic_id_registered, 251 .irq_delivery_mode = dest_Fixed,
246 .init_apic_ldr = uv_init_apic_ldr, 252 .irq_dest_mode = 1, /* logical */
247 .send_IPI_all = uv_send_IPI_all, 253
248 .send_IPI_allbutself = uv_send_IPI_allbutself, 254 .target_cpus = uv_target_cpus,
249 .send_IPI_mask = uv_send_IPI_mask, 255 .disable_esr = 0,
250 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself, 256 .dest_logical = APIC_DEST_LOGICAL,
251 .send_IPI_self = uv_send_IPI_self, 257 .check_apicid_used = NULL,
252 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 258 .check_apicid_present = NULL,
253 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and, 259
254 .phys_pkg_id = phys_pkg_id, 260 .vector_allocation_domain = uv_vector_allocation_domain,
255 .get_apic_id = get_apic_id, 261 .init_apic_ldr = uv_init_apic_ldr,
256 .set_apic_id = set_apic_id, 262
257 .apic_id_mask = (0xFFFFFFFFu), 263 .ioapic_phys_id_map = NULL,
264 .setup_apic_routing = NULL,
265 .multi_timer_check = NULL,
266 .apicid_to_node = NULL,
267 .cpu_to_logical_apicid = NULL,
268 .cpu_present_to_apicid = default_cpu_present_to_apicid,
269 .apicid_to_cpu_present = NULL,
270 .setup_portio_remap = NULL,
271 .check_phys_apicid_present = default_check_phys_apicid_present,
272 .enable_apic_mode = NULL,
273 .phys_pkg_id = uv_phys_pkg_id,
274 .mps_oem_check = NULL,
275
276 .get_apic_id = x2apic_get_apic_id,
277 .set_apic_id = set_apic_id,
278 .apic_id_mask = 0xFFFFFFFFu,
279
280 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
281 .cpu_mask_to_apicid_and = uv_cpu_mask_to_apicid_and,
282
283 .send_IPI_mask = uv_send_IPI_mask,
284 .send_IPI_mask_allbutself = uv_send_IPI_mask_allbutself,
285 .send_IPI_allbutself = uv_send_IPI_allbutself,
286 .send_IPI_all = uv_send_IPI_all,
287 .send_IPI_self = uv_send_IPI_self,
288
289 .wakeup_cpu = NULL,
290 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
291 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
292 .wait_for_init_deassert = NULL,
293 .smp_callin_clear_local_apic = NULL,
294 .store_NMI_vector = NULL,
295 .inquire_remote_apic = NULL,
258}; 296};
259 297
260static __cpuinit void set_x2apic_extra_bits(int pnode) 298static __cpuinit void set_x2apic_extra_bits(int pnode)
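
[Editor's note] The reorganized apic_x2apic_uv_x initializer above relies on C designated initializers, so the callbacks written out as NULL would be NULL even if they were omitted; spelling them out only documents which hooks are optional. A small stand-alone example of that language behaviour (struct and field names invented for illustration):

#include <stdio.h>

struct ops {
	const char *name;
	int (*probe)(void);	/* optional hook */
	int (*setup)(int);	/* optional hook */
	unsigned int id_mask;
};

/* Members not named in a designated initializer are zero-initialized,
 * so .probe below is NULL whether or not it is written out, and the
 * omitted .setup is NULL as well.
 */
static const struct ops example_ops = {
	.name    = "example",
	.probe   = NULL,	/* explicit, purely for documentation */
	.id_mask = 0xFFFFFFFFu,
};

int main(void)
{
	printf("%s: probe=%p setup=%p mask=0x%x\n", example_ops.name,
	       (void *)example_ops.probe, (void *)example_ops.setup,
	       example_ops.id_mask);
	return 0;
}
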
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c4b935..f5b272247690 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,27 +26,6 @@
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda;
31
32#ifdef CONFIG_SMP
33/*
34 * We install an empty cpu_pda pointer table to indicate to early users
35 * (numa_set_node) that the cpu_pda pointer table for cpus other than
36 * the boot cpu is not yet setup.
37 */
38static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
39#else
40static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
41#endif
42
43void __init x86_64_init_pda(void)
44{
45 _cpu_pda = __cpu_pda;
46 cpu_pda(0) = &_boot_cpu_pda;
47 pda_init(0);
48}
49
50static void __init zap_identity_mappings(void) 29static void __init zap_identity_mappings(void)
51{ 30{
52 pgd_t *pgd = pgd_offset_k(0UL); 31 pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
112 if (console_loglevel == 10) 91 if (console_loglevel == 10)
113 early_printk("Kernel alive\n"); 92 early_printk("Kernel alive\n");
114 93
115 x86_64_init_pda();
116
117 x86_64_start_reservations(real_mode_data); 94 x86_64_start_reservations(real_mode_data);
118} 95}
119 96
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4eea70b..2a0aad7718d5 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -19,6 +19,7 @@
19#include <asm/asm-offsets.h> 19#include <asm/asm-offsets.h>
20#include <asm/setup.h> 20#include <asm/setup.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23/* Physical address */ 24/* Physical address */
24#define pa(X) ((X) - __PAGE_OFFSET) 25#define pa(X) ((X) - __PAGE_OFFSET)
@@ -429,14 +430,34 @@ is386: movl $2,%ecx # set MP
429 ljmp $(__KERNEL_CS),$1f 430 ljmp $(__KERNEL_CS),$1f
4301: movl $(__KERNEL_DS),%eax # reload all the segment registers 4311: movl $(__KERNEL_DS),%eax # reload all the segment registers
431 movl %eax,%ss # after changing gdt. 432 movl %eax,%ss # after changing gdt.
432 movl %eax,%fs # gets reset once there's real percpu
433 433
434 movl $(__USER_DS),%eax # DS/ES contains default USER segment 434 movl $(__USER_DS),%eax # DS/ES contains default USER segment
435 movl %eax,%ds 435 movl %eax,%ds
436 movl %eax,%es 436 movl %eax,%es
437 437
438 xorl %eax,%eax # Clear GS and LDT 438 movl $(__KERNEL_PERCPU), %eax
439 movl %eax,%fs # set this cpu's percpu
440
441#ifdef CONFIG_CC_STACKPROTECTOR
442 /*
443 * The linker can't handle this by relocation. Manually set
444 * base address in stack canary segment descriptor.
445 */
446 cmpb $0,ready
447 jne 1f
448 movl $per_cpu__gdt_page,%eax
449 movl $per_cpu__stack_canary,%ecx
450 subl $20, %ecx
451 movw %cx, 8 * GDT_ENTRY_STACK_CANARY + 2(%eax)
452 shrl $16, %ecx
453 movb %cl, 8 * GDT_ENTRY_STACK_CANARY + 4(%eax)
454 movb %ch, 8 * GDT_ENTRY_STACK_CANARY + 7(%eax)
4551:
456#endif
457 movl $(__KERNEL_STACK_CANARY),%eax
439 movl %eax,%gs 458 movl %eax,%gs
459
460 xorl %eax,%eax # Clear LDT
440 lldt %ax 461 lldt %ax
441 462
442 cld # gcc2 wants the direction flag cleared at all times 463 cld # gcc2 wants the direction flag cleared at all times
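
[Editor's note] The CONFIG_CC_STACKPROTECTOR block added above patches the base field of the stack-canary GDT entry by hand: the low 16 bits of the address go into bytes 2-3 of the 8-byte descriptor, bits 16-23 into byte 4, and bits 24-31 into byte 7, which is exactly what the movw/shrl/movb sequence does (the subl $20 appears to offset the base so the canary variable lands at the fixed %gs-relative slot gcc's i386 stack-protector reads). A user-space sketch of the base-field packing; the descriptor byte layout follows the x86 architecture, the sample address is arbitrary:

#include <stdint.h>
#include <stdio.h>

/* Write a 32-bit segment base into an 8-byte GDT descriptor. */
static void set_desc_base(uint8_t desc[8], uint32_t base)
{
	desc[2] = base & 0xff;		/* base bits  7:0  */
	desc[3] = (base >> 8) & 0xff;	/* base bits 15:8  */
	desc[4] = (base >> 16) & 0xff;	/* base bits 23:16 */
	desc[7] = (base >> 24) & 0xff;	/* base bits 31:24 */
}

static uint32_t get_desc_base(const uint8_t desc[8])
{
	return desc[2] | (desc[3] << 8) | (desc[4] << 16) |
	       ((uint32_t)desc[7] << 24);
}

int main(void)
{
	uint8_t desc[8] = { 0 };
	uint32_t base = 0xc1234560u;	/* arbitrary example address */

	set_desc_base(desc, base);
	printf("0x%08x\n", get_desc_base(desc));	/* prints 0xc1234560 */
	return 0;
}
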
@@ -446,8 +467,6 @@ is386: movl $2,%ecx # set MP
446 movb $1, ready 467 movb $1, ready
447 cmpb $0,%cl # the first CPU calls start_kernel 468 cmpb $0,%cl # the first CPU calls start_kernel
448 je 1f 469 je 1f
449 movl $(__KERNEL_PERCPU), %eax
450 movl %eax,%fs # set this cpu's percpu
451 movl (stack_start), %esp 470 movl (stack_start), %esp
4521: 4711:
453#endif /* CONFIG_SMP */ 472#endif /* CONFIG_SMP */
@@ -548,12 +567,8 @@ early_fault:
548 pushl %eax 567 pushl %eax
549 pushl %edx /* trapno */ 568 pushl %edx /* trapno */
550 pushl $fault_msg 569 pushl $fault_msg
551#ifdef CONFIG_EARLY_PRINTK
552 call early_printk
553#else
554 call printk 570 call printk
555#endif 571#endif
556#endif
557 call dump_stack 572 call dump_stack
558hlt_loop: 573hlt_loop:
559 hlt 574 hlt
@@ -580,11 +595,10 @@ ignore_int:
580 pushl 32(%esp) 595 pushl 32(%esp)
581 pushl 40(%esp) 596 pushl 40(%esp)
582 pushl $int_msg 597 pushl $int_msg
583#ifdef CONFIG_EARLY_PRINTK
584 call early_printk
585#else
586 call printk 598 call printk
587#endif 599
600 call dump_stack
601
588 addl $(5*4),%esp 602 addl $(5*4),%esp
589 popl %ds 603 popl %ds
590 popl %es 604 popl %es
@@ -660,7 +674,7 @@ early_recursion_flag:
660 .long 0 674 .long 0
661 675
662int_msg: 676int_msg:
663 .asciz "Unknown interrupt or fault at EIP %p %p %p\n" 677 .asciz "Unknown interrupt or fault at: %p %p %p\n"
664 678
665fault_msg: 679fault_msg:
666/* fault info: */ 680/* fault info: */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d495563..2e648e3a5ea4 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
@@ -226,12 +227,15 @@ ENTRY(secondary_startup_64)
226 movl %eax,%fs 227 movl %eax,%fs
227 movl %eax,%gs 228 movl %eax,%gs
228 229
229 /* 230 /* Set up %gs.
230 * Setup up a dummy PDA. this is just for some early bootup code 231 *
231 * that does in_interrupt() 232 * The base of %gs always points to the bottom of the irqstack
232 */ 233 * union. If the stack protector canary is enabled, it is
234 * located at %gs:40. Note that, on SMP, the boot cpu uses
235 * init data section till per cpu areas are set up.
236 */
233 movl $MSR_GS_BASE,%ecx 237 movl $MSR_GS_BASE,%ecx
234 movq $empty_zero_page,%rax 238 movq initial_gs(%rip),%rax
235 movq %rax,%rdx 239 movq %rax,%rdx
236 shrq $32,%rdx 240 shrq $32,%rdx
237 wrmsr 241 wrmsr
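
[Editor's note] The rewritten comment above describes the new %gs convention: the base points at the per-cpu irq_stack_union, and with the stack protector enabled the canary sits at %gs:40, the fixed offset gcc's x86-64 stack-protector code reads. The wrmsr itself takes the 64-bit base split across edx:eax. A hedged user-space sketch of that split, with a union shaped roughly like irq_stack_union (the stack size and field names are assumptions for illustration, and the anonymous struct needs C11 or the GNU extension):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Roughly the shape of the kernel's irq_stack_union: the first 40 bytes
 * are ordinary stack space, so the canary lands exactly at offset 40
 * once %gs.base points at the union.
 */
union irq_stack_union {
	char irq_stack[16 * 1024];
	struct {
		char gs_base[40];
		unsigned long stack_canary;
	};
};

/* wrmsr takes the new MSR value split across edx (high) : eax (low). */
static void split_for_wrmsr(uint64_t base, uint32_t *eax, uint32_t *edx)
{
	*eax = (uint32_t)base;		/* low  32 bits */
	*edx = (uint32_t)(base >> 32);	/* high 32 bits */
}

int main(void)
{
	uint32_t lo, hi;

	split_for_wrmsr(0xffff880001234000ull, &lo, &hi);
	printf("canary offset = %zu\n",
	       offsetof(union irq_stack_union, stack_canary));	/* 40 */
	printf("edx:eax = %08x:%08x\n", hi, lo);
	return 0;
}
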
@@ -257,6 +261,8 @@ ENTRY(secondary_startup_64)
257 .align 8 261 .align 8
258 ENTRY(initial_code) 262 ENTRY(initial_code)
259 .quad x86_64_start_kernel 263 .quad x86_64_start_kernel
264 ENTRY(initial_gs)
265 .quad INIT_PER_CPU_VAR(irq_stack_union)
260 __FINITDATA 266 __FINITDATA
261 267
262 ENTRY(stack_start) 268 ENTRY(stack_start)
@@ -401,7 +407,8 @@ NEXT_PAGE(level2_spare_pgt)
401 .globl early_gdt_descr 407 .globl early_gdt_descr
402early_gdt_descr: 408early_gdt_descr:
403 .word GDT_ENTRIES*8-1 409 .word GDT_ENTRIES*8-1
404 .quad per_cpu__gdt_page 410early_gdt_descr_base:
411 .quad INIT_PER_CPU_VAR(gdt_page)
405 412
406ENTRY(phys_base) 413ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 414 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index 64d5ad0b8add..a00545fe5cdd 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -269,6 +269,8 @@ static void hpet_set_mode(enum clock_event_mode mode,
269 now = hpet_readl(HPET_COUNTER); 269 now = hpet_readl(HPET_COUNTER);
270 cmp = now + (unsigned long) delta; 270 cmp = now + (unsigned long) delta;
271 cfg = hpet_readl(HPET_Tn_CFG(timer)); 271 cfg = hpet_readl(HPET_Tn_CFG(timer));
272 /* Make sure we use edge triggered interrupts */
273 cfg &= ~HPET_TN_LEVEL;
272 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC | 274 cfg |= HPET_TN_ENABLE | HPET_TN_PERIODIC |
273 HPET_TN_SETVAL | HPET_TN_32BIT; 275 HPET_TN_SETVAL | HPET_TN_32BIT;
274 hpet_writel(cfg, HPET_Tn_CFG(timer)); 276 hpet_writel(cfg, HPET_Tn_CFG(timer));
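
[Editor's note] The two added lines force edge-triggered mode with the usual read-modify-write on the timer configuration register: clear the level bit first, then OR in the bits being enabled, and only then write the register back. A self-contained sketch of the same pattern on a fake register; the bit names are invented for illustration, not HPET definitions:

#include <stdint.h>
#include <stdio.h>

#define CFG_LEVEL	(1u << 1)	/* level-triggered if set (invented) */
#define CFG_ENABLE	(1u << 2)
#define CFG_PERIODIC	(1u << 3)

static uint32_t fake_reg = CFG_LEVEL;	/* stale state left behind earlier */

int main(void)
{
	uint32_t cfg = fake_reg;	/* read */

	cfg &= ~CFG_LEVEL;		/* make sure we use edge triggered */
	cfg |= CFG_ENABLE | CFG_PERIODIC;
	fake_reg = cfg;			/* write back */

	printf("cfg = 0x%x, level = %u\n", fake_reg, !!(fake_reg & CFG_LEVEL));
	return 0;
}
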
@@ -897,7 +899,7 @@ static unsigned long hpet_rtc_flags;
897static int hpet_prev_update_sec; 899static int hpet_prev_update_sec;
898static struct rtc_time hpet_alarm_time; 900static struct rtc_time hpet_alarm_time;
899static unsigned long hpet_pie_count; 901static unsigned long hpet_pie_count;
900static unsigned long hpet_t1_cmp; 902static u32 hpet_t1_cmp;
901static unsigned long hpet_default_delta; 903static unsigned long hpet_default_delta;
902static unsigned long hpet_pie_delta; 904static unsigned long hpet_pie_delta;
903static unsigned long hpet_pie_limit; 905static unsigned long hpet_pie_limit;
@@ -905,6 +907,14 @@ static unsigned long hpet_pie_limit;
905static rtc_irq_handler irq_handler; 907static rtc_irq_handler irq_handler;
906 908
907/* 909/*
910 * Check that the hpet counter c1 is ahead of the c2
911 */
912static inline int hpet_cnt_ahead(u32 c1, u32 c2)
913{
914 return (s32)(c2 - c1) < 0;
915}
916
917/*
908 * Registers a IRQ handler. 918 * Registers a IRQ handler.
909 */ 919 */
910int hpet_register_irq_handler(rtc_irq_handler handler) 920int hpet_register_irq_handler(rtc_irq_handler handler)
@@ -1075,7 +1085,7 @@ static void hpet_rtc_timer_reinit(void)
1075 hpet_t1_cmp += delta; 1085 hpet_t1_cmp += delta;
1076 hpet_writel(hpet_t1_cmp, HPET_T1_CMP); 1086 hpet_writel(hpet_t1_cmp, HPET_T1_CMP);
1077 lost_ints++; 1087 lost_ints++;
1078 } while ((long)(hpet_readl(HPET_COUNTER) - hpet_t1_cmp) > 0); 1088 } while (!hpet_cnt_ahead(hpet_t1_cmp, hpet_readl(HPET_COUNTER)));
1079 1089
1080 if (lost_ints) { 1090 if (lost_ints) {
1081 if (hpet_rtc_flags & RTC_PIE) 1091 if (hpet_rtc_flags & RTC_PIE)
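
[Editor's note] hpet_cnt_ahead() above encodes the standard wrap-safe comparison for a free-running 32-bit counter: subtract in 32-bit unsigned arithmetic and let the signed cast decide which value is ahead. The old (long) cast widened the operands on 64-bit and lost the wraparound, which is what this hunk fixes. A small stand-alone illustration near the wrap point (values chosen arbitrarily):

#include <stdint.h>
#include <stdio.h>

/* Wrap-safe "is c1 ahead of c2?" for a 32-bit counter that rolls over. */
static int cnt_ahead(uint32_t c1, uint32_t c2)
{
	return (int32_t)(c2 - c1) < 0;
}

int main(void)
{
	uint32_t c1 = 5;		/* counter just wrapped past zero */
	uint32_t c2 = 0xfffffff0u;	/* comparator still near the top  */

	/* In modular arithmetic c1 is 21 ticks ahead of c2. */
	printf("wrap-safe: %d\n", cnt_ahead(c1, c2));			/* 1 */

	/* A widened 64-bit comparison misses the wrap and disagrees. */
	printf("widened  : %d\n", (int64_t)c1 - (int64_t)c2 > 0);	/* 0 */
	return 0;
}
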
diff --git a/arch/x86/kernel/i8237.c b/arch/x86/kernel/i8237.c
index dbd6c1d1b638..b42ca694dc68 100644
--- a/arch/x86/kernel/i8237.c
+++ b/arch/x86/kernel/i8237.c
@@ -28,10 +28,10 @@ static int i8237A_resume(struct sys_device *dev)
28 28
29 flags = claim_dma_lock(); 29 flags = claim_dma_lock();
30 30
31 dma_outb(DMA1_RESET_REG, 0); 31 dma_outb(0, DMA1_RESET_REG);
32 dma_outb(DMA2_RESET_REG, 0); 32 dma_outb(0, DMA2_RESET_REG);
33 33
34 for (i = 0;i < 8;i++) { 34 for (i = 0; i < 8; i++) {
35 set_dma_addr(i, 0x000000); 35 set_dma_addr(i, 0x000000);
36 /* DMA count is a bit weird so this is not 0 */ 36 /* DMA count is a bit weird so this is not 0 */
37 set_dma_count(i, 1); 37 set_dma_count(i, 1);
@@ -51,14 +51,14 @@ static int i8237A_suspend(struct sys_device *dev, pm_message_t state)
51} 51}
52 52
53static struct sysdev_class i8237_sysdev_class = { 53static struct sysdev_class i8237_sysdev_class = {
54 .name = "i8237", 54 .name = "i8237",
55 .suspend = i8237A_suspend, 55 .suspend = i8237A_suspend,
56 .resume = i8237A_resume, 56 .resume = i8237A_resume,
57}; 57};
58 58
59static struct sys_device device_i8237A = { 59static struct sys_device device_i8237A = {
60 .id = 0, 60 .id = 0,
61 .cls = &i8237_sysdev_class, 61 .cls = &i8237_sysdev_class,
62}; 62};
63 63
64static int __init i8237A_init_sysfs(void) 64static int __init i8237A_init_sysfs(void)
@@ -68,5 +68,4 @@ static int __init i8237A_init_sysfs(void)
68 error = sysdev_register(&device_i8237A); 68 error = sysdev_register(&device_i8237A);
69 return error; 69 return error;
70} 70}
71
72device_initcall(i8237A_init_sysfs); 71device_initcall(i8237A_init_sysfs);
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 9b0c480c383b..7248ca11bdcd 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Intel IO-APIC support for multi-Pentium hosts. 2 * Intel IO-APIC support for multi-Pentium hosts.
3 * 3 *
4 * Copyright (C) 1997, 1998, 1999, 2000 Ingo Molnar, Hajnalka Szabo 4 * Copyright (C) 1997, 1998, 1999, 2000, 2009 Ingo Molnar, Hajnalka Szabo
5 * 5 *
6 * Many thanks to Stig Venaas for trying out countless experimental 6 * Many thanks to Stig Venaas for trying out countless experimental
7 * patches and reporting/debugging problems patiently! 7 * patches and reporting/debugging problems patiently!
@@ -46,6 +46,7 @@
46#include <asm/idle.h> 46#include <asm/idle.h>
47#include <asm/io.h> 47#include <asm/io.h>
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/cpu.h>
49#include <asm/desc.h> 50#include <asm/desc.h>
50#include <asm/proto.h> 51#include <asm/proto.h>
51#include <asm/acpi.h> 52#include <asm/acpi.h>
@@ -61,9 +62,7 @@
61#include <asm/uv/uv_hub.h> 62#include <asm/uv/uv_hub.h>
62#include <asm/uv/uv_irq.h> 63#include <asm/uv/uv_irq.h>
63 64
64#include <mach_ipi.h> 65#include <asm/genapic.h>
65#include <mach_apic.h>
66#include <mach_apicdef.h>
67 66
68#define __apicdebuginit(type) static type __init 67#define __apicdebuginit(type) static type __init
69 68
@@ -82,11 +81,11 @@ static DEFINE_SPINLOCK(vector_lock);
82int nr_ioapic_registers[MAX_IO_APICS]; 81int nr_ioapic_registers[MAX_IO_APICS];
83 82
84/* I/O APIC entries */ 83/* I/O APIC entries */
85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 84struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86int nr_ioapics; 85int nr_ioapics;
87 86
88/* MP IRQ source entries */ 87/* MP IRQ source entries */
89struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 88struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
90 89
91/* # of MP IRQ source entries */ 90/* # of MP IRQ source entries */
92int mp_irq_entries; 91int mp_irq_entries;
@@ -99,10 +98,19 @@ DECLARE_BITMAP(mp_bus_not_pci, MAX_MP_BUSSES);
99 98
100int skip_ioapic_setup; 99int skip_ioapic_setup;
101 100
101void arch_disable_smp_support(void)
102{
103#ifdef CONFIG_PCI
104 noioapicquirk = 1;
105 noioapicreroute = -1;
106#endif
107 skip_ioapic_setup = 1;
108}
109
102static int __init parse_noapic(char *str) 110static int __init parse_noapic(char *str)
103{ 111{
104 /* disable IO-APIC */ 112 /* disable IO-APIC */
105 disable_ioapic_setup(); 113 arch_disable_smp_support();
106 return 0; 114 return 0;
107} 115}
108early_param("noapic", parse_noapic); 116early_param("noapic", parse_noapic);
@@ -356,7 +364,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
356 364
357 if (!cfg->move_in_progress) { 365 if (!cfg->move_in_progress) {
358 /* it means that domain is not changed */ 366 /* it means that domain is not changed */
359 if (!cpumask_intersects(&desc->affinity, mask)) 367 if (!cpumask_intersects(desc->affinity, mask))
360 cfg->move_desc_pending = 1; 368 cfg->move_desc_pending = 1;
361 } 369 }
362} 370}
@@ -386,7 +394,7 @@ struct io_apic {
386static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 394static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
387{ 395{
388 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 396 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
389 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); 397 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
390} 398}
391 399
392static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 400static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -478,7 +486,7 @@ __ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
478 io_apic_write(apic, 0x10 + 2*pin, eu.w1); 486 io_apic_write(apic, 0x10 + 2*pin, eu.w1);
479} 487}
480 488
481static void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e) 489void ioapic_write_entry(int apic, int pin, struct IO_APIC_route_entry e)
482{ 490{
483 unsigned long flags; 491 unsigned long flags;
484 spin_lock_irqsave(&ioapic_lock, flags); 492 spin_lock_irqsave(&ioapic_lock, flags);
@@ -513,11 +521,11 @@ static void send_cleanup_vector(struct irq_cfg *cfg)
513 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) 521 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
514 cfg->move_cleanup_count++; 522 cfg->move_cleanup_count++;
515 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask) 523 for_each_cpu_and(i, cfg->old_domain, cpu_online_mask)
516 send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR); 524 apic->send_IPI_mask(cpumask_of(i), IRQ_MOVE_CLEANUP_VECTOR);
517 } else { 525 } else {
518 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask); 526 cpumask_and(cleanup_mask, cfg->old_domain, cpu_online_mask);
519 cfg->move_cleanup_count = cpumask_weight(cleanup_mask); 527 cfg->move_cleanup_count = cpumask_weight(cleanup_mask);
520 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR); 528 apic->send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
521 free_cpumask_var(cleanup_mask); 529 free_cpumask_var(cleanup_mask);
522 } 530 }
523 cfg->move_in_progress = 0; 531 cfg->move_in_progress = 0;
@@ -562,8 +570,9 @@ static int
562assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask); 570assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask);
563 571
564/* 572/*
565 * Either sets desc->affinity to a valid value, and returns cpu_mask_to_apicid 573 * Either sets desc->affinity to a valid value, and returns
566 * of that, or returns BAD_APICID and leaves desc->affinity untouched. 574 * ->cpu_mask_to_apicid of that, or returns BAD_APICID and
575 * leaves desc->affinity untouched.
567 */ 576 */
568static unsigned int 577static unsigned int
569set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask) 578set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
@@ -579,9 +588,10 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
579 if (assign_irq_vector(irq, cfg, mask)) 588 if (assign_irq_vector(irq, cfg, mask))
580 return BAD_APICID; 589 return BAD_APICID;
581 590
582 cpumask_and(&desc->affinity, cfg->domain, mask); 591 cpumask_and(desc->affinity, cfg->domain, mask);
583 set_extra_move_desc(desc, mask); 592 set_extra_move_desc(desc, mask);
584 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); 593
594 return apic->cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
585} 595}
586 596
587static void 597static void
@@ -796,23 +806,6 @@ static void clear_IO_APIC (void)
796 clear_IO_APIC_pin(apic, pin); 806 clear_IO_APIC_pin(apic, pin);
797} 807}
798 808
799#if !defined(CONFIG_SMP) && defined(CONFIG_X86_32)
800void send_IPI_self(int vector)
801{
802 unsigned int cfg;
803
804 /*
805 * Wait for idle.
806 */
807 apic_wait_icr_idle();
808 cfg = APIC_DM_FIXED | APIC_DEST_SELF | vector | APIC_DEST_LOGICAL;
809 /*
810 * Send the IPI. The write to APIC_ICR fires this off.
811 */
812 apic_write(APIC_ICR, cfg);
813}
814#endif /* !CONFIG_SMP && CONFIG_X86_32*/
815
816#ifdef CONFIG_X86_32 809#ifdef CONFIG_X86_32
817/* 810/*
818 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to 811 * support for broken MP BIOSs, enables hand-redirection of PIRQ0-7 to
@@ -944,10 +937,10 @@ static int find_irq_entry(int apic, int pin, int type)
944 int i; 937 int i;
945 938
946 for (i = 0; i < mp_irq_entries; i++) 939 for (i = 0; i < mp_irq_entries; i++)
947 if (mp_irqs[i].mp_irqtype == type && 940 if (mp_irqs[i].irqtype == type &&
948 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || 941 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
949 mp_irqs[i].mp_dstapic == MP_APIC_ALL) && 942 mp_irqs[i].dstapic == MP_APIC_ALL) &&
950 mp_irqs[i].mp_dstirq == pin) 943 mp_irqs[i].dstirq == pin)
951 return i; 944 return i;
952 945
953 return -1; 946 return -1;
@@ -961,13 +954,13 @@ static int __init find_isa_irq_pin(int irq, int type)
961 int i; 954 int i;
962 955
963 for (i = 0; i < mp_irq_entries; i++) { 956 for (i = 0; i < mp_irq_entries; i++) {
964 int lbus = mp_irqs[i].mp_srcbus; 957 int lbus = mp_irqs[i].srcbus;
965 958
966 if (test_bit(lbus, mp_bus_not_pci) && 959 if (test_bit(lbus, mp_bus_not_pci) &&
967 (mp_irqs[i].mp_irqtype == type) && 960 (mp_irqs[i].irqtype == type) &&
968 (mp_irqs[i].mp_srcbusirq == irq)) 961 (mp_irqs[i].srcbusirq == irq))
969 962
970 return mp_irqs[i].mp_dstirq; 963 return mp_irqs[i].dstirq;
971 } 964 }
972 return -1; 965 return -1;
973} 966}
@@ -977,17 +970,17 @@ static int __init find_isa_irq_apic(int irq, int type)
977 int i; 970 int i;
978 971
979 for (i = 0; i < mp_irq_entries; i++) { 972 for (i = 0; i < mp_irq_entries; i++) {
980 int lbus = mp_irqs[i].mp_srcbus; 973 int lbus = mp_irqs[i].srcbus;
981 974
982 if (test_bit(lbus, mp_bus_not_pci) && 975 if (test_bit(lbus, mp_bus_not_pci) &&
983 (mp_irqs[i].mp_irqtype == type) && 976 (mp_irqs[i].irqtype == type) &&
984 (mp_irqs[i].mp_srcbusirq == irq)) 977 (mp_irqs[i].srcbusirq == irq))
985 break; 978 break;
986 } 979 }
987 if (i < mp_irq_entries) { 980 if (i < mp_irq_entries) {
988 int apic; 981 int apic;
989 for(apic = 0; apic < nr_ioapics; apic++) { 982 for(apic = 0; apic < nr_ioapics; apic++) {
990 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) 983 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
991 return apic; 984 return apic;
992 } 985 }
993 } 986 }
@@ -1012,23 +1005,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1012 return -1; 1005 return -1;
1013 } 1006 }
1014 for (i = 0; i < mp_irq_entries; i++) { 1007 for (i = 0; i < mp_irq_entries; i++) {
1015 int lbus = mp_irqs[i].mp_srcbus; 1008 int lbus = mp_irqs[i].srcbus;
1016 1009
1017 for (apic = 0; apic < nr_ioapics; apic++) 1010 for (apic = 0; apic < nr_ioapics; apic++)
1018 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || 1011 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1019 mp_irqs[i].mp_dstapic == MP_APIC_ALL) 1012 mp_irqs[i].dstapic == MP_APIC_ALL)
1020 break; 1013 break;
1021 1014
1022 if (!test_bit(lbus, mp_bus_not_pci) && 1015 if (!test_bit(lbus, mp_bus_not_pci) &&
1023 !mp_irqs[i].mp_irqtype && 1016 !mp_irqs[i].irqtype &&
1024 (bus == lbus) && 1017 (bus == lbus) &&
1025 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { 1018 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1026 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); 1019 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1027 1020
1028 if (!(apic || IO_APIC_IRQ(irq))) 1021 if (!(apic || IO_APIC_IRQ(irq)))
1029 continue; 1022 continue;
1030 1023
1031 if (pin == (mp_irqs[i].mp_srcbusirq & 3)) 1024 if (pin == (mp_irqs[i].srcbusirq & 3))
1032 return irq; 1025 return irq;
1033 /* 1026 /*
1034 * Use the first all-but-pin matching entry as a 1027 * Use the first all-but-pin matching entry as a
@@ -1071,7 +1064,7 @@ static int EISA_ELCR(unsigned int irq)
1071 * EISA conforming in the MP table, that means its trigger type must 1064 * EISA conforming in the MP table, that means its trigger type must
1072 * be read in from the ELCR */ 1065 * be read in from the ELCR */
1073 1066
1074#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) 1067#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
1075#define default_EISA_polarity(idx) default_ISA_polarity(idx) 1068#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1076 1069
1077/* PCI interrupts are always polarity one level triggered, 1070/* PCI interrupts are always polarity one level triggered,
@@ -1088,13 +1081,13 @@ static int EISA_ELCR(unsigned int irq)
1088 1081
1089static int MPBIOS_polarity(int idx) 1082static int MPBIOS_polarity(int idx)
1090{ 1083{
1091 int bus = mp_irqs[idx].mp_srcbus; 1084 int bus = mp_irqs[idx].srcbus;
1092 int polarity; 1085 int polarity;
1093 1086
1094 /* 1087 /*
1095 * Determine IRQ line polarity (high active or low active): 1088 * Determine IRQ line polarity (high active or low active):
1096 */ 1089 */
1097 switch (mp_irqs[idx].mp_irqflag & 3) 1090 switch (mp_irqs[idx].irqflag & 3)
1098 { 1091 {
1099 case 0: /* conforms, ie. bus-type dependent polarity */ 1092 case 0: /* conforms, ie. bus-type dependent polarity */
1100 if (test_bit(bus, mp_bus_not_pci)) 1093 if (test_bit(bus, mp_bus_not_pci))
@@ -1130,13 +1123,13 @@ static int MPBIOS_polarity(int idx)
1130 1123
1131static int MPBIOS_trigger(int idx) 1124static int MPBIOS_trigger(int idx)
1132{ 1125{
1133 int bus = mp_irqs[idx].mp_srcbus; 1126 int bus = mp_irqs[idx].srcbus;
1134 int trigger; 1127 int trigger;
1135 1128
1136 /* 1129 /*
1137 * Determine IRQ trigger mode (edge or level sensitive): 1130 * Determine IRQ trigger mode (edge or level sensitive):
1138 */ 1131 */
1139 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) 1132 switch ((mp_irqs[idx].irqflag>>2) & 3)
1140 { 1133 {
1141 case 0: /* conforms, ie. bus-type dependent */ 1134 case 0: /* conforms, ie. bus-type dependent */
1142 if (test_bit(bus, mp_bus_not_pci)) 1135 if (test_bit(bus, mp_bus_not_pci))
@@ -1214,16 +1207,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq);
1214static int pin_2_irq(int idx, int apic, int pin) 1207static int pin_2_irq(int idx, int apic, int pin)
1215{ 1208{
1216 int irq, i; 1209 int irq, i;
1217 int bus = mp_irqs[idx].mp_srcbus; 1210 int bus = mp_irqs[idx].srcbus;
1218 1211
1219 /* 1212 /*
1220 * Debugging check, we are in big trouble if this message pops up! 1213 * Debugging check, we are in big trouble if this message pops up!
1221 */ 1214 */
1222 if (mp_irqs[idx].mp_dstirq != pin) 1215 if (mp_irqs[idx].dstirq != pin)
1223 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1216 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1224 1217
1225 if (test_bit(bus, mp_bus_not_pci)) { 1218 if (test_bit(bus, mp_bus_not_pci)) {
1226 irq = mp_irqs[idx].mp_srcbusirq; 1219 irq = mp_irqs[idx].srcbusirq;
1227 } else { 1220 } else {
1228 /* 1221 /*
1229 * PCI IRQs are mapped in order 1222 * PCI IRQs are mapped in order
@@ -1315,7 +1308,7 @@ __assign_irq_vector(int irq, struct irq_cfg *cfg, const struct cpumask *mask)
1315 int new_cpu; 1308 int new_cpu;
1316 int vector, offset; 1309 int vector, offset;
1317 1310
1318 vector_allocation_domain(cpu, tmp_mask); 1311 apic->vector_allocation_domain(cpu, tmp_mask);
1319 1312
1320 vector = current_vector; 1313 vector = current_vector;
1321 offset = current_offset; 1314 offset = current_offset;
@@ -1485,10 +1478,10 @@ static void ioapic_register_intr(int irq, struct irq_desc *desc, unsigned long t
1485 handle_edge_irq, "edge"); 1478 handle_edge_irq, "edge");
1486} 1479}
1487 1480
1488static int setup_ioapic_entry(int apic, int irq, 1481int setup_ioapic_entry(int apic_id, int irq,
1489 struct IO_APIC_route_entry *entry, 1482 struct IO_APIC_route_entry *entry,
1490 unsigned int destination, int trigger, 1483 unsigned int destination, int trigger,
1491 int polarity, int vector) 1484 int polarity, int vector)
1492{ 1485{
1493 /* 1486 /*
1494 * add it to the IO-APIC irq-routing table: 1487 * add it to the IO-APIC irq-routing table:
@@ -1497,25 +1490,25 @@ static int setup_ioapic_entry(int apic, int irq,
1497 1490
1498#ifdef CONFIG_INTR_REMAP 1491#ifdef CONFIG_INTR_REMAP
1499 if (intr_remapping_enabled) { 1492 if (intr_remapping_enabled) {
1500 struct intel_iommu *iommu = map_ioapic_to_ir(apic); 1493 struct intel_iommu *iommu = map_ioapic_to_ir(apic_id);
1501 struct irte irte; 1494 struct irte irte;
1502 struct IR_IO_APIC_route_entry *ir_entry = 1495 struct IR_IO_APIC_route_entry *ir_entry =
1503 (struct IR_IO_APIC_route_entry *) entry; 1496 (struct IR_IO_APIC_route_entry *) entry;
1504 int index; 1497 int index;
1505 1498
1506 if (!iommu) 1499 if (!iommu)
1507 panic("No mapping iommu for ioapic %d\n", apic); 1500 panic("No mapping iommu for ioapic %d\n", apic_id);
1508 1501
1509 index = alloc_irte(iommu, irq, 1); 1502 index = alloc_irte(iommu, irq, 1);
1510 if (index < 0) 1503 if (index < 0)
1511 panic("Failed to allocate IRTE for ioapic %d\n", apic); 1504 panic("Failed to allocate IRTE for ioapic %d\n", apic_id);
1512 1505
1513 memset(&irte, 0, sizeof(irte)); 1506 memset(&irte, 0, sizeof(irte));
1514 1507
1515 irte.present = 1; 1508 irte.present = 1;
1516 irte.dst_mode = INT_DEST_MODE; 1509 irte.dst_mode = apic->irq_dest_mode;
1517 irte.trigger_mode = trigger; 1510 irte.trigger_mode = trigger;
1518 irte.dlvry_mode = INT_DELIVERY_MODE; 1511 irte.dlvry_mode = apic->irq_delivery_mode;
1519 irte.vector = vector; 1512 irte.vector = vector;
1520 irte.dest_id = IRTE_DEST(destination); 1513 irte.dest_id = IRTE_DEST(destination);
1521 1514
@@ -1528,8 +1521,8 @@ static int setup_ioapic_entry(int apic, int irq,
1528 } else 1521 } else
1529#endif 1522#endif
1530 { 1523 {
1531 entry->delivery_mode = INT_DELIVERY_MODE; 1524 entry->delivery_mode = apic->irq_delivery_mode;
1532 entry->dest_mode = INT_DEST_MODE; 1525 entry->dest_mode = apic->irq_dest_mode;
1533 entry->dest = destination; 1526 entry->dest = destination;
1534 } 1527 }
1535 1528
@@ -1546,7 +1539,7 @@ static int setup_ioapic_entry(int apic, int irq,
1546 return 0; 1539 return 0;
1547} 1540}
1548 1541
1549static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_desc *desc, 1542static void setup_IO_APIC_irq(int apic_id, int pin, unsigned int irq, struct irq_desc *desc,
1550 int trigger, int polarity) 1543 int trigger, int polarity)
1551{ 1544{
1552 struct irq_cfg *cfg; 1545 struct irq_cfg *cfg;
@@ -1558,22 +1551,22 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1558 1551
1559 cfg = desc->chip_data; 1552 cfg = desc->chip_data;
1560 1553
1561 if (assign_irq_vector(irq, cfg, TARGET_CPUS)) 1554 if (assign_irq_vector(irq, cfg, apic->target_cpus()))
1562 return; 1555 return;
1563 1556
1564 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 1557 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
1565 1558
1566 apic_printk(APIC_VERBOSE,KERN_DEBUG 1559 apic_printk(APIC_VERBOSE,KERN_DEBUG
1567 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1560 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1568 "IRQ %d Mode:%i Active:%i)\n", 1561 "IRQ %d Mode:%i Active:%i)\n",
1569 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1562 apic_id, mp_ioapics[apic_id].apicid, pin, cfg->vector,
1570 irq, trigger, polarity); 1563 irq, trigger, polarity);
1571 1564
1572 1565
1573 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1566 if (setup_ioapic_entry(mp_ioapics[apic_id].apicid, irq, &entry,
1574 dest, trigger, polarity, cfg->vector)) { 1567 dest, trigger, polarity, cfg->vector)) {
1575 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1568 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1576 mp_ioapics[apic].mp_apicid, pin); 1569 mp_ioapics[apic_id].apicid, pin);
1577 __clear_irq_vector(irq, cfg); 1570 __clear_irq_vector(irq, cfg);
1578 return; 1571 return;
1579 } 1572 }
@@ -1582,12 +1575,12 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1582 if (irq < NR_IRQS_LEGACY) 1575 if (irq < NR_IRQS_LEGACY)
1583 disable_8259A_irq(irq); 1576 disable_8259A_irq(irq);
1584 1577
1585 ioapic_write_entry(apic, pin, entry); 1578 ioapic_write_entry(apic_id, pin, entry);
1586} 1579}
1587 1580
1588static void __init setup_IO_APIC_irqs(void) 1581static void __init setup_IO_APIC_irqs(void)
1589{ 1582{
1590 int apic, pin, idx, irq; 1583 int apic_id, pin, idx, irq;
1591 int notcon = 0; 1584 int notcon = 0;
1592 struct irq_desc *desc; 1585 struct irq_desc *desc;
1593 struct irq_cfg *cfg; 1586 struct irq_cfg *cfg;
@@ -1595,21 +1588,19 @@ static void __init setup_IO_APIC_irqs(void)
1595 1588
1596 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n"); 1589 apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
1597 1590
1598 for (apic = 0; apic < nr_ioapics; apic++) { 1591 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
1599 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { 1592 for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
1600 1593
1601 idx = find_irq_entry(apic, pin, mp_INT); 1594 idx = find_irq_entry(apic_id, pin, mp_INT);
1602 if (idx == -1) { 1595 if (idx == -1) {
1603 if (!notcon) { 1596 if (!notcon) {
1604 notcon = 1; 1597 notcon = 1;
1605 apic_printk(APIC_VERBOSE, 1598 apic_printk(APIC_VERBOSE,
1606 KERN_DEBUG " %d-%d", 1599 KERN_DEBUG " %d-%d",
1607 mp_ioapics[apic].mp_apicid, 1600 mp_ioapics[apic_id].apicid, pin);
1608 pin);
1609 } else 1601 } else
1610 apic_printk(APIC_VERBOSE, " %d-%d", 1602 apic_printk(APIC_VERBOSE, " %d-%d",
1611 mp_ioapics[apic].mp_apicid, 1603 mp_ioapics[apic_id].apicid, pin);
1612 pin);
1613 continue; 1604 continue;
1614 } 1605 }
1615 if (notcon) { 1606 if (notcon) {
@@ -1618,20 +1609,25 @@ static void __init setup_IO_APIC_irqs(void)
1618 notcon = 0; 1609 notcon = 0;
1619 } 1610 }
1620 1611
1621 irq = pin_2_irq(idx, apic, pin); 1612 irq = pin_2_irq(idx, apic_id, pin);
1622#ifdef CONFIG_X86_32 1613
1623 if (multi_timer_check(apic, irq)) 1614 /*
1615 * Skip the timer IRQ if there's a quirk handler
1616 * installed and if it returns 1:
1617 */
1618 if (apic->multi_timer_check &&
1619 apic->multi_timer_check(apic_id, irq))
1624 continue; 1620 continue;
1625#endif 1621
1626 desc = irq_to_desc_alloc_cpu(irq, cpu); 1622 desc = irq_to_desc_alloc_cpu(irq, cpu);
1627 if (!desc) { 1623 if (!desc) {
1628 printk(KERN_INFO "can not get irq_desc for %d\n", irq); 1624 printk(KERN_INFO "can not get irq_desc for %d\n", irq);
1629 continue; 1625 continue;
1630 } 1626 }
1631 cfg = desc->chip_data; 1627 cfg = desc->chip_data;
1632 add_pin_to_irq_cpu(cfg, cpu, apic, pin); 1628 add_pin_to_irq_cpu(cfg, cpu, apic_id, pin);
1633 1629
1634 setup_IO_APIC_irq(apic, pin, irq, desc, 1630 setup_IO_APIC_irq(apic_id, pin, irq, desc,
1635 irq_trigger(idx), irq_polarity(idx)); 1631 irq_trigger(idx), irq_polarity(idx));
1636 } 1632 }
1637 } 1633 }
@@ -1644,7 +1640,7 @@ static void __init setup_IO_APIC_irqs(void)
1644/* 1640/*
1645 * Set up the timer pin, possibly with the 8259A-master behind. 1641 * Set up the timer pin, possibly with the 8259A-master behind.
1646 */ 1642 */
1647static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin, 1643static void __init setup_timer_IRQ0_pin(unsigned int apic_id, unsigned int pin,
1648 int vector) 1644 int vector)
1649{ 1645{
1650 struct IO_APIC_route_entry entry; 1646 struct IO_APIC_route_entry entry;
@@ -1660,10 +1656,10 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1660 * We use logical delivery to get the timer IRQ 1656 * We use logical delivery to get the timer IRQ
1661 * to the first CPU. 1657 * to the first CPU.
1662 */ 1658 */
1663 entry.dest_mode = INT_DEST_MODE; 1659 entry.dest_mode = apic->irq_dest_mode;
1664 entry.mask = 1; /* mask IRQ now */ 1660 entry.mask = 0; /* don't mask IRQ for edge */
1665 entry.dest = cpu_mask_to_apicid(TARGET_CPUS); 1661 entry.dest = apic->cpu_mask_to_apicid(apic->target_cpus());
1666 entry.delivery_mode = INT_DELIVERY_MODE; 1662 entry.delivery_mode = apic->irq_delivery_mode;
1667 entry.polarity = 0; 1663 entry.polarity = 0;
1668 entry.trigger = 0; 1664 entry.trigger = 0;
1669 entry.vector = vector; 1665 entry.vector = vector;
@@ -1677,7 +1673,7 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1677 /* 1673 /*
1678 * Add it to the IO-APIC irq-routing table: 1674 * Add it to the IO-APIC irq-routing table:
1679 */ 1675 */
1680 ioapic_write_entry(apic, pin, entry); 1676 ioapic_write_entry(apic_id, pin, entry);
1681} 1677}
1682 1678
1683 1679
@@ -1699,7 +1695,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1699 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1695 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1700 for (i = 0; i < nr_ioapics; i++) 1696 for (i = 0; i < nr_ioapics; i++)
1701 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1697 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1702 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); 1698 mp_ioapics[i].apicid, nr_ioapic_registers[i]);
1703 1699
1704 /* 1700 /*
1705 * We are a bit conservative about what we expect. We have to 1701 * We are a bit conservative about what we expect. We have to
@@ -1719,7 +1715,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1719 spin_unlock_irqrestore(&ioapic_lock, flags); 1715 spin_unlock_irqrestore(&ioapic_lock, flags);
1720 1716
1721 printk("\n"); 1717 printk("\n");
1722 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1718 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
1723 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1719 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1724 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1720 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1725 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1721 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -2090,7 +2086,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
2090{ 2086{
2091 union IO_APIC_reg_00 reg_00; 2087 union IO_APIC_reg_00 reg_00;
2092 physid_mask_t phys_id_present_map; 2088 physid_mask_t phys_id_present_map;
2093 int apic; 2089 int apic_id;
2094 int i; 2090 int i;
2095 unsigned char old_id; 2091 unsigned char old_id;
2096 unsigned long flags; 2092 unsigned long flags;
@@ -2109,26 +2105,26 @@ static void __init setup_ioapic_ids_from_mpc(void)
2109 * This is broken; anything with a real cpu count has to 2105 * This is broken; anything with a real cpu count has to
2110 * circumvent this idiocy regardless. 2106 * circumvent this idiocy regardless.
2111 */ 2107 */
2112 phys_id_present_map = ioapic_phys_id_map(phys_cpu_present_map); 2108 phys_id_present_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
2113 2109
2114 /* 2110 /*
2115 * Set the IOAPIC ID to the value stored in the MPC table. 2111 * Set the IOAPIC ID to the value stored in the MPC table.
2116 */ 2112 */
2117 for (apic = 0; apic < nr_ioapics; apic++) { 2113 for (apic_id = 0; apic_id < nr_ioapics; apic_id++) {
2118 2114
2119 /* Read the register 0 value */ 2115 /* Read the register 0 value */
2120 spin_lock_irqsave(&ioapic_lock, flags); 2116 spin_lock_irqsave(&ioapic_lock, flags);
2121 reg_00.raw = io_apic_read(apic, 0); 2117 reg_00.raw = io_apic_read(apic_id, 0);
2122 spin_unlock_irqrestore(&ioapic_lock, flags); 2118 spin_unlock_irqrestore(&ioapic_lock, flags);
2123 2119
2124 old_id = mp_ioapics[apic].mp_apicid; 2120 old_id = mp_ioapics[apic_id].apicid;
2125 2121
2126 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { 2122 if (mp_ioapics[apic_id].apicid >= get_physical_broadcast()) {
2127 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 2123 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2128 apic, mp_ioapics[apic].mp_apicid); 2124 apic_id, mp_ioapics[apic_id].apicid);
2129 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2125 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2130 reg_00.bits.ID); 2126 reg_00.bits.ID);
2131 mp_ioapics[apic].mp_apicid = reg_00.bits.ID; 2127 mp_ioapics[apic_id].apicid = reg_00.bits.ID;
2132 } 2128 }
2133 2129
2134 /* 2130 /*
@@ -2136,10 +2132,10 @@ static void __init setup_ioapic_ids_from_mpc(void)
2136 * system must have a unique ID or we get lots of nice 2132 * system must have a unique ID or we get lots of nice
2137 * 'stuck on smp_invalidate_needed IPI wait' messages. 2133 * 'stuck on smp_invalidate_needed IPI wait' messages.
2138 */ 2134 */
2139 if (check_apicid_used(phys_id_present_map, 2135 if (apic->check_apicid_used(phys_id_present_map,
2140 mp_ioapics[apic].mp_apicid)) { 2136 mp_ioapics[apic_id].apicid)) {
2141 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 2137 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2142 apic, mp_ioapics[apic].mp_apicid); 2138 apic_id, mp_ioapics[apic_id].apicid);
2143 for (i = 0; i < get_physical_broadcast(); i++) 2139 for (i = 0; i < get_physical_broadcast(); i++)
2144 if (!physid_isset(i, phys_id_present_map)) 2140 if (!physid_isset(i, phys_id_present_map))
2145 break; 2141 break;
@@ -2148,13 +2144,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
2148 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2144 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2149 i); 2145 i);
2150 physid_set(i, phys_id_present_map); 2146 physid_set(i, phys_id_present_map);
2151 mp_ioapics[apic].mp_apicid = i; 2147 mp_ioapics[apic_id].apicid = i;
2152 } else { 2148 } else {
2153 physid_mask_t tmp; 2149 physid_mask_t tmp;
2154 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); 2150 tmp = apic->apicid_to_cpu_present(mp_ioapics[apic_id].apicid);
2155 apic_printk(APIC_VERBOSE, "Setting %d in the " 2151 apic_printk(APIC_VERBOSE, "Setting %d in the "
2156 "phys_id_present_map\n", 2152 "phys_id_present_map\n",
2157 mp_ioapics[apic].mp_apicid); 2153 mp_ioapics[apic_id].apicid);
2158 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2154 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2159 } 2155 }
2160 2156
@@ -2163,11 +2159,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
2163 * We need to adjust the IRQ routing table 2159 * We need to adjust the IRQ routing table
2164 * if the ID changed. 2160 * if the ID changed.
2165 */ 2161 */
2166 if (old_id != mp_ioapics[apic].mp_apicid) 2162 if (old_id != mp_ioapics[apic_id].apicid)
2167 for (i = 0; i < mp_irq_entries; i++) 2163 for (i = 0; i < mp_irq_entries; i++)
2168 if (mp_irqs[i].mp_dstapic == old_id) 2164 if (mp_irqs[i].dstapic == old_id)
2169 mp_irqs[i].mp_dstapic 2165 mp_irqs[i].dstapic
2170 = mp_ioapics[apic].mp_apicid; 2166 = mp_ioapics[apic_id].apicid;
2171 2167
2172 /* 2168 /*
2173 * Read the right value from the MPC table and 2169 * Read the right value from the MPC table and
@@ -2175,20 +2171,20 @@ static void __init setup_ioapic_ids_from_mpc(void)
2175 */ 2171 */
2176 apic_printk(APIC_VERBOSE, KERN_INFO 2172 apic_printk(APIC_VERBOSE, KERN_INFO
2177 "...changing IO-APIC physical APIC ID to %d ...", 2173 "...changing IO-APIC physical APIC ID to %d ...",
2178 mp_ioapics[apic].mp_apicid); 2174 mp_ioapics[apic_id].apicid);
2179 2175
2180 reg_00.bits.ID = mp_ioapics[apic].mp_apicid; 2176 reg_00.bits.ID = mp_ioapics[apic_id].apicid;
2181 spin_lock_irqsave(&ioapic_lock, flags); 2177 spin_lock_irqsave(&ioapic_lock, flags);
2182 io_apic_write(apic, 0, reg_00.raw); 2178 io_apic_write(apic_id, 0, reg_00.raw);
2183 spin_unlock_irqrestore(&ioapic_lock, flags); 2179 spin_unlock_irqrestore(&ioapic_lock, flags);
2184 2180
2185 /* 2181 /*
2186 * Sanity check 2182 * Sanity check
2187 */ 2183 */
2188 spin_lock_irqsave(&ioapic_lock, flags); 2184 spin_lock_irqsave(&ioapic_lock, flags);
2189 reg_00.raw = io_apic_read(apic, 0); 2185 reg_00.raw = io_apic_read(apic_id, 0);
2190 spin_unlock_irqrestore(&ioapic_lock, flags); 2186 spin_unlock_irqrestore(&ioapic_lock, flags);
2191 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) 2187 if (reg_00.bits.ID != mp_ioapics[apic_id].apicid)
2192 printk("could not set ID!\n"); 2188 printk("could not set ID!\n");
2193 else 2189 else
2194 apic_printk(APIC_VERBOSE, " ok.\n"); 2190 apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2291,7 +2287,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2291 unsigned long flags; 2287 unsigned long flags;
2292 2288
2293 spin_lock_irqsave(&vector_lock, flags); 2289 spin_lock_irqsave(&vector_lock, flags);
2294 send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector); 2290 apic->send_IPI_mask(cpumask_of(cpumask_first(cfg->domain)), cfg->vector);
2295 spin_unlock_irqrestore(&vector_lock, flags); 2291 spin_unlock_irqrestore(&vector_lock, flags);
2296 2292
2297 return 1; 2293 return 1;
@@ -2299,7 +2295,7 @@ static int ioapic_retrigger_irq(unsigned int irq)
2299#else 2295#else
2300static int ioapic_retrigger_irq(unsigned int irq) 2296static int ioapic_retrigger_irq(unsigned int irq)
2301{ 2297{
2302 send_IPI_self(irq_cfg(irq)->vector); 2298 apic->send_IPI_self(irq_cfg(irq)->vector);
2303 2299
2304 return 1; 2300 return 1;
2305} 2301}
@@ -2363,7 +2359,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2363 2359
2364 set_extra_move_desc(desc, mask); 2360 set_extra_move_desc(desc, mask);
2365 2361
2366 dest = cpu_mask_to_apicid_and(cfg->domain, mask); 2362 dest = apic->cpu_mask_to_apicid_and(cfg->domain, mask);
2367 2363
2368 modify_ioapic_rte = desc->status & IRQ_LEVEL; 2364 modify_ioapic_rte = desc->status & IRQ_LEVEL;
2369 if (modify_ioapic_rte) { 2365 if (modify_ioapic_rte) {
@@ -2383,7 +2379,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2383 if (cfg->move_in_progress) 2379 if (cfg->move_in_progress)
2384 send_cleanup_vector(cfg); 2380 send_cleanup_vector(cfg);
2385 2381
2386 cpumask_copy(&desc->affinity, mask); 2382 cpumask_copy(desc->affinity, mask);
2387} 2383}
2388 2384
2389static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2385static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2405,11 +2401,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2405 } 2401 }
2406 2402
2407 /* everthing is clear. we have right of way */ 2403 /* everthing is clear. we have right of way */
2408 migrate_ioapic_irq_desc(desc, &desc->pending_mask); 2404 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2409 2405
2410 ret = 0; 2406 ret = 0;
2411 desc->status &= ~IRQ_MOVE_PENDING; 2407 desc->status &= ~IRQ_MOVE_PENDING;
2412 cpumask_clear(&desc->pending_mask); 2408 cpumask_clear(desc->pending_mask);
2413 2409
2414unmask: 2410unmask:
2415 unmask_IO_APIC_irq_desc(desc); 2411 unmask_IO_APIC_irq_desc(desc);
@@ -2434,7 +2430,7 @@ static void ir_irq_migration(struct work_struct *work)
2434 continue; 2430 continue;
2435 } 2431 }
2436 2432
2437 desc->chip->set_affinity(irq, &desc->pending_mask); 2433 desc->chip->set_affinity(irq, desc->pending_mask);
2438 spin_unlock_irqrestore(&desc->lock, flags); 2434 spin_unlock_irqrestore(&desc->lock, flags);
2439 } 2435 }
2440 } 2436 }
@@ -2448,7 +2444,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2448{ 2444{
2449 if (desc->status & IRQ_LEVEL) { 2445 if (desc->status & IRQ_LEVEL) {
2450 desc->status |= IRQ_MOVE_PENDING; 2446 desc->status |= IRQ_MOVE_PENDING;
2451 cpumask_copy(&desc->pending_mask, mask); 2447 cpumask_copy(desc->pending_mask, mask);
2452 migrate_irq_remapped_level_desc(desc); 2448 migrate_irq_remapped_level_desc(desc);
2453 return; 2449 return;
2454 } 2450 }
@@ -2516,7 +2512,7 @@ static void irq_complete_move(struct irq_desc **descp)
2516 2512
2517 /* domain has not changed, but affinity did */ 2513 /* domain has not changed, but affinity did */
2518 me = smp_processor_id(); 2514 me = smp_processor_id();
2519 if (cpu_isset(me, desc->affinity)) { 2515 if (cpumask_test_cpu(me, desc->affinity)) {
2520 *descp = desc = move_irq_desc(desc, me); 2516 *descp = desc = move_irq_desc(desc, me);
2521 /* get the new one */ 2517 /* get the new one */
2522 cfg = desc->chip_data; 2518 cfg = desc->chip_data;
@@ -2867,19 +2863,15 @@ static inline void __init check_timer(void)
2867 int cpu = boot_cpu_id; 2863 int cpu = boot_cpu_id;
2868 int apic1, pin1, apic2, pin2; 2864 int apic1, pin1, apic2, pin2;
2869 unsigned long flags; 2865 unsigned long flags;
2870 unsigned int ver;
2871 int no_pin1 = 0; 2866 int no_pin1 = 0;
2872 2867
2873 local_irq_save(flags); 2868 local_irq_save(flags);
2874 2869
2875 ver = apic_read(APIC_LVR);
2876 ver = GET_APIC_VERSION(ver);
2877
2878 /* 2870 /*
2879 * get/set the timer IRQ vector: 2871 * get/set the timer IRQ vector:
2880 */ 2872 */
2881 disable_8259A_irq(0); 2873 disable_8259A_irq(0);
2882 assign_irq_vector(0, cfg, TARGET_CPUS); 2874 assign_irq_vector(0, cfg, apic->target_cpus());
2883 2875
2884 /* 2876 /*
2885 * As IRQ0 is to be enabled in the 8259A, the virtual 2877 * As IRQ0 is to be enabled in the 8259A, the virtual
@@ -2893,7 +2885,13 @@ static inline void __init check_timer(void)
2893 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT); 2885 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_EXTINT);
2894 init_8259A(1); 2886 init_8259A(1);
2895#ifdef CONFIG_X86_32 2887#ifdef CONFIG_X86_32
2896 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver)); 2888 {
2889 unsigned int ver;
2890
2891 ver = apic_read(APIC_LVR);
2892 ver = GET_APIC_VERSION(ver);
2893 timer_ack = (nmi_watchdog == NMI_IO_APIC && !APIC_INTEGRATED(ver));
2894 }
2897#endif 2895#endif
2898 2896
2899 pin1 = find_isa_irq_pin(0, mp_INT); 2897 pin1 = find_isa_irq_pin(0, mp_INT);
@@ -2932,8 +2930,17 @@ static inline void __init check_timer(void)
2932 if (no_pin1) { 2930 if (no_pin1) {
2933 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1); 2931 add_pin_to_irq_cpu(cfg, cpu, apic1, pin1);
2934 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector); 2932 setup_timer_IRQ0_pin(apic1, pin1, cfg->vector);
2933 } else {
2934 /* for edge trigger, setup_IO_APIC_irq already
2935 * leave it unmasked.
2936 * so only need to unmask if it is level-trigger
2937 * do we really have level trigger timer?
2938 */
2939 int idx;
2940 idx = find_irq_entry(apic1, pin1, mp_INT);
2941 if (idx != -1 && irq_trigger(idx))
2942 unmask_IO_APIC_irq_desc(desc);
2935 } 2943 }
2936 unmask_IO_APIC_irq_desc(desc);
2937 if (timer_irq_works()) { 2944 if (timer_irq_works()) {
2938 if (nmi_watchdog == NMI_IO_APIC) { 2945 if (nmi_watchdog == NMI_IO_APIC) {
2939 setup_nmi(); 2946 setup_nmi();
@@ -2947,6 +2954,7 @@ static inline void __init check_timer(void)
2947 if (intr_remapping_enabled) 2954 if (intr_remapping_enabled)
2948 panic("timer doesn't work through Interrupt-remapped IO-APIC"); 2955 panic("timer doesn't work through Interrupt-remapped IO-APIC");
2949#endif 2956#endif
2957 local_irq_disable();
2950 clear_IO_APIC_pin(apic1, pin1); 2958 clear_IO_APIC_pin(apic1, pin1);
2951 if (!no_pin1) 2959 if (!no_pin1)
2952 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2960 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -2961,7 +2969,6 @@ static inline void __init check_timer(void)
2961 */ 2969 */
2962 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2); 2970 replace_pin_at_irq_cpu(cfg, cpu, apic1, pin1, apic2, pin2);
2963 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector); 2971 setup_timer_IRQ0_pin(apic2, pin2, cfg->vector);
2964 unmask_IO_APIC_irq_desc(desc);
2965 enable_8259A_irq(0); 2972 enable_8259A_irq(0);
2966 if (timer_irq_works()) { 2973 if (timer_irq_works()) {
2967 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n"); 2974 apic_printk(APIC_QUIET, KERN_INFO "....... works.\n");
@@ -2976,6 +2983,7 @@ static inline void __init check_timer(void)
2976 /* 2983 /*
2977 * Cleanup, just in case ... 2984 * Cleanup, just in case ...
2978 */ 2985 */
2986 local_irq_disable();
2979 disable_8259A_irq(0); 2987 disable_8259A_irq(0);
2980 clear_IO_APIC_pin(apic2, pin2); 2988 clear_IO_APIC_pin(apic2, pin2);
2981 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n"); 2989 apic_printk(APIC_QUIET, KERN_INFO "....... failed.\n");
@@ -3001,6 +3009,7 @@ static inline void __init check_timer(void)
3001 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3009 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3002 goto out; 3010 goto out;
3003 } 3011 }
3012 local_irq_disable();
3004 disable_8259A_irq(0); 3013 disable_8259A_irq(0);
3005 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector); 3014 apic_write(APIC_LVT0, APIC_LVT_MASKED | APIC_DM_FIXED | cfg->vector);
3006 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n"); 3015 apic_printk(APIC_QUIET, KERN_INFO "..... failed.\n");
@@ -3018,6 +3027,7 @@ static inline void __init check_timer(void)
3018 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n"); 3027 apic_printk(APIC_QUIET, KERN_INFO "..... works.\n");
3019 goto out; 3028 goto out;
3020 } 3029 }
3030 local_irq_disable();
3021 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n"); 3031 apic_printk(APIC_QUIET, KERN_INFO "..... failed :(.\n");
3022 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a " 3032 panic("IO-APIC + timer doesn't work! Boot with apic=debug and send a "
3023 "report. Then try booting with the 'noapic' option.\n"); 3033 "report. Then try booting with the 'noapic' option.\n");
@@ -3118,8 +3128,8 @@ static int ioapic_resume(struct sys_device *dev)
3118 3128
3119 spin_lock_irqsave(&ioapic_lock, flags); 3129 spin_lock_irqsave(&ioapic_lock, flags);
3120 reg_00.raw = io_apic_read(dev->id, 0); 3130 reg_00.raw = io_apic_read(dev->id, 0);
3121 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { 3131 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3122 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; 3132 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3123 io_apic_write(dev->id, 0, reg_00.raw); 3133 io_apic_write(dev->id, 0, reg_00.raw);
3124 } 3134 }
3125 spin_unlock_irqrestore(&ioapic_lock, flags); 3135 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -3169,6 +3179,7 @@ static int __init ioapic_init_sysfs(void)
3169 3179
3170device_initcall(ioapic_init_sysfs); 3180device_initcall(ioapic_init_sysfs);
3171 3181
3182static int nr_irqs_gsi = NR_IRQS_LEGACY;
3172/* 3183/*
3173 * Dynamic irq allocate and deallocation 3184 * Dynamic irq allocate and deallocation
3174 */ 3185 */
@@ -3183,11 +3194,11 @@ unsigned int create_irq_nr(unsigned int irq_want)
3183 struct irq_desc *desc_new = NULL; 3194 struct irq_desc *desc_new = NULL;
3184 3195
3185 irq = 0; 3196 irq = 0;
3186 spin_lock_irqsave(&vector_lock, flags); 3197 if (irq_want < nr_irqs_gsi)
3187 for (new = irq_want; new < NR_IRQS; new++) { 3198 irq_want = nr_irqs_gsi;
3188 if (platform_legacy_irq(new))
3189 continue;
3190 3199
3200 spin_lock_irqsave(&vector_lock, flags);
3201 for (new = irq_want; new < nr_irqs; new++) {
3191 desc_new = irq_to_desc_alloc_cpu(new, cpu); 3202 desc_new = irq_to_desc_alloc_cpu(new, cpu);
3192 if (!desc_new) { 3203 if (!desc_new) {
3193 printk(KERN_INFO "can not get irq_desc for %d\n", new); 3204 printk(KERN_INFO "can not get irq_desc for %d\n", new);
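
[Editor's note] create_irq_nr() now clamps the caller's hint up to nr_irqs_gsi and scans upward through nr_irqs for the first descriptor whose vector is still free, instead of skipping legacy IRQs one by one. The same "first free slot at or above a floor" scan, reduced to a stand-alone sketch (array size and values are arbitrary):

#include <stdio.h>

#define NR_SLOTS 16

/* 0 means "vector not yet assigned" in this toy table. */
static int vectors[NR_SLOTS] = { 1, 1, 1, 1, 0, 1, 0, 0 };

/* Return the first free slot >= max(want, floor), or 0 on failure. */
static unsigned int find_free(unsigned int want, unsigned int floor)
{
	unsigned int i;

	if (want < floor)
		want = floor;	/* never hand out reserved low numbers */

	for (i = want; i < NR_SLOTS; i++)
		if (vectors[i] == 0)
			return i;

	return 0;
}

int main(void)
{
	printf("%u\n", find_free(2, 5));	/* clamped to 5 -> returns 6 */
	printf("%u\n", find_free(7, 5));	/* returns 7 */
	return 0;
}
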
@@ -3197,7 +3208,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
3197 3208
3198 if (cfg_new->vector != 0) 3209 if (cfg_new->vector != 0)
3199 continue; 3210 continue;
3200 if (__assign_irq_vector(new, cfg_new, TARGET_CPUS) == 0) 3211 if (__assign_irq_vector(new, cfg_new, apic->target_cpus()) == 0)
3201 irq = new; 3212 irq = new;
3202 break; 3213 break;
3203 } 3214 }
@@ -3212,7 +3223,6 @@ unsigned int create_irq_nr(unsigned int irq_want)
3212 return irq; 3223 return irq;
3213} 3224}
3214 3225
3215static int nr_irqs_gsi = NR_IRQS_LEGACY;
3216int create_irq(void) 3226int create_irq(void)
3217{ 3227{
3218 unsigned int irq_want; 3228 unsigned int irq_want;
@@ -3259,12 +3269,15 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3259 int err; 3269 int err;
3260 unsigned dest; 3270 unsigned dest;
3261 3271
3272 if (disable_apic)
3273 return -ENXIO;
3274
3262 cfg = irq_cfg(irq); 3275 cfg = irq_cfg(irq);
3263 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3276 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3264 if (err) 3277 if (err)
3265 return err; 3278 return err;
3266 3279
3267 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 3280 dest = apic->cpu_mask_to_apicid_and(cfg->domain, apic->target_cpus());
3268 3281
3269#ifdef CONFIG_INTR_REMAP 3282#ifdef CONFIG_INTR_REMAP
3270 if (irq_remapped(irq)) { 3283 if (irq_remapped(irq)) {
@@ -3278,9 +3291,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3278 memset (&irte, 0, sizeof(irte)); 3291 memset (&irte, 0, sizeof(irte));
3279 3292
3280 irte.present = 1; 3293 irte.present = 1;
3281 irte.dst_mode = INT_DEST_MODE; 3294 irte.dst_mode = apic->irq_dest_mode;
3282 irte.trigger_mode = 0; /* edge */ 3295 irte.trigger_mode = 0; /* edge */
3283 irte.dlvry_mode = INT_DELIVERY_MODE; 3296 irte.dlvry_mode = apic->irq_delivery_mode;
3284 irte.vector = cfg->vector; 3297 irte.vector = cfg->vector;
3285 irte.dest_id = IRTE_DEST(dest); 3298 irte.dest_id = IRTE_DEST(dest);
3286 3299
@@ -3298,10 +3311,10 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3298 msg->address_hi = MSI_ADDR_BASE_HI; 3311 msg->address_hi = MSI_ADDR_BASE_HI;
3299 msg->address_lo = 3312 msg->address_lo =
3300 MSI_ADDR_BASE_LO | 3313 MSI_ADDR_BASE_LO |
3301 ((INT_DEST_MODE == 0) ? 3314 ((apic->irq_dest_mode == 0) ?
3302 MSI_ADDR_DEST_MODE_PHYSICAL: 3315 MSI_ADDR_DEST_MODE_PHYSICAL:
3303 MSI_ADDR_DEST_MODE_LOGICAL) | 3316 MSI_ADDR_DEST_MODE_LOGICAL) |
3304 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3317 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3305 MSI_ADDR_REDIRECTION_CPU: 3318 MSI_ADDR_REDIRECTION_CPU:
3306 MSI_ADDR_REDIRECTION_LOWPRI) | 3319 MSI_ADDR_REDIRECTION_LOWPRI) |
3307 MSI_ADDR_DEST_ID(dest); 3320 MSI_ADDR_DEST_ID(dest);
@@ -3309,7 +3322,7 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3309 msg->data = 3322 msg->data =
3310 MSI_DATA_TRIGGER_EDGE | 3323 MSI_DATA_TRIGGER_EDGE |
3311 MSI_DATA_LEVEL_ASSERT | 3324 MSI_DATA_LEVEL_ASSERT |
3312 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3325 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3313 MSI_DATA_DELIVERY_FIXED: 3326 MSI_DATA_DELIVERY_FIXED:
3314 MSI_DATA_DELIVERY_LOWPRI) | 3327 MSI_DATA_DELIVERY_LOWPRI) |
3315 MSI_DATA_VECTOR(cfg->vector); 3328 MSI_DATA_VECTOR(cfg->vector);
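
msi_compose_msg() now refuses to run when the local APIC is disabled and reads the destination and delivery modes from the runtime apic driver (apic->irq_dest_mode, apic->irq_delivery_mode) instead of the compile-time INT_DEST_MODE/INT_DELIVERY_MODE macros. The sketch below only restates how those two fields select the MSI address and data bits; compose_msi() is an illustrative name, the MSI_* macros are the ones visible in the hunk, and a caller on this path would pass the apic driver fields exactly as the new code does.

/*
 * How the MSI address/data words depend on the two apic-driver fields
 * used above; dest is the APIC ID from cpu_mask_to_apicid_and().
 */
static void compose_msi(struct msi_msg *msg, unsigned int dest,
                        u8 dest_mode, u8 delivery_mode, u8 vector)
{
        msg->address_hi = MSI_ADDR_BASE_HI;
        msg->address_lo = MSI_ADDR_BASE_LO |
                ((dest_mode == 0) ? MSI_ADDR_DEST_MODE_PHYSICAL
                                  : MSI_ADDR_DEST_MODE_LOGICAL) |
                ((delivery_mode != dest_LowestPrio) ? MSI_ADDR_REDIRECTION_CPU
                                                    : MSI_ADDR_REDIRECTION_LOWPRI) |
                MSI_ADDR_DEST_ID(dest);

        msg->data = MSI_DATA_TRIGGER_EDGE | MSI_DATA_LEVEL_ASSERT |
                ((delivery_mode != dest_LowestPrio) ? MSI_DATA_DELIVERY_FIXED
                                                    : MSI_DATA_DELIVERY_LOWPRI) |
                MSI_DATA_VECTOR(vector);
}
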
@@ -3464,40 +3477,6 @@ static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc, int irq)
3464 return 0; 3477 return 0;
3465} 3478}
3466 3479
3467int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *msidesc)
3468{
3469 unsigned int irq;
3470 int ret;
3471 unsigned int irq_want;
3472
3473 irq_want = nr_irqs_gsi;
3474 irq = create_irq_nr(irq_want);
3475 if (irq == 0)
3476 return -1;
3477
3478#ifdef CONFIG_INTR_REMAP
3479 if (!intr_remapping_enabled)
3480 goto no_ir;
3481
3482 ret = msi_alloc_irte(dev, irq, 1);
3483 if (ret < 0)
3484 goto error;
3485no_ir:
3486#endif
3487 ret = setup_msi_irq(dev, msidesc, irq);
3488 if (ret < 0) {
3489 destroy_irq(irq);
3490 return ret;
3491 }
3492 return 0;
3493
3494#ifdef CONFIG_INTR_REMAP
3495error:
3496 destroy_irq(irq);
3497 return ret;
3498#endif
3499}
3500
3501int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) 3480int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3502{ 3481{
3503 unsigned int irq; 3482 unsigned int irq;
@@ -3514,9 +3493,9 @@ int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
3514 sub_handle = 0; 3493 sub_handle = 0;
3515 list_for_each_entry(msidesc, &dev->msi_list, list) { 3494 list_for_each_entry(msidesc, &dev->msi_list, list) {
3516 irq = create_irq_nr(irq_want); 3495 irq = create_irq_nr(irq_want);
3517 irq_want++;
3518 if (irq == 0) 3496 if (irq == 0)
3519 return -1; 3497 return -1;
3498 irq_want = irq + 1;
3520#ifdef CONFIG_INTR_REMAP 3499#ifdef CONFIG_INTR_REMAP
3521 if (!intr_remapping_enabled) 3500 if (!intr_remapping_enabled)
3522 goto no_ir; 3501 goto no_ir;
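
For multi-message MSI, the loop above now restarts the search just past the IRQ it actually obtained (irq_want = irq + 1 after the success check) rather than incrementing the hint before knowing whether the allocation worked. A sketch of that pattern, assuming a stand-in alloc_one_irq() in place of create_irq_nr(); error unwinding is omitted.

/*
 * Allocation pattern used above for multi-message MSI: resume the
 * search just past the IRQ actually obtained.
 */
static int alloc_msi_block(unsigned int first_hint, unsigned int nvec,
                           unsigned int *irqs)
{
        unsigned int hint = first_hint;
        unsigned int i, irq;

        for (i = 0; i < nvec; i++) {
                irq = alloc_one_irq(hint);
                if (irq == 0)
                        return -1;      /* out of IRQ numbers */
                irqs[i] = irq;
                hint = irq + 1;         /* not a blind hint++ */
        }
        return 0;
}
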
@@ -3727,13 +3706,17 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3727 struct irq_cfg *cfg; 3706 struct irq_cfg *cfg;
3728 int err; 3707 int err;
3729 3708
3709 if (disable_apic)
3710 return -ENXIO;
3711
3730 cfg = irq_cfg(irq); 3712 cfg = irq_cfg(irq);
3731 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3713 err = assign_irq_vector(irq, cfg, apic->target_cpus());
3732 if (!err) { 3714 if (!err) {
3733 struct ht_irq_msg msg; 3715 struct ht_irq_msg msg;
3734 unsigned dest; 3716 unsigned dest;
3735 3717
3736 dest = cpu_mask_to_apicid_and(cfg->domain, TARGET_CPUS); 3718 dest = apic->cpu_mask_to_apicid_and(cfg->domain,
3719 apic->target_cpus());
3737 3720
3738 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest); 3721 msg.address_hi = HT_IRQ_HIGH_DEST_ID(dest);
3739 3722
@@ -3741,11 +3724,11 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3741 HT_IRQ_LOW_BASE | 3724 HT_IRQ_LOW_BASE |
3742 HT_IRQ_LOW_DEST_ID(dest) | 3725 HT_IRQ_LOW_DEST_ID(dest) |
3743 HT_IRQ_LOW_VECTOR(cfg->vector) | 3726 HT_IRQ_LOW_VECTOR(cfg->vector) |
3744 ((INT_DEST_MODE == 0) ? 3727 ((apic->irq_dest_mode == 0) ?
3745 HT_IRQ_LOW_DM_PHYSICAL : 3728 HT_IRQ_LOW_DM_PHYSICAL :
3746 HT_IRQ_LOW_DM_LOGICAL) | 3729 HT_IRQ_LOW_DM_LOGICAL) |
3747 HT_IRQ_LOW_RQEOI_EDGE | 3730 HT_IRQ_LOW_RQEOI_EDGE |
3748 ((INT_DELIVERY_MODE != dest_LowestPrio) ? 3731 ((apic->irq_delivery_mode != dest_LowestPrio) ?
3749 HT_IRQ_LOW_MT_FIXED : 3732 HT_IRQ_LOW_MT_FIXED :
3750 HT_IRQ_LOW_MT_ARBITRATED) | 3733 HT_IRQ_LOW_MT_ARBITRATED) |
3751 HT_IRQ_LOW_IRQ_MASKED; 3734 HT_IRQ_LOW_IRQ_MASKED;
@@ -3761,7 +3744,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3761} 3744}
3762#endif /* CONFIG_HT_IRQ */ 3745#endif /* CONFIG_HT_IRQ */
3763 3746
3764#ifdef CONFIG_X86_64 3747#ifdef CONFIG_X86_UV
3765/* 3748/*
3766 * Re-target the irq to the specified CPU and enable the specified MMR located 3749 * Re-target the irq to the specified CPU and enable the specified MMR located
3767 * on the specified blade to allow the sending of MSIs to the specified CPU. 3750 * on the specified blade to allow the sending of MSIs to the specified CPU.
@@ -3793,12 +3776,12 @@ int arch_enable_uv_irq(char *irq_name, unsigned int irq, int cpu, int mmr_blade,
3793 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long)); 3776 BUG_ON(sizeof(struct uv_IO_APIC_route_entry) != sizeof(unsigned long));
3794 3777
3795 entry->vector = cfg->vector; 3778 entry->vector = cfg->vector;
3796 entry->delivery_mode = INT_DELIVERY_MODE; 3779 entry->delivery_mode = apic->irq_delivery_mode;
3797 entry->dest_mode = INT_DEST_MODE; 3780 entry->dest_mode = apic->irq_dest_mode;
3798 entry->polarity = 0; 3781 entry->polarity = 0;
3799 entry->trigger = 0; 3782 entry->trigger = 0;
3800 entry->mask = 0; 3783 entry->mask = 0;
3801 entry->dest = cpu_mask_to_apicid(eligible_cpu); 3784 entry->dest = apic->cpu_mask_to_apicid(eligible_cpu);
3802 3785
3803 mmr_pnode = uv_blade_to_pnode(mmr_blade); 3786 mmr_pnode = uv_blade_to_pnode(mmr_blade);
3804 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value); 3787 uv_write_global_mmr64(mmr_pnode, mmr_offset, mmr_value);
@@ -3841,16 +3824,48 @@ int __init io_apic_get_redir_entries (int ioapic)
3841 3824
3842void __init probe_nr_irqs_gsi(void) 3825void __init probe_nr_irqs_gsi(void)
3843{ 3826{
3844 int idx;
3845 int nr = 0; 3827 int nr = 0;
3846 3828
3847 for (idx = 0; idx < nr_ioapics; idx++) 3829 nr = acpi_probe_gsi();
3848 nr += io_apic_get_redir_entries(idx) + 1; 3830 if (nr > nr_irqs_gsi) {
3849
3850 if (nr > nr_irqs_gsi)
3851 nr_irqs_gsi = nr; 3831 nr_irqs_gsi = nr;
3832 } else {
3833 /* for acpi=off or acpi is not compiled in */
3834 int idx;
3835
3836 nr = 0;
3837 for (idx = 0; idx < nr_ioapics; idx++)
3838 nr += io_apic_get_redir_entries(idx) + 1;
3839
3840 if (nr > nr_irqs_gsi)
3841 nr_irqs_gsi = nr;
3842 }
3843
3844 printk(KERN_DEBUG "nr_irqs_gsi: %d\n", nr_irqs_gsi);
3852} 3845}
3853 3846
3847#ifdef CONFIG_SPARSE_IRQ
3848int __init arch_probe_nr_irqs(void)
3849{
3850 int nr;
3851
3852 if (nr_irqs > (NR_VECTORS * nr_cpu_ids))
3853 nr_irqs = NR_VECTORS * nr_cpu_ids;
3854
3855 nr = nr_irqs_gsi + 8 * nr_cpu_ids;
3856#if defined(CONFIG_PCI_MSI) || defined(CONFIG_HT_IRQ)
3857 /*
3858 * for MSI and HT dyn irq
3859 */
3860 nr += nr_irqs_gsi * 16;
3861#endif
3862 if (nr < nr_irqs)
3863 nr_irqs = nr;
3864
3865 return 0;
3866}
3867#endif
3868
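
probe_nr_irqs_gsi() now prefers the ACPI GSI count and only falls back to summing IO-APIC redirection entries, and the new arch_probe_nr_irqs() sizes nr_irqs from it. As a worked example under assumed values: two 24-pin IO-APICs give nr_irqs_gsi = 48, and with 8 possible CPUs the estimate is 48 + 8*8 + 48*16 = 880, which then bounds nr_irqs from above; the NR_VECTORS * nr_cpu_ids ceiling still applies first. The helper below merely restates that arithmetic.

/*
 * The sizing rule above as plain arithmetic.  The example values in
 * the text (two 24-pin IO-APICs, 8 CPUs) are assumptions for
 * illustration, not taken from the diff.
 */
static int estimate_nr_irqs(int nr_irqs_gsi, int nr_cpu_ids,
                            int nr_vectors, int nr_irqs)
{
        int nr, cap = nr_vectors * nr_cpu_ids;

        if (nr_irqs > cap)
                nr_irqs = cap;                  /* hard ceiling */

        nr = nr_irqs_gsi + 8 * nr_cpu_ids;      /* GSIs plus per-cpu slack */
        nr += nr_irqs_gsi * 16;                 /* room for MSI/HT dynamic IRQs */

        if (nr < nr_irqs)
                nr_irqs = nr;                   /* shrink to the estimate */

        return nr_irqs;
}
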
3854/* -------------------------------------------------------------------------- 3869/* --------------------------------------------------------------------------
3855 ACPI-based IOAPIC Configuration 3870 ACPI-based IOAPIC Configuration
3856 -------------------------------------------------------------------------- */ 3871 -------------------------------------------------------------------------- */
@@ -3876,7 +3891,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3876 */ 3891 */
3877 3892
3878 if (physids_empty(apic_id_map)) 3893 if (physids_empty(apic_id_map))
3879 apic_id_map = ioapic_phys_id_map(phys_cpu_present_map); 3894 apic_id_map = apic->ioapic_phys_id_map(phys_cpu_present_map);
3880 3895
3881 spin_lock_irqsave(&ioapic_lock, flags); 3896 spin_lock_irqsave(&ioapic_lock, flags);
3882 reg_00.raw = io_apic_read(ioapic, 0); 3897 reg_00.raw = io_apic_read(ioapic, 0);
@@ -3892,10 +3907,10 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3892 * Every APIC in a system must have a unique ID or we get lots of nice 3907 * Every APIC in a system must have a unique ID or we get lots of nice
3893 * 'stuck on smp_invalidate_needed IPI wait' messages. 3908 * 'stuck on smp_invalidate_needed IPI wait' messages.
3894 */ 3909 */
3895 if (check_apicid_used(apic_id_map, apic_id)) { 3910 if (apic->check_apicid_used(apic_id_map, apic_id)) {
3896 3911
3897 for (i = 0; i < get_physical_broadcast(); i++) { 3912 for (i = 0; i < get_physical_broadcast(); i++) {
3898 if (!check_apicid_used(apic_id_map, i)) 3913 if (!apic->check_apicid_used(apic_id_map, i))
3899 break; 3914 break;
3900 } 3915 }
3901 3916
@@ -3908,7 +3923,7 @@ int __init io_apic_get_unique_id(int ioapic, int apic_id)
3908 apic_id = i; 3923 apic_id = i;
3909 } 3924 }
3910 3925
3911 tmp = apicid_to_cpu_present(apic_id); 3926 tmp = apic->apicid_to_cpu_present(apic_id);
3912 physids_or(apic_id_map, apic_id_map, tmp); 3927 physids_or(apic_id_map, apic_id_map, tmp);
3913 3928
3914 if (reg_00.bits.ID != apic_id) { 3929 if (reg_00.bits.ID != apic_id) {
@@ -3985,8 +4000,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3985 return -1; 4000 return -1;
3986 4001
3987 for (i = 0; i < mp_irq_entries; i++) 4002 for (i = 0; i < mp_irq_entries; i++)
3988 if (mp_irqs[i].mp_irqtype == mp_INT && 4003 if (mp_irqs[i].irqtype == mp_INT &&
3989 mp_irqs[i].mp_srcbusirq == bus_irq) 4004 mp_irqs[i].srcbusirq == bus_irq)
3990 break; 4005 break;
3991 if (i >= mp_irq_entries) 4006 if (i >= mp_irq_entries)
3992 return -1; 4007 return -1;
@@ -4001,7 +4016,7 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4001/* 4016/*
4002 * This function currently is only a helper for the i386 smp boot process where 4017 * This function currently is only a helper for the i386 smp boot process where
4003 * we need to reprogram the ioredtbls to cater for the cpus which have come online 4018 * we need to reprogram the ioredtbls to cater for the cpus which have come online
4004 * so mask in all cases should simply be TARGET_CPUS 4019 * so mask in all cases should simply be apic->target_cpus()
4005 */ 4020 */
4006#ifdef CONFIG_SMP 4021#ifdef CONFIG_SMP
4007void __init setup_ioapic_dest(void) 4022void __init setup_ioapic_dest(void)
@@ -4040,9 +4055,9 @@ void __init setup_ioapic_dest(void)
4040 */ 4055 */
4041 if (desc->status & 4056 if (desc->status &
4042 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4057 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4043 mask = &desc->affinity; 4058 mask = desc->affinity;
4044 else 4059 else
4045 mask = TARGET_CPUS; 4060 mask = apic->target_cpus();
4046 4061
4047#ifdef CONFIG_INTR_REMAP 4062#ifdef CONFIG_INTR_REMAP
4048 if (intr_remapping_enabled) 4063 if (intr_remapping_enabled)
@@ -4101,7 +4116,7 @@ void __init ioapic_init_mappings(void)
4101 ioapic_res = ioapic_setup_resources(); 4116 ioapic_res = ioapic_setup_resources();
4102 for (i = 0; i < nr_ioapics; i++) { 4117 for (i = 0; i < nr_ioapics; i++) {
4103 if (smp_found_config) { 4118 if (smp_found_config) {
4104 ioapic_phys = mp_ioapics[i].mp_apicaddr; 4119 ioapic_phys = mp_ioapics[i].apicaddr;
4105#ifdef CONFIG_X86_32 4120#ifdef CONFIG_X86_32
4106 if (!ioapic_phys) { 4121 if (!ioapic_phys) {
4107 printk(KERN_ERR 4122 printk(KERN_ERR
diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c
index b12208f4dfee..e41980a373ab 100644
--- a/arch/x86/kernel/ioport.c
+++ b/arch/x86/kernel/ioport.c
@@ -131,9 +131,8 @@ static int do_iopl(unsigned int level, struct pt_regs *regs)
131} 131}
132 132
133#ifdef CONFIG_X86_32 133#ifdef CONFIG_X86_32
134asmlinkage long sys_iopl(unsigned long regsp) 134long sys_iopl(struct pt_regs *regs)
135{ 135{
136 struct pt_regs *regs = (struct pt_regs *)&regsp;
137 unsigned int level = regs->bx; 136 unsigned int level = regs->bx;
138 struct thread_struct *t = &current->thread; 137 struct thread_struct *t = &current->thread;
139 int rc; 138 int rc;
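
With the entry-code changes elsewhere in this merge, the 32-bit sys_iopl() receives a real struct pt_regs pointer instead of reconstructing one from the address of its stack slot, so the requested level is read straight from regs->bx. A reduced sketch, with the do_iopl() privilege check elided; the function name is illustrative.

/*
 * Shape of the new 32-bit calling convention above.
 */
long sys_iopl_sketch(struct pt_regs *regs)
{
        unsigned int level = regs->bx;          /* first syscall argument */

        if (level > 3)
                return -EINVAL;
        /* ... permission check and current->thread.iopl update ... */
        return 0;
}
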
diff --git a/arch/x86/kernel/ipi.c b/arch/x86/kernel/ipi.c
index 285bbf8831fa..dbf5445727a9 100644
--- a/arch/x86/kernel/ipi.c
+++ b/arch/x86/kernel/ipi.c
@@ -17,147 +17,121 @@
17#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
18#include <asm/apic.h> 18#include <asm/apic.h>
19#include <asm/proto.h> 19#include <asm/proto.h>
20#include <asm/ipi.h>
20 21
21#ifdef CONFIG_X86_32 22void default_send_IPI_mask_sequence_phys(const struct cpumask *mask, int vector)
22#include <mach_apic.h>
23#include <mach_ipi.h>
24
25/*
26 * the following functions deal with sending IPIs between CPUs.
27 *
28 * We use 'broadcast', CPU->CPU IPIs and self-IPIs too.
29 */
30
31static inline int __prepare_ICR(unsigned int shortcut, int vector)
32{ 23{
33 unsigned int icr = shortcut | APIC_DEST_LOGICAL; 24 unsigned long query_cpu;
34 25 unsigned long flags;
35 switch (vector) { 26
36 default: 27 /*
37 icr |= APIC_DM_FIXED | vector; 28 * Hack. The clustered APIC addressing mode doesn't allow us to send
38 break; 29 * to an arbitrary mask, so I do a unicast to each CPU instead.
39 case NMI_VECTOR: 30 * - mbligh
40 icr |= APIC_DM_NMI; 31 */
41 break; 32 local_irq_save(flags);
33 for_each_cpu(query_cpu, mask) {
34 __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
35 query_cpu), vector, APIC_DEST_PHYSICAL);
42 } 36 }
43 return icr; 37 local_irq_restore(flags);
44} 38}
45 39
46static inline int __prepare_ICR2(unsigned int mask) 40void default_send_IPI_mask_allbutself_phys(const struct cpumask *mask,
41 int vector)
47{ 42{
48 return SET_APIC_DEST_FIELD(mask); 43 unsigned int this_cpu = smp_processor_id();
49} 44 unsigned int query_cpu;
45 unsigned long flags;
50 46
51void __send_IPI_shortcut(unsigned int shortcut, int vector) 47 /* See Hack comment above */
52{
53 /*
54 * Subtle. In the case of the 'never do double writes' workaround
55 * we have to lock out interrupts to be safe. As we don't care
56 * of the value read we use an atomic rmw access to avoid costly
57 * cli/sti. Otherwise we use an even cheaper single atomic write
58 * to the APIC.
59 */
60 unsigned int cfg;
61 48
62 /* 49 local_irq_save(flags);
63 * Wait for idle. 50 for_each_cpu(query_cpu, mask) {
64 */ 51 if (query_cpu == this_cpu)
65 apic_wait_icr_idle(); 52 continue;
53 __default_send_IPI_dest_field(per_cpu(x86_cpu_to_apicid,
54 query_cpu), vector, APIC_DEST_PHYSICAL);
55 }
56 local_irq_restore(flags);
57}
66 58
67 /* 59void default_send_IPI_mask_sequence_logical(const struct cpumask *mask,
68 * No need to touch the target chip field 60 int vector)
69 */ 61{
70 cfg = __prepare_ICR(shortcut, vector); 62 unsigned long flags;
63 unsigned int query_cpu;
71 64
72 /* 65 /*
73 * Send the IPI. The write to APIC_ICR fires this off. 66 * Hack. The clustered APIC addressing mode doesn't allow us to send
67 * to an arbitrary mask, so I do a unicasts to each CPU instead. This
68 * should be modified to do 1 message per cluster ID - mbligh
74 */ 69 */
75 apic_write(APIC_ICR, cfg);
76}
77 70
78void send_IPI_self(int vector) 71 local_irq_save(flags);
79{ 72 for_each_cpu(query_cpu, mask)
80 __send_IPI_shortcut(APIC_DEST_SELF, vector); 73 __default_send_IPI_dest_field(
74 apic->cpu_to_logical_apicid(query_cpu), vector,
75 apic->dest_logical);
76 local_irq_restore(flags);
81} 77}
82 78
83/* 79void default_send_IPI_mask_allbutself_logical(const struct cpumask *mask,
84 * This is used to send an IPI with no shorthand notation (the destination is 80 int vector)
85 * specified in bits 56 to 63 of the ICR).
86 */
87static inline void __send_IPI_dest_field(unsigned long mask, int vector)
88{ 81{
89 unsigned long cfg; 82 unsigned long flags;
90 83 unsigned int query_cpu;
91 /* 84 unsigned int this_cpu = smp_processor_id();
92 * Wait for idle.
93 */
94 if (unlikely(vector == NMI_VECTOR))
95 safe_apic_wait_icr_idle();
96 else
97 apic_wait_icr_idle();
98
99 /*
100 * prepare target chip field
101 */
102 cfg = __prepare_ICR2(mask);
103 apic_write(APIC_ICR2, cfg);
104 85
105 /* 86 /* See Hack comment above */
106 * program the ICR
107 */
108 cfg = __prepare_ICR(0, vector);
109 87
110 /* 88 local_irq_save(flags);
111 * Send the IPI. The write to APIC_ICR fires this off. 89 for_each_cpu(query_cpu, mask) {
112 */ 90 if (query_cpu == this_cpu)
113 apic_write(APIC_ICR, cfg); 91 continue;
92 __default_send_IPI_dest_field(
93 apic->cpu_to_logical_apicid(query_cpu), vector,
94 apic->dest_logical);
95 }
96 local_irq_restore(flags);
114} 97}
115 98
99#ifdef CONFIG_X86_32
100
116/* 101/*
117 * This is only used on smaller machines. 102 * This is only used on smaller machines.
118 */ 103 */
119void send_IPI_mask_bitmask(const struct cpumask *cpumask, int vector) 104void default_send_IPI_mask_logical(const struct cpumask *cpumask, int vector)
120{ 105{
121 unsigned long mask = cpumask_bits(cpumask)[0]; 106 unsigned long mask = cpumask_bits(cpumask)[0];
122 unsigned long flags; 107 unsigned long flags;
123 108
124 local_irq_save(flags); 109 local_irq_save(flags);
125 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]); 110 WARN_ON(mask & ~cpumask_bits(cpu_online_mask)[0]);
126 __send_IPI_dest_field(mask, vector); 111 __default_send_IPI_dest_field(mask, vector, apic->dest_logical);
127 local_irq_restore(flags); 112 local_irq_restore(flags);
128} 113}
129 114
130void send_IPI_mask_sequence(const struct cpumask *mask, int vector) 115void default_send_IPI_allbutself(int vector)
131{ 116{
132 unsigned long flags;
133 unsigned int query_cpu;
134
135 /* 117 /*
136 * Hack. The clustered APIC addressing mode doesn't allow us to send 118 * if there are no other CPUs in the system then we get an APIC send
137 * to an arbitrary mask, so I do a unicasts to each CPU instead. This 119 * error if we try to broadcast, thus avoid sending IPIs in this case.
138 * should be modified to do 1 message per cluster ID - mbligh
139 */ 120 */
121 if (!(num_online_cpus() > 1))
122 return;
140 123
141 local_irq_save(flags); 124 __default_local_send_IPI_allbutself(vector);
142 for_each_cpu(query_cpu, mask)
143 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu), vector);
144 local_irq_restore(flags);
145} 125}
146 126
147void send_IPI_mask_allbutself(const struct cpumask *mask, int vector) 127void default_send_IPI_all(int vector)
148{ 128{
149 unsigned long flags; 129 __default_local_send_IPI_all(vector);
150 unsigned int query_cpu; 130}
151 unsigned int this_cpu = smp_processor_id();
152
153 /* See Hack comment above */
154 131
155 local_irq_save(flags); 132void default_send_IPI_self(int vector)
156 for_each_cpu(query_cpu, mask) 133{
157 if (query_cpu != this_cpu) 134 __default_send_IPI_shortcut(APIC_DEST_SELF, vector, apic->dest_logical);
158 __send_IPI_dest_field(cpu_to_logical_apicid(query_cpu),
159 vector);
160 local_irq_restore(flags);
161} 135}
162 136
163/* must come after the send_IPI functions above for inlining */ 137/* must come after the send_IPI functions above for inlining */
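
The rewritten ipi.c exports the default_send_IPI_* helpers that the apic driver structure now points at, and they share one pattern: since no ICR shorthand can address an arbitrary cpumask, each destination gets a unicast, interrupts are disabled around the whole walk, and the "allbutself" variants skip the sender. A sketch of that shared shape, where send_one_ipi() stands in for __default_send_IPI_dest_field().

/*
 * Common shape of the default_send_IPI_mask_* helpers above: one
 * unicast per CPU in the mask.
 */
static void send_ipi_mask_sketch(const struct cpumask *mask, int vector,
                                 bool allbutself)
{
        unsigned int this_cpu = smp_processor_id();
        unsigned int query_cpu;
        unsigned long flags;

        local_irq_save(flags);
        for_each_cpu(query_cpu, mask) {
                if (allbutself && query_cpu == this_cpu)
                        continue;               /* never IPI ourselves here */
                send_one_ipi(per_cpu(x86_cpu_to_apicid, query_cpu), vector);
        }
        local_irq_restore(flags);
}
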
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3973e2df7f87..f13ca1650aaf 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -6,10 +6,12 @@
6#include <linux/kernel_stat.h> 6#include <linux/kernel_stat.h>
7#include <linux/seq_file.h> 7#include <linux/seq_file.h>
8#include <linux/smp.h> 8#include <linux/smp.h>
9#include <linux/ftrace.h>
9 10
10#include <asm/apic.h> 11#include <asm/apic.h>
11#include <asm/io_apic.h> 12#include <asm/io_apic.h>
12#include <asm/irq.h> 13#include <asm/irq.h>
14#include <asm/idle.h>
13 15
14atomic_t irq_err_count; 16atomic_t irq_err_count;
15 17
@@ -36,11 +38,7 @@ void ack_bad_irq(unsigned int irq)
36#endif 38#endif
37} 39}
38 40
39#ifdef CONFIG_X86_32 41#define irq_stats(x) (&per_cpu(irq_stat, x))
40# define irq_stats(x) (&per_cpu(irq_stat, x))
41#else
42# define irq_stats(x) cpu_pda(x)
43#endif
44/* 42/*
45 * /proc/interrupts printing: 43 * /proc/interrupts printing:
46 */ 44 */
@@ -192,4 +190,40 @@ u64 arch_irq_stat(void)
192 return sum; 190 return sum;
193} 191}
194 192
193
194/*
195 * do_IRQ handles all normal device IRQ's (the special
196 * SMP cross-CPU interrupts have their own specific
197 * handlers).
198 */
199unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
200{
201 struct pt_regs *old_regs = set_irq_regs(regs);
202
203 /* high bit used in ret_from_ code */
204 unsigned vector = ~regs->orig_ax;
205 unsigned irq;
206
207 exit_idle();
208 irq_enter();
209
210 irq = __get_cpu_var(vector_irq)[vector];
211
212 if (!handle_irq(irq, regs)) {
213#ifdef CONFIG_X86_64
214 if (!disable_apic)
215 ack_APIC_irq();
216#endif
217
218 if (printk_ratelimit())
219 printk(KERN_EMERG "%s: %d.%d No irq handler for vector (irq %d)\n",
220 __func__, smp_processor_id(), vector, irq);
221 }
222
223 irq_exit();
224
225 set_irq_regs(old_regs);
226 return 1;
227}
228
195EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq); 229EXPORT_SYMBOL_GPL(vector_used_by_percpu_irq);
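
do_IRQ() is now shared by 32-bit and 64-bit: it owns the vector-to-IRQ lookup and the irq_enter()/irq_exit() bookkeeping, while each architecture only supplies handle_irq(), which returns false when no descriptor exists (the irq-stack switch lives in the 32-bit version, the stack-overflow check in the 64-bit one, as the following hunks show). An outline of that division of labour, with error reporting trimmed and an illustrative function name.

/*
 * Outline of the split introduced above: shared entry path plus a
 * per-architecture handle_irq().
 */
unsigned int do_irq_outline(struct pt_regs *regs)
{
        struct pt_regs *old_regs = set_irq_regs(regs);
        unsigned int vector = ~regs->orig_ax;   /* high bit set by entry code */
        unsigned int irq;

        exit_idle();
        irq_enter();

        irq = __get_cpu_var(vector_irq)[vector];
        if (!handle_irq(irq, regs))
                printk(KERN_EMERG "no handler for vector %u (irq %u)\n",
                       vector, irq);

        irq_exit();
        set_irq_regs(old_regs);
        return 1;
}
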
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 74b9ff7341e9..4beb9a13873d 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -191,33 +191,16 @@ static inline int
191execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; } 191execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
192#endif 192#endif
193 193
194/* 194bool handle_irq(unsigned irq, struct pt_regs *regs)
195 * do_IRQ handles all normal device IRQ's (the special
196 * SMP cross-CPU interrupts have their own specific
197 * handlers).
198 */
199unsigned int do_IRQ(struct pt_regs *regs)
200{ 195{
201 struct pt_regs *old_regs;
202 /* high bit used in ret_from_ code */
203 int overflow;
204 unsigned vector = ~regs->orig_ax;
205 struct irq_desc *desc; 196 struct irq_desc *desc;
206 unsigned irq; 197 int overflow;
207
208
209 old_regs = set_irq_regs(regs);
210 irq_enter();
211 irq = __get_cpu_var(vector_irq)[vector];
212 198
213 overflow = check_stack_overflow(); 199 overflow = check_stack_overflow();
214 200
215 desc = irq_to_desc(irq); 201 desc = irq_to_desc(irq);
216 if (unlikely(!desc)) { 202 if (unlikely(!desc))
217 printk(KERN_EMERG "%s: cannot handle IRQ %d vector %#x cpu %d\n", 203 return false;
218 __func__, irq, vector, smp_processor_id());
219 BUG();
220 }
221 204
222 if (!execute_on_irq_stack(overflow, desc, irq)) { 205 if (!execute_on_irq_stack(overflow, desc, irq)) {
223 if (unlikely(overflow)) 206 if (unlikely(overflow))
@@ -225,13 +208,11 @@ unsigned int do_IRQ(struct pt_regs *regs)
225 desc->handle_irq(irq, desc); 208 desc->handle_irq(irq, desc);
226 } 209 }
227 210
228 irq_exit(); 211 return true;
229 set_irq_regs(old_regs);
230 return 1;
231} 212}
232 213
233#ifdef CONFIG_HOTPLUG_CPU 214#ifdef CONFIG_HOTPLUG_CPU
234#include <mach_apic.h> 215#include <asm/genapic.h>
235 216
236/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */ 217/* A cpu has been removed from cpu_online_mask. Reset irq affinities. */
237void fixup_irqs(void) 218void fixup_irqs(void)
@@ -248,7 +229,7 @@ void fixup_irqs(void)
248 if (irq == 2) 229 if (irq == 2)
249 continue; 230 continue;
250 231
251 affinity = &desc->affinity; 232 affinity = desc->affinity;
252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { 233 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
253 printk("Breaking affinity for irq %i\n", irq); 234 printk("Breaking affinity for irq %i\n", irq);
254 affinity = cpu_all_mask; 235 affinity = cpu_all_mask;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 63c88e6ec025..977d8b43a0dd 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,6 +18,13 @@
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/idle.h> 20#include <asm/idle.h>
21#include <asm/apic.h>
22
23DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
24EXPORT_PER_CPU_SYMBOL(irq_stat);
25
26DEFINE_PER_CPU(struct pt_regs *, irq_regs);
27EXPORT_PER_CPU_SYMBOL(irq_regs);
21 28
22/* 29/*
23 * Probabilistic stack overflow check: 30 * Probabilistic stack overflow check:
@@ -41,42 +48,18 @@ static inline void stack_overflow_check(struct pt_regs *regs)
41#endif 48#endif
42} 49}
43 50
44/* 51bool handle_irq(unsigned irq, struct pt_regs *regs)
45 * do_IRQ handles all normal device IRQ's (the special
46 * SMP cross-CPU interrupts have their own specific
47 * handlers).
48 */
49asmlinkage unsigned int __irq_entry do_IRQ(struct pt_regs *regs)
50{ 52{
51 struct pt_regs *old_regs = set_irq_regs(regs);
52 struct irq_desc *desc; 53 struct irq_desc *desc;
53 54
54 /* high bit used in ret_from_ code */
55 unsigned vector = ~regs->orig_ax;
56 unsigned irq;
57
58 exit_idle();
59 irq_enter();
60 irq = __get_cpu_var(vector_irq)[vector];
61
62 stack_overflow_check(regs); 55 stack_overflow_check(regs);
63 56
64 desc = irq_to_desc(irq); 57 desc = irq_to_desc(irq);
65 if (likely(desc)) 58 if (unlikely(!desc))
66 generic_handle_irq_desc(irq, desc); 59 return false;
67 else {
68 if (!disable_apic)
69 ack_APIC_irq();
70
71 if (printk_ratelimit())
72 printk(KERN_EMERG "%s: %d.%d No irq handler for vector\n",
73 __func__, smp_processor_id(), vector);
74 }
75
76 irq_exit();
77 60
78 set_irq_regs(old_regs); 61 generic_handle_irq_desc(irq, desc);
79 return 1; 62 return true;
80} 63}
81 64
82#ifdef CONFIG_HOTPLUG_CPU 65#ifdef CONFIG_HOTPLUG_CPU
@@ -100,7 +83,7 @@ void fixup_irqs(void)
100 /* interrupt's are disabled at this point */ 83 /* interrupt's are disabled at this point */
101 spin_lock(&desc->lock); 84 spin_lock(&desc->lock);
102 85
103 affinity = &desc->affinity; 86 affinity = desc->affinity;
104 if (!irq_has_action(irq) || 87 if (!irq_has_action(irq) ||
105 cpumask_equal(affinity, cpu_online_mask)) { 88 cpumask_equal(affinity, cpu_online_mask)) {
106 spin_unlock(&desc->lock); 89 spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 10a09c2f1828..bf629cadec1a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -78,6 +78,15 @@ void __init init_ISA_irqs(void)
78 } 78 }
79} 79}
80 80
81/*
82 * IRQ2 is cascade interrupt to second interrupt controller
83 */
84static struct irqaction irq2 = {
85 .handler = no_action,
86 .mask = CPU_MASK_NONE,
87 .name = "cascade",
88};
89
81DEFINE_PER_CPU(vector_irq_t, vector_irq) = { 90DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
82 [0 ... IRQ0_VECTOR - 1] = -1, 91 [0 ... IRQ0_VECTOR - 1] = -1,
83 [IRQ0_VECTOR] = 0, 92 [IRQ0_VECTOR] = 0,
@@ -140,8 +149,15 @@ void __init native_init_IRQ(void)
140 */ 149 */
141 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
142 151
143 /* IPI for invalidation */ 152 /* IPIs for invalidation */
144 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); 153 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
154 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
155 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
156 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
157 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
158 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
159 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
160 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
145 161
146 /* IPI for generic function call */ 162 /* IPI for generic function call */
147 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 163 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
@@ -169,6 +185,9 @@ void __init native_init_IRQ(void)
169 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); 185 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
170#endif 186#endif
171 187
188 if (!acpi_ioapic)
189 setup_irq(2, &irq2);
190
172 /* setup after call gates are initialised (usually add in 191 /* setup after call gates are initialised (usually add in
173 * the architecture specific gates) 192 * the architecture specific gates)
174 */ 193 */
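
irqinit_32.c now mirrors the 64-bit setup: the cascade IRQ2 action is registered when ACPI is not driving the IO-APIC, and eight invalidate vectors are allocated so that up to eight CPUs can issue TLB-flush IPIs concurrently without sharing a vector, matching the tlb_32/tlb_64 unification in this merge. The loop below is only a compact restatement of the eight alloc_intr_gate() lines; the real code spells out the eight asm stubs, collected here in an array for brevity.

/*
 * Compact restatement of the invalidate-vector setup above: one gate
 * per TLB-flush sender slot.
 */
static void (*const invalidate_stubs[])(void) = {
        invalidate_interrupt0, invalidate_interrupt1,
        invalidate_interrupt2, invalidate_interrupt3,
        invalidate_interrupt4, invalidate_interrupt5,
        invalidate_interrupt6, invalidate_interrupt7,
};

static void __init setup_invalidate_gates(void)
{
        int i;

        for (i = 0; i < 8; i++)
                alloc_intr_gate(INVALIDATE_TLB_VECTOR_START + i,
                                invalidate_stubs[i]);
}
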
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 10435a120d22..5c4f55483849 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -46,7 +46,7 @@
46#include <asm/apicdef.h> 46#include <asm/apicdef.h>
47#include <asm/system.h> 47#include <asm/system.h>
48 48
49#include <mach_ipi.h> 49#include <asm/genapic.h>
50 50
51/* 51/*
52 * Put the error code here just in case the user cares: 52 * Put the error code here just in case the user cares:
@@ -347,7 +347,7 @@ void kgdb_post_primary_code(struct pt_regs *regs, int e_vector, int err_code)
347 */ 347 */
348void kgdb_roundup_cpus(unsigned long flags) 348void kgdb_roundup_cpus(unsigned long flags)
349{ 349{
350 send_IPI_allbutself(APIC_DM_NMI); 350 apic->send_IPI_allbutself(APIC_DM_NMI);
351} 351}
352#endif 352#endif
353 353
diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c
index c43caa3a91f3..6993d51b7fd8 100644
--- a/arch/x86/kernel/machine_kexec_64.c
+++ b/arch/x86/kernel/machine_kexec_64.c
@@ -18,15 +18,6 @@
18#include <asm/mmu_context.h> 18#include <asm/mmu_context.h>
19#include <asm/io.h> 19#include <asm/io.h>
20 20
21#define PAGE_ALIGNED __attribute__ ((__aligned__(PAGE_SIZE)))
22static u64 kexec_pgd[512] PAGE_ALIGNED;
23static u64 kexec_pud0[512] PAGE_ALIGNED;
24static u64 kexec_pmd0[512] PAGE_ALIGNED;
25static u64 kexec_pte0[512] PAGE_ALIGNED;
26static u64 kexec_pud1[512] PAGE_ALIGNED;
27static u64 kexec_pmd1[512] PAGE_ALIGNED;
28static u64 kexec_pte1[512] PAGE_ALIGNED;
29
30static void init_level2_page(pmd_t *level2p, unsigned long addr) 21static void init_level2_page(pmd_t *level2p, unsigned long addr)
31{ 22{
32 unsigned long end_addr; 23 unsigned long end_addr;
@@ -107,12 +98,65 @@ out:
107 return result; 98 return result;
108} 99}
109 100
101static void free_transition_pgtable(struct kimage *image)
102{
103 free_page((unsigned long)image->arch.pud);
104 free_page((unsigned long)image->arch.pmd);
105 free_page((unsigned long)image->arch.pte);
106}
107
108static int init_transition_pgtable(struct kimage *image, pgd_t *pgd)
109{
110 pud_t *pud;
111 pmd_t *pmd;
112 pte_t *pte;
113 unsigned long vaddr, paddr;
114 int result = -ENOMEM;
115
116 vaddr = (unsigned long)relocate_kernel;
117 paddr = __pa(page_address(image->control_code_page)+PAGE_SIZE);
118 pgd += pgd_index(vaddr);
119 if (!pgd_present(*pgd)) {
120 pud = (pud_t *)get_zeroed_page(GFP_KERNEL);
121 if (!pud)
122 goto err;
123 image->arch.pud = pud;
124 set_pgd(pgd, __pgd(__pa(pud) | _KERNPG_TABLE));
125 }
126 pud = pud_offset(pgd, vaddr);
127 if (!pud_present(*pud)) {
128 pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
129 if (!pmd)
130 goto err;
131 image->arch.pmd = pmd;
132 set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
133 }
134 pmd = pmd_offset(pud, vaddr);
135 if (!pmd_present(*pmd)) {
136 pte = (pte_t *)get_zeroed_page(GFP_KERNEL);
137 if (!pte)
138 goto err;
139 image->arch.pte = pte;
140 set_pmd(pmd, __pmd(__pa(pte) | _KERNPG_TABLE));
141 }
142 pte = pte_offset_kernel(pmd, vaddr);
143 set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC));
144 return 0;
145err:
146 free_transition_pgtable(image);
147 return result;
148}
149
110 150
111static int init_pgtable(struct kimage *image, unsigned long start_pgtable) 151static int init_pgtable(struct kimage *image, unsigned long start_pgtable)
112{ 152{
113 pgd_t *level4p; 153 pgd_t *level4p;
154 int result;
114 level4p = (pgd_t *)__va(start_pgtable); 155 level4p = (pgd_t *)__va(start_pgtable);
115 return init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT); 156 result = init_level4_page(image, level4p, 0, max_pfn << PAGE_SHIFT);
157 if (result)
158 return result;
159 return init_transition_pgtable(image, level4p);
116} 160}
117 161
118static void set_idt(void *newidt, u16 limit) 162static void set_idt(void *newidt, u16 limit)
@@ -174,7 +218,7 @@ int machine_kexec_prepare(struct kimage *image)
174 218
175void machine_kexec_cleanup(struct kimage *image) 219void machine_kexec_cleanup(struct kimage *image)
176{ 220{
177 return; 221 free_transition_pgtable(image);
178} 222}
179 223
180/* 224/*
@@ -195,22 +239,6 @@ void machine_kexec(struct kimage *image)
195 memcpy(control_page, relocate_kernel, PAGE_SIZE); 239 memcpy(control_page, relocate_kernel, PAGE_SIZE);
196 240
197 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page); 241 page_list[PA_CONTROL_PAGE] = virt_to_phys(control_page);
198 page_list[VA_CONTROL_PAGE] = (unsigned long)relocate_kernel;
199 page_list[PA_PGD] = virt_to_phys(&kexec_pgd);
200 page_list[VA_PGD] = (unsigned long)kexec_pgd;
201 page_list[PA_PUD_0] = virt_to_phys(&kexec_pud0);
202 page_list[VA_PUD_0] = (unsigned long)kexec_pud0;
203 page_list[PA_PMD_0] = virt_to_phys(&kexec_pmd0);
204 page_list[VA_PMD_0] = (unsigned long)kexec_pmd0;
205 page_list[PA_PTE_0] = virt_to_phys(&kexec_pte0);
206 page_list[VA_PTE_0] = (unsigned long)kexec_pte0;
207 page_list[PA_PUD_1] = virt_to_phys(&kexec_pud1);
208 page_list[VA_PUD_1] = (unsigned long)kexec_pud1;
209 page_list[PA_PMD_1] = virt_to_phys(&kexec_pmd1);
210 page_list[VA_PMD_1] = (unsigned long)kexec_pmd1;
211 page_list[PA_PTE_1] = virt_to_phys(&kexec_pte1);
212 page_list[VA_PTE_1] = (unsigned long)kexec_pte1;
213
214 page_list[PA_TABLE_PAGE] = 242 page_list[PA_TABLE_PAGE] =
215 (unsigned long)__pa(page_address(image->control_code_page)); 243 (unsigned long)__pa(page_address(image->control_code_page));
216 244
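
machine_kexec_64.c drops the statically allocated kexec page tables and builds the transition mapping on demand: init_transition_pgtable() walks pgd, pud and pmd, allocates a zeroed page for any missing level, records each page in image->arch so machine_kexec_cleanup() can free it, and finally maps the virtual address of relocate_kernel to the control page. One level of that walk, reduced to a sketch with an illustrative helper name.

/*
 * One level of the walk-and-allocate pattern used by
 * init_transition_pgtable() above.
 */
static pmd_t *transition_pmd(struct kimage *image, pud_t *pud,
                             unsigned long vaddr)
{
        pmd_t *pmd;

        if (!pud_present(*pud)) {
                pmd = (pmd_t *)get_zeroed_page(GFP_KERNEL);
                if (!pmd)
                        return NULL;    /* caller unwinds via free_transition_pgtable() */
                image->arch.pmd = pmd;  /* kept for cleanup */
                set_pud(pud, __pud(__pa(pmd) | _KERNPG_TABLE));
        }
        return pmd_offset(pud, vaddr);
}
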
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index b7f4c929e615..5e9f4fc51385 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -87,9 +87,9 @@
87#include <linux/cpu.h> 87#include <linux/cpu.h>
88#include <linux/firmware.h> 88#include <linux/firmware.h>
89#include <linux/platform_device.h> 89#include <linux/platform_device.h>
90#include <linux/uaccess.h>
90 91
91#include <asm/msr.h> 92#include <asm/msr.h>
92#include <asm/uaccess.h>
93#include <asm/processor.h> 93#include <asm/processor.h>
94#include <asm/microcode.h> 94#include <asm/microcode.h>
95 95
@@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; 196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
197} 197}
198 198
199static inline int 199static inline int
200update_match_revision(struct microcode_header_intel *mc_header, int rev) 200update_match_revision(struct microcode_header_intel *mc_header, int rev)
201{ 201{
202 return (mc_header->rev <= rev) ? 0 : 1; 202 return (mc_header->rev <= rev) ? 0 : 1;
@@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device)
442 return ret; 442 return ret;
443 } 443 }
444 444
445 ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, 445 ret = generic_load_microcode(cpu, (void *)firmware->data,
446 &get_ucode_fw); 446 firmware->size, &get_ucode_fw);
447 447
448 release_firmware(firmware); 448 release_firmware(firmware);
449 449
@@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size)
460 /* We should bind the task to the CPU */ 460 /* We should bind the task to the CPU */
461 BUG_ON(cpu != raw_smp_processor_id()); 461 BUG_ON(cpu != raw_smp_processor_id());
462 462
463 return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); 463 return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
464} 464}
465 465
466static void microcode_fini_cpu(int cpu) 466static void microcode_fini_cpu(int cpu)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
index 3db0a5442eb1..0edd819050e7 100644
--- a/arch/x86/kernel/module_32.c
+++ b/arch/x86/kernel/module_32.c
@@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region)
42{ 42{
43 vfree(module_region); 43 vfree(module_region);
44 /* FIXME: If module_region == mod->init_region, trim exception 44 /* FIXME: If module_region == mod->init_region, trim exception
45 table entries. */ 45 table entries. */
46} 46}
47 47
48/* We don't need anything special. */ 48/* We don't need anything special. */
@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr,
113 *para = NULL; 113 *para = NULL;
114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
115 115
116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
117 if (!strcmp(".text", secstrings + s->sh_name)) 117 if (!strcmp(".text", secstrings + s->sh_name))
118 text = s; 118 text = s;
119 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 119 if (!strcmp(".altinstructions", secstrings + s->sh_name))
120 alt = s; 120 alt = s;
121 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 121 if (!strcmp(".smp_locks", secstrings + s->sh_name))
122 locks= s; 122 locks = s;
123 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 123 if (!strcmp(".parainstructions", secstrings + s->sh_name))
124 para = s; 124 para = s;
125 } 125 }
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 6ba87830d4b1..c23880b90b5c 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -30,14 +30,14 @@
30#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/pgtable.h> 31#include <asm/pgtable.h>
32 32
33#define DEBUGP(fmt...) 33#define DEBUGP(fmt...)
34 34
35#ifndef CONFIG_UML 35#ifndef CONFIG_UML
36void module_free(struct module *mod, void *module_region) 36void module_free(struct module *mod, void *module_region)
37{ 37{
38 vfree(module_region); 38 vfree(module_region);
39 /* FIXME: If module_region == mod->init_region, trim exception 39 /* FIXME: If module_region == mod->init_region, trim exception
40 table entries. */ 40 table entries. */
41} 41}
42 42
43void *module_alloc(unsigned long size) 43void *module_alloc(unsigned long size)
@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; 77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
78 Elf64_Sym *sym; 78 Elf64_Sym *sym;
79 void *loc; 79 void *loc;
80 u64 val; 80 u64 val;
81 81
82 DEBUGP("Applying relocate section %u to %u\n", relsec, 82 DEBUGP("Applying relocate section %u to %u\n", relsec,
83 sechdrs[relsec].sh_info); 83 sechdrs[relsec].sh_info);
@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr 91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
92 + ELF64_R_SYM(rel[i].r_info); 92 + ELF64_R_SYM(rel[i].r_info);
93 93
94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", 94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
95 (int)ELF64_R_TYPE(rel[i].r_info), 95 (int)ELF64_R_TYPE(rel[i].r_info),
96 sym->st_value, rel[i].r_addend, (u64)loc); 96 sym->st_value, rel[i].r_addend, (u64)loc);
97 97
98 val = sym->st_value + rel[i].r_addend; 98 val = sym->st_value + rel[i].r_addend;
99 99
100 switch (ELF64_R_TYPE(rel[i].r_info)) { 100 switch (ELF64_R_TYPE(rel[i].r_info)) {
101 case R_X86_64_NONE: 101 case R_X86_64_NONE:
@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
113 if ((s64)val != *(s32 *)loc) 113 if ((s64)val != *(s32 *)loc)
114 goto overflow; 114 goto overflow;
115 break; 115 break;
116 case R_X86_64_PC32: 116 case R_X86_64_PC32:
117 val -= (u64)loc; 117 val -= (u64)loc;
118 *(u32 *)loc = val; 118 *(u32 *)loc = val;
119#if 0 119#if 0
120 if ((s64)val != *(s32 *)loc) 120 if ((s64)val != *(s32 *)loc)
121 goto overflow; 121 goto overflow;
122#endif 122#endif
123 break; 123 break;
124 default: 124 default:
125 printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", 125 printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
126 me->name, ELF64_R_TYPE(rel[i].r_info)); 126 me->name, ELF64_R_TYPE(rel[i].r_info));
127 return -ENOEXEC; 127 return -ENOEXEC;
128 } 128 }
@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
130 return 0; 130 return 0;
131 131
132overflow: 132overflow:
133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n", 133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
134 (int)ELF64_R_TYPE(rel[i].r_info), val); 134 (int)ELF64_R_TYPE(rel[i].r_info), val);
135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", 135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
136 me->name); 136 me->name);
@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs,
143 unsigned int relsec, 143 unsigned int relsec,
144 struct module *me) 144 struct module *me)
145{ 145{
146 printk("non add relocation not supported\n"); 146 printk(KERN_ERR "non add relocation not supported\n");
147 return -ENOSYS; 147 return -ENOSYS;
148} 148}
149 149
150int module_finalize(const Elf_Ehdr *hdr, 150int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 151 const Elf_Shdr *sechdrs,
152 struct module *me) 152 struct module *me)
153{ 153{
154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, 154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
155 *para = NULL; 155 *para = NULL;
@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr,
161 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 161 if (!strcmp(".altinstructions", secstrings + s->sh_name))
162 alt = s; 162 alt = s;
163 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 163 if (!strcmp(".smp_locks", secstrings + s->sh_name))
164 locks= s; 164 locks = s;
165 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 165 if (!strcmp(".parainstructions", secstrings + s->sh_name))
166 para = s; 166 para = s;
167 } 167 }
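
Beyond the whitespace cleanups, the module_64.c hunks show the arithmetic apply_relocate_add() performs: an R_X86_64_PC32 entry stores S + A - P into the patched 32-bit slot, which is why a module built without -mcmodel=kernel overflows the signed 32-bit displacement. A standalone restatement; the helper name is illustrative and no values are taken from the diff.

/*
 * The R_X86_64_PC32 arithmetic from apply_relocate_add() above: the
 * patched location receives S + A - P, truncated to 32 bits.
 */
static void apply_pc32(u32 *loc, u64 sym_value, s64 addend)
{
        u64 val = sym_value + addend;           /* S + A */

        val -= (u64)(unsigned long)loc;         /* ... - P */
        *loc = (u32)val;                        /* must fit in +/-2GB */
}
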
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a649a4ccad43..200764453195 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -3,7 +3,7 @@
3 * compliant MP-table parsing routines. 3 * compliant MP-table parsing routines.
4 * 4 *
5 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 5 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
6 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> 6 * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
7 * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de> 7 * (c) 2008 Alexey Starikovskiy <astarikovskiy@suse.de>
8 */ 8 */
9 9
@@ -29,12 +29,7 @@
29#include <asm/setup.h> 29#include <asm/setup.h>
30#include <asm/smp.h> 30#include <asm/smp.h>
31 31
32#include <mach_apic.h> 32#include <asm/genapic.h>
33#ifdef CONFIG_X86_32
34#include <mach_apicdef.h>
35#include <mach_mpparse.h>
36#endif
37
38/* 33/*
39 * Checksum an MP configuration block. 34 * Checksum an MP configuration block.
40 */ 35 */
@@ -144,11 +139,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
144 if (bad_ioapic(m->apicaddr)) 139 if (bad_ioapic(m->apicaddr))
145 return; 140 return;
146 141
147 mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; 142 mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
148 mp_ioapics[nr_ioapics].mp_apicid = m->apicid; 143 mp_ioapics[nr_ioapics].apicid = m->apicid;
149 mp_ioapics[nr_ioapics].mp_type = m->type; 144 mp_ioapics[nr_ioapics].type = m->type;
150 mp_ioapics[nr_ioapics].mp_apicver = m->apicver; 145 mp_ioapics[nr_ioapics].apicver = m->apicver;
151 mp_ioapics[nr_ioapics].mp_flags = m->flags; 146 mp_ioapics[nr_ioapics].flags = m->flags;
152 nr_ioapics++; 147 nr_ioapics++;
153} 148}
154 149
@@ -160,55 +155,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
160 m->srcbusirq, m->dstapic, m->dstirq); 155 m->srcbusirq, m->dstapic, m->dstirq);
161} 156}
162 157
163static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 158static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
164{ 159{
165 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," 160 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
166 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 161 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
167 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 162 mp_irq->irqtype, mp_irq->irqflag & 3,
168 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 163 (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
169 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); 164 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
170} 165}
171 166
172static void __init assign_to_mp_irq(struct mpc_intsrc *m, 167static void __init assign_to_mp_irq(struct mpc_intsrc *m,
173 struct mp_config_intsrc *mp_irq) 168 struct mpc_intsrc *mp_irq)
174{ 169{
175 mp_irq->mp_dstapic = m->dstapic; 170 mp_irq->dstapic = m->dstapic;
176 mp_irq->mp_type = m->type; 171 mp_irq->type = m->type;
177 mp_irq->mp_irqtype = m->irqtype; 172 mp_irq->irqtype = m->irqtype;
178 mp_irq->mp_irqflag = m->irqflag; 173 mp_irq->irqflag = m->irqflag;
179 mp_irq->mp_srcbus = m->srcbus; 174 mp_irq->srcbus = m->srcbus;
180 mp_irq->mp_srcbusirq = m->srcbusirq; 175 mp_irq->srcbusirq = m->srcbusirq;
181 mp_irq->mp_dstirq = m->dstirq; 176 mp_irq->dstirq = m->dstirq;
182} 177}
183 178
184static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, 179static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
185 struct mpc_intsrc *m) 180 struct mpc_intsrc *m)
186{ 181{
187 m->dstapic = mp_irq->mp_dstapic; 182 m->dstapic = mp_irq->dstapic;
188 m->type = mp_irq->mp_type; 183 m->type = mp_irq->type;
189 m->irqtype = mp_irq->mp_irqtype; 184 m->irqtype = mp_irq->irqtype;
190 m->irqflag = mp_irq->mp_irqflag; 185 m->irqflag = mp_irq->irqflag;
191 m->srcbus = mp_irq->mp_srcbus; 186 m->srcbus = mp_irq->srcbus;
192 m->srcbusirq = mp_irq->mp_srcbusirq; 187 m->srcbusirq = mp_irq->srcbusirq;
193 m->dstirq = mp_irq->mp_dstirq; 188 m->dstirq = mp_irq->dstirq;
194} 189}
195 190
196static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, 191static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
197 struct mpc_intsrc *m) 192 struct mpc_intsrc *m)
198{ 193{
199 if (mp_irq->mp_dstapic != m->dstapic) 194 if (mp_irq->dstapic != m->dstapic)
200 return 1; 195 return 1;
201 if (mp_irq->mp_type != m->type) 196 if (mp_irq->type != m->type)
202 return 2; 197 return 2;
203 if (mp_irq->mp_irqtype != m->irqtype) 198 if (mp_irq->irqtype != m->irqtype)
204 return 3; 199 return 3;
205 if (mp_irq->mp_irqflag != m->irqflag) 200 if (mp_irq->irqflag != m->irqflag)
206 return 4; 201 return 4;
207 if (mp_irq->mp_srcbus != m->srcbus) 202 if (mp_irq->srcbus != m->srcbus)
208 return 5; 203 return 5;
209 if (mp_irq->mp_srcbusirq != m->srcbusirq) 204 if (mp_irq->srcbusirq != m->srcbusirq)
210 return 6; 205 return 6;
211 if (mp_irq->mp_dstirq != m->dstirq) 206 if (mp_irq->dstirq != m->dstirq)
212 return 7; 207 return 7;
213 208
214 return 0; 209 return 0;
@@ -292,16 +287,7 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
292 return 0; 287 return 0;
293 288
294#ifdef CONFIG_X86_32 289#ifdef CONFIG_X86_32
295 /* 290 generic_mps_oem_check(mpc, oem, str);
296 * need to make sure summit and es7000's mps_oem_check is safe to be
297 * called early via genericarch 's mps_oem_check
298 */
299 if (early) {
300#ifdef CONFIG_X86_NUMAQ
301 numaq_mps_oem_check(mpc, oem, str);
302#endif
303 } else
304 mps_oem_check(mpc, oem, str);
305#endif 291#endif
306 /* save the local APIC address, it might be non-default */ 292 /* save the local APIC address, it might be non-default */
307 if (!acpi_lapic) 293 if (!acpi_lapic)
@@ -386,13 +372,13 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early)
386 (*x86_quirks->mpc_record)++; 372 (*x86_quirks->mpc_record)++;
387 } 373 }
388 374
389#ifdef CONFIG_X86_GENERICARCH 375#ifdef CONFIG_X86_BIGSMP
390 generic_bigsmp_probe(); 376 generic_bigsmp_probe();
391#endif 377#endif
392 378
393#ifdef CONFIG_X86_32 379 if (apic->setup_apic_routing)
394 setup_apic_routing(); 380 apic->setup_apic_routing();
395#endif 381
396 if (!num_processors) 382 if (!num_processors)
397 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 383 printk(KERN_ERR "MPTABLE: no processors registered!\n");
398 return num_processors; 384 return num_processors;
@@ -417,7 +403,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
417 intsrc.type = MP_INTSRC; 403 intsrc.type = MP_INTSRC;
418 intsrc.irqflag = 0; /* conforming */ 404 intsrc.irqflag = 0; /* conforming */
419 intsrc.srcbus = 0; 405 intsrc.srcbus = 0;
420 intsrc.dstapic = mp_ioapics[0].mp_apicid; 406 intsrc.dstapic = mp_ioapics[0].apicid;
421 407
422 intsrc.irqtype = mp_INT; 408 intsrc.irqtype = mp_INT;
423 409
@@ -570,14 +556,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
570 } 556 }
571} 557}
572 558
573static struct intel_mp_floating *mpf_found; 559static struct mpf_intel *mpf_found;
574 560
575/* 561/*
576 * Scan the memory blocks for an SMP configuration block. 562 * Scan the memory blocks for an SMP configuration block.
577 */ 563 */
578static void __init __get_smp_config(unsigned int early) 564static void __init __get_smp_config(unsigned int early)
579{ 565{
580 struct intel_mp_floating *mpf = mpf_found; 566 struct mpf_intel *mpf = mpf_found;
581 567
582 if (!mpf) 568 if (!mpf)
583 return; 569 return;
@@ -598,9 +584,9 @@ static void __init __get_smp_config(unsigned int early)
598 } 584 }
599 585
600 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 586 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
601 mpf->mpf_specification); 587 mpf->specification);
602#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 588#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
603 if (mpf->mpf_feature2 & (1 << 7)) { 589 if (mpf->feature2 & (1 << 7)) {
604 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 590 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
605 pic_mode = 1; 591 pic_mode = 1;
606 } else { 592 } else {
@@ -611,7 +597,7 @@ static void __init __get_smp_config(unsigned int early)
611 /* 597 /*
612 * Now see if we need to read further. 598 * Now see if we need to read further.
613 */ 599 */
614 if (mpf->mpf_feature1 != 0) { 600 if (mpf->feature1 != 0) {
615 if (early) { 601 if (early) {
616 /* 602 /*
617 * local APIC has default address 603 * local APIC has default address
@@ -621,16 +607,16 @@ static void __init __get_smp_config(unsigned int early)
621 } 607 }
622 608
623 printk(KERN_INFO "Default MP configuration #%d\n", 609 printk(KERN_INFO "Default MP configuration #%d\n",
624 mpf->mpf_feature1); 610 mpf->feature1);
625 construct_default_ISA_mptable(mpf->mpf_feature1); 611 construct_default_ISA_mptable(mpf->feature1);
626 612
627 } else if (mpf->mpf_physptr) { 613 } else if (mpf->physptr) {
628 614
629 /* 615 /*
630 * Read the physical hardware table. Anything here will 616 * Read the physical hardware table. Anything here will
631 * override the defaults. 617 * override the defaults.
632 */ 618 */
633 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { 619 if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
634#ifdef CONFIG_X86_LOCAL_APIC 620#ifdef CONFIG_X86_LOCAL_APIC
635 smp_found_config = 0; 621 smp_found_config = 0;
636#endif 622#endif
@@ -688,32 +674,32 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
688 unsigned reserve) 674 unsigned reserve)
689{ 675{
690 unsigned int *bp = phys_to_virt(base); 676 unsigned int *bp = phys_to_virt(base);
691 struct intel_mp_floating *mpf; 677 struct mpf_intel *mpf;
692 678
693 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", 679 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
694 bp, length); 680 bp, length);
695 BUILD_BUG_ON(sizeof(*mpf) != 16); 681 BUILD_BUG_ON(sizeof(*mpf) != 16);
696 682
697 while (length > 0) { 683 while (length > 0) {
698 mpf = (struct intel_mp_floating *)bp; 684 mpf = (struct mpf_intel *)bp;
699 if ((*bp == SMP_MAGIC_IDENT) && 685 if ((*bp == SMP_MAGIC_IDENT) &&
700 (mpf->mpf_length == 1) && 686 (mpf->length == 1) &&
701 !mpf_checksum((unsigned char *)bp, 16) && 687 !mpf_checksum((unsigned char *)bp, 16) &&
702 ((mpf->mpf_specification == 1) 688 ((mpf->specification == 1)
703 || (mpf->mpf_specification == 4))) { 689 || (mpf->specification == 4))) {
704#ifdef CONFIG_X86_LOCAL_APIC 690#ifdef CONFIG_X86_LOCAL_APIC
705 smp_found_config = 1; 691 smp_found_config = 1;
706#endif 692#endif
707 mpf_found = mpf; 693 mpf_found = mpf;
708 694
709 printk(KERN_INFO "found SMP MP-table at [%p] %08lx\n", 695 printk(KERN_INFO "found SMP MP-table at [%p] %llx\n",
710 mpf, virt_to_phys(mpf)); 696 mpf, (u64)virt_to_phys(mpf));
711 697
712 if (!reserve) 698 if (!reserve)
713 return 1; 699 return 1;
714 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, 700 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
715 BOOTMEM_DEFAULT); 701 BOOTMEM_DEFAULT);
716 if (mpf->mpf_physptr) { 702 if (mpf->physptr) {
717 unsigned long size = PAGE_SIZE; 703 unsigned long size = PAGE_SIZE;
718#ifdef CONFIG_X86_32 704#ifdef CONFIG_X86_32
719 /* 705 /*
@@ -722,14 +708,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
722 * the bottom is mapped now. 708 * the bottom is mapped now.
723 * PC-9800's MPC table places on the very last 709 * PC-9800's MPC table places on the very last
724 * of physical memory; so that simply reserving 710 * of physical memory; so that simply reserving
725 * PAGE_SIZE from mpg->mpf_physptr yields BUG() 711 * PAGE_SIZE from mpf->physptr yields BUG()
726 * in reserve_bootmem. 712 * in reserve_bootmem.
727 */ 713 */
728 unsigned long end = max_low_pfn * PAGE_SIZE; 714 unsigned long end = max_low_pfn * PAGE_SIZE;
729 if (mpf->mpf_physptr + size > end) 715 if (mpf->physptr + size > end)
730 size = end - mpf->mpf_physptr; 716 size = end - mpf->physptr;
731#endif 717#endif
732 reserve_bootmem_generic(mpf->mpf_physptr, size, 718 reserve_bootmem_generic(mpf->physptr, size,
733 BOOTMEM_DEFAULT); 719 BOOTMEM_DEFAULT);
734 } 720 }
735 721
@@ -809,15 +795,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
809 /* not legacy */ 795 /* not legacy */
810 796
811 for (i = 0; i < mp_irq_entries; i++) { 797 for (i = 0; i < mp_irq_entries; i++) {
812 if (mp_irqs[i].mp_irqtype != mp_INT) 798 if (mp_irqs[i].irqtype != mp_INT)
813 continue; 799 continue;
814 800
815 if (mp_irqs[i].mp_irqflag != 0x0f) 801 if (mp_irqs[i].irqflag != 0x0f)
816 continue; 802 continue;
817 803
818 if (mp_irqs[i].mp_srcbus != m->srcbus) 804 if (mp_irqs[i].srcbus != m->srcbus)
819 continue; 805 continue;
820 if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) 806 if (mp_irqs[i].srcbusirq != m->srcbusirq)
821 continue; 807 continue;
822 if (irq_used[i]) { 808 if (irq_used[i]) {
823 /* already claimed */ 809 /* already claimed */
@@ -922,10 +908,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
922 if (irq_used[i]) 908 if (irq_used[i])
923 continue; 909 continue;
924 910
925 if (mp_irqs[i].mp_irqtype != mp_INT) 911 if (mp_irqs[i].irqtype != mp_INT)
926 continue; 912 continue;
927 913
928 if (mp_irqs[i].mp_irqflag != 0x0f) 914 if (mp_irqs[i].irqflag != 0x0f)
929 continue; 915 continue;
930 916
931 if (nr_m_spare > 0) { 917 if (nr_m_spare > 0) {
@@ -1001,7 +987,7 @@ static int __init update_mp_table(void)
1001{ 987{
1002 char str[16]; 988 char str[16];
1003 char oem[10]; 989 char oem[10];
1004 struct intel_mp_floating *mpf; 990 struct mpf_intel *mpf;
1005 struct mpc_table *mpc, *mpc_new; 991 struct mpc_table *mpc, *mpc_new;
1006 992
1007 if (!enable_update_mptable) 993 if (!enable_update_mptable)
@@ -1014,19 +1000,19 @@ static int __init update_mp_table(void)
1014 /* 1000 /*
1015 * Now see if we need to go further. 1001 * Now see if we need to go further.
1016 */ 1002 */
1017 if (mpf->mpf_feature1 != 0) 1003 if (mpf->feature1 != 0)
1018 return 0; 1004 return 0;
1019 1005
1020 if (!mpf->mpf_physptr) 1006 if (!mpf->physptr)
1021 return 0; 1007 return 0;
1022 1008
1023 mpc = phys_to_virt(mpf->mpf_physptr); 1009 mpc = phys_to_virt(mpf->physptr);
1024 1010
1025 if (!smp_check_mpc(mpc, oem, str)) 1011 if (!smp_check_mpc(mpc, oem, str))
1026 return 0; 1012 return 0;
1027 1013
1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); 1014 printk(KERN_INFO "mpf: %llx\n", (u64)virt_to_phys(mpf));
1029 printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); 1015 printk(KERN_INFO "physptr: %x\n", mpf->physptr);
1030 1016
1031 if (mpc_new_phys && mpc->length > mpc_new_length) { 1017 if (mpc_new_phys && mpc->length > mpc_new_length) {
1032 mpc_new_phys = 0; 1018 mpc_new_phys = 0;
@@ -1047,23 +1033,23 @@ static int __init update_mp_table(void)
1047 } 1033 }
1048 printk(KERN_INFO "use in-positon replacing\n"); 1034 printk(KERN_INFO "use in-positon replacing\n");
1049 } else { 1035 } else {
1050 mpf->mpf_physptr = mpc_new_phys; 1036 mpf->physptr = mpc_new_phys;
1051 mpc_new = phys_to_virt(mpc_new_phys); 1037 mpc_new = phys_to_virt(mpc_new_phys);
1052 memcpy(mpc_new, mpc, mpc->length); 1038 memcpy(mpc_new, mpc, mpc->length);
1053 mpc = mpc_new; 1039 mpc = mpc_new;
1054 /* check if we can modify that */ 1040 /* check if we can modify that */
1055 if (mpc_new_phys - mpf->mpf_physptr) { 1041 if (mpc_new_phys - mpf->physptr) {
1056 struct intel_mp_floating *mpf_new; 1042 struct mpf_intel *mpf_new;
1057 /* steal 16 bytes from [0, 1k) */ 1043 /* steal 16 bytes from [0, 1k) */
1058 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); 1044 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
1059 mpf_new = phys_to_virt(0x400 - 16); 1045 mpf_new = phys_to_virt(0x400 - 16);
1060 memcpy(mpf_new, mpf, 16); 1046 memcpy(mpf_new, mpf, 16);
1061 mpf = mpf_new; 1047 mpf = mpf_new;
1062 mpf->mpf_physptr = mpc_new_phys; 1048 mpf->physptr = mpc_new_phys;
1063 } 1049 }
1064 mpf->mpf_checksum = 0; 1050 mpf->checksum = 0;
1065 mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); 1051 mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
1066 printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); 1052 printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
1067 } 1053 }
1068 1054
1069 /* 1055 /*
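
update_mp_table() above relocates the MP configuration table and then re-seals the 16-byte
floating pointer record so its bytes sum to zero: it clears mpf->checksum and stores the
negated byte sum. A minimal userspace sketch of that invariant, assuming a byte-sum helper
like the kernel's mpf_checksum(); the record layout below is illustrative, not the exact
struct mpf_intel:

#include <stdio.h>
#include <string.h>

/* Illustrative 16-byte floating-pointer-like record; the layout is a sketch,
 * not the exact struct mpf_intel definition. */
struct mpf_sketch {
        char signature[4];      /* "_MP_" */
        unsigned int physptr;   /* physical address of the MP config table */
        unsigned char length;   /* length in 16-byte units */
        unsigned char spec_rev;
        unsigned char checksum; /* makes all 16 bytes sum to 0 (mod 256) */
        unsigned char feature1;
        unsigned char feature[4];
};

static unsigned char byte_sum(const unsigned char *p, int len)
{
        unsigned char sum = 0;

        while (len--)
                sum += *p++;
        return sum;
}

int main(void)
{
        struct mpf_sketch mpf;

        memset(&mpf, 0, sizeof(mpf));
        memcpy(mpf.signature, "_MP_", 4);
        mpf.physptr = 0x9fc00;  /* hypothetical relocated table address */
        mpf.length = 1;
        mpf.spec_rev = 4;

        /* Same re-sealing step as update_mp_table(): zero the checksum,
         * then store the negated sum so the whole record sums to zero. */
        mpf.checksum = 0;
        mpf.checksum -= byte_sum((unsigned char *)&mpf, 16);

        printf("record sums to %u\n", byte_sum((unsigned char *)&mpf, 16));
        return 0;
}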
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 726266695b2c..3cf3413ec626 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -35,10 +35,10 @@
35#include <linux/device.h> 35#include <linux/device.h>
36#include <linux/cpu.h> 36#include <linux/cpu.h>
37#include <linux/notifier.h> 37#include <linux/notifier.h>
38#include <linux/uaccess.h>
38 39
39#include <asm/processor.h> 40#include <asm/processor.h>
40#include <asm/msr.h> 41#include <asm/msr.h>
41#include <asm/uaccess.h>
42#include <asm/system.h> 42#include <asm/system.h>
43 43
44static struct class *msr_class; 44static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7228979f1e7f..bdfad80c3cf1 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -34,7 +34,7 @@
34 34
35#include <asm/mce.h> 35#include <asm/mce.h>
36 36
37#include <mach_traps.h> 37#include <asm/mach_traps.h>
38 38
39int unknown_nmi_panic; 39int unknown_nmi_panic;
40int nmi_watchdog_enabled; 40int nmi_watchdog_enabled;
@@ -61,11 +61,7 @@ static int endflag __initdata;
61 61
62static inline unsigned int get_nmi_count(int cpu) 62static inline unsigned int get_nmi_count(int cpu)
63{ 63{
64#ifdef CONFIG_X86_64 64 return per_cpu(irq_stat, cpu).__nmi_count;
65 return cpu_pda(cpu)->__nmi_count;
66#else
67 return nmi_count(cpu);
68#endif
69} 65}
70 66
71static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
82 */ 78 */
83static inline unsigned int get_timer_irqs(int cpu) 79static inline unsigned int get_timer_irqs(int cpu)
84{ 80{
85#ifdef CONFIG_X86_64
86 return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
87#else
88 return per_cpu(irq_stat, cpu).apic_timer_irqs + 81 return per_cpu(irq_stat, cpu).apic_timer_irqs +
89 per_cpu(irq_stat, cpu).irq0_irqs; 82 per_cpu(irq_stat, cpu).irq0_irqs;
90#endif
91} 83}
92 84
93#ifdef CONFIG_SMP 85#ifdef CONFIG_SMP
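
Both nmi.c hunks above drop the 64-bit PDA reads (cpu_pda(cpu)->__nmi_count, read_pda(...))
in favour of the per_cpu(irq_stat, cpu) form that 32-bit already used; the NMI watchdog only
needs per-CPU counters it can read from any CPU in order to notice when timer interrupts stop
advancing. A rough userspace model of that access pattern, with a plain array standing in for
the per-CPU area:

#include <stdio.h>

#define NR_CPUS 4

/* Stand-in for the kernel's per-CPU irq_stat; in the kernel each CPU has its
 * own copy and other CPUs read it with per_cpu(irq_stat, cpu). Field names
 * mirror the counters used above (the kernel one is __nmi_count). */
struct irq_stat_sketch {
        unsigned int nmi_count;
        unsigned int apic_timer_irqs;
        unsigned int irq0_irqs;
};

static struct irq_stat_sketch irq_stat[NR_CPUS];

static unsigned int get_nmi_count(int cpu)
{
        return irq_stat[cpu].nmi_count;
}

static unsigned int get_timer_irqs(int cpu)
{
        return irq_stat[cpu].apic_timer_irqs + irq_stat[cpu].irq0_irqs;
}

int main(void)
{
        irq_stat[1].nmi_count = 42;
        irq_stat[1].apic_timer_irqs = 100;
        irq_stat[1].irq0_irqs = 7;

        printf("cpu1: %u NMIs, %u timer interrupts\n",
               get_nmi_count(1), get_timer_irqs(1));
        return 0;
}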
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index f2191d4f2717..0cc41a1d2550 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -3,7 +3,7 @@
3 * 3 *
4 * Copyright (C) 2002, IBM Corp. 4 * Copyright (C) 2002, IBM Corp.
5 * 5 *
6 * All rights reserved. 6 * All rights reserved.
7 * 7 *
8 * This program is free software; you can redistribute it and/or modify 8 * This program is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by 9 * it under the terms of the GNU General Public License as published by
@@ -23,17 +23,18 @@
23 * Send feedback to <gone@us.ibm.com> 23 * Send feedback to <gone@us.ibm.com>
24 */ 24 */
25 25
26#include <linux/mm.h> 26#include <linux/nodemask.h>
27#include <linux/bootmem.h> 27#include <linux/bootmem.h>
28#include <linux/mmzone.h> 28#include <linux/mmzone.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/nodemask.h> 30#include <linux/mm.h>
31#include <asm/numaq.h> 31
32#include <asm/topology.h>
33#include <asm/processor.h> 32#include <asm/processor.h>
33#include <asm/topology.h>
34#include <asm/genapic.h> 34#include <asm/genapic.h>
35#include <asm/e820.h> 35#include <asm/numaq.h>
36#include <asm/setup.h> 36#include <asm/setup.h>
37#include <asm/e820.h>
37 38
38#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT)) 39#define MB_TO_PAGES(addr) ((addr) << (20 - PAGE_SHIFT))
39 40
@@ -91,19 +92,20 @@ static int __init numaq_pre_time_init(void)
91} 92}
92 93
93int found_numaq; 94int found_numaq;
95
94/* 96/*
95 * Have to match translation table entries to main table entries by counter 97 * Have to match translation table entries to main table entries by counter
96 * hence the mpc_record variable .... can't see a less disgusting way of 98 * hence the mpc_record variable .... can't see a less disgusting way of
97 * doing this .... 99 * doing this ....
98 */ 100 */
99struct mpc_config_translation { 101struct mpc_config_translation {
100 unsigned char mpc_type; 102 unsigned char mpc_type;
101 unsigned char trans_len; 103 unsigned char trans_len;
102 unsigned char trans_type; 104 unsigned char trans_type;
103 unsigned char trans_quad; 105 unsigned char trans_quad;
104 unsigned char trans_global; 106 unsigned char trans_global;
105 unsigned char trans_local; 107 unsigned char trans_local;
106 unsigned short trans_reserved; 108 unsigned short trans_reserved;
107}; 109};
108 110
109/* x86_quirks member */ 111/* x86_quirks member */
@@ -236,7 +238,7 @@ static int __init numaq_setup_ioapic_ids(void)
236 238
237static int __init numaq_update_genapic(void) 239static int __init numaq_update_genapic(void)
238{ 240{
239 genapic->wakeup_cpu = wakeup_secondary_cpu_via_nmi; 241 apic->wakeup_cpu = wakeup_secondary_cpu_via_nmi;
240 242
241 return 0; 243 return 0;
242} 244}
@@ -291,3 +293,280 @@ int __init get_memcfg_numaq(void)
291 smp_dump_qct(); 293 smp_dump_qct();
292 return 1; 294 return 1;
293} 295}
296
297/*
298 * APIC driver for the IBM NUMAQ chipset.
299 */
300#define APIC_DEFINITION 1
301#include <linux/threads.h>
302#include <linux/cpumask.h>
303#include <asm/mpspec.h>
304#include <asm/genapic.h>
305#include <asm/fixmap.h>
306#include <asm/apicdef.h>
307#include <asm/ipi.h>
308#include <linux/kernel.h>
309#include <linux/string.h>
310#include <linux/init.h>
311#include <linux/numa.h>
312#include <linux/smp.h>
313#include <asm/numaq.h>
314#include <asm/io.h>
315#include <linux/mmzone.h>
316#include <linux/nodemask.h>
317
318#define NUMAQ_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
319
320static inline unsigned int numaq_get_apic_id(unsigned long x)
321{
322 return (x >> 24) & 0x0F;
323}
324
325static inline void numaq_send_IPI_mask(const struct cpumask *mask, int vector)
326{
327 default_send_IPI_mask_sequence_logical(mask, vector);
328}
329
330static inline void numaq_send_IPI_allbutself(int vector)
331{
332 default_send_IPI_mask_allbutself_logical(cpu_online_mask, vector);
333}
334
335static inline void numaq_send_IPI_all(int vector)
336{
337 numaq_send_IPI_mask(cpu_online_mask, vector);
338}
339
340extern void numaq_mps_oem_check(struct mpc_table *, char *, char *);
341
342#define NUMAQ_TRAMPOLINE_PHYS_LOW (0x8)
343#define NUMAQ_TRAMPOLINE_PHYS_HIGH (0xa)
344
345/*
346 * Because we use NMIs rather than the INIT-STARTUP sequence to
347 * bootstrap the CPUs, the APIC may be in a weird state. Kick it:
348 */
349static inline void numaq_smp_callin_clear_local_apic(void)
350{
351 clear_local_APIC();
352}
353
354static inline void
355numaq_store_NMI_vector(unsigned short *high, unsigned short *low)
356{
357 printk("Storing NMI vector\n");
358 *high =
359 *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_HIGH));
360 *low =
361 *((volatile unsigned short *)phys_to_virt(NUMAQ_TRAMPOLINE_PHYS_LOW));
362}
363
364static inline const cpumask_t *numaq_target_cpus(void)
365{
366 return &CPU_MASK_ALL;
367}
368
369static inline unsigned long
370numaq_check_apicid_used(physid_mask_t bitmap, int apicid)
371{
372 return physid_isset(apicid, bitmap);
373}
374
375static inline unsigned long numaq_check_apicid_present(int bit)
376{
377 return physid_isset(bit, phys_cpu_present_map);
378}
379
380#define apicid_cluster(apicid) (apicid & 0xF0)
381
382static inline int numaq_apic_id_registered(void)
383{
384 return 1;
385}
386
387static inline void numaq_init_apic_ldr(void)
388{
389 /* Already done in NUMA-Q firmware */
390}
391
392static inline void numaq_setup_apic_routing(void)
393{
394 printk("Enabling APIC mode: %s. Using %d I/O APICs\n",
395 "NUMA-Q", nr_ioapics);
396}
397
398/*
399 * Skip adding the timer int on secondary nodes, which causes
400 * a small but painful rift in the time-space continuum.
401 */
402static inline int numaq_multi_timer_check(int apic, int irq)
403{
404 return apic != 0 && irq == 0;
405}
406
407static inline physid_mask_t numaq_ioapic_phys_id_map(physid_mask_t phys_map)
408{
409 /* We don't have a good way to do this yet - hack */
410 return physids_promote(0xFUL);
411}
412
413/* Mapping from cpu number to logical apicid */
414extern u8 cpu_2_logical_apicid[];
415
416static inline int numaq_cpu_to_logical_apicid(int cpu)
417{
418 if (cpu >= nr_cpu_ids)
419 return BAD_APICID;
420 return (int)cpu_2_logical_apicid[cpu];
421}
422
423/*
424 * Supporting over 60 cpus on NUMA-Q requires a locality-dependent
425 * cpu to APIC ID relation to properly interact with the intelligent
426 * mode of the cluster controller.
427 */
428static inline int numaq_cpu_present_to_apicid(int mps_cpu)
429{
430 if (mps_cpu < 60)
431 return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
432 else
433 return BAD_APICID;
434}
435
436static inline int numaq_apicid_to_node(int logical_apicid)
437{
438 return logical_apicid >> 4;
439}
440
441static inline physid_mask_t numaq_apicid_to_cpu_present(int logical_apicid)
442{
443 int node = numaq_apicid_to_node(logical_apicid);
444 int cpu = __ffs(logical_apicid & 0xf);
445
446 return physid_mask_of_physid(cpu + 4*node);
447}
448
449/* Where the IO area was mapped on multiquad, always 0 otherwise */
450void *xquad_portio;
451
452static inline int numaq_check_phys_apicid_present(int boot_cpu_physical_apicid)
453{
454 return 1;
455}
456
457/*
458 * We use physical apicids here, not logical, so just return the default
459 * physical broadcast to stop people from breaking us
460 */
461static inline unsigned int numaq_cpu_mask_to_apicid(const cpumask_t *cpumask)
462{
463 return 0x0F;
464}
465
466static inline unsigned int
467numaq_cpu_mask_to_apicid_and(const struct cpumask *cpumask,
468 const struct cpumask *andmask)
469{
470 return 0x0F;
471}
472
473/* No NUMA-Q box has a HT CPU, but it can't hurt to use the default code. */
474static inline int numaq_phys_pkg_id(int cpuid_apic, int index_msb)
475{
476 return cpuid_apic >> index_msb;
477}
478static int __numaq_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
479{
480 numaq_mps_oem_check(mpc, oem, productid);
481 return found_numaq;
482}
483
484static int probe_numaq(void)
485{
486 /* already know from get_memcfg_numaq() */
487 return found_numaq;
488}
489
490static void numaq_vector_allocation_domain(int cpu, cpumask_t *retmask)
491{
492 /* Careful. Some cpus do not strictly honor the set of cpus
493 * specified in the interrupt destination when using lowest
494 * priority interrupt delivery mode.
495 *
496 * In particular there was a hyperthreading cpu observed to
497 * deliver interrupts to the wrong hyperthread when only one
 498 * hyperthread was specified in the interrupt destination.
499 */
500 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
501}
502
503static void numaq_setup_portio_remap(void)
504{
505 int num_quads = num_online_nodes();
506
507 if (num_quads <= 1)
508 return;
509
510 printk("Remapping cross-quad port I/O for %d quads\n", num_quads);
511 xquad_portio = ioremap(XQUAD_PORTIO_BASE, num_quads*XQUAD_PORTIO_QUAD);
512 printk("xquad_portio vaddr 0x%08lx, len %08lx\n",
513 (u_long) xquad_portio, (u_long) num_quads*XQUAD_PORTIO_QUAD);
514}
515
516struct genapic apic_numaq = {
517
518 .name = "NUMAQ",
519 .probe = probe_numaq,
520 .acpi_madt_oem_check = NULL,
521 .apic_id_registered = numaq_apic_id_registered,
522
523 .irq_delivery_mode = dest_LowestPrio,
524 /* physical delivery on LOCAL quad: */
525 .irq_dest_mode = 0,
526
527 .target_cpus = numaq_target_cpus,
528 .disable_esr = 1,
529 .dest_logical = APIC_DEST_LOGICAL,
530 .check_apicid_used = numaq_check_apicid_used,
531 .check_apicid_present = numaq_check_apicid_present,
532
533 .vector_allocation_domain = numaq_vector_allocation_domain,
534 .init_apic_ldr = numaq_init_apic_ldr,
535
536 .ioapic_phys_id_map = numaq_ioapic_phys_id_map,
537 .setup_apic_routing = numaq_setup_apic_routing,
538 .multi_timer_check = numaq_multi_timer_check,
539 .apicid_to_node = numaq_apicid_to_node,
540 .cpu_to_logical_apicid = numaq_cpu_to_logical_apicid,
541 .cpu_present_to_apicid = numaq_cpu_present_to_apicid,
542 .apicid_to_cpu_present = numaq_apicid_to_cpu_present,
543 .setup_portio_remap = numaq_setup_portio_remap,
544 .check_phys_apicid_present = numaq_check_phys_apicid_present,
545 .enable_apic_mode = NULL,
546 .phys_pkg_id = numaq_phys_pkg_id,
547 .mps_oem_check = __numaq_mps_oem_check,
548
549 .get_apic_id = numaq_get_apic_id,
550 .set_apic_id = NULL,
551 .apic_id_mask = 0x0F << 24,
552
553 .cpu_mask_to_apicid = numaq_cpu_mask_to_apicid,
554 .cpu_mask_to_apicid_and = numaq_cpu_mask_to_apicid_and,
555
556 .send_IPI_mask = numaq_send_IPI_mask,
557 .send_IPI_mask_allbutself = NULL,
558 .send_IPI_allbutself = numaq_send_IPI_allbutself,
559 .send_IPI_all = numaq_send_IPI_all,
560 .send_IPI_self = default_send_IPI_self,
561
562 .wakeup_cpu = NULL,
563 .trampoline_phys_low = NUMAQ_TRAMPOLINE_PHYS_LOW,
564 .trampoline_phys_high = NUMAQ_TRAMPOLINE_PHYS_HIGH,
565
566 /* We don't do anything here because we use NMI's to boot instead */
567 .wait_for_init_deassert = NULL,
568
569 .smp_callin_clear_local_apic = numaq_smp_callin_clear_local_apic,
570 .store_NMI_vector = numaq_store_NMI_vector,
571 .inquire_remote_apic = NULL,
572};
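
The NUMA-Q helpers above pack the quad number into the high nibble of the APIC ID and a
one-hot local-CPU bit (0-3) into the low nibble; numaq_apicid_to_node() and
numaq_apicid_to_cpu_present() invert that. A small self-contained round-trip check, with the
encoding copied from the functions above (the harness itself is only illustrative):

#include <stdio.h>

/* Same encoding as numaq_cpu_present_to_apicid(): quad in the high nibble,
 * one-hot local CPU (0-3) in the low nibble. */
static int cpu_to_apicid(int mps_cpu)
{
        return ((mps_cpu >> 2) << 4) | (1 << (mps_cpu & 0x3));
}

/* Inverse path, as in numaq_apicid_to_node()/numaq_apicid_to_cpu_present(). */
static int apicid_to_node(int apicid)
{
        return apicid >> 4;
}

static int apicid_to_cpu(int apicid)
{
        int node = apicid_to_node(apicid);
        int local = __builtin_ffs(apicid & 0xf) - 1;    /* stand-in for __ffs() */

        return local + 4 * node;
}

int main(void)
{
        int cpu;

        for (cpu = 0; cpu < 8; cpu++) {
                int apicid = cpu_to_apicid(cpu);

                printf("cpu %d -> apicid 0x%02x -> node %d, cpu %d\n",
                       cpu, apicid, apicid_to_node(apicid),
                       apicid_to_cpu(apicid));
        }
        return 0;
}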
diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c
index 95777b0faa73..3a7c5a44082e 100644
--- a/arch/x86/kernel/paravirt-spinlocks.c
+++ b/arch/x86/kernel/paravirt-spinlocks.c
@@ -26,13 +26,3 @@ struct pv_lock_ops pv_lock_ops = {
26}; 26};
27EXPORT_SYMBOL(pv_lock_ops); 27EXPORT_SYMBOL(pv_lock_ops);
28 28
29void __init paravirt_use_bytelocks(void)
30{
31#ifdef CONFIG_SMP
32 pv_lock_ops.spin_is_locked = __byte_spin_is_locked;
33 pv_lock_ops.spin_is_contended = __byte_spin_is_contended;
34 pv_lock_ops.spin_lock = __byte_spin_lock;
35 pv_lock_ops.spin_trylock = __byte_spin_trylock;
36 pv_lock_ops.spin_unlock = __byte_spin_unlock;
37#endif
38}
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..6dc4dca255e4 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -44,6 +44,17 @@ void _paravirt_nop(void)
44{ 44{
45} 45}
46 46
47/* identity function, which can be inlined */
48u32 _paravirt_ident_32(u32 x)
49{
50 return x;
51}
52
53u64 _paravirt_ident_64(u64 x)
54{
55 return x;
56}
57
47static void __init default_banner(void) 58static void __init default_banner(void)
48{ 59{
49 printk(KERN_INFO "Booting paravirtualized kernel on %s\n", 60 printk(KERN_INFO "Booting paravirtualized kernel on %s\n",
@@ -138,9 +149,16 @@ unsigned paravirt_patch_default(u8 type, u16 clobbers, void *insnbuf,
138 if (opfunc == NULL) 149 if (opfunc == NULL)
139 /* If there's no function, patch it with a ud2a (BUG) */ 150 /* If there's no function, patch it with a ud2a (BUG) */
140 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a)); 151 ret = paravirt_patch_insns(insnbuf, len, ud2a, ud2a+sizeof(ud2a));
141 else if (opfunc == paravirt_nop) 152 else if (opfunc == _paravirt_nop)
142 /* If the operation is a nop, then nop the callsite */ 153 /* If the operation is a nop, then nop the callsite */
143 ret = paravirt_patch_nop(); 154 ret = paravirt_patch_nop();
155
156 /* identity functions just return their single argument */
157 else if (opfunc == _paravirt_ident_32)
158 ret = paravirt_patch_ident_32(insnbuf, len);
159 else if (opfunc == _paravirt_ident_64)
160 ret = paravirt_patch_ident_64(insnbuf, len);
161
144 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) || 162 else if (type == PARAVIRT_PATCH(pv_cpu_ops.iret) ||
145 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) || 163 type == PARAVIRT_PATCH(pv_cpu_ops.irq_enable_sysexit) ||
146 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) || 164 type == PARAVIRT_PATCH(pv_cpu_ops.usergs_sysret32) ||
@@ -268,6 +286,32 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void)
268 return __get_cpu_var(paravirt_lazy_mode); 286 return __get_cpu_var(paravirt_lazy_mode);
269} 287}
270 288
289void arch_flush_lazy_mmu_mode(void)
290{
291 preempt_disable();
292
293 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_MMU) {
294 WARN_ON(preempt_count() == 1);
295 arch_leave_lazy_mmu_mode();
296 arch_enter_lazy_mmu_mode();
297 }
298
299 preempt_enable();
300}
301
302void arch_flush_lazy_cpu_mode(void)
303{
304 preempt_disable();
305
306 if (paravirt_get_lazy_mode() == PARAVIRT_LAZY_CPU) {
307 WARN_ON(preempt_count() == 1);
308 arch_leave_lazy_cpu_mode();
309 arch_enter_lazy_cpu_mode();
310 }
311
312 preempt_enable();
313}
314
271struct pv_info pv_info = { 315struct pv_info pv_info = {
272 .name = "bare hardware", 316 .name = "bare hardware",
273 .paravirt_enabled = 0, 317 .paravirt_enabled = 0,
@@ -292,10 +336,10 @@ struct pv_time_ops pv_time_ops = {
292 336
293struct pv_irq_ops pv_irq_ops = { 337struct pv_irq_ops pv_irq_ops = {
294 .init_IRQ = native_init_IRQ, 338 .init_IRQ = native_init_IRQ,
295 .save_fl = native_save_fl, 339 .save_fl = __PV_IS_CALLEE_SAVE(native_save_fl),
296 .restore_fl = native_restore_fl, 340 .restore_fl = __PV_IS_CALLEE_SAVE(native_restore_fl),
297 .irq_disable = native_irq_disable, 341 .irq_disable = __PV_IS_CALLEE_SAVE(native_irq_disable),
298 .irq_enable = native_irq_enable, 342 .irq_enable = __PV_IS_CALLEE_SAVE(native_irq_enable),
299 .safe_halt = native_safe_halt, 343 .safe_halt = native_safe_halt,
300 .halt = native_halt, 344 .halt = native_halt,
301#ifdef CONFIG_X86_64 345#ifdef CONFIG_X86_64
@@ -373,6 +417,14 @@ struct pv_apic_ops pv_apic_ops = {
373#endif 417#endif
374}; 418};
375 419
420#if defined(CONFIG_X86_32) && !defined(CONFIG_X86_PAE)
421/* 32-bit pagetable entries */
422#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_32)
423#else
424/* 64-bit pagetable entries */
425#define PTE_IDENT __PV_IS_CALLEE_SAVE(_paravirt_ident_64)
426#endif
427
376struct pv_mmu_ops pv_mmu_ops = { 428struct pv_mmu_ops pv_mmu_ops = {
377#ifndef CONFIG_X86_64 429#ifndef CONFIG_X86_64
378 .pagetable_setup_start = native_pagetable_setup_start, 430 .pagetable_setup_start = native_pagetable_setup_start,
@@ -424,22 +476,23 @@ struct pv_mmu_ops pv_mmu_ops = {
424 .pmd_clear = native_pmd_clear, 476 .pmd_clear = native_pmd_clear,
425#endif 477#endif
426 .set_pud = native_set_pud, 478 .set_pud = native_set_pud,
427 .pmd_val = native_pmd_val, 479
428 .make_pmd = native_make_pmd, 480 .pmd_val = PTE_IDENT,
481 .make_pmd = PTE_IDENT,
429 482
430#if PAGETABLE_LEVELS == 4 483#if PAGETABLE_LEVELS == 4
431 .pud_val = native_pud_val, 484 .pud_val = PTE_IDENT,
432 .make_pud = native_make_pud, 485 .make_pud = PTE_IDENT,
486
433 .set_pgd = native_set_pgd, 487 .set_pgd = native_set_pgd,
434#endif 488#endif
435#endif /* PAGETABLE_LEVELS >= 3 */ 489#endif /* PAGETABLE_LEVELS >= 3 */
436 490
437 .pte_val = native_pte_val, 491 .pte_val = PTE_IDENT,
438 .pte_flags = native_pte_flags, 492 .pgd_val = PTE_IDENT,
439 .pgd_val = native_pgd_val,
440 493
441 .make_pte = native_make_pte, 494 .make_pte = PTE_IDENT,
442 .make_pgd = native_make_pgd, 495 .make_pgd = PTE_IDENT,
443 496
444 .dup_mmap = paravirt_nop, 497 .dup_mmap = paravirt_nop,
445 .exit_mmap = paravirt_nop, 498 .exit_mmap = paravirt_nop,
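
The pv_mmu_ops hunk replaces native_pte_val(), native_make_pte() and friends with the shared
_paravirt_ident_32/_paravirt_ident_64 helpers precisely so the patcher can recognize them:
paravirt_patch_default() above compares the stored op against those two addresses and, on a
match, emits the identity patch instead of leaving a call in place. A minimal userspace
sketch of that recognize-by-address dispatch (the "patching" is just a printout here; the
real code rewrites the call site):

#include <stdio.h>
#include <stdint.h>

/* Identity helpers, analogous to _paravirt_ident_32/_paravirt_ident_64. */
static uint32_t ident_32(uint32_t x) { return x; }
static uint64_t ident_64(uint64_t x) { return x; }

/* A hypothetical op that is not an identity. */
static uint64_t make_pte_debug(uint64_t x) { return x | 1; }

/* Decide how a call site for 'op' would be handled, by comparing the
 * function pointer itself -- the same idea as paravirt_patch_default(). */
static void patch_site(const void *op)
{
        if (op == (const void *)ident_32)
                printf("patch: 32-bit identity, no instructions needed\n");
        else if (op == (const void *)ident_64)
                printf("patch: 64-bit identity, a single register move\n");
        else
                printf("patch: keep a call to the op\n");
}

int main(void)
{
        patch_site((const void *)ident_64);     /* e.g. pte_val on bare hardware */
        patch_site((const void *)make_pte_debug);
        return 0;
}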
diff --git a/arch/x86/kernel/paravirt_patch_32.c b/arch/x86/kernel/paravirt_patch_32.c
index 9fe644f4861d..d9f32e6d6ab6 100644
--- a/arch/x86/kernel/paravirt_patch_32.c
+++ b/arch/x86/kernel/paravirt_patch_32.c
@@ -12,6 +12,18 @@ DEF_NATIVE(pv_mmu_ops, read_cr3, "mov %cr3, %eax");
12DEF_NATIVE(pv_cpu_ops, clts, "clts"); 12DEF_NATIVE(pv_cpu_ops, clts, "clts");
13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc"); 13DEF_NATIVE(pv_cpu_ops, read_tsc, "rdtsc");
14 14
15unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
16{
17 /* arg in %eax, return in %eax */
18 return 0;
19}
20
21unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
22{
23 /* arg in %edx:%eax, return in %edx:%eax */
24 return 0;
25}
26
15unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 27unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
16 unsigned long addr, unsigned len) 28 unsigned long addr, unsigned len)
17{ 29{
diff --git a/arch/x86/kernel/paravirt_patch_64.c b/arch/x86/kernel/paravirt_patch_64.c
index 061d01df9ae6..3f08f34f93eb 100644
--- a/arch/x86/kernel/paravirt_patch_64.c
+++ b/arch/x86/kernel/paravirt_patch_64.c
@@ -19,6 +19,21 @@ DEF_NATIVE(pv_cpu_ops, usergs_sysret64, "swapgs; sysretq");
19DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl"); 19DEF_NATIVE(pv_cpu_ops, usergs_sysret32, "swapgs; sysretl");
20DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs"); 20DEF_NATIVE(pv_cpu_ops, swapgs, "swapgs");
21 21
22DEF_NATIVE(, mov32, "mov %edi, %eax");
23DEF_NATIVE(, mov64, "mov %rdi, %rax");
24
25unsigned paravirt_patch_ident_32(void *insnbuf, unsigned len)
26{
27 return paravirt_patch_insns(insnbuf, len,
28 start__mov32, end__mov32);
29}
30
31unsigned paravirt_patch_ident_64(void *insnbuf, unsigned len)
32{
33 return paravirt_patch_insns(insnbuf, len,
34 start__mov64, end__mov64);
35}
36
22unsigned native_patch(u8 type, u16 clobbers, void *ibuf, 37unsigned native_patch(u8 type, u16 clobbers, void *ibuf,
23 unsigned long addr, unsigned len) 38 unsigned long addr, unsigned len)
24{ 39{
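
The paravirt_patch_ident_32/64 stubs differ between the two files for calling-convention
reasons: in the 32-bit kernel the argument already arrives in %eax (or %edx:%eax for a 64-bit
value), which is also where the result is expected, so an identity op needs no replacement
instructions at all, while on x86-64 the argument is in %rdi and has to be moved to %rax,
hence the mov32/mov64 templates above. A hedged sketch of the template-copy step (the byte
values are the standard encodings of those two movs; the helper simplifies the kernel's
paravirt_patch_insns()):

#include <stdio.h>
#include <string.h>

/* Simplified template copy: if the replacement fits in the call site's
 * instruction buffer, copy it in and report how many bytes were written
 * (the caller pads the rest with nops); otherwise leave the site alone. */
static unsigned patch_insns(unsigned char *insnbuf, unsigned len,
                            const unsigned char *start, const unsigned char *end)
{
        unsigned insn_len = (unsigned)(end - start);

        if (start == NULL || insn_len > len)
                return 0;
        memcpy(insnbuf, start, insn_len);
        return insn_len;
}

int main(void)
{
        /* mov %edi,%eax and mov %rdi,%rax, as in the mov32/mov64 templates. */
        static const unsigned char mov32[] = { 0x89, 0xf8 };
        static const unsigned char mov64[] = { 0x48, 0x89, 0xf8 };
        unsigned char site[8];

        printf("ident_32 patched with %u byte(s)\n",
               patch_insns(site, sizeof(site), mov32, mov32 + sizeof(mov32)));
        printf("ident_64 patched with %u byte(s)\n",
               patch_insns(site, sizeof(site), mov64, mov64 + sizeof(mov64)));
        return 0;
}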
diff --git a/arch/x86/kernel/probe_32.c b/arch/x86/kernel/probe_32.c
new file mode 100644
index 000000000000..22337b75de62
--- /dev/null
+++ b/arch/x86/kernel/probe_32.c
@@ -0,0 +1,411 @@
1/*
2 * Default generic APIC driver. This handles up to 8 CPUs.
3 *
4 * Copyright 2003 Andi Kleen, SuSE Labs.
5 * Subject to the GNU Public License, v.2
6 *
7 * Generic x86 APIC driver probe layer.
8 */
9#include <linux/threads.h>
10#include <linux/cpumask.h>
11#include <linux/string.h>
12#include <linux/kernel.h>
13#include <linux/ctype.h>
14#include <linux/init.h>
15#include <linux/errno.h>
16#include <asm/fixmap.h>
17#include <asm/mpspec.h>
18#include <asm/apicdef.h>
19#include <asm/genapic.h>
20#include <asm/setup.h>
21
22#include <linux/threads.h>
23#include <linux/cpumask.h>
24#include <asm/mpspec.h>
25#include <asm/genapic.h>
26#include <asm/fixmap.h>
27#include <asm/apicdef.h>
28#include <linux/kernel.h>
29#include <linux/string.h>
30#include <linux/smp.h>
31#include <linux/init.h>
32#include <asm/genapic.h>
33#include <asm/ipi.h>
34
35#include <linux/smp.h>
36#include <linux/init.h>
37#include <linux/interrupt.h>
38#include <asm/acpi.h>
39#include <asm/arch_hooks.h>
40#include <asm/e820.h>
41#include <asm/setup.h>
42
43#include <asm/genapic.h>
44
45#ifdef CONFIG_HOTPLUG_CPU
46#define DEFAULT_SEND_IPI (1)
47#else
48#define DEFAULT_SEND_IPI (0)
49#endif
50
51int no_broadcast = DEFAULT_SEND_IPI;
52
53#ifdef CONFIG_X86_LOCAL_APIC
54
55static void default_vector_allocation_domain(int cpu, struct cpumask *retmask)
56{
57 /*
58 * Careful. Some cpus do not strictly honor the set of cpus
59 * specified in the interrupt destination when using lowest
60 * priority interrupt delivery mode.
61 *
62 * In particular there was a hyperthreading cpu observed to
63 * deliver interrupts to the wrong hyperthread when only one
 64 * hyperthread was specified in the interrupt destination.
65 */
66 *retmask = (cpumask_t) { { [0] = APIC_ALL_CPUS } };
67}
68
69/* should be called last. */
70static int probe_default(void)
71{
72 return 1;
73}
74
75struct genapic apic_default = {
76
77 .name = "default",
78 .probe = probe_default,
79 .acpi_madt_oem_check = NULL,
80 .apic_id_registered = default_apic_id_registered,
81
82 .irq_delivery_mode = dest_LowestPrio,
83 /* logical delivery broadcast to all CPUs: */
84 .irq_dest_mode = 1,
85
86 .target_cpus = default_target_cpus,
87 .disable_esr = 0,
88 .dest_logical = APIC_DEST_LOGICAL,
89 .check_apicid_used = default_check_apicid_used,
90 .check_apicid_present = default_check_apicid_present,
91
92 .vector_allocation_domain = default_vector_allocation_domain,
93 .init_apic_ldr = default_init_apic_ldr,
94
95 .ioapic_phys_id_map = default_ioapic_phys_id_map,
96 .setup_apic_routing = default_setup_apic_routing,
97 .multi_timer_check = NULL,
98 .apicid_to_node = default_apicid_to_node,
99 .cpu_to_logical_apicid = default_cpu_to_logical_apicid,
100 .cpu_present_to_apicid = default_cpu_present_to_apicid,
101 .apicid_to_cpu_present = default_apicid_to_cpu_present,
102 .setup_portio_remap = NULL,
103 .check_phys_apicid_present = default_check_phys_apicid_present,
104 .enable_apic_mode = NULL,
105 .phys_pkg_id = default_phys_pkg_id,
106 .mps_oem_check = NULL,
107
108 .get_apic_id = default_get_apic_id,
109 .set_apic_id = NULL,
110 .apic_id_mask = 0x0F << 24,
111
112 .cpu_mask_to_apicid = default_cpu_mask_to_apicid,
113 .cpu_mask_to_apicid_and = default_cpu_mask_to_apicid_and,
114
115 .send_IPI_mask = default_send_IPI_mask_logical,
116 .send_IPI_mask_allbutself = default_send_IPI_mask_allbutself_logical,
117 .send_IPI_allbutself = default_send_IPI_allbutself,
118 .send_IPI_all = default_send_IPI_all,
119 .send_IPI_self = default_send_IPI_self,
120
121 .wakeup_cpu = NULL,
122 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
123 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
124
125 .wait_for_init_deassert = default_wait_for_init_deassert,
126
127 .smp_callin_clear_local_apic = NULL,
128 .store_NMI_vector = NULL,
129 .inquire_remote_apic = default_inquire_remote_apic,
130};
131
132extern struct genapic apic_numaq;
133extern struct genapic apic_summit;
134extern struct genapic apic_bigsmp;
135extern struct genapic apic_es7000;
136extern struct genapic apic_default;
137
138struct genapic *apic = &apic_default;
139
140static struct genapic *apic_probe[] __initdata = {
141#ifdef CONFIG_X86_NUMAQ
142 &apic_numaq,
143#endif
144#ifdef CONFIG_X86_SUMMIT
145 &apic_summit,
146#endif
147#ifdef CONFIG_X86_BIGSMP
148 &apic_bigsmp,
149#endif
150#ifdef CONFIG_X86_ES7000
151 &apic_es7000,
152#endif
153 &apic_default, /* must be last */
154 NULL,
155};
156
157static int cmdline_apic __initdata;
158static int __init parse_apic(char *arg)
159{
160 int i;
161
162 if (!arg)
163 return -EINVAL;
164
165 for (i = 0; apic_probe[i]; i++) {
166 if (!strcmp(apic_probe[i]->name, arg)) {
167 apic = apic_probe[i];
168 cmdline_apic = 1;
169 return 0;
170 }
171 }
172
173 if (x86_quirks->update_genapic)
174 x86_quirks->update_genapic();
175
176 /* Parsed again by __setup for debug/verbose */
177 return 0;
178}
179early_param("apic", parse_apic);
180
181void __init generic_bigsmp_probe(void)
182{
183#ifdef CONFIG_X86_BIGSMP
184 /*
185 * This routine is used to switch to bigsmp mode when
186 * - There is no apic= option specified by the user
187 * - generic_apic_probe() has chosen apic_default as the sub_arch
188 * - we find more than 8 CPUs in acpi LAPIC listing with xAPIC support
189 */
190
191 if (!cmdline_apic && apic == &apic_default) {
192 if (apic_bigsmp.probe()) {
193 apic = &apic_bigsmp;
194 if (x86_quirks->update_genapic)
195 x86_quirks->update_genapic();
196 printk(KERN_INFO "Overriding APIC driver with %s\n",
197 apic->name);
198 }
199 }
200#endif
201}
202
203void __init generic_apic_probe(void)
204{
205 if (!cmdline_apic) {
206 int i;
207 for (i = 0; apic_probe[i]; i++) {
208 if (apic_probe[i]->probe()) {
209 apic = apic_probe[i];
210 break;
211 }
212 }
213 /* Not visible without early console */
214 if (!apic_probe[i])
215 panic("Didn't find an APIC driver");
216
217 if (x86_quirks->update_genapic)
218 x86_quirks->update_genapic();
219 }
220 printk(KERN_INFO "Using APIC driver %s\n", apic->name);
221}
222
223/* These functions can switch the APIC even after the initial ->probe() */
224
225int __init
226generic_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
227{
228 int i;
229
230 for (i = 0; apic_probe[i]; ++i) {
231 if (!apic_probe[i]->mps_oem_check)
232 continue;
233 if (!apic_probe[i]->mps_oem_check(mpc, oem, productid))
234 continue;
235
236 if (!cmdline_apic) {
237 apic = apic_probe[i];
238 if (x86_quirks->update_genapic)
239 x86_quirks->update_genapic();
240 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
241 apic->name);
242 }
243 return 1;
244 }
245 return 0;
246}
247
248int __init default_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
249{
250 int i;
251
252 for (i = 0; apic_probe[i]; ++i) {
253 if (!apic_probe[i]->acpi_madt_oem_check)
254 continue;
255 if (!apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id))
256 continue;
257
258 if (!cmdline_apic) {
259 apic = apic_probe[i];
260 if (x86_quirks->update_genapic)
261 x86_quirks->update_genapic();
262 printk(KERN_INFO "Switched to APIC driver `%s'.\n",
263 apic->name);
264 }
265 return 1;
266 }
267 return 0;
268}
269
270#endif /* CONFIG_X86_LOCAL_APIC */
271
272/**
273 * pre_intr_init_hook - initialisation prior to setting up interrupt vectors
274 *
275 * Description:
276 * Perform any necessary interrupt initialisation prior to setting up
277 * the "ordinary" interrupt call gates. For legacy reasons, the ISA
278 * interrupts should be initialised here if the machine emulates a PC
279 * in any way.
280 **/
281void __init pre_intr_init_hook(void)
282{
283 if (x86_quirks->arch_pre_intr_init) {
284 if (x86_quirks->arch_pre_intr_init())
285 return;
286 }
287 init_ISA_irqs();
288}
289
290/**
291 * intr_init_hook - post gate setup interrupt initialisation
292 *
293 * Description:
294 * Fill in any interrupts that may have been left out by the general
295 * init_IRQ() routine. interrupts having to do with the machine rather
296 * than the devices on the I/O bus (like APIC interrupts in intel MP
297 * systems) are started here.
298 **/
299void __init intr_init_hook(void)
300{
301 if (x86_quirks->arch_intr_init) {
302 if (x86_quirks->arch_intr_init())
303 return;
304 }
305}
306
307/**
308 * pre_setup_arch_hook - hook called prior to any setup_arch() execution
309 *
310 * Description:
311 * generally used to activate any machine specific identification
312 * routines that may be needed before setup_arch() runs. On Voyager
313 * this is used to get the board revision and type.
314 **/
315void __init pre_setup_arch_hook(void)
316{
317}
318
319/**
320 * trap_init_hook - initialise system specific traps
321 *
322 * Description:
323 * Called as the final act of trap_init(). Used in VISWS to initialise
324 * the various board specific APIC traps.
325 **/
326void __init trap_init_hook(void)
327{
328 if (x86_quirks->arch_trap_init) {
329 if (x86_quirks->arch_trap_init())
330 return;
331 }
332}
333
334static struct irqaction irq0 = {
335 .handler = timer_interrupt,
336 .flags = IRQF_DISABLED | IRQF_NOBALANCING | IRQF_IRQPOLL,
337 .mask = CPU_MASK_NONE,
338 .name = "timer"
339};
340
341/**
342 * pre_time_init_hook - do any specific initialisations before.
343 *
344 **/
345void __init pre_time_init_hook(void)
346{
347 if (x86_quirks->arch_pre_time_init)
348 x86_quirks->arch_pre_time_init();
349}
350
351/**
352 * time_init_hook - do any specific initialisations for the system timer.
353 *
354 * Description:
355 * Must plug the system timer interrupt source at HZ into the IRQ listed
356 * in irq_vectors.h:TIMER_IRQ
357 **/
358void __init time_init_hook(void)
359{
360 if (x86_quirks->arch_time_init) {
361 /*
362 * A nonzero return code does not mean failure, it means
363 * that the architecture quirk does not want any
364 * generic (timer) setup to be performed after this:
365 */
366 if (x86_quirks->arch_time_init())
367 return;
368 }
369
370 irq0.mask = cpumask_of_cpu(0);
371 setup_irq(0, &irq0);
372}
373
374#ifdef CONFIG_MCA
375/**
376 * mca_nmi_hook - hook into MCA specific NMI chain
377 *
378 * Description:
379 * The MCA (Microchannel Architecture) has an NMI chain for NMI sources
380 * along the MCA bus. Use this to hook into that chain if you will need
381 * it.
382 **/
383void mca_nmi_hook(void)
384{
385 /*
386 * If I recall correctly, there's a whole bunch of other things that
387 * we can do to check for NMI problems, but that's all I know about
388 * at the moment.
389 */
390 pr_warning("NMI generated from unknown source!\n");
391}
392#endif
393
394static __init int no_ipi_broadcast(char *str)
395{
396 get_option(&str, &no_broadcast);
397 pr_info("Using %s mode\n",
398 no_broadcast ? "No IPI Broadcast" : "IPI Broadcast");
399 return 1;
400}
401__setup("no_ipi_broadcast=", no_ipi_broadcast);
402
403static int __init print_ipi_mode(void)
404{
405 pr_info("Using IPI %s mode\n",
406 no_broadcast ? "No-Shortcut" : "Shortcut");
407 return 0;
408}
409
410late_initcall(print_ipi_mode);
411
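
probe_32.c collects the 32-bit APIC driver selection in one place: an apic_probe[] array
ordered from most to least specific with apic_default last, a probe() callback per driver, an
apic= early parameter that pins the choice, and generic_apic_probe() taking the first driver
whose probe() fires. A compact userspace model of that selection logic (the driver names and
probe results below are made up for illustration):

#include <stdio.h>
#include <string.h>

struct apic_driver {
        const char *name;
        int (*probe)(void);     /* nonzero if this driver applies to the box */
};

static int probe_bigsmp(void)  { return 0; }    /* pretend: not a big SMP box */
static int probe_default(void) { return 1; }    /* always matches, so goes last */

static struct apic_driver apic_bigsmp  = { "bigsmp",  probe_bigsmp  };
static struct apic_driver apic_default = { "default", probe_default };

static struct apic_driver *apic_probe_tbl[] = {
        &apic_bigsmp,
        &apic_default,  /* must be last */
        NULL,
};

static struct apic_driver *apic = &apic_default;
static int cmdline_apic;

/* Model of parse_apic(): an explicit apic=<name> wins over probing. */
static void parse_apic(const char *arg)
{
        int i;

        for (i = 0; apic_probe_tbl[i]; i++) {
                if (!strcmp(apic_probe_tbl[i]->name, arg)) {
                        apic = apic_probe_tbl[i];
                        cmdline_apic = 1;
                        return;
                }
        }
}

/* Model of generic_apic_probe(): first driver whose probe() fires wins. */
static void generic_apic_probe(void)
{
        int i;

        if (cmdline_apic)
                return;
        for (i = 0; apic_probe_tbl[i]; i++) {
                if (apic_probe_tbl[i]->probe()) {
                        apic = apic_probe_tbl[i];
                        break;
                }
        }
}

int main(int argc, char **argv)
{
        if (argc > 1)
                parse_apic(argv[1]);    /* stands in for the apic= parameter */
        generic_apic_probe();
        printf("Using APIC driver %s\n", apic->name);
        return 0;
}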
diff --git a/arch/x86/kernel/probe_roms_32.c b/arch/x86/kernel/probe_roms_32.c
index 675a48c404a5..071e7fea42e5 100644
--- a/arch/x86/kernel/probe_roms_32.c
+++ b/arch/x86/kernel/probe_roms_32.c
@@ -18,7 +18,7 @@
18#include <asm/setup.h> 18#include <asm/setup.h>
19#include <asm/sections.h> 19#include <asm/sections.h>
20#include <asm/io.h> 20#include <asm/io.h>
21#include <setup_arch.h> 21#include <asm/setup_arch.h>
22 22
23static struct resource system_rom_resource = { 23static struct resource system_rom_resource = {
24 .name = "System ROM", 24 .name = "System ROM",
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index e68bb9e30864..87b69d4fac16 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -180,6 +180,9 @@ void mwait_idle_with_hints(unsigned long ax, unsigned long cx)
180 180
181 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1); 181 trace_power_start(&it, POWER_CSTATE, (ax>>4)+1);
182 if (!need_resched()) { 182 if (!need_resched()) {
183 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
184 clflush((void *)&current_thread_info()->flags);
185
183 __monitor((void *)&current_thread_info()->flags, 0, 0); 186 __monitor((void *)&current_thread_info()->flags, 0, 0);
184 smp_mb(); 187 smp_mb();
185 if (!need_resched()) 188 if (!need_resched())
@@ -194,6 +197,9 @@ static void mwait_idle(void)
194 struct power_trace it; 197 struct power_trace it;
195 if (!need_resched()) { 198 if (!need_resched()) {
196 trace_power_start(&it, POWER_CSTATE, 1); 199 trace_power_start(&it, POWER_CSTATE, 1);
200 if (cpu_has(&current_cpu_data, X86_FEATURE_CLFLUSH_MONITOR))
201 clflush((void *)&current_thread_info()->flags);
202
197 __monitor((void *)&current_thread_info()->flags, 0, 0); 203 __monitor((void *)&current_thread_info()->flags, 0, 0);
198 smp_mb(); 204 smp_mb();
199 if (!need_resched()) 205 if (!need_resched())
@@ -344,7 +350,7 @@ static void c1e_idle(void)
344 350
345void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c) 351void __cpuinit select_idle_routine(const struct cpuinfo_x86 *c)
346{ 352{
347#ifdef CONFIG_X86_SMP 353#ifdef CONFIG_SMP
348 if (pm_idle == poll_idle && smp_num_siblings > 1) { 354 if (pm_idle == poll_idle && smp_num_siblings > 1) {
349 printk(KERN_WARNING "WARNING: polling idle and HT enabled," 355 printk(KERN_WARNING "WARNING: polling idle and HT enabled,"
350 " performance may degrade.\n"); 356 " performance may degrade.\n");
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..fec79ad85dc6 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -11,6 +11,7 @@
11 11
12#include <stdarg.h> 12#include <stdarg.h>
13 13
14#include <linux/stackprotector.h>
14#include <linux/cpu.h> 15#include <linux/cpu.h>
15#include <linux/errno.h> 16#include <linux/errno.h>
16#include <linux/sched.h> 17#include <linux/sched.h>
@@ -66,9 +67,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 67DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 68EXPORT_PER_CPU_SYMBOL(current_task);
68 69
69DEFINE_PER_CPU(int, cpu_number);
70EXPORT_PER_CPU_SYMBOL(cpu_number);
71
72/* 70/*
73 * Return saved PC of a blocked thread. 71 * Return saved PC of a blocked thread.
74 */ 72 */
@@ -94,6 +92,15 @@ void cpu_idle(void)
94{ 92{
95 int cpu = smp_processor_id(); 93 int cpu = smp_processor_id();
96 94
95 /*
96 * If we're the non-boot CPU, nothing set the stack canary up
97 * for us. CPU0 already has it initialized but no harm in
98 * doing it again. This is a good place for updating it, as
 99 * we won't ever return from this function (so the invalid
 100 * canaries already on the stack won't ever trigger).
101 */
102 boot_init_stack_canary();
103
97 current_thread_info()->status |= TS_POLLING; 104 current_thread_info()->status |= TS_POLLING;
98 105
99 /* endless idle loop with no priority at all */ 106 /* endless idle loop with no priority at all */
@@ -111,7 +118,6 @@ void cpu_idle(void)
111 play_dead(); 118 play_dead();
112 119
113 local_irq_disable(); 120 local_irq_disable();
114 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
115 /* Don't trace irqs off for idle */ 121 /* Don't trace irqs off for idle */
116 stop_critical_timings(); 122 stop_critical_timings();
117 pm_idle(); 123 pm_idle();
@@ -135,7 +141,7 @@ void __show_regs(struct pt_regs *regs, int all)
135 if (user_mode_vm(regs)) { 141 if (user_mode_vm(regs)) {
136 sp = regs->sp; 142 sp = regs->sp;
137 ss = regs->ss & 0xffff; 143 ss = regs->ss & 0xffff;
138 savesegment(gs, gs); 144 gs = get_user_gs(regs);
139 } else { 145 } else {
140 sp = (unsigned long) (&regs->sp); 146 sp = (unsigned long) (&regs->sp);
141 savesegment(ss, ss); 147 savesegment(ss, ss);
@@ -216,6 +222,7 @@ int kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
216 regs.ds = __USER_DS; 222 regs.ds = __USER_DS;
217 regs.es = __USER_DS; 223 regs.es = __USER_DS;
218 regs.fs = __KERNEL_PERCPU; 224 regs.fs = __KERNEL_PERCPU;
225 regs.gs = __KERNEL_STACK_CANARY;
219 regs.orig_ax = -1; 226 regs.orig_ax = -1;
220 regs.ip = (unsigned long) kernel_thread_helper; 227 regs.ip = (unsigned long) kernel_thread_helper;
221 regs.cs = __KERNEL_CS | get_kernel_rpl(); 228 regs.cs = __KERNEL_CS | get_kernel_rpl();
@@ -308,7 +315,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
308 315
309 p->thread.ip = (unsigned long) ret_from_fork; 316 p->thread.ip = (unsigned long) ret_from_fork;
310 317
311 savesegment(gs, p->thread.gs); 318 task_user_gs(p) = get_user_gs(regs);
312 319
313 tsk = current; 320 tsk = current;
314 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) { 321 if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {
@@ -346,7 +353,7 @@ int copy_thread(int nr, unsigned long clone_flags, unsigned long sp,
346void 353void
347start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) 354start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
348{ 355{
349 __asm__("movl %0, %%gs" : : "r"(0)); 356 set_user_gs(regs, 0);
350 regs->fs = 0; 357 regs->fs = 0;
351 set_fs(USER_DS); 358 set_fs(USER_DS);
352 regs->ds = __USER_DS; 359 regs->ds = __USER_DS;
@@ -543,7 +550,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
543 * used %fs or %gs (it does not today), or if the kernel is 550 * used %fs or %gs (it does not today), or if the kernel is
544 * running inside of a hypervisor layer. 551 * running inside of a hypervisor layer.
545 */ 552 */
546 savesegment(gs, prev->gs); 553 lazy_save_gs(prev->gs);
547 554
548 /* 555 /*
549 * Load the per-thread Thread-Local Storage descriptor. 556 * Load the per-thread Thread-Local Storage descriptor.
@@ -589,31 +596,31 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
589 * Restore %gs if needed (which is common) 596 * Restore %gs if needed (which is common)
590 */ 597 */
591 if (prev->gs | next->gs) 598 if (prev->gs | next->gs)
592 loadsegment(gs, next->gs); 599 lazy_load_gs(next->gs);
593 600
594 x86_write_percpu(current_task, next_p); 601 percpu_write(current_task, next_p);
595 602
596 return prev_p; 603 return prev_p;
597} 604}
598 605
599asmlinkage int sys_fork(struct pt_regs regs) 606int sys_fork(struct pt_regs *regs)
600{ 607{
601 return do_fork(SIGCHLD, regs.sp, &regs, 0, NULL, NULL); 608 return do_fork(SIGCHLD, regs->sp, regs, 0, NULL, NULL);
602} 609}
603 610
604asmlinkage int sys_clone(struct pt_regs regs) 611int sys_clone(struct pt_regs *regs)
605{ 612{
606 unsigned long clone_flags; 613 unsigned long clone_flags;
607 unsigned long newsp; 614 unsigned long newsp;
608 int __user *parent_tidptr, *child_tidptr; 615 int __user *parent_tidptr, *child_tidptr;
609 616
610 clone_flags = regs.bx; 617 clone_flags = regs->bx;
611 newsp = regs.cx; 618 newsp = regs->cx;
612 parent_tidptr = (int __user *)regs.dx; 619 parent_tidptr = (int __user *)regs->dx;
613 child_tidptr = (int __user *)regs.di; 620 child_tidptr = (int __user *)regs->di;
614 if (!newsp) 621 if (!newsp)
615 newsp = regs.sp; 622 newsp = regs->sp;
616 return do_fork(clone_flags, newsp, &regs, 0, parent_tidptr, child_tidptr); 623 return do_fork(clone_flags, newsp, regs, 0, parent_tidptr, child_tidptr);
617} 624}
618 625
619/* 626/*
@@ -626,27 +633,27 @@ asmlinkage int sys_clone(struct pt_regs regs)
626 * do not have enough call-clobbered registers to hold all 633 * do not have enough call-clobbered registers to hold all
627 * the information you need. 634 * the information you need.
628 */ 635 */
629asmlinkage int sys_vfork(struct pt_regs regs) 636int sys_vfork(struct pt_regs *regs)
630{ 637{
631 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs.sp, &regs, 0, NULL, NULL); 638 return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, regs->sp, regs, 0, NULL, NULL);
632} 639}
633 640
634/* 641/*
635 * sys_execve() executes a new program. 642 * sys_execve() executes a new program.
636 */ 643 */
637asmlinkage int sys_execve(struct pt_regs regs) 644int sys_execve(struct pt_regs *regs)
638{ 645{
639 int error; 646 int error;
640 char *filename; 647 char *filename;
641 648
642 filename = getname((char __user *) regs.bx); 649 filename = getname((char __user *) regs->bx);
643 error = PTR_ERR(filename); 650 error = PTR_ERR(filename);
644 if (IS_ERR(filename)) 651 if (IS_ERR(filename))
645 goto out; 652 goto out;
646 error = do_execve(filename, 653 error = do_execve(filename,
647 (char __user * __user *) regs.cx, 654 (char __user * __user *) regs->cx,
648 (char __user * __user *) regs.dx, 655 (char __user * __user *) regs->dx,
649 &regs); 656 regs);
650 if (error == 0) { 657 if (error == 0) {
651 /* Make sure we don't return using sysenter.. */ 658 /* Make sure we don't return using sysenter.. */
652 set_thread_flag(TIF_IRET); 659 set_thread_flag(TIF_IRET);
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4f..836ef6575f01 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -16,6 +16,7 @@
16 16
17#include <stdarg.h> 17#include <stdarg.h>
18 18
19#include <linux/stackprotector.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/errno.h> 21#include <linux/errno.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
@@ -40,13 +41,13 @@
40#include <linux/uaccess.h> 41#include <linux/uaccess.h>
41#include <linux/io.h> 42#include <linux/io.h>
42#include <linux/ftrace.h> 43#include <linux/ftrace.h>
44#include <linux/dmi.h>
43 45
44#include <asm/pgtable.h> 46#include <asm/pgtable.h>
45#include <asm/system.h> 47#include <asm/system.h>
46#include <asm/processor.h> 48#include <asm/processor.h>
47#include <asm/i387.h> 49#include <asm/i387.h>
48#include <asm/mmu_context.h> 50#include <asm/mmu_context.h>
49#include <asm/pda.h>
50#include <asm/prctl.h> 51#include <asm/prctl.h>
51#include <asm/desc.h> 52#include <asm/desc.h>
52#include <asm/proto.h> 53#include <asm/proto.h>
@@ -57,6 +58,12 @@
57 58
58asmlinkage extern void ret_from_fork(void); 59asmlinkage extern void ret_from_fork(void);
59 60
61DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
62EXPORT_PER_CPU_SYMBOL(current_task);
63
64DEFINE_PER_CPU(unsigned long, old_rsp);
65static DEFINE_PER_CPU(unsigned char, is_idle);
66
60unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 67unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
61 68
62static ATOMIC_NOTIFIER_HEAD(idle_notifier); 69static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -75,13 +82,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
75 82
76void enter_idle(void) 83void enter_idle(void)
77{ 84{
78 write_pda(isidle, 1); 85 percpu_write(is_idle, 1);
79 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 86 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
80} 87}
81 88
82static void __exit_idle(void) 89static void __exit_idle(void)
83{ 90{
84 if (test_and_clear_bit_pda(0, isidle) == 0) 91 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
85 return; 92 return;
86 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 93 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
87} 94}
@@ -111,6 +118,16 @@ static inline void play_dead(void)
111void cpu_idle(void) 118void cpu_idle(void)
112{ 119{
113 current_thread_info()->status |= TS_POLLING; 120 current_thread_info()->status |= TS_POLLING;
121
122 /*
123 * If we're the non-boot CPU, nothing set the stack canary up
124 * for us. CPU0 already has it initialized but no harm in
125 * doing it again. This is a good place for updating it, as
 126 * we won't ever return from this function (so the invalid
 127 * canaries already on the stack won't ever trigger).
128 */
129 boot_init_stack_canary();
130
114 /* endless idle loop with no priority at all */ 131 /* endless idle loop with no priority at all */
115 while (1) { 132 while (1) {
116 tick_nohz_stop_sched_tick(1); 133 tick_nohz_stop_sched_tick(1);
@@ -151,14 +168,18 @@ void __show_regs(struct pt_regs *regs, int all)
151 unsigned long d0, d1, d2, d3, d6, d7; 168 unsigned long d0, d1, d2, d3, d6, d7;
152 unsigned int fsindex, gsindex; 169 unsigned int fsindex, gsindex;
153 unsigned int ds, cs, es; 170 unsigned int ds, cs, es;
171 const char *board;
154 172
155 printk("\n"); 173 printk("\n");
156 print_modules(); 174 print_modules();
157 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s\n", 175 board = dmi_get_system_info(DMI_PRODUCT_NAME);
176 if (!board)
177 board = "";
178 printk(KERN_INFO "Pid: %d, comm: %.20s %s %s %.*s %s\n",
158 current->pid, current->comm, print_tainted(), 179 current->pid, current->comm, print_tainted(),
159 init_utsname()->release, 180 init_utsname()->release,
160 (int)strcspn(init_utsname()->version, " "), 181 (int)strcspn(init_utsname()->version, " "),
161 init_utsname()->version); 182 init_utsname()->version, board);
162 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip); 183 printk(KERN_INFO "RIP: %04lx:[<%016lx>] ", regs->cs & 0xffff, regs->ip);
163 printk_address(regs->ip, 1); 184 printk_address(regs->ip, 1);
164 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss, 185 printk(KERN_INFO "RSP: %04lx:%016lx EFLAGS: %08lx\n", regs->ss,
@@ -392,7 +413,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
392 load_gs_index(0); 413 load_gs_index(0);
393 regs->ip = new_ip; 414 regs->ip = new_ip;
394 regs->sp = new_sp; 415 regs->sp = new_sp;
395 write_pda(oldrsp, new_sp); 416 percpu_write(old_rsp, new_sp);
396 regs->cs = __USER_CS; 417 regs->cs = __USER_CS;
397 regs->ss = __USER_DS; 418 regs->ss = __USER_DS;
398 regs->flags = 0x200; 419 regs->flags = 0x200;
@@ -613,21 +634,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
613 /* 634 /*
614 * Switch the PDA and FPU contexts. 635 * Switch the PDA and FPU contexts.
615 */ 636 */
616 prev->usersp = read_pda(oldrsp); 637 prev->usersp = percpu_read(old_rsp);
617 write_pda(oldrsp, next->usersp); 638 percpu_write(old_rsp, next->usersp);
618 write_pda(pcurrent, next_p); 639 percpu_write(current_task, next_p);
619 640
620 write_pda(kernelstack, 641 percpu_write(kernel_stack,
621 (unsigned long)task_stack_page(next_p) + 642 (unsigned long)task_stack_page(next_p) +
622 THREAD_SIZE - PDA_STACKOFFSET); 643 THREAD_SIZE - KERNEL_STACK_OFFSET);
623#ifdef CONFIG_CC_STACKPROTECTOR
624 write_pda(stack_canary, next_p->stack_canary);
625 /*
626 * Build time only check to make sure the stack_canary is at
627 * offset 40 in the pda; this is a gcc ABI requirement
628 */
629 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
630#endif
631 644
632 /* 645 /*
633 * Now maybe reload the debug registers and handle I/O bitmaps 646 * Now maybe reload the debug registers and handle I/O bitmaps
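
The process_64.c hunks retire the remaining PDA fields: pcurrent, oldrsp and isidle become
the per-CPU variables current_task, old_rsp and is_idle, read and written through
percpu_read()/percpu_write() instead of read_pda()/write_pda(). A rough userspace analogue,
assuming thread-local storage stands in for the per-CPU area (one thread per "CPU"):

#include <stdio.h>
#include <stdint.h>
#include <pthread.h>

/* Thread-local stand-ins for DEFINE_PER_CPU(...) variables: each thread
 * ("CPU") gets its own copy, addressed without an explicit cpu index,
 * much like percpu_read()/percpu_write() on the current CPU. */
static __thread const char *current_task = "boot";
static __thread unsigned long old_rsp;
static __thread int is_idle;

static void enter_idle(void) { is_idle = 1; }
static void exit_idle(void)  { is_idle = 0; }

static void *cpu_thread(void *arg)
{
        long cpu = (long)(intptr_t)arg;

        current_task = cpu ? "swapper/1" : "swapper/0";
        old_rsp = 0x7fff0000UL + (unsigned long)cpu * 0x1000;
        enter_idle();

        printf("cpu %ld: task=%s old_rsp=%#lx idle=%d\n",
               cpu, current_task, old_rsp, is_idle);

        exit_idle();
        return NULL;
}

int main(void)
{
        pthread_t t[2];
        long cpu;

        for (cpu = 0; cpu < 2; cpu++)
                pthread_create(&t[cpu], NULL, cpu_thread, (void *)(intptr_t)cpu);
        for (cpu = 0; cpu < 2; cpu++)
                pthread_join(t[cpu], NULL);
        return 0;
}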
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 0a5df5f82fb9..d2f7cd5b2c83 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -75,10 +75,7 @@ static inline bool invalid_selector(u16 value)
75static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno) 75static unsigned long *pt_regs_access(struct pt_regs *regs, unsigned long regno)
76{ 76{
77 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0); 77 BUILD_BUG_ON(offsetof(struct pt_regs, bx) != 0);
78 regno >>= 2; 78 return &regs->bx + (regno >> 2);
79 if (regno > FS)
80 --regno;
81 return &regs->bx + regno;
82} 79}
83 80
84static u16 get_segment_reg(struct task_struct *task, unsigned long offset) 81static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
@@ -90,9 +87,10 @@ static u16 get_segment_reg(struct task_struct *task, unsigned long offset)
90 if (offset != offsetof(struct user_regs_struct, gs)) 87 if (offset != offsetof(struct user_regs_struct, gs))
91 retval = *pt_regs_access(task_pt_regs(task), offset); 88 retval = *pt_regs_access(task_pt_regs(task), offset);
92 else { 89 else {
93 retval = task->thread.gs;
94 if (task == current) 90 if (task == current)
95 savesegment(gs, retval); 91 retval = get_user_gs(task_pt_regs(task));
92 else
93 retval = task_user_gs(task);
96 } 94 }
97 return retval; 95 return retval;
98} 96}
@@ -126,13 +124,10 @@ static int set_segment_reg(struct task_struct *task,
126 break; 124 break;
127 125
128 case offsetof(struct user_regs_struct, gs): 126 case offsetof(struct user_regs_struct, gs):
129 task->thread.gs = value;
130 if (task == current) 127 if (task == current)
131 /* 128 set_user_gs(task_pt_regs(task), value);
132 * The user-mode %gs is not affected by 129 else
133 * kernel entry, so we must update the CPU. 130 task_user_gs(task) = value;
134 */
135 loadsegment(gs, value);
136 } 131 }
137 132
138 return 0; 133 return 0;
@@ -810,12 +805,16 @@ static void ptrace_bts_untrace(struct task_struct *child)
810 805
811static void ptrace_bts_detach(struct task_struct *child) 806static void ptrace_bts_detach(struct task_struct *child)
812{ 807{
813 if (unlikely(child->bts)) { 808 /*
814 ds_release_bts(child->bts); 809 * Ptrace_detach() races with ptrace_untrace() in case
815 child->bts = NULL; 810 * the child dies and is reaped by another thread.
816 811 *
817 ptrace_bts_free_buffer(child); 812 * We only do the memory accounting at this point and
818 } 813 * leave the buffer deallocation and the bts tracer
814 * release to ptrace_bts_untrace() which will be called
815 * later on with tasklist_lock held.
816 */
817 release_locked_buffer(child->bts_buffer, child->bts_size);
819} 818}
820#else 819#else
821static inline void ptrace_bts_fork(struct task_struct *tsk) {} 820static inline void ptrace_bts_fork(struct task_struct *tsk) {}
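
pt_regs_access() shrinks to a single pointer addition because regno is a byte offset into
user_regs_struct and, with %gs now handled through pt_regs like the other segment registers
(see the get_user_gs()/set_user_gs() hunks above), the two layouts line up entry for entry,
so the old skip-one-past-FS fixup is no longer needed. A small self-contained illustration of
the offset-to-slot arithmetic (the struct below is a reduced stand-in, not the real
user_regs_struct):

#include <stdio.h>
#include <stddef.h>

/* Reduced stand-in for the register frame: a run of equally sized words,
 * so a byte offset divided by the word size indexes straight into it
 * (the kernel's 32-bit code uses regno >> 2 for the same thing). */
struct regs_sketch {
        unsigned long bx, cx, dx, si, di, bp, ax;
};

static unsigned long *regs_access(struct regs_sketch *regs, unsigned long offset)
{
        return &regs->bx + offset / sizeof(unsigned long);
}

int main(void)
{
        struct regs_sketch regs = { .si = 0x1234, .ax = 0xdead };

        printf("ax = %#lx\n", *regs_access(&regs, offsetof(struct regs_sketch, ax)));
        printf("si = %#lx\n", *regs_access(&regs, offsetof(struct regs_sketch, si)));
        return 0;
}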
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643b..32e8f0af292c 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -14,6 +14,7 @@
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h> 15#include <asm/pci_x86.h>
16#include <asm/virtext.h> 16#include <asm/virtext.h>
17#include <asm/cpu.h>
17 18
18#ifdef CONFIG_X86_32 19#ifdef CONFIG_X86_32
19# include <linux/dmi.h> 20# include <linux/dmi.h>
@@ -23,7 +24,7 @@
23# include <asm/iommu.h> 24# include <asm/iommu.h>
24#endif 25#endif
25 26
26#include <mach_ipi.h> 27#include <asm/genapic.h>
27 28
28/* 29/*
29 * Power off function, if any 30 * Power off function, if any
@@ -650,7 +651,7 @@ static int crash_nmi_callback(struct notifier_block *self,
650 651
651static void smp_send_nmi_allbutself(void) 652static void smp_send_nmi_allbutself(void)
652{ 653{
653 send_IPI_allbutself(NMI_VECTOR); 654 apic->send_IPI_allbutself(NMI_VECTOR);
654} 655}
655 656
656static struct notifier_block crash_nmi_nb = { 657static struct notifier_block crash_nmi_nb = {
diff --git a/arch/x86/kernel/relocate_kernel_64.S b/arch/x86/kernel/relocate_kernel_64.S
index f5afe665a82b..b0bbdd4829c9 100644
--- a/arch/x86/kernel/relocate_kernel_64.S
+++ b/arch/x86/kernel/relocate_kernel_64.S
@@ -29,122 +29,6 @@ relocate_kernel:
29 * %rdx start address 29 * %rdx start address
30 */ 30 */
31 31
32 /* map the control page at its virtual address */
33
34 movq $0x0000ff8000000000, %r10 /* mask */
35 mov $(39 - 3), %cl /* bits to shift */
36 movq PTR(VA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
37
38 movq %r11, %r9
39 andq %r10, %r9
40 shrq %cl, %r9
41
42 movq PTR(VA_PGD)(%rsi), %r8
43 addq %r8, %r9
44 movq PTR(PA_PUD_0)(%rsi), %r8
45 orq $PAGE_ATTR, %r8
46 movq %r8, (%r9)
47
48 shrq $9, %r10
49 sub $9, %cl
50
51 movq %r11, %r9
52 andq %r10, %r9
53 shrq %cl, %r9
54
55 movq PTR(VA_PUD_0)(%rsi), %r8
56 addq %r8, %r9
57 movq PTR(PA_PMD_0)(%rsi), %r8
58 orq $PAGE_ATTR, %r8
59 movq %r8, (%r9)
60
61 shrq $9, %r10
62 sub $9, %cl
63
64 movq %r11, %r9
65 andq %r10, %r9
66 shrq %cl, %r9
67
68 movq PTR(VA_PMD_0)(%rsi), %r8
69 addq %r8, %r9
70 movq PTR(PA_PTE_0)(%rsi), %r8
71 orq $PAGE_ATTR, %r8
72 movq %r8, (%r9)
73
74 shrq $9, %r10
75 sub $9, %cl
76
77 movq %r11, %r9
78 andq %r10, %r9
79 shrq %cl, %r9
80
81 movq PTR(VA_PTE_0)(%rsi), %r8
82 addq %r8, %r9
83 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
84 orq $PAGE_ATTR, %r8
85 movq %r8, (%r9)
86
87 /* identity map the control page at its physical address */
88
89 movq $0x0000ff8000000000, %r10 /* mask */
90 mov $(39 - 3), %cl /* bits to shift */
91 movq PTR(PA_CONTROL_PAGE)(%rsi), %r11 /* address to map */
92
93 movq %r11, %r9
94 andq %r10, %r9
95 shrq %cl, %r9
96
97 movq PTR(VA_PGD)(%rsi), %r8
98 addq %r8, %r9
99 movq PTR(PA_PUD_1)(%rsi), %r8
100 orq $PAGE_ATTR, %r8
101 movq %r8, (%r9)
102
103 shrq $9, %r10
104 sub $9, %cl
105
106 movq %r11, %r9
107 andq %r10, %r9
108 shrq %cl, %r9
109
110 movq PTR(VA_PUD_1)(%rsi), %r8
111 addq %r8, %r9
112 movq PTR(PA_PMD_1)(%rsi), %r8
113 orq $PAGE_ATTR, %r8
114 movq %r8, (%r9)
115
116 shrq $9, %r10
117 sub $9, %cl
118
119 movq %r11, %r9
120 andq %r10, %r9
121 shrq %cl, %r9
122
123 movq PTR(VA_PMD_1)(%rsi), %r8
124 addq %r8, %r9
125 movq PTR(PA_PTE_1)(%rsi), %r8
126 orq $PAGE_ATTR, %r8
127 movq %r8, (%r9)
128
129 shrq $9, %r10
130 sub $9, %cl
131
132 movq %r11, %r9
133 andq %r10, %r9
134 shrq %cl, %r9
135
136 movq PTR(VA_PTE_1)(%rsi), %r8
137 addq %r8, %r9
138 movq PTR(PA_CONTROL_PAGE)(%rsi), %r8
139 orq $PAGE_ATTR, %r8
140 movq %r8, (%r9)
141
142relocate_new_kernel:
143 /* %rdi indirection_page
144 * %rsi page_list
145 * %rdx start address
146 */
147
148 /* zero out flags, and disable interrupts */ 32 /* zero out flags, and disable interrupts */
149 pushq $0 33 pushq $0
150 popfq 34 popfq
@@ -156,9 +40,8 @@ relocate_new_kernel:
156 /* get physical address of page table now too */ 40 /* get physical address of page table now too */
157 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx 41 movq PTR(PA_TABLE_PAGE)(%rsi), %rcx
158 42
159 /* switch to new set of page tables */ 43 /* Switch to the identity mapped page tables */
160 movq PTR(PA_PGD)(%rsi), %r9 44 movq %rcx, %cr3
161 movq %r9, %cr3
162 45
163 /* setup a new stack at the end of the physical control page */ 46 /* setup a new stack at the end of the physical control page */
164 lea PAGE_SIZE(%r8), %rsp 47 lea PAGE_SIZE(%r8), %rsp
@@ -194,9 +77,7 @@ identity_mapped:
194 jmp 1f 77 jmp 1f
1951: 781:
196 79
197 /* Switch to the identity mapped page tables, 80 /* Flush the TLB (needed?) */
198 * and flush the TLB.
199 */
200 movq %rcx, %cr3 81 movq %rcx, %cr3
201 82
202 /* Do the copies */ 83 /* Do the copies */
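
[Editor's note] The deleted block built the control page's virtual and identity mappings by hand: at each paging level it masked out a 9-bit table index and shifted right by (level shift - 3), leaving the index already multiplied by 8, i.e. the byte offset of the 8-byte entry within that level's table. With kexec now handing relocate_kernel a ready, identity-mapped table through PA_TABLE_PAGE, all of that collapses into a single %cr3 load. A small stand-alone sketch of the offset arithmetic the removed instructions performed (user-space C, illustrative only):

/*
 * Sketch: entry-offset calculation for the four x86-64 paging levels,
 * mirroring the mask/shift pairs the deleted assembly used
 * (e.g. mask 0x0000ff8000000000 and shift 39-3 for the top level).
 */
#include <stdio.h>
#include <stdint.h>

static uint64_t pt_entry_offset(uint64_t vaddr, unsigned int shift)
{
	uint64_t mask = 0x1ffULL << shift;	/* 9 index bits for this level */

	return (vaddr & mask) >> (shift - 3);	/* index * 8 = byte offset     */
}

int main(void)
{
	uint64_t va = 0xffff880000100000ULL;	/* example virtual address */
	unsigned int shift;

	for (shift = 39; shift >= 12; shift -= 9)	/* PGD, PUD, PMD, PTE */
		printf("level shift %2u: entry offset 0x%llx\n",
		       shift, (unsigned long long)pt_entry_offset(va, shift));
	return 0;
}
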
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf69..8fce6c714514 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -81,7 +81,7 @@
81#include <asm/io_apic.h> 81#include <asm/io_apic.h>
82#include <asm/ist.h> 82#include <asm/ist.h>
83#include <asm/vmi.h> 83#include <asm/vmi.h>
84#include <setup_arch.h> 84#include <asm/setup_arch.h>
85#include <asm/bios_ebda.h> 85#include <asm/bios_ebda.h>
86#include <asm/cacheflush.h> 86#include <asm/cacheflush.h>
87#include <asm/processor.h> 87#include <asm/processor.h>
@@ -89,7 +89,7 @@
89 89
90#include <asm/system.h> 90#include <asm/system.h>
91#include <asm/vsyscall.h> 91#include <asm/vsyscall.h>
92#include <asm/smp.h> 92#include <asm/cpu.h>
93#include <asm/desc.h> 93#include <asm/desc.h>
94#include <asm/dma.h> 94#include <asm/dma.h>
95#include <asm/iommu.h> 95#include <asm/iommu.h>
@@ -97,7 +97,7 @@
97#include <asm/mmu_context.h> 97#include <asm/mmu_context.h>
98#include <asm/proto.h> 98#include <asm/proto.h>
99 99
100#include <mach_apic.h> 100#include <asm/genapic.h>
101#include <asm/paravirt.h> 101#include <asm/paravirt.h>
102#include <asm/hypervisor.h> 102#include <asm/hypervisor.h>
103 103
@@ -112,6 +112,20 @@
112#define ARCH_SETUP 112#define ARCH_SETUP
113#endif 113#endif
114 114
115unsigned int boot_cpu_id __read_mostly;
116
117#ifdef CONFIG_X86_64
118int default_cpu_present_to_apicid(int mps_cpu)
119{
120 return __default_cpu_present_to_apicid(mps_cpu);
121}
122
123int default_check_phys_apicid_present(int boot_cpu_physical_apicid)
124{
125 return __default_check_phys_apicid_present(boot_cpu_physical_apicid);
126}
127#endif
128
115#ifndef CONFIG_DEBUG_BOOT_PARAMS 129#ifndef CONFIG_DEBUG_BOOT_PARAMS
116struct boot_params __initdata boot_params; 130struct boot_params __initdata boot_params;
117#else 131#else
@@ -588,10 +602,9 @@ early_param("elfcorehdr", setup_elfcorehdr);
588 602
589static int __init default_update_genapic(void) 603static int __init default_update_genapic(void)
590{ 604{
591#ifdef CONFIG_X86_SMP 605#ifdef CONFIG_SMP
592# if defined(CONFIG_X86_GENERICARCH) || defined(CONFIG_X86_64) 606 if (!apic->wakeup_cpu)
593 genapic->wakeup_cpu = wakeup_secondary_cpu_via_init; 607 apic->wakeup_cpu = wakeup_secondary_cpu_via_init;
594# endif
595#endif 608#endif
596 609
597 return 0; 610 return 0;
@@ -607,7 +620,7 @@ struct x86_quirks *x86_quirks __initdata = &default_x86_quirks;
607static int __init dmi_low_memory_corruption(const struct dmi_system_id *d) 620static int __init dmi_low_memory_corruption(const struct dmi_system_id *d)
608{ 621{
609 printk(KERN_NOTICE 622 printk(KERN_NOTICE
610 "%s detected: BIOS may corrupt low RAM, working it around.\n", 623 "%s detected: BIOS may corrupt low RAM, working around it.\n",
611 d->ident); 624 d->ident);
612 625
613 e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED); 626 e820_update_range(0, 0x10000, E820_RAM, E820_RESERVED);
@@ -892,12 +905,11 @@ void __init setup_arch(char **cmdline_p)
892 */ 905 */
893 acpi_reserve_bootmem(); 906 acpi_reserve_bootmem();
894#endif 907#endif
895#ifdef CONFIG_X86_FIND_SMP_CONFIG
896 /* 908 /*
897 * Find and reserve possible boot-time SMP configuration: 909 * Find and reserve possible boot-time SMP configuration:
898 */ 910 */
899 find_smp_config(); 911 find_smp_config();
900#endif 912
901 reserve_crashkernel(); 913 reserve_crashkernel();
902 914
903#ifdef CONFIG_X86_64 915#ifdef CONFIG_X86_64
@@ -924,9 +936,7 @@ void __init setup_arch(char **cmdline_p)
924 map_vsyscall(); 936 map_vsyscall();
925#endif 937#endif
926 938
927#ifdef CONFIG_X86_GENERICARCH
928 generic_apic_probe(); 939 generic_apic_probe();
929#endif
930 940
931 early_quirks(); 941 early_quirks();
932 942
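
[Editor's note] default_update_genapic() now fills in apic->wakeup_cpu only when the selected driver left the hook unset, rather than overwriting it under a maze of config conditions. A hedged sketch of that fill-in-the-default pattern; the struct and function names below are invented for illustration.

/*
 * Illustrative only: install a default implementation for an optional
 * hook when the driver did not provide one.
 */
#include <stdio.h>

struct cpu_ops {
	int (*wakeup)(int apicid, unsigned long start_ip);
};

static int wakeup_via_init_default(int apicid, unsigned long start_ip)
{
	printf("default wakeup: apicid %d, start_ip 0x%lx\n", apicid, start_ip);
	return 0;
}

static void install_defaults(struct cpu_ops *ops)
{
	if (!ops->wakeup)		/* keep a driver-provided hook intact */
		ops->wakeup = wakeup_via_init_default;
}

int main(void)
{
	struct cpu_ops ops = { .wakeup = NULL };

	install_defaults(&ops);
	return ops.wakeup(1, 0x9f000);
}
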
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 01161077a49c..d992e6cff730 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,145 +13,47 @@
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/apicdef.h> 14#include <asm/apicdef.h>
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h>
17#include <asm/cpumask.h>
18#include <asm/cpu.h>
19#include <asm/stackprotector.h>
16 20
17#ifdef CONFIG_X86_LOCAL_APIC 21#ifdef CONFIG_DEBUG_PER_CPU_MAPS
18unsigned int num_processors; 22# define DBG(x...) printk(KERN_DEBUG x)
19unsigned disabled_cpus __cpuinitdata;
20/* Processor that is doing the boot up */
21unsigned int boot_cpu_physical_apicid = -1U;
22EXPORT_SYMBOL(boot_cpu_physical_apicid);
23unsigned int max_physical_apicid;
24
25/* Bitmask of physically existing CPUs */
26physid_mask_t phys_cpu_present_map;
27#endif
28
29/* map cpu index to physical APIC ID */
30DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
31DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
32EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
33EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
34
35#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
36#define X86_64_NUMA 1
37
38/* map cpu index to node index */
39DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
40EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
41
42/* which logical CPUs are on which nodes */
43cpumask_t *node_to_cpumask_map;
44EXPORT_SYMBOL(node_to_cpumask_map);
45
46/* setup node_to_cpumask_map */
47static void __init setup_node_to_cpumask_map(void);
48
49#else 23#else
50static inline void setup_node_to_cpumask_map(void) { } 24# define DBG(x...)
51#endif 25#endif
52 26
53#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 27DEFINE_PER_CPU(int, cpu_number);
54/* 28EXPORT_PER_CPU_SYMBOL(cpu_number);
55 * Copy data used in early init routines from the initial arrays to the
56 * per cpu data areas. These arrays then become expendable and the
57 * *_early_ptr's are zeroed indicating that the static arrays are gone.
58 */
59static void __init setup_per_cpu_maps(void)
60{
61 int cpu;
62 29
63 for_each_possible_cpu(cpu) { 30#ifdef CONFIG_X86_64
64 per_cpu(x86_cpu_to_apicid, cpu) = 31#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
65 early_per_cpu_map(x86_cpu_to_apicid, cpu); 32#else
66 per_cpu(x86_bios_cpu_apicid, cpu) = 33#define BOOT_PERCPU_OFFSET 0
67 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
68#ifdef X86_64_NUMA
69 per_cpu(x86_cpu_to_node_map, cpu) =
70 early_per_cpu_map(x86_cpu_to_node_map, cpu);
71#endif 34#endif
72 }
73 35
74 /* indicate the early static arrays will soon be gone */ 36DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
75 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 37EXPORT_PER_CPU_SYMBOL(this_cpu_off);
76 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
77#ifdef X86_64_NUMA
78 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
79#endif
80}
81 38
82#ifdef CONFIG_X86_32 39unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
83/* 40 [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
84 * Great future not-so-futuristic plan: make i386 and x86_64 do it 41};
85 * the same way
86 */
87unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
88EXPORT_SYMBOL(__per_cpu_offset); 42EXPORT_SYMBOL(__per_cpu_offset);
89static inline void setup_cpu_pda_map(void) { }
90
91#elif !defined(CONFIG_SMP)
92static inline void setup_cpu_pda_map(void) { }
93
94#else /* CONFIG_SMP && CONFIG_X86_64 */
95
96/*
97 * Allocate cpu_pda pointer table and array via alloc_bootmem.
98 */
99static void __init setup_cpu_pda_map(void)
100{
101 char *pda;
102 struct x8664_pda **new_cpu_pda;
103 unsigned long size;
104 int cpu;
105
106 size = roundup(sizeof(struct x8664_pda), cache_line_size());
107
108 /* allocate cpu_pda array and pointer table */
109 {
110 unsigned long tsize = nr_cpu_ids * sizeof(void *);
111 unsigned long asize = size * (nr_cpu_ids - 1);
112 43
113 tsize = roundup(tsize, cache_line_size()); 44static inline void setup_percpu_segment(int cpu)
114 new_cpu_pda = alloc_bootmem(tsize + asize);
115 pda = (char *)new_cpu_pda + tsize;
116 }
117
118 /* initialize pointer table to static pda's */
119 for_each_possible_cpu(cpu) {
120 if (cpu == 0) {
121 /* leave boot cpu pda in place */
122 new_cpu_pda[0] = cpu_pda(0);
123 continue;
124 }
125 new_cpu_pda[cpu] = (struct x8664_pda *)pda;
126 new_cpu_pda[cpu]->in_bootmem = 1;
127 pda += size;
128 }
129
130 /* point to new pointer table */
131 _cpu_pda = new_cpu_pda;
132}
133
134#endif /* CONFIG_SMP && CONFIG_X86_64 */
135
136#ifdef CONFIG_X86_64
137
138/* correctly size the local cpu masks */
139static void __init setup_cpu_local_masks(void)
140{ 45{
141 alloc_bootmem_cpumask_var(&cpu_initialized_mask); 46#ifdef CONFIG_X86_32
142 alloc_bootmem_cpumask_var(&cpu_callin_mask); 47 struct desc_struct gdt;
143 alloc_bootmem_cpumask_var(&cpu_callout_mask);
144 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
145}
146
147#else /* CONFIG_X86_32 */
148 48
149static inline void setup_cpu_local_masks(void) 49 pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
150{ 50 0x2 | DESCTYPE_S, 0x8);
51 gdt.s = 1;
52 write_gdt_entry(get_cpu_gdt_table(cpu),
53 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
54#endif
151} 55}
152 56
153#endif /* CONFIG_X86_32 */
154
155/* 57/*
156 * Great future plan: 58 * Great future plan:
157 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 59 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -159,18 +61,12 @@ static inline void setup_cpu_local_masks(void)
159 */ 61 */
160void __init setup_per_cpu_areas(void) 62void __init setup_per_cpu_areas(void)
161{ 63{
162 ssize_t size, old_size; 64 ssize_t size;
163 char *ptr; 65 char *ptr;
164 int cpu; 66 int cpu;
165 unsigned long align = 1;
166
167 /* Setup cpu_pda map */
168 setup_cpu_pda_map();
169 67
170 /* Copy section for each CPU (we discard the original) */ 68 /* Copy section for each CPU (we discard the original) */
171 old_size = PERCPU_ENOUGH_ROOM; 69 size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
172 align = max_t(unsigned long, PAGE_SIZE, align);
173 size = roundup(old_size, align);
174 70
175 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", 71 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
176 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); 72 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
@@ -179,30 +75,68 @@ void __init setup_per_cpu_areas(void)
179 75
180 for_each_possible_cpu(cpu) { 76 for_each_possible_cpu(cpu) {
181#ifndef CONFIG_NEED_MULTIPLE_NODES 77#ifndef CONFIG_NEED_MULTIPLE_NODES
182 ptr = __alloc_bootmem(size, align, 78 ptr = alloc_bootmem_pages(size);
183 __pa(MAX_DMA_ADDRESS));
184#else 79#else
185 int node = early_cpu_to_node(cpu); 80 int node = early_cpu_to_node(cpu);
186 if (!node_online(node) || !NODE_DATA(node)) { 81 if (!node_online(node) || !NODE_DATA(node)) {
187 ptr = __alloc_bootmem(size, align, 82 ptr = alloc_bootmem_pages(size);
188 __pa(MAX_DMA_ADDRESS));
189 pr_info("cpu %d has no node %d or node-local memory\n", 83 pr_info("cpu %d has no node %d or node-local memory\n",
190 cpu, node); 84 cpu, node);
191 pr_debug("per cpu data for cpu%d at %016lx\n", 85 pr_debug("per cpu data for cpu%d at %016lx\n",
192 cpu, __pa(ptr)); 86 cpu, __pa(ptr));
193 } else { 87 } else {
194 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 88 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
195 __pa(MAX_DMA_ADDRESS));
196 pr_debug("per cpu data for cpu%d on node%d at %016lx\n", 89 pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
197 cpu, node, __pa(ptr)); 90 cpu, node, __pa(ptr));
198 } 91 }
199#endif 92#endif
93
94 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
200 per_cpu_offset(cpu) = ptr - __per_cpu_start; 95 per_cpu_offset(cpu) = ptr - __per_cpu_start;
201 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 96 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
97 per_cpu(cpu_number, cpu) = cpu;
98 setup_percpu_segment(cpu);
99 setup_stack_canary_segment(cpu);
100 /*
101 * Copy data used in early init routines from the
102 * initial arrays to the per cpu data areas. These
103 * arrays then become expendable and the *_early_ptr's
104 * are zeroed indicating that the static arrays are
105 * gone.
106 */
107#ifdef CONFIG_X86_LOCAL_APIC
108 per_cpu(x86_cpu_to_apicid, cpu) =
109 early_per_cpu_map(x86_cpu_to_apicid, cpu);
110 per_cpu(x86_bios_cpu_apicid, cpu) =
111 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
112#endif
113#ifdef CONFIG_X86_64
114 per_cpu(irq_stack_ptr, cpu) =
115 per_cpu(irq_stack_union.irq_stack, cpu) +
116 IRQ_STACK_SIZE - 64;
117#ifdef CONFIG_NUMA
118 per_cpu(x86_cpu_to_node_map, cpu) =
119 early_per_cpu_map(x86_cpu_to_node_map, cpu);
120#endif
121#endif
122 /*
123 * Up to this point, the boot CPU has been using .data.init
124 * area. Reload any changed state for the boot CPU.
125 */
126 if (cpu == boot_cpu_id)
127 switch_to_new_gdt(cpu);
128
129 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
202 } 130 }
203 131
204 /* Setup percpu data maps */ 132 /* indicate the early static arrays will soon be gone */
205 setup_per_cpu_maps(); 133#ifdef CONFIG_X86_LOCAL_APIC
134 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
135 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
136#endif
137#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
138 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
139#endif
206 140
207 /* Setup node to cpumask map */ 141 /* Setup node to cpumask map */
208 setup_node_to_cpumask_map(); 142 setup_node_to_cpumask_map();
@@ -210,199 +144,3 @@ void __init setup_per_cpu_areas(void)
210 /* Setup cpu initialized, callin, callout masks */ 144 /* Setup cpu initialized, callin, callout masks */
211 setup_cpu_local_masks(); 145 setup_cpu_local_masks();
212} 146}
213
214#endif
215
216#ifdef X86_64_NUMA
217
218/*
219 * Allocate node_to_cpumask_map based on number of available nodes
220 * Requires node_possible_map to be valid.
221 *
222 * Note: node_to_cpumask() is not valid until after this is done.
223 */
224static void __init setup_node_to_cpumask_map(void)
225{
226 unsigned int node, num = 0;
227 cpumask_t *map;
228
229 /* setup nr_node_ids if not done yet */
230 if (nr_node_ids == MAX_NUMNODES) {
231 for_each_node_mask(node, node_possible_map)
232 num = node;
233 nr_node_ids = num + 1;
234 }
235
236 /* allocate the map */
237 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
238
239 pr_debug("Node to cpumask map at %p for %d nodes\n",
240 map, nr_node_ids);
241
242 /* node_to_cpumask() will now work */
243 node_to_cpumask_map = map;
244}
245
246void __cpuinit numa_set_node(int cpu, int node)
247{
248 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
249
250 if (cpu_pda(cpu) && node != NUMA_NO_NODE)
251 cpu_pda(cpu)->nodenumber = node;
252
253 if (cpu_to_node_map)
254 cpu_to_node_map[cpu] = node;
255
256 else if (per_cpu_offset(cpu))
257 per_cpu(x86_cpu_to_node_map, cpu) = node;
258
259 else
260 pr_debug("Setting node for non-present cpu %d\n", cpu);
261}
262
263void __cpuinit numa_clear_node(int cpu)
264{
265 numa_set_node(cpu, NUMA_NO_NODE);
266}
267
268#ifndef CONFIG_DEBUG_PER_CPU_MAPS
269
270void __cpuinit numa_add_cpu(int cpu)
271{
272 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
273}
274
275void __cpuinit numa_remove_cpu(int cpu)
276{
277 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
278}
279
280#else /* CONFIG_DEBUG_PER_CPU_MAPS */
281
282/*
283 * --------- debug versions of the numa functions ---------
284 */
285static void __cpuinit numa_set_cpumask(int cpu, int enable)
286{
287 int node = cpu_to_node(cpu);
288 cpumask_t *mask;
289 char buf[64];
290
291 if (node_to_cpumask_map == NULL) {
292 printk(KERN_ERR "node_to_cpumask_map NULL\n");
293 dump_stack();
294 return;
295 }
296
297 mask = &node_to_cpumask_map[node];
298 if (enable)
299 cpu_set(cpu, *mask);
300 else
301 cpu_clear(cpu, *mask);
302
303 cpulist_scnprintf(buf, sizeof(buf), mask);
304 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
305 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
306}
307
308void __cpuinit numa_add_cpu(int cpu)
309{
310 numa_set_cpumask(cpu, 1);
311}
312
313void __cpuinit numa_remove_cpu(int cpu)
314{
315 numa_set_cpumask(cpu, 0);
316}
317
318int cpu_to_node(int cpu)
319{
320 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
321 printk(KERN_WARNING
322 "cpu_to_node(%d): usage too early!\n", cpu);
323 dump_stack();
324 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
325 }
326 return per_cpu(x86_cpu_to_node_map, cpu);
327}
328EXPORT_SYMBOL(cpu_to_node);
329
330/*
331 * Same function as cpu_to_node() but used if called before the
332 * per_cpu areas are setup.
333 */
334int early_cpu_to_node(int cpu)
335{
336 if (early_per_cpu_ptr(x86_cpu_to_node_map))
337 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
338
339 if (!per_cpu_offset(cpu)) {
340 printk(KERN_WARNING
341 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
342 dump_stack();
343 return NUMA_NO_NODE;
344 }
345 return per_cpu(x86_cpu_to_node_map, cpu);
346}
347
348
349/* empty cpumask */
350static const cpumask_t cpu_mask_none;
351
352/*
353 * Returns a pointer to the bitmask of CPUs on Node 'node'.
354 */
355const cpumask_t *cpumask_of_node(int node)
356{
357 if (node_to_cpumask_map == NULL) {
358 printk(KERN_WARNING
359 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
360 node);
361 dump_stack();
362 return (const cpumask_t *)&cpu_online_map;
363 }
364 if (node >= nr_node_ids) {
365 printk(KERN_WARNING
366 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
367 node, nr_node_ids);
368 dump_stack();
369 return &cpu_mask_none;
370 }
371 return &node_to_cpumask_map[node];
372}
373EXPORT_SYMBOL(cpumask_of_node);
374
375/*
376 * Returns a bitmask of CPUs on Node 'node'.
377 *
378 * Side note: this function creates the returned cpumask on the stack
379 * so with a high NR_CPUS count, excessive stack space is used. The
380 * node_to_cpumask_ptr function should be used whenever possible.
381 */
382cpumask_t node_to_cpumask(int node)
383{
384 if (node_to_cpumask_map == NULL) {
385 printk(KERN_WARNING
386 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
387 dump_stack();
388 return cpu_online_map;
389 }
390 if (node >= nr_node_ids) {
391 printk(KERN_WARNING
392 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
393 node, nr_node_ids);
394 dump_stack();
395 return cpu_mask_none;
396 }
397 return node_to_cpumask_map[node];
398}
399EXPORT_SYMBOL(node_to_cpumask);
400
401/*
402 * --------- end of debug versions of the numa functions ---------
403 */
404
405#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
406
407#endif /* X86_64_NUMA */
408
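
[Editor's note] The rewritten setup_per_cpu_areas() gives every possible CPU its own bootmem copy of the per-CPU load image and records that CPU's offset both in __per_cpu_offset[] and in its own this_cpu_off slot. A simplified user-space sketch of the copy-and-record step; it deliberately ignores the segment setup, NUMA-aware placement and early-map copies the real code also performs.

/*
 * Sketch: one copy of a per-CPU template per "CPU", with the offset of
 * each copy from the template recorded for later addressing.
 */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

#define NCPUS		4
#define PERCPU_SIZE	64		/* stand-in for __per_cpu_end - __per_cpu_start */

static char template_image[PERCPU_SIZE];	/* stand-in for __per_cpu_load */
static intptr_t per_cpu_offset[NCPUS];

int main(void)
{
	char *area[NCPUS];
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++) {
		area[cpu] = malloc(PERCPU_SIZE);
		if (!area[cpu])
			return 1;
		memcpy(area[cpu], template_image, PERCPU_SIZE);
		per_cpu_offset[cpu] = (intptr_t)area[cpu] -
				      (intptr_t)template_image;
		printf("cpu %d: area %p, offset %ld\n",
		       cpu, (void *)area[cpu], (long)per_cpu_offset[cpu]);
	}
	for (cpu = 0; cpu < NCPUS; cpu++)
		free(area[cpu]);
	return 0;
}
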
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index df0587f24c54..7cdcd16885ed 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -50,27 +50,23 @@
50# define FIX_EFLAGS __FIX_EFLAGS 50# define FIX_EFLAGS __FIX_EFLAGS
51#endif 51#endif
52 52
53#define COPY(x) { \ 53#define COPY(x) do { \
54 err |= __get_user(regs->x, &sc->x); \ 54 get_user_ex(regs->x, &sc->x); \
55} 55} while (0)
56 56
57#define COPY_SEG(seg) { \ 57#define GET_SEG(seg) ({ \
58 unsigned short tmp; \ 58 unsigned short tmp; \
59 err |= __get_user(tmp, &sc->seg); \ 59 get_user_ex(tmp, &sc->seg); \
60 regs->seg = tmp; \ 60 tmp; \
61} 61})
62 62
63#define COPY_SEG_CPL3(seg) { \ 63#define COPY_SEG(seg) do { \
64 unsigned short tmp; \ 64 regs->seg = GET_SEG(seg); \
65 err |= __get_user(tmp, &sc->seg); \ 65} while (0)
66 regs->seg = tmp | 3; \
67}
68 66
69#define GET_SEG(seg) { \ 67#define COPY_SEG_CPL3(seg) do { \
70 unsigned short tmp; \ 68 regs->seg = GET_SEG(seg) | 3; \
71 err |= __get_user(tmp, &sc->seg); \ 69} while (0)
72 loadsegment(seg, tmp); \
73}
74 70
75static int 71static int
76restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, 72restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
@@ -83,45 +79,49 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
83 /* Always make any pending restarted system calls return -EINTR */ 79 /* Always make any pending restarted system calls return -EINTR */
84 current_thread_info()->restart_block.fn = do_no_restart_syscall; 80 current_thread_info()->restart_block.fn = do_no_restart_syscall;
85 81
82 get_user_try {
83
86#ifdef CONFIG_X86_32 84#ifdef CONFIG_X86_32
87 GET_SEG(gs); 85 set_user_gs(regs, GET_SEG(gs));
88 COPY_SEG(fs); 86 COPY_SEG(fs);
89 COPY_SEG(es); 87 COPY_SEG(es);
90 COPY_SEG(ds); 88 COPY_SEG(ds);
91#endif /* CONFIG_X86_32 */ 89#endif /* CONFIG_X86_32 */
92 90
93 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx); 91 COPY(di); COPY(si); COPY(bp); COPY(sp); COPY(bx);
94 COPY(dx); COPY(cx); COPY(ip); 92 COPY(dx); COPY(cx); COPY(ip);
95 93
96#ifdef CONFIG_X86_64 94#ifdef CONFIG_X86_64
97 COPY(r8); 95 COPY(r8);
98 COPY(r9); 96 COPY(r9);
99 COPY(r10); 97 COPY(r10);
100 COPY(r11); 98 COPY(r11);
101 COPY(r12); 99 COPY(r12);
102 COPY(r13); 100 COPY(r13);
103 COPY(r14); 101 COPY(r14);
104 COPY(r15); 102 COPY(r15);
105#endif /* CONFIG_X86_64 */ 103#endif /* CONFIG_X86_64 */
106 104
107#ifdef CONFIG_X86_32 105#ifdef CONFIG_X86_32
108 COPY_SEG_CPL3(cs); 106 COPY_SEG_CPL3(cs);
109 COPY_SEG_CPL3(ss); 107 COPY_SEG_CPL3(ss);
110#else /* !CONFIG_X86_32 */ 108#else /* !CONFIG_X86_32 */
111 /* Kernel saves and restores only the CS segment register on signals, 109 /* Kernel saves and restores only the CS segment register on signals,
112 * which is the bare minimum needed to allow mixed 32/64-bit code. 110 * which is the bare minimum needed to allow mixed 32/64-bit code.
113 * App's signal handler can save/restore other segments if needed. */ 111 * App's signal handler can save/restore other segments if needed. */
114 COPY_SEG_CPL3(cs); 112 COPY_SEG_CPL3(cs);
115#endif /* CONFIG_X86_32 */ 113#endif /* CONFIG_X86_32 */
116 114
117 err |= __get_user(tmpflags, &sc->flags); 115 get_user_ex(tmpflags, &sc->flags);
118 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); 116 regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
119 regs->orig_ax = -1; /* disable syscall checks */ 117 regs->orig_ax = -1; /* disable syscall checks */
118
119 get_user_ex(buf, &sc->fpstate);
120 err |= restore_i387_xstate(buf);
120 121
121 err |= __get_user(buf, &sc->fpstate); 122 get_user_ex(*pax, &sc->ax);
122 err |= restore_i387_xstate(buf); 123 } get_user_catch(err);
123 124
124 err |= __get_user(*pax, &sc->ax);
125 return err; 125 return err;
126} 126}
127 127
@@ -131,57 +131,55 @@ setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
131{ 131{
132 int err = 0; 132 int err = 0;
133 133
134#ifdef CONFIG_X86_32 134 put_user_try {
135 {
136 unsigned int tmp;
137 135
138 savesegment(gs, tmp); 136#ifdef CONFIG_X86_32
139 err |= __put_user(tmp, (unsigned int __user *)&sc->gs); 137 put_user_ex(get_user_gs(regs), (unsigned int __user *)&sc->gs);
140 } 138 put_user_ex(regs->fs, (unsigned int __user *)&sc->fs);
141 err |= __put_user(regs->fs, (unsigned int __user *)&sc->fs); 139 put_user_ex(regs->es, (unsigned int __user *)&sc->es);
142 err |= __put_user(regs->es, (unsigned int __user *)&sc->es); 140 put_user_ex(regs->ds, (unsigned int __user *)&sc->ds);
143 err |= __put_user(regs->ds, (unsigned int __user *)&sc->ds);
144#endif /* CONFIG_X86_32 */ 141#endif /* CONFIG_X86_32 */
145 142
146 err |= __put_user(regs->di, &sc->di); 143 put_user_ex(regs->di, &sc->di);
147 err |= __put_user(regs->si, &sc->si); 144 put_user_ex(regs->si, &sc->si);
148 err |= __put_user(regs->bp, &sc->bp); 145 put_user_ex(regs->bp, &sc->bp);
149 err |= __put_user(regs->sp, &sc->sp); 146 put_user_ex(regs->sp, &sc->sp);
150 err |= __put_user(regs->bx, &sc->bx); 147 put_user_ex(regs->bx, &sc->bx);
151 err |= __put_user(regs->dx, &sc->dx); 148 put_user_ex(regs->dx, &sc->dx);
152 err |= __put_user(regs->cx, &sc->cx); 149 put_user_ex(regs->cx, &sc->cx);
153 err |= __put_user(regs->ax, &sc->ax); 150 put_user_ex(regs->ax, &sc->ax);
154#ifdef CONFIG_X86_64 151#ifdef CONFIG_X86_64
155 err |= __put_user(regs->r8, &sc->r8); 152 put_user_ex(regs->r8, &sc->r8);
156 err |= __put_user(regs->r9, &sc->r9); 153 put_user_ex(regs->r9, &sc->r9);
157 err |= __put_user(regs->r10, &sc->r10); 154 put_user_ex(regs->r10, &sc->r10);
158 err |= __put_user(regs->r11, &sc->r11); 155 put_user_ex(regs->r11, &sc->r11);
159 err |= __put_user(regs->r12, &sc->r12); 156 put_user_ex(regs->r12, &sc->r12);
160 err |= __put_user(regs->r13, &sc->r13); 157 put_user_ex(regs->r13, &sc->r13);
161 err |= __put_user(regs->r14, &sc->r14); 158 put_user_ex(regs->r14, &sc->r14);
162 err |= __put_user(regs->r15, &sc->r15); 159 put_user_ex(regs->r15, &sc->r15);
163#endif /* CONFIG_X86_64 */ 160#endif /* CONFIG_X86_64 */
164 161
165 err |= __put_user(current->thread.trap_no, &sc->trapno); 162 put_user_ex(current->thread.trap_no, &sc->trapno);
166 err |= __put_user(current->thread.error_code, &sc->err); 163 put_user_ex(current->thread.error_code, &sc->err);
167 err |= __put_user(regs->ip, &sc->ip); 164 put_user_ex(regs->ip, &sc->ip);
168#ifdef CONFIG_X86_32 165#ifdef CONFIG_X86_32
169 err |= __put_user(regs->cs, (unsigned int __user *)&sc->cs); 166 put_user_ex(regs->cs, (unsigned int __user *)&sc->cs);
170 err |= __put_user(regs->flags, &sc->flags); 167 put_user_ex(regs->flags, &sc->flags);
171 err |= __put_user(regs->sp, &sc->sp_at_signal); 168 put_user_ex(regs->sp, &sc->sp_at_signal);
172 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); 169 put_user_ex(regs->ss, (unsigned int __user *)&sc->ss);
173#else /* !CONFIG_X86_32 */ 170#else /* !CONFIG_X86_32 */
174 err |= __put_user(regs->flags, &sc->flags); 171 put_user_ex(regs->flags, &sc->flags);
175 err |= __put_user(regs->cs, &sc->cs); 172 put_user_ex(regs->cs, &sc->cs);
176 err |= __put_user(0, &sc->gs); 173 put_user_ex(0, &sc->gs);
177 err |= __put_user(0, &sc->fs); 174 put_user_ex(0, &sc->fs);
178#endif /* CONFIG_X86_32 */ 175#endif /* CONFIG_X86_32 */
179 176
180 err |= __put_user(fpstate, &sc->fpstate); 177 put_user_ex(fpstate, &sc->fpstate);
181 178
182 /* non-iBCS2 extensions.. */ 179 /* non-iBCS2 extensions.. */
183 err |= __put_user(mask, &sc->oldmask); 180 put_user_ex(mask, &sc->oldmask);
184 err |= __put_user(current->thread.cr2, &sc->cr2); 181 put_user_ex(current->thread.cr2, &sc->cr2);
182 } put_user_catch(err);
185 183
186 return err; 184 return err;
187} 185}
@@ -336,43 +334,41 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
336 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 334 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
337 return -EFAULT; 335 return -EFAULT;
338 336
339 err |= __put_user(sig, &frame->sig); 337 put_user_try {
340 err |= __put_user(&frame->info, &frame->pinfo); 338 put_user_ex(sig, &frame->sig);
341 err |= __put_user(&frame->uc, &frame->puc); 339 put_user_ex(&frame->info, &frame->pinfo);
342 err |= copy_siginfo_to_user(&frame->info, info); 340 put_user_ex(&frame->uc, &frame->puc);
343 if (err) 341 err |= copy_siginfo_to_user(&frame->info, info);
344 return -EFAULT;
345
346 /* Create the ucontext. */
347 if (cpu_has_xsave)
348 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
349 else
350 err |= __put_user(0, &frame->uc.uc_flags);
351 err |= __put_user(0, &frame->uc.uc_link);
352 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
353 err |= __put_user(sas_ss_flags(regs->sp),
354 &frame->uc.uc_stack.ss_flags);
355 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
356 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
357 regs, set->sig[0]);
358 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
359 if (err)
360 return -EFAULT;
361 342
362 /* Set up to return from userspace. */ 343 /* Create the ucontext. */
363 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn); 344 if (cpu_has_xsave)
364 if (ka->sa.sa_flags & SA_RESTORER) 345 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
365 restorer = ka->sa.sa_restorer; 346 else
366 err |= __put_user(restorer, &frame->pretcode); 347 put_user_ex(0, &frame->uc.uc_flags);
348 put_user_ex(0, &frame->uc.uc_link);
349 put_user_ex(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
350 put_user_ex(sas_ss_flags(regs->sp),
351 &frame->uc.uc_stack.ss_flags);
352 put_user_ex(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
353 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
354 regs, set->sig[0]);
355 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
356
357 /* Set up to return from userspace. */
358 restorer = VDSO32_SYMBOL(current->mm->context.vdso, rt_sigreturn);
359 if (ka->sa.sa_flags & SA_RESTORER)
360 restorer = ka->sa.sa_restorer;
361 put_user_ex(restorer, &frame->pretcode);
367 362
368 /* 363 /*
369 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80 364 * This is movl $__NR_rt_sigreturn, %ax ; int $0x80
370 * 365 *
371 * WE DO NOT USE IT ANY MORE! It's only left here for historical 366 * WE DO NOT USE IT ANY MORE! It's only left here for historical
372 * reasons and because gdb uses it as a signature to notice 367 * reasons and because gdb uses it as a signature to notice
373 * signal handler stack frames. 368 * signal handler stack frames.
374 */ 369 */
375 err |= __put_user(*((u64 *)&rt_retcode), (u64 *)frame->retcode); 370 put_user_ex(*((u64 *)&rt_retcode), (u64 *)frame->retcode);
371 } put_user_catch(err);
376 372
377 if (err) 373 if (err)
378 return -EFAULT; 374 return -EFAULT;
@@ -436,28 +432,30 @@ static int __setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
436 return -EFAULT; 432 return -EFAULT;
437 } 433 }
438 434
439 /* Create the ucontext. */ 435 put_user_try {
440 if (cpu_has_xsave) 436 /* Create the ucontext. */
441 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags); 437 if (cpu_has_xsave)
442 else 438 put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
443 err |= __put_user(0, &frame->uc.uc_flags); 439 else
444 err |= __put_user(0, &frame->uc.uc_link); 440 put_user_ex(0, &frame->uc.uc_flags);
445 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 441 put_user_ex(0, &frame->uc.uc_link);
446 err |= __put_user(sas_ss_flags(regs->sp), 442 put_user_ex(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
447 &frame->uc.uc_stack.ss_flags); 443 put_user_ex(sas_ss_flags(regs->sp),
448 err |= __put_user(me->sas_ss_size, &frame->uc.uc_stack.ss_size); 444 &frame->uc.uc_stack.ss_flags);
449 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]); 445 put_user_ex(me->sas_ss_size, &frame->uc.uc_stack.ss_size);
450 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 446 err |= setup_sigcontext(&frame->uc.uc_mcontext, fp, regs, set->sig[0]);
451 447 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
452 /* Set up to return from userspace. If provided, use a stub 448
453 already in userspace. */ 449 /* Set up to return from userspace. If provided, use a stub
454 /* x86-64 should always use SA_RESTORER. */ 450 already in userspace. */
455 if (ka->sa.sa_flags & SA_RESTORER) { 451 /* x86-64 should always use SA_RESTORER. */
456 err |= __put_user(ka->sa.sa_restorer, &frame->pretcode); 452 if (ka->sa.sa_flags & SA_RESTORER) {
457 } else { 453 put_user_ex(ka->sa.sa_restorer, &frame->pretcode);
458 /* could use a vstub here */ 454 } else {
459 return -EFAULT; 455 /* could use a vstub here */
460 } 456 err |= -EFAULT;
457 }
458 } put_user_catch(err);
461 459
462 if (err) 460 if (err)
463 return -EFAULT; 461 return -EFAULT;
@@ -509,31 +507,41 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
509 struct old_sigaction __user *oact) 507 struct old_sigaction __user *oact)
510{ 508{
511 struct k_sigaction new_ka, old_ka; 509 struct k_sigaction new_ka, old_ka;
512 int ret; 510 int ret = 0;
513 511
514 if (act) { 512 if (act) {
515 old_sigset_t mask; 513 old_sigset_t mask;
516 514
517 if (!access_ok(VERIFY_READ, act, sizeof(*act)) || 515 if (!access_ok(VERIFY_READ, act, sizeof(*act)))
518 __get_user(new_ka.sa.sa_handler, &act->sa_handler) ||
519 __get_user(new_ka.sa.sa_restorer, &act->sa_restorer))
520 return -EFAULT; 516 return -EFAULT;
521 517
522 __get_user(new_ka.sa.sa_flags, &act->sa_flags); 518 get_user_try {
523 __get_user(mask, &act->sa_mask); 519 get_user_ex(new_ka.sa.sa_handler, &act->sa_handler);
520 get_user_ex(new_ka.sa.sa_flags, &act->sa_flags);
521 get_user_ex(mask, &act->sa_mask);
522 get_user_ex(new_ka.sa.sa_restorer, &act->sa_restorer);
523 } get_user_catch(ret);
524
525 if (ret)
526 return -EFAULT;
524 siginitset(&new_ka.sa.sa_mask, mask); 527 siginitset(&new_ka.sa.sa_mask, mask);
525 } 528 }
526 529
527 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL); 530 ret = do_sigaction(sig, act ? &new_ka : NULL, oact ? &old_ka : NULL);
528 531
529 if (!ret && oact) { 532 if (!ret && oact) {
530 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)) || 533 if (!access_ok(VERIFY_WRITE, oact, sizeof(*oact)))
531 __put_user(old_ka.sa.sa_handler, &oact->sa_handler) ||
532 __put_user(old_ka.sa.sa_restorer, &oact->sa_restorer))
533 return -EFAULT; 534 return -EFAULT;
534 535
535 __put_user(old_ka.sa.sa_flags, &oact->sa_flags); 536 put_user_try {
536 __put_user(old_ka.sa.sa_mask.sig[0], &oact->sa_mask); 537 put_user_ex(old_ka.sa.sa_handler, &oact->sa_handler);
538 put_user_ex(old_ka.sa.sa_flags, &oact->sa_flags);
539 put_user_ex(old_ka.sa.sa_mask.sig[0], &oact->sa_mask);
540 put_user_ex(old_ka.sa.sa_restorer, &oact->sa_restorer);
541 } put_user_catch(ret);
542
543 if (ret)
544 return -EFAULT;
537 } 545 }
538 546
539 return ret; 547 return ret;
@@ -541,14 +549,9 @@ sys_sigaction(int sig, const struct old_sigaction __user *act,
541#endif /* CONFIG_X86_32 */ 549#endif /* CONFIG_X86_32 */
542 550
543#ifdef CONFIG_X86_32 551#ifdef CONFIG_X86_32
544asmlinkage int sys_sigaltstack(unsigned long bx) 552int sys_sigaltstack(struct pt_regs *regs)
545{ 553{
546 /* 554 const stack_t __user *uss = (const stack_t __user *)regs->bx;
547 * This is needed to make gcc realize it doesn't own the
548 * "struct pt_regs"
549 */
550 struct pt_regs *regs = (struct pt_regs *)&bx;
551 const stack_t __user *uss = (const stack_t __user *)bx;
552 stack_t __user *uoss = (stack_t __user *)regs->cx; 555 stack_t __user *uoss = (stack_t __user *)regs->cx;
553 556
554 return do_sigaltstack(uss, uoss, regs->sp); 557 return do_sigaltstack(uss, uoss, regs->sp);
@@ -566,14 +569,12 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
566 * Do a signal return; undo the signal stack. 569 * Do a signal return; undo the signal stack.
567 */ 570 */
568#ifdef CONFIG_X86_32 571#ifdef CONFIG_X86_32
569asmlinkage unsigned long sys_sigreturn(unsigned long __unused) 572unsigned long sys_sigreturn(struct pt_regs *regs)
570{ 573{
571 struct sigframe __user *frame; 574 struct sigframe __user *frame;
572 struct pt_regs *regs;
573 unsigned long ax; 575 unsigned long ax;
574 sigset_t set; 576 sigset_t set;
575 577
576 regs = (struct pt_regs *) &__unused;
577 frame = (struct sigframe __user *)(regs->sp - 8); 578 frame = (struct sigframe __user *)(regs->sp - 8);
578 579
579 if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) 580 if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
@@ -600,7 +601,7 @@ badframe:
600} 601}
601#endif /* CONFIG_X86_32 */ 602#endif /* CONFIG_X86_32 */
602 603
603static long do_rt_sigreturn(struct pt_regs *regs) 604long sys_rt_sigreturn(struct pt_regs *regs)
604{ 605{
605 struct rt_sigframe __user *frame; 606 struct rt_sigframe __user *frame;
606 unsigned long ax; 607 unsigned long ax;
@@ -631,25 +632,6 @@ badframe:
631 return 0; 632 return 0;
632} 633}
633 634
634#ifdef CONFIG_X86_32
635/*
636 * Note: do not pass in pt_regs directly as with tail-call optimization
637 * GCC will incorrectly stomp on the caller's frame and corrupt user-space
638 * register state:
639 */
640asmlinkage int sys_rt_sigreturn(unsigned long __unused)
641{
642 struct pt_regs *regs = (struct pt_regs *)&__unused;
643
644 return do_rt_sigreturn(regs);
645}
646#else /* !CONFIG_X86_32 */
647asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
648{
649 return do_rt_sigreturn(regs);
650}
651#endif /* CONFIG_X86_32 */
652
653/* 635/*
654 * OK, we're invoking a handler: 636 * OK, we're invoking a handler:
655 */ 637 */
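
[Editor's note] restore_sigcontext(), setup_sigcontext() and the frame-setup paths now batch their user accesses inside get_user_try/put_user_try sections, so the individual get_user_ex()/put_user_ex() calls no longer each carry an err |= ... check. The real x86 macros rely on exception tables; the stand-in below only mimics the call-site shape with a simulated accessor and says nothing about the kernel's actual macro bodies.

/*
 * User-space stand-in: a try/ex/catch trio that accumulates a status
 * flag, purely to show the control flow the converted code relies on.
 */
#include <stdio.h>

static int sim_put(int val, int *ptr)
{
	if (!ptr)
		return -1;		/* simulated fault */
	*ptr = val;
	return 0;
}

#define put_user_try		{ int __err = 0;
#define put_user_ex(val, ptr)	do { __err |= sim_put((val), (ptr)); } while (0)
#define put_user_catch(err)	(err) |= __err; }

int main(void)
{
	int a = 0, b = 0, err = 0;

	put_user_try {
		put_user_ex(1, &a);
		put_user_ex(2, &b);
	} put_user_catch(err);

	printf("err=%d a=%d b=%d\n", err, a, b);
	return err;
}
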
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index e6faa3316bd2..eaaffae31cc0 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -2,7 +2,7 @@
2 * Intel SMP support routines. 2 * Intel SMP support routines.
3 * 3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
5 * (c) 1998-99, 2000 Ingo Molnar <mingo@redhat.com> 5 * (c) 1998-99, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6 * (c) 2002,2003 Andi Kleen, SuSE Labs. 6 * (c) 2002,2003 Andi Kleen, SuSE Labs.
7 * 7 *
8 * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com> 8 * i386 and x86_64 integration by Glauber Costa <gcosta@redhat.com>
@@ -26,8 +26,7 @@
26#include <asm/tlbflush.h> 26#include <asm/tlbflush.h>
27#include <asm/mmu_context.h> 27#include <asm/mmu_context.h>
28#include <asm/proto.h> 28#include <asm/proto.h>
29#include <mach_ipi.h> 29#include <asm/genapic.h>
30#include <mach_apic.h>
31/* 30/*
32 * Some notes on x86 processor bugs affecting SMP operation: 31 * Some notes on x86 processor bugs affecting SMP operation:
33 * 32 *
@@ -118,12 +117,12 @@ static void native_smp_send_reschedule(int cpu)
118 WARN_ON(1); 117 WARN_ON(1);
119 return; 118 return;
120 } 119 }
121 send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); 120 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
122} 121}
123 122
124void native_send_call_func_single_ipi(int cpu) 123void native_send_call_func_single_ipi(int cpu)
125{ 124{
126 send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR); 125 apic->send_IPI_mask(cpumask_of(cpu), CALL_FUNCTION_SINGLE_VECTOR);
127} 126}
128 127
129void native_send_call_func_ipi(const struct cpumask *mask) 128void native_send_call_func_ipi(const struct cpumask *mask)
@@ -131,7 +130,7 @@ void native_send_call_func_ipi(const struct cpumask *mask)
131 cpumask_var_t allbutself; 130 cpumask_var_t allbutself;
132 131
133 if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) { 132 if (!alloc_cpumask_var(&allbutself, GFP_ATOMIC)) {
134 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 133 apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
135 return; 134 return;
136 } 135 }
137 136
@@ -140,9 +139,9 @@ void native_send_call_func_ipi(const struct cpumask *mask)
140 139
141 if (cpumask_equal(mask, allbutself) && 140 if (cpumask_equal(mask, allbutself) &&
142 cpumask_equal(cpu_online_mask, cpu_callout_mask)) 141 cpumask_equal(cpu_online_mask, cpu_callout_mask))
143 send_IPI_allbutself(CALL_FUNCTION_VECTOR); 142 apic->send_IPI_allbutself(CALL_FUNCTION_VECTOR);
144 else 143 else
145 send_IPI_mask(mask, CALL_FUNCTION_VECTOR); 144 apic->send_IPI_mask(mask, CALL_FUNCTION_VECTOR);
146 145
147 free_cpumask_var(allbutself); 146 free_cpumask_var(allbutself);
148} 147}
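
[Editor's note] native_send_call_func_ipi() keeps its old shortcut through the new apic indirection: when the target mask is exactly "every online CPU except the sender" (and all online CPUs have called in), one all-but-self broadcast replaces a per-CPU IPI sequence. A toy sketch of that decision with a plain bitmask standing in for cpumask_t; the callout-mask comparison is omitted for brevity.

/*
 * Sketch: choose between a broadcast and a per-CPU IPI sequence based on
 * whether the requested mask equals "online minus self".
 */
#include <stdio.h>

static void send_ipi_mask(unsigned long mask, int vector)
{
	printf("per-CPU IPI sequence: mask 0x%lx, vector 0x%x\n", mask, vector);
}

static void send_ipi_allbutself(int vector)
{
	printf("all-but-self broadcast: vector 0x%x\n", vector);
}

static void send_call_func_ipi(unsigned long mask, unsigned long online,
			       int self, int vector)
{
	unsigned long allbutself = online & ~(1UL << self);

	if (mask == allbutself)
		send_ipi_allbutself(vector);
	else
		send_ipi_mask(mask, vector);
}

int main(void)
{
	unsigned long online = 0xf;			/* CPUs 0-3 online     */

	send_call_func_ipi(0xe, online, 0, 0x20);	/* CPUs 1-3: broadcast */
	send_call_func_ipi(0x6, online, 0, 0x20);	/* CPUs 1-2: sequence  */
	return 0;
}
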
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bb1a3b1fc87f..af57f88186e7 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -2,7 +2,7 @@
2 * x86 SMP booting functions 2 * x86 SMP booting functions
3 * 3 *
4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk> 4 * (c) 1995 Alan Cox, Building #3 <alan@lxorguk.ukuu.org.uk>
5 * (c) 1998, 1999, 2000 Ingo Molnar <mingo@redhat.com> 5 * (c) 1998, 1999, 2000, 2009 Ingo Molnar <mingo@redhat.com>
6 * Copyright 2001 Andi Kleen, SuSE Labs. 6 * Copyright 2001 Andi Kleen, SuSE Labs.
7 * 7 *
8 * Much of the core SMP work is based on previous work by Thomas Radke, to 8 * Much of the core SMP work is based on previous work by Thomas Radke, to
@@ -53,7 +53,6 @@
53#include <asm/nmi.h> 53#include <asm/nmi.h>
54#include <asm/irq.h> 54#include <asm/irq.h>
55#include <asm/idle.h> 55#include <asm/idle.h>
56#include <asm/smp.h>
57#include <asm/trampoline.h> 56#include <asm/trampoline.h>
58#include <asm/cpu.h> 57#include <asm/cpu.h>
59#include <asm/numa.h> 58#include <asm/numa.h>
@@ -63,11 +62,11 @@
63#include <asm/vmi.h> 62#include <asm/vmi.h>
64#include <asm/genapic.h> 63#include <asm/genapic.h>
65#include <asm/setup.h> 64#include <asm/setup.h>
65#include <asm/uv/uv.h>
66#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
67 67
68#include <mach_apic.h> 68#include <asm/genapic.h>
69#include <mach_wakecpu.h> 69#include <asm/smpboot_hooks.h>
70#include <smpboot_hooks.h>
71 70
72#ifdef CONFIG_X86_32 71#ifdef CONFIG_X86_32
73u8 apicid_2_node[MAX_APICID]; 72u8 apicid_2_node[MAX_APICID];
@@ -163,7 +162,7 @@ static void map_cpu_to_logical_apicid(void)
163{ 162{
164 int cpu = smp_processor_id(); 163 int cpu = smp_processor_id();
165 int apicid = logical_smp_processor_id(); 164 int apicid = logical_smp_processor_id();
166 int node = apicid_to_node(apicid); 165 int node = apic->apicid_to_node(apicid);
167 166
168 if (!node_online(node)) 167 if (!node_online(node))
169 node = first_online_node; 168 node = first_online_node;
@@ -196,7 +195,8 @@ static void __cpuinit smp_callin(void)
196 * our local APIC. We have to wait for the IPI or we'll 195 * our local APIC. We have to wait for the IPI or we'll
197 * lock up on an APIC access. 196 * lock up on an APIC access.
198 */ 197 */
199 wait_for_init_deassert(&init_deasserted); 198 if (apic->wait_for_init_deassert)
199 apic->wait_for_init_deassert(&init_deasserted);
200 200
201 /* 201 /*
202 * (This works even if the APIC is not enabled.) 202 * (This works even if the APIC is not enabled.)
@@ -243,7 +243,8 @@ static void __cpuinit smp_callin(void)
243 */ 243 */
244 244
245 pr_debug("CALLIN, before setup_local_APIC().\n"); 245 pr_debug("CALLIN, before setup_local_APIC().\n");
246 smp_callin_clear_local_apic(); 246 if (apic->smp_callin_clear_local_apic)
247 apic->smp_callin_clear_local_apic();
247 setup_local_APIC(); 248 setup_local_APIC();
248 end_local_APIC_setup(); 249 end_local_APIC_setup();
249 map_cpu_to_logical_apicid(); 250 map_cpu_to_logical_apicid();
@@ -583,7 +584,7 @@ wakeup_secondary_cpu_via_nmi(int logical_apicid, unsigned long start_eip)
583 /* Target chip */ 584 /* Target chip */
584 /* Boot on the stack */ 585 /* Boot on the stack */
585 /* Kick the second */ 586 /* Kick the second */
586 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid); 587 apic_icr_write(APIC_DM_NMI | apic->dest_logical, logical_apicid);
587 588
588 pr_debug("Waiting for send to finish...\n"); 589 pr_debug("Waiting for send to finish...\n");
589 send_status = safe_apic_wait_icr_idle(); 590 send_status = safe_apic_wait_icr_idle();
@@ -745,57 +746,11 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
745 complete(&c_idle->done); 746 complete(&c_idle->done);
746} 747}
747 748
748#ifdef CONFIG_X86_64
749
750/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
751static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
752{
753 if (!after_bootmem)
754 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
755}
756
757/*
758 * Allocate node local memory for the AP pda.
759 *
760 * Must be called after the _cpu_pda pointer table is initialized.
761 */
762int __cpuinit get_local_pda(int cpu)
763{
764 struct x8664_pda *oldpda, *newpda;
765 unsigned long size = sizeof(struct x8664_pda);
766 int node = cpu_to_node(cpu);
767
768 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
769 return 0;
770
771 oldpda = cpu_pda(cpu);
772 newpda = kmalloc_node(size, GFP_ATOMIC, node);
773 if (!newpda) {
774 printk(KERN_ERR "Could not allocate node local PDA "
775 "for CPU %d on node %d\n", cpu, node);
776
777 if (oldpda)
778 return 0; /* have a usable pda */
779 else
780 return -1;
781 }
782
783 if (oldpda) {
784 memcpy(newpda, oldpda, size);
785 free_bootmem_pda(oldpda);
786 }
787
788 newpda->in_bootmem = 0;
789 cpu_pda(cpu) = newpda;
790 return 0;
791}
792#endif /* CONFIG_X86_64 */
793
794static int __cpuinit do_boot_cpu(int apicid, int cpu) 749static int __cpuinit do_boot_cpu(int apicid, int cpu)
795/* 750/*
796 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 751 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
797 * (ie clustered apic addressing mode), this is a LOGICAL apic ID. 752 * (ie clustered apic addressing mode), this is a LOGICAL apic ID.
798 * Returns zero if CPU booted OK, else error code from wakeup_secondary_cpu. 753 * Returns zero if CPU booted OK, else error code from ->wakeup_cpu.
799 */ 754 */
800{ 755{
801 unsigned long boot_error = 0; 756 unsigned long boot_error = 0;
@@ -808,16 +763,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
808 }; 763 };
809 INIT_WORK(&c_idle.work, do_fork_idle); 764 INIT_WORK(&c_idle.work, do_fork_idle);
810 765
811#ifdef CONFIG_X86_64
812 /* Allocate node local memory for AP pdas */
813 if (cpu > 0) {
814 boot_error = get_local_pda(cpu);
815 if (boot_error)
816 goto restore_state;
817 /* if can't get pda memory, can't start cpu */
818 }
819#endif
820
821 alternatives_smp_switch(1); 766 alternatives_smp_switch(1);
822 767
823 c_idle.idle = get_idle_for_cpu(cpu); 768 c_idle.idle = get_idle_for_cpu(cpu);
@@ -847,14 +792,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
847 792
848 set_idle_for_cpu(cpu, c_idle.idle); 793 set_idle_for_cpu(cpu, c_idle.idle);
849do_rest: 794do_rest:
850#ifdef CONFIG_X86_32
851 per_cpu(current_task, cpu) = c_idle.idle; 795 per_cpu(current_task, cpu) = c_idle.idle;
852 init_gdt(cpu); 796#ifdef CONFIG_X86_32
853 /* Stack for startup_32 can be just as for start_secondary onwards */ 797 /* Stack for startup_32 can be just as for start_secondary onwards */
854 irq_ctx_init(cpu); 798 irq_ctx_init(cpu);
855#else 799#else
856 cpu_pda(cpu)->pcurrent = c_idle.idle;
857 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 800 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
801 initial_gs = per_cpu_offset(cpu);
802 per_cpu(kernel_stack, cpu) =
803 (unsigned long)task_stack_page(c_idle.idle) -
804 KERNEL_STACK_OFFSET + THREAD_SIZE;
858#endif 805#endif
859 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 806 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
860 initial_code = (unsigned long)start_secondary; 807 initial_code = (unsigned long)start_secondary;
@@ -878,7 +825,8 @@ do_rest:
878 825
879 pr_debug("Setting warm reset code and vector.\n"); 826 pr_debug("Setting warm reset code and vector.\n");
880 827
881 store_NMI_vector(&nmi_high, &nmi_low); 828 if (apic->store_NMI_vector)
829 apic->store_NMI_vector(&nmi_high, &nmi_low);
882 830
883 smpboot_setup_warm_reset_vector(start_ip); 831 smpboot_setup_warm_reset_vector(start_ip);
884 /* 832 /*
@@ -893,7 +841,7 @@ do_rest:
893 /* 841 /*
894 * Starting actual IPI sequence... 842 * Starting actual IPI sequence...
895 */ 843 */
896 boot_error = wakeup_secondary_cpu(apicid, start_ip); 844 boot_error = apic->wakeup_cpu(apicid, start_ip);
897 845
898 if (!boot_error) { 846 if (!boot_error) {
899 /* 847 /*
@@ -927,13 +875,11 @@ do_rest:
927 else 875 else
928 /* trampoline code not run */ 876 /* trampoline code not run */
929 printk(KERN_ERR "Not responding.\n"); 877 printk(KERN_ERR "Not responding.\n");
930 if (get_uv_system_type() != UV_NON_UNIQUE_APIC) 878 if (apic->inquire_remote_apic)
931 inquire_remote_apic(apicid); 879 apic->inquire_remote_apic(apicid);
932 } 880 }
933 } 881 }
934#ifdef CONFIG_X86_64 882
935restore_state:
936#endif
937 if (boot_error) { 883 if (boot_error) {
938 /* Try to put things back the way they were before ... */ 884 /* Try to put things back the way they were before ... */
939 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 885 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -961,7 +907,7 @@ restore_state:
961 907
962int __cpuinit native_cpu_up(unsigned int cpu) 908int __cpuinit native_cpu_up(unsigned int cpu)
963{ 909{
964 int apicid = cpu_present_to_apicid(cpu); 910 int apicid = apic->cpu_present_to_apicid(cpu);
965 unsigned long flags; 911 unsigned long flags;
966 int err; 912 int err;
967 913
@@ -1054,14 +1000,14 @@ static int __init smp_sanity_check(unsigned max_cpus)
1054{ 1000{
1055 preempt_disable(); 1001 preempt_disable();
1056 1002
1057#if defined(CONFIG_X86_PC) && defined(CONFIG_X86_32) 1003#if !defined(CONFIG_X86_BIGSMP) && defined(CONFIG_X86_32)
1058 if (def_to_bigsmp && nr_cpu_ids > 8) { 1004 if (def_to_bigsmp && nr_cpu_ids > 8) {
1059 unsigned int cpu; 1005 unsigned int cpu;
1060 unsigned nr; 1006 unsigned nr;
1061 1007
1062 printk(KERN_WARNING 1008 printk(KERN_WARNING
1063 "More than 8 CPUs detected - skipping them.\n" 1009 "More than 8 CPUs detected - skipping them.\n"
1064 "Use CONFIG_X86_GENERICARCH and CONFIG_X86_BIGSMP.\n"); 1010 "Use CONFIG_X86_BIGSMP.\n");
1065 1011
1066 nr = 0; 1012 nr = 0;
1067 for_each_present_cpu(cpu) { 1013 for_each_present_cpu(cpu) {
@@ -1107,7 +1053,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1107 * Should not be necessary because the MP table should list the boot 1053 * Should not be necessary because the MP table should list the boot
1108 * CPU too, but we do it for the sake of robustness anyway. 1054 * CPU too, but we do it for the sake of robustness anyway.
1109 */ 1055 */
1110 if (!check_phys_apicid_present(boot_cpu_physical_apicid)) { 1056 if (!apic->check_phys_apicid_present(boot_cpu_physical_apicid)) {
1111 printk(KERN_NOTICE 1057 printk(KERN_NOTICE
1112 "weird, boot CPU (#%d) not listed by the BIOS.\n", 1058 "weird, boot CPU (#%d) not listed by the BIOS.\n",
1113 boot_cpu_physical_apicid); 1059 boot_cpu_physical_apicid);
@@ -1125,6 +1071,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1125 printk(KERN_ERR "... forcing use of dummy APIC emulation." 1071 printk(KERN_ERR "... forcing use of dummy APIC emulation."
1126 "(tell your hw vendor)\n"); 1072 "(tell your hw vendor)\n");
1127 smpboot_clear_io_apic(); 1073 smpboot_clear_io_apic();
1074 arch_disable_smp_support();
1128 return -1; 1075 return -1;
1129 } 1076 }
1130 1077
@@ -1183,7 +1130,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1183 1130
1184#ifdef CONFIG_X86_64 1131#ifdef CONFIG_X86_64
1185 enable_IR_x2apic(); 1132 enable_IR_x2apic();
1186 setup_apic_routing(); 1133 default_setup_apic_routing();
1187#endif 1134#endif
1188 1135
1189 if (smp_sanity_check(max_cpus) < 0) { 1136 if (smp_sanity_check(max_cpus) < 0) {
@@ -1218,7 +1165,8 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1218 1165
1219 map_cpu_to_logical_apicid(); 1166 map_cpu_to_logical_apicid();
1220 1167
1221 setup_portio_remap(); 1168 if (apic->setup_portio_remap)
1169 apic->setup_portio_remap();
1222 1170
1223 smpboot_setup_io_apic(); 1171 smpboot_setup_io_apic();
1224 /* 1172 /*
@@ -1240,10 +1188,7 @@ out:
1240void __init native_smp_prepare_boot_cpu(void) 1188void __init native_smp_prepare_boot_cpu(void)
1241{ 1189{
1242 int me = smp_processor_id(); 1190 int me = smp_processor_id();
1243#ifdef CONFIG_X86_32 1191 switch_to_new_gdt(me);
1244 init_gdt(me);
1245#endif
1246 switch_to_new_gdt();
1247 /* already set me in cpu_online_mask in boot_cpu_init() */ 1192 /* already set me in cpu_online_mask in boot_cpu_init() */
1248 cpumask_set_cpu(me, cpu_callout_mask); 1193 cpumask_set_cpu(me, cpu_callout_mask);
1249 per_cpu(cpu_state, me) = CPU_ONLINE; 1194 per_cpu(cpu_state, me) = CPU_ONLINE;
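
[Editor's note] Several former sub-arch hooks (wait_for_init_deassert, smp_callin_clear_local_apic, store_NMI_vector, setup_portio_remap, inquire_remote_apic) become optional members of the apic driver and are called only when non-NULL, instead of always existing as compile-time macros. A small sketch of that optional-hook convention; the names below are illustrative, not the kernel's.

/*
 * Illustrative only: a driver fills in just the optional callbacks it
 * needs, and callers test each pointer before using it.
 */
#include <stdio.h>

struct apic_hooks {
	void (*wait_for_init_deassert)(int *flag);
	void (*smp_callin_clear_local_apic)(void);
};

static void example_clear_local_apic(void)
{
	printf("driver-specific local APIC clear\n");
}

/* a driver that provides only one of the two optional hooks */
static struct apic_hooks hooks = {
	.wait_for_init_deassert		= NULL,
	.smp_callin_clear_local_apic	= example_clear_local_apic,
};

int main(void)
{
	int init_deasserted = 1;

	if (hooks.wait_for_init_deassert)	/* absent: silently skipped */
		hooks.wait_for_init_deassert(&init_deasserted);
	if (hooks.smp_callin_clear_local_apic)	/* present: called          */
		hooks.smp_callin_clear_local_apic();
	return 0;
}
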
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
deleted file mode 100644
index 397e309839dd..000000000000
--- a/arch/x86/kernel/smpcommon.c
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * SMP stuff which is common to all sub-architectures.
3 */
4#include <linux/module.h>
5#include <asm/smp.h>
6
7#ifdef CONFIG_X86_32
8DEFINE_PER_CPU(unsigned long, this_cpu_off);
9EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10
11/*
12 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 * (still using the master per-cpu area), or a CPU doing it for a
14 * secondary which will soon come up.
15 */
16__cpuinit void init_gdt(int cpu)
17{
18 struct desc_struct gdt;
19
20 pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
21 0x2 | DESCTYPE_S, 0x8);
22 gdt.s = 1;
23
24 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
26
27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
28 per_cpu(cpu_number, cpu) = cpu;
29}
30#endif
diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c
index 10786af95545..f7bddc2e37d1 100644
--- a/arch/x86/kernel/stacktrace.c
+++ b/arch/x86/kernel/stacktrace.c
@@ -1,7 +1,7 @@
1/* 1/*
2 * Stack trace management functions 2 * Stack trace management functions
3 * 3 *
4 * Copyright (C) 2006 Red Hat, Inc., Ingo Molnar <mingo@redhat.com> 4 * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar <mingo@redhat.com>
5 */ 5 */
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/stacktrace.h> 7#include <linux/stacktrace.h>
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index 7b987852e876..1e733eff9b33 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,8 +30,364 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h> 32#include <asm/bios_ebda.h>
33#include <asm/summit/mpparse.h>
34 33
34/*
35 * APIC driver for the IBM "Summit" chipset.
36 */
37#define APIC_DEFINITION 1
38#include <linux/threads.h>
39#include <linux/cpumask.h>
40#include <asm/mpspec.h>
41#include <asm/apic.h>
42#include <asm/smp.h>
43#include <asm/genapic.h>
44#include <asm/fixmap.h>
45#include <asm/apicdef.h>
46#include <asm/ipi.h>
47#include <linux/kernel.h>
48#include <linux/string.h>
49#include <linux/init.h>
50#include <linux/gfp.h>
51#include <linux/smp.h>
52
53static inline unsigned summit_get_apic_id(unsigned long x)
54{
55 return (x >> 24) & 0xFF;
56}
57
58static inline void summit_send_IPI_mask(const cpumask_t *mask, int vector)
59{
60 default_send_IPI_mask_sequence_logical(mask, vector);
61}
62
63static inline void summit_send_IPI_allbutself(int vector)
64{
65 cpumask_t mask = cpu_online_map;
66 cpu_clear(smp_processor_id(), mask);
67
68 if (!cpus_empty(mask))
69 summit_send_IPI_mask(&mask, vector);
70}
71
72static inline void summit_send_IPI_all(int vector)
73{
74 summit_send_IPI_mask(&cpu_online_map, vector);
75}
76
77#include <asm/tsc.h>
78
79extern int use_cyclone;
80
81#ifdef CONFIG_X86_SUMMIT_NUMA
82extern void setup_summit(void);
83#else
84#define setup_summit() {}
85#endif
86
87static inline int
88summit_mps_oem_check(struct mpc_table *mpc, char *oem, char *productid)
89{
90 if (!strncmp(oem, "IBM ENSW", 8) &&
91 (!strncmp(productid, "VIGIL SMP", 9)
92 || !strncmp(productid, "EXA", 3)
93 || !strncmp(productid, "RUTHLESS SMP", 12))){
94 mark_tsc_unstable("Summit based system");
95 use_cyclone = 1; /*enable cyclone-timer*/
96 setup_summit();
97 return 1;
98 }
99 return 0;
100}
101
102/* Hook from generic ACPI tables.c */
103static inline int summit_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
104{
105 if (!strncmp(oem_id, "IBM", 3) &&
106 (!strncmp(oem_table_id, "SERVIGIL", 8)
107 || !strncmp(oem_table_id, "EXA", 3))){
108 mark_tsc_unstable("Summit based system");
109 use_cyclone = 1; /*enable cyclone-timer*/
110 setup_summit();
111 return 1;
112 }
113 return 0;
114}
115
116struct rio_table_hdr {
117 unsigned char version; /* Version number of this data structure */
118 /* Version 3 adds chassis_num & WP_index */
119 unsigned char num_scal_dev; /* # of Scalability devices (Twisters for Vigil) */
120 unsigned char num_rio_dev; /* # of RIO I/O devices (Cyclones and Winnipegs) */
121} __attribute__((packed));
122
123struct scal_detail {
124 unsigned char node_id; /* Scalability Node ID */
125 unsigned long CBAR; /* Address of 1MB register space */
126 unsigned char port0node; /* Node ID port connected to: 0xFF=None */
127 unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
128 unsigned char port1node; /* Node ID port connected to: 0xFF = None */
129 unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
130 unsigned char port2node; /* Node ID port connected to: 0xFF = None */
131 unsigned char port2port; /* Port num port connected to: 0,1,2, or 0xFF=None */
132 unsigned char chassis_num; /* 1 based Chassis number (1 = boot node) */
133} __attribute__((packed));
134
135struct rio_detail {
136 unsigned char node_id; /* RIO Node ID */
137 unsigned long BBAR; /* Address of 1MB register space */
138 unsigned char type; /* Type of device */
139 unsigned char owner_id; /* For WPEG: Node ID of Cyclone that owns this WPEG*/
140 /* For CYC: Node ID of Twister that owns this CYC */
141 unsigned char port0node; /* Node ID port connected to: 0xFF=None */
142 unsigned char port0port; /* Port num port connected to: 0,1,2, or 0xFF=None */
143 unsigned char port1node; /* Node ID port connected to: 0xFF=None */
144 unsigned char port1port; /* Port num port connected to: 0,1,2, or 0xFF=None */
145 unsigned char first_slot; /* For WPEG: Lowest slot number below this WPEG */
146 /* For CYC: 0 */
147 unsigned char status; /* For WPEG: Bit 0 = 1 : the XAPIC is used */
148 /* = 0 : the XAPIC is not used, ie:*/
149 /* ints fwded to another XAPIC */
150 /* Bits1:7 Reserved */
151 /* For CYC: Bits0:7 Reserved */
152 unsigned char WP_index; /* For WPEG: WPEG instance index - lower ones have */
153 /* lower slot numbers/PCI bus numbers */
154 /* For CYC: No meaning */
155 unsigned char chassis_num; /* 1 based Chassis number */
156 /* For LookOut WPEGs this field indicates the */
157 /* Expansion Chassis #, enumerated from Boot */
158 /* Node WPEG external port, then Boot Node CYC */
159 /* external port, then Next Vigil chassis WPEG */
160 /* external port, etc. */
161 /* Shared Lookouts have only 1 chassis number (the */
162 /* first one assigned) */
163} __attribute__((packed));
164
165
166typedef enum {
167 CompatTwister = 0, /* Compatibility Twister */
168 AltTwister = 1, /* Alternate Twister of internal 8-way */
169 CompatCyclone = 2, /* Compatibility Cyclone */
170 AltCyclone = 3, /* Alternate Cyclone of internal 8-way */
171 CompatWPEG = 4, /* Compatibility WPEG */
172 AltWPEG = 5, /* Second Planar WPEG */
173 LookOutAWPEG = 6, /* LookOut WPEG */
174 LookOutBWPEG = 7, /* LookOut WPEG */
175} node_type;
176
177static inline int is_WPEG(struct rio_detail *rio){
178 return (rio->type == CompatWPEG || rio->type == AltWPEG ||
179 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
180}
181
182
183/* In clustered mode, the high nibble of APIC ID is a cluster number.
184 * The low nibble is a 4-bit bitmap. */
185#define XAPIC_DEST_CPUS_SHIFT 4
186#define XAPIC_DEST_CPUS_MASK ((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
187#define XAPIC_DEST_CLUSTER_MASK (XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)
188
189#define SUMMIT_APIC_DFR_VALUE (APIC_DFR_CLUSTER)
190
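The three XAPIC_DEST_* macros above encode the Summit convention that a logical APIC ID is a 4-bit cluster number in the high nibble plus a 4-bit CPU bitmap in the low nibble. A minimal user-space sketch of that decomposition, reusing only the macro values defined above (illustrative, not kernel code):

#include <stdio.h>

#define XAPIC_DEST_CPUS_SHIFT	4
#define XAPIC_DEST_CPUS_MASK	((1u << XAPIC_DEST_CPUS_SHIFT) - 1)
#define XAPIC_DEST_CLUSTER_MASK	(XAPIC_DEST_CPUS_MASK << XAPIC_DEST_CPUS_SHIFT)

int main(void)
{
	unsigned int apicid = 0x23;	/* example ID: cluster 2, CPU bits 0 and 1 */

	printf("cluster  = %#x\n", apicid & XAPIC_DEST_CLUSTER_MASK);	/* 0x20 */
	printf("cpu bits = %#x\n", apicid & XAPIC_DEST_CPUS_MASK);	/* 0x03 */
	return 0;
}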
191static inline const cpumask_t *summit_target_cpus(void)
192{
193 /* CPU_MASK_ALL (0xff) has undefined behaviour with
194 * dest_LowestPrio mode logical clustered apic interrupt routing
195 * Just start on cpu 0. IRQ balancing will spread load
196 */
197 return &cpumask_of_cpu(0);
198}
199
200static inline unsigned long
201summit_check_apicid_used(physid_mask_t bitmap, int apicid)
202{
203 return 0;
204}
205
206/* we don't use the phys_cpu_present_map to indicate apicid presence */
207static inline unsigned long summit_check_apicid_present(int bit)
208{
209 return 1;
210}
211
212#define apicid_cluster(apicid) ((apicid) & XAPIC_DEST_CLUSTER_MASK)
213
214extern u8 cpu_2_logical_apicid[];
215
216static inline void summit_init_apic_ldr(void)
217{
218 unsigned long val, id;
219 int count = 0;
220 u8 my_id = (u8)hard_smp_processor_id();
221 u8 my_cluster = (u8)apicid_cluster(my_id);
222#ifdef CONFIG_SMP
223 u8 lid;
224 int i;
225
226 /* Create logical APIC IDs by counting CPUs already in cluster. */
227 for (count = 0, i = nr_cpu_ids; --i >= 0; ) {
228 lid = cpu_2_logical_apicid[i];
229 if (lid != BAD_APICID && apicid_cluster(lid) == my_cluster)
230 ++count;
231 }
232#endif
233 /* We only have a 4 wide bitmap in cluster mode. If a deranged
234 * BIOS puts 5 CPUs in one APIC cluster, we're hosed. */
235 BUG_ON(count >= XAPIC_DEST_CPUS_SHIFT);
236 id = my_cluster | (1UL << count);
237 apic_write(APIC_DFR, SUMMIT_APIC_DFR_VALUE);
238 val = apic_read(APIC_LDR) & ~APIC_LDR_MASK;
239 val |= SET_APIC_LOGICAL_ID(id);
240 apic_write(APIC_LDR, val);
241}
242
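To make the counting scheme in summit_init_apic_ldr() concrete: a CPU whose hardware APIC ID falls in cluster 0x20 takes the next free bit of the 4-wide bitmap, so if two CPUs of that cluster are already registered it becomes 0x20 | (1 << 2) = 0x24. A small stand-alone sketch of that computation (assumes the mask value above; not the kernel function itself):

#include <stdio.h>

#define XAPIC_DEST_CLUSTER_MASK	0xF0u

/* count = number of CPUs already assigned a logical ID in this cluster */
static unsigned int summit_logical_id(unsigned int hard_apicid, int count)
{
	return (hard_apicid & XAPIC_DEST_CLUSTER_MASK) | (1u << count);
}

int main(void)
{
	/* third CPU (count == 2) of cluster 0x20 gets logical ID 0x24 */
	printf("%#x\n", summit_logical_id(0x25, 2));
	return 0;
}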
243static inline int summit_apic_id_registered(void)
244{
245 return 1;
246}
247
248static inline void summit_setup_apic_routing(void)
249{
250 printk("Enabling APIC mode: Summit. Using %d I/O APICs\n",
251 nr_ioapics);
252}
253
254static inline int summit_apicid_to_node(int logical_apicid)
255{
256#ifdef CONFIG_SMP
257 return apicid_2_node[hard_smp_processor_id()];
258#else
259 return 0;
260#endif
261}
262
263/* Mapping from cpu number to logical apicid */
264static inline int summit_cpu_to_logical_apicid(int cpu)
265{
266#ifdef CONFIG_SMP
267 if (cpu >= nr_cpu_ids)
268 return BAD_APICID;
269 return (int)cpu_2_logical_apicid[cpu];
270#else
271 return logical_smp_processor_id();
272#endif
273}
274
275static inline int summit_cpu_present_to_apicid(int mps_cpu)
276{
277 if (mps_cpu < nr_cpu_ids)
278 return (int)per_cpu(x86_bios_cpu_apicid, mps_cpu);
279 else
280 return BAD_APICID;
281}
282
283static inline physid_mask_t
284summit_ioapic_phys_id_map(physid_mask_t phys_id_map)
285{
286 /* For clustered we don't have a good way to do this yet - hack */
287 return physids_promote(0x0F);
288}
289
290static inline physid_mask_t summit_apicid_to_cpu_present(int apicid)
291{
292 return physid_mask_of_physid(0);
293}
294
295static inline void summit_setup_portio_remap(void)
296{
297}
298
299static inline int summit_check_phys_apicid_present(int boot_cpu_physical_apicid)
300{
301 return 1;
302}
303
304static inline unsigned int summit_cpu_mask_to_apicid(const cpumask_t *cpumask)
305{
306 int cpus_found = 0;
307 int num_bits_set;
308 int apicid;
309 int cpu;
310
311 num_bits_set = cpus_weight(*cpumask);
312 /* Return id to all */
313 if (num_bits_set >= nr_cpu_ids)
314 return 0xFF;
315 /*
 316	 * The cpus in the mask must all be in the same APIC cluster. If they are
 317	 * not in the same apicid cluster, return the default value of target_cpus():
318 */
319 cpu = first_cpu(*cpumask);
320 apicid = summit_cpu_to_logical_apicid(cpu);
321
322 while (cpus_found < num_bits_set) {
323 if (cpu_isset(cpu, *cpumask)) {
324 int new_apicid = summit_cpu_to_logical_apicid(cpu);
325
326 if (apicid_cluster(apicid) !=
327 apicid_cluster(new_apicid)) {
328 printk ("%s: Not a valid mask!\n", __func__);
329
330 return 0xFF;
331 }
332 apicid = apicid | new_apicid;
333 cpus_found++;
334 }
335 cpu++;
336 }
337 return apicid;
338}
339
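summit_cpu_mask_to_apicid() above builds the destination by OR-ing the logical IDs of every CPU in the mask, which only works while they share a cluster; a mask that mixes clusters falls back to 0xFF. The combining rule in isolation (a runnable sketch, not the kernel routine):

#include <stdio.h>

#define XAPIC_DEST_CLUSTER_MASK	0xF0u
#define ALL_CPUS_DEST		0xFFu

/* Combine two logical APIC IDs the way summit_cpu_mask_to_apicid() does. */
static unsigned int combine(unsigned int a, unsigned int b)
{
	if ((a & XAPIC_DEST_CLUSTER_MASK) != (b & XAPIC_DEST_CLUSTER_MASK))
		return ALL_CPUS_DEST;	/* not a valid mask: broadcast */
	return a | b;
}

int main(void)
{
	printf("%#x\n", combine(0x21, 0x22));	/* same cluster  -> 0x23 */
	printf("%#x\n", combine(0x21, 0x32));	/* cross cluster -> 0xff */
	return 0;
}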
340static inline unsigned int
341summit_cpu_mask_to_apicid_and(const struct cpumask *inmask,
342 const struct cpumask *andmask)
343{
344 int apicid = summit_cpu_to_logical_apicid(0);
345 cpumask_var_t cpumask;
346
347 if (!alloc_cpumask_var(&cpumask, GFP_ATOMIC))
348 return apicid;
349
350 cpumask_and(cpumask, inmask, andmask);
351 cpumask_and(cpumask, cpumask, cpu_online_mask);
352 apicid = summit_cpu_mask_to_apicid(cpumask);
353
354 free_cpumask_var(cpumask);
355
356 return apicid;
357}
358
359/*
360 * cpuid returns the value latched in the HW at reset, not the APIC ID
361 * register's value. For any box whose BIOS changes APIC IDs, like
362 * clustered APIC systems, we must use hard_smp_processor_id.
363 *
364 * See Intel's IA-32 SW Dev's Manual Vol2 under CPUID.
365 */
366static inline int summit_phys_pkg_id(int cpuid_apic, int index_msb)
367{
368 return hard_smp_processor_id() >> index_msb;
369}
370
371static int probe_summit(void)
372{
373 /* probed later in mptable/ACPI hooks */
374 return 0;
375}
376
377static void summit_vector_allocation_domain(int cpu, cpumask_t *retmask)
378{
379 /* Careful. Some cpus do not strictly honor the set of cpus
380 * specified in the interrupt destination when using lowest
381 * priority interrupt delivery mode.
382 *
383 * In particular there was a hyperthreading cpu observed to
384 * deliver interrupts to the wrong hyperthread when only one
 385	 * hyperthread was specified in the interrupt destination.
386 */
387 *retmask = (cpumask_t){ { [0] = APIC_ALL_CPUS, } };
388}
389
390#ifdef CONFIG_X86_SUMMIT_NUMA
35static struct rio_table_hdr *rio_table_hdr __initdata; 391static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 392static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
37static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata; 393static struct rio_detail *rio_devs[MAX_NUMNODES*4] __initdata;
@@ -186,3 +542,61 @@ void __init setup_summit(void)
186 next_wpeg = 0; 542 next_wpeg = 0;
187 } while (next_wpeg != 0); 543 } while (next_wpeg != 0);
188} 544}
545#endif
546
547struct genapic apic_summit = {
548
549 .name = "summit",
550 .probe = probe_summit,
551 .acpi_madt_oem_check = summit_acpi_madt_oem_check,
552 .apic_id_registered = summit_apic_id_registered,
553
554 .irq_delivery_mode = dest_LowestPrio,
555 /* logical delivery broadcast to all CPUs: */
556 .irq_dest_mode = 1,
557
558 .target_cpus = summit_target_cpus,
559 .disable_esr = 1,
560 .dest_logical = APIC_DEST_LOGICAL,
561 .check_apicid_used = summit_check_apicid_used,
562 .check_apicid_present = summit_check_apicid_present,
563
564 .vector_allocation_domain = summit_vector_allocation_domain,
565 .init_apic_ldr = summit_init_apic_ldr,
566
567 .ioapic_phys_id_map = summit_ioapic_phys_id_map,
568 .setup_apic_routing = summit_setup_apic_routing,
569 .multi_timer_check = NULL,
570 .apicid_to_node = summit_apicid_to_node,
571 .cpu_to_logical_apicid = summit_cpu_to_logical_apicid,
572 .cpu_present_to_apicid = summit_cpu_present_to_apicid,
573 .apicid_to_cpu_present = summit_apicid_to_cpu_present,
574 .setup_portio_remap = NULL,
575 .check_phys_apicid_present = summit_check_phys_apicid_present,
576 .enable_apic_mode = NULL,
577 .phys_pkg_id = summit_phys_pkg_id,
578 .mps_oem_check = summit_mps_oem_check,
579
580 .get_apic_id = summit_get_apic_id,
581 .set_apic_id = NULL,
582 .apic_id_mask = 0xFF << 24,
583
584 .cpu_mask_to_apicid = summit_cpu_mask_to_apicid,
585 .cpu_mask_to_apicid_and = summit_cpu_mask_to_apicid_and,
586
587 .send_IPI_mask = summit_send_IPI_mask,
588 .send_IPI_mask_allbutself = NULL,
589 .send_IPI_allbutself = summit_send_IPI_allbutself,
590 .send_IPI_all = summit_send_IPI_all,
591 .send_IPI_self = default_send_IPI_self,
592
593 .wakeup_cpu = NULL,
594 .trampoline_phys_low = DEFAULT_TRAMPOLINE_PHYS_LOW,
595 .trampoline_phys_high = DEFAULT_TRAMPOLINE_PHYS_HIGH,
596
597 .wait_for_init_deassert = default_wait_for_init_deassert,
598
599 .smp_callin_clear_local_apic = NULL,
600 .store_NMI_vector = NULL,
601 .inquire_remote_apic = default_inquire_remote_apic,
602};
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index e2e86a08f31d..3bdb64829b82 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -1,7 +1,7 @@
1ENTRY(sys_call_table) 1ENTRY(sys_call_table)
2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */ 2 .long sys_restart_syscall /* 0 - old "setup()" system call, used for restarting */
3 .long sys_exit 3 .long sys_exit
4 .long sys_fork 4 .long ptregs_fork
5 .long sys_read 5 .long sys_read
6 .long sys_write 6 .long sys_write
7 .long sys_open /* 5 */ 7 .long sys_open /* 5 */
@@ -10,7 +10,7 @@ ENTRY(sys_call_table)
10 .long sys_creat 10 .long sys_creat
11 .long sys_link 11 .long sys_link
12 .long sys_unlink /* 10 */ 12 .long sys_unlink /* 10 */
13 .long sys_execve 13 .long ptregs_execve
14 .long sys_chdir 14 .long sys_chdir
15 .long sys_time 15 .long sys_time
16 .long sys_mknod 16 .long sys_mknod
@@ -109,17 +109,17 @@ ENTRY(sys_call_table)
109 .long sys_newlstat 109 .long sys_newlstat
110 .long sys_newfstat 110 .long sys_newfstat
111 .long sys_uname 111 .long sys_uname
112 .long sys_iopl /* 110 */ 112 .long ptregs_iopl /* 110 */
113 .long sys_vhangup 113 .long sys_vhangup
114 .long sys_ni_syscall /* old "idle" system call */ 114 .long sys_ni_syscall /* old "idle" system call */
115 .long sys_vm86old 115 .long ptregs_vm86old
116 .long sys_wait4 116 .long sys_wait4
117 .long sys_swapoff /* 115 */ 117 .long sys_swapoff /* 115 */
118 .long sys_sysinfo 118 .long sys_sysinfo
119 .long sys_ipc 119 .long sys_ipc
120 .long sys_fsync 120 .long sys_fsync
121 .long sys_sigreturn 121 .long ptregs_sigreturn
122 .long sys_clone /* 120 */ 122 .long ptregs_clone /* 120 */
123 .long sys_setdomainname 123 .long sys_setdomainname
124 .long sys_newuname 124 .long sys_newuname
125 .long sys_modify_ldt 125 .long sys_modify_ldt
@@ -165,14 +165,14 @@ ENTRY(sys_call_table)
165 .long sys_mremap 165 .long sys_mremap
166 .long sys_setresuid16 166 .long sys_setresuid16
167 .long sys_getresuid16 /* 165 */ 167 .long sys_getresuid16 /* 165 */
168 .long sys_vm86 168 .long ptregs_vm86
169 .long sys_ni_syscall /* Old sys_query_module */ 169 .long sys_ni_syscall /* Old sys_query_module */
170 .long sys_poll 170 .long sys_poll
171 .long sys_nfsservctl 171 .long sys_nfsservctl
172 .long sys_setresgid16 /* 170 */ 172 .long sys_setresgid16 /* 170 */
173 .long sys_getresgid16 173 .long sys_getresgid16
174 .long sys_prctl 174 .long sys_prctl
175 .long sys_rt_sigreturn 175 .long ptregs_rt_sigreturn
176 .long sys_rt_sigaction 176 .long sys_rt_sigaction
177 .long sys_rt_sigprocmask /* 175 */ 177 .long sys_rt_sigprocmask /* 175 */
178 .long sys_rt_sigpending 178 .long sys_rt_sigpending
@@ -185,11 +185,11 @@ ENTRY(sys_call_table)
185 .long sys_getcwd 185 .long sys_getcwd
186 .long sys_capget 186 .long sys_capget
187 .long sys_capset /* 185 */ 187 .long sys_capset /* 185 */
188 .long sys_sigaltstack 188 .long ptregs_sigaltstack
189 .long sys_sendfile 189 .long sys_sendfile
190 .long sys_ni_syscall /* reserved for streams1 */ 190 .long sys_ni_syscall /* reserved for streams1 */
191 .long sys_ni_syscall /* reserved for streams2 */ 191 .long sys_ni_syscall /* reserved for streams2 */
192 .long sys_vfork /* 190 */ 192 .long ptregs_vfork /* 190 */
193 .long sys_getrlimit 193 .long sys_getrlimit
194 .long sys_mmap2 194 .long sys_mmap2
195 .long sys_truncate64 195 .long sys_truncate64
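The sys_* to ptregs_* switch above covers the syscalls that need the full saved register frame. Presumably each ptregs_* entry is a small assembly stub (added to entry_32.S, not shown in this diff) that hands the C handler a pointer to the frame; the matching C-side change is visible in the vm86_32.c hunk further down, where the struct is no longer passed by value. In prototype form the change looks like this (declarations only, for illustration):

/* before: the whole register frame was copied onto the C stack */
asmlinkage int sys_vm86old(struct pt_regs regs);

/* after: the ptregs_vm86old stub passes a pointer to the saved frame */
int sys_vm86old(struct pt_regs *regs);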
diff --git a/arch/x86/kernel/time_32.c b/arch/x86/kernel/time_32.c
index 3985cac0ed47..764c74e871f2 100644
--- a/arch/x86/kernel/time_32.c
+++ b/arch/x86/kernel/time_32.c
@@ -38,7 +38,7 @@
38#include <asm/time.h> 38#include <asm/time.h>
39#include <asm/timer.h> 39#include <asm/timer.h>
40 40
41#include "do_timer.h" 41#include <asm/do_timer.h>
42 42
43int timer_ack; 43int timer_ack;
44 44
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index ce5054642247..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,256 +0,0 @@
1#include <linux/spinlock.h>
2#include <linux/cpu.h>
3#include <linux/interrupt.h>
4
5#include <asm/tlbflush.h>
6
7DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
8 ____cacheline_aligned = { &init_mm, 0, };
9
10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h>
12
13/*
14 * Smarter SMP flushing macros.
15 * c/o Linus Torvalds.
16 *
17 * These mean you can really definitely utterly forget about
 18 * writing to user space from interrupts. (It's not allowed anyway).
19 *
20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */
22
23static cpumask_t flush_cpumask;
24static struct mm_struct *flush_mm;
25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock);
27
28/*
29 * We cannot call mmdrop() because we are in interrupt context,
30 * instead update mm->cpu_vm_mask.
31 *
32 * We need to reload %cr3 since the page tables may be going
33 * away from under us..
34 */
35void leave_mm(int cpu)
36{
37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir);
40}
41EXPORT_SYMBOL_GPL(leave_mm);
42
43/*
44 *
45 * The flush IPI assumes that a thread switch happens in this order:
46 * [cpu0: the cpu that switches]
47 * 1) switch_mm() either 1a) or 1b)
48 * 1a) thread switch to a different mm
49 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
50 * Stop ipi delivery for the old mm. This is not synchronized with
 51 * the other cpus, but smp_invalidate_interrupt ignores flush ipis
52 * for the wrong mm, and in the worst case we perform a superfluous
53 * tlb flush.
54 * 1a2) set cpu_tlbstate to TLBSTATE_OK
55 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
56 * was in lazy tlb mode.
57 * 1a3) update cpu_tlbstate[].active_mm
58 * Now cpu0 accepts tlb flushes for the new mm.
59 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
60 * Now the other cpus will send tlb flush ipis.
 61 * 1a5) change cr3.
62 * 1b) thread switch without mm change
63 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
64 * flush ipis.
65 * 1b1) set cpu_tlbstate to TLBSTATE_OK
66 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
67 * Atomically set the bit [other cpus will start sending flush ipis],
68 * and test the bit.
69 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
70 * 2) switch %%esp, ie current
71 *
72 * The interrupt must handle 2 special cases:
73 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
74 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
75 * runs in kernel space, the cpu could load tlb entries for user space
76 * pages.
77 *
78 * The good news is that cpu_tlbstate is local to each cpu, no
79 * write/read ordering problems.
80 */
81
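The ordering rules above boil down to a simple request/acknowledge handshake: the sender publishes the flush parameters, sets the bits of the target CPUs, issues the IPI, and spins until every target has cleared its bit; each target flushes (or leaves the mm) and clears itself. A single-threaded user-space model of just that handshake, with the IPI replaced by a direct call (assumptions: 8 CPUs, no real concurrency):

#include <stdatomic.h>
#include <stdio.h>

static atomic_ulong flush_cpumask;	/* stands in for the shared cpumask */

/* what a targeted CPU does from its INVALIDATE_TLB_VECTOR handler */
static void handle_flush_ipi(int cpu)
{
	/* local_flush_tlb() or __flush_tlb_one(va) would run here */
	atomic_fetch_and(&flush_cpumask, ~(1UL << cpu));	/* acknowledge */
}

/* what the sender does, with preemption disabled and the lock held */
static void send_flush(unsigned long targets)
{
	atomic_fetch_or(&flush_cpumask, targets);
	/* the real code does smp_mb() then send_IPI_mask(); simulate it: */
	for (int cpu = 0; cpu < 8; cpu++)
		if (targets & (1UL << cpu))
			handle_flush_ipi(cpu);
	while (atomic_load(&flush_cpumask))
		;	/* cpu_relax() until every target acknowledged */
}

int main(void)
{
	send_flush(0x6);	/* flush CPUs 1 and 2 */
	puts("all targets acknowledged");
	return 0;
}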
82/*
83 * TLB flush IPI:
84 *
85 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
86 * 2) Leave the mm if we are in the lazy tlb mode.
87 */
88
89void smp_invalidate_interrupt(struct pt_regs *regs)
90{
91 unsigned long cpu;
92
93 cpu = get_cpu();
94
95 if (!cpu_isset(cpu, flush_cpumask))
96 goto out;
97 /*
98 * This was a BUG() but until someone can quote me the
99 * line from the intel manual that guarantees an IPI to
100 * multiple CPUs is retried _only_ on the erroring CPUs
 101 * it's staying as a return
102 *
103 * BUG();
104 */
105
106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb();
110 else
111 __flush_tlb_one(flush_va);
112 } else
113 leave_mm(cpu);
114 }
115 ack_APIC_irq();
116 smp_mb__before_clear_bit();
117 cpu_clear(cpu, flush_cpumask);
118 smp_mb__after_clear_bit();
119out:
120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count);
122}
123
124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
125 unsigned long va)
126{
127 cpumask_t cpumask = *cpumaskp;
128
129 /*
130 * A couple of (to be removed) sanity checks:
131 *
132 * - current CPU must not be in mask
133 * - mask must exist :)
134 */
135 BUG_ON(cpus_empty(cpumask));
136 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
137 BUG_ON(!mm);
138
139#ifdef CONFIG_HOTPLUG_CPU
140 /* If a CPU which we ran on has gone down, OK. */
141 cpus_and(cpumask, cpumask, cpu_online_map);
142 if (unlikely(cpus_empty(cpumask)))
143 return;
144#endif
145
146 /*
 147 * I'm not happy about this global shared spinlock in the
148 * MM hot path, but we'll see how contended it is.
149 * AK: x86-64 has a faster method that could be ported.
150 */
151 spin_lock(&tlbstate_lock);
152
153 flush_mm = mm;
154 flush_va = va;
155 cpus_or(flush_cpumask, cpumask, flush_cpumask);
156
157 /*
158 * Make the above memory operations globally visible before
159 * sending the IPI.
160 */
161 smp_mb();
162 /*
163 * We have to send the IPI only to
164 * CPUs affected.
165 */
166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
167
168 while (!cpus_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */
170 cpu_relax();
171
172 flush_mm = NULL;
173 flush_va = 0;
174 spin_unlock(&tlbstate_lock);
175}
176
177void flush_tlb_current_task(void)
178{
179 struct mm_struct *mm = current->mm;
180 cpumask_t cpu_mask;
181
182 preempt_disable();
183 cpu_mask = mm->cpu_vm_mask;
184 cpu_clear(smp_processor_id(), cpu_mask);
185
186 local_flush_tlb();
187 if (!cpus_empty(cpu_mask))
188 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
189 preempt_enable();
190}
191
192void flush_tlb_mm(struct mm_struct *mm)
193{
194 cpumask_t cpu_mask;
195
196 preempt_disable();
197 cpu_mask = mm->cpu_vm_mask;
198 cpu_clear(smp_processor_id(), cpu_mask);
199
200 if (current->active_mm == mm) {
201 if (current->mm)
202 local_flush_tlb();
203 else
204 leave_mm(smp_processor_id());
205 }
206 if (!cpus_empty(cpu_mask))
207 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
208
209 preempt_enable();
210}
211
212void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
213{
214 struct mm_struct *mm = vma->vm_mm;
215 cpumask_t cpu_mask;
216
217 preempt_disable();
218 cpu_mask = mm->cpu_vm_mask;
219 cpu_clear(smp_processor_id(), cpu_mask);
220
221 if (current->active_mm == mm) {
222 if (current->mm)
223 __flush_tlb_one(va);
224 else
225 leave_mm(smp_processor_id());
226 }
227
228 if (!cpus_empty(cpu_mask))
229 flush_tlb_others(cpu_mask, mm, va);
230
231 preempt_enable();
232}
233EXPORT_SYMBOL(flush_tlb_page);
234
235static void do_flush_tlb_all(void *info)
236{
237 unsigned long cpu = smp_processor_id();
238
239 __flush_tlb_all();
240 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
241 leave_mm(cpu);
242}
243
244void flush_tlb_all(void)
245{
246 on_each_cpu(do_flush_tlb_all, NULL, 1);
247}
248
249void reset_lazy_tlbstate(void)
250{
251 int cpu = raw_smp_processor_id();
252
253 per_cpu(cpu_tlbstate, cpu).state = 0;
254 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
255}
256
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
deleted file mode 100644
index f8be6f1d2e48..000000000000
--- a/arch/x86/kernel/tlb_64.c
+++ /dev/null
@@ -1,284 +0,0 @@
1#include <linux/init.h>
2
3#include <linux/mm.h>
4#include <linux/delay.h>
5#include <linux/spinlock.h>
6#include <linux/smp.h>
7#include <linux/kernel_stat.h>
8#include <linux/mc146818rtc.h>
9#include <linux/interrupt.h>
10
11#include <asm/mtrr.h>
12#include <asm/pgalloc.h>
13#include <asm/tlbflush.h>
14#include <asm/mmu_context.h>
15#include <asm/proto.h>
16#include <asm/apicdef.h>
17#include <asm/idle.h>
18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h>
20
21#include <mach_ipi.h>
22/*
23 * Smarter SMP flushing macros.
24 * c/o Linus Torvalds.
25 *
26 * These mean you can really definitely utterly forget about
 27 * writing to user space from interrupts. (It's not allowed anyway).
28 *
29 * Optimizations Manfred Spraul <manfred@colorfullife.com>
30 *
31 * More scalable flush, from Andi Kleen
32 *
33 * To avoid global state use 8 different call vectors.
34 * Each CPU uses a specific vector to trigger flushes on other
35 * CPUs. Depending on the received vector the target CPUs look into
36 * the right per cpu variable for the flush data.
37 *
38 * With more than 8 CPUs they are hashed to the 8 available
39 * vectors. The limited global vector space forces us to this right now.
40 * In future when interrupts are split into per CPU domains this could be
41 * fixed, at the cost of triggering multiple IPIs in some cases.
42 */
43
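The 64-bit variant avoids the single global lock by giving each sender one of eight invalidate vectors; with more than eight CPUs, senders simply hash onto the available slots, exactly as the code below computes sender and vector. The mapping on its own:

#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS	8
#define INVALIDATE_TLB_VECTOR_START	0	/* placeholder; the real base is in <asm/irq_vectors.h> */

int main(void)
{
	for (int cpu = 0; cpu < 20; cpu++) {
		int sender = cpu % NUM_INVALIDATE_TLB_VECTORS;

		printf("cpu %2d -> flush_state slot %d, vector %d\n",
		       cpu, sender, INVALIDATE_TLB_VECTOR_START + sender);
	}
	return 0;
}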
44union smp_flush_state {
45 struct {
46 cpumask_t flush_cpumask;
47 struct mm_struct *flush_mm;
48 unsigned long flush_va;
49 spinlock_t tlbstate_lock;
50 };
51 char pad[SMP_CACHE_BYTES];
52} ____cacheline_aligned;
53
54/* State is put into the per CPU data section, but padded
55 to a full cache line because other CPUs can access it and we don't
56 want false sharing in the per cpu data segment. */
57static DEFINE_PER_CPU(union smp_flush_state, flush_state);
58
59/*
60 * We cannot call mmdrop() because we are in interrupt context,
61 * instead update mm->cpu_vm_mask.
62 */
63void leave_mm(int cpu)
64{
65 if (read_pda(mmu_state) == TLBSTATE_OK)
66 BUG();
67 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask);
68 load_cr3(swapper_pg_dir);
69}
70EXPORT_SYMBOL_GPL(leave_mm);
71
72/*
73 *
74 * The flush IPI assumes that a thread switch happens in this order:
75 * [cpu0: the cpu that switches]
76 * 1) switch_mm() either 1a) or 1b)
77 * 1a) thread switch to a different mm
78 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
79 * Stop ipi delivery for the old mm. This is not synchronized with
80 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
 81 * the other cpus, but smp_invalidate_interrupt ignores flush ipis
82 * tlb flush.
83 * 1a2) set cpu mmu_state to TLBSTATE_OK
84 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
85 * was in lazy tlb mode.
86 * 1a3) update cpu active_mm
87 * Now cpu0 accepts tlb flushes for the new mm.
88 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
89 * Now the other cpus will send tlb flush ipis.
 90 * 1a5) change cr3.
91 * 1b) thread switch without mm change
92 * cpu active_mm is correct, cpu0 already handles
93 * flush ipis.
94 * 1b1) set cpu mmu_state to TLBSTATE_OK
95 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
96 * Atomically set the bit [other cpus will start sending flush ipis],
97 * and test the bit.
98 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
99 * 2) switch %%esp, ie current
100 *
101 * The interrupt must handle 2 special cases:
102 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
103 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
104 * runs in kernel space, the cpu could load tlb entries for user space
105 * pages.
106 *
107 * The good news is that cpu mmu_state is local to each cpu, no
108 * write/read ordering problems.
109 */
110
111/*
112 * TLB flush IPI:
113 *
114 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
115 * 2) Leave the mm if we are in the lazy tlb mode.
116 *
117 * Interrupts are disabled.
118 */
119
120asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
121{
122 int cpu;
123 int sender;
124 union smp_flush_state *f;
125
126 cpu = smp_processor_id();
127 /*
128 * orig_rax contains the negated interrupt vector.
129 * Use that to determine where the sender put the data.
130 */
131 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
132 f = &per_cpu(flush_state, sender);
133
134 if (!cpu_isset(cpu, f->flush_cpumask))
135 goto out;
136 /*
137 * This was a BUG() but until someone can quote me the
138 * line from the intel manual that guarantees an IPI to
139 * multiple CPUs is retried _only_ on the erroring CPUs
 140 * it's staying as a return
141 *
142 * BUG();
143 */
144
145 if (f->flush_mm == read_pda(active_mm)) {
146 if (read_pda(mmu_state) == TLBSTATE_OK) {
147 if (f->flush_va == TLB_FLUSH_ALL)
148 local_flush_tlb();
149 else
150 __flush_tlb_one(f->flush_va);
151 } else
152 leave_mm(cpu);
153 }
154out:
155 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask);
157 inc_irq_stat(irq_tlb_count);
158}
159
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
161 unsigned long va)
162{
163 int sender;
164 union smp_flush_state *f;
165 cpumask_t cpumask = *cpumaskp;
166
167 if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
168 return;
169
170 /* Caller has disabled preemption */
171 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
172 f = &per_cpu(flush_state, sender);
173
174 /*
175 * Could avoid this lock when
176 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
177 * probably not worth checking this for a cache-hot lock.
178 */
179 spin_lock(&f->tlbstate_lock);
180
181 f->flush_mm = mm;
182 f->flush_va = va;
183 cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask);
184
185 /*
186 * Make the above memory operations globally visible before
187 * sending the IPI.
188 */
189 smp_mb();
190 /*
191 * We have to send the IPI only to
192 * CPUs affected.
193 */
194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender);
195
196 while (!cpus_empty(f->flush_cpumask))
197 cpu_relax();
198
199 f->flush_mm = NULL;
200 f->flush_va = 0;
201 spin_unlock(&f->tlbstate_lock);
202}
203
204static int __cpuinit init_smp_flush(void)
205{
206 int i;
207
208 for_each_possible_cpu(i)
209 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
210
211 return 0;
212}
213core_initcall(init_smp_flush);
214
215void flush_tlb_current_task(void)
216{
217 struct mm_struct *mm = current->mm;
218 cpumask_t cpu_mask;
219
220 preempt_disable();
221 cpu_mask = mm->cpu_vm_mask;
222 cpu_clear(smp_processor_id(), cpu_mask);
223
224 local_flush_tlb();
225 if (!cpus_empty(cpu_mask))
226 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
227 preempt_enable();
228}
229
230void flush_tlb_mm(struct mm_struct *mm)
231{
232 cpumask_t cpu_mask;
233
234 preempt_disable();
235 cpu_mask = mm->cpu_vm_mask;
236 cpu_clear(smp_processor_id(), cpu_mask);
237
238 if (current->active_mm == mm) {
239 if (current->mm)
240 local_flush_tlb();
241 else
242 leave_mm(smp_processor_id());
243 }
244 if (!cpus_empty(cpu_mask))
245 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
246
247 preempt_enable();
248}
249
250void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
251{
252 struct mm_struct *mm = vma->vm_mm;
253 cpumask_t cpu_mask;
254
255 preempt_disable();
256 cpu_mask = mm->cpu_vm_mask;
257 cpu_clear(smp_processor_id(), cpu_mask);
258
259 if (current->active_mm == mm) {
260 if (current->mm)
261 __flush_tlb_one(va);
262 else
263 leave_mm(smp_processor_id());
264 }
265
266 if (!cpus_empty(cpu_mask))
267 flush_tlb_others(cpu_mask, mm, va);
268
269 preempt_enable();
270}
271
272static void do_flush_tlb_all(void *info)
273{
274 unsigned long cpu = smp_processor_id();
275
276 __flush_tlb_all();
277 if (read_pda(mmu_state) == TLBSTATE_LAZY)
278 leave_mm(cpu);
279}
280
281void flush_tlb_all(void)
282{
283 on_each_cpu(do_flush_tlb_all, NULL, 1);
284}
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 6812b829ed83..f396e61bcb34 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,6 +11,7 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12 12
13#include <asm/mmu_context.h> 13#include <asm/mmu_context.h>
14#include <asm/uv/uv.h>
14#include <asm/uv/uv_mmrs.h> 15#include <asm/uv/uv_mmrs.h>
15#include <asm/uv/uv_hub.h> 16#include <asm/uv/uv_hub.h>
16#include <asm/uv/uv_bau.h> 17#include <asm/uv/uv_bau.h>
@@ -19,7 +20,7 @@
19#include <asm/tsc.h> 20#include <asm/tsc.h>
20#include <asm/irq_vectors.h> 21#include <asm/irq_vectors.h>
21 22
22#include <mach_apic.h> 23#include <asm/genapic.h>
23 24
24static struct bau_control **uv_bau_table_bases __read_mostly; 25static struct bau_control **uv_bau_table_bases __read_mostly;
25static int uv_bau_retry_limit __read_mostly; 26static int uv_bau_retry_limit __read_mostly;
@@ -210,14 +211,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
210 * 211 *
211 * Send a broadcast and wait for a broadcast message to complete. 212 * Send a broadcast and wait for a broadcast message to complete.
212 * 213 *
213 * The cpumaskp mask contains the cpus the broadcast was sent to. 214 * The flush_mask contains the cpus the broadcast was sent to.
214 * 215 *
215 * Returns 1 if all remote flushing was done. The mask is zeroed. 216 * Returns NULL if all remote flushing was done. The mask is zeroed.
216 * Returns 0 if some remote flushing remains to be done. The mask is left 217 * Returns @flush_mask if some remote flushing remains to be done. The
217 * unchanged. 218 * mask will have some bits still set.
218 */ 219 */
219int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, 220const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
220 cpumask_t *cpumaskp) 221 struct bau_desc *bau_desc,
222 struct cpumask *flush_mask)
221{ 223{
222 int completion_status = 0; 224 int completion_status = 0;
223 int right_shift; 225 int right_shift;
@@ -257,66 +259,76 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
257 * the cpu's, all of which are still in the mask. 259 * the cpu's, all of which are still in the mask.
258 */ 260 */
259 __get_cpu_var(ptcstats).ptc_i++; 261 __get_cpu_var(ptcstats).ptc_i++;
260 return 0; 262 return flush_mask;
261 } 263 }
262 264
263 /* 265 /*
264 * Success, so clear the remote cpu's from the mask so we don't 266 * Success, so clear the remote cpu's from the mask so we don't
265 * use the IPI method of shootdown on them. 267 * use the IPI method of shootdown on them.
266 */ 268 */
267 for_each_cpu_mask(bit, *cpumaskp) { 269 for_each_cpu(bit, flush_mask) {
268 blade = uv_cpu_to_blade_id(bit); 270 blade = uv_cpu_to_blade_id(bit);
269 if (blade == this_blade) 271 if (blade == this_blade)
270 continue; 272 continue;
271 cpu_clear(bit, *cpumaskp); 273 cpumask_clear_cpu(bit, flush_mask);
272 } 274 }
273 if (!cpus_empty(*cpumaskp)) 275 if (!cpumask_empty(flush_mask))
274 return 0; 276 return flush_mask;
275 return 1; 277 return NULL;
276} 278}
277 279
278/** 280/**
279 * uv_flush_tlb_others - globally purge translation cache of a virtual 281 * uv_flush_tlb_others - globally purge translation cache of a virtual
280 * address or all TLB's 282 * address or all TLB's
281 * @cpumaskp: mask of all cpu's in which the address is to be removed 283 * @cpumask: mask of all cpu's in which the address is to be removed
282 * @mm: mm_struct containing virtual address range 284 * @mm: mm_struct containing virtual address range
283 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) 285 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
286 * @cpu: the current cpu
284 * 287 *
285 * This is the entry point for initiating any UV global TLB shootdown. 288 * This is the entry point for initiating any UV global TLB shootdown.
286 * 289 *
287 * Purges the translation caches of all specified processors of the given 290 * Purges the translation caches of all specified processors of the given
288 * virtual address, or purges all TLB's on specified processors. 291 * virtual address, or purges all TLB's on specified processors.
289 * 292 *
290 * The caller has derived the cpumaskp from the mm_struct and has subtracted 293 * The caller has derived the cpumask from the mm_struct. This function
291 * the local cpu from the mask. This function is called only if there 294 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
292 * are bits set in the mask. (e.g. flush_tlb_page())
293 * 295 *
294 * The cpumaskp is converted into a nodemask of the nodes containing 296 * The cpumask is converted into a nodemask of the nodes containing
295 * the cpus. 297 * the cpus.
296 * 298 *
297 * Returns 1 if all remote flushing was done. 299 * Note that this function should be called with preemption disabled.
298 * Returns 0 if some remote flushing remains to be done. 300 *
301 * Returns NULL if all remote flushing was done.
302 * Returns pointer to cpumask if some remote flushing remains to be
303 * done. The returned pointer is valid till preemption is re-enabled.
299 */ 304 */
300int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, 305const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
301 unsigned long va) 306 struct mm_struct *mm,
307 unsigned long va, unsigned int cpu)
302{ 308{
309 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
310 struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
303 int i; 311 int i;
304 int bit; 312 int bit;
305 int blade; 313 int blade;
306 int cpu; 314 int uv_cpu;
307 int this_blade; 315 int this_blade;
308 int locals = 0; 316 int locals = 0;
309 struct bau_desc *bau_desc; 317 struct bau_desc *bau_desc;
310 318
311 cpu = uv_blade_processor_id(); 319 WARN_ON(!in_atomic());
320
321 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
322
323 uv_cpu = uv_blade_processor_id();
312 this_blade = uv_numa_blade_id(); 324 this_blade = uv_numa_blade_id();
313 bau_desc = __get_cpu_var(bau_control).descriptor_base; 325 bau_desc = __get_cpu_var(bau_control).descriptor_base;
314 bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; 326 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
315 327
316 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 328 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
317 329
318 i = 0; 330 i = 0;
319 for_each_cpu_mask(bit, *cpumaskp) { 331 for_each_cpu(bit, flush_mask) {
320 blade = uv_cpu_to_blade_id(bit); 332 blade = uv_cpu_to_blade_id(bit);
321 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); 333 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
322 if (blade == this_blade) { 334 if (blade == this_blade) {
@@ -331,17 +343,17 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
331 * no off_node flushing; return status for local node 343 * no off_node flushing; return status for local node
332 */ 344 */
333 if (locals) 345 if (locals)
334 return 0; 346 return flush_mask;
335 else 347 else
336 return 1; 348 return NULL;
337 } 349 }
338 __get_cpu_var(ptcstats).requestor++; 350 __get_cpu_var(ptcstats).requestor++;
339 __get_cpu_var(ptcstats).ntargeted += i; 351 __get_cpu_var(ptcstats).ntargeted += i;
340 352
341 bau_desc->payload.address = va; 353 bau_desc->payload.address = va;
342 bau_desc->payload.sending_cpu = smp_processor_id(); 354 bau_desc->payload.sending_cpu = cpu;
343 355
344 return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); 356 return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
345} 357}
346 358
347/* 359/*
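With the new signature, uv_flush_tlb_others() no longer returns a 0/1 status: NULL means every remote CPU was handled by the BAU, while a non-NULL mask names the CPUs that still need the conventional IPI shootdown. A sketch of the caller shape this implies (not a compilable unit; flush_with_ipis() is a hypothetical stand-in for the ordinary IPI path, and the real call site lives in the common TLB code, not in this diff):

static void flush_others(const struct cpumask *cpumask,
			 struct mm_struct *mm, unsigned long va,
			 unsigned int cpu)
{
	const struct cpumask *remaining;

	remaining = uv_flush_tlb_others(cpumask, mm, va, cpu);
	if (!remaining)
		return;				/* BAU reached every remote CPU */

	flush_with_ipis(remaining, mm, va);	/* fall back for the rest */
}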
diff --git a/arch/x86/kernel/trampoline_64.S b/arch/x86/kernel/trampoline_64.S
index 894293c598db..95a012a4664e 100644
--- a/arch/x86/kernel/trampoline_64.S
+++ b/arch/x86/kernel/trampoline_64.S
@@ -29,6 +29,7 @@
29#include <asm/page.h> 29#include <asm/page.h>
30#include <asm/msr.h> 30#include <asm/msr.h>
31#include <asm/segment.h> 31#include <asm/segment.h>
32#include <asm/processor-flags.h>
32 33
33.section .rodata, "a", @progbits 34.section .rodata, "a", @progbits
34 35
@@ -37,7 +38,7 @@
37ENTRY(trampoline_data) 38ENTRY(trampoline_data)
38r_base = . 39r_base = .
39 cli # We should be safe anyway 40 cli # We should be safe anyway
40 wbinvd 41 wbinvd
41 mov %cs, %ax # Code and data in the same place 42 mov %cs, %ax # Code and data in the same place
42 mov %ax, %ds 43 mov %ax, %ds
43 mov %ax, %es 44 mov %ax, %es
@@ -73,9 +74,8 @@ r_base = .
73 lidtl tidt - r_base # load idt with 0, 0 74 lidtl tidt - r_base # load idt with 0, 0
74 lgdtl tgdt - r_base # load gdt with whatever is appropriate 75 lgdtl tgdt - r_base # load gdt with whatever is appropriate
75 76
76 xor %ax, %ax 77 mov $X86_CR0_PE, %ax # protected mode (PE) bit
77 inc %ax # protected mode (PE) bit 78 lmsw %ax # into protected mode
78 lmsw %ax # into protected mode
79 79
80 # flush prefetch and jump to startup_32 80 # flush prefetch and jump to startup_32
81 ljmpl *(startup_32_vector - r_base) 81 ljmpl *(startup_32_vector - r_base)
@@ -86,9 +86,8 @@ startup_32:
86 movl $__KERNEL_DS, %eax # Initialize the %ds segment register 86 movl $__KERNEL_DS, %eax # Initialize the %ds segment register
87 movl %eax, %ds 87 movl %eax, %ds
88 88
89 xorl %eax, %eax 89 movl $X86_CR4_PAE, %eax
90 btsl $5, %eax # Enable PAE mode 90 movl %eax, %cr4 # Enable PAE mode
91 movl %eax, %cr4
92 91
93 # Setup trampoline 4 level pagetables 92 # Setup trampoline 4 level pagetables
94 leal (trampoline_level4_pgt - r_base)(%esi), %eax 93 leal (trampoline_level4_pgt - r_base)(%esi), %eax
@@ -99,9 +98,9 @@ startup_32:
99 xorl %edx, %edx 98 xorl %edx, %edx
100 wrmsr 99 wrmsr
101 100
102 xorl %eax, %eax 101 # Enable paging and in turn activate Long Mode
103 btsl $31, %eax # Enable paging and in turn activate Long Mode 102 # Enable protected mode
104 btsl $0, %eax # Enable protected mode 103 movl $(X86_CR0_PG | X86_CR0_PE), %eax
105 movl %eax, %cr0 104 movl %eax, %cr0
106 105
107 /* 106 /*
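The trampoline cleanup above replaces the xor/inc/btsl bit twiddling with named constants from <asm/processor-flags.h>. The bits being set are protection enable and paging in CR0 and PAE in CR4; the same values in plain C, to make the new mov operands easy to check:

#include <stdio.h>

/* values as defined in <asm/processor-flags.h> */
#define X86_CR0_PE	0x00000001UL	/* Protection Enable */
#define X86_CR0_PG	0x80000000UL	/* Paging */
#define X86_CR4_PAE	0x00000020UL	/* Physical Address Extension */

int main(void)
{
	printf("lmsw operand (PE)   : %#lx\n", X86_CR0_PE);
	printf("CR4 operand (PAE)   : %#lx\n", X86_CR4_PAE);
	printf("CR0 operand (PG|PE) : %#lx\n", X86_CR0_PG | X86_CR0_PE);
	return 0;
}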
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284b..bde57f0f1616 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -54,12 +54,11 @@
54#include <asm/desc.h> 54#include <asm/desc.h>
55#include <asm/i387.h> 55#include <asm/i387.h>
56 56
57#include <mach_traps.h> 57#include <asm/mach_traps.h>
58 58
59#ifdef CONFIG_X86_64 59#ifdef CONFIG_X86_64
60#include <asm/pgalloc.h> 60#include <asm/pgalloc.h>
61#include <asm/proto.h> 61#include <asm/proto.h>
62#include <asm/pda.h>
63#else 62#else
64#include <asm/processor-flags.h> 63#include <asm/processor-flags.h>
65#include <asm/arch_hooks.h> 64#include <asm/arch_hooks.h>
@@ -896,7 +895,7 @@ asmlinkage void math_state_restore(void)
896EXPORT_SYMBOL_GPL(math_state_restore); 895EXPORT_SYMBOL_GPL(math_state_restore);
897 896
898#ifndef CONFIG_MATH_EMULATION 897#ifndef CONFIG_MATH_EMULATION
899asmlinkage void math_emulate(long arg) 898void math_emulate(struct math_emu_info *info)
900{ 899{
901 printk(KERN_EMERG 900 printk(KERN_EMERG
902 "math-emulation not enabled and no coprocessor found.\n"); 901 "math-emulation not enabled and no coprocessor found.\n");
@@ -907,12 +906,16 @@ asmlinkage void math_emulate(long arg)
907#endif /* CONFIG_MATH_EMULATION */ 906#endif /* CONFIG_MATH_EMULATION */
908 907
909dotraplinkage void __kprobes 908dotraplinkage void __kprobes
910do_device_not_available(struct pt_regs *regs, long error) 909do_device_not_available(struct pt_regs *regs, long error_code)
911{ 910{
912#ifdef CONFIG_X86_32 911#ifdef CONFIG_X86_32
913 if (read_cr0() & X86_CR0_EM) { 912 if (read_cr0() & X86_CR0_EM) {
913 struct math_emu_info info = { };
914
914 conditional_sti(regs); 915 conditional_sti(regs);
915 math_emulate(0); 916
917 info.regs = regs;
918 math_emulate(&info);
916 } else { 919 } else {
917 math_state_restore(); /* interrupts still off */ 920 math_state_restore(); /* interrupts still off */
918 conditional_sti(regs); 921 conditional_sti(regs);
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 599e58168631..83d53ce5d4c4 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -773,7 +773,7 @@ __cpuinit int unsynchronized_tsc(void)
773 if (!cpu_has_tsc || tsc_unstable) 773 if (!cpu_has_tsc || tsc_unstable)
774 return 1; 774 return 1;
775 775
776#ifdef CONFIG_X86_SMP 776#ifdef CONFIG_SMP
777 if (apic_is_clustered_box()) 777 if (apic_is_clustered_box())
778 return 1; 778 return 1;
779#endif 779#endif
diff --git a/arch/x86/kernel/visws_quirks.c b/arch/x86/kernel/visws_quirks.c
index d801d06af068..4fd646e6dd43 100644
--- a/arch/x86/kernel/visws_quirks.c
+++ b/arch/x86/kernel/visws_quirks.c
@@ -32,9 +32,9 @@
32#include <asm/e820.h> 32#include <asm/e820.h>
33#include <asm/io.h> 33#include <asm/io.h>
34 34
35#include <mach_ipi.h> 35#include <asm/genapic.h>
36 36
37#include "mach_apic.h" 37#include <asm/genapic.h>
38 38
39#include <linux/kernel_stat.h> 39#include <linux/kernel_stat.h>
40 40
@@ -200,7 +200,7 @@ static void __init MP_processor_info(struct mpc_cpu *m)
200 return; 200 return;
201 } 201 }
202 202
203 apic_cpus = apicid_to_cpu_present(m->apicid); 203 apic_cpus = apic->apicid_to_cpu_present(m->apicid);
204 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus); 204 physids_or(phys_cpu_present_map, phys_cpu_present_map, apic_cpus);
205 /* 205 /*
206 * Validate version 206 * Validate version
diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c
index 4eeb5cf9720d..d7ac84e7fc1c 100644
--- a/arch/x86/kernel/vm86_32.c
+++ b/arch/x86/kernel/vm86_32.c
@@ -158,7 +158,7 @@ struct pt_regs *save_v86_state(struct kernel_vm86_regs *regs)
158 ret = KVM86->regs32; 158 ret = KVM86->regs32;
159 159
160 ret->fs = current->thread.saved_fs; 160 ret->fs = current->thread.saved_fs;
161 loadsegment(gs, current->thread.saved_gs); 161 set_user_gs(ret, current->thread.saved_gs);
162 162
163 return ret; 163 return ret;
164} 164}
@@ -197,9 +197,9 @@ out:
197static int do_vm86_irq_handling(int subfunction, int irqnumber); 197static int do_vm86_irq_handling(int subfunction, int irqnumber);
198static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk); 198static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk);
199 199
200asmlinkage int sys_vm86old(struct pt_regs regs) 200int sys_vm86old(struct pt_regs *regs)
201{ 201{
202 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs.bx; 202 struct vm86_struct __user *v86 = (struct vm86_struct __user *)regs->bx;
203 struct kernel_vm86_struct info; /* declare this _on top_, 203 struct kernel_vm86_struct info; /* declare this _on top_,
204 * this avoids wasting of stack space. 204 * this avoids wasting of stack space.
205 * This remains on the stack until we 205 * This remains on the stack until we
@@ -218,7 +218,7 @@ asmlinkage int sys_vm86old(struct pt_regs regs)
218 if (tmp) 218 if (tmp)
219 goto out; 219 goto out;
220 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus); 220 memset(&info.vm86plus, 0, (int)&info.regs32 - (int)&info.vm86plus);
221 info.regs32 = &regs; 221 info.regs32 = regs;
222 tsk->thread.vm86_info = v86; 222 tsk->thread.vm86_info = v86;
223 do_sys_vm86(&info, tsk); 223 do_sys_vm86(&info, tsk);
224 ret = 0; /* we never return here */ 224 ret = 0; /* we never return here */
@@ -227,7 +227,7 @@ out:
227} 227}
228 228
229 229
230asmlinkage int sys_vm86(struct pt_regs regs) 230int sys_vm86(struct pt_regs *regs)
231{ 231{
232 struct kernel_vm86_struct info; /* declare this _on top_, 232 struct kernel_vm86_struct info; /* declare this _on top_,
233 * this avoids wasting of stack space. 233 * this avoids wasting of stack space.
@@ -239,12 +239,12 @@ asmlinkage int sys_vm86(struct pt_regs regs)
239 struct vm86plus_struct __user *v86; 239 struct vm86plus_struct __user *v86;
240 240
241 tsk = current; 241 tsk = current;
242 switch (regs.bx) { 242 switch (regs->bx) {
243 case VM86_REQUEST_IRQ: 243 case VM86_REQUEST_IRQ:
244 case VM86_FREE_IRQ: 244 case VM86_FREE_IRQ:
245 case VM86_GET_IRQ_BITS: 245 case VM86_GET_IRQ_BITS:
246 case VM86_GET_AND_RESET_IRQ: 246 case VM86_GET_AND_RESET_IRQ:
247 ret = do_vm86_irq_handling(regs.bx, (int)regs.cx); 247 ret = do_vm86_irq_handling(regs->bx, (int)regs->cx);
248 goto out; 248 goto out;
249 case VM86_PLUS_INSTALL_CHECK: 249 case VM86_PLUS_INSTALL_CHECK:
250 /* 250 /*
@@ -261,14 +261,14 @@ asmlinkage int sys_vm86(struct pt_regs regs)
261 ret = -EPERM; 261 ret = -EPERM;
262 if (tsk->thread.saved_sp0) 262 if (tsk->thread.saved_sp0)
263 goto out; 263 goto out;
264 v86 = (struct vm86plus_struct __user *)regs.cx; 264 v86 = (struct vm86plus_struct __user *)regs->cx;
265 tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs, 265 tmp = copy_vm86_regs_from_user(&info.regs, &v86->regs,
266 offsetof(struct kernel_vm86_struct, regs32) - 266 offsetof(struct kernel_vm86_struct, regs32) -
267 sizeof(info.regs)); 267 sizeof(info.regs));
268 ret = -EFAULT; 268 ret = -EFAULT;
269 if (tmp) 269 if (tmp)
270 goto out; 270 goto out;
271 info.regs32 = &regs; 271 info.regs32 = regs;
272 info.vm86plus.is_vm86pus = 1; 272 info.vm86plus.is_vm86pus = 1;
273 tsk->thread.vm86_info = (struct vm86_struct __user *)v86; 273 tsk->thread.vm86_info = (struct vm86_struct __user *)v86;
274 do_sys_vm86(&info, tsk); 274 do_sys_vm86(&info, tsk);
@@ -323,7 +323,7 @@ static void do_sys_vm86(struct kernel_vm86_struct *info, struct task_struct *tsk
323 info->regs32->ax = 0; 323 info->regs32->ax = 0;
324 tsk->thread.saved_sp0 = tsk->thread.sp0; 324 tsk->thread.saved_sp0 = tsk->thread.sp0;
325 tsk->thread.saved_fs = info->regs32->fs; 325 tsk->thread.saved_fs = info->regs32->fs;
326 savesegment(gs, tsk->thread.saved_gs); 326 tsk->thread.saved_gs = get_user_gs(info->regs32);
327 327
328 tss = &per_cpu(init_tss, get_cpu()); 328 tss = &per_cpu(init_tss, get_cpu());
329 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0; 329 tsk->thread.sp0 = (unsigned long) &info->VM86_TSS_ESP0;
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 1d3302cc2ddf..f052c84ecbe4 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -321,6 +321,16 @@ static void vmi_release_pmd(unsigned long pfn)
321} 321}
322 322
323/* 323/*
324 * We use the pgd_free hook for releasing the pgd page:
325 */
326static void vmi_pgd_free(struct mm_struct *mm, pgd_t *pgd)
327{
328 unsigned long pfn = __pa(pgd) >> PAGE_SHIFT;
329
330 vmi_ops.release_page(pfn, VMI_PAGE_L2);
331}
332
333/*
324 * Helper macros for MMU update flags. We can defer updates until a flush 334 * Helper macros for MMU update flags. We can defer updates until a flush
325 * or page invalidation only if the update is to the current address space 335 * or page invalidation only if the update is to the current address space
326 * (otherwise, there is no flush). We must check against init_mm, since 336 * (otherwise, there is no flush). We must check against init_mm, since
@@ -670,10 +680,11 @@ static inline int __init activate_vmi(void)
670 para_fill(pv_mmu_ops.write_cr2, SetCR2); 680 para_fill(pv_mmu_ops.write_cr2, SetCR2);
671 para_fill(pv_mmu_ops.write_cr3, SetCR3); 681 para_fill(pv_mmu_ops.write_cr3, SetCR3);
672 para_fill(pv_cpu_ops.write_cr4, SetCR4); 682 para_fill(pv_cpu_ops.write_cr4, SetCR4);
673 para_fill(pv_irq_ops.save_fl, GetInterruptMask); 683
674 para_fill(pv_irq_ops.restore_fl, SetInterruptMask); 684 para_fill(pv_irq_ops.save_fl.func, GetInterruptMask);
675 para_fill(pv_irq_ops.irq_disable, DisableInterrupts); 685 para_fill(pv_irq_ops.restore_fl.func, SetInterruptMask);
676 para_fill(pv_irq_ops.irq_enable, EnableInterrupts); 686 para_fill(pv_irq_ops.irq_disable.func, DisableInterrupts);
687 para_fill(pv_irq_ops.irq_enable.func, EnableInterrupts);
677 688
678 para_fill(pv_cpu_ops.wbinvd, WBINVD); 689 para_fill(pv_cpu_ops.wbinvd, WBINVD);
679 para_fill(pv_cpu_ops.read_tsc, RDTSC); 690 para_fill(pv_cpu_ops.read_tsc, RDTSC);
@@ -762,6 +773,7 @@ static inline int __init activate_vmi(void)
762 if (vmi_ops.release_page) { 773 if (vmi_ops.release_page) {
763 pv_mmu_ops.release_pte = vmi_release_pte; 774 pv_mmu_ops.release_pte = vmi_release_pte;
764 pv_mmu_ops.release_pmd = vmi_release_pmd; 775 pv_mmu_ops.release_pmd = vmi_release_pmd;
776 pv_mmu_ops.pgd_free = vmi_pgd_free;
765 } 777 }
766 778
767 /* Set linear is needed in all cases */ 779 /* Set linear is needed in all cases */
diff --git a/arch/x86/kernel/vmiclock_32.c b/arch/x86/kernel/vmiclock_32.c
index c4c1f9e09402..a4791ef412d1 100644
--- a/arch/x86/kernel/vmiclock_32.c
+++ b/arch/x86/kernel/vmiclock_32.c
@@ -256,7 +256,7 @@ void __devinit vmi_time_bsp_init(void)
256 */ 256 */
257 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL); 257 clockevents_notify(CLOCK_EVT_NOTIFY_SUSPEND, NULL);
258 local_irq_disable(); 258 local_irq_disable();
259#ifdef CONFIG_X86_SMP 259#ifdef CONFIG_SMP
260 /* 260 /*
261 * XXX handle_percpu_irq only defined for SMP; we need to switch over 261 * XXX handle_percpu_irq only defined for SMP; we need to switch over
262 * to using it, since this is a local interrupt, which each CPU must 262 * to using it, since this is a local interrupt, which each CPU must
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 82c67559dde7..3eba7f7bac05 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -178,14 +178,7 @@ SECTIONS
178 __initramfs_end = .; 178 __initramfs_end = .;
179 } 179 }
180#endif 180#endif
181 . = ALIGN(PAGE_SIZE); 181 PERCPU(PAGE_SIZE)
182 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
183 __per_cpu_start = .;
184 *(.data.percpu.page_aligned)
185 *(.data.percpu)
186 *(.data.percpu.shared_aligned)
187 __per_cpu_end = .;
188 }
189 . = ALIGN(PAGE_SIZE); 182 . = ALIGN(PAGE_SIZE);
190 /* freed after init ends here */ 183 /* freed after init ends here */
191 184
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0e6bef..087a7f2c639b 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,6 +5,7 @@
5#define LOAD_OFFSET __START_KERNEL_map 5#define LOAD_OFFSET __START_KERNEL_map
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h>
8#include <asm/page.h> 9#include <asm/page.h>
9 10
10#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
@@ -13,12 +14,15 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 14OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 15ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 16jiffies_64 = jiffies;
16_proxy_pda = 1;
17PHDRS { 17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
25 data.init2 PT_LOAD FLAGS(7); /* RWE */
22 note PT_NOTE FLAGS(0); /* ___ */ 26 note PT_NOTE FLAGS(0); /* ___ */
23} 27}
24SECTIONS 28SECTIONS
@@ -208,14 +212,28 @@ SECTIONS
208 __initramfs_end = .; 212 __initramfs_end = .;
209#endif 213#endif
210 214
215#ifdef CONFIG_SMP
216 /*
217 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
218 * output PHDR, so the next output section - __data_nosave - should
219 * start another section data.init2. Also, pda should be at the head of
220 * percpu area. Preallocate it and define the percpu offset symbol
221 * so that it can be accessed as a percpu variable.
222 */
223 . = ALIGN(PAGE_SIZE);
224 PERCPU_VADDR(0, :percpu)
225#else
211 PERCPU(PAGE_SIZE) 226 PERCPU(PAGE_SIZE)
227#endif
212 228
213 . = ALIGN(PAGE_SIZE); 229 . = ALIGN(PAGE_SIZE);
214 __init_end = .; 230 __init_end = .;
215 231
216 . = ALIGN(PAGE_SIZE); 232 . = ALIGN(PAGE_SIZE);
217 __nosave_begin = .; 233 __nosave_begin = .;
218 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } 234 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
235 *(.data.nosave)
236 } :data.init2 /* use another section data.init2, see PERCPU_VADDR() above */
219 . = ALIGN(PAGE_SIZE); 237 . = ALIGN(PAGE_SIZE);
220 __nosave_end = .; 238 __nosave_end = .;
221 239
@@ -239,8 +257,21 @@ SECTIONS
239 DWARF_DEBUG 257 DWARF_DEBUG
240} 258}
241 259
260 /*
261 * Per-cpu symbols which need to be offset from __per_cpu_load
262 * for the boot processor.
263 */
264#define INIT_PER_CPU(x) init_per_cpu__##x = per_cpu__##x + __per_cpu_load
265INIT_PER_CPU(gdt_page);
266INIT_PER_CPU(irq_stack_union);
267
242/* 268/*
243 * Build-time check on the image size: 269 * Build-time check on the image size:
244 */ 270 */
245ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 271ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
246 "kernel image bigger than KERNEL_IMAGE_SIZE") 272 "kernel image bigger than KERNEL_IMAGE_SIZE")
273
274#ifdef CONFIG_SMP
275ASSERT((per_cpu__irq_stack_union == 0),
276 "irq_stack_union is not at start of per-cpu area");
277#endif
diff --git a/arch/x86/kernel/vsmp_64.c b/arch/x86/kernel/vsmp_64.c
index a688f3bfaec2..c609205df594 100644
--- a/arch/x86/kernel/vsmp_64.c
+++ b/arch/x86/kernel/vsmp_64.c
@@ -37,6 +37,7 @@ static unsigned long vsmp_save_fl(void)
37 flags &= ~X86_EFLAGS_IF; 37 flags &= ~X86_EFLAGS_IF;
38 return flags; 38 return flags;
39} 39}
40PV_CALLEE_SAVE_REGS_THUNK(vsmp_save_fl);
40 41
41static void vsmp_restore_fl(unsigned long flags) 42static void vsmp_restore_fl(unsigned long flags)
42{ 43{
@@ -46,6 +47,7 @@ static void vsmp_restore_fl(unsigned long flags)
46 flags |= X86_EFLAGS_AC; 47 flags |= X86_EFLAGS_AC;
47 native_restore_fl(flags); 48 native_restore_fl(flags);
48} 49}
50PV_CALLEE_SAVE_REGS_THUNK(vsmp_restore_fl);
49 51
50static void vsmp_irq_disable(void) 52static void vsmp_irq_disable(void)
51{ 53{
@@ -53,6 +55,7 @@ static void vsmp_irq_disable(void)
53 55
54 native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC); 56 native_restore_fl((flags & ~X86_EFLAGS_IF) | X86_EFLAGS_AC);
55} 57}
58PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_disable);
56 59
57static void vsmp_irq_enable(void) 60static void vsmp_irq_enable(void)
58{ 61{
@@ -60,6 +63,7 @@ static void vsmp_irq_enable(void)
60 63
61 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC)); 64 native_restore_fl((flags | X86_EFLAGS_IF) & (~X86_EFLAGS_AC));
62} 65}
66PV_CALLEE_SAVE_REGS_THUNK(vsmp_irq_enable);
63 67
64static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf, 68static unsigned __init_or_module vsmp_patch(u8 type, u16 clobbers, void *ibuf,
65 unsigned long addr, unsigned len) 69 unsigned long addr, unsigned len)
@@ -90,10 +94,10 @@ static void __init set_vsmp_pv_ops(void)
90 cap, ctl); 94 cap, ctl);
91 if (cap & ctl & (1 << 4)) { 95 if (cap & ctl & (1 << 4)) {
92 /* Setup irq ops and turn on vSMP IRQ fastpath handling */ 96 /* Setup irq ops and turn on vSMP IRQ fastpath handling */
93 pv_irq_ops.irq_disable = vsmp_irq_disable; 97 pv_irq_ops.irq_disable = PV_CALLEE_SAVE(vsmp_irq_disable);
94 pv_irq_ops.irq_enable = vsmp_irq_enable; 98 pv_irq_ops.irq_enable = PV_CALLEE_SAVE(vsmp_irq_enable);
95 pv_irq_ops.save_fl = vsmp_save_fl; 99 pv_irq_ops.save_fl = PV_CALLEE_SAVE(vsmp_save_fl);
96 pv_irq_ops.restore_fl = vsmp_restore_fl; 100 pv_irq_ops.restore_fl = PV_CALLEE_SAVE(vsmp_restore_fl);
97 pv_init_ops.patch = vsmp_patch; 101 pv_init_ops.patch = vsmp_patch;
98 102
99 ctl &= ~(1 << 4); 103 ctl &= ~(1 << 4);
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426aa354..3909e3ba5ce3 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy);
58EXPORT_SYMBOL(empty_zero_page); 58EXPORT_SYMBOL(empty_zero_page);
59EXPORT_SYMBOL(init_level4_pgt); 59EXPORT_SYMBOL(init_level4_pgt);
60EXPORT_SYMBOL(load_gs_index); 60EXPORT_SYMBOL(load_gs_index);
61
62EXPORT_SYMBOL(_proxy_pda);