aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel
diff options
context:
space:
mode:
author Ingo Molnar <mingo@elte.hu> 2009-01-18 12:15:49 -0500
committer Ingo Molnar <mingo@elte.hu> 2009-01-18 12:15:49 -0500
commit af37501c792107c2bde1524bdae38d9a247b841a (patch)
tree b50ee90d29e72956b8b7d8d19677fe5996755d49 /arch/x86/kernel
parent d859e29fe34cb833071b20aef860ee94fbad9bb2 (diff)
parent 99937d6455cea95405ac681c86a857d0fcd530bd (diff)
Merge branch 'core/percpu' into perfcounters/core
Conflicts:
	arch/x86/include/asm/pda.h

We merge tip/core/percpu into tip/perfcounters/core because of a semantic and contextual conflict: the former eliminates the PDA, while the latter extends it with apic_perf_irqs field.

Resolve the conflict by moving the new field to the irq_cpustat structure on 64-bit too.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86/kernel')
-rw-r--r-- arch/x86/kernel/acpi/boot.c 96
-rw-r--r-- arch/x86/kernel/acpi/sleep.c 1
-rw-r--r-- arch/x86/kernel/apic.c 25
-rw-r--r-- arch/x86/kernel/asm-offsets_64.c 8
-rw-r--r-- arch/x86/kernel/cpu/common.c 80
-rw-r--r-- arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c 27
-rw-r--r-- arch/x86/kernel/cpu/intel_cacheinfo.c 63
-rw-r--r-- arch/x86/kernel/cpu/mcheck/mce_amd_64.c 21
-rw-r--r-- arch/x86/kernel/crash.c 2
-rw-r--r-- arch/x86/kernel/dumpstack_64.c 35
-rw-r--r-- arch/x86/kernel/entry_32.S 2
-rw-r--r-- arch/x86/kernel/entry_64.S 41
-rw-r--r-- arch/x86/kernel/head64.c 23
-rw-r--r-- arch/x86/kernel/head_64.S 47
-rw-r--r-- arch/x86/kernel/io_apic.c 163
-rw-r--r-- arch/x86/kernel/irq.c 6
-rw-r--r-- arch/x86/kernel/irq_32.c 2
-rw-r--r-- arch/x86/kernel/irq_64.c 5
-rw-r--r-- arch/x86/kernel/microcode_intel.c 10
-rw-r--r-- arch/x86/kernel/module_32.c 6
-rw-r--r-- arch/x86/kernel/module_64.c 32
-rw-r--r-- arch/x86/kernel/mpparse.c 143
-rw-r--r-- arch/x86/kernel/msr.c 2
-rw-r--r-- arch/x86/kernel/nmi.c 10
-rw-r--r-- arch/x86/kernel/process_32.c 5
-rw-r--r-- arch/x86/kernel/process_64.c 22
-rw-r--r-- arch/x86/kernel/reboot.c 1
-rw-r--r-- arch/x86/kernel/setup.c 2
-rw-r--r-- arch/x86/kernel/setup_percpu.c 208
-rw-r--r-- arch/x86/kernel/smpboot.c 69
-rw-r--r-- arch/x86/kernel/smpcommon.c 10
-rw-r--r-- arch/x86/kernel/tlb_32.c 85
-rw-r--r-- arch/x86/kernel/tlb_64.c 76
-rw-r--r-- arch/x86/kernel/tlb_uv.c 16
-rw-r--r-- arch/x86/kernel/vmlinux_32.lds.S 9
-rw-r--r-- arch/x86/kernel/vmlinux_64.lds.S 22
-rw-r--r-- arch/x86/kernel/x8664_ksyms_64.c 2
37 files changed, 694 insertions, 683 deletions
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f43..4cb5964f149 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id)
912 DECLARE_BITMAP(used, 256); 912 DECLARE_BITMAP(used, 256);
913 bitmap_zero(used, 256); 913 bitmap_zero(used, 256);
914 for (i = 0; i < nr_ioapics; i++) { 914 for (i = 0; i < nr_ioapics; i++) {
915 struct mp_config_ioapic *ia = &mp_ioapics[i]; 915 struct mpc_ioapic *ia = &mp_ioapics[i];
916 __set_bit(ia->mp_apicid, used); 916 __set_bit(ia->apicid, used);
917 } 917 }
918 if (!test_bit(id, used)) 918 if (!test_bit(id, used))
919 return id; 919 return id;
@@ -945,47 +945,47 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
945 945
946 idx = nr_ioapics; 946 idx = nr_ioapics;
947 947
948 mp_ioapics[idx].mp_type = MP_IOAPIC; 948 mp_ioapics[idx].type = MP_IOAPIC;
949 mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; 949 mp_ioapics[idx].flags = MPC_APIC_USABLE;
950 mp_ioapics[idx].mp_apicaddr = address; 950 mp_ioapics[idx].apicaddr = address;
951 951
952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
953 mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); 953 mp_ioapics[idx].apicid = uniq_ioapic_id(id);
954#ifdef CONFIG_X86_32 954#ifdef CONFIG_X86_32
955 mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); 955 mp_ioapics[idx].apicver = io_apic_get_version(idx);
956#else 956#else
957 mp_ioapics[idx].mp_apicver = 0; 957 mp_ioapics[idx].apicver = 0;
958#endif 958#endif
959 /* 959 /*
960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
962 */ 962 */
963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; 963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
964 mp_ioapic_routing[idx].gsi_base = gsi_base; 964 mp_ioapic_routing[idx].gsi_base = gsi_base;
965 mp_ioapic_routing[idx].gsi_end = gsi_base + 965 mp_ioapic_routing[idx].gsi_end = gsi_base +
966 io_apic_get_redir_entries(idx); 966 io_apic_get_redir_entries(idx);
967 967
968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " 968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
969 "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, 969 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
970 mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, 970 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); 971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
972 972
973 nr_ioapics++; 973 nr_ioapics++;
974} 974}
975 975
976static void assign_to_mp_irq(struct mp_config_intsrc *m, 976static void assign_to_mp_irq(struct mpc_intsrc *m,
977 struct mp_config_intsrc *mp_irq) 977 struct mpc_intsrc *mp_irq)
978{ 978{
979 memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); 979 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
980} 980}
981 981
982static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, 982static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
983 struct mp_config_intsrc *m) 983 struct mpc_intsrc *m)
984{ 984{
985 return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); 985 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
986} 986}
987 987
988static void save_mp_irq(struct mp_config_intsrc *m) 988static void save_mp_irq(struct mpc_intsrc *m)
989{ 989{
990 int i; 990 int i;
991 991
@@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1003{ 1003{
1004 int ioapic; 1004 int ioapic;
1005 int pin; 1005 int pin;
1006 struct mp_config_intsrc mp_irq; 1006 struct mpc_intsrc mp_irq;
1007 1007
1008 /* 1008 /*
1009 * Convert 'gsi' to 'ioapic.pin'. 1009 * Convert 'gsi' to 'ioapic.pin'.
@@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1021 if ((bus_irq == 0) && (trigger == 3)) 1021 if ((bus_irq == 0) && (trigger == 3))
1022 trigger = 1; 1022 trigger = 1;
1023 1023
1024 mp_irq.mp_type = MP_INTSRC; 1024 mp_irq.type = MP_INTSRC;
1025 mp_irq.mp_irqtype = mp_INT; 1025 mp_irq.irqtype = mp_INT;
1026 mp_irq.mp_irqflag = (trigger << 2) | polarity; 1026 mp_irq.irqflag = (trigger << 2) | polarity;
1027 mp_irq.mp_srcbus = MP_ISA_BUS; 1027 mp_irq.srcbus = MP_ISA_BUS;
1028 mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ 1028 mp_irq.srcbusirq = bus_irq; /* IRQ */
1029 mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ 1029 mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
1030 mp_irq.mp_dstirq = pin; /* INTIN# */ 1030 mp_irq.dstirq = pin; /* INTIN# */
1031 1031
1032 save_mp_irq(&mp_irq); 1032 save_mp_irq(&mp_irq);
1033} 1033}
@@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1037 int i; 1037 int i;
1038 int ioapic; 1038 int ioapic;
1039 unsigned int dstapic; 1039 unsigned int dstapic;
1040 struct mp_config_intsrc mp_irq; 1040 struct mpc_intsrc mp_irq;
1041 1041
1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA) 1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
1043 /* 1043 /*
@@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1062 ioapic = mp_find_ioapic(0); 1062 ioapic = mp_find_ioapic(0);
1063 if (ioapic < 0) 1063 if (ioapic < 0)
1064 return; 1064 return;
1065 dstapic = mp_ioapics[ioapic].mp_apicid; 1065 dstapic = mp_ioapics[ioapic].apicid;
1066 1066
1067 /* 1067 /*
1068 * Use the default configuration for the IRQs 0-15. Unless 1068 * Use the default configuration for the IRQs 0-15. Unless
@@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void)
1072 int idx; 1072 int idx;
1073 1073
1074 for (idx = 0; idx < mp_irq_entries; idx++) { 1074 for (idx = 0; idx < mp_irq_entries; idx++) {
1075 struct mp_config_intsrc *irq = mp_irqs + idx; 1075 struct mpc_intsrc *irq = mp_irqs + idx;
1076 1076
1077 /* Do we already have a mapping for this ISA IRQ? */ 1077 /* Do we already have a mapping for this ISA IRQ? */
1078 if (irq->mp_srcbus == MP_ISA_BUS 1078 if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
1079 && irq->mp_srcbusirq == i)
1080 break; 1079 break;
1081 1080
1082 /* Do we already have a mapping for this IOAPIC pin */ 1081 /* Do we already have a mapping for this IOAPIC pin */
1083 if (irq->mp_dstapic == dstapic && 1082 if (irq->dstapic == dstapic && irq->dstirq == i)
1084 irq->mp_dstirq == i)
1085 break; 1083 break;
1086 } 1084 }
1087 1085
@@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void)
1090 continue; /* IRQ already used */ 1088 continue; /* IRQ already used */
1091 } 1089 }
1092 1090
1093 mp_irq.mp_type = MP_INTSRC; 1091 mp_irq.type = MP_INTSRC;
1094 mp_irq.mp_irqflag = 0; /* Conforming */ 1092 mp_irq.irqflag = 0; /* Conforming */
1095 mp_irq.mp_srcbus = MP_ISA_BUS; 1093 mp_irq.srcbus = MP_ISA_BUS;
1096 mp_irq.mp_dstapic = dstapic; 1094 mp_irq.dstapic = dstapic;
1097 mp_irq.mp_irqtype = mp_INT; 1095 mp_irq.irqtype = mp_INT;
1098 mp_irq.mp_srcbusirq = i; /* Identity mapped */ 1096 mp_irq.srcbusirq = i; /* Identity mapped */
1099 mp_irq.mp_dstirq = i; 1097 mp_irq.dstirq = i;
1100 1098
1101 save_mp_irq(&mp_irq); 1099 save_mp_irq(&mp_irq);
1102 } 1100 }
@@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
1207 u32 gsi, int triggering, int polarity) 1205 u32 gsi, int triggering, int polarity)
1208{ 1206{
1209#ifdef CONFIG_X86_MPPARSE 1207#ifdef CONFIG_X86_MPPARSE
1210 struct mp_config_intsrc mp_irq; 1208 struct mpc_intsrc mp_irq;
1211 int ioapic; 1209 int ioapic;
1212 1210
1213 if (!acpi_ioapic) 1211 if (!acpi_ioapic)
1214 return 0; 1212 return 0;
1215 1213
1216 /* print the entry should happen on mptable identically */ 1214 /* print the entry should happen on mptable identically */
1217 mp_irq.mp_type = MP_INTSRC; 1215 mp_irq.type = MP_INTSRC;
1218 mp_irq.mp_irqtype = mp_INT; 1216 mp_irq.irqtype = mp_INT;
1219 mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | 1217 mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
1220 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); 1218 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
1221 mp_irq.mp_srcbus = number; 1219 mp_irq.srcbus = number;
1222 mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); 1220 mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
1223 ioapic = mp_find_ioapic(gsi); 1221 ioapic = mp_find_ioapic(gsi);
1224 mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; 1222 mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
1225 mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; 1223 mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
1226 1224
1227 save_mp_irq(&mp_irq); 1225 save_mp_irq(&mp_irq);
1228#endif 1226#endif
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 707c1f6f95f..4abff454c55 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
101 stack_start.sp = temp_stack + sizeof(temp_stack); 101 stack_start.sp = temp_stack + sizeof(temp_stack);
102 early_gdt_descr.address = 102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
104 initial_gs = per_cpu_offset(smp_processor_id());
104#endif 105#endif
105 initial_code = (unsigned long)wakeup_long64; 106 initial_code = (unsigned long)wakeup_long64;
106 saved_magic = 0x123456789abcdef0; 107 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index d2d17b8d10f..e9af14f748e 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -48,6 +48,7 @@
48#include <asm/proto.h> 48#include <asm/proto.h>
49#include <asm/apic.h> 49#include <asm/apic.h>
50#include <asm/i8259.h> 50#include <asm/i8259.h>
51#include <asm/smp.h>
51 52
52#include <mach_apic.h> 53#include <mach_apic.h>
53#include <mach_apicdef.h> 54#include <mach_apicdef.h>
@@ -895,6 +896,10 @@ void disable_local_APIC(void)
895{ 896{
896 unsigned int value; 897 unsigned int value;
897 898
899 /* APIC hasn't been mapped yet */
900 if (!apic_phys)
901 return;
902
898 clear_local_APIC(); 903 clear_local_APIC();
899 904
900 /* 905 /*
@@ -1126,6 +1131,11 @@ void __cpuinit setup_local_APIC(void)
1126 unsigned int value; 1131 unsigned int value;
1127 int i, j; 1132 int i, j;
1128 1133
1134 if (disable_apic) {
1135 disable_ioapic_setup();
1136 return;
1137 }
1138
1129#ifdef CONFIG_X86_32 1139#ifdef CONFIG_X86_32
1130 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1140 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1131 if (lapic_is_integrated() && esr_disable) { 1141 if (lapic_is_integrated() && esr_disable) {
@@ -1567,11 +1577,11 @@ int apic_version[MAX_APICS];
1567 1577
1568int __init APIC_init_uniprocessor(void) 1578int __init APIC_init_uniprocessor(void)
1569{ 1579{
1570#ifdef CONFIG_X86_64
1571 if (disable_apic) { 1580 if (disable_apic) {
1572 pr_info("Apic disabled\n"); 1581 pr_info("Apic disabled\n");
1573 return -1; 1582 return -1;
1574 } 1583 }
1584#ifdef CONFIG_X86_64
1575 if (!cpu_has_apic) { 1585 if (!cpu_has_apic) {
1576 disable_apic = 1; 1586 disable_apic = 1;
1577 pr_info("Apic disabled by BIOS\n"); 1587 pr_info("Apic disabled by BIOS\n");
@@ -1869,17 +1879,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1869#endif 1879#endif
1870 1880
1871#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) 1881#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1872 /* are we being called early in kernel startup? */ 1882 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1873 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1883 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1874 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1875 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1876
1877 cpu_to_apicid[cpu] = apicid;
1878 bios_cpu_apicid[cpu] = apicid;
1879 } else {
1880 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1881 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1882 }
1883#endif 1884#endif
1884 1885
1885 set_cpu_possible(cpu, true); 1886 set_cpu_possible(cpu, true);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edb..64c834a39aa 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -49,13 +49,7 @@ int main(void)
49 BLANK(); 49 BLANK();
50#undef ENTRY 50#undef ENTRY
51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) 51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
52 ENTRY(kernelstack); 52 DEFINE(pda_size, sizeof(struct x8664_pda));
53 ENTRY(oldrsp);
54 ENTRY(pcurrent);
55 ENTRY(irqcount);
56 ENTRY(cpunumber);
57 ENTRY(irqstackptr);
58 ENTRY(data_offset);
59 BLANK(); 53 BLANK();
60#undef ENTRY 54#undef ENTRY
61#ifdef CONFIG_PARAVIRT 55#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 667e5d561ed..95eb30e1e67 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -22,6 +22,8 @@
22#include <asm/asm.h> 22#include <asm/asm.h>
23#include <asm/numa.h> 23#include <asm/numa.h>
24#include <asm/smp.h> 24#include <asm/smp.h>
25#include <asm/cpu.h>
26#include <asm/cpumask.h>
25#ifdef CONFIG_X86_LOCAL_APIC 27#ifdef CONFIG_X86_LOCAL_APIC
26#include <asm/mpspec.h> 28#include <asm/mpspec.h>
27#include <asm/apic.h> 29#include <asm/apic.h>
@@ -879,54 +881,34 @@ static __init int setup_disablecpuid(char *arg)
879__setup("clearcpuid=", setup_disablecpuid); 881__setup("clearcpuid=", setup_disablecpuid);
880 882
881#ifdef CONFIG_X86_64 883#ifdef CONFIG_X86_64
882struct x8664_pda **_cpu_pda __read_mostly;
883EXPORT_SYMBOL(_cpu_pda);
884
885struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 884struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
886 885
887static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 886DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
887#ifdef CONFIG_SMP
888DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
889#else
890DEFINE_PER_CPU(char *, irq_stack_ptr) =
891 per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
892#endif
893
894DEFINE_PER_CPU(unsigned long, kernel_stack) =
895 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
896EXPORT_PER_CPU_SYMBOL(kernel_stack);
897
898DEFINE_PER_CPU(unsigned int, irq_count) = -1;
888 899
889void __cpuinit pda_init(int cpu) 900void __cpuinit pda_init(int cpu)
890{ 901{
891 struct x8664_pda *pda = cpu_pda(cpu);
892
893 /* Setup up data that may be needed in __get_free_pages early */ 902 /* Setup up data that may be needed in __get_free_pages early */
894 loadsegment(fs, 0); 903 loadsegment(fs, 0);
895 loadsegment(gs, 0); 904 loadsegment(gs, 0);
896 /* Memory clobbers used to order PDA accessed */
897 mb();
898 wrmsrl(MSR_GS_BASE, pda);
899 mb();
900
901 pda->cpunumber = cpu;
902 pda->irqcount = -1;
903 pda->kernelstack = (unsigned long)stack_thread_info() -
904 PDA_STACKOFFSET + THREAD_SIZE;
905 pda->active_mm = &init_mm;
906 pda->mmu_state = 0;
907
908 if (cpu == 0) {
909 /* others are initialized in smpboot.c */
910 pda->pcurrent = &init_task;
911 pda->irqstackptr = boot_cpu_stack;
912 pda->irqstackptr += IRQSTACKSIZE - 64;
913 } else {
914 if (!pda->irqstackptr) {
915 pda->irqstackptr = (char *)
916 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
917 if (!pda->irqstackptr)
918 panic("cannot allocate irqstack for cpu %d",
919 cpu);
920 pda->irqstackptr += IRQSTACKSIZE - 64;
921 }
922 905
923 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 906 load_pda_offset(cpu);
924 pda->nodenumber = cpu_to_node(cpu);
925 }
926} 907}
927 908
928static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 909static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
929 DEBUG_STKSZ] __page_aligned_bss; 910 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
911 __aligned(PAGE_SIZE);
930 912
931extern asmlinkage void ignore_sysret(void); 913extern asmlinkage void ignore_sysret(void);
932 914
@@ -984,15 +966,18 @@ void __cpuinit cpu_init(void)
984 struct tss_struct *t = &per_cpu(init_tss, cpu); 966 struct tss_struct *t = &per_cpu(init_tss, cpu);
985 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); 967 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
986 unsigned long v; 968 unsigned long v;
987 char *estacks = NULL;
988 struct task_struct *me; 969 struct task_struct *me;
989 int i; 970 int i;
990 971
991 /* CPU 0 is initialised in head64.c */ 972 /* CPU 0 is initialised in head64.c */
992 if (cpu != 0) 973 if (cpu != 0)
993 pda_init(cpu); 974 pda_init(cpu);
994 else 975
995 estacks = boot_exception_stacks; 976#ifdef CONFIG_NUMA
977 if (cpu != 0 && percpu_read(node_number) == 0 &&
978 cpu_to_node(cpu) != NUMA_NO_NODE)
979 percpu_write(node_number, cpu_to_node(cpu));
980#endif
996 981
997 me = current; 982 me = current;
998 983
@@ -1026,18 +1011,13 @@ void __cpuinit cpu_init(void)
1026 * set up and load the per-CPU TSS 1011 * set up and load the per-CPU TSS
1027 */ 1012 */
1028 if (!orig_ist->ist[0]) { 1013 if (!orig_ist->ist[0]) {
1029 static const unsigned int order[N_EXCEPTION_STACKS] = { 1014 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1030 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1015 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1031 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 1016 [DEBUG_STACK - 1] = DEBUG_STKSZ
1032 }; 1017 };
1018 char *estacks = per_cpu(exception_stacks, cpu);
1033 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1019 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1034 if (cpu) { 1020 estacks += sizes[v];
1035 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1036 if (!estacks)
1037 panic("Cannot allocate exception "
1038 "stack %ld %d\n", v, cpu);
1039 }
1040 estacks += PAGE_SIZE << order[v];
1041 orig_ist->ist[v] = t->x86_tss.ist[v] = 1021 orig_ist->ist[v] = t->x86_tss.ist[v] =
1042 (unsigned long)estacks; 1022 (unsigned long)estacks;
1043 } 1023 }
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 06fcd8f9323..8f3c95c7e61 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -150,9 +150,8 @@ struct drv_cmd {
150 u32 val; 150 u32 val;
151}; 151};
152 152
153static long do_drv_read(void *_cmd) 153static void do_drv_read(struct drv_cmd *cmd)
154{ 154{
155 struct drv_cmd *cmd = _cmd;
156 u32 h; 155 u32 h;
157 156
158 switch (cmd->type) { 157 switch (cmd->type) {
@@ -167,12 +166,10 @@ static long do_drv_read(void *_cmd)
167 default: 166 default:
168 break; 167 break;
169 } 168 }
170 return 0;
171} 169}
172 170
173static long do_drv_write(void *_cmd) 171static void do_drv_write(struct drv_cmd *cmd)
174{ 172{
175 struct drv_cmd *cmd = _cmd;
176 u32 lo, hi; 173 u32 lo, hi;
177 174
178 switch (cmd->type) { 175 switch (cmd->type) {
@@ -189,23 +186,30 @@ static long do_drv_write(void *_cmd)
189 default: 186 default:
190 break; 187 break;
191 } 188 }
192 return 0;
193} 189}
194 190
195static void drv_read(struct drv_cmd *cmd) 191static void drv_read(struct drv_cmd *cmd)
196{ 192{
193 cpumask_t saved_mask = current->cpus_allowed;
197 cmd->val = 0; 194 cmd->val = 0;
198 195
199 work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); 196 set_cpus_allowed_ptr(current, cmd->mask);
197 do_drv_read(cmd);
198 set_cpus_allowed_ptr(current, &saved_mask);
200} 199}
201 200
202static void drv_write(struct drv_cmd *cmd) 201static void drv_write(struct drv_cmd *cmd)
203{ 202{
203 cpumask_t saved_mask = current->cpus_allowed;
204 unsigned int i; 204 unsigned int i;
205 205
206 for_each_cpu(i, cmd->mask) { 206 for_each_cpu(i, cmd->mask) {
207 work_on_cpu(i, do_drv_write, cmd); 207 set_cpus_allowed_ptr(current, cpumask_of(i));
208 do_drv_write(cmd);
208 } 209 }
210
211 set_cpus_allowed_ptr(current, &saved_mask);
212 return;
209} 213}
210 214
211static u32 get_cur_val(const struct cpumask *mask) 215static u32 get_cur_val(const struct cpumask *mask)
@@ -231,15 +235,8 @@ static u32 get_cur_val(const struct cpumask *mask)
231 return 0; 235 return 0;
232 } 236 }
233 237
234 if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
235 return 0;
236
237 cpumask_copy(cmd.mask, mask);
238
239 drv_read(&cmd); 238 drv_read(&cmd);
240 239
241 free_cpumask_var(cmd.mask);
242
243 dprintk("get_cur_val = %u\n", cmd.val); 240 dprintk("get_cur_val = %u\n", cmd.val);
244 241
245 return cmd.val; 242 return cmd.val;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 48533d77be7..58527a9fc40 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -132,7 +132,16 @@ struct _cpuid4_info {
132 union _cpuid4_leaf_ecx ecx; 132 union _cpuid4_leaf_ecx ecx;
133 unsigned long size; 133 unsigned long size;
134 unsigned long can_disable; 134 unsigned long can_disable;
135 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 135 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
136};
137
138/* subset of above _cpuid4_info w/o shared_cpu_map */
139struct _cpuid4_info_regs {
140 union _cpuid4_leaf_eax eax;
141 union _cpuid4_leaf_ebx ebx;
142 union _cpuid4_leaf_ecx ecx;
143 unsigned long size;
144 unsigned long can_disable;
136}; 145};
137 146
138#ifdef CONFIG_PCI 147#ifdef CONFIG_PCI
@@ -263,7 +272,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
263} 272}
264 273
265static void __cpuinit 274static void __cpuinit
266amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) 275amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
267{ 276{
268 if (index < 3) 277 if (index < 3)
269 return; 278 return;
@@ -271,7 +280,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
271} 280}
272 281
273static int 282static int
274__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 283__cpuinit cpuid4_cache_lookup_regs(int index,
284 struct _cpuid4_info_regs *this_leaf)
275{ 285{
276 union _cpuid4_leaf_eax eax; 286 union _cpuid4_leaf_eax eax;
277 union _cpuid4_leaf_ebx ebx; 287 union _cpuid4_leaf_ebx ebx;
@@ -299,6 +309,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
299 return 0; 309 return 0;
300} 310}
301 311
312static int
313__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
314{
315 struct _cpuid4_info_regs *leaf_regs =
316 (struct _cpuid4_info_regs *)this_leaf;
317
318 return cpuid4_cache_lookup_regs(index, leaf_regs);
319}
320
302static int __cpuinit find_num_cache_leaves(void) 321static int __cpuinit find_num_cache_leaves(void)
303{ 322{
304 unsigned int eax, ebx, ecx, edx; 323 unsigned int eax, ebx, ecx, edx;
@@ -338,11 +357,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
338 * parameters cpuid leaf to find the cache details 357 * parameters cpuid leaf to find the cache details
339 */ 358 */
340 for (i = 0; i < num_cache_leaves; i++) { 359 for (i = 0; i < num_cache_leaves; i++) {
341 struct _cpuid4_info this_leaf; 360 struct _cpuid4_info_regs this_leaf;
342
343 int retval; 361 int retval;
344 362
345 retval = cpuid4_cache_lookup(i, &this_leaf); 363 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
346 if (retval >= 0) { 364 if (retval >= 0) {
347 switch(this_leaf.eax.split.level) { 365 switch(this_leaf.eax.split.level) {
348 case 1: 366 case 1:
@@ -491,17 +509,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
491 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 509 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
492 510
493 if (num_threads_sharing == 1) 511 if (num_threads_sharing == 1)
494 cpu_set(cpu, this_leaf->shared_cpu_map); 512 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
495 else { 513 else {
496 index_msb = get_count_order(num_threads_sharing); 514 index_msb = get_count_order(num_threads_sharing);
497 515
498 for_each_online_cpu(i) { 516 for_each_online_cpu(i) {
499 if (cpu_data(i).apicid >> index_msb == 517 if (cpu_data(i).apicid >> index_msb ==
500 c->apicid >> index_msb) { 518 c->apicid >> index_msb) {
501 cpu_set(i, this_leaf->shared_cpu_map); 519 cpumask_set_cpu(i,
520 to_cpumask(this_leaf->shared_cpu_map));
502 if (i != cpu && per_cpu(cpuid4_info, i)) { 521 if (i != cpu && per_cpu(cpuid4_info, i)) {
503 sibling_leaf = CPUID4_INFO_IDX(i, index); 522 sibling_leaf =
504 cpu_set(cpu, sibling_leaf->shared_cpu_map); 523 CPUID4_INFO_IDX(i, index);
524 cpumask_set_cpu(cpu, to_cpumask(
525 sibling_leaf->shared_cpu_map));
505 } 526 }
506 } 527 }
507 } 528 }
@@ -513,9 +534,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
513 int sibling; 534 int sibling;
514 535
515 this_leaf = CPUID4_INFO_IDX(cpu, index); 536 this_leaf = CPUID4_INFO_IDX(cpu, index);
516 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 537 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
517 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 538 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
518 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 539 cpumask_clear_cpu(cpu,
540 to_cpumask(sibling_leaf->shared_cpu_map));
519 } 541 }
520} 542}
521#else 543#else
@@ -620,8 +642,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
620 int n = 0; 642 int n = 0;
621 643
622 if (len > 1) { 644 if (len > 1) {
623 cpumask_t *mask = &this_leaf->shared_cpu_map; 645 const struct cpumask *mask;
624 646
647 mask = to_cpumask(this_leaf->shared_cpu_map);
625 n = type? 648 n = type?
626 cpulist_scnprintf(buf, len-2, mask) : 649 cpulist_scnprintf(buf, len-2, mask) :
627 cpumask_scnprintf(buf, len-2, mask); 650 cpumask_scnprintf(buf, len-2, mask);
@@ -684,7 +707,8 @@ static struct pci_dev *get_k8_northbridge(int node)
684 707
685static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) 708static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
686{ 709{
687 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 710 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
711 int node = cpu_to_node(cpumask_first(mask));
688 struct pci_dev *dev = NULL; 712 struct pci_dev *dev = NULL;
689 ssize_t ret = 0; 713 ssize_t ret = 0;
690 int i; 714 int i;
@@ -718,7 +742,8 @@ static ssize_t
718store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, 742store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
719 size_t count) 743 size_t count)
720{ 744{
721 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 745 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
746 int node = cpu_to_node(cpumask_first(mask));
722 struct pci_dev *dev = NULL; 747 struct pci_dev *dev = NULL;
723 unsigned int ret, index, val; 748 unsigned int ret, index, val;
724 749
@@ -863,7 +888,7 @@ err_out:
863 return -ENOMEM; 888 return -ENOMEM;
864} 889}
865 890
866static cpumask_t cache_dev_map = CPU_MASK_NONE; 891static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
867 892
868/* Add/Remove cache interface for CPU device */ 893/* Add/Remove cache interface for CPU device */
869static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 894static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -903,7 +928,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
903 } 928 }
904 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 929 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
905 } 930 }
906 cpu_set(cpu, cache_dev_map); 931 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
907 932
908 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 933 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
909 return 0; 934 return 0;
@@ -916,9 +941,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
916 941
917 if (per_cpu(cpuid4_info, cpu) == NULL) 942 if (per_cpu(cpuid4_info, cpu) == NULL)
918 return; 943 return;
919 if (!cpu_isset(cpu, cache_dev_map)) 944 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
920 return; 945 return;
921 cpu_clear(cpu, cache_dev_map); 946 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
922 947
923 for (i = 0; i < num_cache_leaves; i++) 948 for (i = 0; i < num_cache_leaves; i++)
924 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 949 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 8ae8c4ff094..4772e91e824 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
481 481
482#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
484 i = first_cpu(per_cpu(cpu_core_map, cpu)); 484 i = cpumask_first(&per_cpu(cpu_core_map, cpu));
485 485
486 /* first core not up yet */ 486 /* first core not up yet */
487 if (cpu_data(i).cpu_core_id) 487 if (cpu_data(i).cpu_core_id)
@@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
501 if (err) 501 if (err)
502 goto out; 502 goto out;
503 503
504 b->cpus = per_cpu(cpu_core_map, cpu); 504 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
505 per_cpu(threshold_banks, cpu)[bank] = b; 505 per_cpu(threshold_banks, cpu)[bank] = b;
506 goto out; 506 goto out;
507 } 507 }
@@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
512 err = -ENOMEM; 512 err = -ENOMEM;
513 goto out; 513 goto out;
514 } 514 }
515 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
516 kfree(b);
517 err = -ENOMEM;
518 goto out;
519 }
515 520
516 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 521 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
517 if (!b->kobj) 522 if (!b->kobj)
518 goto out_free; 523 goto out_free;
519 524
520#ifndef CONFIG_SMP 525#ifndef CONFIG_SMP
521 b->cpus = CPU_MASK_ALL; 526 cpumask_setall(b->cpus);
522#else 527#else
523 b->cpus = per_cpu(cpu_core_map, cpu); 528 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
524#endif 529#endif
525 530
526 per_cpu(threshold_banks, cpu)[bank] = b; 531 per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
529 if (err) 534 if (err)
530 goto out_free; 535 goto out_free;
531 536
532 for_each_cpu_mask_nr(i, b->cpus) { 537 for_each_cpu(i, b->cpus) {
533 if (i == cpu) 538 if (i == cpu)
534 continue; 539 continue;
535 540
@@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
545 550
546out_free: 551out_free:
547 per_cpu(threshold_banks, cpu)[bank] = NULL; 552 per_cpu(threshold_banks, cpu)[bank] = NULL;
553 free_cpumask_var(b->cpus);
548 kfree(b); 554 kfree(b);
549out: 555out:
550 return err; 556 return err;
@@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
619#endif 625#endif
620 626
621 /* remove all sibling symlinks before unregistering */ 627 /* remove all sibling symlinks before unregistering */
622 for_each_cpu_mask_nr(i, b->cpus) { 628 for_each_cpu(i, b->cpus) {
623 if (i == cpu) 629 if (i == cpu)
624 continue; 630 continue;
625 631
@@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
632free_out: 638free_out:
633 kobject_del(b->kobj); 639 kobject_del(b->kobj);
634 kobject_put(b->kobj); 640 kobject_put(b->kobj);
641 free_cpumask_var(b->cpus);
635 kfree(b); 642 kfree(b);
636 per_cpu(threshold_banks, cpu)[bank] = NULL; 643 per_cpu(threshold_banks, cpu)[bank] = NULL;
637} 644}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19e35a..11b93cabdf7 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,7 +24,7 @@
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/hpet.h> 25#include <asm/hpet.h>
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30 30
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d070704..d35db5993fd 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
106 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
107{ 107{
108 const unsigned cpu = get_cpu(); 108 const unsigned cpu = get_cpu();
109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irq_stack_end =
110 (unsigned long *)per_cpu(irq_stack_ptr, cpu);
110 unsigned used = 0; 111 unsigned used = 0;
111 struct thread_info *tinfo; 112 struct thread_info *tinfo;
112 int graph = 0; 113 int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *) estack_end[-2]; 161 stack = (unsigned long *) estack_end[-2];
161 continue; 162 continue;
162 } 163 }
163 if (irqstack_end) { 164 if (irq_stack_end) {
164 unsigned long *irqstack; 165 unsigned long *irq_stack;
165 irqstack = irqstack_end - 166 irq_stack = irq_stack_end -
166 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 167 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
167 168
168 if (stack >= irqstack && stack < irqstack_end) { 169 if (stack >= irq_stack && stack < irq_stack_end) {
169 if (ops->stack(data, "IRQ") < 0) 170 if (ops->stack(data, "IRQ") < 0)
170 break; 171 break;
171 bp = print_context_stack(tinfo, stack, bp, 172 bp = print_context_stack(tinfo, stack, bp,
172 ops, data, irqstack_end, &graph); 173 ops, data, irq_stack_end, &graph);
173 /* 174 /*
174 * We link to the next stack (which would be 175 * We link to the next stack (which would be
175 * the process stack normally) the last 176 * the process stack normally) the last
176 * pointer (index -1 to end) in the IRQ stack: 177 * pointer (index -1 to end) in the IRQ stack:
177 */ 178 */
178 stack = (unsigned long *) (irqstack_end[-1]); 179 stack = (unsigned long *) (irq_stack_end[-1]);
179 irqstack_end = NULL; 180 irq_stack_end = NULL;
180 ops->stack(data, "EOI"); 181 ops->stack(data, "EOI");
181 continue; 182 continue;
182 } 183 }
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
199 unsigned long *stack; 200 unsigned long *stack;
200 int i; 201 int i;
201 const int cpu = smp_processor_id(); 202 const int cpu = smp_processor_id();
202 unsigned long *irqstack_end = 203 unsigned long *irq_stack_end =
203 (unsigned long *) (cpu_pda(cpu)->irqstackptr); 204 (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
204 unsigned long *irqstack = 205 unsigned long *irq_stack =
205 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 206 (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
206 207
207 /* 208 /*
208 * debugging aid: "show_stack(NULL, NULL);" prints the 209 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
218 219
219 stack = sp; 220 stack = sp;
220 for (i = 0; i < kstack_depth_to_print; i++) { 221 for (i = 0; i < kstack_depth_to_print; i++) {
221 if (stack >= irqstack && stack <= irqstack_end) { 222 if (stack >= irq_stack && stack <= irq_stack_end) {
222 if (stack == irqstack_end) { 223 if (stack == irq_stack_end) {
223 stack = (unsigned long *) (irqstack_end[-1]); 224 stack = (unsigned long *) (irq_stack_end[-1]);
224 printk(" <EOI> "); 225 printk(" <EOI> ");
225 } 226 }
226 } else { 227 } else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
241 int i; 242 int i;
242 unsigned long sp; 243 unsigned long sp;
243 const int cpu = smp_processor_id(); 244 const int cpu = smp_processor_id();
244 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 245 struct task_struct *cur = current;
245 246
246 sp = regs->sp; 247 sp = regs->sp;
247 printk("CPU %d ", cpu); 248 printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index d6f0490a739..46469029e9d 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1203,7 +1203,6 @@ nmi_stack_correct:
1203 pushl %eax 1203 pushl %eax
1204 CFI_ADJUST_CFA_OFFSET 4 1204 CFI_ADJUST_CFA_OFFSET 4
1205 SAVE_ALL 1205 SAVE_ALL
1206 TRACE_IRQS_OFF
1207 xorl %edx,%edx # zero error code 1206 xorl %edx,%edx # zero error code
1208 movl %esp,%eax # pt_regs pointer 1207 movl %esp,%eax # pt_regs pointer
1209 call do_nmi 1208 call do_nmi
@@ -1244,7 +1243,6 @@ nmi_espfix_stack:
1244 pushl %eax 1243 pushl %eax
1245 CFI_ADJUST_CFA_OFFSET 4 1244 CFI_ADJUST_CFA_OFFSET 4
1246 SAVE_ALL 1245 SAVE_ALL
1247 TRACE_IRQS_OFF
1248 FIXUP_ESPFIX_STACK # %eax == %esp 1246 FIXUP_ESPFIX_STACK # %eax == %esp
1249 xorl %edx,%edx # zero error code 1247 xorl %edx,%edx # zero error code
1250 call do_nmi 1248 call do_nmi
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1954a966220..c092e7d2686 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -52,6 +52,7 @@
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/percpu.h>
55 56
56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h> 58#include <linux/elf-em.h>
@@ -209,7 +210,7 @@ ENTRY(native_usergs_sysret64)
209 210
210 /* %rsp:at FRAMEEND */ 211 /* %rsp:at FRAMEEND */
211 .macro FIXUP_TOP_OF_STACK tmp offset=0 212 .macro FIXUP_TOP_OF_STACK tmp offset=0
212 movq %gs:pda_oldrsp,\tmp 213 movq PER_CPU_VAR(old_rsp),\tmp
213 movq \tmp,RSP+\offset(%rsp) 214 movq \tmp,RSP+\offset(%rsp)
214 movq $__USER_DS,SS+\offset(%rsp) 215 movq $__USER_DS,SS+\offset(%rsp)
215 movq $__USER_CS,CS+\offset(%rsp) 216 movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +221,7 @@ ENTRY(native_usergs_sysret64)
220 221
221 .macro RESTORE_TOP_OF_STACK tmp offset=0 222 .macro RESTORE_TOP_OF_STACK tmp offset=0
222 movq RSP+\offset(%rsp),\tmp 223 movq RSP+\offset(%rsp),\tmp
223 movq \tmp,%gs:pda_oldrsp 224 movq \tmp,PER_CPU_VAR(old_rsp)
224 movq EFLAGS+\offset(%rsp),\tmp 225 movq EFLAGS+\offset(%rsp),\tmp
225 movq \tmp,R11+\offset(%rsp) 226 movq \tmp,R11+\offset(%rsp)
226 .endm 227 .endm
@@ -336,15 +337,15 @@ ENTRY(save_args)
336 je 1f 337 je 1f
337 SWAPGS 338 SWAPGS
338 /* 339 /*
339 * irqcount is used to check if a CPU is already on an interrupt stack 340 * irq_count is used to check if a CPU is already on an interrupt stack
340 * or not. While this is essentially redundant with preempt_count it is 341 * or not. While this is essentially redundant with preempt_count it is
341 * a little cheaper to use a separate counter in the PDA (short of 342 * a little cheaper to use a separate counter in the PDA (short of
342 * moving irq_enter into assembly, which would be too much work) 343 * moving irq_enter into assembly, which would be too much work)
343 */ 344 */
3441: incl %gs:pda_irqcount 3451: incl PER_CPU_VAR(irq_count)
345 jne 2f 346 jne 2f
346 popq_cfi %rax /* move return address... */ 347 popq_cfi %rax /* move return address... */
347 mov %gs:pda_irqstackptr,%rsp 348 mov PER_CPU_VAR(irq_stack_ptr),%rsp
348 EMPTY_FRAME 0 349 EMPTY_FRAME 0
349 pushq_cfi %rax /* ... to the new stack */ 350 pushq_cfi %rax /* ... to the new stack */
350 /* 351 /*
@@ -467,7 +468,7 @@ END(ret_from_fork)
467ENTRY(system_call) 468ENTRY(system_call)
468 CFI_STARTPROC simple 469 CFI_STARTPROC simple
469 CFI_SIGNAL_FRAME 470 CFI_SIGNAL_FRAME
470 CFI_DEF_CFA rsp,PDA_STACKOFFSET 471 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
471 CFI_REGISTER rip,rcx 472 CFI_REGISTER rip,rcx
472 /*CFI_REGISTER rflags,r11*/ 473 /*CFI_REGISTER rflags,r11*/
473 SWAPGS_UNSAFE_STACK 474 SWAPGS_UNSAFE_STACK
@@ -478,8 +479,8 @@ ENTRY(system_call)
478 */ 479 */
479ENTRY(system_call_after_swapgs) 480ENTRY(system_call_after_swapgs)
480 481
481 movq %rsp,%gs:pda_oldrsp 482 movq %rsp,PER_CPU_VAR(old_rsp)
482 movq %gs:pda_kernelstack,%rsp 483 movq PER_CPU_VAR(kernel_stack),%rsp
483 /* 484 /*
484 * No need to follow this irqs off/on section - it's straight 485 * No need to follow this irqs off/on section - it's straight
485 * and short: 486 * and short:
@@ -522,7 +523,7 @@ sysret_check:
522 CFI_REGISTER rip,rcx 523 CFI_REGISTER rip,rcx
523 RESTORE_ARGS 0,-ARG_SKIP,1 524 RESTORE_ARGS 0,-ARG_SKIP,1
524 /*CFI_REGISTER rflags,r11*/ 525 /*CFI_REGISTER rflags,r11*/
525 movq %gs:pda_oldrsp, %rsp 526 movq PER_CPU_VAR(old_rsp), %rsp
526 USERGS_SYSRET64 527 USERGS_SYSRET64
527 528
528 CFI_RESTORE_STATE 529 CFI_RESTORE_STATE
@@ -832,11 +833,11 @@ common_interrupt:
832 XCPT_FRAME 833 XCPT_FRAME
833 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 834 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
834 interrupt do_IRQ 835 interrupt do_IRQ
835 /* 0(%rsp): oldrsp-ARGOFFSET */ 836 /* 0(%rsp): old_rsp-ARGOFFSET */
836ret_from_intr: 837ret_from_intr:
837 DISABLE_INTERRUPTS(CLBR_NONE) 838 DISABLE_INTERRUPTS(CLBR_NONE)
838 TRACE_IRQS_OFF 839 TRACE_IRQS_OFF
839 decl %gs:pda_irqcount 840 decl PER_CPU_VAR(irq_count)
840 leaveq 841 leaveq
841 CFI_DEF_CFA_REGISTER rsp 842 CFI_DEF_CFA_REGISTER rsp
842 CFI_ADJUST_CFA_OFFSET -8 843 CFI_ADJUST_CFA_OFFSET -8
@@ -1077,10 +1078,10 @@ ENTRY(\sym)
1077 TRACE_IRQS_OFF 1078 TRACE_IRQS_OFF
1078 movq %rsp,%rdi /* pt_regs pointer */ 1079 movq %rsp,%rdi /* pt_regs pointer */
1079 xorl %esi,%esi /* no error code */ 1080 xorl %esi,%esi /* no error code */
1080 movq %gs:pda_data_offset, %rbp 1081 PER_CPU(init_tss, %rbp)
1081 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1082 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1082 call \do_sym 1083 call \do_sym
1083 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1084 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1084 jmp paranoid_exit /* %ebx: no swapgs flag */ 1085 jmp paranoid_exit /* %ebx: no swapgs flag */
1085 CFI_ENDPROC 1086 CFI_ENDPROC
1086END(\sym) 1087END(\sym)
@@ -1264,14 +1265,14 @@ ENTRY(call_softirq)
1264 CFI_REL_OFFSET rbp,0 1265 CFI_REL_OFFSET rbp,0
1265 mov %rsp,%rbp 1266 mov %rsp,%rbp
1266 CFI_DEF_CFA_REGISTER rbp 1267 CFI_DEF_CFA_REGISTER rbp
1267 incl %gs:pda_irqcount 1268 incl PER_CPU_VAR(irq_count)
1268 cmove %gs:pda_irqstackptr,%rsp 1269 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1269 push %rbp # backlink for old unwinder 1270 push %rbp # backlink for old unwinder
1270 call __do_softirq 1271 call __do_softirq
1271 leaveq 1272 leaveq
1272 CFI_DEF_CFA_REGISTER rsp 1273 CFI_DEF_CFA_REGISTER rsp
1273 CFI_ADJUST_CFA_OFFSET -8 1274 CFI_ADJUST_CFA_OFFSET -8
1274 decl %gs:pda_irqcount 1275 decl PER_CPU_VAR(irq_count)
1275 ret 1276 ret
1276 CFI_ENDPROC 1277 CFI_ENDPROC
1277END(call_softirq) 1278END(call_softirq)
@@ -1301,15 +1302,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1301 movq %rdi, %rsp # we don't return, adjust the stack frame 1302 movq %rdi, %rsp # we don't return, adjust the stack frame
1302 CFI_ENDPROC 1303 CFI_ENDPROC
1303 DEFAULT_FRAME 1304 DEFAULT_FRAME
130411: incl %gs:pda_irqcount 130511: incl PER_CPU_VAR(irq_count)
1305 movq %rsp,%rbp 1306 movq %rsp,%rbp
1306 CFI_DEF_CFA_REGISTER rbp 1307 CFI_DEF_CFA_REGISTER rbp
1307 cmovzq %gs:pda_irqstackptr,%rsp 1308 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1308 pushq %rbp # backlink for old unwinder 1309 pushq %rbp # backlink for old unwinder
1309 call xen_evtchn_do_upcall 1310 call xen_evtchn_do_upcall
1310 popq %rsp 1311 popq %rsp
1311 CFI_DEF_CFA_REGISTER rsp 1312 CFI_DEF_CFA_REGISTER rsp
1312 decl %gs:pda_irqcount 1313 decl PER_CPU_VAR(irq_count)
1313 jmp error_exit 1314 jmp error_exit
1314 CFI_ENDPROC 1315 CFI_ENDPROC
1315END(do_hypervisor_callback) 1316END(do_hypervisor_callback)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c4b93..af67d3227ea 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,27 +26,6 @@
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda;
31
32#ifdef CONFIG_SMP
33/*
34 * We install an empty cpu_pda pointer table to indicate to early users
35 * (numa_set_node) that the cpu_pda pointer table for cpus other than
36 * the boot cpu is not yet setup.
37 */
38static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
39#else
40static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
41#endif
42
43void __init x86_64_init_pda(void)
44{
45 _cpu_pda = __cpu_pda;
46 cpu_pda(0) = &_boot_cpu_pda;
47 pda_init(0);
48}
49
50static void __init zap_identity_mappings(void) 29static void __init zap_identity_mappings(void)
51{ 30{
52 pgd_t *pgd = pgd_offset_k(0UL); 31 pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
112 if (console_loglevel == 10) 91 if (console_loglevel == 10)
113 early_printk("Kernel alive\n"); 92 early_printk("Kernel alive\n");
114 93
115 x86_64_init_pda(); 94 pda_init(0);
116 95
117 x86_64_start_reservations(real_mode_data); 96 x86_64_start_reservations(real_mode_data);
118} 97}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d49556..c8ace880661 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
@@ -204,6 +205,23 @@ ENTRY(secondary_startup_64)
204 pushq $0 205 pushq $0
205 popfq 206 popfq
206 207
208#ifdef CONFIG_SMP
209 /*
210 * early_gdt_descr_base should point to the gdt_page in static percpu init
211 * data area. Computing this requires two symbols - __per_cpu_load
212 * and per_cpu__gdt_page. As the linker cannot do such a relocation, do
213 * it by hand. As early_gdt_descr is manipulated by C code for
214 * secondary CPUs, this should be done only once for the boot CPU
215 * when early_gdt_descr_base contains zero.
216 */
217 movq early_gdt_descr_base(%rip), %rax
218 testq %rax, %rax
219 jnz 1f
220 movq $__per_cpu_load, %rax
221 addq $per_cpu__gdt_page, %rax
222 movq %rax, early_gdt_descr_base(%rip)
2231:
224#endif
207 /* 225 /*
208 * We must switch to a new descriptor in kernel space for the GDT 226 * We must switch to a new descriptor in kernel space for the GDT
209 * because soon the kernel won't have access anymore to the userspace 227 * because soon the kernel won't have access anymore to the userspace
@@ -226,12 +244,18 @@ ENTRY(secondary_startup_64)
226 movl %eax,%fs 244 movl %eax,%fs
227 movl %eax,%gs 245 movl %eax,%gs
228 246
229 /* 247 /* Set up %gs.
230 * Setup up a dummy PDA. this is just for some early bootup code 248 *
231 * that does in_interrupt() 249 * On SMP, %gs should point to the per-cpu area. For initial
232 */ 250 * boot, make %gs point to the init data section. For a
251 * secondary CPU, initial_gs should be set to its pda address
252 * before the CPU runs this code.
253 *
254 * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
255 * change.
256 */
233 movl $MSR_GS_BASE,%ecx 257 movl $MSR_GS_BASE,%ecx
234 movq $empty_zero_page,%rax 258 movq initial_gs(%rip),%rax
235 movq %rax,%rdx 259 movq %rax,%rdx
236 shrq $32,%rdx 260 shrq $32,%rdx
237 wrmsr 261 wrmsr
@@ -257,6 +281,12 @@ ENTRY(secondary_startup_64)
257 .align 8 281 .align 8
258 ENTRY(initial_code) 282 ENTRY(initial_code)
259 .quad x86_64_start_kernel 283 .quad x86_64_start_kernel
284 ENTRY(initial_gs)
285#ifdef CONFIG_SMP
286 .quad __per_cpu_load
287#else
288 .quad PER_CPU_VAR(__pda)
289#endif
260 __FINITDATA 290 __FINITDATA
261 291
262 ENTRY(stack_start) 292 ENTRY(stack_start)
@@ -401,7 +431,12 @@ NEXT_PAGE(level2_spare_pgt)
401 .globl early_gdt_descr 431 .globl early_gdt_descr
402early_gdt_descr: 432early_gdt_descr:
403 .word GDT_ENTRIES*8-1 433 .word GDT_ENTRIES*8-1
404 .quad per_cpu__gdt_page 434#ifdef CONFIG_SMP
435early_gdt_descr_base:
436 .quad 0x0000000000000000
437#else
438 .quad per_cpu__gdt_page
439#endif
405 440
406ENTRY(phys_base) 441ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 442 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 1c4a1302536..f7966039072 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -46,6 +46,7 @@
46#include <asm/idle.h> 46#include <asm/idle.h>
47#include <asm/io.h> 47#include <asm/io.h>
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/cpu.h>
49#include <asm/desc.h> 50#include <asm/desc.h>
50#include <asm/proto.h> 51#include <asm/proto.h>
51#include <asm/acpi.h> 52#include <asm/acpi.h>
@@ -82,11 +83,11 @@ static DEFINE_SPINLOCK(vector_lock);
82int nr_ioapic_registers[MAX_IO_APICS]; 83int nr_ioapic_registers[MAX_IO_APICS];
83 84
84/* I/O APIC entries */ 85/* I/O APIC entries */
85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 86struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86int nr_ioapics; 87int nr_ioapics;
87 88
88/* MP IRQ source entries */ 89/* MP IRQ source entries */
89struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 90struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
90 91
91/* # of MP IRQ source entries */ 92/* # of MP IRQ source entries */
92int mp_irq_entries; 93int mp_irq_entries;
@@ -356,7 +357,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
356 357
357 if (!cfg->move_in_progress) { 358 if (!cfg->move_in_progress) {
358 /* it means that domain is not changed */ 359 /* it means that domain is not changed */
359 if (!cpumask_intersects(&desc->affinity, mask)) 360 if (!cpumask_intersects(desc->affinity, mask))
360 cfg->move_desc_pending = 1; 361 cfg->move_desc_pending = 1;
361 } 362 }
362} 363}
@@ -386,7 +387,7 @@ struct io_apic {
386static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 387static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
387{ 388{
388 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 389 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
389 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); 390 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
390} 391}
391 392
392static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 393static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -579,9 +580,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
579 if (assign_irq_vector(irq, cfg, mask)) 580 if (assign_irq_vector(irq, cfg, mask))
580 return BAD_APICID; 581 return BAD_APICID;
581 582
582 cpumask_and(&desc->affinity, cfg->domain, mask); 583 cpumask_and(desc->affinity, cfg->domain, mask);
583 set_extra_move_desc(desc, mask); 584 set_extra_move_desc(desc, mask);
584 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); 585 return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
585} 586}
586 587
587static void 588static void
@@ -944,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type)
944 int i; 945 int i;
945 946
946 for (i = 0; i < mp_irq_entries; i++) 947 for (i = 0; i < mp_irq_entries; i++)
947 if (mp_irqs[i].mp_irqtype == type && 948 if (mp_irqs[i].irqtype == type &&
948 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || 949 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
949 mp_irqs[i].mp_dstapic == MP_APIC_ALL) && 950 mp_irqs[i].dstapic == MP_APIC_ALL) &&
950 mp_irqs[i].mp_dstirq == pin) 951 mp_irqs[i].dstirq == pin)
951 return i; 952 return i;
952 953
953 return -1; 954 return -1;
@@ -961,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type)
961 int i; 962 int i;
962 963
963 for (i = 0; i < mp_irq_entries; i++) { 964 for (i = 0; i < mp_irq_entries; i++) {
964 int lbus = mp_irqs[i].mp_srcbus; 965 int lbus = mp_irqs[i].srcbus;
965 966
966 if (test_bit(lbus, mp_bus_not_pci) && 967 if (test_bit(lbus, mp_bus_not_pci) &&
967 (mp_irqs[i].mp_irqtype == type) && 968 (mp_irqs[i].irqtype == type) &&
968 (mp_irqs[i].mp_srcbusirq == irq)) 969 (mp_irqs[i].srcbusirq == irq))
969 970
970 return mp_irqs[i].mp_dstirq; 971 return mp_irqs[i].dstirq;
971 } 972 }
972 return -1; 973 return -1;
973} 974}
@@ -977,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type)
977 int i; 978 int i;
978 979
979 for (i = 0; i < mp_irq_entries; i++) { 980 for (i = 0; i < mp_irq_entries; i++) {
980 int lbus = mp_irqs[i].mp_srcbus; 981 int lbus = mp_irqs[i].srcbus;
981 982
982 if (test_bit(lbus, mp_bus_not_pci) && 983 if (test_bit(lbus, mp_bus_not_pci) &&
983 (mp_irqs[i].mp_irqtype == type) && 984 (mp_irqs[i].irqtype == type) &&
984 (mp_irqs[i].mp_srcbusirq == irq)) 985 (mp_irqs[i].srcbusirq == irq))
985 break; 986 break;
986 } 987 }
987 if (i < mp_irq_entries) { 988 if (i < mp_irq_entries) {
988 int apic; 989 int apic;
989 for(apic = 0; apic < nr_ioapics; apic++) { 990 for(apic = 0; apic < nr_ioapics; apic++) {
990 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) 991 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
991 return apic; 992 return apic;
992 } 993 }
993 } 994 }
@@ -1012,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1012 return -1; 1013 return -1;
1013 } 1014 }
1014 for (i = 0; i < mp_irq_entries; i++) { 1015 for (i = 0; i < mp_irq_entries; i++) {
1015 int lbus = mp_irqs[i].mp_srcbus; 1016 int lbus = mp_irqs[i].srcbus;
1016 1017
1017 for (apic = 0; apic < nr_ioapics; apic++) 1018 for (apic = 0; apic < nr_ioapics; apic++)
1018 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || 1019 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1019 mp_irqs[i].mp_dstapic == MP_APIC_ALL) 1020 mp_irqs[i].dstapic == MP_APIC_ALL)
1020 break; 1021 break;
1021 1022
1022 if (!test_bit(lbus, mp_bus_not_pci) && 1023 if (!test_bit(lbus, mp_bus_not_pci) &&
1023 !mp_irqs[i].mp_irqtype && 1024 !mp_irqs[i].irqtype &&
1024 (bus == lbus) && 1025 (bus == lbus) &&
1025 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { 1026 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1026 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); 1027 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1027 1028
1028 if (!(apic || IO_APIC_IRQ(irq))) 1029 if (!(apic || IO_APIC_IRQ(irq)))
1029 continue; 1030 continue;
1030 1031
1031 if (pin == (mp_irqs[i].mp_srcbusirq & 3)) 1032 if (pin == (mp_irqs[i].srcbusirq & 3))
1032 return irq; 1033 return irq;
1033 /* 1034 /*
1034 * Use the first all-but-pin matching entry as a 1035 * Use the first all-but-pin matching entry as a
@@ -1071,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq)
1071 * EISA conforming in the MP table, that means its trigger type must 1072 * EISA conforming in the MP table, that means its trigger type must
1072 * be read in from the ELCR */ 1073 * be read in from the ELCR */
1073 1074
1074#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) 1075#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
1075#define default_EISA_polarity(idx) default_ISA_polarity(idx) 1076#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1076 1077
1077/* PCI interrupts are always polarity one level triggered, 1078/* PCI interrupts are always polarity one level triggered,
@@ -1088,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq)
1088 1089
1089static int MPBIOS_polarity(int idx) 1090static int MPBIOS_polarity(int idx)
1090{ 1091{
1091 int bus = mp_irqs[idx].mp_srcbus; 1092 int bus = mp_irqs[idx].srcbus;
1092 int polarity; 1093 int polarity;
1093 1094
1094 /* 1095 /*
1095 * Determine IRQ line polarity (high active or low active): 1096 * Determine IRQ line polarity (high active or low active):
1096 */ 1097 */
1097 switch (mp_irqs[idx].mp_irqflag & 3) 1098 switch (mp_irqs[idx].irqflag & 3)
1098 { 1099 {
1099 case 0: /* conforms, ie. bus-type dependent polarity */ 1100 case 0: /* conforms, ie. bus-type dependent polarity */
1100 if (test_bit(bus, mp_bus_not_pci)) 1101 if (test_bit(bus, mp_bus_not_pci))
@@ -1130,13 +1131,13 @@ static int MPBIOS_polarity(int idx)
1130 1131
1131static int MPBIOS_trigger(int idx) 1132static int MPBIOS_trigger(int idx)
1132{ 1133{
1133 int bus = mp_irqs[idx].mp_srcbus; 1134 int bus = mp_irqs[idx].srcbus;
1134 int trigger; 1135 int trigger;
1135 1136
1136 /* 1137 /*
1137 * Determine IRQ trigger mode (edge or level sensitive): 1138 * Determine IRQ trigger mode (edge or level sensitive):
1138 */ 1139 */
1139 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) 1140 switch ((mp_irqs[idx].irqflag>>2) & 3)
1140 { 1141 {
1141 case 0: /* conforms, ie. bus-type dependent */ 1142 case 0: /* conforms, ie. bus-type dependent */
1142 if (test_bit(bus, mp_bus_not_pci)) 1143 if (test_bit(bus, mp_bus_not_pci))
@@ -1214,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq);
1214static int pin_2_irq(int idx, int apic, int pin) 1215static int pin_2_irq(int idx, int apic, int pin)
1215{ 1216{
1216 int irq, i; 1217 int irq, i;
1217 int bus = mp_irqs[idx].mp_srcbus; 1218 int bus = mp_irqs[idx].srcbus;
1218 1219
1219 /* 1220 /*
1220 * Debugging check, we are in big trouble if this message pops up! 1221 * Debugging check, we are in big trouble if this message pops up!
1221 */ 1222 */
1222 if (mp_irqs[idx].mp_dstirq != pin) 1223 if (mp_irqs[idx].dstirq != pin)
1223 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1224 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1224 1225
1225 if (test_bit(bus, mp_bus_not_pci)) { 1226 if (test_bit(bus, mp_bus_not_pci)) {
1226 irq = mp_irqs[idx].mp_srcbusirq; 1227 irq = mp_irqs[idx].srcbusirq;
1227 } else { 1228 } else {
1228 /* 1229 /*
1229 * PCI IRQs are mapped in order 1230 * PCI IRQs are mapped in order
@@ -1566,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1566 apic_printk(APIC_VERBOSE,KERN_DEBUG 1567 apic_printk(APIC_VERBOSE,KERN_DEBUG
1567 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1568 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1568 "IRQ %d Mode:%i Active:%i)\n", 1569 "IRQ %d Mode:%i Active:%i)\n",
1569 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1570 apic, mp_ioapics[apic].apicid, pin, cfg->vector,
1570 irq, trigger, polarity); 1571 irq, trigger, polarity);
1571 1572
1572 1573
1573 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1574 if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry,
1574 dest, trigger, polarity, cfg->vector)) { 1575 dest, trigger, polarity, cfg->vector)) {
1575 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1576 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1576 mp_ioapics[apic].mp_apicid, pin); 1577 mp_ioapics[apic].apicid, pin);
1577 __clear_irq_vector(irq, cfg); 1578 __clear_irq_vector(irq, cfg);
1578 return; 1579 return;
1579 } 1580 }
@@ -1604,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void)
1604 notcon = 1; 1605 notcon = 1;
1605 apic_printk(APIC_VERBOSE, 1606 apic_printk(APIC_VERBOSE,
1606 KERN_DEBUG " %d-%d", 1607 KERN_DEBUG " %d-%d",
1607 mp_ioapics[apic].mp_apicid, 1608 mp_ioapics[apic].apicid, pin);
1608 pin);
1609 } else 1609 } else
1610 apic_printk(APIC_VERBOSE, " %d-%d", 1610 apic_printk(APIC_VERBOSE, " %d-%d",
1611 mp_ioapics[apic].mp_apicid, 1611 mp_ioapics[apic].apicid, pin);
1612 pin);
1613 continue; 1612 continue;
1614 } 1613 }
1615 if (notcon) { 1614 if (notcon) {
@@ -1699,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1699 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1698 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1700 for (i = 0; i < nr_ioapics; i++) 1699 for (i = 0; i < nr_ioapics; i++)
1701 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1700 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1702 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); 1701 mp_ioapics[i].apicid, nr_ioapic_registers[i]);
1703 1702
1704 /* 1703 /*
1705 * We are a bit conservative about what we expect. We have to 1704 * We are a bit conservative about what we expect. We have to
@@ -1719,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1719 spin_unlock_irqrestore(&ioapic_lock, flags); 1718 spin_unlock_irqrestore(&ioapic_lock, flags);
1720 1719
1721 printk("\n"); 1720 printk("\n");
1722 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1721 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
1723 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1722 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1724 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1723 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1725 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1724 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -2121,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
2121 reg_00.raw = io_apic_read(apic, 0); 2120 reg_00.raw = io_apic_read(apic, 0);
2122 spin_unlock_irqrestore(&ioapic_lock, flags); 2121 spin_unlock_irqrestore(&ioapic_lock, flags);
2123 2122
2124 old_id = mp_ioapics[apic].mp_apicid; 2123 old_id = mp_ioapics[apic].apicid;
2125 2124
2126 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { 2125 if (mp_ioapics[apic].apicid >= get_physical_broadcast()) {
2127 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 2126 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2128 apic, mp_ioapics[apic].mp_apicid); 2127 apic, mp_ioapics[apic].apicid);
2129 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2128 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2130 reg_00.bits.ID); 2129 reg_00.bits.ID);
2131 mp_ioapics[apic].mp_apicid = reg_00.bits.ID; 2130 mp_ioapics[apic].apicid = reg_00.bits.ID;
2132 } 2131 }
2133 2132
2134 /* 2133 /*
@@ -2137,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
2137 * 'stuck on smp_invalidate_needed IPI wait' messages. 2136 * 'stuck on smp_invalidate_needed IPI wait' messages.
2138 */ 2137 */
2139 if (check_apicid_used(phys_id_present_map, 2138 if (check_apicid_used(phys_id_present_map,
2140 mp_ioapics[apic].mp_apicid)) { 2139 mp_ioapics[apic].apicid)) {
2141 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 2140 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2142 apic, mp_ioapics[apic].mp_apicid); 2141 apic, mp_ioapics[apic].apicid);
2143 for (i = 0; i < get_physical_broadcast(); i++) 2142 for (i = 0; i < get_physical_broadcast(); i++)
2144 if (!physid_isset(i, phys_id_present_map)) 2143 if (!physid_isset(i, phys_id_present_map))
2145 break; 2144 break;
@@ -2148,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
2148 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2147 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2149 i); 2148 i);
2150 physid_set(i, phys_id_present_map); 2149 physid_set(i, phys_id_present_map);
2151 mp_ioapics[apic].mp_apicid = i; 2150 mp_ioapics[apic].apicid = i;
2152 } else { 2151 } else {
2153 physid_mask_t tmp; 2152 physid_mask_t tmp;
2154 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); 2153 tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid);
2155 apic_printk(APIC_VERBOSE, "Setting %d in the " 2154 apic_printk(APIC_VERBOSE, "Setting %d in the "
2156 "phys_id_present_map\n", 2155 "phys_id_present_map\n",
2157 mp_ioapics[apic].mp_apicid); 2156 mp_ioapics[apic].apicid);
2158 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2157 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2159 } 2158 }
2160 2159
@@ -2163,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
2163 * We need to adjust the IRQ routing table 2162 * We need to adjust the IRQ routing table
2164 * if the ID changed. 2163 * if the ID changed.
2165 */ 2164 */
2166 if (old_id != mp_ioapics[apic].mp_apicid) 2165 if (old_id != mp_ioapics[apic].apicid)
2167 for (i = 0; i < mp_irq_entries; i++) 2166 for (i = 0; i < mp_irq_entries; i++)
2168 if (mp_irqs[i].mp_dstapic == old_id) 2167 if (mp_irqs[i].dstapic == old_id)
2169 mp_irqs[i].mp_dstapic 2168 mp_irqs[i].dstapic
2170 = mp_ioapics[apic].mp_apicid; 2169 = mp_ioapics[apic].apicid;
2171 2170
2172 /* 2171 /*
2173 * Read the right value from the MPC table and 2172 * Read the right value from the MPC table and
@@ -2175,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
2175 */ 2174 */
2176 apic_printk(APIC_VERBOSE, KERN_INFO 2175 apic_printk(APIC_VERBOSE, KERN_INFO
2177 "...changing IO-APIC physical APIC ID to %d ...", 2176 "...changing IO-APIC physical APIC ID to %d ...",
2178 mp_ioapics[apic].mp_apicid); 2177 mp_ioapics[apic].apicid);
2179 2178
2180 reg_00.bits.ID = mp_ioapics[apic].mp_apicid; 2179 reg_00.bits.ID = mp_ioapics[apic].apicid;
2181 spin_lock_irqsave(&ioapic_lock, flags); 2180 spin_lock_irqsave(&ioapic_lock, flags);
2182 io_apic_write(apic, 0, reg_00.raw); 2181 io_apic_write(apic, 0, reg_00.raw);
2183 spin_unlock_irqrestore(&ioapic_lock, flags); 2182 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2188,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
2188 spin_lock_irqsave(&ioapic_lock, flags); 2187 spin_lock_irqsave(&ioapic_lock, flags);
2189 reg_00.raw = io_apic_read(apic, 0); 2188 reg_00.raw = io_apic_read(apic, 0);
2190 spin_unlock_irqrestore(&ioapic_lock, flags); 2189 spin_unlock_irqrestore(&ioapic_lock, flags);
2191 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) 2190 if (reg_00.bits.ID != mp_ioapics[apic].apicid)
2192 printk("could not set ID!\n"); 2191 printk("could not set ID!\n");
2193 else 2192 else
2194 apic_printk(APIC_VERBOSE, " ok.\n"); 2193 apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2383,7 +2382,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2383 if (cfg->move_in_progress) 2382 if (cfg->move_in_progress)
2384 send_cleanup_vector(cfg); 2383 send_cleanup_vector(cfg);
2385 2384
2386 cpumask_copy(&desc->affinity, mask); 2385 cpumask_copy(desc->affinity, mask);
2387} 2386}
2388 2387
2389static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2388static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2405,11 +2404,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2405 } 2404 }
2406 2405
2407 /* everthing is clear. we have right of way */ 2406 /* everthing is clear. we have right of way */
2408 migrate_ioapic_irq_desc(desc, &desc->pending_mask); 2407 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2409 2408
2410 ret = 0; 2409 ret = 0;
2411 desc->status &= ~IRQ_MOVE_PENDING; 2410 desc->status &= ~IRQ_MOVE_PENDING;
2412 cpumask_clear(&desc->pending_mask); 2411 cpumask_clear(desc->pending_mask);
2413 2412
2414unmask: 2413unmask:
2415 unmask_IO_APIC_irq_desc(desc); 2414 unmask_IO_APIC_irq_desc(desc);
@@ -2434,7 +2433,7 @@ static void ir_irq_migration(struct work_struct *work)
2434 continue; 2433 continue;
2435 } 2434 }
2436 2435
2437 desc->chip->set_affinity(irq, &desc->pending_mask); 2436 desc->chip->set_affinity(irq, desc->pending_mask);
2438 spin_unlock_irqrestore(&desc->lock, flags); 2437 spin_unlock_irqrestore(&desc->lock, flags);
2439 } 2438 }
2440 } 2439 }
@@ -2448,7 +2447,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2448{ 2447{
2449 if (desc->status & IRQ_LEVEL) { 2448 if (desc->status & IRQ_LEVEL) {
2450 desc->status |= IRQ_MOVE_PENDING; 2449 desc->status |= IRQ_MOVE_PENDING;
2451 cpumask_copy(&desc->pending_mask, mask); 2450 cpumask_copy(desc->pending_mask, mask);
2452 migrate_irq_remapped_level_desc(desc); 2451 migrate_irq_remapped_level_desc(desc);
2453 return; 2452 return;
2454 } 2453 }
@@ -2516,7 +2515,7 @@ static void irq_complete_move(struct irq_desc **descp)
2516 2515
2517 /* domain has not changed, but affinity did */ 2516 /* domain has not changed, but affinity did */
2518 me = smp_processor_id(); 2517 me = smp_processor_id();
2519 if (cpu_isset(me, desc->affinity)) { 2518 if (cpumask_test_cpu(me, desc->affinity)) {
2520 *descp = desc = move_irq_desc(desc, me); 2519 *descp = desc = move_irq_desc(desc, me);
2521 /* get the new one */ 2520 /* get the new one */
2522 cfg = desc->chip_data; 2521 cfg = desc->chip_data;
@@ -3117,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev)
3117 3116
3118 spin_lock_irqsave(&ioapic_lock, flags); 3117 spin_lock_irqsave(&ioapic_lock, flags);
3119 reg_00.raw = io_apic_read(dev->id, 0); 3118 reg_00.raw = io_apic_read(dev->id, 0);
3120 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { 3119 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3121 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; 3120 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3122 io_apic_write(dev->id, 0, reg_00.raw); 3121 io_apic_write(dev->id, 0, reg_00.raw);
3123 } 3122 }
3124 spin_unlock_irqrestore(&ioapic_lock, flags); 3123 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -3183,7 +3182,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
3183 3182
3184 irq = 0; 3183 irq = 0;
3185 spin_lock_irqsave(&vector_lock, flags); 3184 spin_lock_irqsave(&vector_lock, flags);
3186 for (new = irq_want; new < NR_IRQS; new++) { 3185 for (new = irq_want; new < nr_irqs; new++) {
3187 if (platform_legacy_irq(new)) 3186 if (platform_legacy_irq(new))
3188 continue; 3187 continue;
3189 3188
@@ -3258,6 +3257,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3258 int err; 3257 int err;
3259 unsigned dest; 3258 unsigned dest;
3260 3259
3260 if (disable_apic)
3261 return -ENXIO;
3262
3261 cfg = irq_cfg(irq); 3263 cfg = irq_cfg(irq);
3262 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3264 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3263 if (err) 3265 if (err)
@@ -3726,6 +3728,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3726 struct irq_cfg *cfg; 3728 struct irq_cfg *cfg;
3727 int err; 3729 int err;
3728 3730
3731 if (disable_apic)
3732 return -ENXIO;
3733
3729 cfg = irq_cfg(irq); 3734 cfg = irq_cfg(irq);
3730 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3735 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3731 if (!err) { 3736 if (!err) {
@@ -3850,6 +3855,22 @@ void __init probe_nr_irqs_gsi(void)
3850 nr_irqs_gsi = nr; 3855 nr_irqs_gsi = nr;
3851} 3856}
3852 3857
3858#ifdef CONFIG_SPARSE_IRQ
3859int __init arch_probe_nr_irqs(void)
3860{
3861 int nr;
3862
3863 nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ?
3864 (NR_VECTORS + (8 * nr_cpu_ids)) :
3865 (NR_VECTORS + (32 * nr_ioapics)));
3866
3867 if (nr < nr_irqs && nr > nr_irqs_gsi)
3868 nr_irqs = nr;
3869
3870 return 0;
3871}
3872#endif
3873
3853/* -------------------------------------------------------------------------- 3874/* --------------------------------------------------------------------------
3854 ACPI-based IOAPIC Configuration 3875 ACPI-based IOAPIC Configuration
3855 -------------------------------------------------------------------------- */ 3876 -------------------------------------------------------------------------- */
@@ -3984,8 +4005,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3984 return -1; 4005 return -1;
3985 4006
3986 for (i = 0; i < mp_irq_entries; i++) 4007 for (i = 0; i < mp_irq_entries; i++)
3987 if (mp_irqs[i].mp_irqtype == mp_INT && 4008 if (mp_irqs[i].irqtype == mp_INT &&
3988 mp_irqs[i].mp_srcbusirq == bus_irq) 4009 mp_irqs[i].srcbusirq == bus_irq)
3989 break; 4010 break;
3990 if (i >= mp_irq_entries) 4011 if (i >= mp_irq_entries)
3991 return -1; 4012 return -1;
@@ -4039,7 +4060,7 @@ void __init setup_ioapic_dest(void)
4039 */ 4060 */
4040 if (desc->status & 4061 if (desc->status &
4041 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4062 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4042 mask = &desc->affinity; 4063 mask = desc->affinity;
4043 else 4064 else
4044 mask = TARGET_CPUS; 4065 mask = TARGET_CPUS;
4045 4066
@@ -4100,7 +4121,7 @@ void __init ioapic_init_mappings(void)
4100 ioapic_res = ioapic_setup_resources(); 4121 ioapic_res = ioapic_setup_resources();
4101 for (i = 0; i < nr_ioapics; i++) { 4122 for (i = 0; i < nr_ioapics; i++) {
4102 if (smp_found_config) { 4123 if (smp_found_config) {
4103 ioapic_phys = mp_ioapics[i].mp_apicaddr; 4124 ioapic_phys = mp_ioapics[i].apicaddr;
4104#ifdef CONFIG_X86_32 4125#ifdef CONFIG_X86_32
4105 if (!ioapic_phys) { 4126 if (!ioapic_phys) {
4106 printk(KERN_ERR 4127 printk(KERN_ERR
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 22f650db917..a6bca1d33a8 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
36#endif 36#endif
37} 37}
38 38
39#ifdef CONFIG_X86_32 39#define irq_stats(x) (&per_cpu(irq_stat, x))
40# define irq_stats(x) (&per_cpu(irq_stat, x))
41#else
42# define irq_stats(x) cpu_pda(x)
43#endif
44/* 40/*
45 * /proc/interrupts printing: 41 * /proc/interrupts printing:
46 */ 42 */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 74b9ff7341e..e0f29be8ab0 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -248,7 +248,7 @@ void fixup_irqs(void)
248 if (irq == 2) 248 if (irq == 2)
249 continue; 249 continue;
250 250
251 affinity = &desc->affinity; 251 affinity = desc->affinity;
252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
253 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
254 affinity = cpu_all_mask; 254 affinity = cpu_all_mask;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 63c88e6ec02..1db05247b47 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -19,6 +19,9 @@
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/idle.h> 20#include <asm/idle.h>
21 21
22DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
23EXPORT_PER_CPU_SYMBOL(irq_stat);
24
22/* 25/*
23 * Probabilistic stack overflow check: 26 * Probabilistic stack overflow check:
24 * 27 *
@@ -100,7 +103,7 @@ void fixup_irqs(void)
100 /* interrupt's are disabled at this point */ 103 /* interrupt's are disabled at this point */
101 spin_lock(&desc->lock); 104 spin_lock(&desc->lock);
102 105
103 affinity = &desc->affinity; 106 affinity = desc->affinity;
104 if (!irq_has_action(irq) || 107 if (!irq_has_action(irq) ||
105 cpumask_equal(affinity, cpu_online_mask)) { 108 cpumask_equal(affinity, cpu_online_mask)) {
106 spin_unlock(&desc->lock); 109 spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index b7f4c929e61..5e9f4fc5138 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -87,9 +87,9 @@
87#include <linux/cpu.h> 87#include <linux/cpu.h>
88#include <linux/firmware.h> 88#include <linux/firmware.h>
89#include <linux/platform_device.h> 89#include <linux/platform_device.h>
90#include <linux/uaccess.h>
90 91
91#include <asm/msr.h> 92#include <asm/msr.h>
92#include <asm/uaccess.h>
93#include <asm/processor.h> 93#include <asm/processor.h>
94#include <asm/microcode.h> 94#include <asm/microcode.h>
95 95
@@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; 196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
197} 197}
198 198
199static inline int 199static inline int
200update_match_revision(struct microcode_header_intel *mc_header, int rev) 200update_match_revision(struct microcode_header_intel *mc_header, int rev)
201{ 201{
202 return (mc_header->rev <= rev) ? 0 : 1; 202 return (mc_header->rev <= rev) ? 0 : 1;
@@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device)
442 return ret; 442 return ret;
443 } 443 }
444 444
445 ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, 445 ret = generic_load_microcode(cpu, (void *)firmware->data,
446 &get_ucode_fw); 446 firmware->size, &get_ucode_fw);
447 447
448 release_firmware(firmware); 448 release_firmware(firmware);
449 449
@@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size)
460 /* We should bind the task to the CPU */ 460 /* We should bind the task to the CPU */
461 BUG_ON(cpu != raw_smp_processor_id()); 461 BUG_ON(cpu != raw_smp_processor_id());
462 462
463 return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); 463 return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
464} 464}
465 465
466static void microcode_fini_cpu(int cpu) 466static void microcode_fini_cpu(int cpu)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
index 3db0a5442eb..0edd819050e 100644
--- a/arch/x86/kernel/module_32.c
+++ b/arch/x86/kernel/module_32.c
@@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region)
42{ 42{
43 vfree(module_region); 43 vfree(module_region);
44 /* FIXME: If module_region == mod->init_region, trim exception 44 /* FIXME: If module_region == mod->init_region, trim exception
45 table entries. */ 45 table entries. */
46} 46}
47 47
48/* We don't need anything special. */ 48/* We don't need anything special. */
@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr,
113 *para = NULL; 113 *para = NULL;
114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
115 115
116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
117 if (!strcmp(".text", secstrings + s->sh_name)) 117 if (!strcmp(".text", secstrings + s->sh_name))
118 text = s; 118 text = s;
119 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 119 if (!strcmp(".altinstructions", secstrings + s->sh_name))
120 alt = s; 120 alt = s;
121 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 121 if (!strcmp(".smp_locks", secstrings + s->sh_name))
122 locks= s; 122 locks = s;
123 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 123 if (!strcmp(".parainstructions", secstrings + s->sh_name))
124 para = s; 124 para = s;
125 } 125 }
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 6ba87830d4b..c23880b90b5 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -30,14 +30,14 @@
30#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/pgtable.h> 31#include <asm/pgtable.h>
32 32
33#define DEBUGP(fmt...) 33#define DEBUGP(fmt...)
34 34
35#ifndef CONFIG_UML 35#ifndef CONFIG_UML
36void module_free(struct module *mod, void *module_region) 36void module_free(struct module *mod, void *module_region)
37{ 37{
38 vfree(module_region); 38 vfree(module_region);
39 /* FIXME: If module_region == mod->init_region, trim exception 39 /* FIXME: If module_region == mod->init_region, trim exception
40 table entries. */ 40 table entries. */
41} 41}
42 42
43void *module_alloc(unsigned long size) 43void *module_alloc(unsigned long size)
@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; 77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
78 Elf64_Sym *sym; 78 Elf64_Sym *sym;
79 void *loc; 79 void *loc;
80 u64 val; 80 u64 val;
81 81
82 DEBUGP("Applying relocate section %u to %u\n", relsec, 82 DEBUGP("Applying relocate section %u to %u\n", relsec,
83 sechdrs[relsec].sh_info); 83 sechdrs[relsec].sh_info);
@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr 91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
92 + ELF64_R_SYM(rel[i].r_info); 92 + ELF64_R_SYM(rel[i].r_info);
93 93
94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", 94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
95 (int)ELF64_R_TYPE(rel[i].r_info), 95 (int)ELF64_R_TYPE(rel[i].r_info),
96 sym->st_value, rel[i].r_addend, (u64)loc); 96 sym->st_value, rel[i].r_addend, (u64)loc);
97 97
98 val = sym->st_value + rel[i].r_addend; 98 val = sym->st_value + rel[i].r_addend;
99 99
100 switch (ELF64_R_TYPE(rel[i].r_info)) { 100 switch (ELF64_R_TYPE(rel[i].r_info)) {
101 case R_X86_64_NONE: 101 case R_X86_64_NONE:
@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
113 if ((s64)val != *(s32 *)loc) 113 if ((s64)val != *(s32 *)loc)
114 goto overflow; 114 goto overflow;
115 break; 115 break;
116 case R_X86_64_PC32: 116 case R_X86_64_PC32:
117 val -= (u64)loc; 117 val -= (u64)loc;
118 *(u32 *)loc = val; 118 *(u32 *)loc = val;
119#if 0 119#if 0
120 if ((s64)val != *(s32 *)loc) 120 if ((s64)val != *(s32 *)loc)
121 goto overflow; 121 goto overflow;
122#endif 122#endif
123 break; 123 break;
124 default: 124 default:
125 printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", 125 printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
126 me->name, ELF64_R_TYPE(rel[i].r_info)); 126 me->name, ELF64_R_TYPE(rel[i].r_info));
127 return -ENOEXEC; 127 return -ENOEXEC;
128 } 128 }
@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
130 return 0; 130 return 0;
131 131
132overflow: 132overflow:
133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n", 133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
134 (int)ELF64_R_TYPE(rel[i].r_info), val); 134 (int)ELF64_R_TYPE(rel[i].r_info), val);
135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", 135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
136 me->name); 136 me->name);
@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs,
143 unsigned int relsec, 143 unsigned int relsec,
144 struct module *me) 144 struct module *me)
145{ 145{
146 printk("non add relocation not supported\n"); 146 printk(KERN_ERR "non add relocation not supported\n");
147 return -ENOSYS; 147 return -ENOSYS;
148} 148}
149 149
150int module_finalize(const Elf_Ehdr *hdr, 150int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 151 const Elf_Shdr *sechdrs,
152 struct module *me) 152 struct module *me)
153{ 153{
154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, 154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
155 *para = NULL; 155 *para = NULL;
@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr,
161 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 161 if (!strcmp(".altinstructions", secstrings + s->sh_name))
162 alt = s; 162 alt = s;
163 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 163 if (!strcmp(".smp_locks", secstrings + s->sh_name))
164 locks= s; 164 locks = s;
165 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 165 if (!strcmp(".parainstructions", secstrings + s->sh_name))
166 para = s; 166 para = s;
167 } 167 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index c0601c2848a..fa6bb263892 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -27,6 +27,7 @@
27#include <asm/e820.h> 27#include <asm/e820.h>
28#include <asm/trampoline.h> 28#include <asm/trampoline.h>
29#include <asm/setup.h> 29#include <asm/setup.h>
30#include <asm/smp.h>
30 31
31#include <mach_apic.h> 32#include <mach_apic.h>
32#ifdef CONFIG_X86_32 33#ifdef CONFIG_X86_32
@@ -143,11 +144,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
143 if (bad_ioapic(m->apicaddr)) 144 if (bad_ioapic(m->apicaddr))
144 return; 145 return;
145 146
146 mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; 147 mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
147 mp_ioapics[nr_ioapics].mp_apicid = m->apicid; 148 mp_ioapics[nr_ioapics].apicid = m->apicid;
148 mp_ioapics[nr_ioapics].mp_type = m->type; 149 mp_ioapics[nr_ioapics].type = m->type;
149 mp_ioapics[nr_ioapics].mp_apicver = m->apicver; 150 mp_ioapics[nr_ioapics].apicver = m->apicver;
150 mp_ioapics[nr_ioapics].mp_flags = m->flags; 151 mp_ioapics[nr_ioapics].flags = m->flags;
151 nr_ioapics++; 152 nr_ioapics++;
152} 153}
153 154
@@ -159,55 +160,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
159 m->srcbusirq, m->dstapic, m->dstirq); 160 m->srcbusirq, m->dstapic, m->dstirq);
160} 161}
161 162
162static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 163static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
163{ 164{
164 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," 165 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
165 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 166 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
166 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 167 mp_irq->irqtype, mp_irq->irqflag & 3,
167 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 168 (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
168 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); 169 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
169} 170}
170 171
171static void __init assign_to_mp_irq(struct mpc_intsrc *m, 172static void __init assign_to_mp_irq(struct mpc_intsrc *m,
172 struct mp_config_intsrc *mp_irq) 173 struct mpc_intsrc *mp_irq)
173{ 174{
174 mp_irq->mp_dstapic = m->dstapic; 175 mp_irq->dstapic = m->dstapic;
175 mp_irq->mp_type = m->type; 176 mp_irq->type = m->type;
176 mp_irq->mp_irqtype = m->irqtype; 177 mp_irq->irqtype = m->irqtype;
177 mp_irq->mp_irqflag = m->irqflag; 178 mp_irq->irqflag = m->irqflag;
178 mp_irq->mp_srcbus = m->srcbus; 179 mp_irq->srcbus = m->srcbus;
179 mp_irq->mp_srcbusirq = m->srcbusirq; 180 mp_irq->srcbusirq = m->srcbusirq;
180 mp_irq->mp_dstirq = m->dstirq; 181 mp_irq->dstirq = m->dstirq;
181} 182}
182 183
183static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, 184static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
184 struct mpc_intsrc *m) 185 struct mpc_intsrc *m)
185{ 186{
186 m->dstapic = mp_irq->mp_dstapic; 187 m->dstapic = mp_irq->dstapic;
187 m->type = mp_irq->mp_type; 188 m->type = mp_irq->type;
188 m->irqtype = mp_irq->mp_irqtype; 189 m->irqtype = mp_irq->irqtype;
189 m->irqflag = mp_irq->mp_irqflag; 190 m->irqflag = mp_irq->irqflag;
190 m->srcbus = mp_irq->mp_srcbus; 191 m->srcbus = mp_irq->srcbus;
191 m->srcbusirq = mp_irq->mp_srcbusirq; 192 m->srcbusirq = mp_irq->srcbusirq;
192 m->dstirq = mp_irq->mp_dstirq; 193 m->dstirq = mp_irq->dstirq;
193} 194}
194 195
195static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, 196static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
196 struct mpc_intsrc *m) 197 struct mpc_intsrc *m)
197{ 198{
198 if (mp_irq->mp_dstapic != m->dstapic) 199 if (mp_irq->dstapic != m->dstapic)
199 return 1; 200 return 1;
200 if (mp_irq->mp_type != m->type) 201 if (mp_irq->type != m->type)
201 return 2; 202 return 2;
202 if (mp_irq->mp_irqtype != m->irqtype) 203 if (mp_irq->irqtype != m->irqtype)
203 return 3; 204 return 3;
204 if (mp_irq->mp_irqflag != m->irqflag) 205 if (mp_irq->irqflag != m->irqflag)
205 return 4; 206 return 4;
206 if (mp_irq->mp_srcbus != m->srcbus) 207 if (mp_irq->srcbus != m->srcbus)
207 return 5; 208 return 5;
208 if (mp_irq->mp_srcbusirq != m->srcbusirq) 209 if (mp_irq->srcbusirq != m->srcbusirq)
209 return 6; 210 return 6;
210 if (mp_irq->mp_dstirq != m->dstirq) 211 if (mp_irq->dstirq != m->dstirq)
211 return 7; 212 return 7;
212 213
213 return 0; 214 return 0;
@@ -416,7 +417,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
416 intsrc.type = MP_INTSRC; 417 intsrc.type = MP_INTSRC;
417 intsrc.irqflag = 0; /* conforming */ 418 intsrc.irqflag = 0; /* conforming */
418 intsrc.srcbus = 0; 419 intsrc.srcbus = 0;
419 intsrc.dstapic = mp_ioapics[0].mp_apicid; 420 intsrc.dstapic = mp_ioapics[0].apicid;
420 421
421 intsrc.irqtype = mp_INT; 422 intsrc.irqtype = mp_INT;
422 423
@@ -569,14 +570,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
569 } 570 }
570} 571}
571 572
572static struct intel_mp_floating *mpf_found; 573static struct mpf_intel *mpf_found;
573 574
574/* 575/*
575 * Scan the memory blocks for an SMP configuration block. 576 * Scan the memory blocks for an SMP configuration block.
576 */ 577 */
577static void __init __get_smp_config(unsigned int early) 578static void __init __get_smp_config(unsigned int early)
578{ 579{
579 struct intel_mp_floating *mpf = mpf_found; 580 struct mpf_intel *mpf = mpf_found;
580 581
581 if (!mpf) 582 if (!mpf)
582 return; 583 return;
@@ -597,9 +598,9 @@ static void __init __get_smp_config(unsigned int early)
597 } 598 }
598 599
599 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 600 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
600 mpf->mpf_specification); 601 mpf->specification);
601#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 602#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
602 if (mpf->mpf_feature2 & (1 << 7)) { 603 if (mpf->feature2 & (1 << 7)) {
603 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 604 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
604 pic_mode = 1; 605 pic_mode = 1;
605 } else { 606 } else {
@@ -610,7 +611,7 @@ static void __init __get_smp_config(unsigned int early)
610 /* 611 /*
611 * Now see if we need to read further. 612 * Now see if we need to read further.
612 */ 613 */
613 if (mpf->mpf_feature1 != 0) { 614 if (mpf->feature1 != 0) {
614 if (early) { 615 if (early) {
615 /* 616 /*
616 * local APIC has default address 617 * local APIC has default address
@@ -620,16 +621,16 @@ static void __init __get_smp_config(unsigned int early)
620 } 621 }
621 622
622 printk(KERN_INFO "Default MP configuration #%d\n", 623 printk(KERN_INFO "Default MP configuration #%d\n",
623 mpf->mpf_feature1); 624 mpf->feature1);
624 construct_default_ISA_mptable(mpf->mpf_feature1); 625 construct_default_ISA_mptable(mpf->feature1);
625 626
626 } else if (mpf->mpf_physptr) { 627 } else if (mpf->physptr) {
627 628
628 /* 629 /*
629 * Read the physical hardware table. Anything here will 630 * Read the physical hardware table. Anything here will
630 * override the defaults. 631 * override the defaults.
631 */ 632 */
632 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { 633 if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
633#ifdef CONFIG_X86_LOCAL_APIC 634#ifdef CONFIG_X86_LOCAL_APIC
634 smp_found_config = 0; 635 smp_found_config = 0;
635#endif 636#endif
@@ -687,19 +688,19 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
687 unsigned reserve) 688 unsigned reserve)
688{ 689{
689 unsigned int *bp = phys_to_virt(base); 690 unsigned int *bp = phys_to_virt(base);
690 struct intel_mp_floating *mpf; 691 struct mpf_intel *mpf;
691 692
692 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", 693 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
693 bp, length); 694 bp, length);
694 BUILD_BUG_ON(sizeof(*mpf) != 16); 695 BUILD_BUG_ON(sizeof(*mpf) != 16);
695 696
696 while (length > 0) { 697 while (length > 0) {
697 mpf = (struct intel_mp_floating *)bp; 698 mpf = (struct mpf_intel *)bp;
698 if ((*bp == SMP_MAGIC_IDENT) && 699 if ((*bp == SMP_MAGIC_IDENT) &&
699 (mpf->mpf_length == 1) && 700 (mpf->length == 1) &&
700 !mpf_checksum((unsigned char *)bp, 16) && 701 !mpf_checksum((unsigned char *)bp, 16) &&
701 ((mpf->mpf_specification == 1) 702 ((mpf->specification == 1)
702 || (mpf->mpf_specification == 4))) { 703 || (mpf->specification == 4))) {
703#ifdef CONFIG_X86_LOCAL_APIC 704#ifdef CONFIG_X86_LOCAL_APIC
704 smp_found_config = 1; 705 smp_found_config = 1;
705#endif 706#endif
@@ -712,7 +713,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
712 return 1; 713 return 1;
713 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, 714 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
714 BOOTMEM_DEFAULT); 715 BOOTMEM_DEFAULT);
715 if (mpf->mpf_physptr) { 716 if (mpf->physptr) {
716 unsigned long size = PAGE_SIZE; 717 unsigned long size = PAGE_SIZE;
717#ifdef CONFIG_X86_32 718#ifdef CONFIG_X86_32
718 /* 719 /*
@@ -721,14 +722,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
721 * the bottom is mapped now. 722 * the bottom is mapped now.
722 * PC-9800's MPC table places on the very last 723 * PC-9800's MPC table places on the very last
723 * of physical memory; so that simply reserving 724 * of physical memory; so that simply reserving
724 * PAGE_SIZE from mpg->mpf_physptr yields BUG() 725 * PAGE_SIZE from mpf->physptr yields BUG()
725 * in reserve_bootmem. 726 * in reserve_bootmem.
726 */ 727 */
727 unsigned long end = max_low_pfn * PAGE_SIZE; 728 unsigned long end = max_low_pfn * PAGE_SIZE;
728 if (mpf->mpf_physptr + size > end) 729 if (mpf->physptr + size > end)
729 size = end - mpf->mpf_physptr; 730 size = end - mpf->physptr;
730#endif 731#endif
731 reserve_bootmem_generic(mpf->mpf_physptr, size, 732 reserve_bootmem_generic(mpf->physptr, size,
732 BOOTMEM_DEFAULT); 733 BOOTMEM_DEFAULT);
733 } 734 }
734 735
@@ -808,15 +809,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
808 /* not legacy */ 809 /* not legacy */
809 810
810 for (i = 0; i < mp_irq_entries; i++) { 811 for (i = 0; i < mp_irq_entries; i++) {
811 if (mp_irqs[i].mp_irqtype != mp_INT) 812 if (mp_irqs[i].irqtype != mp_INT)
812 continue; 813 continue;
813 814
814 if (mp_irqs[i].mp_irqflag != 0x0f) 815 if (mp_irqs[i].irqflag != 0x0f)
815 continue; 816 continue;
816 817
817 if (mp_irqs[i].mp_srcbus != m->srcbus) 818 if (mp_irqs[i].srcbus != m->srcbus)
818 continue; 819 continue;
819 if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) 820 if (mp_irqs[i].srcbusirq != m->srcbusirq)
820 continue; 821 continue;
821 if (irq_used[i]) { 822 if (irq_used[i]) {
822 /* already claimed */ 823 /* already claimed */
@@ -921,10 +922,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
921 if (irq_used[i]) 922 if (irq_used[i])
922 continue; 923 continue;
923 924
924 if (mp_irqs[i].mp_irqtype != mp_INT) 925 if (mp_irqs[i].irqtype != mp_INT)
925 continue; 926 continue;
926 927
927 if (mp_irqs[i].mp_irqflag != 0x0f) 928 if (mp_irqs[i].irqflag != 0x0f)
928 continue; 929 continue;
929 930
930 if (nr_m_spare > 0) { 931 if (nr_m_spare > 0) {
@@ -1000,7 +1001,7 @@ static int __init update_mp_table(void)
1000{ 1001{
1001 char str[16]; 1002 char str[16];
1002 char oem[10]; 1003 char oem[10];
1003 struct intel_mp_floating *mpf; 1004 struct mpf_intel *mpf;
1004 struct mpc_table *mpc, *mpc_new; 1005 struct mpc_table *mpc, *mpc_new;
1005 1006
1006 if (!enable_update_mptable) 1007 if (!enable_update_mptable)
@@ -1013,19 +1014,19 @@ static int __init update_mp_table(void)
1013 /* 1014 /*
1014 * Now see if we need to go further. 1015 * Now see if we need to go further.
1015 */ 1016 */
1016 if (mpf->mpf_feature1 != 0) 1017 if (mpf->feature1 != 0)
1017 return 0; 1018 return 0;
1018 1019
1019 if (!mpf->mpf_physptr) 1020 if (!mpf->physptr)
1020 return 0; 1021 return 0;
1021 1022
1022 mpc = phys_to_virt(mpf->mpf_physptr); 1023 mpc = phys_to_virt(mpf->physptr);
1023 1024
1024 if (!smp_check_mpc(mpc, oem, str)) 1025 if (!smp_check_mpc(mpc, oem, str))
1025 return 0; 1026 return 0;
1026 1027
1027 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); 1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
1028 printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); 1029 printk(KERN_INFO "physptr: %x\n", mpf->physptr);
1029 1030
1030 if (mpc_new_phys && mpc->length > mpc_new_length) { 1031 if (mpc_new_phys && mpc->length > mpc_new_length) {
1031 mpc_new_phys = 0; 1032 mpc_new_phys = 0;
@@ -1046,23 +1047,23 @@ static int __init update_mp_table(void)
1046 } 1047 }
1047 printk(KERN_INFO "use in-positon replacing\n"); 1048 printk(KERN_INFO "use in-positon replacing\n");
1048 } else { 1049 } else {
1049 mpf->mpf_physptr = mpc_new_phys; 1050 mpf->physptr = mpc_new_phys;
1050 mpc_new = phys_to_virt(mpc_new_phys); 1051 mpc_new = phys_to_virt(mpc_new_phys);
1051 memcpy(mpc_new, mpc, mpc->length); 1052 memcpy(mpc_new, mpc, mpc->length);
1052 mpc = mpc_new; 1053 mpc = mpc_new;
1053 /* check if we can modify that */ 1054 /* check if we can modify that */
1054 if (mpc_new_phys - mpf->mpf_physptr) { 1055 if (mpc_new_phys - mpf->physptr) {
1055 struct intel_mp_floating *mpf_new; 1056 struct mpf_intel *mpf_new;
1056 /* steal 16 bytes from [0, 1k) */ 1057 /* steal 16 bytes from [0, 1k) */
1057 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); 1058 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
1058 mpf_new = phys_to_virt(0x400 - 16); 1059 mpf_new = phys_to_virt(0x400 - 16);
1059 memcpy(mpf_new, mpf, 16); 1060 memcpy(mpf_new, mpf, 16);
1060 mpf = mpf_new; 1061 mpf = mpf_new;
1061 mpf->mpf_physptr = mpc_new_phys; 1062 mpf->physptr = mpc_new_phys;
1062 } 1063 }
1063 mpf->mpf_checksum = 0; 1064 mpf->checksum = 0;
1064 mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); 1065 mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
1065 printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); 1066 printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
1066 } 1067 }
1067 1068
1068 /* 1069 /*
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 726266695b2..3cf3413ec62 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -35,10 +35,10 @@
35#include <linux/device.h> 35#include <linux/device.h>
36#include <linux/cpu.h> 36#include <linux/cpu.h>
37#include <linux/notifier.h> 37#include <linux/notifier.h>
38#include <linux/uaccess.h>
38 39
39#include <asm/processor.h> 40#include <asm/processor.h>
40#include <asm/msr.h> 41#include <asm/msr.h>
41#include <asm/uaccess.h>
42#include <asm/system.h> 42#include <asm/system.h>
43 43
44static struct class *msr_class; 44static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7228979f1e7..23b6d9e6e4f 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -61,11 +61,7 @@ static int endflag __initdata;
61 61
62static inline unsigned int get_nmi_count(int cpu) 62static inline unsigned int get_nmi_count(int cpu)
63{ 63{
64#ifdef CONFIG_X86_64 64 return per_cpu(irq_stat, cpu).__nmi_count;
65 return cpu_pda(cpu)->__nmi_count;
66#else
67 return nmi_count(cpu);
68#endif
69} 65}
70 66
71static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
82 */ 78 */
83static inline unsigned int get_timer_irqs(int cpu) 79static inline unsigned int get_timer_irqs(int cpu)
84{ 80{
85#ifdef CONFIG_X86_64
86 return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
87#else
88 return per_cpu(irq_stat, cpu).apic_timer_irqs + 81 return per_cpu(irq_stat, cpu).apic_timer_irqs +
89 per_cpu(irq_stat, cpu).irq0_irqs; 82 per_cpu(irq_stat, cpu).irq0_irqs;
90#endif
91} 83}
92 84
93#ifdef CONFIG_SMP 85#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b..2c00a57ccb9 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 67EXPORT_PER_CPU_SYMBOL(current_task);
68 68
69DEFINE_PER_CPU(int, cpu_number);
70EXPORT_PER_CPU_SYMBOL(cpu_number);
71
72/* 69/*
73 * Return saved PC of a blocked thread. 70 * Return saved PC of a blocked thread.
74 */ 71 */
@@ -591,7 +588,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
591 if (prev->gs | next->gs) 588 if (prev->gs | next->gs)
592 loadsegment(gs, next->gs); 589 loadsegment(gs, next->gs);
593 590
594 x86_write_percpu(current_task, next_p); 591 percpu_write(current_task, next_p);
595 592
596 return prev_p; 593 return prev_p;
597} 594}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4..4523ff88a69 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -57,6 +57,12 @@
57 57
58asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
59 59
60DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
61EXPORT_PER_CPU_SYMBOL(current_task);
62
63DEFINE_PER_CPU(unsigned long, old_rsp);
64static DEFINE_PER_CPU(unsigned char, is_idle);
65
60unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 66unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
61 67
62static ATOMIC_NOTIFIER_HEAD(idle_notifier); 68static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -75,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
75 81
76void enter_idle(void) 82void enter_idle(void)
77{ 83{
78 write_pda(isidle, 1); 84 percpu_write(is_idle, 1);
79 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 85 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
80} 86}
81 87
82static void __exit_idle(void) 88static void __exit_idle(void)
83{ 89{
84 if (test_and_clear_bit_pda(0, isidle) == 0) 90 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
85 return; 91 return;
86 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 92 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
87} 93}
@@ -392,7 +398,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
392 load_gs_index(0); 398 load_gs_index(0);
393 regs->ip = new_ip; 399 regs->ip = new_ip;
394 regs->sp = new_sp; 400 regs->sp = new_sp;
395 write_pda(oldrsp, new_sp); 401 percpu_write(old_rsp, new_sp);
396 regs->cs = __USER_CS; 402 regs->cs = __USER_CS;
397 regs->ss = __USER_DS; 403 regs->ss = __USER_DS;
398 regs->flags = 0x200; 404 regs->flags = 0x200;
@@ -613,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
613 /* 619 /*
614 * Switch the PDA and FPU contexts. 620 * Switch the PDA and FPU contexts.
615 */ 621 */
616 prev->usersp = read_pda(oldrsp); 622 prev->usersp = percpu_read(old_rsp);
617 write_pda(oldrsp, next->usersp); 623 percpu_write(old_rsp, next->usersp);
618 write_pda(pcurrent, next_p); 624 percpu_write(current_task, next_p);
619 625
620 write_pda(kernelstack, 626 percpu_write(kernel_stack,
621 (unsigned long)task_stack_page(next_p) + 627 (unsigned long)task_stack_page(next_p) +
622 THREAD_SIZE - PDA_STACKOFFSET); 628 THREAD_SIZE - KERNEL_STACK_OFFSET);
623#ifdef CONFIG_CC_STACKPROTECTOR 629#ifdef CONFIG_CC_STACKPROTECTOR
624 write_pda(stack_canary, next_p->stack_canary); 630 write_pda(stack_canary, next_p->stack_canary);
625 /* 631 /*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643..f8536fee5c1 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -14,6 +14,7 @@
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h> 15#include <asm/pci_x86.h>
16#include <asm/virtext.h> 16#include <asm/virtext.h>
17#include <asm/cpu.h>
17 18
18#ifdef CONFIG_X86_32 19#ifdef CONFIG_X86_32
19# include <linux/dmi.h> 20# include <linux/dmi.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf6..f41c4486c27 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -89,7 +89,7 @@
89 89
90#include <asm/system.h> 90#include <asm/system.h>
91#include <asm/vsyscall.h> 91#include <asm/vsyscall.h>
92#include <asm/smp.h> 92#include <asm/cpu.h>
93#include <asm/desc.h> 93#include <asm/desc.h>
94#include <asm/dma.h> 94#include <asm/dma.h>
95#include <asm/iommu.h> 95#include <asm/iommu.h>
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 55c46074eba..efbafbbff58 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,6 +13,23 @@
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/apicdef.h> 14#include <asm/apicdef.h>
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h>
17#include <asm/cpumask.h>
18
19#ifdef CONFIG_DEBUG_PER_CPU_MAPS
20# define DBG(x...) printk(KERN_DEBUG x)
21#else
22# define DBG(x...)
23#endif
24
25/*
26 * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
27 * voyager wants cpu_number too.
28 */
29#ifdef CONFIG_SMP
30DEFINE_PER_CPU(int, cpu_number);
31EXPORT_PER_CPU_SYMBOL(cpu_number);
32#endif
16 33
17#ifdef CONFIG_X86_LOCAL_APIC 34#ifdef CONFIG_X86_LOCAL_APIC
18unsigned int num_processors; 35unsigned int num_processors;
@@ -26,31 +43,84 @@ unsigned int max_physical_apicid;
26physid_mask_t phys_cpu_present_map; 43physid_mask_t phys_cpu_present_map;
27#endif 44#endif
28 45
29/* map cpu index to physical APIC ID */ 46/*
47 * Map cpu index to physical APIC ID
48 */
30DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); 49DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
31DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); 50DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
32EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 51EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
33EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 52EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
34 53
35#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 54#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
36#define X86_64_NUMA 1 55#define X86_64_NUMA 1 /* (used later) */
56DEFINE_PER_CPU(int, node_number) = 0;
57EXPORT_PER_CPU_SYMBOL(node_number);
37 58
38/* map cpu index to node index */ 59/*
60 * Map cpu index to node index
61 */
39DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 62DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
40EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 63EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
41 64
42/* which logical CPUs are on which nodes */ 65/*
66 * Which logical CPUs are on which nodes
67 */
43cpumask_t *node_to_cpumask_map; 68cpumask_t *node_to_cpumask_map;
44EXPORT_SYMBOL(node_to_cpumask_map); 69EXPORT_SYMBOL(node_to_cpumask_map);
45 70
46/* setup node_to_cpumask_map */ 71/*
72 * Setup node_to_cpumask_map
73 */
47static void __init setup_node_to_cpumask_map(void); 74static void __init setup_node_to_cpumask_map(void);
48 75
49#else 76#else
50static inline void setup_node_to_cpumask_map(void) { } 77static inline void setup_node_to_cpumask_map(void) { }
51#endif 78#endif
52 79
53#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 80/*
81 * Define load_pda_offset() and per-cpu __pda for x86_64.
82 * load_pda_offset() is responsible for loading the offset of pda into
83 * %gs.
84 *
85 * On SMP, pda offset also duals as percpu base address and thus it
86 * should be at the start of per-cpu area. To achieve this, it's
87 * preallocated in vmlinux_64.lds.S directly instead of using
88 * DEFINE_PER_CPU().
89 */
90#ifdef CONFIG_X86_64
91void __cpuinit load_pda_offset(int cpu)
92{
93 /* Memory clobbers used to order pda/percpu accesses */
94 mb();
95 wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
96 mb();
97}
98#ifndef CONFIG_SMP
99DEFINE_PER_CPU(struct x8664_pda, __pda);
100#endif
101EXPORT_PER_CPU_SYMBOL(__pda);
102#endif /* CONFIG_SMP && CONFIG_X86_64 */
103
104#ifdef CONFIG_X86_64
105
106/* correctly size the local cpu masks */
107static void setup_cpu_local_masks(void)
108{
109 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
110 alloc_bootmem_cpumask_var(&cpu_callin_mask);
111 alloc_bootmem_cpumask_var(&cpu_callout_mask);
112 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
113}
114
115#else /* CONFIG_X86_32 */
116
117static inline void setup_cpu_local_masks(void)
118{
119}
120
121#endif /* CONFIG_X86_32 */
122
123#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
54/* 124/*
55 * Copy data used in early init routines from the initial arrays to the 125 * Copy data used in early init routines from the initial arrays to the
56 * per cpu data areas. These arrays then become expendable and the 126 * per cpu data areas. These arrays then become expendable and the
@@ -79,78 +149,14 @@ static void __init setup_per_cpu_maps(void)
79#endif 149#endif
80} 150}
81 151
82#ifdef CONFIG_X86_32 152#ifdef CONFIG_X86_64
83/* 153unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
84 * Great future not-so-futuristic plan: make i386 and x86_64 do it 154 [0] = (unsigned long)__per_cpu_load,
85 * the same way 155};
86 */ 156#else
87unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 157unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
158#endif
88EXPORT_SYMBOL(__per_cpu_offset); 159EXPORT_SYMBOL(__per_cpu_offset);
89static inline void setup_cpu_pda_map(void) { }
90
91#elif !defined(CONFIG_SMP)
92static inline void setup_cpu_pda_map(void) { }
93
94#else /* CONFIG_SMP && CONFIG_X86_64 */
95
96/*
97 * Allocate cpu_pda pointer table and array via alloc_bootmem.
98 */
99static void __init setup_cpu_pda_map(void)
100{
101 char *pda;
102 struct x8664_pda **new_cpu_pda;
103 unsigned long size;
104 int cpu;
105
106 size = roundup(sizeof(struct x8664_pda), cache_line_size());
107
108 /* allocate cpu_pda array and pointer table */
109 {
110 unsigned long tsize = nr_cpu_ids * sizeof(void *);
111 unsigned long asize = size * (nr_cpu_ids - 1);
112
113 tsize = roundup(tsize, cache_line_size());
114 new_cpu_pda = alloc_bootmem(tsize + asize);
115 pda = (char *)new_cpu_pda + tsize;
116 }
117
118 /* initialize pointer table to static pda's */
119 for_each_possible_cpu(cpu) {
120 if (cpu == 0) {
121 /* leave boot cpu pda in place */
122 new_cpu_pda[0] = cpu_pda(0);
123 continue;
124 }
125 new_cpu_pda[cpu] = (struct x8664_pda *)pda;
126 new_cpu_pda[cpu]->in_bootmem = 1;
127 pda += size;
128 }
129
130 /* point to new pointer table */
131 _cpu_pda = new_cpu_pda;
132}
133
134#endif /* CONFIG_SMP && CONFIG_X86_64 */
135
136#ifdef CONFIG_X86_64
137
138/* correctly size the local cpu masks */
139static void setup_cpu_local_masks(void)
140{
141 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
142 alloc_bootmem_cpumask_var(&cpu_callin_mask);
143 alloc_bootmem_cpumask_var(&cpu_callout_mask);
144 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
145}
146
147#else /* CONFIG_X86_32 */
148
149static inline void setup_cpu_local_masks(void)
150{
151}
152
153#endif /* CONFIG_X86_32 */
154 160
155/* 161/*
156 * Great future plan: 162 * Great future plan:
@@ -164,9 +170,6 @@ void __init setup_per_cpu_areas(void)
164 int cpu; 170 int cpu;
165 unsigned long align = 1; 171 unsigned long align = 1;
166 172
167 /* Setup cpu_pda map */
168 setup_cpu_pda_map();
169
170 /* Copy section for each CPU (we discard the original) */ 173 /* Copy section for each CPU (we discard the original) */
171 old_size = PERCPU_ENOUGH_ROOM; 174 old_size = PERCPU_ENOUGH_ROOM;
172 align = max_t(unsigned long, PAGE_SIZE, align); 175 align = max_t(unsigned long, PAGE_SIZE, align);
@@ -197,8 +200,25 @@ void __init setup_per_cpu_areas(void)
197 cpu, node, __pa(ptr)); 200 cpu, node, __pa(ptr));
198 } 201 }
199#endif 202#endif
203
204 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
200 per_cpu_offset(cpu) = ptr - __per_cpu_start; 205 per_cpu_offset(cpu) = ptr - __per_cpu_start;
201 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 206 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
207 per_cpu(cpu_number, cpu) = cpu;
208#ifdef CONFIG_X86_64
209 per_cpu(irq_stack_ptr, cpu) =
210 (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
211 /*
212 * CPU0 modified pda in the init data area, reload pda
213 * offset for CPU0 and clear the area for others.
214 */
215 if (cpu == 0)
216 load_pda_offset(0);
217 else
218 memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
219#endif
220
221 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
202 } 222 }
203 223
204 /* Setup percpu data maps */ 224 /* Setup percpu data maps */
@@ -220,6 +240,7 @@ void __init setup_per_cpu_areas(void)
220 * Requires node_possible_map to be valid. 240 * Requires node_possible_map to be valid.
221 * 241 *
222 * Note: node_to_cpumask() is not valid until after this is done. 242 * Note: node_to_cpumask() is not valid until after this is done.
243 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
223 */ 244 */
224static void __init setup_node_to_cpumask_map(void) 245static void __init setup_node_to_cpumask_map(void)
225{ 246{
@@ -235,6 +256,7 @@ static void __init setup_node_to_cpumask_map(void)
235 256
236 /* allocate the map */ 257 /* allocate the map */
237 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); 258 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
259 DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
238 260
239 pr_debug("Node to cpumask map at %p for %d nodes\n", 261 pr_debug("Node to cpumask map at %p for %d nodes\n",
240 map, nr_node_ids); 262 map, nr_node_ids);
@@ -247,17 +269,23 @@ void __cpuinit numa_set_node(int cpu, int node)
247{ 269{
248 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 270 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
249 271
250 if (cpu_pda(cpu) && node != NUMA_NO_NODE) 272 /* early setting, no percpu area yet */
251 cpu_pda(cpu)->nodenumber = node; 273 if (cpu_to_node_map) {
252
253 if (cpu_to_node_map)
254 cpu_to_node_map[cpu] = node; 274 cpu_to_node_map[cpu] = node;
275 return;
276 }
255 277
256 else if (per_cpu_offset(cpu)) 278#ifdef CONFIG_DEBUG_PER_CPU_MAPS
257 per_cpu(x86_cpu_to_node_map, cpu) = node; 279 if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
280 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
281 dump_stack();
282 return;
283 }
284#endif
285 per_cpu(x86_cpu_to_node_map, cpu) = node;
258 286
259 else 287 if (node != NUMA_NO_NODE)
260 pr_debug("Setting node for non-present cpu %d\n", cpu); 288 per_cpu(node_number, cpu) = node;
261} 289}
262 290
263void __cpuinit numa_clear_node(int cpu) 291void __cpuinit numa_clear_node(int cpu)
@@ -274,7 +302,7 @@ void __cpuinit numa_add_cpu(int cpu)
274 302
275void __cpuinit numa_remove_cpu(int cpu) 303void __cpuinit numa_remove_cpu(int cpu)
276{ 304{
277 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); 305 cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
278} 306}
279 307
280#else /* CONFIG_DEBUG_PER_CPU_MAPS */ 308#else /* CONFIG_DEBUG_PER_CPU_MAPS */
@@ -284,7 +312,7 @@ void __cpuinit numa_remove_cpu(int cpu)
284 */ 312 */
285static void __cpuinit numa_set_cpumask(int cpu, int enable) 313static void __cpuinit numa_set_cpumask(int cpu, int enable)
286{ 314{
287 int node = cpu_to_node(cpu); 315 int node = early_cpu_to_node(cpu);
288 cpumask_t *mask; 316 cpumask_t *mask;
289 char buf[64]; 317 char buf[64];
290 318
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bb1a3b1fc87..869b98840fd 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -53,7 +53,6 @@
53#include <asm/nmi.h> 53#include <asm/nmi.h>
54#include <asm/irq.h> 54#include <asm/irq.h>
55#include <asm/idle.h> 55#include <asm/idle.h>
56#include <asm/smp.h>
57#include <asm/trampoline.h> 56#include <asm/trampoline.h>
58#include <asm/cpu.h> 57#include <asm/cpu.h>
59#include <asm/numa.h> 58#include <asm/numa.h>
@@ -745,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
745 complete(&c_idle->done); 744 complete(&c_idle->done);
746} 745}
747 746
748#ifdef CONFIG_X86_64
749
750/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
751static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
752{
753 if (!after_bootmem)
754 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
755}
756
757/*
758 * Allocate node local memory for the AP pda.
759 *
760 * Must be called after the _cpu_pda pointer table is initialized.
761 */
762int __cpuinit get_local_pda(int cpu)
763{
764 struct x8664_pda *oldpda, *newpda;
765 unsigned long size = sizeof(struct x8664_pda);
766 int node = cpu_to_node(cpu);
767
768 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
769 return 0;
770
771 oldpda = cpu_pda(cpu);
772 newpda = kmalloc_node(size, GFP_ATOMIC, node);
773 if (!newpda) {
774 printk(KERN_ERR "Could not allocate node local PDA "
775 "for CPU %d on node %d\n", cpu, node);
776
777 if (oldpda)
778 return 0; /* have a usable pda */
779 else
780 return -1;
781 }
782
783 if (oldpda) {
784 memcpy(newpda, oldpda, size);
785 free_bootmem_pda(oldpda);
786 }
787
788 newpda->in_bootmem = 0;
789 cpu_pda(cpu) = newpda;
790 return 0;
791}
792#endif /* CONFIG_X86_64 */
793
794static int __cpuinit do_boot_cpu(int apicid, int cpu) 747static int __cpuinit do_boot_cpu(int apicid, int cpu)
795/* 748/*
796 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 749 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -808,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
808 }; 761 };
809 INIT_WORK(&c_idle.work, do_fork_idle); 762 INIT_WORK(&c_idle.work, do_fork_idle);
810 763
811#ifdef CONFIG_X86_64
812 /* Allocate node local memory for AP pdas */
813 if (cpu > 0) {
814 boot_error = get_local_pda(cpu);
815 if (boot_error)
816 goto restore_state;
817 /* if can't get pda memory, can't start cpu */
818 }
819#endif
820
821 alternatives_smp_switch(1); 764 alternatives_smp_switch(1);
822 765
823 c_idle.idle = get_idle_for_cpu(cpu); 766 c_idle.idle = get_idle_for_cpu(cpu);
@@ -847,14 +790,17 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
847 790
848 set_idle_for_cpu(cpu, c_idle.idle); 791 set_idle_for_cpu(cpu, c_idle.idle);
849do_rest: 792do_rest:
850#ifdef CONFIG_X86_32
851 per_cpu(current_task, cpu) = c_idle.idle; 793 per_cpu(current_task, cpu) = c_idle.idle;
794#ifdef CONFIG_X86_32
852 init_gdt(cpu); 795 init_gdt(cpu);
853 /* Stack for startup_32 can be just as for start_secondary onwards */ 796 /* Stack for startup_32 can be just as for start_secondary onwards */
854 irq_ctx_init(cpu); 797 irq_ctx_init(cpu);
855#else 798#else
856 cpu_pda(cpu)->pcurrent = c_idle.idle;
857 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 799 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
800 initial_gs = per_cpu_offset(cpu);
801 per_cpu(kernel_stack, cpu) =
802 (unsigned long)task_stack_page(c_idle.idle) -
803 KERNEL_STACK_OFFSET + THREAD_SIZE;
858#endif 804#endif
859 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 805 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
860 initial_code = (unsigned long)start_secondary; 806 initial_code = (unsigned long)start_secondary;
@@ -931,9 +877,7 @@ do_rest:
931 inquire_remote_apic(apicid); 877 inquire_remote_apic(apicid);
932 } 878 }
933 } 879 }
934#ifdef CONFIG_X86_64 880
935restore_state:
936#endif
937 if (boot_error) { 881 if (boot_error) {
938 /* Try to put things back the way they were before ... */ 882 /* Try to put things back the way they were before ... */
939 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 883 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -1125,6 +1069,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1125 printk(KERN_ERR "... forcing use of dummy APIC emulation." 1069 printk(KERN_ERR "... forcing use of dummy APIC emulation."
1126 "(tell your hw vendor)\n"); 1070 "(tell your hw vendor)\n");
1127 smpboot_clear_io_apic(); 1071 smpboot_clear_io_apic();
1072 disable_ioapic_setup();
1128 return -1; 1073 return -1;
1129 } 1074 }
1130 1075
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 397e309839d..add36b4e37c 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -3,11 +3,16 @@
3 */ 3 */
4#include <linux/module.h> 4#include <linux/module.h>
5#include <asm/smp.h> 5#include <asm/smp.h>
6#include <asm/sections.h>
6 7
7#ifdef CONFIG_X86_32 8#ifdef CONFIG_X86_64
9DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load;
10#else
8DEFINE_PER_CPU(unsigned long, this_cpu_off); 11DEFINE_PER_CPU(unsigned long, this_cpu_off);
12#endif
9EXPORT_PER_CPU_SYMBOL(this_cpu_off); 13EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10 14
15#ifdef CONFIG_X86_32
11/* 16/*
12 * Initialize the CPU's GDT. This is either the boot CPU doing itself 17 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 * (still using the master per-cpu area), or a CPU doing it for a 18 * (still using the master per-cpu area), or a CPU doing it for a
@@ -23,8 +28,5 @@ __cpuinit void init_gdt(int cpu)
23 28
24 write_gdt_entry(get_cpu_gdt_table(cpu), 29 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); 30 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
26
27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
28 per_cpu(cpu_number, cpu) = cpu;
29} 31}
30#endif 32#endif
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index ce505464224..abf0808d6fc 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -4,8 +4,8 @@
4 4
5#include <asm/tlbflush.h> 5#include <asm/tlbflush.h>
6 6
7DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) 7DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
8 ____cacheline_aligned = { &init_mm, 0, }; 8 = { &init_mm, 0, };
9 9
10/* must come after the send_IPI functions above for inlining */ 10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h> 11#include <mach_ipi.h>
@@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
20 * Optimizations Manfred Spraul <manfred@colorfullife.com> 20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */ 21 */
22 22
23static cpumask_t flush_cpumask; 23static cpumask_var_t flush_cpumask;
24static struct mm_struct *flush_mm; 24static struct mm_struct *flush_mm;
25static unsigned long flush_va; 25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock); 26static DEFINE_SPINLOCK(tlbstate_lock);
@@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
34 */ 34 */
35void leave_mm(int cpu) 35void leave_mm(int cpu)
36{ 36{
37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); 37 BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); 38 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir); 39 load_cr3(swapper_pg_dir);
40} 40}
41EXPORT_SYMBOL_GPL(leave_mm); 41EXPORT_SYMBOL_GPL(leave_mm);
@@ -92,7 +92,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
92 92
93 cpu = get_cpu(); 93 cpu = get_cpu();
94 94
95 if (!cpu_isset(cpu, flush_cpumask)) 95 if (!cpumask_test_cpu(cpu, flush_cpumask))
96 goto out; 96 goto out;
97 /* 97 /*
98 * This was a BUG() but until someone can quote me the 98 * This was a BUG() but until someone can quote me the
@@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
103 * BUG(); 103 * BUG();
104 */ 104 */
105 105
106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { 106 if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { 107 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL) 108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb(); 109 local_flush_tlb();
110 else 110 else
@@ -114,35 +114,22 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
114 } 114 }
115 ack_APIC_irq(); 115 ack_APIC_irq();
116 smp_mb__before_clear_bit(); 116 smp_mb__before_clear_bit();
117 cpu_clear(cpu, flush_cpumask); 117 cpumask_clear_cpu(cpu, flush_cpumask);
118 smp_mb__after_clear_bit(); 118 smp_mb__after_clear_bit();
119out: 119out:
120 put_cpu_no_resched(); 120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count); 121 inc_irq_stat(irq_tlb_count);
122} 122}
123 123
124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 124void native_flush_tlb_others(const struct cpumask *cpumask,
125 unsigned long va) 125 struct mm_struct *mm, unsigned long va)
126{ 126{
127 cpumask_t cpumask = *cpumaskp;
128
129 /* 127 /*
130 * A couple of (to be removed) sanity checks:
131 *
132 * - current CPU must not be in mask
133 * - mask must exist :) 128 * - mask must exist :)
134 */ 129 */
135 BUG_ON(cpus_empty(cpumask)); 130 BUG_ON(cpumask_empty(cpumask));
136 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
137 BUG_ON(!mm); 131 BUG_ON(!mm);
138 132
139#ifdef CONFIG_HOTPLUG_CPU
140 /* If a CPU which we ran on has gone down, OK. */
141 cpus_and(cpumask, cpumask, cpu_online_map);
142 if (unlikely(cpus_empty(cpumask)))
143 return;
144#endif
145
146 /* 133 /*
147 * i'm not happy about this global shared spinlock in the 134 * i'm not happy about this global shared spinlock in the
148 * MM hot path, but we'll see how contended it is. 135 * MM hot path, but we'll see how contended it is.
@@ -150,9 +137,17 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
150 */ 137 */
151 spin_lock(&tlbstate_lock); 138 spin_lock(&tlbstate_lock);
152 139
140 cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
141#ifdef CONFIG_HOTPLUG_CPU
142 /* If a CPU which we ran on has gone down, OK. */
143 cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
144 if (unlikely(cpumask_empty(flush_cpumask))) {
145 spin_unlock(&tlbstate_lock);
146 return;
147 }
148#endif
153 flush_mm = mm; 149 flush_mm = mm;
154 flush_va = va; 150 flush_va = va;
155 cpus_or(flush_cpumask, cpumask, flush_cpumask);
156 151
157 /* 152 /*
158 * Make the above memory operations globally visible before 153 * Make the above memory operations globally visible before
@@ -163,9 +158,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
163 * We have to send the IPI only to 158 * We have to send the IPI only to
164 * CPUs affected. 159 * CPUs affected.
165 */ 160 */
166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); 161 send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
167 162
168 while (!cpus_empty(flush_cpumask)) 163 while (!cpumask_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */ 164 /* nothing. lockup detection does not belong here */
170 cpu_relax(); 165 cpu_relax();
171 166
@@ -177,25 +172,19 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
177void flush_tlb_current_task(void) 172void flush_tlb_current_task(void)
178{ 173{
179 struct mm_struct *mm = current->mm; 174 struct mm_struct *mm = current->mm;
180 cpumask_t cpu_mask;
181 175
182 preempt_disable(); 176 preempt_disable();
183 cpu_mask = mm->cpu_vm_mask;
184 cpu_clear(smp_processor_id(), cpu_mask);
185 177
186 local_flush_tlb(); 178 local_flush_tlb();
187 if (!cpus_empty(cpu_mask)) 179 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
188 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 180 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
189 preempt_enable(); 181 preempt_enable();
190} 182}
191 183
192void flush_tlb_mm(struct mm_struct *mm) 184void flush_tlb_mm(struct mm_struct *mm)
193{ 185{
194 cpumask_t cpu_mask;
195 186
196 preempt_disable(); 187 preempt_disable();
197 cpu_mask = mm->cpu_vm_mask;
198 cpu_clear(smp_processor_id(), cpu_mask);
199 188
200 if (current->active_mm == mm) { 189 if (current->active_mm == mm) {
201 if (current->mm) 190 if (current->mm)
@@ -203,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm)
203 else 192 else
204 leave_mm(smp_processor_id()); 193 leave_mm(smp_processor_id());
205 } 194 }
206 if (!cpus_empty(cpu_mask)) 195 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
207 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 196 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
208 197
209 preempt_enable(); 198 preempt_enable();
210} 199}
@@ -212,11 +201,8 @@ void flush_tlb_mm(struct mm_struct *mm)
212void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) 201void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
213{ 202{
214 struct mm_struct *mm = vma->vm_mm; 203 struct mm_struct *mm = vma->vm_mm;
215 cpumask_t cpu_mask;
216 204
217 preempt_disable(); 205 preempt_disable();
218 cpu_mask = mm->cpu_vm_mask;
219 cpu_clear(smp_processor_id(), cpu_mask);
220 206
221 if (current->active_mm == mm) { 207 if (current->active_mm == mm) {
222 if (current->mm) 208 if (current->mm)
@@ -225,9 +211,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
225 leave_mm(smp_processor_id()); 211 leave_mm(smp_processor_id());
226 } 212 }
227 213
228 if (!cpus_empty(cpu_mask)) 214 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
229 flush_tlb_others(cpu_mask, mm, va); 215 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
230
231 preempt_enable(); 216 preempt_enable();
232} 217}
233EXPORT_SYMBOL(flush_tlb_page); 218EXPORT_SYMBOL(flush_tlb_page);
@@ -237,7 +222,7 @@ static void do_flush_tlb_all(void *info)
237 unsigned long cpu = smp_processor_id(); 222 unsigned long cpu = smp_processor_id();
238 223
239 __flush_tlb_all(); 224 __flush_tlb_all();
240 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) 225 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
241 leave_mm(cpu); 226 leave_mm(cpu);
242} 227}
243 228
@@ -246,11 +231,9 @@ void flush_tlb_all(void)
246 on_each_cpu(do_flush_tlb_all, NULL, 1); 231 on_each_cpu(do_flush_tlb_all, NULL, 1);
247} 232}
248 233
249void reset_lazy_tlbstate(void) 234static int init_flush_cpumask(void)
250{ 235{
251 int cpu = raw_smp_processor_id(); 236 alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
252 237 return 0;
253 per_cpu(cpu_tlbstate, cpu).state = 0;
254 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
255} 238}
256 239early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index f8be6f1d2e4..e64a32c4882 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -18,6 +18,9 @@
18#include <asm/uv/uv_hub.h> 18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h> 19#include <asm/uv/uv_bau.h>
20 20
21DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
22 = { &init_mm, 0, };
23
21#include <mach_ipi.h> 24#include <mach_ipi.h>
22/* 25/*
23 * Smarter SMP flushing macros. 26 * Smarter SMP flushing macros.
@@ -43,10 +46,10 @@
43 46
44union smp_flush_state { 47union smp_flush_state {
45 struct { 48 struct {
46 cpumask_t flush_cpumask;
47 struct mm_struct *flush_mm; 49 struct mm_struct *flush_mm;
48 unsigned long flush_va; 50 unsigned long flush_va;
49 spinlock_t tlbstate_lock; 51 spinlock_t tlbstate_lock;
52 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
50 }; 53 };
51 char pad[SMP_CACHE_BYTES]; 54 char pad[SMP_CACHE_BYTES];
52} ____cacheline_aligned; 55} ____cacheline_aligned;
@@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
62 */ 65 */
63void leave_mm(int cpu) 66void leave_mm(int cpu)
64{ 67{
65 if (read_pda(mmu_state) == TLBSTATE_OK) 68 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
66 BUG(); 69 BUG();
67 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); 70 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
68 load_cr3(swapper_pg_dir); 71 load_cr3(swapper_pg_dir);
69} 72}
70EXPORT_SYMBOL_GPL(leave_mm); 73EXPORT_SYMBOL_GPL(leave_mm);
@@ -131,7 +134,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
131 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; 134 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
132 f = &per_cpu(flush_state, sender); 135 f = &per_cpu(flush_state, sender);
133 136
134 if (!cpu_isset(cpu, f->flush_cpumask)) 137 if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
135 goto out; 138 goto out;
136 /* 139 /*
137 * This was a BUG() but until someone can quote me the 140 * This was a BUG() but until someone can quote me the
@@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
142 * BUG(); 145 * BUG();
143 */ 146 */
144 147
145 if (f->flush_mm == read_pda(active_mm)) { 148 if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
146 if (read_pda(mmu_state) == TLBSTATE_OK) { 149 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
147 if (f->flush_va == TLB_FLUSH_ALL) 150 if (f->flush_va == TLB_FLUSH_ALL)
148 local_flush_tlb(); 151 local_flush_tlb();
149 else 152 else
@@ -153,19 +156,15 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
153 } 156 }
154out: 157out:
155 ack_APIC_irq(); 158 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask); 159 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
157 inc_irq_stat(irq_tlb_count); 160 inc_irq_stat(irq_tlb_count);
158} 161}
159 162
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 163static void flush_tlb_others_ipi(const struct cpumask *cpumask,
161 unsigned long va) 164 struct mm_struct *mm, unsigned long va)
162{ 165{
163 int sender; 166 int sender;
164 union smp_flush_state *f; 167 union smp_flush_state *f;
165 cpumask_t cpumask = *cpumaskp;
166
167 if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
168 return;
169 168
170 /* Caller has disabled preemption */ 169 /* Caller has disabled preemption */
171 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 170 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
@@ -180,7 +179,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
180 179
181 f->flush_mm = mm; 180 f->flush_mm = mm;
182 f->flush_va = va; 181 f->flush_va = va;
183 cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); 182 cpumask_andnot(to_cpumask(f->flush_cpumask),
183 cpumask, cpumask_of(smp_processor_id()));
184 184
185 /* 185 /*
186 * Make the above memory operations globally visible before 186 * Make the above memory operations globally visible before
@@ -191,9 +191,10 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 191 * We have to send the IPI only to
192 * CPUs affected. 192 * CPUs affected.
193 */ 193 */
194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); 194 send_IPI_mask(to_cpumask(f->flush_cpumask),
195 INVALIDATE_TLB_VECTOR_START + sender);
195 196
196 while (!cpus_empty(f->flush_cpumask)) 197 while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
197 cpu_relax(); 198 cpu_relax();
198 199
199 f->flush_mm = NULL; 200 f->flush_mm = NULL;
@@ -201,6 +202,25 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
201 spin_unlock(&f->tlbstate_lock); 202 spin_unlock(&f->tlbstate_lock);
202} 203}
203 204
205void native_flush_tlb_others(const struct cpumask *cpumask,
206 struct mm_struct *mm, unsigned long va)
207{
208 if (is_uv_system()) {
209 /* FIXME: could be an percpu_alloc'd thing */
210 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
211 struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
212
213 cpumask_andnot(after_uv_flush, cpumask,
214 cpumask_of(smp_processor_id()));
215 if (!uv_flush_tlb_others(after_uv_flush, mm, va))
216 flush_tlb_others_ipi(after_uv_flush, mm, va);
217
218 put_cpu_var(flush_tlb_uv_cpumask);
219 return;
220 }
221 flush_tlb_others_ipi(cpumask, mm, va);
222}
223
204static int __cpuinit init_smp_flush(void) 224static int __cpuinit init_smp_flush(void)
205{ 225{
206 int i; 226 int i;
@@ -215,25 +235,18 @@ core_initcall(init_smp_flush);
215void flush_tlb_current_task(void) 235void flush_tlb_current_task(void)
216{ 236{
217 struct mm_struct *mm = current->mm; 237 struct mm_struct *mm = current->mm;
218 cpumask_t cpu_mask;
219 238
220 preempt_disable(); 239 preempt_disable();
221 cpu_mask = mm->cpu_vm_mask;
222 cpu_clear(smp_processor_id(), cpu_mask);
223 240
224 local_flush_tlb(); 241 local_flush_tlb();
225 if (!cpus_empty(cpu_mask)) 242 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
226 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 243 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
227 preempt_enable(); 244 preempt_enable();
228} 245}
229 246
230void flush_tlb_mm(struct mm_struct *mm) 247void flush_tlb_mm(struct mm_struct *mm)
231{ 248{
232 cpumask_t cpu_mask;
233
234 preempt_disable(); 249 preempt_disable();
235 cpu_mask = mm->cpu_vm_mask;
236 cpu_clear(smp_processor_id(), cpu_mask);
237 250
238 if (current->active_mm == mm) { 251 if (current->active_mm == mm) {
239 if (current->mm) 252 if (current->mm)
@@ -241,8 +254,8 @@ void flush_tlb_mm(struct mm_struct *mm)
241 else 254 else
242 leave_mm(smp_processor_id()); 255 leave_mm(smp_processor_id());
243 } 256 }
244 if (!cpus_empty(cpu_mask)) 257 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
245 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 258 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
246 259
247 preempt_enable(); 260 preempt_enable();
248} 261}
@@ -250,11 +263,8 @@ void flush_tlb_mm(struct mm_struct *mm)
250void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) 263void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
251{ 264{
252 struct mm_struct *mm = vma->vm_mm; 265 struct mm_struct *mm = vma->vm_mm;
253 cpumask_t cpu_mask;
254 266
255 preempt_disable(); 267 preempt_disable();
256 cpu_mask = mm->cpu_vm_mask;
257 cpu_clear(smp_processor_id(), cpu_mask);
258 268
259 if (current->active_mm == mm) { 269 if (current->active_mm == mm) {
260 if (current->mm) 270 if (current->mm)
@@ -263,8 +273,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
263 leave_mm(smp_processor_id()); 273 leave_mm(smp_processor_id());
264 } 274 }
265 275
266 if (!cpus_empty(cpu_mask)) 276 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
267 flush_tlb_others(cpu_mask, mm, va); 277 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
268 278
269 preempt_enable(); 279 preempt_enable();
270} 280}
@@ -274,7 +284,7 @@ static void do_flush_tlb_all(void *info)
274 unsigned long cpu = smp_processor_id(); 284 unsigned long cpu = smp_processor_id();
275 285
276 __flush_tlb_all(); 286 __flush_tlb_all();
277 if (read_pda(mmu_state) == TLBSTATE_LAZY) 287 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
278 leave_mm(cpu); 288 leave_mm(cpu);
279} 289}
280 290
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f885023167e..690dcf1a27d 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
212 * The cpumaskp mask contains the cpus the broadcast was sent to. 212 * The cpumaskp mask contains the cpus the broadcast was sent to.
213 * 213 *
214 * Returns 1 if all remote flushing was done. The mask is zeroed. 214 * Returns 1 if all remote flushing was done. The mask is zeroed.
215 * Returns 0 if some remote flushing remains to be done. The mask is left 215 * Returns 0 if some remote flushing remains to be done. The mask will have
216 * unchanged. 216 * some bits still set.
217 */ 217 */
218int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, 218int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
219 cpumask_t *cpumaskp) 219 struct cpumask *cpumaskp)
220{ 220{
221 int completion_status = 0; 221 int completion_status = 0;
222 int right_shift; 222 int right_shift;
@@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
263 * Success, so clear the remote cpu's from the mask so we don't 263 * Success, so clear the remote cpu's from the mask so we don't
264 * use the IPI method of shootdown on them. 264 * use the IPI method of shootdown on them.
265 */ 265 */
266 for_each_cpu_mask(bit, *cpumaskp) { 266 for_each_cpu(bit, cpumaskp) {
267 blade = uv_cpu_to_blade_id(bit); 267 blade = uv_cpu_to_blade_id(bit);
268 if (blade == this_blade) 268 if (blade == this_blade)
269 continue; 269 continue;
270 cpu_clear(bit, *cpumaskp); 270 cpumask_clear_cpu(bit, cpumaskp);
271 } 271 }
272 if (!cpus_empty(*cpumaskp)) 272 if (!cpumask_empty(cpumaskp))
273 return 0; 273 return 0;
274 return 1; 274 return 1;
275} 275}
@@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
296 * Returns 1 if all remote flushing was done. 296 * Returns 1 if all remote flushing was done.
297 * Returns 0 if some remote flushing remains to be done. 297 * Returns 0 if some remote flushing remains to be done.
298 */ 298 */
299int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, 299int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
300 unsigned long va) 300 unsigned long va)
301{ 301{
302 int i; 302 int i;
@@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
315 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 315 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
316 316
317 i = 0; 317 i = 0;
318 for_each_cpu_mask(bit, *cpumaskp) { 318 for_each_cpu(bit, cpumaskp) {
319 blade = uv_cpu_to_blade_id(bit); 319 blade = uv_cpu_to_blade_id(bit);
320 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); 320 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
321 if (blade == this_blade) { 321 if (blade == this_blade) {
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 82c67559dde..3eba7f7bac0 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -178,14 +178,7 @@ SECTIONS
178 __initramfs_end = .; 178 __initramfs_end = .;
179 } 179 }
180#endif 180#endif
181 . = ALIGN(PAGE_SIZE); 181 PERCPU(PAGE_SIZE)
182 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
183 __per_cpu_start = .;
184 *(.data.percpu.page_aligned)
185 *(.data.percpu)
186 *(.data.percpu.shared_aligned)
187 __per_cpu_end = .;
188 }
189 . = ALIGN(PAGE_SIZE); 182 . = ALIGN(PAGE_SIZE);
190 /* freed after init ends here */ 183 /* freed after init ends here */
191 184
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0e6be..a09abb8fb97 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,6 +5,7 @@
5#define LOAD_OFFSET __START_KERNEL_map 5#define LOAD_OFFSET __START_KERNEL_map
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h>
8#include <asm/page.h> 9#include <asm/page.h>
9 10
10#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
@@ -13,12 +14,14 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 14OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 15ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 16jiffies_64 = jiffies;
16_proxy_pda = 1;
17PHDRS { 17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
22 note PT_NOTE FLAGS(0); /* ___ */ 25 note PT_NOTE FLAGS(0); /* ___ */
23} 26}
24SECTIONS 27SECTIONS
@@ -208,14 +211,29 @@ SECTIONS
208 __initramfs_end = .; 211 __initramfs_end = .;
209#endif 212#endif
210 213
214#ifdef CONFIG_SMP
215 /*
216 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
217 * output PHDR, so the next output section - __data_nosave - should
218 * switch it back to data.init. Also, pda should be at the head of
219 * percpu area. Preallocate it and define the percpu offset symbol
220 * so that it can be accessed as a percpu variable.
221 */
222 . = ALIGN(PAGE_SIZE);
223 PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
224 per_cpu____pda = __per_cpu_start;
225#else
211 PERCPU(PAGE_SIZE) 226 PERCPU(PAGE_SIZE)
227#endif
212 228
213 . = ALIGN(PAGE_SIZE); 229 . = ALIGN(PAGE_SIZE);
214 __init_end = .; 230 __init_end = .;
215 231
216 . = ALIGN(PAGE_SIZE); 232 . = ALIGN(PAGE_SIZE);
217 __nosave_begin = .; 233 __nosave_begin = .;
218 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } 234 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
235 *(.data.nosave)
236 } :data.init /* switch back to data.init, see PERCPU_VADDR() above */
219 . = ALIGN(PAGE_SIZE); 237 . = ALIGN(PAGE_SIZE);
220 __nosave_end = .; 238 __nosave_end = .;
221 239
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426aa35..3909e3ba5ce 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy);
58EXPORT_SYMBOL(empty_zero_page); 58EXPORT_SYMBOL(empty_zero_page);
59EXPORT_SYMBOL(init_level4_pgt); 59EXPORT_SYMBOL(init_level4_pgt);
60EXPORT_SYMBOL(load_gs_index); 60EXPORT_SYMBOL(load_gs_index);
61
62EXPORT_SYMBOL(_proxy_pda);