Diffstat (limited to 'arch/x86/kernel')
-rw-r--r--  arch/x86/kernel/Makefile                    |    8
-rw-r--r--  arch/x86/kernel/apic.c                      |    7
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c            |    5
-rw-r--r--  arch/x86/kernel/cpu/common.c                |   44
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c  |   33
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c   |    1
-rw-r--r--  arch/x86/kernel/efi.c                       |    2
-rw-r--r--  arch/x86/kernel/efi_64.c                    |    1
-rw-r--r--  arch/x86/kernel/entry_32.S                  |    6
-rw-r--r--  arch/x86/kernel/entry_64.S                  |    2
-rw-r--r--  arch/x86/kernel/genapic_64.c                |    2
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c            |    1
-rw-r--r--  arch/x86/kernel/head64.c                    |    2
-rw-r--r--  arch/x86/kernel/head_32.S                   |    6
-rw-r--r--  arch/x86/kernel/head_64.S                   |   37
-rw-r--r--  arch/x86/kernel/io_apic.c                   |    2
-rw-r--r--  arch/x86/kernel/irq_64.c                    |    4
-rw-r--r--  arch/x86/kernel/irqinit_32.c                |   11
-rw-r--r--  arch/x86/kernel/process_32.c                |    1
-rw-r--r--  arch/x86/kernel/process_64.c                |   21
-rw-r--r--  arch/x86/kernel/setup_percpu.c              |   34
-rw-r--r--  arch/x86/kernel/smpboot.c                   |    1
-rw-r--r--  arch/x86/kernel/tlb_32.c                    |  239
-rw-r--r--  arch/x86/kernel/tlb_64.c                    |  294
-rw-r--r--  arch/x86/kernel/tlb_uv.c                    |   68
-rw-r--r--  arch/x86/kernel/traps.c                     |    1
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S            |    8
27 files changed, 149 insertions, 692 deletions
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d364df03c1d6..a99437c965cc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,6 +23,7 @@ nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
 CFLAGS_hpet.o := $(nostackp)
 CFLAGS_tsc.o := $(nostackp)
+CFLAGS_paravirt.o := $(nostackp)
 
 obj-y := process_$(BITS).o signal.o entry_$(BITS).o
 obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
@@ -57,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o
 apm-y := apm_32.o
 obj-$(CONFIG_APM) += apm.o
 obj-$(CONFIG_X86_SMP) += smp.o
-obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
+obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o
 obj-$(CONFIG_X86_32_SMP) += smpcommon.o
 obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
@@ -114,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
-        obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
-        obj-y += bios_uv.o uv_irq.o uv_sysfs.o
+        obj-y += genapic_64.o genapic_flat_64.o
         obj-y += genx2apic_cluster.o
         obj-y += genx2apic_phys.o
+        obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o
+        obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o
         obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
         obj-$(CONFIG_AUDIT) += audit_64.o
 
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index e9af14f748ea..7b434e5b14c9 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1132,7 +1132,9 @@ void __cpuinit setup_local_APIC(void)
 	int i, j;
 
 	if (disable_apic) {
+#ifdef CONFIG_X86_IO_APIC
 		disable_ioapic_setup();
+#endif
 		return;
 	}
 
@@ -1844,6 +1846,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
 	num_processors++;
 	cpu = cpumask_next_zero(-1, cpu_present_mask);
 
+	if (version != apic_version[boot_cpu_physical_apicid])
+		WARN_ONCE(1,
+			"ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
+			apic_version[boot_cpu_physical_apicid], cpu, version);
+
 	physid_set(apicid, phys_cpu_present_map);
 	if (apicid == boot_cpu_physical_apicid) {
 		/*
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 64c834a39aa8..8793ab33e2c1 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
 #include <linux/hardirq.h>
 #include <linux/suspend.h>
 #include <linux/kbuild.h>
-#include <asm/pda.h>
 #include <asm/processor.h>
 #include <asm/segment.h>
 #include <asm/thread_info.h>
@@ -48,10 +47,6 @@ int main(void)
 #endif
 	BLANK();
 #undef ENTRY
-#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
-	DEFINE(pda_size, sizeof(struct x8664_pda));
-	BLANK();
-#undef ENTRY
 #ifdef CONFIG_PARAVIRT
 	BLANK();
 	OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 95eb30e1e677..6fd316689c47 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -29,9 +29,9 @@
 #include <asm/apic.h>
 #include <mach_apic.h>
 #include <asm/genapic.h>
+#include <asm/uv/uv.h>
 #endif
 
-#include <asm/pda.h>
 #include <asm/pgtable.h>
 #include <asm/processor.h>
 #include <asm/desc.h>
@@ -65,23 +65,23 @@ cpumask_t cpu_sibling_setup_map;
 
 static struct cpu_dev *this_cpu __cpuinitdata;
 
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 #ifdef CONFIG_X86_64
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types  kkeil 2000/10/28
- * Also sysret mandates a special GDT layout
- */
-/* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+	/*
+	 * We need valid kernel segments for data and code in long mode too
+	 * IRET will check the segment types  kkeil 2000/10/28
+	 * Also sysret mandates a special GDT layout
+	 *
+	 * The TLS descriptors are currently at a different place compared to i386.
+	 * Hopefully nobody expects them at a fixed place (Wine?)
+	 */
 	[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
 	[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
 	[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
 	[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
 	[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
 	[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
-} };
 #else
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 	[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
 	[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
 	[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -113,9 +113,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 	[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
 
 	[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
-	[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
-} };
+	[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
 #endif
+} };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
 #ifdef CONFIG_X86_32
@@ -883,12 +883,13 @@ __setup("clearcpuid=", setup_disablecpuid);
 #ifdef CONFIG_X86_64
 struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
 
-DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
+DEFINE_PER_CPU_FIRST(union irq_stack_union,
+		     irq_stack_union) __aligned(PAGE_SIZE);
 #ifdef CONFIG_SMP
 DEFINE_PER_CPU(char *, irq_stack_ptr);	/* will be set during per cpu init */
 #else
 DEFINE_PER_CPU(char *, irq_stack_ptr) =
-	per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
+	per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
 #endif
 
 DEFINE_PER_CPU(unsigned long, kernel_stack) =
@@ -897,15 +898,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
 
 DEFINE_PER_CPU(unsigned int, irq_count) = -1;
 
-void __cpuinit pda_init(int cpu)
-{
-	/* Setup up data that may be needed in __get_free_pages early */
-	loadsegment(fs, 0);
-	loadsegment(gs, 0);
-
-	load_pda_offset(cpu);
-}
-
 static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
 	[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
 	__aligned(PAGE_SIZE);
@@ -969,9 +961,9 @@ void __cpuinit cpu_init(void)
 	struct task_struct *me;
 	int i;
 
-	/* CPU 0 is initialised in head64.c */
-	if (cpu != 0)
-		pda_init(cpu);
+	loadsegment(fs, 0);
+	loadsegment(gs, 0);
+	load_gs_base(cpu);
 
 #ifdef CONFIG_NUMA
 	if (cpu != 0 && percpu_read(node_number) == 0 &&
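Not part of the diff, but useful context for the DEFINE_PER_CPU_FIRST change above: irq_stack_union has to sit at offset 0 of the per-cpu area (the ASSERT added to vmlinux_64.lds.S at the end of this diff checks exactly that), so the %gs base can double as the per-cpu base address and the stack-protector canary ends up at %gs:40, as the head_64.S comment below notes gcc expects. A minimal sketch of what such a union has to look like, assuming IRQ_STACK_SIZE from this file; the real definition lives in a header outside arch/x86/kernel and its field names may differ:

union irq_stack_union {
	char irq_stack[IRQ_STACK_SIZE];		/* the per-cpu hard IRQ stack */
	struct {
		char gs_base[40];		/* scratch area under the %gs base */
		unsigned long stack_canary;	/* must land at %gs:40 for gcc */
	};
};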
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8f3c95c7e61f..4b1c319d30c3 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -145,13 +145,14 @@ typedef union {
 
 struct drv_cmd {
 	unsigned int type;
-	cpumask_var_t mask;
+	const struct cpumask *mask;
 	drv_addr_union addr;
 	u32 val;
 };
 
-static void do_drv_read(struct drv_cmd *cmd)
+static long do_drv_read(void *_cmd)
 {
+	struct drv_cmd *cmd = _cmd;
 	u32 h;
 
 	switch (cmd->type) {
@@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd)
 	default:
 		break;
 	}
+	return 0;
 }
 
-static void do_drv_write(struct drv_cmd *cmd)
+static long do_drv_write(void *_cmd)
 {
+	struct drv_cmd *cmd = _cmd;
 	u32 lo, hi;
 
 	switch (cmd->type) {
@@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd)
 	default:
 		break;
 	}
+	return 0;
 }
 
 static void drv_read(struct drv_cmd *cmd)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
 	cmd->val = 0;
 
-	set_cpus_allowed_ptr(current, cmd->mask);
-	do_drv_read(cmd);
-	set_cpus_allowed_ptr(current, &saved_mask);
+	work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
 }
 
 static void drv_write(struct drv_cmd *cmd)
 {
-	cpumask_t saved_mask = current->cpus_allowed;
 	unsigned int i;
 
 	for_each_cpu(i, cmd->mask) {
-		set_cpus_allowed_ptr(current, cpumask_of(i));
-		do_drv_write(cmd);
+		work_on_cpu(i, do_drv_write, cmd);
 	}
-
-	set_cpus_allowed_ptr(current, &saved_mask);
-	return;
 }
 
 static u32 get_cur_val(const struct cpumask *mask)
@@ -235,6 +231,7 @@ static u32 get_cur_val(const struct cpumask *mask)
 		return 0;
 	}
 
+	cmd.mask = mask;
 	drv_read(&cmd);
 
 	dprintk("get_cur_val = %u\n", cmd.val);
@@ -366,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
 	return freq;
 }
 
-static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq,
+static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
 				struct acpi_cpufreq_data *data)
 {
 	unsigned int cur_freq;
@@ -401,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 		return -ENODEV;
 	}
 
-	if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
-		return -ENOMEM;
-
 	perf = data->acpi_data;
 	result = cpufreq_frequency_table_target(policy,
 					data->freq_table,
@@ -448,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 
 	/* cpufreq holds the hotplug lock, so we are safe from here on */
 	if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
-		cpumask_and(cmd.mask, cpu_online_mask, policy->cpus);
+		cmd.mask = policy->cpus;
 	else
-		cpumask_copy(cmd.mask, cpumask_of(policy->cpu));
+		cmd.mask = cpumask_of(policy->cpu);
 
 	freqs.old = perf->states[perf->state].core_frequency * 1000;
 	freqs.new = data->freq_table[next_state].frequency;
@@ -477,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	perf->state = next_perf_state;
 
 out:
-	free_cpumask_var(cmd.mask);
 	return result;
 }
 
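The conversion above leans on work_on_cpu(), which runs a long (*fn)(void *) callback on the target CPU via its workqueue and waits for the result, so the driver no longer has to rewrite current->cpus_allowed around an MSR or I/O-port access. A minimal, hedged sketch of the calling pattern with a made-up rd_msr_on_cpu() helper (not the driver's real callbacks), just to show the shape of the API as it is used here:

#include <linux/workqueue.h>
#include <asm/msr.h>

struct msr_request {
	u32 reg;
	u64 val;
};

/* callback signature expected by work_on_cpu(): long fn(void *) */
static long rd_msr_on_cpu(void *data)
{
	struct msr_request *req = data;

	rdmsrl(req->reg, req->val);	/* executes on the CPU handed to work_on_cpu() */
	return 0;
}

static u64 read_msr_on(int cpu, u32 reg)
{
	struct msr_request req = { .reg = reg };

	/* sleeps while the target CPU runs the callback; not for atomic context */
	work_on_cpu(cpu, rd_msr_on_cpu, &req);
	return req.val;
}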
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd39..5e8c79e748a6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -7,6 +7,7 @@
 #include <linux/interrupt.h>
 #include <linux/percpu.h>
 #include <asm/processor.h>
+#include <asm/apic.h>
 #include <asm/msr.h>
 #include <asm/mce.h>
 #include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe11..b205272ad394 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,10 +366,12 @@ void __init efi_init(void)
 				SMBIOS_TABLE_GUID)) {
 			efi.smbios = config_tables[i].table;
 			printk(" SMBIOS=0x%lx ", config_tables[i].table);
+#ifdef CONFIG_X86_UV
 		} else if (!efi_guidcmp(config_tables[i].guid,
 				UV_SYSTEM_TABLE_GUID)) {
 			efi.uv_systab = config_tables[i].table;
 			printk(" UVsystab=0x%lx ", config_tables[i].table);
+#endif
 		} else if (!efi_guidcmp(config_tables[i].guid,
 				HCDP_TABLE_GUID)) {
 			efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215f..a4ee29127fdf 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -36,6 +36,7 @@
 #include <asm/proto.h>
 #include <asm/efi.h>
 #include <asm/cacheflush.h>
+#include <asm/fixmap.h>
 
 static pgd_t save_pgd __initdata;
 static unsigned long efi_flags __initdata;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..a0b91aac72a1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -672,7 +672,7 @@ common_interrupt:
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-#define BUILD_INTERRUPT(name, nr)	\
+#define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
 	pushl $~(nr);			\
@@ -680,11 +680,13 @@ ENTRY(name)				\
 	SAVE_ALL;			\
 	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
-	call smp_##name;		\
+	call fn;			\
 	jmp ret_from_intr;		\
 	CFI_ENDPROC;			\
 ENDPROC(name)
 
+#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c092e7d2686d..eb0a0703f4c9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
 	irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
 #endif
 
+#ifdef CONFIG_X86_UV
 apicinterrupt UV_BAU_MESSAGE \
 	uv_bau_message_intr1 uv_bau_message_interrupt
+#endif
 apicinterrupt LOCAL_TIMER_VECTOR \
 	apic_timer_interrupt smp_apic_timer_interrupt
 
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 2bced78b0b8e..e656c2721154 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster;
 struct genapic __read_mostly *genapic = &apic_flat;
 
 static struct genapic *apic_probe[] __initdata = {
+#ifdef CONFIG_X86_UV
 	&apic_x2apic_uv_x,
+#endif
 	&apic_x2apic_phys,
 	&apic_x2apic_cluster,
 	&apic_physflat,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b193e082f6ce..bfe36249145c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -25,6 +25,7 @@
 #include <asm/ipi.h>
 #include <asm/genapic.h>
 #include <asm/pgtable.h>
+#include <asm/uv/uv.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/bios.h>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index af67d3227ea6..f5b272247690 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
 	if (console_loglevel == 10)
 		early_printk("Kernel alive\n");
 
-	pda_init(0);
-
 	x86_64_start_reservations(real_mode_data);
 }
 
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4eea70b..24c0e5cd71e3 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -429,12 +429,14 @@ is386:	movl $2,%ecx		# set MP
 	ljmp $(__KERNEL_CS),$1f
 1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
 	movl %eax,%ss			# after changing gdt.
-	movl %eax,%fs			# gets reset once there's real percpu
 
 	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
 	movl %eax,%ds
 	movl %eax,%es
 
+	movl $(__KERNEL_PERCPU), %eax
+	movl %eax,%fs			# set this cpu's percpu
+
 	xorl %eax,%eax			# Clear GS and LDT
 	movl %eax,%gs
 	lldt %ax
@@ -446,8 +448,6 @@ is386:	movl $2,%ecx		# set MP
 	movb $1, ready
 	cmpb $0,%cl		# the first CPU calls start_kernel
 	je   1f
-	movl $(__KERNEL_PERCPU), %eax
-	movl %eax,%fs		# set this cpu's percpu
 	movl (stack_start), %esp
 1:
 #endif /* CONFIG_SMP */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c8ace880661b..a0a2b5ca9b7d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -207,19 +207,15 @@ ENTRY(secondary_startup_64)
 
 #ifdef CONFIG_SMP
 	/*
-	 * early_gdt_base should point to the gdt_page in static percpu init
-	 * data area.  Computing this requires two symbols - __per_cpu_load
-	 * and per_cpu__gdt_page.  As linker can't do no such relocation, do
-	 * it by hand.  As early_gdt_descr is manipulated by C code for
-	 * secondary CPUs, this should be done only once for the boot CPU
-	 * when early_gdt_descr_base contains zero.
+	 * Fix up static pointers that need __per_cpu_load added.  The assembler
+	 * is unable to do this directly.  This is only needed for the boot cpu.
+	 * These values are set up with the correct base addresses by C code for
+	 * secondary cpus.
 	 */
-	movq	early_gdt_descr_base(%rip), %rax
-	testq	%rax, %rax
-	jnz	1f
-	movq	$__per_cpu_load, %rax
-	addq	$per_cpu__gdt_page, %rax
-	movq	%rax, early_gdt_descr_base(%rip)
+	movq	initial_gs(%rip), %rax
+	cmpl	$0, per_cpu__cpu_number(%rax)
+	jne	1f
+	addq	%rax, early_gdt_descr_base(%rip)
 1:
 #endif
 	/*
@@ -246,13 +242,10 @@ ENTRY(secondary_startup_64)
 
 	/* Set up %gs.
 	 *
-	 * On SMP, %gs should point to the per-cpu area.  For initial
-	 * boot, make %gs point to the init data section.  For a
-	 * secondary CPU,initial_gs should be set to its pda address
-	 * before the CPU runs this code.
-	 *
-	 * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
-	 * change.
+	 * The base of %gs always points to the bottom of the irqstack
+	 * union.  If the stack protector canary is enabled, it is
+	 * located at %gs:40.  Note that, on SMP, the boot cpu uses
+	 * init data section till per cpu areas are set up.
 	 */
 	movl	$MSR_GS_BASE,%ecx
 	movq	initial_gs(%rip),%rax
@@ -285,7 +278,7 @@ ENTRY(secondary_startup_64)
 #ifdef CONFIG_SMP
 	.quad	__per_cpu_load
 #else
-	.quad	PER_CPU_VAR(__pda)
+	.quad	PER_CPU_VAR(irq_stack_union)
 #endif
 	__FINITDATA
 
@@ -431,12 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
 	.globl early_gdt_descr
 early_gdt_descr:
 	.word	GDT_ENTRIES*8-1
-#ifdef CONFIG_SMP
 early_gdt_descr_base:
-	.quad	0x0000000000000000
-#else
 	.quad	per_cpu__gdt_page
-#endif
 
 ENTRY(phys_base)
 	/* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f79660390724..e4d36bd56b62 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
 }
 #endif /* CONFIG_HT_IRQ */
 
-#ifdef CONFIG_X86_64
+#ifdef CONFIG_X86_UV
 /*
  * Re-target the irq to the specified CPU and enable the specified MMR located
  * on the specified blade to allow the sending of MSIs to the specified CPU.
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1db05247b47f..018963aa6ee3 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,10 +18,14 @@
 #include <linux/smp.h>
 #include <asm/io_apic.h>
 #include <asm/idle.h>
+#include <asm/apic.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
 
+DEFINE_PER_CPU(struct pt_regs *, irq_regs);
+EXPORT_PER_CPU_SYMBOL(irq_regs);
+
 /*
  * Probabilistic stack overflow check:
  *
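The irq_regs pointer defined here was previously carried in the PDA on 64-bit; generic code reaches it through get_irq_regs()/set_irq_regs(). A rough sketch of the accessor pair this per-cpu variable backs (the real header is not part of this diff and may differ in detail):

DECLARE_PER_CPU(struct pt_regs *, irq_regs);

static inline struct pt_regs *get_irq_regs(void)
{
	return percpu_read(irq_regs);
}

static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
{
	struct pt_regs *old_regs = percpu_read(irq_regs);

	percpu_write(irq_regs, new_regs);	/* interrupt entry/exit swap the pointer */
	return old_regs;
}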
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 0bef6280f30c..c56496f8c6fc 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPI for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+	/* IPIs for invalidation */
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
 
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
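With eight invalidate gates instead of one, 32-bit picks up the multi-vector scheme described in the comment of the removed tlb_64.c further down: each sender hashes onto one of the vectors, and the handler recovers the sender's slot from the negated vector number pushed on entry. A sketch of both ends of that mapping, with hypothetical helper names (the constants are assumed to come from asm/irq_vectors.h):

#include <linux/smp.h>
#include <asm/ptrace.h>
#include <asm/irq_vectors.h>	/* INVALIDATE_TLB_VECTOR_START, NUM_INVALIDATE_TLB_VECTORS */

/* sender side: choose a vector (and hence a per-sender flush-state slot) */
static int pick_invalidate_vector(void)
{
	int sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;

	return INVALIDATE_TLB_VECTOR_START + sender;
}

/* receiver side: the stub pushed $~(vector), so undo that to find the slot */
static int invalidate_sender(struct pt_regs *regs)
{
	return ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
}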
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2c00a57ccb90..1a1ae8edc40c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -108,7 +108,6 @@ void cpu_idle(void)
 				play_dead();
 
 			local_irq_disable();
-			__get_cpu_var(irq_stat).idle_timestamp = jiffies;
 			/* Don't trace irqs off for idle */
 			stop_critical_timings();
 			pm_idle();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4523ff88a69d..c422eebb0c58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -16,6 +16,7 @@
 
 #include <stdarg.h>
 
+#include <linux/stackprotector.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/sched.h>
@@ -46,7 +47,6 @@
 #include <asm/processor.h>
 #include <asm/i387.h>
 #include <asm/mmu_context.h>
-#include <asm/pda.h>
 #include <asm/prctl.h>
 #include <asm/desc.h>
 #include <asm/proto.h>
@@ -117,6 +117,17 @@ static inline void play_dead(void)
 void cpu_idle(void)
 {
 	current_thread_info()->status |= TS_POLLING;
+
+	/*
+	 * If we're the non-boot CPU, nothing set the PDA stack
+	 * canary up for us - and if we are the boot CPU we have
+	 * a 0 stack canary. This is a good place for updating
+	 * it, as we wont ever return from this function (so the
+	 * invalid canaries already on the stack wont ever
+	 * trigger):
+	 */
+	boot_init_stack_canary();
+
 	/* endless idle loop with no priority at all */
 	while (1) {
 		tick_nohz_stop_sched_tick(1);
@@ -626,14 +637,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
 	percpu_write(kernel_stack,
 		  (unsigned long)task_stack_page(next_p) +
 		  THREAD_SIZE - KERNEL_STACK_OFFSET);
-#ifdef CONFIG_CC_STACKPROTECTOR
-	write_pda(stack_canary, next_p->stack_canary);
-	/*
-	 * Build time only check to make sure the stack_canary is at
-	 * offset 40 in the pda; this is a gcc ABI requirement
-	 */
-	BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
-#endif
 
 	/*
 	 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index efbafbbff584..90b8e154bb53 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
 static inline void setup_node_to_cpumask_map(void) { }
 #endif
 
-/*
- * Define load_pda_offset() and per-cpu __pda for x86_64.
- * load_pda_offset() is responsible for loading the offset of pda into
- * %gs.
- *
- * On SMP, pda offset also duals as percpu base address and thus it
- * should be at the start of per-cpu area.  To achieve this, it's
- * preallocated in vmlinux_64.lds.S directly instead of using
- * DEFINE_PER_CPU().
- */
-#ifdef CONFIG_X86_64
-void __cpuinit load_pda_offset(int cpu)
-{
-	/* Memory clobbers used to order pda/percpu accesses */
-	mb();
-	wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
-	mb();
-}
-#ifndef CONFIG_SMP
-DEFINE_PER_CPU(struct x8664_pda, __pda);
-#endif
-EXPORT_PER_CPU_SYMBOL(__pda);
-#endif /* CONFIG_SMP && CONFIG_X86_64 */
-
 #ifdef CONFIG_X86_64
 
 /* correctly size the local cpu masks */
@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
 		per_cpu(cpu_number, cpu) = cpu;
 #ifdef CONFIG_X86_64
 		per_cpu(irq_stack_ptr, cpu) =
-			(char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
+			per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
 		/*
-		 * CPU0 modified pda in the init data area, reload pda
-		 * offset for CPU0 and clear the area for others.
+		 * Up to this point, CPU0 has been using .data.init
+		 * area.  Reload %gs offset for CPU0.
 		 */
 		if (cpu == 0)
-			load_pda_offset(0);
-		else
-			memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
+			load_gs_base(cpu);
 #endif
 
 		DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 869b98840fd0..def770b57b5a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
 #include <asm/vmi.h>
 #include <asm/genapic.h>
 #include <asm/setup.h>
+#include <asm/uv/uv.h>
 #include <linux/mc146818rtc.h>
 
 #include <mach_apic.h>
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index abf0808d6fc4..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,239 +0,0 @@
1#include <linux/spinlock.h>
2#include <linux/cpu.h>
3#include <linux/interrupt.h>
4
5#include <asm/tlbflush.h>
6
7DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
8 = { &init_mm, 0, };
9
10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h>
12
13/*
14 * Smarter SMP flushing macros.
15 * c/o Linus Torvalds.
16 *
17 * These mean you can really definitely utterly forget about
18 * writing to user space from interrupts. (Its not allowed anyway).
19 *
20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */
22
23static cpumask_var_t flush_cpumask;
24static struct mm_struct *flush_mm;
25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock);
27
28/*
29 * We cannot call mmdrop() because we are in interrupt context,
30 * instead update mm->cpu_vm_mask.
31 *
32 * We need to reload %cr3 since the page tables may be going
33 * away from under us..
34 */
35void leave_mm(int cpu)
36{
37 BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir);
40}
41EXPORT_SYMBOL_GPL(leave_mm);
42
43/*
44 *
45 * The flush IPI assumes that a thread switch happens in this order:
46 * [cpu0: the cpu that switches]
47 * 1) switch_mm() either 1a) or 1b)
48 * 1a) thread switch to a different mm
49 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
50 * Stop ipi delivery for the old mm. This is not synchronized with
51 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
52 * for the wrong mm, and in the worst case we perform a superfluous
53 * tlb flush.
54 * 1a2) set cpu_tlbstate to TLBSTATE_OK
55 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
56 * was in lazy tlb mode.
57 * 1a3) update cpu_tlbstate[].active_mm
58 * Now cpu0 accepts tlb flushes for the new mm.
59 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
60 * Now the other cpus will send tlb flush ipis.
61 * 1a4) change cr3.
62 * 1b) thread switch without mm change
63 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
64 * flush ipis.
65 * 1b1) set cpu_tlbstate to TLBSTATE_OK
66 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
67 * Atomically set the bit [other cpus will start sending flush ipis],
68 * and test the bit.
69 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
70 * 2) switch %%esp, ie current
71 *
72 * The interrupt must handle 2 special cases:
73 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
74 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
75 * runs in kernel space, the cpu could load tlb entries for user space
76 * pages.
77 *
78 * The good news is that cpu_tlbstate is local to each cpu, no
79 * write/read ordering problems.
80 */
81
82/*
83 * TLB flush IPI:
84 *
85 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
86 * 2) Leave the mm if we are in the lazy tlb mode.
87 */
88
89void smp_invalidate_interrupt(struct pt_regs *regs)
90{
91 unsigned long cpu;
92
93 cpu = get_cpu();
94
95 if (!cpumask_test_cpu(cpu, flush_cpumask))
96 goto out;
97 /*
98 * This was a BUG() but until someone can quote me the
99 * line from the intel manual that guarantees an IPI to
100 * multiple CPUs is retried _only_ on the erroring CPUs
101 * its staying as a return
102 *
103 * BUG();
104 */
105
106 if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
107 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb();
110 else
111 __flush_tlb_one(flush_va);
112 } else
113 leave_mm(cpu);
114 }
115 ack_APIC_irq();
116 smp_mb__before_clear_bit();
117 cpumask_clear_cpu(cpu, flush_cpumask);
118 smp_mb__after_clear_bit();
119out:
120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count);
122}
123
124void native_flush_tlb_others(const struct cpumask *cpumask,
125 struct mm_struct *mm, unsigned long va)
126{
127 /*
128 * - mask must exist :)
129 */
130 BUG_ON(cpumask_empty(cpumask));
131 BUG_ON(!mm);
132
133 /*
134 * i'm not happy about this global shared spinlock in the
135 * MM hot path, but we'll see how contended it is.
136 * AK: x86-64 has a faster method that could be ported.
137 */
138 spin_lock(&tlbstate_lock);
139
140 cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
141#ifdef CONFIG_HOTPLUG_CPU
142 /* If a CPU which we ran on has gone down, OK. */
143 cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
144 if (unlikely(cpumask_empty(flush_cpumask))) {
145 spin_unlock(&tlbstate_lock);
146 return;
147 }
148#endif
149 flush_mm = mm;
150 flush_va = va;
151
152 /*
153 * Make the above memory operations globally visible before
154 * sending the IPI.
155 */
156 smp_mb();
157 /*
158 * We have to send the IPI only to
159 * CPUs affected.
160 */
161 send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
162
163 while (!cpumask_empty(flush_cpumask))
164 /* nothing. lockup detection does not belong here */
165 cpu_relax();
166
167 flush_mm = NULL;
168 flush_va = 0;
169 spin_unlock(&tlbstate_lock);
170}
171
172void flush_tlb_current_task(void)
173{
174 struct mm_struct *mm = current->mm;
175
176 preempt_disable();
177
178 local_flush_tlb();
179 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
180 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
181 preempt_enable();
182}
183
184void flush_tlb_mm(struct mm_struct *mm)
185{
186
187 preempt_disable();
188
189 if (current->active_mm == mm) {
190 if (current->mm)
191 local_flush_tlb();
192 else
193 leave_mm(smp_processor_id());
194 }
195 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
196 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
197
198 preempt_enable();
199}
200
201void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
202{
203 struct mm_struct *mm = vma->vm_mm;
204
205 preempt_disable();
206
207 if (current->active_mm == mm) {
208 if (current->mm)
209 __flush_tlb_one(va);
210 else
211 leave_mm(smp_processor_id());
212 }
213
214 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
215 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
216 preempt_enable();
217}
218EXPORT_SYMBOL(flush_tlb_page);
219
220static void do_flush_tlb_all(void *info)
221{
222 unsigned long cpu = smp_processor_id();
223
224 __flush_tlb_all();
225 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
226 leave_mm(cpu);
227}
228
229void flush_tlb_all(void)
230{
231 on_each_cpu(do_flush_tlb_all, NULL, 1);
232}
233
234static int init_flush_cpumask(void)
235{
236 alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
237 return 0;
238}
239early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
deleted file mode 100644
index e64a32c48825..000000000000
--- a/arch/x86/kernel/tlb_64.c
+++ /dev/null
@@ -1,294 +0,0 @@
1#include <linux/init.h>
2
3#include <linux/mm.h>
4#include <linux/delay.h>
5#include <linux/spinlock.h>
6#include <linux/smp.h>
7#include <linux/kernel_stat.h>
8#include <linux/mc146818rtc.h>
9#include <linux/interrupt.h>
10
11#include <asm/mtrr.h>
12#include <asm/pgalloc.h>
13#include <asm/tlbflush.h>
14#include <asm/mmu_context.h>
15#include <asm/proto.h>
16#include <asm/apicdef.h>
17#include <asm/idle.h>
18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h>
20
21DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
22 = { &init_mm, 0, };
23
24#include <mach_ipi.h>
25/*
26 * Smarter SMP flushing macros.
27 * c/o Linus Torvalds.
28 *
29 * These mean you can really definitely utterly forget about
30 * writing to user space from interrupts. (Its not allowed anyway).
31 *
32 * Optimizations Manfred Spraul <manfred@colorfullife.com>
33 *
34 * More scalable flush, from Andi Kleen
35 *
36 * To avoid global state use 8 different call vectors.
37 * Each CPU uses a specific vector to trigger flushes on other
38 * CPUs. Depending on the received vector the target CPUs look into
39 * the right per cpu variable for the flush data.
40 *
41 * With more than 8 CPUs they are hashed to the 8 available
42 * vectors. The limited global vector space forces us to this right now.
43 * In future when interrupts are split into per CPU domains this could be
44 * fixed, at the cost of triggering multiple IPIs in some cases.
45 */
46
47union smp_flush_state {
48 struct {
49 struct mm_struct *flush_mm;
50 unsigned long flush_va;
51 spinlock_t tlbstate_lock;
52 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
53 };
54 char pad[SMP_CACHE_BYTES];
55} ____cacheline_aligned;
56
57/* State is put into the per CPU data section, but padded
58 to a full cache line because other CPUs can access it and we don't
59 want false sharing in the per cpu data segment. */
60static DEFINE_PER_CPU(union smp_flush_state, flush_state);
61
62/*
63 * We cannot call mmdrop() because we are in interrupt context,
64 * instead update mm->cpu_vm_mask.
65 */
66void leave_mm(int cpu)
67{
68 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
69 BUG();
70 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
71 load_cr3(swapper_pg_dir);
72}
73EXPORT_SYMBOL_GPL(leave_mm);
74
75/*
76 *
77 * The flush IPI assumes that a thread switch happens in this order:
78 * [cpu0: the cpu that switches]
79 * 1) switch_mm() either 1a) or 1b)
80 * 1a) thread switch to a different mm
81 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
82 * Stop ipi delivery for the old mm. This is not synchronized with
83 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
84 * for the wrong mm, and in the worst case we perform a superfluous
85 * tlb flush.
86 * 1a2) set cpu mmu_state to TLBSTATE_OK
87 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
88 * was in lazy tlb mode.
89 * 1a3) update cpu active_mm
90 * Now cpu0 accepts tlb flushes for the new mm.
91 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
92 * Now the other cpus will send tlb flush ipis.
93 * 1a4) change cr3.
94 * 1b) thread switch without mm change
95 * cpu active_mm is correct, cpu0 already handles
96 * flush ipis.
97 * 1b1) set cpu mmu_state to TLBSTATE_OK
98 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
99 * Atomically set the bit [other cpus will start sending flush ipis],
100 * and test the bit.
101 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
102 * 2) switch %%esp, ie current
103 *
104 * The interrupt must handle 2 special cases:
105 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
106 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
107 * runs in kernel space, the cpu could load tlb entries for user space
108 * pages.
109 *
110 * The good news is that cpu mmu_state is local to each cpu, no
111 * write/read ordering problems.
112 */
113
114/*
115 * TLB flush IPI:
116 *
117 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
118 * 2) Leave the mm if we are in the lazy tlb mode.
119 *
120 * Interrupts are disabled.
121 */
122
123asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
124{
125 int cpu;
126 int sender;
127 union smp_flush_state *f;
128
129 cpu = smp_processor_id();
130 /*
131 * orig_rax contains the negated interrupt vector.
132 * Use that to determine where the sender put the data.
133 */
134 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
135 f = &per_cpu(flush_state, sender);
136
137 if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
138 goto out;
139 /*
140 * This was a BUG() but until someone can quote me the
141 * line from the intel manual that guarantees an IPI to
142 * multiple CPUs is retried _only_ on the erroring CPUs
143 * its staying as a return
144 *
145 * BUG();
146 */
147
148 if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
149 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
150 if (f->flush_va == TLB_FLUSH_ALL)
151 local_flush_tlb();
152 else
153 __flush_tlb_one(f->flush_va);
154 } else
155 leave_mm(cpu);
156 }
157out:
158 ack_APIC_irq();
159 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
160 inc_irq_stat(irq_tlb_count);
161}
162
163static void flush_tlb_others_ipi(const struct cpumask *cpumask,
164 struct mm_struct *mm, unsigned long va)
165{
166 int sender;
167 union smp_flush_state *f;
168
169 /* Caller has disabled preemption */
170 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
171 f = &per_cpu(flush_state, sender);
172
173 /*
174 * Could avoid this lock when
175 * num_online_cpus() <= NUM_INVALIDATE_TLB_VECTORS, but it is
176 * probably not worth checking this for a cache-hot lock.
177 */
178 spin_lock(&f->tlbstate_lock);
179
180 f->flush_mm = mm;
181 f->flush_va = va;
182 cpumask_andnot(to_cpumask(f->flush_cpumask),
183 cpumask, cpumask_of(smp_processor_id()));
184
185 /*
186 * Make the above memory operations globally visible before
187 * sending the IPI.
188 */
189 smp_mb();
190 /*
191 * We have to send the IPI only to
192 * CPUs affected.
193 */
194 send_IPI_mask(to_cpumask(f->flush_cpumask),
195 INVALIDATE_TLB_VECTOR_START + sender);
196
197 while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
198 cpu_relax();
199
200 f->flush_mm = NULL;
201 f->flush_va = 0;
202 spin_unlock(&f->tlbstate_lock);
203}
204
205void native_flush_tlb_others(const struct cpumask *cpumask,
206 struct mm_struct *mm, unsigned long va)
207{
208 if (is_uv_system()) {
209 /* FIXME: could be an percpu_alloc'd thing */
210 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
211 struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
212
213 cpumask_andnot(after_uv_flush, cpumask,
214 cpumask_of(smp_processor_id()));
215 if (!uv_flush_tlb_others(after_uv_flush, mm, va))
216 flush_tlb_others_ipi(after_uv_flush, mm, va);
217
218 put_cpu_var(flush_tlb_uv_cpumask);
219 return;
220 }
221 flush_tlb_others_ipi(cpumask, mm, va);
222}
223
224static int __cpuinit init_smp_flush(void)
225{
226 int i;
227
228 for_each_possible_cpu(i)
229 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock);
230
231 return 0;
232}
233core_initcall(init_smp_flush);
234
235void flush_tlb_current_task(void)
236{
237 struct mm_struct *mm = current->mm;
238
239 preempt_disable();
240
241 local_flush_tlb();
242 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
243 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
244 preempt_enable();
245}
246
247void flush_tlb_mm(struct mm_struct *mm)
248{
249 preempt_disable();
250
251 if (current->active_mm == mm) {
252 if (current->mm)
253 local_flush_tlb();
254 else
255 leave_mm(smp_processor_id());
256 }
257 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
258 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
259
260 preempt_enable();
261}
262
263void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
264{
265 struct mm_struct *mm = vma->vm_mm;
266
267 preempt_disable();
268
269 if (current->active_mm == mm) {
270 if (current->mm)
271 __flush_tlb_one(va);
272 else
273 leave_mm(smp_processor_id());
274 }
275
276 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
277 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
278
279 preempt_enable();
280}
281
282static void do_flush_tlb_all(void *info)
283{
284 unsigned long cpu = smp_processor_id();
285
286 __flush_tlb_all();
287 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
288 leave_mm(cpu);
289}
290
291void flush_tlb_all(void)
292{
293 on_each_cpu(do_flush_tlb_all, NULL, 1);
294}
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 690dcf1a27d4..aae15dd72604 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 
 #include <asm/mmu_context.h>
+#include <asm/uv/uv.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_bau.h>
@@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
  *
  * Send a broadcast and wait for a broadcast message to complete.
  *
- * The cpumaskp mask contains the cpus the broadcast was sent to.
+ * The flush_mask contains the cpus the broadcast was sent to.
  *
- * Returns 1 if all remote flushing was done. The mask is zeroed.
- * Returns 0 if some remote flushing remains to be done. The mask will have
- * some bits still set.
+ * Returns NULL if all remote flushing was done. The mask is zeroed.
+ * Returns @flush_mask if some remote flushing remains to be done. The
+ * mask will have some bits still set.
  */
-int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
-			   struct cpumask *cpumaskp)
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+					     struct bau_desc *bau_desc,
+					     struct cpumask *flush_mask)
 {
 	int completion_status = 0;
 	int right_shift;
@@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
 	 * Success, so clear the remote cpu's from the mask so we don't
 	 * use the IPI method of shootdown on them.
 	 */
-	for_each_cpu(bit, cpumaskp) {
+	for_each_cpu(bit, flush_mask) {
 		blade = uv_cpu_to_blade_id(bit);
 		if (blade == this_blade)
 			continue;
-		cpumask_clear_cpu(bit, cpumaskp);
+		cpumask_clear_cpu(bit, flush_mask);
 	}
-	if (!cpumask_empty(cpumaskp))
-		return 0;
-	return 1;
+	if (!cpumask_empty(flush_mask))
+		return flush_mask;
+	return NULL;
 }
 
 /**
  * uv_flush_tlb_others - globally purge translation cache of a virtual
  * address or all TLB's
- * @cpumaskp: mask of all cpu's in which the address is to be removed
+ * @cpumask: mask of all cpu's in which the address is to be removed
  * @mm: mm_struct containing virtual address range
  * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ * @cpu: the current cpu
  *
  * This is the entry point for initiating any UV global TLB shootdown.
 *
 * Purges the translation caches of all specified processors of the given
 * virtual address, or purges all TLB's on specified processors.
 *
- * The caller has derived the cpumaskp from the mm_struct and has subtracted
- * the local cpu from the mask.  This function is called only if there
- * are bits set in the mask. (e.g. flush_tlb_page())
+ * The caller has derived the cpumask from the mm_struct. This function
+ * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
 *
- * The cpumaskp is converted into a nodemask of the nodes containing
+ * The cpumask is converted into a nodemask of the nodes containing
 * the cpus.
 *
- * Returns 1 if all remote flushing was done.
- * Returns 0 if some remote flushing remains to be done.
+ * Note that this function should be called with preemption disabled.
+ *
+ * Returns NULL if all remote flushing was done.
+ * Returns pointer to cpumask if some remote flushing remains to be
+ * done.  The returned pointer is valid till preemption is re-enabled.
 */
-int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
-			unsigned long va)
+const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+					  struct mm_struct *mm,
+					  unsigned long va, unsigned int cpu)
 {
+	static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
+	struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
 	int i;
 	int bit;
 	int blade;
-	int cpu;
+	int uv_cpu;
 	int this_blade;
 	int locals = 0;
 	struct bau_desc *bau_desc;
 
-	cpu = uv_blade_processor_id();
+	WARN_ON(!in_atomic());
+
+	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
+
+	uv_cpu = uv_blade_processor_id();
 	this_blade = uv_numa_blade_id();
 	bau_desc = __get_cpu_var(bau_control).descriptor_base;
-	bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
+	bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
 
 	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
 	i = 0;
-	for_each_cpu(bit, cpumaskp) {
+	for_each_cpu(bit, flush_mask) {
 		blade = uv_cpu_to_blade_id(bit);
 		BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
 		if (blade == this_blade) {
@@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
 		 * no off_node flushing; return status for local node
 		 */
 		if (locals)
-			return 0;
+			return flush_mask;
 		else
-			return 1;
+			return NULL;
 	}
 	__get_cpu_var(ptcstats).requestor++;
 	__get_cpu_var(ptcstats).ntargeted += i;
 
 	bau_desc->payload.address = va;
-	bau_desc->payload.sending_cpu = smp_processor_id();
+	bau_desc->payload.sending_cpu = cpu;
 
-	return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
+	return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
 }
 
 /*
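The new return convention (NULL when the BAU shot down everything, otherwise a cpumask of leftover cpus that stays valid until preemption is re-enabled) pushes the IPI fallback out to the caller. A hedged sketch of a caller under this contract, reusing the is_uv_system()/flush_tlb_others_ipi() pattern visible in the removed tlb_64.c above; the real caller lives outside arch/x86/kernel and is not part of this diff:

void native_flush_tlb_others(const struct cpumask *cpumask,
			     struct mm_struct *mm, unsigned long va)
{
	if (is_uv_system()) {
		/* get_cpu() disables preemption, which the UV path now requires */
		unsigned int cpu = get_cpu();
		const struct cpumask *leftover;

		leftover = uv_flush_tlb_others(cpumask, mm, va, cpu);
		if (leftover)
			flush_tlb_others_ipi(leftover, mm, va);
		put_cpu();
		return;
	}
	flush_tlb_others_ipi(cpumask, mm, va);
}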
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284b..ed5aee5f3fcc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,7 +59,6 @@
 #ifdef CONFIG_X86_64
 #include <asm/pgalloc.h>
 #include <asm/proto.h>
-#include <asm/pda.h>
 #else
 #include <asm/processor-flags.h>
 #include <asm/arch_hooks.h>
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index a09abb8fb97f..c9740996430a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -220,8 +220,7 @@ SECTIONS
 	 * so that it can be accessed as a percpu variable.
 	 */
 	. = ALIGN(PAGE_SIZE);
-	PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
-	per_cpu____pda = __per_cpu_start;
+	PERCPU_VADDR(0, :percpu)
 #else
 	PERCPU(PAGE_SIZE)
 #endif
@@ -262,3 +261,8 @@ SECTIONS
  */
 ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
 	"kernel image bigger than KERNEL_IMAGE_SIZE")
+
+#ifdef CONFIG_SMP
+ASSERT((per_cpu__irq_stack_union == 0),
+        "irq_stack_union is not at start of per-cpu area");
+#endif