aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm/kernel/vmlinux.lds.S1
-rw-r--r--arch/ia64/kernel/vmlinux.lds.S1
-rw-r--r--arch/powerpc/kernel/vmlinux.lds.S1
-rw-r--r--arch/x86/Kconfig23
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/ia32/ia32entry.S8
-rw-r--r--arch/x86/include/asm/bitops.h14
-rw-r--r--arch/x86/include/asm/cpu.h21
-rw-r--r--arch/x86/include/asm/cpumask.h28
-rw-r--r--arch/x86/include/asm/current.h24
-rw-r--r--arch/x86/include/asm/genapic_32.h7
-rw-r--r--arch/x86/include/asm/genapic_64.h6
-rw-r--r--arch/x86/include/asm/hardirq_32.h3
-rw-r--r--arch/x86/include/asm/hardirq_64.h24
-rw-r--r--arch/x86/include/asm/io_apic.h26
-rw-r--r--arch/x86/include/asm/irq_regs.h36
-rw-r--r--arch/x86/include/asm/irq_regs_32.h31
-rw-r--r--arch/x86/include/asm/irq_regs_64.h1
-rw-r--r--arch/x86/include/asm/irq_vectors.h36
-rw-r--r--arch/x86/include/asm/mach-default/entry_arch.h18
-rw-r--r--arch/x86/include/asm/mmu_context.h63
-rw-r--r--arch/x86/include/asm/mmu_context_32.h55
-rw-r--r--arch/x86/include/asm/mmu_context_64.h54
-rw-r--r--arch/x86/include/asm/mpspec_def.h23
-rw-r--r--arch/x86/include/asm/page_64.h4
-rw-r--r--arch/x86/include/asm/pda.h137
-rw-r--r--arch/x86/include/asm/percpu.h153
-rw-r--r--arch/x86/include/asm/pgtable_64.h1
-rw-r--r--arch/x86/include/asm/processor.h24
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/smp.h50
-rw-r--r--arch/x86/include/asm/stackprotector.h38
-rw-r--r--arch/x86/include/asm/system.h23
-rw-r--r--arch/x86/include/asm/thread_info.h20
-rw-r--r--arch/x86/include/asm/tlbflush.h9
-rw-r--r--arch/x86/include/asm/topology.h8
-rw-r--r--arch/x86/include/asm/trampoline.h1
-rw-r--r--arch/x86/include/asm/uv/uv.h33
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/kernel/Makefile3
-rw-r--r--arch/x86/kernel/acpi/boot.c96
-rw-r--r--arch/x86/kernel/acpi/sleep.c1
-rw-r--r--arch/x86/kernel/apic.c26
-rw-r--r--arch/x86/kernel/asm-offsets_64.c11
-rw-r--r--arch/x86/kernel/cpu/common.c114
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c35
-rw-r--r--arch/x86/kernel/entry_32.S6
-rw-r--r--arch/x86/kernel/entry_64.S41
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/head64.c23
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/head_64.S36
-rw-r--r--arch/x86/kernel/io_apic.c125
-rw-r--r--arch/x86/kernel/irq.c6
-rw-r--r--arch/x86/kernel/irq_64.c6
-rw-r--r--arch/x86/kernel/irqinit_32.c11
-rw-r--r--arch/x86/kernel/microcode_intel.c10
-rw-r--r--arch/x86/kernel/module_32.c6
-rw-r--r--arch/x86/kernel/module_64.c32
-rw-r--r--arch/x86/kernel/mpparse.c142
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c10
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c43
-rw-r--r--arch/x86/kernel/reboot.c1
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/setup_percpu.c182
-rw-r--r--arch/x86/kernel/smpboot.c70
-rw-r--r--arch/x86/kernel/smpcommon.c10
-rw-r--r--arch/x86/kernel/tlb.c (renamed from arch/x86/kernel/tlb_64.c)61
-rw-r--r--arch/x86/kernel/tlb_32.c250
-rw-r--r--arch/x86/kernel/tlb_uv.c68
-rw-r--r--arch/x86/kernel/traps.c1
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S9
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S26
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c2
-rw-r--r--arch/x86/mach-voyager/setup.c1
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c6
-rw-r--r--arch/x86/mm/fault.c7
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/pat.c37
-rw-r--r--arch/x86/xen/enlighten.c15
-rw-r--r--arch/x86/xen/irq.c8
-rw-r--r--arch/x86/xen/mmu.c8
-rw-r--r--arch/x86/xen/multicalls.h2
-rw-r--r--arch/x86/xen/smp.c33
-rw-r--r--arch/x86/xen/xen-asm_64.S31
-rw-r--r--drivers/pci/intr_remapping.c1
-rw-r--r--include/asm-generic/bitops/__ffs.h2
-rw-r--r--include/asm-generic/bitops/__fls.h2
-rw-r--r--include/asm-generic/bitops/fls.h2
-rw-r--r--include/asm-generic/bitops/fls64.h4
-rw-r--r--include/asm-generic/percpu.h52
-rw-r--r--include/asm-generic/sections.h2
-rw-r--r--include/asm-generic/vmlinux.lds.h47
-rw-r--r--include/linux/magic.h1
-rw-r--r--include/linux/percpu.h41
-rw-r--r--include/linux/sched.h16
-rw-r--r--include/linux/stackprotector.h16
-rw-r--r--init/main.c7
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/panic.c12
-rw-r--r--kernel/sched.c7
106 files changed, 1286 insertions, 1577 deletions
diff --git a/arch/arm/kernel/vmlinux.lds.S b/arch/arm/kernel/vmlinux.lds.S
index 00216071eaf7..85598f7da407 100644
--- a/arch/arm/kernel/vmlinux.lds.S
+++ b/arch/arm/kernel/vmlinux.lds.S
@@ -65,6 +65,7 @@ SECTIONS
65#endif 65#endif
66 . = ALIGN(4096); 66 . = ALIGN(4096);
67 __per_cpu_start = .; 67 __per_cpu_start = .;
68 *(.data.percpu.page_aligned)
68 *(.data.percpu) 69 *(.data.percpu)
69 *(.data.percpu.shared_aligned) 70 *(.data.percpu.shared_aligned)
70 __per_cpu_end = .; 71 __per_cpu_end = .;
diff --git a/arch/ia64/kernel/vmlinux.lds.S b/arch/ia64/kernel/vmlinux.lds.S
index 10a7d47e8510..f45e4e508eca 100644
--- a/arch/ia64/kernel/vmlinux.lds.S
+++ b/arch/ia64/kernel/vmlinux.lds.S
@@ -219,6 +219,7 @@ SECTIONS
219 .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET) 219 .data.percpu PERCPU_ADDR : AT(__phys_per_cpu_start - LOAD_OFFSET)
220 { 220 {
221 __per_cpu_start = .; 221 __per_cpu_start = .;
222 *(.data.percpu.page_aligned)
222 *(.data.percpu) 223 *(.data.percpu)
223 *(.data.percpu.shared_aligned) 224 *(.data.percpu.shared_aligned)
224 __per_cpu_end = .; 225 __per_cpu_end = .;
diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S
index 47bf15cd2c9e..04e8ecea9b40 100644
--- a/arch/powerpc/kernel/vmlinux.lds.S
+++ b/arch/powerpc/kernel/vmlinux.lds.S
@@ -182,6 +182,7 @@ SECTIONS
182 . = ALIGN(PAGE_SIZE); 182 . = ALIGN(PAGE_SIZE);
183 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { 183 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
184 __per_cpu_start = .; 184 __per_cpu_start = .;
185 *(.data.percpu.page_aligned)
185 *(.data.percpu) 186 *(.data.percpu)
186 *(.data.percpu.shared_aligned) 187 *(.data.percpu.shared_aligned)
187 __per_cpu_end = .; 188 __per_cpu_end = .;
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 73f7fe8fd4d1..ef27aed6ff74 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1340,13 +1340,17 @@ config SECCOMP
1340 1340
1341 If unsure, say Y. Only embedded should say N here. 1341 If unsure, say Y. Only embedded should say N here.
1342 1342
1343config CC_STACKPROTECTOR_ALL
1344 bool
1345
1343config CC_STACKPROTECTOR 1346config CC_STACKPROTECTOR
1344 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" 1347 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
1345 depends on X86_64 && EXPERIMENTAL && BROKEN 1348 depends on X86_64
1349 select CC_STACKPROTECTOR_ALL
1346 help 1350 help
1347 This option turns on the -fstack-protector GCC feature. This 1351 This option turns on the -fstack-protector GCC feature. This
1348 feature puts, at the beginning of critical functions, a canary 1352 feature puts, at the beginning of functions, a canary value on
1349 value on the stack just before the return address, and validates 1353 the stack just before the return address, and validates
1350 the value just before actually returning. Stack based buffer 1354 the value just before actually returning. Stack based buffer
1351 overflows (that need to overwrite this return address) now also 1355 overflows (that need to overwrite this return address) now also
1352 overwrite the canary, which gets detected and the attack is then 1356 overwrite the canary, which gets detected and the attack is then
@@ -1354,15 +1358,8 @@ config CC_STACKPROTECTOR
1354 1358
1355 This feature requires gcc version 4.2 or above, or a distribution 1359 This feature requires gcc version 4.2 or above, or a distribution
1356 gcc with the feature backported. Older versions are automatically 1360 gcc with the feature backported. Older versions are automatically
1357 detected and for those versions, this configuration option is ignored. 1361 detected and for those versions, this configuration option is
1358 1362 ignored (and a warning is printed during bootup).
1359config CC_STACKPROTECTOR_ALL
1360 bool "Use stack-protector for all functions"
1361 depends on CC_STACKPROTECTOR
1362 help
1363 Normally, GCC only inserts the canary value protection for
1364 functions that use large-ish on-stack buffers. By enabling
1365 this option, GCC will be asked to do this for ALL functions.
1366 1363
1367source kernel/Kconfig.hz 1364source kernel/Kconfig.hz
1368 1365
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3fd052..28f111461ca8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,7 @@ config DEBUG_RODATA
117config DEBUG_RODATA_TEST 117config DEBUG_RODATA_TEST
118 bool "Testcase for the DEBUG_RODATA feature" 118 bool "Testcase for the DEBUG_RODATA feature"
119 depends on DEBUG_RODATA 119 depends on DEBUG_RODATA
120 default y
120 help 121 help
121 This option enables a testcase for the DEBUG_RODATA 122 This option enables a testcase for the DEBUG_RODATA
122 feature as well as for the change_page_attr() infrastructure. 123 feature as well as for the change_page_attr() infrastructure.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d1a47adb5aec..cacee981d166 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -73,7 +73,7 @@ else
73 73
74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh 74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ 75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
76 "$(CC)" -fstack-protector ) 76 "$(CC)" "-fstack-protector -DGCC_HAS_SP" )
77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ 77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
78 "$(CC)" -fstack-protector-all ) 78 "$(CC)" -fstack-protector-all )
79 79
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b61892..9c79b2477008 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
112 CFI_DEF_CFA rsp,0 112 CFI_DEF_CFA rsp,0
113 CFI_REGISTER rsp,rbp 113 CFI_REGISTER rsp,rbp
114 SWAPGS_UNSAFE_STACK 114 SWAPGS_UNSAFE_STACK
115 movq %gs:pda_kernelstack, %rsp 115 movq PER_CPU_VAR(kernel_stack), %rsp
116 addq $(PDA_STACKOFFSET),%rsp 116 addq $(KERNEL_STACK_OFFSET),%rsp
117 /* 117 /*
118 * No need to follow this irqs on/off section: the syscall 118 * No need to follow this irqs on/off section: the syscall
119 * disabled irqs, here we enable it straight after entry: 119 * disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
273ENTRY(ia32_cstar_target) 273ENTRY(ia32_cstar_target)
274 CFI_STARTPROC32 simple 274 CFI_STARTPROC32 simple
275 CFI_SIGNAL_FRAME 275 CFI_SIGNAL_FRAME
276 CFI_DEF_CFA rsp,PDA_STACKOFFSET 276 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
277 CFI_REGISTER rip,rcx 277 CFI_REGISTER rip,rcx
278 /*CFI_REGISTER rflags,r11*/ 278 /*CFI_REGISTER rflags,r11*/
279 SWAPGS_UNSAFE_STACK 279 SWAPGS_UNSAFE_STACK
280 movl %esp,%r8d 280 movl %esp,%r8d
281 CFI_REGISTER rsp,r8 281 CFI_REGISTER rsp,r8
282 movq %gs:pda_kernelstack,%rsp 282 movq PER_CPU_VAR(kernel_stack),%rsp
283 /* 283 /*
284 * No need to follow this irqs on/off section: the syscall 284 * No need to follow this irqs on/off section: the syscall
285 * disabled irqs and here we enable it straight after entry: 285 * disabled irqs and here we enable it straight after entry:
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index e02a359d2aa5..02b47a603fc8 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -3,6 +3,9 @@
3 3
4/* 4/*
5 * Copyright 1992, Linus Torvalds. 5 * Copyright 1992, Linus Torvalds.
6 *
7 * Note: inlines with more than a single statement should be marked
8 * __always_inline to avoid problems with older gcc's inlining heuristics.
6 */ 9 */
7 10
8#ifndef _LINUX_BITOPS_H 11#ifndef _LINUX_BITOPS_H
@@ -53,7 +56,8 @@
53 * Note that @nr may be almost arbitrarily large; this function is not 56 * Note that @nr may be almost arbitrarily large; this function is not
54 * restricted to acting on a single-word quantity. 57 * restricted to acting on a single-word quantity.
55 */ 58 */
56static inline void set_bit(unsigned int nr, volatile unsigned long *addr) 59static __always_inline void
60set_bit(unsigned int nr, volatile unsigned long *addr)
57{ 61{
58 if (IS_IMMEDIATE(nr)) { 62 if (IS_IMMEDIATE(nr)) {
59 asm volatile(LOCK_PREFIX "orb %1,%0" 63 asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
90 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() 94 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
91 * in order to ensure changes are visible on other processors. 95 * in order to ensure changes are visible on other processors.
92 */ 96 */
93static inline void clear_bit(int nr, volatile unsigned long *addr) 97static __always_inline void
98clear_bit(int nr, volatile unsigned long *addr)
94{ 99{
95 if (IS_IMMEDIATE(nr)) { 100 if (IS_IMMEDIATE(nr)) {
96 asm volatile(LOCK_PREFIX "andb %1,%0" 101 asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
204 * 209 *
205 * This is the same as test_and_set_bit on x86. 210 * This is the same as test_and_set_bit on x86.
206 */ 211 */
207static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) 212static __always_inline int
213test_and_set_bit_lock(int nr, volatile unsigned long *addr)
208{ 214{
209 return test_and_set_bit(nr, addr); 215 return test_and_set_bit(nr, addr);
210} 216}
@@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
300 return oldbit; 306 return oldbit;
301} 307}
302 308
303static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) 309static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
304{ 310{
305 return ((1UL << (nr % BITS_PER_LONG)) & 311 return ((1UL << (nr % BITS_PER_LONG)) &
306 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; 312 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index bae482df6039..f03b23e32864 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -7,6 +7,20 @@
7#include <linux/nodemask.h> 7#include <linux/nodemask.h>
8#include <linux/percpu.h> 8#include <linux/percpu.h>
9 9
10#ifdef CONFIG_SMP
11
12extern void prefill_possible_map(void);
13
14#else /* CONFIG_SMP */
15
16static inline void prefill_possible_map(void) {}
17
18#define cpu_physical_id(cpu) boot_cpu_physical_apicid
19#define safe_smp_processor_id() 0
20#define stack_smp_processor_id() 0
21
22#endif /* CONFIG_SMP */
23
10struct x86_cpu { 24struct x86_cpu {
11 struct cpu cpu; 25 struct cpu cpu;
12}; 26};
@@ -17,4 +31,11 @@ extern void arch_unregister_cpu(int);
17#endif 31#endif
18 32
19DECLARE_PER_CPU(int, cpu_state); 33DECLARE_PER_CPU(int, cpu_state);
34
35#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
36extern unsigned char boot_cpu_id;
37#else
38#define boot_cpu_id 0
39#endif
40
20#endif /* _ASM_X86_CPU_H */ 41#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
new file mode 100644
index 000000000000..26c6dad90479
--- /dev/null
+++ b/arch/x86/include/asm/cpumask.h
@@ -0,0 +1,28 @@
1#ifndef _ASM_X86_CPUMASK_H
2#define _ASM_X86_CPUMASK_H
3#ifndef __ASSEMBLY__
4#include <linux/cpumask.h>
5
6#ifdef CONFIG_X86_64
7
8extern cpumask_var_t cpu_callin_mask;
9extern cpumask_var_t cpu_callout_mask;
10extern cpumask_var_t cpu_initialized_mask;
11extern cpumask_var_t cpu_sibling_setup_mask;
12
13#else /* CONFIG_X86_32 */
14
15extern cpumask_t cpu_callin_map;
16extern cpumask_t cpu_callout_map;
17extern cpumask_t cpu_initialized;
18extern cpumask_t cpu_sibling_setup_map;
19
20#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
21#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
22#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
23#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
24
25#endif /* CONFIG_X86_32 */
26
27#endif /* __ASSEMBLY__ */
28#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..c68c361697e1 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -1,39 +1,21 @@
1#ifndef _ASM_X86_CURRENT_H 1#ifndef _ASM_X86_CURRENT_H
2#define _ASM_X86_CURRENT_H 2#define _ASM_X86_CURRENT_H
3 3
4#ifdef CONFIG_X86_32
5#include <linux/compiler.h> 4#include <linux/compiler.h>
6#include <asm/percpu.h> 5#include <asm/percpu.h>
7 6
7#ifndef __ASSEMBLY__
8struct task_struct; 8struct task_struct;
9 9
10DECLARE_PER_CPU(struct task_struct *, current_task); 10DECLARE_PER_CPU(struct task_struct *, current_task);
11static __always_inline struct task_struct *get_current(void)
12{
13 return x86_read_percpu(current_task);
14}
15
16#else /* X86_32 */
17
18#ifndef __ASSEMBLY__
19#include <asm/pda.h>
20
21struct task_struct;
22 11
23static __always_inline struct task_struct *get_current(void) 12static __always_inline struct task_struct *get_current(void)
24{ 13{
25 return read_pda(pcurrent); 14 return percpu_read(current_task);
26} 15}
27 16
28#else /* __ASSEMBLY__ */ 17#define current get_current()
29
30#include <asm/asm-offsets.h>
31#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
32 18
33#endif /* __ASSEMBLY__ */ 19#endif /* __ASSEMBLY__ */
34 20
35#endif /* X86_32 */
36
37#define current get_current()
38
39#endif /* _ASM_X86_CURRENT_H */ 21#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 2c05b737ee22..4334502d3664 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -138,11 +138,4 @@ struct genapic {
138extern struct genapic *genapic; 138extern struct genapic *genapic;
139extern void es7000_update_genapic_to_cluster(void); 139extern void es7000_update_genapic_to_cluster(void);
140 140
141enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
142#define get_uv_system_type() UV_NONE
143#define is_uv_system() 0
144#define uv_wakeup_secondary(a, b) 1
145#define uv_system_init() do {} while (0)
146
147
148#endif /* _ASM_X86_GENAPIC_32_H */ 141#endif /* _ASM_X86_GENAPIC_32_H */
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index adf32fb56aa6..7bb092c59055 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
51extern int acpi_madt_oem_check(char *, char *); 51extern int acpi_madt_oem_check(char *, char *);
52 52
53extern void apic_send_IPI_self(int vector); 53extern void apic_send_IPI_self(int vector);
54enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
55extern enum uv_system_type get_uv_system_type(void);
56extern int is_uv_system(void);
57 54
58extern struct genapic apic_x2apic_uv_x; 55extern struct genapic apic_x2apic_uv_x;
59DECLARE_PER_CPU(int, x2apic_extra_bits); 56DECLARE_PER_CPU(int, x2apic_extra_bits);
60extern void uv_cpu_init(void);
61extern void uv_system_init(void);
62extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
63 57
64extern void setup_apic_routing(void); 58extern void setup_apic_routing(void);
65 59
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index cf7954d1405f..d4b5d731073f 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -19,6 +19,9 @@ typedef struct {
19 19
20DECLARE_PER_CPU(irq_cpustat_t, irq_stat); 20DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
21 21
22/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
23#define MAX_HARDIRQS_PER_CPU NR_VECTORS
24
22#define __ARCH_IRQ_STAT 25#define __ARCH_IRQ_STAT
23#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) 26#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
24 27
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index b5a6b5d56704..a65bab20f6ce 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -3,22 +3,36 @@
3 3
4#include <linux/threads.h> 4#include <linux/threads.h>
5#include <linux/irq.h> 5#include <linux/irq.h>
6#include <asm/pda.h>
7#include <asm/apic.h> 6#include <asm/apic.h>
8 7
8typedef struct {
9 unsigned int __softirq_pending;
10 unsigned int __nmi_count; /* arch dependent */
11 unsigned int apic_timer_irqs; /* arch dependent */
12 unsigned int irq0_irqs;
13 unsigned int irq_resched_count;
14 unsigned int irq_call_count;
15 unsigned int irq_tlb_count;
16 unsigned int irq_thermal_count;
17 unsigned int irq_spurious_count;
18 unsigned int irq_threshold_count;
19} ____cacheline_aligned irq_cpustat_t;
20
21DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
22
9/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ 23/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
10#define MAX_HARDIRQS_PER_CPU NR_VECTORS 24#define MAX_HARDIRQS_PER_CPU NR_VECTORS
11 25
12#define __ARCH_IRQ_STAT 1 26#define __ARCH_IRQ_STAT 1
13 27
14#define inc_irq_stat(member) add_pda(member, 1) 28#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
15 29
16#define local_softirq_pending() read_pda(__softirq_pending) 30#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
17 31
18#define __ARCH_SET_SOFTIRQ_PENDING 1 32#define __ARCH_SET_SOFTIRQ_PENDING 1
19 33
20#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) 34#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
21#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) 35#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
22 36
23extern void ack_bad_irq(unsigned int irq); 37extern void ack_bad_irq(unsigned int irq);
24 38
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 7a1f44ac1f17..08ec793aa043 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -114,38 +114,16 @@ struct IR_IO_APIC_route_entry {
114extern int nr_ioapics; 114extern int nr_ioapics;
115extern int nr_ioapic_registers[MAX_IO_APICS]; 115extern int nr_ioapic_registers[MAX_IO_APICS];
116 116
117/*
118 * MP-BIOS irq configuration table structures:
119 */
120
121#define MP_MAX_IOAPIC_PIN 127 117#define MP_MAX_IOAPIC_PIN 127
122 118
123struct mp_config_ioapic {
124 unsigned long mp_apicaddr;
125 unsigned int mp_apicid;
126 unsigned char mp_type;
127 unsigned char mp_apicver;
128 unsigned char mp_flags;
129};
130
131struct mp_config_intsrc {
132 unsigned int mp_dstapic;
133 unsigned char mp_type;
134 unsigned char mp_irqtype;
135 unsigned short mp_irqflag;
136 unsigned char mp_srcbus;
137 unsigned char mp_srcbusirq;
138 unsigned char mp_dstirq;
139};
140
141/* I/O APIC entries */ 119/* I/O APIC entries */
142extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 120extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
143 121
144/* # of MP IRQ source entries */ 122/* # of MP IRQ source entries */
145extern int mp_irq_entries; 123extern int mp_irq_entries;
146 124
147/* MP IRQ source entries */ 125/* MP IRQ source entries */
148extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 126extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
149 127
150/* non-0 if default (table-less) MP configuration */ 128/* non-0 if default (table-less) MP configuration */
151extern int mpc_default_type; 129extern int mpc_default_type;
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 89c898ab298b..77843225b7ea 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -1,5 +1,31 @@
1#ifdef CONFIG_X86_32 1/*
2# include "irq_regs_32.h" 2 * Per-cpu current frame pointer - the location of the last exception frame on
3#else 3 * the stack, stored in the per-cpu area.
4# include "irq_regs_64.h" 4 *
5#endif 5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */
7#ifndef _ASM_X86_IRQ_REGS_H
8#define _ASM_X86_IRQ_REGS_H
9
10#include <asm/percpu.h>
11
12#define ARCH_HAS_OWN_IRQ_REGS
13
14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15
16static inline struct pt_regs *get_irq_regs(void)
17{
18 return percpu_read(irq_regs);
19}
20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
22{
23 struct pt_regs *old_regs;
24
25 old_regs = get_irq_regs();
26 percpu_write(irq_regs, new_regs);
27
28 return old_regs;
29}
30
31#endif /* _ASM_X86_IRQ_REGS_H */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
deleted file mode 100644
index 86afd7473457..000000000000
--- a/arch/x86/include/asm/irq_regs_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
1/*
2 * Per-cpu current frame pointer - the location of the last exception frame on
3 * the stack, stored in the per-cpu area.
4 *
5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */
7#ifndef _ASM_X86_IRQ_REGS_32_H
8#define _ASM_X86_IRQ_REGS_32_H
9
10#include <asm/percpu.h>
11
12#define ARCH_HAS_OWN_IRQ_REGS
13
14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15
16static inline struct pt_regs *get_irq_regs(void)
17{
18 return x86_read_percpu(irq_regs);
19}
20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
22{
23 struct pt_regs *old_regs;
24
25 old_regs = get_irq_regs();
26 x86_write_percpu(irq_regs, new_regs);
27
28 return old_regs;
29}
30
31#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/x86/include/asm/irq_regs_64.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/irq_regs.h>
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index a16a2ab2b429..9a83a10a5d51 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -49,31 +49,33 @@
49 * some of the following vectors are 'rare', they are merged 49 * some of the following vectors are 'rare', they are merged
50 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. 50 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
51 * TLB, reschedule and local APIC vectors are performance-critical. 51 * TLB, reschedule and local APIC vectors are performance-critical.
52 *
53 * Vectors 0xf0-0xfa are free (reserved for future Linux use).
54 */ 52 */
55#ifdef CONFIG_X86_32 53#ifdef CONFIG_X86_32
56 54
57# define SPURIOUS_APIC_VECTOR 0xff 55# define SPURIOUS_APIC_VECTOR 0xff
58# define ERROR_APIC_VECTOR 0xfe 56# define ERROR_APIC_VECTOR 0xfe
59# define INVALIDATE_TLB_VECTOR 0xfd 57# define RESCHEDULE_VECTOR 0xfd
60# define RESCHEDULE_VECTOR 0xfc 58# define CALL_FUNCTION_VECTOR 0xfc
61# define CALL_FUNCTION_VECTOR 0xfb 59# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
62# define CALL_FUNCTION_SINGLE_VECTOR 0xfa 60# define THERMAL_APIC_VECTOR 0xfa
63# define THERMAL_APIC_VECTOR 0xf0 61/* 0xf8 - 0xf9 : free */
62# define INVALIDATE_TLB_VECTOR_END 0xf7
63# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
64
65# define NUM_INVALIDATE_TLB_VECTORS 8
64 66
65#else 67#else
66 68
67#define SPURIOUS_APIC_VECTOR 0xff 69# define SPURIOUS_APIC_VECTOR 0xff
68#define ERROR_APIC_VECTOR 0xfe 70# define ERROR_APIC_VECTOR 0xfe
69#define RESCHEDULE_VECTOR 0xfd 71# define RESCHEDULE_VECTOR 0xfd
70#define CALL_FUNCTION_VECTOR 0xfc 72# define CALL_FUNCTION_VECTOR 0xfc
71#define CALL_FUNCTION_SINGLE_VECTOR 0xfb 73# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
72#define THERMAL_APIC_VECTOR 0xfa 74# define THERMAL_APIC_VECTOR 0xfa
73#define THRESHOLD_APIC_VECTOR 0xf9 75# define THRESHOLD_APIC_VECTOR 0xf9
74#define UV_BAU_MESSAGE 0xf8 76# define UV_BAU_MESSAGE 0xf8
75#define INVALIDATE_TLB_VECTOR_END 0xf7 77# define INVALIDATE_TLB_VECTOR_END 0xf7
76#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ 78# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
77 79
78#define NUM_INVALIDATE_TLB_VECTORS 8 80#define NUM_INVALIDATE_TLB_VECTORS 8
79 81
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index 6b1add8e31dd..6fa399ad1de2 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -11,10 +11,26 @@
11 */ 11 */
12#ifdef CONFIG_X86_SMP 12#ifdef CONFIG_X86_SMP
13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) 13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
14BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
15BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) 14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
16BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) 15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
17BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17
18BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
19 smp_invalidate_interrupt)
20BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
21 smp_invalidate_interrupt)
22BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
23 smp_invalidate_interrupt)
24BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
25 smp_invalidate_interrupt)
26BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
27 smp_invalidate_interrupt)
28BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
29 smp_invalidate_interrupt)
30BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
31 smp_invalidate_interrupt)
32BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
33 smp_invalidate_interrupt)
18#endif 34#endif
19 35
20/* 36/*
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8aeeb3fd73db..52948df9cd1d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
21int init_new_context(struct task_struct *tsk, struct mm_struct *mm); 21int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
22void destroy_context(struct mm_struct *mm); 22void destroy_context(struct mm_struct *mm);
23 23
24#ifdef CONFIG_X86_32 24
25# include "mmu_context_32.h" 25static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
26#else 26{
27# include "mmu_context_64.h" 27#ifdef CONFIG_SMP
28 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
29 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
30#endif
31}
32
33static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
34 struct task_struct *tsk)
35{
36 unsigned cpu = smp_processor_id();
37
38 if (likely(prev != next)) {
39 /* stop flush ipis for the previous mm */
40 cpu_clear(cpu, prev->cpu_vm_mask);
41#ifdef CONFIG_SMP
42 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
43 percpu_write(cpu_tlbstate.active_mm, next);
28#endif 44#endif
45 cpu_set(cpu, next->cpu_vm_mask);
46
47 /* Re-load page tables */
48 load_cr3(next->pgd);
49
50 /*
51 * load the LDT, if the LDT is different:
52 */
53 if (unlikely(prev->context.ldt != next->context.ldt))
54 load_LDT_nolock(&next->context);
55 }
56#ifdef CONFIG_SMP
57 else {
58 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
59 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
60
61 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
62 /* We were in lazy tlb mode and leave_mm disabled
63 * tlb flush IPI delivery. We must reload CR3
65 * to make sure no freed page tables are used.
65 */
66 load_cr3(next->pgd);
67 load_LDT_nolock(&next->context);
68 }
69 }
70#endif
71}
29 72
30#define activate_mm(prev, next) \ 73#define activate_mm(prev, next) \
31do { \ 74do { \
@@ -33,5 +76,17 @@ do { \
33 switch_mm((prev), (next), NULL); \ 76 switch_mm((prev), (next), NULL); \
34} while (0); 77} while (0);
35 78
79#ifdef CONFIG_X86_32
80#define deactivate_mm(tsk, mm) \
81do { \
82 loadsegment(gs, 0); \
83} while (0)
84#else
85#define deactivate_mm(tsk, mm) \
86do { \
87 load_gs_index(0); \
88 loadsegment(fs, 0); \
89} while (0)
90#endif
36 91
37#endif /* _ASM_X86_MMU_CONTEXT_H */ 92#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
deleted file mode 100644
index 7e98ce1d2c0e..000000000000
--- a/arch/x86/include/asm/mmu_context_32.h
+++ /dev/null
@@ -1,55 +0,0 @@
1#ifndef _ASM_X86_MMU_CONTEXT_32_H
2#define _ASM_X86_MMU_CONTEXT_32_H
3
4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
5{
6#ifdef CONFIG_SMP
7 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
8 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
9#endif
10}
11
12static inline void switch_mm(struct mm_struct *prev,
13 struct mm_struct *next,
14 struct task_struct *tsk)
15{
16 int cpu = smp_processor_id();
17
18 if (likely(prev != next)) {
19 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP
22 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
23 x86_write_percpu(cpu_tlbstate.active_mm, next);
24#endif
25 cpu_set(cpu, next->cpu_vm_mask);
26
27 /* Re-load page tables */
28 load_cr3(next->pgd);
29
30 /*
31 * load the LDT, if the LDT is different:
32 */
33 if (unlikely(prev->context.ldt != next->context.ldt))
34 load_LDT_nolock(&next->context);
35 }
36#ifdef CONFIG_SMP
37 else {
38 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
39 BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
40
41 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
42 /* We were in lazy tlb mode and leave_mm disabled
43 * tlb flush IPI delivery. We must reload %cr3.
44 */
45 load_cr3(next->pgd);
46 load_LDT_nolock(&next->context);
47 }
48 }
49#endif
50}
51
52#define deactivate_mm(tsk, mm) \
53 asm("movl %0,%%gs": :"r" (0));
54
55#endif /* _ASM_X86_MMU_CONTEXT_32_H */
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
deleted file mode 100644
index 677d36e9540a..000000000000
--- a/arch/x86/include/asm/mmu_context_64.h
+++ /dev/null
@@ -1,54 +0,0 @@
1#ifndef _ASM_X86_MMU_CONTEXT_64_H
2#define _ASM_X86_MMU_CONTEXT_64_H
3
4#include <asm/pda.h>
5
6static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
7{
8#ifdef CONFIG_SMP
9 if (read_pda(mmu_state) == TLBSTATE_OK)
10 write_pda(mmu_state, TLBSTATE_LAZY);
11#endif
12}
13
14static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
15 struct task_struct *tsk)
16{
17 unsigned cpu = smp_processor_id();
18 if (likely(prev != next)) {
19 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP
22 write_pda(mmu_state, TLBSTATE_OK);
23 write_pda(active_mm, next);
24#endif
25 cpu_set(cpu, next->cpu_vm_mask);
26 load_cr3(next->pgd);
27
28 if (unlikely(next->context.ldt != prev->context.ldt))
29 load_LDT_nolock(&next->context);
30 }
31#ifdef CONFIG_SMP
32 else {
33 write_pda(mmu_state, TLBSTATE_OK);
34 if (read_pda(active_mm) != next)
35 BUG();
36 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
37 /* We were in lazy tlb mode and leave_mm disabled
38 * tlb flush IPI delivery. We must reload CR3
39 * to make sure to use no freed page tables.
40 */
41 load_cr3(next->pgd);
42 load_LDT_nolock(&next->context);
43 }
44 }
45#endif
46}
47
48#define deactivate_mm(tsk, mm) \
49do { \
50 load_gs_index(0); \
51 asm volatile("movl %0,%%fs"::"r"(0)); \
52} while (0)
53
54#endif /* _ASM_X86_MMU_CONTEXT_64_H */
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 59568bc4767f..4a7f96d7c188 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -24,17 +24,18 @@
24# endif 24# endif
25#endif 25#endif
26 26
27struct intel_mp_floating { 27/* Intel MP Floating Pointer Structure */
28 char mpf_signature[4]; /* "_MP_" */ 28struct mpf_intel {
29 unsigned int mpf_physptr; /* Configuration table address */ 29 char signature[4]; /* "_MP_" */
30 unsigned char mpf_length; /* Our length (paragraphs) */ 30 unsigned int physptr; /* Configuration table address */
31 unsigned char mpf_specification;/* Specification version */ 31 unsigned char length; /* Our length (paragraphs) */
32 unsigned char mpf_checksum; /* Checksum (makes sum 0) */ 32 unsigned char specification; /* Specification version */
33 unsigned char mpf_feature1; /* Standard or configuration ? */ 33 unsigned char checksum; /* Checksum (makes sum 0) */
34 unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ 34 unsigned char feature1; /* Standard or configuration ? */
35 unsigned char mpf_feature3; /* Unused (0) */ 35 unsigned char feature2; /* Bit7 set for IMCR|PIC */
36 unsigned char mpf_feature4; /* Unused (0) */ 36 unsigned char feature3; /* Unused (0) */
37 unsigned char mpf_feature5; /* Unused (0) */ 37 unsigned char feature4; /* Unused (0) */
38 unsigned char feature5; /* Unused (0) */
38}; 39};
39 40
40#define MPC_SIGNATURE "PCMP" 41#define MPC_SIGNATURE "PCMP"
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29f44f0..e27fdbe5f9e4 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -13,8 +13,8 @@
13#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) 13#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
14#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) 14#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
15 15
16#define IRQSTACK_ORDER 2 16#define IRQ_STACK_ORDER 2
17#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) 17#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
18 18
19#define STACKFAULT_STACK 1 19#define STACKFAULT_STACK 1
20#define DOUBLEFAULT_STACK 2 20#define DOUBLEFAULT_STACK 2
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
deleted file mode 100644
index 2fbfff88df37..000000000000
--- a/arch/x86/include/asm/pda.h
+++ /dev/null
@@ -1,137 +0,0 @@
1#ifndef _ASM_X86_PDA_H
2#define _ASM_X86_PDA_H
3
4#ifndef __ASSEMBLY__
5#include <linux/stddef.h>
6#include <linux/types.h>
7#include <linux/cache.h>
8#include <asm/page.h>
9
10/* Per processor datastructure. %gs points to it while the kernel runs */
11struct x8664_pda {
12 struct task_struct *pcurrent; /* 0 Current process */
13 unsigned long data_offset; /* 8 Per cpu data offset from linker
14 address */
15 unsigned long kernelstack; /* 16 top of kernel stack for current */
16 unsigned long oldrsp; /* 24 user rsp for system call */
17 int irqcount; /* 32 Irq nesting counter. Starts -1 */
18 unsigned int cpunumber; /* 36 Logical CPU number */
19#ifdef CONFIG_CC_STACKPROTECTOR
20 unsigned long stack_canary; /* 40 stack canary value */
21 /* gcc-ABI: this canary MUST be at
22 offset 40!!! */
23#endif
24 char *irqstackptr;
25 short nodenumber; /* number of current node (32k max) */
26 short in_bootmem; /* pda lives in bootmem */
27 unsigned int __softirq_pending;
28 unsigned int __nmi_count; /* number of NMI on this CPUs */
29 short mmu_state;
30 short isidle;
31 struct mm_struct *active_mm;
32 unsigned apic_timer_irqs;
33 unsigned irq0_irqs;
34 unsigned irq_resched_count;
35 unsigned irq_call_count;
36 unsigned irq_tlb_count;
37 unsigned irq_thermal_count;
38 unsigned irq_threshold_count;
39 unsigned irq_spurious_count;
40} ____cacheline_aligned_in_smp;
41
42extern struct x8664_pda **_cpu_pda;
43extern void pda_init(int);
44
45#define cpu_pda(i) (_cpu_pda[i])
46
47/*
48 * There is no fast way to get the base address of the PDA, all the accesses
49 * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
50 */
51extern void __bad_pda_field(void) __attribute__((noreturn));
52
53/*
54 * proxy_pda doesn't actually exist, but tell gcc it is accessed for
55 * all PDA accesses so it gets read/write dependencies right.
56 */
57extern struct x8664_pda _proxy_pda;
58
59#define pda_offset(field) offsetof(struct x8664_pda, field)
60
61#define pda_to_op(op, field, val) \
62do { \
63 typedef typeof(_proxy_pda.field) T__; \
64 if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
65 switch (sizeof(_proxy_pda.field)) { \
66 case 2: \
67 asm(op "w %1,%%gs:%c2" : \
68 "+m" (_proxy_pda.field) : \
69 "ri" ((T__)val), \
70 "i"(pda_offset(field))); \
71 break; \
72 case 4: \
73 asm(op "l %1,%%gs:%c2" : \
74 "+m" (_proxy_pda.field) : \
75 "ri" ((T__)val), \
76 "i" (pda_offset(field))); \
77 break; \
78 case 8: \
79 asm(op "q %1,%%gs:%c2": \
80 "+m" (_proxy_pda.field) : \
81 "ri" ((T__)val), \
82 "i"(pda_offset(field))); \
83 break; \
84 default: \
85 __bad_pda_field(); \
86 } \
87} while (0)
88
89#define pda_from_op(op, field) \
90({ \
91 typeof(_proxy_pda.field) ret__; \
92 switch (sizeof(_proxy_pda.field)) { \
93 case 2: \
94 asm(op "w %%gs:%c1,%0" : \
95 "=r" (ret__) : \
96 "i" (pda_offset(field)), \
97 "m" (_proxy_pda.field)); \
98 break; \
99 case 4: \
100 asm(op "l %%gs:%c1,%0": \
101 "=r" (ret__): \
102 "i" (pda_offset(field)), \
103 "m" (_proxy_pda.field)); \
104 break; \
105 case 8: \
106 asm(op "q %%gs:%c1,%0": \
107 "=r" (ret__) : \
108 "i" (pda_offset(field)), \
109 "m" (_proxy_pda.field)); \
110 break; \
111 default: \
112 __bad_pda_field(); \
113 } \
114 ret__; \
115})
116
117#define read_pda(field) pda_from_op("mov", field)
118#define write_pda(field, val) pda_to_op("mov", field, val)
119#define add_pda(field, val) pda_to_op("add", field, val)
120#define sub_pda(field, val) pda_to_op("sub", field, val)
121#define or_pda(field, val) pda_to_op("or", field, val)
122
123/* This is not atomic against other CPUs -- CPU preemption needs to be off */
124#define test_and_clear_bit_pda(bit, field) \
125({ \
126 int old__; \
127 asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \
128 : "=r" (old__), "+m" (_proxy_pda.field) \
129 : "dIr" (bit), "i" (pda_offset(field)) : "memory");\
130 old__; \
131})
132
133#endif
134
135#define PDA_STACKOFFSET (5*8)
136
137#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ece72053ba63..0b64af4f13ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -2,53 +2,12 @@
2#define _ASM_X86_PERCPU_H 2#define _ASM_X86_PERCPU_H
3 3
4#ifdef CONFIG_X86_64 4#ifdef CONFIG_X86_64
5#include <linux/compiler.h> 5#define __percpu_seg gs
6 6#define __percpu_mov_op movq
7/* Same as asm-generic/percpu.h, except that we store the per cpu offset 7#else
8 in the PDA. Longer term the PDA and every per cpu variable 8#define __percpu_seg fs
9 should be just put into a single section and referenced directly 9#define __percpu_mov_op movl
10 from %gs */
11
12#ifdef CONFIG_SMP
13#include <asm/pda.h>
14
15#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
16#define __my_cpu_offset read_pda(data_offset)
17
18#define per_cpu_offset(x) (__per_cpu_offset(x))
19
20#endif 10#endif
21#include <asm-generic/percpu.h>
22
23DECLARE_PER_CPU(struct x8664_pda, pda);
24
25/*
26 * These are supposed to be implemented as a single instruction which
27 * operates on the per-cpu data base segment. x86-64 doesn't have
28 * that yet, so this is a fairly inefficient workaround for the
29 * meantime. The single instruction is atomic with respect to
30 * preemption and interrupts, so we need to explicitly disable
31 * interrupts here to achieve the same effect. However, because it
32 * can be used from within interrupt-disable/enable, we can't actually
33 * disable interrupts; disabling preemption is enough.
34 */
35#define x86_read_percpu(var) \
36 ({ \
37 typeof(per_cpu_var(var)) __tmp; \
38 preempt_disable(); \
39 __tmp = __get_cpu_var(var); \
40 preempt_enable(); \
41 __tmp; \
42 })
43
44#define x86_write_percpu(var, val) \
45 do { \
46 preempt_disable(); \
47 __get_cpu_var(var) = (val); \
48 preempt_enable(); \
49 } while(0)
50
51#else /* CONFIG_X86_64 */
52 11
53#ifdef __ASSEMBLY__ 12#ifdef __ASSEMBLY__
54 13
@@ -65,47 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
65 * PER_CPU(cpu_gdt_descr, %ebx) 24 * PER_CPU(cpu_gdt_descr, %ebx)
66 */ 25 */
67#ifdef CONFIG_SMP 26#ifdef CONFIG_SMP
68#define PER_CPU(var, reg) \ 27#define PER_CPU(var, reg) \
69 movl %fs:per_cpu__##this_cpu_off, reg; \ 28 __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \
70 lea per_cpu__##var(reg), reg 29 lea per_cpu__##var(reg), reg
71#define PER_CPU_VAR(var) %fs:per_cpu__##var 30#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var
72#else /* ! SMP */ 31#else /* ! SMP */
73#define PER_CPU(var, reg) \ 32#define PER_CPU(var, reg) \
74 movl $per_cpu__##var, reg 33 __percpu_mov_op $per_cpu__##var, reg
75#define PER_CPU_VAR(var) per_cpu__##var 34#define PER_CPU_VAR(var) per_cpu__##var
76#endif /* SMP */ 35#endif /* SMP */
77 36
78#else /* ...!ASSEMBLY */ 37#else /* ...!ASSEMBLY */
79 38
80/* 39#include <linux/stringify.h>
81 * PER_CPU finds an address of a per-cpu variable.
82 *
83 * Args:
84 * var - variable name
85 * cpu - 32bit register containing the current CPU number
86 *
87 * The resulting address is stored in the "cpu" argument.
88 *
89 * Example:
90 * PER_CPU(cpu_gdt_descr, %ebx)
91 */
92#ifdef CONFIG_SMP
93
94#define __my_cpu_offset x86_read_percpu(this_cpu_off)
95 40
96/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ 41#ifdef CONFIG_SMP
97#define __percpu_seg "%%fs:" 42#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
98 43#define __my_cpu_offset percpu_read(this_cpu_off)
99#else /* !SMP */ 44#else
100 45#define __percpu_arg(x) "%" #x
101#define __percpu_seg "" 46#endif
102
103#endif /* SMP */
104
105#include <asm-generic/percpu.h>
106
107/* We can use this directly for local CPU (faster). */
108DECLARE_PER_CPU(unsigned long, this_cpu_off);
109 47
110/* For arch-specific code, we can use direct single-insn ops (they 48/* For arch-specific code, we can use direct single-insn ops (they
111 * don't give an lvalue though). */ 49 * don't give an lvalue though). */
@@ -120,20 +58,25 @@ do { \
120 } \ 58 } \
121 switch (sizeof(var)) { \ 59 switch (sizeof(var)) { \
122 case 1: \ 60 case 1: \
123 asm(op "b %1,"__percpu_seg"%0" \ 61 asm(op "b %1,"__percpu_arg(0) \
124 : "+m" (var) \ 62 : "+m" (var) \
125 : "ri" ((T__)val)); \ 63 : "ri" ((T__)val)); \
126 break; \ 64 break; \
127 case 2: \ 65 case 2: \
128 asm(op "w %1,"__percpu_seg"%0" \ 66 asm(op "w %1,"__percpu_arg(0) \
129 : "+m" (var) \ 67 : "+m" (var) \
130 : "ri" ((T__)val)); \ 68 : "ri" ((T__)val)); \
131 break; \ 69 break; \
132 case 4: \ 70 case 4: \
133 asm(op "l %1,"__percpu_seg"%0" \ 71 asm(op "l %1,"__percpu_arg(0) \
134 : "+m" (var) \ 72 : "+m" (var) \
135 : "ri" ((T__)val)); \ 73 : "ri" ((T__)val)); \
136 break; \ 74 break; \
75 case 8: \
76 asm(op "q %1,"__percpu_arg(0) \
77 : "+m" (var) \
78 : "re" ((T__)val)); \
79 break; \
137 default: __bad_percpu_size(); \ 80 default: __bad_percpu_size(); \
138 } \ 81 } \
139} while (0) 82} while (0)
@@ -143,17 +86,22 @@ do { \
143 typeof(var) ret__; \ 86 typeof(var) ret__; \
144 switch (sizeof(var)) { \ 87 switch (sizeof(var)) { \
145 case 1: \ 88 case 1: \
146 asm(op "b "__percpu_seg"%1,%0" \ 89 asm(op "b "__percpu_arg(1)",%0" \
147 : "=r" (ret__) \ 90 : "=r" (ret__) \
148 : "m" (var)); \ 91 : "m" (var)); \
149 break; \ 92 break; \
150 case 2: \ 93 case 2: \
151 asm(op "w "__percpu_seg"%1,%0" \ 94 asm(op "w "__percpu_arg(1)",%0" \
152 : "=r" (ret__) \ 95 : "=r" (ret__) \
153 : "m" (var)); \ 96 : "m" (var)); \
154 break; \ 97 break; \
155 case 4: \ 98 case 4: \
156 asm(op "l "__percpu_seg"%1,%0" \ 99 asm(op "l "__percpu_arg(1)",%0" \
100 : "=r" (ret__) \
101 : "m" (var)); \
102 break; \
103 case 8: \
104 asm(op "q "__percpu_arg(1)",%0" \
157 : "=r" (ret__) \ 105 : "=r" (ret__) \
158 : "m" (var)); \ 106 : "m" (var)); \
159 break; \ 107 break; \
@@ -162,13 +110,30 @@ do { \
162 ret__; \ 110 ret__; \
163}) 111})
164 112
165#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) 113#define percpu_read(var) percpu_from_op("mov", per_cpu__##var)
166#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) 114#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
167#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) 115#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
168#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) 116#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
169#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) 117#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val)
118#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
119#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
120
121/* This is not atomic against other CPUs -- CPU preemption needs to be off */
122#define x86_test_and_clear_bit_percpu(bit, var) \
123({ \
124 int old__; \
125 asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \
126 : "=r" (old__), "+m" (per_cpu__##var) \
127 : "dIr" (bit)); \
128 old__; \
129})
130
131#include <asm-generic/percpu.h>
132
133/* We can use this directly for local CPU (faster). */
134DECLARE_PER_CPU(unsigned long, this_cpu_off);
135
170#endif /* !__ASSEMBLY__ */ 136#endif /* !__ASSEMBLY__ */
171#endif /* !CONFIG_X86_64 */
172 137
173#ifdef CONFIG_SMP 138#ifdef CONFIG_SMP
174 139
@@ -195,9 +160,9 @@ do { \
195#define early_per_cpu_ptr(_name) (_name##_early_ptr) 160#define early_per_cpu_ptr(_name) (_name##_early_ptr)
196#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) 161#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
197#define early_per_cpu(_name, _cpu) \ 162#define early_per_cpu(_name, _cpu) \
198 (early_per_cpu_ptr(_name) ? \ 163 *(early_per_cpu_ptr(_name) ? \
199 early_per_cpu_ptr(_name)[_cpu] : \ 164 &early_per_cpu_ptr(_name)[_cpu] : \
200 per_cpu(_name, _cpu)) 165 &per_cpu(_name, _cpu))
201 166
202#else /* !CONFIG_SMP */ 167#else /* !CONFIG_SMP */
203#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ 168#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ba09289accaa..1df9637dfda3 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -11,7 +11,6 @@
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include <linux/threads.h> 13#include <linux/threads.h>
14#include <asm/pda.h>
15 14
16extern pud_t level3_kernel_pgt[512]; 15extern pud_t level3_kernel_pgt[512];
17extern pud_t level3_ident_pgt[512]; 16extern pud_t level3_ident_pgt[512];
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 091cd8855f2e..48676b943b92 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -378,6 +378,30 @@ union thread_xstate {
378 378
379#ifdef CONFIG_X86_64 379#ifdef CONFIG_X86_64
380DECLARE_PER_CPU(struct orig_ist, orig_ist); 380DECLARE_PER_CPU(struct orig_ist, orig_ist);
381
382union irq_stack_union {
383 char irq_stack[IRQ_STACK_SIZE];
384 /*
385 * GCC hardcodes the stack canary as %gs:40. Since the
386 * irq_stack is the object at %gs:0, we reserve the bottom
387 * 48 bytes of the irq stack for the canary.
388 */
389 struct {
390 char gs_base[40];
391 unsigned long stack_canary;
392 };
393};
394
395DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
396DECLARE_PER_CPU(char *, irq_stack_ptr);
397
398static inline void load_gs_base(int cpu)
399{
400 /* Memory clobbers used to order pda/percpu accesses */
401 mb();
402 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
403 mb();
404}
381#endif 405#endif
382 406
383extern void print_cpu_info(struct cpuinfo_x86 *); 407extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ebe858cdc8a3..536949749bc2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start;
100extern unsigned long init_pg_tables_end; 100extern unsigned long init_pg_tables_end;
101 101
102#else 102#else
103void __init x86_64_init_pda(void);
104void __init x86_64_start_kernel(char *real_mode); 103void __init x86_64_start_kernel(char *real_mode);
105void __init x86_64_start_reservations(char *real_mode_data); 104void __init x86_64_start_reservations(char *real_mode_data);
106 105
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19953df61c52..45ef8a1b9d7c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -15,34 +15,8 @@
15# include <asm/io_apic.h> 15# include <asm/io_apic.h>
16# endif 16# endif
17#endif 17#endif
18#include <asm/pda.h>
19#include <asm/thread_info.h> 18#include <asm/thread_info.h>
20 19#include <asm/cpumask.h>
21#ifdef CONFIG_X86_64
22
23extern cpumask_var_t cpu_callin_mask;
24extern cpumask_var_t cpu_callout_mask;
25extern cpumask_var_t cpu_initialized_mask;
26extern cpumask_var_t cpu_sibling_setup_mask;
27
28#else /* CONFIG_X86_32 */
29
30extern cpumask_t cpu_callin_map;
31extern cpumask_t cpu_callout_map;
32extern cpumask_t cpu_initialized;
33extern cpumask_t cpu_sibling_setup_map;
34
35#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
36#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
37#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
38#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
39
40#endif /* CONFIG_X86_32 */
41
42extern void (*mtrr_hook)(void);
43extern void zap_low_mappings(void);
44
45extern int __cpuinit get_local_pda(int cpu);
46 20
47extern int smp_num_siblings; 21extern int smp_num_siblings;
48extern unsigned int num_processors; 22extern unsigned int num_processors;
@@ -50,9 +24,7 @@ extern unsigned int num_processors;
50DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 24DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
51DECLARE_PER_CPU(cpumask_t, cpu_core_map); 25DECLARE_PER_CPU(cpumask_t, cpu_core_map);
52DECLARE_PER_CPU(u16, cpu_llc_id); 26DECLARE_PER_CPU(u16, cpu_llc_id);
53#ifdef CONFIG_X86_32
54DECLARE_PER_CPU(int, cpu_number); 27DECLARE_PER_CPU(int, cpu_number);
55#endif
56 28
57static inline struct cpumask *cpu_sibling_mask(int cpu) 29static inline struct cpumask *cpu_sibling_mask(int cpu)
58{ 30{
@@ -167,8 +139,6 @@ void play_dead_common(void);
167void native_send_call_func_ipi(const struct cpumask *mask); 139void native_send_call_func_ipi(const struct cpumask *mask);
168void native_send_call_func_single_ipi(int cpu); 140void native_send_call_func_single_ipi(int cpu);
169 141
170extern void prefill_possible_map(void);
171
172void smp_store_cpu_info(int id); 142void smp_store_cpu_info(int id);
173#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) 143#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
174 144
@@ -177,10 +147,6 @@ static inline int num_booting_cpus(void)
177{ 147{
178 return cpumask_weight(cpu_callout_mask); 148 return cpumask_weight(cpu_callout_mask);
179} 149}
180#else
181static inline void prefill_possible_map(void)
182{
183}
184#endif /* CONFIG_SMP */ 150#endif /* CONFIG_SMP */
185 151
186extern unsigned disabled_cpus __cpuinitdata; 152extern unsigned disabled_cpus __cpuinitdata;
@@ -191,11 +157,11 @@ extern unsigned disabled_cpus __cpuinitdata;
191 * from the initial startup. We map APIC_BASE very early in page_setup(), 157 * from the initial startup. We map APIC_BASE very early in page_setup(),
192 * so this is correct in the x86 case. 158 * so this is correct in the x86 case.
193 */ 159 */
194#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) 160#define raw_smp_processor_id() (percpu_read(cpu_number))
195extern int safe_smp_processor_id(void); 161extern int safe_smp_processor_id(void);
196 162
197#elif defined(CONFIG_X86_64_SMP) 163#elif defined(CONFIG_X86_64_SMP)
198#define raw_smp_processor_id() read_pda(cpunumber) 164#define raw_smp_processor_id() (percpu_read(cpu_number))
199 165
200#define stack_smp_processor_id() \ 166#define stack_smp_processor_id() \
201({ \ 167({ \
@@ -205,10 +171,6 @@ extern int safe_smp_processor_id(void);
205}) 171})
206#define safe_smp_processor_id() smp_processor_id() 172#define safe_smp_processor_id() smp_processor_id()
207 173
208#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
209#define cpu_physical_id(cpu) boot_cpu_physical_apicid
210#define safe_smp_processor_id() 0
211#define stack_smp_processor_id() 0
212#endif 174#endif
213 175
214#ifdef CONFIG_X86_LOCAL_APIC 176#ifdef CONFIG_X86_LOCAL_APIC
@@ -251,11 +213,5 @@ static inline int hard_smp_processor_id(void)
251 213
252#endif /* CONFIG_X86_LOCAL_APIC */ 214#endif /* CONFIG_X86_LOCAL_APIC */
253 215
254#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
255extern unsigned char boot_cpu_id;
256#else
257#define boot_cpu_id 0
258#endif
259
260#endif /* __ASSEMBLY__ */ 216#endif /* __ASSEMBLY__ */
261#endif /* _ASM_X86_SMP_H */ 217#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
new file mode 100644
index 000000000000..36a700acaf2b
--- /dev/null
+++ b/arch/x86/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
1#ifndef _ASM_STACKPROTECTOR_H
2#define _ASM_STACKPROTECTOR_H 1
3
4#include <asm/tsc.h>
5#include <asm/processor.h>
6
7/*
8 * Initialize the stackprotector canary value.
9 *
10 * NOTE: this must only be called from functions that never return,
11 * and it must always be inlined.
12 */
13static __always_inline void boot_init_stack_canary(void)
14{
15 u64 canary;
16 u64 tsc;
17
18 /*
19 * Build time only check to make sure the stack_canary is at
20 * offset 40 in the pda; this is a gcc ABI requirement
21 */
22 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
23
24 /*
25 * We both use the random pool and the current TSC as a source
26 * of randomness. The TSC only matters for very early init,
27 * there it already has some randomness on most systems. Later
28 * on during the bootup the random pool has true entropy too.
29 */
30 get_random_bytes(&canary, sizeof(canary));
31 tsc = __native_read_tsc();
32 canary += tsc + (tsc << 32UL);
33
34 current->stack_canary = canary;
35 percpu_write(irq_stack_union.stack_canary, canary);
36}
37
38#endif
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8e626ea33a1a..2fcc70bc85f3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -86,27 +86,44 @@ do { \
86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ 86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
87 "r12", "r13", "r14", "r15" 87 "r12", "r13", "r14", "r15"
88 88
89#ifdef CONFIG_CC_STACKPROTECTOR
90#define __switch_canary \
91 "movq %P[task_canary](%%rsi),%%r8\n\t" \
92 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
93#define __switch_canary_oparam \
94 , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
95#define __switch_canary_iparam \
96 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
97#else /* CC_STACKPROTECTOR */
98#define __switch_canary
99#define __switch_canary_oparam
100#define __switch_canary_iparam
101#endif /* CC_STACKPROTECTOR */
102
89/* Save restore flags to clear handle leaking NT */ 103/* Save restore flags to clear handle leaking NT */
90#define switch_to(prev, next, last) \ 104#define switch_to(prev, next, last) \
91 asm volatile(SAVE_CONTEXT \ 105 asm volatile(SAVE_CONTEXT \
92 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 106 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
93 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ 107 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
94 "call __switch_to\n\t" \ 108 "call __switch_to\n\t" \
95 ".globl thread_return\n" \ 109 ".globl thread_return\n" \
96 "thread_return:\n\t" \ 110 "thread_return:\n\t" \
97 "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ 111 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
112 __switch_canary \
98 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 113 "movq %P[thread_info](%%rsi),%%r8\n\t" \
99 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ 114 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
100 "movq %%rax,%%rdi\n\t" \ 115 "movq %%rax,%%rdi\n\t" \
101 "jc ret_from_fork\n\t" \ 116 "jc ret_from_fork\n\t" \
102 RESTORE_CONTEXT \ 117 RESTORE_CONTEXT \
103 : "=a" (last) \ 118 : "=a" (last) \
119 __switch_canary_oparam \
104 : [next] "S" (next), [prev] "D" (prev), \ 120 : [next] "S" (next), [prev] "D" (prev), \
105 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ 121 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
106 [ti_flags] "i" (offsetof(struct thread_info, flags)), \ 122 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
107 [tif_fork] "i" (TIF_FORK), \ 123 [tif_fork] "i" (TIF_FORK), \
108 [thread_info] "i" (offsetof(struct task_struct, stack)), \ 124 [thread_info] "i" (offsetof(struct task_struct, stack)), \
109 [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ 125 [current_task] "m" (per_cpu_var(current_task)) \
126 __switch_canary_iparam \
110 : "memory", "cc" __EXTRA_CLOBBER) 127 : "memory", "cc" __EXTRA_CLOBBER)
111#endif 128#endif
112 129
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 98789647baa9..b46f8ca007b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void)
194 194
195#else /* X86_32 */ 195#else /* X86_32 */
196 196
197#include <asm/pda.h> 197#include <asm/percpu.h>
198#define KERNEL_STACK_OFFSET (5*8)
198 199
199/* 200/*
200 * macros/functions for gaining access to the thread information structure 201 * macros/functions for gaining access to the thread information structure
201 * preempt_count needs to be 1 initially, until the scheduler is functional. 202 * preempt_count needs to be 1 initially, until the scheduler is functional.
202 */ 203 */
203#ifndef __ASSEMBLY__ 204#ifndef __ASSEMBLY__
204static inline struct thread_info *current_thread_info(void) 205DECLARE_PER_CPU(unsigned long, kernel_stack);
205{
206 struct thread_info *ti;
207 ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
208 return ti;
209}
210 206
211/* do not use in interrupt context */ 207static inline struct thread_info *current_thread_info(void)
212static inline struct thread_info *stack_thread_info(void)
213{ 208{
214 struct thread_info *ti; 209 struct thread_info *ti;
215 asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); 210 ti = (void *)(percpu_read(kernel_stack) +
211 KERNEL_STACK_OFFSET - THREAD_SIZE);
216 return ti; 212 return ti;
217} 213}
218 214
@@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void)
220 216
221/* how to get the thread information struct from ASM */ 217/* how to get the thread information struct from ASM */
222#define GET_THREAD_INFO(reg) \ 218#define GET_THREAD_INFO(reg) \
223 movq %gs:pda_kernelstack,reg ; \ 219 movq PER_CPU_VAR(kernel_stack),reg ; \
224 subq $(THREAD_SIZE-PDA_STACKOFFSET),reg 220 subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
225 221
226#endif 222#endif
227 223
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index f4e1b550ce61..d3539f998f88 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -148,20 +148,17 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
148#define TLBSTATE_OK 1 148#define TLBSTATE_OK 1
149#define TLBSTATE_LAZY 2 149#define TLBSTATE_LAZY 2
150 150
151#ifdef CONFIG_X86_32
152struct tlb_state { 151struct tlb_state {
153 struct mm_struct *active_mm; 152 struct mm_struct *active_mm;
154 int state; 153 int state;
155 char __cacheline_padding[L1_CACHE_BYTES-8];
156}; 154};
157DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); 155DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
158 156
159void reset_lazy_tlbstate(void);
160#else
161static inline void reset_lazy_tlbstate(void) 157static inline void reset_lazy_tlbstate(void)
162{ 158{
159 percpu_write(cpu_tlbstate.state, 0);
160 percpu_write(cpu_tlbstate.active_mm, &init_mm);
163} 161}
164#endif
165 162
166#endif /* SMP */ 163#endif /* SMP */
167 164
@@ -175,4 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start,
175 flush_tlb_all(); 172 flush_tlb_all();
176} 173}
177 174
175extern void zap_low_mappings(void);
176
178#endif /* _ASM_X86_TLBFLUSH_H */ 177#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index d0c68e291635..10022ed3a4b6 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map;
83DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); 83DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
84 84
85/* Returns the number of the current Node. */ 85/* Returns the number of the current Node. */
86#define numa_node_id() read_pda(nodenumber) 86DECLARE_PER_CPU(int, node_number);
87#define numa_node_id() percpu_read(node_number)
87 88
88#ifdef CONFIG_DEBUG_PER_CPU_MAPS 89#ifdef CONFIG_DEBUG_PER_CPU_MAPS
89extern int cpu_to_node(int cpu); 90extern int cpu_to_node(int cpu);
@@ -102,10 +103,7 @@ static inline int cpu_to_node(int cpu)
102/* Same function but used if called before per_cpu areas are setup */ 103/* Same function but used if called before per_cpu areas are setup */
103static inline int early_cpu_to_node(int cpu) 104static inline int early_cpu_to_node(int cpu)
104{ 105{
105 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 106 return early_per_cpu(x86_cpu_to_node_map, cpu);
106 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
107
108 return per_cpu(x86_cpu_to_node_map, cpu);
109} 107}
110 108
111/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ 109/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 780ba0ab94f9..90f06c25221d 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,6 +13,7 @@ extern unsigned char *trampoline_base;
13 13
14extern unsigned long init_rsp; 14extern unsigned long init_rsp;
15extern unsigned long initial_code; 15extern unsigned long initial_code;
16extern unsigned long initial_gs;
16 17
17#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) 18#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
18#define TRAMPOLINE_BASE 0x6000 19#define TRAMPOLINE_BASE 0x6000
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
new file mode 100644
index 000000000000..dce5fe350134
--- /dev/null
+++ b/arch/x86/include/asm/uv/uv.h
@@ -0,0 +1,33 @@
1#ifndef _ASM_X86_UV_UV_H
2#define _ASM_X86_UV_UV_H
3
4enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
5
6#ifdef CONFIG_X86_64
7
8extern enum uv_system_type get_uv_system_type(void);
9extern int is_uv_system(void);
10extern void uv_cpu_init(void);
11extern void uv_system_init(void);
12extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
13extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
14 struct mm_struct *mm,
15 unsigned long va,
16 unsigned int cpu);
17
18#else /* X86_64 */
19
20static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
21static inline int is_uv_system(void) { return 0; }
22static inline void uv_cpu_init(void) { }
23static inline void uv_system_init(void) { }
24static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
25{ return 1; }
26static inline const struct cpumask *
27uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
28 unsigned long va, unsigned int cpu)
29{ return cpumask; }
30
31#endif /* X86_64 */
32
33#endif /* _ASM_X86_UV_UV_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 74e6393bfddb..9b0e61bf7a88 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
325#define cpubit_isset(cpu, bau_local_cpumask) \ 325#define cpubit_isset(cpu, bau_local_cpumask) \
326 test_bit((cpu), (bau_local_cpumask).bits) 326 test_bit((cpu), (bau_local_cpumask).bits)
327 327
328extern int uv_flush_tlb_others(struct cpumask *,
329 struct mm_struct *, unsigned long);
330extern void uv_bau_message_intr1(void); 328extern void uv_bau_message_intr1(void);
331extern void uv_bau_timeout_intr1(void); 329extern void uv_bau_timeout_intr1(void);
332 330
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d364df03c1d6..0626a88fbb46 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,6 +23,7 @@ nostackp := $(call cc-option, -fno-stack-protector)
23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
24CFLAGS_hpet.o := $(nostackp) 24CFLAGS_hpet.o := $(nostackp)
25CFLAGS_tsc.o := $(nostackp) 25CFLAGS_tsc.o := $(nostackp)
26CFLAGS_paravirt.o := $(nostackp)
26 27
27obj-y := process_$(BITS).o signal.o entry_$(BITS).o 28obj-y := process_$(BITS).o signal.o entry_$(BITS).o
28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
@@ -57,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o
57apm-y := apm_32.o 58apm-y := apm_32.o
58obj-$(CONFIG_APM) += apm.o 59obj-$(CONFIG_APM) += apm.o
59obj-$(CONFIG_X86_SMP) += smp.o 60obj-$(CONFIG_X86_SMP) += smp.o
60obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o 61obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb.o
61obj-$(CONFIG_X86_32_SMP) += smpcommon.o 62obj-$(CONFIG_X86_32_SMP) += smpcommon.o
62obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 63obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
63obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 64obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f438..4cb5964f1499 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id)
912 DECLARE_BITMAP(used, 256); 912 DECLARE_BITMAP(used, 256);
913 bitmap_zero(used, 256); 913 bitmap_zero(used, 256);
914 for (i = 0; i < nr_ioapics; i++) { 914 for (i = 0; i < nr_ioapics; i++) {
915 struct mp_config_ioapic *ia = &mp_ioapics[i]; 915 struct mpc_ioapic *ia = &mp_ioapics[i];
916 __set_bit(ia->mp_apicid, used); 916 __set_bit(ia->apicid, used);
917 } 917 }
918 if (!test_bit(id, used)) 918 if (!test_bit(id, used))
919 return id; 919 return id;
@@ -945,47 +945,47 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
945 945
946 idx = nr_ioapics; 946 idx = nr_ioapics;
947 947
948 mp_ioapics[idx].mp_type = MP_IOAPIC; 948 mp_ioapics[idx].type = MP_IOAPIC;
949 mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; 949 mp_ioapics[idx].flags = MPC_APIC_USABLE;
950 mp_ioapics[idx].mp_apicaddr = address; 950 mp_ioapics[idx].apicaddr = address;
951 951
952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
953 mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); 953 mp_ioapics[idx].apicid = uniq_ioapic_id(id);
954#ifdef CONFIG_X86_32 954#ifdef CONFIG_X86_32
955 mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); 955 mp_ioapics[idx].apicver = io_apic_get_version(idx);
956#else 956#else
957 mp_ioapics[idx].mp_apicver = 0; 957 mp_ioapics[idx].apicver = 0;
958#endif 958#endif
959 /* 959 /*
960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
962 */ 962 */
963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; 963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
964 mp_ioapic_routing[idx].gsi_base = gsi_base; 964 mp_ioapic_routing[idx].gsi_base = gsi_base;
965 mp_ioapic_routing[idx].gsi_end = gsi_base + 965 mp_ioapic_routing[idx].gsi_end = gsi_base +
966 io_apic_get_redir_entries(idx); 966 io_apic_get_redir_entries(idx);
967 967
968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " 968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
969 "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, 969 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
970 mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, 970 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); 971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
972 972
973 nr_ioapics++; 973 nr_ioapics++;
974} 974}
975 975
976static void assign_to_mp_irq(struct mp_config_intsrc *m, 976static void assign_to_mp_irq(struct mpc_intsrc *m,
977 struct mp_config_intsrc *mp_irq) 977 struct mpc_intsrc *mp_irq)
978{ 978{
979 memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); 979 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
980} 980}
981 981
982static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, 982static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
983 struct mp_config_intsrc *m) 983 struct mpc_intsrc *m)
984{ 984{
985 return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); 985 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
986} 986}
987 987
988static void save_mp_irq(struct mp_config_intsrc *m) 988static void save_mp_irq(struct mpc_intsrc *m)
989{ 989{
990 int i; 990 int i;
991 991
@@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1003{ 1003{
1004 int ioapic; 1004 int ioapic;
1005 int pin; 1005 int pin;
1006 struct mp_config_intsrc mp_irq; 1006 struct mpc_intsrc mp_irq;
1007 1007
1008 /* 1008 /*
1009 * Convert 'gsi' to 'ioapic.pin'. 1009 * Convert 'gsi' to 'ioapic.pin'.
@@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1021 if ((bus_irq == 0) && (trigger == 3)) 1021 if ((bus_irq == 0) && (trigger == 3))
1022 trigger = 1; 1022 trigger = 1;
1023 1023
1024 mp_irq.mp_type = MP_INTSRC; 1024 mp_irq.type = MP_INTSRC;
1025 mp_irq.mp_irqtype = mp_INT; 1025 mp_irq.irqtype = mp_INT;
1026 mp_irq.mp_irqflag = (trigger << 2) | polarity; 1026 mp_irq.irqflag = (trigger << 2) | polarity;
1027 mp_irq.mp_srcbus = MP_ISA_BUS; 1027 mp_irq.srcbus = MP_ISA_BUS;
1028 mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ 1028 mp_irq.srcbusirq = bus_irq; /* IRQ */
1029 mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ 1029 mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
1030 mp_irq.mp_dstirq = pin; /* INTIN# */ 1030 mp_irq.dstirq = pin; /* INTIN# */
1031 1031
1032 save_mp_irq(&mp_irq); 1032 save_mp_irq(&mp_irq);
1033} 1033}
@@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1037 int i; 1037 int i;
1038 int ioapic; 1038 int ioapic;
1039 unsigned int dstapic; 1039 unsigned int dstapic;
1040 struct mp_config_intsrc mp_irq; 1040 struct mpc_intsrc mp_irq;
1041 1041
1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA) 1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
1043 /* 1043 /*
@@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1062 ioapic = mp_find_ioapic(0); 1062 ioapic = mp_find_ioapic(0);
1063 if (ioapic < 0) 1063 if (ioapic < 0)
1064 return; 1064 return;
1065 dstapic = mp_ioapics[ioapic].mp_apicid; 1065 dstapic = mp_ioapics[ioapic].apicid;
1066 1066
1067 /* 1067 /*
1068 * Use the default configuration for the IRQs 0-15. Unless 1068 * Use the default configuration for the IRQs 0-15. Unless
@@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void)
1072 int idx; 1072 int idx;
1073 1073
1074 for (idx = 0; idx < mp_irq_entries; idx++) { 1074 for (idx = 0; idx < mp_irq_entries; idx++) {
1075 struct mp_config_intsrc *irq = mp_irqs + idx; 1075 struct mpc_intsrc *irq = mp_irqs + idx;
1076 1076
1077 /* Do we already have a mapping for this ISA IRQ? */ 1077 /* Do we already have a mapping for this ISA IRQ? */
1078 if (irq->mp_srcbus == MP_ISA_BUS 1078 if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
1079 && irq->mp_srcbusirq == i)
1080 break; 1079 break;
1081 1080
1082 /* Do we already have a mapping for this IOAPIC pin */ 1081 /* Do we already have a mapping for this IOAPIC pin */
1083 if (irq->mp_dstapic == dstapic && 1082 if (irq->dstapic == dstapic && irq->dstirq == i)
1084 irq->mp_dstirq == i)
1085 break; 1083 break;
1086 } 1084 }
1087 1085
@@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void)
1090 continue; /* IRQ already used */ 1088 continue; /* IRQ already used */
1091 } 1089 }
1092 1090
1093 mp_irq.mp_type = MP_INTSRC; 1091 mp_irq.type = MP_INTSRC;
1094 mp_irq.mp_irqflag = 0; /* Conforming */ 1092 mp_irq.irqflag = 0; /* Conforming */
1095 mp_irq.mp_srcbus = MP_ISA_BUS; 1093 mp_irq.srcbus = MP_ISA_BUS;
1096 mp_irq.mp_dstapic = dstapic; 1094 mp_irq.dstapic = dstapic;
1097 mp_irq.mp_irqtype = mp_INT; 1095 mp_irq.irqtype = mp_INT;
1098 mp_irq.mp_srcbusirq = i; /* Identity mapped */ 1096 mp_irq.srcbusirq = i; /* Identity mapped */
1099 mp_irq.mp_dstirq = i; 1097 mp_irq.dstirq = i;
1100 1098
1101 save_mp_irq(&mp_irq); 1099 save_mp_irq(&mp_irq);
1102 } 1100 }
@@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
1207 u32 gsi, int triggering, int polarity) 1205 u32 gsi, int triggering, int polarity)
1208{ 1206{
1209#ifdef CONFIG_X86_MPPARSE 1207#ifdef CONFIG_X86_MPPARSE
1210 struct mp_config_intsrc mp_irq; 1208 struct mpc_intsrc mp_irq;
1211 int ioapic; 1209 int ioapic;
1212 1210
1213 if (!acpi_ioapic) 1211 if (!acpi_ioapic)
1214 return 0; 1212 return 0;
1215 1213
1216 /* print the entry should happen on mptable identically */ 1214 /* print the entry should happen on mptable identically */
1217 mp_irq.mp_type = MP_INTSRC; 1215 mp_irq.type = MP_INTSRC;
1218 mp_irq.mp_irqtype = mp_INT; 1216 mp_irq.irqtype = mp_INT;
1219 mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | 1217 mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
1220 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); 1218 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
1221 mp_irq.mp_srcbus = number; 1219 mp_irq.srcbus = number;
1222 mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); 1220 mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
1223 ioapic = mp_find_ioapic(gsi); 1221 ioapic = mp_find_ioapic(gsi);
1224 mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; 1222 mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
1225 mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; 1223 mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
1226 1224
1227 save_mp_irq(&mp_irq); 1225 save_mp_irq(&mp_irq);
1228#endif 1226#endif
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 707c1f6f95fa..4abff454c55b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
101 stack_start.sp = temp_stack + sizeof(temp_stack); 101 stack_start.sp = temp_stack + sizeof(temp_stack);
102 early_gdt_descr.address = 102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
104 initial_gs = per_cpu_offset(smp_processor_id());
104#endif 105#endif
105 initial_code = (unsigned long)wakeup_long64; 106 initial_code = (unsigned long)wakeup_long64;
106 saved_magic = 0x123456789abcdef0; 107 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index db0998641c58..1df341a528a1 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -895,6 +895,10 @@ void disable_local_APIC(void)
895{ 895{
896 unsigned int value; 896 unsigned int value;
897 897
898 /* APIC hasn't been mapped yet */
899 if (!apic_phys)
900 return;
901
898 clear_local_APIC(); 902 clear_local_APIC();
899 903
900 /* 904 /*
@@ -1126,6 +1130,13 @@ void __cpuinit setup_local_APIC(void)
1126 unsigned int value; 1130 unsigned int value;
1127 int i, j; 1131 int i, j;
1128 1132
1133 if (disable_apic) {
1134#ifdef CONFIG_X86_IO_APIC
1135 disable_ioapic_setup();
1136#endif
1137 return;
1138 }
1139
1129#ifdef CONFIG_X86_32 1140#ifdef CONFIG_X86_32
1130 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1141 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1131 if (lapic_is_integrated() && esr_disable) { 1142 if (lapic_is_integrated() && esr_disable) {
@@ -1566,11 +1577,11 @@ int apic_version[MAX_APICS];
1566 1577
1567int __init APIC_init_uniprocessor(void) 1578int __init APIC_init_uniprocessor(void)
1568{ 1579{
1569#ifdef CONFIG_X86_64
1570 if (disable_apic) { 1580 if (disable_apic) {
1571 pr_info("Apic disabled\n"); 1581 pr_info("Apic disabled\n");
1572 return -1; 1582 return -1;
1573 } 1583 }
1584#ifdef CONFIG_X86_64
1574 if (!cpu_has_apic) { 1585 if (!cpu_has_apic) {
1575 disable_apic = 1; 1586 disable_apic = 1;
1576 pr_info("Apic disabled by BIOS\n"); 1587 pr_info("Apic disabled by BIOS\n");
@@ -1873,17 +1884,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1873#endif 1884#endif
1874 1885
1875#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) 1886#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1876 /* are we being called early in kernel startup? */ 1887 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1877 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1888 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1878 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1879 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1880
1881 cpu_to_apicid[cpu] = apicid;
1882 bios_cpu_apicid[cpu] = apicid;
1883 } else {
1884 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1885 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1886 }
1887#endif 1889#endif
1888 1890
1889 set_cpu_possible(cpu, true); 1891 set_cpu_possible(cpu, true);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edbc..8793ab33e2c1 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <linux/kbuild.h> 13#include <linux/kbuild.h>
14#include <asm/pda.h>
15#include <asm/processor.h> 14#include <asm/processor.h>
16#include <asm/segment.h> 15#include <asm/segment.h>
17#include <asm/thread_info.h> 16#include <asm/thread_info.h>
@@ -48,16 +47,6 @@ int main(void)
48#endif 47#endif
49 BLANK(); 48 BLANK();
50#undef ENTRY 49#undef ENTRY
51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
52 ENTRY(kernelstack);
53 ENTRY(oldrsp);
54 ENTRY(pcurrent);
55 ENTRY(irqcount);
56 ENTRY(cpunumber);
57 ENTRY(irqstackptr);
58 ENTRY(data_offset);
59 BLANK();
60#undef ENTRY
61#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
62 BLANK(); 51 BLANK();
63 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); 52 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1f93b1..99904f288d6a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,14 +21,16 @@
21#include <asm/asm.h> 21#include <asm/asm.h>
22#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/smp.h> 23#include <asm/smp.h>
24#include <asm/cpu.h>
25#include <asm/cpumask.h>
24#ifdef CONFIG_X86_LOCAL_APIC 26#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h> 27#include <asm/mpspec.h>
26#include <asm/apic.h> 28#include <asm/apic.h>
27#include <mach_apic.h> 29#include <mach_apic.h>
28#include <asm/genapic.h> 30#include <asm/genapic.h>
31#include <asm/uv/uv.h>
29#endif 32#endif
30 33
31#include <asm/pda.h>
32#include <asm/pgtable.h> 34#include <asm/pgtable.h>
33#include <asm/processor.h> 35#include <asm/processor.h>
34#include <asm/desc.h> 36#include <asm/desc.h>
@@ -62,23 +64,23 @@ cpumask_t cpu_sibling_setup_map;
62 64
63static struct cpu_dev *this_cpu __cpuinitdata; 65static struct cpu_dev *this_cpu __cpuinitdata;
64 66
67DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 68#ifdef CONFIG_X86_64
66/* We need valid kernel segments for data and code in long mode too 69 /*
67 * IRET will check the segment types kkeil 2000/10/28 70 * We need valid kernel segments for data and code in long mode too
68 * Also sysret mandates a special GDT layout 71 * IRET will check the segment types kkeil 2000/10/28
69 */ 72 * Also sysret mandates a special GDT layout
70/* The TLS descriptors are currently at a different place compared to i386. 73 *
71 Hopefully nobody expects them at a fixed place (Wine?) */ 74 * The TLS descriptors are currently at a different place compared to i386.
72DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 75 * Hopefully nobody expects them at a fixed place (Wine?)
76 */
73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 77 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 78 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 79 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 80 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 81 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 82 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
79} };
80#else 83#else
81DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
82 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 84 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
83 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 85 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
84 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 86 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -110,9 +112,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
110 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 112 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
111 113
112 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 114 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
113 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 115 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
114} };
115#endif 116#endif
117} };
116EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 118EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
117 119
118#ifdef CONFIG_X86_32 120#ifdef CONFIG_X86_32
@@ -877,54 +879,26 @@ static __init int setup_disablecpuid(char *arg)
877__setup("clearcpuid=", setup_disablecpuid); 879__setup("clearcpuid=", setup_disablecpuid);
878 880
879#ifdef CONFIG_X86_64 881#ifdef CONFIG_X86_64
880struct x8664_pda **_cpu_pda __read_mostly;
881EXPORT_SYMBOL(_cpu_pda);
882
883struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 882struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
884 883
885static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 884DEFINE_PER_CPU_FIRST(union irq_stack_union,
886 885 irq_stack_union) __aligned(PAGE_SIZE);
887void __cpuinit pda_init(int cpu) 886#ifdef CONFIG_SMP
888{ 887DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
889 struct x8664_pda *pda = cpu_pda(cpu); 888#else
889DEFINE_PER_CPU(char *, irq_stack_ptr) =
890 per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
891#endif
890 892
891 /* Setup up data that may be needed in __get_free_pages early */ 893DEFINE_PER_CPU(unsigned long, kernel_stack) =
892 loadsegment(fs, 0); 894 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
893 loadsegment(gs, 0); 895EXPORT_PER_CPU_SYMBOL(kernel_stack);
894 /* Memory clobbers used to order PDA accessed */
895 mb();
896 wrmsrl(MSR_GS_BASE, pda);
897 mb();
898
899 pda->cpunumber = cpu;
900 pda->irqcount = -1;
901 pda->kernelstack = (unsigned long)stack_thread_info() -
902 PDA_STACKOFFSET + THREAD_SIZE;
903 pda->active_mm = &init_mm;
904 pda->mmu_state = 0;
905
906 if (cpu == 0) {
907 /* others are initialized in smpboot.c */
908 pda->pcurrent = &init_task;
909 pda->irqstackptr = boot_cpu_stack;
910 pda->irqstackptr += IRQSTACKSIZE - 64;
911 } else {
912 if (!pda->irqstackptr) {
913 pda->irqstackptr = (char *)
914 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
915 if (!pda->irqstackptr)
916 panic("cannot allocate irqstack for cpu %d",
917 cpu);
918 pda->irqstackptr += IRQSTACKSIZE - 64;
919 }
920 896
921 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 897DEFINE_PER_CPU(unsigned int, irq_count) = -1;
922 pda->nodenumber = cpu_to_node(cpu);
923 }
924}
925 898
926static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 899static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
927 DEBUG_STKSZ] __page_aligned_bss; 900 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
901 __aligned(PAGE_SIZE);
928 902
929extern asmlinkage void ignore_sysret(void); 903extern asmlinkage void ignore_sysret(void);
930 904
@@ -982,15 +956,18 @@ void __cpuinit cpu_init(void)
982 struct tss_struct *t = &per_cpu(init_tss, cpu); 956 struct tss_struct *t = &per_cpu(init_tss, cpu);
983 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); 957 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
984 unsigned long v; 958 unsigned long v;
985 char *estacks = NULL;
986 struct task_struct *me; 959 struct task_struct *me;
987 int i; 960 int i;
988 961
989 /* CPU 0 is initialised in head64.c */ 962 loadsegment(fs, 0);
990 if (cpu != 0) 963 loadsegment(gs, 0);
991 pda_init(cpu); 964 load_gs_base(cpu);
992 else 965
993 estacks = boot_exception_stacks; 966#ifdef CONFIG_NUMA
967 if (cpu != 0 && percpu_read(node_number) == 0 &&
968 cpu_to_node(cpu) != NUMA_NO_NODE)
969 percpu_write(node_number, cpu_to_node(cpu));
970#endif
994 971
995 me = current; 972 me = current;
996 973
@@ -1024,18 +1001,13 @@ void __cpuinit cpu_init(void)
1024 * set up and load the per-CPU TSS 1001 * set up and load the per-CPU TSS
1025 */ 1002 */
1026 if (!orig_ist->ist[0]) { 1003 if (!orig_ist->ist[0]) {
1027 static const unsigned int order[N_EXCEPTION_STACKS] = { 1004 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1028 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1005 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1029 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 1006 [DEBUG_STACK - 1] = DEBUG_STKSZ
1030 }; 1007 };
1008 char *estacks = per_cpu(exception_stacks, cpu);
1031 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1009 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1032 if (cpu) { 1010 estacks += sizes[v];
1033 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1034 if (!estacks)
1035 panic("Cannot allocate exception "
1036 "stack %ld %d\n", v, cpu);
1037 }
1038 estacks += PAGE_SIZE << order[v];
1039 orig_ist->ist[v] = t->x86_tss.ist[v] = 1011 orig_ist->ist[v] = t->x86_tss.ist[v] =
1040 (unsigned long)estacks; 1012 (unsigned long)estacks;
1041 } 1013 }
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19e35ab..11b93cabdf78 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,7 +24,7 @@
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/hpet.h> 25#include <asm/hpet.h>
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30 30
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d0707048..d35db5993fd6 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
106 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
107{ 107{
108 const unsigned cpu = get_cpu(); 108 const unsigned cpu = get_cpu();
109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irq_stack_end =
110 (unsigned long *)per_cpu(irq_stack_ptr, cpu);
110 unsigned used = 0; 111 unsigned used = 0;
111 struct thread_info *tinfo; 112 struct thread_info *tinfo;
112 int graph = 0; 113 int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *) estack_end[-2]; 161 stack = (unsigned long *) estack_end[-2];
161 continue; 162 continue;
162 } 163 }
163 if (irqstack_end) { 164 if (irq_stack_end) {
164 unsigned long *irqstack; 165 unsigned long *irq_stack;
165 irqstack = irqstack_end - 166 irq_stack = irq_stack_end -
166 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 167 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
167 168
168 if (stack >= irqstack && stack < irqstack_end) { 169 if (stack >= irq_stack && stack < irq_stack_end) {
169 if (ops->stack(data, "IRQ") < 0) 170 if (ops->stack(data, "IRQ") < 0)
170 break; 171 break;
171 bp = print_context_stack(tinfo, stack, bp, 172 bp = print_context_stack(tinfo, stack, bp,
172 ops, data, irqstack_end, &graph); 173 ops, data, irq_stack_end, &graph);
173 /* 174 /*
174 * We link to the next stack (which would be 175 * We link to the next stack (which would be
175 * the process stack normally) the last 176 * the process stack normally) the last
176 * pointer (index -1 to end) in the IRQ stack: 177 * pointer (index -1 to end) in the IRQ stack:
177 */ 178 */
178 stack = (unsigned long *) (irqstack_end[-1]); 179 stack = (unsigned long *) (irq_stack_end[-1]);
179 irqstack_end = NULL; 180 irq_stack_end = NULL;
180 ops->stack(data, "EOI"); 181 ops->stack(data, "EOI");
181 continue; 182 continue;
182 } 183 }
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
199 unsigned long *stack; 200 unsigned long *stack;
200 int i; 201 int i;
201 const int cpu = smp_processor_id(); 202 const int cpu = smp_processor_id();
202 unsigned long *irqstack_end = 203 unsigned long *irq_stack_end =
203 (unsigned long *) (cpu_pda(cpu)->irqstackptr); 204 (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
204 unsigned long *irqstack = 205 unsigned long *irq_stack =
205 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 206 (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
206 207
207 /* 208 /*
208 * debugging aid: "show_stack(NULL, NULL);" prints the 209 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
218 219
219 stack = sp; 220 stack = sp;
220 for (i = 0; i < kstack_depth_to_print; i++) { 221 for (i = 0; i < kstack_depth_to_print; i++) {
221 if (stack >= irqstack && stack <= irqstack_end) { 222 if (stack >= irq_stack && stack <= irq_stack_end) {
222 if (stack == irqstack_end) { 223 if (stack == irq_stack_end) {
223 stack = (unsigned long *) (irqstack_end[-1]); 224 stack = (unsigned long *) (irq_stack_end[-1]);
224 printk(" <EOI> "); 225 printk(" <EOI> ");
225 } 226 }
226 } else { 227 } else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
241 int i; 242 int i;
242 unsigned long sp; 243 unsigned long sp;
243 const int cpu = smp_processor_id(); 244 const int cpu = smp_processor_id();
244 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 245 struct task_struct *cur = current;
245 246
246 sp = regs->sp; 247 sp = regs->sp;
247 printk("CPU %d ", cpu); 248 printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..a0b91aac72a1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -672,7 +672,7 @@ common_interrupt:
672ENDPROC(common_interrupt) 672ENDPROC(common_interrupt)
673 CFI_ENDPROC 673 CFI_ENDPROC
674 674
675#define BUILD_INTERRUPT(name, nr) \ 675#define BUILD_INTERRUPT3(name, nr, fn) \
676ENTRY(name) \ 676ENTRY(name) \
677 RING0_INT_FRAME; \ 677 RING0_INT_FRAME; \
678 pushl $~(nr); \ 678 pushl $~(nr); \
@@ -680,11 +680,13 @@ ENTRY(name) \
680 SAVE_ALL; \ 680 SAVE_ALL; \
681 TRACE_IRQS_OFF \ 681 TRACE_IRQS_OFF \
682 movl %esp,%eax; \ 682 movl %esp,%eax; \
683 call smp_##name; \ 683 call fn; \
684 jmp ret_from_intr; \ 684 jmp ret_from_intr; \
685 CFI_ENDPROC; \ 685 CFI_ENDPROC; \
686ENDPROC(name) 686ENDPROC(name)
687 687
688#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
689
688/* The include is where all of the SMP etc. interrupts come from */ 690/* The include is where all of the SMP etc. interrupts come from */
689#include "entry_arch.h" 691#include "entry_arch.h"
690 692
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e28c7a987793..c52b60919163 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -52,6 +52,7 @@
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/percpu.h>
55 56
56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h> 58#include <linux/elf-em.h>
@@ -209,7 +210,7 @@ ENTRY(native_usergs_sysret64)
209 210
210 /* %rsp:at FRAMEEND */ 211 /* %rsp:at FRAMEEND */
211 .macro FIXUP_TOP_OF_STACK tmp offset=0 212 .macro FIXUP_TOP_OF_STACK tmp offset=0
212 movq %gs:pda_oldrsp,\tmp 213 movq PER_CPU_VAR(old_rsp),\tmp
213 movq \tmp,RSP+\offset(%rsp) 214 movq \tmp,RSP+\offset(%rsp)
214 movq $__USER_DS,SS+\offset(%rsp) 215 movq $__USER_DS,SS+\offset(%rsp)
215 movq $__USER_CS,CS+\offset(%rsp) 216 movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +221,7 @@ ENTRY(native_usergs_sysret64)
220 221
221 .macro RESTORE_TOP_OF_STACK tmp offset=0 222 .macro RESTORE_TOP_OF_STACK tmp offset=0
222 movq RSP+\offset(%rsp),\tmp 223 movq RSP+\offset(%rsp),\tmp
223 movq \tmp,%gs:pda_oldrsp 224 movq \tmp,PER_CPU_VAR(old_rsp)
224 movq EFLAGS+\offset(%rsp),\tmp 225 movq EFLAGS+\offset(%rsp),\tmp
225 movq \tmp,R11+\offset(%rsp) 226 movq \tmp,R11+\offset(%rsp)
226 .endm 227 .endm
@@ -336,15 +337,15 @@ ENTRY(save_args)
336 je 1f 337 je 1f
337 SWAPGS 338 SWAPGS
338 /* 339 /*
339 * irqcount is used to check if a CPU is already on an interrupt stack 340 * irq_count is used to check if a CPU is already on an interrupt stack
340 * or not. While this is essentially redundant with preempt_count it is 341 * or not. While this is essentially redundant with preempt_count it is
341 * a little cheaper to use a separate counter in the PDA (short of 342 * a little cheaper to use a separate counter in the PDA (short of
342 * moving irq_enter into assembly, which would be too much work) 343 * moving irq_enter into assembly, which would be too much work)
343 */ 344 */
3441: incl %gs:pda_irqcount 3451: incl PER_CPU_VAR(irq_count)
345 jne 2f 346 jne 2f
346 popq_cfi %rax /* move return address... */ 347 popq_cfi %rax /* move return address... */
347 mov %gs:pda_irqstackptr,%rsp 348 mov PER_CPU_VAR(irq_stack_ptr),%rsp
348 EMPTY_FRAME 0 349 EMPTY_FRAME 0
349 pushq_cfi %rax /* ... to the new stack */ 350 pushq_cfi %rax /* ... to the new stack */
350 /* 351 /*
@@ -467,7 +468,7 @@ END(ret_from_fork)
467ENTRY(system_call) 468ENTRY(system_call)
468 CFI_STARTPROC simple 469 CFI_STARTPROC simple
469 CFI_SIGNAL_FRAME 470 CFI_SIGNAL_FRAME
470 CFI_DEF_CFA rsp,PDA_STACKOFFSET 471 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
471 CFI_REGISTER rip,rcx 472 CFI_REGISTER rip,rcx
472 /*CFI_REGISTER rflags,r11*/ 473 /*CFI_REGISTER rflags,r11*/
473 SWAPGS_UNSAFE_STACK 474 SWAPGS_UNSAFE_STACK
@@ -478,8 +479,8 @@ ENTRY(system_call)
478 */ 479 */
479ENTRY(system_call_after_swapgs) 480ENTRY(system_call_after_swapgs)
480 481
481 movq %rsp,%gs:pda_oldrsp 482 movq %rsp,PER_CPU_VAR(old_rsp)
482 movq %gs:pda_kernelstack,%rsp 483 movq PER_CPU_VAR(kernel_stack),%rsp
483 /* 484 /*
484 * No need to follow this irqs off/on section - it's straight 485 * No need to follow this irqs off/on section - it's straight
485 * and short: 486 * and short:
@@ -522,7 +523,7 @@ sysret_check:
522 CFI_REGISTER rip,rcx 523 CFI_REGISTER rip,rcx
523 RESTORE_ARGS 0,-ARG_SKIP,1 524 RESTORE_ARGS 0,-ARG_SKIP,1
524 /*CFI_REGISTER rflags,r11*/ 525 /*CFI_REGISTER rflags,r11*/
525 movq %gs:pda_oldrsp, %rsp 526 movq PER_CPU_VAR(old_rsp), %rsp
526 USERGS_SYSRET64 527 USERGS_SYSRET64
527 528
528 CFI_RESTORE_STATE 529 CFI_RESTORE_STATE
@@ -832,11 +833,11 @@ common_interrupt:
832 XCPT_FRAME 833 XCPT_FRAME
833 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 834 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
834 interrupt do_IRQ 835 interrupt do_IRQ
835 /* 0(%rsp): oldrsp-ARGOFFSET */ 836 /* 0(%rsp): old_rsp-ARGOFFSET */
836ret_from_intr: 837ret_from_intr:
837 DISABLE_INTERRUPTS(CLBR_NONE) 838 DISABLE_INTERRUPTS(CLBR_NONE)
838 TRACE_IRQS_OFF 839 TRACE_IRQS_OFF
839 decl %gs:pda_irqcount 840 decl PER_CPU_VAR(irq_count)
840 leaveq 841 leaveq
841 CFI_DEF_CFA_REGISTER rsp 842 CFI_DEF_CFA_REGISTER rsp
842 CFI_ADJUST_CFA_OFFSET -8 843 CFI_ADJUST_CFA_OFFSET -8
@@ -1072,10 +1073,10 @@ ENTRY(\sym)
1072 TRACE_IRQS_OFF 1073 TRACE_IRQS_OFF
1073 movq %rsp,%rdi /* pt_regs pointer */ 1074 movq %rsp,%rdi /* pt_regs pointer */
1074 xorl %esi,%esi /* no error code */ 1075 xorl %esi,%esi /* no error code */
1075 movq %gs:pda_data_offset, %rbp 1076 PER_CPU(init_tss, %rbp)
1076 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1077 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1077 call \do_sym 1078 call \do_sym
1078 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1079 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1079 jmp paranoid_exit /* %ebx: no swapgs flag */ 1080 jmp paranoid_exit /* %ebx: no swapgs flag */
1080 CFI_ENDPROC 1081 CFI_ENDPROC
1081END(\sym) 1082END(\sym)
@@ -1259,14 +1260,14 @@ ENTRY(call_softirq)
1259 CFI_REL_OFFSET rbp,0 1260 CFI_REL_OFFSET rbp,0
1260 mov %rsp,%rbp 1261 mov %rsp,%rbp
1261 CFI_DEF_CFA_REGISTER rbp 1262 CFI_DEF_CFA_REGISTER rbp
1262 incl %gs:pda_irqcount 1263 incl PER_CPU_VAR(irq_count)
1263 cmove %gs:pda_irqstackptr,%rsp 1264 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1264 push %rbp # backlink for old unwinder 1265 push %rbp # backlink for old unwinder
1265 call __do_softirq 1266 call __do_softirq
1266 leaveq 1267 leaveq
1267 CFI_DEF_CFA_REGISTER rsp 1268 CFI_DEF_CFA_REGISTER rsp
1268 CFI_ADJUST_CFA_OFFSET -8 1269 CFI_ADJUST_CFA_OFFSET -8
1269 decl %gs:pda_irqcount 1270 decl PER_CPU_VAR(irq_count)
1270 ret 1271 ret
1271 CFI_ENDPROC 1272 CFI_ENDPROC
1272END(call_softirq) 1273END(call_softirq)
@@ -1296,15 +1297,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1296 movq %rdi, %rsp # we don't return, adjust the stack frame 1297 movq %rdi, %rsp # we don't return, adjust the stack frame
1297 CFI_ENDPROC 1298 CFI_ENDPROC
1298 DEFAULT_FRAME 1299 DEFAULT_FRAME
129911: incl %gs:pda_irqcount 130011: incl PER_CPU_VAR(irq_count)
1300 movq %rsp,%rbp 1301 movq %rsp,%rbp
1301 CFI_DEF_CFA_REGISTER rbp 1302 CFI_DEF_CFA_REGISTER rbp
1302 cmovzq %gs:pda_irqstackptr,%rsp 1303 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1303 pushq %rbp # backlink for old unwinder 1304 pushq %rbp # backlink for old unwinder
1304 call xen_evtchn_do_upcall 1305 call xen_evtchn_do_upcall
1305 popq %rsp 1306 popq %rsp
1306 CFI_DEF_CFA_REGISTER rsp 1307 CFI_DEF_CFA_REGISTER rsp
1307 decl %gs:pda_irqcount 1308 decl PER_CPU_VAR(irq_count)
1308 jmp error_exit 1309 jmp error_exit
1309 CFI_ENDPROC 1310 CFI_ENDPROC
1310END(do_hypervisor_callback) 1311END(do_hypervisor_callback)
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b193e082f6ce..bfe36249145c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -25,6 +25,7 @@
25#include <asm/ipi.h> 25#include <asm/ipi.h>
26#include <asm/genapic.h> 26#include <asm/genapic.h>
27#include <asm/pgtable.h> 27#include <asm/pgtable.h>
28#include <asm/uv/uv.h>
28#include <asm/uv/uv_mmrs.h> 29#include <asm/uv/uv_mmrs.h>
29#include <asm/uv/uv_hub.h> 30#include <asm/uv/uv_hub.h>
30#include <asm/uv/bios.h> 31#include <asm/uv/bios.h>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c4b935..f5b272247690 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,27 +26,6 @@
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda;
31
32#ifdef CONFIG_SMP
33/*
34 * We install an empty cpu_pda pointer table to indicate to early users
35 * (numa_set_node) that the cpu_pda pointer table for cpus other than
36 * the boot cpu is not yet setup.
37 */
38static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
39#else
40static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
41#endif
42
43void __init x86_64_init_pda(void)
44{
45 _cpu_pda = __cpu_pda;
46 cpu_pda(0) = &_boot_cpu_pda;
47 pda_init(0);
48}
49
50static void __init zap_identity_mappings(void) 29static void __init zap_identity_mappings(void)
51{ 30{
52 pgd_t *pgd = pgd_offset_k(0UL); 31 pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
112 if (console_loglevel == 10) 91 if (console_loglevel == 10)
113 early_printk("Kernel alive\n"); 92 early_printk("Kernel alive\n");
114 93
115 x86_64_init_pda();
116
117 x86_64_start_reservations(real_mode_data); 94 x86_64_start_reservations(real_mode_data);
118} 95}
119 96
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4eea70b..24c0e5cd71e3 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP
429 ljmp $(__KERNEL_CS),$1f 429 ljmp $(__KERNEL_CS),$1f
4301: movl $(__KERNEL_DS),%eax # reload all the segment registers 4301: movl $(__KERNEL_DS),%eax # reload all the segment registers
431 movl %eax,%ss # after changing gdt. 431 movl %eax,%ss # after changing gdt.
432 movl %eax,%fs # gets reset once there's real percpu
433 432
434 movl $(__USER_DS),%eax # DS/ES contains default USER segment 433 movl $(__USER_DS),%eax # DS/ES contains default USER segment
435 movl %eax,%ds 434 movl %eax,%ds
436 movl %eax,%es 435 movl %eax,%es
437 436
437 movl $(__KERNEL_PERCPU), %eax
438 movl %eax,%fs # set this cpu's percpu
439
438 xorl %eax,%eax # Clear GS and LDT 440 xorl %eax,%eax # Clear GS and LDT
439 movl %eax,%gs 441 movl %eax,%gs
440 lldt %ax 442 lldt %ax
@@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP
446 movb $1, ready 448 movb $1, ready
447 cmpb $0,%cl # the first CPU calls start_kernel 449 cmpb $0,%cl # the first CPU calls start_kernel
448 je 1f 450 je 1f
449 movl $(__KERNEL_PERCPU), %eax
450 movl %eax,%fs # set this cpu's percpu
451 movl (stack_start), %esp 451 movl (stack_start), %esp
4521: 4521:
453#endif /* CONFIG_SMP */ 453#endif /* CONFIG_SMP */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d495563..a0a2b5ca9b7d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
@@ -204,6 +205,19 @@ ENTRY(secondary_startup_64)
204 pushq $0 205 pushq $0
205 popfq 206 popfq
206 207
208#ifdef CONFIG_SMP
209 /*
210 * Fix up static pointers that need __per_cpu_load added. The assembler
211 * is unable to do this directly. This is only needed for the boot cpu.
212 * These values are set up with the correct base addresses by C code for
213 * secondary cpus.
214 */
215 movq initial_gs(%rip), %rax
216 cmpl $0, per_cpu__cpu_number(%rax)
217 jne 1f
218 addq %rax, early_gdt_descr_base(%rip)
2191:
220#endif
207 /* 221 /*
208 * We must switch to a new descriptor in kernel space for the GDT 222 * We must switch to a new descriptor in kernel space for the GDT
209 * because soon the kernel won't have access anymore to the userspace 223 * because soon the kernel won't have access anymore to the userspace
@@ -226,12 +240,15 @@ ENTRY(secondary_startup_64)
226 movl %eax,%fs 240 movl %eax,%fs
227 movl %eax,%gs 241 movl %eax,%gs
228 242
229 /* 243 /* Set up %gs.
230 * Setup up a dummy PDA. this is just for some early bootup code 244 *
231 * that does in_interrupt() 245 * The base of %gs always points to the bottom of the irqstack
232 */ 246 * union. If the stack protector canary is enabled, it is
247 * located at %gs:40. Note that, on SMP, the boot cpu uses
248 * init data section till per cpu areas are set up.
249 */
233 movl $MSR_GS_BASE,%ecx 250 movl $MSR_GS_BASE,%ecx
234 movq $empty_zero_page,%rax 251 movq initial_gs(%rip),%rax
235 movq %rax,%rdx 252 movq %rax,%rdx
236 shrq $32,%rdx 253 shrq $32,%rdx
237 wrmsr 254 wrmsr
@@ -257,6 +274,12 @@ ENTRY(secondary_startup_64)
257 .align 8 274 .align 8
258 ENTRY(initial_code) 275 ENTRY(initial_code)
259 .quad x86_64_start_kernel 276 .quad x86_64_start_kernel
277 ENTRY(initial_gs)
278#ifdef CONFIG_SMP
279 .quad __per_cpu_load
280#else
281 .quad PER_CPU_VAR(irq_stack_union)
282#endif
260 __FINITDATA 283 __FINITDATA
261 284
262 ENTRY(stack_start) 285 ENTRY(stack_start)
@@ -401,7 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
401 .globl early_gdt_descr 424 .globl early_gdt_descr
402early_gdt_descr: 425early_gdt_descr:
403 .word GDT_ENTRIES*8-1 426 .word GDT_ENTRIES*8-1
404 .quad per_cpu__gdt_page 427early_gdt_descr_base:
428 .quad per_cpu__gdt_page
405 429
406ENTRY(phys_base) 430ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 431 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 157986916cd1..f79660390724 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -46,6 +46,7 @@
46#include <asm/idle.h> 46#include <asm/idle.h>
47#include <asm/io.h> 47#include <asm/io.h>
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/cpu.h>
49#include <asm/desc.h> 50#include <asm/desc.h>
50#include <asm/proto.h> 51#include <asm/proto.h>
51#include <asm/acpi.h> 52#include <asm/acpi.h>
@@ -82,11 +83,11 @@ static DEFINE_SPINLOCK(vector_lock);
82int nr_ioapic_registers[MAX_IO_APICS]; 83int nr_ioapic_registers[MAX_IO_APICS];
83 84
84/* I/O APIC entries */ 85/* I/O APIC entries */
85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 86struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86int nr_ioapics; 87int nr_ioapics;
87 88
88/* MP IRQ source entries */ 89/* MP IRQ source entries */
89struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 90struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
90 91
91/* # of MP IRQ source entries */ 92/* # of MP IRQ source entries */
92int mp_irq_entries; 93int mp_irq_entries;
@@ -386,7 +387,7 @@ struct io_apic {
386static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 387static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
387{ 388{
388 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 389 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
389 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); 390 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
390} 391}
391 392
392static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 393static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -944,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type)
944 int i; 945 int i;
945 946
946 for (i = 0; i < mp_irq_entries; i++) 947 for (i = 0; i < mp_irq_entries; i++)
947 if (mp_irqs[i].mp_irqtype == type && 948 if (mp_irqs[i].irqtype == type &&
948 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || 949 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
949 mp_irqs[i].mp_dstapic == MP_APIC_ALL) && 950 mp_irqs[i].dstapic == MP_APIC_ALL) &&
950 mp_irqs[i].mp_dstirq == pin) 951 mp_irqs[i].dstirq == pin)
951 return i; 952 return i;
952 953
953 return -1; 954 return -1;
@@ -961,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type)
961 int i; 962 int i;
962 963
963 for (i = 0; i < mp_irq_entries; i++) { 964 for (i = 0; i < mp_irq_entries; i++) {
964 int lbus = mp_irqs[i].mp_srcbus; 965 int lbus = mp_irqs[i].srcbus;
965 966
966 if (test_bit(lbus, mp_bus_not_pci) && 967 if (test_bit(lbus, mp_bus_not_pci) &&
967 (mp_irqs[i].mp_irqtype == type) && 968 (mp_irqs[i].irqtype == type) &&
968 (mp_irqs[i].mp_srcbusirq == irq)) 969 (mp_irqs[i].srcbusirq == irq))
969 970
970 return mp_irqs[i].mp_dstirq; 971 return mp_irqs[i].dstirq;
971 } 972 }
972 return -1; 973 return -1;
973} 974}
@@ -977,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type)
977 int i; 978 int i;
978 979
979 for (i = 0; i < mp_irq_entries; i++) { 980 for (i = 0; i < mp_irq_entries; i++) {
980 int lbus = mp_irqs[i].mp_srcbus; 981 int lbus = mp_irqs[i].srcbus;
981 982
982 if (test_bit(lbus, mp_bus_not_pci) && 983 if (test_bit(lbus, mp_bus_not_pci) &&
983 (mp_irqs[i].mp_irqtype == type) && 984 (mp_irqs[i].irqtype == type) &&
984 (mp_irqs[i].mp_srcbusirq == irq)) 985 (mp_irqs[i].srcbusirq == irq))
985 break; 986 break;
986 } 987 }
987 if (i < mp_irq_entries) { 988 if (i < mp_irq_entries) {
988 int apic; 989 int apic;
989 for(apic = 0; apic < nr_ioapics; apic++) { 990 for(apic = 0; apic < nr_ioapics; apic++) {
990 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) 991 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
991 return apic; 992 return apic;
992 } 993 }
993 } 994 }
@@ -1012,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1012 return -1; 1013 return -1;
1013 } 1014 }
1014 for (i = 0; i < mp_irq_entries; i++) { 1015 for (i = 0; i < mp_irq_entries; i++) {
1015 int lbus = mp_irqs[i].mp_srcbus; 1016 int lbus = mp_irqs[i].srcbus;
1016 1017
1017 for (apic = 0; apic < nr_ioapics; apic++) 1018 for (apic = 0; apic < nr_ioapics; apic++)
1018 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || 1019 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1019 mp_irqs[i].mp_dstapic == MP_APIC_ALL) 1020 mp_irqs[i].dstapic == MP_APIC_ALL)
1020 break; 1021 break;
1021 1022
1022 if (!test_bit(lbus, mp_bus_not_pci) && 1023 if (!test_bit(lbus, mp_bus_not_pci) &&
1023 !mp_irqs[i].mp_irqtype && 1024 !mp_irqs[i].irqtype &&
1024 (bus == lbus) && 1025 (bus == lbus) &&
1025 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { 1026 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1026 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); 1027 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1027 1028
1028 if (!(apic || IO_APIC_IRQ(irq))) 1029 if (!(apic || IO_APIC_IRQ(irq)))
1029 continue; 1030 continue;
1030 1031
1031 if (pin == (mp_irqs[i].mp_srcbusirq & 3)) 1032 if (pin == (mp_irqs[i].srcbusirq & 3))
1032 return irq; 1033 return irq;
1033 /* 1034 /*
1034 * Use the first all-but-pin matching entry as a 1035 * Use the first all-but-pin matching entry as a
@@ -1071,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq)
1071 * EISA conforming in the MP table, that means its trigger type must 1072 * EISA conforming in the MP table, that means its trigger type must
1072 * be read in from the ELCR */ 1073 * be read in from the ELCR */
1073 1074
1074#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) 1075#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
1075#define default_EISA_polarity(idx) default_ISA_polarity(idx) 1076#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1076 1077
1077/* PCI interrupts are always polarity one level triggered, 1078/* PCI interrupts are always polarity one level triggered,
@@ -1088,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq)
1088 1089
1089static int MPBIOS_polarity(int idx) 1090static int MPBIOS_polarity(int idx)
1090{ 1091{
1091 int bus = mp_irqs[idx].mp_srcbus; 1092 int bus = mp_irqs[idx].srcbus;
1092 int polarity; 1093 int polarity;
1093 1094
1094 /* 1095 /*
1095 * Determine IRQ line polarity (high active or low active): 1096 * Determine IRQ line polarity (high active or low active):
1096 */ 1097 */
1097 switch (mp_irqs[idx].mp_irqflag & 3) 1098 switch (mp_irqs[idx].irqflag & 3)
1098 { 1099 {
1099 case 0: /* conforms, ie. bus-type dependent polarity */ 1100 case 0: /* conforms, ie. bus-type dependent polarity */
1100 if (test_bit(bus, mp_bus_not_pci)) 1101 if (test_bit(bus, mp_bus_not_pci))
@@ -1130,13 +1131,13 @@ static int MPBIOS_polarity(int idx)
1130 1131
1131static int MPBIOS_trigger(int idx) 1132static int MPBIOS_trigger(int idx)
1132{ 1133{
1133 int bus = mp_irqs[idx].mp_srcbus; 1134 int bus = mp_irqs[idx].srcbus;
1134 int trigger; 1135 int trigger;
1135 1136
1136 /* 1137 /*
1137 * Determine IRQ trigger mode (edge or level sensitive): 1138 * Determine IRQ trigger mode (edge or level sensitive):
1138 */ 1139 */
1139 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) 1140 switch ((mp_irqs[idx].irqflag>>2) & 3)
1140 { 1141 {
1141 case 0: /* conforms, ie. bus-type dependent */ 1142 case 0: /* conforms, ie. bus-type dependent */
1142 if (test_bit(bus, mp_bus_not_pci)) 1143 if (test_bit(bus, mp_bus_not_pci))
@@ -1214,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq);
1214static int pin_2_irq(int idx, int apic, int pin) 1215static int pin_2_irq(int idx, int apic, int pin)
1215{ 1216{
1216 int irq, i; 1217 int irq, i;
1217 int bus = mp_irqs[idx].mp_srcbus; 1218 int bus = mp_irqs[idx].srcbus;
1218 1219
1219 /* 1220 /*
1220 * Debugging check, we are in big trouble if this message pops up! 1221 * Debugging check, we are in big trouble if this message pops up!
1221 */ 1222 */
1222 if (mp_irqs[idx].mp_dstirq != pin) 1223 if (mp_irqs[idx].dstirq != pin)
1223 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1224 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1224 1225
1225 if (test_bit(bus, mp_bus_not_pci)) { 1226 if (test_bit(bus, mp_bus_not_pci)) {
1226 irq = mp_irqs[idx].mp_srcbusirq; 1227 irq = mp_irqs[idx].srcbusirq;
1227 } else { 1228 } else {
1228 /* 1229 /*
1229 * PCI IRQs are mapped in order 1230 * PCI IRQs are mapped in order
@@ -1566,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1566 apic_printk(APIC_VERBOSE,KERN_DEBUG 1567 apic_printk(APIC_VERBOSE,KERN_DEBUG
1567 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1568 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1568 "IRQ %d Mode:%i Active:%i)\n", 1569 "IRQ %d Mode:%i Active:%i)\n",
1569 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1570 apic, mp_ioapics[apic].apicid, pin, cfg->vector,
1570 irq, trigger, polarity); 1571 irq, trigger, polarity);
1571 1572
1572 1573
1573 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1574 if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry,
1574 dest, trigger, polarity, cfg->vector)) { 1575 dest, trigger, polarity, cfg->vector)) {
1575 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1576 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1576 mp_ioapics[apic].mp_apicid, pin); 1577 mp_ioapics[apic].apicid, pin);
1577 __clear_irq_vector(irq, cfg); 1578 __clear_irq_vector(irq, cfg);
1578 return; 1579 return;
1579 } 1580 }
@@ -1604,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void)
1604 notcon = 1; 1605 notcon = 1;
1605 apic_printk(APIC_VERBOSE, 1606 apic_printk(APIC_VERBOSE,
1606 KERN_DEBUG " %d-%d", 1607 KERN_DEBUG " %d-%d",
1607 mp_ioapics[apic].mp_apicid, 1608 mp_ioapics[apic].apicid, pin);
1608 pin);
1609 } else 1609 } else
1610 apic_printk(APIC_VERBOSE, " %d-%d", 1610 apic_printk(APIC_VERBOSE, " %d-%d",
1611 mp_ioapics[apic].mp_apicid, 1611 mp_ioapics[apic].apicid, pin);
1612 pin);
1613 continue; 1612 continue;
1614 } 1613 }
1615 if (notcon) { 1614 if (notcon) {
@@ -1699,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1699 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1698 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1700 for (i = 0; i < nr_ioapics; i++) 1699 for (i = 0; i < nr_ioapics; i++)
1701 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1700 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1702 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); 1701 mp_ioapics[i].apicid, nr_ioapic_registers[i]);
1703 1702
1704 /* 1703 /*
1705 * We are a bit conservative about what we expect. We have to 1704 * We are a bit conservative about what we expect. We have to
@@ -1719,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1719 spin_unlock_irqrestore(&ioapic_lock, flags); 1718 spin_unlock_irqrestore(&ioapic_lock, flags);
1720 1719
1721 printk("\n"); 1720 printk("\n");
1722 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1721 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
1723 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1722 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1724 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1723 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1725 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1724 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -2121,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
2121 reg_00.raw = io_apic_read(apic, 0); 2120 reg_00.raw = io_apic_read(apic, 0);
2122 spin_unlock_irqrestore(&ioapic_lock, flags); 2121 spin_unlock_irqrestore(&ioapic_lock, flags);
2123 2122
2124 old_id = mp_ioapics[apic].mp_apicid; 2123 old_id = mp_ioapics[apic].apicid;
2125 2124
2126 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { 2125 if (mp_ioapics[apic].apicid >= get_physical_broadcast()) {
2127 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 2126 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2128 apic, mp_ioapics[apic].mp_apicid); 2127 apic, mp_ioapics[apic].apicid);
2129 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2128 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2130 reg_00.bits.ID); 2129 reg_00.bits.ID);
2131 mp_ioapics[apic].mp_apicid = reg_00.bits.ID; 2130 mp_ioapics[apic].apicid = reg_00.bits.ID;
2132 } 2131 }
2133 2132
2134 /* 2133 /*
@@ -2137,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
2137 * 'stuck on smp_invalidate_needed IPI wait' messages. 2136 * 'stuck on smp_invalidate_needed IPI wait' messages.
2138 */ 2137 */
2139 if (check_apicid_used(phys_id_present_map, 2138 if (check_apicid_used(phys_id_present_map,
2140 mp_ioapics[apic].mp_apicid)) { 2139 mp_ioapics[apic].apicid)) {
2141 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 2140 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2142 apic, mp_ioapics[apic].mp_apicid); 2141 apic, mp_ioapics[apic].apicid);
2143 for (i = 0; i < get_physical_broadcast(); i++) 2142 for (i = 0; i < get_physical_broadcast(); i++)
2144 if (!physid_isset(i, phys_id_present_map)) 2143 if (!physid_isset(i, phys_id_present_map))
2145 break; 2144 break;
@@ -2148,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
2148 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2147 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2149 i); 2148 i);
2150 physid_set(i, phys_id_present_map); 2149 physid_set(i, phys_id_present_map);
2151 mp_ioapics[apic].mp_apicid = i; 2150 mp_ioapics[apic].apicid = i;
2152 } else { 2151 } else {
2153 physid_mask_t tmp; 2152 physid_mask_t tmp;
2154 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); 2153 tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid);
2155 apic_printk(APIC_VERBOSE, "Setting %d in the " 2154 apic_printk(APIC_VERBOSE, "Setting %d in the "
2156 "phys_id_present_map\n", 2155 "phys_id_present_map\n",
2157 mp_ioapics[apic].mp_apicid); 2156 mp_ioapics[apic].apicid);
2158 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2157 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2159 } 2158 }
2160 2159
@@ -2163,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
2163 * We need to adjust the IRQ routing table 2162 * We need to adjust the IRQ routing table
2164 * if the ID changed. 2163 * if the ID changed.
2165 */ 2164 */
2166 if (old_id != mp_ioapics[apic].mp_apicid) 2165 if (old_id != mp_ioapics[apic].apicid)
2167 for (i = 0; i < mp_irq_entries; i++) 2166 for (i = 0; i < mp_irq_entries; i++)
2168 if (mp_irqs[i].mp_dstapic == old_id) 2167 if (mp_irqs[i].dstapic == old_id)
2169 mp_irqs[i].mp_dstapic 2168 mp_irqs[i].dstapic
2170 = mp_ioapics[apic].mp_apicid; 2169 = mp_ioapics[apic].apicid;
2171 2170
2172 /* 2171 /*
2173 * Read the right value from the MPC table and 2172 * Read the right value from the MPC table and
@@ -2175,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
2175 */ 2174 */
2176 apic_printk(APIC_VERBOSE, KERN_INFO 2175 apic_printk(APIC_VERBOSE, KERN_INFO
2177 "...changing IO-APIC physical APIC ID to %d ...", 2176 "...changing IO-APIC physical APIC ID to %d ...",
2178 mp_ioapics[apic].mp_apicid); 2177 mp_ioapics[apic].apicid);
2179 2178
2180 reg_00.bits.ID = mp_ioapics[apic].mp_apicid; 2179 reg_00.bits.ID = mp_ioapics[apic].apicid;
2181 spin_lock_irqsave(&ioapic_lock, flags); 2180 spin_lock_irqsave(&ioapic_lock, flags);
2182 io_apic_write(apic, 0, reg_00.raw); 2181 io_apic_write(apic, 0, reg_00.raw);
2183 spin_unlock_irqrestore(&ioapic_lock, flags); 2182 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2188,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
2188 spin_lock_irqsave(&ioapic_lock, flags); 2187 spin_lock_irqsave(&ioapic_lock, flags);
2189 reg_00.raw = io_apic_read(apic, 0); 2188 reg_00.raw = io_apic_read(apic, 0);
2190 spin_unlock_irqrestore(&ioapic_lock, flags); 2189 spin_unlock_irqrestore(&ioapic_lock, flags);
2191 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) 2190 if (reg_00.bits.ID != mp_ioapics[apic].apicid)
2192 printk("could not set ID!\n"); 2191 printk("could not set ID!\n");
2193 else 2192 else
2194 apic_printk(APIC_VERBOSE, " ok.\n"); 2193 apic_printk(APIC_VERBOSE, " ok.\n");
@@ -3117,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev)
3117 3116
3118 spin_lock_irqsave(&ioapic_lock, flags); 3117 spin_lock_irqsave(&ioapic_lock, flags);
3119 reg_00.raw = io_apic_read(dev->id, 0); 3118 reg_00.raw = io_apic_read(dev->id, 0);
3120 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { 3119 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3121 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; 3120 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3122 io_apic_write(dev->id, 0, reg_00.raw); 3121 io_apic_write(dev->id, 0, reg_00.raw);
3123 } 3122 }
3124 spin_unlock_irqrestore(&ioapic_lock, flags); 3123 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -3258,6 +3257,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3258 int err; 3257 int err;
3259 unsigned dest; 3258 unsigned dest;
3260 3259
3260 if (disable_apic)
3261 return -ENXIO;
3262
3261 cfg = irq_cfg(irq); 3263 cfg = irq_cfg(irq);
3262 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3264 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3263 if (err) 3265 if (err)
@@ -3726,6 +3728,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3726 struct irq_cfg *cfg; 3728 struct irq_cfg *cfg;
3727 int err; 3729 int err;
3728 3730
3731 if (disable_apic)
3732 return -ENXIO;
3733
3729 cfg = irq_cfg(irq); 3734 cfg = irq_cfg(irq);
3730 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3735 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3731 if (!err) { 3736 if (!err) {
@@ -4000,8 +4005,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
4000 return -1; 4005 return -1;
4001 4006
4002 for (i = 0; i < mp_irq_entries; i++) 4007 for (i = 0; i < mp_irq_entries; i++)
4003 if (mp_irqs[i].mp_irqtype == mp_INT && 4008 if (mp_irqs[i].irqtype == mp_INT &&
4004 mp_irqs[i].mp_srcbusirq == bus_irq) 4009 mp_irqs[i].srcbusirq == bus_irq)
4005 break; 4010 break;
4006 if (i >= mp_irq_entries) 4011 if (i >= mp_irq_entries)
4007 return -1; 4012 return -1;
@@ -4116,7 +4121,7 @@ void __init ioapic_init_mappings(void)
4116 ioapic_res = ioapic_setup_resources(); 4121 ioapic_res = ioapic_setup_resources();
4117 for (i = 0; i < nr_ioapics; i++) { 4122 for (i = 0; i < nr_ioapics; i++) {
4118 if (smp_found_config) { 4123 if (smp_found_config) {
4119 ioapic_phys = mp_ioapics[i].mp_apicaddr; 4124 ioapic_phys = mp_ioapics[i].apicaddr;
4120#ifdef CONFIG_X86_32 4125#ifdef CONFIG_X86_32
4121 if (!ioapic_phys) { 4126 if (!ioapic_phys) {
4122 printk(KERN_ERR 4127 printk(KERN_ERR
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3973e2df7f87..8b30d0c2512c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
36#endif 36#endif
37} 37}
38 38
39#ifdef CONFIG_X86_32 39#define irq_stats(x) (&per_cpu(irq_stat, x))
40# define irq_stats(x) (&per_cpu(irq_stat, x))
41#else
42# define irq_stats(x) cpu_pda(x)
43#endif
44/* 40/*
45 * /proc/interrupts printing: 41 * /proc/interrupts printing:
46 */ 42 */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 0b21cb1ea11f..0b254de84083 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -19,6 +19,12 @@
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/idle.h> 20#include <asm/idle.h>
21 21
22DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
23EXPORT_PER_CPU_SYMBOL(irq_stat);
24
25DEFINE_PER_CPU(struct pt_regs *, irq_regs);
26EXPORT_PER_CPU_SYMBOL(irq_regs);
27
22/* 28/*
23 * Probabilistic stack overflow check: 29 * Probabilistic stack overflow check:
24 * 30 *
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 1507ad4e674d..bf629cadec1a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
149 */ 149 */
150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
151 151
152 /* IPI for invalidation */ 152 /* IPIs for invalidation */
153 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); 153 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
154 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
155 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
156 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
157 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
158 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
159 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
160 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
154 161
155 /* IPI for generic function call */ 162 /* IPI for generic function call */
156 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 163 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index b7f4c929e615..5e9f4fc51385 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -87,9 +87,9 @@
87#include <linux/cpu.h> 87#include <linux/cpu.h>
88#include <linux/firmware.h> 88#include <linux/firmware.h>
89#include <linux/platform_device.h> 89#include <linux/platform_device.h>
90#include <linux/uaccess.h>
90 91
91#include <asm/msr.h> 92#include <asm/msr.h>
92#include <asm/uaccess.h>
93#include <asm/processor.h> 93#include <asm/processor.h>
94#include <asm/microcode.h> 94#include <asm/microcode.h>
95 95
@@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; 196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
197} 197}
198 198
199static inline int 199static inline int
200update_match_revision(struct microcode_header_intel *mc_header, int rev) 200update_match_revision(struct microcode_header_intel *mc_header, int rev)
201{ 201{
202 return (mc_header->rev <= rev) ? 0 : 1; 202 return (mc_header->rev <= rev) ? 0 : 1;
@@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device)
442 return ret; 442 return ret;
443 } 443 }
444 444
445 ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, 445 ret = generic_load_microcode(cpu, (void *)firmware->data,
446 &get_ucode_fw); 446 firmware->size, &get_ucode_fw);
447 447
448 release_firmware(firmware); 448 release_firmware(firmware);
449 449
@@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size)
460 /* We should bind the task to the CPU */ 460 /* We should bind the task to the CPU */
461 BUG_ON(cpu != raw_smp_processor_id()); 461 BUG_ON(cpu != raw_smp_processor_id());
462 462
463 return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); 463 return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
464} 464}
465 465
466static void microcode_fini_cpu(int cpu) 466static void microcode_fini_cpu(int cpu)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
index 3db0a5442eb1..0edd819050e7 100644
--- a/arch/x86/kernel/module_32.c
+++ b/arch/x86/kernel/module_32.c
@@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region)
42{ 42{
43 vfree(module_region); 43 vfree(module_region);
44 /* FIXME: If module_region == mod->init_region, trim exception 44 /* FIXME: If module_region == mod->init_region, trim exception
45 table entries. */ 45 table entries. */
46} 46}
47 47
48/* We don't need anything special. */ 48/* We don't need anything special. */
@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr,
113 *para = NULL; 113 *para = NULL;
114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
115 115
116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
117 if (!strcmp(".text", secstrings + s->sh_name)) 117 if (!strcmp(".text", secstrings + s->sh_name))
118 text = s; 118 text = s;
119 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 119 if (!strcmp(".altinstructions", secstrings + s->sh_name))
120 alt = s; 120 alt = s;
121 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 121 if (!strcmp(".smp_locks", secstrings + s->sh_name))
122 locks= s; 122 locks = s;
123 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 123 if (!strcmp(".parainstructions", secstrings + s->sh_name))
124 para = s; 124 para = s;
125 } 125 }
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 6ba87830d4b1..c23880b90b5c 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -30,14 +30,14 @@
30#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/pgtable.h> 31#include <asm/pgtable.h>
32 32
33#define DEBUGP(fmt...) 33#define DEBUGP(fmt...)
34 34
35#ifndef CONFIG_UML 35#ifndef CONFIG_UML
36void module_free(struct module *mod, void *module_region) 36void module_free(struct module *mod, void *module_region)
37{ 37{
38 vfree(module_region); 38 vfree(module_region);
39 /* FIXME: If module_region == mod->init_region, trim exception 39 /* FIXME: If module_region == mod->init_region, trim exception
40 table entries. */ 40 table entries. */
41} 41}
42 42
43void *module_alloc(unsigned long size) 43void *module_alloc(unsigned long size)
@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; 77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
78 Elf64_Sym *sym; 78 Elf64_Sym *sym;
79 void *loc; 79 void *loc;
80 u64 val; 80 u64 val;
81 81
82 DEBUGP("Applying relocate section %u to %u\n", relsec, 82 DEBUGP("Applying relocate section %u to %u\n", relsec,
83 sechdrs[relsec].sh_info); 83 sechdrs[relsec].sh_info);
@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr 91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
92 + ELF64_R_SYM(rel[i].r_info); 92 + ELF64_R_SYM(rel[i].r_info);
93 93
94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", 94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
95 (int)ELF64_R_TYPE(rel[i].r_info), 95 (int)ELF64_R_TYPE(rel[i].r_info),
96 sym->st_value, rel[i].r_addend, (u64)loc); 96 sym->st_value, rel[i].r_addend, (u64)loc);
97 97
98 val = sym->st_value + rel[i].r_addend; 98 val = sym->st_value + rel[i].r_addend;
99 99
100 switch (ELF64_R_TYPE(rel[i].r_info)) { 100 switch (ELF64_R_TYPE(rel[i].r_info)) {
101 case R_X86_64_NONE: 101 case R_X86_64_NONE:
@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
113 if ((s64)val != *(s32 *)loc) 113 if ((s64)val != *(s32 *)loc)
114 goto overflow; 114 goto overflow;
115 break; 115 break;
116 case R_X86_64_PC32: 116 case R_X86_64_PC32:
117 val -= (u64)loc; 117 val -= (u64)loc;
118 *(u32 *)loc = val; 118 *(u32 *)loc = val;
119#if 0 119#if 0
120 if ((s64)val != *(s32 *)loc) 120 if ((s64)val != *(s32 *)loc)
121 goto overflow; 121 goto overflow;
122#endif 122#endif
123 break; 123 break;
124 default: 124 default:
125 printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", 125 printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
126 me->name, ELF64_R_TYPE(rel[i].r_info)); 126 me->name, ELF64_R_TYPE(rel[i].r_info));
127 return -ENOEXEC; 127 return -ENOEXEC;
128 } 128 }
@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
130 return 0; 130 return 0;
131 131
132overflow: 132overflow:
133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n", 133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
134 (int)ELF64_R_TYPE(rel[i].r_info), val); 134 (int)ELF64_R_TYPE(rel[i].r_info), val);
135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", 135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
136 me->name); 136 me->name);
@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs,
143 unsigned int relsec, 143 unsigned int relsec,
144 struct module *me) 144 struct module *me)
145{ 145{
146 printk("non add relocation not supported\n"); 146 printk(KERN_ERR "non add relocation not supported\n");
147 return -ENOSYS; 147 return -ENOSYS;
148} 148}
149 149
150int module_finalize(const Elf_Ehdr *hdr, 150int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 151 const Elf_Shdr *sechdrs,
152 struct module *me) 152 struct module *me)
153{ 153{
154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, 154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
155 *para = NULL; 155 *para = NULL;
@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr,
161 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 161 if (!strcmp(".altinstructions", secstrings + s->sh_name))
162 alt = s; 162 alt = s;
163 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 163 if (!strcmp(".smp_locks", secstrings + s->sh_name))
164 locks= s; 164 locks = s;
165 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 165 if (!strcmp(".parainstructions", secstrings + s->sh_name))
166 para = s; 166 para = s;
167 } 167 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a649a4ccad43..fa6bb263892e 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -144,11 +144,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
144 if (bad_ioapic(m->apicaddr)) 144 if (bad_ioapic(m->apicaddr))
145 return; 145 return;
146 146
147 mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; 147 mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
148 mp_ioapics[nr_ioapics].mp_apicid = m->apicid; 148 mp_ioapics[nr_ioapics].apicid = m->apicid;
149 mp_ioapics[nr_ioapics].mp_type = m->type; 149 mp_ioapics[nr_ioapics].type = m->type;
150 mp_ioapics[nr_ioapics].mp_apicver = m->apicver; 150 mp_ioapics[nr_ioapics].apicver = m->apicver;
151 mp_ioapics[nr_ioapics].mp_flags = m->flags; 151 mp_ioapics[nr_ioapics].flags = m->flags;
152 nr_ioapics++; 152 nr_ioapics++;
153} 153}
154 154
@@ -160,55 +160,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
160 m->srcbusirq, m->dstapic, m->dstirq); 160 m->srcbusirq, m->dstapic, m->dstirq);
161} 161}
162 162
163static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 163static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
164{ 164{
165 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," 165 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
166 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 166 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
167 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 167 mp_irq->irqtype, mp_irq->irqflag & 3,
168 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 168 (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
169 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); 169 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
170} 170}
171 171
172static void __init assign_to_mp_irq(struct mpc_intsrc *m, 172static void __init assign_to_mp_irq(struct mpc_intsrc *m,
173 struct mp_config_intsrc *mp_irq) 173 struct mpc_intsrc *mp_irq)
174{ 174{
175 mp_irq->mp_dstapic = m->dstapic; 175 mp_irq->dstapic = m->dstapic;
176 mp_irq->mp_type = m->type; 176 mp_irq->type = m->type;
177 mp_irq->mp_irqtype = m->irqtype; 177 mp_irq->irqtype = m->irqtype;
178 mp_irq->mp_irqflag = m->irqflag; 178 mp_irq->irqflag = m->irqflag;
179 mp_irq->mp_srcbus = m->srcbus; 179 mp_irq->srcbus = m->srcbus;
180 mp_irq->mp_srcbusirq = m->srcbusirq; 180 mp_irq->srcbusirq = m->srcbusirq;
181 mp_irq->mp_dstirq = m->dstirq; 181 mp_irq->dstirq = m->dstirq;
182} 182}
183 183
184static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, 184static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
185 struct mpc_intsrc *m) 185 struct mpc_intsrc *m)
186{ 186{
187 m->dstapic = mp_irq->mp_dstapic; 187 m->dstapic = mp_irq->dstapic;
188 m->type = mp_irq->mp_type; 188 m->type = mp_irq->type;
189 m->irqtype = mp_irq->mp_irqtype; 189 m->irqtype = mp_irq->irqtype;
190 m->irqflag = mp_irq->mp_irqflag; 190 m->irqflag = mp_irq->irqflag;
191 m->srcbus = mp_irq->mp_srcbus; 191 m->srcbus = mp_irq->srcbus;
192 m->srcbusirq = mp_irq->mp_srcbusirq; 192 m->srcbusirq = mp_irq->srcbusirq;
193 m->dstirq = mp_irq->mp_dstirq; 193 m->dstirq = mp_irq->dstirq;
194} 194}
195 195
196static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, 196static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
197 struct mpc_intsrc *m) 197 struct mpc_intsrc *m)
198{ 198{
199 if (mp_irq->mp_dstapic != m->dstapic) 199 if (mp_irq->dstapic != m->dstapic)
200 return 1; 200 return 1;
201 if (mp_irq->mp_type != m->type) 201 if (mp_irq->type != m->type)
202 return 2; 202 return 2;
203 if (mp_irq->mp_irqtype != m->irqtype) 203 if (mp_irq->irqtype != m->irqtype)
204 return 3; 204 return 3;
205 if (mp_irq->mp_irqflag != m->irqflag) 205 if (mp_irq->irqflag != m->irqflag)
206 return 4; 206 return 4;
207 if (mp_irq->mp_srcbus != m->srcbus) 207 if (mp_irq->srcbus != m->srcbus)
208 return 5; 208 return 5;
209 if (mp_irq->mp_srcbusirq != m->srcbusirq) 209 if (mp_irq->srcbusirq != m->srcbusirq)
210 return 6; 210 return 6;
211 if (mp_irq->mp_dstirq != m->dstirq) 211 if (mp_irq->dstirq != m->dstirq)
212 return 7; 212 return 7;
213 213
214 return 0; 214 return 0;
@@ -417,7 +417,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
417 intsrc.type = MP_INTSRC; 417 intsrc.type = MP_INTSRC;
418 intsrc.irqflag = 0; /* conforming */ 418 intsrc.irqflag = 0; /* conforming */
419 intsrc.srcbus = 0; 419 intsrc.srcbus = 0;
420 intsrc.dstapic = mp_ioapics[0].mp_apicid; 420 intsrc.dstapic = mp_ioapics[0].apicid;
421 421
422 intsrc.irqtype = mp_INT; 422 intsrc.irqtype = mp_INT;
423 423
@@ -570,14 +570,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
570 } 570 }
571} 571}
572 572
573static struct intel_mp_floating *mpf_found; 573static struct mpf_intel *mpf_found;
574 574
575/* 575/*
576 * Scan the memory blocks for an SMP configuration block. 576 * Scan the memory blocks for an SMP configuration block.
577 */ 577 */
578static void __init __get_smp_config(unsigned int early) 578static void __init __get_smp_config(unsigned int early)
579{ 579{
580 struct intel_mp_floating *mpf = mpf_found; 580 struct mpf_intel *mpf = mpf_found;
581 581
582 if (!mpf) 582 if (!mpf)
583 return; 583 return;
@@ -598,9 +598,9 @@ static void __init __get_smp_config(unsigned int early)
598 } 598 }
599 599
600 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 600 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
601 mpf->mpf_specification); 601 mpf->specification);
602#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 602#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
603 if (mpf->mpf_feature2 & (1 << 7)) { 603 if (mpf->feature2 & (1 << 7)) {
604 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 604 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
605 pic_mode = 1; 605 pic_mode = 1;
606 } else { 606 } else {
@@ -611,7 +611,7 @@ static void __init __get_smp_config(unsigned int early)
611 /* 611 /*
612 * Now see if we need to read further. 612 * Now see if we need to read further.
613 */ 613 */
614 if (mpf->mpf_feature1 != 0) { 614 if (mpf->feature1 != 0) {
615 if (early) { 615 if (early) {
616 /* 616 /*
617 * local APIC has default address 617 * local APIC has default address
@@ -621,16 +621,16 @@ static void __init __get_smp_config(unsigned int early)
621 } 621 }
622 622
623 printk(KERN_INFO "Default MP configuration #%d\n", 623 printk(KERN_INFO "Default MP configuration #%d\n",
624 mpf->mpf_feature1); 624 mpf->feature1);
625 construct_default_ISA_mptable(mpf->mpf_feature1); 625 construct_default_ISA_mptable(mpf->feature1);
626 626
627 } else if (mpf->mpf_physptr) { 627 } else if (mpf->physptr) {
628 628
629 /* 629 /*
630 * Read the physical hardware table. Anything here will 630 * Read the physical hardware table. Anything here will
631 * override the defaults. 631 * override the defaults.
632 */ 632 */
633 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { 633 if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
634#ifdef CONFIG_X86_LOCAL_APIC 634#ifdef CONFIG_X86_LOCAL_APIC
635 smp_found_config = 0; 635 smp_found_config = 0;
636#endif 636#endif
@@ -688,19 +688,19 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
688 unsigned reserve) 688 unsigned reserve)
689{ 689{
690 unsigned int *bp = phys_to_virt(base); 690 unsigned int *bp = phys_to_virt(base);
691 struct intel_mp_floating *mpf; 691 struct mpf_intel *mpf;
692 692
693 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", 693 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
694 bp, length); 694 bp, length);
695 BUILD_BUG_ON(sizeof(*mpf) != 16); 695 BUILD_BUG_ON(sizeof(*mpf) != 16);
696 696
697 while (length > 0) { 697 while (length > 0) {
698 mpf = (struct intel_mp_floating *)bp; 698 mpf = (struct mpf_intel *)bp;
699 if ((*bp == SMP_MAGIC_IDENT) && 699 if ((*bp == SMP_MAGIC_IDENT) &&
700 (mpf->mpf_length == 1) && 700 (mpf->length == 1) &&
701 !mpf_checksum((unsigned char *)bp, 16) && 701 !mpf_checksum((unsigned char *)bp, 16) &&
702 ((mpf->mpf_specification == 1) 702 ((mpf->specification == 1)
703 || (mpf->mpf_specification == 4))) { 703 || (mpf->specification == 4))) {
704#ifdef CONFIG_X86_LOCAL_APIC 704#ifdef CONFIG_X86_LOCAL_APIC
705 smp_found_config = 1; 705 smp_found_config = 1;
706#endif 706#endif
@@ -713,7 +713,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
713 return 1; 713 return 1;
714 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, 714 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
715 BOOTMEM_DEFAULT); 715 BOOTMEM_DEFAULT);
716 if (mpf->mpf_physptr) { 716 if (mpf->physptr) {
717 unsigned long size = PAGE_SIZE; 717 unsigned long size = PAGE_SIZE;
718#ifdef CONFIG_X86_32 718#ifdef CONFIG_X86_32
719 /* 719 /*
@@ -722,14 +722,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
722 * the bottom is mapped now. 722 * the bottom is mapped now.
723 * PC-9800's MPC table places on the very last 723 * PC-9800's MPC table places on the very last
724 * of physical memory; so that simply reserving 724 * of physical memory; so that simply reserving
725 * PAGE_SIZE from mpg->mpf_physptr yields BUG() 725 * PAGE_SIZE from mpf->physptr yields BUG()
726 * in reserve_bootmem. 726 * in reserve_bootmem.
727 */ 727 */
728 unsigned long end = max_low_pfn * PAGE_SIZE; 728 unsigned long end = max_low_pfn * PAGE_SIZE;
729 if (mpf->mpf_physptr + size > end) 729 if (mpf->physptr + size > end)
730 size = end - mpf->mpf_physptr; 730 size = end - mpf->physptr;
731#endif 731#endif
732 reserve_bootmem_generic(mpf->mpf_physptr, size, 732 reserve_bootmem_generic(mpf->physptr, size,
733 BOOTMEM_DEFAULT); 733 BOOTMEM_DEFAULT);
734 } 734 }
735 735
@@ -809,15 +809,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
809 /* not legacy */ 809 /* not legacy */
810 810
811 for (i = 0; i < mp_irq_entries; i++) { 811 for (i = 0; i < mp_irq_entries; i++) {
812 if (mp_irqs[i].mp_irqtype != mp_INT) 812 if (mp_irqs[i].irqtype != mp_INT)
813 continue; 813 continue;
814 814
815 if (mp_irqs[i].mp_irqflag != 0x0f) 815 if (mp_irqs[i].irqflag != 0x0f)
816 continue; 816 continue;
817 817
818 if (mp_irqs[i].mp_srcbus != m->srcbus) 818 if (mp_irqs[i].srcbus != m->srcbus)
819 continue; 819 continue;
820 if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) 820 if (mp_irqs[i].srcbusirq != m->srcbusirq)
821 continue; 821 continue;
822 if (irq_used[i]) { 822 if (irq_used[i]) {
823 /* already claimed */ 823 /* already claimed */
@@ -922,10 +922,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
922 if (irq_used[i]) 922 if (irq_used[i])
923 continue; 923 continue;
924 924
925 if (mp_irqs[i].mp_irqtype != mp_INT) 925 if (mp_irqs[i].irqtype != mp_INT)
926 continue; 926 continue;
927 927
928 if (mp_irqs[i].mp_irqflag != 0x0f) 928 if (mp_irqs[i].irqflag != 0x0f)
929 continue; 929 continue;
930 930
931 if (nr_m_spare > 0) { 931 if (nr_m_spare > 0) {
@@ -1001,7 +1001,7 @@ static int __init update_mp_table(void)
1001{ 1001{
1002 char str[16]; 1002 char str[16];
1003 char oem[10]; 1003 char oem[10];
1004 struct intel_mp_floating *mpf; 1004 struct mpf_intel *mpf;
1005 struct mpc_table *mpc, *mpc_new; 1005 struct mpc_table *mpc, *mpc_new;
1006 1006
1007 if (!enable_update_mptable) 1007 if (!enable_update_mptable)
@@ -1014,19 +1014,19 @@ static int __init update_mp_table(void)
1014 /* 1014 /*
1015 * Now see if we need to go further. 1015 * Now see if we need to go further.
1016 */ 1016 */
1017 if (mpf->mpf_feature1 != 0) 1017 if (mpf->feature1 != 0)
1018 return 0; 1018 return 0;
1019 1019
1020 if (!mpf->mpf_physptr) 1020 if (!mpf->physptr)
1021 return 0; 1021 return 0;
1022 1022
1023 mpc = phys_to_virt(mpf->mpf_physptr); 1023 mpc = phys_to_virt(mpf->physptr);
1024 1024
1025 if (!smp_check_mpc(mpc, oem, str)) 1025 if (!smp_check_mpc(mpc, oem, str))
1026 return 0; 1026 return 0;
1027 1027
1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); 1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
1029 printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); 1029 printk(KERN_INFO "physptr: %x\n", mpf->physptr);
1030 1030
1031 if (mpc_new_phys && mpc->length > mpc_new_length) { 1031 if (mpc_new_phys && mpc->length > mpc_new_length) {
1032 mpc_new_phys = 0; 1032 mpc_new_phys = 0;
@@ -1047,23 +1047,23 @@ static int __init update_mp_table(void)
1047 } 1047 }
1048 printk(KERN_INFO "use in-positon replacing\n"); 1048 printk(KERN_INFO "use in-positon replacing\n");
1049 } else { 1049 } else {
1050 mpf->mpf_physptr = mpc_new_phys; 1050 mpf->physptr = mpc_new_phys;
1051 mpc_new = phys_to_virt(mpc_new_phys); 1051 mpc_new = phys_to_virt(mpc_new_phys);
1052 memcpy(mpc_new, mpc, mpc->length); 1052 memcpy(mpc_new, mpc, mpc->length);
1053 mpc = mpc_new; 1053 mpc = mpc_new;
1054 /* check if we can modify that */ 1054 /* check if we can modify that */
1055 if (mpc_new_phys - mpf->mpf_physptr) { 1055 if (mpc_new_phys - mpf->physptr) {
1056 struct intel_mp_floating *mpf_new; 1056 struct mpf_intel *mpf_new;
1057 /* steal 16 bytes from [0, 1k) */ 1057 /* steal 16 bytes from [0, 1k) */
1058 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); 1058 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
1059 mpf_new = phys_to_virt(0x400 - 16); 1059 mpf_new = phys_to_virt(0x400 - 16);
1060 memcpy(mpf_new, mpf, 16); 1060 memcpy(mpf_new, mpf, 16);
1061 mpf = mpf_new; 1061 mpf = mpf_new;
1062 mpf->mpf_physptr = mpc_new_phys; 1062 mpf->physptr = mpc_new_phys;
1063 } 1063 }
1064 mpf->mpf_checksum = 0; 1064 mpf->checksum = 0;
1065 mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); 1065 mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
1066 printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); 1066 printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
1067 } 1067 }
1068 1068
1069 /* 1069 /*
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 726266695b2c..3cf3413ec626 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -35,10 +35,10 @@
35#include <linux/device.h> 35#include <linux/device.h>
36#include <linux/cpu.h> 36#include <linux/cpu.h>
37#include <linux/notifier.h> 37#include <linux/notifier.h>
38#include <linux/uaccess.h>
38 39
39#include <asm/processor.h> 40#include <asm/processor.h>
40#include <asm/msr.h> 41#include <asm/msr.h>
41#include <asm/uaccess.h>
42#include <asm/system.h> 42#include <asm/system.h>
43 43
44static struct class *msr_class; 44static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7228979f1e7f..23b6d9e6e4f5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -61,11 +61,7 @@ static int endflag __initdata;
61 61
62static inline unsigned int get_nmi_count(int cpu) 62static inline unsigned int get_nmi_count(int cpu)
63{ 63{
64#ifdef CONFIG_X86_64 64 return per_cpu(irq_stat, cpu).__nmi_count;
65 return cpu_pda(cpu)->__nmi_count;
66#else
67 return nmi_count(cpu);
68#endif
69} 65}
70 66
71static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
82 */ 78 */
83static inline unsigned int get_timer_irqs(int cpu) 79static inline unsigned int get_timer_irqs(int cpu)
84{ 80{
85#ifdef CONFIG_X86_64
86 return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
87#else
88 return per_cpu(irq_stat, cpu).apic_timer_irqs + 81 return per_cpu(irq_stat, cpu).apic_timer_irqs +
89 per_cpu(irq_stat, cpu).irq0_irqs; 82 per_cpu(irq_stat, cpu).irq0_irqs;
90#endif
91} 83}
92 84
93#ifdef CONFIG_SMP 85#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..2c00a57ccb90 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 67EXPORT_PER_CPU_SYMBOL(current_task);
68 68
69DEFINE_PER_CPU(int, cpu_number);
70EXPORT_PER_CPU_SYMBOL(cpu_number);
71
72/* 69/*
73 * Return saved PC of a blocked thread. 70 * Return saved PC of a blocked thread.
74 */ 71 */
@@ -591,7 +588,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
591 if (prev->gs | next->gs) 588 if (prev->gs | next->gs)
592 loadsegment(gs, next->gs); 589 loadsegment(gs, next->gs);
593 590
594 x86_write_percpu(current_task, next_p); 591 percpu_write(current_task, next_p);
595 592
596 return prev_p; 593 return prev_p;
597} 594}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4f..c422eebb0c58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -16,6 +16,7 @@
16 16
17#include <stdarg.h> 17#include <stdarg.h>
18 18
19#include <linux/stackprotector.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/errno.h> 21#include <linux/errno.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
@@ -46,7 +47,6 @@
46#include <asm/processor.h> 47#include <asm/processor.h>
47#include <asm/i387.h> 48#include <asm/i387.h>
48#include <asm/mmu_context.h> 49#include <asm/mmu_context.h>
49#include <asm/pda.h>
50#include <asm/prctl.h> 50#include <asm/prctl.h>
51#include <asm/desc.h> 51#include <asm/desc.h>
52#include <asm/proto.h> 52#include <asm/proto.h>
@@ -57,6 +57,12 @@
57 57
58asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
59 59
60DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
61EXPORT_PER_CPU_SYMBOL(current_task);
62
63DEFINE_PER_CPU(unsigned long, old_rsp);
64static DEFINE_PER_CPU(unsigned char, is_idle);
65
60unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 66unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
61 67
62static ATOMIC_NOTIFIER_HEAD(idle_notifier); 68static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -75,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
75 81
76void enter_idle(void) 82void enter_idle(void)
77{ 83{
78 write_pda(isidle, 1); 84 percpu_write(is_idle, 1);
79 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 85 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
80} 86}
81 87
82static void __exit_idle(void) 88static void __exit_idle(void)
83{ 89{
84 if (test_and_clear_bit_pda(0, isidle) == 0) 90 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
85 return; 91 return;
86 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 92 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
87} 93}
@@ -111,6 +117,17 @@ static inline void play_dead(void)
111void cpu_idle(void) 117void cpu_idle(void)
112{ 118{
113 current_thread_info()->status |= TS_POLLING; 119 current_thread_info()->status |= TS_POLLING;
120
121 /*
122 * If we're the non-boot CPU, nothing set the PDA stack
123 * canary up for us - and if we are the boot CPU we have
124 * a 0 stack canary. This is a good place for updating
125 * it, as we wont ever return from this function (so the
126 * invalid canaries already on the stack wont ever
127 * trigger):
128 */
129 boot_init_stack_canary();
130
114 /* endless idle loop with no priority at all */ 131 /* endless idle loop with no priority at all */
115 while (1) { 132 while (1) {
116 tick_nohz_stop_sched_tick(1); 133 tick_nohz_stop_sched_tick(1);
@@ -392,7 +409,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
392 load_gs_index(0); 409 load_gs_index(0);
393 regs->ip = new_ip; 410 regs->ip = new_ip;
394 regs->sp = new_sp; 411 regs->sp = new_sp;
395 write_pda(oldrsp, new_sp); 412 percpu_write(old_rsp, new_sp);
396 regs->cs = __USER_CS; 413 regs->cs = __USER_CS;
397 regs->ss = __USER_DS; 414 regs->ss = __USER_DS;
398 regs->flags = 0x200; 415 regs->flags = 0x200;
@@ -613,21 +630,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
613 /* 630 /*
614 * Switch the PDA and FPU contexts. 631 * Switch the PDA and FPU contexts.
615 */ 632 */
616 prev->usersp = read_pda(oldrsp); 633 prev->usersp = percpu_read(old_rsp);
617 write_pda(oldrsp, next->usersp); 634 percpu_write(old_rsp, next->usersp);
618 write_pda(pcurrent, next_p); 635 percpu_write(current_task, next_p);
619 636
620 write_pda(kernelstack, 637 percpu_write(kernel_stack,
621 (unsigned long)task_stack_page(next_p) + 638 (unsigned long)task_stack_page(next_p) +
622 THREAD_SIZE - PDA_STACKOFFSET); 639 THREAD_SIZE - KERNEL_STACK_OFFSET);
623#ifdef CONFIG_CC_STACKPROTECTOR
624 write_pda(stack_canary, next_p->stack_canary);
625 /*
626 * Build time only check to make sure the stack_canary is at
627 * offset 40 in the pda; this is a gcc ABI requirement
628 */
629 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
630#endif
631 640
632 /* 641 /*
633 * Now maybe reload the debug registers and handle I/O bitmaps 642 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643b..f8536fee5c12 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -14,6 +14,7 @@
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h> 15#include <asm/pci_x86.h>
16#include <asm/virtext.h> 16#include <asm/virtext.h>
17#include <asm/cpu.h>
17 18
18#ifdef CONFIG_X86_32 19#ifdef CONFIG_X86_32
19# include <linux/dmi.h> 20# include <linux/dmi.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf69..f41c4486c270 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -89,7 +89,7 @@
89 89
90#include <asm/system.h> 90#include <asm/system.h>
91#include <asm/vsyscall.h> 91#include <asm/vsyscall.h>
92#include <asm/smp.h> 92#include <asm/cpu.h>
93#include <asm/desc.h> 93#include <asm/desc.h>
94#include <asm/dma.h> 94#include <asm/dma.h>
95#include <asm/iommu.h> 95#include <asm/iommu.h>
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 55c46074eba0..90b8e154bb53 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,6 +13,23 @@
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/apicdef.h> 14#include <asm/apicdef.h>
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h>
17#include <asm/cpumask.h>
18
19#ifdef CONFIG_DEBUG_PER_CPU_MAPS
20# define DBG(x...) printk(KERN_DEBUG x)
21#else
22# define DBG(x...)
23#endif
24
25/*
26 * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
27 * voyager wants cpu_number too.
28 */
29#ifdef CONFIG_SMP
30DEFINE_PER_CPU(int, cpu_number);
31EXPORT_PER_CPU_SYMBOL(cpu_number);
32#endif
16 33
17#ifdef CONFIG_X86_LOCAL_APIC 34#ifdef CONFIG_X86_LOCAL_APIC
18unsigned int num_processors; 35unsigned int num_processors;
@@ -26,31 +43,60 @@ unsigned int max_physical_apicid;
26physid_mask_t phys_cpu_present_map; 43physid_mask_t phys_cpu_present_map;
27#endif 44#endif
28 45
29/* map cpu index to physical APIC ID */ 46/*
47 * Map cpu index to physical APIC ID
48 */
30DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); 49DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
31DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); 50DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
32EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 51EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
33EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 52EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
34 53
35#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 54#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
36#define X86_64_NUMA 1 55#define X86_64_NUMA 1 /* (used later) */
56DEFINE_PER_CPU(int, node_number) = 0;
57EXPORT_PER_CPU_SYMBOL(node_number);
37 58
38/* map cpu index to node index */ 59/*
60 * Map cpu index to node index
61 */
39DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 62DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
40EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 63EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
41 64
42/* which logical CPUs are on which nodes */ 65/*
66 * Which logical CPUs are on which nodes
67 */
43cpumask_t *node_to_cpumask_map; 68cpumask_t *node_to_cpumask_map;
44EXPORT_SYMBOL(node_to_cpumask_map); 69EXPORT_SYMBOL(node_to_cpumask_map);
45 70
46/* setup node_to_cpumask_map */ 71/*
72 * Setup node_to_cpumask_map
73 */
47static void __init setup_node_to_cpumask_map(void); 74static void __init setup_node_to_cpumask_map(void);
48 75
49#else 76#else
50static inline void setup_node_to_cpumask_map(void) { } 77static inline void setup_node_to_cpumask_map(void) { }
51#endif 78#endif
52 79
53#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 80#ifdef CONFIG_X86_64
81
82/* correctly size the local cpu masks */
83static void setup_cpu_local_masks(void)
84{
85 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
86 alloc_bootmem_cpumask_var(&cpu_callin_mask);
87 alloc_bootmem_cpumask_var(&cpu_callout_mask);
88 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
89}
90
91#else /* CONFIG_X86_32 */
92
93static inline void setup_cpu_local_masks(void)
94{
95}
96
97#endif /* CONFIG_X86_32 */
98
99#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
54/* 100/*
55 * Copy data used in early init routines from the initial arrays to the 101 * Copy data used in early init routines from the initial arrays to the
56 * per cpu data areas. These arrays then become expendable and the 102 * per cpu data areas. These arrays then become expendable and the
@@ -79,78 +125,14 @@ static void __init setup_per_cpu_maps(void)
79#endif 125#endif
80} 126}
81 127
82#ifdef CONFIG_X86_32 128#ifdef CONFIG_X86_64
83/* 129unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
84 * Great future not-so-futuristic plan: make i386 and x86_64 do it 130 [0] = (unsigned long)__per_cpu_load,
85 * the same way 131};
86 */ 132#else
87unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 133unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
134#endif
88EXPORT_SYMBOL(__per_cpu_offset); 135EXPORT_SYMBOL(__per_cpu_offset);
89static inline void setup_cpu_pda_map(void) { }
90
91#elif !defined(CONFIG_SMP)
92static inline void setup_cpu_pda_map(void) { }
93
94#else /* CONFIG_SMP && CONFIG_X86_64 */
95
96/*
97 * Allocate cpu_pda pointer table and array via alloc_bootmem.
98 */
99static void __init setup_cpu_pda_map(void)
100{
101 char *pda;
102 struct x8664_pda **new_cpu_pda;
103 unsigned long size;
104 int cpu;
105
106 size = roundup(sizeof(struct x8664_pda), cache_line_size());
107
108 /* allocate cpu_pda array and pointer table */
109 {
110 unsigned long tsize = nr_cpu_ids * sizeof(void *);
111 unsigned long asize = size * (nr_cpu_ids - 1);
112
113 tsize = roundup(tsize, cache_line_size());
114 new_cpu_pda = alloc_bootmem(tsize + asize);
115 pda = (char *)new_cpu_pda + tsize;
116 }
117
118 /* initialize pointer table to static pda's */
119 for_each_possible_cpu(cpu) {
120 if (cpu == 0) {
121 /* leave boot cpu pda in place */
122 new_cpu_pda[0] = cpu_pda(0);
123 continue;
124 }
125 new_cpu_pda[cpu] = (struct x8664_pda *)pda;
126 new_cpu_pda[cpu]->in_bootmem = 1;
127 pda += size;
128 }
129
130 /* point to new pointer table */
131 _cpu_pda = new_cpu_pda;
132}
133
134#endif /* CONFIG_SMP && CONFIG_X86_64 */
135
136#ifdef CONFIG_X86_64
137
138/* correctly size the local cpu masks */
139static void setup_cpu_local_masks(void)
140{
141 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
142 alloc_bootmem_cpumask_var(&cpu_callin_mask);
143 alloc_bootmem_cpumask_var(&cpu_callout_mask);
144 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
145}
146
147#else /* CONFIG_X86_32 */
148
149static inline void setup_cpu_local_masks(void)
150{
151}
152
153#endif /* CONFIG_X86_32 */
154 136
155/* 137/*
156 * Great future plan: 138 * Great future plan:
@@ -164,9 +146,6 @@ void __init setup_per_cpu_areas(void)
164 int cpu; 146 int cpu;
165 unsigned long align = 1; 147 unsigned long align = 1;
166 148
167 /* Setup cpu_pda map */
168 setup_cpu_pda_map();
169
170 /* Copy section for each CPU (we discard the original) */ 149 /* Copy section for each CPU (we discard the original) */
171 old_size = PERCPU_ENOUGH_ROOM; 150 old_size = PERCPU_ENOUGH_ROOM;
172 align = max_t(unsigned long, PAGE_SIZE, align); 151 align = max_t(unsigned long, PAGE_SIZE, align);
@@ -197,8 +176,23 @@ void __init setup_per_cpu_areas(void)
197 cpu, node, __pa(ptr)); 176 cpu, node, __pa(ptr));
198 } 177 }
199#endif 178#endif
179
180 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
200 per_cpu_offset(cpu) = ptr - __per_cpu_start; 181 per_cpu_offset(cpu) = ptr - __per_cpu_start;
201 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 182 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
183 per_cpu(cpu_number, cpu) = cpu;
184#ifdef CONFIG_X86_64
185 per_cpu(irq_stack_ptr, cpu) =
186 per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
187 /*
188 * Up to this point, CPU0 has been using .data.init
189 * area. Reload %gs offset for CPU0.
190 */
191 if (cpu == 0)
192 load_gs_base(cpu);
193#endif
194
195 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
202 } 196 }
203 197
204 /* Setup percpu data maps */ 198 /* Setup percpu data maps */
@@ -220,6 +214,7 @@ void __init setup_per_cpu_areas(void)
220 * Requires node_possible_map to be valid. 214 * Requires node_possible_map to be valid.
221 * 215 *
222 * Note: node_to_cpumask() is not valid until after this is done. 216 * Note: node_to_cpumask() is not valid until after this is done.
217 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
223 */ 218 */
224static void __init setup_node_to_cpumask_map(void) 219static void __init setup_node_to_cpumask_map(void)
225{ 220{
@@ -235,6 +230,7 @@ static void __init setup_node_to_cpumask_map(void)
235 230
236 /* allocate the map */ 231 /* allocate the map */
237 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); 232 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
233 DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
238 234
239 pr_debug("Node to cpumask map at %p for %d nodes\n", 235 pr_debug("Node to cpumask map at %p for %d nodes\n",
240 map, nr_node_ids); 236 map, nr_node_ids);
@@ -247,17 +243,23 @@ void __cpuinit numa_set_node(int cpu, int node)
247{ 243{
248 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 244 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
249 245
250 if (cpu_pda(cpu) && node != NUMA_NO_NODE) 246 /* early setting, no percpu area yet */
251 cpu_pda(cpu)->nodenumber = node; 247 if (cpu_to_node_map) {
252
253 if (cpu_to_node_map)
254 cpu_to_node_map[cpu] = node; 248 cpu_to_node_map[cpu] = node;
249 return;
250 }
255 251
256 else if (per_cpu_offset(cpu)) 252#ifdef CONFIG_DEBUG_PER_CPU_MAPS
257 per_cpu(x86_cpu_to_node_map, cpu) = node; 253 if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
254 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
255 dump_stack();
256 return;
257 }
258#endif
259 per_cpu(x86_cpu_to_node_map, cpu) = node;
258 260
259 else 261 if (node != NUMA_NO_NODE)
260 pr_debug("Setting node for non-present cpu %d\n", cpu); 262 per_cpu(node_number, cpu) = node;
261} 263}
262 264
263void __cpuinit numa_clear_node(int cpu) 265void __cpuinit numa_clear_node(int cpu)
@@ -274,7 +276,7 @@ void __cpuinit numa_add_cpu(int cpu)
274 276
275void __cpuinit numa_remove_cpu(int cpu) 277void __cpuinit numa_remove_cpu(int cpu)
276{ 278{
277 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); 279 cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
278} 280}
279 281
280#else /* CONFIG_DEBUG_PER_CPU_MAPS */ 282#else /* CONFIG_DEBUG_PER_CPU_MAPS */
@@ -284,7 +286,7 @@ void __cpuinit numa_remove_cpu(int cpu)
284 */ 286 */
285static void __cpuinit numa_set_cpumask(int cpu, int enable) 287static void __cpuinit numa_set_cpumask(int cpu, int enable)
286{ 288{
287 int node = cpu_to_node(cpu); 289 int node = early_cpu_to_node(cpu);
288 cpumask_t *mask; 290 cpumask_t *mask;
289 char buf[64]; 291 char buf[64];
290 292
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bb1a3b1fc87f..def770b57b5a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -53,7 +53,6 @@
53#include <asm/nmi.h> 53#include <asm/nmi.h>
54#include <asm/irq.h> 54#include <asm/irq.h>
55#include <asm/idle.h> 55#include <asm/idle.h>
56#include <asm/smp.h>
57#include <asm/trampoline.h> 56#include <asm/trampoline.h>
58#include <asm/cpu.h> 57#include <asm/cpu.h>
59#include <asm/numa.h> 58#include <asm/numa.h>
@@ -63,6 +62,7 @@
63#include <asm/vmi.h> 62#include <asm/vmi.h>
64#include <asm/genapic.h> 63#include <asm/genapic.h>
65#include <asm/setup.h> 64#include <asm/setup.h>
65#include <asm/uv/uv.h>
66#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
67 67
68#include <mach_apic.h> 68#include <mach_apic.h>
@@ -745,52 +745,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
745 complete(&c_idle->done); 745 complete(&c_idle->done);
746} 746}
747 747
748#ifdef CONFIG_X86_64
749
750/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
751static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
752{
753 if (!after_bootmem)
754 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
755}
756
757/*
758 * Allocate node local memory for the AP pda.
759 *
760 * Must be called after the _cpu_pda pointer table is initialized.
761 */
762int __cpuinit get_local_pda(int cpu)
763{
764 struct x8664_pda *oldpda, *newpda;
765 unsigned long size = sizeof(struct x8664_pda);
766 int node = cpu_to_node(cpu);
767
768 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
769 return 0;
770
771 oldpda = cpu_pda(cpu);
772 newpda = kmalloc_node(size, GFP_ATOMIC, node);
773 if (!newpda) {
774 printk(KERN_ERR "Could not allocate node local PDA "
775 "for CPU %d on node %d\n", cpu, node);
776
777 if (oldpda)
778 return 0; /* have a usable pda */
779 else
780 return -1;
781 }
782
783 if (oldpda) {
784 memcpy(newpda, oldpda, size);
785 free_bootmem_pda(oldpda);
786 }
787
788 newpda->in_bootmem = 0;
789 cpu_pda(cpu) = newpda;
790 return 0;
791}
792#endif /* CONFIG_X86_64 */
793
794static int __cpuinit do_boot_cpu(int apicid, int cpu) 748static int __cpuinit do_boot_cpu(int apicid, int cpu)
795/* 749/*
796 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 750 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -808,16 +762,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
808 }; 762 };
809 INIT_WORK(&c_idle.work, do_fork_idle); 763 INIT_WORK(&c_idle.work, do_fork_idle);
810 764
811#ifdef CONFIG_X86_64
812 /* Allocate node local memory for AP pdas */
813 if (cpu > 0) {
814 boot_error = get_local_pda(cpu);
815 if (boot_error)
816 goto restore_state;
817 /* if can't get pda memory, can't start cpu */
818 }
819#endif
820
821 alternatives_smp_switch(1); 765 alternatives_smp_switch(1);
822 766
823 c_idle.idle = get_idle_for_cpu(cpu); 767 c_idle.idle = get_idle_for_cpu(cpu);
@@ -847,14 +791,17 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
847 791
848 set_idle_for_cpu(cpu, c_idle.idle); 792 set_idle_for_cpu(cpu, c_idle.idle);
849do_rest: 793do_rest:
850#ifdef CONFIG_X86_32
851 per_cpu(current_task, cpu) = c_idle.idle; 794 per_cpu(current_task, cpu) = c_idle.idle;
795#ifdef CONFIG_X86_32
852 init_gdt(cpu); 796 init_gdt(cpu);
853 /* Stack for startup_32 can be just as for start_secondary onwards */ 797 /* Stack for startup_32 can be just as for start_secondary onwards */
854 irq_ctx_init(cpu); 798 irq_ctx_init(cpu);
855#else 799#else
856 cpu_pda(cpu)->pcurrent = c_idle.idle;
857 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 800 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
801 initial_gs = per_cpu_offset(cpu);
802 per_cpu(kernel_stack, cpu) =
803 (unsigned long)task_stack_page(c_idle.idle) -
804 KERNEL_STACK_OFFSET + THREAD_SIZE;
858#endif 805#endif
859 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 806 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
860 initial_code = (unsigned long)start_secondary; 807 initial_code = (unsigned long)start_secondary;
@@ -931,9 +878,7 @@ do_rest:
931 inquire_remote_apic(apicid); 878 inquire_remote_apic(apicid);
932 } 879 }
933 } 880 }
934#ifdef CONFIG_X86_64 881
935restore_state:
936#endif
937 if (boot_error) { 882 if (boot_error) {
938 /* Try to put things back the way they were before ... */ 883 /* Try to put things back the way they were before ... */
939 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 884 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -1125,6 +1070,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1125 printk(KERN_ERR "... forcing use of dummy APIC emulation." 1070 printk(KERN_ERR "... forcing use of dummy APIC emulation."
1126 "(tell your hw vendor)\n"); 1071 "(tell your hw vendor)\n");
1127 smpboot_clear_io_apic(); 1072 smpboot_clear_io_apic();
1073 disable_ioapic_setup();
1128 return -1; 1074 return -1;
1129 } 1075 }
1130 1076
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 397e309839dd..add36b4e37c9 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -3,11 +3,16 @@
3 */ 3 */
4#include <linux/module.h> 4#include <linux/module.h>
5#include <asm/smp.h> 5#include <asm/smp.h>
6#include <asm/sections.h>
6 7
7#ifdef CONFIG_X86_32 8#ifdef CONFIG_X86_64
9DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load;
10#else
8DEFINE_PER_CPU(unsigned long, this_cpu_off); 11DEFINE_PER_CPU(unsigned long, this_cpu_off);
12#endif
9EXPORT_PER_CPU_SYMBOL(this_cpu_off); 13EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10 14
15#ifdef CONFIG_X86_32
11/* 16/*
12 * Initialize the CPU's GDT. This is either the boot CPU doing itself 17 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 * (still using the master per-cpu area), or a CPU doing it for a 18 * (still using the master per-cpu area), or a CPU doing it for a
@@ -23,8 +28,5 @@ __cpuinit void init_gdt(int cpu)
23 28
24 write_gdt_entry(get_cpu_gdt_table(cpu), 29 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); 30 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
26
27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
28 per_cpu(cpu_number, cpu) = cpu;
29} 31}
30#endif 32#endif
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb.c
index 7f4141d3b661..b3ca1b940654 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb.c
@@ -1,22 +1,18 @@
1#include <linux/init.h> 1#include <linux/init.h>
2 2
3#include <linux/mm.h> 3#include <linux/mm.h>
4#include <linux/delay.h>
5#include <linux/spinlock.h> 4#include <linux/spinlock.h>
6#include <linux/smp.h> 5#include <linux/smp.h>
7#include <linux/kernel_stat.h>
8#include <linux/mc146818rtc.h>
9#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/module.h>
10 8
11#include <asm/mtrr.h>
12#include <asm/pgalloc.h>
13#include <asm/tlbflush.h> 9#include <asm/tlbflush.h>
14#include <asm/mmu_context.h> 10#include <asm/mmu_context.h>
15#include <asm/proto.h> 11#include <asm/apic.h>
16#include <asm/apicdef.h> 12#include <asm/uv/uv.h>
17#include <asm/idle.h> 13
18#include <asm/uv/uv_hub.h> 14DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
19#include <asm/uv/uv_bau.h> 15 = { &init_mm, 0, };
20 16
21#include <mach_ipi.h> 17#include <mach_ipi.h>
22/* 18/*
@@ -62,9 +58,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
62 */ 58 */
63void leave_mm(int cpu) 59void leave_mm(int cpu)
64{ 60{
65 if (read_pda(mmu_state) == TLBSTATE_OK) 61 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
66 BUG(); 62 BUG();
67 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); 63 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
68 load_cr3(swapper_pg_dir); 64 load_cr3(swapper_pg_dir);
69} 65}
70EXPORT_SYMBOL_GPL(leave_mm); 66EXPORT_SYMBOL_GPL(leave_mm);
@@ -117,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
117 * Interrupts are disabled. 113 * Interrupts are disabled.
118 */ 114 */
119 115
120asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) 116/*
117 * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
118 * but still used for documentation purpose but the usage is slightly
119 * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
120 * entry calls in with the first parameter in %eax. Maybe define
121 * intrlinkage?
122 */
123#ifdef CONFIG_X86_64
124asmlinkage
125#endif
126void smp_invalidate_interrupt(struct pt_regs *regs)
121{ 127{
122 int cpu; 128 unsigned int cpu;
123 int sender; 129 unsigned int sender;
124 union smp_flush_state *f; 130 union smp_flush_state *f;
125 131
126 cpu = smp_processor_id(); 132 cpu = smp_processor_id();
@@ -142,8 +148,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
142 * BUG(); 148 * BUG();
143 */ 149 */
144 150
145 if (f->flush_mm == read_pda(active_mm)) { 151 if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
146 if (read_pda(mmu_state) == TLBSTATE_OK) { 152 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
147 if (f->flush_va == TLB_FLUSH_ALL) 153 if (f->flush_va == TLB_FLUSH_ALL)
148 local_flush_tlb(); 154 local_flush_tlb();
149 else 155 else
@@ -153,14 +159,16 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
153 } 159 }
154out: 160out:
155 ack_APIC_irq(); 161 ack_APIC_irq();
162 smp_mb__before_clear_bit();
156 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); 163 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
164 smp_mb__after_clear_bit();
157 inc_irq_stat(irq_tlb_count); 165 inc_irq_stat(irq_tlb_count);
158} 166}
159 167
160static void flush_tlb_others_ipi(const struct cpumask *cpumask, 168static void flush_tlb_others_ipi(const struct cpumask *cpumask,
161 struct mm_struct *mm, unsigned long va) 169 struct mm_struct *mm, unsigned long va)
162{ 170{
163 int sender; 171 unsigned int sender;
164 union smp_flush_state *f; 172 union smp_flush_state *f;
165 173
166 /* Caller has disabled preemption */ 174 /* Caller has disabled preemption */
@@ -203,16 +211,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
203 struct mm_struct *mm, unsigned long va) 211 struct mm_struct *mm, unsigned long va)
204{ 212{
205 if (is_uv_system()) { 213 if (is_uv_system()) {
206 /* FIXME: could be an percpu_alloc'd thing */ 214 unsigned int cpu;
207 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
208 struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
209
210 cpumask_andnot(after_uv_flush, cpumask,
211 cpumask_of(smp_processor_id()));
212 if (!uv_flush_tlb_others(after_uv_flush, mm, va))
213 flush_tlb_others_ipi(after_uv_flush, mm, va);
214 215
215 put_cpu_var(flush_tlb_uv_cpumask); 216 cpu = get_cpu();
217 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
218 if (cpumask)
219 flush_tlb_others_ipi(cpumask, mm, va);
220 put_cpu();
216 return; 221 return;
217 } 222 }
218 flush_tlb_others_ipi(cpumask, mm, va); 223 flush_tlb_others_ipi(cpumask, mm, va);
@@ -281,7 +286,7 @@ static void do_flush_tlb_all(void *info)
281 unsigned long cpu = smp_processor_id(); 286 unsigned long cpu = smp_processor_id();
282 287
283 __flush_tlb_all(); 288 __flush_tlb_all();
284 if (read_pda(mmu_state) == TLBSTATE_LAZY) 289 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
285 leave_mm(cpu); 290 leave_mm(cpu);
286} 291}
287 292
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index d37bbfcb813d..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,250 +0,0 @@
1#include <linux/spinlock.h>
2#include <linux/cpu.h>
3#include <linux/interrupt.h>
4
5#include <asm/tlbflush.h>
6
7DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
8 ____cacheline_aligned = { &init_mm, 0, };
9
10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h>
12
13/*
14 * Smarter SMP flushing macros.
15 * c/o Linus Torvalds.
16 *
17 * These mean you can really definitely utterly forget about
18 * writing to user space from interrupts. (Its not allowed anyway).
19 *
20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */
22
23static cpumask_var_t flush_cpumask;
24static struct mm_struct *flush_mm;
25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock);
27
28/*
29 * We cannot call mmdrop() because we are in interrupt context,
30 * instead update mm->cpu_vm_mask.
31 *
32 * We need to reload %cr3 since the page tables may be going
33 * away from under us..
34 */
35void leave_mm(int cpu)
36{
37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir);
40}
41EXPORT_SYMBOL_GPL(leave_mm);
42
43/*
44 *
45 * The flush IPI assumes that a thread switch happens in this order:
46 * [cpu0: the cpu that switches]
47 * 1) switch_mm() either 1a) or 1b)
48 * 1a) thread switch to a different mm
49 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
50 * Stop ipi delivery for the old mm. This is not synchronized with
51 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
52 * for the wrong mm, and in the worst case we perform a superfluous
53 * tlb flush.
54 * 1a2) set cpu_tlbstate to TLBSTATE_OK
55 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
56 * was in lazy tlb mode.
57 * 1a3) update cpu_tlbstate[].active_mm
58 * Now cpu0 accepts tlb flushes for the new mm.
59 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
60 * Now the other cpus will send tlb flush ipis.
61 * 1a4) change cr3.
62 * 1b) thread switch without mm change
63 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
64 * flush ipis.
65 * 1b1) set cpu_tlbstate to TLBSTATE_OK
66 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
67 * Atomically set the bit [other cpus will start sending flush ipis],
68 * and test the bit.
69 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
70 * 2) switch %%esp, ie current
71 *
72 * The interrupt must handle 2 special cases:
73 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
74 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
75 * runs in kernel space, the cpu could load tlb entries for user space
76 * pages.
77 *
78 * The good news is that cpu_tlbstate is local to each cpu, no
79 * write/read ordering problems.
80 */
81
82/*
83 * TLB flush IPI:
84 *
85 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
86 * 2) Leave the mm if we are in the lazy tlb mode.
87 */
88
89void smp_invalidate_interrupt(struct pt_regs *regs)
90{
91 unsigned long cpu;
92
93 cpu = get_cpu();
94
95 if (!cpumask_test_cpu(cpu, flush_cpumask))
96 goto out;
97 /*
98 * This was a BUG() but until someone can quote me the
99 * line from the intel manual that guarantees an IPI to
100 * multiple CPUs is retried _only_ on the erroring CPUs
101 * its staying as a return
102 *
103 * BUG();
104 */
105
106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb();
110 else
111 __flush_tlb_one(flush_va);
112 } else
113 leave_mm(cpu);
114 }
115 ack_APIC_irq();
116 smp_mb__before_clear_bit();
117 cpumask_clear_cpu(cpu, flush_cpumask);
118 smp_mb__after_clear_bit();
119out:
120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count);
122}
123
124void native_flush_tlb_others(const struct cpumask *cpumask,
125 struct mm_struct *mm, unsigned long va)
126{
127 /*
128 * mm must exist :)
129 */
130 BUG_ON(!mm);
131
132 /*
133 * i'm not happy about this global shared spinlock in the
134 * MM hot path, but we'll see how contended it is.
135 * AK: x86-64 has a faster method that could be ported.
136 */
137 spin_lock(&tlbstate_lock);
138
139 cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
140 cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
141
142 /*
143 * If a task whose mm mask we are looking at has descheduled and
144 * has cleared its presence from the mask, or if a CPU which we ran
145 * on has gone down then there might be no flush work left:
146 */
147 if (unlikely(cpumask_empty(flush_cpumask))) {
148 spin_unlock(&tlbstate_lock);
149 return;
150 }
151
152 flush_mm = mm;
153 flush_va = va;
154
155 /*
156 * Make the above memory operations globally visible before
157 * sending the IPI.
158 */
159 smp_mb();
160 /*
161 * We have to send the IPI only to
162 * CPUs affected.
163 */
164 send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
165
166 while (!cpumask_empty(flush_cpumask))
167 /* nothing. lockup detection does not belong here */
168 cpu_relax();
169
170 flush_mm = NULL;
171 flush_va = 0;
172 spin_unlock(&tlbstate_lock);
173}
174
175void flush_tlb_current_task(void)
176{
177 struct mm_struct *mm = current->mm;
178
179 preempt_disable();
180
181 local_flush_tlb();
182 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
183 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
184 preempt_enable();
185}
186
187void flush_tlb_mm(struct mm_struct *mm)
188{
189
190 preempt_disable();
191
192 if (current->active_mm == mm) {
193 if (current->mm)
194 local_flush_tlb();
195 else
196 leave_mm(smp_processor_id());
197 }
198 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
199 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
200
201 preempt_enable();
202}
203
204void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
205{
206 struct mm_struct *mm = vma->vm_mm;
207
208 preempt_disable();
209
210 if (current->active_mm == mm) {
211 if (current->mm)
212 __flush_tlb_one(va);
213 else
214 leave_mm(smp_processor_id());
215 }
216
217 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
218 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
219 preempt_enable();
220}
221EXPORT_SYMBOL(flush_tlb_page);
222
223static void do_flush_tlb_all(void *info)
224{
225 unsigned long cpu = smp_processor_id();
226
227 __flush_tlb_all();
228 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
229 leave_mm(cpu);
230}
231
232void flush_tlb_all(void)
233{
234 on_each_cpu(do_flush_tlb_all, NULL, 1);
235}
236
237void reset_lazy_tlbstate(void)
238{
239 int cpu = raw_smp_processor_id();
240
241 per_cpu(cpu_tlbstate, cpu).state = 0;
242 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
243}
244
245static int init_flush_cpumask(void)
246{
247 alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
248 return 0;
249}
250early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 690dcf1a27d4..aae15dd72604 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,6 +11,7 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12 12
13#include <asm/mmu_context.h> 13#include <asm/mmu_context.h>
14#include <asm/uv/uv.h>
14#include <asm/uv/uv_mmrs.h> 15#include <asm/uv/uv_mmrs.h>
15#include <asm/uv/uv_hub.h> 16#include <asm/uv/uv_hub.h>
16#include <asm/uv/uv_bau.h> 17#include <asm/uv/uv_bau.h>
@@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
209 * 210 *
210 * Send a broadcast and wait for a broadcast message to complete. 211 * Send a broadcast and wait for a broadcast message to complete.
211 * 212 *
212 * The cpumaskp mask contains the cpus the broadcast was sent to. 213 * The flush_mask contains the cpus the broadcast was sent to.
213 * 214 *
214 * Returns 1 if all remote flushing was done. The mask is zeroed. 215 * Returns NULL if all remote flushing was done. The mask is zeroed.
215 * Returns 0 if some remote flushing remains to be done. The mask will have 216 * Returns @flush_mask if some remote flushing remains to be done. The
216 * some bits still set. 217 * mask will have some bits still set.
217 */ 218 */
218int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, 219const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
219 struct cpumask *cpumaskp) 220 struct bau_desc *bau_desc,
221 struct cpumask *flush_mask)
220{ 222{
221 int completion_status = 0; 223 int completion_status = 0;
222 int right_shift; 224 int right_shift;
@@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
263 * Success, so clear the remote cpu's from the mask so we don't 265 * Success, so clear the remote cpu's from the mask so we don't
264 * use the IPI method of shootdown on them. 266 * use the IPI method of shootdown on them.
265 */ 267 */
266 for_each_cpu(bit, cpumaskp) { 268 for_each_cpu(bit, flush_mask) {
267 blade = uv_cpu_to_blade_id(bit); 269 blade = uv_cpu_to_blade_id(bit);
268 if (blade == this_blade) 270 if (blade == this_blade)
269 continue; 271 continue;
270 cpumask_clear_cpu(bit, cpumaskp); 272 cpumask_clear_cpu(bit, flush_mask);
271 } 273 }
272 if (!cpumask_empty(cpumaskp)) 274 if (!cpumask_empty(flush_mask))
273 return 0; 275 return flush_mask;
274 return 1; 276 return NULL;
275} 277}
276 278
277/** 279/**
278 * uv_flush_tlb_others - globally purge translation cache of a virtual 280 * uv_flush_tlb_others - globally purge translation cache of a virtual
279 * address or all TLB's 281 * address or all TLB's
280 * @cpumaskp: mask of all cpu's in which the address is to be removed 282 * @cpumask: mask of all cpu's in which the address is to be removed
281 * @mm: mm_struct containing virtual address range 283 * @mm: mm_struct containing virtual address range
282 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) 284 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
285 * @cpu: the current cpu
283 * 286 *
284 * This is the entry point for initiating any UV global TLB shootdown. 287 * This is the entry point for initiating any UV global TLB shootdown.
285 * 288 *
286 * Purges the translation caches of all specified processors of the given 289 * Purges the translation caches of all specified processors of the given
287 * virtual address, or purges all TLB's on specified processors. 290 * virtual address, or purges all TLB's on specified processors.
288 * 291 *
289 * The caller has derived the cpumaskp from the mm_struct and has subtracted 292 * The caller has derived the cpumask from the mm_struct. This function
290 * the local cpu from the mask. This function is called only if there 293 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
291 * are bits set in the mask. (e.g. flush_tlb_page())
292 * 294 *
293 * The cpumaskp is converted into a nodemask of the nodes containing 295 * The cpumask is converted into a nodemask of the nodes containing
294 * the cpus. 296 * the cpus.
295 * 297 *
296 * Returns 1 if all remote flushing was done. 298 * Note that this function should be called with preemption disabled.
297 * Returns 0 if some remote flushing remains to be done. 299 *
300 * Returns NULL if all remote flushing was done.
301 * Returns pointer to cpumask if some remote flushing remains to be
302 * done. The returned pointer is valid till preemption is re-enabled.
298 */ 303 */
299int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, 304const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
300 unsigned long va) 305 struct mm_struct *mm,
306 unsigned long va, unsigned int cpu)
301{ 307{
308 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
309 struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
302 int i; 310 int i;
303 int bit; 311 int bit;
304 int blade; 312 int blade;
305 int cpu; 313 int uv_cpu;
306 int this_blade; 314 int this_blade;
307 int locals = 0; 315 int locals = 0;
308 struct bau_desc *bau_desc; 316 struct bau_desc *bau_desc;
309 317
310 cpu = uv_blade_processor_id(); 318 WARN_ON(!in_atomic());
319
320 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
321
322 uv_cpu = uv_blade_processor_id();
311 this_blade = uv_numa_blade_id(); 323 this_blade = uv_numa_blade_id();
312 bau_desc = __get_cpu_var(bau_control).descriptor_base; 324 bau_desc = __get_cpu_var(bau_control).descriptor_base;
313 bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; 325 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
314 326
315 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 327 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
316 328
317 i = 0; 329 i = 0;
318 for_each_cpu(bit, cpumaskp) { 330 for_each_cpu(bit, flush_mask) {
319 blade = uv_cpu_to_blade_id(bit); 331 blade = uv_cpu_to_blade_id(bit);
320 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); 332 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
321 if (blade == this_blade) { 333 if (blade == this_blade) {
@@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
330 * no off_node flushing; return status for local node 342 * no off_node flushing; return status for local node
331 */ 343 */
332 if (locals) 344 if (locals)
333 return 0; 345 return flush_mask;
334 else 346 else
335 return 1; 347 return NULL;
336 } 348 }
337 __get_cpu_var(ptcstats).requestor++; 349 __get_cpu_var(ptcstats).requestor++;
338 __get_cpu_var(ptcstats).ntargeted += i; 350 __get_cpu_var(ptcstats).ntargeted += i;
339 351
340 bau_desc->payload.address = va; 352 bau_desc->payload.address = va;
341 bau_desc->payload.sending_cpu = smp_processor_id(); 353 bau_desc->payload.sending_cpu = cpu;
342 354
343 return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); 355 return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
344} 356}
345 357
346/* 358/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284b..ed5aee5f3fcc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,7 +59,6 @@
59#ifdef CONFIG_X86_64 59#ifdef CONFIG_X86_64
60#include <asm/pgalloc.h> 60#include <asm/pgalloc.h>
61#include <asm/proto.h> 61#include <asm/proto.h>
62#include <asm/pda.h>
63#else 62#else
64#include <asm/processor-flags.h> 63#include <asm/processor-flags.h>
65#include <asm/arch_hooks.h> 64#include <asm/arch_hooks.h>
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 82c67559dde7..3eba7f7bac05 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -178,14 +178,7 @@ SECTIONS
178 __initramfs_end = .; 178 __initramfs_end = .;
179 } 179 }
180#endif 180#endif
181 . = ALIGN(PAGE_SIZE); 181 PERCPU(PAGE_SIZE)
182 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
183 __per_cpu_start = .;
184 *(.data.percpu.page_aligned)
185 *(.data.percpu)
186 *(.data.percpu.shared_aligned)
187 __per_cpu_end = .;
188 }
189 . = ALIGN(PAGE_SIZE); 182 . = ALIGN(PAGE_SIZE);
190 /* freed after init ends here */ 183 /* freed after init ends here */
191 184
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0e6bef..c9740996430a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,6 +5,7 @@
5#define LOAD_OFFSET __START_KERNEL_map 5#define LOAD_OFFSET __START_KERNEL_map
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h>
8#include <asm/page.h> 9#include <asm/page.h>
9 10
10#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
@@ -13,12 +14,14 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 14OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 15ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 16jiffies_64 = jiffies;
16_proxy_pda = 1;
17PHDRS { 17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
22 note PT_NOTE FLAGS(0); /* ___ */ 25 note PT_NOTE FLAGS(0); /* ___ */
23} 26}
24SECTIONS 27SECTIONS
@@ -208,14 +211,28 @@ SECTIONS
208 __initramfs_end = .; 211 __initramfs_end = .;
209#endif 212#endif
210 213
214#ifdef CONFIG_SMP
215 /*
216 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
217 * output PHDR, so the next output section - __data_nosave - should
218 * switch it back to data.init. Also, pda should be at the head of
219 * percpu area. Preallocate it and define the percpu offset symbol
220 * so that it can be accessed as a percpu variable.
221 */
222 . = ALIGN(PAGE_SIZE);
223 PERCPU_VADDR(0, :percpu)
224#else
211 PERCPU(PAGE_SIZE) 225 PERCPU(PAGE_SIZE)
226#endif
212 227
213 . = ALIGN(PAGE_SIZE); 228 . = ALIGN(PAGE_SIZE);
214 __init_end = .; 229 __init_end = .;
215 230
216 . = ALIGN(PAGE_SIZE); 231 . = ALIGN(PAGE_SIZE);
217 __nosave_begin = .; 232 __nosave_begin = .;
218 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } 233 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
234 *(.data.nosave)
235 } :data.init /* switch back to data.init, see PERCPU_VADDR() above */
219 . = ALIGN(PAGE_SIZE); 236 . = ALIGN(PAGE_SIZE);
220 __nosave_end = .; 237 __nosave_end = .;
221 238
@@ -244,3 +261,8 @@ SECTIONS
244 */ 261 */
245ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 262ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
246 "kernel image bigger than KERNEL_IMAGE_SIZE") 263 "kernel image bigger than KERNEL_IMAGE_SIZE")
264
265#ifdef CONFIG_SMP
266ASSERT((per_cpu__irq_stack_union == 0),
267 "irq_stack_union is not at start of per-cpu area");
268#endif
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426aa354..3909e3ba5ce3 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy);
58EXPORT_SYMBOL(empty_zero_page); 58EXPORT_SYMBOL(empty_zero_page);
59EXPORT_SYMBOL(init_level4_pgt); 59EXPORT_SYMBOL(init_level4_pgt);
60EXPORT_SYMBOL(load_gs_index); 60EXPORT_SYMBOL(load_gs_index);
61
62EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index a580b9562e76..0ade62555ff3 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -9,6 +9,7 @@
9#include <asm/e820.h> 9#include <asm/e820.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/setup.h> 11#include <asm/setup.h>
12#include <asm/cpu.h>
12 13
13void __init pre_intr_init_hook(void) 14void __init pre_intr_init_hook(void)
14{ 15{
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9840b7ec749a..96f15b09a4c5 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -402,7 +402,7 @@ void __init find_smp_config(void)
402 VOYAGER_SUS_IN_CONTROL_PORT); 402 VOYAGER_SUS_IN_CONTROL_PORT);
403 403
404 current_thread_info()->cpu = boot_cpu_id; 404 current_thread_info()->cpu = boot_cpu_id;
405 x86_write_percpu(cpu_number, boot_cpu_id); 405 percpu_write(cpu_number, boot_cpu_id);
406} 406}
407 407
408/* 408/*
@@ -531,6 +531,7 @@ static void __init do_boot_cpu(__u8 cpu)
531 stack_start.sp = (void *)idle->thread.sp; 531 stack_start.sp = (void *)idle->thread.sp;
532 532
533 init_gdt(cpu); 533 init_gdt(cpu);
534 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
534 per_cpu(current_task, cpu) = idle; 535 per_cpu(current_task, cpu) = idle;
535 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 536 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
536 irq_ctx_init(cpu); 537 irq_ctx_init(cpu);
@@ -1748,6 +1749,7 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
1748static void __cpuinit voyager_smp_prepare_boot_cpu(void) 1749static void __cpuinit voyager_smp_prepare_boot_cpu(void)
1749{ 1750{
1750 init_gdt(smp_processor_id()); 1751 init_gdt(smp_processor_id());
1752 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
1751 switch_to_new_gdt(); 1753 switch_to_new_gdt();
1752 1754
1753 cpu_set(smp_processor_id(), cpu_online_map); 1755 cpu_set(smp_processor_id(), cpu_online_map);
@@ -1780,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus)
1780void __init smp_setup_processor_id(void) 1782void __init smp_setup_processor_id(void)
1781{ 1783{
1782 current_thread_info()->cpu = hard_smp_processor_id(); 1784 current_thread_info()->cpu = hard_smp_processor_id();
1783 x86_write_percpu(cpu_number, hard_smp_processor_id()); 1785 percpu_write(cpu_number, hard_smp_processor_id());
1784} 1786}
1785 1787
1786static void voyager_send_call_func(cpumask_t callmask) 1788static void voyager_send_call_func(cpumask_t callmask)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 90dfae511a41..37242c405f16 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -26,6 +26,7 @@
26#include <linux/kprobes.h> 26#include <linux/kprobes.h>
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/kdebug.h> 28#include <linux/kdebug.h>
29#include <linux/magic.h>
29 30
30#include <asm/system.h> 31#include <asm/system.h>
31#include <asm/desc.h> 32#include <asm/desc.h>
@@ -589,6 +590,8 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
589 unsigned long address; 590 unsigned long address;
590 int write, si_code; 591 int write, si_code;
591 int fault; 592 int fault;
593 unsigned long *stackend;
594
592#ifdef CONFIG_X86_64 595#ifdef CONFIG_X86_64
593 unsigned long flags; 596 unsigned long flags;
594 int sig; 597 int sig;
@@ -841,6 +844,10 @@ no_context:
841 844
842 show_fault_oops(regs, error_code, address); 845 show_fault_oops(regs, error_code, address);
843 846
847 stackend = end_of_stack(tsk);
848 if (*stackend != STACK_END_MAGIC)
849 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
850
844 tsk->thread.cr2 = address; 851 tsk->thread.cr2 = address;
845 tsk->thread.trap_no = 14; 852 tsk->thread.trap_no = 14;
846 tsk->thread.error_code = error_code; 853 tsk->thread.error_code = error_code;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 88f1b10de3be..4a6989e47a53 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,7 +49,6 @@
49#include <asm/paravirt.h> 49#include <asm/paravirt.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/smp.h>
53 52
54unsigned int __VMALLOC_RESERVE = 128 << 20; 53unsigned int __VMALLOC_RESERVE = 128 << 20;
55 54
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 160c42d3eb8f..3be399013de6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -333,11 +333,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
333 req_type & _PAGE_CACHE_MASK); 333 req_type & _PAGE_CACHE_MASK);
334 } 334 }
335 335
336 is_range_ram = pagerange_is_ram(start, end); 336 /*
337 if (is_range_ram == 1) 337 * For legacy reasons, some parts of the physical address range in the
338 return reserve_ram_pages_type(start, end, req_type, new_type); 338 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
339 else if (is_range_ram < 0) 339 * the e820 tables). So we will track the memory attributes of this
340 return -EINVAL; 340 * legacy 1MB region using the linear memtype_list always.
341 */
342 if (end >= ISA_END_ADDRESS) {
343 is_range_ram = pagerange_is_ram(start, end);
344 if (is_range_ram == 1)
345 return reserve_ram_pages_type(start, end, req_type,
346 new_type);
347 else if (is_range_ram < 0)
348 return -EINVAL;
349 }
341 350
342 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 351 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
343 if (!new) 352 if (!new)
@@ -437,11 +446,19 @@ int free_memtype(u64 start, u64 end)
437 if (is_ISA_range(start, end - 1)) 446 if (is_ISA_range(start, end - 1))
438 return 0; 447 return 0;
439 448
440 is_range_ram = pagerange_is_ram(start, end); 449 /*
441 if (is_range_ram == 1) 450 * For legacy reasons, some parts of the physical address range in the
442 return free_ram_pages_type(start, end); 451 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
443 else if (is_range_ram < 0) 452 * the e820 tables). So we will track the memory attributes of this
444 return -EINVAL; 453 * legacy 1MB region using the linear memtype_list always.
454 */
455 if (end >= ISA_END_ADDRESS) {
456 is_range_ram = pagerange_is_ram(start, end);
457 if (is_range_ram == 1)
458 return free_ram_pages_type(start, end);
459 else if (is_range_ram < 0)
460 return -EINVAL;
461 }
445 462
446 spin_lock(&memtype_lock); 463 spin_lock(&memtype_lock);
447 list_for_each_entry(entry, &memtype_list, nd) { 464 list_for_each_entry(entry, &memtype_list, nd) {
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 965539ec425f..bef941f61451 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -695,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0)
695 695
696static void xen_write_cr2(unsigned long cr2) 696static void xen_write_cr2(unsigned long cr2)
697{ 697{
698 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; 698 percpu_read(xen_vcpu)->arch.cr2 = cr2;
699} 699}
700 700
701static unsigned long xen_read_cr2(void) 701static unsigned long xen_read_cr2(void)
702{ 702{
703 return x86_read_percpu(xen_vcpu)->arch.cr2; 703 return percpu_read(xen_vcpu)->arch.cr2;
704} 704}
705 705
706static unsigned long xen_read_cr2_direct(void) 706static unsigned long xen_read_cr2_direct(void)
707{ 707{
708 return x86_read_percpu(xen_vcpu_info.arch.cr2); 708 return percpu_read(xen_vcpu_info.arch.cr2);
709} 709}
710 710
711static void xen_write_cr4(unsigned long cr4) 711static void xen_write_cr4(unsigned long cr4)
@@ -718,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4)
718 718
719static unsigned long xen_read_cr3(void) 719static unsigned long xen_read_cr3(void)
720{ 720{
721 return x86_read_percpu(xen_cr3); 721 return percpu_read(xen_cr3);
722} 722}
723 723
724static void set_current_cr3(void *v) 724static void set_current_cr3(void *v)
725{ 725{
726 x86_write_percpu(xen_current_cr3, (unsigned long)v); 726 percpu_write(xen_current_cr3, (unsigned long)v);
727} 727}
728 728
729static void __xen_write_cr3(bool kernel, unsigned long cr3) 729static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -748,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
748 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 748 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
749 749
750 if (kernel) { 750 if (kernel) {
751 x86_write_percpu(xen_cr3, cr3); 751 percpu_write(xen_cr3, cr3);
752 752
753 /* Update xen_current_cr3 once the batch has actually 753 /* Update xen_current_cr3 once the batch has actually
754 been submitted. */ 754 been submitted. */
@@ -764,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3)
764 764
765 /* Update while interrupts are disabled, so its atomic with 765 /* Update while interrupts are disabled, so its atomic with
766 respect to ipis */ 766 respect to ipis */
767 x86_write_percpu(xen_cr3, cr3); 767 percpu_write(xen_cr3, cr3);
768 768
769 __xen_write_cr3(true, cr3); 769 __xen_write_cr3(true, cr3);
770 770
@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void)
1645#ifdef CONFIG_X86_64 1645#ifdef CONFIG_X86_64
1646 /* Disable until direct per-cpu data access. */ 1646 /* Disable until direct per-cpu data access. */
1647 have_vcpu_info_placement = 0; 1647 have_vcpu_info_placement = 0;
1648 x86_64_init_pda();
1649#endif 1648#endif
1650 1649
1651 xen_smp_init(); 1650 xen_smp_init();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb042608c602..2e8271431e1a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void)
39 struct vcpu_info *vcpu; 39 struct vcpu_info *vcpu;
40 unsigned long flags; 40 unsigned long flags;
41 41
42 vcpu = x86_read_percpu(xen_vcpu); 42 vcpu = percpu_read(xen_vcpu);
43 43
44 /* flag has opposite sense of mask */ 44 /* flag has opposite sense of mask */
45 flags = !vcpu->evtchn_upcall_mask; 45 flags = !vcpu->evtchn_upcall_mask;
@@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags)
62 make sure we're don't switch CPUs between getting the vcpu 62 make sure we're don't switch CPUs between getting the vcpu
63 pointer and updating the mask. */ 63 pointer and updating the mask. */
64 preempt_disable(); 64 preempt_disable();
65 vcpu = x86_read_percpu(xen_vcpu); 65 vcpu = percpu_read(xen_vcpu);
66 vcpu->evtchn_upcall_mask = flags; 66 vcpu->evtchn_upcall_mask = flags;
67 preempt_enable_no_resched(); 67 preempt_enable_no_resched();
68 68
@@ -83,7 +83,7 @@ static void xen_irq_disable(void)
83 make sure we're don't switch CPUs between getting the vcpu 83 make sure we're don't switch CPUs between getting the vcpu
84 pointer and updating the mask. */ 84 pointer and updating the mask. */
85 preempt_disable(); 85 preempt_disable();
86 x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; 86 percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
87 preempt_enable_no_resched(); 87 preempt_enable_no_resched();
88} 88}
89 89
@@ -96,7 +96,7 @@ static void xen_irq_enable(void)
96 the caller is confused and is trying to re-enable interrupts 96 the caller is confused and is trying to re-enable interrupts
97 on an indeterminate processor. */ 97 on an indeterminate processor. */
98 98
99 vcpu = x86_read_percpu(xen_vcpu); 99 vcpu = percpu_read(xen_vcpu);
100 vcpu->evtchn_upcall_mask = 0; 100 vcpu->evtchn_upcall_mask = 0;
101 101
102 /* Doesn't matter if we get preempted here, because any 102 /* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 503c240e26c7..98cb9869eb24 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1063,18 +1063,14 @@ static void drop_other_mm_ref(void *info)
1063 struct mm_struct *mm = info; 1063 struct mm_struct *mm = info;
1064 struct mm_struct *active_mm; 1064 struct mm_struct *active_mm;
1065 1065
1066#ifdef CONFIG_X86_64 1066 active_mm = percpu_read(cpu_tlbstate.active_mm);
1067 active_mm = read_pda(active_mm);
1068#else
1069 active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
1070#endif
1071 1067
1072 if (active_mm == mm) 1068 if (active_mm == mm)
1073 leave_mm(smp_processor_id()); 1069 leave_mm(smp_processor_id());
1074 1070
1075 /* If this cpu still has a stale cr3 reference, then make sure 1071 /* If this cpu still has a stale cr3 reference, then make sure
1076 it has been flushed. */ 1072 it has been flushed. */
1077 if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { 1073 if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
1078 load_cr3(swapper_pg_dir); 1074 load_cr3(swapper_pg_dir);
1079 arch_flush_lazy_cpu_mode(); 1075 arch_flush_lazy_cpu_mode();
1080 } 1076 }
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 858938241616..e786fa7f2615 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode)
39 xen_mc_flush(); 39 xen_mc_flush();
40 40
41 /* restore flags saved in xen_mc_batch */ 41 /* restore flags saved in xen_mc_batch */
42 local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); 42 local_irq_restore(percpu_read(xen_mc_irq_flags));
43} 43}
44 44
45/* Set up a callback to be called when the current batch is flushed */ 45/* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c44e2069c7c7..72c2eb9b64cd 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
50 */ 50 */
51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) 51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
52{ 52{
53#ifdef CONFIG_X86_32 53 inc_irq_stat(irq_resched_count);
54 __get_cpu_var(irq_stat).irq_resched_count++;
55#else
56 add_pda(irq_resched_count, 1);
57#endif
58 54
59 return IRQ_HANDLED; 55 return IRQ_HANDLED;
60} 56}
@@ -78,7 +74,7 @@ static __cpuinit void cpu_bringup(void)
78 xen_setup_cpu_clockevents(); 74 xen_setup_cpu_clockevents();
79 75
80 cpu_set(cpu, cpu_online_map); 76 cpu_set(cpu, cpu_online_map);
81 x86_write_percpu(cpu_state, CPU_ONLINE); 77 percpu_write(cpu_state, CPU_ONLINE);
82 wmb(); 78 wmb();
83 79
84 /* We can take interrupts now: we're officially "up". */ 80 /* We can take interrupts now: we're officially "up". */
@@ -283,22 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
283 struct task_struct *idle = idle_task(cpu); 279 struct task_struct *idle = idle_task(cpu);
284 int rc; 280 int rc;
285 281
286#ifdef CONFIG_X86_64 282 per_cpu(current_task, cpu) = idle;
287 /* Allocate node local memory for AP pdas */
288 WARN_ON(cpu == 0);
289 if (cpu > 0) {
290 rc = get_local_pda(cpu);
291 if (rc)
292 return rc;
293 }
294#endif
295
296#ifdef CONFIG_X86_32 283#ifdef CONFIG_X86_32
297 init_gdt(cpu); 284 init_gdt(cpu);
298 per_cpu(current_task, cpu) = idle;
299 irq_ctx_init(cpu); 285 irq_ctx_init(cpu);
300#else 286#else
301 cpu_pda(cpu)->pcurrent = idle;
302 clear_tsk_thread_flag(idle, TIF_FORK); 287 clear_tsk_thread_flag(idle, TIF_FORK);
303#endif 288#endif
304 xen_setup_timer(cpu); 289 xen_setup_timer(cpu);
@@ -445,11 +430,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
445{ 430{
446 irq_enter(); 431 irq_enter();
447 generic_smp_call_function_interrupt(); 432 generic_smp_call_function_interrupt();
448#ifdef CONFIG_X86_32 433 inc_irq_stat(irq_call_count);
449 __get_cpu_var(irq_stat).irq_call_count++;
450#else
451 add_pda(irq_call_count, 1);
452#endif
453 irq_exit(); 434 irq_exit();
454 435
455 return IRQ_HANDLED; 436 return IRQ_HANDLED;
@@ -459,11 +440,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
459{ 440{
460 irq_enter(); 441 irq_enter();
461 generic_smp_call_function_single_interrupt(); 442 generic_smp_call_function_single_interrupt();
462#ifdef CONFIG_X86_32 443 inc_irq_stat(irq_call_count);
463 __get_cpu_var(irq_stat).irq_call_count++;
464#else
465 add_pda(irq_call_count, 1);
466#endif
467 irq_exit(); 444 irq_exit();
468 445
469 return IRQ_HANDLED; 446 return IRQ_HANDLED;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 05794c566e87..d6fc51f4ce85 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -17,6 +17,7 @@
17#include <asm/processor-flags.h> 17#include <asm/processor-flags.h>
18#include <asm/errno.h> 18#include <asm/errno.h>
19#include <asm/segment.h> 19#include <asm/segment.h>
20#include <asm/percpu.h>
20 21
21#include <xen/interface/xen.h> 22#include <xen/interface/xen.h>
22 23
@@ -28,12 +29,10 @@
28 29
29#if 1 30#if 1
30/* 31/*
31 x86-64 does not yet support direct access to percpu variables 32 FIXME: x86_64 now can support direct access to percpu variables
32 via a segment override, so we just need to make sure this code 33 via a segment override. Update xen accordingly.
33 never gets used
34 */ 34 */
35#define BUG ud2a 35#define BUG ud2a
36#define PER_CPU_VAR(var, off) 0xdeadbeef
37#endif 36#endif
38 37
39/* 38/*
@@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct)
45 BUG 44 BUG
46 45
47 /* Unmask events */ 46 /* Unmask events */
48 movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 47 movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
49 48
50 /* Preempt here doesn't matter because that will deal with 49 /* Preempt here doesn't matter because that will deal with
51 any pending interrupts. The pending check may end up being 50 any pending interrupts. The pending check may end up being
52 run on the wrong CPU, but that doesn't hurt. */ 51 run on the wrong CPU, but that doesn't hurt. */
53 52
54 /* Test for pending */ 53 /* Test for pending */
55 testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) 54 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
56 jz 1f 55 jz 1f
57 56
582: call check_events 572: call check_events
@@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct)
69ENTRY(xen_irq_disable_direct) 68ENTRY(xen_irq_disable_direct)
70 BUG 69 BUG
71 70
72 movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 71 movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
73ENDPATCH(xen_irq_disable_direct) 72ENDPATCH(xen_irq_disable_direct)
74 ret 73 ret
75 ENDPROC(xen_irq_disable_direct) 74 ENDPROC(xen_irq_disable_direct)
@@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct)
87ENTRY(xen_save_fl_direct) 86ENTRY(xen_save_fl_direct)
88 BUG 87 BUG
89 88
90 testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 89 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
91 setz %ah 90 setz %ah
92 addb %ah,%ah 91 addb %ah,%ah
93ENDPATCH(xen_save_fl_direct) 92ENDPATCH(xen_save_fl_direct)
@@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct)
107 BUG 106 BUG
108 107
109 testb $X86_EFLAGS_IF>>8, %ah 108 testb $X86_EFLAGS_IF>>8, %ah
110 setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 109 setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
111 /* Preempt here doesn't matter because that will deal with 110 /* Preempt here doesn't matter because that will deal with
112 any pending interrupts. The pending check may end up being 111 any pending interrupts. The pending check may end up being
113 run on the wrong CPU, but that doesn't hurt. */ 112 run on the wrong CPU, but that doesn't hurt. */
114 113
115 /* check for unmasked and pending */ 114 /* check for unmasked and pending */
116 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) 115 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
117 jz 1f 116 jz 1f
1182: call check_events 1172: call check_events
1191: 1181:
@@ -195,11 +194,11 @@ RELOC(xen_sysexit, 1b+1)
195ENTRY(xen_sysret64) 194ENTRY(xen_sysret64)
196 /* We're already on the usermode stack at this point, but still 195 /* We're already on the usermode stack at this point, but still
197 with the kernel gs, so we can easily switch back */ 196 with the kernel gs, so we can easily switch back */
198 movq %rsp, %gs:pda_oldrsp 197 movq %rsp, PER_CPU_VAR(old_rsp)
199 movq %gs:pda_kernelstack,%rsp 198 movq PER_CPU_VAR(kernel_stack),%rsp
200 199
201 pushq $__USER_DS 200 pushq $__USER_DS
202 pushq %gs:pda_oldrsp 201 pushq PER_CPU_VAR(old_rsp)
203 pushq %r11 202 pushq %r11
204 pushq $__USER_CS 203 pushq $__USER_CS
205 pushq %rcx 204 pushq %rcx
@@ -212,11 +211,11 @@ RELOC(xen_sysret64, 1b+1)
212ENTRY(xen_sysret32) 211ENTRY(xen_sysret32)
213 /* We're already on the usermode stack at this point, but still 212 /* We're already on the usermode stack at this point, but still
214 with the kernel gs, so we can easily switch back */ 213 with the kernel gs, so we can easily switch back */
215 movq %rsp, %gs:pda_oldrsp 214 movq %rsp, PER_CPU_VAR(old_rsp)
216 movq %gs:pda_kernelstack, %rsp 215 movq PER_CPU_VAR(kernel_stack), %rsp
217 216
218 pushq $__USER32_DS 217 pushq $__USER32_DS
219 pushq %gs:pda_oldrsp 218 pushq PER_CPU_VAR(old_rsp)
220 pushq %r11 219 pushq %r11
221 pushq $__USER32_CS 220 pushq $__USER32_CS
222 pushq %rcx 221 pushq %rcx
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index f78371b22529..5a57753ea9fc 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -6,6 +6,7 @@
6#include <linux/irq.h> 6#include <linux/irq.h>
7#include <asm/io_apic.h> 7#include <asm/io_apic.h>
8#include <asm/smp.h> 8#include <asm/smp.h>
9#include <asm/cpu.h>
9#include <linux/intel-iommu.h> 10#include <linux/intel-iommu.h>
10#include "intr_remapping.h" 11#include "intr_remapping.h"
11 12
diff --git a/include/asm-generic/bitops/__ffs.h b/include/asm-generic/bitops/__ffs.h
index 9a3274aecf83..937d7c435575 100644
--- a/include/asm-generic/bitops/__ffs.h
+++ b/include/asm-generic/bitops/__ffs.h
@@ -9,7 +9,7 @@
9 * 9 *
10 * Undefined if no bit exists, so code should check against 0 first. 10 * Undefined if no bit exists, so code should check against 0 first.
11 */ 11 */
12static inline unsigned long __ffs(unsigned long word) 12static __always_inline unsigned long __ffs(unsigned long word)
13{ 13{
14 int num = 0; 14 int num = 0;
15 15
diff --git a/include/asm-generic/bitops/__fls.h b/include/asm-generic/bitops/__fls.h
index be24465403d6..a60a7ccb6782 100644
--- a/include/asm-generic/bitops/__fls.h
+++ b/include/asm-generic/bitops/__fls.h
@@ -9,7 +9,7 @@
9 * 9 *
10 * Undefined if no set bit exists, so code should check against 0 first. 10 * Undefined if no set bit exists, so code should check against 0 first.
11 */ 11 */
12static inline unsigned long __fls(unsigned long word) 12static __always_inline unsigned long __fls(unsigned long word)
13{ 13{
14 int num = BITS_PER_LONG - 1; 14 int num = BITS_PER_LONG - 1;
15 15
diff --git a/include/asm-generic/bitops/fls.h b/include/asm-generic/bitops/fls.h
index 850859bc5069..0576d1f42f43 100644
--- a/include/asm-generic/bitops/fls.h
+++ b/include/asm-generic/bitops/fls.h
@@ -9,7 +9,7 @@
9 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32. 9 * Note fls(0) = 0, fls(1) = 1, fls(0x80000000) = 32.
10 */ 10 */
11 11
12static inline int fls(int x) 12static __always_inline int fls(int x)
13{ 13{
14 int r = 32; 14 int r = 32;
15 15
diff --git a/include/asm-generic/bitops/fls64.h b/include/asm-generic/bitops/fls64.h
index 86d403f8b256..b097cf8444e3 100644
--- a/include/asm-generic/bitops/fls64.h
+++ b/include/asm-generic/bitops/fls64.h
@@ -15,7 +15,7 @@
15 * at position 64. 15 * at position 64.
16 */ 16 */
17#if BITS_PER_LONG == 32 17#if BITS_PER_LONG == 32
18static inline int fls64(__u64 x) 18static __always_inline int fls64(__u64 x)
19{ 19{
20 __u32 h = x >> 32; 20 __u32 h = x >> 32;
21 if (h) 21 if (h)
@@ -23,7 +23,7 @@ static inline int fls64(__u64 x)
23 return fls(x); 23 return fls(x);
24} 24}
25#elif BITS_PER_LONG == 64 25#elif BITS_PER_LONG == 64
26static inline int fls64(__u64 x) 26static __always_inline int fls64(__u64 x)
27{ 27{
28 if (x == 0) 28 if (x == 0)
29 return 0; 29 return 0;
diff --git a/include/asm-generic/percpu.h b/include/asm-generic/percpu.h
index b0e63c672ebd..00f45ff081a6 100644
--- a/include/asm-generic/percpu.h
+++ b/include/asm-generic/percpu.h
@@ -80,4 +80,56 @@ extern void setup_per_cpu_areas(void);
80#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \ 80#define DECLARE_PER_CPU(type, name) extern PER_CPU_ATTRIBUTES \
81 __typeof__(type) per_cpu_var(name) 81 __typeof__(type) per_cpu_var(name)
82 82
83/*
84 * Optional methods for optimized non-lvalue per-cpu variable access.
85 *
86 * @var can be a percpu variable or a field of it and its size should
87 * equal char, int or long. percpu_read() evaluates to an rvalue and
88 * all others to void.
89 *
90 * These operations are guaranteed to be atomic w.r.t. preemption.
91 * The generic versions use plain get/put_cpu_var(). Archs are
92 * encouraged to implement single-instruction alternatives which don't
93 * require preemption protection.
94 */
95#ifndef percpu_read
96# define percpu_read(var) \
97 ({ \
98 typeof(per_cpu_var(var)) __tmp_var__; \
99 __tmp_var__ = get_cpu_var(var); \
100 put_cpu_var(var); \
101 __tmp_var__; \
102 })
103#endif
104
105#define __percpu_generic_to_op(var, val, op) \
106do { \
107 get_cpu_var(var) op val; \
108 put_cpu_var(var); \
109} while (0)
110
111#ifndef percpu_write
112# define percpu_write(var, val) __percpu_generic_to_op(var, (val), =)
113#endif
114
115#ifndef percpu_add
116# define percpu_add(var, val) __percpu_generic_to_op(var, (val), +=)
117#endif
118
119#ifndef percpu_sub
120# define percpu_sub(var, val) __percpu_generic_to_op(var, (val), -=)
121#endif
122
123#ifndef percpu_and
124# define percpu_and(var, val) __percpu_generic_to_op(var, (val), &=)
125#endif
126
127#ifndef percpu_or
128# define percpu_or(var, val) __percpu_generic_to_op(var, (val), |=)
129#endif
130
131#ifndef percpu_xor
132# define percpu_xor(var, val) __percpu_generic_to_op(var, (val), ^=)
133#endif
134
83#endif /* _ASM_GENERIC_PERCPU_H_ */ 135#endif /* _ASM_GENERIC_PERCPU_H_ */
diff --git a/include/asm-generic/sections.h b/include/asm-generic/sections.h
index 79a7ff925bf8..4ce48e878530 100644
--- a/include/asm-generic/sections.h
+++ b/include/asm-generic/sections.h
@@ -9,7 +9,7 @@ extern char __bss_start[], __bss_stop[];
9extern char __init_begin[], __init_end[]; 9extern char __init_begin[], __init_end[];
10extern char _sinittext[], _einittext[]; 10extern char _sinittext[], _einittext[];
11extern char _end[]; 11extern char _end[];
12extern char __per_cpu_start[], __per_cpu_end[]; 12extern char __per_cpu_load[], __per_cpu_start[], __per_cpu_end[];
13extern char __kprobes_text_start[], __kprobes_text_end[]; 13extern char __kprobes_text_start[], __kprobes_text_end[];
14extern char __initdata_begin[], __initdata_end[]; 14extern char __initdata_begin[], __initdata_end[];
15extern char __start_rodata[], __end_rodata[]; 15extern char __start_rodata[], __end_rodata[];
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index c61fab1dd2f8..53e21f36a802 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -430,12 +430,47 @@
430 *(.initcall7.init) \ 430 *(.initcall7.init) \
431 *(.initcall7s.init) 431 *(.initcall7s.init)
432 432
433#define PERCPU(align) \ 433/**
434 . = ALIGN(align); \ 434 * PERCPU_VADDR - define output section for percpu area
435 VMLINUX_SYMBOL(__per_cpu_start) = .; \ 435 * @vaddr: explicit base address (optional)
436 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ 436 * @phdr: destination PHDR (optional)
437 *
438 * Macro which expands to output section for percpu area. If @vaddr
439 * is not blank, it specifies explicit base address and all percpu
440 * symbols will be offset from the given address. If blank, @vaddr
441 * always equals @laddr + LOAD_OFFSET.
442 *
443 * @phdr defines the output PHDR to use if not blank. Be warned that
444 * output PHDR is sticky. If @phdr is specified, the next output
445 * section in the linker script will go there too. @phdr should have
446 * a leading colon.
447 *
448 * This macro defines three symbols, __per_cpu_load, __per_cpu_start
449 * and __per_cpu_end. The first one is the vaddr of loaded percpu
450 * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
451 * end offset.
452 */
453#define PERCPU_VADDR(vaddr, phdr) \
454 VMLINUX_SYMBOL(__per_cpu_load) = .; \
455 .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
456 - LOAD_OFFSET) { \
457 VMLINUX_SYMBOL(__per_cpu_start) = .; \
458 *(.data.percpu.first) \
437 *(.data.percpu.page_aligned) \ 459 *(.data.percpu.page_aligned) \
438 *(.data.percpu) \ 460 *(.data.percpu) \
439 *(.data.percpu.shared_aligned) \ 461 *(.data.percpu.shared_aligned) \
440 } \ 462 VMLINUX_SYMBOL(__per_cpu_end) = .; \
441 VMLINUX_SYMBOL(__per_cpu_end) = .; 463 } phdr \
464 . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
465
466/**
467 * PERCPU - define output section for percpu area, simple version
468 * @align: required alignment
469 *
470 * Align to @align and outputs output section for percpu area. This
471 * macro doesn't manipulate @vaddr or @phdr and __per_cpu_load and
472 * __per_cpu_start will be identical.
473 */
474#define PERCPU(align) \
475 . = ALIGN(align); \
476 PERCPU_VADDR( , )
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 439f6f3cb0c4..561a5ff92c88 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -47,4 +47,5 @@
47#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA 47#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
48#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA 48#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA
49 49
50#define STACK_END_MAGIC 0x57AC6E9D
50#endif /* __LINUX_MAGIC_H__ */ 51#endif /* __LINUX_MAGIC_H__ */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 9f2a3751873a..0e24202b5a4e 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,34 +9,39 @@
9#include <asm/percpu.h> 9#include <asm/percpu.h>
10 10
11#ifdef CONFIG_SMP 11#ifdef CONFIG_SMP
12#define DEFINE_PER_CPU(type, name) \ 12#define PER_CPU_BASE_SECTION ".data.percpu"
13 __attribute__((__section__(".data.percpu"))) \
14 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
15 13
16#ifdef MODULE 14#ifdef MODULE
17#define SHARED_ALIGNED_SECTION ".data.percpu" 15#define PER_CPU_SHARED_ALIGNED_SECTION ""
18#else 16#else
19#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" 17#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
20#endif 18#endif
19#define PER_CPU_FIRST_SECTION ".first"
21 20
22#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ 21#else
23 __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ 22
24 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ 23#define PER_CPU_BASE_SECTION ".data"
25 ____cacheline_aligned_in_smp 24#define PER_CPU_SHARED_ALIGNED_SECTION ""
25#define PER_CPU_FIRST_SECTION ""
26
27#endif
26 28
27#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ 29#define DEFINE_PER_CPU_SECTION(type, name, section) \
28 __attribute__((__section__(".data.percpu.page_aligned"))) \ 30 __attribute__((__section__(PER_CPU_BASE_SECTION section))) \
29 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name 31 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
30#else 32
31#define DEFINE_PER_CPU(type, name) \ 33#define DEFINE_PER_CPU(type, name) \
32 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name 34 DEFINE_PER_CPU_SECTION(type, name, "")
33 35
34#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ 36#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
35 DEFINE_PER_CPU(type, name) 37 DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
38 ____cacheline_aligned_in_smp
36 39
37#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ 40#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
38 DEFINE_PER_CPU(type, name) 41 DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
39#endif 42
43#define DEFINE_PER_CPU_FIRST(type, name) \
44 DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
40 45
41#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) 46#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
42#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) 47#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 4cae9b81a1f8..a85b0cec7d12 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1157,10 +1157,9 @@ struct task_struct {
1157 pid_t pid; 1157 pid_t pid;
1158 pid_t tgid; 1158 pid_t tgid;
1159 1159
1160#ifdef CONFIG_CC_STACKPROTECTOR
1161 /* Canary value for the -fstack-protector gcc feature */ 1160 /* Canary value for the -fstack-protector gcc feature */
1162 unsigned long stack_canary; 1161 unsigned long stack_canary;
1163#endif 1162
1164 /* 1163 /*
1165 * pointers to (original) parent process, youngest child, younger sibling, 1164 * pointers to (original) parent process, youngest child, younger sibling,
1166 * older sibling, respectively. (p->father can be replaced with 1165 * older sibling, respectively. (p->father can be replaced with
@@ -2066,6 +2065,19 @@ static inline int object_is_on_stack(void *obj)
2066 2065
2067extern void thread_info_cache_init(void); 2066extern void thread_info_cache_init(void);
2068 2067
2068#ifdef CONFIG_DEBUG_STACK_USAGE
2069static inline unsigned long stack_not_used(struct task_struct *p)
2070{
2071 unsigned long *n = end_of_stack(p);
2072
2073 do { /* Skip over canary */
2074 n++;
2075 } while (!*n);
2076
2077 return (unsigned long)n - (unsigned long)end_of_stack(p);
2078}
2079#endif
2080
2069/* set thread flags in other task's structures 2081/* set thread flags in other task's structures
2070 * - see asm/thread_info.h for TIF_xxxx flags available 2082 * - see asm/thread_info.h for TIF_xxxx flags available
2071 */ 2083 */
diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h
new file mode 100644
index 000000000000..6f3e54c704c0
--- /dev/null
+++ b/include/linux/stackprotector.h
@@ -0,0 +1,16 @@
1#ifndef _LINUX_STACKPROTECTOR_H
2#define _LINUX_STACKPROTECTOR_H 1
3
4#include <linux/compiler.h>
5#include <linux/sched.h>
6#include <linux/random.h>
7
8#ifdef CONFIG_CC_STACKPROTECTOR
9# include <asm/stackprotector.h>
10#else
11static inline void boot_init_stack_canary(void)
12{
13}
14#endif
15
16#endif
diff --git a/init/main.c b/init/main.c
index 844209453c02..bfe4fb0c9842 100644
--- a/init/main.c
+++ b/init/main.c
@@ -14,6 +14,7 @@
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/stackprotector.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/ctype.h> 19#include <linux/ctype.h>
19#include <linux/delay.h> 20#include <linux/delay.h>
@@ -539,6 +540,12 @@ asmlinkage void __init start_kernel(void)
539 */ 540 */
540 lockdep_init(); 541 lockdep_init();
541 debug_objects_early_init(); 542 debug_objects_early_init();
543
544 /*
545 * Set up the initial canary ASAP:
546 */
547 boot_init_stack_canary();
548
542 cgroup_init_early(); 549 cgroup_init_early();
543 550
544 local_irq_disable(); 551 local_irq_disable();
diff --git a/kernel/exit.c b/kernel/exit.c
index c7740fa3252c..2a803c28df9e 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -977,12 +977,9 @@ static void check_stack_usage(void)
977{ 977{
978 static DEFINE_SPINLOCK(low_water_lock); 978 static DEFINE_SPINLOCK(low_water_lock);
979 static int lowest_to_date = THREAD_SIZE; 979 static int lowest_to_date = THREAD_SIZE;
980 unsigned long *n = end_of_stack(current);
981 unsigned long free; 980 unsigned long free;
982 981
983 while (*n == 0) 982 free = stack_not_used(current);
984 n++;
985 free = (unsigned long)n - (unsigned long)end_of_stack(current);
986 983
987 if (free >= lowest_to_date) 984 if (free >= lowest_to_date)
988 return; 985 return;
diff --git a/kernel/fork.c b/kernel/fork.c
index 1d68f1255dd8..4a9b318dad0d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,6 +61,7 @@
61#include <linux/proc_fs.h> 61#include <linux/proc_fs.h>
62#include <linux/blkdev.h> 62#include <linux/blkdev.h>
63#include <trace/sched.h> 63#include <trace/sched.h>
64#include <linux/magic.h>
64 65
65#include <asm/pgtable.h> 66#include <asm/pgtable.h>
66#include <asm/pgalloc.h> 67#include <asm/pgalloc.h>
@@ -212,6 +213,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
212{ 213{
213 struct task_struct *tsk; 214 struct task_struct *tsk;
214 struct thread_info *ti; 215 struct thread_info *ti;
216 unsigned long *stackend;
217
215 int err; 218 int err;
216 219
217 prepare_to_copy(orig); 220 prepare_to_copy(orig);
@@ -237,6 +240,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
237 goto out; 240 goto out;
238 241
239 setup_thread_stack(tsk, orig); 242 setup_thread_stack(tsk, orig);
243 stackend = end_of_stack(tsk);
244 *stackend = STACK_END_MAGIC; /* for overflow detection */
240 245
241#ifdef CONFIG_CC_STACKPROTECTOR 246#ifdef CONFIG_CC_STACKPROTECTOR
242 tsk->stack_canary = get_random_int(); 247 tsk->stack_canary = get_random_int();
diff --git a/kernel/panic.c b/kernel/panic.c
index 2a2ff36ff44d..33cab3de1763 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -74,6 +74,9 @@ NORET_TYPE void panic(const char * fmt, ...)
74 vsnprintf(buf, sizeof(buf), fmt, args); 74 vsnprintf(buf, sizeof(buf), fmt, args);
75 va_end(args); 75 va_end(args);
76 printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); 76 printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
77#ifdef CONFIG_DEBUG_BUGVERBOSE
78 dump_stack();
79#endif
77 bust_spinlocks(0); 80 bust_spinlocks(0);
78 81
79 /* 82 /*
@@ -355,15 +358,22 @@ EXPORT_SYMBOL(warn_slowpath);
355#endif 358#endif
356 359
357#ifdef CONFIG_CC_STACKPROTECTOR 360#ifdef CONFIG_CC_STACKPROTECTOR
361
362#ifndef GCC_HAS_SP
363#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this.
364#endif
365
358/* 366/*
359 * Called when gcc's -fstack-protector feature is used, and 367 * Called when gcc's -fstack-protector feature is used, and
360 * gcc detects corruption of the on-stack canary value 368 * gcc detects corruption of the on-stack canary value
361 */ 369 */
362void __stack_chk_fail(void) 370void __stack_chk_fail(void)
363{ 371{
364 panic("stack-protector: Kernel stack is corrupted"); 372 panic("stack-protector: Kernel stack is corrupted in: %p\n",
373 __builtin_return_address(0));
365} 374}
366EXPORT_SYMBOL(__stack_chk_fail); 375EXPORT_SYMBOL(__stack_chk_fail);
376
367#endif 377#endif
368 378
369core_param(panic, panic_timeout, int, 0644); 379core_param(panic, panic_timeout, int, 0644);
diff --git a/kernel/sched.c b/kernel/sched.c
index 8be2c13b50d0..1d2909067040 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5939,12 +5939,7 @@ void sched_show_task(struct task_struct *p)
5939 printk(KERN_CONT " %016lx ", thread_saved_pc(p)); 5939 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
5940#endif 5940#endif
5941#ifdef CONFIG_DEBUG_STACK_USAGE 5941#ifdef CONFIG_DEBUG_STACK_USAGE
5942 { 5942 free = stack_not_used(p);
5943 unsigned long *n = end_of_stack(p);
5944 while (!*n)
5945 n++;
5946 free = (unsigned long)n - (unsigned long)end_of_stack(p);
5947 }
5948#endif 5943#endif
5949 printk(KERN_CONT "%5lu %5d %6d\n", free, 5944 printk(KERN_CONT "%5lu %5d %6d\n", free,
5950 task_pid_nr(p), task_pid_nr(p->real_parent)); 5945 task_pid_nr(p), task_pid_nr(p->real_parent));