aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/ia64/include/asm/topology.h2
-rw-r--r--arch/x86/Kconfig30
-rw-r--r--arch/x86/Kconfig.cpu10
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/include/asm/genapic_32.h7
-rw-r--r--arch/x86/include/asm/genapic_64.h6
-rw-r--r--arch/x86/include/asm/hardirq.h50
-rw-r--r--arch/x86/include/asm/hardirq_32.h34
-rw-r--r--arch/x86/include/asm/hardirq_64.h40
-rw-r--r--arch/x86/include/asm/irq_regs.h36
-rw-r--r--arch/x86/include/asm/irq_regs_32.h31
-rw-r--r--arch/x86/include/asm/irq_regs_64.h1
-rw-r--r--arch/x86/include/asm/irq_vectors.h36
-rw-r--r--arch/x86/include/asm/mach-default/entry_arch.h18
-rw-r--r--arch/x86/include/asm/mmu_context.h63
-rw-r--r--arch/x86/include/asm/mmu_context_32.h55
-rw-r--r--arch/x86/include/asm/mmu_context_64.h52
-rw-r--r--arch/x86/include/asm/pda.h45
-rw-r--r--arch/x86/include/asm/percpu.h8
-rw-r--r--arch/x86/include/asm/pgtable_64.h1
-rw-r--r--arch/x86/include/asm/processor.h23
-rw-r--r--arch/x86/include/asm/smp.h1
-rw-r--r--arch/x86/include/asm/stackprotector.h38
-rw-r--r--arch/x86/include/asm/system.h19
-rw-r--r--arch/x86/include/asm/topology.h17
-rw-r--r--arch/x86/include/asm/uv/uv.h33
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h2
-rw-r--r--arch/x86/kernel/Makefile8
-rw-r--r--arch/x86/kernel/apic.c7
-rw-r--r--arch/x86/kernel/asm-offsets_64.c5
-rw-r--r--arch/x86/kernel/cpu/common.c44
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c33
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c1
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/efi_64.c1
-rw-r--r--arch/x86/kernel/entry_32.S6
-rw-r--r--arch/x86/kernel/entry_64.S2
-rw-r--r--arch/x86/kernel/genapic_64.c2
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/head64.c2
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/head_64.S37
-rw-r--r--arch/x86/kernel/io_apic.c2
-rw-r--r--arch/x86/kernel/irq_64.c4
-rw-r--r--arch/x86/kernel/irqinit_32.c11
-rw-r--r--arch/x86/kernel/process_32.c1
-rw-r--r--arch/x86/kernel/process_64.c21
-rw-r--r--arch/x86/kernel/setup_percpu.c34
-rw-r--r--arch/x86/kernel/smpboot.c1
-rw-r--r--arch/x86/kernel/tlb_32.c239
-rw-r--r--arch/x86/kernel/tlb_uv.c68
-rw-r--r--arch/x86/kernel/traps.c1
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S8
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/fault.c445
-rw-r--r--arch/x86/mm/srat_64.c1
-rw-r--r--arch/x86/mm/tlb.c (renamed from arch/x86/kernel/tlb_64.c)64
-rw-r--r--arch/x86/xen/enlighten.c1
-rw-r--r--drivers/misc/Kconfig4
-rw-r--r--drivers/misc/sgi-gru/gru.h2
-rw-r--r--drivers/misc/sgi-xp/xp.h2
-rw-r--r--include/asm-generic/vmlinux.lds.h46
-rw-r--r--include/linux/magic.h1
-rw-r--r--include/linux/percpu.h41
-rw-r--r--include/linux/sched.h16
-rw-r--r--include/linux/stackprotector.h16
-rw-r--r--init/main.c7
-rw-r--r--kernel/exit.c5
-rw-r--r--kernel/fork.c5
-rw-r--r--kernel/panic.c12
-rw-r--r--kernel/sched.c7
-rw-r--r--kernel/workqueue.c20
73 files changed, 874 insertions, 1031 deletions
diff --git a/arch/ia64/include/asm/topology.h b/arch/ia64/include/asm/topology.h
index 32f3af1641c5..3193f4417e16 100644
--- a/arch/ia64/include/asm/topology.h
+++ b/arch/ia64/include/asm/topology.h
@@ -84,7 +84,7 @@ void build_cpu_to_node_map(void);
84 .child = NULL, \ 84 .child = NULL, \
85 .groups = NULL, \ 85 .groups = NULL, \
86 .min_interval = 8, \ 86 .min_interval = 8, \
87 .max_interval = 8*(min(num_online_cpus(), 32)), \ 87 .max_interval = 8*(min(num_online_cpus(), 32U)), \
88 .busy_factor = 64, \ 88 .busy_factor = 64, \
89 .imbalance_pct = 125, \ 89 .imbalance_pct = 125, \
90 .cache_nice_tries = 2, \ 90 .cache_nice_tries = 2, \
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1f4844505765..2a75931567c4 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -391,6 +391,13 @@ config X86_RDC321X
391 as R-8610-(G). 391 as R-8610-(G).
392 If you don't have one of these chips, you should say N here. 392 If you don't have one of these chips, you should say N here.
393 393
394config X86_UV
395 bool "SGI Ultraviolet"
396 depends on X86_64
397 help
398 This option is needed in order to support SGI Ultraviolet systems.
399 If you don't have one of these, you should say N here.
400
394config SCHED_OMIT_FRAME_POINTER 401config SCHED_OMIT_FRAME_POINTER
395 def_bool y 402 def_bool y
396 prompt "Single-depth WCHAN output" 403 prompt "Single-depth WCHAN output"
@@ -1341,13 +1348,17 @@ config SECCOMP
1341 1348
1342 If unsure, say Y. Only embedded should say N here. 1349 If unsure, say Y. Only embedded should say N here.
1343 1350
1351config CC_STACKPROTECTOR_ALL
1352 bool
1353
1344config CC_STACKPROTECTOR 1354config CC_STACKPROTECTOR
1345 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" 1355 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
1346 depends on X86_64 && EXPERIMENTAL && BROKEN 1356 depends on X86_64
1357 select CC_STACKPROTECTOR_ALL
1347 help 1358 help
1348 This option turns on the -fstack-protector GCC feature. This 1359 This option turns on the -fstack-protector GCC feature. This
1349 feature puts, at the beginning of critical functions, a canary 1360 feature puts, at the beginning of functions, a canary value on
1350 value on the stack just before the return address, and validates 1361 the stack just before the return address, and validates
1351 the value just before actually returning. Stack based buffer 1362 the value just before actually returning. Stack based buffer
1352 overflows (that need to overwrite this return address) now also 1363 overflows (that need to overwrite this return address) now also
1353 overwrite the canary, which gets detected and the attack is then 1364 overwrite the canary, which gets detected and the attack is then
@@ -1355,15 +1366,8 @@ config CC_STACKPROTECTOR
1355 1366
1356 This feature requires gcc version 4.2 or above, or a distribution 1367 This feature requires gcc version 4.2 or above, or a distribution
1357 gcc with the feature backported. Older versions are automatically 1368 gcc with the feature backported. Older versions are automatically
1358 detected and for those versions, this configuration option is ignored. 1369 detected and for those versions, this configuration option is
1359 1370 ignored. (and a warning is printed during bootup)
1360config CC_STACKPROTECTOR_ALL
1361 bool "Use stack-protector for all functions"
1362 depends on CC_STACKPROTECTOR
1363 help
1364 Normally, GCC only inserts the canary value protection for
1365 functions that use large-ish on-stack buffers. By enabling
1366 this option, GCC will be asked to do this for ALL functions.
1367 1371
1368source kernel/Kconfig.hz 1372source kernel/Kconfig.hz
1369 1373
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8078955845ae..8eb50ba9161e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -292,25 +292,23 @@ config X86_CPU
292# Define implied options from the CPU selection here 292# Define implied options from the CPU selection here
293config X86_L1_CACHE_BYTES 293config X86_L1_CACHE_BYTES
294 int 294 int
295 default "128" if GENERIC_CPU || MPSC 295 default "128" if MPSC
296 default "64" if MK8 || MCORE2 296 default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32
297 depends on X86_64
298 297
299config X86_INTERNODE_CACHE_BYTES 298config X86_INTERNODE_CACHE_BYTES
300 int 299 int
301 default "4096" if X86_VSMP 300 default "4096" if X86_VSMP
302 default X86_L1_CACHE_BYTES if !X86_VSMP 301 default X86_L1_CACHE_BYTES if !X86_VSMP
303 depends on X86_64
304 302
305config X86_CMPXCHG 303config X86_CMPXCHG
306 def_bool X86_64 || (X86_32 && !M386) 304 def_bool X86_64 || (X86_32 && !M386)
307 305
308config X86_L1_CACHE_SHIFT 306config X86_L1_CACHE_SHIFT
309 int 307 int
310 default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC 308 default "7" if MPENTIUM4 || MPSC
311 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 309 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
312 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX 310 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
313 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 311 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
314 312
315config X86_XADD 313config X86_XADD
316 def_bool y 314 def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3fd052..28f111461ca8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,7 @@ config DEBUG_RODATA
117config DEBUG_RODATA_TEST 117config DEBUG_RODATA_TEST
118 bool "Testcase for the DEBUG_RODATA feature" 118 bool "Testcase for the DEBUG_RODATA feature"
119 depends on DEBUG_RODATA 119 depends on DEBUG_RODATA
120 default y
120 help 121 help
121 This option enables a testcase for the DEBUG_RODATA 122 This option enables a testcase for the DEBUG_RODATA
122 feature as well as for the change_page_attr() infrastructure. 123 feature as well as for the change_page_attr() infrastructure.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d1a47adb5aec..cacee981d166 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -73,7 +73,7 @@ else
73 73
74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh 74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ 75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
76 "$(CC)" -fstack-protector ) 76 "$(CC)" "-fstack-protector -DGCC_HAS_SP" )
77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ 77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
78 "$(CC)" -fstack-protector-all ) 78 "$(CC)" -fstack-protector-all )
79 79
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 2c05b737ee22..4334502d3664 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -138,11 +138,4 @@ struct genapic {
138extern struct genapic *genapic; 138extern struct genapic *genapic;
139extern void es7000_update_genapic_to_cluster(void); 139extern void es7000_update_genapic_to_cluster(void);
140 140
141enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
142#define get_uv_system_type() UV_NONE
143#define is_uv_system() 0
144#define uv_wakeup_secondary(a, b) 1
145#define uv_system_init() do {} while (0)
146
147
148#endif /* _ASM_X86_GENAPIC_32_H */ 141#endif /* _ASM_X86_GENAPIC_32_H */
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index adf32fb56aa6..7bb092c59055 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
51extern int acpi_madt_oem_check(char *, char *); 51extern int acpi_madt_oem_check(char *, char *);
52 52
53extern void apic_send_IPI_self(int vector); 53extern void apic_send_IPI_self(int vector);
54enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
55extern enum uv_system_type get_uv_system_type(void);
56extern int is_uv_system(void);
57 54
58extern struct genapic apic_x2apic_uv_x; 55extern struct genapic apic_x2apic_uv_x;
59DECLARE_PER_CPU(int, x2apic_extra_bits); 56DECLARE_PER_CPU(int, x2apic_extra_bits);
60extern void uv_cpu_init(void);
61extern void uv_system_init(void);
62extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
63 57
64extern void setup_apic_routing(void); 58extern void setup_apic_routing(void);
65 59
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 000787df66e6..46ebed797e4f 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -1,11 +1,53 @@
1#ifdef CONFIG_X86_32 1#ifndef _ASM_X86_HARDIRQ_H
2# include "hardirq_32.h" 2#define _ASM_X86_HARDIRQ_H
3#else 3
4# include "hardirq_64.h" 4#include <linux/threads.h>
5#include <linux/irq.h>
6
7typedef struct {
8 unsigned int __softirq_pending;
9 unsigned int __nmi_count; /* arch dependent */
10 unsigned int irq0_irqs;
11#ifdef CONFIG_X86_LOCAL_APIC
12 unsigned int apic_timer_irqs; /* arch dependent */
13 unsigned int irq_spurious_count;
14#endif
15 unsigned int apic_perf_irqs;
16#ifdef CONFIG_SMP
17 unsigned int irq_resched_count;
18 unsigned int irq_call_count;
19 unsigned int irq_tlb_count;
20#endif
21#ifdef CONFIG_X86_MCE
22 unsigned int irq_thermal_count;
23# ifdef CONFIG_X86_64
24 unsigned int irq_threshold_count;
25# endif
5#endif 26#endif
27} ____cacheline_aligned irq_cpustat_t;
28
29DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
30
31/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
32#define MAX_HARDIRQS_PER_CPU NR_VECTORS
33
34#define __ARCH_IRQ_STAT
35
36#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
37
38#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
39
40#define __ARCH_SET_SOFTIRQ_PENDING
41
42#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
43#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
44
45extern void ack_bad_irq(unsigned int irq);
6 46
7extern u64 arch_irq_stat_cpu(unsigned int cpu); 47extern u64 arch_irq_stat_cpu(unsigned int cpu);
8#define arch_irq_stat_cpu arch_irq_stat_cpu 48#define arch_irq_stat_cpu arch_irq_stat_cpu
9 49
10extern u64 arch_irq_stat(void); 50extern u64 arch_irq_stat(void);
11#define arch_irq_stat arch_irq_stat 51#define arch_irq_stat arch_irq_stat
52
53#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
deleted file mode 100644
index 7838276bfe51..000000000000
--- a/arch/x86/include/asm/hardirq_32.h
+++ /dev/null
@@ -1,34 +0,0 @@
1#ifndef _ASM_X86_HARDIRQ_32_H
2#define _ASM_X86_HARDIRQ_32_H
3
4#include <linux/threads.h>
5#include <linux/irq.h>
6
7typedef struct {
8 unsigned int __softirq_pending;
9 unsigned long idle_timestamp;
10 unsigned int __nmi_count; /* arch dependent */
11 unsigned int apic_timer_irqs; /* arch dependent */
12 unsigned int apic_perf_irqs; /* arch dependent */
13 unsigned int irq0_irqs;
14 unsigned int irq_resched_count;
15 unsigned int irq_call_count;
16 unsigned int irq_tlb_count;
17 unsigned int irq_thermal_count;
18 unsigned int irq_spurious_count;
19} ____cacheline_aligned irq_cpustat_t;
20
21DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
22
23/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
24#define MAX_HARDIRQS_PER_CPU NR_VECTORS
25
26#define __ARCH_IRQ_STAT
27#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
28
29#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++)
30
31void ack_bad_irq(unsigned int irq);
32#include <linux/irq_cpustat.h>
33
34#endif /* _ASM_X86_HARDIRQ_32_H */
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
deleted file mode 100644
index 42930b279215..000000000000
--- a/arch/x86/include/asm/hardirq_64.h
+++ /dev/null
@@ -1,40 +0,0 @@
1#ifndef _ASM_X86_HARDIRQ_64_H
2#define _ASM_X86_HARDIRQ_64_H
3
4#include <linux/threads.h>
5#include <linux/irq.h>
6#include <asm/apic.h>
7
8typedef struct {
9 unsigned int __softirq_pending;
10 unsigned int __nmi_count; /* arch dependent */
11 unsigned int apic_timer_irqs; /* arch dependent */
12 unsigned int apic_perf_irqs; /* arch dependent */
13 unsigned int irq0_irqs;
14 unsigned int irq_resched_count;
15 unsigned int irq_call_count;
16 unsigned int irq_tlb_count;
17 unsigned int irq_thermal_count;
18 unsigned int irq_spurious_count;
19 unsigned int irq_threshold_count;
20} ____cacheline_aligned irq_cpustat_t;
21
22DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
23
24/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
25#define MAX_HARDIRQS_PER_CPU NR_VECTORS
26
27#define __ARCH_IRQ_STAT 1
28
29#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
30
31#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
32
33#define __ARCH_SET_SOFTIRQ_PENDING 1
34
35#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
36#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
37
38extern void ack_bad_irq(unsigned int irq);
39
40#endif /* _ASM_X86_HARDIRQ_64_H */
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 89c898ab298b..77843225b7ea 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -1,5 +1,31 @@
1#ifdef CONFIG_X86_32 1/*
2# include "irq_regs_32.h" 2 * Per-cpu current frame pointer - the location of the last exception frame on
3#else 3 * the stack, stored in the per-cpu area.
4# include "irq_regs_64.h" 4 *
5#endif 5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */
7#ifndef _ASM_X86_IRQ_REGS_H
8#define _ASM_X86_IRQ_REGS_H
9
10#include <asm/percpu.h>
11
12#define ARCH_HAS_OWN_IRQ_REGS
13
14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15
16static inline struct pt_regs *get_irq_regs(void)
17{
18 return percpu_read(irq_regs);
19}
20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
22{
23 struct pt_regs *old_regs;
24
25 old_regs = get_irq_regs();
26 percpu_write(irq_regs, new_regs);
27
28 return old_regs;
29}
30
31#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
deleted file mode 100644
index d7ed33ee94e9..000000000000
--- a/arch/x86/include/asm/irq_regs_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
1/*
2 * Per-cpu current frame pointer - the location of the last exception frame on
3 * the stack, stored in the per-cpu area.
4 *
5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */
7#ifndef _ASM_X86_IRQ_REGS_32_H
8#define _ASM_X86_IRQ_REGS_32_H
9
10#include <asm/percpu.h>
11
12#define ARCH_HAS_OWN_IRQ_REGS
13
14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15
16static inline struct pt_regs *get_irq_regs(void)
17{
18 return percpu_read(irq_regs);
19}
20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
22{
23 struct pt_regs *old_regs;
24
25 old_regs = get_irq_regs();
26 percpu_write(irq_regs, new_regs);
27
28 return old_regs;
29}
30
31#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/x86/include/asm/irq_regs_64.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/irq_regs.h>
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 1554d0236e03..0e2220bb3142 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -49,31 +49,33 @@
49 * some of the following vectors are 'rare', they are merged 49 * some of the following vectors are 'rare', they are merged
50 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. 50 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
51 * TLB, reschedule and local APIC vectors are performance-critical. 51 * TLB, reschedule and local APIC vectors are performance-critical.
52 *
53 * Vectors 0xf0-0xfa are free (reserved for future Linux use).
54 */ 52 */
55#ifdef CONFIG_X86_32 53#ifdef CONFIG_X86_32
56 54
57# define SPURIOUS_APIC_VECTOR 0xff 55# define SPURIOUS_APIC_VECTOR 0xff
58# define ERROR_APIC_VECTOR 0xfe 56# define ERROR_APIC_VECTOR 0xfe
59# define INVALIDATE_TLB_VECTOR 0xfd 57# define RESCHEDULE_VECTOR 0xfd
60# define RESCHEDULE_VECTOR 0xfc 58# define CALL_FUNCTION_VECTOR 0xfc
61# define CALL_FUNCTION_VECTOR 0xfb 59# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
62# define CALL_FUNCTION_SINGLE_VECTOR 0xfa 60# define THERMAL_APIC_VECTOR 0xfa
63# define THERMAL_APIC_VECTOR 0xf0 61/* 0xf8 - 0xf9 : free */
62# define INVALIDATE_TLB_VECTOR_END 0xf7
63# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
64
65# define NUM_INVALIDATE_TLB_VECTORS 8
64 66
65#else 67#else
66 68
67#define SPURIOUS_APIC_VECTOR 0xff 69# define SPURIOUS_APIC_VECTOR 0xff
68#define ERROR_APIC_VECTOR 0xfe 70# define ERROR_APIC_VECTOR 0xfe
69#define RESCHEDULE_VECTOR 0xfd 71# define RESCHEDULE_VECTOR 0xfd
70#define CALL_FUNCTION_VECTOR 0xfc 72# define CALL_FUNCTION_VECTOR 0xfc
71#define CALL_FUNCTION_SINGLE_VECTOR 0xfb 73# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
72#define THERMAL_APIC_VECTOR 0xfa 74# define THERMAL_APIC_VECTOR 0xfa
73#define THRESHOLD_APIC_VECTOR 0xf9 75# define THRESHOLD_APIC_VECTOR 0xf9
74#define UV_BAU_MESSAGE 0xf8 76# define UV_BAU_MESSAGE 0xf8
75#define INVALIDATE_TLB_VECTOR_END 0xf7 77# define INVALIDATE_TLB_VECTOR_END 0xf7
76#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ 78# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
77 79
78#define NUM_INVALIDATE_TLB_VECTORS 8 80#define NUM_INVALIDATE_TLB_VECTORS 8
79 81
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index ad31e5d90e90..b87b077cc231 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -11,10 +11,26 @@
11 */ 11 */
12#ifdef CONFIG_X86_SMP 12#ifdef CONFIG_X86_SMP
13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) 13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
14BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
15BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) 14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
16BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) 15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
17BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17
18BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
19 smp_invalidate_interrupt)
20BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
21 smp_invalidate_interrupt)
22BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
23 smp_invalidate_interrupt)
24BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
25 smp_invalidate_interrupt)
26BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
27 smp_invalidate_interrupt)
28BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
29 smp_invalidate_interrupt)
30BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
31 smp_invalidate_interrupt)
32BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
33 smp_invalidate_interrupt)
18#endif 34#endif
19 35
20/* 36/*
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8aeeb3fd73db..52948df9cd1d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
21int init_new_context(struct task_struct *tsk, struct mm_struct *mm); 21int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
22void destroy_context(struct mm_struct *mm); 22void destroy_context(struct mm_struct *mm);
23 23
24#ifdef CONFIG_X86_32 24
25# include "mmu_context_32.h" 25static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
26#else 26{
27# include "mmu_context_64.h" 27#ifdef CONFIG_SMP
28 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
29 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
30#endif
31}
32
33static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
34 struct task_struct *tsk)
35{
36 unsigned cpu = smp_processor_id();
37
38 if (likely(prev != next)) {
39 /* stop flush ipis for the previous mm */
40 cpu_clear(cpu, prev->cpu_vm_mask);
41#ifdef CONFIG_SMP
42 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
43 percpu_write(cpu_tlbstate.active_mm, next);
28#endif 44#endif
45 cpu_set(cpu, next->cpu_vm_mask);
46
47 /* Re-load page tables */
48 load_cr3(next->pgd);
49
50 /*
51 * load the LDT, if the LDT is different:
52 */
53 if (unlikely(prev->context.ldt != next->context.ldt))
54 load_LDT_nolock(&next->context);
55 }
56#ifdef CONFIG_SMP
57 else {
58 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
59 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
60
61 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
62 /* We were in lazy tlb mode and leave_mm disabled
63 * tlb flush IPI delivery. We must reload CR3
64 * to make sure to use no freed page tables.
65 */
66 load_cr3(next->pgd);
67 load_LDT_nolock(&next->context);
68 }
69 }
70#endif
71}
29 72
30#define activate_mm(prev, next) \ 73#define activate_mm(prev, next) \
31do { \ 74do { \
@@ -33,5 +76,17 @@ do { \
33 switch_mm((prev), (next), NULL); \ 76 switch_mm((prev), (next), NULL); \
34} while (0); 77} while (0);
35 78
79#ifdef CONFIG_X86_32
80#define deactivate_mm(tsk, mm) \
81do { \
82 loadsegment(gs, 0); \
83} while (0)
84#else
85#define deactivate_mm(tsk, mm) \
86do { \
87 load_gs_index(0); \
88 loadsegment(fs, 0); \
89} while (0)
90#endif
36 91
37#endif /* _ASM_X86_MMU_CONTEXT_H */ 92#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
deleted file mode 100644
index 08b53454f831..000000000000
--- a/arch/x86/include/asm/mmu_context_32.h
+++ /dev/null
@@ -1,55 +0,0 @@
1#ifndef _ASM_X86_MMU_CONTEXT_32_H
2#define _ASM_X86_MMU_CONTEXT_32_H
3
4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
5{
6#ifdef CONFIG_SMP
7 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
8 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
9#endif
10}
11
12static inline void switch_mm(struct mm_struct *prev,
13 struct mm_struct *next,
14 struct task_struct *tsk)
15{
16 int cpu = smp_processor_id();
17
18 if (likely(prev != next)) {
19 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP
22 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
23 percpu_write(cpu_tlbstate.active_mm, next);
24#endif
25 cpu_set(cpu, next->cpu_vm_mask);
26
27 /* Re-load page tables */
28 load_cr3(next->pgd);
29
30 /*
31 * load the LDT, if the LDT is different:
32 */
33 if (unlikely(prev->context.ldt != next->context.ldt))
34 load_LDT_nolock(&next->context);
35 }
36#ifdef CONFIG_SMP
37 else {
38 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
39 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
40
41 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
42 /* We were in lazy tlb mode and leave_mm disabled
43 * tlb flush IPI delivery. We must reload %cr3.
44 */
45 load_cr3(next->pgd);
46 load_LDT_nolock(&next->context);
47 }
48 }
49#endif
50}
51
52#define deactivate_mm(tsk, mm) \
53 asm("movl %0,%%gs": :"r" (0));
54
55#endif /* _ASM_X86_MMU_CONTEXT_32_H */
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
deleted file mode 100644
index c4572505ab3e..000000000000
--- a/arch/x86/include/asm/mmu_context_64.h
+++ /dev/null
@@ -1,52 +0,0 @@
1#ifndef _ASM_X86_MMU_CONTEXT_64_H
2#define _ASM_X86_MMU_CONTEXT_64_H
3
4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
5{
6#ifdef CONFIG_SMP
7 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
8 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
9#endif
10}
11
12static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
13 struct task_struct *tsk)
14{
15 unsigned cpu = smp_processor_id();
16 if (likely(prev != next)) {
17 /* stop flush ipis for the previous mm */
18 cpu_clear(cpu, prev->cpu_vm_mask);
19#ifdef CONFIG_SMP
20 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
21 percpu_write(cpu_tlbstate.active_mm, next);
22#endif
23 cpu_set(cpu, next->cpu_vm_mask);
24 load_cr3(next->pgd);
25
26 if (unlikely(next->context.ldt != prev->context.ldt))
27 load_LDT_nolock(&next->context);
28 }
29#ifdef CONFIG_SMP
30 else {
31 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
32 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
33
34 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
35 /* We were in lazy tlb mode and leave_mm disabled
36 * tlb flush IPI delivery. We must reload CR3
37 * to make sure to use no freed page tables.
38 */
39 load_cr3(next->pgd);
40 load_LDT_nolock(&next->context);
41 }
42 }
43#endif
44}
45
46#define deactivate_mm(tsk, mm) \
47do { \
48 load_gs_index(0); \
49 asm volatile("movl %0,%%fs"::"r"(0)); \
50} while (0)
51
52#endif /* _ASM_X86_MMU_CONTEXT_64_H */
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
deleted file mode 100644
index c31ca048a901..000000000000
--- a/arch/x86/include/asm/pda.h
+++ /dev/null
@@ -1,45 +0,0 @@
1#ifndef _ASM_X86_PDA_H
2#define _ASM_X86_PDA_H
3
4#ifndef __ASSEMBLY__
5#include <linux/stddef.h>
6#include <linux/types.h>
7#include <linux/cache.h>
8#include <linux/threads.h>
9#include <asm/page.h>
10#include <asm/percpu.h>
11
12/* Per processor datastructure. %gs points to it while the kernel runs */
13struct x8664_pda {
14 unsigned long unused1;
15 unsigned long unused2;
16 unsigned long unused3;
17 unsigned long unused4;
18 int unused5;
19 unsigned int unused6; /* 36 was cpunumber */
20#ifdef CONFIG_CC_STACKPROTECTOR
21 unsigned long stack_canary; /* 40 stack canary value */
22 /* gcc-ABI: this canary MUST be at
23 offset 40!!! */
24#endif
25 short in_bootmem; /* pda lives in bootmem */
26} ____cacheline_aligned_in_smp;
27
28DECLARE_PER_CPU(struct x8664_pda, __pda);
29extern void pda_init(int);
30
31#define cpu_pda(cpu) (&per_cpu(__pda, cpu))
32
33#define read_pda(field) percpu_read(__pda.field)
34#define write_pda(field, val) percpu_write(__pda.field, val)
35#define add_pda(field, val) percpu_add(__pda.field, val)
36#define sub_pda(field, val) percpu_sub(__pda.field, val)
37#define or_pda(field, val) percpu_or(__pda.field, val)
38
39/* This is not atomic against other CPUs -- CPU preemption needs to be off */
40#define test_and_clear_bit_pda(bit, field) \
41 x86_test_and_clear_bit_percpu(bit, __pda.field)
42
43#endif
44
45#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index 165d5272ece1..0b64af4f13ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -75,7 +75,7 @@ do { \
75 case 8: \ 75 case 8: \
76 asm(op "q %1,"__percpu_arg(0) \ 76 asm(op "q %1,"__percpu_arg(0) \
77 : "+m" (var) \ 77 : "+m" (var) \
78 : "r" ((T__)val)); \ 78 : "re" ((T__)val)); \
79 break; \ 79 break; \
80 default: __bad_percpu_size(); \ 80 default: __bad_percpu_size(); \
81 } \ 81 } \
@@ -133,12 +133,6 @@ do { \
133/* We can use this directly for local CPU (faster). */ 133/* We can use this directly for local CPU (faster). */
134DECLARE_PER_CPU(unsigned long, this_cpu_off); 134DECLARE_PER_CPU(unsigned long, this_cpu_off);
135 135
136#ifdef CONFIG_X86_64
137extern void load_pda_offset(int cpu);
138#else
139static inline void load_pda_offset(int cpu) { }
140#endif
141
142#endif /* !__ASSEMBLY__ */ 136#endif /* !__ASSEMBLY__ */
143 137
144#ifdef CONFIG_SMP 138#ifdef CONFIG_SMP
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ba09289accaa..1df9637dfda3 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -11,7 +11,6 @@
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include <linux/threads.h> 13#include <linux/threads.h>
14#include <asm/pda.h>
15 14
16extern pud_t level3_kernel_pgt[512]; 15extern pud_t level3_kernel_pgt[512];
17extern pud_t level3_ident_pgt[512]; 16extern pud_t level3_ident_pgt[512];
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index f511246fa6cd..48676b943b92 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -379,8 +379,29 @@ union thread_xstate {
379#ifdef CONFIG_X86_64 379#ifdef CONFIG_X86_64
380DECLARE_PER_CPU(struct orig_ist, orig_ist); 380DECLARE_PER_CPU(struct orig_ist, orig_ist);
381 381
382DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack); 382union irq_stack_union {
383 char irq_stack[IRQ_STACK_SIZE];
384 /*
385 * GCC hardcodes the stack canary as %gs:40. Since the
386 * irq_stack is the object at %gs:0, we reserve the bottom
387 * 48 bytes of the irq stack for the canary.
388 */
389 struct {
390 char gs_base[40];
391 unsigned long stack_canary;
392 };
393};
394
395DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
383DECLARE_PER_CPU(char *, irq_stack_ptr); 396DECLARE_PER_CPU(char *, irq_stack_ptr);
397
398static inline void load_gs_base(int cpu)
399{
400 /* Memory clobbers used to order pda/percpu accesses */
401 mb();
402 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
403 mb();
404}
384#endif 405#endif
385 406
386extern void print_cpu_info(struct cpuinfo_x86 *); 407extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 68636e767a91..45ef8a1b9d7c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -15,7 +15,6 @@
15# include <asm/io_apic.h> 15# include <asm/io_apic.h>
16# endif 16# endif
17#endif 17#endif
18#include <asm/pda.h>
19#include <asm/thread_info.h> 18#include <asm/thread_info.h>
20#include <asm/cpumask.h> 19#include <asm/cpumask.h>
21 20
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
new file mode 100644
index 000000000000..36a700acaf2b
--- /dev/null
+++ b/arch/x86/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
1#ifndef _ASM_STACKPROTECTOR_H
2#define _ASM_STACKPROTECTOR_H 1
3
4#include <asm/tsc.h>
5#include <asm/processor.h>
6
7/*
8 * Initialize the stackprotector canary value.
9 *
10 * NOTE: this must only be called from functions that never return,
11 * and it must always be inlined.
12 */
13static __always_inline void boot_init_stack_canary(void)
14{
15 u64 canary;
16 u64 tsc;
17
18 /*
19 * Build time only check to make sure the stack_canary is at
20 * offset 40 in the pda; this is a gcc ABI requirement
21 */
22 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
23
24 /*
25 * We both use the random pool and the current TSC as a source
26 * of randomness. The TSC only matters for very early init,
27 * there it already has some randomness on most systems. Later
28 * on during the bootup the random pool has true entropy too.
29 */
30 get_random_bytes(&canary, sizeof(canary));
31 tsc = __native_read_tsc();
32 canary += tsc + (tsc << 32UL);
33
34 current->stack_canary = canary;
35 percpu_write(irq_stack_union.stack_canary, canary);
36}
37
38#endif
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index d1dc27dba36d..2fcc70bc85f3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -86,27 +86,44 @@ do { \
86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ 86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
87 "r12", "r13", "r14", "r15" 87 "r12", "r13", "r14", "r15"
88 88
89#ifdef CONFIG_CC_STACKPROTECTOR
90#define __switch_canary \
91 "movq %P[task_canary](%%rsi),%%r8\n\t" \
92 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
93#define __switch_canary_oparam \
94 , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
95#define __switch_canary_iparam \
96 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
97#else /* CC_STACKPROTECTOR */
98#define __switch_canary
99#define __switch_canary_oparam
100#define __switch_canary_iparam
101#endif /* CC_STACKPROTECTOR */
102
89/* Save restore flags to clear handle leaking NT */ 103/* Save restore flags to clear handle leaking NT */
90#define switch_to(prev, next, last) \ 104#define switch_to(prev, next, last) \
91 asm volatile(SAVE_CONTEXT \ 105 asm volatile(SAVE_CONTEXT \
92 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 106 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
93 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ 107 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
94 "call __switch_to\n\t" \ 108 "call __switch_to\n\t" \
95 ".globl thread_return\n" \ 109 ".globl thread_return\n" \
96 "thread_return:\n\t" \ 110 "thread_return:\n\t" \
97 "movq "__percpu_arg([current_task])",%%rsi\n\t" \ 111 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
112 __switch_canary \
98 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 113 "movq %P[thread_info](%%rsi),%%r8\n\t" \
99 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ 114 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
100 "movq %%rax,%%rdi\n\t" \ 115 "movq %%rax,%%rdi\n\t" \
101 "jc ret_from_fork\n\t" \ 116 "jc ret_from_fork\n\t" \
102 RESTORE_CONTEXT \ 117 RESTORE_CONTEXT \
103 : "=a" (last) \ 118 : "=a" (last) \
119 __switch_canary_oparam \
104 : [next] "S" (next), [prev] "D" (prev), \ 120 : [next] "S" (next), [prev] "D" (prev), \
105 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ 121 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
106 [ti_flags] "i" (offsetof(struct thread_info, flags)), \ 122 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
107 [tif_fork] "i" (TIF_FORK), \ 123 [tif_fork] "i" (TIF_FORK), \
108 [thread_info] "i" (offsetof(struct task_struct, stack)), \ 124 [thread_info] "i" (offsetof(struct task_struct, stack)), \
109 [current_task] "m" (per_cpu_var(current_task)) \ 125 [current_task] "m" (per_cpu_var(current_task)) \
126 __switch_canary_iparam \
110 : "memory", "cc" __EXTRA_CLOBBER) 127 : "memory", "cc" __EXTRA_CLOBBER)
111#endif 128#endif
112 129
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index ffea1fe03a99..10022ed3a4b6 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -190,9 +190,20 @@ extern int __node_distance(int, int);
190 190
191#else /* !CONFIG_NUMA */ 191#else /* !CONFIG_NUMA */
192 192
193#define numa_node_id() 0 193static inline int numa_node_id(void)
194#define cpu_to_node(cpu) 0 194{
195#define early_cpu_to_node(cpu) 0 195 return 0;
196}
197
198static inline int cpu_to_node(int cpu)
199{
200 return 0;
201}
202
203static inline int early_cpu_to_node(int cpu)
204{
205 return 0;
206}
196 207
197static inline const cpumask_t *cpumask_of_node(int node) 208static inline const cpumask_t *cpumask_of_node(int node)
198{ 209{
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
new file mode 100644
index 000000000000..8ac1d7e312f3
--- /dev/null
+++ b/arch/x86/include/asm/uv/uv.h
@@ -0,0 +1,33 @@
1#ifndef _ASM_X86_UV_UV_H
2#define _ASM_X86_UV_UV_H
3
4enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
5
6#ifdef CONFIG_X86_UV
7
8extern enum uv_system_type get_uv_system_type(void);
9extern int is_uv_system(void);
10extern void uv_cpu_init(void);
11extern void uv_system_init(void);
12extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
13extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
14 struct mm_struct *mm,
15 unsigned long va,
16 unsigned int cpu);
17
18#else /* X86_UV */
19
20static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
21static inline int is_uv_system(void) { return 0; }
22static inline void uv_cpu_init(void) { }
23static inline void uv_system_init(void) { }
24static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
25{ return 1; }
26static inline const struct cpumask *
27uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
28 unsigned long va, unsigned int cpu)
29{ return cpumask; }
30
31#endif /* X86_UV */
32
33#endif /* _ASM_X86_UV_UV_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 74e6393bfddb..9b0e61bf7a88 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
325#define cpubit_isset(cpu, bau_local_cpumask) \ 325#define cpubit_isset(cpu, bau_local_cpumask) \
326 test_bit((cpu), (bau_local_cpumask).bits) 326 test_bit((cpu), (bau_local_cpumask).bits)
327 327
328extern int uv_flush_tlb_others(struct cpumask *,
329 struct mm_struct *, unsigned long);
330extern void uv_bau_message_intr1(void); 328extern void uv_bau_message_intr1(void);
331extern void uv_bau_timeout_intr1(void); 329extern void uv_bau_timeout_intr1(void);
332 330
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d364df03c1d6..a99437c965cc 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,6 +23,7 @@ nostackp := $(call cc-option, -fno-stack-protector)
23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
24CFLAGS_hpet.o := $(nostackp) 24CFLAGS_hpet.o := $(nostackp)
25CFLAGS_tsc.o := $(nostackp) 25CFLAGS_tsc.o := $(nostackp)
26CFLAGS_paravirt.o := $(nostackp)
26 27
27obj-y := process_$(BITS).o signal.o entry_$(BITS).o 28obj-y := process_$(BITS).o signal.o entry_$(BITS).o
28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
@@ -57,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o
57apm-y := apm_32.o 58apm-y := apm_32.o
58obj-$(CONFIG_APM) += apm.o 59obj-$(CONFIG_APM) += apm.o
59obj-$(CONFIG_X86_SMP) += smp.o 60obj-$(CONFIG_X86_SMP) += smp.o
60obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o 61obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o
61obj-$(CONFIG_X86_32_SMP) += smpcommon.o 62obj-$(CONFIG_X86_32_SMP) += smpcommon.o
62obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 63obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o
63obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 64obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
@@ -114,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
114### 115###
115# 64 bit specific files 116# 64 bit specific files
116ifeq ($(CONFIG_X86_64),y) 117ifeq ($(CONFIG_X86_64),y)
117 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 118 obj-y += genapic_64.o genapic_flat_64.o
118 obj-y += bios_uv.o uv_irq.o uv_sysfs.o
119 obj-y += genx2apic_cluster.o 119 obj-y += genx2apic_cluster.o
120 obj-y += genx2apic_phys.o 120 obj-y += genx2apic_phys.o
121 obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o
122 obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o
121 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 123 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
122 obj-$(CONFIG_AUDIT) += audit_64.o 124 obj-$(CONFIG_AUDIT) += audit_64.o
123 125
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index e9af14f748ea..7b434e5b14c9 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -1132,7 +1132,9 @@ void __cpuinit setup_local_APIC(void)
1132 int i, j; 1132 int i, j;
1133 1133
1134 if (disable_apic) { 1134 if (disable_apic) {
1135#ifdef CONFIG_X86_IO_APIC
1135 disable_ioapic_setup(); 1136 disable_ioapic_setup();
1137#endif
1136 return; 1138 return;
1137 } 1139 }
1138 1140
@@ -1844,6 +1846,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1844 num_processors++; 1846 num_processors++;
1845 cpu = cpumask_next_zero(-1, cpu_present_mask); 1847 cpu = cpumask_next_zero(-1, cpu_present_mask);
1846 1848
1849 if (version != apic_version[boot_cpu_physical_apicid])
1850 WARN_ONCE(1,
1851 "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
1852 apic_version[boot_cpu_physical_apicid], cpu, version);
1853
1847 physid_set(apicid, phys_cpu_present_map); 1854 physid_set(apicid, phys_cpu_present_map);
1848 if (apicid == boot_cpu_physical_apicid) { 1855 if (apicid == boot_cpu_physical_apicid) {
1849 /* 1856 /*
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 64c834a39aa8..8793ab33e2c1 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <linux/kbuild.h> 13#include <linux/kbuild.h>
14#include <asm/pda.h>
15#include <asm/processor.h> 14#include <asm/processor.h>
16#include <asm/segment.h> 15#include <asm/segment.h>
17#include <asm/thread_info.h> 16#include <asm/thread_info.h>
@@ -48,10 +47,6 @@ int main(void)
48#endif 47#endif
49 BLANK(); 48 BLANK();
50#undef ENTRY 49#undef ENTRY
51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
52 DEFINE(pda_size, sizeof(struct x8664_pda));
53 BLANK();
54#undef ENTRY
55#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
56 BLANK(); 51 BLANK();
57 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); 52 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 95eb30e1e677..6fd316689c47 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -29,9 +29,9 @@
29#include <asm/apic.h> 29#include <asm/apic.h>
30#include <mach_apic.h> 30#include <mach_apic.h>
31#include <asm/genapic.h> 31#include <asm/genapic.h>
32#include <asm/uv/uv.h>
32#endif 33#endif
33 34
34#include <asm/pda.h>
35#include <asm/pgtable.h> 35#include <asm/pgtable.h>
36#include <asm/processor.h> 36#include <asm/processor.h>
37#include <asm/desc.h> 37#include <asm/desc.h>
@@ -65,23 +65,23 @@ cpumask_t cpu_sibling_setup_map;
65 65
66static struct cpu_dev *this_cpu __cpuinitdata; 66static struct cpu_dev *this_cpu __cpuinitdata;
67 67
68DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
68#ifdef CONFIG_X86_64 69#ifdef CONFIG_X86_64
69/* We need valid kernel segments for data and code in long mode too 70 /*
70 * IRET will check the segment types kkeil 2000/10/28 71 * We need valid kernel segments for data and code in long mode too
71 * Also sysret mandates a special GDT layout 72 * IRET will check the segment types kkeil 2000/10/28
72 */ 73 * Also sysret mandates a special GDT layout
73/* The TLS descriptors are currently at a different place compared to i386. 74 *
74 Hopefully nobody expects them at a fixed place (Wine?) */ 75 * The TLS descriptors are currently at a different place compared to i386.
75DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 76 * Hopefully nobody expects them at a fixed place (Wine?)
77 */
76 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 78 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
77 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 79 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
78 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 80 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
79 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 81 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
80 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 82 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
81 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 83 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
82} };
83#else 84#else
84DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
85 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 85 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
86 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 86 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
87 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 87 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -113,9 +113,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
113 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 113 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
114 114
115 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 115 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
116 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 116 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
117} };
118#endif 117#endif
118} };
119EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 119EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
120 120
121#ifdef CONFIG_X86_32 121#ifdef CONFIG_X86_32
@@ -883,12 +883,13 @@ __setup("clearcpuid=", setup_disablecpuid);
883#ifdef CONFIG_X86_64 883#ifdef CONFIG_X86_64
884struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 884struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
885 885
886DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack); 886DEFINE_PER_CPU_FIRST(union irq_stack_union,
887 irq_stack_union) __aligned(PAGE_SIZE);
887#ifdef CONFIG_SMP 888#ifdef CONFIG_SMP
888DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */ 889DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
889#else 890#else
890DEFINE_PER_CPU(char *, irq_stack_ptr) = 891DEFINE_PER_CPU(char *, irq_stack_ptr) =
891 per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64; 892 per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
892#endif 893#endif
893 894
894DEFINE_PER_CPU(unsigned long, kernel_stack) = 895DEFINE_PER_CPU(unsigned long, kernel_stack) =
@@ -897,15 +898,6 @@ EXPORT_PER_CPU_SYMBOL(kernel_stack);
897 898
898DEFINE_PER_CPU(unsigned int, irq_count) = -1; 899DEFINE_PER_CPU(unsigned int, irq_count) = -1;
899 900
900void __cpuinit pda_init(int cpu)
901{
902 /* Setup up data that may be needed in __get_free_pages early */
903 loadsegment(fs, 0);
904 loadsegment(gs, 0);
905
906 load_pda_offset(cpu);
907}
908
909static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks 901static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
910 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]) 902 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
911 __aligned(PAGE_SIZE); 903 __aligned(PAGE_SIZE);
@@ -969,9 +961,9 @@ void __cpuinit cpu_init(void)
969 struct task_struct *me; 961 struct task_struct *me;
970 int i; 962 int i;
971 963
972 /* CPU 0 is initialised in head64.c */ 964 loadsegment(fs, 0);
973 if (cpu != 0) 965 loadsegment(gs, 0);
974 pda_init(cpu); 966 load_gs_base(cpu);
975 967
976#ifdef CONFIG_NUMA 968#ifdef CONFIG_NUMA
977 if (cpu != 0 && percpu_read(node_number) == 0 && 969 if (cpu != 0 && percpu_read(node_number) == 0 &&
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 8f3c95c7e61f..4b1c319d30c3 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -145,13 +145,14 @@ typedef union {
145 145
146struct drv_cmd { 146struct drv_cmd {
147 unsigned int type; 147 unsigned int type;
148 cpumask_var_t mask; 148 const struct cpumask *mask;
149 drv_addr_union addr; 149 drv_addr_union addr;
150 u32 val; 150 u32 val;
151}; 151};
152 152
153static void do_drv_read(struct drv_cmd *cmd) 153static long do_drv_read(void *_cmd)
154{ 154{
155 struct drv_cmd *cmd = _cmd;
155 u32 h; 156 u32 h;
156 157
157 switch (cmd->type) { 158 switch (cmd->type) {
@@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd)
166 default: 167 default:
167 break; 168 break;
168 } 169 }
170 return 0;
169} 171}
170 172
171static void do_drv_write(struct drv_cmd *cmd) 173static long do_drv_write(void *_cmd)
172{ 174{
175 struct drv_cmd *cmd = _cmd;
173 u32 lo, hi; 176 u32 lo, hi;
174 177
175 switch (cmd->type) { 178 switch (cmd->type) {
@@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd)
186 default: 189 default:
187 break; 190 break;
188 } 191 }
192 return 0;
189} 193}
190 194
191static void drv_read(struct drv_cmd *cmd) 195static void drv_read(struct drv_cmd *cmd)
192{ 196{
193 cpumask_t saved_mask = current->cpus_allowed;
194 cmd->val = 0; 197 cmd->val = 0;
195 198
196 set_cpus_allowed_ptr(current, cmd->mask); 199 work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
197 do_drv_read(cmd);
198 set_cpus_allowed_ptr(current, &saved_mask);
199} 200}
200 201
201static void drv_write(struct drv_cmd *cmd) 202static void drv_write(struct drv_cmd *cmd)
202{ 203{
203 cpumask_t saved_mask = current->cpus_allowed;
204 unsigned int i; 204 unsigned int i;
205 205
206 for_each_cpu(i, cmd->mask) { 206 for_each_cpu(i, cmd->mask) {
207 set_cpus_allowed_ptr(current, cpumask_of(i)); 207 work_on_cpu(i, do_drv_write, cmd);
208 do_drv_write(cmd);
209 } 208 }
210
211 set_cpus_allowed_ptr(current, &saved_mask);
212 return;
213} 209}
214 210
215static u32 get_cur_val(const struct cpumask *mask) 211static u32 get_cur_val(const struct cpumask *mask)
@@ -235,6 +231,7 @@ static u32 get_cur_val(const struct cpumask *mask)
235 return 0; 231 return 0;
236 } 232 }
237 233
234 cmd.mask = mask;
238 drv_read(&cmd); 235 drv_read(&cmd);
239 236
240 dprintk("get_cur_val = %u\n", cmd.val); 237 dprintk("get_cur_val = %u\n", cmd.val);
@@ -366,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
366 return freq; 363 return freq;
367} 364}
368 365
369static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, 366static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
370 struct acpi_cpufreq_data *data) 367 struct acpi_cpufreq_data *data)
371{ 368{
372 unsigned int cur_freq; 369 unsigned int cur_freq;
@@ -401,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
401 return -ENODEV; 398 return -ENODEV;
402 } 399 }
403 400
404 if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
405 return -ENOMEM;
406
407 perf = data->acpi_data; 401 perf = data->acpi_data;
408 result = cpufreq_frequency_table_target(policy, 402 result = cpufreq_frequency_table_target(policy,
409 data->freq_table, 403 data->freq_table,
@@ -448,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
448 442
449 /* cpufreq holds the hotplug lock, so we are safe from here on */ 443 /* cpufreq holds the hotplug lock, so we are safe from here on */
450 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) 444 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
451 cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); 445 cmd.mask = policy->cpus;
452 else 446 else
453 cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); 447 cmd.mask = cpumask_of(policy->cpu);
454 448
455 freqs.old = perf->states[perf->state].core_frequency * 1000; 449 freqs.old = perf->states[perf->state].core_frequency * 1000;
456 freqs.new = data->freq_table[next_state].frequency; 450 freqs.new = data->freq_table[next_state].frequency;
@@ -477,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
477 perf->state = next_perf_state; 471 perf->state = next_perf_state;
478 472
479out: 473out:
480 free_cpumask_var(cmd.mask);
481 return result; 474 return result;
482} 475}
483 476
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd39..5e8c79e748a6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -7,6 +7,7 @@
7#include <linux/interrupt.h> 7#include <linux/interrupt.h>
8#include <linux/percpu.h> 8#include <linux/percpu.h>
9#include <asm/processor.h> 9#include <asm/processor.h>
10#include <asm/apic.h>
10#include <asm/msr.h> 11#include <asm/msr.h>
11#include <asm/mce.h> 12#include <asm/mce.h>
12#include <asm/hw_irq.h> 13#include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe11..b205272ad394 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,10 +366,12 @@ void __init efi_init(void)
366 SMBIOS_TABLE_GUID)) { 366 SMBIOS_TABLE_GUID)) {
367 efi.smbios = config_tables[i].table; 367 efi.smbios = config_tables[i].table;
368 printk(" SMBIOS=0x%lx ", config_tables[i].table); 368 printk(" SMBIOS=0x%lx ", config_tables[i].table);
369#ifdef CONFIG_X86_UV
369 } else if (!efi_guidcmp(config_tables[i].guid, 370 } else if (!efi_guidcmp(config_tables[i].guid,
370 UV_SYSTEM_TABLE_GUID)) { 371 UV_SYSTEM_TABLE_GUID)) {
371 efi.uv_systab = config_tables[i].table; 372 efi.uv_systab = config_tables[i].table;
372 printk(" UVsystab=0x%lx ", config_tables[i].table); 373 printk(" UVsystab=0x%lx ", config_tables[i].table);
374#endif
373 } else if (!efi_guidcmp(config_tables[i].guid, 375 } else if (!efi_guidcmp(config_tables[i].guid,
374 HCDP_TABLE_GUID)) { 376 HCDP_TABLE_GUID)) {
375 efi.hcdp = config_tables[i].table; 377 efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215f..a4ee29127fdf 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -36,6 +36,7 @@
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm/efi.h> 37#include <asm/efi.h>
38#include <asm/cacheflush.h> 38#include <asm/cacheflush.h>
39#include <asm/fixmap.h>
39 40
40static pgd_t save_pgd __initdata; 41static pgd_t save_pgd __initdata;
41static unsigned long efi_flags __initdata; 42static unsigned long efi_flags __initdata;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..a0b91aac72a1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -672,7 +672,7 @@ common_interrupt:
672ENDPROC(common_interrupt) 672ENDPROC(common_interrupt)
673 CFI_ENDPROC 673 CFI_ENDPROC
674 674
675#define BUILD_INTERRUPT(name, nr) \ 675#define BUILD_INTERRUPT3(name, nr, fn) \
676ENTRY(name) \ 676ENTRY(name) \
677 RING0_INT_FRAME; \ 677 RING0_INT_FRAME; \
678 pushl $~(nr); \ 678 pushl $~(nr); \
@@ -680,11 +680,13 @@ ENTRY(name) \
680 SAVE_ALL; \ 680 SAVE_ALL; \
681 TRACE_IRQS_OFF \ 681 TRACE_IRQS_OFF \
682 movl %esp,%eax; \ 682 movl %esp,%eax; \
683 call smp_##name; \ 683 call fn; \
684 jmp ret_from_intr; \ 684 jmp ret_from_intr; \
685 CFI_ENDPROC; \ 685 CFI_ENDPROC; \
686ENDPROC(name) 686ENDPROC(name)
687 687
688#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
689
688/* The include is where all of the SMP etc. interrupts come from */ 690/* The include is where all of the SMP etc. interrupts come from */
689#include "entry_arch.h" 691#include "entry_arch.h"
690 692
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index c092e7d2686d..eb0a0703f4c9 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -982,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
982 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 982 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
983#endif 983#endif
984 984
985#ifdef CONFIG_X86_UV
985apicinterrupt UV_BAU_MESSAGE \ 986apicinterrupt UV_BAU_MESSAGE \
986 uv_bau_message_intr1 uv_bau_message_interrupt 987 uv_bau_message_intr1 uv_bau_message_interrupt
988#endif
987apicinterrupt LOCAL_TIMER_VECTOR \ 989apicinterrupt LOCAL_TIMER_VECTOR \
988 apic_timer_interrupt smp_apic_timer_interrupt 990 apic_timer_interrupt smp_apic_timer_interrupt
989 991
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 2bced78b0b8e..e656c2721154 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster;
32struct genapic __read_mostly *genapic = &apic_flat; 32struct genapic __read_mostly *genapic = &apic_flat;
33 33
34static struct genapic *apic_probe[] __initdata = { 34static struct genapic *apic_probe[] __initdata = {
35#ifdef CONFIG_X86_UV
35 &apic_x2apic_uv_x, 36 &apic_x2apic_uv_x,
37#endif
36 &apic_x2apic_phys, 38 &apic_x2apic_phys,
37 &apic_x2apic_cluster, 39 &apic_x2apic_cluster,
38 &apic_physflat, 40 &apic_physflat,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b193e082f6ce..bfe36249145c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -25,6 +25,7 @@
25#include <asm/ipi.h> 25#include <asm/ipi.h>
26#include <asm/genapic.h> 26#include <asm/genapic.h>
27#include <asm/pgtable.h> 27#include <asm/pgtable.h>
28#include <asm/uv/uv.h>
28#include <asm/uv/uv_mmrs.h> 29#include <asm/uv/uv_mmrs.h>
29#include <asm/uv/uv_hub.h> 30#include <asm/uv/uv_hub.h>
30#include <asm/uv/bios.h> 31#include <asm/uv/bios.h>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index af67d3227ea6..f5b272247690 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -91,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
91 if (console_loglevel == 10) 91 if (console_loglevel == 10)
92 early_printk("Kernel alive\n"); 92 early_printk("Kernel alive\n");
93 93
94 pda_init(0);
95
96 x86_64_start_reservations(real_mode_data); 94 x86_64_start_reservations(real_mode_data);
97} 95}
98 96
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4eea70b..24c0e5cd71e3 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP
429 ljmp $(__KERNEL_CS),$1f 429 ljmp $(__KERNEL_CS),$1f
4301: movl $(__KERNEL_DS),%eax # reload all the segment registers 4301: movl $(__KERNEL_DS),%eax # reload all the segment registers
431 movl %eax,%ss # after changing gdt. 431 movl %eax,%ss # after changing gdt.
432 movl %eax,%fs # gets reset once there's real percpu
433 432
434 movl $(__USER_DS),%eax # DS/ES contains default USER segment 433 movl $(__USER_DS),%eax # DS/ES contains default USER segment
435 movl %eax,%ds 434 movl %eax,%ds
436 movl %eax,%es 435 movl %eax,%es
437 436
437 movl $(__KERNEL_PERCPU), %eax
438 movl %eax,%fs # set this cpu's percpu
439
438 xorl %eax,%eax # Clear GS and LDT 440 xorl %eax,%eax # Clear GS and LDT
439 movl %eax,%gs 441 movl %eax,%gs
440 lldt %ax 442 lldt %ax
@@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP
446 movb $1, ready 448 movb $1, ready
447 cmpb $0,%cl # the first CPU calls start_kernel 449 cmpb $0,%cl # the first CPU calls start_kernel
448 je 1f 450 je 1f
449 movl $(__KERNEL_PERCPU), %eax
450 movl %eax,%fs # set this cpu's percpu
451 movl (stack_start), %esp 451 movl (stack_start), %esp
4521: 4521:
453#endif /* CONFIG_SMP */ 453#endif /* CONFIG_SMP */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index c8ace880661b..a0a2b5ca9b7d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -207,19 +207,15 @@ ENTRY(secondary_startup_64)
207 207
208#ifdef CONFIG_SMP 208#ifdef CONFIG_SMP
209 /* 209 /*
210 * early_gdt_base should point to the gdt_page in static percpu init 210 * Fix up static pointers that need __per_cpu_load added. The assembler
211 * data area. Computing this requires two symbols - __per_cpu_load 211 * is unable to do this directly. This is only needed for the boot cpu.
212 * and per_cpu__gdt_page. As linker can't do no such relocation, do 212 * These values are set up with the correct base addresses by C code for
213 * it by hand. As early_gdt_descr is manipulated by C code for 213 * secondary cpus.
214 * secondary CPUs, this should be done only once for the boot CPU
215 * when early_gdt_descr_base contains zero.
216 */ 214 */
217 movq early_gdt_descr_base(%rip), %rax 215 movq initial_gs(%rip), %rax
218 testq %rax, %rax 216 cmpl $0, per_cpu__cpu_number(%rax)
219 jnz 1f 217 jne 1f
220 movq $__per_cpu_load, %rax 218 addq %rax, early_gdt_descr_base(%rip)
221 addq $per_cpu__gdt_page, %rax
222 movq %rax, early_gdt_descr_base(%rip)
2231: 2191:
224#endif 220#endif
225 /* 221 /*
@@ -246,13 +242,10 @@ ENTRY(secondary_startup_64)
246 242
247 /* Set up %gs. 243 /* Set up %gs.
248 * 244 *
249 * On SMP, %gs should point to the per-cpu area. For initial 245 * The base of %gs always points to the bottom of the irqstack
250 * boot, make %gs point to the init data section. For a 246 * union. If the stack protector canary is enabled, it is
251 * secondary CPU,initial_gs should be set to its pda address 247 * located at %gs:40. Note that, on SMP, the boot cpu uses
252 * before the CPU runs this code. 248 * init data section till per cpu areas are set up.
253 *
254 * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
255 * change.
256 */ 249 */
257 movl $MSR_GS_BASE,%ecx 250 movl $MSR_GS_BASE,%ecx
258 movq initial_gs(%rip),%rax 251 movq initial_gs(%rip),%rax
@@ -285,7 +278,7 @@ ENTRY(secondary_startup_64)
285#ifdef CONFIG_SMP 278#ifdef CONFIG_SMP
286 .quad __per_cpu_load 279 .quad __per_cpu_load
287#else 280#else
288 .quad PER_CPU_VAR(__pda) 281 .quad PER_CPU_VAR(irq_stack_union)
289#endif 282#endif
290 __FINITDATA 283 __FINITDATA
291 284
@@ -431,12 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
431 .globl early_gdt_descr 424 .globl early_gdt_descr
432early_gdt_descr: 425early_gdt_descr:
433 .word GDT_ENTRIES*8-1 426 .word GDT_ENTRIES*8-1
434#ifdef CONFIG_SMP
435early_gdt_descr_base: 427early_gdt_descr_base:
436 .quad 0x0000000000000000
437#else
438 .quad per_cpu__gdt_page 428 .quad per_cpu__gdt_page
439#endif
440 429
441ENTRY(phys_base) 430ENTRY(phys_base)
442 /* This must match the first entry in level2_kernel_pgt */ 431 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index f79660390724..e4d36bd56b62 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -3765,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3765} 3765}
3766#endif /* CONFIG_HT_IRQ */ 3766#endif /* CONFIG_HT_IRQ */
3767 3767
3768#ifdef CONFIG_X86_64 3768#ifdef CONFIG_X86_UV
3769/* 3769/*
3770 * Re-target the irq to the specified CPU and enable the specified MMR located 3770 * Re-target the irq to the specified CPU and enable the specified MMR located
3771 * on the specified blade to allow the sending of MSIs to the specified CPU. 3771 * on the specified blade to allow the sending of MSIs to the specified CPU.
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1db05247b47f..018963aa6ee3 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,10 +18,14 @@
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/idle.h> 20#include <asm/idle.h>
21#include <asm/apic.h>
21 22
22DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat); 23DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
23EXPORT_PER_CPU_SYMBOL(irq_stat); 24EXPORT_PER_CPU_SYMBOL(irq_stat);
24 25
26DEFINE_PER_CPU(struct pt_regs *, irq_regs);
27EXPORT_PER_CPU_SYMBOL(irq_regs);
28
25/* 29/*
26 * Probabilistic stack overflow check: 30 * Probabilistic stack overflow check:
27 * 31 *
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 0bef6280f30c..c56496f8c6fc 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
149 */ 149 */
150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
151 151
152 /* IPI for invalidation */ 152 /* IPIs for invalidation */
153 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); 153 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
154 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
155 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
156 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
157 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
158 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
159 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
160 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
154 161
155 /* IPI for generic function call */ 162 /* IPI for generic function call */
156 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 163 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index 2c00a57ccb90..1a1ae8edc40c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -108,7 +108,6 @@ void cpu_idle(void)
108 play_dead(); 108 play_dead();
109 109
110 local_irq_disable(); 110 local_irq_disable();
111 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
112 /* Don't trace irqs off for idle */ 111 /* Don't trace irqs off for idle */
113 stop_critical_timings(); 112 stop_critical_timings();
114 pm_idle(); 113 pm_idle();
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 4523ff88a69d..c422eebb0c58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -16,6 +16,7 @@
16 16
17#include <stdarg.h> 17#include <stdarg.h>
18 18
19#include <linux/stackprotector.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/errno.h> 21#include <linux/errno.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
@@ -46,7 +47,6 @@
46#include <asm/processor.h> 47#include <asm/processor.h>
47#include <asm/i387.h> 48#include <asm/i387.h>
48#include <asm/mmu_context.h> 49#include <asm/mmu_context.h>
49#include <asm/pda.h>
50#include <asm/prctl.h> 50#include <asm/prctl.h>
51#include <asm/desc.h> 51#include <asm/desc.h>
52#include <asm/proto.h> 52#include <asm/proto.h>
@@ -117,6 +117,17 @@ static inline void play_dead(void)
117void cpu_idle(void) 117void cpu_idle(void)
118{ 118{
119 current_thread_info()->status |= TS_POLLING; 119 current_thread_info()->status |= TS_POLLING;
120
121 /*
122 * If we're the non-boot CPU, nothing set the PDA stack
123 * canary up for us - and if we are the boot CPU we have
124 * a 0 stack canary. This is a good place for updating
125 * it, as we wont ever return from this function (so the
126 * invalid canaries already on the stack wont ever
127 * trigger):
128 */
129 boot_init_stack_canary();
130
120 /* endless idle loop with no priority at all */ 131 /* endless idle loop with no priority at all */
121 while (1) { 132 while (1) {
122 tick_nohz_stop_sched_tick(1); 133 tick_nohz_stop_sched_tick(1);
@@ -626,14 +637,6 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
626 percpu_write(kernel_stack, 637 percpu_write(kernel_stack,
627 (unsigned long)task_stack_page(next_p) + 638 (unsigned long)task_stack_page(next_p) +
628 THREAD_SIZE - KERNEL_STACK_OFFSET); 639 THREAD_SIZE - KERNEL_STACK_OFFSET);
629#ifdef CONFIG_CC_STACKPROTECTOR
630 write_pda(stack_canary, next_p->stack_canary);
631 /*
632 * Build time only check to make sure the stack_canary is at
633 * offset 40 in the pda; this is a gcc ABI requirement
634 */
635 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
636#endif
637 640
638 /* 641 /*
639 * Now maybe reload the debug registers and handle I/O bitmaps 642 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index efbafbbff584..90b8e154bb53 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -77,30 +77,6 @@ static void __init setup_node_to_cpumask_map(void);
77static inline void setup_node_to_cpumask_map(void) { } 77static inline void setup_node_to_cpumask_map(void) { }
78#endif 78#endif
79 79
80/*
81 * Define load_pda_offset() and per-cpu __pda for x86_64.
82 * load_pda_offset() is responsible for loading the offset of pda into
83 * %gs.
84 *
85 * On SMP, pda offset also duals as percpu base address and thus it
86 * should be at the start of per-cpu area. To achieve this, it's
87 * preallocated in vmlinux_64.lds.S directly instead of using
88 * DEFINE_PER_CPU().
89 */
90#ifdef CONFIG_X86_64
91void __cpuinit load_pda_offset(int cpu)
92{
93 /* Memory clobbers used to order pda/percpu accesses */
94 mb();
95 wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
96 mb();
97}
98#ifndef CONFIG_SMP
99DEFINE_PER_CPU(struct x8664_pda, __pda);
100#endif
101EXPORT_PER_CPU_SYMBOL(__pda);
102#endif /* CONFIG_SMP && CONFIG_X86_64 */
103
104#ifdef CONFIG_X86_64 80#ifdef CONFIG_X86_64
105 81
106/* correctly size the local cpu masks */ 82/* correctly size the local cpu masks */
@@ -207,15 +183,13 @@ void __init setup_per_cpu_areas(void)
207 per_cpu(cpu_number, cpu) = cpu; 183 per_cpu(cpu_number, cpu) = cpu;
208#ifdef CONFIG_X86_64 184#ifdef CONFIG_X86_64
209 per_cpu(irq_stack_ptr, cpu) = 185 per_cpu(irq_stack_ptr, cpu) =
210 (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64; 186 per_cpu(irq_stack_union.irq_stack, cpu) + IRQ_STACK_SIZE - 64;
211 /* 187 /*
212 * CPU0 modified pda in the init data area, reload pda 188 * Up to this point, CPU0 has been using .data.init
213 * offset for CPU0 and clear the area for others. 189 * area. Reload %gs offset for CPU0.
214 */ 190 */
215 if (cpu == 0) 191 if (cpu == 0)
216 load_pda_offset(0); 192 load_gs_base(cpu);
217 else
218 memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
219#endif 193#endif
220 194
221 DBG("PERCPU: cpu %4d %p\n", cpu, ptr); 195 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 869b98840fd0..def770b57b5a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
62#include <asm/vmi.h> 62#include <asm/vmi.h>
63#include <asm/genapic.h> 63#include <asm/genapic.h>
64#include <asm/setup.h> 64#include <asm/setup.h>
65#include <asm/uv/uv.h>
65#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
66 67
67#include <mach_apic.h> 68#include <mach_apic.h>
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index abf0808d6fc4..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,239 +0,0 @@
1#include <linux/spinlock.h>
2#include <linux/cpu.h>
3#include <linux/interrupt.h>
4
5#include <asm/tlbflush.h>
6
7DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
8 = { &init_mm, 0, };
9
10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h>
12
13/*
14 * Smarter SMP flushing macros.
15 * c/o Linus Torvalds.
16 *
17 * These mean you can really definitely utterly forget about
18 * writing to user space from interrupts. (Its not allowed anyway).
19 *
20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */
22
23static cpumask_var_t flush_cpumask;
24static struct mm_struct *flush_mm;
25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock);
27
28/*
29 * We cannot call mmdrop() because we are in interrupt context,
30 * instead update mm->cpu_vm_mask.
31 *
32 * We need to reload %cr3 since the page tables may be going
33 * away from under us..
34 */
35void leave_mm(int cpu)
36{
37 BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir);
40}
41EXPORT_SYMBOL_GPL(leave_mm);
42
43/*
44 *
45 * The flush IPI assumes that a thread switch happens in this order:
46 * [cpu0: the cpu that switches]
47 * 1) switch_mm() either 1a) or 1b)
48 * 1a) thread switch to a different mm
49 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
50 * Stop ipi delivery for the old mm. This is not synchronized with
51 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
52 * for the wrong mm, and in the worst case we perform a superfluous
53 * tlb flush.
54 * 1a2) set cpu_tlbstate to TLBSTATE_OK
55 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
56 * was in lazy tlb mode.
57 * 1a3) update cpu_tlbstate[].active_mm
58 * Now cpu0 accepts tlb flushes for the new mm.
59 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
60 * Now the other cpus will send tlb flush ipis.
61 * 1a4) change cr3.
62 * 1b) thread switch without mm change
63 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
64 * flush ipis.
65 * 1b1) set cpu_tlbstate to TLBSTATE_OK
66 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
67 * Atomically set the bit [other cpus will start sending flush ipis],
68 * and test the bit.
69 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
70 * 2) switch %%esp, ie current
71 *
72 * The interrupt must handle 2 special cases:
73 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
74 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
75 * runs in kernel space, the cpu could load tlb entries for user space
76 * pages.
77 *
78 * The good news is that cpu_tlbstate is local to each cpu, no
79 * write/read ordering problems.
80 */
81
82/*
83 * TLB flush IPI:
84 *
85 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
86 * 2) Leave the mm if we are in the lazy tlb mode.
87 */
88
89void smp_invalidate_interrupt(struct pt_regs *regs)
90{
91 unsigned long cpu;
92
93 cpu = get_cpu();
94
95 if (!cpumask_test_cpu(cpu, flush_cpumask))
96 goto out;
97 /*
98 * This was a BUG() but until someone can quote me the
99 * line from the intel manual that guarantees an IPI to
100 * multiple CPUs is retried _only_ on the erroring CPUs
101 * its staying as a return
102 *
103 * BUG();
104 */
105
106 if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
107 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb();
110 else
111 __flush_tlb_one(flush_va);
112 } else
113 leave_mm(cpu);
114 }
115 ack_APIC_irq();
116 smp_mb__before_clear_bit();
117 cpumask_clear_cpu(cpu, flush_cpumask);
118 smp_mb__after_clear_bit();
119out:
120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count);
122}
123
124void native_flush_tlb_others(const struct cpumask *cpumask,
125 struct mm_struct *mm, unsigned long va)
126{
127 /*
128 * - mask must exist :)
129 */
130 BUG_ON(cpumask_empty(cpumask));
131 BUG_ON(!mm);
132
133 /*
134 * i'm not happy about this global shared spinlock in the
135 * MM hot path, but we'll see how contended it is.
136 * AK: x86-64 has a faster method that could be ported.
137 */
138 spin_lock(&tlbstate_lock);
139
140 cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
141#ifdef CONFIG_HOTPLUG_CPU
142 /* If a CPU which we ran on has gone down, OK. */
143 cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
144 if (unlikely(cpumask_empty(flush_cpumask))) {
145 spin_unlock(&tlbstate_lock);
146 return;
147 }
148#endif
149 flush_mm = mm;
150 flush_va = va;
151
152 /*
153 * Make the above memory operations globally visible before
154 * sending the IPI.
155 */
156 smp_mb();
157 /*
158 * We have to send the IPI only to
159 * CPUs affected.
160 */
161 send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
162
163 while (!cpumask_empty(flush_cpumask))
164 /* nothing. lockup detection does not belong here */
165 cpu_relax();
166
167 flush_mm = NULL;
168 flush_va = 0;
169 spin_unlock(&tlbstate_lock);
170}
171
172void flush_tlb_current_task(void)
173{
174 struct mm_struct *mm = current->mm;
175
176 preempt_disable();
177
178 local_flush_tlb();
179 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
180 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
181 preempt_enable();
182}
183
184void flush_tlb_mm(struct mm_struct *mm)
185{
186
187 preempt_disable();
188
189 if (current->active_mm == mm) {
190 if (current->mm)
191 local_flush_tlb();
192 else
193 leave_mm(smp_processor_id());
194 }
195 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
196 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
197
198 preempt_enable();
199}
200
201void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
202{
203 struct mm_struct *mm = vma->vm_mm;
204
205 preempt_disable();
206
207 if (current->active_mm == mm) {
208 if (current->mm)
209 __flush_tlb_one(va);
210 else
211 leave_mm(smp_processor_id());
212 }
213
214 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
215 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
216 preempt_enable();
217}
218EXPORT_SYMBOL(flush_tlb_page);
219
220static void do_flush_tlb_all(void *info)
221{
222 unsigned long cpu = smp_processor_id();
223
224 __flush_tlb_all();
225 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
226 leave_mm(cpu);
227}
228
229void flush_tlb_all(void)
230{
231 on_each_cpu(do_flush_tlb_all, NULL, 1);
232}
233
234static int init_flush_cpumask(void)
235{
236 alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
237 return 0;
238}
239early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 690dcf1a27d4..aae15dd72604 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,6 +11,7 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12 12
13#include <asm/mmu_context.h> 13#include <asm/mmu_context.h>
14#include <asm/uv/uv.h>
14#include <asm/uv/uv_mmrs.h> 15#include <asm/uv/uv_mmrs.h>
15#include <asm/uv/uv_hub.h> 16#include <asm/uv/uv_hub.h>
16#include <asm/uv/uv_bau.h> 17#include <asm/uv/uv_bau.h>
@@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
209 * 210 *
210 * Send a broadcast and wait for a broadcast message to complete. 211 * Send a broadcast and wait for a broadcast message to complete.
211 * 212 *
212 * The cpumaskp mask contains the cpus the broadcast was sent to. 213 * The flush_mask contains the cpus the broadcast was sent to.
213 * 214 *
214 * Returns 1 if all remote flushing was done. The mask is zeroed. 215 * Returns NULL if all remote flushing was done. The mask is zeroed.
215 * Returns 0 if some remote flushing remains to be done. The mask will have 216 * Returns @flush_mask if some remote flushing remains to be done. The
216 * some bits still set. 217 * mask will have some bits still set.
217 */ 218 */
218int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, 219const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
219 struct cpumask *cpumaskp) 220 struct bau_desc *bau_desc,
221 struct cpumask *flush_mask)
220{ 222{
221 int completion_status = 0; 223 int completion_status = 0;
222 int right_shift; 224 int right_shift;
@@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
263 * Success, so clear the remote cpu's from the mask so we don't 265 * Success, so clear the remote cpu's from the mask so we don't
264 * use the IPI method of shootdown on them. 266 * use the IPI method of shootdown on them.
265 */ 267 */
266 for_each_cpu(bit, cpumaskp) { 268 for_each_cpu(bit, flush_mask) {
267 blade = uv_cpu_to_blade_id(bit); 269 blade = uv_cpu_to_blade_id(bit);
268 if (blade == this_blade) 270 if (blade == this_blade)
269 continue; 271 continue;
270 cpumask_clear_cpu(bit, cpumaskp); 272 cpumask_clear_cpu(bit, flush_mask);
271 } 273 }
272 if (!cpumask_empty(cpumaskp)) 274 if (!cpumask_empty(flush_mask))
273 return 0; 275 return flush_mask;
274 return 1; 276 return NULL;
275} 277}
276 278
277/** 279/**
278 * uv_flush_tlb_others - globally purge translation cache of a virtual 280 * uv_flush_tlb_others - globally purge translation cache of a virtual
279 * address or all TLB's 281 * address or all TLB's
280 * @cpumaskp: mask of all cpu's in which the address is to be removed 282 * @cpumask: mask of all cpu's in which the address is to be removed
281 * @mm: mm_struct containing virtual address range 283 * @mm: mm_struct containing virtual address range
282 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) 284 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
285 * @cpu: the current cpu
283 * 286 *
284 * This is the entry point for initiating any UV global TLB shootdown. 287 * This is the entry point for initiating any UV global TLB shootdown.
285 * 288 *
286 * Purges the translation caches of all specified processors of the given 289 * Purges the translation caches of all specified processors of the given
287 * virtual address, or purges all TLB's on specified processors. 290 * virtual address, or purges all TLB's on specified processors.
288 * 291 *
289 * The caller has derived the cpumaskp from the mm_struct and has subtracted 292 * The caller has derived the cpumask from the mm_struct. This function
290 * the local cpu from the mask. This function is called only if there 293 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
291 * are bits set in the mask. (e.g. flush_tlb_page())
292 * 294 *
293 * The cpumaskp is converted into a nodemask of the nodes containing 295 * The cpumask is converted into a nodemask of the nodes containing
294 * the cpus. 296 * the cpus.
295 * 297 *
296 * Returns 1 if all remote flushing was done. 298 * Note that this function should be called with preemption disabled.
297 * Returns 0 if some remote flushing remains to be done. 299 *
300 * Returns NULL if all remote flushing was done.
301 * Returns pointer to cpumask if some remote flushing remains to be
302 * done. The returned pointer is valid till preemption is re-enabled.
298 */ 303 */
299int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm, 304const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
300 unsigned long va) 305 struct mm_struct *mm,
306 unsigned long va, unsigned int cpu)
301{ 307{
308 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
309 struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
302 int i; 310 int i;
303 int bit; 311 int bit;
304 int blade; 312 int blade;
305 int cpu; 313 int uv_cpu;
306 int this_blade; 314 int this_blade;
307 int locals = 0; 315 int locals = 0;
308 struct bau_desc *bau_desc; 316 struct bau_desc *bau_desc;
309 317
310 cpu = uv_blade_processor_id(); 318 WARN_ON(!in_atomic());
319
320 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
321
322 uv_cpu = uv_blade_processor_id();
311 this_blade = uv_numa_blade_id(); 323 this_blade = uv_numa_blade_id();
312 bau_desc = __get_cpu_var(bau_control).descriptor_base; 324 bau_desc = __get_cpu_var(bau_control).descriptor_base;
313 bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; 325 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
314 326
315 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 327 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
316 328
317 i = 0; 329 i = 0;
318 for_each_cpu(bit, cpumaskp) { 330 for_each_cpu(bit, flush_mask) {
319 blade = uv_cpu_to_blade_id(bit); 331 blade = uv_cpu_to_blade_id(bit);
320 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); 332 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
321 if (blade == this_blade) { 333 if (blade == this_blade) {
@@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
330 * no off_node flushing; return status for local node 342 * no off_node flushing; return status for local node
331 */ 343 */
332 if (locals) 344 if (locals)
333 return 0; 345 return flush_mask;
334 else 346 else
335 return 1; 347 return NULL;
336 } 348 }
337 __get_cpu_var(ptcstats).requestor++; 349 __get_cpu_var(ptcstats).requestor++;
338 __get_cpu_var(ptcstats).ntargeted += i; 350 __get_cpu_var(ptcstats).ntargeted += i;
339 351
340 bau_desc->payload.address = va; 352 bau_desc->payload.address = va;
341 bau_desc->payload.sending_cpu = smp_processor_id(); 353 bau_desc->payload.sending_cpu = cpu;
342 354
343 return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); 355 return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
344} 356}
345 357
346/* 358/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284b..ed5aee5f3fcc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,7 +59,6 @@
59#ifdef CONFIG_X86_64 59#ifdef CONFIG_X86_64
60#include <asm/pgalloc.h> 60#include <asm/pgalloc.h>
61#include <asm/proto.h> 61#include <asm/proto.h>
62#include <asm/pda.h>
63#else 62#else
64#include <asm/processor-flags.h> 63#include <asm/processor-flags.h>
65#include <asm/arch_hooks.h> 64#include <asm/arch_hooks.h>
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index a09abb8fb97f..c9740996430a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -220,8 +220,7 @@ SECTIONS
220 * so that it can be accessed as a percpu variable. 220 * so that it can be accessed as a percpu variable.
221 */ 221 */
222 . = ALIGN(PAGE_SIZE); 222 . = ALIGN(PAGE_SIZE);
223 PERCPU_VADDR_PREALLOC(0, :percpu, pda_size) 223 PERCPU_VADDR(0, :percpu)
224 per_cpu____pda = __per_cpu_start;
225#else 224#else
226 PERCPU(PAGE_SIZE) 225 PERCPU(PAGE_SIZE)
227#endif 226#endif
@@ -262,3 +261,8 @@ SECTIONS
262 */ 261 */
263ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 262ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
264 "kernel image bigger than KERNEL_IMAGE_SIZE") 263 "kernel image bigger than KERNEL_IMAGE_SIZE")
264
265#ifdef CONFIG_SMP
266ASSERT((per_cpu__irq_stack_union == 0),
267 "irq_stack_union is not at start of per-cpu area");
268#endif
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index d8cc96a2738f..9f05157220f5 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,6 +1,8 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o gup.o 2 pat.o pgtable.o gup.o
3 3
4obj-$(CONFIG_X86_SMP) += tlb.o
5
4obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o 6obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
5 7
6obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 8obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 90dfae511a41..65709a6aa6ee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -26,6 +26,7 @@
26#include <linux/kprobes.h> 26#include <linux/kprobes.h>
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/kdebug.h> 28#include <linux/kdebug.h>
29#include <linux/magic.h>
29 30
30#include <asm/system.h> 31#include <asm/system.h>
31#include <asm/desc.h> 32#include <asm/desc.h>
@@ -91,8 +92,8 @@ static inline int notify_page_fault(struct pt_regs *regs)
91 * 92 *
92 * Opcode checker based on code by Richard Brunner 93 * Opcode checker based on code by Richard Brunner
93 */ 94 */
94static int is_prefetch(struct pt_regs *regs, unsigned long addr, 95static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
95 unsigned long error_code) 96 unsigned long addr)
96{ 97{
97 unsigned char *instr; 98 unsigned char *instr;
98 int scan_more = 1; 99 int scan_more = 1;
@@ -409,15 +410,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
409} 410}
410 411
411#ifdef CONFIG_X86_64 412#ifdef CONFIG_X86_64
412static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, 413static noinline void pgtable_bad(struct pt_regs *regs,
413 unsigned long error_code) 414 unsigned long error_code, unsigned long address)
414{ 415{
415 unsigned long flags = oops_begin(); 416 unsigned long flags = oops_begin();
416 int sig = SIGKILL; 417 int sig = SIGKILL;
417 struct task_struct *tsk; 418 struct task_struct *tsk = current;
418 419
419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 420 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
420 current->comm, address); 421 tsk->comm, address);
421 dump_pagetable(address); 422 dump_pagetable(address);
422 tsk = current; 423 tsk = current;
423 tsk->thread.cr2 = address; 424 tsk->thread.cr2 = address;
@@ -429,6 +430,196 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
429} 430}
430#endif 431#endif
431 432
433static noinline void no_context(struct pt_regs *regs,
434 unsigned long error_code, unsigned long address)
435{
436 struct task_struct *tsk = current;
437 unsigned long *stackend;
438
439#ifdef CONFIG_X86_64
440 unsigned long flags;
441 int sig;
442#endif
443
444 /* Are we prepared to handle this kernel fault? */
445 if (fixup_exception(regs))
446 return;
447
448 /*
449 * X86_32
450 * Valid to do another page fault here, because if this fault
451 * had been triggered by is_prefetch fixup_exception would have
452 * handled it.
453 *
454 * X86_64
455 * Hall of shame of CPU/BIOS bugs.
456 */
457 if (is_prefetch(regs, error_code, address))
458 return;
459
460 if (is_errata93(regs, address))
461 return;
462
463 /*
464 * Oops. The kernel tried to access some bad page. We'll have to
465 * terminate things with extreme prejudice.
466 */
467#ifdef CONFIG_X86_32
468 bust_spinlocks(1);
469#else
470 flags = oops_begin();
471#endif
472
473 show_fault_oops(regs, error_code, address);
474
475 stackend = end_of_stack(tsk);
476 if (*stackend != STACK_END_MAGIC)
477 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
478
479 tsk->thread.cr2 = address;
480 tsk->thread.trap_no = 14;
481 tsk->thread.error_code = error_code;
482
483#ifdef CONFIG_X86_32
484 die("Oops", regs, error_code);
485 bust_spinlocks(0);
486 do_exit(SIGKILL);
487#else
488 sig = SIGKILL;
489 if (__die("Oops", regs, error_code))
490 sig = 0;
491 /* Executive summary in case the body of the oops scrolled away */
492 printk(KERN_EMERG "CR2: %016lx\n", address);
493 oops_end(flags, regs, sig);
494#endif
495}
496
497static void __bad_area_nosemaphore(struct pt_regs *regs,
498 unsigned long error_code, unsigned long address,
499 int si_code)
500{
501 struct task_struct *tsk = current;
502
503 /* User mode accesses just cause a SIGSEGV */
504 if (error_code & PF_USER) {
505 /*
506 * It's possible to have interrupts off here.
507 */
508 local_irq_enable();
509
510 /*
511 * Valid to do another page fault here because this one came
512 * from user space.
513 */
514 if (is_prefetch(regs, error_code, address))
515 return;
516
517 if (is_errata100(regs, address))
518 return;
519
520 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
521 printk_ratelimit()) {
522 printk(
523 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
524 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
525 tsk->comm, task_pid_nr(tsk), address,
526 (void *) regs->ip, (void *) regs->sp, error_code);
527 print_vma_addr(" in ", regs->ip);
528 printk("\n");
529 }
530
531 tsk->thread.cr2 = address;
532 /* Kernel addresses are always protection faults */
533 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
534 tsk->thread.trap_no = 14;
535 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
536 return;
537 }
538
539 if (is_f00f_bug(regs, address))
540 return;
541
542 no_context(regs, error_code, address);
543}
544
545static noinline void bad_area_nosemaphore(struct pt_regs *regs,
546 unsigned long error_code, unsigned long address)
547{
548 __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
549}
550
551static void __bad_area(struct pt_regs *regs,
552 unsigned long error_code, unsigned long address,
553 int si_code)
554{
555 struct mm_struct *mm = current->mm;
556
557 /*
558 * Something tried to access memory that isn't in our memory map..
559 * Fix it, but check if it's kernel or user first..
560 */
561 up_read(&mm->mmap_sem);
562
563 __bad_area_nosemaphore(regs, error_code, address, si_code);
564}
565
566static noinline void bad_area(struct pt_regs *regs,
567 unsigned long error_code, unsigned long address)
568{
569 __bad_area(regs, error_code, address, SEGV_MAPERR);
570}
571
572static noinline void bad_area_access_error(struct pt_regs *regs,
573 unsigned long error_code, unsigned long address)
574{
575 __bad_area(regs, error_code, address, SEGV_ACCERR);
576}
577
578/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
579static void out_of_memory(struct pt_regs *regs,
580 unsigned long error_code, unsigned long address)
581{
582 /*
583 * We ran out of memory, call the OOM killer, and return the userspace
584 * (which will retry the fault, or kill us if we got oom-killed).
585 */
586 up_read(&current->mm->mmap_sem);
587 pagefault_out_of_memory();
588}
589
590static void do_sigbus(struct pt_regs *regs,
591 unsigned long error_code, unsigned long address)
592{
593 struct task_struct *tsk = current;
594 struct mm_struct *mm = tsk->mm;
595
596 up_read(&mm->mmap_sem);
597
598 /* Kernel mode? Handle exceptions or die */
599 if (!(error_code & PF_USER))
600 no_context(regs, error_code, address);
601#ifdef CONFIG_X86_32
602 /* User space => ok to do another page fault */
603 if (is_prefetch(regs, error_code, address))
604 return;
605#endif
606 tsk->thread.cr2 = address;
607 tsk->thread.error_code = error_code;
608 tsk->thread.trap_no = 14;
609 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
610}
611
612static noinline void mm_fault_error(struct pt_regs *regs,
613 unsigned long error_code, unsigned long address, unsigned int fault)
614{
615 if (fault & VM_FAULT_OOM)
616 out_of_memory(regs, error_code, address);
617 else if (fault & VM_FAULT_SIGBUS)
618 do_sigbus(regs, error_code, address);
619 else
620 BUG();
621}
622
432static int spurious_fault_check(unsigned long error_code, pte_t *pte) 623static int spurious_fault_check(unsigned long error_code, pte_t *pte)
433{ 624{
434 if ((error_code & PF_WRITE) && !pte_write(*pte)) 625 if ((error_code & PF_WRITE) && !pte_write(*pte))
@@ -448,8 +639,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
448 * There are no security implications to leaving a stale TLB when 639 * There are no security implications to leaving a stale TLB when
449 * increasing the permissions on a page. 640 * increasing the permissions on a page.
450 */ 641 */
451static int spurious_fault(unsigned long address, 642static noinline int spurious_fault(unsigned long error_code,
452 unsigned long error_code) 643 unsigned long address)
453{ 644{
454 pgd_t *pgd; 645 pgd_t *pgd;
455 pud_t *pud; 646 pud_t *pud;
@@ -494,7 +685,7 @@ static int spurious_fault(unsigned long address,
494 * 685 *
495 * This assumes no large pages in there. 686 * This assumes no large pages in there.
496 */ 687 */
497static int vmalloc_fault(unsigned long address) 688static noinline int vmalloc_fault(unsigned long address)
498{ 689{
499#ifdef CONFIG_X86_32 690#ifdef CONFIG_X86_32
500 unsigned long pgd_paddr; 691 unsigned long pgd_paddr;
@@ -573,6 +764,25 @@ static int vmalloc_fault(unsigned long address)
573 764
574int show_unhandled_signals = 1; 765int show_unhandled_signals = 1;
575 766
767static inline int access_error(unsigned long error_code, int write,
768 struct vm_area_struct *vma)
769{
770 if (write) {
771 /* write, present and write, not present */
772 if (unlikely(!(vma->vm_flags & VM_WRITE)))
773 return 1;
774 } else if (unlikely(error_code & PF_PROT)) {
775 /* read, present */
776 return 1;
777 } else {
778 /* read, not present */
779 if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
780 return 1;
781 }
782
783 return 0;
784}
785
576/* 786/*
577 * This routine handles page faults. It determines the address, 787 * This routine handles page faults. It determines the address,
578 * and the problem, and then passes it off to one of the appropriate 788 * and the problem, and then passes it off to one of the appropriate
@@ -583,16 +793,12 @@ asmlinkage
583#endif 793#endif
584void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) 794void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
585{ 795{
796 unsigned long address;
586 struct task_struct *tsk; 797 struct task_struct *tsk;
587 struct mm_struct *mm; 798 struct mm_struct *mm;
588 struct vm_area_struct *vma; 799 struct vm_area_struct *vma;
589 unsigned long address; 800 int write;
590 int write, si_code;
591 int fault; 801 int fault;
592#ifdef CONFIG_X86_64
593 unsigned long flags;
594 int sig;
595#endif
596 802
597 tsk = current; 803 tsk = current;
598 mm = tsk->mm; 804 mm = tsk->mm;
@@ -601,9 +807,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
601 /* get the address */ 807 /* get the address */
602 address = read_cr2(); 808 address = read_cr2();
603 809
604 si_code = SEGV_MAPERR; 810 if (unlikely(notify_page_fault(regs)))
605
606 if (notify_page_fault(regs))
607 return; 811 return;
608 if (unlikely(kmmio_fault(regs, address))) 812 if (unlikely(kmmio_fault(regs, address)))
609 return; 813 return;
@@ -631,17 +835,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
631 return; 835 return;
632 836
633 /* Can handle a stale RO->RW TLB */ 837 /* Can handle a stale RO->RW TLB */
634 if (spurious_fault(address, error_code)) 838 if (spurious_fault(error_code, address))
635 return; 839 return;
636 840
637 /* 841 /*
638 * Don't take the mm semaphore here. If we fixup a prefetch 842 * Don't take the mm semaphore here. If we fixup a prefetch
639 * fault we could otherwise deadlock. 843 * fault we could otherwise deadlock.
640 */ 844 */
641 goto bad_area_nosemaphore; 845 bad_area_nosemaphore(regs, error_code, address);
846 return;
642 } 847 }
643 848
644
645 /* 849 /*
646 * It's safe to allow irq's after cr2 has been saved and the 850 * It's safe to allow irq's after cr2 has been saved and the
647 * vmalloc fault has been handled. 851 * vmalloc fault has been handled.
@@ -657,15 +861,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
657 861
658#ifdef CONFIG_X86_64 862#ifdef CONFIG_X86_64
659 if (unlikely(error_code & PF_RSVD)) 863 if (unlikely(error_code & PF_RSVD))
660 pgtable_bad(address, regs, error_code); 864 pgtable_bad(regs, error_code, address);
661#endif 865#endif
662 866
663 /* 867 /*
664 * If we're in an interrupt, have no user context or are running in an 868 * If we're in an interrupt, have no user context or are running in an
665 * atomic region then we must not take the fault. 869 * atomic region then we must not take the fault.
666 */ 870 */
667 if (unlikely(in_atomic() || !mm)) 871 if (unlikely(in_atomic() || !mm)) {
668 goto bad_area_nosemaphore; 872 bad_area_nosemaphore(regs, error_code, address);
873 return;
874 }
669 875
670 /* 876 /*
671 * When running in the kernel we expect faults to occur only to 877 * When running in the kernel we expect faults to occur only to
@@ -683,20 +889,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
683 * source. If this is invalid we can skip the address space check, 889 * source. If this is invalid we can skip the address space check,
684 * thus avoiding the deadlock. 890 * thus avoiding the deadlock.
685 */ 891 */
686 if (!down_read_trylock(&mm->mmap_sem)) { 892 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
687 if ((error_code & PF_USER) == 0 && 893 if ((error_code & PF_USER) == 0 &&
688 !search_exception_tables(regs->ip)) 894 !search_exception_tables(regs->ip)) {
689 goto bad_area_nosemaphore; 895 bad_area_nosemaphore(regs, error_code, address);
896 return;
897 }
690 down_read(&mm->mmap_sem); 898 down_read(&mm->mmap_sem);
691 } 899 }
692 900
693 vma = find_vma(mm, address); 901 vma = find_vma(mm, address);
694 if (!vma) 902 if (unlikely(!vma)) {
695 goto bad_area; 903 bad_area(regs, error_code, address);
696 if (vma->vm_start <= address) 904 return;
905 }
906 if (likely(vma->vm_start <= address))
697 goto good_area; 907 goto good_area;
698 if (!(vma->vm_flags & VM_GROWSDOWN)) 908 if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
699 goto bad_area; 909 bad_area(regs, error_code, address);
910 return;
911 }
700 if (error_code & PF_USER) { 912 if (error_code & PF_USER) {
701 /* 913 /*
702 * Accessing the stack below %sp is always a bug. 914 * Accessing the stack below %sp is always a bug.
@@ -704,31 +916,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
704 * and pusha to work. ("enter $65535,$31" pushes 916 * and pusha to work. ("enter $65535,$31" pushes
705 * 32 pointers and then decrements %sp by 65535.) 917 * 32 pointers and then decrements %sp by 65535.)
706 */ 918 */
707 if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) 919 if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
708 goto bad_area; 920 bad_area(regs, error_code, address);
921 return;
922 }
709 } 923 }
710 if (expand_stack(vma, address)) 924 if (unlikely(expand_stack(vma, address))) {
711 goto bad_area; 925 bad_area(regs, error_code, address);
712/* 926 return;
713 * Ok, we have a good vm_area for this memory access, so 927 }
714 * we can handle it.. 928
715 */ 929 /*
930 * Ok, we have a good vm_area for this memory access, so
931 * we can handle it..
932 */
716good_area: 933good_area:
717 si_code = SEGV_ACCERR; 934 write = error_code & PF_WRITE;
718 write = 0; 935 if (unlikely(access_error(error_code, write, vma))) {
719 switch (error_code & (PF_PROT|PF_WRITE)) { 936 bad_area_access_error(regs, error_code, address);
720 default: /* 3: write, present */ 937 return;
721 /* fall through */
722 case PF_WRITE: /* write, not present */
723 if (!(vma->vm_flags & VM_WRITE))
724 goto bad_area;
725 write++;
726 break;
727 case PF_PROT: /* read, present */
728 goto bad_area;
729 case 0: /* read, not present */
730 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
731 goto bad_area;
732 } 938 }
733 939
734 /* 940 /*
@@ -738,11 +944,8 @@ good_area:
738 */ 944 */
739 fault = handle_mm_fault(mm, vma, address, write); 945 fault = handle_mm_fault(mm, vma, address, write);
740 if (unlikely(fault & VM_FAULT_ERROR)) { 946 if (unlikely(fault & VM_FAULT_ERROR)) {
741 if (fault & VM_FAULT_OOM) 947 mm_fault_error(regs, error_code, address, fault);
742 goto out_of_memory; 948 return;
743 else if (fault & VM_FAULT_SIGBUS)
744 goto do_sigbus;
745 BUG();
746 } 949 }
747 if (fault & VM_FAULT_MAJOR) 950 if (fault & VM_FAULT_MAJOR)
748 tsk->maj_flt++; 951 tsk->maj_flt++;
@@ -760,128 +963,6 @@ good_area:
760 } 963 }
761#endif 964#endif
762 up_read(&mm->mmap_sem); 965 up_read(&mm->mmap_sem);
763 return;
764
765/*
766 * Something tried to access memory that isn't in our memory map..
767 * Fix it, but check if it's kernel or user first..
768 */
769bad_area:
770 up_read(&mm->mmap_sem);
771
772bad_area_nosemaphore:
773 /* User mode accesses just cause a SIGSEGV */
774 if (error_code & PF_USER) {
775 /*
776 * It's possible to have interrupts off here.
777 */
778 local_irq_enable();
779
780 /*
781 * Valid to do another page fault here because this one came
782 * from user space.
783 */
784 if (is_prefetch(regs, address, error_code))
785 return;
786
787 if (is_errata100(regs, address))
788 return;
789
790 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
791 printk_ratelimit()) {
792 printk(
793 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
794 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
795 tsk->comm, task_pid_nr(tsk), address,
796 (void *) regs->ip, (void *) regs->sp, error_code);
797 print_vma_addr(" in ", regs->ip);
798 printk("\n");
799 }
800
801 tsk->thread.cr2 = address;
802 /* Kernel addresses are always protection faults */
803 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
804 tsk->thread.trap_no = 14;
805 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
806 return;
807 }
808
809 if (is_f00f_bug(regs, address))
810 return;
811
812no_context:
813 /* Are we prepared to handle this kernel fault? */
814 if (fixup_exception(regs))
815 return;
816
817 /*
818 * X86_32
819 * Valid to do another page fault here, because if this fault
820 * had been triggered by is_prefetch fixup_exception would have
821 * handled it.
822 *
823 * X86_64
824 * Hall of shame of CPU/BIOS bugs.
825 */
826 if (is_prefetch(regs, address, error_code))
827 return;
828
829 if (is_errata93(regs, address))
830 return;
831
832/*
833 * Oops. The kernel tried to access some bad page. We'll have to
834 * terminate things with extreme prejudice.
835 */
836#ifdef CONFIG_X86_32
837 bust_spinlocks(1);
838#else
839 flags = oops_begin();
840#endif
841
842 show_fault_oops(regs, error_code, address);
843
844 tsk->thread.cr2 = address;
845 tsk->thread.trap_no = 14;
846 tsk->thread.error_code = error_code;
847
848#ifdef CONFIG_X86_32
849 die("Oops", regs, error_code);
850 bust_spinlocks(0);
851 do_exit(SIGKILL);
852#else
853 sig = SIGKILL;
854 if (__die("Oops", regs, error_code))
855 sig = 0;
856 /* Executive summary in case the body of the oops scrolled away */
857 printk(KERN_EMERG "CR2: %016lx\n", address);
858 oops_end(flags, regs, sig);
859#endif
860
861out_of_memory:
862 /*
863 * We ran out of memory, call the OOM killer, and return the userspace
864 * (which will retry the fault, or kill us if we got oom-killed).
865 */
866 up_read(&mm->mmap_sem);
867 pagefault_out_of_memory();
868 return;
869
870do_sigbus:
871 up_read(&mm->mmap_sem);
872
873 /* Kernel mode? Handle exceptions or die */
874 if (!(error_code & PF_USER))
875 goto no_context;
876#ifdef CONFIG_X86_32
877 /* User space => ok to do another page fault */
878 if (is_prefetch(regs, address, error_code))
879 return;
880#endif
881 tsk->thread.cr2 = address;
882 tsk->thread.error_code = error_code;
883 tsk->thread.trap_no = 14;
884 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
885} 966}
886 967
887DEFINE_SPINLOCK(pgd_lock); 968DEFINE_SPINLOCK(pgd_lock);
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 09737c8af074..15df1baee100 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -21,6 +21,7 @@
21#include <asm/numa.h> 21#include <asm/numa.h>
22#include <asm/e820.h> 22#include <asm/e820.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/uv/uv.h>
24 25
25int acpi_numa __initdata; 26int acpi_numa __initdata;
26 27
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/mm/tlb.c
index e64a32c48825..72a6d4ebe34d 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/mm/tlb.c
@@ -1,22 +1,15 @@
1#include <linux/init.h> 1#include <linux/init.h>
2 2
3#include <linux/mm.h> 3#include <linux/mm.h>
4#include <linux/delay.h>
5#include <linux/spinlock.h> 4#include <linux/spinlock.h>
6#include <linux/smp.h> 5#include <linux/smp.h>
7#include <linux/kernel_stat.h>
8#include <linux/mc146818rtc.h>
9#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/module.h>
10 8
11#include <asm/mtrr.h>
12#include <asm/pgalloc.h>
13#include <asm/tlbflush.h> 9#include <asm/tlbflush.h>
14#include <asm/mmu_context.h> 10#include <asm/mmu_context.h>
15#include <asm/proto.h> 11#include <asm/apic.h>
16#include <asm/apicdef.h> 12#include <asm/uv/uv.h>
17#include <asm/idle.h>
18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h>
20 13
21DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) 14DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
22 = { &init_mm, 0, }; 15 = { &init_mm, 0, };
@@ -36,7 +29,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
36 * To avoid global state use 8 different call vectors. 29 * To avoid global state use 8 different call vectors.
37 * Each CPU uses a specific vector to trigger flushes on other 30 * Each CPU uses a specific vector to trigger flushes on other
38 * CPUs. Depending on the received vector the target CPUs look into 31 * CPUs. Depending on the received vector the target CPUs look into
39 * the right per cpu variable for the flush data. 32 * the right array slot for the flush data.
40 * 33 *
41 * With more than 8 CPUs they are hashed to the 8 available 34 * With more than 8 CPUs they are hashed to the 8 available
42 * vectors. The limited global vector space forces us to this right now. 35 * vectors. The limited global vector space forces us to this right now.
@@ -51,13 +44,13 @@ union smp_flush_state {
51 spinlock_t tlbstate_lock; 44 spinlock_t tlbstate_lock;
52 DECLARE_BITMAP(flush_cpumask, NR_CPUS); 45 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
53 }; 46 };
54 char pad[SMP_CACHE_BYTES]; 47 char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
55} ____cacheline_aligned; 48} ____cacheline_internodealigned_in_smp;
56 49
57/* State is put into the per CPU data section, but padded 50/* State is put into the per CPU data section, but padded
58 to a full cache line because other CPUs can access it and we don't 51 to a full cache line because other CPUs can access it and we don't
59 want false sharing in the per cpu data segment. */ 52 want false sharing in the per cpu data segment. */
60static DEFINE_PER_CPU(union smp_flush_state, flush_state); 53static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
61 54
62/* 55/*
63 * We cannot call mmdrop() because we are in interrupt context, 56 * We cannot call mmdrop() because we are in interrupt context,
@@ -120,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
120 * Interrupts are disabled. 113 * Interrupts are disabled.
121 */ 114 */
122 115
123asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) 116/*
117 * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
118 * but still used for documentation purpose but the usage is slightly
119 * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
120 * entry calls in with the first parameter in %eax. Maybe define
121 * intrlinkage?
122 */
123#ifdef CONFIG_X86_64
124asmlinkage
125#endif
126void smp_invalidate_interrupt(struct pt_regs *regs)
124{ 127{
125 int cpu; 128 unsigned int cpu;
126 int sender; 129 unsigned int sender;
127 union smp_flush_state *f; 130 union smp_flush_state *f;
128 131
129 cpu = smp_processor_id(); 132 cpu = smp_processor_id();
@@ -132,7 +135,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
132 * Use that to determine where the sender put the data. 135 * Use that to determine where the sender put the data.
133 */ 136 */
134 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; 137 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
135 f = &per_cpu(flush_state, sender); 138 f = &flush_state[sender];
136 139
137 if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask))) 140 if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
138 goto out; 141 goto out;
@@ -156,19 +159,21 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
156 } 159 }
157out: 160out:
158 ack_APIC_irq(); 161 ack_APIC_irq();
162 smp_mb__before_clear_bit();
159 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask)); 163 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
164 smp_mb__after_clear_bit();
160 inc_irq_stat(irq_tlb_count); 165 inc_irq_stat(irq_tlb_count);
161} 166}
162 167
163static void flush_tlb_others_ipi(const struct cpumask *cpumask, 168static void flush_tlb_others_ipi(const struct cpumask *cpumask,
164 struct mm_struct *mm, unsigned long va) 169 struct mm_struct *mm, unsigned long va)
165{ 170{
166 int sender; 171 unsigned int sender;
167 union smp_flush_state *f; 172 union smp_flush_state *f;
168 173
169 /* Caller has disabled preemption */ 174 /* Caller has disabled preemption */
170 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 175 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
171 f = &per_cpu(flush_state, sender); 176 f = &flush_state[sender];
172 177
173 /* 178 /*
174 * Could avoid this lock when 179 * Could avoid this lock when
@@ -206,16 +211,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
206 struct mm_struct *mm, unsigned long va) 211 struct mm_struct *mm, unsigned long va)
207{ 212{
208 if (is_uv_system()) { 213 if (is_uv_system()) {
209 /* FIXME: could be an percpu_alloc'd thing */ 214 unsigned int cpu;
210 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
211 struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
212
213 cpumask_andnot(after_uv_flush, cpumask,
214 cpumask_of(smp_processor_id()));
215 if (!uv_flush_tlb_others(after_uv_flush, mm, va))
216 flush_tlb_others_ipi(after_uv_flush, mm, va);
217 215
218 put_cpu_var(flush_tlb_uv_cpumask); 216 cpu = get_cpu();
217 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
218 if (cpumask)
219 flush_tlb_others_ipi(cpumask, mm, va);
220 put_cpu();
219 return; 221 return;
220 } 222 }
221 flush_tlb_others_ipi(cpumask, mm, va); 223 flush_tlb_others_ipi(cpumask, mm, va);
@@ -225,8 +227,8 @@ static int __cpuinit init_smp_flush(void)
225{ 227{
226 int i; 228 int i;
227 229
228 for_each_possible_cpu(i) 230 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
229 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); 231 spin_lock_init(&flush_state[i].tlbstate_lock);
230 232
231 return 0; 233 return 0;
232} 234}
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 75b94139e1f2..bef941f61451 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -1645,7 +1645,6 @@ asmlinkage void __init xen_start_kernel(void)
1645#ifdef CONFIG_X86_64 1645#ifdef CONFIG_X86_64
1646 /* Disable until direct per-cpu data access. */ 1646 /* Disable until direct per-cpu data access. */
1647 have_vcpu_info_placement = 0; 1647 have_vcpu_info_placement = 0;
1648 pda_init(0);
1649#endif 1648#endif
1650 1649
1651 xen_smp_init(); 1650 xen_smp_init();
diff --git a/drivers/misc/Kconfig b/drivers/misc/Kconfig
index 419c378bd24b..abcb8459254e 100644
--- a/drivers/misc/Kconfig
+++ b/drivers/misc/Kconfig
@@ -170,7 +170,7 @@ config ENCLOSURE_SERVICES
170config SGI_XP 170config SGI_XP
171 tristate "Support communication between SGI SSIs" 171 tristate "Support communication between SGI SSIs"
172 depends on NET 172 depends on NET
173 depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_64) && SMP 173 depends on (IA64_GENERIC || IA64_SGI_SN2 || IA64_SGI_UV || X86_UV) && SMP
174 select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 174 select IA64_UNCACHED_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
175 select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2 175 select GENERIC_ALLOCATOR if IA64_GENERIC || IA64_SGI_SN2
176 select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP 176 select SGI_GRU if (IA64_GENERIC || IA64_SGI_UV || X86_64) && SMP
@@ -197,7 +197,7 @@ config HP_ILO
197 197
198config SGI_GRU 198config SGI_GRU
199 tristate "SGI GRU driver" 199 tristate "SGI GRU driver"
200 depends on (X86_64 || IA64_SGI_UV || IA64_GENERIC) && SMP 200 depends on (X86_UV || IA64_SGI_UV || IA64_GENERIC) && SMP
201 default n 201 default n
202 select MMU_NOTIFIER 202 select MMU_NOTIFIER
203 ---help--- 203 ---help---
diff --git a/drivers/misc/sgi-gru/gru.h b/drivers/misc/sgi-gru/gru.h
index f93f03a9e6e9..1b5f579df15f 100644
--- a/drivers/misc/sgi-gru/gru.h
+++ b/drivers/misc/sgi-gru/gru.h
@@ -19,6 +19,8 @@
19#ifndef __GRU_H__ 19#ifndef __GRU_H__
20#define __GRU_H__ 20#define __GRU_H__
21 21
22#include <asm/uv/uv.h>
23
22/* 24/*
23 * GRU architectural definitions 25 * GRU architectural definitions
24 */ 26 */
diff --git a/drivers/misc/sgi-xp/xp.h b/drivers/misc/sgi-xp/xp.h
index 7b4cbd5e03e9..069ad3a1c2ac 100644
--- a/drivers/misc/sgi-xp/xp.h
+++ b/drivers/misc/sgi-xp/xp.h
@@ -15,6 +15,8 @@
15 15
16#include <linux/mutex.h> 16#include <linux/mutex.h>
17 17
18#include <asm/uv/uv.h>
19
18#ifdef CONFIG_IA64 20#ifdef CONFIG_IA64
19#include <asm/system.h> 21#include <asm/system.h>
20#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */ 22#include <asm/sn/arch.h> /* defines is_shub1() and is_shub2() */
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index aa6b9b1b30b5..53e21f36a802 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -430,22 +430,10 @@
430 *(.initcall7.init) \ 430 *(.initcall7.init) \
431 *(.initcall7s.init) 431 *(.initcall7s.init)
432 432
433#define PERCPU_PROLOG(vaddr) \
434 VMLINUX_SYMBOL(__per_cpu_load) = .; \
435 .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
436 - LOAD_OFFSET) { \
437 VMLINUX_SYMBOL(__per_cpu_start) = .;
438
439#define PERCPU_EPILOG(phdr) \
440 VMLINUX_SYMBOL(__per_cpu_end) = .; \
441 } phdr \
442 . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
443
444/** 433/**
445 * PERCPU_VADDR_PREALLOC - define output section for percpu area with prealloc 434 * PERCPU_VADDR - define output section for percpu area
446 * @vaddr: explicit base address (optional) 435 * @vaddr: explicit base address (optional)
447 * @phdr: destination PHDR (optional) 436 * @phdr: destination PHDR (optional)
448 * @prealloc: the size of prealloc area
449 * 437 *
450 * Macro which expands to output section for percpu area. If @vaddr 438 * Macro which expands to output section for percpu area. If @vaddr
451 * is not blank, it specifies explicit base address and all percpu 439 * is not blank, it specifies explicit base address and all percpu
@@ -457,39 +445,23 @@
457 * section in the linker script will go there too. @phdr should have 445 * section in the linker script will go there too. @phdr should have
458 * a leading colon. 446 * a leading colon.
459 * 447 *
460 * If @prealloc is non-zero, the specified number of bytes will be
461 * reserved at the start of percpu area. As the prealloc area is
462 * likely to break alignment, this macro puts areas in increasing
463 * alignment order.
464 *
465 * This macro defines three symbols, __per_cpu_load, __per_cpu_start 448 * This macro defines three symbols, __per_cpu_load, __per_cpu_start
466 * and __per_cpu_end. The first one is the vaddr of loaded percpu 449 * and __per_cpu_end. The first one is the vaddr of loaded percpu
467 * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the 450 * init data. __per_cpu_start equals @vaddr and __per_cpu_end is the
468 * end offset. 451 * end offset.
469 */ 452 */
470#define PERCPU_VADDR_PREALLOC(vaddr, segment, prealloc) \
471 PERCPU_PROLOG(vaddr) \
472 . += prealloc; \
473 *(.data.percpu) \
474 *(.data.percpu.shared_aligned) \
475 *(.data.percpu.page_aligned) \
476 PERCPU_EPILOG(segment)
477
478/**
479 * PERCPU_VADDR - define output section for percpu area
480 * @vaddr: explicit base address (optional)
481 * @phdr: destination PHDR (optional)
482 *
483 * Macro which expands to output section for percpu area. Mostly
484 * identical to PERCPU_VADDR_PREALLOC(@vaddr, @phdr, 0) other than
485 * using slighly different layout.
486 */
487#define PERCPU_VADDR(vaddr, phdr) \ 453#define PERCPU_VADDR(vaddr, phdr) \
488 PERCPU_PROLOG(vaddr) \ 454 VMLINUX_SYMBOL(__per_cpu_load) = .; \
455 .data.percpu vaddr : AT(VMLINUX_SYMBOL(__per_cpu_load) \
456 - LOAD_OFFSET) { \
457 VMLINUX_SYMBOL(__per_cpu_start) = .; \
458 *(.data.percpu.first) \
489 *(.data.percpu.page_aligned) \ 459 *(.data.percpu.page_aligned) \
490 *(.data.percpu) \ 460 *(.data.percpu) \
491 *(.data.percpu.shared_aligned) \ 461 *(.data.percpu.shared_aligned) \
492 PERCPU_EPILOG(phdr) 462 VMLINUX_SYMBOL(__per_cpu_end) = .; \
463 } phdr \
464 . = VMLINUX_SYMBOL(__per_cpu_load) + SIZEOF(.data.percpu);
493 465
494/** 466/**
495 * PERCPU - define output section for percpu area, simple version 467 * PERCPU - define output section for percpu area, simple version
diff --git a/include/linux/magic.h b/include/linux/magic.h
index 0b4df7eba852..5b4e28bcb788 100644
--- a/include/linux/magic.h
+++ b/include/linux/magic.h
@@ -49,4 +49,5 @@
49#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA 49#define FUTEXFS_SUPER_MAGIC 0xBAD1DEA
50#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA 50#define INOTIFYFS_SUPER_MAGIC 0x2BAD1DEA
51 51
52#define STACK_END_MAGIC 0x57AC6E9D
52#endif /* __LINUX_MAGIC_H__ */ 53#endif /* __LINUX_MAGIC_H__ */
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index 9f2a3751873a..0e24202b5a4e 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -9,34 +9,39 @@
9#include <asm/percpu.h> 9#include <asm/percpu.h>
10 10
11#ifdef CONFIG_SMP 11#ifdef CONFIG_SMP
12#define DEFINE_PER_CPU(type, name) \ 12#define PER_CPU_BASE_SECTION ".data.percpu"
13 __attribute__((__section__(".data.percpu"))) \
14 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
15 13
16#ifdef MODULE 14#ifdef MODULE
17#define SHARED_ALIGNED_SECTION ".data.percpu" 15#define PER_CPU_SHARED_ALIGNED_SECTION ""
18#else 16#else
19#define SHARED_ALIGNED_SECTION ".data.percpu.shared_aligned" 17#define PER_CPU_SHARED_ALIGNED_SECTION ".shared_aligned"
20#endif 18#endif
19#define PER_CPU_FIRST_SECTION ".first"
21 20
22#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ 21#else
23 __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ 22
24 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ 23#define PER_CPU_BASE_SECTION ".data"
25 ____cacheline_aligned_in_smp 24#define PER_CPU_SHARED_ALIGNED_SECTION ""
25#define PER_CPU_FIRST_SECTION ""
26
27#endif
26 28
27#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ 29#define DEFINE_PER_CPU_SECTION(type, name, section) \
28 __attribute__((__section__(".data.percpu.page_aligned"))) \ 30 __attribute__((__section__(PER_CPU_BASE_SECTION section))) \
29 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name 31 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
30#else 32
31#define DEFINE_PER_CPU(type, name) \ 33#define DEFINE_PER_CPU(type, name) \
32 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name 34 DEFINE_PER_CPU_SECTION(type, name, "")
33 35
34#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ 36#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
35 DEFINE_PER_CPU(type, name) 37 DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
38 ____cacheline_aligned_in_smp
36 39
37#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \ 40#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
38 DEFINE_PER_CPU(type, name) 41 DEFINE_PER_CPU_SECTION(type, name, ".page_aligned")
39#endif 42
43#define DEFINE_PER_CPU_FIRST(type, name) \
44 DEFINE_PER_CPU_SECTION(type, name, PER_CPU_FIRST_SECTION)
40 45
41#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) 46#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
42#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var) 47#define EXPORT_PER_CPU_SYMBOL_GPL(var) EXPORT_SYMBOL_GPL(per_cpu__##var)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index f134a0f7080a..b85b10abf770 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1159,10 +1159,9 @@ struct task_struct {
1159 pid_t pid; 1159 pid_t pid;
1160 pid_t tgid; 1160 pid_t tgid;
1161 1161
1162#ifdef CONFIG_CC_STACKPROTECTOR
1163 /* Canary value for the -fstack-protector gcc feature */ 1162 /* Canary value for the -fstack-protector gcc feature */
1164 unsigned long stack_canary; 1163 unsigned long stack_canary;
1165#endif 1164
1166 /* 1165 /*
1167 * pointers to (original) parent process, youngest child, younger sibling, 1166 * pointers to (original) parent process, youngest child, younger sibling,
1168 * older sibling, respectively. (p->father can be replaced with 1167 * older sibling, respectively. (p->father can be replaced with
@@ -2069,6 +2068,19 @@ static inline int object_is_on_stack(void *obj)
2069 2068
2070extern void thread_info_cache_init(void); 2069extern void thread_info_cache_init(void);
2071 2070
2071#ifdef CONFIG_DEBUG_STACK_USAGE
2072static inline unsigned long stack_not_used(struct task_struct *p)
2073{
2074 unsigned long *n = end_of_stack(p);
2075
2076 do { /* Skip over canary */
2077 n++;
2078 } while (!*n);
2079
2080 return (unsigned long)n - (unsigned long)end_of_stack(p);
2081}
2082#endif
2083
2072/* set thread flags in other task's structures 2084/* set thread flags in other task's structures
2073 * - see asm/thread_info.h for TIF_xxxx flags available 2085 * - see asm/thread_info.h for TIF_xxxx flags available
2074 */ 2086 */
diff --git a/include/linux/stackprotector.h b/include/linux/stackprotector.h
new file mode 100644
index 000000000000..6f3e54c704c0
--- /dev/null
+++ b/include/linux/stackprotector.h
@@ -0,0 +1,16 @@
1#ifndef _LINUX_STACKPROTECTOR_H
2#define _LINUX_STACKPROTECTOR_H 1
3
4#include <linux/compiler.h>
5#include <linux/sched.h>
6#include <linux/random.h>
7
8#ifdef CONFIG_CC_STACKPROTECTOR
9# include <asm/stackprotector.h>
10#else
11static inline void boot_init_stack_canary(void)
12{
13}
14#endif
15
16#endif
diff --git a/init/main.c b/init/main.c
index 844209453c02..bfe4fb0c9842 100644
--- a/init/main.c
+++ b/init/main.c
@@ -14,6 +14,7 @@
14#include <linux/proc_fs.h> 14#include <linux/proc_fs.h>
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/syscalls.h> 16#include <linux/syscalls.h>
17#include <linux/stackprotector.h>
17#include <linux/string.h> 18#include <linux/string.h>
18#include <linux/ctype.h> 19#include <linux/ctype.h>
19#include <linux/delay.h> 20#include <linux/delay.h>
@@ -539,6 +540,12 @@ asmlinkage void __init start_kernel(void)
539 */ 540 */
540 lockdep_init(); 541 lockdep_init();
541 debug_objects_early_init(); 542 debug_objects_early_init();
543
544 /*
545 * Set up the the initial canary ASAP:
546 */
547 boot_init_stack_canary();
548
542 cgroup_init_early(); 549 cgroup_init_early();
543 550
544 local_irq_disable(); 551 local_irq_disable();
diff --git a/kernel/exit.c b/kernel/exit.c
index 29f4b790751c..a1b18c03b4c0 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -980,12 +980,9 @@ static void check_stack_usage(void)
980{ 980{
981 static DEFINE_SPINLOCK(low_water_lock); 981 static DEFINE_SPINLOCK(low_water_lock);
982 static int lowest_to_date = THREAD_SIZE; 982 static int lowest_to_date = THREAD_SIZE;
983 unsigned long *n = end_of_stack(current);
984 unsigned long free; 983 unsigned long free;
985 984
986 while (*n == 0) 985 free = stack_not_used(current);
987 n++;
988 free = (unsigned long)n - (unsigned long)end_of_stack(current);
989 986
990 if (free >= lowest_to_date) 987 if (free >= lowest_to_date)
991 return; 988 return;
diff --git a/kernel/fork.c b/kernel/fork.c
index 70ca1852a0e5..0181b7b2281a 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -61,6 +61,7 @@
61#include <linux/proc_fs.h> 61#include <linux/proc_fs.h>
62#include <linux/blkdev.h> 62#include <linux/blkdev.h>
63#include <trace/sched.h> 63#include <trace/sched.h>
64#include <linux/magic.h>
64 65
65#include <asm/pgtable.h> 66#include <asm/pgtable.h>
66#include <asm/pgalloc.h> 67#include <asm/pgalloc.h>
@@ -212,6 +213,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
212{ 213{
213 struct task_struct *tsk; 214 struct task_struct *tsk;
214 struct thread_info *ti; 215 struct thread_info *ti;
216 unsigned long *stackend;
217
215 int err; 218 int err;
216 219
217 prepare_to_copy(orig); 220 prepare_to_copy(orig);
@@ -237,6 +240,8 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
237 goto out; 240 goto out;
238 241
239 setup_thread_stack(tsk, orig); 242 setup_thread_stack(tsk, orig);
243 stackend = end_of_stack(tsk);
244 *stackend = STACK_END_MAGIC; /* for overflow detection */
240 245
241#ifdef CONFIG_CC_STACKPROTECTOR 246#ifdef CONFIG_CC_STACKPROTECTOR
242 tsk->stack_canary = get_random_int(); 247 tsk->stack_canary = get_random_int();
diff --git a/kernel/panic.c b/kernel/panic.c
index 2a2ff36ff44d..33cab3de1763 100644
--- a/kernel/panic.c
+++ b/kernel/panic.c
@@ -74,6 +74,9 @@ NORET_TYPE void panic(const char * fmt, ...)
74 vsnprintf(buf, sizeof(buf), fmt, args); 74 vsnprintf(buf, sizeof(buf), fmt, args);
75 va_end(args); 75 va_end(args);
76 printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf); 76 printk(KERN_EMERG "Kernel panic - not syncing: %s\n",buf);
77#ifdef CONFIG_DEBUG_BUGVERBOSE
78 dump_stack();
79#endif
77 bust_spinlocks(0); 80 bust_spinlocks(0);
78 81
79 /* 82 /*
@@ -355,15 +358,22 @@ EXPORT_SYMBOL(warn_slowpath);
355#endif 358#endif
356 359
357#ifdef CONFIG_CC_STACKPROTECTOR 360#ifdef CONFIG_CC_STACKPROTECTOR
361
362#ifndef GCC_HAS_SP
363#warning You have selected the CONFIG_CC_STACKPROTECTOR option, but the gcc used does not support this.
364#endif
365
358/* 366/*
359 * Called when gcc's -fstack-protector feature is used, and 367 * Called when gcc's -fstack-protector feature is used, and
360 * gcc detects corruption of the on-stack canary value 368 * gcc detects corruption of the on-stack canary value
361 */ 369 */
362void __stack_chk_fail(void) 370void __stack_chk_fail(void)
363{ 371{
364 panic("stack-protector: Kernel stack is corrupted"); 372 panic("stack-protector: Kernel stack is corrupted in: %p\n",
373 __builtin_return_address(0));
365} 374}
366EXPORT_SYMBOL(__stack_chk_fail); 375EXPORT_SYMBOL(__stack_chk_fail);
376
367#endif 377#endif
368 378
369core_param(panic, panic_timeout, int, 0644); 379core_param(panic, panic_timeout, int, 0644);
diff --git a/kernel/sched.c b/kernel/sched.c
index 40d70d9c0af3..8db1a4cf2082 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6009,12 +6009,7 @@ void sched_show_task(struct task_struct *p)
6009 printk(KERN_CONT " %016lx ", thread_saved_pc(p)); 6009 printk(KERN_CONT " %016lx ", thread_saved_pc(p));
6010#endif 6010#endif
6011#ifdef CONFIG_DEBUG_STACK_USAGE 6011#ifdef CONFIG_DEBUG_STACK_USAGE
6012 { 6012 free = stack_not_used(p);
6013 unsigned long *n = end_of_stack(p);
6014 while (!*n)
6015 n++;
6016 free = (unsigned long)n - (unsigned long)end_of_stack(p);
6017 }
6018#endif 6013#endif
6019 printk(KERN_CONT "%5lu %5d %6d\n", free, 6014 printk(KERN_CONT "%5lu %5d %6d\n", free,
6020 task_pid_nr(p), task_pid_nr(p->real_parent)); 6015 task_pid_nr(p), task_pid_nr(p->real_parent));
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 2f445833ae37..1f0c509b40d3 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -971,6 +971,8 @@ undo:
971} 971}
972 972
973#ifdef CONFIG_SMP 973#ifdef CONFIG_SMP
974static struct workqueue_struct *work_on_cpu_wq __read_mostly;
975
974struct work_for_cpu { 976struct work_for_cpu {
975 struct work_struct work; 977 struct work_struct work;
976 long (*fn)(void *); 978 long (*fn)(void *);
@@ -991,8 +993,8 @@ static void do_work_for_cpu(struct work_struct *w)
991 * @fn: the function to run 993 * @fn: the function to run
992 * @arg: the function arg 994 * @arg: the function arg
993 * 995 *
994 * This will return -EINVAL in the cpu is not online, or the return value 996 * This will return the value @fn returns.
995 * of @fn otherwise. 997 * It is up to the caller to ensure that the cpu doesn't go offline.
996 */ 998 */
997long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg) 999long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
998{ 1000{
@@ -1001,14 +1003,8 @@ long work_on_cpu(unsigned int cpu, long (*fn)(void *), void *arg)
1001 INIT_WORK(&wfc.work, do_work_for_cpu); 1003 INIT_WORK(&wfc.work, do_work_for_cpu);
1002 wfc.fn = fn; 1004 wfc.fn = fn;
1003 wfc.arg = arg; 1005 wfc.arg = arg;
1004 get_online_cpus(); 1006 queue_work_on(cpu, work_on_cpu_wq, &wfc.work);
1005 if (unlikely(!cpu_online(cpu))) 1007 flush_work(&wfc.work);
1006 wfc.ret = -EINVAL;
1007 else {
1008 schedule_work_on(cpu, &wfc.work);
1009 flush_work(&wfc.work);
1010 }
1011 put_online_cpus();
1012 1008
1013 return wfc.ret; 1009 return wfc.ret;
1014} 1010}
@@ -1025,4 +1021,8 @@ void __init init_workqueues(void)
1025 hotcpu_notifier(workqueue_cpu_callback, 0); 1021 hotcpu_notifier(workqueue_cpu_callback, 0);
1026 keventd_wq = create_workqueue("events"); 1022 keventd_wq = create_workqueue("events");
1027 BUG_ON(!keventd_wq); 1023 BUG_ON(!keventd_wq);
1024#ifdef CONFIG_SMP
1025 work_on_cpu_wq = create_workqueue("work_on_cpu");
1026 BUG_ON(!work_on_cpu_wq);
1027#endif
1028} 1028}