author	Ingo Molnar <mingo@elte.hu>	2009-01-21 04:04:52 -0500
committer	Ingo Molnar <mingo@elte.hu>	2009-01-21 04:04:52 -0500
commit	ae2b56b92bd33b416528986528e0732c98c7a285 (patch)
tree	736d0ace296c9682eb99d841ba41c6a082f3124d
parent	8f5d36ed5bb6e33024619eaee15b7ce2e3d115b3 (diff)
parent	16c2d3f895a3bc8d8e4c76c2646a6b750c181299 (diff)
Merge branch 'tj-percpu' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/misc into core/percpu
-rw-r--r--  arch/x86/include/asm/genapic_32.h                              |   7
-rw-r--r--  arch/x86/include/asm/genapic_64.h                              |   6
-rw-r--r--  arch/x86/include/asm/irq_regs.h                                |  36
-rw-r--r--  arch/x86/include/asm/irq_regs_32.h                             |  31
-rw-r--r--  arch/x86/include/asm/irq_regs_64.h                             |   1
-rw-r--r--  arch/x86/include/asm/irq_vectors.h                             |  36
-rw-r--r--  arch/x86/include/asm/mach-default/entry_arch.h                 |  18
-rw-r--r--  arch/x86/include/asm/mmu_context.h                             |  63
-rw-r--r--  arch/x86/include/asm/mmu_context_32.h                          |  55
-rw-r--r--  arch/x86/include/asm/mmu_context_64.h                          |  52
-rw-r--r--  arch/x86/include/asm/percpu.h                                  |   2
-rw-r--r--  arch/x86/include/asm/system.h                                  |  15
-rw-r--r--  arch/x86/include/asm/uv/uv.h                                   |  33
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h                               |   2
-rw-r--r--  arch/x86/kernel/Makefile                                       |   2
-rw-r--r--  arch/x86/kernel/cpu/common.c                                   |  23
-rw-r--r--  arch/x86/kernel/entry_32.S                                     |   6
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c                               |   1
-rw-r--r--  arch/x86/kernel/head_32.S                                      |   6
-rw-r--r--  arch/x86/kernel/irq_64.c                                       |   3
-rw-r--r--  arch/x86/kernel/irqinit_32.c                                   |  11
-rw-r--r--  arch/x86/kernel/smpboot.c                                      |   1
-rw-r--r--  arch/x86/kernel/tlb.c (renamed from arch/x86/kernel/tlb_64.c)  |  48
-rw-r--r--  arch/x86/kernel/tlb_32.c                                       | 239
-rw-r--r--  arch/x86/kernel/tlb_uv.c                                       |  68
25 files changed, 268 insertions(+), 497 deletions(-)
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 2c05b737ee22..4334502d3664 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -138,11 +138,4 @@ struct genapic {
 extern struct genapic *genapic;
 extern void es7000_update_genapic_to_cluster(void);
 
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-#define get_uv_system_type()		UV_NONE
-#define is_uv_system()			0
-#define uv_wakeup_secondary(a, b)	1
-#define uv_system_init()		do {} while (0)
-
-
 #endif /* _ASM_X86_GENAPIC_32_H */
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index adf32fb56aa6..7bb092c59055 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
 extern int acpi_madt_oem_check(char *, char *);
 
 extern void apic_send_IPI_self(int vector);
-enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
-extern enum uv_system_type get_uv_system_type(void);
-extern int is_uv_system(void);
 
 extern struct genapic apic_x2apic_uv_x;
 DECLARE_PER_CPU(int, x2apic_extra_bits);
-extern void uv_cpu_init(void);
-extern void uv_system_init(void);
-extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
 
 extern void setup_apic_routing(void);
 
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 89c898ab298b..77843225b7ea 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -1,5 +1,31 @@
-#ifdef CONFIG_X86_32
-# include "irq_regs_32.h"
-#else
-# include "irq_regs_64.h"
-#endif
+/*
+ * Per-cpu current frame pointer - the location of the last exception frame on
+ * the stack, stored in the per-cpu area.
+ *
+ * Jeremy Fitzhardinge <jeremy@goop.org>
+ */
+#ifndef _ASM_X86_IRQ_REGS_H
+#define _ASM_X86_IRQ_REGS_H
+
+#include <asm/percpu.h>
+
+#define ARCH_HAS_OWN_IRQ_REGS
+
+DECLARE_PER_CPU(struct pt_regs *, irq_regs);
+
+static inline struct pt_regs *get_irq_regs(void)
+{
+	return percpu_read(irq_regs);
+}
+
+static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
+{
+	struct pt_regs *old_regs;
+
+	old_regs = get_irq_regs();
+	percpu_write(irq_regs, new_regs);
+
+	return old_regs;
+}
+
+#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
deleted file mode 100644
index d7ed33ee94e9..000000000000
--- a/arch/x86/include/asm/irq_regs_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
-/*
- * Per-cpu current frame pointer - the location of the last exception frame on
- * the stack, stored in the per-cpu area.
- *
- * Jeremy Fitzhardinge <jeremy@goop.org>
- */
-#ifndef _ASM_X86_IRQ_REGS_32_H
-#define _ASM_X86_IRQ_REGS_32_H
-
-#include <asm/percpu.h>
-
-#define ARCH_HAS_OWN_IRQ_REGS
-
-DECLARE_PER_CPU(struct pt_regs *, irq_regs);
-
-static inline struct pt_regs *get_irq_regs(void)
-{
-	return percpu_read(irq_regs);
-}
-
-static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
-{
-	struct pt_regs *old_regs;
-
-	old_regs = get_irq_regs();
-	percpu_write(irq_regs, new_regs);
-
-	return old_regs;
-}
-
-#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/x86/include/asm/irq_regs_64.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <asm-generic/irq_regs.h>
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index a16a2ab2b429..9a83a10a5d51 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -49,31 +49,33 @@
  * some of the following vectors are 'rare', they are merged
  * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
  * TLB, reschedule and local APIC vectors are performance-critical.
- *
- * Vectors 0xf0-0xfa are free (reserved for future Linux use).
  */
 #ifdef CONFIG_X86_32
 
 # define SPURIOUS_APIC_VECTOR		0xff
 # define ERROR_APIC_VECTOR		0xfe
-# define INVALIDATE_TLB_VECTOR		0xfd
-# define RESCHEDULE_VECTOR		0xfc
-# define CALL_FUNCTION_VECTOR		0xfb
-# define CALL_FUNCTION_SINGLE_VECTOR	0xfa
-# define THERMAL_APIC_VECTOR		0xf0
+# define RESCHEDULE_VECTOR		0xfd
+# define CALL_FUNCTION_VECTOR		0xfc
+# define CALL_FUNCTION_SINGLE_VECTOR	0xfb
+# define THERMAL_APIC_VECTOR		0xfa
+/* 0xf8 - 0xf9 : free */
+# define INVALIDATE_TLB_VECTOR_END	0xf7
+# define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+
+# define NUM_INVALIDATE_TLB_VECTORS	8
 
 #else
 
-#define SPURIOUS_APIC_VECTOR		0xff
-#define ERROR_APIC_VECTOR		0xfe
-#define RESCHEDULE_VECTOR		0xfd
-#define CALL_FUNCTION_VECTOR		0xfc
-#define CALL_FUNCTION_SINGLE_VECTOR	0xfb
-#define THERMAL_APIC_VECTOR		0xfa
-#define THRESHOLD_APIC_VECTOR		0xf9
-#define UV_BAU_MESSAGE			0xf8
-#define INVALIDATE_TLB_VECTOR_END	0xf7
-#define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
+# define SPURIOUS_APIC_VECTOR		0xff
+# define ERROR_APIC_VECTOR		0xfe
+# define RESCHEDULE_VECTOR		0xfd
+# define CALL_FUNCTION_VECTOR		0xfc
+# define CALL_FUNCTION_SINGLE_VECTOR	0xfb
+# define THERMAL_APIC_VECTOR		0xfa
+# define THRESHOLD_APIC_VECTOR		0xf9
+# define UV_BAU_MESSAGE			0xf8
+# define INVALIDATE_TLB_VECTOR_END	0xf7
+# define INVALIDATE_TLB_VECTOR_START	0xf0	/* f0-f7 used for TLB flush */
 
 #define NUM_INVALIDATE_TLB_VECTORS	8
 
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index 6b1add8e31dd..6fa399ad1de2 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -11,10 +11,26 @@
  */
 #ifdef CONFIG_X86_SMP
 BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
-BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
 BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
 BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
 BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
+
+BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
+		 smp_invalidate_interrupt)
+BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
+		 smp_invalidate_interrupt)
 #endif
 
 /*
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8aeeb3fd73db..52948df9cd1d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
 int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
 void destroy_context(struct mm_struct *mm);
 
-#ifdef CONFIG_X86_32
-# include "mmu_context_32.h"
-#else
-# include "mmu_context_64.h"
-#endif
+
+static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+{
+#ifdef CONFIG_SMP
+	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+#endif
+}
+
+static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+			     struct task_struct *tsk)
+{
+	unsigned cpu = smp_processor_id();
+
+	if (likely(prev != next)) {
+		/* stop flush ipis for the previous mm */
+		cpu_clear(cpu, prev->cpu_vm_mask);
+#ifdef CONFIG_SMP
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		percpu_write(cpu_tlbstate.active_mm, next);
+#endif
+		cpu_set(cpu, next->cpu_vm_mask);
+
+		/* Re-load page tables */
+		load_cr3(next->pgd);
+
+		/*
+		 * load the LDT, if the LDT is different:
+		 */
+		if (unlikely(prev->context.ldt != next->context.ldt))
+			load_LDT_nolock(&next->context);
+	}
+#ifdef CONFIG_SMP
+	else {
+		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
+
+		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
+			/* We were in lazy tlb mode and leave_mm disabled
+			 * tlb flush IPI delivery. We must reload CR3
+			 * to make sure to use no freed page tables.
+			 */
+			load_cr3(next->pgd);
+			load_LDT_nolock(&next->context);
+		}
+	}
+#endif
+}
 
 #define activate_mm(prev, next)			\
 do {						\
@@ -33,5 +76,17 @@ do { \
 	switch_mm((prev), (next), NULL);	\
 } while (0);
 
+#ifdef CONFIG_X86_32
+#define deactivate_mm(tsk, mm)			\
+do {						\
+	loadsegment(gs, 0);			\
+} while (0)
+#else
+#define deactivate_mm(tsk, mm)			\
+do {						\
+	load_gs_index(0);			\
+	loadsegment(fs, 0);			\
+} while (0)
+#endif
 
 #endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
deleted file mode 100644
index 08b53454f831..000000000000
--- a/arch/x86/include/asm/mmu_context_32.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef _ASM_X86_MMU_CONTEXT_32_H
-#define _ASM_X86_MMU_CONTEXT_32_H
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-#ifdef CONFIG_SMP
-	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
-}
-
-static inline void switch_mm(struct mm_struct *prev,
-			     struct mm_struct *next,
-			     struct task_struct *tsk)
-{
-	int cpu = smp_processor_id();
-
-	if (likely(prev != next)) {
-		/* stop flush ipis for the previous mm */
-		cpu_clear(cpu, prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
-		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		percpu_write(cpu_tlbstate.active_mm, next);
-#endif
-		cpu_set(cpu, next->cpu_vm_mask);
-
-		/* Re-load page tables */
-		load_cr3(next->pgd);
-
-		/*
-		 * load the LDT, if the LDT is different:
-		 */
-		if (unlikely(prev->context.ldt != next->context.ldt))
-			load_LDT_nolock(&next->context);
-	}
-#ifdef CONFIG_SMP
-	else {
-		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
-
-		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
-			/* We were in lazy tlb mode and leave_mm disabled
-			 * tlb flush IPI delivery. We must reload %cr3.
-			 */
-			load_cr3(next->pgd);
-			load_LDT_nolock(&next->context);
-		}
-	}
-#endif
-}
-
-#define deactivate_mm(tsk, mm)			\
-	asm("movl %0,%%gs": :"r" (0));
-
-#endif /* _ASM_X86_MMU_CONTEXT_32_H */
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
deleted file mode 100644
index c4572505ab3e..000000000000
--- a/arch/x86/include/asm/mmu_context_64.h
+++ /dev/null
@@ -1,52 +0,0 @@
-#ifndef _ASM_X86_MMU_CONTEXT_64_H
-#define _ASM_X86_MMU_CONTEXT_64_H
-
-static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
-{
-#ifdef CONFIG_SMP
-	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
-		percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
-#endif
-}
-
-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
-			     struct task_struct *tsk)
-{
-	unsigned cpu = smp_processor_id();
-	if (likely(prev != next)) {
-		/* stop flush ipis for the previous mm */
-		cpu_clear(cpu, prev->cpu_vm_mask);
-#ifdef CONFIG_SMP
-		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		percpu_write(cpu_tlbstate.active_mm, next);
-#endif
-		cpu_set(cpu, next->cpu_vm_mask);
-		load_cr3(next->pgd);
-
-		if (unlikely(next->context.ldt != prev->context.ldt))
-			load_LDT_nolock(&next->context);
-	}
-#ifdef CONFIG_SMP
-	else {
-		percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
-		BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
-
-		if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
-			/* We were in lazy tlb mode and leave_mm disabled
-			 * tlb flush IPI delivery. We must reload CR3
-			 * to make sure to use no freed page tables.
-			 */
-			load_cr3(next->pgd);
-			load_LDT_nolock(&next->context);
-		}
-	}
-#endif
-}
-
-#define deactivate_mm(tsk, mm)			\
-do {						\
-	load_gs_index(0);			\
-	asm volatile("movl %0,%%fs"::"r"(0));	\
-} while (0)
-
-#endif /* _ASM_X86_MMU_CONTEXT_64_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ce980db5e59d..0b64af4f13ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -75,7 +75,7 @@ do { \
 	case 8:							\
 		asm(op "q %1,"__percpu_arg(0)			\
 		    : "+m" (var)				\
-		    : "r" ((T__)val));				\
+		    : "re" ((T__)val));				\
 		break;						\
 	default: __bad_percpu_size();				\
 	}							\
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 52eb748a68af..2fcc70bc85f3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -89,13 +89,15 @@ do { \
 #ifdef CONFIG_CC_STACKPROTECTOR
 #define __switch_canary						\
 	"movq %P[task_canary](%%rsi),%%r8\n\t"			\
-	"movq %%r8,%%gs:%P[gs_canary]\n\t"
-#define __switch_canary_param					\
-	, [task_canary] "i" (offsetof(struct task_struct, stack_canary)) \
-	, [gs_canary] "i" (offsetof(union irq_stack_union, stack_canary))
+	"movq %%r8,"__percpu_arg([gs_canary])"\n\t"
+#define __switch_canary_oparam					\
+	, [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
+#define __switch_canary_iparam					\
+	, [task_canary] "i" (offsetof(struct task_struct, stack_canary))
 #else	/* CC_STACKPROTECTOR */
 #define __switch_canary
-#define __switch_canary_param
+#define __switch_canary_oparam
+#define __switch_canary_iparam
 #endif	/* CC_STACKPROTECTOR */
 
 /* Save restore flags to clear handle leaking NT */
@@ -114,13 +116,14 @@ do { \
 	     "jc   ret_from_fork\n\t"					\
 	     RESTORE_CONTEXT						\
 	     : "=a" (last)						\
+	       __switch_canary_oparam					\
 	     : [next] "S" (next), [prev] "D" (prev),			\
 	       [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
 	       [ti_flags] "i" (offsetof(struct thread_info, flags)),	\
 	       [tif_fork] "i" (TIF_FORK),				\
 	       [thread_info] "i" (offsetof(struct task_struct, stack)), \
 	       [current_task] "m" (per_cpu_var(current_task))		\
-	       __switch_canary_param					\
+	       __switch_canary_iparam					\
 	     : "memory", "cc" __EXTRA_CLOBBER)
 #endif
 
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
new file mode 100644
index 000000000000..dce5fe350134
--- /dev/null
+++ b/arch/x86/include/asm/uv/uv.h
@@ -0,0 +1,33 @@
+#ifndef _ASM_X86_UV_UV_H
+#define _ASM_X86_UV_UV_H
+
+enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
+
+#ifdef CONFIG_X86_64
+
+extern enum uv_system_type get_uv_system_type(void);
+extern int is_uv_system(void);
+extern void uv_cpu_init(void);
+extern void uv_system_init(void);
+extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
+extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+						 struct mm_struct *mm,
+						 unsigned long va,
+						 unsigned int cpu);
+
+#else	/* X86_64 */
+
+static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
+static inline int is_uv_system(void)	{ return 0; }
+static inline void uv_cpu_init(void)	{ }
+static inline void uv_system_init(void)	{ }
+static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
+{ return 1; }
+static inline const struct cpumask *
+uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
+		    unsigned long va, unsigned int cpu)
+{ return cpumask; }
+
+#endif	/* X86_64 */
+
+#endif	/* _ASM_X86_UV_UV_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 74e6393bfddb..9b0e61bf7a88 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,8 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
 #define cpubit_isset(cpu, bau_local_cpumask) \
 	test_bit((cpu), (bau_local_cpumask).bits)
 
-extern int uv_flush_tlb_others(struct cpumask *,
-			       struct mm_struct *, unsigned long);
 extern void uv_bau_message_intr1(void);
 extern void uv_bau_timeout_intr1(void);
 
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index eb074530c7d3..0626a88fbb46 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -58,7 +58,7 @@ obj-$(CONFIG_PCI) += early-quirks.o
 apm-y				:= apm_32.o
 obj-$(CONFIG_APM)		+= apm.o
 obj-$(CONFIG_X86_SMP)		+= smp.o
-obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o
+obj-$(CONFIG_X86_SMP)		+= smpboot.o tsc_sync.o ipi.o tlb.o
 obj-$(CONFIG_X86_32_SMP)	+= smpcommon.o
 obj-$(CONFIG_X86_64_SMP)	+= tsc_sync.o smpcommon.o
 obj-$(CONFIG_X86_TRAMPOLINE)	+= trampoline_$(BITS).o
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 3887fcf6e519..99904f288d6a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -28,6 +28,7 @@
 #include <asm/apic.h>
 #include <mach_apic.h>
 #include <asm/genapic.h>
+#include <asm/uv/uv.h>
 #endif
 
 #include <asm/pgtable.h>
@@ -63,23 +64,23 @@ cpumask_t cpu_sibling_setup_map;
 
 static struct cpu_dev *this_cpu __cpuinitdata;
 
+DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 #ifdef CONFIG_X86_64
-/* We need valid kernel segments for data and code in long mode too
- * IRET will check the segment types  kkeil 2000/10/28
- * Also sysret mandates a special GDT layout
- */
-/* The TLS descriptors are currently at a different place compared to i386.
-   Hopefully nobody expects them at a fixed place (Wine?) */
-DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
+	/*
+	 * We need valid kernel segments for data and code in long mode too
+	 * IRET will check the segment types  kkeil 2000/10/28
+	 * Also sysret mandates a special GDT layout
+	 *
+	 * The TLS descriptors are currently at a different place compared to i386.
+	 * Hopefully nobody expects them at a fixed place (Wine?)
+	 */
 	[GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
 	[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
 	[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
 	[GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
 	[GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
 	[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
-} };
 #else
-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 	[GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
 	[GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
 	[GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -111,9 +112,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
 	[GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
 
 	[GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
-	[GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
-} };
+	[GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
 #endif
+} };
 EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
 
 #ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..a0b91aac72a1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -672,7 +672,7 @@ common_interrupt:
 ENDPROC(common_interrupt)
 	CFI_ENDPROC
 
-#define BUILD_INTERRUPT(name, nr)	\
+#define BUILD_INTERRUPT3(name, nr, fn)	\
 ENTRY(name)				\
 	RING0_INT_FRAME;		\
 	pushl $~(nr);			\
@@ -680,11 +680,13 @@ ENTRY(name) \
 	SAVE_ALL;			\
 	TRACE_IRQS_OFF			\
 	movl %esp,%eax;			\
-	call smp_##name;		\
+	call fn;			\
 	jmp ret_from_intr;		\
 	CFI_ENDPROC;			\
 ENDPROC(name)
 
+#define BUILD_INTERRUPT(name, nr)	BUILD_INTERRUPT3(name, nr, smp_##name)
+
 /* The include is where all of the SMP etc. interrupts come from */
 #include "entry_arch.h"
 
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b193e082f6ce..bfe36249145c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -25,6 +25,7 @@
 #include <asm/ipi.h>
 #include <asm/genapic.h>
 #include <asm/pgtable.h>
+#include <asm/uv/uv.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/bios.h>
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4eea70b..24c0e5cd71e3 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP
 	ljmp $(__KERNEL_CS),$1f
 1:	movl $(__KERNEL_DS),%eax	# reload all the segment registers
 	movl %eax,%ss			# after changing gdt.
-	movl %eax,%fs			# gets reset once there's real percpu
 
 	movl $(__USER_DS),%eax		# DS/ES contains default USER segment
 	movl %eax,%ds
 	movl %eax,%es
 
+	movl $(__KERNEL_PERCPU), %eax
+	movl %eax,%fs			# set this cpu's percpu
+
 	xorl %eax,%eax			# Clear GS and LDT
 	movl %eax,%gs
 	lldt %ax
@@ -446,8 +448,6 @@ is386:	movl $2,%ecx		# set MP
 	movb $1, ready
 	cmpb $0,%cl		# the first CPU calls start_kernel
 	je   1f
-	movl $(__KERNEL_PERCPU), %eax
-	movl %eax,%fs		# set this cpu's percpu
 	movl (stack_start), %esp
 1:
 #endif /* CONFIG_SMP */
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 1db05247b47f..0b254de84083 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -22,6 +22,9 @@
 DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 EXPORT_PER_CPU_SYMBOL(irq_stat);
 
+DEFINE_PER_CPU(struct pt_regs *, irq_regs);
+EXPORT_PER_CPU_SYMBOL(irq_regs);
+
 /*
  * Probabilistic stack overflow check:
  *
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 1507ad4e674d..bf629cadec1a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
 	 */
 	alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
 
-	/* IPI for invalidation */
-	alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
+	/* IPIs for invalidation */
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
+	alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
 
 	/* IPI for generic function call */
 	alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 869b98840fd0..def770b57b5a 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
 #include <asm/vmi.h>
 #include <asm/genapic.h>
 #include <asm/setup.h>
+#include <asm/uv/uv.h>
 #include <linux/mc146818rtc.h>
 
 #include <mach_apic.h>
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb.c
index e64a32c48825..b3ca1b940654 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb.c
@@ -1,22 +1,15 @@
 #include <linux/init.h>
 
 #include <linux/mm.h>
-#include <linux/delay.h>
 #include <linux/spinlock.h>
 #include <linux/smp.h>
-#include <linux/kernel_stat.h>
-#include <linux/mc146818rtc.h>
 #include <linux/interrupt.h>
+#include <linux/module.h>
 
-#include <asm/mtrr.h>
-#include <asm/pgalloc.h>
 #include <asm/tlbflush.h>
 #include <asm/mmu_context.h>
-#include <asm/proto.h>
-#include <asm/apicdef.h>
-#include <asm/idle.h>
-#include <asm/uv/uv_hub.h>
-#include <asm/uv/uv_bau.h>
+#include <asm/apic.h>
+#include <asm/uv/uv.h>
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
 			= { &init_mm, 0, };
@@ -120,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
  * Interrupts are disabled.
  */
 
-asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
+/*
+ * FIXME: use of asmlinkage is not consistent.  On x86_64 it's noop
+ * but still used for documentation purpose but the usage is slightly
+ * inconsistent.  On x86_32, asmlinkage is regparm(0) but interrupt
+ * entry calls in with the first parameter in %eax.  Maybe define
+ * intrlinkage?
+ */
+#ifdef CONFIG_X86_64
+asmlinkage
+#endif
+void smp_invalidate_interrupt(struct pt_regs *regs)
 {
-	int cpu;
-	int sender;
+	unsigned int cpu;
+	unsigned int sender;
 	union smp_flush_state *f;
 
 	cpu = smp_processor_id();
@@ -156,14 +159,16 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
 	}
 out:
 	ack_APIC_irq();
+	smp_mb__before_clear_bit();
 	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
+	smp_mb__after_clear_bit();
 	inc_irq_stat(irq_tlb_count);
 }
 
 static void flush_tlb_others_ipi(const struct cpumask *cpumask,
 				 struct mm_struct *mm, unsigned long va)
 {
-	int sender;
+	unsigned int sender;
 	union smp_flush_state *f;
 
 	/* Caller has disabled preemption */
@@ -206,16 +211,13 @@ void native_flush_tlb_others(const struct cpumask *cpumask,
 			     struct mm_struct *mm, unsigned long va)
 {
 	if (is_uv_system()) {
-		/* FIXME: could be an percpu_alloc'd thing */
-		static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
-		struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
-
-		cpumask_andnot(after_uv_flush, cpumask,
-			       cpumask_of(smp_processor_id()));
-		if (!uv_flush_tlb_others(after_uv_flush, mm, va))
-			flush_tlb_others_ipi(after_uv_flush, mm, va);
-
-		put_cpu_var(flush_tlb_uv_cpumask);
+		unsigned int cpu;
+
+		cpu = get_cpu();
+		cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
+		if (cpumask)
+			flush_tlb_others_ipi(cpumask, mm, va);
+		put_cpu();
 		return;
 	}
 	flush_tlb_others_ipi(cpumask, mm, va);
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index abf0808d6fc4..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,239 +0,0 @@
-#include <linux/spinlock.h>
-#include <linux/cpu.h>
-#include <linux/interrupt.h>
-
-#include <asm/tlbflush.h>
-
-DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
-			= { &init_mm, 0, };
-
-/* must come after the send_IPI functions above for inlining */
-#include <mach_ipi.h>
-
-/*
- *	Smarter SMP flushing macros.
- *		c/o Linus Torvalds.
- *
- *	These mean you can really definitely utterly forget about
- *	writing to user space from interrupts. (Its not allowed anyway).
- *
- *	Optimizations Manfred Spraul <manfred@colorfullife.com>
- */
-
-static cpumask_var_t flush_cpumask;
-static struct mm_struct *flush_mm;
-static unsigned long flush_va;
-static DEFINE_SPINLOCK(tlbstate_lock);
-
-/*
- * We cannot call mmdrop() because we are in interrupt context,
- * instead update mm->cpu_vm_mask.
- *
- * We need to reload %cr3 since the page tables may be going
- * away from under us..
- */
-void leave_mm(int cpu)
-{
-	BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
-	cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
-	load_cr3(swapper_pg_dir);
-}
-EXPORT_SYMBOL_GPL(leave_mm);
-
-/*
- *
- * The flush IPI assumes that a thread switch happens in this order:
- * [cpu0: the cpu that switches]
- * 1) switch_mm() either 1a) or 1b)
- * 1a) thread switch to a different mm
- * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
- *	Stop ipi delivery for the old mm. This is not synchronized with
- *	the other cpus, but smp_invalidate_interrupt ignore flush ipis
- *	for the wrong mm, and in the worst case we perform a superfluous
- *	tlb flush.
- * 1a2) set cpu_tlbstate to TLBSTATE_OK
- *	Now the smp_invalidate_interrupt won't call leave_mm if cpu0
- *	was in lazy tlb mode.
- * 1a3) update cpu_tlbstate[].active_mm
- *	Now cpu0 accepts tlb flushes for the new mm.
- * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
- *	Now the other cpus will send tlb flush ipis.
- * 1a4) change cr3.
- * 1b) thread switch without mm change
- *	cpu_tlbstate[].active_mm is correct, cpu0 already handles
- *	flush ipis.
- * 1b1) set cpu_tlbstate to TLBSTATE_OK
- * 1b2) test_and_set the cpu bit in cpu_vm_mask.
- *	Atomically set the bit [other cpus will start sending flush ipis],
- *	and test the bit.
- * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
- * 2) switch %%esp, ie current
- *
- * The interrupt must handle 2 special cases:
- * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
- * - the cpu performs speculative tlb reads, i.e. even if the cpu only
- *   runs in kernel space, the cpu could load tlb entries for user space
- *   pages.
- *
- * The good news is that cpu_tlbstate is local to each cpu, no
- * write/read ordering problems.
- */
-
-/*
- * TLB flush IPI:
- *
- * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
- * 2) Leave the mm if we are in the lazy tlb mode.
- */
-
-void smp_invalidate_interrupt(struct pt_regs *regs)
-{
-	unsigned long cpu;
-
-	cpu = get_cpu();
-
-	if (!cpumask_test_cpu(cpu, flush_cpumask))
-		goto out;
-		/*
-		 * This was a BUG() but until someone can quote me the
-		 * line from the intel manual that guarantees an IPI to
-		 * multiple CPUs is retried _only_ on the erroring CPUs
-		 * its staying as a return
-		 *
-		 * BUG();
-		 */
-
-	if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
-		if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
-			if (flush_va == TLB_FLUSH_ALL)
-				local_flush_tlb();
-			else
-				__flush_tlb_one(flush_va);
-		} else
-			leave_mm(cpu);
-	}
-	ack_APIC_irq();
-	smp_mb__before_clear_bit();
-	cpumask_clear_cpu(cpu, flush_cpumask);
-	smp_mb__after_clear_bit();
-out:
-	put_cpu_no_resched();
-	inc_irq_stat(irq_tlb_count);
-}
-
-void native_flush_tlb_others(const struct cpumask *cpumask,
-			     struct mm_struct *mm, unsigned long va)
-{
-	/*
-	 * - mask must exist :)
-	 */
-	BUG_ON(cpumask_empty(cpumask));
-	BUG_ON(!mm);
-
-	/*
-	 * i'm not happy about this global shared spinlock in the
-	 * MM hot path, but we'll see how contended it is.
-	 * AK: x86-64 has a faster method that could be ported.
-	 */
-	spin_lock(&tlbstate_lock);
-
-	cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
-#ifdef CONFIG_HOTPLUG_CPU
-	/* If a CPU which we ran on has gone down, OK. */
-	cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
-	if (unlikely(cpumask_empty(flush_cpumask))) {
-		spin_unlock(&tlbstate_lock);
-		return;
-	}
-#endif
-	flush_mm = mm;
-	flush_va = va;
-
-	/*
-	 * Make the above memory operations globally visible before
-	 * sending the IPI.
-	 */
-	smp_mb();
-	/*
-	 * We have to send the IPI only to
-	 * CPUs affected.
-	 */
-	send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
-
-	while (!cpumask_empty(flush_cpumask))
-		/* nothing. lockup detection does not belong here */
-		cpu_relax();
-
-	flush_mm = NULL;
-	flush_va = 0;
-	spin_unlock(&tlbstate_lock);
-}
-
-void flush_tlb_current_task(void)
-{
-	struct mm_struct *mm = current->mm;
-
-	preempt_disable();
-
-	local_flush_tlb();
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
-	preempt_enable();
-}
-
-void flush_tlb_mm(struct mm_struct *mm)
-{
-
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			local_flush_tlb();
-		else
-			leave_mm(smp_processor_id());
-	}
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
-
-	preempt_enable();
-}
-
-void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
-{
-	struct mm_struct *mm = vma->vm_mm;
-
-	preempt_disable();
-
-	if (current->active_mm == mm) {
-		if (current->mm)
-			__flush_tlb_one(va);
-		else
-			leave_mm(smp_processor_id());
-	}
-
-	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
-		flush_tlb_others(&mm->cpu_vm_mask, mm, va);
-	preempt_enable();
-}
-EXPORT_SYMBOL(flush_tlb_page);
-
-static void do_flush_tlb_all(void *info)
-{
-	unsigned long cpu = smp_processor_id();
-
-	__flush_tlb_all();
-	if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
-		leave_mm(cpu);
-}
-
-void flush_tlb_all(void)
-{
-	on_each_cpu(do_flush_tlb_all, NULL, 1);
-}
-
-static int init_flush_cpumask(void)
-{
-	alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
-	return 0;
-}
-early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index 690dcf1a27d4..aae15dd72604 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,6 +11,7 @@
 #include <linux/kernel.h>
 
 #include <asm/mmu_context.h>
+#include <asm/uv/uv.h>
 #include <asm/uv/uv_mmrs.h>
 #include <asm/uv/uv_hub.h>
 #include <asm/uv/uv_bau.h>
@@ -209,14 +210,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
  *
  * Send a broadcast and wait for a broadcast message to complete.
  *
- * The cpumaskp mask contains the cpus the broadcast was sent to.
+ * The flush_mask contains the cpus the broadcast was sent to.
  *
- * Returns 1 if all remote flushing was done. The mask is zeroed.
- * Returns 0 if some remote flushing remains to be done. The mask will have
- * some bits still set.
+ * Returns NULL if all remote flushing was done. The mask is zeroed.
+ * Returns @flush_mask if some remote flushing remains to be done. The
+ * mask will have some bits still set.
  */
-int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
-			   struct cpumask *cpumaskp)
+const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
+					     struct bau_desc *bau_desc,
+					     struct cpumask *flush_mask)
 {
 	int completion_status = 0;
 	int right_shift;
@@ -263,59 +265,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
 	 * Success, so clear the remote cpu's from the mask so we don't
 	 * use the IPI method of shootdown on them.
 	 */
-	for_each_cpu(bit, cpumaskp) {
+	for_each_cpu(bit, flush_mask) {
 		blade = uv_cpu_to_blade_id(bit);
 		if (blade == this_blade)
 			continue;
-		cpumask_clear_cpu(bit, cpumaskp);
+		cpumask_clear_cpu(bit, flush_mask);
 	}
-	if (!cpumask_empty(cpumaskp))
-		return 0;
-	return 1;
+	if (!cpumask_empty(flush_mask))
+		return flush_mask;
+	return NULL;
 }
 
 /**
  * uv_flush_tlb_others - globally purge translation cache of a virtual
  * address or all TLB's
- * @cpumaskp: mask of all cpu's in which the address is to be removed
+ * @cpumask: mask of all cpu's in which the address is to be removed
 * @mm: mm_struct containing virtual address range
 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
+ * @cpu: the current cpu
 *
 * This is the entry point for initiating any UV global TLB shootdown.
 *
 * Purges the translation caches of all specified processors of the given
 * virtual address, or purges all TLB's on specified processors.
 *
- * The caller has derived the cpumaskp from the mm_struct and has subtracted
- * the local cpu from the mask. This function is called only if there
- * are bits set in the mask. (e.g. flush_tlb_page())
+ * The caller has derived the cpumask from the mm_struct. This function
+ * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
 *
- * The cpumaskp is converted into a nodemask of the nodes containing
+ * The cpumask is converted into a nodemask of the nodes containing
 * the cpus.
 *
- * Returns 1 if all remote flushing was done.
- * Returns 0 if some remote flushing remains to be done.
+ * Note that this function should be called with preemption disabled.
+ *
+ * Returns NULL if all remote flushing was done.
+ * Returns pointer to cpumask if some remote flushing remains to be
+ * done.  The returned pointer is valid till preemption is re-enabled.
 */
-int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
-			unsigned long va)
+const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
+					  struct mm_struct *mm,
+					  unsigned long va, unsigned int cpu)
 {
+	static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
+	struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
 	int i;
 	int bit;
 	int blade;
-	int cpu;
+	int uv_cpu;
 	int this_blade;
 	int locals = 0;
 	struct bau_desc *bau_desc;
 
-	cpu = uv_blade_processor_id();
+	WARN_ON(!in_atomic());
+
+	cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
+
+	uv_cpu = uv_blade_processor_id();
 	this_blade = uv_numa_blade_id();
 	bau_desc = __get_cpu_var(bau_control).descriptor_base;
-	bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu;
+	bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
 
 	bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
 
 	i = 0;
-	for_each_cpu(bit, cpumaskp) {
+	for_each_cpu(bit, flush_mask) {
 		blade = uv_cpu_to_blade_id(bit);
 		BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
 		if (blade == this_blade) {
@@ -330,17 +342,17 @@ int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
 		 * no off_node flushing; return status for local node
 		 */
 		if (locals)
-			return 0;
+			return flush_mask;
 		else
-			return 1;
+			return NULL;
 	}
 	__get_cpu_var(ptcstats).requestor++;
 	__get_cpu_var(ptcstats).ntargeted += i;
 
 	bau_desc->payload.address = va;
-	bau_desc->payload.sending_cpu = smp_processor_id();
+	bau_desc->payload.sending_cpu = cpu;
 
-	return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp);
+	return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
 }
 
 /*