aboutsummaryrefslogtreecommitdiffstats
path: root/arch/x86
diff options
context:
space:
mode:
authorIngo Molnar <mingo@elte.hu>2009-01-18 12:15:49 -0500
committerIngo Molnar <mingo@elte.hu>2009-01-18 12:15:49 -0500
commitaf37501c792107c2bde1524bdae38d9a247b841a (patch)
treeb50ee90d29e72956b8b7d8d19677fe5996755d49 /arch/x86
parentd859e29fe34cb833071b20aef860ee94fbad9bb2 (diff)
parent99937d6455cea95405ac681c86a857d0fcd530bd (diff)
Merge branch 'core/percpu' into perfcounters/core
Conflicts: arch/x86/include/asm/pda.h We merge tip/core/percpu into tip/perfcounters/core because of a semantic and contextual conflict: the former eliminates the PDA, while the latter extends it with apic_perf_irqs field. Resolve the conflict by moving the new field to the irq_cpustat structure on 64-bit too. Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/ia32/ia32entry.S8
-rw-r--r--arch/x86/include/asm/apicnum.h12
-rw-r--r--arch/x86/include/asm/bitops.h14
-rw-r--r--arch/x86/include/asm/cpu.h21
-rw-r--r--arch/x86/include/asm/cpumask.h28
-rw-r--r--arch/x86/include/asm/current.h24
-rw-r--r--arch/x86/include/asm/hardirq_32.h3
-rw-r--r--arch/x86/include/asm/hardirq_64.h25
-rw-r--r--arch/x86/include/asm/io_apic.h26
-rw-r--r--arch/x86/include/asm/irq_regs_32.h4
-rw-r--r--arch/x86/include/asm/irq_vectors.h13
-rw-r--r--arch/x86/include/asm/mach-default/mach_wakecpu.h6
-rw-r--r--arch/x86/include/asm/mmu_context_32.h12
-rw-r--r--arch/x86/include/asm/mmu_context_64.h16
-rw-r--r--arch/x86/include/asm/mpspec_def.h23
-rw-r--r--arch/x86/include/asm/mtrr.h10
-rw-r--r--arch/x86/include/asm/page_64.h4
-rw-r--r--arch/x86/include/asm/paravirt.h8
-rw-r--r--arch/x86/include/asm/pda.h125
-rw-r--r--arch/x86/include/asm/percpu.h159
-rw-r--r--arch/x86/include/asm/pgtable.h19
-rw-r--r--arch/x86/include/asm/processor.h3
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/smp.h49
-rw-r--r--arch/x86/include/asm/system.h4
-rw-r--r--arch/x86/include/asm/thread_info.h20
-rw-r--r--arch/x86/include/asm/tlbflush.h17
-rw-r--r--arch/x86/include/asm/topology.h8
-rw-r--r--arch/x86/include/asm/trampoline.h1
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h3
-rw-r--r--arch/x86/kernel/acpi/boot.c96
-rw-r--r--arch/x86/kernel/acpi/sleep.c1
-rw-r--r--arch/x86/kernel/apic.c25
-rw-r--r--arch/x86/kernel/asm-offsets_64.c8
-rw-r--r--arch/x86/kernel/cpu/common.c80
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c27
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c63
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c21
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c35
-rw-r--r--arch/x86/kernel/entry_32.S2
-rw-r--r--arch/x86/kernel/entry_64.S41
-rw-r--r--arch/x86/kernel/head64.c23
-rw-r--r--arch/x86/kernel/head_64.S47
-rw-r--r--arch/x86/kernel/io_apic.c163
-rw-r--r--arch/x86/kernel/irq.c6
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c5
-rw-r--r--arch/x86/kernel/microcode_intel.c10
-rw-r--r--arch/x86/kernel/module_32.c6
-rw-r--r--arch/x86/kernel/module_64.c32
-rw-r--r--arch/x86/kernel/mpparse.c143
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c10
-rw-r--r--arch/x86/kernel/process_32.c5
-rw-r--r--arch/x86/kernel/process_64.c22
-rw-r--r--arch/x86/kernel/reboot.c1
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/setup_percpu.c208
-rw-r--r--arch/x86/kernel/smpboot.c69
-rw-r--r--arch/x86/kernel/smpcommon.c10
-rw-r--r--arch/x86/kernel/tlb_32.c85
-rw-r--r--arch/x86/kernel/tlb_64.c76
-rw-r--r--arch/x86/kernel/tlb_uv.c16
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S9
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S22
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c2
-rw-r--r--arch/x86/mach-voyager/setup.c1
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c6
-rw-r--r--arch/x86/mm/fault.c2
-rw-r--r--arch/x86/mm/init_32.c1
-rw-r--r--arch/x86/mm/pageattr.c10
-rw-r--r--arch/x86/mm/pat.c127
-rw-r--r--arch/x86/pci/i386.c12
-rw-r--r--arch/x86/xen/enlighten.c47
-rw-r--r--arch/x86/xen/irq.c8
-rw-r--r--arch/x86/xen/mmu.c8
-rw-r--r--arch/x86/xen/multicalls.h2
-rw-r--r--arch/x86/xen/smp.c33
-rw-r--r--arch/x86/xen/xen-asm_64.S31
80 files changed, 1128 insertions, 1203 deletions
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 3c14ed07dc4e..01e7c4c5c7fe 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
112 CFI_DEF_CFA rsp,0 112 CFI_DEF_CFA rsp,0
113 CFI_REGISTER rsp,rbp 113 CFI_REGISTER rsp,rbp
114 SWAPGS_UNSAFE_STACK 114 SWAPGS_UNSAFE_STACK
115 movq %gs:pda_kernelstack, %rsp 115 movq PER_CPU_VAR(kernel_stack), %rsp
116 addq $(PDA_STACKOFFSET),%rsp 116 addq $(KERNEL_STACK_OFFSET),%rsp
117 /* 117 /*
118 * No need to follow this irqs on/off section: the syscall 118 * No need to follow this irqs on/off section: the syscall
119 * disabled irqs, here we enable it straight after entry: 119 * disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
273ENTRY(ia32_cstar_target) 273ENTRY(ia32_cstar_target)
274 CFI_STARTPROC32 simple 274 CFI_STARTPROC32 simple
275 CFI_SIGNAL_FRAME 275 CFI_SIGNAL_FRAME
276 CFI_DEF_CFA rsp,PDA_STACKOFFSET 276 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
277 CFI_REGISTER rip,rcx 277 CFI_REGISTER rip,rcx
278 /*CFI_REGISTER rflags,r11*/ 278 /*CFI_REGISTER rflags,r11*/
279 SWAPGS_UNSAFE_STACK 279 SWAPGS_UNSAFE_STACK
280 movl %esp,%r8d 280 movl %esp,%r8d
281 CFI_REGISTER rsp,r8 281 CFI_REGISTER rsp,r8
282 movq %gs:pda_kernelstack,%rsp 282 movq PER_CPU_VAR(kernel_stack),%rsp
283 /* 283 /*
284 * No need to follow this irqs on/off section: the syscall 284 * No need to follow this irqs on/off section: the syscall
285 * disabled irqs and here we enable it straight after entry: 285 * disabled irqs and here we enable it straight after entry:
diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h
new file mode 100644
index 000000000000..82f613c607ce
--- /dev/null
+++ b/arch/x86/include/asm/apicnum.h
@@ -0,0 +1,12 @@
1#ifndef _ASM_X86_APICNUM_H
2#define _ASM_X86_APICNUM_H
3
4/* define MAX_IO_APICS */
5#ifdef CONFIG_X86_32
6# define MAX_IO_APICS 64
7#else
8# define MAX_IO_APICS 128
9# define MAX_LOCAL_APIC 32768
10#endif
11
12#endif /* _ASM_X86_APICNUM_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index e02a359d2aa5..02b47a603fc8 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -3,6 +3,9 @@
3 3
4/* 4/*
5 * Copyright 1992, Linus Torvalds. 5 * Copyright 1992, Linus Torvalds.
6 *
7 * Note: inlines with more than a single statement should be marked
8 * __always_inline to avoid problems with older gcc's inlining heuristics.
6 */ 9 */
7 10
8#ifndef _LINUX_BITOPS_H 11#ifndef _LINUX_BITOPS_H
@@ -53,7 +56,8 @@
53 * Note that @nr may be almost arbitrarily large; this function is not 56 * Note that @nr may be almost arbitrarily large; this function is not
54 * restricted to acting on a single-word quantity. 57 * restricted to acting on a single-word quantity.
55 */ 58 */
56static inline void set_bit(unsigned int nr, volatile unsigned long *addr) 59static __always_inline void
60set_bit(unsigned int nr, volatile unsigned long *addr)
57{ 61{
58 if (IS_IMMEDIATE(nr)) { 62 if (IS_IMMEDIATE(nr)) {
59 asm volatile(LOCK_PREFIX "orb %1,%0" 63 asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
90 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() 94 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
91 * in order to ensure changes are visible on other processors. 95 * in order to ensure changes are visible on other processors.
92 */ 96 */
93static inline void clear_bit(int nr, volatile unsigned long *addr) 97static __always_inline void
98clear_bit(int nr, volatile unsigned long *addr)
94{ 99{
95 if (IS_IMMEDIATE(nr)) { 100 if (IS_IMMEDIATE(nr)) {
96 asm volatile(LOCK_PREFIX "andb %1,%0" 101 asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
204 * 209 *
205 * This is the same as test_and_set_bit on x86. 210 * This is the same as test_and_set_bit on x86.
206 */ 211 */
207static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) 212static __always_inline int
213test_and_set_bit_lock(int nr, volatile unsigned long *addr)
208{ 214{
209 return test_and_set_bit(nr, addr); 215 return test_and_set_bit(nr, addr);
210} 216}
@@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
300 return oldbit; 306 return oldbit;
301} 307}
302 308
303static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) 309static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
304{ 310{
305 return ((1UL << (nr % BITS_PER_LONG)) & 311 return ((1UL << (nr % BITS_PER_LONG)) &
306 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; 312 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index bae482df6039..f03b23e32864 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -7,6 +7,20 @@
7#include <linux/nodemask.h> 7#include <linux/nodemask.h>
8#include <linux/percpu.h> 8#include <linux/percpu.h>
9 9
10#ifdef CONFIG_SMP
11
12extern void prefill_possible_map(void);
13
14#else /* CONFIG_SMP */
15
16static inline void prefill_possible_map(void) {}
17
18#define cpu_physical_id(cpu) boot_cpu_physical_apicid
19#define safe_smp_processor_id() 0
20#define stack_smp_processor_id() 0
21
22#endif /* CONFIG_SMP */
23
10struct x86_cpu { 24struct x86_cpu {
11 struct cpu cpu; 25 struct cpu cpu;
12}; 26};
@@ -17,4 +31,11 @@ extern void arch_unregister_cpu(int);
17#endif 31#endif
18 32
19DECLARE_PER_CPU(int, cpu_state); 33DECLARE_PER_CPU(int, cpu_state);
34
35#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
36extern unsigned char boot_cpu_id;
37#else
38#define boot_cpu_id 0
39#endif
40
20#endif /* _ASM_X86_CPU_H */ 41#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
new file mode 100644
index 000000000000..26c6dad90479
--- /dev/null
+++ b/arch/x86/include/asm/cpumask.h
@@ -0,0 +1,28 @@
1#ifndef _ASM_X86_CPUMASK_H
2#define _ASM_X86_CPUMASK_H
3#ifndef __ASSEMBLY__
4#include <linux/cpumask.h>
5
6#ifdef CONFIG_X86_64
7
8extern cpumask_var_t cpu_callin_mask;
9extern cpumask_var_t cpu_callout_mask;
10extern cpumask_var_t cpu_initialized_mask;
11extern cpumask_var_t cpu_sibling_setup_mask;
12
13#else /* CONFIG_X86_32 */
14
15extern cpumask_t cpu_callin_map;
16extern cpumask_t cpu_callout_map;
17extern cpumask_t cpu_initialized;
18extern cpumask_t cpu_sibling_setup_map;
19
20#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
21#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
22#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
23#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
24
25#endif /* CONFIG_X86_32 */
26
27#endif /* __ASSEMBLY__ */
28#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..c68c361697e1 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -1,39 +1,21 @@
1#ifndef _ASM_X86_CURRENT_H 1#ifndef _ASM_X86_CURRENT_H
2#define _ASM_X86_CURRENT_H 2#define _ASM_X86_CURRENT_H
3 3
4#ifdef CONFIG_X86_32
5#include <linux/compiler.h> 4#include <linux/compiler.h>
6#include <asm/percpu.h> 5#include <asm/percpu.h>
7 6
7#ifndef __ASSEMBLY__
8struct task_struct; 8struct task_struct;
9 9
10DECLARE_PER_CPU(struct task_struct *, current_task); 10DECLARE_PER_CPU(struct task_struct *, current_task);
11static __always_inline struct task_struct *get_current(void)
12{
13 return x86_read_percpu(current_task);
14}
15
16#else /* X86_32 */
17
18#ifndef __ASSEMBLY__
19#include <asm/pda.h>
20
21struct task_struct;
22 11
23static __always_inline struct task_struct *get_current(void) 12static __always_inline struct task_struct *get_current(void)
24{ 13{
25 return read_pda(pcurrent); 14 return percpu_read(current_task);
26} 15}
27 16
28#else /* __ASSEMBLY__ */ 17#define current get_current()
29
30#include <asm/asm-offsets.h>
31#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
32 18
33#endif /* __ASSEMBLY__ */ 19#endif /* __ASSEMBLY__ */
34 20
35#endif /* X86_32 */
36
37#define current get_current()
38
39#endif /* _ASM_X86_CURRENT_H */ 21#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
index 7a07897a7888..7838276bfe51 100644
--- a/arch/x86/include/asm/hardirq_32.h
+++ b/arch/x86/include/asm/hardirq_32.h
@@ -20,6 +20,9 @@ typedef struct {
20 20
21DECLARE_PER_CPU(irq_cpustat_t, irq_stat); 21DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
22 22
23/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
24#define MAX_HARDIRQS_PER_CPU NR_VECTORS
25
23#define __ARCH_IRQ_STAT 26#define __ARCH_IRQ_STAT
24#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member) 27#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
25 28
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
index b5a6b5d56704..42930b279215 100644
--- a/arch/x86/include/asm/hardirq_64.h
+++ b/arch/x86/include/asm/hardirq_64.h
@@ -3,22 +3,37 @@
3 3
4#include <linux/threads.h> 4#include <linux/threads.h>
5#include <linux/irq.h> 5#include <linux/irq.h>
6#include <asm/pda.h>
7#include <asm/apic.h> 6#include <asm/apic.h>
8 7
8typedef struct {
9 unsigned int __softirq_pending;
10 unsigned int __nmi_count; /* arch dependent */
11 unsigned int apic_timer_irqs; /* arch dependent */
12 unsigned int apic_perf_irqs; /* arch dependent */
13 unsigned int irq0_irqs;
14 unsigned int irq_resched_count;
15 unsigned int irq_call_count;
16 unsigned int irq_tlb_count;
17 unsigned int irq_thermal_count;
18 unsigned int irq_spurious_count;
19 unsigned int irq_threshold_count;
20} ____cacheline_aligned irq_cpustat_t;
21
22DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
23
9/* We can have at most NR_VECTORS irqs routed to a cpu at a time */ 24/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
10#define MAX_HARDIRQS_PER_CPU NR_VECTORS 25#define MAX_HARDIRQS_PER_CPU NR_VECTORS
11 26
12#define __ARCH_IRQ_STAT 1 27#define __ARCH_IRQ_STAT 1
13 28
14#define inc_irq_stat(member) add_pda(member, 1) 29#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
15 30
16#define local_softirq_pending() read_pda(__softirq_pending) 31#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
17 32
18#define __ARCH_SET_SOFTIRQ_PENDING 1 33#define __ARCH_SET_SOFTIRQ_PENDING 1
19 34
20#define set_softirq_pending(x) write_pda(__softirq_pending, (x)) 35#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
21#define or_softirq_pending(x) or_pda(__softirq_pending, (x)) 36#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
22 37
23extern void ack_bad_irq(unsigned int irq); 38extern void ack_bad_irq(unsigned int irq);
24 39
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 7a1f44ac1f17..08ec793aa043 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -114,38 +114,16 @@ struct IR_IO_APIC_route_entry {
114extern int nr_ioapics; 114extern int nr_ioapics;
115extern int nr_ioapic_registers[MAX_IO_APICS]; 115extern int nr_ioapic_registers[MAX_IO_APICS];
116 116
117/*
118 * MP-BIOS irq configuration table structures:
119 */
120
121#define MP_MAX_IOAPIC_PIN 127 117#define MP_MAX_IOAPIC_PIN 127
122 118
123struct mp_config_ioapic {
124 unsigned long mp_apicaddr;
125 unsigned int mp_apicid;
126 unsigned char mp_type;
127 unsigned char mp_apicver;
128 unsigned char mp_flags;
129};
130
131struct mp_config_intsrc {
132 unsigned int mp_dstapic;
133 unsigned char mp_type;
134 unsigned char mp_irqtype;
135 unsigned short mp_irqflag;
136 unsigned char mp_srcbus;
137 unsigned char mp_srcbusirq;
138 unsigned char mp_dstirq;
139};
140
141/* I/O APIC entries */ 119/* I/O APIC entries */
142extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 120extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
143 121
144/* # of MP IRQ source entries */ 122/* # of MP IRQ source entries */
145extern int mp_irq_entries; 123extern int mp_irq_entries;
146 124
147/* MP IRQ source entries */ 125/* MP IRQ source entries */
148extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 126extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
149 127
150/* non-0 if default (table-less) MP configuration */ 128/* non-0 if default (table-less) MP configuration */
151extern int mpc_default_type; 129extern int mpc_default_type;
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
index 86afd7473457..d7ed33ee94e9 100644
--- a/arch/x86/include/asm/irq_regs_32.h
+++ b/arch/x86/include/asm/irq_regs_32.h
@@ -15,7 +15,7 @@ DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15 15
16static inline struct pt_regs *get_irq_regs(void) 16static inline struct pt_regs *get_irq_regs(void)
17{ 17{
18 return x86_read_percpu(irq_regs); 18 return percpu_read(irq_regs);
19} 19}
20 20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs) 21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
@@ -23,7 +23,7 @@ static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
23 struct pt_regs *old_regs; 23 struct pt_regs *old_regs;
24 24
25 old_regs = get_irq_regs(); 25 old_regs = get_irq_regs();
26 x86_write_percpu(irq_regs, new_regs); 26 percpu_write(irq_regs, new_regs);
27 27
28 return old_regs; 28 return old_regs;
29} 29}
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index 21a0b92027f5..1554d0236e03 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -110,6 +110,8 @@
110 110
111#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) 111#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
112 112
113#include <asm/apicnum.h> /* need MAX_IO_APICS */
114
113#ifndef CONFIG_SPARSE_IRQ 115#ifndef CONFIG_SPARSE_IRQ
114# if NR_CPUS < MAX_IO_APICS 116# if NR_CPUS < MAX_IO_APICS
115# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) 117# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
@@ -117,11 +119,12 @@
117# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) 119# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
118# endif 120# endif
119#else 121#else
120# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) 122
121# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) 123# define NR_IRQS \
122# else 124 ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \
123# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) 125 (NR_VECTORS + (8 * NR_CPUS)) : \
124# endif 126 (NR_VECTORS + (32 * MAX_IO_APICS))) \
127
125#endif 128#endif
126 129
127#elif defined(CONFIG_X86_VOYAGER) 130#elif defined(CONFIG_X86_VOYAGER)
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index ceb013660146..89897a6a65b9 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -24,7 +24,13 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
24{ 24{
25} 25}
26 26
27#ifdef CONFIG_SMP
27extern void __inquire_remote_apic(int apicid); 28extern void __inquire_remote_apic(int apicid);
29#else /* CONFIG_SMP */
30static inline void __inquire_remote_apic(int apicid)
31{
32}
33#endif /* CONFIG_SMP */
28 34
29static inline void inquire_remote_apic(int apicid) 35static inline void inquire_remote_apic(int apicid)
30{ 36{
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
index 7e98ce1d2c0e..08b53454f831 100644
--- a/arch/x86/include/asm/mmu_context_32.h
+++ b/arch/x86/include/asm/mmu_context_32.h
@@ -4,8 +4,8 @@
4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
5{ 5{
6#ifdef CONFIG_SMP 6#ifdef CONFIG_SMP
7 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) 7 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
8 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY); 8 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
9#endif 9#endif
10} 10}
11 11
@@ -19,8 +19,8 @@ static inline void switch_mm(struct mm_struct *prev,
19 /* stop flush ipis for the previous mm */ 19 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask); 20 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP 21#ifdef CONFIG_SMP
22 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); 22 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
23 x86_write_percpu(cpu_tlbstate.active_mm, next); 23 percpu_write(cpu_tlbstate.active_mm, next);
24#endif 24#endif
25 cpu_set(cpu, next->cpu_vm_mask); 25 cpu_set(cpu, next->cpu_vm_mask);
26 26
@@ -35,8 +35,8 @@ static inline void switch_mm(struct mm_struct *prev,
35 } 35 }
36#ifdef CONFIG_SMP 36#ifdef CONFIG_SMP
37 else { 37 else {
38 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK); 38 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
39 BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next); 39 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
40 40
41 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { 41 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
42 /* We were in lazy tlb mode and leave_mm disabled 42 /* We were in lazy tlb mode and leave_mm disabled
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
index 677d36e9540a..c4572505ab3e 100644
--- a/arch/x86/include/asm/mmu_context_64.h
+++ b/arch/x86/include/asm/mmu_context_64.h
@@ -1,13 +1,11 @@
1#ifndef _ASM_X86_MMU_CONTEXT_64_H 1#ifndef _ASM_X86_MMU_CONTEXT_64_H
2#define _ASM_X86_MMU_CONTEXT_64_H 2#define _ASM_X86_MMU_CONTEXT_64_H
3 3
4#include <asm/pda.h>
5
6static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) 4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
7{ 5{
8#ifdef CONFIG_SMP 6#ifdef CONFIG_SMP
9 if (read_pda(mmu_state) == TLBSTATE_OK) 7 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
10 write_pda(mmu_state, TLBSTATE_LAZY); 8 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
11#endif 9#endif
12} 10}
13 11
@@ -19,8 +17,8 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
19 /* stop flush ipis for the previous mm */ 17 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask); 18 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP 19#ifdef CONFIG_SMP
22 write_pda(mmu_state, TLBSTATE_OK); 20 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
23 write_pda(active_mm, next); 21 percpu_write(cpu_tlbstate.active_mm, next);
24#endif 22#endif
25 cpu_set(cpu, next->cpu_vm_mask); 23 cpu_set(cpu, next->cpu_vm_mask);
26 load_cr3(next->pgd); 24 load_cr3(next->pgd);
@@ -30,9 +28,9 @@ static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
30 } 28 }
31#ifdef CONFIG_SMP 29#ifdef CONFIG_SMP
32 else { 30 else {
33 write_pda(mmu_state, TLBSTATE_OK); 31 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
34 if (read_pda(active_mm) != next) 32 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
35 BUG(); 33
36 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) { 34 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
37 /* We were in lazy tlb mode and leave_mm disabled 35 /* We were in lazy tlb mode and leave_mm disabled
38 * tlb flush IPI delivery. We must reload CR3 36 * tlb flush IPI delivery. We must reload CR3
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 59568bc4767f..4a7f96d7c188 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -24,17 +24,18 @@
24# endif 24# endif
25#endif 25#endif
26 26
27struct intel_mp_floating { 27/* Intel MP Floating Pointer Structure */
28 char mpf_signature[4]; /* "_MP_" */ 28struct mpf_intel {
29 unsigned int mpf_physptr; /* Configuration table address */ 29 char signature[4]; /* "_MP_" */
30 unsigned char mpf_length; /* Our length (paragraphs) */ 30 unsigned int physptr; /* Configuration table address */
31 unsigned char mpf_specification;/* Specification version */ 31 unsigned char length; /* Our length (paragraphs) */
32 unsigned char mpf_checksum; /* Checksum (makes sum 0) */ 32 unsigned char specification; /* Specification version */
33 unsigned char mpf_feature1; /* Standard or configuration ? */ 33 unsigned char checksum; /* Checksum (makes sum 0) */
34 unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ 34 unsigned char feature1; /* Standard or configuration ? */
35 unsigned char mpf_feature3; /* Unused (0) */ 35 unsigned char feature2; /* Bit7 set for IMCR|PIC */
36 unsigned char mpf_feature4; /* Unused (0) */ 36 unsigned char feature3; /* Unused (0) */
37 unsigned char mpf_feature5; /* Unused (0) */ 37 unsigned char feature4; /* Unused (0) */
38 unsigned char feature5; /* Unused (0) */
38}; 39};
39 40
40#define MPC_SIGNATURE "PCMP" 41#define MPC_SIGNATURE "PCMP"
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index cb988aab716d..14080d22edb3 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -58,15 +58,15 @@ struct mtrr_gentry {
58#endif /* !__i386__ */ 58#endif /* !__i386__ */
59 59
60struct mtrr_var_range { 60struct mtrr_var_range {
61 u32 base_lo; 61 __u32 base_lo;
62 u32 base_hi; 62 __u32 base_hi;
63 u32 mask_lo; 63 __u32 mask_lo;
64 u32 mask_hi; 64 __u32 mask_hi;
65}; 65};
66 66
67/* In the Intel processor's MTRR interface, the MTRR type is always held in 67/* In the Intel processor's MTRR interface, the MTRR type is always held in
68 an 8 bit field: */ 68 an 8 bit field: */
69typedef u8 mtrr_type; 69typedef __u8 mtrr_type;
70 70
71#define MTRR_NUM_FIXED_RANGES 88 71#define MTRR_NUM_FIXED_RANGES 88
72#define MTRR_MAX_VAR_RANGES 256 72#define MTRR_MAX_VAR_RANGES 256
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29f44f0..e27fdbe5f9e4 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -13,8 +13,8 @@
13#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) 13#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
14#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) 14#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
15 15
16#define IRQSTACK_ORDER 2 16#define IRQ_STACK_ORDER 2
17#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) 17#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
18 18
19#define STACKFAULT_STACK 1 19#define STACKFAULT_STACK 1
20#define DOUBLEFAULT_STACK 2 20#define DOUBLEFAULT_STACK 2
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index ba3e2ff6aedc..c26c6bf4da00 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -244,7 +244,8 @@ struct pv_mmu_ops {
244 void (*flush_tlb_user)(void); 244 void (*flush_tlb_user)(void);
245 void (*flush_tlb_kernel)(void); 245 void (*flush_tlb_kernel)(void);
246 void (*flush_tlb_single)(unsigned long addr); 246 void (*flush_tlb_single)(unsigned long addr);
247 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, 247 void (*flush_tlb_others)(const struct cpumask *cpus,
248 struct mm_struct *mm,
248 unsigned long va); 249 unsigned long va);
249 250
250 /* Hooks for allocating and freeing a pagetable top-level */ 251 /* Hooks for allocating and freeing a pagetable top-level */
@@ -984,10 +985,11 @@ static inline void __flush_tlb_single(unsigned long addr)
984 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); 985 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
985} 986}
986 987
987static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, 988static inline void flush_tlb_others(const struct cpumask *cpumask,
989 struct mm_struct *mm,
988 unsigned long va) 990 unsigned long va)
989{ 991{
990 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); 992 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
991} 993}
992 994
993static inline int paravirt_pgd_alloc(struct mm_struct *mm) 995static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
index 90a8d9d4206b..c31ca048a901 100644
--- a/arch/x86/include/asm/pda.h
+++ b/arch/x86/include/asm/pda.h
@@ -5,134 +5,41 @@
5#include <linux/stddef.h> 5#include <linux/stddef.h>
6#include <linux/types.h> 6#include <linux/types.h>
7#include <linux/cache.h> 7#include <linux/cache.h>
8#include <linux/threads.h>
8#include <asm/page.h> 9#include <asm/page.h>
10#include <asm/percpu.h>
9 11
10/* Per processor datastructure. %gs points to it while the kernel runs */ 12/* Per processor datastructure. %gs points to it while the kernel runs */
11struct x8664_pda { 13struct x8664_pda {
12 struct task_struct *pcurrent; /* 0 Current process */ 14 unsigned long unused1;
13 unsigned long data_offset; /* 8 Per cpu data offset from linker 15 unsigned long unused2;
14 address */ 16 unsigned long unused3;
15 unsigned long kernelstack; /* 16 top of kernel stack for current */ 17 unsigned long unused4;
16 unsigned long oldrsp; /* 24 user rsp for system call */ 18 int unused5;
17 int irqcount; /* 32 Irq nesting counter. Starts -1 */ 19 unsigned int unused6; /* 36 was cpunumber */
18 unsigned int cpunumber; /* 36 Logical CPU number */
19#ifdef CONFIG_CC_STACKPROTECTOR 20#ifdef CONFIG_CC_STACKPROTECTOR
20 unsigned long stack_canary; /* 40 stack canary value */ 21 unsigned long stack_canary; /* 40 stack canary value */
21 /* gcc-ABI: this canary MUST be at 22 /* gcc-ABI: this canary MUST be at
22 offset 40!!! */ 23 offset 40!!! */
23#endif 24#endif
24 char *irqstackptr;
25 short nodenumber; /* number of current node (32k max) */
26 short in_bootmem; /* pda lives in bootmem */ 25 short in_bootmem; /* pda lives in bootmem */
27 unsigned int __softirq_pending;
28 unsigned int __nmi_count; /* number of NMI on this CPUs */
29 short mmu_state;
30 short isidle;
31 struct mm_struct *active_mm;
32 unsigned apic_timer_irqs;
33 unsigned apic_perf_irqs;
34 unsigned irq0_irqs;
35 unsigned irq_resched_count;
36 unsigned irq_call_count;
37 unsigned irq_tlb_count;
38 unsigned irq_thermal_count;
39 unsigned irq_threshold_count;
40 unsigned irq_spurious_count;
41} ____cacheline_aligned_in_smp; 26} ____cacheline_aligned_in_smp;
42 27
43extern struct x8664_pda **_cpu_pda; 28DECLARE_PER_CPU(struct x8664_pda, __pda);
44extern void pda_init(int); 29extern void pda_init(int);
45 30
46#define cpu_pda(i) (_cpu_pda[i]) 31#define cpu_pda(cpu) (&per_cpu(__pda, cpu))
47 32
48/* 33#define read_pda(field) percpu_read(__pda.field)
49 * There is no fast way to get the base address of the PDA, all the accesses 34#define write_pda(field, val) percpu_write(__pda.field, val)
50 * have to mention %fs/%gs. So it needs to be done this Torvaldian way. 35#define add_pda(field, val) percpu_add(__pda.field, val)
51 */ 36#define sub_pda(field, val) percpu_sub(__pda.field, val)
52extern void __bad_pda_field(void) __attribute__((noreturn)); 37#define or_pda(field, val) percpu_or(__pda.field, val)
53
54/*
55 * proxy_pda doesn't actually exist, but tell gcc it is accessed for
56 * all PDA accesses so it gets read/write dependencies right.
57 */
58extern struct x8664_pda _proxy_pda;
59
60#define pda_offset(field) offsetof(struct x8664_pda, field)
61
62#define pda_to_op(op, field, val) \
63do { \
64 typedef typeof(_proxy_pda.field) T__; \
65 if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
66 switch (sizeof(_proxy_pda.field)) { \
67 case 2: \
68 asm(op "w %1,%%gs:%c2" : \
69 "+m" (_proxy_pda.field) : \
70 "ri" ((T__)val), \
71 "i"(pda_offset(field))); \
72 break; \
73 case 4: \
74 asm(op "l %1,%%gs:%c2" : \
75 "+m" (_proxy_pda.field) : \
76 "ri" ((T__)val), \
77 "i" (pda_offset(field))); \
78 break; \
79 case 8: \
80 asm(op "q %1,%%gs:%c2": \
81 "+m" (_proxy_pda.field) : \
82 "ri" ((T__)val), \
83 "i"(pda_offset(field))); \
84 break; \
85 default: \
86 __bad_pda_field(); \
87 } \
88} while (0)
89
90#define pda_from_op(op, field) \
91({ \
92 typeof(_proxy_pda.field) ret__; \
93 switch (sizeof(_proxy_pda.field)) { \
94 case 2: \
95 asm(op "w %%gs:%c1,%0" : \
96 "=r" (ret__) : \
97 "i" (pda_offset(field)), \
98 "m" (_proxy_pda.field)); \
99 break; \
100 case 4: \
101 asm(op "l %%gs:%c1,%0": \
102 "=r" (ret__): \
103 "i" (pda_offset(field)), \
104 "m" (_proxy_pda.field)); \
105 break; \
106 case 8: \
107 asm(op "q %%gs:%c1,%0": \
108 "=r" (ret__) : \
109 "i" (pda_offset(field)), \
110 "m" (_proxy_pda.field)); \
111 break; \
112 default: \
113 __bad_pda_field(); \
114 } \
115 ret__; \
116})
117
118#define read_pda(field) pda_from_op("mov", field)
119#define write_pda(field, val) pda_to_op("mov", field, val)
120#define add_pda(field, val) pda_to_op("add", field, val)
121#define sub_pda(field, val) pda_to_op("sub", field, val)
122#define or_pda(field, val) pda_to_op("or", field, val)
123 38
124/* This is not atomic against other CPUs -- CPU preemption needs to be off */ 39/* This is not atomic against other CPUs -- CPU preemption needs to be off */
125#define test_and_clear_bit_pda(bit, field) \ 40#define test_and_clear_bit_pda(bit, field) \
126({ \ 41 x86_test_and_clear_bit_percpu(bit, __pda.field)
127 int old__; \
128 asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \
129 : "=r" (old__), "+m" (_proxy_pda.field) \
130 : "dIr" (bit), "i" (pda_offset(field)) : "memory");\
131 old__; \
132})
133 42
134#endif 43#endif
135 44
136#define PDA_STACKOFFSET (5*8)
137
138#endif /* _ASM_X86_PDA_H */ 45#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ece72053ba63..165d5272ece1 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -2,53 +2,12 @@
2#define _ASM_X86_PERCPU_H 2#define _ASM_X86_PERCPU_H
3 3
4#ifdef CONFIG_X86_64 4#ifdef CONFIG_X86_64
5#include <linux/compiler.h> 5#define __percpu_seg gs
6 6#define __percpu_mov_op movq
7/* Same as asm-generic/percpu.h, except that we store the per cpu offset 7#else
8 in the PDA. Longer term the PDA and every per cpu variable 8#define __percpu_seg fs
9 should be just put into a single section and referenced directly 9#define __percpu_mov_op movl
10 from %gs */
11
12#ifdef CONFIG_SMP
13#include <asm/pda.h>
14
15#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
16#define __my_cpu_offset read_pda(data_offset)
17
18#define per_cpu_offset(x) (__per_cpu_offset(x))
19
20#endif 10#endif
21#include <asm-generic/percpu.h>
22
23DECLARE_PER_CPU(struct x8664_pda, pda);
24
25/*
26 * These are supposed to be implemented as a single instruction which
27 * operates on the per-cpu data base segment. x86-64 doesn't have
28 * that yet, so this is a fairly inefficient workaround for the
29 * meantime. The single instruction is atomic with respect to
30 * preemption and interrupts, so we need to explicitly disable
31 * interrupts here to achieve the same effect. However, because it
32 * can be used from within interrupt-disable/enable, we can't actually
33 * disable interrupts; disabling preemption is enough.
34 */
35#define x86_read_percpu(var) \
36 ({ \
37 typeof(per_cpu_var(var)) __tmp; \
38 preempt_disable(); \
39 __tmp = __get_cpu_var(var); \
40 preempt_enable(); \
41 __tmp; \
42 })
43
44#define x86_write_percpu(var, val) \
45 do { \
46 preempt_disable(); \
47 __get_cpu_var(var) = (val); \
48 preempt_enable(); \
49 } while(0)
50
51#else /* CONFIG_X86_64 */
52 11
53#ifdef __ASSEMBLY__ 12#ifdef __ASSEMBLY__
54 13
@@ -65,47 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
65 * PER_CPU(cpu_gdt_descr, %ebx) 24 * PER_CPU(cpu_gdt_descr, %ebx)
66 */ 25 */
67#ifdef CONFIG_SMP 26#ifdef CONFIG_SMP
68#define PER_CPU(var, reg) \ 27#define PER_CPU(var, reg) \
69 movl %fs:per_cpu__##this_cpu_off, reg; \ 28 __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \
70 lea per_cpu__##var(reg), reg 29 lea per_cpu__##var(reg), reg
71#define PER_CPU_VAR(var) %fs:per_cpu__##var 30#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var
72#else /* ! SMP */ 31#else /* ! SMP */
73#define PER_CPU(var, reg) \ 32#define PER_CPU(var, reg) \
74 movl $per_cpu__##var, reg 33 __percpu_mov_op $per_cpu__##var, reg
75#define PER_CPU_VAR(var) per_cpu__##var 34#define PER_CPU_VAR(var) per_cpu__##var
76#endif /* SMP */ 35#endif /* SMP */
77 36
78#else /* ...!ASSEMBLY */ 37#else /* ...!ASSEMBLY */
79 38
80/* 39#include <linux/stringify.h>
81 * PER_CPU finds an address of a per-cpu variable.
82 *
83 * Args:
84 * var - variable name
85 * cpu - 32bit register containing the current CPU number
86 *
87 * The resulting address is stored in the "cpu" argument.
88 *
89 * Example:
90 * PER_CPU(cpu_gdt_descr, %ebx)
91 */
92#ifdef CONFIG_SMP
93
94#define __my_cpu_offset x86_read_percpu(this_cpu_off)
95
96/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */
97#define __percpu_seg "%%fs:"
98
99#else /* !SMP */
100 40
101#define __percpu_seg "" 41#ifdef CONFIG_SMP
102 42#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
103#endif /* SMP */ 43#define __my_cpu_offset percpu_read(this_cpu_off)
104 44#else
105#include <asm-generic/percpu.h> 45#define __percpu_arg(x) "%" #x
106 46#endif
107/* We can use this directly for local CPU (faster). */
108DECLARE_PER_CPU(unsigned long, this_cpu_off);
109 47
110/* For arch-specific code, we can use direct single-insn ops (they 48/* For arch-specific code, we can use direct single-insn ops (they
111 * don't give an lvalue though). */ 49 * don't give an lvalue though). */
@@ -120,20 +58,25 @@ do { \
120 } \ 58 } \
121 switch (sizeof(var)) { \ 59 switch (sizeof(var)) { \
122 case 1: \ 60 case 1: \
123 asm(op "b %1,"__percpu_seg"%0" \ 61 asm(op "b %1,"__percpu_arg(0) \
124 : "+m" (var) \ 62 : "+m" (var) \
125 : "ri" ((T__)val)); \ 63 : "ri" ((T__)val)); \
126 break; \ 64 break; \
127 case 2: \ 65 case 2: \
128 asm(op "w %1,"__percpu_seg"%0" \ 66 asm(op "w %1,"__percpu_arg(0) \
129 : "+m" (var) \ 67 : "+m" (var) \
130 : "ri" ((T__)val)); \ 68 : "ri" ((T__)val)); \
131 break; \ 69 break; \
132 case 4: \ 70 case 4: \
133 asm(op "l %1,"__percpu_seg"%0" \ 71 asm(op "l %1,"__percpu_arg(0) \
134 : "+m" (var) \ 72 : "+m" (var) \
135 : "ri" ((T__)val)); \ 73 : "ri" ((T__)val)); \
136 break; \ 74 break; \
75 case 8: \
76 asm(op "q %1,"__percpu_arg(0) \
77 : "+m" (var) \
78 : "r" ((T__)val)); \
79 break; \
137 default: __bad_percpu_size(); \ 80 default: __bad_percpu_size(); \
138 } \ 81 } \
139} while (0) 82} while (0)
@@ -143,17 +86,22 @@ do { \
143 typeof(var) ret__; \ 86 typeof(var) ret__; \
144 switch (sizeof(var)) { \ 87 switch (sizeof(var)) { \
145 case 1: \ 88 case 1: \
146 asm(op "b "__percpu_seg"%1,%0" \ 89 asm(op "b "__percpu_arg(1)",%0" \
147 : "=r" (ret__) \ 90 : "=r" (ret__) \
148 : "m" (var)); \ 91 : "m" (var)); \
149 break; \ 92 break; \
150 case 2: \ 93 case 2: \
151 asm(op "w "__percpu_seg"%1,%0" \ 94 asm(op "w "__percpu_arg(1)",%0" \
152 : "=r" (ret__) \ 95 : "=r" (ret__) \
153 : "m" (var)); \ 96 : "m" (var)); \
154 break; \ 97 break; \
155 case 4: \ 98 case 4: \
156 asm(op "l "__percpu_seg"%1,%0" \ 99 asm(op "l "__percpu_arg(1)",%0" \
100 : "=r" (ret__) \
101 : "m" (var)); \
102 break; \
103 case 8: \
104 asm(op "q "__percpu_arg(1)",%0" \
157 : "=r" (ret__) \ 105 : "=r" (ret__) \
158 : "m" (var)); \ 106 : "m" (var)); \
159 break; \ 107 break; \
@@ -162,13 +110,36 @@ do { \
162 ret__; \ 110 ret__; \
163}) 111})
164 112
165#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) 113#define percpu_read(var) percpu_from_op("mov", per_cpu__##var)
166#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) 114#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
167#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) 115#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
168#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) 116#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
169#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) 117#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val)
118#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
119#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
120
121/* This is not atomic against other CPUs -- CPU preemption needs to be off */
122#define x86_test_and_clear_bit_percpu(bit, var) \
123({ \
124 int old__; \
125 asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \
126 : "=r" (old__), "+m" (per_cpu__##var) \
127 : "dIr" (bit)); \
128 old__; \
129})
130
131#include <asm-generic/percpu.h>
132
133/* We can use this directly for local CPU (faster). */
134DECLARE_PER_CPU(unsigned long, this_cpu_off);
135
136#ifdef CONFIG_X86_64
137extern void load_pda_offset(int cpu);
138#else
139static inline void load_pda_offset(int cpu) { }
140#endif
141
170#endif /* !__ASSEMBLY__ */ 142#endif /* !__ASSEMBLY__ */
171#endif /* !CONFIG_X86_64 */
172 143
173#ifdef CONFIG_SMP 144#ifdef CONFIG_SMP
174 145
@@ -195,9 +166,9 @@ do { \
195#define early_per_cpu_ptr(_name) (_name##_early_ptr) 166#define early_per_cpu_ptr(_name) (_name##_early_ptr)
196#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) 167#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
197#define early_per_cpu(_name, _cpu) \ 168#define early_per_cpu(_name, _cpu) \
198 (early_per_cpu_ptr(_name) ? \ 169 *(early_per_cpu_ptr(_name) ? \
199 early_per_cpu_ptr(_name)[_cpu] : \ 170 &early_per_cpu_ptr(_name)[_cpu] : \
200 per_cpu(_name, _cpu)) 171 &per_cpu(_name, _cpu))
201 172
202#else /* !CONFIG_SMP */ 173#else /* !CONFIG_SMP */
203#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ 174#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 83e69f4a37f0..06bbcbd66e9c 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -341,6 +341,25 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
341 341
342#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) 342#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
343 343
344static inline int is_new_memtype_allowed(unsigned long flags,
345 unsigned long new_flags)
346{
347 /*
348 * Certain new memtypes are not allowed with certain
349 * requested memtype:
350 * - request is uncached, return cannot be write-back
351 * - request is write-combine, return cannot be write-back
352 */
353 if ((flags == _PAGE_CACHE_UC_MINUS &&
354 new_flags == _PAGE_CACHE_WB) ||
355 (flags == _PAGE_CACHE_WC &&
356 new_flags == _PAGE_CACHE_WB)) {
357 return 0;
358 }
359
360 return 1;
361}
362
344#ifndef __ASSEMBLY__ 363#ifndef __ASSEMBLY__
345/* Indicate that x86 has its own track and untrack pfn vma functions */ 364/* Indicate that x86 has its own track and untrack pfn vma functions */
346#define __HAVE_PFNMAP_TRACKING 365#define __HAVE_PFNMAP_TRACKING
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 091cd8855f2e..f511246fa6cd 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -378,6 +378,9 @@ union thread_xstate {
378 378
379#ifdef CONFIG_X86_64 379#ifdef CONFIG_X86_64
380DECLARE_PER_CPU(struct orig_ist, orig_ist); 380DECLARE_PER_CPU(struct orig_ist, orig_ist);
381
382DECLARE_PER_CPU(char[IRQ_STACK_SIZE], irq_stack);
383DECLARE_PER_CPU(char *, irq_stack_ptr);
381#endif 384#endif
382 385
383extern void print_cpu_info(struct cpuinfo_x86 *); 386extern void print_cpu_info(struct cpuinfo_x86 *);
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ebe858cdc8a3..536949749bc2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start;
100extern unsigned long init_pg_tables_end; 100extern unsigned long init_pg_tables_end;
101 101
102#else 102#else
103void __init x86_64_init_pda(void);
104void __init x86_64_start_kernel(char *real_mode); 103void __init x86_64_start_kernel(char *real_mode);
105void __init x86_64_start_reservations(char *real_mode_data); 104void __init x86_64_start_reservations(char *real_mode_data);
106 105
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19953df61c52..68636e767a91 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -17,32 +17,7 @@
17#endif 17#endif
18#include <asm/pda.h> 18#include <asm/pda.h>
19#include <asm/thread_info.h> 19#include <asm/thread_info.h>
20 20#include <asm/cpumask.h>
21#ifdef CONFIG_X86_64
22
23extern cpumask_var_t cpu_callin_mask;
24extern cpumask_var_t cpu_callout_mask;
25extern cpumask_var_t cpu_initialized_mask;
26extern cpumask_var_t cpu_sibling_setup_mask;
27
28#else /* CONFIG_X86_32 */
29
30extern cpumask_t cpu_callin_map;
31extern cpumask_t cpu_callout_map;
32extern cpumask_t cpu_initialized;
33extern cpumask_t cpu_sibling_setup_map;
34
35#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
36#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
37#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
38#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
39
40#endif /* CONFIG_X86_32 */
41
42extern void (*mtrr_hook)(void);
43extern void zap_low_mappings(void);
44
45extern int __cpuinit get_local_pda(int cpu);
46 21
47extern int smp_num_siblings; 22extern int smp_num_siblings;
48extern unsigned int num_processors; 23extern unsigned int num_processors;
@@ -50,9 +25,7 @@ extern unsigned int num_processors;
50DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 25DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
51DECLARE_PER_CPU(cpumask_t, cpu_core_map); 26DECLARE_PER_CPU(cpumask_t, cpu_core_map);
52DECLARE_PER_CPU(u16, cpu_llc_id); 27DECLARE_PER_CPU(u16, cpu_llc_id);
53#ifdef CONFIG_X86_32
54DECLARE_PER_CPU(int, cpu_number); 28DECLARE_PER_CPU(int, cpu_number);
55#endif
56 29
57static inline struct cpumask *cpu_sibling_mask(int cpu) 30static inline struct cpumask *cpu_sibling_mask(int cpu)
58{ 31{
@@ -167,8 +140,6 @@ void play_dead_common(void);
167void native_send_call_func_ipi(const struct cpumask *mask); 140void native_send_call_func_ipi(const struct cpumask *mask);
168void native_send_call_func_single_ipi(int cpu); 141void native_send_call_func_single_ipi(int cpu);
169 142
170extern void prefill_possible_map(void);
171
172void smp_store_cpu_info(int id); 143void smp_store_cpu_info(int id);
173#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) 144#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
174 145
@@ -177,10 +148,6 @@ static inline int num_booting_cpus(void)
177{ 148{
178 return cpumask_weight(cpu_callout_mask); 149 return cpumask_weight(cpu_callout_mask);
179} 150}
180#else
181static inline void prefill_possible_map(void)
182{
183}
184#endif /* CONFIG_SMP */ 151#endif /* CONFIG_SMP */
185 152
186extern unsigned disabled_cpus __cpuinitdata; 153extern unsigned disabled_cpus __cpuinitdata;
@@ -191,11 +158,11 @@ extern unsigned disabled_cpus __cpuinitdata;
191 * from the initial startup. We map APIC_BASE very early in page_setup(), 158 * from the initial startup. We map APIC_BASE very early in page_setup(),
192 * so this is correct in the x86 case. 159 * so this is correct in the x86 case.
193 */ 160 */
194#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) 161#define raw_smp_processor_id() (percpu_read(cpu_number))
195extern int safe_smp_processor_id(void); 162extern int safe_smp_processor_id(void);
196 163
197#elif defined(CONFIG_X86_64_SMP) 164#elif defined(CONFIG_X86_64_SMP)
198#define raw_smp_processor_id() read_pda(cpunumber) 165#define raw_smp_processor_id() (percpu_read(cpu_number))
199 166
200#define stack_smp_processor_id() \ 167#define stack_smp_processor_id() \
201({ \ 168({ \
@@ -205,10 +172,6 @@ extern int safe_smp_processor_id(void);
205}) 172})
206#define safe_smp_processor_id() smp_processor_id() 173#define safe_smp_processor_id() smp_processor_id()
207 174
208#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
209#define cpu_physical_id(cpu) boot_cpu_physical_apicid
210#define safe_smp_processor_id() 0
211#define stack_smp_processor_id() 0
212#endif 175#endif
213 176
214#ifdef CONFIG_X86_LOCAL_APIC 177#ifdef CONFIG_X86_LOCAL_APIC
@@ -251,11 +214,5 @@ static inline int hard_smp_processor_id(void)
251 214
252#endif /* CONFIG_X86_LOCAL_APIC */ 215#endif /* CONFIG_X86_LOCAL_APIC */
253 216
254#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
255extern unsigned char boot_cpu_id;
256#else
257#define boot_cpu_id 0
258#endif
259
260#endif /* __ASSEMBLY__ */ 217#endif /* __ASSEMBLY__ */
261#endif /* _ASM_X86_SMP_H */ 218#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8e626ea33a1a..d1dc27dba36d 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -94,7 +94,7 @@ do { \
94 "call __switch_to\n\t" \ 94 "call __switch_to\n\t" \
95 ".globl thread_return\n" \ 95 ".globl thread_return\n" \
96 "thread_return:\n\t" \ 96 "thread_return:\n\t" \
97 "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ 97 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
98 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 98 "movq %P[thread_info](%%rsi),%%r8\n\t" \
99 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ 99 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
100 "movq %%rax,%%rdi\n\t" \ 100 "movq %%rax,%%rdi\n\t" \
@@ -106,7 +106,7 @@ do { \
106 [ti_flags] "i" (offsetof(struct thread_info, flags)), \ 106 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
107 [tif_fork] "i" (TIF_FORK), \ 107 [tif_fork] "i" (TIF_FORK), \
108 [thread_info] "i" (offsetof(struct task_struct, stack)), \ 108 [thread_info] "i" (offsetof(struct task_struct, stack)), \
109 [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ 109 [current_task] "m" (per_cpu_var(current_task)) \
110 : "memory", "cc" __EXTRA_CLOBBER) 110 : "memory", "cc" __EXTRA_CLOBBER)
111#endif 111#endif
112 112
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index efdf93820aed..f38488989db7 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -196,25 +196,21 @@ static inline struct thread_info *current_thread_info(void)
196 196
197#else /* X86_32 */ 197#else /* X86_32 */
198 198
199#include <asm/pda.h> 199#include <asm/percpu.h>
200#define KERNEL_STACK_OFFSET (5*8)
200 201
201/* 202/*
202 * macros/functions for gaining access to the thread information structure 203 * macros/functions for gaining access to the thread information structure
203 * preempt_count needs to be 1 initially, until the scheduler is functional. 204 * preempt_count needs to be 1 initially, until the scheduler is functional.
204 */ 205 */
205#ifndef __ASSEMBLY__ 206#ifndef __ASSEMBLY__
206static inline struct thread_info *current_thread_info(void) 207DECLARE_PER_CPU(unsigned long, kernel_stack);
207{
208 struct thread_info *ti;
209 ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
210 return ti;
211}
212 208
213/* do not use in interrupt context */ 209static inline struct thread_info *current_thread_info(void)
214static inline struct thread_info *stack_thread_info(void)
215{ 210{
216 struct thread_info *ti; 211 struct thread_info *ti;
217 asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); 212 ti = (void *)(percpu_read(kernel_stack) +
213 KERNEL_STACK_OFFSET - THREAD_SIZE);
218 return ti; 214 return ti;
219} 215}
220 216
@@ -222,8 +218,8 @@ static inline struct thread_info *stack_thread_info(void)
222 218
223/* how to get the thread information struct from ASM */ 219/* how to get the thread information struct from ASM */
224#define GET_THREAD_INFO(reg) \ 220#define GET_THREAD_INFO(reg) \
225 movq %gs:pda_kernelstack,reg ; \ 221 movq PER_CPU_VAR(kernel_stack),reg ; \
226 subq $(THREAD_SIZE-PDA_STACKOFFSET),reg 222 subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
227 223
228#endif 224#endif
229 225
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0e7bbb549116..d3539f998f88 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
113 __flush_tlb(); 113 __flush_tlb();
114} 114}
115 115
116static inline void native_flush_tlb_others(const cpumask_t *cpumask, 116static inline void native_flush_tlb_others(const struct cpumask *cpumask,
117 struct mm_struct *mm, 117 struct mm_struct *mm,
118 unsigned long va) 118 unsigned long va)
119{ 119{
@@ -142,31 +142,28 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
142 flush_tlb_mm(vma->vm_mm); 142 flush_tlb_mm(vma->vm_mm);
143} 143}
144 144
145void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, 145void native_flush_tlb_others(const struct cpumask *cpumask,
146 unsigned long va); 146 struct mm_struct *mm, unsigned long va);
147 147
148#define TLBSTATE_OK 1 148#define TLBSTATE_OK 1
149#define TLBSTATE_LAZY 2 149#define TLBSTATE_LAZY 2
150 150
151#ifdef CONFIG_X86_32
152struct tlb_state { 151struct tlb_state {
153 struct mm_struct *active_mm; 152 struct mm_struct *active_mm;
154 int state; 153 int state;
155 char __cacheline_padding[L1_CACHE_BYTES-8];
156}; 154};
157DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); 155DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
158 156
159void reset_lazy_tlbstate(void);
160#else
161static inline void reset_lazy_tlbstate(void) 157static inline void reset_lazy_tlbstate(void)
162{ 158{
159 percpu_write(cpu_tlbstate.state, 0);
160 percpu_write(cpu_tlbstate.active_mm, &init_mm);
163} 161}
164#endif
165 162
166#endif /* SMP */ 163#endif /* SMP */
167 164
168#ifndef CONFIG_PARAVIRT 165#ifndef CONFIG_PARAVIRT
169#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) 166#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va)
170#endif 167#endif
171 168
172static inline void flush_tlb_kernel_range(unsigned long start, 169static inline void flush_tlb_kernel_range(unsigned long start,
@@ -175,4 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start,
175 flush_tlb_all(); 172 flush_tlb_all();
176} 173}
177 174
175extern void zap_low_mappings(void);
176
178#endif /* _ASM_X86_TLBFLUSH_H */ 177#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4e2f2e0aab27..ffea1fe03a99 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -83,7 +83,8 @@ extern cpumask_t *node_to_cpumask_map;
83DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); 83DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
84 84
85/* Returns the number of the current Node. */ 85/* Returns the number of the current Node. */
86#define numa_node_id() read_pda(nodenumber) 86DECLARE_PER_CPU(int, node_number);
87#define numa_node_id() percpu_read(node_number)
87 88
88#ifdef CONFIG_DEBUG_PER_CPU_MAPS 89#ifdef CONFIG_DEBUG_PER_CPU_MAPS
89extern int cpu_to_node(int cpu); 90extern int cpu_to_node(int cpu);
@@ -102,10 +103,7 @@ static inline int cpu_to_node(int cpu)
102/* Same function but used if called before per_cpu areas are setup */ 103/* Same function but used if called before per_cpu areas are setup */
103static inline int early_cpu_to_node(int cpu) 104static inline int early_cpu_to_node(int cpu)
104{ 105{
105 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 106 return early_per_cpu(x86_cpu_to_node_map, cpu);
106 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
107
108 return per_cpu(x86_cpu_to_node_map, cpu);
109} 107}
110 108
111/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ 109/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 780ba0ab94f9..90f06c25221d 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,6 +13,7 @@ extern unsigned char *trampoline_base;
13 13
14extern unsigned long init_rsp; 14extern unsigned long init_rsp;
15extern unsigned long initial_code; 15extern unsigned long initial_code;
16extern unsigned long initial_gs;
16 17
17#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) 18#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
18#define TRAMPOLINE_BASE 0x6000 19#define TRAMPOLINE_BASE 0x6000
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 50423c7b56b2..74e6393bfddb 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,7 +325,8 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
325#define cpubit_isset(cpu, bau_local_cpumask) \ 325#define cpubit_isset(cpu, bau_local_cpumask) \
326 test_bit((cpu), (bau_local_cpumask).bits) 326 test_bit((cpu), (bau_local_cpumask).bits)
327 327
328extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long); 328extern int uv_flush_tlb_others(struct cpumask *,
329 struct mm_struct *, unsigned long);
329extern void uv_bau_message_intr1(void); 330extern void uv_bau_message_intr1(void);
330extern void uv_bau_timeout_intr1(void); 331extern void uv_bau_timeout_intr1(void);
331 332
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f438..4cb5964f1499 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id)
912 DECLARE_BITMAP(used, 256); 912 DECLARE_BITMAP(used, 256);
913 bitmap_zero(used, 256); 913 bitmap_zero(used, 256);
914 for (i = 0; i < nr_ioapics; i++) { 914 for (i = 0; i < nr_ioapics; i++) {
915 struct mp_config_ioapic *ia = &mp_ioapics[i]; 915 struct mpc_ioapic *ia = &mp_ioapics[i];
916 __set_bit(ia->mp_apicid, used); 916 __set_bit(ia->apicid, used);
917 } 917 }
918 if (!test_bit(id, used)) 918 if (!test_bit(id, used))
919 return id; 919 return id;
@@ -945,47 +945,47 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
945 945
946 idx = nr_ioapics; 946 idx = nr_ioapics;
947 947
948 mp_ioapics[idx].mp_type = MP_IOAPIC; 948 mp_ioapics[idx].type = MP_IOAPIC;
949 mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; 949 mp_ioapics[idx].flags = MPC_APIC_USABLE;
950 mp_ioapics[idx].mp_apicaddr = address; 950 mp_ioapics[idx].apicaddr = address;
951 951
952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
953 mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); 953 mp_ioapics[idx].apicid = uniq_ioapic_id(id);
954#ifdef CONFIG_X86_32 954#ifdef CONFIG_X86_32
955 mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); 955 mp_ioapics[idx].apicver = io_apic_get_version(idx);
956#else 956#else
957 mp_ioapics[idx].mp_apicver = 0; 957 mp_ioapics[idx].apicver = 0;
958#endif 958#endif
959 /* 959 /*
960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
962 */ 962 */
963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; 963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
964 mp_ioapic_routing[idx].gsi_base = gsi_base; 964 mp_ioapic_routing[idx].gsi_base = gsi_base;
965 mp_ioapic_routing[idx].gsi_end = gsi_base + 965 mp_ioapic_routing[idx].gsi_end = gsi_base +
966 io_apic_get_redir_entries(idx); 966 io_apic_get_redir_entries(idx);
967 967
968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " 968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
969 "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, 969 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
970 mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, 970 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); 971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
972 972
973 nr_ioapics++; 973 nr_ioapics++;
974} 974}
975 975
976static void assign_to_mp_irq(struct mp_config_intsrc *m, 976static void assign_to_mp_irq(struct mpc_intsrc *m,
977 struct mp_config_intsrc *mp_irq) 977 struct mpc_intsrc *mp_irq)
978{ 978{
979 memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); 979 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
980} 980}
981 981
982static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, 982static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
983 struct mp_config_intsrc *m) 983 struct mpc_intsrc *m)
984{ 984{
985 return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); 985 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
986} 986}
987 987
988static void save_mp_irq(struct mp_config_intsrc *m) 988static void save_mp_irq(struct mpc_intsrc *m)
989{ 989{
990 int i; 990 int i;
991 991
@@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1003{ 1003{
1004 int ioapic; 1004 int ioapic;
1005 int pin; 1005 int pin;
1006 struct mp_config_intsrc mp_irq; 1006 struct mpc_intsrc mp_irq;
1007 1007
1008 /* 1008 /*
1009 * Convert 'gsi' to 'ioapic.pin'. 1009 * Convert 'gsi' to 'ioapic.pin'.
@@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1021 if ((bus_irq == 0) && (trigger == 3)) 1021 if ((bus_irq == 0) && (trigger == 3))
1022 trigger = 1; 1022 trigger = 1;
1023 1023
1024 mp_irq.mp_type = MP_INTSRC; 1024 mp_irq.type = MP_INTSRC;
1025 mp_irq.mp_irqtype = mp_INT; 1025 mp_irq.irqtype = mp_INT;
1026 mp_irq.mp_irqflag = (trigger << 2) | polarity; 1026 mp_irq.irqflag = (trigger << 2) | polarity;
1027 mp_irq.mp_srcbus = MP_ISA_BUS; 1027 mp_irq.srcbus = MP_ISA_BUS;
1028 mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ 1028 mp_irq.srcbusirq = bus_irq; /* IRQ */
1029 mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ 1029 mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
1030 mp_irq.mp_dstirq = pin; /* INTIN# */ 1030 mp_irq.dstirq = pin; /* INTIN# */
1031 1031
1032 save_mp_irq(&mp_irq); 1032 save_mp_irq(&mp_irq);
1033} 1033}
@@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1037 int i; 1037 int i;
1038 int ioapic; 1038 int ioapic;
1039 unsigned int dstapic; 1039 unsigned int dstapic;
1040 struct mp_config_intsrc mp_irq; 1040 struct mpc_intsrc mp_irq;
1041 1041
1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA) 1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
1043 /* 1043 /*
@@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1062 ioapic = mp_find_ioapic(0); 1062 ioapic = mp_find_ioapic(0);
1063 if (ioapic < 0) 1063 if (ioapic < 0)
1064 return; 1064 return;
1065 dstapic = mp_ioapics[ioapic].mp_apicid; 1065 dstapic = mp_ioapics[ioapic].apicid;
1066 1066
1067 /* 1067 /*
1068 * Use the default configuration for the IRQs 0-15. Unless 1068 * Use the default configuration for the IRQs 0-15. Unless
@@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void)
1072 int idx; 1072 int idx;
1073 1073
1074 for (idx = 0; idx < mp_irq_entries; idx++) { 1074 for (idx = 0; idx < mp_irq_entries; idx++) {
1075 struct mp_config_intsrc *irq = mp_irqs + idx; 1075 struct mpc_intsrc *irq = mp_irqs + idx;
1076 1076
1077 /* Do we already have a mapping for this ISA IRQ? */ 1077 /* Do we already have a mapping for this ISA IRQ? */
1078 if (irq->mp_srcbus == MP_ISA_BUS 1078 if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
1079 && irq->mp_srcbusirq == i)
1080 break; 1079 break;
1081 1080
1082 /* Do we already have a mapping for this IOAPIC pin */ 1081 /* Do we already have a mapping for this IOAPIC pin */
1083 if (irq->mp_dstapic == dstapic && 1082 if (irq->dstapic == dstapic && irq->dstirq == i)
1084 irq->mp_dstirq == i)
1085 break; 1083 break;
1086 } 1084 }
1087 1085
@@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void)
1090 continue; /* IRQ already used */ 1088 continue; /* IRQ already used */
1091 } 1089 }
1092 1090
1093 mp_irq.mp_type = MP_INTSRC; 1091 mp_irq.type = MP_INTSRC;
1094 mp_irq.mp_irqflag = 0; /* Conforming */ 1092 mp_irq.irqflag = 0; /* Conforming */
1095 mp_irq.mp_srcbus = MP_ISA_BUS; 1093 mp_irq.srcbus = MP_ISA_BUS;
1096 mp_irq.mp_dstapic = dstapic; 1094 mp_irq.dstapic = dstapic;
1097 mp_irq.mp_irqtype = mp_INT; 1095 mp_irq.irqtype = mp_INT;
1098 mp_irq.mp_srcbusirq = i; /* Identity mapped */ 1096 mp_irq.srcbusirq = i; /* Identity mapped */
1099 mp_irq.mp_dstirq = i; 1097 mp_irq.dstirq = i;
1100 1098
1101 save_mp_irq(&mp_irq); 1099 save_mp_irq(&mp_irq);
1102 } 1100 }
@@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
1207 u32 gsi, int triggering, int polarity) 1205 u32 gsi, int triggering, int polarity)
1208{ 1206{
1209#ifdef CONFIG_X86_MPPARSE 1207#ifdef CONFIG_X86_MPPARSE
1210 struct mp_config_intsrc mp_irq; 1208 struct mpc_intsrc mp_irq;
1211 int ioapic; 1209 int ioapic;
1212 1210
1213 if (!acpi_ioapic) 1211 if (!acpi_ioapic)
1214 return 0; 1212 return 0;
1215 1213
1216 /* print the entry should happen on mptable identically */ 1214 /* print the entry should happen on mptable identically */
1217 mp_irq.mp_type = MP_INTSRC; 1215 mp_irq.type = MP_INTSRC;
1218 mp_irq.mp_irqtype = mp_INT; 1216 mp_irq.irqtype = mp_INT;
1219 mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | 1217 mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
1220 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); 1218 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
1221 mp_irq.mp_srcbus = number; 1219 mp_irq.srcbus = number;
1222 mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); 1220 mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
1223 ioapic = mp_find_ioapic(gsi); 1221 ioapic = mp_find_ioapic(gsi);
1224 mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; 1222 mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
1225 mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; 1223 mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
1226 1224
1227 save_mp_irq(&mp_irq); 1225 save_mp_irq(&mp_irq);
1228#endif 1226#endif
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 707c1f6f95fa..4abff454c55b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
101 stack_start.sp = temp_stack + sizeof(temp_stack); 101 stack_start.sp = temp_stack + sizeof(temp_stack);
102 early_gdt_descr.address = 102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
104 initial_gs = per_cpu_offset(smp_processor_id());
104#endif 105#endif
105 initial_code = (unsigned long)wakeup_long64; 106 initial_code = (unsigned long)wakeup_long64;
106 saved_magic = 0x123456789abcdef0; 107 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index d2d17b8d10f8..e9af14f748ea 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -48,6 +48,7 @@
48#include <asm/proto.h> 48#include <asm/proto.h>
49#include <asm/apic.h> 49#include <asm/apic.h>
50#include <asm/i8259.h> 50#include <asm/i8259.h>
51#include <asm/smp.h>
51 52
52#include <mach_apic.h> 53#include <mach_apic.h>
53#include <mach_apicdef.h> 54#include <mach_apicdef.h>
@@ -895,6 +896,10 @@ void disable_local_APIC(void)
895{ 896{
896 unsigned int value; 897 unsigned int value;
897 898
899 /* APIC hasn't been mapped yet */
900 if (!apic_phys)
901 return;
902
898 clear_local_APIC(); 903 clear_local_APIC();
899 904
900 /* 905 /*
@@ -1126,6 +1131,11 @@ void __cpuinit setup_local_APIC(void)
1126 unsigned int value; 1131 unsigned int value;
1127 int i, j; 1132 int i, j;
1128 1133
1134 if (disable_apic) {
1135 disable_ioapic_setup();
1136 return;
1137 }
1138
1129#ifdef CONFIG_X86_32 1139#ifdef CONFIG_X86_32
1130 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1140 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1131 if (lapic_is_integrated() && esr_disable) { 1141 if (lapic_is_integrated() && esr_disable) {
@@ -1567,11 +1577,11 @@ int apic_version[MAX_APICS];
1567 1577
1568int __init APIC_init_uniprocessor(void) 1578int __init APIC_init_uniprocessor(void)
1569{ 1579{
1570#ifdef CONFIG_X86_64
1571 if (disable_apic) { 1580 if (disable_apic) {
1572 pr_info("Apic disabled\n"); 1581 pr_info("Apic disabled\n");
1573 return -1; 1582 return -1;
1574 } 1583 }
1584#ifdef CONFIG_X86_64
1575 if (!cpu_has_apic) { 1585 if (!cpu_has_apic) {
1576 disable_apic = 1; 1586 disable_apic = 1;
1577 pr_info("Apic disabled by BIOS\n"); 1587 pr_info("Apic disabled by BIOS\n");
@@ -1869,17 +1879,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1869#endif 1879#endif
1870 1880
1871#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) 1881#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1872 /* are we being called early in kernel startup? */ 1882 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1873 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1883 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1874 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1875 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1876
1877 cpu_to_apicid[cpu] = apicid;
1878 bios_cpu_apicid[cpu] = apicid;
1879 } else {
1880 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1881 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1882 }
1883#endif 1884#endif
1884 1885
1885 set_cpu_possible(cpu, true); 1886 set_cpu_possible(cpu, true);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edbc..64c834a39aa8 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -49,13 +49,7 @@ int main(void)
49 BLANK(); 49 BLANK();
50#undef ENTRY 50#undef ENTRY
51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry)) 51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
52 ENTRY(kernelstack); 52 DEFINE(pda_size, sizeof(struct x8664_pda));
53 ENTRY(oldrsp);
54 ENTRY(pcurrent);
55 ENTRY(irqcount);
56 ENTRY(cpunumber);
57 ENTRY(irqstackptr);
58 ENTRY(data_offset);
59 BLANK(); 53 BLANK();
60#undef ENTRY 54#undef ENTRY
61#ifdef CONFIG_PARAVIRT 55#ifdef CONFIG_PARAVIRT
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 667e5d561ed7..95eb30e1e677 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -22,6 +22,8 @@
22#include <asm/asm.h> 22#include <asm/asm.h>
23#include <asm/numa.h> 23#include <asm/numa.h>
24#include <asm/smp.h> 24#include <asm/smp.h>
25#include <asm/cpu.h>
26#include <asm/cpumask.h>
25#ifdef CONFIG_X86_LOCAL_APIC 27#ifdef CONFIG_X86_LOCAL_APIC
26#include <asm/mpspec.h> 28#include <asm/mpspec.h>
27#include <asm/apic.h> 29#include <asm/apic.h>
@@ -879,54 +881,34 @@ static __init int setup_disablecpuid(char *arg)
879__setup("clearcpuid=", setup_disablecpuid); 881__setup("clearcpuid=", setup_disablecpuid);
880 882
881#ifdef CONFIG_X86_64 883#ifdef CONFIG_X86_64
882struct x8664_pda **_cpu_pda __read_mostly;
883EXPORT_SYMBOL(_cpu_pda);
884
885struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 884struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
886 885
887static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 886DEFINE_PER_CPU_PAGE_ALIGNED(char[IRQ_STACK_SIZE], irq_stack);
887#ifdef CONFIG_SMP
888DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
889#else
890DEFINE_PER_CPU(char *, irq_stack_ptr) =
891 per_cpu_var(irq_stack) + IRQ_STACK_SIZE - 64;
892#endif
893
894DEFINE_PER_CPU(unsigned long, kernel_stack) =
895 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
896EXPORT_PER_CPU_SYMBOL(kernel_stack);
897
898DEFINE_PER_CPU(unsigned int, irq_count) = -1;
888 899
889void __cpuinit pda_init(int cpu) 900void __cpuinit pda_init(int cpu)
890{ 901{
891 struct x8664_pda *pda = cpu_pda(cpu);
892
893 /* Setup up data that may be needed in __get_free_pages early */ 902 /* Setup up data that may be needed in __get_free_pages early */
894 loadsegment(fs, 0); 903 loadsegment(fs, 0);
895 loadsegment(gs, 0); 904 loadsegment(gs, 0);
896 /* Memory clobbers used to order PDA accessed */
897 mb();
898 wrmsrl(MSR_GS_BASE, pda);
899 mb();
900
901 pda->cpunumber = cpu;
902 pda->irqcount = -1;
903 pda->kernelstack = (unsigned long)stack_thread_info() -
904 PDA_STACKOFFSET + THREAD_SIZE;
905 pda->active_mm = &init_mm;
906 pda->mmu_state = 0;
907
908 if (cpu == 0) {
909 /* others are initialized in smpboot.c */
910 pda->pcurrent = &init_task;
911 pda->irqstackptr = boot_cpu_stack;
912 pda->irqstackptr += IRQSTACKSIZE - 64;
913 } else {
914 if (!pda->irqstackptr) {
915 pda->irqstackptr = (char *)
916 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
917 if (!pda->irqstackptr)
918 panic("cannot allocate irqstack for cpu %d",
919 cpu);
920 pda->irqstackptr += IRQSTACKSIZE - 64;
921 }
922 905
923 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 906 load_pda_offset(cpu);
924 pda->nodenumber = cpu_to_node(cpu);
925 }
926} 907}
927 908
928static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 909static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
929 DEBUG_STKSZ] __page_aligned_bss; 910 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
911 __aligned(PAGE_SIZE);
930 912
931extern asmlinkage void ignore_sysret(void); 913extern asmlinkage void ignore_sysret(void);
932 914
@@ -984,15 +966,18 @@ void __cpuinit cpu_init(void)
984 struct tss_struct *t = &per_cpu(init_tss, cpu); 966 struct tss_struct *t = &per_cpu(init_tss, cpu);
985 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); 967 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
986 unsigned long v; 968 unsigned long v;
987 char *estacks = NULL;
988 struct task_struct *me; 969 struct task_struct *me;
989 int i; 970 int i;
990 971
991 /* CPU 0 is initialised in head64.c */ 972 /* CPU 0 is initialised in head64.c */
992 if (cpu != 0) 973 if (cpu != 0)
993 pda_init(cpu); 974 pda_init(cpu);
994 else 975
995 estacks = boot_exception_stacks; 976#ifdef CONFIG_NUMA
977 if (cpu != 0 && percpu_read(node_number) == 0 &&
978 cpu_to_node(cpu) != NUMA_NO_NODE)
979 percpu_write(node_number, cpu_to_node(cpu));
980#endif
996 981
997 me = current; 982 me = current;
998 983
@@ -1026,18 +1011,13 @@ void __cpuinit cpu_init(void)
1026 * set up and load the per-CPU TSS 1011 * set up and load the per-CPU TSS
1027 */ 1012 */
1028 if (!orig_ist->ist[0]) { 1013 if (!orig_ist->ist[0]) {
1029 static const unsigned int order[N_EXCEPTION_STACKS] = { 1014 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1030 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1015 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1031 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 1016 [DEBUG_STACK - 1] = DEBUG_STKSZ
1032 }; 1017 };
1018 char *estacks = per_cpu(exception_stacks, cpu);
1033 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1019 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1034 if (cpu) { 1020 estacks += sizes[v];
1035 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1036 if (!estacks)
1037 panic("Cannot allocate exception "
1038 "stack %ld %d\n", v, cpu);
1039 }
1040 estacks += PAGE_SIZE << order[v];
1041 orig_ist->ist[v] = t->x86_tss.ist[v] = 1021 orig_ist->ist[v] = t->x86_tss.ist[v] =
1042 (unsigned long)estacks; 1022 (unsigned long)estacks;
1043 } 1023 }
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 06fcd8f9323c..8f3c95c7e61f 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -150,9 +150,8 @@ struct drv_cmd {
150 u32 val; 150 u32 val;
151}; 151};
152 152
153static long do_drv_read(void *_cmd) 153static void do_drv_read(struct drv_cmd *cmd)
154{ 154{
155 struct drv_cmd *cmd = _cmd;
156 u32 h; 155 u32 h;
157 156
158 switch (cmd->type) { 157 switch (cmd->type) {
@@ -167,12 +166,10 @@ static long do_drv_read(void *_cmd)
167 default: 166 default:
168 break; 167 break;
169 } 168 }
170 return 0;
171} 169}
172 170
173static long do_drv_write(void *_cmd) 171static void do_drv_write(struct drv_cmd *cmd)
174{ 172{
175 struct drv_cmd *cmd = _cmd;
176 u32 lo, hi; 173 u32 lo, hi;
177 174
178 switch (cmd->type) { 175 switch (cmd->type) {
@@ -189,23 +186,30 @@ static long do_drv_write(void *_cmd)
189 default: 186 default:
190 break; 187 break;
191 } 188 }
192 return 0;
193} 189}
194 190
195static void drv_read(struct drv_cmd *cmd) 191static void drv_read(struct drv_cmd *cmd)
196{ 192{
193 cpumask_t saved_mask = current->cpus_allowed;
197 cmd->val = 0; 194 cmd->val = 0;
198 195
199 work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd); 196 set_cpus_allowed_ptr(current, cmd->mask);
197 do_drv_read(cmd);
198 set_cpus_allowed_ptr(current, &saved_mask);
200} 199}
201 200
202static void drv_write(struct drv_cmd *cmd) 201static void drv_write(struct drv_cmd *cmd)
203{ 202{
203 cpumask_t saved_mask = current->cpus_allowed;
204 unsigned int i; 204 unsigned int i;
205 205
206 for_each_cpu(i, cmd->mask) { 206 for_each_cpu(i, cmd->mask) {
207 work_on_cpu(i, do_drv_write, cmd); 207 set_cpus_allowed_ptr(current, cpumask_of(i));
208 do_drv_write(cmd);
208 } 209 }
210
211 set_cpus_allowed_ptr(current, &saved_mask);
212 return;
209} 213}
210 214
211static u32 get_cur_val(const struct cpumask *mask) 215static u32 get_cur_val(const struct cpumask *mask)
@@ -231,15 +235,8 @@ static u32 get_cur_val(const struct cpumask *mask)
231 return 0; 235 return 0;
232 } 236 }
233 237
234 if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
235 return 0;
236
237 cpumask_copy(cmd.mask, mask);
238
239 drv_read(&cmd); 238 drv_read(&cmd);
240 239
241 free_cpumask_var(cmd.mask);
242
243 dprintk("get_cur_val = %u\n", cmd.val); 240 dprintk("get_cur_val = %u\n", cmd.val);
244 241
245 return cmd.val; 242 return cmd.val;
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 48533d77be78..58527a9fc404 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -132,7 +132,16 @@ struct _cpuid4_info {
132 union _cpuid4_leaf_ecx ecx; 132 union _cpuid4_leaf_ecx ecx;
133 unsigned long size; 133 unsigned long size;
134 unsigned long can_disable; 134 unsigned long can_disable;
135 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 135 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
136};
137
138/* subset of above _cpuid4_info w/o shared_cpu_map */
139struct _cpuid4_info_regs {
140 union _cpuid4_leaf_eax eax;
141 union _cpuid4_leaf_ebx ebx;
142 union _cpuid4_leaf_ecx ecx;
143 unsigned long size;
144 unsigned long can_disable;
136}; 145};
137 146
138#ifdef CONFIG_PCI 147#ifdef CONFIG_PCI
@@ -263,7 +272,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
263} 272}
264 273
265static void __cpuinit 274static void __cpuinit
266amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) 275amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
267{ 276{
268 if (index < 3) 277 if (index < 3)
269 return; 278 return;
@@ -271,7 +280,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
271} 280}
272 281
273static int 282static int
274__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 283__cpuinit cpuid4_cache_lookup_regs(int index,
284 struct _cpuid4_info_regs *this_leaf)
275{ 285{
276 union _cpuid4_leaf_eax eax; 286 union _cpuid4_leaf_eax eax;
277 union _cpuid4_leaf_ebx ebx; 287 union _cpuid4_leaf_ebx ebx;
@@ -299,6 +309,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
299 return 0; 309 return 0;
300} 310}
301 311
312static int
313__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
314{
315 struct _cpuid4_info_regs *leaf_regs =
316 (struct _cpuid4_info_regs *)this_leaf;
317
318 return cpuid4_cache_lookup_regs(index, leaf_regs);
319}
320
302static int __cpuinit find_num_cache_leaves(void) 321static int __cpuinit find_num_cache_leaves(void)
303{ 322{
304 unsigned int eax, ebx, ecx, edx; 323 unsigned int eax, ebx, ecx, edx;
@@ -338,11 +357,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
338 * parameters cpuid leaf to find the cache details 357 * parameters cpuid leaf to find the cache details
339 */ 358 */
340 for (i = 0; i < num_cache_leaves; i++) { 359 for (i = 0; i < num_cache_leaves; i++) {
341 struct _cpuid4_info this_leaf; 360 struct _cpuid4_info_regs this_leaf;
342
343 int retval; 361 int retval;
344 362
345 retval = cpuid4_cache_lookup(i, &this_leaf); 363 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
346 if (retval >= 0) { 364 if (retval >= 0) {
347 switch(this_leaf.eax.split.level) { 365 switch(this_leaf.eax.split.level) {
348 case 1: 366 case 1:
@@ -491,17 +509,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
491 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 509 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
492 510
493 if (num_threads_sharing == 1) 511 if (num_threads_sharing == 1)
494 cpu_set(cpu, this_leaf->shared_cpu_map); 512 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
495 else { 513 else {
496 index_msb = get_count_order(num_threads_sharing); 514 index_msb = get_count_order(num_threads_sharing);
497 515
498 for_each_online_cpu(i) { 516 for_each_online_cpu(i) {
499 if (cpu_data(i).apicid >> index_msb == 517 if (cpu_data(i).apicid >> index_msb ==
500 c->apicid >> index_msb) { 518 c->apicid >> index_msb) {
501 cpu_set(i, this_leaf->shared_cpu_map); 519 cpumask_set_cpu(i,
520 to_cpumask(this_leaf->shared_cpu_map));
502 if (i != cpu && per_cpu(cpuid4_info, i)) { 521 if (i != cpu && per_cpu(cpuid4_info, i)) {
503 sibling_leaf = CPUID4_INFO_IDX(i, index); 522 sibling_leaf =
504 cpu_set(cpu, sibling_leaf->shared_cpu_map); 523 CPUID4_INFO_IDX(i, index);
524 cpumask_set_cpu(cpu, to_cpumask(
525 sibling_leaf->shared_cpu_map));
505 } 526 }
506 } 527 }
507 } 528 }
@@ -513,9 +534,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
513 int sibling; 534 int sibling;
514 535
515 this_leaf = CPUID4_INFO_IDX(cpu, index); 536 this_leaf = CPUID4_INFO_IDX(cpu, index);
516 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 537 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
517 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 538 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
518 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 539 cpumask_clear_cpu(cpu,
540 to_cpumask(sibling_leaf->shared_cpu_map));
519 } 541 }
520} 542}
521#else 543#else
@@ -620,8 +642,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
620 int n = 0; 642 int n = 0;
621 643
622 if (len > 1) { 644 if (len > 1) {
623 cpumask_t *mask = &this_leaf->shared_cpu_map; 645 const struct cpumask *mask;
624 646
647 mask = to_cpumask(this_leaf->shared_cpu_map);
625 n = type? 648 n = type?
626 cpulist_scnprintf(buf, len-2, mask) : 649 cpulist_scnprintf(buf, len-2, mask) :
627 cpumask_scnprintf(buf, len-2, mask); 650 cpumask_scnprintf(buf, len-2, mask);
@@ -684,7 +707,8 @@ static struct pci_dev *get_k8_northbridge(int node)
684 707
685static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) 708static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
686{ 709{
687 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 710 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
711 int node = cpu_to_node(cpumask_first(mask));
688 struct pci_dev *dev = NULL; 712 struct pci_dev *dev = NULL;
689 ssize_t ret = 0; 713 ssize_t ret = 0;
690 int i; 714 int i;
@@ -718,7 +742,8 @@ static ssize_t
718store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, 742store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
719 size_t count) 743 size_t count)
720{ 744{
721 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 745 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
746 int node = cpu_to_node(cpumask_first(mask));
722 struct pci_dev *dev = NULL; 747 struct pci_dev *dev = NULL;
723 unsigned int ret, index, val; 748 unsigned int ret, index, val;
724 749
@@ -863,7 +888,7 @@ err_out:
863 return -ENOMEM; 888 return -ENOMEM;
864} 889}
865 890
866static cpumask_t cache_dev_map = CPU_MASK_NONE; 891static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
867 892
868/* Add/Remove cache interface for CPU device */ 893/* Add/Remove cache interface for CPU device */
869static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 894static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -903,7 +928,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
903 } 928 }
904 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 929 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
905 } 930 }
906 cpu_set(cpu, cache_dev_map); 931 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
907 932
908 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 933 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
909 return 0; 934 return 0;
@@ -916,9 +941,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
916 941
917 if (per_cpu(cpuid4_info, cpu) == NULL) 942 if (per_cpu(cpuid4_info, cpu) == NULL)
918 return; 943 return;
919 if (!cpu_isset(cpu, cache_dev_map)) 944 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
920 return; 945 return;
921 cpu_clear(cpu, cache_dev_map); 946 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
922 947
923 for (i = 0; i < num_cache_leaves; i++) 948 for (i = 0; i < num_cache_leaves; i++)
924 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 949 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 8ae8c4ff094d..4772e91e8246 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
481 481
482#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
484 i = first_cpu(per_cpu(cpu_core_map, cpu)); 484 i = cpumask_first(&per_cpu(cpu_core_map, cpu));
485 485
486 /* first core not up yet */ 486 /* first core not up yet */
487 if (cpu_data(i).cpu_core_id) 487 if (cpu_data(i).cpu_core_id)
@@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
501 if (err) 501 if (err)
502 goto out; 502 goto out;
503 503
504 b->cpus = per_cpu(cpu_core_map, cpu); 504 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
505 per_cpu(threshold_banks, cpu)[bank] = b; 505 per_cpu(threshold_banks, cpu)[bank] = b;
506 goto out; 506 goto out;
507 } 507 }
@@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
512 err = -ENOMEM; 512 err = -ENOMEM;
513 goto out; 513 goto out;
514 } 514 }
515 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
516 kfree(b);
517 err = -ENOMEM;
518 goto out;
519 }
515 520
516 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 521 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
517 if (!b->kobj) 522 if (!b->kobj)
518 goto out_free; 523 goto out_free;
519 524
520#ifndef CONFIG_SMP 525#ifndef CONFIG_SMP
521 b->cpus = CPU_MASK_ALL; 526 cpumask_setall(b->cpus);
522#else 527#else
523 b->cpus = per_cpu(cpu_core_map, cpu); 528 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
524#endif 529#endif
525 530
526 per_cpu(threshold_banks, cpu)[bank] = b; 531 per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
529 if (err) 534 if (err)
530 goto out_free; 535 goto out_free;
531 536
532 for_each_cpu_mask_nr(i, b->cpus) { 537 for_each_cpu(i, b->cpus) {
533 if (i == cpu) 538 if (i == cpu)
534 continue; 539 continue;
535 540
@@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
545 550
546out_free: 551out_free:
547 per_cpu(threshold_banks, cpu)[bank] = NULL; 552 per_cpu(threshold_banks, cpu)[bank] = NULL;
553 free_cpumask_var(b->cpus);
548 kfree(b); 554 kfree(b);
549out: 555out:
550 return err; 556 return err;
@@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
619#endif 625#endif
620 626
621 /* remove all sibling symlinks before unregistering */ 627 /* remove all sibling symlinks before unregistering */
622 for_each_cpu_mask_nr(i, b->cpus) { 628 for_each_cpu(i, b->cpus) {
623 if (i == cpu) 629 if (i == cpu)
624 continue; 630 continue;
625 631
@@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
632free_out: 638free_out:
633 kobject_del(b->kobj); 639 kobject_del(b->kobj);
634 kobject_put(b->kobj); 640 kobject_put(b->kobj);
641 free_cpumask_var(b->cpus);
635 kfree(b); 642 kfree(b);
636 per_cpu(threshold_banks, cpu)[bank] = NULL; 643 per_cpu(threshold_banks, cpu)[bank] = NULL;
637} 644}
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19e35ab..11b93cabdf78 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,7 +24,7 @@
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/hpet.h> 25#include <asm/hpet.h>
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30 30
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d0707048..d35db5993fd6 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
106 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
107{ 107{
108 const unsigned cpu = get_cpu(); 108 const unsigned cpu = get_cpu();
109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irq_stack_end =
110 (unsigned long *)per_cpu(irq_stack_ptr, cpu);
110 unsigned used = 0; 111 unsigned used = 0;
111 struct thread_info *tinfo; 112 struct thread_info *tinfo;
112 int graph = 0; 113 int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *) estack_end[-2]; 161 stack = (unsigned long *) estack_end[-2];
161 continue; 162 continue;
162 } 163 }
163 if (irqstack_end) { 164 if (irq_stack_end) {
164 unsigned long *irqstack; 165 unsigned long *irq_stack;
165 irqstack = irqstack_end - 166 irq_stack = irq_stack_end -
166 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 167 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
167 168
168 if (stack >= irqstack && stack < irqstack_end) { 169 if (stack >= irq_stack && stack < irq_stack_end) {
169 if (ops->stack(data, "IRQ") < 0) 170 if (ops->stack(data, "IRQ") < 0)
170 break; 171 break;
171 bp = print_context_stack(tinfo, stack, bp, 172 bp = print_context_stack(tinfo, stack, bp,
172 ops, data, irqstack_end, &graph); 173 ops, data, irq_stack_end, &graph);
173 /* 174 /*
174 * We link to the next stack (which would be 175 * We link to the next stack (which would be
175 * the process stack normally) the last 176 * the process stack normally) the last
176 * pointer (index -1 to end) in the IRQ stack: 177 * pointer (index -1 to end) in the IRQ stack:
177 */ 178 */
178 stack = (unsigned long *) (irqstack_end[-1]); 179 stack = (unsigned long *) (irq_stack_end[-1]);
179 irqstack_end = NULL; 180 irq_stack_end = NULL;
180 ops->stack(data, "EOI"); 181 ops->stack(data, "EOI");
181 continue; 182 continue;
182 } 183 }
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
199 unsigned long *stack; 200 unsigned long *stack;
200 int i; 201 int i;
201 const int cpu = smp_processor_id(); 202 const int cpu = smp_processor_id();
202 unsigned long *irqstack_end = 203 unsigned long *irq_stack_end =
203 (unsigned long *) (cpu_pda(cpu)->irqstackptr); 204 (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
204 unsigned long *irqstack = 205 unsigned long *irq_stack =
205 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 206 (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
206 207
207 /* 208 /*
208 * debugging aid: "show_stack(NULL, NULL);" prints the 209 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
218 219
219 stack = sp; 220 stack = sp;
220 for (i = 0; i < kstack_depth_to_print; i++) { 221 for (i = 0; i < kstack_depth_to_print; i++) {
221 if (stack >= irqstack && stack <= irqstack_end) { 222 if (stack >= irq_stack && stack <= irq_stack_end) {
222 if (stack == irqstack_end) { 223 if (stack == irq_stack_end) {
223 stack = (unsigned long *) (irqstack_end[-1]); 224 stack = (unsigned long *) (irq_stack_end[-1]);
224 printk(" <EOI> "); 225 printk(" <EOI> ");
225 } 226 }
226 } else { 227 } else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
241 int i; 242 int i;
242 unsigned long sp; 243 unsigned long sp;
243 const int cpu = smp_processor_id(); 244 const int cpu = smp_processor_id();
244 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 245 struct task_struct *cur = current;
245 246
246 sp = regs->sp; 247 sp = regs->sp;
247 printk("CPU %d ", cpu); 248 printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index d6f0490a7391..46469029e9d3 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -1203,7 +1203,6 @@ nmi_stack_correct:
1203 pushl %eax 1203 pushl %eax
1204 CFI_ADJUST_CFA_OFFSET 4 1204 CFI_ADJUST_CFA_OFFSET 4
1205 SAVE_ALL 1205 SAVE_ALL
1206 TRACE_IRQS_OFF
1207 xorl %edx,%edx # zero error code 1206 xorl %edx,%edx # zero error code
1208 movl %esp,%eax # pt_regs pointer 1207 movl %esp,%eax # pt_regs pointer
1209 call do_nmi 1208 call do_nmi
@@ -1244,7 +1243,6 @@ nmi_espfix_stack:
1244 pushl %eax 1243 pushl %eax
1245 CFI_ADJUST_CFA_OFFSET 4 1244 CFI_ADJUST_CFA_OFFSET 4
1246 SAVE_ALL 1245 SAVE_ALL
1247 TRACE_IRQS_OFF
1248 FIXUP_ESPFIX_STACK # %eax == %esp 1246 FIXUP_ESPFIX_STACK # %eax == %esp
1249 xorl %edx,%edx # zero error code 1247 xorl %edx,%edx # zero error code
1250 call do_nmi 1248 call do_nmi
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 1954a9662203..c092e7d2686d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -52,6 +52,7 @@
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/percpu.h>
55 56
56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h> 58#include <linux/elf-em.h>
@@ -209,7 +210,7 @@ ENTRY(native_usergs_sysret64)
209 210
210 /* %rsp:at FRAMEEND */ 211 /* %rsp:at FRAMEEND */
211 .macro FIXUP_TOP_OF_STACK tmp offset=0 212 .macro FIXUP_TOP_OF_STACK tmp offset=0
212 movq %gs:pda_oldrsp,\tmp 213 movq PER_CPU_VAR(old_rsp),\tmp
213 movq \tmp,RSP+\offset(%rsp) 214 movq \tmp,RSP+\offset(%rsp)
214 movq $__USER_DS,SS+\offset(%rsp) 215 movq $__USER_DS,SS+\offset(%rsp)
215 movq $__USER_CS,CS+\offset(%rsp) 216 movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +221,7 @@ ENTRY(native_usergs_sysret64)
220 221
221 .macro RESTORE_TOP_OF_STACK tmp offset=0 222 .macro RESTORE_TOP_OF_STACK tmp offset=0
222 movq RSP+\offset(%rsp),\tmp 223 movq RSP+\offset(%rsp),\tmp
223 movq \tmp,%gs:pda_oldrsp 224 movq \tmp,PER_CPU_VAR(old_rsp)
224 movq EFLAGS+\offset(%rsp),\tmp 225 movq EFLAGS+\offset(%rsp),\tmp
225 movq \tmp,R11+\offset(%rsp) 226 movq \tmp,R11+\offset(%rsp)
226 .endm 227 .endm
@@ -336,15 +337,15 @@ ENTRY(save_args)
336 je 1f 337 je 1f
337 SWAPGS 338 SWAPGS
338 /* 339 /*
339 * irqcount is used to check if a CPU is already on an interrupt stack 340 * irq_count is used to check if a CPU is already on an interrupt stack
340 * or not. While this is essentially redundant with preempt_count it is 341 * or not. While this is essentially redundant with preempt_count it is
341 * a little cheaper to use a separate counter in the PDA (short of 342 * a little cheaper to use a separate counter in the PDA (short of
342 * moving irq_enter into assembly, which would be too much work) 343 * moving irq_enter into assembly, which would be too much work)
343 */ 344 */
3441: incl %gs:pda_irqcount 3451: incl PER_CPU_VAR(irq_count)
345 jne 2f 346 jne 2f
346 popq_cfi %rax /* move return address... */ 347 popq_cfi %rax /* move return address... */
347 mov %gs:pda_irqstackptr,%rsp 348 mov PER_CPU_VAR(irq_stack_ptr),%rsp
348 EMPTY_FRAME 0 349 EMPTY_FRAME 0
349 pushq_cfi %rax /* ... to the new stack */ 350 pushq_cfi %rax /* ... to the new stack */
350 /* 351 /*
@@ -467,7 +468,7 @@ END(ret_from_fork)
467ENTRY(system_call) 468ENTRY(system_call)
468 CFI_STARTPROC simple 469 CFI_STARTPROC simple
469 CFI_SIGNAL_FRAME 470 CFI_SIGNAL_FRAME
470 CFI_DEF_CFA rsp,PDA_STACKOFFSET 471 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
471 CFI_REGISTER rip,rcx 472 CFI_REGISTER rip,rcx
472 /*CFI_REGISTER rflags,r11*/ 473 /*CFI_REGISTER rflags,r11*/
473 SWAPGS_UNSAFE_STACK 474 SWAPGS_UNSAFE_STACK
@@ -478,8 +479,8 @@ ENTRY(system_call)
478 */ 479 */
479ENTRY(system_call_after_swapgs) 480ENTRY(system_call_after_swapgs)
480 481
481 movq %rsp,%gs:pda_oldrsp 482 movq %rsp,PER_CPU_VAR(old_rsp)
482 movq %gs:pda_kernelstack,%rsp 483 movq PER_CPU_VAR(kernel_stack),%rsp
483 /* 484 /*
484 * No need to follow this irqs off/on section - it's straight 485 * No need to follow this irqs off/on section - it's straight
485 * and short: 486 * and short:
@@ -522,7 +523,7 @@ sysret_check:
522 CFI_REGISTER rip,rcx 523 CFI_REGISTER rip,rcx
523 RESTORE_ARGS 0,-ARG_SKIP,1 524 RESTORE_ARGS 0,-ARG_SKIP,1
524 /*CFI_REGISTER rflags,r11*/ 525 /*CFI_REGISTER rflags,r11*/
525 movq %gs:pda_oldrsp, %rsp 526 movq PER_CPU_VAR(old_rsp), %rsp
526 USERGS_SYSRET64 527 USERGS_SYSRET64
527 528
528 CFI_RESTORE_STATE 529 CFI_RESTORE_STATE
@@ -832,11 +833,11 @@ common_interrupt:
832 XCPT_FRAME 833 XCPT_FRAME
833 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 834 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
834 interrupt do_IRQ 835 interrupt do_IRQ
835 /* 0(%rsp): oldrsp-ARGOFFSET */ 836 /* 0(%rsp): old_rsp-ARGOFFSET */
836ret_from_intr: 837ret_from_intr:
837 DISABLE_INTERRUPTS(CLBR_NONE) 838 DISABLE_INTERRUPTS(CLBR_NONE)
838 TRACE_IRQS_OFF 839 TRACE_IRQS_OFF
839 decl %gs:pda_irqcount 840 decl PER_CPU_VAR(irq_count)
840 leaveq 841 leaveq
841 CFI_DEF_CFA_REGISTER rsp 842 CFI_DEF_CFA_REGISTER rsp
842 CFI_ADJUST_CFA_OFFSET -8 843 CFI_ADJUST_CFA_OFFSET -8
@@ -1077,10 +1078,10 @@ ENTRY(\sym)
1077 TRACE_IRQS_OFF 1078 TRACE_IRQS_OFF
1078 movq %rsp,%rdi /* pt_regs pointer */ 1079 movq %rsp,%rdi /* pt_regs pointer */
1079 xorl %esi,%esi /* no error code */ 1080 xorl %esi,%esi /* no error code */
1080 movq %gs:pda_data_offset, %rbp 1081 PER_CPU(init_tss, %rbp)
1081 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1082 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1082 call \do_sym 1083 call \do_sym
1083 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1084 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1084 jmp paranoid_exit /* %ebx: no swapgs flag */ 1085 jmp paranoid_exit /* %ebx: no swapgs flag */
1085 CFI_ENDPROC 1086 CFI_ENDPROC
1086END(\sym) 1087END(\sym)
@@ -1264,14 +1265,14 @@ ENTRY(call_softirq)
1264 CFI_REL_OFFSET rbp,0 1265 CFI_REL_OFFSET rbp,0
1265 mov %rsp,%rbp 1266 mov %rsp,%rbp
1266 CFI_DEF_CFA_REGISTER rbp 1267 CFI_DEF_CFA_REGISTER rbp
1267 incl %gs:pda_irqcount 1268 incl PER_CPU_VAR(irq_count)
1268 cmove %gs:pda_irqstackptr,%rsp 1269 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1269 push %rbp # backlink for old unwinder 1270 push %rbp # backlink for old unwinder
1270 call __do_softirq 1271 call __do_softirq
1271 leaveq 1272 leaveq
1272 CFI_DEF_CFA_REGISTER rsp 1273 CFI_DEF_CFA_REGISTER rsp
1273 CFI_ADJUST_CFA_OFFSET -8 1274 CFI_ADJUST_CFA_OFFSET -8
1274 decl %gs:pda_irqcount 1275 decl PER_CPU_VAR(irq_count)
1275 ret 1276 ret
1276 CFI_ENDPROC 1277 CFI_ENDPROC
1277END(call_softirq) 1278END(call_softirq)
@@ -1301,15 +1302,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1301 movq %rdi, %rsp # we don't return, adjust the stack frame 1302 movq %rdi, %rsp # we don't return, adjust the stack frame
1302 CFI_ENDPROC 1303 CFI_ENDPROC
1303 DEFAULT_FRAME 1304 DEFAULT_FRAME
130411: incl %gs:pda_irqcount 130511: incl PER_CPU_VAR(irq_count)
1305 movq %rsp,%rbp 1306 movq %rsp,%rbp
1306 CFI_DEF_CFA_REGISTER rbp 1307 CFI_DEF_CFA_REGISTER rbp
1307 cmovzq %gs:pda_irqstackptr,%rsp 1308 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1308 pushq %rbp # backlink for old unwinder 1309 pushq %rbp # backlink for old unwinder
1309 call xen_evtchn_do_upcall 1310 call xen_evtchn_do_upcall
1310 popq %rsp 1311 popq %rsp
1311 CFI_DEF_CFA_REGISTER rsp 1312 CFI_DEF_CFA_REGISTER rsp
1312 decl %gs:pda_irqcount 1313 decl PER_CPU_VAR(irq_count)
1313 jmp error_exit 1314 jmp error_exit
1314 CFI_ENDPROC 1315 CFI_ENDPROC
1315END(do_hypervisor_callback) 1316END(do_hypervisor_callback)
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c4b935..af67d3227ea6 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,27 +26,6 @@
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda;
31
32#ifdef CONFIG_SMP
33/*
34 * We install an empty cpu_pda pointer table to indicate to early users
35 * (numa_set_node) that the cpu_pda pointer table for cpus other than
36 * the boot cpu is not yet setup.
37 */
38static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
39#else
40static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
41#endif
42
43void __init x86_64_init_pda(void)
44{
45 _cpu_pda = __cpu_pda;
46 cpu_pda(0) = &_boot_cpu_pda;
47 pda_init(0);
48}
49
50static void __init zap_identity_mappings(void) 29static void __init zap_identity_mappings(void)
51{ 30{
52 pgd_t *pgd = pgd_offset_k(0UL); 31 pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,7 +91,7 @@ void __init x86_64_start_kernel(char * real_mode_data)
112 if (console_loglevel == 10) 91 if (console_loglevel == 10)
113 early_printk("Kernel alive\n"); 92 early_printk("Kernel alive\n");
114 93
115 x86_64_init_pda(); 94 pda_init(0);
116 95
117 x86_64_start_reservations(real_mode_data); 96 x86_64_start_reservations(real_mode_data);
118} 97}
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d495563..c8ace880661b 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
@@ -204,6 +205,23 @@ ENTRY(secondary_startup_64)
204 pushq $0 205 pushq $0
205 popfq 206 popfq
206 207
208#ifdef CONFIG_SMP
209 /*
210 * early_gdt_base should point to the gdt_page in static percpu init
211 * data area. Computing this requires two symbols - __per_cpu_load
212 * and per_cpu__gdt_page. As linker can't do no such relocation, do
213 * it by hand. As early_gdt_descr is manipulated by C code for
214 * secondary CPUs, this should be done only once for the boot CPU
215 * when early_gdt_descr_base contains zero.
216 */
217 movq early_gdt_descr_base(%rip), %rax
218 testq %rax, %rax
219 jnz 1f
220 movq $__per_cpu_load, %rax
221 addq $per_cpu__gdt_page, %rax
222 movq %rax, early_gdt_descr_base(%rip)
2231:
224#endif
207 /* 225 /*
208 * We must switch to a new descriptor in kernel space for the GDT 226 * We must switch to a new descriptor in kernel space for the GDT
209 * because soon the kernel won't have access anymore to the userspace 227 * because soon the kernel won't have access anymore to the userspace
@@ -226,12 +244,18 @@ ENTRY(secondary_startup_64)
226 movl %eax,%fs 244 movl %eax,%fs
227 movl %eax,%gs 245 movl %eax,%gs
228 246
229 /* 247 /* Set up %gs.
230 * Setup up a dummy PDA. this is just for some early bootup code 248 *
231 * that does in_interrupt() 249 * On SMP, %gs should point to the per-cpu area. For initial
232 */ 250 * boot, make %gs point to the init data section. For a
251 * secondary CPU,initial_gs should be set to its pda address
252 * before the CPU runs this code.
253 *
254 * On UP, initial_gs points to PER_CPU_VAR(__pda) and doesn't
255 * change.
256 */
233 movl $MSR_GS_BASE,%ecx 257 movl $MSR_GS_BASE,%ecx
234 movq $empty_zero_page,%rax 258 movq initial_gs(%rip),%rax
235 movq %rax,%rdx 259 movq %rax,%rdx
236 shrq $32,%rdx 260 shrq $32,%rdx
237 wrmsr 261 wrmsr
@@ -257,6 +281,12 @@ ENTRY(secondary_startup_64)
257 .align 8 281 .align 8
258 ENTRY(initial_code) 282 ENTRY(initial_code)
259 .quad x86_64_start_kernel 283 .quad x86_64_start_kernel
284 ENTRY(initial_gs)
285#ifdef CONFIG_SMP
286 .quad __per_cpu_load
287#else
288 .quad PER_CPU_VAR(__pda)
289#endif
260 __FINITDATA 290 __FINITDATA
261 291
262 ENTRY(stack_start) 292 ENTRY(stack_start)
@@ -401,7 +431,12 @@ NEXT_PAGE(level2_spare_pgt)
401 .globl early_gdt_descr 431 .globl early_gdt_descr
402early_gdt_descr: 432early_gdt_descr:
403 .word GDT_ENTRIES*8-1 433 .word GDT_ENTRIES*8-1
404 .quad per_cpu__gdt_page 434#ifdef CONFIG_SMP
435early_gdt_descr_base:
436 .quad 0x0000000000000000
437#else
438 .quad per_cpu__gdt_page
439#endif
405 440
406ENTRY(phys_base) 441ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 442 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 1c4a1302536c..f79660390724 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -46,6 +46,7 @@
46#include <asm/idle.h> 46#include <asm/idle.h>
47#include <asm/io.h> 47#include <asm/io.h>
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/cpu.h>
49#include <asm/desc.h> 50#include <asm/desc.h>
50#include <asm/proto.h> 51#include <asm/proto.h>
51#include <asm/acpi.h> 52#include <asm/acpi.h>
@@ -82,11 +83,11 @@ static DEFINE_SPINLOCK(vector_lock);
82int nr_ioapic_registers[MAX_IO_APICS]; 83int nr_ioapic_registers[MAX_IO_APICS];
83 84
84/* I/O APIC entries */ 85/* I/O APIC entries */
85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 86struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86int nr_ioapics; 87int nr_ioapics;
87 88
88/* MP IRQ source entries */ 89/* MP IRQ source entries */
89struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 90struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
90 91
91/* # of MP IRQ source entries */ 92/* # of MP IRQ source entries */
92int mp_irq_entries; 93int mp_irq_entries;
@@ -356,7 +357,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
356 357
357 if (!cfg->move_in_progress) { 358 if (!cfg->move_in_progress) {
358 /* it means that domain is not changed */ 359 /* it means that domain is not changed */
359 if (!cpumask_intersects(&desc->affinity, mask)) 360 if (!cpumask_intersects(desc->affinity, mask))
360 cfg->move_desc_pending = 1; 361 cfg->move_desc_pending = 1;
361 } 362 }
362} 363}
@@ -386,7 +387,7 @@ struct io_apic {
386static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 387static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
387{ 388{
388 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 389 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
389 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); 390 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
390} 391}
391 392
392static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 393static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -579,9 +580,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
579 if (assign_irq_vector(irq, cfg, mask)) 580 if (assign_irq_vector(irq, cfg, mask))
580 return BAD_APICID; 581 return BAD_APICID;
581 582
582 cpumask_and(&desc->affinity, cfg->domain, mask); 583 cpumask_and(desc->affinity, cfg->domain, mask);
583 set_extra_move_desc(desc, mask); 584 set_extra_move_desc(desc, mask);
584 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); 585 return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
585} 586}
586 587
587static void 588static void
@@ -944,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type)
944 int i; 945 int i;
945 946
946 for (i = 0; i < mp_irq_entries; i++) 947 for (i = 0; i < mp_irq_entries; i++)
947 if (mp_irqs[i].mp_irqtype == type && 948 if (mp_irqs[i].irqtype == type &&
948 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || 949 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
949 mp_irqs[i].mp_dstapic == MP_APIC_ALL) && 950 mp_irqs[i].dstapic == MP_APIC_ALL) &&
950 mp_irqs[i].mp_dstirq == pin) 951 mp_irqs[i].dstirq == pin)
951 return i; 952 return i;
952 953
953 return -1; 954 return -1;
@@ -961,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type)
961 int i; 962 int i;
962 963
963 for (i = 0; i < mp_irq_entries; i++) { 964 for (i = 0; i < mp_irq_entries; i++) {
964 int lbus = mp_irqs[i].mp_srcbus; 965 int lbus = mp_irqs[i].srcbus;
965 966
966 if (test_bit(lbus, mp_bus_not_pci) && 967 if (test_bit(lbus, mp_bus_not_pci) &&
967 (mp_irqs[i].mp_irqtype == type) && 968 (mp_irqs[i].irqtype == type) &&
968 (mp_irqs[i].mp_srcbusirq == irq)) 969 (mp_irqs[i].srcbusirq == irq))
969 970
970 return mp_irqs[i].mp_dstirq; 971 return mp_irqs[i].dstirq;
971 } 972 }
972 return -1; 973 return -1;
973} 974}
@@ -977,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type)
977 int i; 978 int i;
978 979
979 for (i = 0; i < mp_irq_entries; i++) { 980 for (i = 0; i < mp_irq_entries; i++) {
980 int lbus = mp_irqs[i].mp_srcbus; 981 int lbus = mp_irqs[i].srcbus;
981 982
982 if (test_bit(lbus, mp_bus_not_pci) && 983 if (test_bit(lbus, mp_bus_not_pci) &&
983 (mp_irqs[i].mp_irqtype == type) && 984 (mp_irqs[i].irqtype == type) &&
984 (mp_irqs[i].mp_srcbusirq == irq)) 985 (mp_irqs[i].srcbusirq == irq))
985 break; 986 break;
986 } 987 }
987 if (i < mp_irq_entries) { 988 if (i < mp_irq_entries) {
988 int apic; 989 int apic;
989 for(apic = 0; apic < nr_ioapics; apic++) { 990 for(apic = 0; apic < nr_ioapics; apic++) {
990 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) 991 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
991 return apic; 992 return apic;
992 } 993 }
993 } 994 }
@@ -1012,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1012 return -1; 1013 return -1;
1013 } 1014 }
1014 for (i = 0; i < mp_irq_entries; i++) { 1015 for (i = 0; i < mp_irq_entries; i++) {
1015 int lbus = mp_irqs[i].mp_srcbus; 1016 int lbus = mp_irqs[i].srcbus;
1016 1017
1017 for (apic = 0; apic < nr_ioapics; apic++) 1018 for (apic = 0; apic < nr_ioapics; apic++)
1018 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || 1019 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1019 mp_irqs[i].mp_dstapic == MP_APIC_ALL) 1020 mp_irqs[i].dstapic == MP_APIC_ALL)
1020 break; 1021 break;
1021 1022
1022 if (!test_bit(lbus, mp_bus_not_pci) && 1023 if (!test_bit(lbus, mp_bus_not_pci) &&
1023 !mp_irqs[i].mp_irqtype && 1024 !mp_irqs[i].irqtype &&
1024 (bus == lbus) && 1025 (bus == lbus) &&
1025 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { 1026 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1026 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); 1027 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1027 1028
1028 if (!(apic || IO_APIC_IRQ(irq))) 1029 if (!(apic || IO_APIC_IRQ(irq)))
1029 continue; 1030 continue;
1030 1031
1031 if (pin == (mp_irqs[i].mp_srcbusirq & 3)) 1032 if (pin == (mp_irqs[i].srcbusirq & 3))
1032 return irq; 1033 return irq;
1033 /* 1034 /*
1034 * Use the first all-but-pin matching entry as a 1035 * Use the first all-but-pin matching entry as a
@@ -1071,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq)
1071 * EISA conforming in the MP table, that means its trigger type must 1072 * EISA conforming in the MP table, that means its trigger type must
1072 * be read in from the ELCR */ 1073 * be read in from the ELCR */
1073 1074
1074#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) 1075#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
1075#define default_EISA_polarity(idx) default_ISA_polarity(idx) 1076#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1076 1077
1077/* PCI interrupts are always polarity one level triggered, 1078/* PCI interrupts are always polarity one level triggered,
@@ -1088,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq)
1088 1089
1089static int MPBIOS_polarity(int idx) 1090static int MPBIOS_polarity(int idx)
1090{ 1091{
1091 int bus = mp_irqs[idx].mp_srcbus; 1092 int bus = mp_irqs[idx].srcbus;
1092 int polarity; 1093 int polarity;
1093 1094
1094 /* 1095 /*
1095 * Determine IRQ line polarity (high active or low active): 1096 * Determine IRQ line polarity (high active or low active):
1096 */ 1097 */
1097 switch (mp_irqs[idx].mp_irqflag & 3) 1098 switch (mp_irqs[idx].irqflag & 3)
1098 { 1099 {
1099 case 0: /* conforms, ie. bus-type dependent polarity */ 1100 case 0: /* conforms, ie. bus-type dependent polarity */
1100 if (test_bit(bus, mp_bus_not_pci)) 1101 if (test_bit(bus, mp_bus_not_pci))
@@ -1130,13 +1131,13 @@ static int MPBIOS_polarity(int idx)
1130 1131
1131static int MPBIOS_trigger(int idx) 1132static int MPBIOS_trigger(int idx)
1132{ 1133{
1133 int bus = mp_irqs[idx].mp_srcbus; 1134 int bus = mp_irqs[idx].srcbus;
1134 int trigger; 1135 int trigger;
1135 1136
1136 /* 1137 /*
1137 * Determine IRQ trigger mode (edge or level sensitive): 1138 * Determine IRQ trigger mode (edge or level sensitive):
1138 */ 1139 */
1139 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) 1140 switch ((mp_irqs[idx].irqflag>>2) & 3)
1140 { 1141 {
1141 case 0: /* conforms, ie. bus-type dependent */ 1142 case 0: /* conforms, ie. bus-type dependent */
1142 if (test_bit(bus, mp_bus_not_pci)) 1143 if (test_bit(bus, mp_bus_not_pci))
@@ -1214,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq);
1214static int pin_2_irq(int idx, int apic, int pin) 1215static int pin_2_irq(int idx, int apic, int pin)
1215{ 1216{
1216 int irq, i; 1217 int irq, i;
1217 int bus = mp_irqs[idx].mp_srcbus; 1218 int bus = mp_irqs[idx].srcbus;
1218 1219
1219 /* 1220 /*
1220 * Debugging check, we are in big trouble if this message pops up! 1221 * Debugging check, we are in big trouble if this message pops up!
1221 */ 1222 */
1222 if (mp_irqs[idx].mp_dstirq != pin) 1223 if (mp_irqs[idx].dstirq != pin)
1223 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1224 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1224 1225
1225 if (test_bit(bus, mp_bus_not_pci)) { 1226 if (test_bit(bus, mp_bus_not_pci)) {
1226 irq = mp_irqs[idx].mp_srcbusirq; 1227 irq = mp_irqs[idx].srcbusirq;
1227 } else { 1228 } else {
1228 /* 1229 /*
1229 * PCI IRQs are mapped in order 1230 * PCI IRQs are mapped in order
@@ -1566,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1566 apic_printk(APIC_VERBOSE,KERN_DEBUG 1567 apic_printk(APIC_VERBOSE,KERN_DEBUG
1567 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1568 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1568 "IRQ %d Mode:%i Active:%i)\n", 1569 "IRQ %d Mode:%i Active:%i)\n",
1569 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1570 apic, mp_ioapics[apic].apicid, pin, cfg->vector,
1570 irq, trigger, polarity); 1571 irq, trigger, polarity);
1571 1572
1572 1573
1573 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1574 if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry,
1574 dest, trigger, polarity, cfg->vector)) { 1575 dest, trigger, polarity, cfg->vector)) {
1575 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1576 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1576 mp_ioapics[apic].mp_apicid, pin); 1577 mp_ioapics[apic].apicid, pin);
1577 __clear_irq_vector(irq, cfg); 1578 __clear_irq_vector(irq, cfg);
1578 return; 1579 return;
1579 } 1580 }
@@ -1604,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void)
1604 notcon = 1; 1605 notcon = 1;
1605 apic_printk(APIC_VERBOSE, 1606 apic_printk(APIC_VERBOSE,
1606 KERN_DEBUG " %d-%d", 1607 KERN_DEBUG " %d-%d",
1607 mp_ioapics[apic].mp_apicid, 1608 mp_ioapics[apic].apicid, pin);
1608 pin);
1609 } else 1609 } else
1610 apic_printk(APIC_VERBOSE, " %d-%d", 1610 apic_printk(APIC_VERBOSE, " %d-%d",
1611 mp_ioapics[apic].mp_apicid, 1611 mp_ioapics[apic].apicid, pin);
1612 pin);
1613 continue; 1612 continue;
1614 } 1613 }
1615 if (notcon) { 1614 if (notcon) {
@@ -1699,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1699 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1698 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1700 for (i = 0; i < nr_ioapics; i++) 1699 for (i = 0; i < nr_ioapics; i++)
1701 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1700 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1702 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); 1701 mp_ioapics[i].apicid, nr_ioapic_registers[i]);
1703 1702
1704 /* 1703 /*
1705 * We are a bit conservative about what we expect. We have to 1704 * We are a bit conservative about what we expect. We have to
@@ -1719,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1719 spin_unlock_irqrestore(&ioapic_lock, flags); 1718 spin_unlock_irqrestore(&ioapic_lock, flags);
1720 1719
1721 printk("\n"); 1720 printk("\n");
1722 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1721 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
1723 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1722 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1724 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1723 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1725 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1724 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -2121,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
2121 reg_00.raw = io_apic_read(apic, 0); 2120 reg_00.raw = io_apic_read(apic, 0);
2122 spin_unlock_irqrestore(&ioapic_lock, flags); 2121 spin_unlock_irqrestore(&ioapic_lock, flags);
2123 2122
2124 old_id = mp_ioapics[apic].mp_apicid; 2123 old_id = mp_ioapics[apic].apicid;
2125 2124
2126 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { 2125 if (mp_ioapics[apic].apicid >= get_physical_broadcast()) {
2127 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 2126 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2128 apic, mp_ioapics[apic].mp_apicid); 2127 apic, mp_ioapics[apic].apicid);
2129 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2128 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2130 reg_00.bits.ID); 2129 reg_00.bits.ID);
2131 mp_ioapics[apic].mp_apicid = reg_00.bits.ID; 2130 mp_ioapics[apic].apicid = reg_00.bits.ID;
2132 } 2131 }
2133 2132
2134 /* 2133 /*
@@ -2137,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
2137 * 'stuck on smp_invalidate_needed IPI wait' messages. 2136 * 'stuck on smp_invalidate_needed IPI wait' messages.
2138 */ 2137 */
2139 if (check_apicid_used(phys_id_present_map, 2138 if (check_apicid_used(phys_id_present_map,
2140 mp_ioapics[apic].mp_apicid)) { 2139 mp_ioapics[apic].apicid)) {
2141 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 2140 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2142 apic, mp_ioapics[apic].mp_apicid); 2141 apic, mp_ioapics[apic].apicid);
2143 for (i = 0; i < get_physical_broadcast(); i++) 2142 for (i = 0; i < get_physical_broadcast(); i++)
2144 if (!physid_isset(i, phys_id_present_map)) 2143 if (!physid_isset(i, phys_id_present_map))
2145 break; 2144 break;
@@ -2148,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
2148 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2147 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2149 i); 2148 i);
2150 physid_set(i, phys_id_present_map); 2149 physid_set(i, phys_id_present_map);
2151 mp_ioapics[apic].mp_apicid = i; 2150 mp_ioapics[apic].apicid = i;
2152 } else { 2151 } else {
2153 physid_mask_t tmp; 2152 physid_mask_t tmp;
2154 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); 2153 tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid);
2155 apic_printk(APIC_VERBOSE, "Setting %d in the " 2154 apic_printk(APIC_VERBOSE, "Setting %d in the "
2156 "phys_id_present_map\n", 2155 "phys_id_present_map\n",
2157 mp_ioapics[apic].mp_apicid); 2156 mp_ioapics[apic].apicid);
2158 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2157 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2159 } 2158 }
2160 2159
@@ -2163,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
2163 * We need to adjust the IRQ routing table 2162 * We need to adjust the IRQ routing table
2164 * if the ID changed. 2163 * if the ID changed.
2165 */ 2164 */
2166 if (old_id != mp_ioapics[apic].mp_apicid) 2165 if (old_id != mp_ioapics[apic].apicid)
2167 for (i = 0; i < mp_irq_entries; i++) 2166 for (i = 0; i < mp_irq_entries; i++)
2168 if (mp_irqs[i].mp_dstapic == old_id) 2167 if (mp_irqs[i].dstapic == old_id)
2169 mp_irqs[i].mp_dstapic 2168 mp_irqs[i].dstapic
2170 = mp_ioapics[apic].mp_apicid; 2169 = mp_ioapics[apic].apicid;
2171 2170
2172 /* 2171 /*
2173 * Read the right value from the MPC table and 2172 * Read the right value from the MPC table and
@@ -2175,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
2175 */ 2174 */
2176 apic_printk(APIC_VERBOSE, KERN_INFO 2175 apic_printk(APIC_VERBOSE, KERN_INFO
2177 "...changing IO-APIC physical APIC ID to %d ...", 2176 "...changing IO-APIC physical APIC ID to %d ...",
2178 mp_ioapics[apic].mp_apicid); 2177 mp_ioapics[apic].apicid);
2179 2178
2180 reg_00.bits.ID = mp_ioapics[apic].mp_apicid; 2179 reg_00.bits.ID = mp_ioapics[apic].apicid;
2181 spin_lock_irqsave(&ioapic_lock, flags); 2180 spin_lock_irqsave(&ioapic_lock, flags);
2182 io_apic_write(apic, 0, reg_00.raw); 2181 io_apic_write(apic, 0, reg_00.raw);
2183 spin_unlock_irqrestore(&ioapic_lock, flags); 2182 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2188,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
2188 spin_lock_irqsave(&ioapic_lock, flags); 2187 spin_lock_irqsave(&ioapic_lock, flags);
2189 reg_00.raw = io_apic_read(apic, 0); 2188 reg_00.raw = io_apic_read(apic, 0);
2190 spin_unlock_irqrestore(&ioapic_lock, flags); 2189 spin_unlock_irqrestore(&ioapic_lock, flags);
2191 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) 2190 if (reg_00.bits.ID != mp_ioapics[apic].apicid)
2192 printk("could not set ID!\n"); 2191 printk("could not set ID!\n");
2193 else 2192 else
2194 apic_printk(APIC_VERBOSE, " ok.\n"); 2193 apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2383,7 +2382,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2383 if (cfg->move_in_progress) 2382 if (cfg->move_in_progress)
2384 send_cleanup_vector(cfg); 2383 send_cleanup_vector(cfg);
2385 2384
2386 cpumask_copy(&desc->affinity, mask); 2385 cpumask_copy(desc->affinity, mask);
2387} 2386}
2388 2387
2389static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2388static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2405,11 +2404,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2405 } 2404 }
2406 2405
2407 /* everthing is clear. we have right of way */ 2406 /* everthing is clear. we have right of way */
2408 migrate_ioapic_irq_desc(desc, &desc->pending_mask); 2407 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2409 2408
2410 ret = 0; 2409 ret = 0;
2411 desc->status &= ~IRQ_MOVE_PENDING; 2410 desc->status &= ~IRQ_MOVE_PENDING;
2412 cpumask_clear(&desc->pending_mask); 2411 cpumask_clear(desc->pending_mask);
2413 2412
2414unmask: 2413unmask:
2415 unmask_IO_APIC_irq_desc(desc); 2414 unmask_IO_APIC_irq_desc(desc);
@@ -2434,7 +2433,7 @@ static void ir_irq_migration(struct work_struct *work)
2434 continue; 2433 continue;
2435 } 2434 }
2436 2435
2437 desc->chip->set_affinity(irq, &desc->pending_mask); 2436 desc->chip->set_affinity(irq, desc->pending_mask);
2438 spin_unlock_irqrestore(&desc->lock, flags); 2437 spin_unlock_irqrestore(&desc->lock, flags);
2439 } 2438 }
2440 } 2439 }
@@ -2448,7 +2447,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2448{ 2447{
2449 if (desc->status & IRQ_LEVEL) { 2448 if (desc->status & IRQ_LEVEL) {
2450 desc->status |= IRQ_MOVE_PENDING; 2449 desc->status |= IRQ_MOVE_PENDING;
2451 cpumask_copy(&desc->pending_mask, mask); 2450 cpumask_copy(desc->pending_mask, mask);
2452 migrate_irq_remapped_level_desc(desc); 2451 migrate_irq_remapped_level_desc(desc);
2453 return; 2452 return;
2454 } 2453 }
@@ -2516,7 +2515,7 @@ static void irq_complete_move(struct irq_desc **descp)
2516 2515
2517 /* domain has not changed, but affinity did */ 2516 /* domain has not changed, but affinity did */
2518 me = smp_processor_id(); 2517 me = smp_processor_id();
2519 if (cpu_isset(me, desc->affinity)) { 2518 if (cpumask_test_cpu(me, desc->affinity)) {
2520 *descp = desc = move_irq_desc(desc, me); 2519 *descp = desc = move_irq_desc(desc, me);
2521 /* get the new one */ 2520 /* get the new one */
2522 cfg = desc->chip_data; 2521 cfg = desc->chip_data;
@@ -3117,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev)
3117 3116
3118 spin_lock_irqsave(&ioapic_lock, flags); 3117 spin_lock_irqsave(&ioapic_lock, flags);
3119 reg_00.raw = io_apic_read(dev->id, 0); 3118 reg_00.raw = io_apic_read(dev->id, 0);
3120 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { 3119 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3121 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; 3120 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3122 io_apic_write(dev->id, 0, reg_00.raw); 3121 io_apic_write(dev->id, 0, reg_00.raw);
3123 } 3122 }
3124 spin_unlock_irqrestore(&ioapic_lock, flags); 3123 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -3183,7 +3182,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
3183 3182
3184 irq = 0; 3183 irq = 0;
3185 spin_lock_irqsave(&vector_lock, flags); 3184 spin_lock_irqsave(&vector_lock, flags);
3186 for (new = irq_want; new < NR_IRQS; new++) { 3185 for (new = irq_want; new < nr_irqs; new++) {
3187 if (platform_legacy_irq(new)) 3186 if (platform_legacy_irq(new))
3188 continue; 3187 continue;
3189 3188
@@ -3258,6 +3257,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3258 int err; 3257 int err;
3259 unsigned dest; 3258 unsigned dest;
3260 3259
3260 if (disable_apic)
3261 return -ENXIO;
3262
3261 cfg = irq_cfg(irq); 3263 cfg = irq_cfg(irq);
3262 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3264 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3263 if (err) 3265 if (err)
@@ -3726,6 +3728,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3726 struct irq_cfg *cfg; 3728 struct irq_cfg *cfg;
3727 int err; 3729 int err;
3728 3730
3731 if (disable_apic)
3732 return -ENXIO;
3733
3729 cfg = irq_cfg(irq); 3734 cfg = irq_cfg(irq);
3730 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3735 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3731 if (!err) { 3736 if (!err) {
@@ -3850,6 +3855,22 @@ void __init probe_nr_irqs_gsi(void)
3850 nr_irqs_gsi = nr; 3855 nr_irqs_gsi = nr;
3851} 3856}
3852 3857
3858#ifdef CONFIG_SPARSE_IRQ
3859int __init arch_probe_nr_irqs(void)
3860{
3861 int nr;
3862
3863 nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ?
3864 (NR_VECTORS + (8 * nr_cpu_ids)) :
3865 (NR_VECTORS + (32 * nr_ioapics)));
3866
3867 if (nr < nr_irqs && nr > nr_irqs_gsi)
3868 nr_irqs = nr;
3869
3870 return 0;
3871}
3872#endif
3873
3853/* -------------------------------------------------------------------------- 3874/* --------------------------------------------------------------------------
3854 ACPI-based IOAPIC Configuration 3875 ACPI-based IOAPIC Configuration
3855 -------------------------------------------------------------------------- */ 3876 -------------------------------------------------------------------------- */
@@ -3984,8 +4005,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3984 return -1; 4005 return -1;
3985 4006
3986 for (i = 0; i < mp_irq_entries; i++) 4007 for (i = 0; i < mp_irq_entries; i++)
3987 if (mp_irqs[i].mp_irqtype == mp_INT && 4008 if (mp_irqs[i].irqtype == mp_INT &&
3988 mp_irqs[i].mp_srcbusirq == bus_irq) 4009 mp_irqs[i].srcbusirq == bus_irq)
3989 break; 4010 break;
3990 if (i >= mp_irq_entries) 4011 if (i >= mp_irq_entries)
3991 return -1; 4012 return -1;
@@ -4039,7 +4060,7 @@ void __init setup_ioapic_dest(void)
4039 */ 4060 */
4040 if (desc->status & 4061 if (desc->status &
4041 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4062 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4042 mask = &desc->affinity; 4063 mask = desc->affinity;
4043 else 4064 else
4044 mask = TARGET_CPUS; 4065 mask = TARGET_CPUS;
4045 4066
@@ -4100,7 +4121,7 @@ void __init ioapic_init_mappings(void)
4100 ioapic_res = ioapic_setup_resources(); 4121 ioapic_res = ioapic_setup_resources();
4101 for (i = 0; i < nr_ioapics; i++) { 4122 for (i = 0; i < nr_ioapics; i++) {
4102 if (smp_found_config) { 4123 if (smp_found_config) {
4103 ioapic_phys = mp_ioapics[i].mp_apicaddr; 4124 ioapic_phys = mp_ioapics[i].apicaddr;
4104#ifdef CONFIG_X86_32 4125#ifdef CONFIG_X86_32
4105 if (!ioapic_phys) { 4126 if (!ioapic_phys) {
4106 printk(KERN_ERR 4127 printk(KERN_ERR
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 22f650db917f..a6bca1d33a8a 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
36#endif 36#endif
37} 37}
38 38
39#ifdef CONFIG_X86_32 39#define irq_stats(x) (&per_cpu(irq_stat, x))
40# define irq_stats(x) (&per_cpu(irq_stat, x))
41#else
42# define irq_stats(x) cpu_pda(x)
43#endif
44/* 40/*
45 * /proc/interrupts printing: 41 * /proc/interrupts printing:
46 */ 42 */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 74b9ff7341e9..e0f29be8ab0b 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -248,7 +248,7 @@ void fixup_irqs(void)
248 if (irq == 2) 248 if (irq == 2)
249 continue; 249 continue;
250 250
251 affinity = &desc->affinity; 251 affinity = desc->affinity;
252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
253 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
254 affinity = cpu_all_mask; 254 affinity = cpu_all_mask;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 63c88e6ec025..1db05247b47f 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -19,6 +19,9 @@
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/idle.h> 20#include <asm/idle.h>
21 21
22DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
23EXPORT_PER_CPU_SYMBOL(irq_stat);
24
22/* 25/*
23 * Probabilistic stack overflow check: 26 * Probabilistic stack overflow check:
24 * 27 *
@@ -100,7 +103,7 @@ void fixup_irqs(void)
100 /* interrupt's are disabled at this point */ 103 /* interrupt's are disabled at this point */
101 spin_lock(&desc->lock); 104 spin_lock(&desc->lock);
102 105
103 affinity = &desc->affinity; 106 affinity = desc->affinity;
104 if (!irq_has_action(irq) || 107 if (!irq_has_action(irq) ||
105 cpumask_equal(affinity, cpu_online_mask)) { 108 cpumask_equal(affinity, cpu_online_mask)) {
106 spin_unlock(&desc->lock); 109 spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index b7f4c929e615..5e9f4fc51385 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -87,9 +87,9 @@
87#include <linux/cpu.h> 87#include <linux/cpu.h>
88#include <linux/firmware.h> 88#include <linux/firmware.h>
89#include <linux/platform_device.h> 89#include <linux/platform_device.h>
90#include <linux/uaccess.h>
90 91
91#include <asm/msr.h> 92#include <asm/msr.h>
92#include <asm/uaccess.h>
93#include <asm/processor.h> 93#include <asm/processor.h>
94#include <asm/microcode.h> 94#include <asm/microcode.h>
95 95
@@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; 196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
197} 197}
198 198
199static inline int 199static inline int
200update_match_revision(struct microcode_header_intel *mc_header, int rev) 200update_match_revision(struct microcode_header_intel *mc_header, int rev)
201{ 201{
202 return (mc_header->rev <= rev) ? 0 : 1; 202 return (mc_header->rev <= rev) ? 0 : 1;
@@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device)
442 return ret; 442 return ret;
443 } 443 }
444 444
445 ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, 445 ret = generic_load_microcode(cpu, (void *)firmware->data,
446 &get_ucode_fw); 446 firmware->size, &get_ucode_fw);
447 447
448 release_firmware(firmware); 448 release_firmware(firmware);
449 449
@@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size)
460 /* We should bind the task to the CPU */ 460 /* We should bind the task to the CPU */
461 BUG_ON(cpu != raw_smp_processor_id()); 461 BUG_ON(cpu != raw_smp_processor_id());
462 462
463 return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); 463 return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
464} 464}
465 465
466static void microcode_fini_cpu(int cpu) 466static void microcode_fini_cpu(int cpu)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
index 3db0a5442eb1..0edd819050e7 100644
--- a/arch/x86/kernel/module_32.c
+++ b/arch/x86/kernel/module_32.c
@@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region)
42{ 42{
43 vfree(module_region); 43 vfree(module_region);
44 /* FIXME: If module_region == mod->init_region, trim exception 44 /* FIXME: If module_region == mod->init_region, trim exception
45 table entries. */ 45 table entries. */
46} 46}
47 47
48/* We don't need anything special. */ 48/* We don't need anything special. */
@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr,
113 *para = NULL; 113 *para = NULL;
114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
115 115
116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
117 if (!strcmp(".text", secstrings + s->sh_name)) 117 if (!strcmp(".text", secstrings + s->sh_name))
118 text = s; 118 text = s;
119 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 119 if (!strcmp(".altinstructions", secstrings + s->sh_name))
120 alt = s; 120 alt = s;
121 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 121 if (!strcmp(".smp_locks", secstrings + s->sh_name))
122 locks= s; 122 locks = s;
123 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 123 if (!strcmp(".parainstructions", secstrings + s->sh_name))
124 para = s; 124 para = s;
125 } 125 }
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 6ba87830d4b1..c23880b90b5c 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -30,14 +30,14 @@
30#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/pgtable.h> 31#include <asm/pgtable.h>
32 32
33#define DEBUGP(fmt...) 33#define DEBUGP(fmt...)
34 34
35#ifndef CONFIG_UML 35#ifndef CONFIG_UML
36void module_free(struct module *mod, void *module_region) 36void module_free(struct module *mod, void *module_region)
37{ 37{
38 vfree(module_region); 38 vfree(module_region);
39 /* FIXME: If module_region == mod->init_region, trim exception 39 /* FIXME: If module_region == mod->init_region, trim exception
40 table entries. */ 40 table entries. */
41} 41}
42 42
43void *module_alloc(unsigned long size) 43void *module_alloc(unsigned long size)
@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; 77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
78 Elf64_Sym *sym; 78 Elf64_Sym *sym;
79 void *loc; 79 void *loc;
80 u64 val; 80 u64 val;
81 81
82 DEBUGP("Applying relocate section %u to %u\n", relsec, 82 DEBUGP("Applying relocate section %u to %u\n", relsec,
83 sechdrs[relsec].sh_info); 83 sechdrs[relsec].sh_info);
@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr 91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
92 + ELF64_R_SYM(rel[i].r_info); 92 + ELF64_R_SYM(rel[i].r_info);
93 93
94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", 94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
95 (int)ELF64_R_TYPE(rel[i].r_info), 95 (int)ELF64_R_TYPE(rel[i].r_info),
96 sym->st_value, rel[i].r_addend, (u64)loc); 96 sym->st_value, rel[i].r_addend, (u64)loc);
97 97
98 val = sym->st_value + rel[i].r_addend; 98 val = sym->st_value + rel[i].r_addend;
99 99
100 switch (ELF64_R_TYPE(rel[i].r_info)) { 100 switch (ELF64_R_TYPE(rel[i].r_info)) {
101 case R_X86_64_NONE: 101 case R_X86_64_NONE:
@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
113 if ((s64)val != *(s32 *)loc) 113 if ((s64)val != *(s32 *)loc)
114 goto overflow; 114 goto overflow;
115 break; 115 break;
116 case R_X86_64_PC32: 116 case R_X86_64_PC32:
117 val -= (u64)loc; 117 val -= (u64)loc;
118 *(u32 *)loc = val; 118 *(u32 *)loc = val;
119#if 0 119#if 0
120 if ((s64)val != *(s32 *)loc) 120 if ((s64)val != *(s32 *)loc)
121 goto overflow; 121 goto overflow;
122#endif 122#endif
123 break; 123 break;
124 default: 124 default:
125 printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", 125 printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
126 me->name, ELF64_R_TYPE(rel[i].r_info)); 126 me->name, ELF64_R_TYPE(rel[i].r_info));
127 return -ENOEXEC; 127 return -ENOEXEC;
128 } 128 }
@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
130 return 0; 130 return 0;
131 131
132overflow: 132overflow:
133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n", 133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
134 (int)ELF64_R_TYPE(rel[i].r_info), val); 134 (int)ELF64_R_TYPE(rel[i].r_info), val);
135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", 135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
136 me->name); 136 me->name);
@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs,
143 unsigned int relsec, 143 unsigned int relsec,
144 struct module *me) 144 struct module *me)
145{ 145{
146 printk("non add relocation not supported\n"); 146 printk(KERN_ERR "non add relocation not supported\n");
147 return -ENOSYS; 147 return -ENOSYS;
148} 148}
149 149
150int module_finalize(const Elf_Ehdr *hdr, 150int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 151 const Elf_Shdr *sechdrs,
152 struct module *me) 152 struct module *me)
153{ 153{
154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, 154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
155 *para = NULL; 155 *para = NULL;
@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr,
161 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 161 if (!strcmp(".altinstructions", secstrings + s->sh_name))
162 alt = s; 162 alt = s;
163 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 163 if (!strcmp(".smp_locks", secstrings + s->sh_name))
164 locks= s; 164 locks = s;
165 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 165 if (!strcmp(".parainstructions", secstrings + s->sh_name))
166 para = s; 166 para = s;
167 } 167 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index c0601c2848a1..fa6bb263892e 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -27,6 +27,7 @@
27#include <asm/e820.h> 27#include <asm/e820.h>
28#include <asm/trampoline.h> 28#include <asm/trampoline.h>
29#include <asm/setup.h> 29#include <asm/setup.h>
30#include <asm/smp.h>
30 31
31#include <mach_apic.h> 32#include <mach_apic.h>
32#ifdef CONFIG_X86_32 33#ifdef CONFIG_X86_32
@@ -143,11 +144,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
143 if (bad_ioapic(m->apicaddr)) 144 if (bad_ioapic(m->apicaddr))
144 return; 145 return;
145 146
146 mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; 147 mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
147 mp_ioapics[nr_ioapics].mp_apicid = m->apicid; 148 mp_ioapics[nr_ioapics].apicid = m->apicid;
148 mp_ioapics[nr_ioapics].mp_type = m->type; 149 mp_ioapics[nr_ioapics].type = m->type;
149 mp_ioapics[nr_ioapics].mp_apicver = m->apicver; 150 mp_ioapics[nr_ioapics].apicver = m->apicver;
150 mp_ioapics[nr_ioapics].mp_flags = m->flags; 151 mp_ioapics[nr_ioapics].flags = m->flags;
151 nr_ioapics++; 152 nr_ioapics++;
152} 153}
153 154
@@ -159,55 +160,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
159 m->srcbusirq, m->dstapic, m->dstirq); 160 m->srcbusirq, m->dstapic, m->dstirq);
160} 161}
161 162
162static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 163static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
163{ 164{
164 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," 165 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
165 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 166 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
166 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 167 mp_irq->irqtype, mp_irq->irqflag & 3,
167 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 168 (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
168 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); 169 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
169} 170}
170 171
171static void __init assign_to_mp_irq(struct mpc_intsrc *m, 172static void __init assign_to_mp_irq(struct mpc_intsrc *m,
172 struct mp_config_intsrc *mp_irq) 173 struct mpc_intsrc *mp_irq)
173{ 174{
174 mp_irq->mp_dstapic = m->dstapic; 175 mp_irq->dstapic = m->dstapic;
175 mp_irq->mp_type = m->type; 176 mp_irq->type = m->type;
176 mp_irq->mp_irqtype = m->irqtype; 177 mp_irq->irqtype = m->irqtype;
177 mp_irq->mp_irqflag = m->irqflag; 178 mp_irq->irqflag = m->irqflag;
178 mp_irq->mp_srcbus = m->srcbus; 179 mp_irq->srcbus = m->srcbus;
179 mp_irq->mp_srcbusirq = m->srcbusirq; 180 mp_irq->srcbusirq = m->srcbusirq;
180 mp_irq->mp_dstirq = m->dstirq; 181 mp_irq->dstirq = m->dstirq;
181} 182}
182 183
183static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, 184static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
184 struct mpc_intsrc *m) 185 struct mpc_intsrc *m)
185{ 186{
186 m->dstapic = mp_irq->mp_dstapic; 187 m->dstapic = mp_irq->dstapic;
187 m->type = mp_irq->mp_type; 188 m->type = mp_irq->type;
188 m->irqtype = mp_irq->mp_irqtype; 189 m->irqtype = mp_irq->irqtype;
189 m->irqflag = mp_irq->mp_irqflag; 190 m->irqflag = mp_irq->irqflag;
190 m->srcbus = mp_irq->mp_srcbus; 191 m->srcbus = mp_irq->srcbus;
191 m->srcbusirq = mp_irq->mp_srcbusirq; 192 m->srcbusirq = mp_irq->srcbusirq;
192 m->dstirq = mp_irq->mp_dstirq; 193 m->dstirq = mp_irq->dstirq;
193} 194}
194 195
195static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, 196static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
196 struct mpc_intsrc *m) 197 struct mpc_intsrc *m)
197{ 198{
198 if (mp_irq->mp_dstapic != m->dstapic) 199 if (mp_irq->dstapic != m->dstapic)
199 return 1; 200 return 1;
200 if (mp_irq->mp_type != m->type) 201 if (mp_irq->type != m->type)
201 return 2; 202 return 2;
202 if (mp_irq->mp_irqtype != m->irqtype) 203 if (mp_irq->irqtype != m->irqtype)
203 return 3; 204 return 3;
204 if (mp_irq->mp_irqflag != m->irqflag) 205 if (mp_irq->irqflag != m->irqflag)
205 return 4; 206 return 4;
206 if (mp_irq->mp_srcbus != m->srcbus) 207 if (mp_irq->srcbus != m->srcbus)
207 return 5; 208 return 5;
208 if (mp_irq->mp_srcbusirq != m->srcbusirq) 209 if (mp_irq->srcbusirq != m->srcbusirq)
209 return 6; 210 return 6;
210 if (mp_irq->mp_dstirq != m->dstirq) 211 if (mp_irq->dstirq != m->dstirq)
211 return 7; 212 return 7;
212 213
213 return 0; 214 return 0;
@@ -416,7 +417,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
416 intsrc.type = MP_INTSRC; 417 intsrc.type = MP_INTSRC;
417 intsrc.irqflag = 0; /* conforming */ 418 intsrc.irqflag = 0; /* conforming */
418 intsrc.srcbus = 0; 419 intsrc.srcbus = 0;
419 intsrc.dstapic = mp_ioapics[0].mp_apicid; 420 intsrc.dstapic = mp_ioapics[0].apicid;
420 421
421 intsrc.irqtype = mp_INT; 422 intsrc.irqtype = mp_INT;
422 423
@@ -569,14 +570,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
569 } 570 }
570} 571}
571 572
572static struct intel_mp_floating *mpf_found; 573static struct mpf_intel *mpf_found;
573 574
574/* 575/*
575 * Scan the memory blocks for an SMP configuration block. 576 * Scan the memory blocks for an SMP configuration block.
576 */ 577 */
577static void __init __get_smp_config(unsigned int early) 578static void __init __get_smp_config(unsigned int early)
578{ 579{
579 struct intel_mp_floating *mpf = mpf_found; 580 struct mpf_intel *mpf = mpf_found;
580 581
581 if (!mpf) 582 if (!mpf)
582 return; 583 return;
@@ -597,9 +598,9 @@ static void __init __get_smp_config(unsigned int early)
597 } 598 }
598 599
599 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 600 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
600 mpf->mpf_specification); 601 mpf->specification);
601#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 602#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
602 if (mpf->mpf_feature2 & (1 << 7)) { 603 if (mpf->feature2 & (1 << 7)) {
603 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 604 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
604 pic_mode = 1; 605 pic_mode = 1;
605 } else { 606 } else {
@@ -610,7 +611,7 @@ static void __init __get_smp_config(unsigned int early)
610 /* 611 /*
611 * Now see if we need to read further. 612 * Now see if we need to read further.
612 */ 613 */
613 if (mpf->mpf_feature1 != 0) { 614 if (mpf->feature1 != 0) {
614 if (early) { 615 if (early) {
615 /* 616 /*
616 * local APIC has default address 617 * local APIC has default address
@@ -620,16 +621,16 @@ static void __init __get_smp_config(unsigned int early)
620 } 621 }
621 622
622 printk(KERN_INFO "Default MP configuration #%d\n", 623 printk(KERN_INFO "Default MP configuration #%d\n",
623 mpf->mpf_feature1); 624 mpf->feature1);
624 construct_default_ISA_mptable(mpf->mpf_feature1); 625 construct_default_ISA_mptable(mpf->feature1);
625 626
626 } else if (mpf->mpf_physptr) { 627 } else if (mpf->physptr) {
627 628
628 /* 629 /*
629 * Read the physical hardware table. Anything here will 630 * Read the physical hardware table. Anything here will
630 * override the defaults. 631 * override the defaults.
631 */ 632 */
632 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { 633 if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
633#ifdef CONFIG_X86_LOCAL_APIC 634#ifdef CONFIG_X86_LOCAL_APIC
634 smp_found_config = 0; 635 smp_found_config = 0;
635#endif 636#endif
@@ -687,19 +688,19 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
687 unsigned reserve) 688 unsigned reserve)
688{ 689{
689 unsigned int *bp = phys_to_virt(base); 690 unsigned int *bp = phys_to_virt(base);
690 struct intel_mp_floating *mpf; 691 struct mpf_intel *mpf;
691 692
692 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", 693 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
693 bp, length); 694 bp, length);
694 BUILD_BUG_ON(sizeof(*mpf) != 16); 695 BUILD_BUG_ON(sizeof(*mpf) != 16);
695 696
696 while (length > 0) { 697 while (length > 0) {
697 mpf = (struct intel_mp_floating *)bp; 698 mpf = (struct mpf_intel *)bp;
698 if ((*bp == SMP_MAGIC_IDENT) && 699 if ((*bp == SMP_MAGIC_IDENT) &&
699 (mpf->mpf_length == 1) && 700 (mpf->length == 1) &&
700 !mpf_checksum((unsigned char *)bp, 16) && 701 !mpf_checksum((unsigned char *)bp, 16) &&
701 ((mpf->mpf_specification == 1) 702 ((mpf->specification == 1)
702 || (mpf->mpf_specification == 4))) { 703 || (mpf->specification == 4))) {
703#ifdef CONFIG_X86_LOCAL_APIC 704#ifdef CONFIG_X86_LOCAL_APIC
704 smp_found_config = 1; 705 smp_found_config = 1;
705#endif 706#endif
@@ -712,7 +713,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
712 return 1; 713 return 1;
713 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, 714 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
714 BOOTMEM_DEFAULT); 715 BOOTMEM_DEFAULT);
715 if (mpf->mpf_physptr) { 716 if (mpf->physptr) {
716 unsigned long size = PAGE_SIZE; 717 unsigned long size = PAGE_SIZE;
717#ifdef CONFIG_X86_32 718#ifdef CONFIG_X86_32
718 /* 719 /*
@@ -721,14 +722,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
721 * the bottom is mapped now. 722 * the bottom is mapped now.
722 * PC-9800's MPC table places on the very last 723 * PC-9800's MPC table places on the very last
723 * of physical memory; so that simply reserving 724 * of physical memory; so that simply reserving
724 * PAGE_SIZE from mpg->mpf_physptr yields BUG() 725 * PAGE_SIZE from mpf->physptr yields BUG()
725 * in reserve_bootmem. 726 * in reserve_bootmem.
726 */ 727 */
727 unsigned long end = max_low_pfn * PAGE_SIZE; 728 unsigned long end = max_low_pfn * PAGE_SIZE;
728 if (mpf->mpf_physptr + size > end) 729 if (mpf->physptr + size > end)
729 size = end - mpf->mpf_physptr; 730 size = end - mpf->physptr;
730#endif 731#endif
731 reserve_bootmem_generic(mpf->mpf_physptr, size, 732 reserve_bootmem_generic(mpf->physptr, size,
732 BOOTMEM_DEFAULT); 733 BOOTMEM_DEFAULT);
733 } 734 }
734 735
@@ -808,15 +809,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
808 /* not legacy */ 809 /* not legacy */
809 810
810 for (i = 0; i < mp_irq_entries; i++) { 811 for (i = 0; i < mp_irq_entries; i++) {
811 if (mp_irqs[i].mp_irqtype != mp_INT) 812 if (mp_irqs[i].irqtype != mp_INT)
812 continue; 813 continue;
813 814
814 if (mp_irqs[i].mp_irqflag != 0x0f) 815 if (mp_irqs[i].irqflag != 0x0f)
815 continue; 816 continue;
816 817
817 if (mp_irqs[i].mp_srcbus != m->srcbus) 818 if (mp_irqs[i].srcbus != m->srcbus)
818 continue; 819 continue;
819 if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) 820 if (mp_irqs[i].srcbusirq != m->srcbusirq)
820 continue; 821 continue;
821 if (irq_used[i]) { 822 if (irq_used[i]) {
822 /* already claimed */ 823 /* already claimed */
@@ -921,10 +922,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
921 if (irq_used[i]) 922 if (irq_used[i])
922 continue; 923 continue;
923 924
924 if (mp_irqs[i].mp_irqtype != mp_INT) 925 if (mp_irqs[i].irqtype != mp_INT)
925 continue; 926 continue;
926 927
927 if (mp_irqs[i].mp_irqflag != 0x0f) 928 if (mp_irqs[i].irqflag != 0x0f)
928 continue; 929 continue;
929 930
930 if (nr_m_spare > 0) { 931 if (nr_m_spare > 0) {
@@ -1000,7 +1001,7 @@ static int __init update_mp_table(void)
1000{ 1001{
1001 char str[16]; 1002 char str[16];
1002 char oem[10]; 1003 char oem[10];
1003 struct intel_mp_floating *mpf; 1004 struct mpf_intel *mpf;
1004 struct mpc_table *mpc, *mpc_new; 1005 struct mpc_table *mpc, *mpc_new;
1005 1006
1006 if (!enable_update_mptable) 1007 if (!enable_update_mptable)
@@ -1013,19 +1014,19 @@ static int __init update_mp_table(void)
1013 /* 1014 /*
1014 * Now see if we need to go further. 1015 * Now see if we need to go further.
1015 */ 1016 */
1016 if (mpf->mpf_feature1 != 0) 1017 if (mpf->feature1 != 0)
1017 return 0; 1018 return 0;
1018 1019
1019 if (!mpf->mpf_physptr) 1020 if (!mpf->physptr)
1020 return 0; 1021 return 0;
1021 1022
1022 mpc = phys_to_virt(mpf->mpf_physptr); 1023 mpc = phys_to_virt(mpf->physptr);
1023 1024
1024 if (!smp_check_mpc(mpc, oem, str)) 1025 if (!smp_check_mpc(mpc, oem, str))
1025 return 0; 1026 return 0;
1026 1027
1027 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); 1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
1028 printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); 1029 printk(KERN_INFO "physptr: %x\n", mpf->physptr);
1029 1030
1030 if (mpc_new_phys && mpc->length > mpc_new_length) { 1031 if (mpc_new_phys && mpc->length > mpc_new_length) {
1031 mpc_new_phys = 0; 1032 mpc_new_phys = 0;
@@ -1046,23 +1047,23 @@ static int __init update_mp_table(void)
1046 } 1047 }
1047 printk(KERN_INFO "use in-positon replacing\n"); 1048 printk(KERN_INFO "use in-positon replacing\n");
1048 } else { 1049 } else {
1049 mpf->mpf_physptr = mpc_new_phys; 1050 mpf->physptr = mpc_new_phys;
1050 mpc_new = phys_to_virt(mpc_new_phys); 1051 mpc_new = phys_to_virt(mpc_new_phys);
1051 memcpy(mpc_new, mpc, mpc->length); 1052 memcpy(mpc_new, mpc, mpc->length);
1052 mpc = mpc_new; 1053 mpc = mpc_new;
1053 /* check if we can modify that */ 1054 /* check if we can modify that */
1054 if (mpc_new_phys - mpf->mpf_physptr) { 1055 if (mpc_new_phys - mpf->physptr) {
1055 struct intel_mp_floating *mpf_new; 1056 struct mpf_intel *mpf_new;
1056 /* steal 16 bytes from [0, 1k) */ 1057 /* steal 16 bytes from [0, 1k) */
1057 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); 1058 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
1058 mpf_new = phys_to_virt(0x400 - 16); 1059 mpf_new = phys_to_virt(0x400 - 16);
1059 memcpy(mpf_new, mpf, 16); 1060 memcpy(mpf_new, mpf, 16);
1060 mpf = mpf_new; 1061 mpf = mpf_new;
1061 mpf->mpf_physptr = mpc_new_phys; 1062 mpf->physptr = mpc_new_phys;
1062 } 1063 }
1063 mpf->mpf_checksum = 0; 1064 mpf->checksum = 0;
1064 mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); 1065 mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
1065 printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); 1066 printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
1066 } 1067 }
1067 1068
1068 /* 1069 /*
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 726266695b2c..3cf3413ec626 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -35,10 +35,10 @@
35#include <linux/device.h> 35#include <linux/device.h>
36#include <linux/cpu.h> 36#include <linux/cpu.h>
37#include <linux/notifier.h> 37#include <linux/notifier.h>
38#include <linux/uaccess.h>
38 39
39#include <asm/processor.h> 40#include <asm/processor.h>
40#include <asm/msr.h> 41#include <asm/msr.h>
41#include <asm/uaccess.h>
42#include <asm/system.h> 42#include <asm/system.h>
43 43
44static struct class *msr_class; 44static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7228979f1e7f..23b6d9e6e4f5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -61,11 +61,7 @@ static int endflag __initdata;
61 61
62static inline unsigned int get_nmi_count(int cpu) 62static inline unsigned int get_nmi_count(int cpu)
63{ 63{
64#ifdef CONFIG_X86_64 64 return per_cpu(irq_stat, cpu).__nmi_count;
65 return cpu_pda(cpu)->__nmi_count;
66#else
67 return nmi_count(cpu);
68#endif
69} 65}
70 66
71static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
82 */ 78 */
83static inline unsigned int get_timer_irqs(int cpu) 79static inline unsigned int get_timer_irqs(int cpu)
84{ 80{
85#ifdef CONFIG_X86_64
86 return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
87#else
88 return per_cpu(irq_stat, cpu).apic_timer_irqs + 81 return per_cpu(irq_stat, cpu).apic_timer_irqs +
89 per_cpu(irq_stat, cpu).irq0_irqs; 82 per_cpu(irq_stat, cpu).irq0_irqs;
90#endif
91} 83}
92 84
93#ifdef CONFIG_SMP 85#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..2c00a57ccb90 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 67EXPORT_PER_CPU_SYMBOL(current_task);
68 68
69DEFINE_PER_CPU(int, cpu_number);
70EXPORT_PER_CPU_SYMBOL(cpu_number);
71
72/* 69/*
73 * Return saved PC of a blocked thread. 70 * Return saved PC of a blocked thread.
74 */ 71 */
@@ -591,7 +588,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
591 if (prev->gs | next->gs) 588 if (prev->gs | next->gs)
592 loadsegment(gs, next->gs); 589 loadsegment(gs, next->gs);
593 590
594 x86_write_percpu(current_task, next_p); 591 percpu_write(current_task, next_p);
595 592
596 return prev_p; 593 return prev_p;
597} 594}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4f..4523ff88a69d 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -57,6 +57,12 @@
57 57
58asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
59 59
60DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
61EXPORT_PER_CPU_SYMBOL(current_task);
62
63DEFINE_PER_CPU(unsigned long, old_rsp);
64static DEFINE_PER_CPU(unsigned char, is_idle);
65
60unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 66unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
61 67
62static ATOMIC_NOTIFIER_HEAD(idle_notifier); 68static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -75,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
75 81
76void enter_idle(void) 82void enter_idle(void)
77{ 83{
78 write_pda(isidle, 1); 84 percpu_write(is_idle, 1);
79 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 85 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
80} 86}
81 87
82static void __exit_idle(void) 88static void __exit_idle(void)
83{ 89{
84 if (test_and_clear_bit_pda(0, isidle) == 0) 90 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
85 return; 91 return;
86 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 92 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
87} 93}
@@ -392,7 +398,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
392 load_gs_index(0); 398 load_gs_index(0);
393 regs->ip = new_ip; 399 regs->ip = new_ip;
394 regs->sp = new_sp; 400 regs->sp = new_sp;
395 write_pda(oldrsp, new_sp); 401 percpu_write(old_rsp, new_sp);
396 regs->cs = __USER_CS; 402 regs->cs = __USER_CS;
397 regs->ss = __USER_DS; 403 regs->ss = __USER_DS;
398 regs->flags = 0x200; 404 regs->flags = 0x200;
@@ -613,13 +619,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
613 /* 619 /*
614 * Switch the PDA and FPU contexts. 620 * Switch the PDA and FPU contexts.
615 */ 621 */
616 prev->usersp = read_pda(oldrsp); 622 prev->usersp = percpu_read(old_rsp);
617 write_pda(oldrsp, next->usersp); 623 percpu_write(old_rsp, next->usersp);
618 write_pda(pcurrent, next_p); 624 percpu_write(current_task, next_p);
619 625
620 write_pda(kernelstack, 626 percpu_write(kernel_stack,
621 (unsigned long)task_stack_page(next_p) + 627 (unsigned long)task_stack_page(next_p) +
622 THREAD_SIZE - PDA_STACKOFFSET); 628 THREAD_SIZE - KERNEL_STACK_OFFSET);
623#ifdef CONFIG_CC_STACKPROTECTOR 629#ifdef CONFIG_CC_STACKPROTECTOR
624 write_pda(stack_canary, next_p->stack_canary); 630 write_pda(stack_canary, next_p->stack_canary);
625 /* 631 /*
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643b..f8536fee5c12 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -14,6 +14,7 @@
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h> 15#include <asm/pci_x86.h>
16#include <asm/virtext.h> 16#include <asm/virtext.h>
17#include <asm/cpu.h>
17 18
18#ifdef CONFIG_X86_32 19#ifdef CONFIG_X86_32
19# include <linux/dmi.h> 20# include <linux/dmi.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf69..f41c4486c270 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -89,7 +89,7 @@
89 89
90#include <asm/system.h> 90#include <asm/system.h>
91#include <asm/vsyscall.h> 91#include <asm/vsyscall.h>
92#include <asm/smp.h> 92#include <asm/cpu.h>
93#include <asm/desc.h> 93#include <asm/desc.h>
94#include <asm/dma.h> 94#include <asm/dma.h>
95#include <asm/iommu.h> 95#include <asm/iommu.h>
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 55c46074eba0..efbafbbff584 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,6 +13,23 @@
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/apicdef.h> 14#include <asm/apicdef.h>
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h>
17#include <asm/cpumask.h>
18
19#ifdef CONFIG_DEBUG_PER_CPU_MAPS
20# define DBG(x...) printk(KERN_DEBUG x)
21#else
22# define DBG(x...)
23#endif
24
25/*
26 * Could be inside CONFIG_HAVE_SETUP_PER_CPU_AREA with other stuff but
27 * voyager wants cpu_number too.
28 */
29#ifdef CONFIG_SMP
30DEFINE_PER_CPU(int, cpu_number);
31EXPORT_PER_CPU_SYMBOL(cpu_number);
32#endif
16 33
17#ifdef CONFIG_X86_LOCAL_APIC 34#ifdef CONFIG_X86_LOCAL_APIC
18unsigned int num_processors; 35unsigned int num_processors;
@@ -26,31 +43,84 @@ unsigned int max_physical_apicid;
26physid_mask_t phys_cpu_present_map; 43physid_mask_t phys_cpu_present_map;
27#endif 44#endif
28 45
29/* map cpu index to physical APIC ID */ 46/*
47 * Map cpu index to physical APIC ID
48 */
30DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID); 49DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
31DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID); 50DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
32EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid); 51EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
33EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid); 52EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
34 53
35#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64) 54#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
36#define X86_64_NUMA 1 55#define X86_64_NUMA 1 /* (used later) */
56DEFINE_PER_CPU(int, node_number) = 0;
57EXPORT_PER_CPU_SYMBOL(node_number);
37 58
38/* map cpu index to node index */ 59/*
60 * Map cpu index to node index
61 */
39DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE); 62DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
40EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map); 63EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
41 64
42/* which logical CPUs are on which nodes */ 65/*
66 * Which logical CPUs are on which nodes
67 */
43cpumask_t *node_to_cpumask_map; 68cpumask_t *node_to_cpumask_map;
44EXPORT_SYMBOL(node_to_cpumask_map); 69EXPORT_SYMBOL(node_to_cpumask_map);
45 70
46/* setup node_to_cpumask_map */ 71/*
72 * Setup node_to_cpumask_map
73 */
47static void __init setup_node_to_cpumask_map(void); 74static void __init setup_node_to_cpumask_map(void);
48 75
49#else 76#else
50static inline void setup_node_to_cpumask_map(void) { } 77static inline void setup_node_to_cpumask_map(void) { }
51#endif 78#endif
52 79
53#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 80/*
81 * Define load_pda_offset() and per-cpu __pda for x86_64.
82 * load_pda_offset() is responsible for loading the offset of pda into
83 * %gs.
84 *
85 * On SMP, pda offset also duals as percpu base address and thus it
86 * should be at the start of per-cpu area. To achieve this, it's
87 * preallocated in vmlinux_64.lds.S directly instead of using
88 * DEFINE_PER_CPU().
89 */
90#ifdef CONFIG_X86_64
91void __cpuinit load_pda_offset(int cpu)
92{
93 /* Memory clobbers used to order pda/percpu accesses */
94 mb();
95 wrmsrl(MSR_GS_BASE, cpu_pda(cpu));
96 mb();
97}
98#ifndef CONFIG_SMP
99DEFINE_PER_CPU(struct x8664_pda, __pda);
100#endif
101EXPORT_PER_CPU_SYMBOL(__pda);
102#endif /* CONFIG_SMP && CONFIG_X86_64 */
103
104#ifdef CONFIG_X86_64
105
106/* correctly size the local cpu masks */
107static void setup_cpu_local_masks(void)
108{
109 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
110 alloc_bootmem_cpumask_var(&cpu_callin_mask);
111 alloc_bootmem_cpumask_var(&cpu_callout_mask);
112 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
113}
114
115#else /* CONFIG_X86_32 */
116
117static inline void setup_cpu_local_masks(void)
118{
119}
120
121#endif /* CONFIG_X86_32 */
122
123#ifdef CONFIG_HAVE_SETUP_PER_CPU_AREA
54/* 124/*
55 * Copy data used in early init routines from the initial arrays to the 125 * Copy data used in early init routines from the initial arrays to the
56 * per cpu data areas. These arrays then become expendable and the 126 * per cpu data areas. These arrays then become expendable and the
@@ -79,78 +149,14 @@ static void __init setup_per_cpu_maps(void)
79#endif 149#endif
80} 150}
81 151
82#ifdef CONFIG_X86_32 152#ifdef CONFIG_X86_64
83/* 153unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
84 * Great future not-so-futuristic plan: make i386 and x86_64 do it 154 [0] = (unsigned long)__per_cpu_load,
85 * the same way 155};
86 */ 156#else
87unsigned long __per_cpu_offset[NR_CPUS] __read_mostly; 157unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
158#endif
88EXPORT_SYMBOL(__per_cpu_offset); 159EXPORT_SYMBOL(__per_cpu_offset);
89static inline void setup_cpu_pda_map(void) { }
90
91#elif !defined(CONFIG_SMP)
92static inline void setup_cpu_pda_map(void) { }
93
94#else /* CONFIG_SMP && CONFIG_X86_64 */
95
96/*
97 * Allocate cpu_pda pointer table and array via alloc_bootmem.
98 */
99static void __init setup_cpu_pda_map(void)
100{
101 char *pda;
102 struct x8664_pda **new_cpu_pda;
103 unsigned long size;
104 int cpu;
105
106 size = roundup(sizeof(struct x8664_pda), cache_line_size());
107
108 /* allocate cpu_pda array and pointer table */
109 {
110 unsigned long tsize = nr_cpu_ids * sizeof(void *);
111 unsigned long asize = size * (nr_cpu_ids - 1);
112
113 tsize = roundup(tsize, cache_line_size());
114 new_cpu_pda = alloc_bootmem(tsize + asize);
115 pda = (char *)new_cpu_pda + tsize;
116 }
117
118 /* initialize pointer table to static pda's */
119 for_each_possible_cpu(cpu) {
120 if (cpu == 0) {
121 /* leave boot cpu pda in place */
122 new_cpu_pda[0] = cpu_pda(0);
123 continue;
124 }
125 new_cpu_pda[cpu] = (struct x8664_pda *)pda;
126 new_cpu_pda[cpu]->in_bootmem = 1;
127 pda += size;
128 }
129
130 /* point to new pointer table */
131 _cpu_pda = new_cpu_pda;
132}
133
134#endif /* CONFIG_SMP && CONFIG_X86_64 */
135
136#ifdef CONFIG_X86_64
137
138/* correctly size the local cpu masks */
139static void setup_cpu_local_masks(void)
140{
141 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
142 alloc_bootmem_cpumask_var(&cpu_callin_mask);
143 alloc_bootmem_cpumask_var(&cpu_callout_mask);
144 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
145}
146
147#else /* CONFIG_X86_32 */
148
149static inline void setup_cpu_local_masks(void)
150{
151}
152
153#endif /* CONFIG_X86_32 */
154 160
155/* 161/*
156 * Great future plan: 162 * Great future plan:
@@ -164,9 +170,6 @@ void __init setup_per_cpu_areas(void)
164 int cpu; 170 int cpu;
165 unsigned long align = 1; 171 unsigned long align = 1;
166 172
167 /* Setup cpu_pda map */
168 setup_cpu_pda_map();
169
170 /* Copy section for each CPU (we discard the original) */ 173 /* Copy section for each CPU (we discard the original) */
171 old_size = PERCPU_ENOUGH_ROOM; 174 old_size = PERCPU_ENOUGH_ROOM;
172 align = max_t(unsigned long, PAGE_SIZE, align); 175 align = max_t(unsigned long, PAGE_SIZE, align);
@@ -197,8 +200,25 @@ void __init setup_per_cpu_areas(void)
197 cpu, node, __pa(ptr)); 200 cpu, node, __pa(ptr));
198 } 201 }
199#endif 202#endif
203
204 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
200 per_cpu_offset(cpu) = ptr - __per_cpu_start; 205 per_cpu_offset(cpu) = ptr - __per_cpu_start;
201 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 206 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
207 per_cpu(cpu_number, cpu) = cpu;
208#ifdef CONFIG_X86_64
209 per_cpu(irq_stack_ptr, cpu) =
210 (char *)per_cpu(irq_stack, cpu) + IRQ_STACK_SIZE - 64;
211 /*
212 * CPU0 modified pda in the init data area, reload pda
213 * offset for CPU0 and clear the area for others.
214 */
215 if (cpu == 0)
216 load_pda_offset(0);
217 else
218 memset(cpu_pda(cpu), 0, sizeof(*cpu_pda(cpu)));
219#endif
220
221 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
202 } 222 }
203 223
204 /* Setup percpu data maps */ 224 /* Setup percpu data maps */
@@ -220,6 +240,7 @@ void __init setup_per_cpu_areas(void)
220 * Requires node_possible_map to be valid. 240 * Requires node_possible_map to be valid.
221 * 241 *
222 * Note: node_to_cpumask() is not valid until after this is done. 242 * Note: node_to_cpumask() is not valid until after this is done.
243 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
223 */ 244 */
224static void __init setup_node_to_cpumask_map(void) 245static void __init setup_node_to_cpumask_map(void)
225{ 246{
@@ -235,6 +256,7 @@ static void __init setup_node_to_cpumask_map(void)
235 256
236 /* allocate the map */ 257 /* allocate the map */
237 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t)); 258 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
259 DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
238 260
239 pr_debug("Node to cpumask map at %p for %d nodes\n", 261 pr_debug("Node to cpumask map at %p for %d nodes\n",
240 map, nr_node_ids); 262 map, nr_node_ids);
@@ -247,17 +269,23 @@ void __cpuinit numa_set_node(int cpu, int node)
247{ 269{
248 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map); 270 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
249 271
250 if (cpu_pda(cpu) && node != NUMA_NO_NODE) 272 /* early setting, no percpu area yet */
251 cpu_pda(cpu)->nodenumber = node; 273 if (cpu_to_node_map) {
252
253 if (cpu_to_node_map)
254 cpu_to_node_map[cpu] = node; 274 cpu_to_node_map[cpu] = node;
275 return;
276 }
255 277
256 else if (per_cpu_offset(cpu)) 278#ifdef CONFIG_DEBUG_PER_CPU_MAPS
257 per_cpu(x86_cpu_to_node_map, cpu) = node; 279 if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
280 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
281 dump_stack();
282 return;
283 }
284#endif
285 per_cpu(x86_cpu_to_node_map, cpu) = node;
258 286
259 else 287 if (node != NUMA_NO_NODE)
260 pr_debug("Setting node for non-present cpu %d\n", cpu); 288 per_cpu(node_number, cpu) = node;
261} 289}
262 290
263void __cpuinit numa_clear_node(int cpu) 291void __cpuinit numa_clear_node(int cpu)
@@ -274,7 +302,7 @@ void __cpuinit numa_add_cpu(int cpu)
274 302
275void __cpuinit numa_remove_cpu(int cpu) 303void __cpuinit numa_remove_cpu(int cpu)
276{ 304{
277 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]); 305 cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
278} 306}
279 307
280#else /* CONFIG_DEBUG_PER_CPU_MAPS */ 308#else /* CONFIG_DEBUG_PER_CPU_MAPS */
@@ -284,7 +312,7 @@ void __cpuinit numa_remove_cpu(int cpu)
284 */ 312 */
285static void __cpuinit numa_set_cpumask(int cpu, int enable) 313static void __cpuinit numa_set_cpumask(int cpu, int enable)
286{ 314{
287 int node = cpu_to_node(cpu); 315 int node = early_cpu_to_node(cpu);
288 cpumask_t *mask; 316 cpumask_t *mask;
289 char buf[64]; 317 char buf[64];
290 318
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bb1a3b1fc87f..869b98840fd0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -53,7 +53,6 @@
53#include <asm/nmi.h> 53#include <asm/nmi.h>
54#include <asm/irq.h> 54#include <asm/irq.h>
55#include <asm/idle.h> 55#include <asm/idle.h>
56#include <asm/smp.h>
57#include <asm/trampoline.h> 56#include <asm/trampoline.h>
58#include <asm/cpu.h> 57#include <asm/cpu.h>
59#include <asm/numa.h> 58#include <asm/numa.h>
@@ -745,52 +744,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
745 complete(&c_idle->done); 744 complete(&c_idle->done);
746} 745}
747 746
748#ifdef CONFIG_X86_64
749
750/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
751static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
752{
753 if (!after_bootmem)
754 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
755}
756
757/*
758 * Allocate node local memory for the AP pda.
759 *
760 * Must be called after the _cpu_pda pointer table is initialized.
761 */
762int __cpuinit get_local_pda(int cpu)
763{
764 struct x8664_pda *oldpda, *newpda;
765 unsigned long size = sizeof(struct x8664_pda);
766 int node = cpu_to_node(cpu);
767
768 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
769 return 0;
770
771 oldpda = cpu_pda(cpu);
772 newpda = kmalloc_node(size, GFP_ATOMIC, node);
773 if (!newpda) {
774 printk(KERN_ERR "Could not allocate node local PDA "
775 "for CPU %d on node %d\n", cpu, node);
776
777 if (oldpda)
778 return 0; /* have a usable pda */
779 else
780 return -1;
781 }
782
783 if (oldpda) {
784 memcpy(newpda, oldpda, size);
785 free_bootmem_pda(oldpda);
786 }
787
788 newpda->in_bootmem = 0;
789 cpu_pda(cpu) = newpda;
790 return 0;
791}
792#endif /* CONFIG_X86_64 */
793
794static int __cpuinit do_boot_cpu(int apicid, int cpu) 747static int __cpuinit do_boot_cpu(int apicid, int cpu)
795/* 748/*
796 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 749 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -808,16 +761,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
808 }; 761 };
809 INIT_WORK(&c_idle.work, do_fork_idle); 762 INIT_WORK(&c_idle.work, do_fork_idle);
810 763
811#ifdef CONFIG_X86_64
812 /* Allocate node local memory for AP pdas */
813 if (cpu > 0) {
814 boot_error = get_local_pda(cpu);
815 if (boot_error)
816 goto restore_state;
817 /* if can't get pda memory, can't start cpu */
818 }
819#endif
820
821 alternatives_smp_switch(1); 764 alternatives_smp_switch(1);
822 765
823 c_idle.idle = get_idle_for_cpu(cpu); 766 c_idle.idle = get_idle_for_cpu(cpu);
@@ -847,14 +790,17 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
847 790
848 set_idle_for_cpu(cpu, c_idle.idle); 791 set_idle_for_cpu(cpu, c_idle.idle);
849do_rest: 792do_rest:
850#ifdef CONFIG_X86_32
851 per_cpu(current_task, cpu) = c_idle.idle; 793 per_cpu(current_task, cpu) = c_idle.idle;
794#ifdef CONFIG_X86_32
852 init_gdt(cpu); 795 init_gdt(cpu);
853 /* Stack for startup_32 can be just as for start_secondary onwards */ 796 /* Stack for startup_32 can be just as for start_secondary onwards */
854 irq_ctx_init(cpu); 797 irq_ctx_init(cpu);
855#else 798#else
856 cpu_pda(cpu)->pcurrent = c_idle.idle;
857 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 799 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
800 initial_gs = per_cpu_offset(cpu);
801 per_cpu(kernel_stack, cpu) =
802 (unsigned long)task_stack_page(c_idle.idle) -
803 KERNEL_STACK_OFFSET + THREAD_SIZE;
858#endif 804#endif
859 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 805 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
860 initial_code = (unsigned long)start_secondary; 806 initial_code = (unsigned long)start_secondary;
@@ -931,9 +877,7 @@ do_rest:
931 inquire_remote_apic(apicid); 877 inquire_remote_apic(apicid);
932 } 878 }
933 } 879 }
934#ifdef CONFIG_X86_64 880
935restore_state:
936#endif
937 if (boot_error) { 881 if (boot_error) {
938 /* Try to put things back the way they were before ... */ 882 /* Try to put things back the way they were before ... */
939 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 883 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -1125,6 +1069,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1125 printk(KERN_ERR "... forcing use of dummy APIC emulation." 1069 printk(KERN_ERR "... forcing use of dummy APIC emulation."
1126 "(tell your hw vendor)\n"); 1070 "(tell your hw vendor)\n");
1127 smpboot_clear_io_apic(); 1071 smpboot_clear_io_apic();
1072 disable_ioapic_setup();
1128 return -1; 1073 return -1;
1129 } 1074 }
1130 1075
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
index 397e309839dd..add36b4e37c9 100644
--- a/arch/x86/kernel/smpcommon.c
+++ b/arch/x86/kernel/smpcommon.c
@@ -3,11 +3,16 @@
3 */ 3 */
4#include <linux/module.h> 4#include <linux/module.h>
5#include <asm/smp.h> 5#include <asm/smp.h>
6#include <asm/sections.h>
6 7
7#ifdef CONFIG_X86_32 8#ifdef CONFIG_X86_64
9DEFINE_PER_CPU(unsigned long, this_cpu_off) = (unsigned long)__per_cpu_load;
10#else
8DEFINE_PER_CPU(unsigned long, this_cpu_off); 11DEFINE_PER_CPU(unsigned long, this_cpu_off);
12#endif
9EXPORT_PER_CPU_SYMBOL(this_cpu_off); 13EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10 14
15#ifdef CONFIG_X86_32
11/* 16/*
12 * Initialize the CPU's GDT. This is either the boot CPU doing itself 17 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 * (still using the master per-cpu area), or a CPU doing it for a 18 * (still using the master per-cpu area), or a CPU doing it for a
@@ -23,8 +28,5 @@ __cpuinit void init_gdt(int cpu)
23 28
24 write_gdt_entry(get_cpu_gdt_table(cpu), 29 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S); 30 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
26
27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
28 per_cpu(cpu_number, cpu) = cpu;
29} 31}
30#endif 32#endif
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
index ce5054642247..abf0808d6fc4 100644
--- a/arch/x86/kernel/tlb_32.c
+++ b/arch/x86/kernel/tlb_32.c
@@ -4,8 +4,8 @@
4 4
5#include <asm/tlbflush.h> 5#include <asm/tlbflush.h>
6 6
7DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate) 7DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
8 ____cacheline_aligned = { &init_mm, 0, }; 8 = { &init_mm, 0, };
9 9
10/* must come after the send_IPI functions above for inlining */ 10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h> 11#include <mach_ipi.h>
@@ -20,7 +20,7 @@ DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
20 * Optimizations Manfred Spraul <manfred@colorfullife.com> 20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */ 21 */
22 22
23static cpumask_t flush_cpumask; 23static cpumask_var_t flush_cpumask;
24static struct mm_struct *flush_mm; 24static struct mm_struct *flush_mm;
25static unsigned long flush_va; 25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock); 26static DEFINE_SPINLOCK(tlbstate_lock);
@@ -34,8 +34,8 @@ static DEFINE_SPINLOCK(tlbstate_lock);
34 */ 34 */
35void leave_mm(int cpu) 35void leave_mm(int cpu)
36{ 36{
37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK); 37 BUG_ON(percpu_read(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask); 38 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir); 39 load_cr3(swapper_pg_dir);
40} 40}
41EXPORT_SYMBOL_GPL(leave_mm); 41EXPORT_SYMBOL_GPL(leave_mm);
@@ -92,7 +92,7 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
92 92
93 cpu = get_cpu(); 93 cpu = get_cpu();
94 94
95 if (!cpu_isset(cpu, flush_cpumask)) 95 if (!cpumask_test_cpu(cpu, flush_cpumask))
96 goto out; 96 goto out;
97 /* 97 /*
98 * This was a BUG() but until someone can quote me the 98 * This was a BUG() but until someone can quote me the
@@ -103,8 +103,8 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
103 * BUG(); 103 * BUG();
104 */ 104 */
105 105
106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) { 106 if (flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) { 107 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL) 108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb(); 109 local_flush_tlb();
110 else 110 else
@@ -114,35 +114,22 @@ void smp_invalidate_interrupt(struct pt_regs *regs)
114 } 114 }
115 ack_APIC_irq(); 115 ack_APIC_irq();
116 smp_mb__before_clear_bit(); 116 smp_mb__before_clear_bit();
117 cpu_clear(cpu, flush_cpumask); 117 cpumask_clear_cpu(cpu, flush_cpumask);
118 smp_mb__after_clear_bit(); 118 smp_mb__after_clear_bit();
119out: 119out:
120 put_cpu_no_resched(); 120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count); 121 inc_irq_stat(irq_tlb_count);
122} 122}
123 123
124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 124void native_flush_tlb_others(const struct cpumask *cpumask,
125 unsigned long va) 125 struct mm_struct *mm, unsigned long va)
126{ 126{
127 cpumask_t cpumask = *cpumaskp;
128
129 /* 127 /*
130 * A couple of (to be removed) sanity checks:
131 *
132 * - current CPU must not be in mask
133 * - mask must exist :) 128 * - mask must exist :)
134 */ 129 */
135 BUG_ON(cpus_empty(cpumask)); 130 BUG_ON(cpumask_empty(cpumask));
136 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
137 BUG_ON(!mm); 131 BUG_ON(!mm);
138 132
139#ifdef CONFIG_HOTPLUG_CPU
140 /* If a CPU which we ran on has gone down, OK. */
141 cpus_and(cpumask, cpumask, cpu_online_map);
142 if (unlikely(cpus_empty(cpumask)))
143 return;
144#endif
145
146 /* 133 /*
147 * i'm not happy about this global shared spinlock in the 134 * i'm not happy about this global shared spinlock in the
148 * MM hot path, but we'll see how contended it is. 135 * MM hot path, but we'll see how contended it is.
@@ -150,9 +137,17 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
150 */ 137 */
151 spin_lock(&tlbstate_lock); 138 spin_lock(&tlbstate_lock);
152 139
140 cpumask_andnot(flush_cpumask, cpumask, cpumask_of(smp_processor_id()));
141#ifdef CONFIG_HOTPLUG_CPU
142 /* If a CPU which we ran on has gone down, OK. */
143 cpumask_and(flush_cpumask, flush_cpumask, cpu_online_mask);
144 if (unlikely(cpumask_empty(flush_cpumask))) {
145 spin_unlock(&tlbstate_lock);
146 return;
147 }
148#endif
153 flush_mm = mm; 149 flush_mm = mm;
154 flush_va = va; 150 flush_va = va;
155 cpus_or(flush_cpumask, cpumask, flush_cpumask);
156 151
157 /* 152 /*
158 * Make the above memory operations globally visible before 153 * Make the above memory operations globally visible before
@@ -163,9 +158,9 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
163 * We have to send the IPI only to 158 * We have to send the IPI only to
164 * CPUs affected. 159 * CPUs affected.
165 */ 160 */
166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR); 161 send_IPI_mask(flush_cpumask, INVALIDATE_TLB_VECTOR);
167 162
168 while (!cpus_empty(flush_cpumask)) 163 while (!cpumask_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */ 164 /* nothing. lockup detection does not belong here */
170 cpu_relax(); 165 cpu_relax();
171 166
@@ -177,25 +172,19 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
177void flush_tlb_current_task(void) 172void flush_tlb_current_task(void)
178{ 173{
179 struct mm_struct *mm = current->mm; 174 struct mm_struct *mm = current->mm;
180 cpumask_t cpu_mask;
181 175
182 preempt_disable(); 176 preempt_disable();
183 cpu_mask = mm->cpu_vm_mask;
184 cpu_clear(smp_processor_id(), cpu_mask);
185 177
186 local_flush_tlb(); 178 local_flush_tlb();
187 if (!cpus_empty(cpu_mask)) 179 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
188 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 180 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
189 preempt_enable(); 181 preempt_enable();
190} 182}
191 183
192void flush_tlb_mm(struct mm_struct *mm) 184void flush_tlb_mm(struct mm_struct *mm)
193{ 185{
194 cpumask_t cpu_mask;
195 186
196 preempt_disable(); 187 preempt_disable();
197 cpu_mask = mm->cpu_vm_mask;
198 cpu_clear(smp_processor_id(), cpu_mask);
199 188
200 if (current->active_mm == mm) { 189 if (current->active_mm == mm) {
201 if (current->mm) 190 if (current->mm)
@@ -203,8 +192,8 @@ void flush_tlb_mm(struct mm_struct *mm)
203 else 192 else
204 leave_mm(smp_processor_id()); 193 leave_mm(smp_processor_id());
205 } 194 }
206 if (!cpus_empty(cpu_mask)) 195 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
207 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 196 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
208 197
209 preempt_enable(); 198 preempt_enable();
210} 199}
@@ -212,11 +201,8 @@ void flush_tlb_mm(struct mm_struct *mm)
212void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) 201void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
213{ 202{
214 struct mm_struct *mm = vma->vm_mm; 203 struct mm_struct *mm = vma->vm_mm;
215 cpumask_t cpu_mask;
216 204
217 preempt_disable(); 205 preempt_disable();
218 cpu_mask = mm->cpu_vm_mask;
219 cpu_clear(smp_processor_id(), cpu_mask);
220 206
221 if (current->active_mm == mm) { 207 if (current->active_mm == mm) {
222 if (current->mm) 208 if (current->mm)
@@ -225,9 +211,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
225 leave_mm(smp_processor_id()); 211 leave_mm(smp_processor_id());
226 } 212 }
227 213
228 if (!cpus_empty(cpu_mask)) 214 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
229 flush_tlb_others(cpu_mask, mm, va); 215 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
230
231 preempt_enable(); 216 preempt_enable();
232} 217}
233EXPORT_SYMBOL(flush_tlb_page); 218EXPORT_SYMBOL(flush_tlb_page);
@@ -237,7 +222,7 @@ static void do_flush_tlb_all(void *info)
237 unsigned long cpu = smp_processor_id(); 222 unsigned long cpu = smp_processor_id();
238 223
239 __flush_tlb_all(); 224 __flush_tlb_all();
240 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY) 225 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
241 leave_mm(cpu); 226 leave_mm(cpu);
242} 227}
243 228
@@ -246,11 +231,9 @@ void flush_tlb_all(void)
246 on_each_cpu(do_flush_tlb_all, NULL, 1); 231 on_each_cpu(do_flush_tlb_all, NULL, 1);
247} 232}
248 233
249void reset_lazy_tlbstate(void) 234static int init_flush_cpumask(void)
250{ 235{
251 int cpu = raw_smp_processor_id(); 236 alloc_cpumask_var(&flush_cpumask, GFP_KERNEL);
252 237 return 0;
253 per_cpu(cpu_tlbstate, cpu).state = 0;
254 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
255} 238}
256 239early_initcall(init_flush_cpumask);
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/kernel/tlb_64.c
index f8be6f1d2e48..e64a32c48825 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/kernel/tlb_64.c
@@ -18,6 +18,9 @@
18#include <asm/uv/uv_hub.h> 18#include <asm/uv/uv_hub.h>
19#include <asm/uv/uv_bau.h> 19#include <asm/uv/uv_bau.h>
20 20
21DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
22 = { &init_mm, 0, };
23
21#include <mach_ipi.h> 24#include <mach_ipi.h>
22/* 25/*
23 * Smarter SMP flushing macros. 26 * Smarter SMP flushing macros.
@@ -43,10 +46,10 @@
43 46
44union smp_flush_state { 47union smp_flush_state {
45 struct { 48 struct {
46 cpumask_t flush_cpumask;
47 struct mm_struct *flush_mm; 49 struct mm_struct *flush_mm;
48 unsigned long flush_va; 50 unsigned long flush_va;
49 spinlock_t tlbstate_lock; 51 spinlock_t tlbstate_lock;
52 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
50 }; 53 };
51 char pad[SMP_CACHE_BYTES]; 54 char pad[SMP_CACHE_BYTES];
52} ____cacheline_aligned; 55} ____cacheline_aligned;
@@ -62,9 +65,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
62 */ 65 */
63void leave_mm(int cpu) 66void leave_mm(int cpu)
64{ 67{
65 if (read_pda(mmu_state) == TLBSTATE_OK) 68 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
66 BUG(); 69 BUG();
67 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); 70 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
68 load_cr3(swapper_pg_dir); 71 load_cr3(swapper_pg_dir);
69} 72}
70EXPORT_SYMBOL_GPL(leave_mm); 73EXPORT_SYMBOL_GPL(leave_mm);
@@ -131,7 +134,7 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
131 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; 134 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
132 f = &per_cpu(flush_state, sender); 135 f = &per_cpu(flush_state, sender);
133 136
134 if (!cpu_isset(cpu, f->flush_cpumask)) 137 if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
135 goto out; 138 goto out;
136 /* 139 /*
137 * This was a BUG() but until someone can quote me the 140 * This was a BUG() but until someone can quote me the
@@ -142,8 +145,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
142 * BUG(); 145 * BUG();
143 */ 146 */
144 147
145 if (f->flush_mm == read_pda(active_mm)) { 148 if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
146 if (read_pda(mmu_state) == TLBSTATE_OK) { 149 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
147 if (f->flush_va == TLB_FLUSH_ALL) 150 if (f->flush_va == TLB_FLUSH_ALL)
148 local_flush_tlb(); 151 local_flush_tlb();
149 else 152 else
@@ -153,19 +156,15 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
153 } 156 }
154out: 157out:
155 ack_APIC_irq(); 158 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask); 159 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
157 inc_irq_stat(irq_tlb_count); 160 inc_irq_stat(irq_tlb_count);
158} 161}
159 162
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 163static void flush_tlb_others_ipi(const struct cpumask *cpumask,
161 unsigned long va) 164 struct mm_struct *mm, unsigned long va)
162{ 165{
163 int sender; 166 int sender;
164 union smp_flush_state *f; 167 union smp_flush_state *f;
165 cpumask_t cpumask = *cpumaskp;
166
167 if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
168 return;
169 168
170 /* Caller has disabled preemption */ 169 /* Caller has disabled preemption */
171 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 170 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
@@ -180,7 +179,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
180 179
181 f->flush_mm = mm; 180 f->flush_mm = mm;
182 f->flush_va = va; 181 f->flush_va = va;
183 cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); 182 cpumask_andnot(to_cpumask(f->flush_cpumask),
183 cpumask, cpumask_of(smp_processor_id()));
184 184
185 /* 185 /*
186 * Make the above memory operations globally visible before 186 * Make the above memory operations globally visible before
@@ -191,9 +191,10 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 191 * We have to send the IPI only to
192 * CPUs affected. 192 * CPUs affected.
193 */ 193 */
194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); 194 send_IPI_mask(to_cpumask(f->flush_cpumask),
195 INVALIDATE_TLB_VECTOR_START + sender);
195 196
196 while (!cpus_empty(f->flush_cpumask)) 197 while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
197 cpu_relax(); 198 cpu_relax();
198 199
199 f->flush_mm = NULL; 200 f->flush_mm = NULL;
@@ -201,6 +202,25 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
201 spin_unlock(&f->tlbstate_lock); 202 spin_unlock(&f->tlbstate_lock);
202} 203}
203 204
205void native_flush_tlb_others(const struct cpumask *cpumask,
206 struct mm_struct *mm, unsigned long va)
207{
208 if (is_uv_system()) {
209 /* FIXME: could be an percpu_alloc'd thing */
210 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
211 struct cpumask *after_uv_flush = &get_cpu_var(flush_tlb_mask);
212
213 cpumask_andnot(after_uv_flush, cpumask,
214 cpumask_of(smp_processor_id()));
215 if (!uv_flush_tlb_others(after_uv_flush, mm, va))
216 flush_tlb_others_ipi(after_uv_flush, mm, va);
217
218 put_cpu_var(flush_tlb_uv_cpumask);
219 return;
220 }
221 flush_tlb_others_ipi(cpumask, mm, va);
222}
223
204static int __cpuinit init_smp_flush(void) 224static int __cpuinit init_smp_flush(void)
205{ 225{
206 int i; 226 int i;
@@ -215,25 +235,18 @@ core_initcall(init_smp_flush);
215void flush_tlb_current_task(void) 235void flush_tlb_current_task(void)
216{ 236{
217 struct mm_struct *mm = current->mm; 237 struct mm_struct *mm = current->mm;
218 cpumask_t cpu_mask;
219 238
220 preempt_disable(); 239 preempt_disable();
221 cpu_mask = mm->cpu_vm_mask;
222 cpu_clear(smp_processor_id(), cpu_mask);
223 240
224 local_flush_tlb(); 241 local_flush_tlb();
225 if (!cpus_empty(cpu_mask)) 242 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
226 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 243 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
227 preempt_enable(); 244 preempt_enable();
228} 245}
229 246
230void flush_tlb_mm(struct mm_struct *mm) 247void flush_tlb_mm(struct mm_struct *mm)
231{ 248{
232 cpumask_t cpu_mask;
233
234 preempt_disable(); 249 preempt_disable();
235 cpu_mask = mm->cpu_vm_mask;
236 cpu_clear(smp_processor_id(), cpu_mask);
237 250
238 if (current->active_mm == mm) { 251 if (current->active_mm == mm) {
239 if (current->mm) 252 if (current->mm)
@@ -241,8 +254,8 @@ void flush_tlb_mm(struct mm_struct *mm)
241 else 254 else
242 leave_mm(smp_processor_id()); 255 leave_mm(smp_processor_id());
243 } 256 }
244 if (!cpus_empty(cpu_mask)) 257 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
245 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 258 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
246 259
247 preempt_enable(); 260 preempt_enable();
248} 261}
@@ -250,11 +263,8 @@ void flush_tlb_mm(struct mm_struct *mm)
250void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) 263void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
251{ 264{
252 struct mm_struct *mm = vma->vm_mm; 265 struct mm_struct *mm = vma->vm_mm;
253 cpumask_t cpu_mask;
254 266
255 preempt_disable(); 267 preempt_disable();
256 cpu_mask = mm->cpu_vm_mask;
257 cpu_clear(smp_processor_id(), cpu_mask);
258 268
259 if (current->active_mm == mm) { 269 if (current->active_mm == mm) {
260 if (current->mm) 270 if (current->mm)
@@ -263,8 +273,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
263 leave_mm(smp_processor_id()); 273 leave_mm(smp_processor_id());
264 } 274 }
265 275
266 if (!cpus_empty(cpu_mask)) 276 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
267 flush_tlb_others(cpu_mask, mm, va); 277 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
268 278
269 preempt_enable(); 279 preempt_enable();
270} 280}
@@ -274,7 +284,7 @@ static void do_flush_tlb_all(void *info)
274 unsigned long cpu = smp_processor_id(); 284 unsigned long cpu = smp_processor_id();
275 285
276 __flush_tlb_all(); 286 __flush_tlb_all();
277 if (read_pda(mmu_state) == TLBSTATE_LAZY) 287 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
278 leave_mm(cpu); 288 leave_mm(cpu);
279} 289}
280 290
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f885023167e0..690dcf1a27d4 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -212,11 +212,11 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
212 * The cpumaskp mask contains the cpus the broadcast was sent to. 212 * The cpumaskp mask contains the cpus the broadcast was sent to.
213 * 213 *
214 * Returns 1 if all remote flushing was done. The mask is zeroed. 214 * Returns 1 if all remote flushing was done. The mask is zeroed.
215 * Returns 0 if some remote flushing remains to be done. The mask is left 215 * Returns 0 if some remote flushing remains to be done. The mask will have
216 * unchanged. 216 * some bits still set.
217 */ 217 */
218int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, 218int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
219 cpumask_t *cpumaskp) 219 struct cpumask *cpumaskp)
220{ 220{
221 int completion_status = 0; 221 int completion_status = 0;
222 int right_shift; 222 int right_shift;
@@ -263,13 +263,13 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
263 * Success, so clear the remote cpu's from the mask so we don't 263 * Success, so clear the remote cpu's from the mask so we don't
264 * use the IPI method of shootdown on them. 264 * use the IPI method of shootdown on them.
265 */ 265 */
266 for_each_cpu_mask(bit, *cpumaskp) { 266 for_each_cpu(bit, cpumaskp) {
267 blade = uv_cpu_to_blade_id(bit); 267 blade = uv_cpu_to_blade_id(bit);
268 if (blade == this_blade) 268 if (blade == this_blade)
269 continue; 269 continue;
270 cpu_clear(bit, *cpumaskp); 270 cpumask_clear_cpu(bit, cpumaskp);
271 } 271 }
272 if (!cpus_empty(*cpumaskp)) 272 if (!cpumask_empty(cpumaskp))
273 return 0; 273 return 0;
274 return 1; 274 return 1;
275} 275}
@@ -296,7 +296,7 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
296 * Returns 1 if all remote flushing was done. 296 * Returns 1 if all remote flushing was done.
297 * Returns 0 if some remote flushing remains to be done. 297 * Returns 0 if some remote flushing remains to be done.
298 */ 298 */
299int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, 299int uv_flush_tlb_others(struct cpumask *cpumaskp, struct mm_struct *mm,
300 unsigned long va) 300 unsigned long va)
301{ 301{
302 int i; 302 int i;
@@ -315,7 +315,7 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
315 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 315 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
316 316
317 i = 0; 317 i = 0;
318 for_each_cpu_mask(bit, *cpumaskp) { 318 for_each_cpu(bit, cpumaskp) {
319 blade = uv_cpu_to_blade_id(bit); 319 blade = uv_cpu_to_blade_id(bit);
320 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); 320 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
321 if (blade == this_blade) { 321 if (blade == this_blade) {
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 82c67559dde7..3eba7f7bac05 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -178,14 +178,7 @@ SECTIONS
178 __initramfs_end = .; 178 __initramfs_end = .;
179 } 179 }
180#endif 180#endif
181 . = ALIGN(PAGE_SIZE); 181 PERCPU(PAGE_SIZE)
182 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
183 __per_cpu_start = .;
184 *(.data.percpu.page_aligned)
185 *(.data.percpu)
186 *(.data.percpu.shared_aligned)
187 __per_cpu_end = .;
188 }
189 . = ALIGN(PAGE_SIZE); 182 . = ALIGN(PAGE_SIZE);
190 /* freed after init ends here */ 183 /* freed after init ends here */
191 184
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0e6bef..a09abb8fb97f 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,6 +5,7 @@
5#define LOAD_OFFSET __START_KERNEL_map 5#define LOAD_OFFSET __START_KERNEL_map
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h>
8#include <asm/page.h> 9#include <asm/page.h>
9 10
10#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
@@ -13,12 +14,14 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 14OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 15ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 16jiffies_64 = jiffies;
16_proxy_pda = 1;
17PHDRS { 17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
22 note PT_NOTE FLAGS(0); /* ___ */ 25 note PT_NOTE FLAGS(0); /* ___ */
23} 26}
24SECTIONS 27SECTIONS
@@ -208,14 +211,29 @@ SECTIONS
208 __initramfs_end = .; 211 __initramfs_end = .;
209#endif 212#endif
210 213
214#ifdef CONFIG_SMP
215 /*
216 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
217 * output PHDR, so the next output section - __data_nosave - should
218 * switch it back to data.init. Also, pda should be at the head of
219 * percpu area. Preallocate it and define the percpu offset symbol
220 * so that it can be accessed as a percpu variable.
221 */
222 . = ALIGN(PAGE_SIZE);
223 PERCPU_VADDR_PREALLOC(0, :percpu, pda_size)
224 per_cpu____pda = __per_cpu_start;
225#else
211 PERCPU(PAGE_SIZE) 226 PERCPU(PAGE_SIZE)
227#endif
212 228
213 . = ALIGN(PAGE_SIZE); 229 . = ALIGN(PAGE_SIZE);
214 __init_end = .; 230 __init_end = .;
215 231
216 . = ALIGN(PAGE_SIZE); 232 . = ALIGN(PAGE_SIZE);
217 __nosave_begin = .; 233 __nosave_begin = .;
218 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } 234 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
235 *(.data.nosave)
236 } :data.init /* switch back to data.init, see PERCPU_VADDR() above */
219 . = ALIGN(PAGE_SIZE); 237 . = ALIGN(PAGE_SIZE);
220 __nosave_end = .; 238 __nosave_end = .;
221 239
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426aa354..3909e3ba5ce3 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy);
58EXPORT_SYMBOL(empty_zero_page); 58EXPORT_SYMBOL(empty_zero_page);
59EXPORT_SYMBOL(init_level4_pgt); 59EXPORT_SYMBOL(init_level4_pgt);
60EXPORT_SYMBOL(load_gs_index); 60EXPORT_SYMBOL(load_gs_index);
61
62EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index a580b9562e76..0ade62555ff3 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -9,6 +9,7 @@
9#include <asm/e820.h> 9#include <asm/e820.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/setup.h> 11#include <asm/setup.h>
12#include <asm/cpu.h>
12 13
13void __init pre_intr_init_hook(void) 14void __init pre_intr_init_hook(void)
14{ 15{
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9840b7ec749a..96f15b09a4c5 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -402,7 +402,7 @@ void __init find_smp_config(void)
402 VOYAGER_SUS_IN_CONTROL_PORT); 402 VOYAGER_SUS_IN_CONTROL_PORT);
403 403
404 current_thread_info()->cpu = boot_cpu_id; 404 current_thread_info()->cpu = boot_cpu_id;
405 x86_write_percpu(cpu_number, boot_cpu_id); 405 percpu_write(cpu_number, boot_cpu_id);
406} 406}
407 407
408/* 408/*
@@ -531,6 +531,7 @@ static void __init do_boot_cpu(__u8 cpu)
531 stack_start.sp = (void *)idle->thread.sp; 531 stack_start.sp = (void *)idle->thread.sp;
532 532
533 init_gdt(cpu); 533 init_gdt(cpu);
534 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
534 per_cpu(current_task, cpu) = idle; 535 per_cpu(current_task, cpu) = idle;
535 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 536 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
536 irq_ctx_init(cpu); 537 irq_ctx_init(cpu);
@@ -1748,6 +1749,7 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
1748static void __cpuinit voyager_smp_prepare_boot_cpu(void) 1749static void __cpuinit voyager_smp_prepare_boot_cpu(void)
1749{ 1750{
1750 init_gdt(smp_processor_id()); 1751 init_gdt(smp_processor_id());
1752 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
1751 switch_to_new_gdt(); 1753 switch_to_new_gdt();
1752 1754
1753 cpu_set(smp_processor_id(), cpu_online_map); 1755 cpu_set(smp_processor_id(), cpu_online_map);
@@ -1780,7 +1782,7 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus)
1780void __init smp_setup_processor_id(void) 1782void __init smp_setup_processor_id(void)
1781{ 1783{
1782 current_thread_info()->cpu = hard_smp_processor_id(); 1784 current_thread_info()->cpu = hard_smp_processor_id();
1783 x86_write_percpu(cpu_number, hard_smp_processor_id()); 1785 percpu_write(cpu_number, hard_smp_processor_id());
1784} 1786}
1785 1787
1786static void voyager_send_call_func(cpumask_t callmask) 1788static void voyager_send_call_func(cpumask_t callmask)
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9e268b6b204e..90dfae511a41 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -534,7 +534,7 @@ static int vmalloc_fault(unsigned long address)
534 happen within a race in page table update. In the later 534 happen within a race in page table update. In the later
535 case just flush. */ 535 case just flush. */
536 536
537 pgd = pgd_offset(current->mm ?: &init_mm, address); 537 pgd = pgd_offset(current->active_mm, address);
538 pgd_ref = pgd_offset_k(address); 538 pgd_ref = pgd_offset_k(address);
539 if (pgd_none(*pgd_ref)) 539 if (pgd_none(*pgd_ref))
540 return -1; 540 return -1;
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 88f1b10de3be..4a6989e47a53 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,7 +49,6 @@
49#include <asm/paravirt.h> 49#include <asm/paravirt.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/smp.h>
53 52
54unsigned int __VMALLOC_RESERVE = 128 << 20; 53unsigned int __VMALLOC_RESERVE = 128 << 20;
55 54
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e89d24815f26..4cf30dee8161 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -555,10 +555,12 @@ repeat:
555 if (!pte_val(old_pte)) { 555 if (!pte_val(old_pte)) {
556 if (!primary) 556 if (!primary)
557 return 0; 557 return 0;
558 WARN(1, KERN_WARNING "CPA: called for zero pte. " 558
559 "vaddr = %lx cpa->vaddr = %lx\n", address, 559 /*
560 *cpa->vaddr); 560 * Special error value returned, indicating that the mapping
561 return -EINVAL; 561 * did not exist at this address.
562 */
563 return -EFAULT;
562 } 564 }
563 565
564 if (level == PG_LEVEL_4K) { 566 if (level == PG_LEVEL_4K) {
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 85cbd3cd3723..3be399013de6 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -333,11 +333,20 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
333 req_type & _PAGE_CACHE_MASK); 333 req_type & _PAGE_CACHE_MASK);
334 } 334 }
335 335
336 is_range_ram = pagerange_is_ram(start, end); 336 /*
337 if (is_range_ram == 1) 337 * For legacy reasons, some parts of the physical address range in the
338 return reserve_ram_pages_type(start, end, req_type, new_type); 338 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
339 else if (is_range_ram < 0) 339 * the e820 tables). So we will track the memory attributes of this
340 return -EINVAL; 340 * legacy 1MB region using the linear memtype_list always.
341 */
342 if (end >= ISA_END_ADDRESS) {
343 is_range_ram = pagerange_is_ram(start, end);
344 if (is_range_ram == 1)
345 return reserve_ram_pages_type(start, end, req_type,
346 new_type);
347 else if (is_range_ram < 0)
348 return -EINVAL;
349 }
341 350
342 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 351 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
343 if (!new) 352 if (!new)
@@ -437,11 +446,19 @@ int free_memtype(u64 start, u64 end)
437 if (is_ISA_range(start, end - 1)) 446 if (is_ISA_range(start, end - 1))
438 return 0; 447 return 0;
439 448
440 is_range_ram = pagerange_is_ram(start, end); 449 /*
441 if (is_range_ram == 1) 450 * For legacy reasons, some parts of the physical address range in the
442 return free_ram_pages_type(start, end); 451 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
443 else if (is_range_ram < 0) 452 * the e820 tables). So we will track the memory attributes of this
444 return -EINVAL; 453 * legacy 1MB region using the linear memtype_list always.
454 */
455 if (end >= ISA_END_ADDRESS) {
456 is_range_ram = pagerange_is_ram(start, end);
457 if (is_range_ram == 1)
458 return free_ram_pages_type(start, end);
459 else if (is_range_ram < 0)
460 return -EINVAL;
461 }
445 462
446 spin_lock(&memtype_lock); 463 spin_lock(&memtype_lock);
447 list_for_each_entry(entry, &memtype_list, nd) { 464 list_for_each_entry(entry, &memtype_list, nd) {
@@ -505,6 +522,35 @@ static inline int range_is_allowed(unsigned long pfn, unsigned long size)
505} 522}
506#endif /* CONFIG_STRICT_DEVMEM */ 523#endif /* CONFIG_STRICT_DEVMEM */
507 524
525/*
526 * Change the memory type for the physial address range in kernel identity
527 * mapping space if that range is a part of identity map.
528 */
529static int kernel_map_sync_memtype(u64 base, unsigned long size,
530 unsigned long flags)
531{
532 unsigned long id_sz;
533 int ret;
534
535 if (!pat_enabled || base >= __pa(high_memory))
536 return 0;
537
538 id_sz = (__pa(high_memory) < base + size) ?
539 __pa(high_memory) - base :
540 size;
541
542 ret = ioremap_change_attr((unsigned long)__va(base), id_sz, flags);
543 /*
544 * -EFAULT return means that the addr was not valid and did not have
545 * any identity mapping. That case is a success for
546 * kernel_map_sync_memtype.
547 */
548 if (ret == -EFAULT)
549 ret = 0;
550
551 return ret;
552}
553
508int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn, 554int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
509 unsigned long size, pgprot_t *vma_prot) 555 unsigned long size, pgprot_t *vma_prot)
510{ 556{
@@ -555,9 +601,7 @@ int phys_mem_access_prot_allowed(struct file *file, unsigned long pfn,
555 if (retval < 0) 601 if (retval < 0)
556 return 0; 602 return 0;
557 603
558 if (((pfn < max_low_pfn_mapped) || 604 if (kernel_map_sync_memtype(offset, size, flags)) {
559 (pfn >= (1UL<<(32 - PAGE_SHIFT)) && pfn < max_pfn_mapped)) &&
560 ioremap_change_attr((unsigned long)__va(offset), size, flags) < 0) {
561 free_memtype(offset, offset + size); 605 free_memtype(offset, offset + size);
562 printk(KERN_INFO 606 printk(KERN_INFO
563 "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n", 607 "%s:%d /dev/mem ioremap_change_attr failed %s for %Lx-%Lx\n",
@@ -601,12 +645,13 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
601 * Reserved non RAM regions only and after successful reserve_memtype, 645 * Reserved non RAM regions only and after successful reserve_memtype,
602 * this func also keeps identity mapping (if any) in sync with this new prot. 646 * this func also keeps identity mapping (if any) in sync with this new prot.
603 */ 647 */
604static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) 648static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
649 int strict_prot)
605{ 650{
606 int is_ram = 0; 651 int is_ram = 0;
607 int id_sz, ret; 652 int ret;
608 unsigned long flags; 653 unsigned long flags;
609 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); 654 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
610 655
611 is_ram = pagerange_is_ram(paddr, paddr + size); 656 is_ram = pagerange_is_ram(paddr, paddr + size);
612 657
@@ -625,26 +670,27 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
625 return ret; 670 return ret;
626 671
627 if (flags != want_flags) { 672 if (flags != want_flags) {
628 free_memtype(paddr, paddr + size); 673 if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
629 printk(KERN_ERR 674 free_memtype(paddr, paddr + size);
630 "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n", 675 printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
631 current->comm, current->pid, 676 " for %Lx-%Lx, got %s\n",
632 cattr_name(want_flags), 677 current->comm, current->pid,
633 (unsigned long long)paddr, 678 cattr_name(want_flags),
634 (unsigned long long)(paddr + size), 679 (unsigned long long)paddr,
635 cattr_name(flags)); 680 (unsigned long long)(paddr + size),
636 return -EINVAL; 681 cattr_name(flags));
682 return -EINVAL;
683 }
684 /*
685 * We allow returning different type than the one requested in
686 * non strict case.
687 */
688 *vma_prot = __pgprot((pgprot_val(*vma_prot) &
689 (~_PAGE_CACHE_MASK)) |
690 flags);
637 } 691 }
638 692
639 /* Need to keep identity mapping in sync */ 693 if (kernel_map_sync_memtype(paddr, size, flags)) {
640 if (paddr >= __pa(high_memory))
641 return 0;
642
643 id_sz = (__pa(high_memory) < paddr + size) ?
644 __pa(high_memory) - paddr :
645 size;
646
647 if (ioremap_change_attr((unsigned long)__va(paddr), id_sz, flags) < 0) {
648 free_memtype(paddr, paddr + size); 694 free_memtype(paddr, paddr + size);
649 printk(KERN_ERR 695 printk(KERN_ERR
650 "%s:%d reserve_pfn_range ioremap_change_attr failed %s " 696 "%s:%d reserve_pfn_range ioremap_change_attr failed %s "
@@ -689,6 +735,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
689 unsigned long vma_start = vma->vm_start; 735 unsigned long vma_start = vma->vm_start;
690 unsigned long vma_end = vma->vm_end; 736 unsigned long vma_end = vma->vm_end;
691 unsigned long vma_size = vma_end - vma_start; 737 unsigned long vma_size = vma_end - vma_start;
738 pgprot_t pgprot;
692 739
693 if (!pat_enabled) 740 if (!pat_enabled)
694 return 0; 741 return 0;
@@ -702,7 +749,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
702 WARN_ON_ONCE(1); 749 WARN_ON_ONCE(1);
703 return -EINVAL; 750 return -EINVAL;
704 } 751 }
705 return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); 752 pgprot = __pgprot(prot);
753 return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
706 } 754 }
707 755
708 /* reserve entire vma page by page, using pfn and prot from pte */ 756 /* reserve entire vma page by page, using pfn and prot from pte */
@@ -710,7 +758,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
710 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) 758 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
711 continue; 759 continue;
712 760
713 retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); 761 pgprot = __pgprot(prot);
762 retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
714 if (retval) 763 if (retval)
715 goto cleanup_ret; 764 goto cleanup_ret;
716 } 765 }
@@ -741,7 +790,7 @@ cleanup_ret:
741 * Note that this function can be called with caller trying to map only a 790 * Note that this function can be called with caller trying to map only a
742 * subrange/page inside the vma. 791 * subrange/page inside the vma.
743 */ 792 */
744int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, 793int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
745 unsigned long pfn, unsigned long size) 794 unsigned long pfn, unsigned long size)
746{ 795{
747 int retval = 0; 796 int retval = 0;
@@ -758,14 +807,14 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
758 if (is_linear_pfn_mapping(vma)) { 807 if (is_linear_pfn_mapping(vma)) {
759 /* reserve the whole chunk starting from vm_pgoff */ 808 /* reserve the whole chunk starting from vm_pgoff */
760 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 809 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
761 return reserve_pfn_range(paddr, vma_size, prot); 810 return reserve_pfn_range(paddr, vma_size, prot, 0);
762 } 811 }
763 812
764 /* reserve page by page using pfn and size */ 813 /* reserve page by page using pfn and size */
765 base_paddr = (resource_size_t)pfn << PAGE_SHIFT; 814 base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
766 for (i = 0; i < size; i += PAGE_SIZE) { 815 for (i = 0; i < size; i += PAGE_SIZE) {
767 paddr = base_paddr + i; 816 paddr = base_paddr + i;
768 retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); 817 retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
769 if (retval) 818 if (retval)
770 goto cleanup_ret; 819 goto cleanup_ret;
771 } 820 }
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index f884740da318..5ead808dd70c 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -314,17 +314,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
314 return retval; 314 return retval;
315 315
316 if (flags != new_flags) { 316 if (flags != new_flags) {
317 /* 317 if (!is_new_memtype_allowed(flags, new_flags)) {
318 * Do not fallback to certain memory types with certain
319 * requested type:
320 * - request is uncached, return cannot be write-back
321 * - request is uncached, return cannot be write-combine
322 * - request is write-combine, return cannot be write-back
323 */
324 if ((flags == _PAGE_CACHE_UC_MINUS &&
325 (new_flags == _PAGE_CACHE_WB)) ||
326 (flags == _PAGE_CACHE_WC &&
327 new_flags == _PAGE_CACHE_WB)) {
328 free_memtype(addr, addr+len); 318 free_memtype(addr, addr+len);
329 return -EINVAL; 319 return -EINVAL;
330 } 320 }
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bea215230b20..75b94139e1f2 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr)
634 preempt_enable(); 634 preempt_enable();
635} 635}
636 636
637static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, 637static void xen_flush_tlb_others(const struct cpumask *cpus,
638 unsigned long va) 638 struct mm_struct *mm, unsigned long va)
639{ 639{
640 struct { 640 struct {
641 struct mmuext_op op; 641 struct mmuext_op op;
642 cpumask_t mask; 642 DECLARE_BITMAP(mask, NR_CPUS);
643 } *args; 643 } *args;
644 cpumask_t cpumask = *cpus;
645 struct multicall_space mcs; 644 struct multicall_space mcs;
646 645
647 /* 646 BUG_ON(cpumask_empty(cpus));
648 * A couple of (to be removed) sanity checks:
649 *
650 * - current CPU must not be in mask
651 * - mask must exist :)
652 */
653 BUG_ON(cpus_empty(cpumask));
654 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
655 BUG_ON(!mm); 647 BUG_ON(!mm);
656 648
657 /* If a CPU which we ran on has gone down, OK. */
658 cpus_and(cpumask, cpumask, cpu_online_map);
659 if (cpus_empty(cpumask))
660 return;
661
662 mcs = xen_mc_entry(sizeof(*args)); 649 mcs = xen_mc_entry(sizeof(*args));
663 args = mcs.args; 650 args = mcs.args;
664 args->mask = cpumask; 651 args->op.arg2.vcpumask = to_cpumask(args->mask);
665 args->op.arg2.vcpumask = &args->mask; 652
653 /* Remove us, and any offline CPUS. */
654 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
655 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
656 if (unlikely(cpumask_empty(to_cpumask(args->mask))))
657 goto issue;
666 658
667 if (va == TLB_FLUSH_ALL) { 659 if (va == TLB_FLUSH_ALL) {
668 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; 660 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
@@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
673 665
674 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); 666 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
675 667
668issue:
676 xen_mc_issue(PARAVIRT_LAZY_MMU); 669 xen_mc_issue(PARAVIRT_LAZY_MMU);
677} 670}
678 671
@@ -702,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0)
702 695
703static void xen_write_cr2(unsigned long cr2) 696static void xen_write_cr2(unsigned long cr2)
704{ 697{
705 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; 698 percpu_read(xen_vcpu)->arch.cr2 = cr2;
706} 699}
707 700
708static unsigned long xen_read_cr2(void) 701static unsigned long xen_read_cr2(void)
709{ 702{
710 return x86_read_percpu(xen_vcpu)->arch.cr2; 703 return percpu_read(xen_vcpu)->arch.cr2;
711} 704}
712 705
713static unsigned long xen_read_cr2_direct(void) 706static unsigned long xen_read_cr2_direct(void)
714{ 707{
715 return x86_read_percpu(xen_vcpu_info.arch.cr2); 708 return percpu_read(xen_vcpu_info.arch.cr2);
716} 709}
717 710
718static void xen_write_cr4(unsigned long cr4) 711static void xen_write_cr4(unsigned long cr4)
@@ -725,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4)
725 718
726static unsigned long xen_read_cr3(void) 719static unsigned long xen_read_cr3(void)
727{ 720{
728 return x86_read_percpu(xen_cr3); 721 return percpu_read(xen_cr3);
729} 722}
730 723
731static void set_current_cr3(void *v) 724static void set_current_cr3(void *v)
732{ 725{
733 x86_write_percpu(xen_current_cr3, (unsigned long)v); 726 percpu_write(xen_current_cr3, (unsigned long)v);
734} 727}
735 728
736static void __xen_write_cr3(bool kernel, unsigned long cr3) 729static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -755,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
755 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 748 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
756 749
757 if (kernel) { 750 if (kernel) {
758 x86_write_percpu(xen_cr3, cr3); 751 percpu_write(xen_cr3, cr3);
759 752
760 /* Update xen_current_cr3 once the batch has actually 753 /* Update xen_current_cr3 once the batch has actually
761 been submitted. */ 754 been submitted. */
@@ -771,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3)
771 764
772 /* Update while interrupts are disabled, so its atomic with 765 /* Update while interrupts are disabled, so its atomic with
773 respect to ipis */ 766 respect to ipis */
774 x86_write_percpu(xen_cr3, cr3); 767 percpu_write(xen_cr3, cr3);
775 768
776 __xen_write_cr3(true, cr3); 769 __xen_write_cr3(true, cr3);
777 770
@@ -1652,7 +1645,7 @@ asmlinkage void __init xen_start_kernel(void)
1652#ifdef CONFIG_X86_64 1645#ifdef CONFIG_X86_64
1653 /* Disable until direct per-cpu data access. */ 1646 /* Disable until direct per-cpu data access. */
1654 have_vcpu_info_placement = 0; 1647 have_vcpu_info_placement = 0;
1655 x86_64_init_pda(); 1648 pda_init(0);
1656#endif 1649#endif
1657 1650
1658 xen_smp_init(); 1651 xen_smp_init();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb042608c602..2e8271431e1a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void)
39 struct vcpu_info *vcpu; 39 struct vcpu_info *vcpu;
40 unsigned long flags; 40 unsigned long flags;
41 41
42 vcpu = x86_read_percpu(xen_vcpu); 42 vcpu = percpu_read(xen_vcpu);
43 43
44 /* flag has opposite sense of mask */ 44 /* flag has opposite sense of mask */
45 flags = !vcpu->evtchn_upcall_mask; 45 flags = !vcpu->evtchn_upcall_mask;
@@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags)
62 make sure we're don't switch CPUs between getting the vcpu 62 make sure we're don't switch CPUs between getting the vcpu
63 pointer and updating the mask. */ 63 pointer and updating the mask. */
64 preempt_disable(); 64 preempt_disable();
65 vcpu = x86_read_percpu(xen_vcpu); 65 vcpu = percpu_read(xen_vcpu);
66 vcpu->evtchn_upcall_mask = flags; 66 vcpu->evtchn_upcall_mask = flags;
67 preempt_enable_no_resched(); 67 preempt_enable_no_resched();
68 68
@@ -83,7 +83,7 @@ static void xen_irq_disable(void)
83 make sure we're don't switch CPUs between getting the vcpu 83 make sure we're don't switch CPUs between getting the vcpu
84 pointer and updating the mask. */ 84 pointer and updating the mask. */
85 preempt_disable(); 85 preempt_disable();
86 x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; 86 percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
87 preempt_enable_no_resched(); 87 preempt_enable_no_resched();
88} 88}
89 89
@@ -96,7 +96,7 @@ static void xen_irq_enable(void)
96 the caller is confused and is trying to re-enable interrupts 96 the caller is confused and is trying to re-enable interrupts
97 on an indeterminate processor. */ 97 on an indeterminate processor. */
98 98
99 vcpu = x86_read_percpu(xen_vcpu); 99 vcpu = percpu_read(xen_vcpu);
100 vcpu->evtchn_upcall_mask = 0; 100 vcpu->evtchn_upcall_mask = 0;
101 101
102 /* Doesn't matter if we get preempted here, because any 102 /* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 503c240e26c7..98cb9869eb24 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1063,18 +1063,14 @@ static void drop_other_mm_ref(void *info)
1063 struct mm_struct *mm = info; 1063 struct mm_struct *mm = info;
1064 struct mm_struct *active_mm; 1064 struct mm_struct *active_mm;
1065 1065
1066#ifdef CONFIG_X86_64 1066 active_mm = percpu_read(cpu_tlbstate.active_mm);
1067 active_mm = read_pda(active_mm);
1068#else
1069 active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
1070#endif
1071 1067
1072 if (active_mm == mm) 1068 if (active_mm == mm)
1073 leave_mm(smp_processor_id()); 1069 leave_mm(smp_processor_id());
1074 1070
1075 /* If this cpu still has a stale cr3 reference, then make sure 1071 /* If this cpu still has a stale cr3 reference, then make sure
1076 it has been flushed. */ 1072 it has been flushed. */
1077 if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { 1073 if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
1078 load_cr3(swapper_pg_dir); 1074 load_cr3(swapper_pg_dir);
1079 arch_flush_lazy_cpu_mode(); 1075 arch_flush_lazy_cpu_mode();
1080 } 1076 }
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 858938241616..e786fa7f2615 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode)
39 xen_mc_flush(); 39 xen_mc_flush();
40 40
41 /* restore flags saved in xen_mc_batch */ 41 /* restore flags saved in xen_mc_batch */
42 local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); 42 local_irq_restore(percpu_read(xen_mc_irq_flags));
43} 43}
44 44
45/* Set up a callback to be called when the current batch is flushed */ 45/* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c44e2069c7c7..72c2eb9b64cd 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
50 */ 50 */
51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) 51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
52{ 52{
53#ifdef CONFIG_X86_32 53 inc_irq_stat(irq_resched_count);
54 __get_cpu_var(irq_stat).irq_resched_count++;
55#else
56 add_pda(irq_resched_count, 1);
57#endif
58 54
59 return IRQ_HANDLED; 55 return IRQ_HANDLED;
60} 56}
@@ -78,7 +74,7 @@ static __cpuinit void cpu_bringup(void)
78 xen_setup_cpu_clockevents(); 74 xen_setup_cpu_clockevents();
79 75
80 cpu_set(cpu, cpu_online_map); 76 cpu_set(cpu, cpu_online_map);
81 x86_write_percpu(cpu_state, CPU_ONLINE); 77 percpu_write(cpu_state, CPU_ONLINE);
82 wmb(); 78 wmb();
83 79
84 /* We can take interrupts now: we're officially "up". */ 80 /* We can take interrupts now: we're officially "up". */
@@ -283,22 +279,11 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
283 struct task_struct *idle = idle_task(cpu); 279 struct task_struct *idle = idle_task(cpu);
284 int rc; 280 int rc;
285 281
286#ifdef CONFIG_X86_64 282 per_cpu(current_task, cpu) = idle;
287 /* Allocate node local memory for AP pdas */
288 WARN_ON(cpu == 0);
289 if (cpu > 0) {
290 rc = get_local_pda(cpu);
291 if (rc)
292 return rc;
293 }
294#endif
295
296#ifdef CONFIG_X86_32 283#ifdef CONFIG_X86_32
297 init_gdt(cpu); 284 init_gdt(cpu);
298 per_cpu(current_task, cpu) = idle;
299 irq_ctx_init(cpu); 285 irq_ctx_init(cpu);
300#else 286#else
301 cpu_pda(cpu)->pcurrent = idle;
302 clear_tsk_thread_flag(idle, TIF_FORK); 287 clear_tsk_thread_flag(idle, TIF_FORK);
303#endif 288#endif
304 xen_setup_timer(cpu); 289 xen_setup_timer(cpu);
@@ -445,11 +430,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
445{ 430{
446 irq_enter(); 431 irq_enter();
447 generic_smp_call_function_interrupt(); 432 generic_smp_call_function_interrupt();
448#ifdef CONFIG_X86_32 433 inc_irq_stat(irq_call_count);
449 __get_cpu_var(irq_stat).irq_call_count++;
450#else
451 add_pda(irq_call_count, 1);
452#endif
453 irq_exit(); 434 irq_exit();
454 435
455 return IRQ_HANDLED; 436 return IRQ_HANDLED;
@@ -459,11 +440,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
459{ 440{
460 irq_enter(); 441 irq_enter();
461 generic_smp_call_function_single_interrupt(); 442 generic_smp_call_function_single_interrupt();
462#ifdef CONFIG_X86_32 443 inc_irq_stat(irq_call_count);
463 __get_cpu_var(irq_stat).irq_call_count++;
464#else
465 add_pda(irq_call_count, 1);
466#endif
467 irq_exit(); 444 irq_exit();
468 445
469 return IRQ_HANDLED; 446 return IRQ_HANDLED;
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 05794c566e87..d6fc51f4ce85 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -17,6 +17,7 @@
17#include <asm/processor-flags.h> 17#include <asm/processor-flags.h>
18#include <asm/errno.h> 18#include <asm/errno.h>
19#include <asm/segment.h> 19#include <asm/segment.h>
20#include <asm/percpu.h>
20 21
21#include <xen/interface/xen.h> 22#include <xen/interface/xen.h>
22 23
@@ -28,12 +29,10 @@
28 29
29#if 1 30#if 1
30/* 31/*
31 x86-64 does not yet support direct access to percpu variables 32 FIXME: x86_64 now can support direct access to percpu variables
32 via a segment override, so we just need to make sure this code 33 via a segment override. Update xen accordingly.
33 never gets used
34 */ 34 */
35#define BUG ud2a 35#define BUG ud2a
36#define PER_CPU_VAR(var, off) 0xdeadbeef
37#endif 36#endif
38 37
39/* 38/*
@@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct)
45 BUG 44 BUG
46 45
47 /* Unmask events */ 46 /* Unmask events */
48 movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 47 movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
49 48
50 /* Preempt here doesn't matter because that will deal with 49 /* Preempt here doesn't matter because that will deal with
51 any pending interrupts. The pending check may end up being 50 any pending interrupts. The pending check may end up being
52 run on the wrong CPU, but that doesn't hurt. */ 51 run on the wrong CPU, but that doesn't hurt. */
53 52
54 /* Test for pending */ 53 /* Test for pending */
55 testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) 54 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
56 jz 1f 55 jz 1f
57 56
582: call check_events 572: call check_events
@@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct)
69ENTRY(xen_irq_disable_direct) 68ENTRY(xen_irq_disable_direct)
70 BUG 69 BUG
71 70
72 movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 71 movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
73ENDPATCH(xen_irq_disable_direct) 72ENDPATCH(xen_irq_disable_direct)
74 ret 73 ret
75 ENDPROC(xen_irq_disable_direct) 74 ENDPROC(xen_irq_disable_direct)
@@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct)
87ENTRY(xen_save_fl_direct) 86ENTRY(xen_save_fl_direct)
88 BUG 87 BUG
89 88
90 testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 89 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
91 setz %ah 90 setz %ah
92 addb %ah,%ah 91 addb %ah,%ah
93ENDPATCH(xen_save_fl_direct) 92ENDPATCH(xen_save_fl_direct)
@@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct)
107 BUG 106 BUG
108 107
109 testb $X86_EFLAGS_IF>>8, %ah 108 testb $X86_EFLAGS_IF>>8, %ah
110 setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 109 setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
111 /* Preempt here doesn't matter because that will deal with 110 /* Preempt here doesn't matter because that will deal with
112 any pending interrupts. The pending check may end up being 111 any pending interrupts. The pending check may end up being
113 run on the wrong CPU, but that doesn't hurt. */ 112 run on the wrong CPU, but that doesn't hurt. */
114 113
115 /* check for unmasked and pending */ 114 /* check for unmasked and pending */
116 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) 115 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
117 jz 1f 116 jz 1f
1182: call check_events 1172: call check_events
1191: 1181:
@@ -195,11 +194,11 @@ RELOC(xen_sysexit, 1b+1)
195ENTRY(xen_sysret64) 194ENTRY(xen_sysret64)
196 /* We're already on the usermode stack at this point, but still 195 /* We're already on the usermode stack at this point, but still
197 with the kernel gs, so we can easily switch back */ 196 with the kernel gs, so we can easily switch back */
198 movq %rsp, %gs:pda_oldrsp 197 movq %rsp, PER_CPU_VAR(old_rsp)
199 movq %gs:pda_kernelstack,%rsp 198 movq PER_CPU_VAR(kernel_stack),%rsp
200 199
201 pushq $__USER_DS 200 pushq $__USER_DS
202 pushq %gs:pda_oldrsp 201 pushq PER_CPU_VAR(old_rsp)
203 pushq %r11 202 pushq %r11
204 pushq $__USER_CS 203 pushq $__USER_CS
205 pushq %rcx 204 pushq %rcx
@@ -212,11 +211,11 @@ RELOC(xen_sysret64, 1b+1)
212ENTRY(xen_sysret32) 211ENTRY(xen_sysret32)
213 /* We're already on the usermode stack at this point, but still 212 /* We're already on the usermode stack at this point, but still
214 with the kernel gs, so we can easily switch back */ 213 with the kernel gs, so we can easily switch back */
215 movq %rsp, %gs:pda_oldrsp 214 movq %rsp, PER_CPU_VAR(old_rsp)
216 movq %gs:pda_kernelstack, %rsp 215 movq PER_CPU_VAR(kernel_stack), %rsp
217 216
218 pushq $__USER32_DS 217 pushq $__USER32_DS
219 pushq %gs:pda_oldrsp 218 pushq PER_CPU_VAR(old_rsp)
220 pushq %r11 219 pushq %r11
221 pushq $__USER32_CS 220 pushq $__USER32_CS
222 pushq %rcx 221 pushq %rcx