about summary refs log tree commit diff stats
path: root/arch/x86
diff options
context:
space:
mode:
author H. Peter Anvin <hpa@linux.intel.com> 2009-01-30 17:50:57 -0500
committer H. Peter Anvin <hpa@linux.intel.com> 2009-01-30 17:50:57 -0500
commit 9b7ed8faa034fc2d350e2eff5c68680eb5c43a07 (patch)
tree 5c94c34ad30e312604c1ce4f08ab6631b64a94f5 /arch/x86
parent 6522869c34664dd5f05a0a327e93915b1281c90d (diff)
parent c43e0e46adf79c321ed3fbf0351e1005fb8a2413 (diff)
Merge branch 'core/percpu' into x86/paravirt
Diffstat (limited to 'arch/x86')
-rw-r--r--arch/x86/Kconfig32
-rw-r--r--arch/x86/Kconfig.cpu10
-rw-r--r--arch/x86/Kconfig.debug1
-rw-r--r--arch/x86/Makefile2
-rw-r--r--arch/x86/configs/i386_defconfig4
-rw-r--r--arch/x86/configs/x86_64_defconfig4
-rw-r--r--arch/x86/ia32/ia32entry.S8
-rw-r--r--arch/x86/include/asm/apicnum.h12
-rw-r--r--arch/x86/include/asm/bitops.h14
-rw-r--r--arch/x86/include/asm/cpu.h21
-rw-r--r--arch/x86/include/asm/cpumask.h32
-rw-r--r--arch/x86/include/asm/current.h24
-rw-r--r--arch/x86/include/asm/dma-mapping.h4
-rw-r--r--arch/x86/include/asm/genapic_32.h7
-rw-r--r--arch/x86/include/asm/genapic_64.h6
-rw-r--r--arch/x86/include/asm/hardirq.h49
-rw-r--r--arch/x86/include/asm/hardirq_32.h30
-rw-r--r--arch/x86/include/asm/hardirq_64.h25
-rw-r--r--arch/x86/include/asm/io.h1
-rw-r--r--arch/x86/include/asm/io_apic.h26
-rw-r--r--arch/x86/include/asm/irq_regs.h36
-rw-r--r--arch/x86/include/asm/irq_regs_32.h31
-rw-r--r--arch/x86/include/asm/irq_regs_64.h1
-rw-r--r--arch/x86/include/asm/irq_vectors.h49
-rw-r--r--arch/x86/include/asm/mach-default/entry_arch.h18
-rw-r--r--arch/x86/include/asm/mmu_context.h63
-rw-r--r--arch/x86/include/asm/mmu_context_32.h55
-rw-r--r--arch/x86/include/asm/mmu_context_64.h54
-rw-r--r--arch/x86/include/asm/mpspec_def.h23
-rw-r--r--arch/x86/include/asm/msr-index.h29
-rw-r--r--arch/x86/include/asm/page_64.h4
-rw-r--r--arch/x86/include/asm/paravirt.h8
-rw-r--r--arch/x86/include/asm/pda.h137
-rw-r--r--arch/x86/include/asm/percpu.h153
-rw-r--r--arch/x86/include/asm/pgalloc.h1
-rw-r--r--arch/x86/include/asm/pgtable_64.h1
-rw-r--r--arch/x86/include/asm/processor.h17
-rw-r--r--arch/x86/include/asm/setup.h1
-rw-r--r--arch/x86/include/asm/smp.h50
-rw-r--r--arch/x86/include/asm/stackprotector.h38
-rw-r--r--arch/x86/include/asm/syscalls.h2
-rw-r--r--arch/x86/include/asm/system.h23
-rw-r--r--arch/x86/include/asm/thread_info.h20
-rw-r--r--arch/x86/include/asm/timex.h13
-rw-r--r--arch/x86/include/asm/tlbflush.h17
-rw-r--r--arch/x86/include/asm/topology.h31
-rw-r--r--arch/x86/include/asm/trampoline.h1
-rw-r--r--arch/x86/include/asm/uv/uv.h33
-rw-r--r--arch/x86/include/asm/uv/uv_bau.h1
-rw-r--r--arch/x86/kernel/Makefile14
-rw-r--r--arch/x86/kernel/acpi/boot.c96
-rw-r--r--arch/x86/kernel/acpi/sleep.c1
-rw-r--r--arch/x86/kernel/apic.c49
-rw-r--r--arch/x86/kernel/asm-offsets_64.c11
-rw-r--r--arch/x86/kernel/cpu/common.c130
-rw-r--r--arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c35
-rw-r--r--arch/x86/kernel/cpu/intel.c13
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c63
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd_64.c21
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_intel_64.c1
-rw-r--r--arch/x86/kernel/cpu/mtrr/generic.c12
-rw-r--r--arch/x86/kernel/crash.c2
-rw-r--r--arch/x86/kernel/dumpstack_64.c35
-rw-r--r--arch/x86/kernel/efi.c2
-rw-r--r--arch/x86/kernel/efi_64.c1
-rw-r--r--arch/x86/kernel/entry_32.S6
-rw-r--r--arch/x86/kernel/entry_64.S43
-rw-r--r--arch/x86/kernel/genapic_64.c2
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c1
-rw-r--r--arch/x86/kernel/head64.c23
-rw-r--r--arch/x86/kernel/head_32.S6
-rw-r--r--arch/x86/kernel/head_64.S36
-rw-r--r--arch/x86/kernel/hpet.c3
-rw-r--r--arch/x86/kernel/io_apic.c165
-rw-r--r--arch/x86/kernel/irq.c6
-rw-r--r--arch/x86/kernel/irq_32.c2
-rw-r--r--arch/x86/kernel/irq_64.c9
-rw-r--r--arch/x86/kernel/irqinit_32.c11
-rw-r--r--arch/x86/kernel/microcode_intel.c10
-rw-r--r--arch/x86/kernel/module_32.c6
-rw-r--r--arch/x86/kernel/module_64.c32
-rw-r--r--arch/x86/kernel/mpparse.c142
-rw-r--r--arch/x86/kernel/msr.c2
-rw-r--r--arch/x86/kernel/nmi.c10
-rw-r--r--arch/x86/kernel/pci-gart_64.c2
-rw-r--r--arch/x86/kernel/process_32.c6
-rw-r--r--arch/x86/kernel/process_64.c43
-rw-r--r--arch/x86/kernel/reboot.c1
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/setup_percpu.c412
-rw-r--r--arch/x86/kernel/signal.c11
-rw-r--r--arch/x86/kernel/smpboot.c74
-rw-r--r--arch/x86/kernel/smpcommon.c30
-rw-r--r--arch/x86/kernel/tlb_32.c256
-rw-r--r--arch/x86/kernel/tlb_uv.c69
-rw-r--r--arch/x86/kernel/traps.c1
-rw-r--r--arch/x86/kernel/vmi_32.c2
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S9
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S26
-rw-r--r--arch/x86/kernel/x8664_ksyms_64.c2
-rw-r--r--arch/x86/lguest/boot.c4
-rw-r--r--arch/x86/lib/usercopy_32.c4
-rw-r--r--arch/x86/lib/usercopy_64.c4
-rw-r--r--arch/x86/mach-voyager/setup.c1
-rw-r--r--arch/x86/mach-voyager/voyager_smp.c5
-rw-r--r--arch/x86/mm/Makefile2
-rw-r--r--arch/x86/mm/fault.c445
-rw-r--r--arch/x86/mm/init_32.c49
-rw-r--r--arch/x86/mm/init_64.c2
-rw-r--r--arch/x86/mm/iomap_32.c10
-rw-r--r--arch/x86/mm/ioremap.c25
-rw-r--r--arch/x86/mm/numa_64.c217
-rw-r--r--arch/x86/mm/pageattr.c49
-rw-r--r--arch/x86/mm/pat.c43
-rw-r--r--arch/x86/mm/srat_64.c1
-rw-r--r--arch/x86/mm/tlb.c (renamed from arch/x86/kernel/tlb_64.c)122
-rw-r--r--arch/x86/scripts/strip-symbols1
-rw-r--r--arch/x86/xen/enlighten.c46
-rw-r--r--arch/x86/xen/irq.c8
-rw-r--r--arch/x86/xen/mmu.c8
-rw-r--r--arch/x86/xen/multicalls.h2
-rw-r--r--arch/x86/xen/smp.c34
-rw-r--r--arch/x86/xen/suspend.c1
-rw-r--r--arch/x86/xen/xen-asm_64.S31
124 files changed, 2007 insertions, 2276 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 73f7fe8fd4d1..d6218e6c9824 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -133,7 +133,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
133 def_bool y 133 def_bool y
134 134
135config HAVE_SETUP_PER_CPU_AREA 135config HAVE_SETUP_PER_CPU_AREA
136 def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) 136 def_bool y
137 137
138config HAVE_CPUMASK_OF_CPU_MAP 138config HAVE_CPUMASK_OF_CPU_MAP
139 def_bool X86_64_SMP 139 def_bool X86_64_SMP
@@ -391,6 +391,13 @@ config X86_RDC321X
391 as R-8610-(G). 391 as R-8610-(G).
392 If you don't have one of these chips, you should say N here. 392 If you don't have one of these chips, you should say N here.
393 393
394config X86_UV
395 bool "SGI Ultraviolet"
396 depends on X86_64
397 help
398 This option is needed in order to support SGI Ultraviolet systems.
399 If you don't have one of these, you should say N here.
400
394config SCHED_OMIT_FRAME_POINTER 401config SCHED_OMIT_FRAME_POINTER
395 def_bool y 402 def_bool y
396 prompt "Single-depth WCHAN output" 403 prompt "Single-depth WCHAN output"
@@ -1340,13 +1347,17 @@ config SECCOMP
1340 1347
1341 If unsure, say Y. Only embedded should say N here. 1348 If unsure, say Y. Only embedded should say N here.
1342 1349
1350config CC_STACKPROTECTOR_ALL
1351 bool
1352
1343config CC_STACKPROTECTOR 1353config CC_STACKPROTECTOR
1344 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" 1354 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
1345 depends on X86_64 && EXPERIMENTAL && BROKEN 1355 depends on X86_64
1356 select CC_STACKPROTECTOR_ALL
1346 help 1357 help
1347 This option turns on the -fstack-protector GCC feature. This 1358 This option turns on the -fstack-protector GCC feature. This
1348 feature puts, at the beginning of critical functions, a canary 1359 feature puts, at the beginning of functions, a canary value on
1349 value on the stack just before the return address, and validates 1360 the stack just before the return address, and validates
1350 the value just before actually returning. Stack based buffer 1361 the value just before actually returning. Stack based buffer
1351 overflows (that need to overwrite this return address) now also 1362 overflows (that need to overwrite this return address) now also
1352 overwrite the canary, which gets detected and the attack is then 1363 overwrite the canary, which gets detected and the attack is then
@@ -1354,15 +1365,8 @@ config CC_STACKPROTECTOR
1354 1365
1355 This feature requires gcc version 4.2 or above, or a distribution 1366 This feature requires gcc version 4.2 or above, or a distribution
1356 gcc with the feature backported. Older versions are automatically 1367 gcc with the feature backported. Older versions are automatically
1357 detected and for those versions, this configuration option is ignored. 1368 detected and for those versions, this configuration option is
1358 1369 ignored. (and a warning is printed during bootup)
1359config CC_STACKPROTECTOR_ALL
1360 bool "Use stack-protector for all functions"
1361 depends on CC_STACKPROTECTOR
1362 help
1363 Normally, GCC only inserts the canary value protection for
1364 functions that use large-ish on-stack buffers. By enabling
1365 this option, GCC will be asked to do this for ALL functions.
1366 1370
1367source kernel/Kconfig.hz 1371source kernel/Kconfig.hz
1368 1372
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8078955845ae..8eb50ba9161e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -292,25 +292,23 @@ config X86_CPU
292# Define implied options from the CPU selection here 292# Define implied options from the CPU selection here
293config X86_L1_CACHE_BYTES 293config X86_L1_CACHE_BYTES
294 int 294 int
295 default "128" if GENERIC_CPU || MPSC 295 default "128" if MPSC
296 default "64" if MK8 || MCORE2 296 default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32
297 depends on X86_64
298 297
299config X86_INTERNODE_CACHE_BYTES 298config X86_INTERNODE_CACHE_BYTES
300 int 299 int
301 default "4096" if X86_VSMP 300 default "4096" if X86_VSMP
302 default X86_L1_CACHE_BYTES if !X86_VSMP 301 default X86_L1_CACHE_BYTES if !X86_VSMP
303 depends on X86_64
304 302
305config X86_CMPXCHG 303config X86_CMPXCHG
306 def_bool X86_64 || (X86_32 && !M386) 304 def_bool X86_64 || (X86_32 && !M386)
307 305
308config X86_L1_CACHE_SHIFT 306config X86_L1_CACHE_SHIFT
309 int 307 int
310 default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC 308 default "7" if MPENTIUM4 || MPSC
311 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 309 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
312 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX 310 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
313 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 311 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
314 312
315config X86_XADD 313config X86_XADD
316 def_bool y 314 def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3fd052..28f111461ca8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,7 @@ config DEBUG_RODATA
117config DEBUG_RODATA_TEST 117config DEBUG_RODATA_TEST
118 bool "Testcase for the DEBUG_RODATA feature" 118 bool "Testcase for the DEBUG_RODATA feature"
119 depends on DEBUG_RODATA 119 depends on DEBUG_RODATA
120 default y
120 help 121 help
121 This option enables a testcase for the DEBUG_RODATA 122 This option enables a testcase for the DEBUG_RODATA
122 feature as well as for the change_page_attr() infrastructure. 123 feature as well as for the change_page_attr() infrastructure.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d1a47adb5aec..cacee981d166 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -73,7 +73,7 @@ else
73 73
74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh 74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ 75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
76 "$(CC)" -fstack-protector ) 76 "$(CC)" "-fstack-protector -DGCC_HAS_SP" )
77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ 77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
78 "$(CC)" -fstack-protector-all ) 78 "$(CC)" -fstack-protector-all )
79 79
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index b30a08ed8eb4..edba00d98ac3 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1331,8 +1331,8 @@ CONFIG_I2C_I801=y
1331# Miscellaneous I2C Chip support 1331# Miscellaneous I2C Chip support
1332# 1332#
1333# CONFIG_DS1682 is not set 1333# CONFIG_DS1682 is not set
1334# CONFIG_AT24 is not set 1334# CONFIG_EEPROM_AT24 is not set
1335# CONFIG_SENSORS_EEPROM is not set 1335# CONFIG_EEPROM_LEGACY is not set
1336# CONFIG_SENSORS_PCF8574 is not set 1336# CONFIG_SENSORS_PCF8574 is not set
1337# CONFIG_PCF8575 is not set 1337# CONFIG_PCF8575 is not set
1338# CONFIG_SENSORS_PCA9539 is not set 1338# CONFIG_SENSORS_PCA9539 is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 0e7dbc0a3e46..322dd2748fc9 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1311,8 +1311,8 @@ CONFIG_I2C_I801=y
1311# Miscellaneous I2C Chip support 1311# Miscellaneous I2C Chip support
1312# 1312#
1313# CONFIG_DS1682 is not set 1313# CONFIG_DS1682 is not set
1314# CONFIG_AT24 is not set 1314# CONFIG_EEPROM_AT24 is not set
1315# CONFIG_SENSORS_EEPROM is not set 1315# CONFIG_EEPROM_LEGACY is not set
1316# CONFIG_SENSORS_PCF8574 is not set 1316# CONFIG_SENSORS_PCF8574 is not set
1317# CONFIG_PCF8575 is not set 1317# CONFIG_PCF8575 is not set
1318# CONFIG_SENSORS_PCA9539 is not set 1318# CONFIG_SENSORS_PCA9539 is not set
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b61892..9c79b2477008 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
112 CFI_DEF_CFA rsp,0 112 CFI_DEF_CFA rsp,0
113 CFI_REGISTER rsp,rbp 113 CFI_REGISTER rsp,rbp
114 SWAPGS_UNSAFE_STACK 114 SWAPGS_UNSAFE_STACK
115 movq %gs:pda_kernelstack, %rsp 115 movq PER_CPU_VAR(kernel_stack), %rsp
116 addq $(PDA_STACKOFFSET),%rsp 116 addq $(KERNEL_STACK_OFFSET),%rsp
117 /* 117 /*
118 * No need to follow this irqs on/off section: the syscall 118 * No need to follow this irqs on/off section: the syscall
119 * disabled irqs, here we enable it straight after entry: 119 * disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
273ENTRY(ia32_cstar_target) 273ENTRY(ia32_cstar_target)
274 CFI_STARTPROC32 simple 274 CFI_STARTPROC32 simple
275 CFI_SIGNAL_FRAME 275 CFI_SIGNAL_FRAME
276 CFI_DEF_CFA rsp,PDA_STACKOFFSET 276 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
277 CFI_REGISTER rip,rcx 277 CFI_REGISTER rip,rcx
278 /*CFI_REGISTER rflags,r11*/ 278 /*CFI_REGISTER rflags,r11*/
279 SWAPGS_UNSAFE_STACK 279 SWAPGS_UNSAFE_STACK
280 movl %esp,%r8d 280 movl %esp,%r8d
281 CFI_REGISTER rsp,r8 281 CFI_REGISTER rsp,r8
282 movq %gs:pda_kernelstack,%rsp 282 movq PER_CPU_VAR(kernel_stack),%rsp
283 /* 283 /*
284 * No need to follow this irqs on/off section: the syscall 284 * No need to follow this irqs on/off section: the syscall
285 * disabled irqs and here we enable it straight after entry: 285 * disabled irqs and here we enable it straight after entry:
diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h
new file mode 100644
index 000000000000..82f613c607ce
--- /dev/null
+++ b/arch/x86/include/asm/apicnum.h
@@ -0,0 +1,12 @@
1#ifndef _ASM_X86_APICNUM_H
2#define _ASM_X86_APICNUM_H
3
4/* define MAX_IO_APICS */
5#ifdef CONFIG_X86_32
6# define MAX_IO_APICS 64
7#else
8# define MAX_IO_APICS 128
9# define MAX_LOCAL_APIC 32768
10#endif
11
12#endif /* _ASM_X86_APICNUM_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index e02a359d2aa5..02b47a603fc8 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -3,6 +3,9 @@
3 3
4/* 4/*
5 * Copyright 1992, Linus Torvalds. 5 * Copyright 1992, Linus Torvalds.
6 *
7 * Note: inlines with more than a single statement should be marked
8 * __always_inline to avoid problems with older gcc's inlining heuristics.
6 */ 9 */
7 10
8#ifndef _LINUX_BITOPS_H 11#ifndef _LINUX_BITOPS_H
@@ -53,7 +56,8 @@
53 * Note that @nr may be almost arbitrarily large; this function is not 56 * Note that @nr may be almost arbitrarily large; this function is not
54 * restricted to acting on a single-word quantity. 57 * restricted to acting on a single-word quantity.
55 */ 58 */
56static inline void set_bit(unsigned int nr, volatile unsigned long *addr) 59static __always_inline void
60set_bit(unsigned int nr, volatile unsigned long *addr)
57{ 61{
58 if (IS_IMMEDIATE(nr)) { 62 if (IS_IMMEDIATE(nr)) {
59 asm volatile(LOCK_PREFIX "orb %1,%0" 63 asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
90 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() 94 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
91 * in order to ensure changes are visible on other processors. 95 * in order to ensure changes are visible on other processors.
92 */ 96 */
93static inline void clear_bit(int nr, volatile unsigned long *addr) 97static __always_inline void
98clear_bit(int nr, volatile unsigned long *addr)
94{ 99{
95 if (IS_IMMEDIATE(nr)) { 100 if (IS_IMMEDIATE(nr)) {
96 asm volatile(LOCK_PREFIX "andb %1,%0" 101 asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
204 * 209 *
205 * This is the same as test_and_set_bit on x86. 210 * This is the same as test_and_set_bit on x86.
206 */ 211 */
207static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) 212static __always_inline int
213test_and_set_bit_lock(int nr, volatile unsigned long *addr)
208{ 214{
209 return test_and_set_bit(nr, addr); 215 return test_and_set_bit(nr, addr);
210} 216}
@@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
300 return oldbit; 306 return oldbit;
301} 307}
302 308
303static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) 309static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
304{ 310{
305 return ((1UL << (nr % BITS_PER_LONG)) & 311 return ((1UL << (nr % BITS_PER_LONG)) &
306 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; 312 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
diff --git a/arch/x86/include/asm/cpu.h b/arch/x86/include/asm/cpu.h
index bae482df6039..f03b23e32864 100644
--- a/arch/x86/include/asm/cpu.h
+++ b/arch/x86/include/asm/cpu.h
@@ -7,6 +7,20 @@
7#include <linux/nodemask.h> 7#include <linux/nodemask.h>
8#include <linux/percpu.h> 8#include <linux/percpu.h>
9 9
10#ifdef CONFIG_SMP
11
12extern void prefill_possible_map(void);
13
14#else /* CONFIG_SMP */
15
16static inline void prefill_possible_map(void) {}
17
18#define cpu_physical_id(cpu) boot_cpu_physical_apicid
19#define safe_smp_processor_id() 0
20#define stack_smp_processor_id() 0
21
22#endif /* CONFIG_SMP */
23
10struct x86_cpu { 24struct x86_cpu {
11 struct cpu cpu; 25 struct cpu cpu;
12}; 26};
@@ -17,4 +31,11 @@ extern void arch_unregister_cpu(int);
17#endif 31#endif
18 32
19DECLARE_PER_CPU(int, cpu_state); 33DECLARE_PER_CPU(int, cpu_state);
34
35#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
36extern unsigned char boot_cpu_id;
37#else
38#define boot_cpu_id 0
39#endif
40
20#endif /* _ASM_X86_CPU_H */ 41#endif /* _ASM_X86_CPU_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
new file mode 100644
index 000000000000..a7f3c75f8ad7
--- /dev/null
+++ b/arch/x86/include/asm/cpumask.h
@@ -0,0 +1,32 @@
1#ifndef _ASM_X86_CPUMASK_H
2#define _ASM_X86_CPUMASK_H
3#ifndef __ASSEMBLY__
4#include <linux/cpumask.h>
5
6#ifdef CONFIG_X86_64
7
8extern cpumask_var_t cpu_callin_mask;
9extern cpumask_var_t cpu_callout_mask;
10extern cpumask_var_t cpu_initialized_mask;
11extern cpumask_var_t cpu_sibling_setup_mask;
12
13extern void setup_cpu_local_masks(void);
14
15#else /* CONFIG_X86_32 */
16
17extern cpumask_t cpu_callin_map;
18extern cpumask_t cpu_callout_map;
19extern cpumask_t cpu_initialized;
20extern cpumask_t cpu_sibling_setup_map;
21
22#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
23#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
24#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
25#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
26
27static inline void setup_cpu_local_masks(void) { }
28
29#endif /* CONFIG_X86_32 */
30
31#endif /* __ASSEMBLY__ */
32#endif /* _ASM_X86_CPUMASK_H */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..c68c361697e1 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -1,39 +1,21 @@
1#ifndef _ASM_X86_CURRENT_H 1#ifndef _ASM_X86_CURRENT_H
2#define _ASM_X86_CURRENT_H 2#define _ASM_X86_CURRENT_H
3 3
4#ifdef CONFIG_X86_32
5#include <linux/compiler.h> 4#include <linux/compiler.h>
6#include <asm/percpu.h> 5#include <asm/percpu.h>
7 6
7#ifndef __ASSEMBLY__
8struct task_struct; 8struct task_struct;
9 9
10DECLARE_PER_CPU(struct task_struct *, current_task); 10DECLARE_PER_CPU(struct task_struct *, current_task);
11static __always_inline struct task_struct *get_current(void)
12{
13 return x86_read_percpu(current_task);
14}
15
16#else /* X86_32 */
17
18#ifndef __ASSEMBLY__
19#include <asm/pda.h>
20
21struct task_struct;
22 11
23static __always_inline struct task_struct *get_current(void) 12static __always_inline struct task_struct *get_current(void)
24{ 13{
25 return read_pda(pcurrent); 14 return percpu_read(current_task);
26} 15}
27 16
28#else /* __ASSEMBLY__ */ 17#define current get_current()
29
30#include <asm/asm-offsets.h>
31#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
32 18
33#endif /* __ASSEMBLY__ */ 19#endif /* __ASSEMBLY__ */
34 20
35#endif /* X86_32 */
36
37#define current get_current()
38
39#endif /* _ASM_X86_CURRENT_H */ 21#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 4035357f5b9d..132a134d12f2 100644
--- a/arch/x86/include/asm/dma-mapping.h
+++ b/arch/x86/include/asm/dma-mapping.h
@@ -2,8 +2,8 @@
2#define _ASM_X86_DMA_MAPPING_H 2#define _ASM_X86_DMA_MAPPING_H
3 3
4/* 4/*
5 * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for 5 * IOMMU interface. See Documentation/PCI/PCI-DMA-mapping.txt and
6 * documentation. 6 * Documentation/DMA-API.txt for documentation.
7 */ 7 */
8 8
9#include <linux/scatterlist.h> 9#include <linux/scatterlist.h>
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 2c05b737ee22..4334502d3664 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -138,11 +138,4 @@ struct genapic {
138extern struct genapic *genapic; 138extern struct genapic *genapic;
139extern void es7000_update_genapic_to_cluster(void); 139extern void es7000_update_genapic_to_cluster(void);
140 140
141enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
142#define get_uv_system_type() UV_NONE
143#define is_uv_system() 0
144#define uv_wakeup_secondary(a, b) 1
145#define uv_system_init() do {} while (0)
146
147
148#endif /* _ASM_X86_GENAPIC_32_H */ 141#endif /* _ASM_X86_GENAPIC_32_H */
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index adf32fb56aa6..7bb092c59055 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
51extern int acpi_madt_oem_check(char *, char *); 51extern int acpi_madt_oem_check(char *, char *);
52 52
53extern void apic_send_IPI_self(int vector); 53extern void apic_send_IPI_self(int vector);
54enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
55extern enum uv_system_type get_uv_system_type(void);
56extern int is_uv_system(void);
57 54
58extern struct genapic apic_x2apic_uv_x; 55extern struct genapic apic_x2apic_uv_x;
59DECLARE_PER_CPU(int, x2apic_extra_bits); 56DECLARE_PER_CPU(int, x2apic_extra_bits);
60extern void uv_cpu_init(void);
61extern void uv_system_init(void);
62extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
63 57
64extern void setup_apic_routing(void); 58extern void setup_apic_routing(void);
65 59
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 000787df66e6..176f058e7159 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -1,11 +1,52 @@
1#ifdef CONFIG_X86_32 1#ifndef _ASM_X86_HARDIRQ_H
2# include "hardirq_32.h" 2#define _ASM_X86_HARDIRQ_H
3#else 3
4# include "hardirq_64.h" 4#include <linux/threads.h>
5#include <linux/irq.h>
6
7typedef struct {
8 unsigned int __softirq_pending;
9 unsigned int __nmi_count; /* arch dependent */
10 unsigned int irq0_irqs;
11#ifdef CONFIG_X86_LOCAL_APIC
12 unsigned int apic_timer_irqs; /* arch dependent */
13 unsigned int irq_spurious_count;
14#endif
15#ifdef CONFIG_SMP
16 unsigned int irq_resched_count;
17 unsigned int irq_call_count;
18 unsigned int irq_tlb_count;
19#endif
20#ifdef CONFIG_X86_MCE
21 unsigned int irq_thermal_count;
22# ifdef CONFIG_X86_64
23 unsigned int irq_threshold_count;
24# endif
5#endif 25#endif
26} ____cacheline_aligned irq_cpustat_t;
27
28DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
29
30/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
31#define MAX_HARDIRQS_PER_CPU NR_VECTORS
32
33#define __ARCH_IRQ_STAT
34
35#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
36
37#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
38
39#define __ARCH_SET_SOFTIRQ_PENDING
40
41#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
42#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
43
44extern void ack_bad_irq(unsigned int irq);
6 45
7extern u64 arch_irq_stat_cpu(unsigned int cpu); 46extern u64 arch_irq_stat_cpu(unsigned int cpu);
8#define arch_irq_stat_cpu arch_irq_stat_cpu 47#define arch_irq_stat_cpu arch_irq_stat_cpu
9 48
10extern u64 arch_irq_stat(void); 49extern u64 arch_irq_stat(void);
11#define arch_irq_stat arch_irq_stat 50#define arch_irq_stat arch_irq_stat
51
52#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
deleted file mode 100644
index cf7954d1405f..000000000000
--- a/arch/x86/include/asm/hardirq_32.h
+++ /dev/null
@@ -1,30 +0,0 @@
1#ifndef _ASM_X86_HARDIRQ_32_H
2#define _ASM_X86_HARDIRQ_32_H
3
4#include <linux/threads.h>
5#include <linux/irq.h>
6
7typedef struct {
8 unsigned int __softirq_pending;
9 unsigned long idle_timestamp;
10 unsigned int __nmi_count; /* arch dependent */
11 unsigned int apic_timer_irqs; /* arch dependent */
12 unsigned int irq0_irqs;
13 unsigned int irq_resched_count;
14 unsigned int irq_call_count;
15 unsigned int irq_tlb_count;
16 unsigned int irq_thermal_count;
17 unsigned int irq_spurious_count;
18} ____cacheline_aligned irq_cpustat_t;
19
20DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
21
22#define __ARCH_IRQ_STAT
23#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
24
25#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++)
26
27void ack_bad_irq(unsigned int irq);
28#include <linux/irq_cpustat.h>
29
30#endif /* _ASM_X86_HARDIRQ_32_H */
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
deleted file mode 100644
index b5a6b5d56704..000000000000
--- a/arch/x86/include/asm/hardirq_64.h
+++ /dev/null
@@ -1,25 +0,0 @@
1#ifndef _ASM_X86_HARDIRQ_64_H
2#define _ASM_X86_HARDIRQ_64_H
3
4#include <linux/threads.h>
5#include <linux/irq.h>
6#include <asm/pda.h>
7#include <asm/apic.h>
8
9/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
10#define MAX_HARDIRQS_PER_CPU NR_VECTORS
11
12#define __ARCH_IRQ_STAT 1
13
14#define inc_irq_stat(member) add_pda(member, 1)
15
16#define local_softirq_pending() read_pda(__softirq_pending)
17
18#define __ARCH_SET_SOFTIRQ_PENDING 1
19
20#define set_softirq_pending(x) write_pda(__softirq_pending, (x))
21#define or_softirq_pending(x) or_pda(__softirq_pending, (x))
22
23extern void ack_bad_irq(unsigned int irq);
24
25#endif /* _ASM_X86_HARDIRQ_64_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 05cfed4485fa..1dbbdf4be9b4 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -99,7 +99,6 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
99 * A boot-time mapping is currently limited to at most 16 pages. 99 * A boot-time mapping is currently limited to at most 16 pages.
100 */ 100 */
101extern void early_ioremap_init(void); 101extern void early_ioremap_init(void);
102extern void early_ioremap_clear(void);
103extern void early_ioremap_reset(void); 102extern void early_ioremap_reset(void);
104extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); 103extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
105extern void __iomem *early_memremap(unsigned long offset, unsigned long size); 104extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h
index 7a1f44ac1f17..08ec793aa043 100644
--- a/arch/x86/include/asm/io_apic.h
+++ b/arch/x86/include/asm/io_apic.h
@@ -114,38 +114,16 @@ struct IR_IO_APIC_route_entry {
114extern int nr_ioapics; 114extern int nr_ioapics;
115extern int nr_ioapic_registers[MAX_IO_APICS]; 115extern int nr_ioapic_registers[MAX_IO_APICS];
116 116
117/*
118 * MP-BIOS irq configuration table structures:
119 */
120
121#define MP_MAX_IOAPIC_PIN 127 117#define MP_MAX_IOAPIC_PIN 127
122 118
123struct mp_config_ioapic {
124 unsigned long mp_apicaddr;
125 unsigned int mp_apicid;
126 unsigned char mp_type;
127 unsigned char mp_apicver;
128 unsigned char mp_flags;
129};
130
131struct mp_config_intsrc {
132 unsigned int mp_dstapic;
133 unsigned char mp_type;
134 unsigned char mp_irqtype;
135 unsigned short mp_irqflag;
136 unsigned char mp_srcbus;
137 unsigned char mp_srcbusirq;
138 unsigned char mp_dstirq;
139};
140
141/* I/O APIC entries */ 119/* I/O APIC entries */
142extern struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 120extern struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
143 121
144/* # of MP IRQ source entries */ 122/* # of MP IRQ source entries */
145extern int mp_irq_entries; 123extern int mp_irq_entries;
146 124
147/* MP IRQ source entries */ 125/* MP IRQ source entries */
148extern struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 126extern struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
149 127
150/* non-0 if default (table-less) MP configuration */ 128/* non-0 if default (table-less) MP configuration */
151extern int mpc_default_type; 129extern int mpc_default_type;
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 89c898ab298b..77843225b7ea 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -1,5 +1,31 @@
1#ifdef CONFIG_X86_32 1/*
2# include "irq_regs_32.h" 2 * Per-cpu current frame pointer - the location of the last exception frame on
3#else 3 * the stack, stored in the per-cpu area.
4# include "irq_regs_64.h" 4 *
5#endif 5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */
7#ifndef _ASM_X86_IRQ_REGS_H
8#define _ASM_X86_IRQ_REGS_H
9
10#include <asm/percpu.h>
11
12#define ARCH_HAS_OWN_IRQ_REGS
13
14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15
16static inline struct pt_regs *get_irq_regs(void)
17{
18 return percpu_read(irq_regs);
19}
20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
22{
23 struct pt_regs *old_regs;
24
25 old_regs = get_irq_regs();
26 percpu_write(irq_regs, new_regs);
27
28 return old_regs;
29}
30
31#endif /* _ASM_X86_IRQ_REGS_H */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
deleted file mode 100644
index 86afd7473457..000000000000
--- a/arch/x86/include/asm/irq_regs_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
1/*
2 * Per-cpu current frame pointer - the location of the last exception frame on
3 * the stack, stored in the per-cpu area.
4 *
5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */
7#ifndef _ASM_X86_IRQ_REGS_32_H
8#define _ASM_X86_IRQ_REGS_32_H
9
10#include <asm/percpu.h>
11
12#define ARCH_HAS_OWN_IRQ_REGS
13
14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15
16static inline struct pt_regs *get_irq_regs(void)
17{
18 return x86_read_percpu(irq_regs);
19}
20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
22{
23 struct pt_regs *old_regs;
24
25 old_regs = get_irq_regs();
26 x86_write_percpu(irq_regs, new_regs);
27
28 return old_regs;
29}
30
31#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/x86/include/asm/irq_regs_64.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/irq_regs.h>
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index f7ff65032b9d..9a83a10a5d51 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -49,31 +49,33 @@
49 * some of the following vectors are 'rare', they are merged 49 * some of the following vectors are 'rare', they are merged
50 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. 50 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
51 * TLB, reschedule and local APIC vectors are performance-critical. 51 * TLB, reschedule and local APIC vectors are performance-critical.
52 *
53 * Vectors 0xf0-0xfa are free (reserved for future Linux use).
54 */ 52 */
55#ifdef CONFIG_X86_32 53#ifdef CONFIG_X86_32
56 54
57# define SPURIOUS_APIC_VECTOR 0xff 55# define SPURIOUS_APIC_VECTOR 0xff
58# define ERROR_APIC_VECTOR 0xfe 56# define ERROR_APIC_VECTOR 0xfe
59# define INVALIDATE_TLB_VECTOR 0xfd 57# define RESCHEDULE_VECTOR 0xfd
60# define RESCHEDULE_VECTOR 0xfc 58# define CALL_FUNCTION_VECTOR 0xfc
61# define CALL_FUNCTION_VECTOR 0xfb 59# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
62# define CALL_FUNCTION_SINGLE_VECTOR 0xfa 60# define THERMAL_APIC_VECTOR 0xfa
63# define THERMAL_APIC_VECTOR 0xf0 61/* 0xf8 - 0xf9 : free */
62# define INVALIDATE_TLB_VECTOR_END 0xf7
63# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
64
65# define NUM_INVALIDATE_TLB_VECTORS 8
64 66
65#else 67#else
66 68
67#define SPURIOUS_APIC_VECTOR 0xff 69# define SPURIOUS_APIC_VECTOR 0xff
68#define ERROR_APIC_VECTOR 0xfe 70# define ERROR_APIC_VECTOR 0xfe
69#define RESCHEDULE_VECTOR 0xfd 71# define RESCHEDULE_VECTOR 0xfd
70#define CALL_FUNCTION_VECTOR 0xfc 72# define CALL_FUNCTION_VECTOR 0xfc
71#define CALL_FUNCTION_SINGLE_VECTOR 0xfb 73# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
72#define THERMAL_APIC_VECTOR 0xfa 74# define THERMAL_APIC_VECTOR 0xfa
73#define THRESHOLD_APIC_VECTOR 0xf9 75# define THRESHOLD_APIC_VECTOR 0xf9
74#define UV_BAU_MESSAGE 0xf8 76# define UV_BAU_MESSAGE 0xf8
75#define INVALIDATE_TLB_VECTOR_END 0xf7 77# define INVALIDATE_TLB_VECTOR_END 0xf7
76#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ 78# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
77 79
78#define NUM_INVALIDATE_TLB_VECTORS 8 80#define NUM_INVALIDATE_TLB_VECTORS 8
79 81
@@ -105,6 +107,8 @@
105 107
106#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) 108#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
107 109
110#include <asm/apicnum.h> /* need MAX_IO_APICS */
111
108#ifndef CONFIG_SPARSE_IRQ 112#ifndef CONFIG_SPARSE_IRQ
109# if NR_CPUS < MAX_IO_APICS 113# if NR_CPUS < MAX_IO_APICS
110# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) 114# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
@@ -112,11 +116,12 @@
112# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) 116# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
113# endif 117# endif
114#else 118#else
115# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) 119
116# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) 120# define NR_IRQS \
117# else 121 ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \
118# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) 122 (NR_VECTORS + (8 * NR_CPUS)) : \
119# endif 123 (NR_VECTORS + (32 * MAX_IO_APICS))) \
124
120#endif 125#endif
121 126
122#elif defined(CONFIG_X86_VOYAGER) 127#elif defined(CONFIG_X86_VOYAGER)
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index 6b1add8e31dd..6fa399ad1de2 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -11,10 +11,26 @@
11 */ 11 */
12#ifdef CONFIG_X86_SMP 12#ifdef CONFIG_X86_SMP
13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) 13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
14BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
15BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) 14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
16BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) 15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
17BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17
18BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
19 smp_invalidate_interrupt)
20BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
21 smp_invalidate_interrupt)
22BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
23 smp_invalidate_interrupt)
24BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
25 smp_invalidate_interrupt)
26BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
27 smp_invalidate_interrupt)
28BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
29 smp_invalidate_interrupt)
30BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
31 smp_invalidate_interrupt)
32BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
33 smp_invalidate_interrupt)
18#endif 34#endif
19 35
20/* 36/*
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8aeeb3fd73db..52948df9cd1d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
21int init_new_context(struct task_struct *tsk, struct mm_struct *mm); 21int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
22void destroy_context(struct mm_struct *mm); 22void destroy_context(struct mm_struct *mm);
23 23
24#ifdef CONFIG_X86_32 24
25# include "mmu_context_32.h" 25static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
26#else 26{
27# include "mmu_context_64.h" 27#ifdef CONFIG_SMP
28 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
29 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
30#endif
31}
32
33static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
34 struct task_struct *tsk)
35{
36 unsigned cpu = smp_processor_id();
37
38 if (likely(prev != next)) {
39 /* stop flush ipis for the previous mm */
40 cpu_clear(cpu, prev->cpu_vm_mask);
41#ifdef CONFIG_SMP
42 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
43 percpu_write(cpu_tlbstate.active_mm, next);
28#endif 44#endif
45 cpu_set(cpu, next->cpu_vm_mask);
46
47 /* Re-load page tables */
48 load_cr3(next->pgd);
49
50 /*
51 * load the LDT, if the LDT is different:
52 */
53 if (unlikely(prev->context.ldt != next->context.ldt))
54 load_LDT_nolock(&next->context);
55 }
56#ifdef CONFIG_SMP
57 else {
58 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
59 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
60
61 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
62 /* We were in lazy tlb mode and leave_mm disabled
63 * tlb flush IPI delivery. We must reload CR3
 64 * to make sure we do not use freed page tables.
65 */
66 load_cr3(next->pgd);
67 load_LDT_nolock(&next->context);
68 }
69 }
70#endif
71}
29 72
30#define activate_mm(prev, next) \ 73#define activate_mm(prev, next) \
31do { \ 74do { \
@@ -33,5 +76,17 @@ do { \
33 switch_mm((prev), (next), NULL); \ 76 switch_mm((prev), (next), NULL); \
34} while (0); 77} while (0);
35 78
79#ifdef CONFIG_X86_32
80#define deactivate_mm(tsk, mm) \
81do { \
82 loadsegment(gs, 0); \
83} while (0)
84#else
85#define deactivate_mm(tsk, mm) \
86do { \
87 load_gs_index(0); \
88 loadsegment(fs, 0); \
89} while (0)
90#endif
36 91
37#endif /* _ASM_X86_MMU_CONTEXT_H */ 92#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
deleted file mode 100644
index 7e98ce1d2c0e..000000000000
--- a/arch/x86/include/asm/mmu_context_32.h
+++ /dev/null
@@ -1,55 +0,0 @@
1#ifndef _ASM_X86_MMU_CONTEXT_32_H
2#define _ASM_X86_MMU_CONTEXT_32_H
3
4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
5{
6#ifdef CONFIG_SMP
7 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
8 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
9#endif
10}
11
12static inline void switch_mm(struct mm_struct *prev,
13 struct mm_struct *next,
14 struct task_struct *tsk)
15{
16 int cpu = smp_processor_id();
17
18 if (likely(prev != next)) {
19 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP
22 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
23 x86_write_percpu(cpu_tlbstate.active_mm, next);
24#endif
25 cpu_set(cpu, next->cpu_vm_mask);
26
27 /* Re-load page tables */
28 load_cr3(next->pgd);
29
30 /*
31 * load the LDT, if the LDT is different:
32 */
33 if (unlikely(prev->context.ldt != next->context.ldt))
34 load_LDT_nolock(&next->context);
35 }
36#ifdef CONFIG_SMP
37 else {
38 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
39 BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
40
41 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
42 /* We were in lazy tlb mode and leave_mm disabled
43 * tlb flush IPI delivery. We must reload %cr3.
44 */
45 load_cr3(next->pgd);
46 load_LDT_nolock(&next->context);
47 }
48 }
49#endif
50}
51
52#define deactivate_mm(tsk, mm) \
53 asm("movl %0,%%gs": :"r" (0));
54
55#endif /* _ASM_X86_MMU_CONTEXT_32_H */
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
deleted file mode 100644
index 677d36e9540a..000000000000
--- a/arch/x86/include/asm/mmu_context_64.h
+++ /dev/null
@@ -1,54 +0,0 @@
1#ifndef _ASM_X86_MMU_CONTEXT_64_H
2#define _ASM_X86_MMU_CONTEXT_64_H
3
4#include <asm/pda.h>
5
6static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
7{
8#ifdef CONFIG_SMP
9 if (read_pda(mmu_state) == TLBSTATE_OK)
10 write_pda(mmu_state, TLBSTATE_LAZY);
11#endif
12}
13
14static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
15 struct task_struct *tsk)
16{
17 unsigned cpu = smp_processor_id();
18 if (likely(prev != next)) {
19 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP
22 write_pda(mmu_state, TLBSTATE_OK);
23 write_pda(active_mm, next);
24#endif
25 cpu_set(cpu, next->cpu_vm_mask);
26 load_cr3(next->pgd);
27
28 if (unlikely(next->context.ldt != prev->context.ldt))
29 load_LDT_nolock(&next->context);
30 }
31#ifdef CONFIG_SMP
32 else {
33 write_pda(mmu_state, TLBSTATE_OK);
34 if (read_pda(active_mm) != next)
35 BUG();
36 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
37 /* We were in lazy tlb mode and leave_mm disabled
38 * tlb flush IPI delivery. We must reload CR3
39 * to make sure to use no freed page tables.
40 */
41 load_cr3(next->pgd);
42 load_LDT_nolock(&next->context);
43 }
44 }
45#endif
46}
47
48#define deactivate_mm(tsk, mm) \
49do { \
50 load_gs_index(0); \
51 asm volatile("movl %0,%%fs"::"r"(0)); \
52} while (0)
53
54#endif /* _ASM_X86_MMU_CONTEXT_64_H */
diff --git a/arch/x86/include/asm/mpspec_def.h b/arch/x86/include/asm/mpspec_def.h
index 59568bc4767f..4a7f96d7c188 100644
--- a/arch/x86/include/asm/mpspec_def.h
+++ b/arch/x86/include/asm/mpspec_def.h
@@ -24,17 +24,18 @@
24# endif 24# endif
25#endif 25#endif
26 26
27struct intel_mp_floating { 27/* Intel MP Floating Pointer Structure */
28 char mpf_signature[4]; /* "_MP_" */ 28struct mpf_intel {
29 unsigned int mpf_physptr; /* Configuration table address */ 29 char signature[4]; /* "_MP_" */
30 unsigned char mpf_length; /* Our length (paragraphs) */ 30 unsigned int physptr; /* Configuration table address */
31 unsigned char mpf_specification;/* Specification version */ 31 unsigned char length; /* Our length (paragraphs) */
32 unsigned char mpf_checksum; /* Checksum (makes sum 0) */ 32 unsigned char specification; /* Specification version */
33 unsigned char mpf_feature1; /* Standard or configuration ? */ 33 unsigned char checksum; /* Checksum (makes sum 0) */
34 unsigned char mpf_feature2; /* Bit7 set for IMCR|PIC */ 34 unsigned char feature1; /* Standard or configuration ? */
35 unsigned char mpf_feature3; /* Unused (0) */ 35 unsigned char feature2; /* Bit7 set for IMCR|PIC */
36 unsigned char mpf_feature4; /* Unused (0) */ 36 unsigned char feature3; /* Unused (0) */
37 unsigned char mpf_feature5; /* Unused (0) */ 37 unsigned char feature4; /* Unused (0) */
38 unsigned char feature5; /* Unused (0) */
38}; 39};
39 40
40#define MPC_SIGNATURE "PCMP" 41#define MPC_SIGNATURE "PCMP"
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index cb58643947b9..358acc59ae04 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -202,6 +202,35 @@
202#define MSR_IA32_THERM_STATUS 0x0000019c 202#define MSR_IA32_THERM_STATUS 0x0000019c
203#define MSR_IA32_MISC_ENABLE 0x000001a0 203#define MSR_IA32_MISC_ENABLE 0x000001a0
204 204
205/* MISC_ENABLE bits: architectural */
206#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0)
207#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1)
208#define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7)
209#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11)
210#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12)
211#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16)
212#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18)
213#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22)
214#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23)
215#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34)
216
217/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
218#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2)
219#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3)
220#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4)
221#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6)
222#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8)
223#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9)
224#define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10)
225#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10)
226#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13)
227#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19)
228#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20)
229#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24)
230#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37)
231#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38)
232#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39)
233
205/* Intel Model 6 */ 234/* Intel Model 6 */
206#define MSR_P6_EVNTSEL0 0x00000186 235#define MSR_P6_EVNTSEL0 0x00000186
207#define MSR_P6_EVNTSEL1 0x00000187 236#define MSR_P6_EVNTSEL1 0x00000187
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29f44f0..e27fdbe5f9e4 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -13,8 +13,8 @@
13#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) 13#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
14#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) 14#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
15 15
16#define IRQSTACK_ORDER 2 16#define IRQ_STACK_ORDER 2
17#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) 17#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
18 18
19#define STACKFAULT_STACK 1 19#define STACKFAULT_STACK 1
20#define DOUBLEFAULT_STACK 2 20#define DOUBLEFAULT_STACK 2
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index e25c410f3d8c..175778887090 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -244,7 +244,8 @@ struct pv_mmu_ops {
244 void (*flush_tlb_user)(void); 244 void (*flush_tlb_user)(void);
245 void (*flush_tlb_kernel)(void); 245 void (*flush_tlb_kernel)(void);
246 void (*flush_tlb_single)(unsigned long addr); 246 void (*flush_tlb_single)(unsigned long addr);
247 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, 247 void (*flush_tlb_others)(const struct cpumask *cpus,
248 struct mm_struct *mm,
248 unsigned long va); 249 unsigned long va);
249 250
250 /* Hooks for allocating and freeing a pagetable top-level */ 251 /* Hooks for allocating and freeing a pagetable top-level */
@@ -983,10 +984,11 @@ static inline void __flush_tlb_single(unsigned long addr)
983 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); 984 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
984} 985}
985 986
986static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, 987static inline void flush_tlb_others(const struct cpumask *cpumask,
988 struct mm_struct *mm,
987 unsigned long va) 989 unsigned long va)
988{ 990{
989 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); 991 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
990} 992}
991 993
992static inline int paravirt_pgd_alloc(struct mm_struct *mm) 994static inline int paravirt_pgd_alloc(struct mm_struct *mm)
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
deleted file mode 100644
index 2fbfff88df37..000000000000
--- a/arch/x86/include/asm/pda.h
+++ /dev/null
@@ -1,137 +0,0 @@
1#ifndef _ASM_X86_PDA_H
2#define _ASM_X86_PDA_H
3
4#ifndef __ASSEMBLY__
5#include <linux/stddef.h>
6#include <linux/types.h>
7#include <linux/cache.h>
8#include <asm/page.h>
9
10/* Per processor datastructure. %gs points to it while the kernel runs */
11struct x8664_pda {
12 struct task_struct *pcurrent; /* 0 Current process */
13 unsigned long data_offset; /* 8 Per cpu data offset from linker
14 address */
15 unsigned long kernelstack; /* 16 top of kernel stack for current */
16 unsigned long oldrsp; /* 24 user rsp for system call */
17 int irqcount; /* 32 Irq nesting counter. Starts -1 */
18 unsigned int cpunumber; /* 36 Logical CPU number */
19#ifdef CONFIG_CC_STACKPROTECTOR
20 unsigned long stack_canary; /* 40 stack canary value */
21 /* gcc-ABI: this canary MUST be at
22 offset 40!!! */
23#endif
24 char *irqstackptr;
25 short nodenumber; /* number of current node (32k max) */
26 short in_bootmem; /* pda lives in bootmem */
27 unsigned int __softirq_pending;
28 unsigned int __nmi_count; /* number of NMI on this CPUs */
29 short mmu_state;
30 short isidle;
31 struct mm_struct *active_mm;
32 unsigned apic_timer_irqs;
33 unsigned irq0_irqs;
34 unsigned irq_resched_count;
35 unsigned irq_call_count;
36 unsigned irq_tlb_count;
37 unsigned irq_thermal_count;
38 unsigned irq_threshold_count;
39 unsigned irq_spurious_count;
40} ____cacheline_aligned_in_smp;
41
42extern struct x8664_pda **_cpu_pda;
43extern void pda_init(int);
44
45#define cpu_pda(i) (_cpu_pda[i])
46
47/*
48 * There is no fast way to get the base address of the PDA, all the accesses
49 * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
50 */
51extern void __bad_pda_field(void) __attribute__((noreturn));
52
53/*
54 * proxy_pda doesn't actually exist, but tell gcc it is accessed for
55 * all PDA accesses so it gets read/write dependencies right.
56 */
57extern struct x8664_pda _proxy_pda;
58
59#define pda_offset(field) offsetof(struct x8664_pda, field)
60
61#define pda_to_op(op, field, val) \
62do { \
63 typedef typeof(_proxy_pda.field) T__; \
64 if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
65 switch (sizeof(_proxy_pda.field)) { \
66 case 2: \
67 asm(op "w %1,%%gs:%c2" : \
68 "+m" (_proxy_pda.field) : \
69 "ri" ((T__)val), \
70 "i"(pda_offset(field))); \
71 break; \
72 case 4: \
73 asm(op "l %1,%%gs:%c2" : \
74 "+m" (_proxy_pda.field) : \
75 "ri" ((T__)val), \
76 "i" (pda_offset(field))); \
77 break; \
78 case 8: \
79 asm(op "q %1,%%gs:%c2": \
80 "+m" (_proxy_pda.field) : \
81 "ri" ((T__)val), \
82 "i"(pda_offset(field))); \
83 break; \
84 default: \
85 __bad_pda_field(); \
86 } \
87} while (0)
88
89#define pda_from_op(op, field) \
90({ \
91 typeof(_proxy_pda.field) ret__; \
92 switch (sizeof(_proxy_pda.field)) { \
93 case 2: \
94 asm(op "w %%gs:%c1,%0" : \
95 "=r" (ret__) : \
96 "i" (pda_offset(field)), \
97 "m" (_proxy_pda.field)); \
98 break; \
99 case 4: \
100 asm(op "l %%gs:%c1,%0": \
101 "=r" (ret__): \
102 "i" (pda_offset(field)), \
103 "m" (_proxy_pda.field)); \
104 break; \
105 case 8: \
106 asm(op "q %%gs:%c1,%0": \
107 "=r" (ret__) : \
108 "i" (pda_offset(field)), \
109 "m" (_proxy_pda.field)); \
110 break; \
111 default: \
112 __bad_pda_field(); \
113 } \
114 ret__; \
115})
116
117#define read_pda(field) pda_from_op("mov", field)
118#define write_pda(field, val) pda_to_op("mov", field, val)
119#define add_pda(field, val) pda_to_op("add", field, val)
120#define sub_pda(field, val) pda_to_op("sub", field, val)
121#define or_pda(field, val) pda_to_op("or", field, val)
122
123/* This is not atomic against other CPUs -- CPU preemption needs to be off */
124#define test_and_clear_bit_pda(bit, field) \
125({ \
126 int old__; \
127 asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \
128 : "=r" (old__), "+m" (_proxy_pda.field) \
129 : "dIr" (bit), "i" (pda_offset(field)) : "memory");\
130 old__; \
131})
132
133#endif
134
135#define PDA_STACKOFFSET (5*8)
136
137#endif /* _ASM_X86_PDA_H */
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ece72053ba63..0b64af4f13ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -2,53 +2,12 @@
2#define _ASM_X86_PERCPU_H 2#define _ASM_X86_PERCPU_H
3 3
4#ifdef CONFIG_X86_64 4#ifdef CONFIG_X86_64
5#include <linux/compiler.h> 5#define __percpu_seg gs
6 6#define __percpu_mov_op movq
7/* Same as asm-generic/percpu.h, except that we store the per cpu offset 7#else
8 in the PDA. Longer term the PDA and every per cpu variable 8#define __percpu_seg fs
9 should be just put into a single section and referenced directly 9#define __percpu_mov_op movl
10 from %gs */
11
12#ifdef CONFIG_SMP
13#include <asm/pda.h>
14
15#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
16#define __my_cpu_offset read_pda(data_offset)
17
18#define per_cpu_offset(x) (__per_cpu_offset(x))
19
20#endif 10#endif
21#include <asm-generic/percpu.h>
22
23DECLARE_PER_CPU(struct x8664_pda, pda);
24
25/*
26 * These are supposed to be implemented as a single instruction which
27 * operates on the per-cpu data base segment. x86-64 doesn't have
28 * that yet, so this is a fairly inefficient workaround for the
29 * meantime. The single instruction is atomic with respect to
30 * preemption and interrupts, so we need to explicitly disable
31 * interrupts here to achieve the same effect. However, because it
32 * can be used from within interrupt-disable/enable, we can't actually
33 * disable interrupts; disabling preemption is enough.
34 */
35#define x86_read_percpu(var) \
36 ({ \
37 typeof(per_cpu_var(var)) __tmp; \
38 preempt_disable(); \
39 __tmp = __get_cpu_var(var); \
40 preempt_enable(); \
41 __tmp; \
42 })
43
44#define x86_write_percpu(var, val) \
45 do { \
46 preempt_disable(); \
47 __get_cpu_var(var) = (val); \
48 preempt_enable(); \
49 } while(0)
50
51#else /* CONFIG_X86_64 */
52 11
53#ifdef __ASSEMBLY__ 12#ifdef __ASSEMBLY__
54 13
@@ -65,47 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
65 * PER_CPU(cpu_gdt_descr, %ebx) 24 * PER_CPU(cpu_gdt_descr, %ebx)
66 */ 25 */
67#ifdef CONFIG_SMP 26#ifdef CONFIG_SMP
68#define PER_CPU(var, reg) \ 27#define PER_CPU(var, reg) \
69 movl %fs:per_cpu__##this_cpu_off, reg; \ 28 __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \
70 lea per_cpu__##var(reg), reg 29 lea per_cpu__##var(reg), reg
71#define PER_CPU_VAR(var) %fs:per_cpu__##var 30#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var
72#else /* ! SMP */ 31#else /* ! SMP */
73#define PER_CPU(var, reg) \ 32#define PER_CPU(var, reg) \
74 movl $per_cpu__##var, reg 33 __percpu_mov_op $per_cpu__##var, reg
75#define PER_CPU_VAR(var) per_cpu__##var 34#define PER_CPU_VAR(var) per_cpu__##var
76#endif /* SMP */ 35#endif /* SMP */
77 36
78#else /* ...!ASSEMBLY */ 37#else /* ...!ASSEMBLY */
79 38
80/* 39#include <linux/stringify.h>
81 * PER_CPU finds an address of a per-cpu variable.
82 *
83 * Args:
84 * var - variable name
85 * cpu - 32bit register containing the current CPU number
86 *
87 * The resulting address is stored in the "cpu" argument.
88 *
89 * Example:
90 * PER_CPU(cpu_gdt_descr, %ebx)
91 */
92#ifdef CONFIG_SMP
93
94#define __my_cpu_offset x86_read_percpu(this_cpu_off)
95 40
96/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ 41#ifdef CONFIG_SMP
97#define __percpu_seg "%%fs:" 42#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
98 43#define __my_cpu_offset percpu_read(this_cpu_off)
99#else /* !SMP */ 44#else
100 45#define __percpu_arg(x) "%" #x
101#define __percpu_seg "" 46#endif
102
103#endif /* SMP */
104
105#include <asm-generic/percpu.h>
106
107/* We can use this directly for local CPU (faster). */
108DECLARE_PER_CPU(unsigned long, this_cpu_off);
109 47
110/* For arch-specific code, we can use direct single-insn ops (they 48/* For arch-specific code, we can use direct single-insn ops (they
111 * don't give an lvalue though). */ 49 * don't give an lvalue though). */
@@ -120,20 +58,25 @@ do { \
120 } \ 58 } \
121 switch (sizeof(var)) { \ 59 switch (sizeof(var)) { \
122 case 1: \ 60 case 1: \
123 asm(op "b %1,"__percpu_seg"%0" \ 61 asm(op "b %1,"__percpu_arg(0) \
124 : "+m" (var) \ 62 : "+m" (var) \
125 : "ri" ((T__)val)); \ 63 : "ri" ((T__)val)); \
126 break; \ 64 break; \
127 case 2: \ 65 case 2: \
128 asm(op "w %1,"__percpu_seg"%0" \ 66 asm(op "w %1,"__percpu_arg(0) \
129 : "+m" (var) \ 67 : "+m" (var) \
130 : "ri" ((T__)val)); \ 68 : "ri" ((T__)val)); \
131 break; \ 69 break; \
132 case 4: \ 70 case 4: \
133 asm(op "l %1,"__percpu_seg"%0" \ 71 asm(op "l %1,"__percpu_arg(0) \
134 : "+m" (var) \ 72 : "+m" (var) \
135 : "ri" ((T__)val)); \ 73 : "ri" ((T__)val)); \
136 break; \ 74 break; \
75 case 8: \
76 asm(op "q %1,"__percpu_arg(0) \
77 : "+m" (var) \
78 : "re" ((T__)val)); \
79 break; \
137 default: __bad_percpu_size(); \ 80 default: __bad_percpu_size(); \
138 } \ 81 } \
139} while (0) 82} while (0)
@@ -143,17 +86,22 @@ do { \
143 typeof(var) ret__; \ 86 typeof(var) ret__; \
144 switch (sizeof(var)) { \ 87 switch (sizeof(var)) { \
145 case 1: \ 88 case 1: \
146 asm(op "b "__percpu_seg"%1,%0" \ 89 asm(op "b "__percpu_arg(1)",%0" \
147 : "=r" (ret__) \ 90 : "=r" (ret__) \
148 : "m" (var)); \ 91 : "m" (var)); \
149 break; \ 92 break; \
150 case 2: \ 93 case 2: \
151 asm(op "w "__percpu_seg"%1,%0" \ 94 asm(op "w "__percpu_arg(1)",%0" \
152 : "=r" (ret__) \ 95 : "=r" (ret__) \
153 : "m" (var)); \ 96 : "m" (var)); \
154 break; \ 97 break; \
155 case 4: \ 98 case 4: \
156 asm(op "l "__percpu_seg"%1,%0" \ 99 asm(op "l "__percpu_arg(1)",%0" \
100 : "=r" (ret__) \
101 : "m" (var)); \
102 break; \
103 case 8: \
104 asm(op "q "__percpu_arg(1)",%0" \
157 : "=r" (ret__) \ 105 : "=r" (ret__) \
158 : "m" (var)); \ 106 : "m" (var)); \
159 break; \ 107 break; \
@@ -162,13 +110,30 @@ do { \
162 ret__; \ 110 ret__; \
163}) 111})
164 112
165#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) 113#define percpu_read(var) percpu_from_op("mov", per_cpu__##var)
166#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) 114#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
167#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) 115#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
168#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) 116#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
169#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) 117#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val)
118#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
119#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
120
121/* This is not atomic against other CPUs -- CPU preemption needs to be off */
122#define x86_test_and_clear_bit_percpu(bit, var) \
123({ \
124 int old__; \
125 asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \
126 : "=r" (old__), "+m" (per_cpu__##var) \
127 : "dIr" (bit)); \
128 old__; \
129})
130
131#include <asm-generic/percpu.h>
132
133/* We can use this directly for local CPU (faster). */
134DECLARE_PER_CPU(unsigned long, this_cpu_off);
135
170#endif /* !__ASSEMBLY__ */ 136#endif /* !__ASSEMBLY__ */
171#endif /* !CONFIG_X86_64 */
172 137
173#ifdef CONFIG_SMP 138#ifdef CONFIG_SMP
174 139
@@ -195,9 +160,9 @@ do { \
195#define early_per_cpu_ptr(_name) (_name##_early_ptr) 160#define early_per_cpu_ptr(_name) (_name##_early_ptr)
196#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) 161#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
197#define early_per_cpu(_name, _cpu) \ 162#define early_per_cpu(_name, _cpu) \
198 (early_per_cpu_ptr(_name) ? \ 163 *(early_per_cpu_ptr(_name) ? \
199 early_per_cpu_ptr(_name)[_cpu] : \ 164 &early_per_cpu_ptr(_name)[_cpu] : \
200 per_cpu(_name, _cpu)) 165 &per_cpu(_name, _cpu))
201 166
202#else /* !CONFIG_SMP */ 167#else /* !CONFIG_SMP */
203#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ 168#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index cb7c151a8bff..dd14c54ac718 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -42,6 +42,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
42 42
43static inline void pte_free(struct mm_struct *mm, struct page *pte) 43static inline void pte_free(struct mm_struct *mm, struct page *pte)
44{ 44{
45 pgtable_page_dtor(pte);
45 __free_page(pte); 46 __free_page(pte);
46} 47}
47 48
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ba09289accaa..1df9637dfda3 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -11,7 +11,6 @@
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include <linux/threads.h> 13#include <linux/threads.h>
14#include <asm/pda.h>
15 14
16extern pud_t level3_kernel_pgt[512]; 15extern pud_t level3_kernel_pgt[512];
17extern pud_t level3_ident_pgt[512]; 16extern pud_t level3_ident_pgt[512];
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 091cd8855f2e..befa20b4a68c 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -378,6 +378,22 @@ union thread_xstate {
378 378
379#ifdef CONFIG_X86_64 379#ifdef CONFIG_X86_64
380DECLARE_PER_CPU(struct orig_ist, orig_ist); 380DECLARE_PER_CPU(struct orig_ist, orig_ist);
381
382union irq_stack_union {
383 char irq_stack[IRQ_STACK_SIZE];
384 /*
385 * GCC hardcodes the stack canary as %gs:40. Since the
386 * irq_stack is the object at %gs:0, we reserve the bottom
387 * 48 bytes of the irq stack for the canary.
388 */
389 struct {
390 char gs_base[40];
391 unsigned long stack_canary;
392 };
393};
394
395DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
396DECLARE_PER_CPU(char *, irq_stack_ptr);
381#endif 397#endif
382 398
383extern void print_cpu_info(struct cpuinfo_x86 *); 399extern void print_cpu_info(struct cpuinfo_x86 *);
@@ -754,7 +770,6 @@ extern struct desc_ptr early_gdt_descr;
754extern void cpu_set_gdt(int); 770extern void cpu_set_gdt(int);
755extern void switch_to_new_gdt(void); 771extern void switch_to_new_gdt(void);
756extern void cpu_init(void); 772extern void cpu_init(void);
757extern void init_gdt(int cpu);
758 773
759static inline unsigned long get_debugctlmsr(void) 774static inline unsigned long get_debugctlmsr(void)
760{ 775{
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ebe858cdc8a3..536949749bc2 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -100,7 +100,6 @@ extern unsigned long init_pg_tables_start;
100extern unsigned long init_pg_tables_end; 100extern unsigned long init_pg_tables_end;
101 101
102#else 102#else
103void __init x86_64_init_pda(void);
104void __init x86_64_start_kernel(char *real_mode); 103void __init x86_64_start_kernel(char *real_mode);
105void __init x86_64_start_reservations(char *real_mode_data); 104void __init x86_64_start_reservations(char *real_mode_data);
106 105
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index 19953df61c52..45ef8a1b9d7c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -15,34 +15,8 @@
15# include <asm/io_apic.h> 15# include <asm/io_apic.h>
16# endif 16# endif
17#endif 17#endif
18#include <asm/pda.h>
19#include <asm/thread_info.h> 18#include <asm/thread_info.h>
20 19#include <asm/cpumask.h>
21#ifdef CONFIG_X86_64
22
23extern cpumask_var_t cpu_callin_mask;
24extern cpumask_var_t cpu_callout_mask;
25extern cpumask_var_t cpu_initialized_mask;
26extern cpumask_var_t cpu_sibling_setup_mask;
27
28#else /* CONFIG_X86_32 */
29
30extern cpumask_t cpu_callin_map;
31extern cpumask_t cpu_callout_map;
32extern cpumask_t cpu_initialized;
33extern cpumask_t cpu_sibling_setup_map;
34
35#define cpu_callin_mask ((struct cpumask *)&cpu_callin_map)
36#define cpu_callout_mask ((struct cpumask *)&cpu_callout_map)
37#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
38#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
39
40#endif /* CONFIG_X86_32 */
41
42extern void (*mtrr_hook)(void);
43extern void zap_low_mappings(void);
44
45extern int __cpuinit get_local_pda(int cpu);
46 20
47extern int smp_num_siblings; 21extern int smp_num_siblings;
48extern unsigned int num_processors; 22extern unsigned int num_processors;
@@ -50,9 +24,7 @@ extern unsigned int num_processors;
50DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 24DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
51DECLARE_PER_CPU(cpumask_t, cpu_core_map); 25DECLARE_PER_CPU(cpumask_t, cpu_core_map);
52DECLARE_PER_CPU(u16, cpu_llc_id); 26DECLARE_PER_CPU(u16, cpu_llc_id);
53#ifdef CONFIG_X86_32
54DECLARE_PER_CPU(int, cpu_number); 27DECLARE_PER_CPU(int, cpu_number);
55#endif
56 28
57static inline struct cpumask *cpu_sibling_mask(int cpu) 29static inline struct cpumask *cpu_sibling_mask(int cpu)
58{ 30{
@@ -167,8 +139,6 @@ void play_dead_common(void);
167void native_send_call_func_ipi(const struct cpumask *mask); 139void native_send_call_func_ipi(const struct cpumask *mask);
168void native_send_call_func_single_ipi(int cpu); 140void native_send_call_func_single_ipi(int cpu);
169 141
170extern void prefill_possible_map(void);
171
172void smp_store_cpu_info(int id); 142void smp_store_cpu_info(int id);
173#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu) 143#define cpu_physical_id(cpu) per_cpu(x86_cpu_to_apicid, cpu)
174 144
@@ -177,10 +147,6 @@ static inline int num_booting_cpus(void)
177{ 147{
178 return cpumask_weight(cpu_callout_mask); 148 return cpumask_weight(cpu_callout_mask);
179} 149}
180#else
181static inline void prefill_possible_map(void)
182{
183}
184#endif /* CONFIG_SMP */ 150#endif /* CONFIG_SMP */
185 151
186extern unsigned disabled_cpus __cpuinitdata; 152extern unsigned disabled_cpus __cpuinitdata;
@@ -191,11 +157,11 @@ extern unsigned disabled_cpus __cpuinitdata;
191 * from the initial startup. We map APIC_BASE very early in page_setup(), 157 * from the initial startup. We map APIC_BASE very early in page_setup(),
192 * so this is correct in the x86 case. 158 * so this is correct in the x86 case.
193 */ 159 */
194#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) 160#define raw_smp_processor_id() (percpu_read(cpu_number))
195extern int safe_smp_processor_id(void); 161extern int safe_smp_processor_id(void);
196 162
197#elif defined(CONFIG_X86_64_SMP) 163#elif defined(CONFIG_X86_64_SMP)
198#define raw_smp_processor_id() read_pda(cpunumber) 164#define raw_smp_processor_id() (percpu_read(cpu_number))
199 165
200#define stack_smp_processor_id() \ 166#define stack_smp_processor_id() \
201({ \ 167({ \
@@ -205,10 +171,6 @@ extern int safe_smp_processor_id(void);
205}) 171})
206#define safe_smp_processor_id() smp_processor_id() 172#define safe_smp_processor_id() smp_processor_id()
207 173
208#else /* !CONFIG_X86_32_SMP && !CONFIG_X86_64_SMP */
209#define cpu_physical_id(cpu) boot_cpu_physical_apicid
210#define safe_smp_processor_id() 0
211#define stack_smp_processor_id() 0
212#endif 174#endif
213 175
214#ifdef CONFIG_X86_LOCAL_APIC 176#ifdef CONFIG_X86_LOCAL_APIC
@@ -251,11 +213,5 @@ static inline int hard_smp_processor_id(void)
251 213
252#endif /* CONFIG_X86_LOCAL_APIC */ 214#endif /* CONFIG_X86_LOCAL_APIC */
253 215
254#ifdef CONFIG_X86_HAS_BOOT_CPU_ID
255extern unsigned char boot_cpu_id;
256#else
257#define boot_cpu_id 0
258#endif
259
260#endif /* __ASSEMBLY__ */ 216#endif /* __ASSEMBLY__ */
261#endif /* _ASM_X86_SMP_H */ 217#endif /* _ASM_X86_SMP_H */
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
new file mode 100644
index 000000000000..36a700acaf2b
--- /dev/null
+++ b/arch/x86/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
1#ifndef _ASM_STACKPROTECTOR_H
2#define _ASM_STACKPROTECTOR_H 1
3
4#include <asm/tsc.h>
5#include <asm/processor.h>
6
7/*
8 * Initialize the stackprotector canary value.
9 *
10 * NOTE: this must only be called from functions that never return,
11 * and it must always be inlined.
12 */
13static __always_inline void boot_init_stack_canary(void)
14{
15 u64 canary;
16 u64 tsc;
17
18 /*
19 * Build time only check to make sure the stack_canary is at
20 * offset 40 in irq_stack_union; this is a gcc ABI requirement
21 */
22 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
23
24 /*
25 * We use both the random pool and the current TSC as sources
26 * of randomness. The TSC only matters for very early init,
27 * where it already has some randomness on most systems. Later
28 * on during bootup the random pool has true entropy too.
29 */
30 get_random_bytes(&canary, sizeof(canary));
31 tsc = __native_read_tsc();
32 canary += tsc + (tsc << 32UL);
33
34 current->stack_canary = canary;
35 percpu_write(irq_stack_union.stack_canary, canary);
36}
37
38#endif
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 9c6797c3e56c..c0b0bda754ee 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -40,7 +40,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
40 struct old_sigaction __user *); 40 struct old_sigaction __user *);
41asmlinkage int sys_sigaltstack(unsigned long); 41asmlinkage int sys_sigaltstack(unsigned long);
42asmlinkage unsigned long sys_sigreturn(unsigned long); 42asmlinkage unsigned long sys_sigreturn(unsigned long);
43asmlinkage int sys_rt_sigreturn(struct pt_regs); 43asmlinkage int sys_rt_sigreturn(unsigned long);
44 44
45/* kernel/ioport.c */ 45/* kernel/ioport.c */
46asmlinkage long sys_iopl(unsigned long); 46asmlinkage long sys_iopl(unsigned long);
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8e626ea33a1a..2fcc70bc85f3 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -86,27 +86,44 @@ do { \
86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ 86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
87 "r12", "r13", "r14", "r15" 87 "r12", "r13", "r14", "r15"
88 88
89#ifdef CONFIG_CC_STACKPROTECTOR
90#define __switch_canary \
91 "movq %P[task_canary](%%rsi),%%r8\n\t" \
92 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
93#define __switch_canary_oparam \
94 , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
95#define __switch_canary_iparam \
96 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
97#else /* CC_STACKPROTECTOR */
98#define __switch_canary
99#define __switch_canary_oparam
100#define __switch_canary_iparam
101#endif /* CC_STACKPROTECTOR */
102
89/* Save restore flags to clear handle leaking NT */ 103/* Save restore flags to clear handle leaking NT */
90#define switch_to(prev, next, last) \ 104#define switch_to(prev, next, last) \
91 asm volatile(SAVE_CONTEXT \ 105 asm volatile(SAVE_CONTEXT \
92 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 106 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
93 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ 107 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
94 "call __switch_to\n\t" \ 108 "call __switch_to\n\t" \
95 ".globl thread_return\n" \ 109 ".globl thread_return\n" \
96 "thread_return:\n\t" \ 110 "thread_return:\n\t" \
97 "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ 111 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
112 __switch_canary \
98 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 113 "movq %P[thread_info](%%rsi),%%r8\n\t" \
99 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \ 114 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
100 "movq %%rax,%%rdi\n\t" \ 115 "movq %%rax,%%rdi\n\t" \
101 "jc ret_from_fork\n\t" \ 116 "jc ret_from_fork\n\t" \
102 RESTORE_CONTEXT \ 117 RESTORE_CONTEXT \
103 : "=a" (last) \ 118 : "=a" (last) \
119 __switch_canary_oparam \
104 : [next] "S" (next), [prev] "D" (prev), \ 120 : [next] "S" (next), [prev] "D" (prev), \
105 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ 121 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
106 [ti_flags] "i" (offsetof(struct thread_info, flags)), \ 122 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
107 [tif_fork] "i" (TIF_FORK), \ 123 [tif_fork] "i" (TIF_FORK), \
108 [thread_info] "i" (offsetof(struct task_struct, stack)), \ 124 [thread_info] "i" (offsetof(struct task_struct, stack)), \
109 [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ 125 [current_task] "m" (per_cpu_var(current_task)) \
126 __switch_canary_iparam \
110 : "memory", "cc" __EXTRA_CLOBBER) 127 : "memory", "cc" __EXTRA_CLOBBER)
111#endif 128#endif
112 129
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 98789647baa9..b46f8ca007b5 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -194,25 +194,21 @@ static inline struct thread_info *current_thread_info(void)
194 194
195#else /* X86_32 */ 195#else /* X86_32 */
196 196
197#include <asm/pda.h> 197#include <asm/percpu.h>
198#define KERNEL_STACK_OFFSET (5*8)
198 199
199/* 200/*
200 * macros/functions for gaining access to the thread information structure 201 * macros/functions for gaining access to the thread information structure
201 * preempt_count needs to be 1 initially, until the scheduler is functional. 202 * preempt_count needs to be 1 initially, until the scheduler is functional.
202 */ 203 */
203#ifndef __ASSEMBLY__ 204#ifndef __ASSEMBLY__
204static inline struct thread_info *current_thread_info(void) 205DECLARE_PER_CPU(unsigned long, kernel_stack);
205{
206 struct thread_info *ti;
207 ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
208 return ti;
209}
210 206
211/* do not use in interrupt context */ 207static inline struct thread_info *current_thread_info(void)
212static inline struct thread_info *stack_thread_info(void)
213{ 208{
214 struct thread_info *ti; 209 struct thread_info *ti;
215 asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); 210 ti = (void *)(percpu_read(kernel_stack) +
211 KERNEL_STACK_OFFSET - THREAD_SIZE);
216 return ti; 212 return ti;
217} 213}
218 214
@@ -220,8 +216,8 @@ static inline struct thread_info *stack_thread_info(void)
220 216
221/* how to get the thread information struct from ASM */ 217/* how to get the thread information struct from ASM */
222#define GET_THREAD_INFO(reg) \ 218#define GET_THREAD_INFO(reg) \
223 movq %gs:pda_kernelstack,reg ; \ 219 movq PER_CPU_VAR(kernel_stack),reg ; \
224 subq $(THREAD_SIZE-PDA_STACKOFFSET),reg 220 subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
225 221
226#endif 222#endif
227 223
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index 1287dc1347d6..b5c9d45c981f 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -1,18 +1,13 @@
1/* x86 architecture timex specifications */
2#ifndef _ASM_X86_TIMEX_H 1#ifndef _ASM_X86_TIMEX_H
3#define _ASM_X86_TIMEX_H 2#define _ASM_X86_TIMEX_H
4 3
5#include <asm/processor.h> 4#include <asm/processor.h>
6#include <asm/tsc.h> 5#include <asm/tsc.h>
7 6
8#ifdef CONFIG_X86_ELAN 7/* The PIT ticks at this frequency (in HZ): */
9# define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */ 8#define PIT_TICK_RATE 1193182
10#elif defined(CONFIG_X86_RDC321X) 9
11# define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */ 10#define CLOCK_TICK_RATE PIT_TICK_RATE
12#else
13# define PIT_TICK_RATE 1193182 /* Underlying HZ */
14#endif
15#define CLOCK_TICK_RATE PIT_TICK_RATE
16 11
17#define ARCH_HAS_READ_CURRENT_TIMER 12#define ARCH_HAS_READ_CURRENT_TIMER
18 13
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 0e7bbb549116..d3539f998f88 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
113 __flush_tlb(); 113 __flush_tlb();
114} 114}
115 115
116static inline void native_flush_tlb_others(const cpumask_t *cpumask, 116static inline void native_flush_tlb_others(const struct cpumask *cpumask,
117 struct mm_struct *mm, 117 struct mm_struct *mm,
118 unsigned long va) 118 unsigned long va)
119{ 119{
@@ -142,31 +142,28 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
142 flush_tlb_mm(vma->vm_mm); 142 flush_tlb_mm(vma->vm_mm);
143} 143}
144 144
145void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, 145void native_flush_tlb_others(const struct cpumask *cpumask,
146 unsigned long va); 146 struct mm_struct *mm, unsigned long va);
147 147
148#define TLBSTATE_OK 1 148#define TLBSTATE_OK 1
149#define TLBSTATE_LAZY 2 149#define TLBSTATE_LAZY 2
150 150
151#ifdef CONFIG_X86_32
152struct tlb_state { 151struct tlb_state {
153 struct mm_struct *active_mm; 152 struct mm_struct *active_mm;
154 int state; 153 int state;
155 char __cacheline_padding[L1_CACHE_BYTES-8];
156}; 154};
157DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); 155DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
158 156
159void reset_lazy_tlbstate(void);
160#else
161static inline void reset_lazy_tlbstate(void) 157static inline void reset_lazy_tlbstate(void)
162{ 158{
159 percpu_write(cpu_tlbstate.state, 0);
160 percpu_write(cpu_tlbstate.active_mm, &init_mm);
163} 161}
164#endif
165 162
166#endif /* SMP */ 163#endif /* SMP */
167 164
168#ifndef CONFIG_PARAVIRT 165#ifndef CONFIG_PARAVIRT
169#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) 166#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va)
170#endif 167#endif
171 168
172static inline void flush_tlb_kernel_range(unsigned long start, 169static inline void flush_tlb_kernel_range(unsigned long start,
@@ -175,4 +172,6 @@ static inline void flush_tlb_kernel_range(unsigned long start,
175 flush_tlb_all(); 172 flush_tlb_all();
176} 173}
177 174
175extern void zap_low_mappings(void);
176
178#endif /* _ASM_X86_TLBFLUSH_H */ 177#endif /* _ASM_X86_TLBFLUSH_H */
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4e2f2e0aab27..77cfb2cfb386 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -74,6 +74,8 @@ static inline const struct cpumask *cpumask_of_node(int node)
74 return &node_to_cpumask_map[node]; 74 return &node_to_cpumask_map[node];
75} 75}
76 76
77static inline void setup_node_to_cpumask_map(void) { }
78
77#else /* CONFIG_X86_64 */ 79#else /* CONFIG_X86_64 */
78 80
79/* Mappings between node number and cpus on that node. */ 81/* Mappings between node number and cpus on that node. */
@@ -83,7 +85,8 @@ extern cpumask_t *node_to_cpumask_map;
83DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); 85DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
84 86
85/* Returns the number of the current Node. */ 87/* Returns the number of the current Node. */
86#define numa_node_id() read_pda(nodenumber) 88DECLARE_PER_CPU(int, node_number);
89#define numa_node_id() percpu_read(node_number)
87 90
88#ifdef CONFIG_DEBUG_PER_CPU_MAPS 91#ifdef CONFIG_DEBUG_PER_CPU_MAPS
89extern int cpu_to_node(int cpu); 92extern int cpu_to_node(int cpu);
@@ -102,10 +105,7 @@ static inline int cpu_to_node(int cpu)
102/* Same function but used if called before per_cpu areas are setup */ 105/* Same function but used if called before per_cpu areas are setup */
103static inline int early_cpu_to_node(int cpu) 106static inline int early_cpu_to_node(int cpu)
104{ 107{
105 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 108 return early_per_cpu(x86_cpu_to_node_map, cpu);
106 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
107
108 return per_cpu(x86_cpu_to_node_map, cpu);
109} 109}
110 110
111/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ 111/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
@@ -122,6 +122,8 @@ static inline cpumask_t node_to_cpumask(int node)
122 122
123#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ 123#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
124 124
125extern void setup_node_to_cpumask_map(void);
126
125/* 127/*
126 * Replace default node_to_cpumask_ptr with optimized version 128 * Replace default node_to_cpumask_ptr with optimized version
127 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" 129 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
@@ -192,9 +194,20 @@ extern int __node_distance(int, int);
192 194
193#else /* !CONFIG_NUMA */ 195#else /* !CONFIG_NUMA */
194 196
195#define numa_node_id() 0 197static inline int numa_node_id(void)
196#define cpu_to_node(cpu) 0 198{
197#define early_cpu_to_node(cpu) 0 199 return 0;
200}
201
202static inline int cpu_to_node(int cpu)
203{
204 return 0;
205}
206
207static inline int early_cpu_to_node(int cpu)
208{
209 return 0;
210}
198 211
199static inline const cpumask_t *cpumask_of_node(int node) 212static inline const cpumask_t *cpumask_of_node(int node)
200{ 213{
@@ -209,6 +222,8 @@ static inline int node_to_first_cpu(int node)
209 return first_cpu(cpu_online_map); 222 return first_cpu(cpu_online_map);
210} 223}
211 224
225static inline void setup_node_to_cpumask_map(void) { }
226
212/* 227/*
213 * Replace default node_to_cpumask_ptr with optimized version 228 * Replace default node_to_cpumask_ptr with optimized version
214 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" 229 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 780ba0ab94f9..90f06c25221d 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,6 +13,7 @@ extern unsigned char *trampoline_base;
13 13
14extern unsigned long init_rsp; 14extern unsigned long init_rsp;
15extern unsigned long initial_code; 15extern unsigned long initial_code;
16extern unsigned long initial_gs;
16 17
17#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) 18#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
18#define TRAMPOLINE_BASE 0x6000 19#define TRAMPOLINE_BASE 0x6000
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
new file mode 100644
index 000000000000..8ac1d7e312f3
--- /dev/null
+++ b/arch/x86/include/asm/uv/uv.h
@@ -0,0 +1,33 @@
1#ifndef _ASM_X86_UV_UV_H
2#define _ASM_X86_UV_UV_H
3
4enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
5
6#ifdef CONFIG_X86_UV
7
8extern enum uv_system_type get_uv_system_type(void);
9extern int is_uv_system(void);
10extern void uv_cpu_init(void);
11extern void uv_system_init(void);
12extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
13extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
14 struct mm_struct *mm,
15 unsigned long va,
16 unsigned int cpu);
17
18#else /* X86_UV */
19
20static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
21static inline int is_uv_system(void) { return 0; }
22static inline void uv_cpu_init(void) { }
23static inline void uv_system_init(void) { }
24static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
25{ return 1; }
26static inline const struct cpumask *
27uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
28 unsigned long va, unsigned int cpu)
29{ return cpumask; }
30
31#endif /* X86_UV */
32
33#endif /* _ASM_X86_UV_UV_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 50423c7b56b2..9b0e61bf7a88 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,7 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
325#define cpubit_isset(cpu, bau_local_cpumask) \ 325#define cpubit_isset(cpu, bau_local_cpumask) \
326 test_bit((cpu), (bau_local_cpumask).bits) 326 test_bit((cpu), (bau_local_cpumask).bits)
327 327
328extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
329extern void uv_bau_message_intr1(void); 328extern void uv_bau_message_intr1(void);
330extern void uv_bau_timeout_intr1(void); 329extern void uv_bau_timeout_intr1(void);
331 330
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d364df03c1d6..37fa30bada17 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,11 +23,12 @@ nostackp := $(call cc-option, -fno-stack-protector)
23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
24CFLAGS_hpet.o := $(nostackp) 24CFLAGS_hpet.o := $(nostackp)
25CFLAGS_tsc.o := $(nostackp) 25CFLAGS_tsc.o := $(nostackp)
26CFLAGS_paravirt.o := $(nostackp)
26 27
27obj-y := process_$(BITS).o signal.o entry_$(BITS).o 28obj-y := process_$(BITS).o signal.o entry_$(BITS).o
28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
29obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o 30obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
30obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 31obj-y += setup.o i8259.o irqinit_$(BITS).o
31obj-$(CONFIG_X86_VISWS) += visws_quirks.o 32obj-$(CONFIG_X86_VISWS) += visws_quirks.o
32obj-$(CONFIG_X86_32) += probe_roms_32.o 33obj-$(CONFIG_X86_32) += probe_roms_32.o
33obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 34obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -57,9 +58,9 @@ obj-$(CONFIG_PCI) += early-quirks.o
57apm-y := apm_32.o 58apm-y := apm_32.o
58obj-$(CONFIG_APM) += apm.o 59obj-$(CONFIG_APM) += apm.o
59obj-$(CONFIG_X86_SMP) += smp.o 60obj-$(CONFIG_X86_SMP) += smp.o
60obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o 61obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o
61obj-$(CONFIG_X86_32_SMP) += smpcommon.o 62obj-$(CONFIG_SMP) += setup_percpu.o
62obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 63obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
63obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 64obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
64obj-$(CONFIG_X86_MPPARSE) += mpparse.o 65obj-$(CONFIG_X86_MPPARSE) += mpparse.o
65obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o 66obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
@@ -114,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
114### 115###
115# 64 bit specific files 116# 64 bit specific files
116ifeq ($(CONFIG_X86_64),y) 117ifeq ($(CONFIG_X86_64),y)
117 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 118 obj-y += genapic_64.o genapic_flat_64.o
118 obj-y += bios_uv.o uv_irq.o uv_sysfs.o
119 obj-y += genx2apic_cluster.o 119 obj-y += genx2apic_cluster.o
120 obj-y += genx2apic_phys.o 120 obj-y += genx2apic_phys.o
121 obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o
122 obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o
121 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 123 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
122 obj-$(CONFIG_AUDIT) += audit_64.o 124 obj-$(CONFIG_AUDIT) += audit_64.o
123 125
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index d37593c2f438..4cb5964f1499 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -912,8 +912,8 @@ static u8 __init uniq_ioapic_id(u8 id)
912 DECLARE_BITMAP(used, 256); 912 DECLARE_BITMAP(used, 256);
913 bitmap_zero(used, 256); 913 bitmap_zero(used, 256);
914 for (i = 0; i < nr_ioapics; i++) { 914 for (i = 0; i < nr_ioapics; i++) {
915 struct mp_config_ioapic *ia = &mp_ioapics[i]; 915 struct mpc_ioapic *ia = &mp_ioapics[i];
916 __set_bit(ia->mp_apicid, used); 916 __set_bit(ia->apicid, used);
917 } 917 }
918 if (!test_bit(id, used)) 918 if (!test_bit(id, used))
919 return id; 919 return id;
@@ -945,47 +945,47 @@ void __init mp_register_ioapic(int id, u32 address, u32 gsi_base)
945 945
946 idx = nr_ioapics; 946 idx = nr_ioapics;
947 947
948 mp_ioapics[idx].mp_type = MP_IOAPIC; 948 mp_ioapics[idx].type = MP_IOAPIC;
949 mp_ioapics[idx].mp_flags = MPC_APIC_USABLE; 949 mp_ioapics[idx].flags = MPC_APIC_USABLE;
950 mp_ioapics[idx].mp_apicaddr = address; 950 mp_ioapics[idx].apicaddr = address;
951 951
952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address); 952 set_fixmap_nocache(FIX_IO_APIC_BASE_0 + idx, address);
953 mp_ioapics[idx].mp_apicid = uniq_ioapic_id(id); 953 mp_ioapics[idx].apicid = uniq_ioapic_id(id);
954#ifdef CONFIG_X86_32 954#ifdef CONFIG_X86_32
955 mp_ioapics[idx].mp_apicver = io_apic_get_version(idx); 955 mp_ioapics[idx].apicver = io_apic_get_version(idx);
956#else 956#else
957 mp_ioapics[idx].mp_apicver = 0; 957 mp_ioapics[idx].apicver = 0;
958#endif 958#endif
959 /* 959 /*
960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups 960 * Build basic GSI lookup table to facilitate gsi->io_apic lookups
961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs). 961 * and to prevent reprogramming of IOAPIC pins (PCI GSIs).
962 */ 962 */
963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].mp_apicid; 963 mp_ioapic_routing[idx].apic_id = mp_ioapics[idx].apicid;
964 mp_ioapic_routing[idx].gsi_base = gsi_base; 964 mp_ioapic_routing[idx].gsi_base = gsi_base;
965 mp_ioapic_routing[idx].gsi_end = gsi_base + 965 mp_ioapic_routing[idx].gsi_end = gsi_base +
966 io_apic_get_redir_entries(idx); 966 io_apic_get_redir_entries(idx);
967 967
968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%lx, " 968 printk(KERN_INFO "IOAPIC[%d]: apic_id %d, version %d, address 0x%x, "
969 "GSI %d-%d\n", idx, mp_ioapics[idx].mp_apicid, 969 "GSI %d-%d\n", idx, mp_ioapics[idx].apicid,
970 mp_ioapics[idx].mp_apicver, mp_ioapics[idx].mp_apicaddr, 970 mp_ioapics[idx].apicver, mp_ioapics[idx].apicaddr,
971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end); 971 mp_ioapic_routing[idx].gsi_base, mp_ioapic_routing[idx].gsi_end);
972 972
973 nr_ioapics++; 973 nr_ioapics++;
974} 974}
975 975
976static void assign_to_mp_irq(struct mp_config_intsrc *m, 976static void assign_to_mp_irq(struct mpc_intsrc *m,
977 struct mp_config_intsrc *mp_irq) 977 struct mpc_intsrc *mp_irq)
978{ 978{
979 memcpy(mp_irq, m, sizeof(struct mp_config_intsrc)); 979 memcpy(mp_irq, m, sizeof(struct mpc_intsrc));
980} 980}
981 981
982static int mp_irq_cmp(struct mp_config_intsrc *mp_irq, 982static int mp_irq_cmp(struct mpc_intsrc *mp_irq,
983 struct mp_config_intsrc *m) 983 struct mpc_intsrc *m)
984{ 984{
985 return memcmp(mp_irq, m, sizeof(struct mp_config_intsrc)); 985 return memcmp(mp_irq, m, sizeof(struct mpc_intsrc));
986} 986}
987 987
988static void save_mp_irq(struct mp_config_intsrc *m) 988static void save_mp_irq(struct mpc_intsrc *m)
989{ 989{
990 int i; 990 int i;
991 991
@@ -1003,7 +1003,7 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1003{ 1003{
1004 int ioapic; 1004 int ioapic;
1005 int pin; 1005 int pin;
1006 struct mp_config_intsrc mp_irq; 1006 struct mpc_intsrc mp_irq;
1007 1007
1008 /* 1008 /*
1009 * Convert 'gsi' to 'ioapic.pin'. 1009 * Convert 'gsi' to 'ioapic.pin'.
@@ -1021,13 +1021,13 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
1021 if ((bus_irq == 0) && (trigger == 3)) 1021 if ((bus_irq == 0) && (trigger == 3))
1022 trigger = 1; 1022 trigger = 1;
1023 1023
1024 mp_irq.mp_type = MP_INTSRC; 1024 mp_irq.type = MP_INTSRC;
1025 mp_irq.mp_irqtype = mp_INT; 1025 mp_irq.irqtype = mp_INT;
1026 mp_irq.mp_irqflag = (trigger << 2) | polarity; 1026 mp_irq.irqflag = (trigger << 2) | polarity;
1027 mp_irq.mp_srcbus = MP_ISA_BUS; 1027 mp_irq.srcbus = MP_ISA_BUS;
1028 mp_irq.mp_srcbusirq = bus_irq; /* IRQ */ 1028 mp_irq.srcbusirq = bus_irq; /* IRQ */
1029 mp_irq.mp_dstapic = mp_ioapics[ioapic].mp_apicid; /* APIC ID */ 1029 mp_irq.dstapic = mp_ioapics[ioapic].apicid; /* APIC ID */
1030 mp_irq.mp_dstirq = pin; /* INTIN# */ 1030 mp_irq.dstirq = pin; /* INTIN# */
1031 1031
1032 save_mp_irq(&mp_irq); 1032 save_mp_irq(&mp_irq);
1033} 1033}
@@ -1037,7 +1037,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1037 int i; 1037 int i;
1038 int ioapic; 1038 int ioapic;
1039 unsigned int dstapic; 1039 unsigned int dstapic;
1040 struct mp_config_intsrc mp_irq; 1040 struct mpc_intsrc mp_irq;
1041 1041
1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA) 1042#if defined (CONFIG_MCA) || defined (CONFIG_EISA)
1043 /* 1043 /*
@@ -1062,7 +1062,7 @@ void __init mp_config_acpi_legacy_irqs(void)
1062 ioapic = mp_find_ioapic(0); 1062 ioapic = mp_find_ioapic(0);
1063 if (ioapic < 0) 1063 if (ioapic < 0)
1064 return; 1064 return;
1065 dstapic = mp_ioapics[ioapic].mp_apicid; 1065 dstapic = mp_ioapics[ioapic].apicid;
1066 1066
1067 /* 1067 /*
1068 * Use the default configuration for the IRQs 0-15. Unless 1068 * Use the default configuration for the IRQs 0-15. Unless
@@ -1072,16 +1072,14 @@ void __init mp_config_acpi_legacy_irqs(void)
1072 int idx; 1072 int idx;
1073 1073
1074 for (idx = 0; idx < mp_irq_entries; idx++) { 1074 for (idx = 0; idx < mp_irq_entries; idx++) {
1075 struct mp_config_intsrc *irq = mp_irqs + idx; 1075 struct mpc_intsrc *irq = mp_irqs + idx;
1076 1076
1077 /* Do we already have a mapping for this ISA IRQ? */ 1077 /* Do we already have a mapping for this ISA IRQ? */
1078 if (irq->mp_srcbus == MP_ISA_BUS 1078 if (irq->srcbus == MP_ISA_BUS && irq->srcbusirq == i)
1079 && irq->mp_srcbusirq == i)
1080 break; 1079 break;
1081 1080
1082 /* Do we already have a mapping for this IOAPIC pin */ 1081 /* Do we already have a mapping for this IOAPIC pin */
1083 if (irq->mp_dstapic == dstapic && 1082 if (irq->dstapic == dstapic && irq->dstirq == i)
1084 irq->mp_dstirq == i)
1085 break; 1083 break;
1086 } 1084 }
1087 1085
@@ -1090,13 +1088,13 @@ void __init mp_config_acpi_legacy_irqs(void)
1090 continue; /* IRQ already used */ 1088 continue; /* IRQ already used */
1091 } 1089 }
1092 1090
1093 mp_irq.mp_type = MP_INTSRC; 1091 mp_irq.type = MP_INTSRC;
1094 mp_irq.mp_irqflag = 0; /* Conforming */ 1092 mp_irq.irqflag = 0; /* Conforming */
1095 mp_irq.mp_srcbus = MP_ISA_BUS; 1093 mp_irq.srcbus = MP_ISA_BUS;
1096 mp_irq.mp_dstapic = dstapic; 1094 mp_irq.dstapic = dstapic;
1097 mp_irq.mp_irqtype = mp_INT; 1095 mp_irq.irqtype = mp_INT;
1098 mp_irq.mp_srcbusirq = i; /* Identity mapped */ 1096 mp_irq.srcbusirq = i; /* Identity mapped */
1099 mp_irq.mp_dstirq = i; 1097 mp_irq.dstirq = i;
1100 1098
1101 save_mp_irq(&mp_irq); 1099 save_mp_irq(&mp_irq);
1102 } 1100 }
@@ -1207,22 +1205,22 @@ int mp_config_acpi_gsi(unsigned char number, unsigned int devfn, u8 pin,
1207 u32 gsi, int triggering, int polarity) 1205 u32 gsi, int triggering, int polarity)
1208{ 1206{
1209#ifdef CONFIG_X86_MPPARSE 1207#ifdef CONFIG_X86_MPPARSE
1210 struct mp_config_intsrc mp_irq; 1208 struct mpc_intsrc mp_irq;
1211 int ioapic; 1209 int ioapic;
1212 1210
1213 if (!acpi_ioapic) 1211 if (!acpi_ioapic)
1214 return 0; 1212 return 0;
1215 1213
1216 /* print the entry should happen on mptable identically */ 1214 /* print the entry should happen on mptable identically */
1217 mp_irq.mp_type = MP_INTSRC; 1215 mp_irq.type = MP_INTSRC;
1218 mp_irq.mp_irqtype = mp_INT; 1216 mp_irq.irqtype = mp_INT;
1219 mp_irq.mp_irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) | 1217 mp_irq.irqflag = (triggering == ACPI_EDGE_SENSITIVE ? 4 : 0x0c) |
1220 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3); 1218 (polarity == ACPI_ACTIVE_HIGH ? 1 : 3);
1221 mp_irq.mp_srcbus = number; 1219 mp_irq.srcbus = number;
1222 mp_irq.mp_srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3); 1220 mp_irq.srcbusirq = (((devfn >> 3) & 0x1f) << 2) | ((pin - 1) & 3);
1223 ioapic = mp_find_ioapic(gsi); 1221 ioapic = mp_find_ioapic(gsi);
1224 mp_irq.mp_dstapic = mp_ioapic_routing[ioapic].apic_id; 1222 mp_irq.dstapic = mp_ioapic_routing[ioapic].apic_id;
1225 mp_irq.mp_dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base; 1223 mp_irq.dstirq = gsi - mp_ioapic_routing[ioapic].gsi_base;
1226 1224
1227 save_mp_irq(&mp_irq); 1225 save_mp_irq(&mp_irq);
1228#endif 1226#endif
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 707c1f6f95fa..4abff454c55b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
101 stack_start.sp = temp_stack + sizeof(temp_stack); 101 stack_start.sp = temp_stack + sizeof(temp_stack);
102 early_gdt_descr.address = 102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
104 initial_gs = per_cpu_offset(smp_processor_id());
104#endif 105#endif
105 initial_code = (unsigned long)wakeup_long64; 106 initial_code = (unsigned long)wakeup_long64;
106 saved_magic = 0x123456789abcdef0; 107 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 0f830e4f5675..c6f15647eba9 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -60,6 +60,24 @@
60# error SPURIOUS_APIC_VECTOR definition error 60# error SPURIOUS_APIC_VECTOR definition error
61#endif 61#endif
62 62
63unsigned int num_processors;
64unsigned disabled_cpus __cpuinitdata;
65/* Processor that is doing the boot up */
66unsigned int boot_cpu_physical_apicid = -1U;
67EXPORT_SYMBOL(boot_cpu_physical_apicid);
68unsigned int max_physical_apicid;
69
70/* Bitmask of physically existing CPUs */
71physid_mask_t phys_cpu_present_map;
72
73/*
74 * Map cpu index to physical APIC ID
75 */
76DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
77DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
78EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
79EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
80
63#ifdef CONFIG_X86_32 81#ifdef CONFIG_X86_32
64/* 82/*
65 * Knob to control our willingness to enable the local APIC. 83 * Knob to control our willingness to enable the local APIC.
@@ -895,6 +913,10 @@ void disable_local_APIC(void)
895{ 913{
896 unsigned int value; 914 unsigned int value;
897 915
916 /* APIC hasn't been mapped yet */
917 if (!apic_phys)
918 return;
919
898 clear_local_APIC(); 920 clear_local_APIC();
899 921
900 /* 922 /*
@@ -1126,6 +1148,13 @@ void __cpuinit setup_local_APIC(void)
1126 unsigned int value; 1148 unsigned int value;
1127 int i, j; 1149 int i, j;
1128 1150
1151 if (disable_apic) {
1152#ifdef CONFIG_X86_IO_APIC
1153 disable_ioapic_setup();
1154#endif
1155 return;
1156 }
1157
1129#ifdef CONFIG_X86_32 1158#ifdef CONFIG_X86_32
1130 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1159 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1131 if (lapic_is_integrated() && esr_disable) { 1160 if (lapic_is_integrated() && esr_disable) {
@@ -1566,11 +1595,11 @@ int apic_version[MAX_APICS];
1566 1595
1567int __init APIC_init_uniprocessor(void) 1596int __init APIC_init_uniprocessor(void)
1568{ 1597{
1569#ifdef CONFIG_X86_64
1570 if (disable_apic) { 1598 if (disable_apic) {
1571 pr_info("Apic disabled\n"); 1599 pr_info("Apic disabled\n");
1572 return -1; 1600 return -1;
1573 } 1601 }
1602#ifdef CONFIG_X86_64
1574 if (!cpu_has_apic) { 1603 if (!cpu_has_apic) {
1575 disable_apic = 1; 1604 disable_apic = 1;
1576 pr_info("Apic disabled by BIOS\n"); 1605 pr_info("Apic disabled by BIOS\n");
@@ -1833,6 +1862,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1833 num_processors++; 1862 num_processors++;
1834 cpu = cpumask_next_zero(-1, cpu_present_mask); 1863 cpu = cpumask_next_zero(-1, cpu_present_mask);
1835 1864
1865 if (version != apic_version[boot_cpu_physical_apicid])
1866 WARN_ONCE(1,
1867 "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
1868 apic_version[boot_cpu_physical_apicid], cpu, version);
1869
1836 physid_set(apicid, phys_cpu_present_map); 1870 physid_set(apicid, phys_cpu_present_map);
1837 if (apicid == boot_cpu_physical_apicid) { 1871 if (apicid == boot_cpu_physical_apicid) {
1838 /* 1872 /*
@@ -1868,17 +1902,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1868#endif 1902#endif
1869 1903
1870#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) 1904#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1871 /* are we being called early in kernel startup? */ 1905 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1872 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1906 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1873 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1874 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1875
1876 cpu_to_apicid[cpu] = apicid;
1877 bios_cpu_apicid[cpu] = apicid;
1878 } else {
1879 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1880 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1881 }
1882#endif 1907#endif
1883 1908
1884 set_cpu_possible(cpu, true); 1909 set_cpu_possible(cpu, true);
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edbc..8793ab33e2c1 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <linux/kbuild.h> 13#include <linux/kbuild.h>
14#include <asm/pda.h>
15#include <asm/processor.h> 14#include <asm/processor.h>
16#include <asm/segment.h> 15#include <asm/segment.h>
17#include <asm/thread_info.h> 16#include <asm/thread_info.h>
@@ -48,16 +47,6 @@ int main(void)
48#endif 47#endif
49 BLANK(); 48 BLANK();
50#undef ENTRY 49#undef ENTRY
51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
52 ENTRY(kernelstack);
53 ENTRY(oldrsp);
54 ENTRY(pcurrent);
55 ENTRY(irqcount);
56 ENTRY(cpunumber);
57 ENTRY(irqstackptr);
58 ENTRY(data_offset);
59 BLANK();
60#undef ENTRY
61#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
62 BLANK(); 51 BLANK();
63 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); 52 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 83492b1f93b1..652fdc9a757a 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -21,14 +21,16 @@
21#include <asm/asm.h> 21#include <asm/asm.h>
22#include <asm/numa.h> 22#include <asm/numa.h>
23#include <asm/smp.h> 23#include <asm/smp.h>
24#include <asm/cpu.h>
25#include <asm/cpumask.h>
24#ifdef CONFIG_X86_LOCAL_APIC 26#ifdef CONFIG_X86_LOCAL_APIC
25#include <asm/mpspec.h> 27#include <asm/mpspec.h>
26#include <asm/apic.h> 28#include <asm/apic.h>
27#include <mach_apic.h> 29#include <mach_apic.h>
28#include <asm/genapic.h> 30#include <asm/genapic.h>
31#include <asm/uv/uv.h>
29#endif 32#endif
30 33
31#include <asm/pda.h>
32#include <asm/pgtable.h> 34#include <asm/pgtable.h>
33#include <asm/processor.h> 35#include <asm/processor.h>
34#include <asm/desc.h> 36#include <asm/desc.h>
@@ -50,6 +52,15 @@ cpumask_var_t cpu_initialized_mask;
50/* representing cpus for which sibling maps can be computed */ 52/* representing cpus for which sibling maps can be computed */
51cpumask_var_t cpu_sibling_setup_mask; 53cpumask_var_t cpu_sibling_setup_mask;
52 54
55/* correctly size the local cpu masks */
56void __init setup_cpu_local_masks(void)
57{
58 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
59 alloc_bootmem_cpumask_var(&cpu_callin_mask);
60 alloc_bootmem_cpumask_var(&cpu_callout_mask);
61 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
62}
63
53#else /* CONFIG_X86_32 */ 64#else /* CONFIG_X86_32 */
54 65
55cpumask_t cpu_callin_map; 66cpumask_t cpu_callin_map;
@@ -62,23 +73,23 @@ cpumask_t cpu_sibling_setup_map;
62 73
63static struct cpu_dev *this_cpu __cpuinitdata; 74static struct cpu_dev *this_cpu __cpuinitdata;
64 75
76DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
65#ifdef CONFIG_X86_64 77#ifdef CONFIG_X86_64
66/* We need valid kernel segments for data and code in long mode too 78 /*
67 * IRET will check the segment types kkeil 2000/10/28 79 * We need valid kernel segments for data and code in long mode too
68 * Also sysret mandates a special GDT layout 80 * IRET will check the segment types kkeil 2000/10/28
69 */ 81 * Also sysret mandates a special GDT layout
70/* The TLS descriptors are currently at a different place compared to i386. 82 *
71 Hopefully nobody expects them at a fixed place (Wine?) */ 83 * The TLS descriptors are currently at a different place compared to i386.
72DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 84 * Hopefully nobody expects them at a fixed place (Wine?)
85 */
73 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 86 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
74 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 87 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
75 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 88 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
76 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 89 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
77 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 90 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
78 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 91 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
79} };
80#else 92#else
81DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
82 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 93 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
83 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 94 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
84 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 95 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -110,9 +121,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
110 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 121 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
111 122
112 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 123 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
113 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 124 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
114} };
115#endif 125#endif
126} };
116EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 127EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
117 128
118#ifdef CONFIG_X86_32 129#ifdef CONFIG_X86_32
@@ -247,12 +258,17 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
247void switch_to_new_gdt(void) 258void switch_to_new_gdt(void)
248{ 259{
249 struct desc_ptr gdt_descr; 260 struct desc_ptr gdt_descr;
261 int cpu = smp_processor_id();
250 262
251 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); 263 gdt_descr.address = (long)get_cpu_gdt_table(cpu);
252 gdt_descr.size = GDT_SIZE - 1; 264 gdt_descr.size = GDT_SIZE - 1;
253 load_gdt(&gdt_descr); 265 load_gdt(&gdt_descr);
266 /* Reload the per-cpu base */
254#ifdef CONFIG_X86_32 267#ifdef CONFIG_X86_32
255 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); 268 loadsegment(fs, __KERNEL_PERCPU);
269#else
270 loadsegment(gs, 0);
271 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
256#endif 272#endif
257} 273}
258 274
@@ -877,54 +893,26 @@ static __init int setup_disablecpuid(char *arg)
877__setup("clearcpuid=", setup_disablecpuid); 893__setup("clearcpuid=", setup_disablecpuid);
878 894
879#ifdef CONFIG_X86_64 895#ifdef CONFIG_X86_64
880struct x8664_pda **_cpu_pda __read_mostly;
881EXPORT_SYMBOL(_cpu_pda);
882
883struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 896struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
884 897
885static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 898DEFINE_PER_CPU_FIRST(union irq_stack_union,
899 irq_stack_union) __aligned(PAGE_SIZE);
900#ifdef CONFIG_SMP
901DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
902#else
903DEFINE_PER_CPU(char *, irq_stack_ptr) =
904 per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
905#endif
886 906
887void __cpuinit pda_init(int cpu) 907DEFINE_PER_CPU(unsigned long, kernel_stack) =
888{ 908 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
889 struct x8664_pda *pda = cpu_pda(cpu); 909EXPORT_PER_CPU_SYMBOL(kernel_stack);
890 910
891 /* Setup up data that may be needed in __get_free_pages early */ 911DEFINE_PER_CPU(unsigned int, irq_count) = -1;
892 loadsegment(fs, 0);
893 loadsegment(gs, 0);
894 /* Memory clobbers used to order PDA accessed */
895 mb();
896 wrmsrl(MSR_GS_BASE, pda);
897 mb();
898
899 pda->cpunumber = cpu;
900 pda->irqcount = -1;
901 pda->kernelstack = (unsigned long)stack_thread_info() -
902 PDA_STACKOFFSET + THREAD_SIZE;
903 pda->active_mm = &init_mm;
904 pda->mmu_state = 0;
905
906 if (cpu == 0) {
907 /* others are initialized in smpboot.c */
908 pda->pcurrent = &init_task;
909 pda->irqstackptr = boot_cpu_stack;
910 pda->irqstackptr += IRQSTACKSIZE - 64;
911 } else {
912 if (!pda->irqstackptr) {
913 pda->irqstackptr = (char *)
914 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
915 if (!pda->irqstackptr)
916 panic("cannot allocate irqstack for cpu %d",
917 cpu);
918 pda->irqstackptr += IRQSTACKSIZE - 64;
919 }
920 912
921 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 913static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
922 pda->nodenumber = cpu_to_node(cpu); 914 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
923 } 915 __aligned(PAGE_SIZE);
924}
925
926static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
927 DEBUG_STKSZ] __page_aligned_bss;
928 916
929extern asmlinkage void ignore_sysret(void); 917extern asmlinkage void ignore_sysret(void);
930 918
@@ -982,15 +970,14 @@ void __cpuinit cpu_init(void)
982 struct tss_struct *t = &per_cpu(init_tss, cpu); 970 struct tss_struct *t = &per_cpu(init_tss, cpu);
983 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); 971 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
984 unsigned long v; 972 unsigned long v;
985 char *estacks = NULL;
986 struct task_struct *me; 973 struct task_struct *me;
987 int i; 974 int i;
988 975
989 /* CPU 0 is initialised in head64.c */ 976#ifdef CONFIG_NUMA
990 if (cpu != 0) 977 if (cpu != 0 && percpu_read(node_number) == 0 &&
991 pda_init(cpu); 978 cpu_to_node(cpu) != NUMA_NO_NODE)
992 else 979 percpu_write(node_number, cpu_to_node(cpu));
993 estacks = boot_exception_stacks; 980#endif
994 981
995 me = current; 982 me = current;
996 983
@@ -1007,6 +994,8 @@ void __cpuinit cpu_init(void)
1007 */ 994 */
1008 995
1009 switch_to_new_gdt(); 996 switch_to_new_gdt();
997 loadsegment(fs, 0);
998
1010 load_idt((const struct desc_ptr *)&idt_descr); 999 load_idt((const struct desc_ptr *)&idt_descr);
1011 1000
1012 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1001 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1024,18 +1013,13 @@ void __cpuinit cpu_init(void)
1024 * set up and load the per-CPU TSS 1013 * set up and load the per-CPU TSS
1025 */ 1014 */
1026 if (!orig_ist->ist[0]) { 1015 if (!orig_ist->ist[0]) {
1027 static const unsigned int order[N_EXCEPTION_STACKS] = { 1016 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1028 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1017 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1029 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 1018 [DEBUG_STACK - 1] = DEBUG_STKSZ
1030 }; 1019 };
1020 char *estacks = per_cpu(exception_stacks, cpu);
1031 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1021 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1032 if (cpu) { 1022 estacks += sizes[v];
1033 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1034 if (!estacks)
1035 panic("Cannot allocate exception "
1036 "stack %ld %d\n", v, cpu);
1037 }
1038 estacks += PAGE_SIZE << order[v];
1039 orig_ist->ist[v] = t->x86_tss.ist[v] = 1023 orig_ist->ist[v] = t->x86_tss.ist[v] =
1040 (unsigned long)estacks; 1024 (unsigned long)estacks;
1041 } 1025 }
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 6f11e029e8c5..4b1c319d30c3 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -145,13 +145,14 @@ typedef union {
145 145
146struct drv_cmd { 146struct drv_cmd {
147 unsigned int type; 147 unsigned int type;
148 cpumask_var_t mask; 148 const struct cpumask *mask;
149 drv_addr_union addr; 149 drv_addr_union addr;
150 u32 val; 150 u32 val;
151}; 151};
152 152
153static void do_drv_read(struct drv_cmd *cmd) 153static long do_drv_read(void *_cmd)
154{ 154{
155 struct drv_cmd *cmd = _cmd;
155 u32 h; 156 u32 h;
156 157
157 switch (cmd->type) { 158 switch (cmd->type) {
@@ -166,10 +167,12 @@ static void do_drv_read(struct drv_cmd *cmd)
166 default: 167 default:
167 break; 168 break;
168 } 169 }
170 return 0;
169} 171}
170 172
171static void do_drv_write(struct drv_cmd *cmd) 173static long do_drv_write(void *_cmd)
172{ 174{
175 struct drv_cmd *cmd = _cmd;
173 u32 lo, hi; 176 u32 lo, hi;
174 177
175 switch (cmd->type) { 178 switch (cmd->type) {
@@ -186,30 +189,23 @@ static void do_drv_write(struct drv_cmd *cmd)
186 default: 189 default:
187 break; 190 break;
188 } 191 }
192 return 0;
189} 193}
190 194
191static void drv_read(struct drv_cmd *cmd) 195static void drv_read(struct drv_cmd *cmd)
192{ 196{
193 cpumask_t saved_mask = current->cpus_allowed;
194 cmd->val = 0; 197 cmd->val = 0;
195 198
196 set_cpus_allowed_ptr(current, cmd->mask); 199 work_on_cpu(cpumask_any(cmd->mask), do_drv_read, cmd);
197 do_drv_read(cmd);
198 set_cpus_allowed_ptr(current, &saved_mask);
199} 200}
200 201
201static void drv_write(struct drv_cmd *cmd) 202static void drv_write(struct drv_cmd *cmd)
202{ 203{
203 cpumask_t saved_mask = current->cpus_allowed;
204 unsigned int i; 204 unsigned int i;
205 205
206 for_each_cpu(i, cmd->mask) { 206 for_each_cpu(i, cmd->mask) {
207 set_cpus_allowed_ptr(current, cpumask_of(i)); 207 work_on_cpu(i, do_drv_write, cmd);
208 do_drv_write(cmd);
209 } 208 }
210
211 set_cpus_allowed_ptr(current, &saved_mask);
212 return;
213} 209}
214 210
215static u32 get_cur_val(const struct cpumask *mask) 211static u32 get_cur_val(const struct cpumask *mask)
@@ -235,8 +231,7 @@ static u32 get_cur_val(const struct cpumask *mask)
235 return 0; 231 return 0;
236 } 232 }
237 233
238 cpumask_copy(cmd.mask, mask); 234 cmd.mask = mask;
239
240 drv_read(&cmd); 235 drv_read(&cmd);
241 236
242 dprintk("get_cur_val = %u\n", cmd.val); 237 dprintk("get_cur_val = %u\n", cmd.val);
@@ -368,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
368 return freq; 363 return freq;
369} 364}
370 365
371static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, 366static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
372 struct acpi_cpufreq_data *data) 367 struct acpi_cpufreq_data *data)
373{ 368{
374 unsigned int cur_freq; 369 unsigned int cur_freq;
@@ -403,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
403 return -ENODEV; 398 return -ENODEV;
404 } 399 }
405 400
406 if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
407 return -ENOMEM;
408
409 perf = data->acpi_data; 401 perf = data->acpi_data;
410 result = cpufreq_frequency_table_target(policy, 402 result = cpufreq_frequency_table_target(policy,
411 data->freq_table, 403 data->freq_table,
@@ -450,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
450 442
451 /* cpufreq holds the hotplug lock, so we are safe from here on */ 443 /* cpufreq holds the hotplug lock, so we are safe from here on */
452 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) 444 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
453 cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); 445 cmd.mask = policy->cpus;
454 else 446 else
455 cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); 447 cmd.mask = cpumask_of(policy->cpu);
456 448
457 freqs.old = perf->states[perf->state].core_frequency * 1000; 449 freqs.old = perf->states[perf->state].core_frequency * 1000;
458 freqs.new = data->freq_table[next_state].frequency; 450 freqs.new = data->freq_table[next_state].frequency;
@@ -479,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
479 perf->state = next_perf_state; 471 perf->state = next_perf_state;
480 472
481out: 473out:
482 free_cpumask_var(cmd.mask);
483 return result; 474 return result;
484} 475}
485 476
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8ea6929e974c..549f2ada55f5 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,6 +29,19 @@
29 29
30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
31{ 31{
32 /* Unmask CPUID levels if masked: */
33 if (c->x86 == 6 && c->x86_model >= 15) {
34 u64 misc_enable;
35
36 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
37
38 if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
39 misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
40 wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
41 c->cpuid_level = cpuid_eax(0);
42 }
43 }
44
32 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 45 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
33 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 46 (c->x86 == 0x6 && c->x86_model >= 0x0e))
34 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 47 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 48533d77be78..58527a9fc404 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -132,7 +132,16 @@ struct _cpuid4_info {
132 union _cpuid4_leaf_ecx ecx; 132 union _cpuid4_leaf_ecx ecx;
133 unsigned long size; 133 unsigned long size;
134 unsigned long can_disable; 134 unsigned long can_disable;
135 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 135 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
136};
137
138/* subset of above _cpuid4_info w/o shared_cpu_map */
139struct _cpuid4_info_regs {
140 union _cpuid4_leaf_eax eax;
141 union _cpuid4_leaf_ebx ebx;
142 union _cpuid4_leaf_ecx ecx;
143 unsigned long size;
144 unsigned long can_disable;
136}; 145};
137 146
138#ifdef CONFIG_PCI 147#ifdef CONFIG_PCI
@@ -263,7 +272,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
263} 272}
264 273
265static void __cpuinit 274static void __cpuinit
266amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) 275amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
267{ 276{
268 if (index < 3) 277 if (index < 3)
269 return; 278 return;
@@ -271,7 +280,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
271} 280}
272 281
273static int 282static int
274__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 283__cpuinit cpuid4_cache_lookup_regs(int index,
284 struct _cpuid4_info_regs *this_leaf)
275{ 285{
276 union _cpuid4_leaf_eax eax; 286 union _cpuid4_leaf_eax eax;
277 union _cpuid4_leaf_ebx ebx; 287 union _cpuid4_leaf_ebx ebx;
@@ -299,6 +309,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
299 return 0; 309 return 0;
300} 310}
301 311
312static int
313__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
314{
315 struct _cpuid4_info_regs *leaf_regs =
316 (struct _cpuid4_info_regs *)this_leaf;
317
318 return cpuid4_cache_lookup_regs(index, leaf_regs);
319}
320
302static int __cpuinit find_num_cache_leaves(void) 321static int __cpuinit find_num_cache_leaves(void)
303{ 322{
304 unsigned int eax, ebx, ecx, edx; 323 unsigned int eax, ebx, ecx, edx;
@@ -338,11 +357,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
338 * parameters cpuid leaf to find the cache details 357 * parameters cpuid leaf to find the cache details
339 */ 358 */
340 for (i = 0; i < num_cache_leaves; i++) { 359 for (i = 0; i < num_cache_leaves; i++) {
341 struct _cpuid4_info this_leaf; 360 struct _cpuid4_info_regs this_leaf;
342
343 int retval; 361 int retval;
344 362
345 retval = cpuid4_cache_lookup(i, &this_leaf); 363 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
346 if (retval >= 0) { 364 if (retval >= 0) {
347 switch(this_leaf.eax.split.level) { 365 switch(this_leaf.eax.split.level) {
348 case 1: 366 case 1:
@@ -491,17 +509,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
491 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 509 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
492 510
493 if (num_threads_sharing == 1) 511 if (num_threads_sharing == 1)
494 cpu_set(cpu, this_leaf->shared_cpu_map); 512 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
495 else { 513 else {
496 index_msb = get_count_order(num_threads_sharing); 514 index_msb = get_count_order(num_threads_sharing);
497 515
498 for_each_online_cpu(i) { 516 for_each_online_cpu(i) {
499 if (cpu_data(i).apicid >> index_msb == 517 if (cpu_data(i).apicid >> index_msb ==
500 c->apicid >> index_msb) { 518 c->apicid >> index_msb) {
501 cpu_set(i, this_leaf->shared_cpu_map); 519 cpumask_set_cpu(i,
520 to_cpumask(this_leaf->shared_cpu_map));
502 if (i != cpu && per_cpu(cpuid4_info, i)) { 521 if (i != cpu && per_cpu(cpuid4_info, i)) {
503 sibling_leaf = CPUID4_INFO_IDX(i, index); 522 sibling_leaf =
504 cpu_set(cpu, sibling_leaf->shared_cpu_map); 523 CPUID4_INFO_IDX(i, index);
524 cpumask_set_cpu(cpu, to_cpumask(
525 sibling_leaf->shared_cpu_map));
505 } 526 }
506 } 527 }
507 } 528 }
@@ -513,9 +534,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
513 int sibling; 534 int sibling;
514 535
515 this_leaf = CPUID4_INFO_IDX(cpu, index); 536 this_leaf = CPUID4_INFO_IDX(cpu, index);
516 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 537 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
517 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 538 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
518 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 539 cpumask_clear_cpu(cpu,
540 to_cpumask(sibling_leaf->shared_cpu_map));
519 } 541 }
520} 542}
521#else 543#else
@@ -620,8 +642,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
620 int n = 0; 642 int n = 0;
621 643
622 if (len > 1) { 644 if (len > 1) {
623 cpumask_t *mask = &this_leaf->shared_cpu_map; 645 const struct cpumask *mask;
624 646
647 mask = to_cpumask(this_leaf->shared_cpu_map);
625 n = type? 648 n = type?
626 cpulist_scnprintf(buf, len-2, mask) : 649 cpulist_scnprintf(buf, len-2, mask) :
627 cpumask_scnprintf(buf, len-2, mask); 650 cpumask_scnprintf(buf, len-2, mask);
@@ -684,7 +707,8 @@ static struct pci_dev *get_k8_northbridge(int node)
684 707
685static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) 708static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
686{ 709{
687 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 710 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
711 int node = cpu_to_node(cpumask_first(mask));
688 struct pci_dev *dev = NULL; 712 struct pci_dev *dev = NULL;
689 ssize_t ret = 0; 713 ssize_t ret = 0;
690 int i; 714 int i;
@@ -718,7 +742,8 @@ static ssize_t
718store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, 742store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
719 size_t count) 743 size_t count)
720{ 744{
721 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 745 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
746 int node = cpu_to_node(cpumask_first(mask));
722 struct pci_dev *dev = NULL; 747 struct pci_dev *dev = NULL;
723 unsigned int ret, index, val; 748 unsigned int ret, index, val;
724 749
@@ -863,7 +888,7 @@ err_out:
863 return -ENOMEM; 888 return -ENOMEM;
864} 889}
865 890
866static cpumask_t cache_dev_map = CPU_MASK_NONE; 891static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
867 892
868/* Add/Remove cache interface for CPU device */ 893/* Add/Remove cache interface for CPU device */
869static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 894static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -903,7 +928,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
903 } 928 }
904 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 929 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
905 } 930 }
906 cpu_set(cpu, cache_dev_map); 931 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
907 932
908 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 933 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
909 return 0; 934 return 0;
@@ -916,9 +941,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
916 941
917 if (per_cpu(cpuid4_info, cpu) == NULL) 942 if (per_cpu(cpuid4_info, cpu) == NULL)
918 return; 943 return;
919 if (!cpu_isset(cpu, cache_dev_map)) 944 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
920 return; 945 return;
921 cpu_clear(cpu, cache_dev_map); 946 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
922 947
923 for (i = 0; i < num_cache_leaves; i++) 948 for (i = 0; i < num_cache_leaves; i++)
924 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 949 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 8ae8c4ff094d..4772e91e8246 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
481 481
482#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
484 i = first_cpu(per_cpu(cpu_core_map, cpu)); 484 i = cpumask_first(&per_cpu(cpu_core_map, cpu));
485 485
486 /* first core not up yet */ 486 /* first core not up yet */
487 if (cpu_data(i).cpu_core_id) 487 if (cpu_data(i).cpu_core_id)
@@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
501 if (err) 501 if (err)
502 goto out; 502 goto out;
503 503
504 b->cpus = per_cpu(cpu_core_map, cpu); 504 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
505 per_cpu(threshold_banks, cpu)[bank] = b; 505 per_cpu(threshold_banks, cpu)[bank] = b;
506 goto out; 506 goto out;
507 } 507 }
@@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
512 err = -ENOMEM; 512 err = -ENOMEM;
513 goto out; 513 goto out;
514 } 514 }
515 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
516 kfree(b);
517 err = -ENOMEM;
518 goto out;
519 }
515 520
516 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 521 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
517 if (!b->kobj) 522 if (!b->kobj)
518 goto out_free; 523 goto out_free;
519 524
520#ifndef CONFIG_SMP 525#ifndef CONFIG_SMP
521 b->cpus = CPU_MASK_ALL; 526 cpumask_setall(b->cpus);
522#else 527#else
523 b->cpus = per_cpu(cpu_core_map, cpu); 528 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
524#endif 529#endif
525 530
526 per_cpu(threshold_banks, cpu)[bank] = b; 531 per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
529 if (err) 534 if (err)
530 goto out_free; 535 goto out_free;
531 536
532 for_each_cpu_mask_nr(i, b->cpus) { 537 for_each_cpu(i, b->cpus) {
533 if (i == cpu) 538 if (i == cpu)
534 continue; 539 continue;
535 540
@@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
545 550
546out_free: 551out_free:
547 per_cpu(threshold_banks, cpu)[bank] = NULL; 552 per_cpu(threshold_banks, cpu)[bank] = NULL;
553 free_cpumask_var(b->cpus);
548 kfree(b); 554 kfree(b);
549out: 555out:
550 return err; 556 return err;
@@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
619#endif 625#endif
620 626
621 /* remove all sibling symlinks before unregistering */ 627 /* remove all sibling symlinks before unregistering */
622 for_each_cpu_mask_nr(i, b->cpus) { 628 for_each_cpu(i, b->cpus) {
623 if (i == cpu) 629 if (i == cpu)
624 continue; 630 continue;
625 631
@@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
632free_out: 638free_out:
633 kobject_del(b->kobj); 639 kobject_del(b->kobj);
634 kobject_put(b->kobj); 640 kobject_put(b->kobj);
641 free_cpumask_var(b->cpus);
635 kfree(b); 642 kfree(b);
636 per_cpu(threshold_banks, cpu)[bank] = NULL; 643 per_cpu(threshold_banks, cpu)[bank] = NULL;
637} 644}
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd39..5e8c79e748a6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -7,6 +7,7 @@
7#include <linux/interrupt.h> 7#include <linux/interrupt.h>
8#include <linux/percpu.h> 8#include <linux/percpu.h>
9#include <asm/processor.h> 9#include <asm/processor.h>
10#include <asm/apic.h>
10#include <asm/msr.h> 11#include <asm/msr.h>
11#include <asm/mce.h> 12#include <asm/mce.h>
12#include <asm/hw_irq.h> 13#include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index b59ddcc88cd8..0c0a455fe95c 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -33,11 +33,13 @@ u64 mtrr_tom2;
33struct mtrr_state_type mtrr_state = {}; 33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state); 34EXPORT_SYMBOL_GPL(mtrr_state);
35 35
36#undef MODULE_PARAM_PREFIX 36static int __initdata mtrr_show;
37#define MODULE_PARAM_PREFIX "mtrr." 37static int __init mtrr_debug(char *opt)
38 38{
39static int mtrr_show; 39 mtrr_show = 1;
40module_param_named(show, mtrr_show, bool, 0); 40 return 0;
41}
42early_param("mtrr.show", mtrr_debug);
41 43
42/* 44/*
43 * Returns the effective MTRR type for the region 45 * Returns the effective MTRR type for the region
diff --git a/arch/x86/kernel/crash.c b/arch/x86/kernel/crash.c
index c689d19e35ab..11b93cabdf78 100644
--- a/arch/x86/kernel/crash.c
+++ b/arch/x86/kernel/crash.c
@@ -24,7 +24,7 @@
24#include <asm/apic.h> 24#include <asm/apic.h>
25#include <asm/hpet.h> 25#include <asm/hpet.h>
26#include <linux/kdebug.h> 26#include <linux/kdebug.h>
27#include <asm/smp.h> 27#include <asm/cpu.h>
28#include <asm/reboot.h> 28#include <asm/reboot.h>
29#include <asm/virtext.h> 29#include <asm/virtext.h>
30 30
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d0707048..d35db5993fd6 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
106 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
107{ 107{
108 const unsigned cpu = get_cpu(); 108 const unsigned cpu = get_cpu();
109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irq_stack_end =
110 (unsigned long *)per_cpu(irq_stack_ptr, cpu);
110 unsigned used = 0; 111 unsigned used = 0;
111 struct thread_info *tinfo; 112 struct thread_info *tinfo;
112 int graph = 0; 113 int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *) estack_end[-2]; 161 stack = (unsigned long *) estack_end[-2];
161 continue; 162 continue;
162 } 163 }
163 if (irqstack_end) { 164 if (irq_stack_end) {
164 unsigned long *irqstack; 165 unsigned long *irq_stack;
165 irqstack = irqstack_end - 166 irq_stack = irq_stack_end -
166 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 167 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
167 168
168 if (stack >= irqstack && stack < irqstack_end) { 169 if (stack >= irq_stack && stack < irq_stack_end) {
169 if (ops->stack(data, "IRQ") < 0) 170 if (ops->stack(data, "IRQ") < 0)
170 break; 171 break;
171 bp = print_context_stack(tinfo, stack, bp, 172 bp = print_context_stack(tinfo, stack, bp,
172 ops, data, irqstack_end, &graph); 173 ops, data, irq_stack_end, &graph);
173 /* 174 /*
174 * We link to the next stack (which would be 175 * We link to the next stack (which would be
175 * the process stack normally) the last 176 * the process stack normally) the last
176 * pointer (index -1 to end) in the IRQ stack: 177 * pointer (index -1 to end) in the IRQ stack:
177 */ 178 */
178 stack = (unsigned long *) (irqstack_end[-1]); 179 stack = (unsigned long *) (irq_stack_end[-1]);
179 irqstack_end = NULL; 180 irq_stack_end = NULL;
180 ops->stack(data, "EOI"); 181 ops->stack(data, "EOI");
181 continue; 182 continue;
182 } 183 }
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
199 unsigned long *stack; 200 unsigned long *stack;
200 int i; 201 int i;
201 const int cpu = smp_processor_id(); 202 const int cpu = smp_processor_id();
202 unsigned long *irqstack_end = 203 unsigned long *irq_stack_end =
203 (unsigned long *) (cpu_pda(cpu)->irqstackptr); 204 (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
204 unsigned long *irqstack = 205 unsigned long *irq_stack =
205 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 206 (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
206 207
207 /* 208 /*
208 * debugging aid: "show_stack(NULL, NULL);" prints the 209 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
218 219
219 stack = sp; 220 stack = sp;
220 for (i = 0; i < kstack_depth_to_print; i++) { 221 for (i = 0; i < kstack_depth_to_print; i++) {
221 if (stack >= irqstack && stack <= irqstack_end) { 222 if (stack >= irq_stack && stack <= irq_stack_end) {
222 if (stack == irqstack_end) { 223 if (stack == irq_stack_end) {
223 stack = (unsigned long *) (irqstack_end[-1]); 224 stack = (unsigned long *) (irq_stack_end[-1]);
224 printk(" <EOI> "); 225 printk(" <EOI> ");
225 } 226 }
226 } else { 227 } else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
241 int i; 242 int i;
242 unsigned long sp; 243 unsigned long sp;
243 const int cpu = smp_processor_id(); 244 const int cpu = smp_processor_id();
244 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 245 struct task_struct *cur = current;
245 246
246 sp = regs->sp; 247 sp = regs->sp;
247 printk("CPU %d ", cpu); 248 printk("CPU %d ", cpu);
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe11..b205272ad394 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,10 +366,12 @@ void __init efi_init(void)
366 SMBIOS_TABLE_GUID)) { 366 SMBIOS_TABLE_GUID)) {
367 efi.smbios = config_tables[i].table; 367 efi.smbios = config_tables[i].table;
368 printk(" SMBIOS=0x%lx ", config_tables[i].table); 368 printk(" SMBIOS=0x%lx ", config_tables[i].table);
369#ifdef CONFIG_X86_UV
369 } else if (!efi_guidcmp(config_tables[i].guid, 370 } else if (!efi_guidcmp(config_tables[i].guid,
370 UV_SYSTEM_TABLE_GUID)) { 371 UV_SYSTEM_TABLE_GUID)) {
371 efi.uv_systab = config_tables[i].table; 372 efi.uv_systab = config_tables[i].table;
372 printk(" UVsystab=0x%lx ", config_tables[i].table); 373 printk(" UVsystab=0x%lx ", config_tables[i].table);
374#endif
373 } else if (!efi_guidcmp(config_tables[i].guid, 375 } else if (!efi_guidcmp(config_tables[i].guid,
374 HCDP_TABLE_GUID)) { 376 HCDP_TABLE_GUID)) {
375 efi.hcdp = config_tables[i].table; 377 efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215f..a4ee29127fdf 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -36,6 +36,7 @@
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm/efi.h> 37#include <asm/efi.h>
38#include <asm/cacheflush.h> 38#include <asm/cacheflush.h>
39#include <asm/fixmap.h>
39 40
40static pgd_t save_pgd __initdata; 41static pgd_t save_pgd __initdata;
41static unsigned long efi_flags __initdata; 42static unsigned long efi_flags __initdata;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index 46469029e9d3..a0b91aac72a1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -672,7 +672,7 @@ common_interrupt:
672ENDPROC(common_interrupt) 672ENDPROC(common_interrupt)
673 CFI_ENDPROC 673 CFI_ENDPROC
674 674
675#define BUILD_INTERRUPT(name, nr) \ 675#define BUILD_INTERRUPT3(name, nr, fn) \
676ENTRY(name) \ 676ENTRY(name) \
677 RING0_INT_FRAME; \ 677 RING0_INT_FRAME; \
678 pushl $~(nr); \ 678 pushl $~(nr); \
@@ -680,11 +680,13 @@ ENTRY(name) \
680 SAVE_ALL; \ 680 SAVE_ALL; \
681 TRACE_IRQS_OFF \ 681 TRACE_IRQS_OFF \
682 movl %esp,%eax; \ 682 movl %esp,%eax; \
683 call smp_##name; \ 683 call fn; \
684 jmp ret_from_intr; \ 684 jmp ret_from_intr; \
685 CFI_ENDPROC; \ 685 CFI_ENDPROC; \
686ENDPROC(name) 686ENDPROC(name)
687 687
688#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
689
688/* The include is where all of the SMP etc. interrupts come from */ 690/* The include is where all of the SMP etc. interrupts come from */
689#include "entry_arch.h" 691#include "entry_arch.h"
690 692
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e28c7a987793..a52703864a16 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -52,6 +52,7 @@
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/percpu.h>
55 56
56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h> 58#include <linux/elf-em.h>
@@ -209,7 +210,7 @@ ENTRY(native_usergs_sysret64)
209 210
210 /* %rsp:at FRAMEEND */ 211 /* %rsp:at FRAMEEND */
211 .macro FIXUP_TOP_OF_STACK tmp offset=0 212 .macro FIXUP_TOP_OF_STACK tmp offset=0
212 movq %gs:pda_oldrsp,\tmp 213 movq PER_CPU_VAR(old_rsp),\tmp
213 movq \tmp,RSP+\offset(%rsp) 214 movq \tmp,RSP+\offset(%rsp)
214 movq $__USER_DS,SS+\offset(%rsp) 215 movq $__USER_DS,SS+\offset(%rsp)
215 movq $__USER_CS,CS+\offset(%rsp) 216 movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +221,7 @@ ENTRY(native_usergs_sysret64)
220 221
221 .macro RESTORE_TOP_OF_STACK tmp offset=0 222 .macro RESTORE_TOP_OF_STACK tmp offset=0
222 movq RSP+\offset(%rsp),\tmp 223 movq RSP+\offset(%rsp),\tmp
223 movq \tmp,%gs:pda_oldrsp 224 movq \tmp,PER_CPU_VAR(old_rsp)
224 movq EFLAGS+\offset(%rsp),\tmp 225 movq EFLAGS+\offset(%rsp),\tmp
225 movq \tmp,R11+\offset(%rsp) 226 movq \tmp,R11+\offset(%rsp)
226 .endm 227 .endm
@@ -336,15 +337,15 @@ ENTRY(save_args)
336 je 1f 337 je 1f
337 SWAPGS 338 SWAPGS
338 /* 339 /*
339 * irqcount is used to check if a CPU is already on an interrupt stack 340 * irq_count is used to check if a CPU is already on an interrupt stack
340 * or not. While this is essentially redundant with preempt_count it is 341 * or not. While this is essentially redundant with preempt_count it is
341 * a little cheaper to use a separate counter in the PDA (short of 342 * a little cheaper to use a separate counter in the PDA (short of
342 * moving irq_enter into assembly, which would be too much work) 343 * moving irq_enter into assembly, which would be too much work)
343 */ 344 */
3441: incl %gs:pda_irqcount 3451: incl PER_CPU_VAR(irq_count)
345 jne 2f 346 jne 2f
346 popq_cfi %rax /* move return address... */ 347 popq_cfi %rax /* move return address... */
347 mov %gs:pda_irqstackptr,%rsp 348 mov PER_CPU_VAR(irq_stack_ptr),%rsp
348 EMPTY_FRAME 0 349 EMPTY_FRAME 0
349 pushq_cfi %rax /* ... to the new stack */ 350 pushq_cfi %rax /* ... to the new stack */
350 /* 351 /*
@@ -467,7 +468,7 @@ END(ret_from_fork)
467ENTRY(system_call) 468ENTRY(system_call)
468 CFI_STARTPROC simple 469 CFI_STARTPROC simple
469 CFI_SIGNAL_FRAME 470 CFI_SIGNAL_FRAME
470 CFI_DEF_CFA rsp,PDA_STACKOFFSET 471 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
471 CFI_REGISTER rip,rcx 472 CFI_REGISTER rip,rcx
472 /*CFI_REGISTER rflags,r11*/ 473 /*CFI_REGISTER rflags,r11*/
473 SWAPGS_UNSAFE_STACK 474 SWAPGS_UNSAFE_STACK
@@ -478,8 +479,8 @@ ENTRY(system_call)
478 */ 479 */
479ENTRY(system_call_after_swapgs) 480ENTRY(system_call_after_swapgs)
480 481
481 movq %rsp,%gs:pda_oldrsp 482 movq %rsp,PER_CPU_VAR(old_rsp)
482 movq %gs:pda_kernelstack,%rsp 483 movq PER_CPU_VAR(kernel_stack),%rsp
483 /* 484 /*
484 * No need to follow this irqs off/on section - it's straight 485 * No need to follow this irqs off/on section - it's straight
485 * and short: 486 * and short:
@@ -522,7 +523,7 @@ sysret_check:
522 CFI_REGISTER rip,rcx 523 CFI_REGISTER rip,rcx
523 RESTORE_ARGS 0,-ARG_SKIP,1 524 RESTORE_ARGS 0,-ARG_SKIP,1
524 /*CFI_REGISTER rflags,r11*/ 525 /*CFI_REGISTER rflags,r11*/
525 movq %gs:pda_oldrsp, %rsp 526 movq PER_CPU_VAR(old_rsp), %rsp
526 USERGS_SYSRET64 527 USERGS_SYSRET64
527 528
528 CFI_RESTORE_STATE 529 CFI_RESTORE_STATE
@@ -832,11 +833,11 @@ common_interrupt:
832 XCPT_FRAME 833 XCPT_FRAME
833 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 834 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
834 interrupt do_IRQ 835 interrupt do_IRQ
835 /* 0(%rsp): oldrsp-ARGOFFSET */ 836 /* 0(%rsp): old_rsp-ARGOFFSET */
836ret_from_intr: 837ret_from_intr:
837 DISABLE_INTERRUPTS(CLBR_NONE) 838 DISABLE_INTERRUPTS(CLBR_NONE)
838 TRACE_IRQS_OFF 839 TRACE_IRQS_OFF
839 decl %gs:pda_irqcount 840 decl PER_CPU_VAR(irq_count)
840 leaveq 841 leaveq
841 CFI_DEF_CFA_REGISTER rsp 842 CFI_DEF_CFA_REGISTER rsp
842 CFI_ADJUST_CFA_OFFSET -8 843 CFI_ADJUST_CFA_OFFSET -8
@@ -981,8 +982,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
981 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 982 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
982#endif 983#endif
983 984
985#ifdef CONFIG_X86_UV
984apicinterrupt UV_BAU_MESSAGE \ 986apicinterrupt UV_BAU_MESSAGE \
985 uv_bau_message_intr1 uv_bau_message_interrupt 987 uv_bau_message_intr1 uv_bau_message_interrupt
988#endif
986apicinterrupt LOCAL_TIMER_VECTOR \ 989apicinterrupt LOCAL_TIMER_VECTOR \
987 apic_timer_interrupt smp_apic_timer_interrupt 990 apic_timer_interrupt smp_apic_timer_interrupt
988 991
@@ -1072,10 +1075,10 @@ ENTRY(\sym)
1072 TRACE_IRQS_OFF 1075 TRACE_IRQS_OFF
1073 movq %rsp,%rdi /* pt_regs pointer */ 1076 movq %rsp,%rdi /* pt_regs pointer */
1074 xorl %esi,%esi /* no error code */ 1077 xorl %esi,%esi /* no error code */
1075 movq %gs:pda_data_offset, %rbp 1078 PER_CPU(init_tss, %rbp)
1076 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1079 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1077 call \do_sym 1080 call \do_sym
1078 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1081 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1079 jmp paranoid_exit /* %ebx: no swapgs flag */ 1082 jmp paranoid_exit /* %ebx: no swapgs flag */
1080 CFI_ENDPROC 1083 CFI_ENDPROC
1081END(\sym) 1084END(\sym)
@@ -1259,14 +1262,14 @@ ENTRY(call_softirq)
1259 CFI_REL_OFFSET rbp,0 1262 CFI_REL_OFFSET rbp,0
1260 mov %rsp,%rbp 1263 mov %rsp,%rbp
1261 CFI_DEF_CFA_REGISTER rbp 1264 CFI_DEF_CFA_REGISTER rbp
1262 incl %gs:pda_irqcount 1265 incl PER_CPU_VAR(irq_count)
1263 cmove %gs:pda_irqstackptr,%rsp 1266 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1264 push %rbp # backlink for old unwinder 1267 push %rbp # backlink for old unwinder
1265 call __do_softirq 1268 call __do_softirq
1266 leaveq 1269 leaveq
1267 CFI_DEF_CFA_REGISTER rsp 1270 CFI_DEF_CFA_REGISTER rsp
1268 CFI_ADJUST_CFA_OFFSET -8 1271 CFI_ADJUST_CFA_OFFSET -8
1269 decl %gs:pda_irqcount 1272 decl PER_CPU_VAR(irq_count)
1270 ret 1273 ret
1271 CFI_ENDPROC 1274 CFI_ENDPROC
1272END(call_softirq) 1275END(call_softirq)
@@ -1296,15 +1299,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1296 movq %rdi, %rsp # we don't return, adjust the stack frame 1299 movq %rdi, %rsp # we don't return, adjust the stack frame
1297 CFI_ENDPROC 1300 CFI_ENDPROC
1298 DEFAULT_FRAME 1301 DEFAULT_FRAME
129911: incl %gs:pda_irqcount 130211: incl PER_CPU_VAR(irq_count)
1300 movq %rsp,%rbp 1303 movq %rsp,%rbp
1301 CFI_DEF_CFA_REGISTER rbp 1304 CFI_DEF_CFA_REGISTER rbp
1302 cmovzq %gs:pda_irqstackptr,%rsp 1305 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1303 pushq %rbp # backlink for old unwinder 1306 pushq %rbp # backlink for old unwinder
1304 call xen_evtchn_do_upcall 1307 call xen_evtchn_do_upcall
1305 popq %rsp 1308 popq %rsp
1306 CFI_DEF_CFA_REGISTER rsp 1309 CFI_DEF_CFA_REGISTER rsp
1307 decl %gs:pda_irqcount 1310 decl PER_CPU_VAR(irq_count)
1308 jmp error_exit 1311 jmp error_exit
1309 CFI_ENDPROC 1312 CFI_ENDPROC
1310END(do_hypervisor_callback) 1313END(do_hypervisor_callback)
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 2bced78b0b8e..e656c2721154 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster;
32struct genapic __read_mostly *genapic = &apic_flat; 32struct genapic __read_mostly *genapic = &apic_flat;
33 33
34static struct genapic *apic_probe[] __initdata = { 34static struct genapic *apic_probe[] __initdata = {
35#ifdef CONFIG_X86_UV
35 &apic_x2apic_uv_x, 36 &apic_x2apic_uv_x,
37#endif
36 &apic_x2apic_phys, 38 &apic_x2apic_phys,
37 &apic_x2apic_cluster, 39 &apic_x2apic_cluster,
38 &apic_physflat, 40 &apic_physflat,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b193e082f6ce..bfe36249145c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -25,6 +25,7 @@
25#include <asm/ipi.h> 25#include <asm/ipi.h>
26#include <asm/genapic.h> 26#include <asm/genapic.h>
27#include <asm/pgtable.h> 27#include <asm/pgtable.h>
28#include <asm/uv/uv.h>
28#include <asm/uv/uv_mmrs.h> 29#include <asm/uv/uv_mmrs.h>
29#include <asm/uv/uv_hub.h> 30#include <asm/uv/uv_hub.h>
30#include <asm/uv/bios.h> 31#include <asm/uv/bios.h>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c4b935..f5b272247690 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,27 +26,6 @@
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda;
31
32#ifdef CONFIG_SMP
33/*
34 * We install an empty cpu_pda pointer table to indicate to early users
35 * (numa_set_node) that the cpu_pda pointer table for cpus other than
36 * the boot cpu is not yet setup.
37 */
38static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
39#else
40static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
41#endif
42
43void __init x86_64_init_pda(void)
44{
45 _cpu_pda = __cpu_pda;
46 cpu_pda(0) = &_boot_cpu_pda;
47 pda_init(0);
48}
49
50static void __init zap_identity_mappings(void) 29static void __init zap_identity_mappings(void)
51{ 30{
52 pgd_t *pgd = pgd_offset_k(0UL); 31 pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
112 if (console_loglevel == 10) 91 if (console_loglevel == 10)
113 early_printk("Kernel alive\n"); 92 early_printk("Kernel alive\n");
114 93
115 x86_64_init_pda();
116
117 x86_64_start_reservations(real_mode_data); 94 x86_64_start_reservations(real_mode_data);
118} 95}
119 96
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4eea70b..24c0e5cd71e3 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP
429 ljmp $(__KERNEL_CS),$1f 429 ljmp $(__KERNEL_CS),$1f
4301: movl $(__KERNEL_DS),%eax # reload all the segment registers 4301: movl $(__KERNEL_DS),%eax # reload all the segment registers
431 movl %eax,%ss # after changing gdt. 431 movl %eax,%ss # after changing gdt.
432 movl %eax,%fs # gets reset once there's real percpu
433 432
434 movl $(__USER_DS),%eax # DS/ES contains default USER segment 433 movl $(__USER_DS),%eax # DS/ES contains default USER segment
435 movl %eax,%ds 434 movl %eax,%ds
436 movl %eax,%es 435 movl %eax,%es
437 436
437 movl $(__KERNEL_PERCPU), %eax
438 movl %eax,%fs # set this cpu's percpu
439
438 xorl %eax,%eax # Clear GS and LDT 440 xorl %eax,%eax # Clear GS and LDT
439 movl %eax,%gs 441 movl %eax,%gs
440 lldt %ax 442 lldt %ax
@@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP
446 movb $1, ready 448 movb $1, ready
447 cmpb $0,%cl # the first CPU calls start_kernel 449 cmpb $0,%cl # the first CPU calls start_kernel
448 je 1f 450 je 1f
449 movl $(__KERNEL_PERCPU), %eax
450 movl %eax,%fs # set this cpu's percpu
451 movl (stack_start), %esp 451 movl (stack_start), %esp
4521: 4521:
453#endif /* CONFIG_SMP */ 453#endif /* CONFIG_SMP */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d495563..a0a2b5ca9b7d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
@@ -204,6 +205,19 @@ ENTRY(secondary_startup_64)
204 pushq $0 205 pushq $0
205 popfq 206 popfq
206 207
208#ifdef CONFIG_SMP
209 /*
210 * Fix up static pointers that need __per_cpu_load added. The assembler
211 * is unable to do this directly. This is only needed for the boot cpu.
212 * These values are set up with the correct base addresses by C code for
213 * secondary cpus.
214 */
215 movq initial_gs(%rip), %rax
216 cmpl $0, per_cpu__cpu_number(%rax)
217 jne 1f
218 addq %rax, early_gdt_descr_base(%rip)
2191:
220#endif
207 /* 221 /*
208 * We must switch to a new descriptor in kernel space for the GDT 222 * We must switch to a new descriptor in kernel space for the GDT
209 * because soon the kernel won't have access anymore to the userspace 223 * because soon the kernel won't have access anymore to the userspace
@@ -226,12 +240,15 @@ ENTRY(secondary_startup_64)
226 movl %eax,%fs 240 movl %eax,%fs
227 movl %eax,%gs 241 movl %eax,%gs
228 242
229 /* 243 /* Set up %gs.
230 * Setup up a dummy PDA. this is just for some early bootup code 244 *
231 * that does in_interrupt() 245 * The base of %gs always points to the bottom of the irqstack
232 */ 246 * union. If the stack protector canary is enabled, it is
247 * located at %gs:40. Note that, on SMP, the boot cpu uses
248 * init data section till per cpu areas are set up.
249 */
233 movl $MSR_GS_BASE,%ecx 250 movl $MSR_GS_BASE,%ecx
234 movq $empty_zero_page,%rax 251 movq initial_gs(%rip),%rax
235 movq %rax,%rdx 252 movq %rax,%rdx
236 shrq $32,%rdx 253 shrq $32,%rdx
237 wrmsr 254 wrmsr
@@ -257,6 +274,12 @@ ENTRY(secondary_startup_64)
257 .align 8 274 .align 8
258 ENTRY(initial_code) 275 ENTRY(initial_code)
259 .quad x86_64_start_kernel 276 .quad x86_64_start_kernel
277 ENTRY(initial_gs)
278#ifdef CONFIG_SMP
279 .quad __per_cpu_load
280#else
281 .quad PER_CPU_VAR(irq_stack_union)
282#endif
260 __FINITDATA 283 __FINITDATA
261 284
262 ENTRY(stack_start) 285 ENTRY(stack_start)
@@ -401,7 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
401 .globl early_gdt_descr 424 .globl early_gdt_descr
402early_gdt_descr: 425early_gdt_descr:
403 .word GDT_ENTRIES*8-1 426 .word GDT_ENTRIES*8-1
404 .quad per_cpu__gdt_page 427early_gdt_descr_base:
428 .quad per_cpu__gdt_page
405 429
406ENTRY(phys_base) 430ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 431 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index cd759ad90690..64d5ad0b8add 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -628,11 +628,12 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
628 628
629 switch (action & 0xf) { 629 switch (action & 0xf) {
630 case CPU_ONLINE: 630 case CPU_ONLINE:
631 INIT_DELAYED_WORK(&work.work, hpet_work); 631 INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work);
632 init_completion(&work.complete); 632 init_completion(&work.complete);
633 /* FIXME: add schedule_work_on() */ 633 /* FIXME: add schedule_work_on() */
634 schedule_delayed_work_on(cpu, &work.work, 0); 634 schedule_delayed_work_on(cpu, &work.work, 0);
635 wait_for_completion(&work.complete); 635 wait_for_completion(&work.complete);
636 destroy_timer_on_stack(&work.work.timer);
636 break; 637 break;
637 case CPU_DEAD: 638 case CPU_DEAD:
638 if (hdev) { 639 if (hdev) {
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 1c4a1302536c..e4d36bd56b62 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -46,6 +46,7 @@
46#include <asm/idle.h> 46#include <asm/idle.h>
47#include <asm/io.h> 47#include <asm/io.h>
48#include <asm/smp.h> 48#include <asm/smp.h>
49#include <asm/cpu.h>
49#include <asm/desc.h> 50#include <asm/desc.h>
50#include <asm/proto.h> 51#include <asm/proto.h>
51#include <asm/acpi.h> 52#include <asm/acpi.h>
@@ -82,11 +83,11 @@ static DEFINE_SPINLOCK(vector_lock);
82int nr_ioapic_registers[MAX_IO_APICS]; 83int nr_ioapic_registers[MAX_IO_APICS];
83 84
84/* I/O APIC entries */ 85/* I/O APIC entries */
85struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 86struct mpc_ioapic mp_ioapics[MAX_IO_APICS];
86int nr_ioapics; 87int nr_ioapics;
87 88
88/* MP IRQ source entries */ 89/* MP IRQ source entries */
89struct mp_config_intsrc mp_irqs[MAX_IRQ_SOURCES]; 90struct mpc_intsrc mp_irqs[MAX_IRQ_SOURCES];
90 91
91/* # of MP IRQ source entries */ 92/* # of MP IRQ source entries */
92int mp_irq_entries; 93int mp_irq_entries;
@@ -356,7 +357,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
356 357
357 if (!cfg->move_in_progress) { 358 if (!cfg->move_in_progress) {
358 /* it means that domain is not changed */ 359 /* it means that domain is not changed */
359 if (!cpumask_intersects(&desc->affinity, mask)) 360 if (!cpumask_intersects(desc->affinity, mask))
360 cfg->move_desc_pending = 1; 361 cfg->move_desc_pending = 1;
361 } 362 }
362} 363}
@@ -386,7 +387,7 @@ struct io_apic {
386static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx) 387static __attribute_const__ struct io_apic __iomem *io_apic_base(int idx)
387{ 388{
388 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx) 389 return (void __iomem *) __fix_to_virt(FIX_IO_APIC_BASE_0 + idx)
389 + (mp_ioapics[idx].mp_apicaddr & ~PAGE_MASK); 390 + (mp_ioapics[idx].apicaddr & ~PAGE_MASK);
390} 391}
391 392
392static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg) 393static inline unsigned int io_apic_read(unsigned int apic, unsigned int reg)
@@ -579,9 +580,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
579 if (assign_irq_vector(irq, cfg, mask)) 580 if (assign_irq_vector(irq, cfg, mask))
580 return BAD_APICID; 581 return BAD_APICID;
581 582
582 cpumask_and(&desc->affinity, cfg->domain, mask); 583 cpumask_and(desc->affinity, cfg->domain, mask);
583 set_extra_move_desc(desc, mask); 584 set_extra_move_desc(desc, mask);
584 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); 585 return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
585} 586}
586 587
587static void 588static void
@@ -944,10 +945,10 @@ static int find_irq_entry(int apic, int pin, int type)
944 int i; 945 int i;
945 946
946 for (i = 0; i < mp_irq_entries; i++) 947 for (i = 0; i < mp_irq_entries; i++)
947 if (mp_irqs[i].mp_irqtype == type && 948 if (mp_irqs[i].irqtype == type &&
948 (mp_irqs[i].mp_dstapic == mp_ioapics[apic].mp_apicid || 949 (mp_irqs[i].dstapic == mp_ioapics[apic].apicid ||
949 mp_irqs[i].mp_dstapic == MP_APIC_ALL) && 950 mp_irqs[i].dstapic == MP_APIC_ALL) &&
950 mp_irqs[i].mp_dstirq == pin) 951 mp_irqs[i].dstirq == pin)
951 return i; 952 return i;
952 953
953 return -1; 954 return -1;
@@ -961,13 +962,13 @@ static int __init find_isa_irq_pin(int irq, int type)
961 int i; 962 int i;
962 963
963 for (i = 0; i < mp_irq_entries; i++) { 964 for (i = 0; i < mp_irq_entries; i++) {
964 int lbus = mp_irqs[i].mp_srcbus; 965 int lbus = mp_irqs[i].srcbus;
965 966
966 if (test_bit(lbus, mp_bus_not_pci) && 967 if (test_bit(lbus, mp_bus_not_pci) &&
967 (mp_irqs[i].mp_irqtype == type) && 968 (mp_irqs[i].irqtype == type) &&
968 (mp_irqs[i].mp_srcbusirq == irq)) 969 (mp_irqs[i].srcbusirq == irq))
969 970
970 return mp_irqs[i].mp_dstirq; 971 return mp_irqs[i].dstirq;
971 } 972 }
972 return -1; 973 return -1;
973} 974}
@@ -977,17 +978,17 @@ static int __init find_isa_irq_apic(int irq, int type)
977 int i; 978 int i;
978 979
979 for (i = 0; i < mp_irq_entries; i++) { 980 for (i = 0; i < mp_irq_entries; i++) {
980 int lbus = mp_irqs[i].mp_srcbus; 981 int lbus = mp_irqs[i].srcbus;
981 982
982 if (test_bit(lbus, mp_bus_not_pci) && 983 if (test_bit(lbus, mp_bus_not_pci) &&
983 (mp_irqs[i].mp_irqtype == type) && 984 (mp_irqs[i].irqtype == type) &&
984 (mp_irqs[i].mp_srcbusirq == irq)) 985 (mp_irqs[i].srcbusirq == irq))
985 break; 986 break;
986 } 987 }
987 if (i < mp_irq_entries) { 988 if (i < mp_irq_entries) {
988 int apic; 989 int apic;
989 for(apic = 0; apic < nr_ioapics; apic++) { 990 for(apic = 0; apic < nr_ioapics; apic++) {
990 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic) 991 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic)
991 return apic; 992 return apic;
992 } 993 }
993 } 994 }
@@ -1012,23 +1013,23 @@ int IO_APIC_get_PCI_irq_vector(int bus, int slot, int pin)
1012 return -1; 1013 return -1;
1013 } 1014 }
1014 for (i = 0; i < mp_irq_entries; i++) { 1015 for (i = 0; i < mp_irq_entries; i++) {
1015 int lbus = mp_irqs[i].mp_srcbus; 1016 int lbus = mp_irqs[i].srcbus;
1016 1017
1017 for (apic = 0; apic < nr_ioapics; apic++) 1018 for (apic = 0; apic < nr_ioapics; apic++)
1018 if (mp_ioapics[apic].mp_apicid == mp_irqs[i].mp_dstapic || 1019 if (mp_ioapics[apic].apicid == mp_irqs[i].dstapic ||
1019 mp_irqs[i].mp_dstapic == MP_APIC_ALL) 1020 mp_irqs[i].dstapic == MP_APIC_ALL)
1020 break; 1021 break;
1021 1022
1022 if (!test_bit(lbus, mp_bus_not_pci) && 1023 if (!test_bit(lbus, mp_bus_not_pci) &&
1023 !mp_irqs[i].mp_irqtype && 1024 !mp_irqs[i].irqtype &&
1024 (bus == lbus) && 1025 (bus == lbus) &&
1025 (slot == ((mp_irqs[i].mp_srcbusirq >> 2) & 0x1f))) { 1026 (slot == ((mp_irqs[i].srcbusirq >> 2) & 0x1f))) {
1026 int irq = pin_2_irq(i,apic,mp_irqs[i].mp_dstirq); 1027 int irq = pin_2_irq(i, apic, mp_irqs[i].dstirq);
1027 1028
1028 if (!(apic || IO_APIC_IRQ(irq))) 1029 if (!(apic || IO_APIC_IRQ(irq)))
1029 continue; 1030 continue;
1030 1031
1031 if (pin == (mp_irqs[i].mp_srcbusirq & 3)) 1032 if (pin == (mp_irqs[i].srcbusirq & 3))
1032 return irq; 1033 return irq;
1033 /* 1034 /*
1034 * Use the first all-but-pin matching entry as a 1035 * Use the first all-but-pin matching entry as a
@@ -1071,7 +1072,7 @@ static int EISA_ELCR(unsigned int irq)
1071 * EISA conforming in the MP table, that means its trigger type must 1072 * EISA conforming in the MP table, that means its trigger type must
1072 * be read in from the ELCR */ 1073 * be read in from the ELCR */
1073 1074
1074#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].mp_srcbusirq)) 1075#define default_EISA_trigger(idx) (EISA_ELCR(mp_irqs[idx].srcbusirq))
1075#define default_EISA_polarity(idx) default_ISA_polarity(idx) 1076#define default_EISA_polarity(idx) default_ISA_polarity(idx)
1076 1077
1077/* PCI interrupts are always polarity one level triggered, 1078/* PCI interrupts are always polarity one level triggered,
@@ -1088,13 +1089,13 @@ static int EISA_ELCR(unsigned int irq)
1088 1089
1089static int MPBIOS_polarity(int idx) 1090static int MPBIOS_polarity(int idx)
1090{ 1091{
1091 int bus = mp_irqs[idx].mp_srcbus; 1092 int bus = mp_irqs[idx].srcbus;
1092 int polarity; 1093 int polarity;
1093 1094
1094 /* 1095 /*
1095 * Determine IRQ line polarity (high active or low active): 1096 * Determine IRQ line polarity (high active or low active):
1096 */ 1097 */
1097 switch (mp_irqs[idx].mp_irqflag & 3) 1098 switch (mp_irqs[idx].irqflag & 3)
1098 { 1099 {
1099 case 0: /* conforms, ie. bus-type dependent polarity */ 1100 case 0: /* conforms, ie. bus-type dependent polarity */
1100 if (test_bit(bus, mp_bus_not_pci)) 1101 if (test_bit(bus, mp_bus_not_pci))
@@ -1130,13 +1131,13 @@ static int MPBIOS_polarity(int idx)
1130 1131
1131static int MPBIOS_trigger(int idx) 1132static int MPBIOS_trigger(int idx)
1132{ 1133{
1133 int bus = mp_irqs[idx].mp_srcbus; 1134 int bus = mp_irqs[idx].srcbus;
1134 int trigger; 1135 int trigger;
1135 1136
1136 /* 1137 /*
1137 * Determine IRQ trigger mode (edge or level sensitive): 1138 * Determine IRQ trigger mode (edge or level sensitive):
1138 */ 1139 */
1139 switch ((mp_irqs[idx].mp_irqflag>>2) & 3) 1140 switch ((mp_irqs[idx].irqflag>>2) & 3)
1140 { 1141 {
1141 case 0: /* conforms, ie. bus-type dependent */ 1142 case 0: /* conforms, ie. bus-type dependent */
1142 if (test_bit(bus, mp_bus_not_pci)) 1143 if (test_bit(bus, mp_bus_not_pci))
@@ -1214,16 +1215,16 @@ int (*ioapic_renumber_irq)(int ioapic, int irq);
1214static int pin_2_irq(int idx, int apic, int pin) 1215static int pin_2_irq(int idx, int apic, int pin)
1215{ 1216{
1216 int irq, i; 1217 int irq, i;
1217 int bus = mp_irqs[idx].mp_srcbus; 1218 int bus = mp_irqs[idx].srcbus;
1218 1219
1219 /* 1220 /*
1220 * Debugging check, we are in big trouble if this message pops up! 1221 * Debugging check, we are in big trouble if this message pops up!
1221 */ 1222 */
1222 if (mp_irqs[idx].mp_dstirq != pin) 1223 if (mp_irqs[idx].dstirq != pin)
1223 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n"); 1224 printk(KERN_ERR "broken BIOS or MPTABLE parser, ayiee!!\n");
1224 1225
1225 if (test_bit(bus, mp_bus_not_pci)) { 1226 if (test_bit(bus, mp_bus_not_pci)) {
1226 irq = mp_irqs[idx].mp_srcbusirq; 1227 irq = mp_irqs[idx].srcbusirq;
1227 } else { 1228 } else {
1228 /* 1229 /*
1229 * PCI IRQs are mapped in order 1230 * PCI IRQs are mapped in order
@@ -1566,14 +1567,14 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, struct irq_de
1566 apic_printk(APIC_VERBOSE,KERN_DEBUG 1567 apic_printk(APIC_VERBOSE,KERN_DEBUG
1567 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> " 1568 "IOAPIC[%d]: Set routing entry (%d-%d -> 0x%x -> "
1568 "IRQ %d Mode:%i Active:%i)\n", 1569 "IRQ %d Mode:%i Active:%i)\n",
1569 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1570 apic, mp_ioapics[apic].apicid, pin, cfg->vector,
1570 irq, trigger, polarity); 1571 irq, trigger, polarity);
1571 1572
1572 1573
1573 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry, 1574 if (setup_ioapic_entry(mp_ioapics[apic].apicid, irq, &entry,
1574 dest, trigger, polarity, cfg->vector)) { 1575 dest, trigger, polarity, cfg->vector)) {
1575 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n", 1576 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
1576 mp_ioapics[apic].mp_apicid, pin); 1577 mp_ioapics[apic].apicid, pin);
1577 __clear_irq_vector(irq, cfg); 1578 __clear_irq_vector(irq, cfg);
1578 return; 1579 return;
1579 } 1580 }
@@ -1604,12 +1605,10 @@ static void __init setup_IO_APIC_irqs(void)
1604 notcon = 1; 1605 notcon = 1;
1605 apic_printk(APIC_VERBOSE, 1606 apic_printk(APIC_VERBOSE,
1606 KERN_DEBUG " %d-%d", 1607 KERN_DEBUG " %d-%d",
1607 mp_ioapics[apic].mp_apicid, 1608 mp_ioapics[apic].apicid, pin);
1608 pin);
1609 } else 1609 } else
1610 apic_printk(APIC_VERBOSE, " %d-%d", 1610 apic_printk(APIC_VERBOSE, " %d-%d",
1611 mp_ioapics[apic].mp_apicid, 1611 mp_ioapics[apic].apicid, pin);
1612 pin);
1613 continue; 1612 continue;
1614 } 1613 }
1615 if (notcon) { 1614 if (notcon) {
@@ -1699,7 +1698,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1699 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries); 1698 printk(KERN_DEBUG "number of MP IRQ sources: %d.\n", mp_irq_entries);
1700 for (i = 0; i < nr_ioapics; i++) 1699 for (i = 0; i < nr_ioapics; i++)
1701 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n", 1700 printk(KERN_DEBUG "number of IO-APIC #%d registers: %d.\n",
1702 mp_ioapics[i].mp_apicid, nr_ioapic_registers[i]); 1701 mp_ioapics[i].apicid, nr_ioapic_registers[i]);
1703 1702
1704 /* 1703 /*
1705 * We are a bit conservative about what we expect. We have to 1704 * We are a bit conservative about what we expect. We have to
@@ -1719,7 +1718,7 @@ __apicdebuginit(void) print_IO_APIC(void)
1719 spin_unlock_irqrestore(&ioapic_lock, flags); 1718 spin_unlock_irqrestore(&ioapic_lock, flags);
1720 1719
1721 printk("\n"); 1720 printk("\n");
1722 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].mp_apicid); 1721 printk(KERN_DEBUG "IO APIC #%d......\n", mp_ioapics[apic].apicid);
1723 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw); 1722 printk(KERN_DEBUG ".... register #00: %08X\n", reg_00.raw);
1724 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID); 1723 printk(KERN_DEBUG "....... : physical APIC id: %02X\n", reg_00.bits.ID);
1725 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type); 1724 printk(KERN_DEBUG "....... : Delivery Type: %X\n", reg_00.bits.delivery_type);
@@ -2121,14 +2120,14 @@ static void __init setup_ioapic_ids_from_mpc(void)
2121 reg_00.raw = io_apic_read(apic, 0); 2120 reg_00.raw = io_apic_read(apic, 0);
2122 spin_unlock_irqrestore(&ioapic_lock, flags); 2121 spin_unlock_irqrestore(&ioapic_lock, flags);
2123 2122
2124 old_id = mp_ioapics[apic].mp_apicid; 2123 old_id = mp_ioapics[apic].apicid;
2125 2124
2126 if (mp_ioapics[apic].mp_apicid >= get_physical_broadcast()) { 2125 if (mp_ioapics[apic].apicid >= get_physical_broadcast()) {
2127 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n", 2126 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID is %d in the MPC table!...\n",
2128 apic, mp_ioapics[apic].mp_apicid); 2127 apic, mp_ioapics[apic].apicid);
2129 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2128 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2130 reg_00.bits.ID); 2129 reg_00.bits.ID);
2131 mp_ioapics[apic].mp_apicid = reg_00.bits.ID; 2130 mp_ioapics[apic].apicid = reg_00.bits.ID;
2132 } 2131 }
2133 2132
2134 /* 2133 /*
@@ -2137,9 +2136,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
2137 * 'stuck on smp_invalidate_needed IPI wait' messages. 2136 * 'stuck on smp_invalidate_needed IPI wait' messages.
2138 */ 2137 */
2139 if (check_apicid_used(phys_id_present_map, 2138 if (check_apicid_used(phys_id_present_map,
2140 mp_ioapics[apic].mp_apicid)) { 2139 mp_ioapics[apic].apicid)) {
2141 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n", 2140 printk(KERN_ERR "BIOS bug, IO-APIC#%d ID %d is already used!...\n",
2142 apic, mp_ioapics[apic].mp_apicid); 2141 apic, mp_ioapics[apic].apicid);
2143 for (i = 0; i < get_physical_broadcast(); i++) 2142 for (i = 0; i < get_physical_broadcast(); i++)
2144 if (!physid_isset(i, phys_id_present_map)) 2143 if (!physid_isset(i, phys_id_present_map))
2145 break; 2144 break;
@@ -2148,13 +2147,13 @@ static void __init setup_ioapic_ids_from_mpc(void)
2148 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n", 2147 printk(KERN_ERR "... fixing up to %d. (tell your hw vendor)\n",
2149 i); 2148 i);
2150 physid_set(i, phys_id_present_map); 2149 physid_set(i, phys_id_present_map);
2151 mp_ioapics[apic].mp_apicid = i; 2150 mp_ioapics[apic].apicid = i;
2152 } else { 2151 } else {
2153 physid_mask_t tmp; 2152 physid_mask_t tmp;
2154 tmp = apicid_to_cpu_present(mp_ioapics[apic].mp_apicid); 2153 tmp = apicid_to_cpu_present(mp_ioapics[apic].apicid);
2155 apic_printk(APIC_VERBOSE, "Setting %d in the " 2154 apic_printk(APIC_VERBOSE, "Setting %d in the "
2156 "phys_id_present_map\n", 2155 "phys_id_present_map\n",
2157 mp_ioapics[apic].mp_apicid); 2156 mp_ioapics[apic].apicid);
2158 physids_or(phys_id_present_map, phys_id_present_map, tmp); 2157 physids_or(phys_id_present_map, phys_id_present_map, tmp);
2159 } 2158 }
2160 2159
@@ -2163,11 +2162,11 @@ static void __init setup_ioapic_ids_from_mpc(void)
2163 * We need to adjust the IRQ routing table 2162 * We need to adjust the IRQ routing table
2164 * if the ID changed. 2163 * if the ID changed.
2165 */ 2164 */
2166 if (old_id != mp_ioapics[apic].mp_apicid) 2165 if (old_id != mp_ioapics[apic].apicid)
2167 for (i = 0; i < mp_irq_entries; i++) 2166 for (i = 0; i < mp_irq_entries; i++)
2168 if (mp_irqs[i].mp_dstapic == old_id) 2167 if (mp_irqs[i].dstapic == old_id)
2169 mp_irqs[i].mp_dstapic 2168 mp_irqs[i].dstapic
2170 = mp_ioapics[apic].mp_apicid; 2169 = mp_ioapics[apic].apicid;
2171 2170
2172 /* 2171 /*
2173 * Read the right value from the MPC table and 2172 * Read the right value from the MPC table and
@@ -2175,9 +2174,9 @@ static void __init setup_ioapic_ids_from_mpc(void)
2175 */ 2174 */
2176 apic_printk(APIC_VERBOSE, KERN_INFO 2175 apic_printk(APIC_VERBOSE, KERN_INFO
2177 "...changing IO-APIC physical APIC ID to %d ...", 2176 "...changing IO-APIC physical APIC ID to %d ...",
2178 mp_ioapics[apic].mp_apicid); 2177 mp_ioapics[apic].apicid);
2179 2178
2180 reg_00.bits.ID = mp_ioapics[apic].mp_apicid; 2179 reg_00.bits.ID = mp_ioapics[apic].apicid;
2181 spin_lock_irqsave(&ioapic_lock, flags); 2180 spin_lock_irqsave(&ioapic_lock, flags);
2182 io_apic_write(apic, 0, reg_00.raw); 2181 io_apic_write(apic, 0, reg_00.raw);
2183 spin_unlock_irqrestore(&ioapic_lock, flags); 2182 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -2188,7 +2187,7 @@ static void __init setup_ioapic_ids_from_mpc(void)
2188 spin_lock_irqsave(&ioapic_lock, flags); 2187 spin_lock_irqsave(&ioapic_lock, flags);
2189 reg_00.raw = io_apic_read(apic, 0); 2188 reg_00.raw = io_apic_read(apic, 0);
2190 spin_unlock_irqrestore(&ioapic_lock, flags); 2189 spin_unlock_irqrestore(&ioapic_lock, flags);
2191 if (reg_00.bits.ID != mp_ioapics[apic].mp_apicid) 2190 if (reg_00.bits.ID != mp_ioapics[apic].apicid)
2192 printk("could not set ID!\n"); 2191 printk("could not set ID!\n");
2193 else 2192 else
2194 apic_printk(APIC_VERBOSE, " ok.\n"); 2193 apic_printk(APIC_VERBOSE, " ok.\n");
@@ -2383,7 +2382,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2383 if (cfg->move_in_progress) 2382 if (cfg->move_in_progress)
2384 send_cleanup_vector(cfg); 2383 send_cleanup_vector(cfg);
2385 2384
2386 cpumask_copy(&desc->affinity, mask); 2385 cpumask_copy(desc->affinity, mask);
2387} 2386}
2388 2387
2389static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2388static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2405,11 +2404,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2405 } 2404 }
2406 2405
2407 /* everthing is clear. we have right of way */ 2406 /* everthing is clear. we have right of way */
2408 migrate_ioapic_irq_desc(desc, &desc->pending_mask); 2407 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2409 2408
2410 ret = 0; 2409 ret = 0;
2411 desc->status &= ~IRQ_MOVE_PENDING; 2410 desc->status &= ~IRQ_MOVE_PENDING;
2412 cpumask_clear(&desc->pending_mask); 2411 cpumask_clear(desc->pending_mask);
2413 2412
2414unmask: 2413unmask:
2415 unmask_IO_APIC_irq_desc(desc); 2414 unmask_IO_APIC_irq_desc(desc);
@@ -2434,7 +2433,7 @@ static void ir_irq_migration(struct work_struct *work)
2434 continue; 2433 continue;
2435 } 2434 }
2436 2435
2437 desc->chip->set_affinity(irq, &desc->pending_mask); 2436 desc->chip->set_affinity(irq, desc->pending_mask);
2438 spin_unlock_irqrestore(&desc->lock, flags); 2437 spin_unlock_irqrestore(&desc->lock, flags);
2439 } 2438 }
2440 } 2439 }
@@ -2448,7 +2447,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2448{ 2447{
2449 if (desc->status & IRQ_LEVEL) { 2448 if (desc->status & IRQ_LEVEL) {
2450 desc->status |= IRQ_MOVE_PENDING; 2449 desc->status |= IRQ_MOVE_PENDING;
2451 cpumask_copy(&desc->pending_mask, mask); 2450 cpumask_copy(desc->pending_mask, mask);
2452 migrate_irq_remapped_level_desc(desc); 2451 migrate_irq_remapped_level_desc(desc);
2453 return; 2452 return;
2454 } 2453 }
@@ -2516,7 +2515,7 @@ static void irq_complete_move(struct irq_desc **descp)
2516 2515
2517 /* domain has not changed, but affinity did */ 2516 /* domain has not changed, but affinity did */
2518 me = smp_processor_id(); 2517 me = smp_processor_id();
2519 if (cpu_isset(me, desc->affinity)) { 2518 if (cpumask_test_cpu(me, desc->affinity)) {
2520 *descp = desc = move_irq_desc(desc, me); 2519 *descp = desc = move_irq_desc(desc, me);
2521 /* get the new one */ 2520 /* get the new one */
2522 cfg = desc->chip_data; 2521 cfg = desc->chip_data;
@@ -3117,8 +3116,8 @@ static int ioapic_resume(struct sys_device *dev)
3117 3116
3118 spin_lock_irqsave(&ioapic_lock, flags); 3117 spin_lock_irqsave(&ioapic_lock, flags);
3119 reg_00.raw = io_apic_read(dev->id, 0); 3118 reg_00.raw = io_apic_read(dev->id, 0);
3120 if (reg_00.bits.ID != mp_ioapics[dev->id].mp_apicid) { 3119 if (reg_00.bits.ID != mp_ioapics[dev->id].apicid) {
3121 reg_00.bits.ID = mp_ioapics[dev->id].mp_apicid; 3120 reg_00.bits.ID = mp_ioapics[dev->id].apicid;
3122 io_apic_write(dev->id, 0, reg_00.raw); 3121 io_apic_write(dev->id, 0, reg_00.raw);
3123 } 3122 }
3124 spin_unlock_irqrestore(&ioapic_lock, flags); 3123 spin_unlock_irqrestore(&ioapic_lock, flags);
@@ -3183,7 +3182,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
3183 3182
3184 irq = 0; 3183 irq = 0;
3185 spin_lock_irqsave(&vector_lock, flags); 3184 spin_lock_irqsave(&vector_lock, flags);
3186 for (new = irq_want; new < NR_IRQS; new++) { 3185 for (new = irq_want; new < nr_irqs; new++) {
3187 if (platform_legacy_irq(new)) 3186 if (platform_legacy_irq(new))
3188 continue; 3187 continue;
3189 3188
@@ -3258,6 +3257,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3258 int err; 3257 int err;
3259 unsigned dest; 3258 unsigned dest;
3260 3259
3260 if (disable_apic)
3261 return -ENXIO;
3262
3261 cfg = irq_cfg(irq); 3263 cfg = irq_cfg(irq);
3262 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3264 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3263 if (err) 3265 if (err)
@@ -3726,6 +3728,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3726 struct irq_cfg *cfg; 3728 struct irq_cfg *cfg;
3727 int err; 3729 int err;
3728 3730
3731 if (disable_apic)
3732 return -ENXIO;
3733
3729 cfg = irq_cfg(irq); 3734 cfg = irq_cfg(irq);
3730 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3735 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3731 if (!err) { 3736 if (!err) {
@@ -3760,7 +3765,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3760} 3765}
3761#endif /* CONFIG_HT_IRQ */ 3766#endif /* CONFIG_HT_IRQ */
3762 3767
3763#ifdef CONFIG_X86_64 3768#ifdef CONFIG_X86_UV
3764/* 3769/*
3765 * Re-target the irq to the specified CPU and enable the specified MMR located 3770 * Re-target the irq to the specified CPU and enable the specified MMR located
3766 * on the specified blade to allow the sending of MSIs to the specified CPU. 3771 * on the specified blade to allow the sending of MSIs to the specified CPU.
@@ -3850,6 +3855,22 @@ void __init probe_nr_irqs_gsi(void)
3850 nr_irqs_gsi = nr; 3855 nr_irqs_gsi = nr;
3851} 3856}
3852 3857
3858#ifdef CONFIG_SPARSE_IRQ
3859int __init arch_probe_nr_irqs(void)
3860{
3861 int nr;
3862
3863 nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ?
3864 (NR_VECTORS + (8 * nr_cpu_ids)) :
3865 (NR_VECTORS + (32 * nr_ioapics)));
3866
3867 if (nr < nr_irqs && nr > nr_irqs_gsi)
3868 nr_irqs = nr;
3869
3870 return 0;
3871}
3872#endif
3873
3853/* -------------------------------------------------------------------------- 3874/* --------------------------------------------------------------------------
3854 ACPI-based IOAPIC Configuration 3875 ACPI-based IOAPIC Configuration
3855 -------------------------------------------------------------------------- */ 3876 -------------------------------------------------------------------------- */
@@ -3984,8 +4005,8 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
3984 return -1; 4005 return -1;
3985 4006
3986 for (i = 0; i < mp_irq_entries; i++) 4007 for (i = 0; i < mp_irq_entries; i++)
3987 if (mp_irqs[i].mp_irqtype == mp_INT && 4008 if (mp_irqs[i].irqtype == mp_INT &&
3988 mp_irqs[i].mp_srcbusirq == bus_irq) 4009 mp_irqs[i].srcbusirq == bus_irq)
3989 break; 4010 break;
3990 if (i >= mp_irq_entries) 4011 if (i >= mp_irq_entries)
3991 return -1; 4012 return -1;
@@ -4039,7 +4060,7 @@ void __init setup_ioapic_dest(void)
4039 */ 4060 */
4040 if (desc->status & 4061 if (desc->status &
4041 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4062 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4042 mask = &desc->affinity; 4063 mask = desc->affinity;
4043 else 4064 else
4044 mask = TARGET_CPUS; 4065 mask = TARGET_CPUS;
4045 4066
@@ -4100,7 +4121,7 @@ void __init ioapic_init_mappings(void)
4100 ioapic_res = ioapic_setup_resources(); 4121 ioapic_res = ioapic_setup_resources();
4101 for (i = 0; i < nr_ioapics; i++) { 4122 for (i = 0; i < nr_ioapics; i++) {
4102 if (smp_found_config) { 4123 if (smp_found_config) {
4103 ioapic_phys = mp_ioapics[i].mp_apicaddr; 4124 ioapic_phys = mp_ioapics[i].apicaddr;
4104#ifdef CONFIG_X86_32 4125#ifdef CONFIG_X86_32
4105 if (!ioapic_phys) { 4126 if (!ioapic_phys) {
4106 printk(KERN_ERR 4127 printk(KERN_ERR
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3973e2df7f87..8b30d0c2512c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
36#endif 36#endif
37} 37}
38 38
39#ifdef CONFIG_X86_32 39#define irq_stats(x) (&per_cpu(irq_stat, x))
40# define irq_stats(x) (&per_cpu(irq_stat, x))
41#else
42# define irq_stats(x) cpu_pda(x)
43#endif
44/* 40/*
45 * /proc/interrupts printing: 41 * /proc/interrupts printing:
46 */ 42 */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 74b9ff7341e9..e0f29be8ab0b 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -248,7 +248,7 @@ void fixup_irqs(void)
248 if (irq == 2) 248 if (irq == 2)
249 continue; 249 continue;
250 250
251 affinity = &desc->affinity; 251 affinity = desc->affinity;
252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
253 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
254 affinity = cpu_all_mask; 254 affinity = cpu_all_mask;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 63c88e6ec025..018963aa6ee3 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,6 +18,13 @@
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/idle.h> 20#include <asm/idle.h>
21#include <asm/apic.h>
22
23DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
24EXPORT_PER_CPU_SYMBOL(irq_stat);
25
26DEFINE_PER_CPU(struct pt_regs *, irq_regs);
27EXPORT_PER_CPU_SYMBOL(irq_regs);
21 28
22/* 29/*
23 * Probabilistic stack overflow check: 30 * Probabilistic stack overflow check:
@@ -100,7 +107,7 @@ void fixup_irqs(void)
100 /* interrupt's are disabled at this point */ 107 /* interrupt's are disabled at this point */
101 spin_lock(&desc->lock); 108 spin_lock(&desc->lock);
102 109
103 affinity = &desc->affinity; 110 affinity = desc->affinity;
104 if (!irq_has_action(irq) || 111 if (!irq_has_action(irq) ||
105 cpumask_equal(affinity, cpu_online_mask)) { 112 cpumask_equal(affinity, cpu_online_mask)) {
106 spin_unlock(&desc->lock); 113 spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 1507ad4e674d..bf629cadec1a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
149 */ 149 */
150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
151 151
152 /* IPI for invalidation */ 152 /* IPIs for invalidation */
153 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); 153 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
154 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
155 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
156 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
157 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
158 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
159 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
160 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
154 161
155 /* IPI for generic function call */ 162 /* IPI for generic function call */
156 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 163 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
diff --git a/arch/x86/kernel/microcode_intel.c b/arch/x86/kernel/microcode_intel.c
index b7f4c929e615..5e9f4fc51385 100644
--- a/arch/x86/kernel/microcode_intel.c
+++ b/arch/x86/kernel/microcode_intel.c
@@ -87,9 +87,9 @@
87#include <linux/cpu.h> 87#include <linux/cpu.h>
88#include <linux/firmware.h> 88#include <linux/firmware.h>
89#include <linux/platform_device.h> 89#include <linux/platform_device.h>
90#include <linux/uaccess.h>
90 91
91#include <asm/msr.h> 92#include <asm/msr.h>
92#include <asm/uaccess.h>
93#include <asm/processor.h> 93#include <asm/processor.h>
94#include <asm/microcode.h> 94#include <asm/microcode.h>
95 95
@@ -196,7 +196,7 @@ static inline int update_match_cpu(struct cpu_signature *csig, int sig, int pf)
196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1; 196 return (!sigmatch(sig, csig->sig, pf, csig->pf)) ? 0 : 1;
197} 197}
198 198
199static inline int 199static inline int
200update_match_revision(struct microcode_header_intel *mc_header, int rev) 200update_match_revision(struct microcode_header_intel *mc_header, int rev)
201{ 201{
202 return (mc_header->rev <= rev) ? 0 : 1; 202 return (mc_header->rev <= rev) ? 0 : 1;
@@ -442,8 +442,8 @@ static int request_microcode_fw(int cpu, struct device *device)
442 return ret; 442 return ret;
443 } 443 }
444 444
445 ret = generic_load_microcode(cpu, (void*)firmware->data, firmware->size, 445 ret = generic_load_microcode(cpu, (void *)firmware->data,
446 &get_ucode_fw); 446 firmware->size, &get_ucode_fw);
447 447
448 release_firmware(firmware); 448 release_firmware(firmware);
449 449
@@ -460,7 +460,7 @@ static int request_microcode_user(int cpu, const void __user *buf, size_t size)
460 /* We should bind the task to the CPU */ 460 /* We should bind the task to the CPU */
461 BUG_ON(cpu != raw_smp_processor_id()); 461 BUG_ON(cpu != raw_smp_processor_id());
462 462
463 return generic_load_microcode(cpu, (void*)buf, size, &get_ucode_user); 463 return generic_load_microcode(cpu, (void *)buf, size, &get_ucode_user);
464} 464}
465 465
466static void microcode_fini_cpu(int cpu) 466static void microcode_fini_cpu(int cpu)
diff --git a/arch/x86/kernel/module_32.c b/arch/x86/kernel/module_32.c
index 3db0a5442eb1..0edd819050e7 100644
--- a/arch/x86/kernel/module_32.c
+++ b/arch/x86/kernel/module_32.c
@@ -42,7 +42,7 @@ void module_free(struct module *mod, void *module_region)
42{ 42{
43 vfree(module_region); 43 vfree(module_region);
44 /* FIXME: If module_region == mod->init_region, trim exception 44 /* FIXME: If module_region == mod->init_region, trim exception
45 table entries. */ 45 table entries. */
46} 46}
47 47
48/* We don't need anything special. */ 48/* We don't need anything special. */
@@ -113,13 +113,13 @@ int module_finalize(const Elf_Ehdr *hdr,
113 *para = NULL; 113 *para = NULL;
114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; 114 char *secstrings = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset;
115 115
116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) { 116 for (s = sechdrs; s < sechdrs + hdr->e_shnum; s++) {
117 if (!strcmp(".text", secstrings + s->sh_name)) 117 if (!strcmp(".text", secstrings + s->sh_name))
118 text = s; 118 text = s;
119 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 119 if (!strcmp(".altinstructions", secstrings + s->sh_name))
120 alt = s; 120 alt = s;
121 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 121 if (!strcmp(".smp_locks", secstrings + s->sh_name))
122 locks= s; 122 locks = s;
123 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 123 if (!strcmp(".parainstructions", secstrings + s->sh_name))
124 para = s; 124 para = s;
125 } 125 }
diff --git a/arch/x86/kernel/module_64.c b/arch/x86/kernel/module_64.c
index 6ba87830d4b1..c23880b90b5c 100644
--- a/arch/x86/kernel/module_64.c
+++ b/arch/x86/kernel/module_64.c
@@ -30,14 +30,14 @@
30#include <asm/page.h> 30#include <asm/page.h>
31#include <asm/pgtable.h> 31#include <asm/pgtable.h>
32 32
33#define DEBUGP(fmt...) 33#define DEBUGP(fmt...)
34 34
35#ifndef CONFIG_UML 35#ifndef CONFIG_UML
36void module_free(struct module *mod, void *module_region) 36void module_free(struct module *mod, void *module_region)
37{ 37{
38 vfree(module_region); 38 vfree(module_region);
39 /* FIXME: If module_region == mod->init_region, trim exception 39 /* FIXME: If module_region == mod->init_region, trim exception
40 table entries. */ 40 table entries. */
41} 41}
42 42
43void *module_alloc(unsigned long size) 43void *module_alloc(unsigned long size)
@@ -77,7 +77,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr; 77 Elf64_Rela *rel = (void *)sechdrs[relsec].sh_addr;
78 Elf64_Sym *sym; 78 Elf64_Sym *sym;
79 void *loc; 79 void *loc;
80 u64 val; 80 u64 val;
81 81
82 DEBUGP("Applying relocate section %u to %u\n", relsec, 82 DEBUGP("Applying relocate section %u to %u\n", relsec,
83 sechdrs[relsec].sh_info); 83 sechdrs[relsec].sh_info);
@@ -91,11 +91,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr 91 sym = (Elf64_Sym *)sechdrs[symindex].sh_addr
92 + ELF64_R_SYM(rel[i].r_info); 92 + ELF64_R_SYM(rel[i].r_info);
93 93
94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n", 94 DEBUGP("type %d st_value %Lx r_addend %Lx loc %Lx\n",
95 (int)ELF64_R_TYPE(rel[i].r_info), 95 (int)ELF64_R_TYPE(rel[i].r_info),
96 sym->st_value, rel[i].r_addend, (u64)loc); 96 sym->st_value, rel[i].r_addend, (u64)loc);
97 97
98 val = sym->st_value + rel[i].r_addend; 98 val = sym->st_value + rel[i].r_addend;
99 99
100 switch (ELF64_R_TYPE(rel[i].r_info)) { 100 switch (ELF64_R_TYPE(rel[i].r_info)) {
101 case R_X86_64_NONE: 101 case R_X86_64_NONE:
@@ -113,16 +113,16 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
113 if ((s64)val != *(s32 *)loc) 113 if ((s64)val != *(s32 *)loc)
114 goto overflow; 114 goto overflow;
115 break; 115 break;
116 case R_X86_64_PC32: 116 case R_X86_64_PC32:
117 val -= (u64)loc; 117 val -= (u64)loc;
118 *(u32 *)loc = val; 118 *(u32 *)loc = val;
119#if 0 119#if 0
120 if ((s64)val != *(s32 *)loc) 120 if ((s64)val != *(s32 *)loc)
121 goto overflow; 121 goto overflow;
122#endif 122#endif
123 break; 123 break;
124 default: 124 default:
125 printk(KERN_ERR "module %s: Unknown rela relocation: %Lu\n", 125 printk(KERN_ERR "module %s: Unknown rela relocation: %llu\n",
126 me->name, ELF64_R_TYPE(rel[i].r_info)); 126 me->name, ELF64_R_TYPE(rel[i].r_info));
127 return -ENOEXEC; 127 return -ENOEXEC;
128 } 128 }
@@ -130,7 +130,7 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
130 return 0; 130 return 0;
131 131
132overflow: 132overflow:
133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n", 133 printk(KERN_ERR "overflow in relocation type %d val %Lx\n",
134 (int)ELF64_R_TYPE(rel[i].r_info), val); 134 (int)ELF64_R_TYPE(rel[i].r_info), val);
135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n", 135 printk(KERN_ERR "`%s' likely not compiled with -mcmodel=kernel\n",
136 me->name); 136 me->name);
@@ -143,13 +143,13 @@ int apply_relocate(Elf_Shdr *sechdrs,
143 unsigned int relsec, 143 unsigned int relsec,
144 struct module *me) 144 struct module *me)
145{ 145{
146 printk("non add relocation not supported\n"); 146 printk(KERN_ERR "non add relocation not supported\n");
147 return -ENOSYS; 147 return -ENOSYS;
148} 148}
149 149
150int module_finalize(const Elf_Ehdr *hdr, 150int module_finalize(const Elf_Ehdr *hdr,
151 const Elf_Shdr *sechdrs, 151 const Elf_Shdr *sechdrs,
152 struct module *me) 152 struct module *me)
153{ 153{
154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL, 154 const Elf_Shdr *s, *text = NULL, *alt = NULL, *locks = NULL,
155 *para = NULL; 155 *para = NULL;
@@ -161,7 +161,7 @@ int module_finalize(const Elf_Ehdr *hdr,
161 if (!strcmp(".altinstructions", secstrings + s->sh_name)) 161 if (!strcmp(".altinstructions", secstrings + s->sh_name))
162 alt = s; 162 alt = s;
163 if (!strcmp(".smp_locks", secstrings + s->sh_name)) 163 if (!strcmp(".smp_locks", secstrings + s->sh_name))
164 locks= s; 164 locks = s;
165 if (!strcmp(".parainstructions", secstrings + s->sh_name)) 165 if (!strcmp(".parainstructions", secstrings + s->sh_name))
166 para = s; 166 para = s;
167 } 167 }
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index a649a4ccad43..fa6bb263892e 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -144,11 +144,11 @@ static void __init MP_ioapic_info(struct mpc_ioapic *m)
144 if (bad_ioapic(m->apicaddr)) 144 if (bad_ioapic(m->apicaddr))
145 return; 145 return;
146 146
147 mp_ioapics[nr_ioapics].mp_apicaddr = m->apicaddr; 147 mp_ioapics[nr_ioapics].apicaddr = m->apicaddr;
148 mp_ioapics[nr_ioapics].mp_apicid = m->apicid; 148 mp_ioapics[nr_ioapics].apicid = m->apicid;
149 mp_ioapics[nr_ioapics].mp_type = m->type; 149 mp_ioapics[nr_ioapics].type = m->type;
150 mp_ioapics[nr_ioapics].mp_apicver = m->apicver; 150 mp_ioapics[nr_ioapics].apicver = m->apicver;
151 mp_ioapics[nr_ioapics].mp_flags = m->flags; 151 mp_ioapics[nr_ioapics].flags = m->flags;
152 nr_ioapics++; 152 nr_ioapics++;
153} 153}
154 154
@@ -160,55 +160,55 @@ static void print_MP_intsrc_info(struct mpc_intsrc *m)
160 m->srcbusirq, m->dstapic, m->dstirq); 160 m->srcbusirq, m->dstapic, m->dstirq);
161} 161}
162 162
163static void __init print_mp_irq_info(struct mp_config_intsrc *mp_irq) 163static void __init print_mp_irq_info(struct mpc_intsrc *mp_irq)
164{ 164{
165 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x," 165 apic_printk(APIC_VERBOSE, "Int: type %d, pol %d, trig %d, bus %02x,"
166 " IRQ %02x, APIC ID %x, APIC INT %02x\n", 166 " IRQ %02x, APIC ID %x, APIC INT %02x\n",
167 mp_irq->mp_irqtype, mp_irq->mp_irqflag & 3, 167 mp_irq->irqtype, mp_irq->irqflag & 3,
168 (mp_irq->mp_irqflag >> 2) & 3, mp_irq->mp_srcbus, 168 (mp_irq->irqflag >> 2) & 3, mp_irq->srcbus,
169 mp_irq->mp_srcbusirq, mp_irq->mp_dstapic, mp_irq->mp_dstirq); 169 mp_irq->srcbusirq, mp_irq->dstapic, mp_irq->dstirq);
170} 170}
171 171
172static void __init assign_to_mp_irq(struct mpc_intsrc *m, 172static void __init assign_to_mp_irq(struct mpc_intsrc *m,
173 struct mp_config_intsrc *mp_irq) 173 struct mpc_intsrc *mp_irq)
174{ 174{
175 mp_irq->mp_dstapic = m->dstapic; 175 mp_irq->dstapic = m->dstapic;
176 mp_irq->mp_type = m->type; 176 mp_irq->type = m->type;
177 mp_irq->mp_irqtype = m->irqtype; 177 mp_irq->irqtype = m->irqtype;
178 mp_irq->mp_irqflag = m->irqflag; 178 mp_irq->irqflag = m->irqflag;
179 mp_irq->mp_srcbus = m->srcbus; 179 mp_irq->srcbus = m->srcbus;
180 mp_irq->mp_srcbusirq = m->srcbusirq; 180 mp_irq->srcbusirq = m->srcbusirq;
181 mp_irq->mp_dstirq = m->dstirq; 181 mp_irq->dstirq = m->dstirq;
182} 182}
183 183
184static void __init assign_to_mpc_intsrc(struct mp_config_intsrc *mp_irq, 184static void __init assign_to_mpc_intsrc(struct mpc_intsrc *mp_irq,
185 struct mpc_intsrc *m) 185 struct mpc_intsrc *m)
186{ 186{
187 m->dstapic = mp_irq->mp_dstapic; 187 m->dstapic = mp_irq->dstapic;
188 m->type = mp_irq->mp_type; 188 m->type = mp_irq->type;
189 m->irqtype = mp_irq->mp_irqtype; 189 m->irqtype = mp_irq->irqtype;
190 m->irqflag = mp_irq->mp_irqflag; 190 m->irqflag = mp_irq->irqflag;
191 m->srcbus = mp_irq->mp_srcbus; 191 m->srcbus = mp_irq->srcbus;
192 m->srcbusirq = mp_irq->mp_srcbusirq; 192 m->srcbusirq = mp_irq->srcbusirq;
193 m->dstirq = mp_irq->mp_dstirq; 193 m->dstirq = mp_irq->dstirq;
194} 194}
195 195
196static int __init mp_irq_mpc_intsrc_cmp(struct mp_config_intsrc *mp_irq, 196static int __init mp_irq_mpc_intsrc_cmp(struct mpc_intsrc *mp_irq,
197 struct mpc_intsrc *m) 197 struct mpc_intsrc *m)
198{ 198{
199 if (mp_irq->mp_dstapic != m->dstapic) 199 if (mp_irq->dstapic != m->dstapic)
200 return 1; 200 return 1;
201 if (mp_irq->mp_type != m->type) 201 if (mp_irq->type != m->type)
202 return 2; 202 return 2;
203 if (mp_irq->mp_irqtype != m->irqtype) 203 if (mp_irq->irqtype != m->irqtype)
204 return 3; 204 return 3;
205 if (mp_irq->mp_irqflag != m->irqflag) 205 if (mp_irq->irqflag != m->irqflag)
206 return 4; 206 return 4;
207 if (mp_irq->mp_srcbus != m->srcbus) 207 if (mp_irq->srcbus != m->srcbus)
208 return 5; 208 return 5;
209 if (mp_irq->mp_srcbusirq != m->srcbusirq) 209 if (mp_irq->srcbusirq != m->srcbusirq)
210 return 6; 210 return 6;
211 if (mp_irq->mp_dstirq != m->dstirq) 211 if (mp_irq->dstirq != m->dstirq)
212 return 7; 212 return 7;
213 213
214 return 0; 214 return 0;
@@ -417,7 +417,7 @@ static void __init construct_default_ioirq_mptable(int mpc_default_type)
417 intsrc.type = MP_INTSRC; 417 intsrc.type = MP_INTSRC;
418 intsrc.irqflag = 0; /* conforming */ 418 intsrc.irqflag = 0; /* conforming */
419 intsrc.srcbus = 0; 419 intsrc.srcbus = 0;
420 intsrc.dstapic = mp_ioapics[0].mp_apicid; 420 intsrc.dstapic = mp_ioapics[0].apicid;
421 421
422 intsrc.irqtype = mp_INT; 422 intsrc.irqtype = mp_INT;
423 423
@@ -570,14 +570,14 @@ static inline void __init construct_default_ISA_mptable(int mpc_default_type)
570 } 570 }
571} 571}
572 572
573static struct intel_mp_floating *mpf_found; 573static struct mpf_intel *mpf_found;
574 574
575/* 575/*
576 * Scan the memory blocks for an SMP configuration block. 576 * Scan the memory blocks for an SMP configuration block.
577 */ 577 */
578static void __init __get_smp_config(unsigned int early) 578static void __init __get_smp_config(unsigned int early)
579{ 579{
580 struct intel_mp_floating *mpf = mpf_found; 580 struct mpf_intel *mpf = mpf_found;
581 581
582 if (!mpf) 582 if (!mpf)
583 return; 583 return;
@@ -598,9 +598,9 @@ static void __init __get_smp_config(unsigned int early)
598 } 598 }
599 599
600 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n", 600 printk(KERN_INFO "Intel MultiProcessor Specification v1.%d\n",
601 mpf->mpf_specification); 601 mpf->specification);
602#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32) 602#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_32)
603 if (mpf->mpf_feature2 & (1 << 7)) { 603 if (mpf->feature2 & (1 << 7)) {
604 printk(KERN_INFO " IMCR and PIC compatibility mode.\n"); 604 printk(KERN_INFO " IMCR and PIC compatibility mode.\n");
605 pic_mode = 1; 605 pic_mode = 1;
606 } else { 606 } else {
@@ -611,7 +611,7 @@ static void __init __get_smp_config(unsigned int early)
611 /* 611 /*
612 * Now see if we need to read further. 612 * Now see if we need to read further.
613 */ 613 */
614 if (mpf->mpf_feature1 != 0) { 614 if (mpf->feature1 != 0) {
615 if (early) { 615 if (early) {
616 /* 616 /*
617 * local APIC has default address 617 * local APIC has default address
@@ -621,16 +621,16 @@ static void __init __get_smp_config(unsigned int early)
621 } 621 }
622 622
623 printk(KERN_INFO "Default MP configuration #%d\n", 623 printk(KERN_INFO "Default MP configuration #%d\n",
624 mpf->mpf_feature1); 624 mpf->feature1);
625 construct_default_ISA_mptable(mpf->mpf_feature1); 625 construct_default_ISA_mptable(mpf->feature1);
626 626
627 } else if (mpf->mpf_physptr) { 627 } else if (mpf->physptr) {
628 628
629 /* 629 /*
630 * Read the physical hardware table. Anything here will 630 * Read the physical hardware table. Anything here will
631 * override the defaults. 631 * override the defaults.
632 */ 632 */
633 if (!smp_read_mpc(phys_to_virt(mpf->mpf_physptr), early)) { 633 if (!smp_read_mpc(phys_to_virt(mpf->physptr), early)) {
634#ifdef CONFIG_X86_LOCAL_APIC 634#ifdef CONFIG_X86_LOCAL_APIC
635 smp_found_config = 0; 635 smp_found_config = 0;
636#endif 636#endif
@@ -688,19 +688,19 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
688 unsigned reserve) 688 unsigned reserve)
689{ 689{
690 unsigned int *bp = phys_to_virt(base); 690 unsigned int *bp = phys_to_virt(base);
691 struct intel_mp_floating *mpf; 691 struct mpf_intel *mpf;
692 692
693 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n", 693 apic_printk(APIC_VERBOSE, "Scan SMP from %p for %ld bytes.\n",
694 bp, length); 694 bp, length);
695 BUILD_BUG_ON(sizeof(*mpf) != 16); 695 BUILD_BUG_ON(sizeof(*mpf) != 16);
696 696
697 while (length > 0) { 697 while (length > 0) {
698 mpf = (struct intel_mp_floating *)bp; 698 mpf = (struct mpf_intel *)bp;
699 if ((*bp == SMP_MAGIC_IDENT) && 699 if ((*bp == SMP_MAGIC_IDENT) &&
700 (mpf->mpf_length == 1) && 700 (mpf->length == 1) &&
701 !mpf_checksum((unsigned char *)bp, 16) && 701 !mpf_checksum((unsigned char *)bp, 16) &&
702 ((mpf->mpf_specification == 1) 702 ((mpf->specification == 1)
703 || (mpf->mpf_specification == 4))) { 703 || (mpf->specification == 4))) {
704#ifdef CONFIG_X86_LOCAL_APIC 704#ifdef CONFIG_X86_LOCAL_APIC
705 smp_found_config = 1; 705 smp_found_config = 1;
706#endif 706#endif
@@ -713,7 +713,7 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
713 return 1; 713 return 1;
714 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE, 714 reserve_bootmem_generic(virt_to_phys(mpf), PAGE_SIZE,
715 BOOTMEM_DEFAULT); 715 BOOTMEM_DEFAULT);
716 if (mpf->mpf_physptr) { 716 if (mpf->physptr) {
717 unsigned long size = PAGE_SIZE; 717 unsigned long size = PAGE_SIZE;
718#ifdef CONFIG_X86_32 718#ifdef CONFIG_X86_32
719 /* 719 /*
@@ -722,14 +722,14 @@ static int __init smp_scan_config(unsigned long base, unsigned long length,
722 * the bottom is mapped now. 722 * the bottom is mapped now.
723 * PC-9800's MPC table places on the very last 723 * PC-9800's MPC table places on the very last
724 * of physical memory; so that simply reserving 724 * of physical memory; so that simply reserving
725 * PAGE_SIZE from mpg->mpf_physptr yields BUG() 725 * PAGE_SIZE from mpf->physptr yields BUG()
726 * in reserve_bootmem. 726 * in reserve_bootmem.
727 */ 727 */
728 unsigned long end = max_low_pfn * PAGE_SIZE; 728 unsigned long end = max_low_pfn * PAGE_SIZE;
729 if (mpf->mpf_physptr + size > end) 729 if (mpf->physptr + size > end)
730 size = end - mpf->mpf_physptr; 730 size = end - mpf->physptr;
731#endif 731#endif
732 reserve_bootmem_generic(mpf->mpf_physptr, size, 732 reserve_bootmem_generic(mpf->physptr, size,
733 BOOTMEM_DEFAULT); 733 BOOTMEM_DEFAULT);
734 } 734 }
735 735
@@ -809,15 +809,15 @@ static int __init get_MP_intsrc_index(struct mpc_intsrc *m)
809 /* not legacy */ 809 /* not legacy */
810 810
811 for (i = 0; i < mp_irq_entries; i++) { 811 for (i = 0; i < mp_irq_entries; i++) {
812 if (mp_irqs[i].mp_irqtype != mp_INT) 812 if (mp_irqs[i].irqtype != mp_INT)
813 continue; 813 continue;
814 814
815 if (mp_irqs[i].mp_irqflag != 0x0f) 815 if (mp_irqs[i].irqflag != 0x0f)
816 continue; 816 continue;
817 817
818 if (mp_irqs[i].mp_srcbus != m->srcbus) 818 if (mp_irqs[i].srcbus != m->srcbus)
819 continue; 819 continue;
820 if (mp_irqs[i].mp_srcbusirq != m->srcbusirq) 820 if (mp_irqs[i].srcbusirq != m->srcbusirq)
821 continue; 821 continue;
822 if (irq_used[i]) { 822 if (irq_used[i]) {
823 /* already claimed */ 823 /* already claimed */
@@ -922,10 +922,10 @@ static int __init replace_intsrc_all(struct mpc_table *mpc,
922 if (irq_used[i]) 922 if (irq_used[i])
923 continue; 923 continue;
924 924
925 if (mp_irqs[i].mp_irqtype != mp_INT) 925 if (mp_irqs[i].irqtype != mp_INT)
926 continue; 926 continue;
927 927
928 if (mp_irqs[i].mp_irqflag != 0x0f) 928 if (mp_irqs[i].irqflag != 0x0f)
929 continue; 929 continue;
930 930
931 if (nr_m_spare > 0) { 931 if (nr_m_spare > 0) {
@@ -1001,7 +1001,7 @@ static int __init update_mp_table(void)
1001{ 1001{
1002 char str[16]; 1002 char str[16];
1003 char oem[10]; 1003 char oem[10];
1004 struct intel_mp_floating *mpf; 1004 struct mpf_intel *mpf;
1005 struct mpc_table *mpc, *mpc_new; 1005 struct mpc_table *mpc, *mpc_new;
1006 1006
1007 if (!enable_update_mptable) 1007 if (!enable_update_mptable)
@@ -1014,19 +1014,19 @@ static int __init update_mp_table(void)
1014 /* 1014 /*
1015 * Now see if we need to go further. 1015 * Now see if we need to go further.
1016 */ 1016 */
1017 if (mpf->mpf_feature1 != 0) 1017 if (mpf->feature1 != 0)
1018 return 0; 1018 return 0;
1019 1019
1020 if (!mpf->mpf_physptr) 1020 if (!mpf->physptr)
1021 return 0; 1021 return 0;
1022 1022
1023 mpc = phys_to_virt(mpf->mpf_physptr); 1023 mpc = phys_to_virt(mpf->physptr);
1024 1024
1025 if (!smp_check_mpc(mpc, oem, str)) 1025 if (!smp_check_mpc(mpc, oem, str))
1026 return 0; 1026 return 0;
1027 1027
1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf)); 1028 printk(KERN_INFO "mpf: %lx\n", virt_to_phys(mpf));
1029 printk(KERN_INFO "mpf_physptr: %x\n", mpf->mpf_physptr); 1029 printk(KERN_INFO "physptr: %x\n", mpf->physptr);
1030 1030
1031 if (mpc_new_phys && mpc->length > mpc_new_length) { 1031 if (mpc_new_phys && mpc->length > mpc_new_length) {
1032 mpc_new_phys = 0; 1032 mpc_new_phys = 0;
@@ -1047,23 +1047,23 @@ static int __init update_mp_table(void)
1047 } 1047 }
1048 printk(KERN_INFO "use in-positon replacing\n"); 1048 printk(KERN_INFO "use in-positon replacing\n");
1049 } else { 1049 } else {
1050 mpf->mpf_physptr = mpc_new_phys; 1050 mpf->physptr = mpc_new_phys;
1051 mpc_new = phys_to_virt(mpc_new_phys); 1051 mpc_new = phys_to_virt(mpc_new_phys);
1052 memcpy(mpc_new, mpc, mpc->length); 1052 memcpy(mpc_new, mpc, mpc->length);
1053 mpc = mpc_new; 1053 mpc = mpc_new;
1054 /* check if we can modify that */ 1054 /* check if we can modify that */
1055 if (mpc_new_phys - mpf->mpf_physptr) { 1055 if (mpc_new_phys - mpf->physptr) {
1056 struct intel_mp_floating *mpf_new; 1056 struct mpf_intel *mpf_new;
1057 /* steal 16 bytes from [0, 1k) */ 1057 /* steal 16 bytes from [0, 1k) */
1058 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16); 1058 printk(KERN_INFO "mpf new: %x\n", 0x400 - 16);
1059 mpf_new = phys_to_virt(0x400 - 16); 1059 mpf_new = phys_to_virt(0x400 - 16);
1060 memcpy(mpf_new, mpf, 16); 1060 memcpy(mpf_new, mpf, 16);
1061 mpf = mpf_new; 1061 mpf = mpf_new;
1062 mpf->mpf_physptr = mpc_new_phys; 1062 mpf->physptr = mpc_new_phys;
1063 } 1063 }
1064 mpf->mpf_checksum = 0; 1064 mpf->checksum = 0;
1065 mpf->mpf_checksum -= mpf_checksum((unsigned char *)mpf, 16); 1065 mpf->checksum -= mpf_checksum((unsigned char *)mpf, 16);
1066 printk(KERN_INFO "mpf_physptr new: %x\n", mpf->mpf_physptr); 1066 printk(KERN_INFO "physptr new: %x\n", mpf->physptr);
1067 } 1067 }
1068 1068
1069 /* 1069 /*
diff --git a/arch/x86/kernel/msr.c b/arch/x86/kernel/msr.c
index 726266695b2c..3cf3413ec626 100644
--- a/arch/x86/kernel/msr.c
+++ b/arch/x86/kernel/msr.c
@@ -35,10 +35,10 @@
35#include <linux/device.h> 35#include <linux/device.h>
36#include <linux/cpu.h> 36#include <linux/cpu.h>
37#include <linux/notifier.h> 37#include <linux/notifier.h>
38#include <linux/uaccess.h>
38 39
39#include <asm/processor.h> 40#include <asm/processor.h>
40#include <asm/msr.h> 41#include <asm/msr.h>
41#include <asm/uaccess.h>
42#include <asm/system.h> 42#include <asm/system.h>
43 43
44static struct class *msr_class; 44static struct class *msr_class;
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7228979f1e7f..23b6d9e6e4f5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -61,11 +61,7 @@ static int endflag __initdata;
61 61
62static inline unsigned int get_nmi_count(int cpu) 62static inline unsigned int get_nmi_count(int cpu)
63{ 63{
64#ifdef CONFIG_X86_64 64 return per_cpu(irq_stat, cpu).__nmi_count;
65 return cpu_pda(cpu)->__nmi_count;
66#else
67 return nmi_count(cpu);
68#endif
69} 65}
70 66
71static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
82 */ 78 */
83static inline unsigned int get_timer_irqs(int cpu) 79static inline unsigned int get_timer_irqs(int cpu)
84{ 80{
85#ifdef CONFIG_X86_64
86 return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
87#else
88 return per_cpu(irq_stat, cpu).apic_timer_irqs + 81 return per_cpu(irq_stat, cpu).apic_timer_irqs +
89 per_cpu(irq_stat, cpu).irq0_irqs; 82 per_cpu(irq_stat, cpu).irq0_irqs;
90#endif
91} 83}
92 84
93#ifdef CONFIG_SMP 85#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c
index 00c2bcd41463..d5768b1af080 100644
--- a/arch/x86/kernel/pci-gart_64.c
+++ b/arch/x86/kernel/pci-gart_64.c
@@ -5,7 +5,7 @@
5 * This allows to use PCI devices that only support 32bit addresses on systems 5 * This allows to use PCI devices that only support 32bit addresses on systems
6 * with more than 4GB. 6 * with more than 4GB.
7 * 7 *
8 * See Documentation/DMA-mapping.txt for the interface specification. 8 * See Documentation/PCI/PCI-DMA-mapping.txt for the interface specification.
9 * 9 *
10 * Copyright 2002 Andi Kleen, SuSE Labs. 10 * Copyright 2002 Andi Kleen, SuSE Labs.
11 * Subject to the GNU General Public License v2 only. 11 * Subject to the GNU General Public License v2 only.
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..1a1ae8edc40c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 67EXPORT_PER_CPU_SYMBOL(current_task);
68 68
69DEFINE_PER_CPU(int, cpu_number);
70EXPORT_PER_CPU_SYMBOL(cpu_number);
71
72/* 69/*
73 * Return saved PC of a blocked thread. 70 * Return saved PC of a blocked thread.
74 */ 71 */
@@ -111,7 +108,6 @@ void cpu_idle(void)
111 play_dead(); 108 play_dead();
112 109
113 local_irq_disable(); 110 local_irq_disable();
114 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
115 /* Don't trace irqs off for idle */ 111 /* Don't trace irqs off for idle */
116 stop_critical_timings(); 112 stop_critical_timings();
117 pm_idle(); 113 pm_idle();
@@ -591,7 +587,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
591 if (prev->gs | next->gs) 587 if (prev->gs | next->gs)
592 loadsegment(gs, next->gs); 588 loadsegment(gs, next->gs);
593 589
594 x86_write_percpu(current_task, next_p); 590 percpu_write(current_task, next_p);
595 591
596 return prev_p; 592 return prev_p;
597} 593}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4f..c422eebb0c58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -16,6 +16,7 @@
16 16
17#include <stdarg.h> 17#include <stdarg.h>
18 18
19#include <linux/stackprotector.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/errno.h> 21#include <linux/errno.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
@@ -46,7 +47,6 @@
46#include <asm/processor.h> 47#include <asm/processor.h>
47#include <asm/i387.h> 48#include <asm/i387.h>
48#include <asm/mmu_context.h> 49#include <asm/mmu_context.h>
49#include <asm/pda.h>
50#include <asm/prctl.h> 50#include <asm/prctl.h>
51#include <asm/desc.h> 51#include <asm/desc.h>
52#include <asm/proto.h> 52#include <asm/proto.h>
@@ -57,6 +57,12 @@
57 57
58asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
59 59
60DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
61EXPORT_PER_CPU_SYMBOL(current_task);
62
63DEFINE_PER_CPU(unsigned long, old_rsp);
64static DEFINE_PER_CPU(unsigned char, is_idle);
65
60unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 66unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
61 67
62static ATOMIC_NOTIFIER_HEAD(idle_notifier); 68static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -75,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
75 81
76void enter_idle(void) 82void enter_idle(void)
77{ 83{
78 write_pda(isidle, 1); 84 percpu_write(is_idle, 1);
79 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 85 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
80} 86}
81 87
82static void __exit_idle(void) 88static void __exit_idle(void)
83{ 89{
84 if (test_and_clear_bit_pda(0, isidle) == 0) 90 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
85 return; 91 return;
86 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 92 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
87} 93}
@@ -111,6 +117,17 @@ static inline void play_dead(void)
111void cpu_idle(void) 117void cpu_idle(void)
112{ 118{
113 current_thread_info()->status |= TS_POLLING; 119 current_thread_info()->status |= TS_POLLING;
120
121 /*
122 * If we're the non-boot CPU, nothing set the PDA stack
123 * canary up for us - and if we are the boot CPU we have
124 * a 0 stack canary. This is a good place for updating
125 * it, as we wont ever return from this function (so the
126 * invalid canaries already on the stack wont ever
127 * trigger):
128 */
129 boot_init_stack_canary();
130
114 /* endless idle loop with no priority at all */ 131 /* endless idle loop with no priority at all */
115 while (1) { 132 while (1) {
116 tick_nohz_stop_sched_tick(1); 133 tick_nohz_stop_sched_tick(1);
@@ -392,7 +409,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
392 load_gs_index(0); 409 load_gs_index(0);
393 regs->ip = new_ip; 410 regs->ip = new_ip;
394 regs->sp = new_sp; 411 regs->sp = new_sp;
395 write_pda(oldrsp, new_sp); 412 percpu_write(old_rsp, new_sp);
396 regs->cs = __USER_CS; 413 regs->cs = __USER_CS;
397 regs->ss = __USER_DS; 414 regs->ss = __USER_DS;
398 regs->flags = 0x200; 415 regs->flags = 0x200;
@@ -613,21 +630,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
613 /* 630 /*
614 * Switch the PDA and FPU contexts. 631 * Switch the PDA and FPU contexts.
615 */ 632 */
616 prev->usersp = read_pda(oldrsp); 633 prev->usersp = percpu_read(old_rsp);
617 write_pda(oldrsp, next->usersp); 634 percpu_write(old_rsp, next->usersp);
618 write_pda(pcurrent, next_p); 635 percpu_write(current_task, next_p);
619 636
620 write_pda(kernelstack, 637 percpu_write(kernel_stack,
621 (unsigned long)task_stack_page(next_p) + 638 (unsigned long)task_stack_page(next_p) +
622 THREAD_SIZE - PDA_STACKOFFSET); 639 THREAD_SIZE - KERNEL_STACK_OFFSET);
623#ifdef CONFIG_CC_STACKPROTECTOR
624 write_pda(stack_canary, next_p->stack_canary);
625 /*
626 * Build time only check to make sure the stack_canary is at
627 * offset 40 in the pda; this is a gcc ABI requirement
628 */
629 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
630#endif
631 640
632 /* 641 /*
633 * Now maybe reload the debug registers and handle I/O bitmaps 642 * Now maybe reload the debug registers and handle I/O bitmaps
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index 2b46eb41643b..f8536fee5c12 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -14,6 +14,7 @@
14#include <asm/reboot.h> 14#include <asm/reboot.h>
15#include <asm/pci_x86.h> 15#include <asm/pci_x86.h>
16#include <asm/virtext.h> 16#include <asm/virtext.h>
17#include <asm/cpu.h>
17 18
18#ifdef CONFIG_X86_32 19#ifdef CONFIG_X86_32
19# include <linux/dmi.h> 20# include <linux/dmi.h>
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index ae0d8042cf69..f41c4486c270 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -89,7 +89,7 @@
89 89
90#include <asm/system.h> 90#include <asm/system.h>
91#include <asm/vsyscall.h> 91#include <asm/vsyscall.h>
92#include <asm/smp.h> 92#include <asm/cpu.h>
93#include <asm/desc.h> 93#include <asm/desc.h>
94#include <asm/dma.h> 94#include <asm/dma.h>
95#include <asm/iommu.h> 95#include <asm/iommu.h>
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index 55c46074eba0..0d1e7ac439f4 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,145 +13,46 @@
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/apicdef.h> 14#include <asm/apicdef.h>
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h>
17#include <asm/cpumask.h>
18#include <asm/cpu.h>
16 19
17#ifdef CONFIG_X86_LOCAL_APIC 20#ifdef CONFIG_DEBUG_PER_CPU_MAPS
18unsigned int num_processors; 21# define DBG(x...) printk(KERN_DEBUG x)
19unsigned disabled_cpus __cpuinitdata;
20/* Processor that is doing the boot up */
21unsigned int boot_cpu_physical_apicid = -1U;
22EXPORT_SYMBOL(boot_cpu_physical_apicid);
23unsigned int max_physical_apicid;
24
25/* Bitmask of physically existing CPUs */
26physid_mask_t phys_cpu_present_map;
27#endif
28
29/* map cpu index to physical APIC ID */
30DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
31DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
32EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
33EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
34
35#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
36#define X86_64_NUMA 1
37
38/* map cpu index to node index */
39DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
40EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
41
42/* which logical CPUs are on which nodes */
43cpumask_t *node_to_cpumask_map;
44EXPORT_SYMBOL(node_to_cpumask_map);
45
46/* setup node_to_cpumask_map */
47static void __init setup_node_to_cpumask_map(void);
48
49#else 22#else
50static inline void setup_node_to_cpumask_map(void) { } 23# define DBG(x...)
51#endif 24#endif
52 25
53#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 26DEFINE_PER_CPU(int, cpu_number);
54/* 27EXPORT_PER_CPU_SYMBOL(cpu_number);
55 * Copy data used in early init routines from the initial arrays to the
56 * per cpu data areas. These arrays then become expendable and the
57 * *_early_ptr's are zeroed indicating that the static arrays are gone.
58 */
59static void __init setup_per_cpu_maps(void)
60{
61 int cpu;
62 28
63 for_each_possible_cpu(cpu) { 29#ifdef CONFIG_X86_64
64 per_cpu(x86_cpu_to_apicid, cpu) = 30#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
65 early_per_cpu_map(x86_cpu_to_apicid, cpu); 31#else
66 per_cpu(x86_bios_cpu_apicid, cpu) = 32#define BOOT_PERCPU_OFFSET 0
67 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
68#ifdef X86_64_NUMA
69 per_cpu(x86_cpu_to_node_map, cpu) =
70 early_per_cpu_map(x86_cpu_to_node_map, cpu);
71#endif 33#endif
72 }
73 34
74 /* indicate the early static arrays will soon be gone */ 35DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
75 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 36EXPORT_PER_CPU_SYMBOL(this_cpu_off);
76 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
77#ifdef X86_64_NUMA
78 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
79#endif
80}
81 37
82#ifdef CONFIG_X86_32 38unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
83/* 39 [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
84 * Great future not-so-futuristic plan: make i386 and x86_64 do it 40};
85 * the same way
86 */
87unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
88EXPORT_SYMBOL(__per_cpu_offset); 41EXPORT_SYMBOL(__per_cpu_offset);
89static inline void setup_cpu_pda_map(void) { }
90
91#elif !defined(CONFIG_SMP)
92static inline void setup_cpu_pda_map(void) { }
93
94#else /* CONFIG_SMP && CONFIG_X86_64 */
95
96/*
97 * Allocate cpu_pda pointer table and array via alloc_bootmem.
98 */
99static void __init setup_cpu_pda_map(void)
100{
101 char *pda;
102 struct x8664_pda **new_cpu_pda;
103 unsigned long size;
104 int cpu;
105
106 size = roundup(sizeof(struct x8664_pda), cache_line_size());
107
108 /* allocate cpu_pda array and pointer table */
109 {
110 unsigned long tsize = nr_cpu_ids * sizeof(void *);
111 unsigned long asize = size * (nr_cpu_ids - 1);
112 42
113 tsize = roundup(tsize, cache_line_size()); 43static inline void setup_percpu_segment(int cpu)
114 new_cpu_pda = alloc_bootmem(tsize + asize);
115 pda = (char *)new_cpu_pda + tsize;
116 }
117
118 /* initialize pointer table to static pda's */
119 for_each_possible_cpu(cpu) {
120 if (cpu == 0) {
121 /* leave boot cpu pda in place */
122 new_cpu_pda[0] = cpu_pda(0);
123 continue;
124 }
125 new_cpu_pda[cpu] = (struct x8664_pda *)pda;
126 new_cpu_pda[cpu]->in_bootmem = 1;
127 pda += size;
128 }
129
130 /* point to new pointer table */
131 _cpu_pda = new_cpu_pda;
132}
133
134#endif /* CONFIG_SMP && CONFIG_X86_64 */
135
136#ifdef CONFIG_X86_64
137
138/* correctly size the local cpu masks */
139static void setup_cpu_local_masks(void)
140{ 44{
141 alloc_bootmem_cpumask_var(&cpu_initialized_mask); 45#ifdef CONFIG_X86_32
142 alloc_bootmem_cpumask_var(&cpu_callin_mask); 46 struct desc_struct gdt;
143 alloc_bootmem_cpumask_var(&cpu_callout_mask);
144 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
145}
146
147#else /* CONFIG_X86_32 */
148 47
149static inline void setup_cpu_local_masks(void) 48 pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
150{ 49 0x2 | DESCTYPE_S, 0x8);
50 gdt.s = 1;
51 write_gdt_entry(get_cpu_gdt_table(cpu),
52 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
53#endif
151} 54}
152 55
153#endif /* CONFIG_X86_32 */
154
155/* 56/*
156 * Great future plan: 57 * Great future plan:
157 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 58 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -159,18 +60,12 @@ static inline void setup_cpu_local_masks(void)
159 */ 60 */
160void __init setup_per_cpu_areas(void) 61void __init setup_per_cpu_areas(void)
161{ 62{
162 ssize_t size, old_size; 63 ssize_t size;
163 char *ptr; 64 char *ptr;
164 int cpu; 65 int cpu;
165 unsigned long align = 1;
166
167 /* Setup cpu_pda map */
168 setup_cpu_pda_map();
169 66
170 /* Copy section for each CPU (we discard the original) */ 67 /* Copy section for each CPU (we discard the original) */
171 old_size = PERCPU_ENOUGH_ROOM; 68 size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
172 align = max_t(unsigned long, PAGE_SIZE, align);
173 size = roundup(old_size, align);
174 69
175 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", 70 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
176 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); 71 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
@@ -179,30 +74,67 @@ void __init setup_per_cpu_areas(void)
179 74
180 for_each_possible_cpu(cpu) { 75 for_each_possible_cpu(cpu) {
181#ifndef CONFIG_NEED_MULTIPLE_NODES 76#ifndef CONFIG_NEED_MULTIPLE_NODES
182 ptr = __alloc_bootmem(size, align, 77 ptr = alloc_bootmem_pages(size);
183 __pa(MAX_DMA_ADDRESS));
184#else 78#else
185 int node = early_cpu_to_node(cpu); 79 int node = early_cpu_to_node(cpu);
186 if (!node_online(node) || !NODE_DATA(node)) { 80 if (!node_online(node) || !NODE_DATA(node)) {
187 ptr = __alloc_bootmem(size, align, 81 ptr = alloc_bootmem_pages(size);
188 __pa(MAX_DMA_ADDRESS));
189 pr_info("cpu %d has no node %d or node-local memory\n", 82 pr_info("cpu %d has no node %d or node-local memory\n",
190 cpu, node); 83 cpu, node);
191 pr_debug("per cpu data for cpu%d at %016lx\n", 84 pr_debug("per cpu data for cpu%d at %016lx\n",
192 cpu, __pa(ptr)); 85 cpu, __pa(ptr));
193 } else { 86 } else {
194 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 87 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
195 __pa(MAX_DMA_ADDRESS));
196 pr_debug("per cpu data for cpu%d on node%d at %016lx\n", 88 pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
197 cpu, node, __pa(ptr)); 89 cpu, node, __pa(ptr));
198 } 90 }
199#endif 91#endif
92
93 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
200 per_cpu_offset(cpu) = ptr - __per_cpu_start; 94 per_cpu_offset(cpu) = ptr - __per_cpu_start;
201 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 95 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
96 per_cpu(cpu_number, cpu) = cpu;
97 setup_percpu_segment(cpu);
98 /*
99 * Copy data used in early init routines from the
100 * initial arrays to the per cpu data areas. These
101 * arrays then become expendable and the *_early_ptr's
102 * are zeroed indicating that the static arrays are
103 * gone.
104 */
105#ifdef CONFIG_X86_LOCAL_APIC
106 per_cpu(x86_cpu_to_apicid, cpu) =
107 early_per_cpu_map(x86_cpu_to_apicid, cpu);
108 per_cpu(x86_bios_cpu_apicid, cpu) =
109 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
110#endif
111#ifdef CONFIG_X86_64
112 per_cpu(irq_stack_ptr, cpu) =
113 per_cpu(irq_stack_union.irq_stack, cpu) +
114 IRQ_STACK_SIZE - 64;
115#ifdef CONFIG_NUMA
116 per_cpu(x86_cpu_to_node_map, cpu) =
117 early_per_cpu_map(x86_cpu_to_node_map, cpu);
118#endif
119#endif
120 /*
121 * Up to this point, the boot CPU has been using .data.init
122 * area. Reload any changed state for the boot CPU.
123 */
124 if (cpu == boot_cpu_id)
125 switch_to_new_gdt();
126
127 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
202 } 128 }
203 129
204 /* Setup percpu data maps */ 130 /* indicate the early static arrays will soon be gone */
205 setup_per_cpu_maps(); 131#ifdef CONFIG_X86_LOCAL_APIC
132 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
133 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
134#endif
135#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
136 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
137#endif
206 138
207 /* Setup node to cpumask map */ 139 /* Setup node to cpumask map */
208 setup_node_to_cpumask_map(); 140 setup_node_to_cpumask_map();
@@ -210,199 +142,3 @@ void __init setup_per_cpu_areas(void)
210 /* Setup cpu initialized, callin, callout masks */ 142 /* Setup cpu initialized, callin, callout masks */
211 setup_cpu_local_masks(); 143 setup_cpu_local_masks();
212} 144}
213
214#endif
215
216#ifdef X86_64_NUMA
217
218/*
219 * Allocate node_to_cpumask_map based on number of available nodes
220 * Requires node_possible_map to be valid.
221 *
222 * Note: node_to_cpumask() is not valid until after this is done.
223 */
224static void __init setup_node_to_cpumask_map(void)
225{
226 unsigned int node, num = 0;
227 cpumask_t *map;
228
229 /* setup nr_node_ids if not done yet */
230 if (nr_node_ids == MAX_NUMNODES) {
231 for_each_node_mask(node, node_possible_map)
232 num = node;
233 nr_node_ids = num + 1;
234 }
235
236 /* allocate the map */
237 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
238
239 pr_debug("Node to cpumask map at %p for %d nodes\n",
240 map, nr_node_ids);
241
242 /* node_to_cpumask() will now work */
243 node_to_cpumask_map = map;
244}
245
246void __cpuinit numa_set_node(int cpu, int node)
247{
248 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
249
250 if (cpu_pda(cpu) && node != NUMA_NO_NODE)
251 cpu_pda(cpu)->nodenumber = node;
252
253 if (cpu_to_node_map)
254 cpu_to_node_map[cpu] = node;
255
256 else if (per_cpu_offset(cpu))
257 per_cpu(x86_cpu_to_node_map, cpu) = node;
258
259 else
260 pr_debug("Setting node for non-present cpu %d\n", cpu);
261}
262
263void __cpuinit numa_clear_node(int cpu)
264{
265 numa_set_node(cpu, NUMA_NO_NODE);
266}
267
268#ifndef CONFIG_DEBUG_PER_CPU_MAPS
269
270void __cpuinit numa_add_cpu(int cpu)
271{
272 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
273}
274
275void __cpuinit numa_remove_cpu(int cpu)
276{
277 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
278}
279
280#else /* CONFIG_DEBUG_PER_CPU_MAPS */
281
282/*
283 * --------- debug versions of the numa functions ---------
284 */
285static void __cpuinit numa_set_cpumask(int cpu, int enable)
286{
287 int node = cpu_to_node(cpu);
288 cpumask_t *mask;
289 char buf[64];
290
291 if (node_to_cpumask_map == NULL) {
292 printk(KERN_ERR "node_to_cpumask_map NULL\n");
293 dump_stack();
294 return;
295 }
296
297 mask = &node_to_cpumask_map[node];
298 if (enable)
299 cpu_set(cpu, *mask);
300 else
301 cpu_clear(cpu, *mask);
302
303 cpulist_scnprintf(buf, sizeof(buf), mask);
304 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
305 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
306}
307
308void __cpuinit numa_add_cpu(int cpu)
309{
310 numa_set_cpumask(cpu, 1);
311}
312
313void __cpuinit numa_remove_cpu(int cpu)
314{
315 numa_set_cpumask(cpu, 0);
316}
317
318int cpu_to_node(int cpu)
319{
320 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
321 printk(KERN_WARNING
322 "cpu_to_node(%d): usage too early!\n", cpu);
323 dump_stack();
324 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
325 }
326 return per_cpu(x86_cpu_to_node_map, cpu);
327}
328EXPORT_SYMBOL(cpu_to_node);
329
330/*
331 * Same function as cpu_to_node() but used if called before the
332 * per_cpu areas are setup.
333 */
334int early_cpu_to_node(int cpu)
335{
336 if (early_per_cpu_ptr(x86_cpu_to_node_map))
337 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
338
339 if (!per_cpu_offset(cpu)) {
340 printk(KERN_WARNING
341 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
342 dump_stack();
343 return NUMA_NO_NODE;
344 }
345 return per_cpu(x86_cpu_to_node_map, cpu);
346}
347
348
349/* empty cpumask */
350static const cpumask_t cpu_mask_none;
351
352/*
353 * Returns a pointer to the bitmask of CPUs on Node 'node'.
354 */
355const cpumask_t *cpumask_of_node(int node)
356{
357 if (node_to_cpumask_map == NULL) {
358 printk(KERN_WARNING
359 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
360 node);
361 dump_stack();
362 return (const cpumask_t *)&cpu_online_map;
363 }
364 if (node >= nr_node_ids) {
365 printk(KERN_WARNING
366 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
367 node, nr_node_ids);
368 dump_stack();
369 return &cpu_mask_none;
370 }
371 return &node_to_cpumask_map[node];
372}
373EXPORT_SYMBOL(cpumask_of_node);
374
375/*
376 * Returns a bitmask of CPUs on Node 'node'.
377 *
378 * Side note: this function creates the returned cpumask on the stack
379 * so with a high NR_CPUS count, excessive stack space is used. The
380 * node_to_cpumask_ptr function should be used whenever possible.
381 */
382cpumask_t node_to_cpumask(int node)
383{
384 if (node_to_cpumask_map == NULL) {
385 printk(KERN_WARNING
386 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
387 dump_stack();
388 return cpu_online_map;
389 }
390 if (node >= nr_node_ids) {
391 printk(KERN_WARNING
392 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
393 node, nr_node_ids);
394 dump_stack();
395 return cpu_mask_none;
396 }
397 return node_to_cpumask_map[node];
398}
399EXPORT_SYMBOL(node_to_cpumask);
400
401/*
402 * --------- end of debug versions of the numa functions ---------
403 */
404
405#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
406
407#endif /* X86_64_NUMA */
408
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 89bb7668041d..df0587f24c54 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -632,9 +632,16 @@ badframe:
632} 632}
633 633
634#ifdef CONFIG_X86_32 634#ifdef CONFIG_X86_32
635asmlinkage int sys_rt_sigreturn(struct pt_regs regs) 635/*
636 * Note: do not pass in pt_regs directly as with tail-call optimization
637 * GCC will incorrectly stomp on the caller's frame and corrupt user-space
638 * register state:
639 */
640asmlinkage int sys_rt_sigreturn(unsigned long __unused)
636{ 641{
637 return do_rt_sigreturn(&regs); 642 struct pt_regs *regs = (struct pt_regs *)&__unused;
643
644 return do_rt_sigreturn(regs);
638} 645}
639#else /* !CONFIG_X86_32 */ 646#else /* !CONFIG_X86_32 */
640asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) 647asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index bb1a3b1fc87f..f9dbcff43546 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -53,7 +53,6 @@
53#include <asm/nmi.h> 53#include <asm/nmi.h>
54#include <asm/irq.h> 54#include <asm/irq.h>
55#include <asm/idle.h> 55#include <asm/idle.h>
56#include <asm/smp.h>
57#include <asm/trampoline.h> 56#include <asm/trampoline.h>
58#include <asm/cpu.h> 57#include <asm/cpu.h>
59#include <asm/numa.h> 58#include <asm/numa.h>
@@ -63,6 +62,7 @@
63#include <asm/vmi.h> 62#include <asm/vmi.h>
64#include <asm/genapic.h> 63#include <asm/genapic.h>
65#include <asm/setup.h> 64#include <asm/setup.h>
65#include <asm/uv/uv.h>
66#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
67 67
68#include <mach_apic.h> 68#include <mach_apic.h>
@@ -745,52 +745,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
745 complete(&c_idle->done); 745 complete(&c_idle->done);
746} 746}
747 747
748#ifdef CONFIG_X86_64
749
750/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
751static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
752{
753 if (!after_bootmem)
754 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
755}
756
757/*
758 * Allocate node local memory for the AP pda.
759 *
760 * Must be called after the _cpu_pda pointer table is initialized.
761 */
762int __cpuinit get_local_pda(int cpu)
763{
764 struct x8664_pda *oldpda, *newpda;
765 unsigned long size = sizeof(struct x8664_pda);
766 int node = cpu_to_node(cpu);
767
768 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
769 return 0;
770
771 oldpda = cpu_pda(cpu);
772 newpda = kmalloc_node(size, GFP_ATOMIC, node);
773 if (!newpda) {
774 printk(KERN_ERR "Could not allocate node local PDA "
775 "for CPU %d on node %d\n", cpu, node);
776
777 if (oldpda)
778 return 0; /* have a usable pda */
779 else
780 return -1;
781 }
782
783 if (oldpda) {
784 memcpy(newpda, oldpda, size);
785 free_bootmem_pda(oldpda);
786 }
787
788 newpda->in_bootmem = 0;
789 cpu_pda(cpu) = newpda;
790 return 0;
791}
792#endif /* CONFIG_X86_64 */
793
794static int __cpuinit do_boot_cpu(int apicid, int cpu) 748static int __cpuinit do_boot_cpu(int apicid, int cpu)
795/* 749/*
796 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 750 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -808,16 +762,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
808 }; 762 };
809 INIT_WORK(&c_idle.work, do_fork_idle); 763 INIT_WORK(&c_idle.work, do_fork_idle);
810 764
811#ifdef CONFIG_X86_64
812 /* Allocate node local memory for AP pdas */
813 if (cpu > 0) {
814 boot_error = get_local_pda(cpu);
815 if (boot_error)
816 goto restore_state;
817 /* if can't get pda memory, can't start cpu */
818 }
819#endif
820
821 alternatives_smp_switch(1); 765 alternatives_smp_switch(1);
822 766
823 c_idle.idle = get_idle_for_cpu(cpu); 767 c_idle.idle = get_idle_for_cpu(cpu);
@@ -847,14 +791,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
847 791
848 set_idle_for_cpu(cpu, c_idle.idle); 792 set_idle_for_cpu(cpu, c_idle.idle);
849do_rest: 793do_rest:
850#ifdef CONFIG_X86_32
851 per_cpu(current_task, cpu) = c_idle.idle; 794 per_cpu(current_task, cpu) = c_idle.idle;
852 init_gdt(cpu); 795#ifdef CONFIG_X86_32
853 /* Stack for startup_32 can be just as for start_secondary onwards */ 796 /* Stack for startup_32 can be just as for start_secondary onwards */
854 irq_ctx_init(cpu); 797 irq_ctx_init(cpu);
855#else 798#else
856 cpu_pda(cpu)->pcurrent = c_idle.idle;
857 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 799 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
800 initial_gs = per_cpu_offset(cpu);
801 per_cpu(kernel_stack, cpu) =
802 (unsigned long)task_stack_page(c_idle.idle) -
803 KERNEL_STACK_OFFSET + THREAD_SIZE;
858#endif 804#endif
859 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 805 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
860 initial_code = (unsigned long)start_secondary; 806 initial_code = (unsigned long)start_secondary;
@@ -931,9 +877,7 @@ do_rest:
931 inquire_remote_apic(apicid); 877 inquire_remote_apic(apicid);
932 } 878 }
933 } 879 }
934#ifdef CONFIG_X86_64 880
935restore_state:
936#endif
937 if (boot_error) { 881 if (boot_error) {
938 /* Try to put things back the way they were before ... */ 882 /* Try to put things back the way they were before ... */
939 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 883 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -1125,6 +1069,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1125 printk(KERN_ERR "... forcing use of dummy APIC emulation." 1069 printk(KERN_ERR "... forcing use of dummy APIC emulation."
1126 "(tell your hw vendor)\n"); 1070 "(tell your hw vendor)\n");
1127 smpboot_clear_io_apic(); 1071 smpboot_clear_io_apic();
1072 disable_ioapic_setup();
1128 return -1; 1073 return -1;
1129 } 1074 }
1130 1075
@@ -1240,9 +1185,6 @@ out:
1240void __init native_smp_prepare_boot_cpu(void) 1185void __init native_smp_prepare_boot_cpu(void)
1241{ 1186{
1242 int me = smp_processor_id(); 1187 int me = smp_processor_id();
1243#ifdef CONFIG_X86_32
1244 init_gdt(me);
1245#endif
1246 switch_to_new_gdt(); 1188 switch_to_new_gdt();
1247 /* already set me in cpu_online_mask in boot_cpu_init() */ 1189 /* already set me in cpu_online_mask in boot_cpu_init() */
1248 cpumask_set_cpu(me, cpu_callout_mask); 1190 cpumask_set_cpu(me, cpu_callout_mask);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
deleted file mode 100644
index 397e309839dd..000000000000
--- a/arch/x86/kernel/smpcommon.c
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * SMP stuff which is common to all sub-architectures.
3 */
4#include <linux/module.h>
5#include <asm/smp.h>
6
7#ifdef CONFIG_X86_32
8DEFINE_PER_CPU(unsigned long, this_cpu_off);
9EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10
11/*
12 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 * (still using the master per-cpu area), or a CPU doing it for a
14 * secondary which will soon come up.
15 */
16__cpuinit void init_gdt(int cpu)
17{
18 struct desc_struct gdt;
19
20 pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
21 0x2 | DESCTYPE_S, 0x8);
22 gdt.s = 1;
23
24 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
26
27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
28 per_cpu(cpu_number, cpu) = cpu;
29}
30#endif
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index ce5054642247..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,256 +0,0 @@
1#include <linux/spinlock.h>
2#include <linux/cpu.h>
3#include <linux/interrupt.h>
4
5#include <asm/tlbflush.h>
6
7DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
8 ____cacheline_aligned = { &init_mm, 0, };
9
10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h>
12
13/*
14 * Smarter SMP flushing macros.
15 * c/o Linus Torvalds.
16 *
17 * These mean you can really definitely utterly forget about
18 * writing to user space from interrupts. (Its not allowed anyway).
19 *
20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */
22
23static cpumask_t flush_cpumask;
24static struct mm_struct *flush_mm;
25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock);
27
28/*
29 * We cannot call mmdrop() because we are in interrupt context,
30 * instead update mm->cpu_vm_mask.
31 *
32 * We need to reload %cr3 since the page tables may be going
33 * away from under us..
34 */
35void leave_mm(int cpu)
36{
37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir);
40}
41EXPORT_SYMBOL_GPL(leave_mm);
42
43/*
44 *
45 * The flush IPI assumes that a thread switch happens in this order:
46 * [cpu0: the cpu that switches]
47 * 1) switch_mm() either 1a) or 1b)
48 * 1a) thread switch to a different mm
49 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
50 * Stop ipi delivery for the old mm. This is not synchronized with
51 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
52 * for the wrong mm, and in the worst case we perform a superfluous
53 * tlb flush.
54 * 1a2) set cpu_tlbstate to TLBSTATE_OK
55 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
56 * was in lazy tlb mode.
57 * 1a3) update cpu_tlbstate[].active_mm
58 * Now cpu0 accepts tlb flushes for the new mm.
59 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
60 * Now the other cpus will send tlb flush ipis.
61 * 1a4) change cr3.
62 * 1b) thread switch without mm change
63 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
64 * flush ipis.
65 * 1b1) set cpu_tlbstate to TLBSTATE_OK
66 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
67 * Atomically set the bit [other cpus will start sending flush ipis],
68 * and test the bit.
69 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
70 * 2) switch %%esp, ie current
71 *
72 * The interrupt must handle 2 special cases:
73 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
74 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
75 * runs in kernel space, the cpu could load tlb entries for user space
76 * pages.
77 *
78 * The good news is that cpu_tlbstate is local to each cpu, no
79 * write/read ordering problems.
80 */
81
82/*
83 * TLB flush IPI:
84 *
85 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
86 * 2) Leave the mm if we are in the lazy tlb mode.
87 */
88
89void smp_invalidate_interrupt(struct pt_regs *regs)
90{
91 unsigned long cpu;
92
93 cpu = get_cpu();
94
95 if (!cpu_isset(cpu, flush_cpumask))
96 goto out;
97 /*
98 * This was a BUG() but until someone can quote me the
99 * line from the intel manual that guarantees an IPI to
100 * multiple CPUs is retried _only_ on the erroring CPUs
101 * its staying as a return
102 *
103 * BUG();
104 */
105
106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb();
110 else
111 __flush_tlb_one(flush_va);
112 } else
113 leave_mm(cpu);
114 }
115 ack_APIC_irq();
116 smp_mb__before_clear_bit();
117 cpu_clear(cpu, flush_cpumask);
118 smp_mb__after_clear_bit();
119out:
120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count);
122}
123
124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
125 unsigned long va)
126{
127 cpumask_t cpumask = *cpumaskp;
128
129 /*
130 * A couple of (to be removed) sanity checks:
131 *
132 * - current CPU must not be in mask
133 * - mask must exist :)
134 */
135 BUG_ON(cpus_empty(cpumask));
136 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
137 BUG_ON(!mm);
138
139#ifdef CONFIG_HOTPLUG_CPU
140 /* If a CPU which we ran on has gone down, OK. */
141 cpus_and(cpumask, cpumask, cpu_online_map);
142 if (unlikely(cpus_empty(cpumask)))
143 return;
144#endif
145
146 /*
147 * i'm not happy about this global shared spinlock in the
148 * MM hot path, but we'll see how contended it is.
149 * AK: x86-64 has a faster method that could be ported.
150 */
151 spin_lock(&tlbstate_lock);
152
153 flush_mm = mm;
154 flush_va = va;
155 cpus_or(flush_cpumask, cpumask, flush_cpumask);
156
157 /*
158 * Make the above memory operations globally visible before
159 * sending the IPI.
160 */
161 smp_mb();
162 /*
163 * We have to send the IPI only to
164 * CPUs affected.
165 */
166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
167
168 while (!cpus_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */
170 cpu_relax();
171
172 flush_mm = NULL;
173 flush_va = 0;
174 spin_unlock(&tlbstate_lock);
175}
176
177void flush_tlb_current_task(void)
178{
179 struct mm_struct *mm = current->mm;
180 cpumask_t cpu_mask;
181
182 preempt_disable();
183 cpu_mask = mm->cpu_vm_mask;
184 cpu_clear(smp_processor_id(), cpu_mask);
185
186 local_flush_tlb();
187 if (!cpus_empty(cpu_mask))
188 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
189 preempt_enable();
190}
191
192void flush_tlb_mm(struct mm_struct *mm)
193{
194 cpumask_t cpu_mask;
195
196 preempt_disable();
197 cpu_mask = mm->cpu_vm_mask;
198 cpu_clear(smp_processor_id(), cpu_mask);
199
200 if (current->active_mm == mm) {
201 if (current->mm)
202 local_flush_tlb();
203 else
204 leave_mm(smp_processor_id());
205 }
206 if (!cpus_empty(cpu_mask))
207 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
208
209 preempt_enable();
210}
211
212void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
213{
214 struct mm_struct *mm = vma->vm_mm;
215 cpumask_t cpu_mask;
216
217 preempt_disable();
218 cpu_mask = mm->cpu_vm_mask;
219 cpu_clear(smp_processor_id(), cpu_mask);
220
221 if (current->active_mm == mm) {
222 if (current->mm)
223 __flush_tlb_one(va);
224 else
225 leave_mm(smp_processor_id());
226 }
227
228 if (!cpus_empty(cpu_mask))
229 flush_tlb_others(cpu_mask, mm, va);
230
231 preempt_enable();
232}
233EXPORT_SYMBOL(flush_tlb_page);
234
235static void do_flush_tlb_all(void *info)
236{
237 unsigned long cpu = smp_processor_id();
238
239 __flush_tlb_all();
240 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
241 leave_mm(cpu);
242}
243
244void flush_tlb_all(void)
245{
246 on_each_cpu(do_flush_tlb_all, NULL, 1);
247}
248
249void reset_lazy_tlbstate(void)
250{
251 int cpu = raw_smp_processor_id();
252
253 per_cpu(cpu_tlbstate, cpu).state = 0;
254 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
255}
256
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f885023167e0..89fce1b6d01f 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,6 +11,7 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12 12
13#include <asm/mmu_context.h> 13#include <asm/mmu_context.h>
14#include <asm/uv/uv.h>
14#include <asm/uv/uv_mmrs.h> 15#include <asm/uv/uv_mmrs.h>
15#include <asm/uv/uv_hub.h> 16#include <asm/uv/uv_hub.h>
16#include <asm/uv/uv_bau.h> 17#include <asm/uv/uv_bau.h>
@@ -200,6 +201,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
200 destination_timeouts = 0; 201 destination_timeouts = 0;
201 } 202 }
202 } 203 }
204 cpu_relax();
203 } 205 }
204 return FLUSH_COMPLETE; 206 return FLUSH_COMPLETE;
205} 207}
@@ -209,14 +211,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
209 * 211 *
210 * Send a broadcast and wait for a broadcast message to complete. 212 * Send a broadcast and wait for a broadcast message to complete.
211 * 213 *
212 * The cpumaskp mask contains the cpus the broadcast was sent to. 214 * The flush_mask contains the cpus the broadcast was sent to.
213 * 215 *
214 * Returns 1 if all remote flushing was done. The mask is zeroed. 216 * Returns NULL if all remote flushing was done. The mask is zeroed.
215 * Returns 0 if some remote flushing remains to be done. The mask is left 217 * Returns @flush_mask if some remote flushing remains to be done. The
216 * unchanged. 218 * mask will have some bits still set.
217 */ 219 */
218int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, 220const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
219 cpumask_t *cpumaskp) 221 struct bau_desc *bau_desc,
222 struct cpumask *flush_mask)
220{ 223{
221 int completion_status = 0; 224 int completion_status = 0;
222 int right_shift; 225 int right_shift;
@@ -263,59 +266,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
263 * Success, so clear the remote cpu's from the mask so we don't 266 * Success, so clear the remote cpu's from the mask so we don't
264 * use the IPI method of shootdown on them. 267 * use the IPI method of shootdown on them.
265 */ 268 */
266 for_each_cpu_mask(bit, *cpumaskp) { 269 for_each_cpu(bit, flush_mask) {
267 blade = uv_cpu_to_blade_id(bit); 270 blade = uv_cpu_to_blade_id(bit);
268 if (blade == this_blade) 271 if (blade == this_blade)
269 continue; 272 continue;
270 cpu_clear(bit, *cpumaskp); 273 cpumask_clear_cpu(bit, flush_mask);
271 } 274 }
272 if (!cpus_empty(*cpumaskp)) 275 if (!cpumask_empty(flush_mask))
273 return 0; 276 return flush_mask;
274 return 1; 277 return NULL;
275} 278}
276 279
277/** 280/**
278 * uv_flush_tlb_others - globally purge translation cache of a virtual 281 * uv_flush_tlb_others - globally purge translation cache of a virtual
279 * address or all TLB's 282 * address or all TLB's
280 * @cpumaskp: mask of all cpu's in which the address is to be removed 283 * @cpumask: mask of all cpu's in which the address is to be removed
281 * @mm: mm_struct containing virtual address range 284 * @mm: mm_struct containing virtual address range
282 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) 285 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
286 * @cpu: the current cpu
283 * 287 *
284 * This is the entry point for initiating any UV global TLB shootdown. 288 * This is the entry point for initiating any UV global TLB shootdown.
285 * 289 *
286 * Purges the translation caches of all specified processors of the given 290 * Purges the translation caches of all specified processors of the given
287 * virtual address, or purges all TLB's on specified processors. 291 * virtual address, or purges all TLB's on specified processors.
288 * 292 *
289 * The caller has derived the cpumaskp from the mm_struct and has subtracted 293 * The caller has derived the cpumask from the mm_struct. This function
290 * the local cpu from the mask. This function is called only if there 294 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
291 * are bits set in the mask. (e.g. flush_tlb_page())
292 * 295 *
293 * The cpumaskp is converted into a nodemask of the nodes containing 296 * The cpumask is converted into a nodemask of the nodes containing
294 * the cpus. 297 * the cpus.
295 * 298 *
296 * Returns 1 if all remote flushing was done. 299 * Note that this function should be called with preemption disabled.
297 * Returns 0 if some remote flushing remains to be done. 300 *
301 * Returns NULL if all remote flushing was done.
302 * Returns pointer to cpumask if some remote flushing remains to be
303 * done. The returned pointer is valid till preemption is re-enabled.
298 */ 304 */
299int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, 305const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
300 unsigned long va) 306 struct mm_struct *mm,
307 unsigned long va, unsigned int cpu)
301{ 308{
309 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
310 struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
302 int i; 311 int i;
303 int bit; 312 int bit;
304 int blade; 313 int blade;
305 int cpu; 314 int uv_cpu;
306 int this_blade; 315 int this_blade;
307 int locals = 0; 316 int locals = 0;
308 struct bau_desc *bau_desc; 317 struct bau_desc *bau_desc;
309 318
310 cpu = uv_blade_processor_id(); 319 WARN_ON(!in_atomic());
320
321 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
322
323 uv_cpu = uv_blade_processor_id();
311 this_blade = uv_numa_blade_id(); 324 this_blade = uv_numa_blade_id();
312 bau_desc = __get_cpu_var(bau_control).descriptor_base; 325 bau_desc = __get_cpu_var(bau_control).descriptor_base;
313 bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; 326 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
314 327
315 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 328 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
316 329
317 i = 0; 330 i = 0;
318 for_each_cpu_mask(bit, *cpumaskp) { 331 for_each_cpu(bit, flush_mask) {
319 blade = uv_cpu_to_blade_id(bit); 332 blade = uv_cpu_to_blade_id(bit);
320 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); 333 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
321 if (blade == this_blade) { 334 if (blade == this_blade) {
@@ -330,17 +343,17 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
330 * no off_node flushing; return status for local node 343 * no off_node flushing; return status for local node
331 */ 344 */
332 if (locals) 345 if (locals)
333 return 0; 346 return flush_mask;
334 else 347 else
335 return 1; 348 return NULL;
336 } 349 }
337 __get_cpu_var(ptcstats).requestor++; 350 __get_cpu_var(ptcstats).requestor++;
338 __get_cpu_var(ptcstats).ntargeted += i; 351 __get_cpu_var(ptcstats).ntargeted += i;
339 352
340 bau_desc->payload.address = va; 353 bau_desc->payload.address = va;
341 bau_desc->payload.sending_cpu = smp_processor_id(); 354 bau_desc->payload.sending_cpu = cpu;
342 355
343 return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); 356 return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
344} 357}
345 358
346/* 359/*
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284b..ed5aee5f3fcc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,7 +59,6 @@
59#ifdef CONFIG_X86_64 59#ifdef CONFIG_X86_64
60#include <asm/pgalloc.h> 60#include <asm/pgalloc.h>
61#include <asm/proto.h> 61#include <asm/proto.h>
62#include <asm/pda.h>
63#else 62#else
64#include <asm/processor-flags.h> 63#include <asm/processor-flags.h>
65#include <asm/arch_hooks.h> 64#include <asm/arch_hooks.h>
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 23206ba16874..1d3302cc2ddf 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -858,7 +858,7 @@ void __init vmi_init(void)
858#endif 858#endif
859} 859}
860 860
861void vmi_activate(void) 861void __init vmi_activate(void)
862{ 862{
863 unsigned long flags; 863 unsigned long flags;
864 864
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 82c67559dde7..3eba7f7bac05 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -178,14 +178,7 @@ SECTIONS
178 __initramfs_end = .; 178 __initramfs_end = .;
179 } 179 }
180#endif 180#endif
181 . = ALIGN(PAGE_SIZE); 181 PERCPU(PAGE_SIZE)
182 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
183 __per_cpu_start = .;
184 *(.data.percpu.page_aligned)
185 *(.data.percpu)
186 *(.data.percpu.shared_aligned)
187 __per_cpu_end = .;
188 }
189 . = ALIGN(PAGE_SIZE); 182 . = ALIGN(PAGE_SIZE);
190 /* freed after init ends here */ 183 /* freed after init ends here */
191 184
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0e6bef..c9740996430a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,6 +5,7 @@
5#define LOAD_OFFSET __START_KERNEL_map 5#define LOAD_OFFSET __START_KERNEL_map
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h>
8#include <asm/page.h> 9#include <asm/page.h>
9 10
10#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
@@ -13,12 +14,14 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 14OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 15ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 16jiffies_64 = jiffies;
16_proxy_pda = 1;
17PHDRS { 17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
22 note PT_NOTE FLAGS(0); /* ___ */ 25 note PT_NOTE FLAGS(0); /* ___ */
23} 26}
24SECTIONS 27SECTIONS
@@ -208,14 +211,28 @@ SECTIONS
208 __initramfs_end = .; 211 __initramfs_end = .;
209#endif 212#endif
210 213
214#ifdef CONFIG_SMP
215 /*
216 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
217 * output PHDR, so the next output section - __data_nosave - should
218 * switch it back to data.init. Also, pda should be at the head of
219 * percpu area. Preallocate it and define the percpu offset symbol
220 * so that it can be accessed as a percpu variable.
221 */
222 . = ALIGN(PAGE_SIZE);
223 PERCPU_VADDR(0, :percpu)
224#else
211 PERCPU(PAGE_SIZE) 225 PERCPU(PAGE_SIZE)
226#endif
212 227
213 . = ALIGN(PAGE_SIZE); 228 . = ALIGN(PAGE_SIZE);
214 __init_end = .; 229 __init_end = .;
215 230
216 . = ALIGN(PAGE_SIZE); 231 . = ALIGN(PAGE_SIZE);
217 __nosave_begin = .; 232 __nosave_begin = .;
218 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } 233 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
234 *(.data.nosave)
235 } :data.init /* switch back to data.init, see PERCPU_VADDR() above */
219 . = ALIGN(PAGE_SIZE); 236 . = ALIGN(PAGE_SIZE);
220 __nosave_end = .; 237 __nosave_end = .;
221 238
@@ -244,3 +261,8 @@ SECTIONS
244 */ 261 */
245ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 262ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
246 "kernel image bigger than KERNEL_IMAGE_SIZE") 263 "kernel image bigger than KERNEL_IMAGE_SIZE")
264
265#ifdef CONFIG_SMP
266ASSERT((per_cpu__irq_stack_union == 0),
267 "irq_stack_union is not at start of per-cpu area");
268#endif
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426aa354..3909e3ba5ce3 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy);
58EXPORT_SYMBOL(empty_zero_page); 58EXPORT_SYMBOL(empty_zero_page);
59EXPORT_SYMBOL(init_level4_pgt); 59EXPORT_SYMBOL(init_level4_pgt);
60EXPORT_SYMBOL(load_gs_index); 60EXPORT_SYMBOL(load_gs_index);
61
62EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index a7ed208f81e3..92f1c6f3e19d 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -931,7 +931,7 @@ static void lguest_restart(char *reason)
931 * that we can fit comfortably. 931 * that we can fit comfortably.
932 * 932 *
933 * First we need assembly templates of each of the patchable Guest operations, 933 * First we need assembly templates of each of the patchable Guest operations,
934 * and these are in lguest_asm.S. */ 934 * and these are in i386_head.S. */
935 935
936/*G:060 We construct a table from the assembler templates: */ 936/*G:060 We construct a table from the assembler templates: */
937static const struct lguest_insns 937static const struct lguest_insns
@@ -1093,7 +1093,7 @@ __init void lguest_init(void)
1093 acpi_ht = 0; 1093 acpi_ht = 0;
1094#endif 1094#endif
1095 1095
1096 /* We set the perferred console to "hvc". This is the "hypervisor 1096 /* We set the preferred console to "hvc". This is the "hypervisor
1097 * virtual console" driver written by the PowerPC people, which we also 1097 * virtual console" driver written by the PowerPC people, which we also
1098 * adapted for lguest's use. */ 1098 * adapted for lguest's use. */
1099 add_preferred_console("hvc", 0, NULL); 1099 add_preferred_console("hvc", 0, NULL);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 4a20b2f9a381..7c8ca91bb9ec 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -56,7 +56,7 @@ do { \
56 " jmp 2b\n" \ 56 " jmp 2b\n" \
57 ".previous\n" \ 57 ".previous\n" \
58 _ASM_EXTABLE(0b,3b) \ 58 _ASM_EXTABLE(0b,3b) \
59 : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ 59 : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
60 "=&D" (__d2) \ 60 "=&D" (__d2) \
61 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ 61 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
62 : "memory"); \ 62 : "memory"); \
@@ -218,7 +218,7 @@ long strnlen_user(const char __user *s, long n)
218 " .align 4\n" 218 " .align 4\n"
219 " .long 0b,2b\n" 219 " .long 0b,2b\n"
220 ".previous" 220 ".previous"
221 :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) 221 :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp)
222 :"0" (n), "1" (s), "2" (0), "3" (mask) 222 :"0" (n), "1" (s), "2" (0), "3" (mask)
223 :"cc"); 223 :"cc");
224 return res & mask; 224 return res & mask;
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 64d6c84e6353..ec13cb5f17ed 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -32,7 +32,7 @@ do { \
32 " jmp 2b\n" \ 32 " jmp 2b\n" \
33 ".previous\n" \ 33 ".previous\n" \
34 _ASM_EXTABLE(0b,3b) \ 34 _ASM_EXTABLE(0b,3b) \
35 : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ 35 : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
36 "=&D" (__d2) \ 36 "=&D" (__d2) \
37 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ 37 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
38 : "memory"); \ 38 : "memory"); \
@@ -86,7 +86,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
86 ".previous\n" 86 ".previous\n"
87 _ASM_EXTABLE(0b,3b) 87 _ASM_EXTABLE(0b,3b)
88 _ASM_EXTABLE(1b,2b) 88 _ASM_EXTABLE(1b,2b)
89 : [size8] "=c"(size), [dst] "=&D" (__d0) 89 : [size8] "=&c"(size), [dst] "=&D" (__d0)
90 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), 90 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
91 [zero] "r" (0UL), [eight] "r" (8UL)); 91 [zero] "r" (0UL), [eight] "r" (8UL));
92 return size; 92 return size;
diff --git a/arch/x86/mach-voyager/setup.c b/arch/x86/mach-voyager/setup.c
index a580b9562e76..0ade62555ff3 100644
--- a/arch/x86/mach-voyager/setup.c
+++ b/arch/x86/mach-voyager/setup.c
@@ -9,6 +9,7 @@
9#include <asm/e820.h> 9#include <asm/e820.h>
10#include <asm/io.h> 10#include <asm/io.h>
11#include <asm/setup.h> 11#include <asm/setup.h>
12#include <asm/cpu.h>
12 13
13void __init pre_intr_init_hook(void) 14void __init pre_intr_init_hook(void)
14{ 15{
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9840b7ec749a..331cd6d56483 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -402,7 +402,7 @@ void __init find_smp_config(void)
402 VOYAGER_SUS_IN_CONTROL_PORT); 402 VOYAGER_SUS_IN_CONTROL_PORT);
403 403
404 current_thread_info()->cpu = boot_cpu_id; 404 current_thread_info()->cpu = boot_cpu_id;
405 x86_write_percpu(cpu_number, boot_cpu_id); 405 percpu_write(cpu_number, boot_cpu_id);
406} 406}
407 407
408/* 408/*
@@ -530,7 +530,6 @@ static void __init do_boot_cpu(__u8 cpu)
530 /* init_tasks (in sched.c) is indexed logically */ 530 /* init_tasks (in sched.c) is indexed logically */
531 stack_start.sp = (void *)idle->thread.sp; 531 stack_start.sp = (void *)idle->thread.sp;
532 532
533 init_gdt(cpu);
534 per_cpu(current_task, cpu) = idle; 533 per_cpu(current_task, cpu) = idle;
535 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 534 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
536 irq_ctx_init(cpu); 535 irq_ctx_init(cpu);
@@ -1747,7 +1746,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
1747 1746
1748static void __cpuinit voyager_smp_prepare_boot_cpu(void) 1747static void __cpuinit voyager_smp_prepare_boot_cpu(void)
1749{ 1748{
1750 init_gdt(smp_processor_id());
1751 switch_to_new_gdt(); 1749 switch_to_new_gdt();
1752 1750
1753 cpu_set(smp_processor_id(), cpu_online_map); 1751 cpu_set(smp_processor_id(), cpu_online_map);
@@ -1780,7 +1778,6 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus)
1780void __init smp_setup_processor_id(void) 1778void __init smp_setup_processor_id(void)
1781{ 1779{
1782 current_thread_info()->cpu = hard_smp_processor_id(); 1780 current_thread_info()->cpu = hard_smp_processor_id();
1783 x86_write_percpu(cpu_number, hard_smp_processor_id());
1784} 1781}
1785 1782
1786static void voyager_send_call_func(cpumask_t callmask) 1783static void voyager_send_call_func(cpumask_t callmask)
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index d8cc96a2738f..9f05157220f5 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,6 +1,8 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o gup.o 2 pat.o pgtable.o gup.o
3 3
4obj-$(CONFIG_X86_SMP) += tlb.o
5
4obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o 6obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
5 7
6obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 8obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 90dfae511a41..65709a6aa6ee 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -26,6 +26,7 @@
26#include <linux/kprobes.h> 26#include <linux/kprobes.h>
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/kdebug.h> 28#include <linux/kdebug.h>
29#include <linux/magic.h>
29 30
30#include <asm/system.h> 31#include <asm/system.h>
31#include <asm/desc.h> 32#include <asm/desc.h>
@@ -91,8 +92,8 @@ static inline int notify_page_fault(struct pt_regs *regs)
91 * 92 *
92 * Opcode checker based on code by Richard Brunner 93 * Opcode checker based on code by Richard Brunner
93 */ 94 */
94static int is_prefetch(struct pt_regs *regs, unsigned long addr, 95static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
95 unsigned long error_code) 96 unsigned long addr)
96{ 97{
97 unsigned char *instr; 98 unsigned char *instr;
98 int scan_more = 1; 99 int scan_more = 1;
@@ -409,15 +410,15 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
409} 410}
410 411
411#ifdef CONFIG_X86_64 412#ifdef CONFIG_X86_64
412static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, 413static noinline void pgtable_bad(struct pt_regs *regs,
413 unsigned long error_code) 414 unsigned long error_code, unsigned long address)
414{ 415{
415 unsigned long flags = oops_begin(); 416 unsigned long flags = oops_begin();
416 int sig = SIGKILL; 417 int sig = SIGKILL;
417 struct task_struct *tsk; 418 struct task_struct *tsk = current;
418 419
419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 420 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
420 current->comm, address); 421 tsk->comm, address);
421 dump_pagetable(address); 422 dump_pagetable(address);
422 tsk = current; 423 tsk = current;
423 tsk->thread.cr2 = address; 424 tsk->thread.cr2 = address;
@@ -429,6 +430,196 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
429} 430}
430#endif 431#endif
431 432
433static noinline void no_context(struct pt_regs *regs,
434 unsigned long error_code, unsigned long address)
435{
436 struct task_struct *tsk = current;
437 unsigned long *stackend;
438
439#ifdef CONFIG_X86_64
440 unsigned long flags;
441 int sig;
442#endif
443
444 /* Are we prepared to handle this kernel fault? */
445 if (fixup_exception(regs))
446 return;
447
448 /*
449 * X86_32
450 * Valid to do another page fault here, because if this fault
451 * had been triggered by is_prefetch fixup_exception would have
452 * handled it.
453 *
454 * X86_64
455 * Hall of shame of CPU/BIOS bugs.
456 */
457 if (is_prefetch(regs, error_code, address))
458 return;
459
460 if (is_errata93(regs, address))
461 return;
462
463 /*
464 * Oops. The kernel tried to access some bad page. We'll have to
465 * terminate things with extreme prejudice.
466 */
467#ifdef CONFIG_X86_32
468 bust_spinlocks(1);
469#else
470 flags = oops_begin();
471#endif
472
473 show_fault_oops(regs, error_code, address);
474
475 stackend = end_of_stack(tsk);
476 if (*stackend != STACK_END_MAGIC)
477 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
478
479 tsk->thread.cr2 = address;
480 tsk->thread.trap_no = 14;
481 tsk->thread.error_code = error_code;
482
483#ifdef CONFIG_X86_32
484 die("Oops", regs, error_code);
485 bust_spinlocks(0);
486 do_exit(SIGKILL);
487#else
488 sig = SIGKILL;
489 if (__die("Oops", regs, error_code))
490 sig = 0;
491 /* Executive summary in case the body of the oops scrolled away */
492 printk(KERN_EMERG "CR2: %016lx\n", address);
493 oops_end(flags, regs, sig);
494#endif
495}
496
497static void __bad_area_nosemaphore(struct pt_regs *regs,
498 unsigned long error_code, unsigned long address,
499 int si_code)
500{
501 struct task_struct *tsk = current;
502
503 /* User mode accesses just cause a SIGSEGV */
504 if (error_code & PF_USER) {
505 /*
506 * It's possible to have interrupts off here.
507 */
508 local_irq_enable();
509
510 /*
511 * Valid to do another page fault here because this one came
512 * from user space.
513 */
514 if (is_prefetch(regs, error_code, address))
515 return;
516
517 if (is_errata100(regs, address))
518 return;
519
520 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
521 printk_ratelimit()) {
522 printk(
523 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
524 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
525 tsk->comm, task_pid_nr(tsk), address,
526 (void *) regs->ip, (void *) regs->sp, error_code);
527 print_vma_addr(" in ", regs->ip);
528 printk("\n");
529 }
530
531 tsk->thread.cr2 = address;
532 /* Kernel addresses are always protection faults */
533 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
534 tsk->thread.trap_no = 14;
535 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
536 return;
537 }
538
539 if (is_f00f_bug(regs, address))
540 return;
541
542 no_context(regs, error_code, address);
543}
544
545static noinline void bad_area_nosemaphore(struct pt_regs *regs,
546 unsigned long error_code, unsigned long address)
547{
548 __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
549}
550
551static void __bad_area(struct pt_regs *regs,
552 unsigned long error_code, unsigned long address,
553 int si_code)
554{
555 struct mm_struct *mm = current->mm;
556
557 /*
558 * Something tried to access memory that isn't in our memory map..
559 * Fix it, but check if it's kernel or user first..
560 */
561 up_read(&mm->mmap_sem);
562
563 __bad_area_nosemaphore(regs, error_code, address, si_code);
564}
565
566static noinline void bad_area(struct pt_regs *regs,
567 unsigned long error_code, unsigned long address)
568{
569 __bad_area(regs, error_code, address, SEGV_MAPERR);
570}
571
572static noinline void bad_area_access_error(struct pt_regs *regs,
573 unsigned long error_code, unsigned long address)
574{
575 __bad_area(regs, error_code, address, SEGV_ACCERR);
576}
577
578/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
579static void out_of_memory(struct pt_regs *regs,
580 unsigned long error_code, unsigned long address)
581{
582 /*
583 * We ran out of memory, call the OOM killer, and return the userspace
584 * (which will retry the fault, or kill us if we got oom-killed).
585 */
586 up_read(&current->mm->mmap_sem);
587 pagefault_out_of_memory();
588}
589
590static void do_sigbus(struct pt_regs *regs,
591 unsigned long error_code, unsigned long address)
592{
593 struct task_struct *tsk = current;
594 struct mm_struct *mm = tsk->mm;
595
596 up_read(&mm->mmap_sem);
597
598 /* Kernel mode? Handle exceptions or die */
599 if (!(error_code & PF_USER))
600 no_context(regs, error_code, address);
601#ifdef CONFIG_X86_32
602 /* User space => ok to do another page fault */
603 if (is_prefetch(regs, error_code, address))
604 return;
605#endif
606 tsk->thread.cr2 = address;
607 tsk->thread.error_code = error_code;
608 tsk->thread.trap_no = 14;
609 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
610}
611
612static noinline void mm_fault_error(struct pt_regs *regs,
613 unsigned long error_code, unsigned long address, unsigned int fault)
614{
615 if (fault & VM_FAULT_OOM)
616 out_of_memory(regs, error_code, address);
617 else if (fault & VM_FAULT_SIGBUS)
618 do_sigbus(regs, error_code, address);
619 else
620 BUG();
621}
622
432static int spurious_fault_check(unsigned long error_code, pte_t *pte) 623static int spurious_fault_check(unsigned long error_code, pte_t *pte)
433{ 624{
434 if ((error_code & PF_WRITE) && !pte_write(*pte)) 625 if ((error_code & PF_WRITE) && !pte_write(*pte))
@@ -448,8 +639,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
448 * There are no security implications to leaving a stale TLB when 639 * There are no security implications to leaving a stale TLB when
449 * increasing the permissions on a page. 640 * increasing the permissions on a page.
450 */ 641 */
451static int spurious_fault(unsigned long address, 642static noinline int spurious_fault(unsigned long error_code,
452 unsigned long error_code) 643 unsigned long address)
453{ 644{
454 pgd_t *pgd; 645 pgd_t *pgd;
455 pud_t *pud; 646 pud_t *pud;
@@ -494,7 +685,7 @@ static int spurious_fault(unsigned long address,
494 * 685 *
495 * This assumes no large pages in there. 686 * This assumes no large pages in there.
496 */ 687 */
497static int vmalloc_fault(unsigned long address) 688static noinline int vmalloc_fault(unsigned long address)
498{ 689{
499#ifdef CONFIG_X86_32 690#ifdef CONFIG_X86_32
500 unsigned long pgd_paddr; 691 unsigned long pgd_paddr;
@@ -573,6 +764,25 @@ static int vmalloc_fault(unsigned long address)
573 764
574int show_unhandled_signals = 1; 765int show_unhandled_signals = 1;
575 766
767static inline int access_error(unsigned long error_code, int write,
768 struct vm_area_struct *vma)
769{
770 if (write) {
771 /* write, present and write, not present */
772 if (unlikely(!(vma->vm_flags & VM_WRITE)))
773 return 1;
774 } else if (unlikely(error_code & PF_PROT)) {
775 /* read, present */
776 return 1;
777 } else {
778 /* read, not present */
779 if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
780 return 1;
781 }
782
783 return 0;
784}
785
576/* 786/*
577 * This routine handles page faults. It determines the address, 787 * This routine handles page faults. It determines the address,
578 * and the problem, and then passes it off to one of the appropriate 788 * and the problem, and then passes it off to one of the appropriate
@@ -583,16 +793,12 @@ asmlinkage
583#endif 793#endif
584void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) 794void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
585{ 795{
796 unsigned long address;
586 struct task_struct *tsk; 797 struct task_struct *tsk;
587 struct mm_struct *mm; 798 struct mm_struct *mm;
588 struct vm_area_struct *vma; 799 struct vm_area_struct *vma;
589 unsigned long address; 800 int write;
590 int write, si_code;
591 int fault; 801 int fault;
592#ifdef CONFIG_X86_64
593 unsigned long flags;
594 int sig;
595#endif
596 802
597 tsk = current; 803 tsk = current;
598 mm = tsk->mm; 804 mm = tsk->mm;
@@ -601,9 +807,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
601 /* get the address */ 807 /* get the address */
602 address = read_cr2(); 808 address = read_cr2();
603 809
604 si_code = SEGV_MAPERR; 810 if (unlikely(notify_page_fault(regs)))
605
606 if (notify_page_fault(regs))
607 return; 811 return;
608 if (unlikely(kmmio_fault(regs, address))) 812 if (unlikely(kmmio_fault(regs, address)))
609 return; 813 return;
@@ -631,17 +835,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
631 return; 835 return;
632 836
633 /* Can handle a stale RO->RW TLB */ 837 /* Can handle a stale RO->RW TLB */
634 if (spurious_fault(address, error_code)) 838 if (spurious_fault(error_code, address))
635 return; 839 return;
636 840
637 /* 841 /*
638 * Don't take the mm semaphore here. If we fixup a prefetch 842 * Don't take the mm semaphore here. If we fixup a prefetch
639 * fault we could otherwise deadlock. 843 * fault we could otherwise deadlock.
640 */ 844 */
641 goto bad_area_nosemaphore; 845 bad_area_nosemaphore(regs, error_code, address);
846 return;
642 } 847 }
643 848
644
645 /* 849 /*
646 * It's safe to allow irq's after cr2 has been saved and the 850 * It's safe to allow irq's after cr2 has been saved and the
647 * vmalloc fault has been handled. 851 * vmalloc fault has been handled.
@@ -657,15 +861,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
657 861
658#ifdef CONFIG_X86_64 862#ifdef CONFIG_X86_64
659 if (unlikely(error_code & PF_RSVD)) 863 if (unlikely(error_code & PF_RSVD))
660 pgtable_bad(address, regs, error_code); 864 pgtable_bad(regs, error_code, address);
661#endif 865#endif
662 866
663 /* 867 /*
664 * If we're in an interrupt, have no user context or are running in an 868 * If we're in an interrupt, have no user context or are running in an
665 * atomic region then we must not take the fault. 869 * atomic region then we must not take the fault.
666 */ 870 */
667 if (unlikely(in_atomic() || !mm)) 871 if (unlikely(in_atomic() || !mm)) {
668 goto bad_area_nosemaphore; 872 bad_area_nosemaphore(regs, error_code, address);
873 return;
874 }
669 875
670 /* 876 /*
671 * When running in the kernel we expect faults to occur only to 877 * When running in the kernel we expect faults to occur only to
@@ -683,20 +889,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
683 * source. If this is invalid we can skip the address space check, 889 * source. If this is invalid we can skip the address space check,
684 * thus avoiding the deadlock. 890 * thus avoiding the deadlock.
685 */ 891 */
686 if (!down_read_trylock(&mm->mmap_sem)) { 892 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
687 if ((error_code & PF_USER) == 0 && 893 if ((error_code & PF_USER) == 0 &&
688 !search_exception_tables(regs->ip)) 894 !search_exception_tables(regs->ip)) {
689 goto bad_area_nosemaphore; 895 bad_area_nosemaphore(regs, error_code, address);
896 return;
897 }
690 down_read(&mm->mmap_sem); 898 down_read(&mm->mmap_sem);
691 } 899 }
692 900
693 vma = find_vma(mm, address); 901 vma = find_vma(mm, address);
694 if (!vma) 902 if (unlikely(!vma)) {
695 goto bad_area; 903 bad_area(regs, error_code, address);
696 if (vma->vm_start <= address) 904 return;
905 }
906 if (likely(vma->vm_start <= address))
697 goto good_area; 907 goto good_area;
698 if (!(vma->vm_flags & VM_GROWSDOWN)) 908 if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
699 goto bad_area; 909 bad_area(regs, error_code, address);
910 return;
911 }
700 if (error_code & PF_USER) { 912 if (error_code & PF_USER) {
701 /* 913 /*
702 * Accessing the stack below %sp is always a bug. 914 * Accessing the stack below %sp is always a bug.
@@ -704,31 +916,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
704 * and pusha to work. ("enter $65535,$31" pushes 916 * and pusha to work. ("enter $65535,$31" pushes
705 * 32 pointers and then decrements %sp by 65535.) 917 * 32 pointers and then decrements %sp by 65535.)
706 */ 918 */
707 if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) 919 if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
708 goto bad_area; 920 bad_area(regs, error_code, address);
921 return;
922 }
709 } 923 }
710 if (expand_stack(vma, address)) 924 if (unlikely(expand_stack(vma, address))) {
711 goto bad_area; 925 bad_area(regs, error_code, address);
712/* 926 return;
713 * Ok, we have a good vm_area for this memory access, so 927 }
714 * we can handle it.. 928
715 */ 929 /*
930 * Ok, we have a good vm_area for this memory access, so
931 * we can handle it..
932 */
716good_area: 933good_area:
717 si_code = SEGV_ACCERR; 934 write = error_code & PF_WRITE;
718 write = 0; 935 if (unlikely(access_error(error_code, write, vma))) {
719 switch (error_code & (PF_PROT|PF_WRITE)) { 936 bad_area_access_error(regs, error_code, address);
720 default: /* 3: write, present */ 937 return;
721 /* fall through */
722 case PF_WRITE: /* write, not present */
723 if (!(vma->vm_flags & VM_WRITE))
724 goto bad_area;
725 write++;
726 break;
727 case PF_PROT: /* read, present */
728 goto bad_area;
729 case 0: /* read, not present */
730 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
731 goto bad_area;
732 } 938 }
733 939
734 /* 940 /*
@@ -738,11 +944,8 @@ good_area:
738 */ 944 */
739 fault = handle_mm_fault(mm, vma, address, write); 945 fault = handle_mm_fault(mm, vma, address, write);
740 if (unlikely(fault & VM_FAULT_ERROR)) { 946 if (unlikely(fault & VM_FAULT_ERROR)) {
741 if (fault & VM_FAULT_OOM) 947 mm_fault_error(regs, error_code, address, fault);
742 goto out_of_memory; 948 return;
743 else if (fault & VM_FAULT_SIGBUS)
744 goto do_sigbus;
745 BUG();
746 } 949 }
747 if (fault & VM_FAULT_MAJOR) 950 if (fault & VM_FAULT_MAJOR)
748 tsk->maj_flt++; 951 tsk->maj_flt++;
@@ -760,128 +963,6 @@ good_area:
760 } 963 }
761#endif 964#endif
762 up_read(&mm->mmap_sem); 965 up_read(&mm->mmap_sem);
763 return;
764
765/*
766 * Something tried to access memory that isn't in our memory map..
767 * Fix it, but check if it's kernel or user first..
768 */
769bad_area:
770 up_read(&mm->mmap_sem);
771
772bad_area_nosemaphore:
773 /* User mode accesses just cause a SIGSEGV */
774 if (error_code & PF_USER) {
775 /*
776 * It's possible to have interrupts off here.
777 */
778 local_irq_enable();
779
780 /*
781 * Valid to do another page fault here because this one came
782 * from user space.
783 */
784 if (is_prefetch(regs, address, error_code))
785 return;
786
787 if (is_errata100(regs, address))
788 return;
789
790 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
791 printk_ratelimit()) {
792 printk(
793 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
794 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
795 tsk->comm, task_pid_nr(tsk), address,
796 (void *) regs->ip, (void *) regs->sp, error_code);
797 print_vma_addr(" in ", regs->ip);
798 printk("\n");
799 }
800
801 tsk->thread.cr2 = address;
802 /* Kernel addresses are always protection faults */
803 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
804 tsk->thread.trap_no = 14;
805 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
806 return;
807 }
808
809 if (is_f00f_bug(regs, address))
810 return;
811
812no_context:
813 /* Are we prepared to handle this kernel fault? */
814 if (fixup_exception(regs))
815 return;
816
817 /*
818 * X86_32
819 * Valid to do another page fault here, because if this fault
820 * had been triggered by is_prefetch fixup_exception would have
821 * handled it.
822 *
823 * X86_64
824 * Hall of shame of CPU/BIOS bugs.
825 */
826 if (is_prefetch(regs, address, error_code))
827 return;
828
829 if (is_errata93(regs, address))
830 return;
831
832/*
833 * Oops. The kernel tried to access some bad page. We'll have to
834 * terminate things with extreme prejudice.
835 */
836#ifdef CONFIG_X86_32
837 bust_spinlocks(1);
838#else
839 flags = oops_begin();
840#endif
841
842 show_fault_oops(regs, error_code, address);
843
844 tsk->thread.cr2 = address;
845 tsk->thread.trap_no = 14;
846 tsk->thread.error_code = error_code;
847
848#ifdef CONFIG_X86_32
849 die("Oops", regs, error_code);
850 bust_spinlocks(0);
851 do_exit(SIGKILL);
852#else
853 sig = SIGKILL;
854 if (__die("Oops", regs, error_code))
855 sig = 0;
856 /* Executive summary in case the body of the oops scrolled away */
857 printk(KERN_EMERG "CR2: %016lx\n", address);
858 oops_end(flags, regs, sig);
859#endif
860
861out_of_memory:
862 /*
863 * We ran out of memory, call the OOM killer, and return the userspace
864 * (which will retry the fault, or kill us if we got oom-killed).
865 */
866 up_read(&mm->mmap_sem);
867 pagefault_out_of_memory();
868 return;
869
870do_sigbus:
871 up_read(&mm->mmap_sem);
872
873 /* Kernel mode? Handle exceptions or die */
874 if (!(error_code & PF_USER))
875 goto no_context;
876#ifdef CONFIG_X86_32
877 /* User space => ok to do another page fault */
878 if (is_prefetch(regs, address, error_code))
879 return;
880#endif
881 tsk->thread.cr2 = address;
882 tsk->thread.error_code = error_code;
883 tsk->thread.trap_no = 14;
884 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
885} 966}
886 967
887DEFINE_SPINLOCK(pgd_lock); 968DEFINE_SPINLOCK(pgd_lock);
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 88f1b10de3be..00263bf07a88 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -49,7 +49,6 @@
49#include <asm/paravirt.h> 49#include <asm/paravirt.h>
50#include <asm/setup.h> 50#include <asm/setup.h>
51#include <asm/cacheflush.h> 51#include <asm/cacheflush.h>
52#include <asm/smp.h>
53 52
54unsigned int __VMALLOC_RESERVE = 128 << 20; 53unsigned int __VMALLOC_RESERVE = 128 << 20;
55 54
@@ -138,6 +137,47 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
138 return pte_offset_kernel(pmd, 0); 137 return pte_offset_kernel(pmd, 0);
139} 138}
140 139
140static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
141 unsigned long vaddr, pte_t *lastpte)
142{
143#ifdef CONFIG_HIGHMEM
144 /*
145 * Something (early fixmap) may already have put a pte
146 * page here, which causes the page table allocation
147 * to become nonlinear. Attempt to fix it, and if it
148 * is still nonlinear then we have to bug.
149 */
150 int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
151 int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
152
153 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
154 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
155 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
156 && ((__pa(pte) >> PAGE_SHIFT) < table_start
157 || (__pa(pte) >> PAGE_SHIFT) >= table_end)) {
158 pte_t *newpte;
159 int i;
160
161 BUG_ON(after_init_bootmem);
162 newpte = alloc_low_page();
163 for (i = 0; i < PTRS_PER_PTE; i++)
164 set_pte(newpte + i, pte[i]);
165
166 paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
167 set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
168 BUG_ON(newpte != pte_offset_kernel(pmd, 0));
169 __flush_tlb_all();
170
171 paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
172 pte = newpte;
173 }
174 BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
175 && vaddr > fix_to_virt(FIX_KMAP_END)
176 && lastpte && lastpte + PTRS_PER_PTE != pte);
177#endif
178 return pte;
179}
180
141/* 181/*
142 * This function initializes a certain range of kernel virtual memory 182 * This function initializes a certain range of kernel virtual memory
143 * with new bootmem page tables, everywhere page tables are missing in 183 * with new bootmem page tables, everywhere page tables are missing in
@@ -154,6 +194,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
154 unsigned long vaddr; 194 unsigned long vaddr;
155 pgd_t *pgd; 195 pgd_t *pgd;
156 pmd_t *pmd; 196 pmd_t *pmd;
197 pte_t *pte = NULL;
157 198
158 vaddr = start; 199 vaddr = start;
159 pgd_idx = pgd_index(vaddr); 200 pgd_idx = pgd_index(vaddr);
@@ -165,7 +206,8 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
165 pmd = pmd + pmd_index(vaddr); 206 pmd = pmd + pmd_index(vaddr);
166 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); 207 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
167 pmd++, pmd_idx++) { 208 pmd++, pmd_idx++) {
168 one_page_table_init(pmd); 209 pte = page_table_kmap_check(one_page_table_init(pmd),
210 pmd, vaddr, pte);
169 211
170 vaddr += PMD_SIZE; 212 vaddr += PMD_SIZE;
171 } 213 }
@@ -508,7 +550,6 @@ static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
508 * Fixed mappings, only the page table structure has to be 550 * Fixed mappings, only the page table structure has to be
509 * created - mappings will be set by set_fixmap(): 551 * created - mappings will be set by set_fixmap():
510 */ 552 */
511 early_ioremap_clear();
512 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; 553 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
513 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; 554 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
514 page_table_range_init(vaddr, end, pgd_base); 555 page_table_range_init(vaddr, end, pgd_base);
@@ -801,7 +842,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse)
801 tables += PAGE_ALIGN(ptes * sizeof(pte_t)); 842 tables += PAGE_ALIGN(ptes * sizeof(pte_t));
802 843
803 /* for fixmap */ 844 /* for fixmap */
804 tables += PAGE_SIZE * 2; 845 tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t));
805 846
806 /* 847 /*
807 * RED-PEN putting page tables only on node 0 could 848 * RED-PEN putting page tables only on node 0 could
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 23f68e77ad1f..e6d36b490250 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -596,7 +596,7 @@ static void __init init_gbpages(void)
596 direct_gbpages = 0; 596 direct_gbpages = 0;
597} 597}
598 598
599static unsigned long __init kernel_physical_mapping_init(unsigned long start, 599static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
600 unsigned long end, 600 unsigned long end,
601 unsigned long page_size_mask) 601 unsigned long page_size_mask)
602{ 602{
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index d0151d8ce452..ca53224fc56c 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <asm/iomap.h> 19#include <asm/iomap.h>
20#include <asm/pat.h>
20#include <linux/module.h> 21#include <linux/module.h>
21 22
22/* Map 'pfn' using fixed map 'type' and protections 'prot' 23/* Map 'pfn' using fixed map 'type' and protections 'prot'
@@ -29,6 +30,15 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
29 30
30 pagefault_disable(); 31 pagefault_disable();
31 32
33 /*
34 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
35 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
36 * MTRR is UC or WC. UC_MINUS gets the real intention, of the
37 * user, which is "WC if the MTRR is WC, UC if you can't do that."
38 */
39 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
40 prot = PAGE_KERNEL_UC_MINUS;
41
32 idx = type + KM_TYPE_NR*smp_processor_id(); 42 idx = type + KM_TYPE_NR*smp_processor_id();
33 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 43 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
34 set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); 44 set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index bd85d42819e1..af750ab973b6 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -557,34 +557,9 @@ void __init early_ioremap_init(void)
557 } 557 }
558} 558}
559 559
560void __init early_ioremap_clear(void)
561{
562 pmd_t *pmd;
563
564 if (early_ioremap_debug)
565 printk(KERN_INFO "early_ioremap_clear()\n");
566
567 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
568 pmd_clear(pmd);
569 paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
570 __flush_tlb_all();
571}
572
573void __init early_ioremap_reset(void) 560void __init early_ioremap_reset(void)
574{ 561{
575 enum fixed_addresses idx;
576 unsigned long addr, phys;
577 pte_t *pte;
578
579 after_paging_init = 1; 562 after_paging_init = 1;
580 for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
581 addr = fix_to_virt(idx);
582 pte = early_ioremap_pte(addr);
583 if (pte_present(*pte)) {
584 phys = pte_val(*pte) & PAGE_MASK;
585 set_fixmap(idx, phys);
586 }
587 }
588} 563}
589 564
590static void __init __early_set_fixmap(enum fixed_addresses idx, 565static void __init __early_set_fixmap(enum fixed_addresses idx,
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..08d140fbc31b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,6 +20,12 @@
20#include <asm/acpi.h> 20#include <asm/acpi.h>
21#include <asm/k8.h> 21#include <asm/k8.h>
22 22
23#ifdef CONFIG_DEBUG_PER_CPU_MAPS
24# define DBG(x...) printk(KERN_DEBUG x)
25#else
26# define DBG(x...)
27#endif
28
23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 29struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
24EXPORT_SYMBOL(node_data); 30EXPORT_SYMBOL(node_data);
25 31
@@ -33,6 +39,21 @@ int numa_off __initdata;
33static unsigned long __initdata nodemap_addr; 39static unsigned long __initdata nodemap_addr;
34static unsigned long __initdata nodemap_size; 40static unsigned long __initdata nodemap_size;
35 41
42DEFINE_PER_CPU(int, node_number) = 0;
43EXPORT_PER_CPU_SYMBOL(node_number);
44
45/*
46 * Map cpu index to node index
47 */
48DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
49EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
50
51/*
52 * Which logical CPUs are on which nodes
53 */
54cpumask_t *node_to_cpumask_map;
55EXPORT_SYMBOL(node_to_cpumask_map);
56
36/* 57/*
37 * Given a shift value, try to populate memnodemap[] 58 * Given a shift value, try to populate memnodemap[]
38 * Returns : 59 * Returns :
@@ -640,3 +661,199 @@ void __init init_cpu_to_node(void)
640#endif 661#endif
641 662
642 663
664/*
665 * Allocate node_to_cpumask_map based on number of available nodes
666 * Requires node_possible_map to be valid.
667 *
668 * Note: node_to_cpumask() is not valid until after this is done.
669 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
670 */
671void __init setup_node_to_cpumask_map(void)
672{
673 unsigned int node, num = 0;
674 cpumask_t *map;
675
676 /* setup nr_node_ids if not done yet */
677 if (nr_node_ids == MAX_NUMNODES) {
678 for_each_node_mask(node, node_possible_map)
679 num = node;
680 nr_node_ids = num + 1;
681 }
682
683 /* allocate the map */
684 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
685 DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
686
687 pr_debug("Node to cpumask map at %p for %d nodes\n",
688 map, nr_node_ids);
689
690 /* node_to_cpumask() will now work */
691 node_to_cpumask_map = map;
692}
693
694void __cpuinit numa_set_node(int cpu, int node)
695{
696 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
697
698 /* early setting, no percpu area yet */
699 if (cpu_to_node_map) {
700 cpu_to_node_map[cpu] = node;
701 return;
702 }
703
704#ifdef CONFIG_DEBUG_PER_CPU_MAPS
705 if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
706 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
707 dump_stack();
708 return;
709 }
710#endif
711 per_cpu(x86_cpu_to_node_map, cpu) = node;
712
713 if (node != NUMA_NO_NODE)
714 per_cpu(node_number, cpu) = node;
715}
716
717void __cpuinit numa_clear_node(int cpu)
718{
719 numa_set_node(cpu, NUMA_NO_NODE);
720}
721
722#ifndef CONFIG_DEBUG_PER_CPU_MAPS
723
724void __cpuinit numa_add_cpu(int cpu)
725{
726 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
727}
728
729void __cpuinit numa_remove_cpu(int cpu)
730{
731 cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
732}
733
734#else /* CONFIG_DEBUG_PER_CPU_MAPS */
735
736/*
737 * --------- debug versions of the numa functions ---------
738 */
739static void __cpuinit numa_set_cpumask(int cpu, int enable)
740{
741 int node = early_cpu_to_node(cpu);
742 cpumask_t *mask;
743 char buf[64];
744
745 if (node_to_cpumask_map == NULL) {
746 printk(KERN_ERR "node_to_cpumask_map NULL\n");
747 dump_stack();
748 return;
749 }
750
751 mask = &node_to_cpumask_map[node];
752 if (enable)
753 cpu_set(cpu, *mask);
754 else
755 cpu_clear(cpu, *mask);
756
757 cpulist_scnprintf(buf, sizeof(buf), mask);
758 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
759 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
760}
761
762void __cpuinit numa_add_cpu(int cpu)
763{
764 numa_set_cpumask(cpu, 1);
765}
766
767void __cpuinit numa_remove_cpu(int cpu)
768{
769 numa_set_cpumask(cpu, 0);
770}
771
772int cpu_to_node(int cpu)
773{
774 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
775 printk(KERN_WARNING
776 "cpu_to_node(%d): usage too early!\n", cpu);
777 dump_stack();
778 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
779 }
780 return per_cpu(x86_cpu_to_node_map, cpu);
781}
782EXPORT_SYMBOL(cpu_to_node);
783
784/*
785 * Same function as cpu_to_node() but used if called before the
786 * per_cpu areas are setup.
787 */
788int early_cpu_to_node(int cpu)
789{
790 if (early_per_cpu_ptr(x86_cpu_to_node_map))
791 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
792
793 if (!per_cpu_offset(cpu)) {
794 printk(KERN_WARNING
795 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
796 dump_stack();
797 return NUMA_NO_NODE;
798 }
799 return per_cpu(x86_cpu_to_node_map, cpu);
800}
801
802
803/* empty cpumask */
804static const cpumask_t cpu_mask_none;
805
806/*
807 * Returns a pointer to the bitmask of CPUs on Node 'node'.
808 */
809const cpumask_t *cpumask_of_node(int node)
810{
811 if (node_to_cpumask_map == NULL) {
812 printk(KERN_WARNING
813 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
814 node);
815 dump_stack();
816 return (const cpumask_t *)&cpu_online_map;
817 }
818 if (node >= nr_node_ids) {
819 printk(KERN_WARNING
820 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
821 node, nr_node_ids);
822 dump_stack();
823 return &cpu_mask_none;
824 }
825 return &node_to_cpumask_map[node];
826}
827EXPORT_SYMBOL(cpumask_of_node);
828
829/*
830 * Returns a bitmask of CPUs on Node 'node'.
831 *
832 * Side note: this function creates the returned cpumask on the stack
833 * so with a high NR_CPUS count, excessive stack space is used. The
834 * node_to_cpumask_ptr function should be used whenever possible.
835 */
836cpumask_t node_to_cpumask(int node)
837{
838 if (node_to_cpumask_map == NULL) {
839 printk(KERN_WARNING
840 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
841 dump_stack();
842 return cpu_online_map;
843 }
844 if (node >= nr_node_ids) {
845 printk(KERN_WARNING
846 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
847 node, nr_node_ids);
848 dump_stack();
849 return cpu_mask_none;
850 }
851 return node_to_cpumask_map[node];
852}
853EXPORT_SYMBOL(node_to_cpumask);
854
855/*
856 * --------- end of debug versions of the numa functions ---------
857 */
858
859#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e89d24815f26..84ba74820ad6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -534,6 +534,36 @@ out_unlock:
534 return 0; 534 return 0;
535} 535}
536 536
537static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
538 int primary)
539{
540 /*
541 * Ignore all non primary paths.
542 */
543 if (!primary)
544 return 0;
545
546 /*
547 * Ignore the NULL PTE for kernel identity mapping, as it is expected
548 * to have holes.
549 * Also set numpages to '1' indicating that we processed cpa req for
550 * one virtual address page and its pfn. TBD: numpages can be set based
551 * on the initial value and the level returned by lookup_address().
552 */
553 if (within(vaddr, PAGE_OFFSET,
554 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
555 cpa->numpages = 1;
556 cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
557 return 0;
558 } else {
559 WARN(1, KERN_WARNING "CPA: called for zero pte. "
560 "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
561 *cpa->vaddr);
562
563 return -EFAULT;
564 }
565}
566
537static int __change_page_attr(struct cpa_data *cpa, int primary) 567static int __change_page_attr(struct cpa_data *cpa, int primary)
538{ 568{
539 unsigned long address; 569 unsigned long address;
@@ -549,17 +579,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
549repeat: 579repeat:
550 kpte = lookup_address(address, &level); 580 kpte = lookup_address(address, &level);
551 if (!kpte) 581 if (!kpte)
552 return 0; 582 return __cpa_process_fault(cpa, address, primary);
553 583
554 old_pte = *kpte; 584 old_pte = *kpte;
555 if (!pte_val(old_pte)) { 585 if (!pte_val(old_pte))
556 if (!primary) 586 return __cpa_process_fault(cpa, address, primary);
557 return 0;
558 WARN(1, KERN_WARNING "CPA: called for zero pte. "
559 "vaddr = %lx cpa->vaddr = %lx\n", address,
560 *cpa->vaddr);
561 return -EINVAL;
562 }
563 587
564 if (level == PG_LEVEL_4K) { 588 if (level == PG_LEVEL_4K) {
565 pte_t new_pte; 589 pte_t new_pte;
@@ -657,12 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
657 vaddr = *cpa->vaddr; 681 vaddr = *cpa->vaddr;
658 682
659 if (!(within(vaddr, PAGE_OFFSET, 683 if (!(within(vaddr, PAGE_OFFSET,
660 PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) 684 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
661#ifdef CONFIG_X86_64
662 || within(vaddr, PAGE_OFFSET + (1UL<<32),
663 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
664#endif
665 )) {
666 685
667 alias_cpa = *cpa; 686 alias_cpa = *cpa;
668 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 687 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 8b08fb955274..7b61036427df 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -333,11 +333,23 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
333 req_type & _PAGE_CACHE_MASK); 333 req_type & _PAGE_CACHE_MASK);
334 } 334 }
335 335
336 is_range_ram = pagerange_is_ram(start, end); 336 if (new_type)
337 if (is_range_ram == 1) 337 *new_type = actual_type;
338 return reserve_ram_pages_type(start, end, req_type, new_type); 338
339 else if (is_range_ram < 0) 339 /*
340 return -EINVAL; 340 * For legacy reasons, some parts of the physical address range in the
341 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
342 * the e820 tables). So we will track the memory attributes of this
343 * legacy 1MB region using the linear memtype_list always.
344 */
345 if (end >= ISA_END_ADDRESS) {
346 is_range_ram = pagerange_is_ram(start, end);
347 if (is_range_ram == 1)
348 return reserve_ram_pages_type(start, end, req_type,
349 new_type);
350 else if (is_range_ram < 0)
351 return -EINVAL;
352 }
341 353
342 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 354 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
343 if (!new) 355 if (!new)
@@ -347,9 +359,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
347 new->end = end; 359 new->end = end;
348 new->type = actual_type; 360 new->type = actual_type;
349 361
350 if (new_type)
351 *new_type = actual_type;
352
353 spin_lock(&memtype_lock); 362 spin_lock(&memtype_lock);
354 363
355 if (cached_entry && start >= cached_start) 364 if (cached_entry && start >= cached_start)
@@ -437,11 +446,19 @@ int free_memtype(u64 start, u64 end)
437 if (is_ISA_range(start, end - 1)) 446 if (is_ISA_range(start, end - 1))
438 return 0; 447 return 0;
439 448
440 is_range_ram = pagerange_is_ram(start, end); 449 /*
441 if (is_range_ram == 1) 450 * For legacy reasons, some parts of the physical address range in the
442 return free_ram_pages_type(start, end); 451 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
443 else if (is_range_ram < 0) 452 * the e820 tables). So we will track the memory attributes of this
444 return -EINVAL; 453 * legacy 1MB region using the linear memtype_list always.
454 */
455 if (end >= ISA_END_ADDRESS) {
456 is_range_ram = pagerange_is_ram(start, end);
457 if (is_range_ram == 1)
458 return free_ram_pages_type(start, end);
459 else if (is_range_ram < 0)
460 return -EINVAL;
461 }
445 462
446 spin_lock(&memtype_lock); 463 spin_lock(&memtype_lock);
447 list_for_each_entry(entry, &memtype_list, nd) { 464 list_for_each_entry(entry, &memtype_list, nd) {
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 09737c8af074..15df1baee100 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -21,6 +21,7 @@
21#include <asm/numa.h> 21#include <asm/numa.h>
22#include <asm/e820.h> 22#include <asm/e820.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/uv/uv.h>
24 25
25int acpi_numa __initdata; 26int acpi_numa __initdata;
26 27
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/mm/tlb.c
index f8be6f1d2e48..72a6d4ebe34d 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/mm/tlb.c
@@ -1,22 +1,18 @@
1#include <linux/init.h> 1#include <linux/init.h>
2 2
3#include <linux/mm.h> 3#include <linux/mm.h>
4#include <linux/delay.h>
5#include <linux/spinlock.h> 4#include <linux/spinlock.h>
6#include <linux/smp.h> 5#include <linux/smp.h>
7#include <linux/kernel_stat.h>
8#include <linux/mc146818rtc.h>
9#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/module.h>
10 8
11#include <asm/mtrr.h>
12#include <asm/pgalloc.h>
13#include <asm/tlbflush.h> 9#include <asm/tlbflush.h>
14#include <asm/mmu_context.h> 10#include <asm/mmu_context.h>
15#include <asm/proto.h> 11#include <asm/apic.h>
16#include <asm/apicdef.h> 12#include <asm/uv/uv.h>
17#include <asm/idle.h> 13
18#include <asm/uv/uv_hub.h> 14DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
19#include <asm/uv/uv_bau.h> 15 = { &init_mm, 0, };
20 16
21#include <mach_ipi.h> 17#include <mach_ipi.h>
22/* 18/*
@@ -33,7 +29,7 @@
33 * To avoid global state use 8 different call vectors. 29 * To avoid global state use 8 different call vectors.
34 * Each CPU uses a specific vector to trigger flushes on other 30 * Each CPU uses a specific vector to trigger flushes on other
35 * CPUs. Depending on the received vector the target CPUs look into 31 * CPUs. Depending on the received vector the target CPUs look into
36 * the right per cpu variable for the flush data. 32 * the right array slot for the flush data.
37 * 33 *
38 * With more than 8 CPUs they are hashed to the 8 available 34 * With more than 8 CPUs they are hashed to the 8 available
39 * vectors. The limited global vector space forces us to this right now. 35 * vectors. The limited global vector space forces us to this right now.
@@ -43,18 +39,18 @@
43 39
44union smp_flush_state { 40union smp_flush_state {
45 struct { 41 struct {
46 cpumask_t flush_cpumask;
47 struct mm_struct *flush_mm; 42 struct mm_struct *flush_mm;
48 unsigned long flush_va; 43 unsigned long flush_va;
49 spinlock_t tlbstate_lock; 44 spinlock_t tlbstate_lock;
45 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
50 }; 46 };
51 char pad[SMP_CACHE_BYTES]; 47 char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
52} ____cacheline_aligned; 48} ____cacheline_internodealigned_in_smp;
53 49
54/* State is put into the per CPU data section, but padded 50/* State is put into the per CPU data section, but padded
55 to a full cache line because other CPUs can access it and we don't 51 to a full cache line because other CPUs can access it and we don't
56 want false sharing in the per cpu data segment. */ 52 want false sharing in the per cpu data segment. */
57static DEFINE_PER_CPU(union smp_flush_state, flush_state); 53static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
58 54
59/* 55/*
60 * We cannot call mmdrop() because we are in interrupt context, 56 * We cannot call mmdrop() because we are in interrupt context,
@@ -62,9 +58,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
62 */ 58 */
63void leave_mm(int cpu) 59void leave_mm(int cpu)
64{ 60{
65 if (read_pda(mmu_state) == TLBSTATE_OK) 61 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
66 BUG(); 62 BUG();
67 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); 63 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
68 load_cr3(swapper_pg_dir); 64 load_cr3(swapper_pg_dir);
69} 65}
70EXPORT_SYMBOL_GPL(leave_mm); 66EXPORT_SYMBOL_GPL(leave_mm);
@@ -117,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
117 * Interrupts are disabled. 113 * Interrupts are disabled.
118 */ 114 */
119 115
120asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) 116/*
117 * FIXME: use of asmlinkage is not consistent. On x86_64 it's noop
118 * but still used for documentation purpose but the usage is slightly
119 * inconsistent. On x86_32, asmlinkage is regparm(0) but interrupt
120 * entry calls in with the first parameter in %eax. Maybe define
121 * intrlinkage?
122 */
123#ifdef CONFIG_X86_64
124asmlinkage
125#endif
126void smp_invalidate_interrupt(struct pt_regs *regs)
121{ 127{
122 int cpu; 128 unsigned int cpu;
123 int sender; 129 unsigned int sender;
124 union smp_flush_state *f; 130 union smp_flush_state *f;
125 131
126 cpu = smp_processor_id(); 132 cpu = smp_processor_id();
@@ -129,9 +135,9 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
129 * Use that to determine where the sender put the data. 135 * Use that to determine where the sender put the data.
130 */ 136 */
131 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; 137 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
132 f = &per_cpu(flush_state, sender); 138 f = &flush_state[sender];
133 139
134 if (!cpu_isset(cpu, f->flush_cpumask)) 140 if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
135 goto out; 141 goto out;
136 /* 142 /*
137 * This was a BUG() but until someone can quote me the 143 * This was a BUG() but until someone can quote me the
@@ -142,8 +148,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
142 * BUG(); 148 * BUG();
143 */ 149 */
144 150
145 if (f->flush_mm == read_pda(active_mm)) { 151 if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
146 if (read_pda(mmu_state) == TLBSTATE_OK) { 152 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
147 if (f->flush_va == TLB_FLUSH_ALL) 153 if (f->flush_va == TLB_FLUSH_ALL)
148 local_flush_tlb(); 154 local_flush_tlb();
149 else 155 else
@@ -153,23 +159,21 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
153 } 159 }
154out: 160out:
155 ack_APIC_irq(); 161 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask); 162 smp_mb__before_clear_bit();
163 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
164 smp_mb__after_clear_bit();
157 inc_irq_stat(irq_tlb_count); 165 inc_irq_stat(irq_tlb_count);
158} 166}
159 167
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 168static void flush_tlb_others_ipi(const struct cpumask *cpumask,
161 unsigned long va) 169 struct mm_struct *mm, unsigned long va)
162{ 170{
163 int sender; 171 unsigned int sender;
164 union smp_flush_state *f; 172 union smp_flush_state *f;
165 cpumask_t cpumask = *cpumaskp;
166
167 if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
168 return;
169 173
170 /* Caller has disabled preemption */ 174 /* Caller has disabled preemption */
171 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 175 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
172 f = &per_cpu(flush_state, sender); 176 f = &flush_state[sender];
173 177
174 /* 178 /*
175 * Could avoid this lock when 179 * Could avoid this lock when
@@ -180,7 +184,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
180 184
181 f->flush_mm = mm; 185 f->flush_mm = mm;
182 f->flush_va = va; 186 f->flush_va = va;
183 cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); 187 cpumask_andnot(to_cpumask(f->flush_cpumask),
188 cpumask, cpumask_of(smp_processor_id()));
184 189
185 /* 190 /*
186 * Make the above memory operations globally visible before 191 * Make the above memory operations globally visible before
@@ -191,9 +196,10 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 196 * We have to send the IPI only to
192 * CPUs affected. 197 * CPUs affected.
193 */ 198 */
194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); 199 send_IPI_mask(to_cpumask(f->flush_cpumask),
200 INVALIDATE_TLB_VECTOR_START + sender);
195 201
196 while (!cpus_empty(f->flush_cpumask)) 202 while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
197 cpu_relax(); 203 cpu_relax();
198 204
199 f->flush_mm = NULL; 205 f->flush_mm = NULL;
@@ -201,12 +207,28 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
201 spin_unlock(&f->tlbstate_lock); 207 spin_unlock(&f->tlbstate_lock);
202} 208}
203 209
210void native_flush_tlb_others(const struct cpumask *cpumask,
211 struct mm_struct *mm, unsigned long va)
212{
213 if (is_uv_system()) {
214 unsigned int cpu;
215
216 cpu = get_cpu();
217 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
218 if (cpumask)
219 flush_tlb_others_ipi(cpumask, mm, va);
220 put_cpu();
221 return;
222 }
223 flush_tlb_others_ipi(cpumask, mm, va);
224}
225
204static int __cpuinit init_smp_flush(void) 226static int __cpuinit init_smp_flush(void)
205{ 227{
206 int i; 228 int i;
207 229
208 for_each_possible_cpu(i) 230 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
209 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); 231 spin_lock_init(&flush_state[i].tlbstate_lock);
210 232
211 return 0; 233 return 0;
212} 234}
@@ -215,25 +237,18 @@ core_initcall(init_smp_flush);
215void flush_tlb_current_task(void) 237void flush_tlb_current_task(void)
216{ 238{
217 struct mm_struct *mm = current->mm; 239 struct mm_struct *mm = current->mm;
218 cpumask_t cpu_mask;
219 240
220 preempt_disable(); 241 preempt_disable();
221 cpu_mask = mm->cpu_vm_mask;
222 cpu_clear(smp_processor_id(), cpu_mask);
223 242
224 local_flush_tlb(); 243 local_flush_tlb();
225 if (!cpus_empty(cpu_mask)) 244 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
226 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 245 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
227 preempt_enable(); 246 preempt_enable();
228} 247}
229 248
230void flush_tlb_mm(struct mm_struct *mm) 249void flush_tlb_mm(struct mm_struct *mm)
231{ 250{
232 cpumask_t cpu_mask;
233
234 preempt_disable(); 251 preempt_disable();
235 cpu_mask = mm->cpu_vm_mask;
236 cpu_clear(smp_processor_id(), cpu_mask);
237 252
238 if (current->active_mm == mm) { 253 if (current->active_mm == mm) {
239 if (current->mm) 254 if (current->mm)
@@ -241,8 +256,8 @@ void flush_tlb_mm(struct mm_struct *mm)
241 else 256 else
242 leave_mm(smp_processor_id()); 257 leave_mm(smp_processor_id());
243 } 258 }
244 if (!cpus_empty(cpu_mask)) 259 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
245 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 260 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
246 261
247 preempt_enable(); 262 preempt_enable();
248} 263}
@@ -250,11 +265,8 @@ void flush_tlb_mm(struct mm_struct *mm)
250void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) 265void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
251{ 266{
252 struct mm_struct *mm = vma->vm_mm; 267 struct mm_struct *mm = vma->vm_mm;
253 cpumask_t cpu_mask;
254 268
255 preempt_disable(); 269 preempt_disable();
256 cpu_mask = mm->cpu_vm_mask;
257 cpu_clear(smp_processor_id(), cpu_mask);
258 270
259 if (current->active_mm == mm) { 271 if (current->active_mm == mm) {
260 if (current->mm) 272 if (current->mm)
@@ -263,8 +275,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
263 leave_mm(smp_processor_id()); 275 leave_mm(smp_processor_id());
264 } 276 }
265 277
266 if (!cpus_empty(cpu_mask)) 278 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
267 flush_tlb_others(cpu_mask, mm, va); 279 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
268 280
269 preempt_enable(); 281 preempt_enable();
270} 282}
@@ -274,7 +286,7 @@ static void do_flush_tlb_all(void *info)
274 unsigned long cpu = smp_processor_id(); 286 unsigned long cpu = smp_processor_id();
275 287
276 __flush_tlb_all(); 288 __flush_tlb_all();
277 if (read_pda(mmu_state) == TLBSTATE_LAZY) 289 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
278 leave_mm(cpu); 290 leave_mm(cpu);
279} 291}
280 292
diff --git a/arch/x86/scripts/strip-symbols b/arch/x86/scripts/strip-symbols
deleted file mode 100644
index a2f1ccb827c7..000000000000
--- a/arch/x86/scripts/strip-symbols
+++ /dev/null
@@ -1 +0,0 @@
1__cpu_vendor_dev_X86_VENDOR_*
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 6f1bb71aa13a..6b3f7eef57e3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr)
634 preempt_enable(); 634 preempt_enable();
635} 635}
636 636
637static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, 637static void xen_flush_tlb_others(const struct cpumask *cpus,
638 unsigned long va) 638 struct mm_struct *mm, unsigned long va)
639{ 639{
640 struct { 640 struct {
641 struct mmuext_op op; 641 struct mmuext_op op;
642 cpumask_t mask; 642 DECLARE_BITMAP(mask, NR_CPUS);
643 } *args; 643 } *args;
644 cpumask_t cpumask = *cpus;
645 struct multicall_space mcs; 644 struct multicall_space mcs;
646 645
647 /* 646 BUG_ON(cpumask_empty(cpus));
648 * A couple of (to be removed) sanity checks:
649 *
650 * - current CPU must not be in mask
651 * - mask must exist :)
652 */
653 BUG_ON(cpus_empty(cpumask));
654 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
655 BUG_ON(!mm); 647 BUG_ON(!mm);
656 648
657 /* If a CPU which we ran on has gone down, OK. */
658 cpus_and(cpumask, cpumask, cpu_online_map);
659 if (cpus_empty(cpumask))
660 return;
661
662 mcs = xen_mc_entry(sizeof(*args)); 649 mcs = xen_mc_entry(sizeof(*args));
663 args = mcs.args; 650 args = mcs.args;
664 args->mask = cpumask; 651 args->op.arg2.vcpumask = to_cpumask(args->mask);
665 args->op.arg2.vcpumask = &args->mask; 652
653 /* Remove us, and any offline CPUS. */
654 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
655 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
656 if (unlikely(cpumask_empty(to_cpumask(args->mask))))
657 goto issue;
666 658
667 if (va == TLB_FLUSH_ALL) { 659 if (va == TLB_FLUSH_ALL) {
668 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; 660 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
@@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
673 665
674 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); 666 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
675 667
668issue:
676 xen_mc_issue(PARAVIRT_LAZY_MMU); 669 xen_mc_issue(PARAVIRT_LAZY_MMU);
677} 670}
678 671
@@ -702,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0)
702 695
703static void xen_write_cr2(unsigned long cr2) 696static void xen_write_cr2(unsigned long cr2)
704{ 697{
705 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; 698 percpu_read(xen_vcpu)->arch.cr2 = cr2;
706} 699}
707 700
708static unsigned long xen_read_cr2(void) 701static unsigned long xen_read_cr2(void)
709{ 702{
710 return x86_read_percpu(xen_vcpu)->arch.cr2; 703 return percpu_read(xen_vcpu)->arch.cr2;
711} 704}
712 705
713static unsigned long xen_read_cr2_direct(void) 706static unsigned long xen_read_cr2_direct(void)
714{ 707{
715 return x86_read_percpu(xen_vcpu_info.arch.cr2); 708 return percpu_read(xen_vcpu_info.arch.cr2);
716} 709}
717 710
718static void xen_write_cr4(unsigned long cr4) 711static void xen_write_cr4(unsigned long cr4)
@@ -725,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4)
725 718
726static unsigned long xen_read_cr3(void) 719static unsigned long xen_read_cr3(void)
727{ 720{
728 return x86_read_percpu(xen_cr3); 721 return percpu_read(xen_cr3);
729} 722}
730 723
731static void set_current_cr3(void *v) 724static void set_current_cr3(void *v)
732{ 725{
733 x86_write_percpu(xen_current_cr3, (unsigned long)v); 726 percpu_write(xen_current_cr3, (unsigned long)v);
734} 727}
735 728
736static void __xen_write_cr3(bool kernel, unsigned long cr3) 729static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -755,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
755 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 748 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
756 749
757 if (kernel) { 750 if (kernel) {
758 x86_write_percpu(xen_cr3, cr3); 751 percpu_write(xen_cr3, cr3);
759 752
760 /* Update xen_current_cr3 once the batch has actually 753 /* Update xen_current_cr3 once the batch has actually
761 been submitted. */ 754 been submitted. */
@@ -771,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3)
771 764
772 /* Update while interrupts are disabled, so its atomic with 765 /* Update while interrupts are disabled, so its atomic with
773 respect to ipis */ 766 respect to ipis */
774 x86_write_percpu(xen_cr3, cr3); 767 percpu_write(xen_cr3, cr3);
775 768
776 __xen_write_cr3(true, cr3); 769 __xen_write_cr3(true, cr3);
777 770
@@ -1651,7 +1644,6 @@ asmlinkage void __init xen_start_kernel(void)
1651#ifdef CONFIG_X86_64 1644#ifdef CONFIG_X86_64
1652 /* Disable until direct per-cpu data access. */ 1645 /* Disable until direct per-cpu data access. */
1653 have_vcpu_info_placement = 0; 1646 have_vcpu_info_placement = 0;
1654 x86_64_init_pda();
1655#endif 1647#endif
1656 1648
1657 xen_smp_init(); 1649 xen_smp_init();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb042608c602..2e8271431e1a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void)
39 struct vcpu_info *vcpu; 39 struct vcpu_info *vcpu;
40 unsigned long flags; 40 unsigned long flags;
41 41
42 vcpu = x86_read_percpu(xen_vcpu); 42 vcpu = percpu_read(xen_vcpu);
43 43
44 /* flag has opposite sense of mask */ 44 /* flag has opposite sense of mask */
45 flags = !vcpu->evtchn_upcall_mask; 45 flags = !vcpu->evtchn_upcall_mask;
@@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags)
62 make sure we're don't switch CPUs between getting the vcpu 62 make sure we're don't switch CPUs between getting the vcpu
63 pointer and updating the mask. */ 63 pointer and updating the mask. */
64 preempt_disable(); 64 preempt_disable();
65 vcpu = x86_read_percpu(xen_vcpu); 65 vcpu = percpu_read(xen_vcpu);
66 vcpu->evtchn_upcall_mask = flags; 66 vcpu->evtchn_upcall_mask = flags;
67 preempt_enable_no_resched(); 67 preempt_enable_no_resched();
68 68
@@ -83,7 +83,7 @@ static void xen_irq_disable(void)
83 make sure we're don't switch CPUs between getting the vcpu 83 make sure we're don't switch CPUs between getting the vcpu
84 pointer and updating the mask. */ 84 pointer and updating the mask. */
85 preempt_disable(); 85 preempt_disable();
86 x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; 86 percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
87 preempt_enable_no_resched(); 87 preempt_enable_no_resched();
88} 88}
89 89
@@ -96,7 +96,7 @@ static void xen_irq_enable(void)
96 the caller is confused and is trying to re-enable interrupts 96 the caller is confused and is trying to re-enable interrupts
97 on an indeterminate processor. */ 97 on an indeterminate processor. */
98 98
99 vcpu = x86_read_percpu(xen_vcpu); 99 vcpu = percpu_read(xen_vcpu);
100 vcpu->evtchn_upcall_mask = 0; 100 vcpu->evtchn_upcall_mask = 0;
101 101
102 /* Doesn't matter if we get preempted here, because any 102 /* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 503c240e26c7..98cb9869eb24 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1063,18 +1063,14 @@ static void drop_other_mm_ref(void *info)
1063 struct mm_struct *mm = info; 1063 struct mm_struct *mm = info;
1064 struct mm_struct *active_mm; 1064 struct mm_struct *active_mm;
1065 1065
1066#ifdef CONFIG_X86_64 1066 active_mm = percpu_read(cpu_tlbstate.active_mm);
1067 active_mm = read_pda(active_mm);
1068#else
1069 active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
1070#endif
1071 1067
1072 if (active_mm == mm) 1068 if (active_mm == mm)
1073 leave_mm(smp_processor_id()); 1069 leave_mm(smp_processor_id());
1074 1070
1075 /* If this cpu still has a stale cr3 reference, then make sure 1071 /* If this cpu still has a stale cr3 reference, then make sure
1076 it has been flushed. */ 1072 it has been flushed. */
1077 if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { 1073 if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
1078 load_cr3(swapper_pg_dir); 1074 load_cr3(swapper_pg_dir);
1079 arch_flush_lazy_cpu_mode(); 1075 arch_flush_lazy_cpu_mode();
1080 } 1076 }
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 858938241616..e786fa7f2615 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode)
39 xen_mc_flush(); 39 xen_mc_flush();
40 40
41 /* restore flags saved in xen_mc_batch */ 41 /* restore flags saved in xen_mc_batch */
42 local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); 42 local_irq_restore(percpu_read(xen_mc_irq_flags));
43} 43}
44 44
45/* Set up a callback to be called when the current batch is flushed */ 45/* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c44e2069c7c7..7735e3dd359c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
50 */ 50 */
51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) 51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
52{ 52{
53#ifdef CONFIG_X86_32 53 inc_irq_stat(irq_resched_count);
54 __get_cpu_var(irq_stat).irq_resched_count++;
55#else
56 add_pda(irq_resched_count, 1);
57#endif
58 54
59 return IRQ_HANDLED; 55 return IRQ_HANDLED;
60} 56}
@@ -78,7 +74,7 @@ static __cpuinit void cpu_bringup(void)
78 xen_setup_cpu_clockevents(); 74 xen_setup_cpu_clockevents();
79 75
80 cpu_set(cpu, cpu_online_map); 76 cpu_set(cpu, cpu_online_map);
81 x86_write_percpu(cpu_state, CPU_ONLINE); 77 percpu_write(cpu_state, CPU_ONLINE);
82 wmb(); 78 wmb();
83 79
84 /* We can take interrupts now: we're officially "up". */ 80 /* We can take interrupts now: we're officially "up". */
@@ -283,22 +279,10 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
283 struct task_struct *idle = idle_task(cpu); 279 struct task_struct *idle = idle_task(cpu);
284 int rc; 280 int rc;
285 281
286#ifdef CONFIG_X86_64
287 /* Allocate node local memory for AP pdas */
288 WARN_ON(cpu == 0);
289 if (cpu > 0) {
290 rc = get_local_pda(cpu);
291 if (rc)
292 return rc;
293 }
294#endif
295
296#ifdef CONFIG_X86_32
297 init_gdt(cpu);
298 per_cpu(current_task, cpu) = idle; 282 per_cpu(current_task, cpu) = idle;
283#ifdef CONFIG_X86_32
299 irq_ctx_init(cpu); 284 irq_ctx_init(cpu);
300#else 285#else
301 cpu_pda(cpu)->pcurrent = idle;
302 clear_tsk_thread_flag(idle, TIF_FORK); 286 clear_tsk_thread_flag(idle, TIF_FORK);
303#endif 287#endif
304 xen_setup_timer(cpu); 288 xen_setup_timer(cpu);
@@ -445,11 +429,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
445{ 429{
446 irq_enter(); 430 irq_enter();
447 generic_smp_call_function_interrupt(); 431 generic_smp_call_function_interrupt();
448#ifdef CONFIG_X86_32 432 inc_irq_stat(irq_call_count);
449 __get_cpu_var(irq_stat).irq_call_count++;
450#else
451 add_pda(irq_call_count, 1);
452#endif
453 irq_exit(); 433 irq_exit();
454 434
455 return IRQ_HANDLED; 435 return IRQ_HANDLED;
@@ -459,11 +439,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
459{ 439{
460 irq_enter(); 440 irq_enter();
461 generic_smp_call_function_single_interrupt(); 441 generic_smp_call_function_single_interrupt();
462#ifdef CONFIG_X86_32 442 inc_irq_stat(irq_call_count);
463 __get_cpu_var(irq_stat).irq_call_count++;
464#else
465 add_pda(irq_call_count, 1);
466#endif
467 irq_exit(); 443 irq_exit();
468 444
469 return IRQ_HANDLED; 445 return IRQ_HANDLED;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 212ffe012b76..95be7b434724 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -6,6 +6,7 @@
6 6
7#include <asm/xen/hypercall.h> 7#include <asm/xen/hypercall.h>
8#include <asm/xen/page.h> 8#include <asm/xen/page.h>
9#include <asm/fixmap.h>
9 10
10#include "xen-ops.h" 11#include "xen-ops.h"
11#include "mmu.h" 12#include "mmu.h"
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 05794c566e87..d6fc51f4ce85 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -17,6 +17,7 @@
17#include <asm/processor-flags.h> 17#include <asm/processor-flags.h>
18#include <asm/errno.h> 18#include <asm/errno.h>
19#include <asm/segment.h> 19#include <asm/segment.h>
20#include <asm/percpu.h>
20 21
21#include <xen/interface/xen.h> 22#include <xen/interface/xen.h>
22 23
@@ -28,12 +29,10 @@
28 29
29#if 1 30#if 1
30/* 31/*
31 x86-64 does not yet support direct access to percpu variables 32 FIXME: x86_64 now can support direct access to percpu variables
32 via a segment override, so we just need to make sure this code 33 via a segment override. Update xen accordingly.
33 never gets used
34 */ 34 */
35#define BUG ud2a 35#define BUG ud2a
36#define PER_CPU_VAR(var, off) 0xdeadbeef
37#endif 36#endif
38 37
39/* 38/*
@@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct)
45 BUG 44 BUG
46 45
47 /* Unmask events */ 46 /* Unmask events */
48 movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 47 movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
49 48
50 /* Preempt here doesn't matter because that will deal with 49 /* Preempt here doesn't matter because that will deal with
51 any pending interrupts. The pending check may end up being 50 any pending interrupts. The pending check may end up being
52 run on the wrong CPU, but that doesn't hurt. */ 51 run on the wrong CPU, but that doesn't hurt. */
53 52
54 /* Test for pending */ 53 /* Test for pending */
55 testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) 54 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
56 jz 1f 55 jz 1f
57 56
582: call check_events 572: call check_events
@@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct)
69ENTRY(xen_irq_disable_direct) 68ENTRY(xen_irq_disable_direct)
70 BUG 69 BUG
71 70
72 movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 71 movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
73ENDPATCH(xen_irq_disable_direct) 72ENDPATCH(xen_irq_disable_direct)
74 ret 73 ret
75 ENDPROC(xen_irq_disable_direct) 74 ENDPROC(xen_irq_disable_direct)
@@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct)
87ENTRY(xen_save_fl_direct) 86ENTRY(xen_save_fl_direct)
88 BUG 87 BUG
89 88
90 testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 89 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
91 setz %ah 90 setz %ah
92 addb %ah,%ah 91 addb %ah,%ah
93ENDPATCH(xen_save_fl_direct) 92ENDPATCH(xen_save_fl_direct)
@@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct)
107 BUG 106 BUG
108 107
109 testb $X86_EFLAGS_IF>>8, %ah 108 testb $X86_EFLAGS_IF>>8, %ah
110 setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 109 setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
111 /* Preempt here doesn't matter because that will deal with 110 /* Preempt here doesn't matter because that will deal with
112 any pending interrupts. The pending check may end up being 111 any pending interrupts. The pending check may end up being
113 run on the wrong CPU, but that doesn't hurt. */ 112 run on the wrong CPU, but that doesn't hurt. */
114 113
115 /* check for unmasked and pending */ 114 /* check for unmasked and pending */
116 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) 115 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
117 jz 1f 116 jz 1f
1182: call check_events 1172: call check_events
1191: 1181:
@@ -195,11 +194,11 @@ RELOC(xen_sysexit, 1b+1)
195ENTRY(xen_sysret64) 194ENTRY(xen_sysret64)
196 /* We're already on the usermode stack at this point, but still 195 /* We're already on the usermode stack at this point, but still
197 with the kernel gs, so we can easily switch back */ 196 with the kernel gs, so we can easily switch back */
198 movq %rsp, %gs:pda_oldrsp 197 movq %rsp, PER_CPU_VAR(old_rsp)
199 movq %gs:pda_kernelstack,%rsp 198 movq PER_CPU_VAR(kernel_stack),%rsp
200 199
201 pushq $__USER_DS 200 pushq $__USER_DS
202 pushq %gs:pda_oldrsp 201 pushq PER_CPU_VAR(old_rsp)
203 pushq %r11 202 pushq %r11
204 pushq $__USER_CS 203 pushq $__USER_CS
205 pushq %rcx 204 pushq %rcx
@@ -212,11 +211,11 @@ RELOC(xen_sysret64, 1b+1)
212ENTRY(xen_sysret32) 211ENTRY(xen_sysret32)
213 /* We're already on the usermode stack at this point, but still 212 /* We're already on the usermode stack at this point, but still
214 with the kernel gs, so we can easily switch back */ 213 with the kernel gs, so we can easily switch back */
215 movq %rsp, %gs:pda_oldrsp 214 movq %rsp, PER_CPU_VAR(old_rsp)
216 movq %gs:pda_kernelstack, %rsp 215 movq PER_CPU_VAR(kernel_stack), %rsp
217 216
218 pushq $__USER32_DS 217 pushq $__USER32_DS
219 pushq %gs:pda_oldrsp 218 pushq PER_CPU_VAR(old_rsp)
220 pushq %r11 219 pushq %r11
221 pushq $__USER32_CS 220 pushq $__USER32_CS
222 pushq %rcx 221 pushq %rcx