Diffstat (limited to 'arch/x86')
-rw-r--r--  arch/x86/Kconfig  32
-rw-r--r--  arch/x86/Kconfig.cpu  10
-rw-r--r--  arch/x86/Kconfig.debug  1
-rw-r--r--  arch/x86/Makefile  2
-rw-r--r--  arch/x86/boot/video-vesa.c  11
-rw-r--r--  arch/x86/configs/i386_defconfig  4
-rw-r--r--  arch/x86/configs/x86_64_defconfig  4
-rw-r--r--  arch/x86/ia32/ia32entry.S  8
-rw-r--r--  arch/x86/include/asm/Kbuild  1
-rw-r--r--  arch/x86/include/asm/apicnum.h  12
-rw-r--r--  arch/x86/include/asm/bitops.h  14
-rw-r--r--  arch/x86/include/asm/byteorder.h  1
-rw-r--r--  arch/x86/include/asm/cpumask.h  4
-rw-r--r--  arch/x86/include/asm/current.h  24
-rw-r--r--  arch/x86/include/asm/genapic_32.h  7
-rw-r--r--  arch/x86/include/asm/genapic_64.h  6
-rw-r--r--  arch/x86/include/asm/hardirq.h  49
-rw-r--r--  arch/x86/include/asm/hardirq_32.h  33
-rw-r--r--  arch/x86/include/asm/hardirq_64.h  25
-rw-r--r--  arch/x86/include/asm/io.h  3
-rw-r--r--  arch/x86/include/asm/irq_regs.h  36
-rw-r--r--  arch/x86/include/asm/irq_regs_32.h  31
-rw-r--r--  arch/x86/include/asm/irq_regs_64.h  1
-rw-r--r--  arch/x86/include/asm/irq_vectors.h  49
-rw-r--r--  arch/x86/include/asm/mach-default/entry_arch.h  18
-rw-r--r--  arch/x86/include/asm/mach-default/mach_wakecpu.h  6
-rw-r--r--  arch/x86/include/asm/mach-rdc321x/gpio.h  60
-rw-r--r--  arch/x86/include/asm/mmu_context.h  63
-rw-r--r--  arch/x86/include/asm/mmu_context_32.h  55
-rw-r--r--  arch/x86/include/asm/mmu_context_64.h  54
-rw-r--r--  arch/x86/include/asm/msr-index.h  29
-rw-r--r--  arch/x86/include/asm/mtrr.h  10
-rw-r--r--  arch/x86/include/asm/page.h  3
-rw-r--r--  arch/x86/include/asm/page_64.h  4
-rw-r--r--  arch/x86/include/asm/paravirt.h  26
-rw-r--r--  arch/x86/include/asm/pat.h  4
-rw-r--r--  arch/x86/include/asm/pda.h  137
-rw-r--r--  arch/x86/include/asm/percpu.h  153
-rw-r--r--  arch/x86/include/asm/pgalloc.h  1
-rw-r--r--  arch/x86/include/asm/pgtable.h  57
-rw-r--r--  arch/x86/include/asm/pgtable_64.h  1
-rw-r--r--  arch/x86/include/asm/prctl.h  4
-rw-r--r--  arch/x86/include/asm/processor.h  19
-rw-r--r--  arch/x86/include/asm/rdc321x_defs.h (renamed from arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h)  0
-rw-r--r--  arch/x86/include/asm/setup.h  9
-rw-r--r--  arch/x86/include/asm/sigcontext32.h  2
-rw-r--r--  arch/x86/include/asm/smp.h  9
-rw-r--r--  arch/x86/include/asm/spinlock.h  3
-rw-r--r--  arch/x86/include/asm/stackprotector.h  38
-rw-r--r--  arch/x86/include/asm/syscalls.h  2
-rw-r--r--  arch/x86/include/asm/system.h  29
-rw-r--r--  arch/x86/include/asm/thread_info.h  20
-rw-r--r--  arch/x86/include/asm/timex.h  13
-rw-r--r--  arch/x86/include/asm/tlbflush.h  15
-rw-r--r--  arch/x86/include/asm/topology.h  31
-rw-r--r--  arch/x86/include/asm/trampoline.h  1
-rw-r--r--  arch/x86/include/asm/uv/uv.h  33
-rw-r--r--  arch/x86/include/asm/uv/uv_bau.h  1
-rw-r--r--  arch/x86/kernel/Makefile  14
-rw-r--r--  arch/x86/kernel/acpi/sleep.c  1
-rw-r--r--  arch/x86/kernel/apic.c  50
-rw-r--r--  arch/x86/kernel/asm-offsets_64.c  11
-rw-r--r--  arch/x86/kernel/cpu/addon_cpuid_features.c  34
-rw-r--r--  arch/x86/kernel/cpu/common.c  202
-rw-r--r--  arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c  20
-rw-r--r--  arch/x86/kernel/cpu/intel.c  25
-rw-r--r--  arch/x86/kernel/cpu/intel_cacheinfo.c  63
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_amd_64.c  21
-rw-r--r--  arch/x86/kernel/cpu/mcheck/mce_intel_64.c  1
-rw-r--r--  arch/x86/kernel/cpu/mtrr/generic.c  12
-rw-r--r--  arch/x86/kernel/dumpstack_64.c  35
-rw-r--r--  arch/x86/kernel/efi.c  2
-rw-r--r--  arch/x86/kernel/efi_64.c  1
-rw-r--r--  arch/x86/kernel/entry_32.S  8
-rw-r--r--  arch/x86/kernel/entry_64.S  45
-rw-r--r--  arch/x86/kernel/genapic_64.c  2
-rw-r--r--  arch/x86/kernel/genx2apic_uv_x.c  1
-rw-r--r--  arch/x86/kernel/head64.c  23
-rw-r--r--  arch/x86/kernel/head_32.S  19
-rw-r--r--  arch/x86/kernel/head_64.S  36
-rw-r--r--  arch/x86/kernel/hpet.c  3
-rw-r--r--  arch/x86/kernel/io_apic.c  46
-rw-r--r--  arch/x86/kernel/irq.c  6
-rw-r--r--  arch/x86/kernel/irq_32.c  2
-rw-r--r--  arch/x86/kernel/irq_64.c  9
-rw-r--r--  arch/x86/kernel/irqinit_32.c  11
-rw-r--r--  arch/x86/kernel/kprobes.c  2
-rw-r--r--  arch/x86/kernel/mpparse.c  1
-rw-r--r--  arch/x86/kernel/nmi.c  10
-rw-r--r--  arch/x86/kernel/paravirt.c  1
-rw-r--r--  arch/x86/kernel/process_32.c  6
-rw-r--r--  arch/x86/kernel/process_64.c  43
-rw-r--r--  arch/x86/kernel/setup_percpu.c  411
-rw-r--r--  arch/x86/kernel/signal.c  11
-rw-r--r--  arch/x86/kernel/smpboot.c  73
-rw-r--r--  arch/x86/kernel/smpcommon.c  30
-rw-r--r--  arch/x86/kernel/syscall_table_32.S  2
-rw-r--r--  arch/x86/kernel/tlb_32.c  256
-rw-r--r--  arch/x86/kernel/tlb_uv.c  69
-rw-r--r--  arch/x86/kernel/traps.c  1
-rw-r--r--  arch/x86/kernel/vmi_32.c  2
-rw-r--r--  arch/x86/kernel/vmlinux_32.lds.S  9
-rw-r--r--  arch/x86/kernel/vmlinux_64.lds.S  26
-rw-r--r--  arch/x86/kernel/x8664_ksyms_64.c  2
-rw-r--r--  arch/x86/lib/usercopy_32.c  4
-rw-r--r--  arch/x86/lib/usercopy_64.c  4
-rw-r--r--  arch/x86/mach-rdc321x/Makefile  5
-rw-r--r--  arch/x86/mach-rdc321x/gpio.c  194
-rw-r--r--  arch/x86/mach-rdc321x/platform.c  69
-rw-r--r--  arch/x86/mach-voyager/voyager_smp.c  5
-rw-r--r--  arch/x86/mm/Makefile  2
-rw-r--r--  arch/x86/mm/fault.c  448
-rw-r--r--  arch/x86/mm/init_32.c  48
-rw-r--r--  arch/x86/mm/init_64.c  2
-rw-r--r--  arch/x86/mm/iomap_32.c  10
-rw-r--r--  arch/x86/mm/ioremap.c  27
-rw-r--r--  arch/x86/mm/numa_64.c  217
-rw-r--r--  arch/x86/mm/pageattr.c  49
-rw-r--r--  arch/x86/mm/pat.c  119
-rw-r--r--  arch/x86/mm/srat_64.c  1
-rw-r--r--  arch/x86/mm/tlb.c (renamed from arch/x86/kernel/tlb_64.c)  122
-rw-r--r--  arch/x86/pci/i386.c  12
-rw-r--r--  arch/x86/scripts/strip-symbols  1
-rw-r--r--  arch/x86/xen/enlighten.c  47
-rw-r--r--  arch/x86/xen/irq.c  8
-rw-r--r--  arch/x86/xen/mmu.c  8
-rw-r--r--  arch/x86/xen/multicalls.h  2
-rw-r--r--  arch/x86/xen/smp.c  34
-rw-r--r--  arch/x86/xen/suspend.c  1
-rw-r--r--  arch/x86/xen/xen-asm_64.S  31
130 files changed, 1924 insertions, 2482 deletions
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 73f7fe8fd4d1..d6218e6c9824 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -133,7 +133,7 @@ config ARCH_HAS_CACHE_LINE_SIZE
133 def_bool y 133 def_bool y
134 134
135config HAVE_SETUP_PER_CPU_AREA 135config HAVE_SETUP_PER_CPU_AREA
136 def_bool X86_64_SMP || (X86_SMP && !X86_VOYAGER) 136 def_bool y
137 137
138config HAVE_CPUMASK_OF_CPU_MAP 138config HAVE_CPUMASK_OF_CPU_MAP
139 def_bool X86_64_SMP 139 def_bool X86_64_SMP
@@ -391,6 +391,13 @@ config X86_RDC321X
391 as R-8610-(G). 391 as R-8610-(G).
392 If you don't have one of these chips, you should say N here. 392 If you don't have one of these chips, you should say N here.
393 393
394config X86_UV
395 bool "SGI Ultraviolet"
396 depends on X86_64
397 help
398 This option is needed in order to support SGI Ultraviolet systems.
399 If you don't have one of these, you should say N here.
400
394config SCHED_OMIT_FRAME_POINTER 401config SCHED_OMIT_FRAME_POINTER
395 def_bool y 402 def_bool y
396 prompt "Single-depth WCHAN output" 403 prompt "Single-depth WCHAN output"
@@ -1340,13 +1347,17 @@ config SECCOMP
1340 1347
1341 If unsure, say Y. Only embedded should say N here. 1348 If unsure, say Y. Only embedded should say N here.
1342 1349
1350config CC_STACKPROTECTOR_ALL
1351 bool
1352
1343config CC_STACKPROTECTOR 1353config CC_STACKPROTECTOR
1344 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)" 1354 bool "Enable -fstack-protector buffer overflow detection (EXPERIMENTAL)"
1345 depends on X86_64 && EXPERIMENTAL && BROKEN 1355 depends on X86_64
1356 select CC_STACKPROTECTOR_ALL
1346 help 1357 help
1347 This option turns on the -fstack-protector GCC feature. This 1358 This option turns on the -fstack-protector GCC feature. This
1348 feature puts, at the beginning of critical functions, a canary 1359 feature puts, at the beginning of functions, a canary value on
1349 value on the stack just before the return address, and validates 1360 the stack just before the return address, and validates
1350 the value just before actually returning. Stack based buffer 1361 the value just before actually returning. Stack based buffer
1351 overflows (that need to overwrite this return address) now also 1362 overflows (that need to overwrite this return address) now also
1352 overwrite the canary, which gets detected and the attack is then 1363 overwrite the canary, which gets detected and the attack is then
@@ -1354,15 +1365,8 @@ config CC_STACKPROTECTOR
1354 1365
1355 This feature requires gcc version 4.2 or above, or a distribution 1366 This feature requires gcc version 4.2 or above, or a distribution
1356 gcc with the feature backported. Older versions are automatically 1367 gcc with the feature backported. Older versions are automatically
1357 detected and for those versions, this configuration option is ignored. 1368 detected and for those versions, this configuration option is
1358 1369 ignored. (and a warning is printed during bootup)
1359config CC_STACKPROTECTOR_ALL
1360 bool "Use stack-protector for all functions"
1361 depends on CC_STACKPROTECTOR
1362 help
1363 Normally, GCC only inserts the canary value protection for
1364 functions that use large-ish on-stack buffers. By enabling
1365 this option, GCC will be asked to do this for ALL functions.
1366 1370
1367source kernel/Kconfig.hz 1371source kernel/Kconfig.hz
1368 1372
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 8078955845ae..8eb50ba9161e 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -292,25 +292,23 @@ config X86_CPU
292# Define implied options from the CPU selection here 292# Define implied options from the CPU selection here
293config X86_L1_CACHE_BYTES 293config X86_L1_CACHE_BYTES
294 int 294 int
295 default "128" if GENERIC_CPU || MPSC 295 default "128" if MPSC
296 default "64" if MK8 || MCORE2 296 default "64" if GENERIC_CPU || MK8 || MCORE2 || X86_32
297 depends on X86_64
298 297
299config X86_INTERNODE_CACHE_BYTES 298config X86_INTERNODE_CACHE_BYTES
300 int 299 int
301 default "4096" if X86_VSMP 300 default "4096" if X86_VSMP
302 default X86_L1_CACHE_BYTES if !X86_VSMP 301 default X86_L1_CACHE_BYTES if !X86_VSMP
303 depends on X86_64
304 302
305config X86_CMPXCHG 303config X86_CMPXCHG
306 def_bool X86_64 || (X86_32 && !M386) 304 def_bool X86_64 || (X86_32 && !M386)
307 305
308config X86_L1_CACHE_SHIFT 306config X86_L1_CACHE_SHIFT
309 int 307 int
310 default "7" if MPENTIUM4 || X86_GENERIC || GENERIC_CPU || MPSC 308 default "7" if MPENTIUM4 || MPSC
311 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1 309 default "4" if X86_ELAN || M486 || M386 || MGEODEGX1
312 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX 310 default "5" if MWINCHIP3D || MWINCHIPC6 || MCRUSOE || MEFFICEON || MCYRIXIII || MK6 || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || M586 || MVIAC3_2 || MGEODE_LX
313 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 311 default "6" if MK7 || MK8 || MPENTIUMM || MCORE2 || MVIAC7 || X86_GENERIC || GENERIC_CPU
314 312
315config X86_XADD 313config X86_XADD
316 def_bool y 314 def_bool y
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 10d6cc3fd052..28f111461ca8 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -117,6 +117,7 @@ config DEBUG_RODATA
117config DEBUG_RODATA_TEST 117config DEBUG_RODATA_TEST
118 bool "Testcase for the DEBUG_RODATA feature" 118 bool "Testcase for the DEBUG_RODATA feature"
119 depends on DEBUG_RODATA 119 depends on DEBUG_RODATA
120 default y
120 help 121 help
121 This option enables a testcase for the DEBUG_RODATA 122 This option enables a testcase for the DEBUG_RODATA
122 feature as well as for the change_page_attr() infrastructure. 123 feature as well as for the change_page_attr() infrastructure.
diff --git a/arch/x86/Makefile b/arch/x86/Makefile
index d1a47adb5aec..cacee981d166 100644
--- a/arch/x86/Makefile
+++ b/arch/x86/Makefile
@@ -73,7 +73,7 @@ else
73 73
74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh 74 stackp := $(CONFIG_SHELL) $(srctree)/scripts/gcc-x86_64-has-stack-protector.sh
75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \ 75 stackp-$(CONFIG_CC_STACKPROTECTOR) := $(shell $(stackp) \
76 "$(CC)" -fstack-protector ) 76 "$(CC)" "-fstack-protector -DGCC_HAS_SP" )
77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \ 77 stackp-$(CONFIG_CC_STACKPROTECTOR_ALL) += $(shell $(stackp) \
78 "$(CC)" -fstack-protector-all ) 78 "$(CC)" -fstack-protector-all )
79 79
diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c
index 75115849af33..4a58c8ce3f69 100644
--- a/arch/x86/boot/video-vesa.c
+++ b/arch/x86/boot/video-vesa.c
@@ -269,9 +269,8 @@ void vesa_store_edid(void)
269 we genuinely have to assume all registers are destroyed here. */ 269 we genuinely have to assume all registers are destroyed here. */
270 270
271 asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es" 271 asm("pushw %%es; movw %2,%%es; "INT10"; popw %%es"
272 : "+a" (ax), "+b" (bx) 272 : "+a" (ax), "+b" (bx), "+c" (cx), "+D" (di)
273 : "c" (cx), "D" (di) 273 : : "esi", "edx");
274 : "esi");
275 274
276 if (ax != 0x004f) 275 if (ax != 0x004f)
277 return; /* No EDID */ 276 return; /* No EDID */
@@ -285,9 +284,9 @@ void vesa_store_edid(void)
285 dx = 0; /* EDID block number */ 284 dx = 0; /* EDID block number */
286 di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */ 285 di =(size_t) &boot_params.edid_info; /* (ES:)Pointer to block */
287 asm(INT10 286 asm(INT10
288 : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info) 287 : "+a" (ax), "+b" (bx), "+d" (dx), "=m" (boot_params.edid_info),
289 : "c" (cx), "D" (di) 288 "+c" (cx), "+D" (di)
290 : "esi"); 289 : : "esi");
291#endif /* CONFIG_FIRMWARE_EDID */ 290#endif /* CONFIG_FIRMWARE_EDID */
292} 291}
293 292
diff --git a/arch/x86/configs/i386_defconfig b/arch/x86/configs/i386_defconfig
index b30a08ed8eb4..edba00d98ac3 100644
--- a/arch/x86/configs/i386_defconfig
+++ b/arch/x86/configs/i386_defconfig
@@ -1331,8 +1331,8 @@ CONFIG_I2C_I801=y
1331# Miscellaneous I2C Chip support 1331# Miscellaneous I2C Chip support
1332# 1332#
1333# CONFIG_DS1682 is not set 1333# CONFIG_DS1682 is not set
1334# CONFIG_AT24 is not set 1334# CONFIG_EEPROM_AT24 is not set
1335# CONFIG_SENSORS_EEPROM is not set 1335# CONFIG_EEPROM_LEGACY is not set
1336# CONFIG_SENSORS_PCF8574 is not set 1336# CONFIG_SENSORS_PCF8574 is not set
1337# CONFIG_PCF8575 is not set 1337# CONFIG_PCF8575 is not set
1338# CONFIG_SENSORS_PCA9539 is not set 1338# CONFIG_SENSORS_PCA9539 is not set
diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig
index 0e7dbc0a3e46..322dd2748fc9 100644
--- a/arch/x86/configs/x86_64_defconfig
+++ b/arch/x86/configs/x86_64_defconfig
@@ -1311,8 +1311,8 @@ CONFIG_I2C_I801=y
1311# Miscellaneous I2C Chip support 1311# Miscellaneous I2C Chip support
1312# 1312#
1313# CONFIG_DS1682 is not set 1313# CONFIG_DS1682 is not set
1314# CONFIG_AT24 is not set 1314# CONFIG_EEPROM_AT24 is not set
1315# CONFIG_SENSORS_EEPROM is not set 1315# CONFIG_EEPROM_LEGACY is not set
1316# CONFIG_SENSORS_PCF8574 is not set 1316# CONFIG_SENSORS_PCF8574 is not set
1317# CONFIG_PCF8575 is not set 1317# CONFIG_PCF8575 is not set
1318# CONFIG_SENSORS_PCA9539 is not set 1318# CONFIG_SENSORS_PCA9539 is not set
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
index 256b00b61892..9c79b2477008 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -112,8 +112,8 @@ ENTRY(ia32_sysenter_target)
112 CFI_DEF_CFA rsp,0 112 CFI_DEF_CFA rsp,0
113 CFI_REGISTER rsp,rbp 113 CFI_REGISTER rsp,rbp
114 SWAPGS_UNSAFE_STACK 114 SWAPGS_UNSAFE_STACK
115 movq %gs:pda_kernelstack, %rsp 115 movq PER_CPU_VAR(kernel_stack), %rsp
116 addq $(PDA_STACKOFFSET),%rsp 116 addq $(KERNEL_STACK_OFFSET),%rsp
117 /* 117 /*
118 * No need to follow this irqs on/off section: the syscall 118 * No need to follow this irqs on/off section: the syscall
119 * disabled irqs, here we enable it straight after entry: 119 * disabled irqs, here we enable it straight after entry:
@@ -273,13 +273,13 @@ ENDPROC(ia32_sysenter_target)
273ENTRY(ia32_cstar_target) 273ENTRY(ia32_cstar_target)
274 CFI_STARTPROC32 simple 274 CFI_STARTPROC32 simple
275 CFI_SIGNAL_FRAME 275 CFI_SIGNAL_FRAME
276 CFI_DEF_CFA rsp,PDA_STACKOFFSET 276 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
277 CFI_REGISTER rip,rcx 277 CFI_REGISTER rip,rcx
278 /*CFI_REGISTER rflags,r11*/ 278 /*CFI_REGISTER rflags,r11*/
279 SWAPGS_UNSAFE_STACK 279 SWAPGS_UNSAFE_STACK
280 movl %esp,%r8d 280 movl %esp,%r8d
281 CFI_REGISTER rsp,r8 281 CFI_REGISTER rsp,r8
282 movq %gs:pda_kernelstack,%rsp 282 movq PER_CPU_VAR(kernel_stack),%rsp
283 /* 283 /*
284 * No need to follow this irqs on/off section: the syscall 284 * No need to follow this irqs on/off section: the syscall
285 * disabled irqs and here we enable it straight after entry: 285 * disabled irqs and here we enable it straight after entry:
diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild
index a9f8a814a1f7..4a8e80cdcfa5 100644
--- a/arch/x86/include/asm/Kbuild
+++ b/arch/x86/include/asm/Kbuild
@@ -22,4 +22,3 @@ unifdef-y += unistd_32.h
22unifdef-y += unistd_64.h 22unifdef-y += unistd_64.h
23unifdef-y += vm86.h 23unifdef-y += vm86.h
24unifdef-y += vsyscall.h 24unifdef-y += vsyscall.h
25unifdef-y += swab.h
diff --git a/arch/x86/include/asm/apicnum.h b/arch/x86/include/asm/apicnum.h
new file mode 100644
index 000000000000..82f613c607ce
--- /dev/null
+++ b/arch/x86/include/asm/apicnum.h
@@ -0,0 +1,12 @@
1#ifndef _ASM_X86_APICNUM_H
2#define _ASM_X86_APICNUM_H
3
4/* define MAX_IO_APICS */
5#ifdef CONFIG_X86_32
6# define MAX_IO_APICS 64
7#else
8# define MAX_IO_APICS 128
9# define MAX_LOCAL_APIC 32768
10#endif
11
12#endif /* _ASM_X86_APICNUM_H */
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index e02a359d2aa5..02b47a603fc8 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -3,6 +3,9 @@
3 3
4/* 4/*
5 * Copyright 1992, Linus Torvalds. 5 * Copyright 1992, Linus Torvalds.
6 *
7 * Note: inlines with more than a single statement should be marked
8 * __always_inline to avoid problems with older gcc's inlining heuristics.
6 */ 9 */
7 10
8#ifndef _LINUX_BITOPS_H 11#ifndef _LINUX_BITOPS_H
@@ -53,7 +56,8 @@
53 * Note that @nr may be almost arbitrarily large; this function is not 56 * Note that @nr may be almost arbitrarily large; this function is not
54 * restricted to acting on a single-word quantity. 57 * restricted to acting on a single-word quantity.
55 */ 58 */
56static inline void set_bit(unsigned int nr, volatile unsigned long *addr) 59static __always_inline void
60set_bit(unsigned int nr, volatile unsigned long *addr)
57{ 61{
58 if (IS_IMMEDIATE(nr)) { 62 if (IS_IMMEDIATE(nr)) {
59 asm volatile(LOCK_PREFIX "orb %1,%0" 63 asm volatile(LOCK_PREFIX "orb %1,%0"
@@ -90,7 +94,8 @@ static inline void __set_bit(int nr, volatile unsigned long *addr)
90 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit() 94 * you should call smp_mb__before_clear_bit() and/or smp_mb__after_clear_bit()
91 * in order to ensure changes are visible on other processors. 95 * in order to ensure changes are visible on other processors.
92 */ 96 */
93static inline void clear_bit(int nr, volatile unsigned long *addr) 97static __always_inline void
98clear_bit(int nr, volatile unsigned long *addr)
94{ 99{
95 if (IS_IMMEDIATE(nr)) { 100 if (IS_IMMEDIATE(nr)) {
96 asm volatile(LOCK_PREFIX "andb %1,%0" 101 asm volatile(LOCK_PREFIX "andb %1,%0"
@@ -204,7 +209,8 @@ static inline int test_and_set_bit(int nr, volatile unsigned long *addr)
204 * 209 *
205 * This is the same as test_and_set_bit on x86. 210 * This is the same as test_and_set_bit on x86.
206 */ 211 */
207static inline int test_and_set_bit_lock(int nr, volatile unsigned long *addr) 212static __always_inline int
213test_and_set_bit_lock(int nr, volatile unsigned long *addr)
208{ 214{
209 return test_and_set_bit(nr, addr); 215 return test_and_set_bit(nr, addr);
210} 216}
@@ -300,7 +306,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
300 return oldbit; 306 return oldbit;
301} 307}
302 308
303static inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr) 309static __always_inline int constant_test_bit(unsigned int nr, const volatile unsigned long *addr)
304{ 310{
305 return ((1UL << (nr % BITS_PER_LONG)) & 311 return ((1UL << (nr % BITS_PER_LONG)) &
306 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0; 312 (((unsigned long *)addr)[nr / BITS_PER_LONG])) != 0;
diff --git a/arch/x86/include/asm/byteorder.h b/arch/x86/include/asm/byteorder.h
index 7c49917e3d9d..b13a7a88f3eb 100644
--- a/arch/x86/include/asm/byteorder.h
+++ b/arch/x86/include/asm/byteorder.h
@@ -1,7 +1,6 @@
1#ifndef _ASM_X86_BYTEORDER_H 1#ifndef _ASM_X86_BYTEORDER_H
2#define _ASM_X86_BYTEORDER_H 2#define _ASM_X86_BYTEORDER_H
3 3
4#include <asm/swab.h>
5#include <linux/byteorder/little_endian.h> 4#include <linux/byteorder/little_endian.h>
6 5
7#endif /* _ASM_X86_BYTEORDER_H */ 6#endif /* _ASM_X86_BYTEORDER_H */
diff --git a/arch/x86/include/asm/cpumask.h b/arch/x86/include/asm/cpumask.h
index 26c6dad90479..a7f3c75f8ad7 100644
--- a/arch/x86/include/asm/cpumask.h
+++ b/arch/x86/include/asm/cpumask.h
@@ -10,6 +10,8 @@ extern cpumask_var_t cpu_callout_mask;
10extern cpumask_var_t cpu_initialized_mask; 10extern cpumask_var_t cpu_initialized_mask;
11extern cpumask_var_t cpu_sibling_setup_mask; 11extern cpumask_var_t cpu_sibling_setup_mask;
12 12
13extern void setup_cpu_local_masks(void);
14
13#else /* CONFIG_X86_32 */ 15#else /* CONFIG_X86_32 */
14 16
15extern cpumask_t cpu_callin_map; 17extern cpumask_t cpu_callin_map;
@@ -22,6 +24,8 @@ extern cpumask_t cpu_sibling_setup_map;
22#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized) 24#define cpu_initialized_mask ((struct cpumask *)&cpu_initialized)
23#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map) 25#define cpu_sibling_setup_mask ((struct cpumask *)&cpu_sibling_setup_map)
24 26
27static inline void setup_cpu_local_masks(void) { }
28
25#endif /* CONFIG_X86_32 */ 29#endif /* CONFIG_X86_32 */
26 30
27#endif /* __ASSEMBLY__ */ 31#endif /* __ASSEMBLY__ */
diff --git a/arch/x86/include/asm/current.h b/arch/x86/include/asm/current.h
index 0930b4f8d672..c68c361697e1 100644
--- a/arch/x86/include/asm/current.h
+++ b/arch/x86/include/asm/current.h
@@ -1,39 +1,21 @@
1#ifndef _ASM_X86_CURRENT_H 1#ifndef _ASM_X86_CURRENT_H
2#define _ASM_X86_CURRENT_H 2#define _ASM_X86_CURRENT_H
3 3
4#ifdef CONFIG_X86_32
5#include <linux/compiler.h> 4#include <linux/compiler.h>
6#include <asm/percpu.h> 5#include <asm/percpu.h>
7 6
7#ifndef __ASSEMBLY__
8struct task_struct; 8struct task_struct;
9 9
10DECLARE_PER_CPU(struct task_struct *, current_task); 10DECLARE_PER_CPU(struct task_struct *, current_task);
11static __always_inline struct task_struct *get_current(void)
12{
13 return x86_read_percpu(current_task);
14}
15
16#else /* X86_32 */
17
18#ifndef __ASSEMBLY__
19#include <asm/pda.h>
20
21struct task_struct;
22 11
23static __always_inline struct task_struct *get_current(void) 12static __always_inline struct task_struct *get_current(void)
24{ 13{
25 return read_pda(pcurrent); 14 return percpu_read(current_task);
26} 15}
27 16
28#else /* __ASSEMBLY__ */ 17#define current get_current()
29
30#include <asm/asm-offsets.h>
31#define GET_CURRENT(reg) movq %gs:(pda_pcurrent),reg
32 18
33#endif /* __ASSEMBLY__ */ 19#endif /* __ASSEMBLY__ */
34 20
35#endif /* X86_32 */
36
37#define current get_current()
38
39#endif /* _ASM_X86_CURRENT_H */ 21#endif /* _ASM_X86_CURRENT_H */
diff --git a/arch/x86/include/asm/genapic_32.h b/arch/x86/include/asm/genapic_32.h
index 2c05b737ee22..4334502d3664 100644
--- a/arch/x86/include/asm/genapic_32.h
+++ b/arch/x86/include/asm/genapic_32.h
@@ -138,11 +138,4 @@ struct genapic {
138extern struct genapic *genapic; 138extern struct genapic *genapic;
139extern void es7000_update_genapic_to_cluster(void); 139extern void es7000_update_genapic_to_cluster(void);
140 140
141enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
142#define get_uv_system_type() UV_NONE
143#define is_uv_system() 0
144#define uv_wakeup_secondary(a, b) 1
145#define uv_system_init() do {} while (0)
146
147
148#endif /* _ASM_X86_GENAPIC_32_H */ 141#endif /* _ASM_X86_GENAPIC_32_H */
diff --git a/arch/x86/include/asm/genapic_64.h b/arch/x86/include/asm/genapic_64.h
index adf32fb56aa6..7bb092c59055 100644
--- a/arch/x86/include/asm/genapic_64.h
+++ b/arch/x86/include/asm/genapic_64.h
@@ -51,15 +51,9 @@ extern struct genapic apic_x2apic_phys;
51extern int acpi_madt_oem_check(char *, char *); 51extern int acpi_madt_oem_check(char *, char *);
52 52
53extern void apic_send_IPI_self(int vector); 53extern void apic_send_IPI_self(int vector);
54enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
55extern enum uv_system_type get_uv_system_type(void);
56extern int is_uv_system(void);
57 54
58extern struct genapic apic_x2apic_uv_x; 55extern struct genapic apic_x2apic_uv_x;
59DECLARE_PER_CPU(int, x2apic_extra_bits); 56DECLARE_PER_CPU(int, x2apic_extra_bits);
60extern void uv_cpu_init(void);
61extern void uv_system_init(void);
62extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
63 57
64extern void setup_apic_routing(void); 58extern void setup_apic_routing(void);
65 59
diff --git a/arch/x86/include/asm/hardirq.h b/arch/x86/include/asm/hardirq.h
index 000787df66e6..176f058e7159 100644
--- a/arch/x86/include/asm/hardirq.h
+++ b/arch/x86/include/asm/hardirq.h
@@ -1,11 +1,52 @@
1#ifdef CONFIG_X86_32 1#ifndef _ASM_X86_HARDIRQ_H
2# include "hardirq_32.h" 2#define _ASM_X86_HARDIRQ_H
3#else 3
4# include "hardirq_64.h" 4#include <linux/threads.h>
5#include <linux/irq.h>
6
7typedef struct {
8 unsigned int __softirq_pending;
9 unsigned int __nmi_count; /* arch dependent */
10 unsigned int irq0_irqs;
11#ifdef CONFIG_X86_LOCAL_APIC
12 unsigned int apic_timer_irqs; /* arch dependent */
13 unsigned int irq_spurious_count;
14#endif
15#ifdef CONFIG_SMP
16 unsigned int irq_resched_count;
17 unsigned int irq_call_count;
18 unsigned int irq_tlb_count;
19#endif
20#ifdef CONFIG_X86_MCE
21 unsigned int irq_thermal_count;
22# ifdef CONFIG_X86_64
23 unsigned int irq_threshold_count;
24# endif
5#endif 25#endif
26} ____cacheline_aligned irq_cpustat_t;
27
28DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
29
30/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
31#define MAX_HARDIRQS_PER_CPU NR_VECTORS
32
33#define __ARCH_IRQ_STAT
34
35#define inc_irq_stat(member) percpu_add(irq_stat.member, 1)
36
37#define local_softirq_pending() percpu_read(irq_stat.__softirq_pending)
38
39#define __ARCH_SET_SOFTIRQ_PENDING
40
41#define set_softirq_pending(x) percpu_write(irq_stat.__softirq_pending, (x))
42#define or_softirq_pending(x) percpu_or(irq_stat.__softirq_pending, (x))
43
44extern void ack_bad_irq(unsigned int irq);
6 45
7extern u64 arch_irq_stat_cpu(unsigned int cpu); 46extern u64 arch_irq_stat_cpu(unsigned int cpu);
8#define arch_irq_stat_cpu arch_irq_stat_cpu 47#define arch_irq_stat_cpu arch_irq_stat_cpu
9 48
10extern u64 arch_irq_stat(void); 49extern u64 arch_irq_stat(void);
11#define arch_irq_stat arch_irq_stat 50#define arch_irq_stat arch_irq_stat
51
52#endif /* _ASM_X86_HARDIRQ_H */
diff --git a/arch/x86/include/asm/hardirq_32.h b/arch/x86/include/asm/hardirq_32.h
deleted file mode 100644
index d4b5d731073f..000000000000
--- a/arch/x86/include/asm/hardirq_32.h
+++ /dev/null
@@ -1,33 +0,0 @@
1#ifndef _ASM_X86_HARDIRQ_32_H
2#define _ASM_X86_HARDIRQ_32_H
3
4#include <linux/threads.h>
5#include <linux/irq.h>
6
7typedef struct {
8 unsigned int __softirq_pending;
9 unsigned long idle_timestamp;
10 unsigned int __nmi_count; /* arch dependent */
11 unsigned int apic_timer_irqs; /* arch dependent */
12 unsigned int irq0_irqs;
13 unsigned int irq_resched_count;
14 unsigned int irq_call_count;
15 unsigned int irq_tlb_count;
16 unsigned int irq_thermal_count;
17 unsigned int irq_spurious_count;
18} ____cacheline_aligned irq_cpustat_t;
19
20DECLARE_PER_CPU(irq_cpustat_t, irq_stat);
21
22/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
23#define MAX_HARDIRQS_PER_CPU NR_VECTORS
24
25#define __ARCH_IRQ_STAT
26#define __IRQ_STAT(cpu, member) (per_cpu(irq_stat, cpu).member)
27
28#define inc_irq_stat(member) (__get_cpu_var(irq_stat).member++)
29
30void ack_bad_irq(unsigned int irq);
31#include <linux/irq_cpustat.h>
32
33#endif /* _ASM_X86_HARDIRQ_32_H */
diff --git a/arch/x86/include/asm/hardirq_64.h b/arch/x86/include/asm/hardirq_64.h
deleted file mode 100644
index b5a6b5d56704..000000000000
--- a/arch/x86/include/asm/hardirq_64.h
+++ /dev/null
@@ -1,25 +0,0 @@
1#ifndef _ASM_X86_HARDIRQ_64_H
2#define _ASM_X86_HARDIRQ_64_H
3
4#include <linux/threads.h>
5#include <linux/irq.h>
6#include <asm/pda.h>
7#include <asm/apic.h>
8
9/* We can have at most NR_VECTORS irqs routed to a cpu at a time */
10#define MAX_HARDIRQS_PER_CPU NR_VECTORS
11
12#define __ARCH_IRQ_STAT 1
13
14#define inc_irq_stat(member) add_pda(member, 1)
15
16#define local_softirq_pending() read_pda(__softirq_pending)
17
18#define __ARCH_SET_SOFTIRQ_PENDING 1
19
20#define set_softirq_pending(x) write_pda(__softirq_pending, (x))
21#define or_softirq_pending(x) or_pda(__softirq_pending, (x))
22
23extern void ack_bad_irq(unsigned int irq);
24
25#endif /* _ASM_X86_HARDIRQ_64_H */
diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h
index 05cfed4485fa..bcf7ea4e1367 100644
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@@ -91,7 +91,7 @@ extern void unxlate_dev_mem_ptr(unsigned long phys, void *addr);
91 91
92extern int ioremap_change_attr(unsigned long vaddr, unsigned long size, 92extern int ioremap_change_attr(unsigned long vaddr, unsigned long size,
93 unsigned long prot_val); 93 unsigned long prot_val);
94extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size); 94extern void __iomem *ioremap_wc(resource_size_t offset, unsigned long size);
95 95
96/* 96/*
97 * early_ioremap() and early_iounmap() are for temporary early boot-time 97 * early_ioremap() and early_iounmap() are for temporary early boot-time
@@ -99,7 +99,6 @@ extern void __iomem *ioremap_wc(unsigned long offset, unsigned long size);
99 * A boot-time mapping is currently limited to at most 16 pages. 99 * A boot-time mapping is currently limited to at most 16 pages.
100 */ 100 */
101extern void early_ioremap_init(void); 101extern void early_ioremap_init(void);
102extern void early_ioremap_clear(void);
103extern void early_ioremap_reset(void); 102extern void early_ioremap_reset(void);
104extern void __iomem *early_ioremap(unsigned long offset, unsigned long size); 103extern void __iomem *early_ioremap(unsigned long offset, unsigned long size);
105extern void __iomem *early_memremap(unsigned long offset, unsigned long size); 104extern void __iomem *early_memremap(unsigned long offset, unsigned long size);
diff --git a/arch/x86/include/asm/irq_regs.h b/arch/x86/include/asm/irq_regs.h
index 89c898ab298b..77843225b7ea 100644
--- a/arch/x86/include/asm/irq_regs.h
+++ b/arch/x86/include/asm/irq_regs.h
@@ -1,5 +1,31 @@
1#ifdef CONFIG_X86_32 1/*
2# include "irq_regs_32.h" 2 * Per-cpu current frame pointer - the location of the last exception frame on
3#else 3 * the stack, stored in the per-cpu area.
4# include "irq_regs_64.h" 4 *
5#endif 5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */
7#ifndef _ASM_X86_IRQ_REGS_H
8#define _ASM_X86_IRQ_REGS_H
9
10#include <asm/percpu.h>
11
12#define ARCH_HAS_OWN_IRQ_REGS
13
14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15
16static inline struct pt_regs *get_irq_regs(void)
17{
18 return percpu_read(irq_regs);
19}
20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
22{
23 struct pt_regs *old_regs;
24
25 old_regs = get_irq_regs();
26 percpu_write(irq_regs, new_regs);
27
28 return old_regs;
29}
30
31#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_32.h b/arch/x86/include/asm/irq_regs_32.h
deleted file mode 100644
index 86afd7473457..000000000000
--- a/arch/x86/include/asm/irq_regs_32.h
+++ /dev/null
@@ -1,31 +0,0 @@
1/*
2 * Per-cpu current frame pointer - the location of the last exception frame on
3 * the stack, stored in the per-cpu area.
4 *
5 * Jeremy Fitzhardinge <jeremy@goop.org>
6 */
7#ifndef _ASM_X86_IRQ_REGS_32_H
8#define _ASM_X86_IRQ_REGS_32_H
9
10#include <asm/percpu.h>
11
12#define ARCH_HAS_OWN_IRQ_REGS
13
14DECLARE_PER_CPU(struct pt_regs *, irq_regs);
15
16static inline struct pt_regs *get_irq_regs(void)
17{
18 return x86_read_percpu(irq_regs);
19}
20
21static inline struct pt_regs *set_irq_regs(struct pt_regs *new_regs)
22{
23 struct pt_regs *old_regs;
24
25 old_regs = get_irq_regs();
26 x86_write_percpu(irq_regs, new_regs);
27
28 return old_regs;
29}
30
31#endif /* _ASM_X86_IRQ_REGS_32_H */
diff --git a/arch/x86/include/asm/irq_regs_64.h b/arch/x86/include/asm/irq_regs_64.h
deleted file mode 100644
index 3dd9c0b70270..000000000000
--- a/arch/x86/include/asm/irq_regs_64.h
+++ /dev/null
@@ -1 +0,0 @@
1#include <asm-generic/irq_regs.h>
diff --git a/arch/x86/include/asm/irq_vectors.h b/arch/x86/include/asm/irq_vectors.h
index f7ff65032b9d..9a83a10a5d51 100644
--- a/arch/x86/include/asm/irq_vectors.h
+++ b/arch/x86/include/asm/irq_vectors.h
@@ -49,31 +49,33 @@
49 * some of the following vectors are 'rare', they are merged 49 * some of the following vectors are 'rare', they are merged
50 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space. 50 * into a single vector (CALL_FUNCTION_VECTOR) to save vector space.
51 * TLB, reschedule and local APIC vectors are performance-critical. 51 * TLB, reschedule and local APIC vectors are performance-critical.
52 *
53 * Vectors 0xf0-0xfa are free (reserved for future Linux use).
54 */ 52 */
55#ifdef CONFIG_X86_32 53#ifdef CONFIG_X86_32
56 54
57# define SPURIOUS_APIC_VECTOR 0xff 55# define SPURIOUS_APIC_VECTOR 0xff
58# define ERROR_APIC_VECTOR 0xfe 56# define ERROR_APIC_VECTOR 0xfe
59# define INVALIDATE_TLB_VECTOR 0xfd 57# define RESCHEDULE_VECTOR 0xfd
60# define RESCHEDULE_VECTOR 0xfc 58# define CALL_FUNCTION_VECTOR 0xfc
61# define CALL_FUNCTION_VECTOR 0xfb 59# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
62# define CALL_FUNCTION_SINGLE_VECTOR 0xfa 60# define THERMAL_APIC_VECTOR 0xfa
63# define THERMAL_APIC_VECTOR 0xf0 61/* 0xf8 - 0xf9 : free */
62# define INVALIDATE_TLB_VECTOR_END 0xf7
63# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
64
65# define NUM_INVALIDATE_TLB_VECTORS 8
64 66
65#else 67#else
66 68
67#define SPURIOUS_APIC_VECTOR 0xff 69# define SPURIOUS_APIC_VECTOR 0xff
68#define ERROR_APIC_VECTOR 0xfe 70# define ERROR_APIC_VECTOR 0xfe
69#define RESCHEDULE_VECTOR 0xfd 71# define RESCHEDULE_VECTOR 0xfd
70#define CALL_FUNCTION_VECTOR 0xfc 72# define CALL_FUNCTION_VECTOR 0xfc
71#define CALL_FUNCTION_SINGLE_VECTOR 0xfb 73# define CALL_FUNCTION_SINGLE_VECTOR 0xfb
72#define THERMAL_APIC_VECTOR 0xfa 74# define THERMAL_APIC_VECTOR 0xfa
73#define THRESHOLD_APIC_VECTOR 0xf9 75# define THRESHOLD_APIC_VECTOR 0xf9
74#define UV_BAU_MESSAGE 0xf8 76# define UV_BAU_MESSAGE 0xf8
75#define INVALIDATE_TLB_VECTOR_END 0xf7 77# define INVALIDATE_TLB_VECTOR_END 0xf7
76#define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */ 78# define INVALIDATE_TLB_VECTOR_START 0xf0 /* f0-f7 used for TLB flush */
77 79
78#define NUM_INVALIDATE_TLB_VECTORS 8 80#define NUM_INVALIDATE_TLB_VECTORS 8
79 81
@@ -105,6 +107,8 @@
105 107
106#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER) 108#if defined(CONFIG_X86_IO_APIC) && !defined(CONFIG_X86_VOYAGER)
107 109
110#include <asm/apicnum.h> /* need MAX_IO_APICS */
111
108#ifndef CONFIG_SPARSE_IRQ 112#ifndef CONFIG_SPARSE_IRQ
109# if NR_CPUS < MAX_IO_APICS 113# if NR_CPUS < MAX_IO_APICS
110# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS)) 114# define NR_IRQS (NR_VECTORS + (32 * NR_CPUS))
@@ -112,11 +116,12 @@
112# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) 116# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS))
113# endif 117# endif
114#else 118#else
115# if (8 * NR_CPUS) > (32 * MAX_IO_APICS) 119
116# define NR_IRQS (NR_VECTORS + (8 * NR_CPUS)) 120# define NR_IRQS \
117# else 121 ((8 * NR_CPUS) > (32 * MAX_IO_APICS) ? \
118# define NR_IRQS (NR_VECTORS + (32 * MAX_IO_APICS)) 122 (NR_VECTORS + (8 * NR_CPUS)) : \
119# endif 123 (NR_VECTORS + (32 * MAX_IO_APICS))) \
124
120#endif 125#endif
121 126
122#elif defined(CONFIG_X86_VOYAGER) 127#elif defined(CONFIG_X86_VOYAGER)
diff --git a/arch/x86/include/asm/mach-default/entry_arch.h b/arch/x86/include/asm/mach-default/entry_arch.h
index 6b1add8e31dd..6fa399ad1de2 100644
--- a/arch/x86/include/asm/mach-default/entry_arch.h
+++ b/arch/x86/include/asm/mach-default/entry_arch.h
@@ -11,10 +11,26 @@
11 */ 11 */
12#ifdef CONFIG_X86_SMP 12#ifdef CONFIG_X86_SMP
13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR) 13BUILD_INTERRUPT(reschedule_interrupt,RESCHEDULE_VECTOR)
14BUILD_INTERRUPT(invalidate_interrupt,INVALIDATE_TLB_VECTOR)
15BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) 14BUILD_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR)
16BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR) 15BUILD_INTERRUPT(call_function_single_interrupt,CALL_FUNCTION_SINGLE_VECTOR)
17BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR) 16BUILD_INTERRUPT(irq_move_cleanup_interrupt,IRQ_MOVE_CLEANUP_VECTOR)
17
18BUILD_INTERRUPT3(invalidate_interrupt0,INVALIDATE_TLB_VECTOR_START+0,
19 smp_invalidate_interrupt)
20BUILD_INTERRUPT3(invalidate_interrupt1,INVALIDATE_TLB_VECTOR_START+1,
21 smp_invalidate_interrupt)
22BUILD_INTERRUPT3(invalidate_interrupt2,INVALIDATE_TLB_VECTOR_START+2,
23 smp_invalidate_interrupt)
24BUILD_INTERRUPT3(invalidate_interrupt3,INVALIDATE_TLB_VECTOR_START+3,
25 smp_invalidate_interrupt)
26BUILD_INTERRUPT3(invalidate_interrupt4,INVALIDATE_TLB_VECTOR_START+4,
27 smp_invalidate_interrupt)
28BUILD_INTERRUPT3(invalidate_interrupt5,INVALIDATE_TLB_VECTOR_START+5,
29 smp_invalidate_interrupt)
30BUILD_INTERRUPT3(invalidate_interrupt6,INVALIDATE_TLB_VECTOR_START+6,
31 smp_invalidate_interrupt)
32BUILD_INTERRUPT3(invalidate_interrupt7,INVALIDATE_TLB_VECTOR_START+7,
33 smp_invalidate_interrupt)
18#endif 34#endif
19 35
20/* 36/*
diff --git a/arch/x86/include/asm/mach-default/mach_wakecpu.h b/arch/x86/include/asm/mach-default/mach_wakecpu.h
index ceb013660146..89897a6a65b9 100644
--- a/arch/x86/include/asm/mach-default/mach_wakecpu.h
+++ b/arch/x86/include/asm/mach-default/mach_wakecpu.h
@@ -24,7 +24,13 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
24{ 24{
25} 25}
26 26
27#ifdef CONFIG_SMP
27extern void __inquire_remote_apic(int apicid); 28extern void __inquire_remote_apic(int apicid);
29#else /* CONFIG_SMP */
30static inline void __inquire_remote_apic(int apicid)
31{
32}
33#endif /* CONFIG_SMP */
28 34
29static inline void inquire_remote_apic(int apicid) 35static inline void inquire_remote_apic(int apicid)
30{ 36{
diff --git a/arch/x86/include/asm/mach-rdc321x/gpio.h b/arch/x86/include/asm/mach-rdc321x/gpio.h
deleted file mode 100644
index c210ab5788b0..000000000000
--- a/arch/x86/include/asm/mach-rdc321x/gpio.h
+++ /dev/null
@@ -1,60 +0,0 @@
1#ifndef _ASM_X86_MACH_RDC321X_GPIO_H
2#define _ASM_X86_MACH_RDC321X_GPIO_H
3
4#include <linux/kernel.h>
5
6extern int rdc_gpio_get_value(unsigned gpio);
7extern void rdc_gpio_set_value(unsigned gpio, int value);
8extern int rdc_gpio_direction_input(unsigned gpio);
9extern int rdc_gpio_direction_output(unsigned gpio, int value);
10extern int rdc_gpio_request(unsigned gpio, const char *label);
11extern void rdc_gpio_free(unsigned gpio);
12extern void __init rdc321x_gpio_setup(void);
13
14/* Wrappers for the arch-neutral GPIO API */
15
16static inline int gpio_request(unsigned gpio, const char *label)
17{
18 return rdc_gpio_request(gpio, label);
19}
20
21static inline void gpio_free(unsigned gpio)
22{
23 might_sleep();
24 rdc_gpio_free(gpio);
25}
26
27static inline int gpio_direction_input(unsigned gpio)
28{
29 return rdc_gpio_direction_input(gpio);
30}
31
32static inline int gpio_direction_output(unsigned gpio, int value)
33{
34 return rdc_gpio_direction_output(gpio, value);
35}
36
37static inline int gpio_get_value(unsigned gpio)
38{
39 return rdc_gpio_get_value(gpio);
40}
41
42static inline void gpio_set_value(unsigned gpio, int value)
43{
44 rdc_gpio_set_value(gpio, value);
45}
46
47static inline int gpio_to_irq(unsigned gpio)
48{
49 return gpio;
50}
51
52static inline int irq_to_gpio(unsigned irq)
53{
54 return irq;
55}
56
57/* For cansleep */
58#include <asm-generic/gpio.h>
59
60#endif /* _ASM_X86_MACH_RDC321X_GPIO_H */
diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h
index 8aeeb3fd73db..52948df9cd1d 100644
--- a/arch/x86/include/asm/mmu_context.h
+++ b/arch/x86/include/asm/mmu_context.h
@@ -21,11 +21,54 @@ static inline void paravirt_activate_mm(struct mm_struct *prev,
21int init_new_context(struct task_struct *tsk, struct mm_struct *mm); 21int init_new_context(struct task_struct *tsk, struct mm_struct *mm);
22void destroy_context(struct mm_struct *mm); 22void destroy_context(struct mm_struct *mm);
23 23
24#ifdef CONFIG_X86_32 24
25# include "mmu_context_32.h" 25static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
26#else 26{
27# include "mmu_context_64.h" 27#ifdef CONFIG_SMP
28 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
29 percpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
30#endif
31}
32
33static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
34 struct task_struct *tsk)
35{
36 unsigned cpu = smp_processor_id();
37
38 if (likely(prev != next)) {
39 /* stop flush ipis for the previous mm */
40 cpu_clear(cpu, prev->cpu_vm_mask);
41#ifdef CONFIG_SMP
42 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
43 percpu_write(cpu_tlbstate.active_mm, next);
28#endif 44#endif
45 cpu_set(cpu, next->cpu_vm_mask);
46
47 /* Re-load page tables */
48 load_cr3(next->pgd);
49
50 /*
51 * load the LDT, if the LDT is different:
52 */
53 if (unlikely(prev->context.ldt != next->context.ldt))
54 load_LDT_nolock(&next->context);
55 }
56#ifdef CONFIG_SMP
57 else {
58 percpu_write(cpu_tlbstate.state, TLBSTATE_OK);
59 BUG_ON(percpu_read(cpu_tlbstate.active_mm) != next);
60
61 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
62 /* We were in lazy tlb mode and leave_mm disabled
63 * tlb flush IPI delivery. We must reload CR3
64 * to make sure to use no freed page tables.
65 */
66 load_cr3(next->pgd);
67 load_LDT_nolock(&next->context);
68 }
69 }
70#endif
71}
29 72
30#define activate_mm(prev, next) \ 73#define activate_mm(prev, next) \
31do { \ 74do { \
@@ -33,5 +76,17 @@ do { \
33 switch_mm((prev), (next), NULL); \ 76 switch_mm((prev), (next), NULL); \
34} while (0); 77} while (0);
35 78
79#ifdef CONFIG_X86_32
80#define deactivate_mm(tsk, mm) \
81do { \
82 loadsegment(gs, 0); \
83} while (0)
84#else
85#define deactivate_mm(tsk, mm) \
86do { \
87 load_gs_index(0); \
88 loadsegment(fs, 0); \
89} while (0)
90#endif
36 91
37#endif /* _ASM_X86_MMU_CONTEXT_H */ 92#endif /* _ASM_X86_MMU_CONTEXT_H */
diff --git a/arch/x86/include/asm/mmu_context_32.h b/arch/x86/include/asm/mmu_context_32.h
deleted file mode 100644
index 7e98ce1d2c0e..000000000000
--- a/arch/x86/include/asm/mmu_context_32.h
+++ /dev/null
@@ -1,55 +0,0 @@
1#ifndef _ASM_X86_MMU_CONTEXT_32_H
2#define _ASM_X86_MMU_CONTEXT_32_H
3
4static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
5{
6#ifdef CONFIG_SMP
7 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK)
8 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_LAZY);
9#endif
10}
11
12static inline void switch_mm(struct mm_struct *prev,
13 struct mm_struct *next,
14 struct task_struct *tsk)
15{
16 int cpu = smp_processor_id();
17
18 if (likely(prev != next)) {
19 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP
22 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
23 x86_write_percpu(cpu_tlbstate.active_mm, next);
24#endif
25 cpu_set(cpu, next->cpu_vm_mask);
26
27 /* Re-load page tables */
28 load_cr3(next->pgd);
29
30 /*
31 * load the LDT, if the LDT is different:
32 */
33 if (unlikely(prev->context.ldt != next->context.ldt))
34 load_LDT_nolock(&next->context);
35 }
36#ifdef CONFIG_SMP
37 else {
38 x86_write_percpu(cpu_tlbstate.state, TLBSTATE_OK);
39 BUG_ON(x86_read_percpu(cpu_tlbstate.active_mm) != next);
40
41 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
42 /* We were in lazy tlb mode and leave_mm disabled
43 * tlb flush IPI delivery. We must reload %cr3.
44 */
45 load_cr3(next->pgd);
46 load_LDT_nolock(&next->context);
47 }
48 }
49#endif
50}
51
52#define deactivate_mm(tsk, mm) \
53 asm("movl %0,%%gs": :"r" (0));
54
55#endif /* _ASM_X86_MMU_CONTEXT_32_H */
diff --git a/arch/x86/include/asm/mmu_context_64.h b/arch/x86/include/asm/mmu_context_64.h
deleted file mode 100644
index 677d36e9540a..000000000000
--- a/arch/x86/include/asm/mmu_context_64.h
+++ /dev/null
@@ -1,54 +0,0 @@
1#ifndef _ASM_X86_MMU_CONTEXT_64_H
2#define _ASM_X86_MMU_CONTEXT_64_H
3
4#include <asm/pda.h>
5
6static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
7{
8#ifdef CONFIG_SMP
9 if (read_pda(mmu_state) == TLBSTATE_OK)
10 write_pda(mmu_state, TLBSTATE_LAZY);
11#endif
12}
13
14static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
15 struct task_struct *tsk)
16{
17 unsigned cpu = smp_processor_id();
18 if (likely(prev != next)) {
19 /* stop flush ipis for the previous mm */
20 cpu_clear(cpu, prev->cpu_vm_mask);
21#ifdef CONFIG_SMP
22 write_pda(mmu_state, TLBSTATE_OK);
23 write_pda(active_mm, next);
24#endif
25 cpu_set(cpu, next->cpu_vm_mask);
26 load_cr3(next->pgd);
27
28 if (unlikely(next->context.ldt != prev->context.ldt))
29 load_LDT_nolock(&next->context);
30 }
31#ifdef CONFIG_SMP
32 else {
33 write_pda(mmu_state, TLBSTATE_OK);
34 if (read_pda(active_mm) != next)
35 BUG();
36 if (!cpu_test_and_set(cpu, next->cpu_vm_mask)) {
37 /* We were in lazy tlb mode and leave_mm disabled
38 * tlb flush IPI delivery. We must reload CR3
39 * to make sure to use no freed page tables.
40 */
41 load_cr3(next->pgd);
42 load_LDT_nolock(&next->context);
43 }
44 }
45#endif
46}
47
48#define deactivate_mm(tsk, mm) \
49do { \
50 load_gs_index(0); \
51 asm volatile("movl %0,%%fs"::"r"(0)); \
52} while (0)
53
54#endif /* _ASM_X86_MMU_CONTEXT_64_H */
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index cb58643947b9..358acc59ae04 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -202,6 +202,35 @@
202#define MSR_IA32_THERM_STATUS 0x0000019c 202#define MSR_IA32_THERM_STATUS 0x0000019c
203#define MSR_IA32_MISC_ENABLE 0x000001a0 203#define MSR_IA32_MISC_ENABLE 0x000001a0
204 204
205/* MISC_ENABLE bits: architectural */
206#define MSR_IA32_MISC_ENABLE_FAST_STRING (1ULL << 0)
207#define MSR_IA32_MISC_ENABLE_TCC (1ULL << 1)
208#define MSR_IA32_MISC_ENABLE_EMON (1ULL << 7)
209#define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL (1ULL << 11)
210#define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1ULL << 12)
211#define MSR_IA32_MISC_ENABLE_ENHANCED_SPEEDSTEP (1ULL << 16)
212#define MSR_IA32_MISC_ENABLE_MWAIT (1ULL << 18)
213#define MSR_IA32_MISC_ENABLE_LIMIT_CPUID (1ULL << 22)
214#define MSR_IA32_MISC_ENABLE_XTPR_DISABLE (1ULL << 23)
215#define MSR_IA32_MISC_ENABLE_XD_DISABLE (1ULL << 34)
216
217/* MISC_ENABLE bits: model-specific, meaning may vary from core to core */
218#define MSR_IA32_MISC_ENABLE_X87_COMPAT (1ULL << 2)
219#define MSR_IA32_MISC_ENABLE_TM1 (1ULL << 3)
220#define MSR_IA32_MISC_ENABLE_SPLIT_LOCK_DISABLE (1ULL << 4)
221#define MSR_IA32_MISC_ENABLE_L3CACHE_DISABLE (1ULL << 6)
222#define MSR_IA32_MISC_ENABLE_SUPPRESS_LOCK (1ULL << 8)
223#define MSR_IA32_MISC_ENABLE_PREFETCH_DISABLE (1ULL << 9)
224#define MSR_IA32_MISC_ENABLE_FERR (1ULL << 10)
225#define MSR_IA32_MISC_ENABLE_FERR_MULTIPLEX (1ULL << 10)
226#define MSR_IA32_MISC_ENABLE_TM2 (1ULL << 13)
227#define MSR_IA32_MISC_ENABLE_ADJ_PREF_DISABLE (1ULL << 19)
228#define MSR_IA32_MISC_ENABLE_SPEEDSTEP_LOCK (1ULL << 20)
229#define MSR_IA32_MISC_ENABLE_L1D_CONTEXT (1ULL << 24)
230#define MSR_IA32_MISC_ENABLE_DCU_PREF_DISABLE (1ULL << 37)
231#define MSR_IA32_MISC_ENABLE_TURBO_DISABLE (1ULL << 38)
232#define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << 39)
233
205/* Intel Model 6 */ 234/* Intel Model 6 */
206#define MSR_P6_EVNTSEL0 0x00000186 235#define MSR_P6_EVNTSEL0 0x00000186
207#define MSR_P6_EVNTSEL1 0x00000187 236#define MSR_P6_EVNTSEL1 0x00000187
diff --git a/arch/x86/include/asm/mtrr.h b/arch/x86/include/asm/mtrr.h
index cb988aab716d..14080d22edb3 100644
--- a/arch/x86/include/asm/mtrr.h
+++ b/arch/x86/include/asm/mtrr.h
@@ -58,15 +58,15 @@ struct mtrr_gentry {
58#endif /* !__i386__ */ 58#endif /* !__i386__ */
59 59
60struct mtrr_var_range { 60struct mtrr_var_range {
61 u32 base_lo; 61 __u32 base_lo;
62 u32 base_hi; 62 __u32 base_hi;
63 u32 mask_lo; 63 __u32 mask_lo;
64 u32 mask_hi; 64 __u32 mask_hi;
65}; 65};
66 66
67/* In the Intel processor's MTRR interface, the MTRR type is always held in 67/* In the Intel processor's MTRR interface, the MTRR type is always held in
68 an 8 bit field: */ 68 an 8 bit field: */
69typedef u8 mtrr_type; 69typedef __u8 mtrr_type;
70 70
71#define MTRR_NUM_FIXED_RANGES 88 71#define MTRR_NUM_FIXED_RANGES 88
72#define MTRR_MAX_VAR_RANGES 256 72#define MTRR_MAX_VAR_RANGES 256
diff --git a/arch/x86/include/asm/page.h b/arch/x86/include/asm/page.h
index e9873a2e8695..6b9810859daf 100644
--- a/arch/x86/include/asm/page.h
+++ b/arch/x86/include/asm/page.h
@@ -147,7 +147,7 @@ static inline pteval_t native_pte_val(pte_t pte)
147 return pte.pte; 147 return pte.pte;
148} 148}
149 149
150static inline pteval_t native_pte_flags(pte_t pte) 150static inline pteval_t pte_flags(pte_t pte)
151{ 151{
152 return native_pte_val(pte) & PTE_FLAGS_MASK; 152 return native_pte_val(pte) & PTE_FLAGS_MASK;
153} 153}
@@ -173,7 +173,6 @@ static inline pteval_t native_pte_flags(pte_t pte)
173#endif 173#endif
174 174
175#define pte_val(x) native_pte_val(x) 175#define pte_val(x) native_pte_val(x)
176#define pte_flags(x) native_pte_flags(x)
177#define __pte(x) native_make_pte(x) 176#define __pte(x) native_make_pte(x)
178 177
179#endif /* CONFIG_PARAVIRT */ 178#endif /* CONFIG_PARAVIRT */
diff --git a/arch/x86/include/asm/page_64.h b/arch/x86/include/asm/page_64.h
index 5ebca29f44f0..e27fdbe5f9e4 100644
--- a/arch/x86/include/asm/page_64.h
+++ b/arch/x86/include/asm/page_64.h
@@ -13,8 +13,8 @@
13#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1) 13#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
14#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER) 14#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
15 15
16#define IRQSTACK_ORDER 2 16#define IRQ_STACK_ORDER 2
17#define IRQSTACKSIZE (PAGE_SIZE << IRQSTACK_ORDER) 17#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
18 18
19#define STACKFAULT_STACK 1 19#define STACKFAULT_STACK 1
20#define DOUBLEFAULT_STACK 2 20#define DOUBLEFAULT_STACK 2
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h
index 32bc6c2c1386..7e674ea80f0d 100644
--- a/arch/x86/include/asm/paravirt.h
+++ b/arch/x86/include/asm/paravirt.h
@@ -244,7 +244,8 @@ struct pv_mmu_ops {
244 void (*flush_tlb_user)(void); 244 void (*flush_tlb_user)(void);
245 void (*flush_tlb_kernel)(void); 245 void (*flush_tlb_kernel)(void);
246 void (*flush_tlb_single)(unsigned long addr); 246 void (*flush_tlb_single)(unsigned long addr);
247 void (*flush_tlb_others)(const cpumask_t *cpus, struct mm_struct *mm, 247 void (*flush_tlb_others)(const struct cpumask *cpus,
248 struct mm_struct *mm,
248 unsigned long va); 249 unsigned long va);
249 250
250 /* Hooks for allocating and freeing a pagetable top-level */ 251 /* Hooks for allocating and freeing a pagetable top-level */
@@ -279,7 +280,6 @@ struct pv_mmu_ops {
279 pte_t *ptep, pte_t pte); 280 pte_t *ptep, pte_t pte);
280 281
281 pteval_t (*pte_val)(pte_t); 282 pteval_t (*pte_val)(pte_t);
282 pteval_t (*pte_flags)(pte_t);
283 pte_t (*make_pte)(pteval_t pte); 283 pte_t (*make_pte)(pteval_t pte);
284 284
285 pgdval_t (*pgd_val)(pgd_t); 285 pgdval_t (*pgd_val)(pgd_t);
@@ -984,10 +984,11 @@ static inline void __flush_tlb_single(unsigned long addr)
984 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr); 984 PVOP_VCALL1(pv_mmu_ops.flush_tlb_single, addr);
985} 985}
986 986
987static inline void flush_tlb_others(cpumask_t cpumask, struct mm_struct *mm, 987static inline void flush_tlb_others(const struct cpumask *cpumask,
988 struct mm_struct *mm,
988 unsigned long va) 989 unsigned long va)
989{ 990{
990 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, &cpumask, mm, va); 991 PVOP_VCALL3(pv_mmu_ops.flush_tlb_others, cpumask, mm, va);
991} 992}
992 993
993static inline int paravirt_pgd_alloc(struct mm_struct *mm) 994static inline int paravirt_pgd_alloc(struct mm_struct *mm)
@@ -1084,23 +1085,6 @@ static inline pteval_t pte_val(pte_t pte)
1084 return ret; 1085 return ret;
1085} 1086}
1086 1087
1087static inline pteval_t pte_flags(pte_t pte)
1088{
1089 pteval_t ret;
1090
1091 if (sizeof(pteval_t) > sizeof(long))
1092 ret = PVOP_CALL2(pteval_t, pv_mmu_ops.pte_flags,
1093 pte.pte, (u64)pte.pte >> 32);
1094 else
1095 ret = PVOP_CALL1(pteval_t, pv_mmu_ops.pte_flags,
1096 pte.pte);
1097
1098#ifdef CONFIG_PARAVIRT_DEBUG
1099 BUG_ON(ret & PTE_PFN_MASK);
1100#endif
1101 return ret;
1102}
1103
1104static inline pgd_t __pgd(pgdval_t val) 1088static inline pgd_t __pgd(pgdval_t val)
1105{ 1089{
1106 pgdval_t ret; 1090 pgdval_t ret;
diff --git a/arch/x86/include/asm/pat.h b/arch/x86/include/asm/pat.h
index b8493b3b9890..9709fdff6615 100644
--- a/arch/x86/include/asm/pat.h
+++ b/arch/x86/include/asm/pat.h
@@ -5,10 +5,8 @@
5 5
6#ifdef CONFIG_X86_PAT 6#ifdef CONFIG_X86_PAT
7extern int pat_enabled; 7extern int pat_enabled;
8extern void validate_pat_support(struct cpuinfo_x86 *c);
9#else 8#else
10static const int pat_enabled; 9static const int pat_enabled;
11static inline void validate_pat_support(struct cpuinfo_x86 *c) { }
12#endif 10#endif
13 11
14extern void pat_init(void); 12extern void pat_init(void);
@@ -17,6 +15,4 @@ extern int reserve_memtype(u64 start, u64 end,
17 unsigned long req_type, unsigned long *ret_type); 15 unsigned long req_type, unsigned long *ret_type);
18extern int free_memtype(u64 start, u64 end); 16extern int free_memtype(u64 start, u64 end);
19 17
20extern void pat_disable(char *reason);
21
22#endif /* _ASM_X86_PAT_H */ 18#endif /* _ASM_X86_PAT_H */
diff --git a/arch/x86/include/asm/pda.h b/arch/x86/include/asm/pda.h
deleted file mode 100644
index 2fbfff88df37..000000000000
--- a/arch/x86/include/asm/pda.h
+++ /dev/null
@@ -1,137 +0,0 @@
1#ifndef _ASM_X86_PDA_H
2#define _ASM_X86_PDA_H
3
4#ifndef __ASSEMBLY__
5#include <linux/stddef.h>
6#include <linux/types.h>
7#include <linux/cache.h>
8#include <asm/page.h>
9
10/* Per processor datastructure. %gs points to it while the kernel runs */
11struct x8664_pda {
12 struct task_struct *pcurrent; /* 0 Current process */
13 unsigned long data_offset; /* 8 Per cpu data offset from linker
14 address */
15 unsigned long kernelstack; /* 16 top of kernel stack for current */
16 unsigned long oldrsp; /* 24 user rsp for system call */
17 int irqcount; /* 32 Irq nesting counter. Starts -1 */
18 unsigned int cpunumber; /* 36 Logical CPU number */
19#ifdef CONFIG_CC_STACKPROTECTOR
20 unsigned long stack_canary; /* 40 stack canary value */
21 /* gcc-ABI: this canary MUST be at
22 offset 40!!! */
23#endif
24 char *irqstackptr;
25 short nodenumber; /* number of current node (32k max) */
26 short in_bootmem; /* pda lives in bootmem */
27 unsigned int __softirq_pending;
28 unsigned int __nmi_count; /* number of NMI on this CPUs */
29 short mmu_state;
30 short isidle;
31 struct mm_struct *active_mm;
32 unsigned apic_timer_irqs;
33 unsigned irq0_irqs;
34 unsigned irq_resched_count;
35 unsigned irq_call_count;
36 unsigned irq_tlb_count;
37 unsigned irq_thermal_count;
38 unsigned irq_threshold_count;
39 unsigned irq_spurious_count;
40} ____cacheline_aligned_in_smp;
41
42extern struct x8664_pda **_cpu_pda;
43extern void pda_init(int);
44
45#define cpu_pda(i) (_cpu_pda[i])
46
47/*
48 * There is no fast way to get the base address of the PDA, all the accesses
49 * have to mention %fs/%gs. So it needs to be done this Torvaldian way.
50 */
51extern void __bad_pda_field(void) __attribute__((noreturn));
52
53/*
54 * proxy_pda doesn't actually exist, but tell gcc it is accessed for
55 * all PDA accesses so it gets read/write dependencies right.
56 */
57extern struct x8664_pda _proxy_pda;
58
59#define pda_offset(field) offsetof(struct x8664_pda, field)
60
61#define pda_to_op(op, field, val) \
62do { \
63 typedef typeof(_proxy_pda.field) T__; \
64 if (0) { T__ tmp__; tmp__ = (val); } /* type checking */ \
65 switch (sizeof(_proxy_pda.field)) { \
66 case 2: \
67 asm(op "w %1,%%gs:%c2" : \
68 "+m" (_proxy_pda.field) : \
69 "ri" ((T__)val), \
70 "i"(pda_offset(field))); \
71 break; \
72 case 4: \
73 asm(op "l %1,%%gs:%c2" : \
74 "+m" (_proxy_pda.field) : \
75 "ri" ((T__)val), \
76 "i" (pda_offset(field))); \
77 break; \
78 case 8: \
79 asm(op "q %1,%%gs:%c2": \
80 "+m" (_proxy_pda.field) : \
81 "ri" ((T__)val), \
82 "i"(pda_offset(field))); \
83 break; \
84 default: \
85 __bad_pda_field(); \
86 } \
87} while (0)
88
89#define pda_from_op(op, field) \
90({ \
91 typeof(_proxy_pda.field) ret__; \
92 switch (sizeof(_proxy_pda.field)) { \
93 case 2: \
94 asm(op "w %%gs:%c1,%0" : \
95 "=r" (ret__) : \
96 "i" (pda_offset(field)), \
97 "m" (_proxy_pda.field)); \
98 break; \
99 case 4: \
100 asm(op "l %%gs:%c1,%0": \
101 "=r" (ret__): \
102 "i" (pda_offset(field)), \
103 "m" (_proxy_pda.field)); \
104 break; \
105 case 8: \
106 asm(op "q %%gs:%c1,%0": \
107 "=r" (ret__) : \
108 "i" (pda_offset(field)), \
109 "m" (_proxy_pda.field)); \
110 break; \
111 default: \
112 __bad_pda_field(); \
113 } \
114 ret__; \
115})
116
117#define read_pda(field) pda_from_op("mov", field)
118#define write_pda(field, val) pda_to_op("mov", field, val)
119#define add_pda(field, val) pda_to_op("add", field, val)
120#define sub_pda(field, val) pda_to_op("sub", field, val)
121#define or_pda(field, val) pda_to_op("or", field, val)
122
123/* This is not atomic against other CPUs -- CPU preemption needs to be off */
124#define test_and_clear_bit_pda(bit, field) \
125({ \
126 int old__; \
127 asm volatile("btr %2,%%gs:%c3\n\tsbbl %0,%0" \
128 : "=r" (old__), "+m" (_proxy_pda.field) \
129 : "dIr" (bit), "i" (pda_offset(field)) : "memory");\
130 old__; \
131})
132
133#endif
134
135#define PDA_STACKOFFSET (5*8)
136
137#endif /* _ASM_X86_PDA_H */
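
The deleted pda_to_op()/pda_from_op() macros show the pattern the PDA relied on: a compile-time type check of the value against the field, then a switch on sizeof() to pick the operand width for the %gs-relative asm. A minimal user-space sketch of that dispatch, with a hypothetical structure and ordinary memory stores standing in for the segment-override asm (which only makes sense once the kernel has programmed the GS base):

/* pda_dispatch.c - sketch of the sizeof()-switch dispatch used by the
 * removed pda_to_op()/pda_from_op() macros, minus the %gs override.
 * Hypothetical struct and field names; build with gcc (uses typeof). */
#include <stdio.h>
#include <stdint.h>

struct demo_pda {
    uint64_t kernelstack;
    uint32_t cpunumber;
    uint16_t mmu_state;
};

static struct demo_pda pda_instance;

/* type-check (val) against the field, then dispatch on the field size */
#define pda_write(field, val)                                           \
do {                                                                    \
    typedef typeof(pda_instance.field) T__;                             \
    if (0) { T__ tmp__; tmp__ = (val); } /* type check only */          \
    switch (sizeof(pda_instance.field)) {                               \
    case 2: *(uint16_t *)&pda_instance.field = (T__)(val); break;       \
    case 4: *(uint32_t *)&pda_instance.field = (T__)(val); break;       \
    case 8: *(uint64_t *)&pda_instance.field = (T__)(val); break;       \
    default: break; /* the real macro calls __bad_pda_field() */        \
    }                                                                   \
} while (0)

int main(void)
{
    pda_write(cpunumber, 3);
    pda_write(mmu_state, 1);
    printf("cpu=%u mmu=%u\n", (unsigned)pda_instance.cpunumber,
           (unsigned)pda_instance.mmu_state);
    return 0;
}
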
diff --git a/arch/x86/include/asm/percpu.h b/arch/x86/include/asm/percpu.h
index ece72053ba63..0b64af4f13ac 100644
--- a/arch/x86/include/asm/percpu.h
+++ b/arch/x86/include/asm/percpu.h
@@ -2,53 +2,12 @@
2#define _ASM_X86_PERCPU_H 2#define _ASM_X86_PERCPU_H
3 3
4#ifdef CONFIG_X86_64 4#ifdef CONFIG_X86_64
5#include <linux/compiler.h> 5#define __percpu_seg gs
6 6#define __percpu_mov_op movq
7/* Same as asm-generic/percpu.h, except that we store the per cpu offset 7#else
8 in the PDA. Longer term the PDA and every per cpu variable 8#define __percpu_seg fs
9 should be just put into a single section and referenced directly 9#define __percpu_mov_op movl
10 from %gs */
11
12#ifdef CONFIG_SMP
13#include <asm/pda.h>
14
15#define __per_cpu_offset(cpu) (cpu_pda(cpu)->data_offset)
16#define __my_cpu_offset read_pda(data_offset)
17
18#define per_cpu_offset(x) (__per_cpu_offset(x))
19
20#endif 10#endif
21#include <asm-generic/percpu.h>
22
23DECLARE_PER_CPU(struct x8664_pda, pda);
24
25/*
26 * These are supposed to be implemented as a single instruction which
27 * operates on the per-cpu data base segment. x86-64 doesn't have
28 * that yet, so this is a fairly inefficient workaround for the
29 * meantime. The single instruction is atomic with respect to
30 * preemption and interrupts, so we need to explicitly disable
31 * interrupts here to achieve the same effect. However, because it
32 * can be used from within interrupt-disable/enable, we can't actually
33 * disable interrupts; disabling preemption is enough.
34 */
35#define x86_read_percpu(var) \
36 ({ \
37 typeof(per_cpu_var(var)) __tmp; \
38 preempt_disable(); \
39 __tmp = __get_cpu_var(var); \
40 preempt_enable(); \
41 __tmp; \
42 })
43
44#define x86_write_percpu(var, val) \
45 do { \
46 preempt_disable(); \
47 __get_cpu_var(var) = (val); \
48 preempt_enable(); \
49 } while(0)
50
51#else /* CONFIG_X86_64 */
52 11
53#ifdef __ASSEMBLY__ 12#ifdef __ASSEMBLY__
54 13
@@ -65,47 +24,26 @@ DECLARE_PER_CPU(struct x8664_pda, pda);
65 * PER_CPU(cpu_gdt_descr, %ebx) 24 * PER_CPU(cpu_gdt_descr, %ebx)
66 */ 25 */
67#ifdef CONFIG_SMP 26#ifdef CONFIG_SMP
68#define PER_CPU(var, reg) \ 27#define PER_CPU(var, reg) \
69 movl %fs:per_cpu__##this_cpu_off, reg; \ 28 __percpu_mov_op %__percpu_seg:per_cpu__this_cpu_off, reg; \
70 lea per_cpu__##var(reg), reg 29 lea per_cpu__##var(reg), reg
71#define PER_CPU_VAR(var) %fs:per_cpu__##var 30#define PER_CPU_VAR(var) %__percpu_seg:per_cpu__##var
72#else /* ! SMP */ 31#else /* ! SMP */
73#define PER_CPU(var, reg) \ 32#define PER_CPU(var, reg) \
74 movl $per_cpu__##var, reg 33 __percpu_mov_op $per_cpu__##var, reg
75#define PER_CPU_VAR(var) per_cpu__##var 34#define PER_CPU_VAR(var) per_cpu__##var
76#endif /* SMP */ 35#endif /* SMP */
77 36
78#else /* ...!ASSEMBLY */ 37#else /* ...!ASSEMBLY */
79 38
80/* 39#include <linux/stringify.h>
81 * PER_CPU finds an address of a per-cpu variable.
82 *
83 * Args:
84 * var - variable name
85 * cpu - 32bit register containing the current CPU number
86 *
87 * The resulting address is stored in the "cpu" argument.
88 *
89 * Example:
90 * PER_CPU(cpu_gdt_descr, %ebx)
91 */
92#ifdef CONFIG_SMP
93
94#define __my_cpu_offset x86_read_percpu(this_cpu_off)
95 40
96/* fs segment starts at (positive) offset == __per_cpu_offset[cpu] */ 41#ifdef CONFIG_SMP
97#define __percpu_seg "%%fs:" 42#define __percpu_arg(x) "%%"__stringify(__percpu_seg)":%P" #x
98 43#define __my_cpu_offset percpu_read(this_cpu_off)
99#else /* !SMP */ 44#else
100 45#define __percpu_arg(x) "%" #x
101#define __percpu_seg "" 46#endif
102
103#endif /* SMP */
104
105#include <asm-generic/percpu.h>
106
107/* We can use this directly for local CPU (faster). */
108DECLARE_PER_CPU(unsigned long, this_cpu_off);
109 47
110/* For arch-specific code, we can use direct single-insn ops (they 48/* For arch-specific code, we can use direct single-insn ops (they
111 * don't give an lvalue though). */ 49 * don't give an lvalue though). */
@@ -120,20 +58,25 @@ do { \
120 } \ 58 } \
121 switch (sizeof(var)) { \ 59 switch (sizeof(var)) { \
122 case 1: \ 60 case 1: \
123 asm(op "b %1,"__percpu_seg"%0" \ 61 asm(op "b %1,"__percpu_arg(0) \
124 : "+m" (var) \ 62 : "+m" (var) \
125 : "ri" ((T__)val)); \ 63 : "ri" ((T__)val)); \
126 break; \ 64 break; \
127 case 2: \ 65 case 2: \
128 asm(op "w %1,"__percpu_seg"%0" \ 66 asm(op "w %1,"__percpu_arg(0) \
129 : "+m" (var) \ 67 : "+m" (var) \
130 : "ri" ((T__)val)); \ 68 : "ri" ((T__)val)); \
131 break; \ 69 break; \
132 case 4: \ 70 case 4: \
133 asm(op "l %1,"__percpu_seg"%0" \ 71 asm(op "l %1,"__percpu_arg(0) \
134 : "+m" (var) \ 72 : "+m" (var) \
135 : "ri" ((T__)val)); \ 73 : "ri" ((T__)val)); \
136 break; \ 74 break; \
75 case 8: \
76 asm(op "q %1,"__percpu_arg(0) \
77 : "+m" (var) \
78 : "re" ((T__)val)); \
79 break; \
137 default: __bad_percpu_size(); \ 80 default: __bad_percpu_size(); \
138 } \ 81 } \
139} while (0) 82} while (0)
@@ -143,17 +86,22 @@ do { \
143 typeof(var) ret__; \ 86 typeof(var) ret__; \
144 switch (sizeof(var)) { \ 87 switch (sizeof(var)) { \
145 case 1: \ 88 case 1: \
146 asm(op "b "__percpu_seg"%1,%0" \ 89 asm(op "b "__percpu_arg(1)",%0" \
147 : "=r" (ret__) \ 90 : "=r" (ret__) \
148 : "m" (var)); \ 91 : "m" (var)); \
149 break; \ 92 break; \
150 case 2: \ 93 case 2: \
151 asm(op "w "__percpu_seg"%1,%0" \ 94 asm(op "w "__percpu_arg(1)",%0" \
152 : "=r" (ret__) \ 95 : "=r" (ret__) \
153 : "m" (var)); \ 96 : "m" (var)); \
154 break; \ 97 break; \
155 case 4: \ 98 case 4: \
156 asm(op "l "__percpu_seg"%1,%0" \ 99 asm(op "l "__percpu_arg(1)",%0" \
100 : "=r" (ret__) \
101 : "m" (var)); \
102 break; \
103 case 8: \
104 asm(op "q "__percpu_arg(1)",%0" \
157 : "=r" (ret__) \ 105 : "=r" (ret__) \
158 : "m" (var)); \ 106 : "m" (var)); \
159 break; \ 107 break; \
@@ -162,13 +110,30 @@ do { \
162 ret__; \ 110 ret__; \
163}) 111})
164 112
165#define x86_read_percpu(var) percpu_from_op("mov", per_cpu__##var) 113#define percpu_read(var) percpu_from_op("mov", per_cpu__##var)
166#define x86_write_percpu(var, val) percpu_to_op("mov", per_cpu__##var, val) 114#define percpu_write(var, val) percpu_to_op("mov", per_cpu__##var, val)
167#define x86_add_percpu(var, val) percpu_to_op("add", per_cpu__##var, val) 115#define percpu_add(var, val) percpu_to_op("add", per_cpu__##var, val)
168#define x86_sub_percpu(var, val) percpu_to_op("sub", per_cpu__##var, val) 116#define percpu_sub(var, val) percpu_to_op("sub", per_cpu__##var, val)
169#define x86_or_percpu(var, val) percpu_to_op("or", per_cpu__##var, val) 117#define percpu_and(var, val) percpu_to_op("and", per_cpu__##var, val)
118#define percpu_or(var, val) percpu_to_op("or", per_cpu__##var, val)
119#define percpu_xor(var, val) percpu_to_op("xor", per_cpu__##var, val)
120
121/* This is not atomic against other CPUs -- CPU preemption needs to be off */
122#define x86_test_and_clear_bit_percpu(bit, var) \
123({ \
124 int old__; \
125 asm volatile("btr %2,"__percpu_arg(1)"\n\tsbbl %0,%0" \
126 : "=r" (old__), "+m" (per_cpu__##var) \
127 : "dIr" (bit)); \
128 old__; \
129})
130
131#include <asm-generic/percpu.h>
132
133/* We can use this directly for local CPU (faster). */
134DECLARE_PER_CPU(unsigned long, this_cpu_off);
135
170#endif /* !__ASSEMBLY__ */ 136#endif /* !__ASSEMBLY__ */
171#endif /* !CONFIG_X86_64 */
172 137
173#ifdef CONFIG_SMP 138#ifdef CONFIG_SMP
174 139
@@ -195,9 +160,9 @@ do { \
195#define early_per_cpu_ptr(_name) (_name##_early_ptr) 160#define early_per_cpu_ptr(_name) (_name##_early_ptr)
196#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx]) 161#define early_per_cpu_map(_name, _idx) (_name##_early_map[_idx])
197#define early_per_cpu(_name, _cpu) \ 162#define early_per_cpu(_name, _cpu) \
198 (early_per_cpu_ptr(_name) ? \ 163 *(early_per_cpu_ptr(_name) ? \
199 early_per_cpu_ptr(_name)[_cpu] : \ 164 &early_per_cpu_ptr(_name)[_cpu] : \
200 per_cpu(_name, _cpu)) 165 &per_cpu(_name, _cpu))
201 166
202#else /* !CONFIG_SMP */ 167#else /* !CONFIG_SMP */
203#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \ 168#define DEFINE_EARLY_PER_CPU(_type, _name, _initvalue) \
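
With the PDA gone, percpu_read()/percpu_write() become the single set of accessors for both 32-bit (%fs) and 64-bit (%gs), each compiling down to one segment-prefixed mov through __percpu_arg(). A rough user-space analogue, under the assumption that GCC's __thread variables (themselves %fs-relative on x86-64 Linux) can stand in for the kernel's per-cpu segment:

/* percpu_tls_analogy.c - user-space analogue of percpu_read()/percpu_write():
 * each thread sees its own copy, addressed with a segment override by the
 * compiler, much like per-cpu data after this patch.  Build with -pthread. */
#include <pthread.h>
#include <stdio.h>

static __thread int cpu_number;         /* stands in for per_cpu__cpu_number */

#define percpu_read(var)        (var)           /* compiler emits %fs:var */
#define percpu_write(var, val)  ((var) = (val))

static void *worker(void *arg)
{
    percpu_write(cpu_number, (int)(long)arg);
    printf("thread %d sees cpu_number=%d\n",
           (int)(long)arg, percpu_read(cpu_number));
    return NULL;
}

int main(void)
{
    pthread_t t1, t2;

    pthread_create(&t1, NULL, worker, (void *)1L);
    pthread_create(&t2, NULL, worker, (void *)2L);
    pthread_join(t1, NULL);
    pthread_join(t2, NULL);
    return 0;
}
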
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index cb7c151a8bff..dd14c54ac718 100644
--- a/arch/x86/include/asm/pgalloc.h
+++ b/arch/x86/include/asm/pgalloc.h
@@ -42,6 +42,7 @@ static inline void pte_free_kernel(struct mm_struct *mm, pte_t *pte)
42 42
43static inline void pte_free(struct mm_struct *mm, struct page *pte) 43static inline void pte_free(struct mm_struct *mm, struct page *pte)
44{ 44{
45 pgtable_page_dtor(pte);
45 __free_page(pte); 46 __free_page(pte);
46} 47}
47 48
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 83e69f4a37f0..6ceaef08486f 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -240,64 +240,78 @@ static inline int pmd_large(pmd_t pte)
240 (_PAGE_PSE | _PAGE_PRESENT); 240 (_PAGE_PSE | _PAGE_PRESENT);
241} 241}
242 242
243static inline pte_t pte_set_flags(pte_t pte, pteval_t set)
244{
245 pteval_t v = native_pte_val(pte);
246
247 return native_make_pte(v | set);
248}
249
250static inline pte_t pte_clear_flags(pte_t pte, pteval_t clear)
251{
252 pteval_t v = native_pte_val(pte);
253
254 return native_make_pte(v & ~clear);
255}
256
243static inline pte_t pte_mkclean(pte_t pte) 257static inline pte_t pte_mkclean(pte_t pte)
244{ 258{
245 return __pte(pte_val(pte) & ~_PAGE_DIRTY); 259 return pte_clear_flags(pte, _PAGE_DIRTY);
246} 260}
247 261
248static inline pte_t pte_mkold(pte_t pte) 262static inline pte_t pte_mkold(pte_t pte)
249{ 263{
250 return __pte(pte_val(pte) & ~_PAGE_ACCESSED); 264 return pte_clear_flags(pte, _PAGE_ACCESSED);
251} 265}
252 266
253static inline pte_t pte_wrprotect(pte_t pte) 267static inline pte_t pte_wrprotect(pte_t pte)
254{ 268{
255 return __pte(pte_val(pte) & ~_PAGE_RW); 269 return pte_clear_flags(pte, _PAGE_RW);
256} 270}
257 271
258static inline pte_t pte_mkexec(pte_t pte) 272static inline pte_t pte_mkexec(pte_t pte)
259{ 273{
260 return __pte(pte_val(pte) & ~_PAGE_NX); 274 return pte_clear_flags(pte, _PAGE_NX);
261} 275}
262 276
263static inline pte_t pte_mkdirty(pte_t pte) 277static inline pte_t pte_mkdirty(pte_t pte)
264{ 278{
265 return __pte(pte_val(pte) | _PAGE_DIRTY); 279 return pte_set_flags(pte, _PAGE_DIRTY);
266} 280}
267 281
268static inline pte_t pte_mkyoung(pte_t pte) 282static inline pte_t pte_mkyoung(pte_t pte)
269{ 283{
270 return __pte(pte_val(pte) | _PAGE_ACCESSED); 284 return pte_set_flags(pte, _PAGE_ACCESSED);
271} 285}
272 286
273static inline pte_t pte_mkwrite(pte_t pte) 287static inline pte_t pte_mkwrite(pte_t pte)
274{ 288{
275 return __pte(pte_val(pte) | _PAGE_RW); 289 return pte_set_flags(pte, _PAGE_RW);
276} 290}
277 291
278static inline pte_t pte_mkhuge(pte_t pte) 292static inline pte_t pte_mkhuge(pte_t pte)
279{ 293{
280 return __pte(pte_val(pte) | _PAGE_PSE); 294 return pte_set_flags(pte, _PAGE_PSE);
281} 295}
282 296
283static inline pte_t pte_clrhuge(pte_t pte) 297static inline pte_t pte_clrhuge(pte_t pte)
284{ 298{
285 return __pte(pte_val(pte) & ~_PAGE_PSE); 299 return pte_clear_flags(pte, _PAGE_PSE);
286} 300}
287 301
288static inline pte_t pte_mkglobal(pte_t pte) 302static inline pte_t pte_mkglobal(pte_t pte)
289{ 303{
290 return __pte(pte_val(pte) | _PAGE_GLOBAL); 304 return pte_set_flags(pte, _PAGE_GLOBAL);
291} 305}
292 306
293static inline pte_t pte_clrglobal(pte_t pte) 307static inline pte_t pte_clrglobal(pte_t pte)
294{ 308{
295 return __pte(pte_val(pte) & ~_PAGE_GLOBAL); 309 return pte_clear_flags(pte, _PAGE_GLOBAL);
296} 310}
297 311
298static inline pte_t pte_mkspecial(pte_t pte) 312static inline pte_t pte_mkspecial(pte_t pte)
299{ 313{
300 return __pte(pte_val(pte) | _PAGE_SPECIAL); 314 return pte_set_flags(pte, _PAGE_SPECIAL);
301} 315}
302 316
303extern pteval_t __supported_pte_mask; 317extern pteval_t __supported_pte_mask;
@@ -341,6 +355,25 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot)
341 355
342#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) 356#define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask)
343 357
358static inline int is_new_memtype_allowed(unsigned long flags,
359 unsigned long new_flags)
360{
361 /*
362 * Certain new memtypes are not allowed with certain
363 * requested memtype:
364 * - request is uncached, return cannot be write-back
365 * - request is write-combine, return cannot be write-back
366 */
367 if ((flags == _PAGE_CACHE_UC_MINUS &&
368 new_flags == _PAGE_CACHE_WB) ||
369 (flags == _PAGE_CACHE_WC &&
370 new_flags == _PAGE_CACHE_WB)) {
371 return 0;
372 }
373
374 return 1;
375}
376
344#ifndef __ASSEMBLY__ 377#ifndef __ASSEMBLY__
345/* Indicate that x86 has its own track and untrack pfn vma functions */ 378/* Indicate that x86 has its own track and untrack pfn vma functions */
346#define __HAVE_PFNMAP_TRACKING 379#define __HAVE_PFNMAP_TRACKING
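
pte_set_flags()/pte_clear_flags() centralize the pteval round trip that each of the mkdirty/wrprotect style helpers previously open-coded through __pte(pte_val(...)). A stripped-down sketch of the same pattern on a plain integer value (not the kernel's pte_t, and the flag bits here are only illustrative):

/* pte_flags.c - sketch of the set/clear-flags helper pattern introduced
 * above, using a bare integer in place of pte_t. */
#include <stdio.h>
#include <stdint.h>

typedef uint64_t pteval_t;

#define _PAGE_RW        (1ULL << 1)
#define _PAGE_DIRTY     (1ULL << 6)

static inline pteval_t pte_set_flags(pteval_t pte, pteval_t set)
{
    return pte | set;
}

static inline pteval_t pte_clear_flags(pteval_t pte, pteval_t clear)
{
    return pte & ~clear;
}

/* the per-flag helpers then reduce to one-liners on top of these two */
static inline pteval_t pte_mkdirty(pteval_t pte)
{
    return pte_set_flags(pte, _PAGE_DIRTY);
}

static inline pteval_t pte_wrprotect(pteval_t pte)
{
    return pte_clear_flags(pte, _PAGE_RW);
}

int main(void)
{
    pteval_t pte = _PAGE_RW;

    pte = pte_mkdirty(pte);
    pte = pte_wrprotect(pte);
    printf("pte flags: %#llx\n", (unsigned long long)pte);
    return 0;
}
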
diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h
index ba09289accaa..1df9637dfda3 100644
--- a/arch/x86/include/asm/pgtable_64.h
+++ b/arch/x86/include/asm/pgtable_64.h
@@ -11,7 +11,6 @@
11#include <asm/processor.h> 11#include <asm/processor.h>
12#include <linux/bitops.h> 12#include <linux/bitops.h>
13#include <linux/threads.h> 13#include <linux/threads.h>
14#include <asm/pda.h>
15 14
16extern pud_t level3_kernel_pgt[512]; 15extern pud_t level3_kernel_pgt[512];
17extern pud_t level3_ident_pgt[512]; 16extern pud_t level3_ident_pgt[512];
diff --git a/arch/x86/include/asm/prctl.h b/arch/x86/include/asm/prctl.h
index a8894647dd9a..3ac5032fae09 100644
--- a/arch/x86/include/asm/prctl.h
+++ b/arch/x86/include/asm/prctl.h
@@ -6,8 +6,4 @@
6#define ARCH_GET_FS 0x1003 6#define ARCH_GET_FS 0x1003
7#define ARCH_GET_GS 0x1004 7#define ARCH_GET_GS 0x1004
8 8
9#ifdef CONFIG_X86_64
10extern long sys_arch_prctl(int, unsigned long);
11#endif /* CONFIG_X86_64 */
12
13#endif /* _ASM_X86_PRCTL_H */ 9#endif /* _ASM_X86_PRCTL_H */
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 091cd8855f2e..84afa0d4d717 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -73,7 +73,7 @@ struct cpuinfo_x86 {
73 char pad0; 73 char pad0;
74#else 74#else
75 /* Number of 4K pages in DTLB/ITLB combined(in pages): */ 75 /* Number of 4K pages in DTLB/ITLB combined(in pages): */
76 int x86_tlbsize; 76 int x86_tlbsize;
77 __u8 x86_virt_bits; 77 __u8 x86_virt_bits;
78 __u8 x86_phys_bits; 78 __u8 x86_phys_bits;
79#endif 79#endif
@@ -378,6 +378,22 @@ union thread_xstate {
378 378
379#ifdef CONFIG_X86_64 379#ifdef CONFIG_X86_64
380DECLARE_PER_CPU(struct orig_ist, orig_ist); 380DECLARE_PER_CPU(struct orig_ist, orig_ist);
381
382union irq_stack_union {
383 char irq_stack[IRQ_STACK_SIZE];
384 /*
385 * GCC hardcodes the stack canary as %gs:40. Since the
386 * irq_stack is the object at %gs:0, we reserve the bottom
387 * 48 bytes of the irq stack for the canary.
388 */
389 struct {
390 char gs_base[40];
391 unsigned long stack_canary;
392 };
393};
394
395DECLARE_PER_CPU(union irq_stack_union, irq_stack_union);
396DECLARE_PER_CPU(char *, irq_stack_ptr);
381#endif 397#endif
382 398
383extern void print_cpu_info(struct cpuinfo_x86 *); 399extern void print_cpu_info(struct cpuinfo_x86 *);
@@ -754,7 +770,6 @@ extern struct desc_ptr early_gdt_descr;
754extern void cpu_set_gdt(int); 770extern void cpu_set_gdt(int);
755extern void switch_to_new_gdt(void); 771extern void switch_to_new_gdt(void);
756extern void cpu_init(void); 772extern void cpu_init(void);
757extern void init_gdt(int cpu);
758 773
759static inline unsigned long get_debugctlmsr(void) 774static inline unsigned long get_debugctlmsr(void)
760{ 775{
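
irq_stack_union exists so that the object placed at %gs:0 is the per-cpu IRQ stack while the stack-protector canary still lands at the %gs:40 offset that GCC hardcodes. A small layout check of that union; IRQ_STACK_SIZE is an assumed value here, since the kernel defines it elsewhere:

/* irq_stack_union.c - layout check for the union declared above: the
 * canary must sit at offset 40 because GCC-instrumented code reads %gs:40.
 * IRQ_STACK_SIZE is an assumed value, not the kernel's definition. */
#include <stdio.h>
#include <stddef.h>

#define IRQ_STACK_SIZE  (16 * 1024)

union irq_stack_union {
    char irq_stack[IRQ_STACK_SIZE];
    struct {
        char gs_base[40];
        unsigned long stack_canary;
    };
};

/* mirrors the BUILD_BUG_ON() in boot_init_stack_canary() */
_Static_assert(offsetof(union irq_stack_union, stack_canary) == 40,
               "stack canary must be at offset 40");

int main(void)
{
    printf("stack_canary offset: %zu\n",
           offsetof(union irq_stack_union, stack_canary));
    return 0;
}
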
diff --git a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h b/arch/x86/include/asm/rdc321x_defs.h
index c8e9c8bed3d0..c8e9c8bed3d0 100644
--- a/arch/x86/include/asm/mach-rdc321x/rdc321x_defs.h
+++ b/arch/x86/include/asm/rdc321x_defs.h
diff --git a/arch/x86/include/asm/setup.h b/arch/x86/include/asm/setup.h
index ebe858cdc8a3..45b40278b582 100644
--- a/arch/x86/include/asm/setup.h
+++ b/arch/x86/include/asm/setup.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_SETUP_H 1#ifndef _ASM_X86_SETUP_H
2#define _ASM_X86_SETUP_H 2#define _ASM_X86_SETUP_H
3 3
4#ifdef __KERNEL__
5
4#define COMMAND_LINE_SIZE 2048 6#define COMMAND_LINE_SIZE 2048
5 7
6#ifndef __ASSEMBLY__ 8#ifndef __ASSEMBLY__
@@ -8,10 +10,8 @@
8/* Interrupt control for vSMPowered x86_64 systems */ 10/* Interrupt control for vSMPowered x86_64 systems */
9void vsmp_init(void); 11void vsmp_init(void);
10 12
11
12void setup_bios_corruption_check(void); 13void setup_bios_corruption_check(void);
13 14
14
15#ifdef CONFIG_X86_VISWS 15#ifdef CONFIG_X86_VISWS
16extern void visws_early_detect(void); 16extern void visws_early_detect(void);
17extern int is_visws_box(void); 17extern int is_visws_box(void);
@@ -43,7 +43,7 @@ struct x86_quirks {
43 void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name); 43 void (*mpc_oem_bus_info)(struct mpc_bus *m, char *name);
44 void (*mpc_oem_pci_bus)(struct mpc_bus *m); 44 void (*mpc_oem_pci_bus)(struct mpc_bus *m);
45 void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable, 45 void (*smp_read_mpc_oem)(struct mpc_oemtable *oemtable,
46 unsigned short oemsize); 46 unsigned short oemsize);
47 int (*setup_ioapic_ids)(void); 47 int (*setup_ioapic_ids)(void);
48 int (*update_genapic)(void); 48 int (*update_genapic)(void);
49}; 49};
@@ -56,8 +56,6 @@ extern unsigned long saved_video_mode;
56#endif 56#endif
57#endif /* __ASSEMBLY__ */ 57#endif /* __ASSEMBLY__ */
58 58
59#ifdef __KERNEL__
60
61#ifdef __i386__ 59#ifdef __i386__
62 60
63#include <linux/pfn.h> 61#include <linux/pfn.h>
@@ -100,7 +98,6 @@ extern unsigned long init_pg_tables_start;
100extern unsigned long init_pg_tables_end; 98extern unsigned long init_pg_tables_end;
101 99
102#else 100#else
103void __init x86_64_init_pda(void);
104void __init x86_64_start_kernel(char *real_mode); 101void __init x86_64_start_kernel(char *real_mode);
105void __init x86_64_start_reservations(char *real_mode_data); 102void __init x86_64_start_reservations(char *real_mode_data);
106 103
diff --git a/arch/x86/include/asm/sigcontext32.h b/arch/x86/include/asm/sigcontext32.h
index 6126188cf3a9..ad1478c4ae12 100644
--- a/arch/x86/include/asm/sigcontext32.h
+++ b/arch/x86/include/asm/sigcontext32.h
@@ -1,6 +1,8 @@
1#ifndef _ASM_X86_SIGCONTEXT32_H 1#ifndef _ASM_X86_SIGCONTEXT32_H
2#define _ASM_X86_SIGCONTEXT32_H 2#define _ASM_X86_SIGCONTEXT32_H
3 3
4#include <linux/types.h>
5
4/* signal context for 32bit programs. */ 6/* signal context for 32bit programs. */
5 7
6#define X86_FXSR_MAGIC 0x0000 8#define X86_FXSR_MAGIC 0x0000
diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h
index a8cea7b09434..45ef8a1b9d7c 100644
--- a/arch/x86/include/asm/smp.h
+++ b/arch/x86/include/asm/smp.h
@@ -15,21 +15,16 @@
15# include <asm/io_apic.h> 15# include <asm/io_apic.h>
16# endif 16# endif
17#endif 17#endif
18#include <asm/pda.h>
19#include <asm/thread_info.h> 18#include <asm/thread_info.h>
20#include <asm/cpumask.h> 19#include <asm/cpumask.h>
21 20
22extern int __cpuinit get_local_pda(int cpu);
23
24extern int smp_num_siblings; 21extern int smp_num_siblings;
25extern unsigned int num_processors; 22extern unsigned int num_processors;
26 23
27DECLARE_PER_CPU(cpumask_t, cpu_sibling_map); 24DECLARE_PER_CPU(cpumask_t, cpu_sibling_map);
28DECLARE_PER_CPU(cpumask_t, cpu_core_map); 25DECLARE_PER_CPU(cpumask_t, cpu_core_map);
29DECLARE_PER_CPU(u16, cpu_llc_id); 26DECLARE_PER_CPU(u16, cpu_llc_id);
30#ifdef CONFIG_X86_32
31DECLARE_PER_CPU(int, cpu_number); 27DECLARE_PER_CPU(int, cpu_number);
32#endif
33 28
34static inline struct cpumask *cpu_sibling_mask(int cpu) 29static inline struct cpumask *cpu_sibling_mask(int cpu)
35{ 30{
@@ -162,11 +157,11 @@ extern unsigned disabled_cpus __cpuinitdata;
162 * from the initial startup. We map APIC_BASE very early in page_setup(), 157 * from the initial startup. We map APIC_BASE very early in page_setup(),
163 * so this is correct in the x86 case. 158 * so this is correct in the x86 case.
164 */ 159 */
165#define raw_smp_processor_id() (x86_read_percpu(cpu_number)) 160#define raw_smp_processor_id() (percpu_read(cpu_number))
166extern int safe_smp_processor_id(void); 161extern int safe_smp_processor_id(void);
167 162
168#elif defined(CONFIG_X86_64_SMP) 163#elif defined(CONFIG_X86_64_SMP)
169#define raw_smp_processor_id() read_pda(cpunumber) 164#define raw_smp_processor_id() (percpu_read(cpu_number))
170 165
171#define stack_smp_processor_id() \ 166#define stack_smp_processor_id() \
172({ \ 167({ \
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 2bd6b111a414..139b4249a5ec 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -267,8 +267,7 @@ static inline int __raw_read_trylock(raw_rwlock_t *lock)
267{ 267{
268 atomic_t *count = (atomic_t *)lock; 268 atomic_t *count = (atomic_t *)lock;
269 269
270 atomic_dec(count); 270 if (atomic_dec_return(count) >= 0)
271 if (atomic_read(count) >= 0)
272 return 1; 271 return 1;
273 atomic_inc(count); 272 atomic_inc(count);
274 return 0; 273 return 0;
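
The __raw_read_trylock() change closes a window where the old atomic_dec()-then-atomic_read() pair could base its decision on another CPU's concurrent update instead of on this CPU's own decrement; atomic_dec_return() ties the test to the value that this decrement produced. A C11-atomics sketch of the corrected pattern, assuming a simple positive reader budget rather than the kernel's raw_rwlock_t layout:

/* read_trylock.c - sketch of the corrected read-trylock logic with C11
 * atomics; >= 0 after the decrement means the read lock was acquired. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int count = 64;           /* free lock: positive reader budget */

static int read_trylock(atomic_int *cnt)
{
    /* fetch_sub returns the old value, so old - 1 is what
     * atomic_dec_return() would have handed back */
    if (atomic_fetch_sub(cnt, 1) - 1 >= 0)
        return 1;                       /* got the read lock */
    atomic_fetch_add(cnt, 1);           /* undo: lock is write-held */
    return 0;
}

int main(void)
{
    printf("first try: %d\n", read_trylock(&count));
    return 0;
}
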
diff --git a/arch/x86/include/asm/stackprotector.h b/arch/x86/include/asm/stackprotector.h
new file mode 100644
index 000000000000..36a700acaf2b
--- /dev/null
+++ b/arch/x86/include/asm/stackprotector.h
@@ -0,0 +1,38 @@
1#ifndef _ASM_STACKPROTECTOR_H
2#define _ASM_STACKPROTECTOR_H 1
3
4#include <asm/tsc.h>
5#include <asm/processor.h>
6
7/*
8 * Initialize the stackprotector canary value.
9 *
10 * NOTE: this must only be called from functions that never return,
11 * and it must always be inlined.
12 */
13static __always_inline void boot_init_stack_canary(void)
14{
15 u64 canary;
16 u64 tsc;
17
18 /*
19 * Build time only check to make sure the stack_canary is at
20 * offset 40 in the pda; this is a gcc ABI requirement
21 */
22 BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
23
24 /*
25 * We both use the random pool and the current TSC as a source
26 * of randomness. The TSC only matters for very early init,
27 * there it already has some randomness on most systems. Later
28 * on during the bootup the random pool has true entropy too.
29 */
30 get_random_bytes(&canary, sizeof(canary));
31 tsc = __native_read_tsc();
32 canary += tsc + (tsc << 32UL);
33
34 current->stack_canary = canary;
35 percpu_write(irq_stack_union.stack_canary, canary);
36}
37
38#endif
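
The canary is seeded from the random pool and then perturbed with the TSC, so very early boot, before the pool has real entropy, still gets a value that differs from boot to boot. A user-space sketch of the same mixing step, with getrandom() and __rdtsc() standing in for get_random_bytes() and __native_read_tsc() (x86-64 and glibc 2.25+ assumed):

/* canary_mix.c - the mixing arithmetic from boot_init_stack_canary(),
 * reproduced with user-space entropy sources. */
#include <sys/random.h>
#include <x86intrin.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint64_t canary = 0;
    uint64_t tsc;

    if (getrandom(&canary, sizeof(canary), 0) != sizeof(canary))
        return 1;
    tsc = __rdtsc();
    canary += tsc + (tsc << 32);        /* same step as the function above */

    printf("canary: %#018llx\n", (unsigned long long)canary);
    return 0;
}
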
diff --git a/arch/x86/include/asm/syscalls.h b/arch/x86/include/asm/syscalls.h
index 9c6797c3e56c..c0b0bda754ee 100644
--- a/arch/x86/include/asm/syscalls.h
+++ b/arch/x86/include/asm/syscalls.h
@@ -40,7 +40,7 @@ asmlinkage int sys_sigaction(int, const struct old_sigaction __user *,
40 struct old_sigaction __user *); 40 struct old_sigaction __user *);
41asmlinkage int sys_sigaltstack(unsigned long); 41asmlinkage int sys_sigaltstack(unsigned long);
42asmlinkage unsigned long sys_sigreturn(unsigned long); 42asmlinkage unsigned long sys_sigreturn(unsigned long);
43asmlinkage int sys_rt_sigreturn(struct pt_regs); 43asmlinkage int sys_rt_sigreturn(unsigned long);
44 44
45/* kernel/ioport.c */ 45/* kernel/ioport.c */
46asmlinkage long sys_iopl(unsigned long); 46asmlinkage long sys_iopl(unsigned long);
diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h
index 8e626ea33a1a..c22383743f36 100644
--- a/arch/x86/include/asm/system.h
+++ b/arch/x86/include/asm/system.h
@@ -86,27 +86,44 @@ do { \
86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \ 86 , "rcx", "rbx", "rdx", "r8", "r9", "r10", "r11", \
87 "r12", "r13", "r14", "r15" 87 "r12", "r13", "r14", "r15"
88 88
89#ifdef CONFIG_CC_STACKPROTECTOR
90#define __switch_canary \
91 "movq %P[task_canary](%%rsi),%%r8\n\t" \
92 "movq %%r8,"__percpu_arg([gs_canary])"\n\t"
93#define __switch_canary_oparam \
94 , [gs_canary] "=m" (per_cpu_var(irq_stack_union.stack_canary))
95#define __switch_canary_iparam \
96 , [task_canary] "i" (offsetof(struct task_struct, stack_canary))
97#else /* CC_STACKPROTECTOR */
98#define __switch_canary
99#define __switch_canary_oparam
100#define __switch_canary_iparam
101#endif /* CC_STACKPROTECTOR */
102
89/* Save restore flags to clear handle leaking NT */ 103/* Save restore flags to clear handle leaking NT */
90#define switch_to(prev, next, last) \ 104#define switch_to(prev, next, last) \
91 asm volatile(SAVE_CONTEXT \ 105 asm volatile(SAVE_CONTEXT \
92 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \ 106 "movq %%rsp,%P[threadrsp](%[prev])\n\t" /* save RSP */ \
93 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \ 107 "movq %P[threadrsp](%[next]),%%rsp\n\t" /* restore RSP */ \
94 "call __switch_to\n\t" \ 108 "call __switch_to\n\t" \
95 ".globl thread_return\n" \ 109 ".globl thread_return\n" \
96 "thread_return:\n\t" \ 110 "thread_return:\n\t" \
97 "movq %%gs:%P[pda_pcurrent],%%rsi\n\t" \ 111 "movq "__percpu_arg([current_task])",%%rsi\n\t" \
112 __switch_canary \
98 "movq %P[thread_info](%%rsi),%%r8\n\t" \ 113 "movq %P[thread_info](%%rsi),%%r8\n\t" \
99 LOCK_PREFIX "btr %[tif_fork],%P[ti_flags](%%r8)\n\t" \
100 "movq %%rax,%%rdi\n\t" \ 114 "movq %%rax,%%rdi\n\t" \
101 "jc ret_from_fork\n\t" \ 115 "testl %[_tif_fork],%P[ti_flags](%%r8)\n\t" \
116 "jnz ret_from_fork\n\t" \
102 RESTORE_CONTEXT \ 117 RESTORE_CONTEXT \
103 : "=a" (last) \ 118 : "=a" (last) \
119 __switch_canary_oparam \
104 : [next] "S" (next), [prev] "D" (prev), \ 120 : [next] "S" (next), [prev] "D" (prev), \
105 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \ 121 [threadrsp] "i" (offsetof(struct task_struct, thread.sp)), \
106 [ti_flags] "i" (offsetof(struct thread_info, flags)), \ 122 [ti_flags] "i" (offsetof(struct thread_info, flags)), \
107 [tif_fork] "i" (TIF_FORK), \ 123 [_tif_fork] "i" (_TIF_FORK), \
108 [thread_info] "i" (offsetof(struct task_struct, stack)), \ 124 [thread_info] "i" (offsetof(struct task_struct, stack)), \
109 [pda_pcurrent] "i" (offsetof(struct x8664_pda, pcurrent)) \ 125 [current_task] "m" (per_cpu_var(current_task)) \
126 __switch_canary_iparam \
110 : "memory", "cc" __EXTRA_CLOBBER) 127 : "memory", "cc" __EXTRA_CLOBBER)
111#endif 128#endif
112 129
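
The __switch_canary fragment copies the incoming task's saved canary into the per-cpu slot at %gs:40, since GCC-instrumented functions always check against that one fixed location rather than anything stored per task. A plain-C sketch of the hand-off, with stand-in structures instead of task_struct and the per-cpu variable:

/* canary_switch.c - what the __switch_canary asm fragment does at each
 * context switch, in plain C with made-up types and values. */
#include <stdio.h>

struct task {
    unsigned long stack_canary;
    const char *name;
};

/* stands in for per_cpu(irq_stack_union.stack_canary), i.e. %gs:40 */
static unsigned long cpu_stack_canary;

static void switch_canary(const struct task *next)
{
    cpu_stack_canary = next->stack_canary;
}

int main(void)
{
    struct task a = { 0x11112222UL, "A" };
    struct task b = { 0x55556666UL, "B" };

    switch_canary(&a);
    printf("running %s, cpu canary=%#lx\n", a.name, cpu_stack_canary);
    switch_canary(&b);
    printf("running %s, cpu canary=%#lx\n", b.name, cpu_stack_canary);
    return 0;
}
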
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 3f90aeb456bc..df9d5f78385e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -195,25 +195,21 @@ static inline struct thread_info *current_thread_info(void)
195 195
196#else /* X86_32 */ 196#else /* X86_32 */
197 197
198#include <asm/pda.h> 198#include <asm/percpu.h>
199#define KERNEL_STACK_OFFSET (5*8)
199 200
200/* 201/*
201 * macros/functions for gaining access to the thread information structure 202 * macros/functions for gaining access to the thread information structure
202 * preempt_count needs to be 1 initially, until the scheduler is functional. 203 * preempt_count needs to be 1 initially, until the scheduler is functional.
203 */ 204 */
204#ifndef __ASSEMBLY__ 205#ifndef __ASSEMBLY__
205static inline struct thread_info *current_thread_info(void) 206DECLARE_PER_CPU(unsigned long, kernel_stack);
206{
207 struct thread_info *ti;
208 ti = (void *)(read_pda(kernelstack) + PDA_STACKOFFSET - THREAD_SIZE);
209 return ti;
210}
211 207
212/* do not use in interrupt context */ 208static inline struct thread_info *current_thread_info(void)
213static inline struct thread_info *stack_thread_info(void)
214{ 209{
215 struct thread_info *ti; 210 struct thread_info *ti;
216 asm("andq %%rsp,%0; " : "=r" (ti) : "0" (~(THREAD_SIZE - 1))); 211 ti = (void *)(percpu_read(kernel_stack) +
212 KERNEL_STACK_OFFSET - THREAD_SIZE);
217 return ti; 213 return ti;
218} 214}
219 215
@@ -221,8 +217,8 @@ static inline struct thread_info *stack_thread_info(void)
221 217
222/* how to get the thread information struct from ASM */ 218/* how to get the thread information struct from ASM */
223#define GET_THREAD_INFO(reg) \ 219#define GET_THREAD_INFO(reg) \
224 movq %gs:pda_kernelstack,reg ; \ 220 movq PER_CPU_VAR(kernel_stack),reg ; \
225 subq $(THREAD_SIZE-PDA_STACKOFFSET),reg 221 subq $(THREAD_SIZE-KERNEL_STACK_OFFSET),reg
226 222
227#endif 223#endif
228 224
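
current_thread_info() now derives the thread_info address from the per-cpu kernel_stack value instead of the PDA: kernel_stack points KERNEL_STACK_OFFSET bytes below the top of the THREAD_SIZE-sized stack, and thread_info lives at the bottom of that stack. A small arithmetic check of that relationship, using illustrative addresses and an assumed THREAD_SIZE:

/* thread_info_calc.c - the address arithmetic behind the new
 * current_thread_info(); the base address and THREAD_SIZE are
 * illustrative, not the kernel's values. */
#include <stdio.h>
#include <stdint.h>

#define THREAD_SIZE             (16 * 1024ULL)
#define KERNEL_STACK_OFFSET     (5 * 8ULL)

int main(void)
{
    uint64_t stack_bottom = 0xffff880012340000ULL;      /* made-up base */
    uint64_t kernel_stack = stack_bottom + THREAD_SIZE
                            - KERNEL_STACK_OFFSET;

    /* percpu_read(kernel_stack) + KERNEL_STACK_OFFSET - THREAD_SIZE
     * lands back on the bottom of the stack, where thread_info sits */
    uint64_t thread_info = kernel_stack + KERNEL_STACK_OFFSET
                           - THREAD_SIZE;

    printf("thread_info at %#llx (stack bottom %#llx)\n",
           (unsigned long long)thread_info,
           (unsigned long long)stack_bottom);
    return 0;
}
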
diff --git a/arch/x86/include/asm/timex.h b/arch/x86/include/asm/timex.h
index 1287dc1347d6..b5c9d45c981f 100644
--- a/arch/x86/include/asm/timex.h
+++ b/arch/x86/include/asm/timex.h
@@ -1,18 +1,13 @@
1/* x86 architecture timex specifications */
2#ifndef _ASM_X86_TIMEX_H 1#ifndef _ASM_X86_TIMEX_H
3#define _ASM_X86_TIMEX_H 2#define _ASM_X86_TIMEX_H
4 3
5#include <asm/processor.h> 4#include <asm/processor.h>
6#include <asm/tsc.h> 5#include <asm/tsc.h>
7 6
8#ifdef CONFIG_X86_ELAN 7/* The PIT ticks at this frequency (in HZ): */
9# define PIT_TICK_RATE 1189200 /* AMD Elan has different frequency! */ 8#define PIT_TICK_RATE 1193182
10#elif defined(CONFIG_X86_RDC321X) 9
11# define PIT_TICK_RATE 1041667 /* Underlying HZ for R8610 */ 10#define CLOCK_TICK_RATE PIT_TICK_RATE
12#else
13# define PIT_TICK_RATE 1193182 /* Underlying HZ */
14#endif
15#define CLOCK_TICK_RATE PIT_TICK_RATE
16 11
17#define ARCH_HAS_READ_CURRENT_TIMER 12#define ARCH_HAS_READ_CURRENT_TIMER
18 13
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index aed0b700b837..d3539f998f88 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -113,7 +113,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
113 __flush_tlb(); 113 __flush_tlb();
114} 114}
115 115
116static inline void native_flush_tlb_others(const cpumask_t *cpumask, 116static inline void native_flush_tlb_others(const struct cpumask *cpumask,
117 struct mm_struct *mm, 117 struct mm_struct *mm,
118 unsigned long va) 118 unsigned long va)
119{ 119{
@@ -142,31 +142,28 @@ static inline void flush_tlb_range(struct vm_area_struct *vma,
142 flush_tlb_mm(vma->vm_mm); 142 flush_tlb_mm(vma->vm_mm);
143} 143}
144 144
145void native_flush_tlb_others(const cpumask_t *cpumask, struct mm_struct *mm, 145void native_flush_tlb_others(const struct cpumask *cpumask,
146 unsigned long va); 146 struct mm_struct *mm, unsigned long va);
147 147
148#define TLBSTATE_OK 1 148#define TLBSTATE_OK 1
149#define TLBSTATE_LAZY 2 149#define TLBSTATE_LAZY 2
150 150
151#ifdef CONFIG_X86_32
152struct tlb_state { 151struct tlb_state {
153 struct mm_struct *active_mm; 152 struct mm_struct *active_mm;
154 int state; 153 int state;
155 char __cacheline_padding[L1_CACHE_BYTES-8];
156}; 154};
157DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate); 155DECLARE_PER_CPU(struct tlb_state, cpu_tlbstate);
158 156
159void reset_lazy_tlbstate(void);
160#else
161static inline void reset_lazy_tlbstate(void) 157static inline void reset_lazy_tlbstate(void)
162{ 158{
159 percpu_write(cpu_tlbstate.state, 0);
160 percpu_write(cpu_tlbstate.active_mm, &init_mm);
163} 161}
164#endif
165 162
166#endif /* SMP */ 163#endif /* SMP */
167 164
168#ifndef CONFIG_PARAVIRT 165#ifndef CONFIG_PARAVIRT
169#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(&mask, mm, va) 166#define flush_tlb_others(mask, mm, va) native_flush_tlb_others(mask, mm, va)
170#endif 167#endif
171 168
172static inline void flush_tlb_kernel_range(unsigned long start, 169static inline void flush_tlb_kernel_range(unsigned long start,
diff --git a/arch/x86/include/asm/topology.h b/arch/x86/include/asm/topology.h
index 4e2f2e0aab27..77cfb2cfb386 100644
--- a/arch/x86/include/asm/topology.h
+++ b/arch/x86/include/asm/topology.h
@@ -74,6 +74,8 @@ static inline const struct cpumask *cpumask_of_node(int node)
74 return &node_to_cpumask_map[node]; 74 return &node_to_cpumask_map[node];
75} 75}
76 76
77static inline void setup_node_to_cpumask_map(void) { }
78
77#else /* CONFIG_X86_64 */ 79#else /* CONFIG_X86_64 */
78 80
79/* Mappings between node number and cpus on that node. */ 81/* Mappings between node number and cpus on that node. */
@@ -83,7 +85,8 @@ extern cpumask_t *node_to_cpumask_map;
83DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map); 85DECLARE_EARLY_PER_CPU(int, x86_cpu_to_node_map);
84 86
85/* Returns the number of the current Node. */ 87/* Returns the number of the current Node. */
86#define numa_node_id() read_pda(nodenumber) 88DECLARE_PER_CPU(int, node_number);
89#define numa_node_id() percpu_read(node_number)
87 90
88#ifdef CONFIG_DEBUG_PER_CPU_MAPS 91#ifdef CONFIG_DEBUG_PER_CPU_MAPS
89extern int cpu_to_node(int cpu); 92extern int cpu_to_node(int cpu);
@@ -102,10 +105,7 @@ static inline int cpu_to_node(int cpu)
102/* Same function but used if called before per_cpu areas are setup */ 105/* Same function but used if called before per_cpu areas are setup */
103static inline int early_cpu_to_node(int cpu) 106static inline int early_cpu_to_node(int cpu)
104{ 107{
105 if (early_per_cpu_ptr(x86_cpu_to_node_map)) 108 return early_per_cpu(x86_cpu_to_node_map, cpu);
106 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
107
108 return per_cpu(x86_cpu_to_node_map, cpu);
109} 109}
110 110
111/* Returns a pointer to the cpumask of CPUs on Node 'node'. */ 111/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
@@ -122,6 +122,8 @@ static inline cpumask_t node_to_cpumask(int node)
122 122
123#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */ 123#endif /* !CONFIG_DEBUG_PER_CPU_MAPS */
124 124
125extern void setup_node_to_cpumask_map(void);
126
125/* 127/*
126 * Replace default node_to_cpumask_ptr with optimized version 128 * Replace default node_to_cpumask_ptr with optimized version
127 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" 129 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
@@ -192,9 +194,20 @@ extern int __node_distance(int, int);
192 194
193#else /* !CONFIG_NUMA */ 195#else /* !CONFIG_NUMA */
194 196
195#define numa_node_id() 0 197static inline int numa_node_id(void)
196#define cpu_to_node(cpu) 0 198{
197#define early_cpu_to_node(cpu) 0 199 return 0;
200}
201
202static inline int cpu_to_node(int cpu)
203{
204 return 0;
205}
206
207static inline int early_cpu_to_node(int cpu)
208{
209 return 0;
210}
198 211
199static inline const cpumask_t *cpumask_of_node(int node) 212static inline const cpumask_t *cpumask_of_node(int node)
200{ 213{
@@ -209,6 +222,8 @@ static inline int node_to_first_cpu(int node)
209 return first_cpu(cpu_online_map); 222 return first_cpu(cpu_online_map);
210} 223}
211 224
225static inline void setup_node_to_cpumask_map(void) { }
226
212/* 227/*
213 * Replace default node_to_cpumask_ptr with optimized version 228 * Replace default node_to_cpumask_ptr with optimized version
214 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)" 229 * Deprecated: use "const struct cpumask *mask = cpumask_of_node(node)"
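
The !CONFIG_NUMA stubs turn from object-like macros into static inlines, so callers keep argument type checking even when NUMA is compiled out. A short sketch of the difference, with hypothetical names:

/* stub_inline.c - why 'static inline' stubs are preferred over plain
 * '#define foo(cpu) 0': the inline still type-checks its argument. */
#include <stdio.h>

#define cpu_to_node_macro(cpu)  0       /* argument silently vanishes */

static inline int cpu_to_node_inline(int cpu)
{
    (void)cpu;                          /* argument is still type-checked */
    return 0;
}

int main(void)
{
    /* cpu_to_node_macro("oops") compiles without complaint, while
     * cpu_to_node_inline("oops") draws a type warning */
    printf("%d %d\n", cpu_to_node_macro(1), cpu_to_node_inline(1));
    return 0;
}
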
diff --git a/arch/x86/include/asm/trampoline.h b/arch/x86/include/asm/trampoline.h
index 780ba0ab94f9..90f06c25221d 100644
--- a/arch/x86/include/asm/trampoline.h
+++ b/arch/x86/include/asm/trampoline.h
@@ -13,6 +13,7 @@ extern unsigned char *trampoline_base;
13 13
14extern unsigned long init_rsp; 14extern unsigned long init_rsp;
15extern unsigned long initial_code; 15extern unsigned long initial_code;
16extern unsigned long initial_gs;
16 17
17#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE) 18#define TRAMPOLINE_SIZE roundup(trampoline_end - trampoline_data, PAGE_SIZE)
18#define TRAMPOLINE_BASE 0x6000 19#define TRAMPOLINE_BASE 0x6000
diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h
new file mode 100644
index 000000000000..8ac1d7e312f3
--- /dev/null
+++ b/arch/x86/include/asm/uv/uv.h
@@ -0,0 +1,33 @@
1#ifndef _ASM_X86_UV_UV_H
2#define _ASM_X86_UV_UV_H
3
4enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
5
6#ifdef CONFIG_X86_UV
7
8extern enum uv_system_type get_uv_system_type(void);
9extern int is_uv_system(void);
10extern void uv_cpu_init(void);
11extern void uv_system_init(void);
12extern int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip);
13extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
14 struct mm_struct *mm,
15 unsigned long va,
16 unsigned int cpu);
17
18#else /* X86_UV */
19
20static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; }
21static inline int is_uv_system(void) { return 0; }
22static inline void uv_cpu_init(void) { }
23static inline void uv_system_init(void) { }
24static inline int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip)
25{ return 1; }
26static inline const struct cpumask *
27uv_flush_tlb_others(const struct cpumask *cpumask, struct mm_struct *mm,
28 unsigned long va, unsigned int cpu)
29{ return cpumask; }
30
31#endif /* X86_UV */
32
33#endif /* _ASM_X86_UV_UV_H */
diff --git a/arch/x86/include/asm/uv/uv_bau.h b/arch/x86/include/asm/uv/uv_bau.h
index 50423c7b56b2..9b0e61bf7a88 100644
--- a/arch/x86/include/asm/uv/uv_bau.h
+++ b/arch/x86/include/asm/uv/uv_bau.h
@@ -325,7 +325,6 @@ static inline void bau_cpubits_clear(struct bau_local_cpumask *dstp, int nbits)
325#define cpubit_isset(cpu, bau_local_cpumask) \ 325#define cpubit_isset(cpu, bau_local_cpumask) \
326 test_bit((cpu), (bau_local_cpumask).bits) 326 test_bit((cpu), (bau_local_cpumask).bits)
327 327
328extern int uv_flush_tlb_others(cpumask_t *, struct mm_struct *, unsigned long);
329extern void uv_bau_message_intr1(void); 328extern void uv_bau_message_intr1(void);
330extern void uv_bau_timeout_intr1(void); 329extern void uv_bau_timeout_intr1(void);
331 330
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index d364df03c1d6..37fa30bada17 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -23,11 +23,12 @@ nostackp := $(call cc-option, -fno-stack-protector)
23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp) 23CFLAGS_vsyscall_64.o := $(PROFILING) -g0 $(nostackp)
24CFLAGS_hpet.o := $(nostackp) 24CFLAGS_hpet.o := $(nostackp)
25CFLAGS_tsc.o := $(nostackp) 25CFLAGS_tsc.o := $(nostackp)
26CFLAGS_paravirt.o := $(nostackp)
26 27
27obj-y := process_$(BITS).o signal.o entry_$(BITS).o 28obj-y := process_$(BITS).o signal.o entry_$(BITS).o
28obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o 29obj-y += traps.o irq.o irq_$(BITS).o dumpstack_$(BITS).o
29obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o 30obj-y += time_$(BITS).o ioport.o ldt.o dumpstack.o
30obj-y += setup.o i8259.o irqinit_$(BITS).o setup_percpu.o 31obj-y += setup.o i8259.o irqinit_$(BITS).o
31obj-$(CONFIG_X86_VISWS) += visws_quirks.o 32obj-$(CONFIG_X86_VISWS) += visws_quirks.o
32obj-$(CONFIG_X86_32) += probe_roms_32.o 33obj-$(CONFIG_X86_32) += probe_roms_32.o
33obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o 34obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o
@@ -57,9 +58,9 @@ obj-$(CONFIG_PCI) += early-quirks.o
57apm-y := apm_32.o 58apm-y := apm_32.o
58obj-$(CONFIG_APM) += apm.o 59obj-$(CONFIG_APM) += apm.o
59obj-$(CONFIG_X86_SMP) += smp.o 60obj-$(CONFIG_X86_SMP) += smp.o
60obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o tlb_$(BITS).o 61obj-$(CONFIG_X86_SMP) += smpboot.o tsc_sync.o ipi.o
61obj-$(CONFIG_X86_32_SMP) += smpcommon.o 62obj-$(CONFIG_SMP) += setup_percpu.o
62obj-$(CONFIG_X86_64_SMP) += tsc_sync.o smpcommon.o 63obj-$(CONFIG_X86_64_SMP) += tsc_sync.o
63obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o 64obj-$(CONFIG_X86_TRAMPOLINE) += trampoline_$(BITS).o
64obj-$(CONFIG_X86_MPPARSE) += mpparse.o 65obj-$(CONFIG_X86_MPPARSE) += mpparse.o
65obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o 66obj-$(CONFIG_X86_LOCAL_APIC) += apic.o nmi.o
@@ -114,10 +115,11 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb_64.o # NB rename without _64
114### 115###
115# 64 bit specific files 116# 64 bit specific files
116ifeq ($(CONFIG_X86_64),y) 117ifeq ($(CONFIG_X86_64),y)
117 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 118 obj-y += genapic_64.o genapic_flat_64.o
118 obj-y += bios_uv.o uv_irq.o uv_sysfs.o
119 obj-y += genx2apic_cluster.o 119 obj-y += genx2apic_cluster.o
120 obj-y += genx2apic_phys.o 120 obj-y += genx2apic_phys.o
121 obj-$(CONFIG_X86_UV) += genx2apic_uv_x.o tlb_uv.o
122 obj-$(CONFIG_X86_UV) += bios_uv.o uv_irq.o uv_sysfs.o
121 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 123 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
122 obj-$(CONFIG_AUDIT) += audit_64.o 124 obj-$(CONFIG_AUDIT) += audit_64.o
123 125
diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c
index 707c1f6f95fa..4abff454c55b 100644
--- a/arch/x86/kernel/acpi/sleep.c
+++ b/arch/x86/kernel/acpi/sleep.c
@@ -101,6 +101,7 @@ int acpi_save_state_mem(void)
101 stack_start.sp = temp_stack + sizeof(temp_stack); 101 stack_start.sp = temp_stack + sizeof(temp_stack);
102 early_gdt_descr.address = 102 early_gdt_descr.address =
103 (unsigned long)get_cpu_gdt_table(smp_processor_id()); 103 (unsigned long)get_cpu_gdt_table(smp_processor_id());
104 initial_gs = per_cpu_offset(smp_processor_id());
104#endif 105#endif
105 initial_code = (unsigned long)wakeup_long64; 106 initial_code = (unsigned long)wakeup_long64;
106 saved_magic = 0x123456789abcdef0; 107 saved_magic = 0x123456789abcdef0;
diff --git a/arch/x86/kernel/apic.c b/arch/x86/kernel/apic.c
index 566a08466b19..c6f15647eba9 100644
--- a/arch/x86/kernel/apic.c
+++ b/arch/x86/kernel/apic.c
@@ -47,6 +47,7 @@
47#include <asm/proto.h> 47#include <asm/proto.h>
48#include <asm/apic.h> 48#include <asm/apic.h>
49#include <asm/i8259.h> 49#include <asm/i8259.h>
50#include <asm/smp.h>
50 51
51#include <mach_apic.h> 52#include <mach_apic.h>
52#include <mach_apicdef.h> 53#include <mach_apicdef.h>
@@ -59,6 +60,24 @@
59# error SPURIOUS_APIC_VECTOR definition error 60# error SPURIOUS_APIC_VECTOR definition error
60#endif 61#endif
61 62
63unsigned int num_processors;
64unsigned disabled_cpus __cpuinitdata;
65/* Processor that is doing the boot up */
66unsigned int boot_cpu_physical_apicid = -1U;
67EXPORT_SYMBOL(boot_cpu_physical_apicid);
68unsigned int max_physical_apicid;
69
70/* Bitmask of physically existing CPUs */
71physid_mask_t phys_cpu_present_map;
72
73/*
74 * Map cpu index to physical APIC ID
75 */
76DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
77DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
78EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
79EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
80
62#ifdef CONFIG_X86_32 81#ifdef CONFIG_X86_32
63/* 82/*
64 * Knob to control our willingness to enable the local APIC. 83 * Knob to control our willingness to enable the local APIC.
@@ -894,6 +913,10 @@ void disable_local_APIC(void)
894{ 913{
895 unsigned int value; 914 unsigned int value;
896 915
916 /* APIC hasn't been mapped yet */
917 if (!apic_phys)
918 return;
919
897 clear_local_APIC(); 920 clear_local_APIC();
898 921
899 /* 922 /*
@@ -1125,6 +1148,13 @@ void __cpuinit setup_local_APIC(void)
1125 unsigned int value; 1148 unsigned int value;
1126 int i, j; 1149 int i, j;
1127 1150
1151 if (disable_apic) {
1152#ifdef CONFIG_X86_IO_APIC
1153 disable_ioapic_setup();
1154#endif
1155 return;
1156 }
1157
1128#ifdef CONFIG_X86_32 1158#ifdef CONFIG_X86_32
1129 /* Pound the ESR really hard over the head with a big hammer - mbligh */ 1159 /* Pound the ESR really hard over the head with a big hammer - mbligh */
1130 if (lapic_is_integrated() && esr_disable) { 1160 if (lapic_is_integrated() && esr_disable) {
@@ -1565,11 +1595,11 @@ int apic_version[MAX_APICS];
1565 1595
1566int __init APIC_init_uniprocessor(void) 1596int __init APIC_init_uniprocessor(void)
1567{ 1597{
1568#ifdef CONFIG_X86_64
1569 if (disable_apic) { 1598 if (disable_apic) {
1570 pr_info("Apic disabled\n"); 1599 pr_info("Apic disabled\n");
1571 return -1; 1600 return -1;
1572 } 1601 }
1602#ifdef CONFIG_X86_64
1573 if (!cpu_has_apic) { 1603 if (!cpu_has_apic) {
1574 disable_apic = 1; 1604 disable_apic = 1;
1575 pr_info("Apic disabled by BIOS\n"); 1605 pr_info("Apic disabled by BIOS\n");
@@ -1832,6 +1862,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1832 num_processors++; 1862 num_processors++;
1833 cpu = cpumask_next_zero(-1, cpu_present_mask); 1863 cpu = cpumask_next_zero(-1, cpu_present_mask);
1834 1864
1865 if (version != apic_version[boot_cpu_physical_apicid])
1866 WARN_ONCE(1,
1867 "ACPI: apic version mismatch, bootcpu: %x cpu %d: %x\n",
1868 apic_version[boot_cpu_physical_apicid], cpu, version);
1869
1835 physid_set(apicid, phys_cpu_present_map); 1870 physid_set(apicid, phys_cpu_present_map);
1836 if (apicid == boot_cpu_physical_apicid) { 1871 if (apicid == boot_cpu_physical_apicid) {
1837 /* 1872 /*
@@ -1867,17 +1902,8 @@ void __cpuinit generic_processor_info(int apicid, int version)
1867#endif 1902#endif
1868 1903
1869#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64) 1904#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1870 /* are we being called early in kernel startup? */ 1905 early_per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1871 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1906 early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1872 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
1873 u16 *bios_cpu_apicid = early_per_cpu_ptr(x86_bios_cpu_apicid);
1874
1875 cpu_to_apicid[cpu] = apicid;
1876 bios_cpu_apicid[cpu] = apicid;
1877 } else {
1878 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1879 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1880 }
1881#endif 1907#endif
1882 1908
1883 set_cpu_possible(cpu, true); 1909 set_cpu_possible(cpu, true);
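
generic_processor_info() can now assign through early_per_cpu() directly because the reworked macro selects between the early map and the real per-cpu variable by address and dereferences the result, which yields an lvalue. A compact sketch of that ternary-of-addresses trick, with plain int arrays standing in for the early __initdata map and the per-cpu variable:

/* early_lvalue.c - selecting between two objects by address and
 * dereferencing gives an assignable lvalue, which is what lets
 * early_per_cpu() appear on the left-hand side. */
#include <stdio.h>

static int early_map[4];                /* stands in for the early map */
static int percpu_copy[4];              /* stands in for the per-cpu variable */
static int *early_ptr = early_map;      /* cleared once per-cpu areas exist */

#define early_per_cpu(cpu) \
    (*(early_ptr ? &early_ptr[cpu] : &percpu_copy[cpu]))

int main(void)
{
    early_per_cpu(2) = 42;              /* goes to the early map */
    early_ptr = NULL;                   /* per-cpu areas are now set up */
    early_per_cpu(2) = 7;               /* goes to the per-cpu copy */
    printf("%d %d\n", early_map[2], percpu_copy[2]);
    return 0;
}
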
diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c
index 1d41d3f1edbc..8793ab33e2c1 100644
--- a/arch/x86/kernel/asm-offsets_64.c
+++ b/arch/x86/kernel/asm-offsets_64.c
@@ -11,7 +11,6 @@
11#include <linux/hardirq.h> 11#include <linux/hardirq.h>
12#include <linux/suspend.h> 12#include <linux/suspend.h>
13#include <linux/kbuild.h> 13#include <linux/kbuild.h>
14#include <asm/pda.h>
15#include <asm/processor.h> 14#include <asm/processor.h>
16#include <asm/segment.h> 15#include <asm/segment.h>
17#include <asm/thread_info.h> 16#include <asm/thread_info.h>
@@ -48,16 +47,6 @@ int main(void)
48#endif 47#endif
49 BLANK(); 48 BLANK();
50#undef ENTRY 49#undef ENTRY
51#define ENTRY(entry) DEFINE(pda_ ## entry, offsetof(struct x8664_pda, entry))
52 ENTRY(kernelstack);
53 ENTRY(oldrsp);
54 ENTRY(pcurrent);
55 ENTRY(irqcount);
56 ENTRY(cpunumber);
57 ENTRY(irqstackptr);
58 ENTRY(data_offset);
59 BLANK();
60#undef ENTRY
61#ifdef CONFIG_PARAVIRT 50#ifdef CONFIG_PARAVIRT
62 BLANK(); 51 BLANK();
63 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled); 52 OFFSET(PARAVIRT_enabled, pv_info, paravirt_enabled);
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index 2cf23634b6d9..4e581fdc0a5a 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -143,37 +143,3 @@ void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
143 return; 143 return;
144#endif 144#endif
145} 145}
146
147#ifdef CONFIG_X86_PAT
148void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
149{
150 if (!cpu_has_pat)
151 pat_disable("PAT not supported by CPU.");
152
153 switch (c->x86_vendor) {
154 case X86_VENDOR_INTEL:
155 /*
156 * There is a known erratum on Pentium III and Core Solo
157 * and Core Duo CPUs.
158 * " Page with PAT set to WC while associated MTRR is UC
159 * may consolidate to UC "
160 * Because of this erratum, it is better to stick with
161 * setting WC in MTRR rather than using PAT on these CPUs.
162 *
163 * Enable PAT WC only on P4, Core 2 or later CPUs.
164 */
165 if (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 15))
166 return;
167
168 pat_disable("PAT WC disabled due to known CPU erratum.");
169 return;
170
171 case X86_VENDOR_AMD:
172 case X86_VENDOR_CENTAUR:
173 case X86_VENDOR_TRANSMETA:
174 return;
175 }
176
177 pat_disable("PAT disabled. Not yet verified on this CPU type.");
178}
179#endif
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index f00258462444..275e2cb43b91 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -28,9 +28,9 @@
28#include <asm/apic.h> 28#include <asm/apic.h>
29#include <mach_apic.h> 29#include <mach_apic.h>
30#include <asm/genapic.h> 30#include <asm/genapic.h>
31#include <asm/uv/uv.h>
31#endif 32#endif
32 33
33#include <asm/pda.h>
34#include <asm/pgtable.h> 34#include <asm/pgtable.h>
35#include <asm/processor.h> 35#include <asm/processor.h>
36#include <asm/desc.h> 36#include <asm/desc.h>
@@ -52,6 +52,15 @@ cpumask_var_t cpu_initialized_mask;
52/* representing cpus for which sibling maps can be computed */ 52/* representing cpus for which sibling maps can be computed */
53cpumask_var_t cpu_sibling_setup_mask; 53cpumask_var_t cpu_sibling_setup_mask;
54 54
55/* correctly size the local cpu masks */
56void __init setup_cpu_local_masks(void)
57{
58 alloc_bootmem_cpumask_var(&cpu_initialized_mask);
59 alloc_bootmem_cpumask_var(&cpu_callin_mask);
60 alloc_bootmem_cpumask_var(&cpu_callout_mask);
61 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
62}
63
55#else /* CONFIG_X86_32 */ 64#else /* CONFIG_X86_32 */
56 65
57cpumask_t cpu_callin_map; 66cpumask_t cpu_callin_map;
@@ -64,23 +73,23 @@ cpumask_t cpu_sibling_setup_map;
64 73
65static struct cpu_dev *this_cpu __cpuinitdata; 74static struct cpu_dev *this_cpu __cpuinitdata;
66 75
76DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
67#ifdef CONFIG_X86_64 77#ifdef CONFIG_X86_64
68/* We need valid kernel segments for data and code in long mode too 78 /*
69 * IRET will check the segment types kkeil 2000/10/28 79 * We need valid kernel segments for data and code in long mode too
70 * Also sysret mandates a special GDT layout 80 * IRET will check the segment types kkeil 2000/10/28
71 */ 81 * Also sysret mandates a special GDT layout
72/* The TLS descriptors are currently at a different place compared to i386. 82 *
73 Hopefully nobody expects them at a fixed place (Wine?) */ 83 * The TLS descriptors are currently at a different place compared to i386.
74DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 84 * Hopefully nobody expects them at a fixed place (Wine?)
85 */
75 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } }, 86 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
76 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } }, 87 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
77 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } }, 88 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
78 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } }, 89 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
79 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } }, 90 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
80 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } }, 91 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
81} };
82#else 92#else
83DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
84 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 93 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
85 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 94 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
86 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 95 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -112,9 +121,9 @@ DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
112 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } }, 121 [GDT_ENTRY_APMBIOS_BASE+2] = { { { 0x0000ffff, 0x00409200 } } },
113 122
114 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 123 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
115 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 124 [GDT_ENTRY_PERCPU] = { { { 0x0000ffff, 0x00cf9200 } } },
116} };
117#endif 125#endif
126} };
118EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 127EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
119 128
120#ifdef CONFIG_X86_32 129#ifdef CONFIG_X86_32
@@ -215,6 +224,49 @@ static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
215#endif 224#endif
216 225
217/* 226/*
227 * Some CPU features depend on higher CPUID levels, which may not always
228 * be available due to CPUID level capping or broken virtualization
229 * software. Add those features to this table to auto-disable them.
230 */
231struct cpuid_dependent_feature {
232 u32 feature;
233 u32 level;
234};
235static const struct cpuid_dependent_feature __cpuinitconst
236cpuid_dependent_features[] = {
237 { X86_FEATURE_MWAIT, 0x00000005 },
238 { X86_FEATURE_DCA, 0x00000009 },
239 { X86_FEATURE_XSAVE, 0x0000000d },
240 { 0, 0 }
241};
242
243static void __cpuinit filter_cpuid_features(struct cpuinfo_x86 *c, bool warn)
244{
245 const struct cpuid_dependent_feature *df;
246 for (df = cpuid_dependent_features; df->feature; df++) {
247 /*
248 * Note: cpuid_level is set to -1 if unavailable, but
 249 * extended_cpuid_level is set to 0 if unavailable
250 * and the legitimate extended levels are all negative
251 * when signed; hence the weird messing around with
252 * signs here...
253 */
254 if (cpu_has(c, df->feature) &&
255 ((s32)df->feature < 0 ?
256 (u32)df->feature > (u32)c->extended_cpuid_level :
257 (s32)df->feature > (s32)c->cpuid_level)) {
258 clear_cpu_cap(c, df->feature);
259 if (warn)
260 printk(KERN_WARNING
261 "CPU: CPU feature %s disabled "
262 "due to lack of CPUID level 0x%x\n",
263 x86_cap_flags[df->feature],
264 df->level);
265 }
266 }
267}
268
269/*
218 * Naming convention should be: <Name> [(<Codename>)] 270 * Naming convention should be: <Name> [(<Codename>)]
219 * This table only is used unless init_<vendor>() below doesn't set it; 271 * This table only is used unless init_<vendor>() below doesn't set it;
220 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used 272 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
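
filter_cpuid_features() clears feature bits whose reporting CPUID leaf lies above what the CPU, or a level-capping hypervisor, actually advertises; extended leaves have bit 31 set and so read as negative s32 values, which is the reason for the signed/unsigned juggling. A sketch of that level check in isolation, with hypothetical values and the required leaf taken from the table entry's level field:

/* cpuid_dep.c - the level-capping test on its own: a feature is dropped
 * when the leaf that reports it is beyond the advertised maximum. */
#include <stdio.h>
#include <stdint.h>

static int level_missing(uint32_t level, int32_t cpuid_level,
                         uint32_t extended_cpuid_level)
{
    if ((int32_t)level < 0)                     /* 0x8000xxxx extended leaf */
        return level > extended_cpuid_level;
    return (int32_t)level > cpuid_level;        /* basic leaf */
}

int main(void)
{
    /* hypothetical CPU capped at basic level 4, no extended leaves */
    int32_t cpuid_level = 4;
    uint32_t extended_cpuid_level = 0;
    uint32_t mwait_level = 0x00000005;          /* MWAIT needs leaf 5 */

    if (level_missing(mwait_level, cpuid_level, extended_cpuid_level))
        printf("MWAIT disabled: needs CPUID level %#x\n", mwait_level);
    return 0;
}
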
@@ -249,12 +301,17 @@ __u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
249void switch_to_new_gdt(void) 301void switch_to_new_gdt(void)
250{ 302{
251 struct desc_ptr gdt_descr; 303 struct desc_ptr gdt_descr;
304 int cpu = smp_processor_id();
252 305
253 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id()); 306 gdt_descr.address = (long)get_cpu_gdt_table(cpu);
254 gdt_descr.size = GDT_SIZE - 1; 307 gdt_descr.size = GDT_SIZE - 1;
255 load_gdt(&gdt_descr); 308 load_gdt(&gdt_descr);
309 /* Reload the per-cpu base */
256#ifdef CONFIG_X86_32 310#ifdef CONFIG_X86_32
257 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory"); 311 loadsegment(fs, __KERNEL_PERCPU);
312#else
313 loadsegment(gs, 0);
314 wrmsrl(MSR_GS_BASE, (unsigned long)per_cpu(irq_stack_union.gs_base, cpu));
258#endif 315#endif
259} 316}
260 317
@@ -572,11 +629,10 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
572 if (this_cpu->c_early_init) 629 if (this_cpu->c_early_init)
573 this_cpu->c_early_init(c); 630 this_cpu->c_early_init(c);
574 631
575 validate_pat_support(c);
576
577#ifdef CONFIG_SMP 632#ifdef CONFIG_SMP
578 c->cpu_index = boot_cpu_id; 633 c->cpu_index = boot_cpu_id;
579#endif 634#endif
635 filter_cpuid_features(c, false);
580} 636}
581 637
582void __init early_cpu_init(void) 638void __init early_cpu_init(void)
@@ -710,6 +766,9 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
710 * we do "generic changes." 766 * we do "generic changes."
711 */ 767 */
712 768
769 /* Filter out anything that depends on CPUID levels we don't have */
770 filter_cpuid_features(c, true);
771
713 /* If the model name is still unset, do table lookup. */ 772 /* If the model name is still unset, do table lookup. */
714 if (!c->x86_model_id[0]) { 773 if (!c->x86_model_id[0]) {
715 char *p; 774 char *p;
@@ -879,54 +938,26 @@ static __init int setup_disablecpuid(char *arg)
879__setup("clearcpuid=", setup_disablecpuid); 938__setup("clearcpuid=", setup_disablecpuid);
880 939
881#ifdef CONFIG_X86_64 940#ifdef CONFIG_X86_64
882struct x8664_pda **_cpu_pda __read_mostly;
883EXPORT_SYMBOL(_cpu_pda);
884
885struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table }; 941struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
886 942
887static char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss; 943DEFINE_PER_CPU_FIRST(union irq_stack_union,
888 944 irq_stack_union) __aligned(PAGE_SIZE);
889void __cpuinit pda_init(int cpu) 945#ifdef CONFIG_SMP
890{ 946DEFINE_PER_CPU(char *, irq_stack_ptr); /* will be set during per cpu init */
891 struct x8664_pda *pda = cpu_pda(cpu); 947#else
948DEFINE_PER_CPU(char *, irq_stack_ptr) =
949 per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE - 64;
950#endif
892 951
893 /* Setup up data that may be needed in __get_free_pages early */ 952DEFINE_PER_CPU(unsigned long, kernel_stack) =
894 loadsegment(fs, 0); 953 (unsigned long)&init_thread_union - KERNEL_STACK_OFFSET + THREAD_SIZE;
895 loadsegment(gs, 0); 954EXPORT_PER_CPU_SYMBOL(kernel_stack);
896 /* Memory clobbers used to order PDA accessed */
897 mb();
898 wrmsrl(MSR_GS_BASE, pda);
899 mb();
900
901 pda->cpunumber = cpu;
902 pda->irqcount = -1;
903 pda->kernelstack = (unsigned long)stack_thread_info() -
904 PDA_STACKOFFSET + THREAD_SIZE;
905 pda->active_mm = &init_mm;
906 pda->mmu_state = 0;
907
908 if (cpu == 0) {
909 /* others are initialized in smpboot.c */
910 pda->pcurrent = &init_task;
911 pda->irqstackptr = boot_cpu_stack;
912 pda->irqstackptr += IRQSTACKSIZE - 64;
913 } else {
914 if (!pda->irqstackptr) {
915 pda->irqstackptr = (char *)
916 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
917 if (!pda->irqstackptr)
918 panic("cannot allocate irqstack for cpu %d",
919 cpu);
920 pda->irqstackptr += IRQSTACKSIZE - 64;
921 }
922 955
923 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE) 956DEFINE_PER_CPU(unsigned int, irq_count) = -1;
924 pda->nodenumber = cpu_to_node(cpu);
925 }
926}
927 957
928static char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + 958static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
929 DEBUG_STKSZ] __page_aligned_bss; 959 [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ])
960 __aligned(PAGE_SIZE);
930 961
931extern asmlinkage void ignore_sysret(void); 962extern asmlinkage void ignore_sysret(void);
932 963
@@ -984,15 +1015,14 @@ void __cpuinit cpu_init(void)
984 struct tss_struct *t = &per_cpu(init_tss, cpu); 1015 struct tss_struct *t = &per_cpu(init_tss, cpu);
985 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu); 1016 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
986 unsigned long v; 1017 unsigned long v;
987 char *estacks = NULL;
988 struct task_struct *me; 1018 struct task_struct *me;
989 int i; 1019 int i;
990 1020
991 /* CPU 0 is initialised in head64.c */ 1021#ifdef CONFIG_NUMA
992 if (cpu != 0) 1022 if (cpu != 0 && percpu_read(node_number) == 0 &&
993 pda_init(cpu); 1023 cpu_to_node(cpu) != NUMA_NO_NODE)
994 else 1024 percpu_write(node_number, cpu_to_node(cpu));
995 estacks = boot_exception_stacks; 1025#endif
996 1026
997 me = current; 1027 me = current;
998 1028
@@ -1009,6 +1039,8 @@ void __cpuinit cpu_init(void)
1009 */ 1039 */
1010 1040
1011 switch_to_new_gdt(); 1041 switch_to_new_gdt();
1042 loadsegment(fs, 0);
1043
1012 load_idt((const struct desc_ptr *)&idt_descr); 1044 load_idt((const struct desc_ptr *)&idt_descr);
1013 1045
1014 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8); 1046 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
@@ -1026,18 +1058,13 @@ void __cpuinit cpu_init(void)
1026 * set up and load the per-CPU TSS 1058 * set up and load the per-CPU TSS
1027 */ 1059 */
1028 if (!orig_ist->ist[0]) { 1060 if (!orig_ist->ist[0]) {
1029 static const unsigned int order[N_EXCEPTION_STACKS] = { 1061 static const unsigned int sizes[N_EXCEPTION_STACKS] = {
1030 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER, 1062 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
1031 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER 1063 [DEBUG_STACK - 1] = DEBUG_STKSZ
1032 }; 1064 };
1065 char *estacks = per_cpu(exception_stacks, cpu);
1033 for (v = 0; v < N_EXCEPTION_STACKS; v++) { 1066 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
1034 if (cpu) { 1067 estacks += sizes[v];
1035 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
1036 if (!estacks)
1037 panic("Cannot allocate exception "
1038 "stack %ld %d\n", v, cpu);
1039 }
1040 estacks += PAGE_SIZE << order[v];
1041 orig_ist->ist[v] = t->x86_tss.ist[v] = 1068 orig_ist->ist[v] = t->x86_tss.ist[v] =
1042 (unsigned long)estacks; 1069 (unsigned long)estacks;
1043 } 1070 }
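
The loop above replaces per-CPU allocations with slices of one statically sized per-cpu buffer. A small sketch of the same carving, using placeholder sizes rather than the kernel's EXCEPTION_STKSZ/DEBUG_STKSZ values:

/*
 * Sketch of the estacks loop: the exception stacks are carved from one
 * contiguous buffer and each IST slot points at the top of its slice.
 * N_STACKS, STKSZ and DEBUG_SLOT are placeholders.
 */
#include <stdio.h>

#define N_STACKS	4
#define STKSZ		4096
#define DEBUG_SLOT	2
#define DEBUG_STKSZ	8192

static char exception_stacks[(N_STACKS - 1) * STKSZ + DEBUG_STKSZ];

int main(void)
{
	static const unsigned int sizes[N_STACKS] = {
		[0] = STKSZ, [1] = STKSZ, [DEBUG_SLOT] = DEBUG_STKSZ, [3] = STKSZ
	};
	char *estacks = exception_stacks;
	unsigned long ist[N_STACKS];

	for (int v = 0; v < N_STACKS; v++) {
		estacks += sizes[v];		/* advance to the top of this slice */
		ist[v] = (unsigned long)estacks;
		printf("ist[%d] = buffer + %ld\n", v,
		       (long)(estacks - exception_stacks));
	}
	return 0;
}
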
@@ -1071,22 +1098,19 @@ void __cpuinit cpu_init(void)
1071 */ 1098 */
1072 if (kgdb_connected && arch_kgdb_ops.correct_hw_break) 1099 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1073 arch_kgdb_ops.correct_hw_break(); 1100 arch_kgdb_ops.correct_hw_break();
1074 else { 1101 else
1075#endif 1102#endif
1076 /* 1103 {
1077 * Clear all 6 debug registers: 1104 /*
1078 */ 1105 * Clear all 6 debug registers:
1079 1106 */
1080 set_debugreg(0UL, 0); 1107 set_debugreg(0UL, 0);
1081 set_debugreg(0UL, 1); 1108 set_debugreg(0UL, 1);
1082 set_debugreg(0UL, 2); 1109 set_debugreg(0UL, 2);
1083 set_debugreg(0UL, 3); 1110 set_debugreg(0UL, 3);
1084 set_debugreg(0UL, 6); 1111 set_debugreg(0UL, 6);
1085 set_debugreg(0UL, 7); 1112 set_debugreg(0UL, 7);
1086#ifdef CONFIG_KGDB
1087 /* If the kgdb is connected no debug regs should be altered. */
1088 } 1113 }
1089#endif
1090 1114
1091 fpu_init(); 1115 fpu_init();
1092 1116
diff --git a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
index 06fcd8f9323c..4b1c319d30c3 100644
--- a/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
+++ b/arch/x86/kernel/cpu/cpufreq/acpi-cpufreq.c
@@ -145,7 +145,7 @@ typedef union {
145 145
146struct drv_cmd { 146struct drv_cmd {
147 unsigned int type; 147 unsigned int type;
148 cpumask_var_t mask; 148 const struct cpumask *mask;
149 drv_addr_union addr; 149 drv_addr_union addr;
150 u32 val; 150 u32 val;
151}; 151};
@@ -231,15 +231,9 @@ static u32 get_cur_val(const struct cpumask *mask)
231 return 0; 231 return 0;
232 } 232 }
233 233
234 if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL))) 234 cmd.mask = mask;
235 return 0;
236
237 cpumask_copy(cmd.mask, mask);
238
239 drv_read(&cmd); 235 drv_read(&cmd);
240 236
241 free_cpumask_var(cmd.mask);
242
243 dprintk("get_cur_val = %u\n", cmd.val); 237 dprintk("get_cur_val = %u\n", cmd.val);
244 238
245 return cmd.val; 239 return cmd.val;
@@ -369,7 +363,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
369 return freq; 363 return freq;
370} 364}
371 365
372static unsigned int check_freqs(const cpumask_t *mask, unsigned int freq, 366static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq,
373 struct acpi_cpufreq_data *data) 367 struct acpi_cpufreq_data *data)
374{ 368{
375 unsigned int cur_freq; 369 unsigned int cur_freq;
@@ -404,9 +398,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
404 return -ENODEV; 398 return -ENODEV;
405 } 399 }
406 400
407 if (unlikely(!alloc_cpumask_var(&cmd.mask, GFP_KERNEL)))
408 return -ENOMEM;
409
410 perf = data->acpi_data; 401 perf = data->acpi_data;
411 result = cpufreq_frequency_table_target(policy, 402 result = cpufreq_frequency_table_target(policy,
412 data->freq_table, 403 data->freq_table,
@@ -451,9 +442,9 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
451 442
452 /* cpufreq holds the hotplug lock, so we are safe from here on */ 443 /* cpufreq holds the hotplug lock, so we are safe from here on */
453 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY) 444 if (policy->shared_type != CPUFREQ_SHARED_TYPE_ANY)
454 cpumask_and(cmd.mask, cpu_online_mask, policy->cpus); 445 cmd.mask = policy->cpus;
455 else 446 else
456 cpumask_copy(cmd.mask, cpumask_of(policy->cpu)); 447 cmd.mask = cpumask_of(policy->cpu);
457 448
458 freqs.old = perf->states[perf->state].core_frequency * 1000; 449 freqs.old = perf->states[perf->state].core_frequency * 1000;
459 freqs.new = data->freq_table[next_state].frequency; 450 freqs.new = data->freq_table[next_state].frequency;
@@ -480,7 +471,6 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
480 perf->state = next_perf_state; 471 perf->state = next_perf_state;
481 472
482out: 473out:
483 free_cpumask_var(cmd.mask);
484 return result; 474 return result;
485} 475}
486 476
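
The drv_cmd change above swaps an allocated, copied mask for a borrowed const pointer, which removes the allocation failure path entirely. A rough sketch of the borrowed-pointer style, with a plain struct standing in for struct cpumask:

/*
 * Borrow-don't-copy sketch: the command just points at the caller's
 * constant mask, so there is nothing to allocate or free.
 */
#include <stdio.h>

struct fake_mask { unsigned long bits[2]; };

struct cmd {
	const struct fake_mask *mask;	/* new style: borrow, don't own */
};

static unsigned long do_read(const struct cmd *c)
{
	return c->mask->bits[0];	/* reads via the borrowed pointer */
}

int main(void)
{
	struct fake_mask online = { { 0xff, 0 } };
	struct cmd c = { .mask = &online };	/* no alloc/copy/free needed */

	printf("val = %#lx\n", do_read(&c));
	return 0;
}
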
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 8ea6929e974c..5deefae9064d 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -29,6 +29,19 @@
29 29
30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 30static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
31{ 31{
32 /* Unmask CPUID levels if masked: */
33 if (c->x86 > 6 || (c->x86 == 6 && c->x86_model >= 0xd)) {
34 u64 misc_enable;
35
36 rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
37
38 if (misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID) {
39 misc_enable &= ~MSR_IA32_MISC_ENABLE_LIMIT_CPUID;
40 wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable);
41 c->cpuid_level = cpuid_eax(0);
42 }
43 }
44
32 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 45 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
33 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 46 (c->x86 == 0x6 && c->x86_model >= 0x0e))
34 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 47 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
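
The unmasking above is a read-modify-write of one MSR bit. A self-contained sketch of that pattern with the MSR access mocked out; the bit position here is a placeholder, not necessarily the architectural one:

/*
 * Read-modify-write sketch of the CPUID-limit unmask, with the MSR
 * replaced by a plain variable.
 */
#include <stdint.h>
#include <stdio.h>

#define FAKE_LIMIT_CPUID_BIT	(1ULL << 22)	/* stand-in for the real bit */

static uint64_t fake_msr = FAKE_LIMIT_CPUID_BIT | 0x1;	/* mocked MSR state */

static uint64_t rdmsr_mock(void)          { return fake_msr; }
static void     wrmsr_mock(uint64_t val)  { fake_msr = val; }

int main(void)
{
	uint64_t misc_enable = rdmsr_mock();

	if (misc_enable & FAKE_LIMIT_CPUID_BIT) {
		misc_enable &= ~FAKE_LIMIT_CPUID_BIT;	/* clear the limit bit */
		wrmsr_mock(misc_enable);
	}
	printf("msr now %#llx\n", (unsigned long long)fake_msr);
	return 0;
}
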
@@ -50,6 +63,18 @@ static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
50 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); 63 set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC);
51 } 64 }
52 65
66 /*
67 * There is a known erratum on Pentium III and Core Solo
68 * and Core Duo CPUs.
69 * " Page with PAT set to WC while associated MTRR is UC
70 * may consolidate to UC "
71 * Because of this erratum, it is better to stick with
72 * setting WC in MTRR rather than using PAT on these CPUs.
73 *
74 * Enable PAT WC only on P4, Core 2 or later CPUs.
75 */
76 if (c->x86 == 6 && c->x86_model < 15)
77 clear_cpu_cap(c, X86_FEATURE_PAT);
53} 78}
54 79
55#ifdef CONFIG_X86_32 80#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 48533d77be78..58527a9fc404 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -132,7 +132,16 @@ struct _cpuid4_info {
132 union _cpuid4_leaf_ecx ecx; 132 union _cpuid4_leaf_ecx ecx;
133 unsigned long size; 133 unsigned long size;
134 unsigned long can_disable; 134 unsigned long can_disable;
135 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 135 DECLARE_BITMAP(shared_cpu_map, NR_CPUS);
136};
137
138/* subset of above _cpuid4_info w/o shared_cpu_map */
139struct _cpuid4_info_regs {
140 union _cpuid4_leaf_eax eax;
141 union _cpuid4_leaf_ebx ebx;
142 union _cpuid4_leaf_ecx ecx;
143 unsigned long size;
144 unsigned long can_disable;
136}; 145};
137 146
138#ifdef CONFIG_PCI 147#ifdef CONFIG_PCI
@@ -263,7 +272,7 @@ amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
263} 272}
264 273
265static void __cpuinit 274static void __cpuinit
266amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf) 275amd_check_l3_disable(int index, struct _cpuid4_info_regs *this_leaf)
267{ 276{
268 if (index < 3) 277 if (index < 3)
269 return; 278 return;
@@ -271,7 +280,8 @@ amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
271} 280}
272 281
273static int 282static int
274__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 283__cpuinit cpuid4_cache_lookup_regs(int index,
284 struct _cpuid4_info_regs *this_leaf)
275{ 285{
276 union _cpuid4_leaf_eax eax; 286 union _cpuid4_leaf_eax eax;
277 union _cpuid4_leaf_ebx ebx; 287 union _cpuid4_leaf_ebx ebx;
@@ -299,6 +309,15 @@ __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
299 return 0; 309 return 0;
300} 310}
301 311
312static int
313__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
314{
315 struct _cpuid4_info_regs *leaf_regs =
316 (struct _cpuid4_info_regs *)this_leaf;
317
318 return cpuid4_cache_lookup_regs(index, leaf_regs);
319}
320
302static int __cpuinit find_num_cache_leaves(void) 321static int __cpuinit find_num_cache_leaves(void)
303{ 322{
304 unsigned int eax, ebx, ecx, edx; 323 unsigned int eax, ebx, ecx, edx;
@@ -338,11 +357,10 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
338 * parameters cpuid leaf to find the cache details 357 * parameters cpuid leaf to find the cache details
339 */ 358 */
340 for (i = 0; i < num_cache_leaves; i++) { 359 for (i = 0; i < num_cache_leaves; i++) {
341 struct _cpuid4_info this_leaf; 360 struct _cpuid4_info_regs this_leaf;
342
343 int retval; 361 int retval;
344 362
345 retval = cpuid4_cache_lookup(i, &this_leaf); 363 retval = cpuid4_cache_lookup_regs(i, &this_leaf);
346 if (retval >= 0) { 364 if (retval >= 0) {
347 switch(this_leaf.eax.split.level) { 365 switch(this_leaf.eax.split.level) {
348 case 1: 366 case 1:
@@ -491,17 +509,20 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
491 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing; 509 num_threads_sharing = 1 + this_leaf->eax.split.num_threads_sharing;
492 510
493 if (num_threads_sharing == 1) 511 if (num_threads_sharing == 1)
494 cpu_set(cpu, this_leaf->shared_cpu_map); 512 cpumask_set_cpu(cpu, to_cpumask(this_leaf->shared_cpu_map));
495 else { 513 else {
496 index_msb = get_count_order(num_threads_sharing); 514 index_msb = get_count_order(num_threads_sharing);
497 515
498 for_each_online_cpu(i) { 516 for_each_online_cpu(i) {
499 if (cpu_data(i).apicid >> index_msb == 517 if (cpu_data(i).apicid >> index_msb ==
500 c->apicid >> index_msb) { 518 c->apicid >> index_msb) {
501 cpu_set(i, this_leaf->shared_cpu_map); 519 cpumask_set_cpu(i,
520 to_cpumask(this_leaf->shared_cpu_map));
502 if (i != cpu && per_cpu(cpuid4_info, i)) { 521 if (i != cpu && per_cpu(cpuid4_info, i)) {
503 sibling_leaf = CPUID4_INFO_IDX(i, index); 522 sibling_leaf =
504 cpu_set(cpu, sibling_leaf->shared_cpu_map); 523 CPUID4_INFO_IDX(i, index);
524 cpumask_set_cpu(cpu, to_cpumask(
525 sibling_leaf->shared_cpu_map));
505 } 526 }
506 } 527 }
507 } 528 }
@@ -513,9 +534,10 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
513 int sibling; 534 int sibling;
514 535
515 this_leaf = CPUID4_INFO_IDX(cpu, index); 536 this_leaf = CPUID4_INFO_IDX(cpu, index);
516 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 537 for_each_cpu(sibling, to_cpumask(this_leaf->shared_cpu_map)) {
517 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 538 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
518 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 539 cpumask_clear_cpu(cpu,
540 to_cpumask(sibling_leaf->shared_cpu_map));
519 } 541 }
520} 542}
521#else 543#else
@@ -620,8 +642,9 @@ static ssize_t show_shared_cpu_map_func(struct _cpuid4_info *this_leaf,
620 int n = 0; 642 int n = 0;
621 643
622 if (len > 1) { 644 if (len > 1) {
623 cpumask_t *mask = &this_leaf->shared_cpu_map; 645 const struct cpumask *mask;
624 646
647 mask = to_cpumask(this_leaf->shared_cpu_map);
625 n = type? 648 n = type?
626 cpulist_scnprintf(buf, len-2, mask) : 649 cpulist_scnprintf(buf, len-2, mask) :
627 cpumask_scnprintf(buf, len-2, mask); 650 cpumask_scnprintf(buf, len-2, mask);
@@ -684,7 +707,8 @@ static struct pci_dev *get_k8_northbridge(int node)
684 707
685static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf) 708static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
686{ 709{
687 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 710 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
711 int node = cpu_to_node(cpumask_first(mask));
688 struct pci_dev *dev = NULL; 712 struct pci_dev *dev = NULL;
689 ssize_t ret = 0; 713 ssize_t ret = 0;
690 int i; 714 int i;
@@ -718,7 +742,8 @@ static ssize_t
718store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf, 742store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
719 size_t count) 743 size_t count)
720{ 744{
721 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map)); 745 const struct cpumask *mask = to_cpumask(this_leaf->shared_cpu_map);
746 int node = cpu_to_node(cpumask_first(mask));
722 struct pci_dev *dev = NULL; 747 struct pci_dev *dev = NULL;
723 unsigned int ret, index, val; 748 unsigned int ret, index, val;
724 749
@@ -863,7 +888,7 @@ err_out:
863 return -ENOMEM; 888 return -ENOMEM;
864} 889}
865 890
866static cpumask_t cache_dev_map = CPU_MASK_NONE; 891static DECLARE_BITMAP(cache_dev_map, NR_CPUS);
867 892
868/* Add/Remove cache interface for CPU device */ 893/* Add/Remove cache interface for CPU device */
869static int __cpuinit cache_add_dev(struct sys_device * sys_dev) 894static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
@@ -903,7 +928,7 @@ static int __cpuinit cache_add_dev(struct sys_device * sys_dev)
903 } 928 }
904 kobject_uevent(&(this_object->kobj), KOBJ_ADD); 929 kobject_uevent(&(this_object->kobj), KOBJ_ADD);
905 } 930 }
906 cpu_set(cpu, cache_dev_map); 931 cpumask_set_cpu(cpu, to_cpumask(cache_dev_map));
907 932
908 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD); 933 kobject_uevent(per_cpu(cache_kobject, cpu), KOBJ_ADD);
909 return 0; 934 return 0;
@@ -916,9 +941,9 @@ static void __cpuinit cache_remove_dev(struct sys_device * sys_dev)
916 941
917 if (per_cpu(cpuid4_info, cpu) == NULL) 942 if (per_cpu(cpuid4_info, cpu) == NULL)
918 return; 943 return;
919 if (!cpu_isset(cpu, cache_dev_map)) 944 if (!cpumask_test_cpu(cpu, to_cpumask(cache_dev_map)))
920 return; 945 return;
921 cpu_clear(cpu, cache_dev_map); 946 cpumask_clear_cpu(cpu, to_cpumask(cache_dev_map));
922 947
923 for (i = 0; i < num_cache_leaves; i++) 948 for (i = 0; i < num_cache_leaves; i++)
924 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj)); 949 kobject_put(&(INDEX_KOBJECT_PTR(cpu,i)->kobj));
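
The shared_cpu_map conversion above stores the mask as a raw bitmap and manipulates it through set/test helpers. A compact userspace model of that representation; the helper names mirror, but are not, the kernel cpumask API:

/*
 * DECLARE_BITMAP()-style storage: an array of unsigned longs, with
 * set/test helpers doing the word/bit arithmetic.
 */
#include <limits.h>
#include <stdbool.h>
#include <stdio.h>

#define FAKE_NR_CPUS	128
#define BITS_PER_WORD	(sizeof(unsigned long) * CHAR_BIT)
#define MASK_WORDS	((FAKE_NR_CPUS + BITS_PER_WORD - 1) / BITS_PER_WORD)

static unsigned long shared_cpu_map[MASK_WORDS];	/* bitmap analogue */

static void mask_set_cpu(int cpu, unsigned long *mask)
{
	mask[cpu / BITS_PER_WORD] |= 1UL << (cpu % BITS_PER_WORD);
}

static bool mask_test_cpu(int cpu, const unsigned long *mask)
{
	return mask[cpu / BITS_PER_WORD] & (1UL << (cpu % BITS_PER_WORD));
}

int main(void)
{
	mask_set_cpu(3, shared_cpu_map);
	mask_set_cpu(67, shared_cpu_map);
	printf("cpu 67 shared: %d, cpu 4 shared: %d\n",
	       mask_test_cpu(67, shared_cpu_map),
	       mask_test_cpu(4, shared_cpu_map));
	return 0;
}
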
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
index 8ae8c4ff094d..4772e91e8246 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd_64.c
@@ -67,7 +67,7 @@ static struct threshold_block threshold_defaults = {
67struct threshold_bank { 67struct threshold_bank {
68 struct kobject *kobj; 68 struct kobject *kobj;
69 struct threshold_block *blocks; 69 struct threshold_block *blocks;
70 cpumask_t cpus; 70 cpumask_var_t cpus;
71}; 71};
72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]); 72static DEFINE_PER_CPU(struct threshold_bank *, threshold_banks[NR_BANKS]);
73 73
@@ -481,7 +481,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
481 481
482#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ 483 if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */
484 i = first_cpu(per_cpu(cpu_core_map, cpu)); 484 i = cpumask_first(&per_cpu(cpu_core_map, cpu));
485 485
486 /* first core not up yet */ 486 /* first core not up yet */
487 if (cpu_data(i).cpu_core_id) 487 if (cpu_data(i).cpu_core_id)
@@ -501,7 +501,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
501 if (err) 501 if (err)
502 goto out; 502 goto out;
503 503
504 b->cpus = per_cpu(cpu_core_map, cpu); 504 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
505 per_cpu(threshold_banks, cpu)[bank] = b; 505 per_cpu(threshold_banks, cpu)[bank] = b;
506 goto out; 506 goto out;
507 } 507 }
@@ -512,15 +512,20 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
512 err = -ENOMEM; 512 err = -ENOMEM;
513 goto out; 513 goto out;
514 } 514 }
515 if (!alloc_cpumask_var(&b->cpus, GFP_KERNEL)) {
516 kfree(b);
517 err = -ENOMEM;
518 goto out;
519 }
515 520
516 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj); 521 b->kobj = kobject_create_and_add(name, &per_cpu(device_mce, cpu).kobj);
517 if (!b->kobj) 522 if (!b->kobj)
518 goto out_free; 523 goto out_free;
519 524
520#ifndef CONFIG_SMP 525#ifndef CONFIG_SMP
521 b->cpus = CPU_MASK_ALL; 526 cpumask_setall(b->cpus);
522#else 527#else
523 b->cpus = per_cpu(cpu_core_map, cpu); 528 cpumask_copy(b->cpus, &per_cpu(cpu_core_map, cpu));
524#endif 529#endif
525 530
526 per_cpu(threshold_banks, cpu)[bank] = b; 531 per_cpu(threshold_banks, cpu)[bank] = b;
@@ -529,7 +534,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
529 if (err) 534 if (err)
530 goto out_free; 535 goto out_free;
531 536
532 for_each_cpu_mask_nr(i, b->cpus) { 537 for_each_cpu(i, b->cpus) {
533 if (i == cpu) 538 if (i == cpu)
534 continue; 539 continue;
535 540
@@ -545,6 +550,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank)
545 550
546out_free: 551out_free:
547 per_cpu(threshold_banks, cpu)[bank] = NULL; 552 per_cpu(threshold_banks, cpu)[bank] = NULL;
553 free_cpumask_var(b->cpus);
548 kfree(b); 554 kfree(b);
549out: 555out:
550 return err; 556 return err;
@@ -619,7 +625,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
619#endif 625#endif
620 626
621 /* remove all sibling symlinks before unregistering */ 627 /* remove all sibling symlinks before unregistering */
622 for_each_cpu_mask_nr(i, b->cpus) { 628 for_each_cpu(i, b->cpus) {
623 if (i == cpu) 629 if (i == cpu)
624 continue; 630 continue;
625 631
@@ -632,6 +638,7 @@ static void threshold_remove_bank(unsigned int cpu, int bank)
632free_out: 638free_out:
633 kobject_del(b->kobj); 639 kobject_del(b->kobj);
634 kobject_put(b->kobj); 640 kobject_put(b->kobj);
641 free_cpumask_var(b->cpus);
635 kfree(b); 642 kfree(b);
636 per_cpu(threshold_banks, cpu)[bank] = NULL; 643 per_cpu(threshold_banks, cpu)[bank] = NULL;
637} 644}
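
The threshold_bank change above turns the embedded mask into an allocated one, so creation gains an allocation-failure path and teardown gains a matching free. A sketch of that ownership discipline using plain calloc/free as stand-ins:

/*
 * Allocate-on-create, free-on-error-and-teardown sketch of the
 * cpumask_var_t conversion; bank_create/bank_destroy are illustrative.
 */
#include <stdio.h>
#include <stdlib.h>

struct fake_bank {
	unsigned long *cpus;	/* was a fixed-size mask, now allocated */
};

static int bank_create(struct fake_bank **out, size_t mask_words)
{
	struct fake_bank *b = calloc(1, sizeof(*b));
	if (!b)
		return -1;
	b->cpus = calloc(mask_words, sizeof(unsigned long));
	if (!b->cpus) {			/* mirrors the out_free error path */
		free(b);
		return -1;
	}
	*out = b;
	return 0;
}

static void bank_destroy(struct fake_bank *b)
{
	free(b->cpus);			/* free_cpumask_var() analogue */
	free(b);
}

int main(void)
{
	struct fake_bank *b;

	if (bank_create(&b, 4) == 0) {
		printf("bank created\n");
		bank_destroy(b);
	}
	return 0;
}
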
diff --git a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
index 4b48f251fd39..5e8c79e748a6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_intel_64.c
@@ -7,6 +7,7 @@
7#include <linux/interrupt.h> 7#include <linux/interrupt.h>
8#include <linux/percpu.h> 8#include <linux/percpu.h>
9#include <asm/processor.h> 9#include <asm/processor.h>
10#include <asm/apic.h>
10#include <asm/msr.h> 11#include <asm/msr.h>
11#include <asm/mce.h> 12#include <asm/mce.h>
12#include <asm/hw_irq.h> 13#include <asm/hw_irq.h>
diff --git a/arch/x86/kernel/cpu/mtrr/generic.c b/arch/x86/kernel/cpu/mtrr/generic.c
index b59ddcc88cd8..0c0a455fe95c 100644
--- a/arch/x86/kernel/cpu/mtrr/generic.c
+++ b/arch/x86/kernel/cpu/mtrr/generic.c
@@ -33,11 +33,13 @@ u64 mtrr_tom2;
33struct mtrr_state_type mtrr_state = {}; 33struct mtrr_state_type mtrr_state = {};
34EXPORT_SYMBOL_GPL(mtrr_state); 34EXPORT_SYMBOL_GPL(mtrr_state);
35 35
36#undef MODULE_PARAM_PREFIX 36static int __initdata mtrr_show;
37#define MODULE_PARAM_PREFIX "mtrr." 37static int __init mtrr_debug(char *opt)
38 38{
39static int mtrr_show; 39 mtrr_show = 1;
40module_param_named(show, mtrr_show, bool, 0); 40 return 0;
41}
42early_param("mtrr.show", mtrr_debug);
41 43
42/* 44/*
43 * Returns the effective MTRR type for the region 45 * Returns the effective MTRR type for the region
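
early_param() registers a handler that runs while the kernel command line is parsed, long before module parameters exist. A loose userspace analogue of the hookup above; the parsing here is a toy, not the kernel's parser:

/*
 * Toy command-line scan invoking the handler when the option name
 * appears, mimicking the early_param("mtrr.show", mtrr_debug) hookup.
 */
#include <stdio.h>
#include <string.h>

static int mtrr_show;

static int mtrr_debug(char *opt)
{
	(void)opt;			/* option takes no value */
	mtrr_show = 1;
	return 0;
}

static void parse_early_options(const char *cmdline)
{
	if (strstr(cmdline, "mtrr.show"))
		mtrr_debug(NULL);
}

int main(void)
{
	parse_early_options("root=/dev/sda1 mtrr.show quiet");
	printf("mtrr_show = %d\n", mtrr_show);
	return 0;
}
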
diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c
index c302d0707048..d35db5993fd6 100644
--- a/arch/x86/kernel/dumpstack_64.c
+++ b/arch/x86/kernel/dumpstack_64.c
@@ -106,7 +106,8 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
106 const struct stacktrace_ops *ops, void *data) 106 const struct stacktrace_ops *ops, void *data)
107{ 107{
108 const unsigned cpu = get_cpu(); 108 const unsigned cpu = get_cpu();
109 unsigned long *irqstack_end = (unsigned long *)cpu_pda(cpu)->irqstackptr; 109 unsigned long *irq_stack_end =
110 (unsigned long *)per_cpu(irq_stack_ptr, cpu);
110 unsigned used = 0; 111 unsigned used = 0;
111 struct thread_info *tinfo; 112 struct thread_info *tinfo;
112 int graph = 0; 113 int graph = 0;
@@ -160,23 +161,23 @@ void dump_trace(struct task_struct *task, struct pt_regs *regs,
160 stack = (unsigned long *) estack_end[-2]; 161 stack = (unsigned long *) estack_end[-2];
161 continue; 162 continue;
162 } 163 }
163 if (irqstack_end) { 164 if (irq_stack_end) {
164 unsigned long *irqstack; 165 unsigned long *irq_stack;
165 irqstack = irqstack_end - 166 irq_stack = irq_stack_end -
166 (IRQSTACKSIZE - 64) / sizeof(*irqstack); 167 (IRQ_STACK_SIZE - 64) / sizeof(*irq_stack);
167 168
168 if (stack >= irqstack && stack < irqstack_end) { 169 if (stack >= irq_stack && stack < irq_stack_end) {
169 if (ops->stack(data, "IRQ") < 0) 170 if (ops->stack(data, "IRQ") < 0)
170 break; 171 break;
171 bp = print_context_stack(tinfo, stack, bp, 172 bp = print_context_stack(tinfo, stack, bp,
172 ops, data, irqstack_end, &graph); 173 ops, data, irq_stack_end, &graph);
173 /* 174 /*
174 * We link to the next stack (which would be 175 * We link to the next stack (which would be
175 * the process stack normally) the last 176 * the process stack normally) the last
176 * pointer (index -1 to end) in the IRQ stack: 177 * pointer (index -1 to end) in the IRQ stack:
177 */ 178 */
178 stack = (unsigned long *) (irqstack_end[-1]); 179 stack = (unsigned long *) (irq_stack_end[-1]);
179 irqstack_end = NULL; 180 irq_stack_end = NULL;
180 ops->stack(data, "EOI"); 181 ops->stack(data, "EOI");
181 continue; 182 continue;
182 } 183 }
@@ -199,10 +200,10 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
199 unsigned long *stack; 200 unsigned long *stack;
200 int i; 201 int i;
201 const int cpu = smp_processor_id(); 202 const int cpu = smp_processor_id();
202 unsigned long *irqstack_end = 203 unsigned long *irq_stack_end =
203 (unsigned long *) (cpu_pda(cpu)->irqstackptr); 204 (unsigned long *)(per_cpu(irq_stack_ptr, cpu));
204 unsigned long *irqstack = 205 unsigned long *irq_stack =
205 (unsigned long *) (cpu_pda(cpu)->irqstackptr - IRQSTACKSIZE); 206 (unsigned long *)(per_cpu(irq_stack_ptr, cpu) - IRQ_STACK_SIZE);
206 207
207 /* 208 /*
208 * debugging aid: "show_stack(NULL, NULL);" prints the 209 * debugging aid: "show_stack(NULL, NULL);" prints the
@@ -218,9 +219,9 @@ show_stack_log_lvl(struct task_struct *task, struct pt_regs *regs,
218 219
219 stack = sp; 220 stack = sp;
220 for (i = 0; i < kstack_depth_to_print; i++) { 221 for (i = 0; i < kstack_depth_to_print; i++) {
221 if (stack >= irqstack && stack <= irqstack_end) { 222 if (stack >= irq_stack && stack <= irq_stack_end) {
222 if (stack == irqstack_end) { 223 if (stack == irq_stack_end) {
223 stack = (unsigned long *) (irqstack_end[-1]); 224 stack = (unsigned long *) (irq_stack_end[-1]);
224 printk(" <EOI> "); 225 printk(" <EOI> ");
225 } 226 }
226 } else { 227 } else {
@@ -241,7 +242,7 @@ void show_registers(struct pt_regs *regs)
241 int i; 242 int i;
242 unsigned long sp; 243 unsigned long sp;
243 const int cpu = smp_processor_id(); 244 const int cpu = smp_processor_id();
244 struct task_struct *cur = cpu_pda(cpu)->pcurrent; 245 struct task_struct *cur = current;
245 246
246 sp = regs->sp; 247 sp = regs->sp;
247 printk("CPU %d ", cpu); 248 printk("CPU %d ", cpu);
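
The renamed variables above still implement the same containment test: a saved stack pointer is on the IRQ stack only if it falls inside [irq_stack, irq_stack_end). A standalone sketch of that check with made-up sizes:

/*
 * "Is this pointer inside the IRQ stack?" bounds check, with a static
 * array standing in for the per-cpu IRQ stack.
 */
#include <stdio.h>

#define FAKE_IRQ_STACK_SIZE 16384

static unsigned long fake_irq_stack[FAKE_IRQ_STACK_SIZE / sizeof(unsigned long)];

static int on_irq_stack(const unsigned long *sp)
{
	const unsigned long *end = fake_irq_stack +
		FAKE_IRQ_STACK_SIZE / sizeof(unsigned long);

	/* mirrors: stack >= irq_stack && stack < irq_stack_end */
	return sp >= fake_irq_stack && sp < end;
}

int main(void)
{
	unsigned long local = 0;

	printf("inside: %d, outside: %d\n",
	       on_irq_stack(&fake_irq_stack[10]), on_irq_stack(&local));
	return 0;
}
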
diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c
index 1119d247fe11..b205272ad394 100644
--- a/arch/x86/kernel/efi.c
+++ b/arch/x86/kernel/efi.c
@@ -366,10 +366,12 @@ void __init efi_init(void)
366 SMBIOS_TABLE_GUID)) { 366 SMBIOS_TABLE_GUID)) {
367 efi.smbios = config_tables[i].table; 367 efi.smbios = config_tables[i].table;
368 printk(" SMBIOS=0x%lx ", config_tables[i].table); 368 printk(" SMBIOS=0x%lx ", config_tables[i].table);
369#ifdef CONFIG_X86_UV
369 } else if (!efi_guidcmp(config_tables[i].guid, 370 } else if (!efi_guidcmp(config_tables[i].guid,
370 UV_SYSTEM_TABLE_GUID)) { 371 UV_SYSTEM_TABLE_GUID)) {
371 efi.uv_systab = config_tables[i].table; 372 efi.uv_systab = config_tables[i].table;
372 printk(" UVsystab=0x%lx ", config_tables[i].table); 373 printk(" UVsystab=0x%lx ", config_tables[i].table);
374#endif
373 } else if (!efi_guidcmp(config_tables[i].guid, 375 } else if (!efi_guidcmp(config_tables[i].guid,
374 HCDP_TABLE_GUID)) { 376 HCDP_TABLE_GUID)) {
375 efi.hcdp = config_tables[i].table; 377 efi.hcdp = config_tables[i].table;
diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c
index 652c5287215f..a4ee29127fdf 100644
--- a/arch/x86/kernel/efi_64.c
+++ b/arch/x86/kernel/efi_64.c
@@ -36,6 +36,7 @@
36#include <asm/proto.h> 36#include <asm/proto.h>
37#include <asm/efi.h> 37#include <asm/efi.h>
38#include <asm/cacheflush.h> 38#include <asm/cacheflush.h>
39#include <asm/fixmap.h>
39 40
40static pgd_t save_pgd __initdata; 41static pgd_t save_pgd __initdata;
41static unsigned long efi_flags __initdata; 42static unsigned long efi_flags __initdata;
diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S
index d6f0490a7391..a0b91aac72a1 100644
--- a/arch/x86/kernel/entry_32.S
+++ b/arch/x86/kernel/entry_32.S
@@ -672,7 +672,7 @@ common_interrupt:
672ENDPROC(common_interrupt) 672ENDPROC(common_interrupt)
673 CFI_ENDPROC 673 CFI_ENDPROC
674 674
675#define BUILD_INTERRUPT(name, nr) \ 675#define BUILD_INTERRUPT3(name, nr, fn) \
676ENTRY(name) \ 676ENTRY(name) \
677 RING0_INT_FRAME; \ 677 RING0_INT_FRAME; \
678 pushl $~(nr); \ 678 pushl $~(nr); \
@@ -680,11 +680,13 @@ ENTRY(name) \
680 SAVE_ALL; \ 680 SAVE_ALL; \
681 TRACE_IRQS_OFF \ 681 TRACE_IRQS_OFF \
682 movl %esp,%eax; \ 682 movl %esp,%eax; \
683 call smp_##name; \ 683 call fn; \
684 jmp ret_from_intr; \ 684 jmp ret_from_intr; \
685 CFI_ENDPROC; \ 685 CFI_ENDPROC; \
686ENDPROC(name) 686ENDPROC(name)
687 687
688#define BUILD_INTERRUPT(name, nr) BUILD_INTERRUPT3(name, nr, smp_##name)
689
688/* The include is where all of the SMP etc. interrupts come from */ 690/* The include is where all of the SMP etc. interrupts come from */
689#include "entry_arch.h" 691#include "entry_arch.h"
690 692
@@ -1203,7 +1205,6 @@ nmi_stack_correct:
1203 pushl %eax 1205 pushl %eax
1204 CFI_ADJUST_CFA_OFFSET 4 1206 CFI_ADJUST_CFA_OFFSET 4
1205 SAVE_ALL 1207 SAVE_ALL
1206 TRACE_IRQS_OFF
1207 xorl %edx,%edx # zero error code 1208 xorl %edx,%edx # zero error code
1208 movl %esp,%eax # pt_regs pointer 1209 movl %esp,%eax # pt_regs pointer
1209 call do_nmi 1210 call do_nmi
@@ -1244,7 +1245,6 @@ nmi_espfix_stack:
1244 pushl %eax 1245 pushl %eax
1245 CFI_ADJUST_CFA_OFFSET 4 1246 CFI_ADJUST_CFA_OFFSET 4
1246 SAVE_ALL 1247 SAVE_ALL
1247 TRACE_IRQS_OFF
1248 FIXUP_ESPFIX_STACK # %eax == %esp 1248 FIXUP_ESPFIX_STACK # %eax == %esp
1249 xorl %edx,%edx # zero error code 1249 xorl %edx,%edx # zero error code
1250 call do_nmi 1250 call do_nmi
diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index e28c7a987793..82801fd2e931 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -52,6 +52,7 @@
52#include <asm/irqflags.h> 52#include <asm/irqflags.h>
53#include <asm/paravirt.h> 53#include <asm/paravirt.h>
54#include <asm/ftrace.h> 54#include <asm/ftrace.h>
55#include <asm/percpu.h>
55 56
56/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */ 57/* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
57#include <linux/elf-em.h> 58#include <linux/elf-em.h>
@@ -209,7 +210,7 @@ ENTRY(native_usergs_sysret64)
209 210
210 /* %rsp:at FRAMEEND */ 211 /* %rsp:at FRAMEEND */
211 .macro FIXUP_TOP_OF_STACK tmp offset=0 212 .macro FIXUP_TOP_OF_STACK tmp offset=0
212 movq %gs:pda_oldrsp,\tmp 213 movq PER_CPU_VAR(old_rsp),\tmp
213 movq \tmp,RSP+\offset(%rsp) 214 movq \tmp,RSP+\offset(%rsp)
214 movq $__USER_DS,SS+\offset(%rsp) 215 movq $__USER_DS,SS+\offset(%rsp)
215 movq $__USER_CS,CS+\offset(%rsp) 216 movq $__USER_CS,CS+\offset(%rsp)
@@ -220,7 +221,7 @@ ENTRY(native_usergs_sysret64)
220 221
221 .macro RESTORE_TOP_OF_STACK tmp offset=0 222 .macro RESTORE_TOP_OF_STACK tmp offset=0
222 movq RSP+\offset(%rsp),\tmp 223 movq RSP+\offset(%rsp),\tmp
223 movq \tmp,%gs:pda_oldrsp 224 movq \tmp,PER_CPU_VAR(old_rsp)
224 movq EFLAGS+\offset(%rsp),\tmp 225 movq EFLAGS+\offset(%rsp),\tmp
225 movq \tmp,R11+\offset(%rsp) 226 movq \tmp,R11+\offset(%rsp)
226 .endm 227 .endm
@@ -336,15 +337,15 @@ ENTRY(save_args)
336 je 1f 337 je 1f
337 SWAPGS 338 SWAPGS
338 /* 339 /*
339 * irqcount is used to check if a CPU is already on an interrupt stack 340 * irq_count is used to check if a CPU is already on an interrupt stack
340 * or not. While this is essentially redundant with preempt_count it is 341 * or not. While this is essentially redundant with preempt_count it is
341 * a little cheaper to use a separate counter in the PDA (short of 342 * a little cheaper to use a separate counter in the PDA (short of
342 * moving irq_enter into assembly, which would be too much work) 343 * moving irq_enter into assembly, which would be too much work)
343 */ 344 */
3441: incl %gs:pda_irqcount 3451: incl PER_CPU_VAR(irq_count)
345 jne 2f 346 jne 2f
346 popq_cfi %rax /* move return address... */ 347 popq_cfi %rax /* move return address... */
347 mov %gs:pda_irqstackptr,%rsp 348 mov PER_CPU_VAR(irq_stack_ptr),%rsp
348 EMPTY_FRAME 0 349 EMPTY_FRAME 0
349 pushq_cfi %rax /* ... to the new stack */ 350 pushq_cfi %rax /* ... to the new stack */
350 /* 351 /*
@@ -408,6 +409,8 @@ END(save_paranoid)
408ENTRY(ret_from_fork) 409ENTRY(ret_from_fork)
409 DEFAULT_FRAME 410 DEFAULT_FRAME
410 411
412 LOCK ; btr $TIF_FORK,TI_flags(%r8)
413
411 push kernel_eflags(%rip) 414 push kernel_eflags(%rip)
412 CFI_ADJUST_CFA_OFFSET 8 415 CFI_ADJUST_CFA_OFFSET 8
413 popf # reset kernel eflags 416 popf # reset kernel eflags
@@ -467,7 +470,7 @@ END(ret_from_fork)
467ENTRY(system_call) 470ENTRY(system_call)
468 CFI_STARTPROC simple 471 CFI_STARTPROC simple
469 CFI_SIGNAL_FRAME 472 CFI_SIGNAL_FRAME
470 CFI_DEF_CFA rsp,PDA_STACKOFFSET 473 CFI_DEF_CFA rsp,KERNEL_STACK_OFFSET
471 CFI_REGISTER rip,rcx 474 CFI_REGISTER rip,rcx
472 /*CFI_REGISTER rflags,r11*/ 475 /*CFI_REGISTER rflags,r11*/
473 SWAPGS_UNSAFE_STACK 476 SWAPGS_UNSAFE_STACK
@@ -478,8 +481,8 @@ ENTRY(system_call)
478 */ 481 */
479ENTRY(system_call_after_swapgs) 482ENTRY(system_call_after_swapgs)
480 483
481 movq %rsp,%gs:pda_oldrsp 484 movq %rsp,PER_CPU_VAR(old_rsp)
482 movq %gs:pda_kernelstack,%rsp 485 movq PER_CPU_VAR(kernel_stack),%rsp
483 /* 486 /*
484 * No need to follow this irqs off/on section - it's straight 487 * No need to follow this irqs off/on section - it's straight
485 * and short: 488 * and short:
@@ -522,7 +525,7 @@ sysret_check:
522 CFI_REGISTER rip,rcx 525 CFI_REGISTER rip,rcx
523 RESTORE_ARGS 0,-ARG_SKIP,1 526 RESTORE_ARGS 0,-ARG_SKIP,1
524 /*CFI_REGISTER rflags,r11*/ 527 /*CFI_REGISTER rflags,r11*/
525 movq %gs:pda_oldrsp, %rsp 528 movq PER_CPU_VAR(old_rsp), %rsp
526 USERGS_SYSRET64 529 USERGS_SYSRET64
527 530
528 CFI_RESTORE_STATE 531 CFI_RESTORE_STATE
@@ -832,11 +835,11 @@ common_interrupt:
832 XCPT_FRAME 835 XCPT_FRAME
833 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */ 836 addq $-0x80,(%rsp) /* Adjust vector to [-256,-1] range */
834 interrupt do_IRQ 837 interrupt do_IRQ
835 /* 0(%rsp): oldrsp-ARGOFFSET */ 838 /* 0(%rsp): old_rsp-ARGOFFSET */
836ret_from_intr: 839ret_from_intr:
837 DISABLE_INTERRUPTS(CLBR_NONE) 840 DISABLE_INTERRUPTS(CLBR_NONE)
838 TRACE_IRQS_OFF 841 TRACE_IRQS_OFF
839 decl %gs:pda_irqcount 842 decl PER_CPU_VAR(irq_count)
840 leaveq 843 leaveq
841 CFI_DEF_CFA_REGISTER rsp 844 CFI_DEF_CFA_REGISTER rsp
842 CFI_ADJUST_CFA_OFFSET -8 845 CFI_ADJUST_CFA_OFFSET -8
@@ -981,8 +984,10 @@ apicinterrupt IRQ_MOVE_CLEANUP_VECTOR \
981 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt 984 irq_move_cleanup_interrupt smp_irq_move_cleanup_interrupt
982#endif 985#endif
983 986
987#ifdef CONFIG_X86_UV
984apicinterrupt UV_BAU_MESSAGE \ 988apicinterrupt UV_BAU_MESSAGE \
985 uv_bau_message_intr1 uv_bau_message_interrupt 989 uv_bau_message_intr1 uv_bau_message_interrupt
990#endif
986apicinterrupt LOCAL_TIMER_VECTOR \ 991apicinterrupt LOCAL_TIMER_VECTOR \
987 apic_timer_interrupt smp_apic_timer_interrupt 992 apic_timer_interrupt smp_apic_timer_interrupt
988 993
@@ -1072,10 +1077,10 @@ ENTRY(\sym)
1072 TRACE_IRQS_OFF 1077 TRACE_IRQS_OFF
1073 movq %rsp,%rdi /* pt_regs pointer */ 1078 movq %rsp,%rdi /* pt_regs pointer */
1074 xorl %esi,%esi /* no error code */ 1079 xorl %esi,%esi /* no error code */
1075 movq %gs:pda_data_offset, %rbp 1080 PER_CPU(init_tss, %rbp)
1076 subq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1081 subq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1077 call \do_sym 1082 call \do_sym
1078 addq $EXCEPTION_STKSZ, per_cpu__init_tss + TSS_ist + (\ist - 1) * 8(%rbp) 1083 addq $EXCEPTION_STKSZ, TSS_ist + (\ist - 1) * 8(%rbp)
1079 jmp paranoid_exit /* %ebx: no swapgs flag */ 1084 jmp paranoid_exit /* %ebx: no swapgs flag */
1080 CFI_ENDPROC 1085 CFI_ENDPROC
1081END(\sym) 1086END(\sym)
@@ -1259,14 +1264,14 @@ ENTRY(call_softirq)
1259 CFI_REL_OFFSET rbp,0 1264 CFI_REL_OFFSET rbp,0
1260 mov %rsp,%rbp 1265 mov %rsp,%rbp
1261 CFI_DEF_CFA_REGISTER rbp 1266 CFI_DEF_CFA_REGISTER rbp
1262 incl %gs:pda_irqcount 1267 incl PER_CPU_VAR(irq_count)
1263 cmove %gs:pda_irqstackptr,%rsp 1268 cmove PER_CPU_VAR(irq_stack_ptr),%rsp
1264 push %rbp # backlink for old unwinder 1269 push %rbp # backlink for old unwinder
1265 call __do_softirq 1270 call __do_softirq
1266 leaveq 1271 leaveq
1267 CFI_DEF_CFA_REGISTER rsp 1272 CFI_DEF_CFA_REGISTER rsp
1268 CFI_ADJUST_CFA_OFFSET -8 1273 CFI_ADJUST_CFA_OFFSET -8
1269 decl %gs:pda_irqcount 1274 decl PER_CPU_VAR(irq_count)
1270 ret 1275 ret
1271 CFI_ENDPROC 1276 CFI_ENDPROC
1272END(call_softirq) 1277END(call_softirq)
@@ -1296,15 +1301,15 @@ ENTRY(xen_do_hypervisor_callback) # do_hypervisor_callback(struct *pt_regs)
1296 movq %rdi, %rsp # we don't return, adjust the stack frame 1301 movq %rdi, %rsp # we don't return, adjust the stack frame
1297 CFI_ENDPROC 1302 CFI_ENDPROC
1298 DEFAULT_FRAME 1303 DEFAULT_FRAME
129911: incl %gs:pda_irqcount 130411: incl PER_CPU_VAR(irq_count)
1300 movq %rsp,%rbp 1305 movq %rsp,%rbp
1301 CFI_DEF_CFA_REGISTER rbp 1306 CFI_DEF_CFA_REGISTER rbp
1302 cmovzq %gs:pda_irqstackptr,%rsp 1307 cmovzq PER_CPU_VAR(irq_stack_ptr),%rsp
1303 pushq %rbp # backlink for old unwinder 1308 pushq %rbp # backlink for old unwinder
1304 call xen_evtchn_do_upcall 1309 call xen_evtchn_do_upcall
1305 popq %rsp 1310 popq %rsp
1306 CFI_DEF_CFA_REGISTER rsp 1311 CFI_DEF_CFA_REGISTER rsp
1307 decl %gs:pda_irqcount 1312 decl PER_CPU_VAR(irq_count)
1308 jmp error_exit 1313 jmp error_exit
1309 CFI_ENDPROC 1314 CFI_ENDPROC
1310END(do_hypervisor_callback) 1315END(do_hypervisor_callback)
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index 2bced78b0b8e..e656c2721154 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -32,7 +32,9 @@ extern struct genapic apic_x2apic_cluster;
32struct genapic __read_mostly *genapic = &apic_flat; 32struct genapic __read_mostly *genapic = &apic_flat;
33 33
34static struct genapic *apic_probe[] __initdata = { 34static struct genapic *apic_probe[] __initdata = {
35#ifdef CONFIG_X86_UV
35 &apic_x2apic_uv_x, 36 &apic_x2apic_uv_x,
37#endif
36 &apic_x2apic_phys, 38 &apic_x2apic_phys,
37 &apic_x2apic_cluster, 39 &apic_x2apic_cluster,
38 &apic_physflat, 40 &apic_physflat,
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index b193e082f6ce..bfe36249145c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -25,6 +25,7 @@
25#include <asm/ipi.h> 25#include <asm/ipi.h>
26#include <asm/genapic.h> 26#include <asm/genapic.h>
27#include <asm/pgtable.h> 27#include <asm/pgtable.h>
28#include <asm/uv/uv.h>
28#include <asm/uv/uv_mmrs.h> 29#include <asm/uv/uv_mmrs.h>
29#include <asm/uv/uv_hub.h> 30#include <asm/uv/uv_hub.h>
30#include <asm/uv/bios.h> 31#include <asm/uv/bios.h>
diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c
index b9a4d8c4b935..f5b272247690 100644
--- a/arch/x86/kernel/head64.c
+++ b/arch/x86/kernel/head64.c
@@ -26,27 +26,6 @@
26#include <asm/bios_ebda.h> 26#include <asm/bios_ebda.h>
27#include <asm/trampoline.h> 27#include <asm/trampoline.h>
28 28
29/* boot cpu pda */
30static struct x8664_pda _boot_cpu_pda;
31
32#ifdef CONFIG_SMP
33/*
34 * We install an empty cpu_pda pointer table to indicate to early users
35 * (numa_set_node) that the cpu_pda pointer table for cpus other than
36 * the boot cpu is not yet setup.
37 */
38static struct x8664_pda *__cpu_pda[NR_CPUS] __initdata;
39#else
40static struct x8664_pda *__cpu_pda[NR_CPUS] __read_mostly;
41#endif
42
43void __init x86_64_init_pda(void)
44{
45 _cpu_pda = __cpu_pda;
46 cpu_pda(0) = &_boot_cpu_pda;
47 pda_init(0);
48}
49
50static void __init zap_identity_mappings(void) 29static void __init zap_identity_mappings(void)
51{ 30{
52 pgd_t *pgd = pgd_offset_k(0UL); 31 pgd_t *pgd = pgd_offset_k(0UL);
@@ -112,8 +91,6 @@ void __init x86_64_start_kernel(char * real_mode_data)
112 if (console_loglevel == 10) 91 if (console_loglevel == 10)
113 early_printk("Kernel alive\n"); 92 early_printk("Kernel alive\n");
114 93
115 x86_64_init_pda();
116
117 x86_64_start_reservations(real_mode_data); 94 x86_64_start_reservations(real_mode_data);
118} 95}
119 96
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index e835b4eea70b..722464c520cf 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -429,12 +429,14 @@ is386: movl $2,%ecx # set MP
429 ljmp $(__KERNEL_CS),$1f 429 ljmp $(__KERNEL_CS),$1f
4301: movl $(__KERNEL_DS),%eax # reload all the segment registers 4301: movl $(__KERNEL_DS),%eax # reload all the segment registers
431 movl %eax,%ss # after changing gdt. 431 movl %eax,%ss # after changing gdt.
432 movl %eax,%fs # gets reset once there's real percpu
433 432
434 movl $(__USER_DS),%eax # DS/ES contains default USER segment 433 movl $(__USER_DS),%eax # DS/ES contains default USER segment
435 movl %eax,%ds 434 movl %eax,%ds
436 movl %eax,%es 435 movl %eax,%es
437 436
437 movl $(__KERNEL_PERCPU), %eax
438 movl %eax,%fs # set this cpu's percpu
439
438 xorl %eax,%eax # Clear GS and LDT 440 xorl %eax,%eax # Clear GS and LDT
439 movl %eax,%gs 441 movl %eax,%gs
440 lldt %ax 442 lldt %ax
@@ -446,8 +448,6 @@ is386: movl $2,%ecx # set MP
446 movb $1, ready 448 movb $1, ready
447 cmpb $0,%cl # the first CPU calls start_kernel 449 cmpb $0,%cl # the first CPU calls start_kernel
448 je 1f 450 je 1f
449 movl $(__KERNEL_PERCPU), %eax
450 movl %eax,%fs # set this cpu's percpu
451 movl (stack_start), %esp 451 movl (stack_start), %esp
4521: 4521:
453#endif /* CONFIG_SMP */ 453#endif /* CONFIG_SMP */
@@ -548,12 +548,8 @@ early_fault:
548 pushl %eax 548 pushl %eax
549 pushl %edx /* trapno */ 549 pushl %edx /* trapno */
550 pushl $fault_msg 550 pushl $fault_msg
551#ifdef CONFIG_EARLY_PRINTK
552 call early_printk
553#else
554 call printk 551 call printk
555#endif 552#endif
556#endif
557 call dump_stack 553 call dump_stack
558hlt_loop: 554hlt_loop:
559 hlt 555 hlt
@@ -580,11 +576,10 @@ ignore_int:
580 pushl 32(%esp) 576 pushl 32(%esp)
581 pushl 40(%esp) 577 pushl 40(%esp)
582 pushl $int_msg 578 pushl $int_msg
583#ifdef CONFIG_EARLY_PRINTK
584 call early_printk
585#else
586 call printk 579 call printk
587#endif 580
581 call dump_stack
582
588 addl $(5*4),%esp 583 addl $(5*4),%esp
589 popl %ds 584 popl %ds
590 popl %es 585 popl %es
@@ -660,7 +655,7 @@ early_recursion_flag:
660 .long 0 655 .long 0
661 656
662int_msg: 657int_msg:
663 .asciz "Unknown interrupt or fault at EIP %p %p %p\n" 658 .asciz "Unknown interrupt or fault at: %p %p %p\n"
664 659
665fault_msg: 660fault_msg:
666/* fault info: */ 661/* fault info: */
diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S
index 0e275d495563..a0a2b5ca9b7d 100644
--- a/arch/x86/kernel/head_64.S
+++ b/arch/x86/kernel/head_64.S
@@ -19,6 +19,7 @@
19#include <asm/msr.h> 19#include <asm/msr.h>
20#include <asm/cache.h> 20#include <asm/cache.h>
21#include <asm/processor-flags.h> 21#include <asm/processor-flags.h>
22#include <asm/percpu.h>
22 23
23#ifdef CONFIG_PARAVIRT 24#ifdef CONFIG_PARAVIRT
24#include <asm/asm-offsets.h> 25#include <asm/asm-offsets.h>
@@ -204,6 +205,19 @@ ENTRY(secondary_startup_64)
204 pushq $0 205 pushq $0
205 popfq 206 popfq
206 207
208#ifdef CONFIG_SMP
209 /*
210 * Fix up static pointers that need __per_cpu_load added. The assembler
211 * is unable to do this directly. This is only needed for the boot cpu.
212 * These values are set up with the correct base addresses by C code for
213 * secondary cpus.
214 */
215 movq initial_gs(%rip), %rax
216 cmpl $0, per_cpu__cpu_number(%rax)
217 jne 1f
218 addq %rax, early_gdt_descr_base(%rip)
2191:
220#endif
207 /* 221 /*
208 * We must switch to a new descriptor in kernel space for the GDT 222 * We must switch to a new descriptor in kernel space for the GDT
209 * because soon the kernel won't have access anymore to the userspace 223 * because soon the kernel won't have access anymore to the userspace
@@ -226,12 +240,15 @@ ENTRY(secondary_startup_64)
226 movl %eax,%fs 240 movl %eax,%fs
227 movl %eax,%gs 241 movl %eax,%gs
228 242
229 /* 243 /* Set up %gs.
230 * Setup up a dummy PDA. this is just for some early bootup code 244 *
231 * that does in_interrupt() 245 * The base of %gs always points to the bottom of the irqstack
232 */ 246 * union. If the stack protector canary is enabled, it is
247 * located at %gs:40. Note that, on SMP, the boot cpu uses
248 * init data section till per cpu areas are set up.
249 */
233 movl $MSR_GS_BASE,%ecx 250 movl $MSR_GS_BASE,%ecx
234 movq $empty_zero_page,%rax 251 movq initial_gs(%rip),%rax
235 movq %rax,%rdx 252 movq %rax,%rdx
236 shrq $32,%rdx 253 shrq $32,%rdx
237 wrmsr 254 wrmsr
@@ -257,6 +274,12 @@ ENTRY(secondary_startup_64)
257 .align 8 274 .align 8
258 ENTRY(initial_code) 275 ENTRY(initial_code)
259 .quad x86_64_start_kernel 276 .quad x86_64_start_kernel
277 ENTRY(initial_gs)
278#ifdef CONFIG_SMP
279 .quad __per_cpu_load
280#else
281 .quad PER_CPU_VAR(irq_stack_union)
282#endif
260 __FINITDATA 283 __FINITDATA
261 284
262 ENTRY(stack_start) 285 ENTRY(stack_start)
@@ -401,7 +424,8 @@ NEXT_PAGE(level2_spare_pgt)
401 .globl early_gdt_descr 424 .globl early_gdt_descr
402early_gdt_descr: 425early_gdt_descr:
403 .word GDT_ENTRIES*8-1 426 .word GDT_ENTRIES*8-1
404 .quad per_cpu__gdt_page 427early_gdt_descr_base:
428 .quad per_cpu__gdt_page
405 429
406ENTRY(phys_base) 430ENTRY(phys_base)
407 /* This must match the first entry in level2_kernel_pgt */ 431 /* This must match the first entry in level2_kernel_pgt */
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index cd759ad90690..64d5ad0b8add 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -628,11 +628,12 @@ static int hpet_cpuhp_notify(struct notifier_block *n,
628 628
629 switch (action & 0xf) { 629 switch (action & 0xf) {
630 case CPU_ONLINE: 630 case CPU_ONLINE:
631 INIT_DELAYED_WORK(&work.work, hpet_work); 631 INIT_DELAYED_WORK_ON_STACK(&work.work, hpet_work);
632 init_completion(&work.complete); 632 init_completion(&work.complete);
633 /* FIXME: add schedule_work_on() */ 633 /* FIXME: add schedule_work_on() */
634 schedule_delayed_work_on(cpu, &work.work, 0); 634 schedule_delayed_work_on(cpu, &work.work, 0);
635 wait_for_completion(&work.complete); 635 wait_for_completion(&work.complete);
636 destroy_timer_on_stack(&work.work.timer);
636 break; 637 break;
637 case CPU_DEAD: 638 case CPU_DEAD:
638 if (hdev) { 639 if (hdev) {
diff --git a/arch/x86/kernel/io_apic.c b/arch/x86/kernel/io_apic.c
index 157aafa45583..bfb7d734062a 100644
--- a/arch/x86/kernel/io_apic.c
+++ b/arch/x86/kernel/io_apic.c
@@ -357,7 +357,7 @@ set_extra_move_desc(struct irq_desc *desc, const struct cpumask *mask)
357 357
358 if (!cfg->move_in_progress) { 358 if (!cfg->move_in_progress) {
359 /* it means that domain is not changed */ 359 /* it means that domain is not changed */
360 if (!cpumask_intersects(&desc->affinity, mask)) 360 if (!cpumask_intersects(desc->affinity, mask))
361 cfg->move_desc_pending = 1; 361 cfg->move_desc_pending = 1;
362 } 362 }
363} 363}
@@ -580,9 +580,9 @@ set_desc_affinity(struct irq_desc *desc, const struct cpumask *mask)
580 if (assign_irq_vector(irq, cfg, mask)) 580 if (assign_irq_vector(irq, cfg, mask))
581 return BAD_APICID; 581 return BAD_APICID;
582 582
583 cpumask_and(&desc->affinity, cfg->domain, mask); 583 cpumask_and(desc->affinity, cfg->domain, mask);
584 set_extra_move_desc(desc, mask); 584 set_extra_move_desc(desc, mask);
585 return cpu_mask_to_apicid_and(&desc->affinity, cpu_online_mask); 585 return cpu_mask_to_apicid_and(desc->affinity, cpu_online_mask);
586} 586}
587 587
588static void 588static void
@@ -2382,7 +2382,7 @@ migrate_ioapic_irq_desc(struct irq_desc *desc, const struct cpumask *mask)
2382 if (cfg->move_in_progress) 2382 if (cfg->move_in_progress)
2383 send_cleanup_vector(cfg); 2383 send_cleanup_vector(cfg);
2384 2384
2385 cpumask_copy(&desc->affinity, mask); 2385 cpumask_copy(desc->affinity, mask);
2386} 2386}
2387 2387
2388static int migrate_irq_remapped_level_desc(struct irq_desc *desc) 2388static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
@@ -2404,11 +2404,11 @@ static int migrate_irq_remapped_level_desc(struct irq_desc *desc)
2404 } 2404 }
2405 2405
2406 /* everything is clear. we have right of way */ 2406 /* everything is clear. we have right of way */
2407 migrate_ioapic_irq_desc(desc, &desc->pending_mask); 2407 migrate_ioapic_irq_desc(desc, desc->pending_mask);
2408 2408
2409 ret = 0; 2409 ret = 0;
2410 desc->status &= ~IRQ_MOVE_PENDING; 2410 desc->status &= ~IRQ_MOVE_PENDING;
2411 cpumask_clear(&desc->pending_mask); 2411 cpumask_clear(desc->pending_mask);
2412 2412
2413unmask: 2413unmask:
2414 unmask_IO_APIC_irq_desc(desc); 2414 unmask_IO_APIC_irq_desc(desc);
@@ -2433,7 +2433,7 @@ static void ir_irq_migration(struct work_struct *work)
2433 continue; 2433 continue;
2434 } 2434 }
2435 2435
2436 desc->chip->set_affinity(irq, &desc->pending_mask); 2436 desc->chip->set_affinity(irq, desc->pending_mask);
2437 spin_unlock_irqrestore(&desc->lock, flags); 2437 spin_unlock_irqrestore(&desc->lock, flags);
2438 } 2438 }
2439 } 2439 }
@@ -2447,7 +2447,7 @@ static void set_ir_ioapic_affinity_irq_desc(struct irq_desc *desc,
2447{ 2447{
2448 if (desc->status & IRQ_LEVEL) { 2448 if (desc->status & IRQ_LEVEL) {
2449 desc->status |= IRQ_MOVE_PENDING; 2449 desc->status |= IRQ_MOVE_PENDING;
2450 cpumask_copy(&desc->pending_mask, mask); 2450 cpumask_copy(desc->pending_mask, mask);
2451 migrate_irq_remapped_level_desc(desc); 2451 migrate_irq_remapped_level_desc(desc);
2452 return; 2452 return;
2453 } 2453 }
@@ -2515,7 +2515,7 @@ static void irq_complete_move(struct irq_desc **descp)
2515 2515
2516 /* domain has not changed, but affinity did */ 2516 /* domain has not changed, but affinity did */
2517 me = smp_processor_id(); 2517 me = smp_processor_id();
2518 if (cpu_isset(me, desc->affinity)) { 2518 if (cpumask_test_cpu(me, desc->affinity)) {
2519 *descp = desc = move_irq_desc(desc, me); 2519 *descp = desc = move_irq_desc(desc, me);
2520 /* get the new one */ 2520 /* get the new one */
2521 cfg = desc->chip_data; 2521 cfg = desc->chip_data;
@@ -3182,7 +3182,7 @@ unsigned int create_irq_nr(unsigned int irq_want)
3182 3182
3183 irq = 0; 3183 irq = 0;
3184 spin_lock_irqsave(&vector_lock, flags); 3184 spin_lock_irqsave(&vector_lock, flags);
3185 for (new = irq_want; new < NR_IRQS; new++) { 3185 for (new = irq_want; new < nr_irqs; new++) {
3186 if (platform_legacy_irq(new)) 3186 if (platform_legacy_irq(new))
3187 continue; 3187 continue;
3188 3188
@@ -3257,6 +3257,9 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
3257 int err; 3257 int err;
3258 unsigned dest; 3258 unsigned dest;
3259 3259
3260 if (disable_apic)
3261 return -ENXIO;
3262
3260 cfg = irq_cfg(irq); 3263 cfg = irq_cfg(irq);
3261 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3264 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3262 if (err) 3265 if (err)
@@ -3691,6 +3694,9 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3691 struct irq_cfg *cfg; 3694 struct irq_cfg *cfg;
3692 int err; 3695 int err;
3693 3696
3697 if (disable_apic)
3698 return -ENXIO;
3699
3694 cfg = irq_cfg(irq); 3700 cfg = irq_cfg(irq);
3695 err = assign_irq_vector(irq, cfg, TARGET_CPUS); 3701 err = assign_irq_vector(irq, cfg, TARGET_CPUS);
3696 if (!err) { 3702 if (!err) {
@@ -3725,7 +3731,7 @@ int arch_setup_ht_irq(unsigned int irq, struct pci_dev *dev)
3725} 3731}
3726#endif /* CONFIG_HT_IRQ */ 3732#endif /* CONFIG_HT_IRQ */
3727 3733
3728#ifdef CONFIG_X86_64 3734#ifdef CONFIG_X86_UV
3729/* 3735/*
3730 * Re-target the irq to the specified CPU and enable the specified MMR located 3736 * Re-target the irq to the specified CPU and enable the specified MMR located
3731 * on the specified blade to allow the sending of MSIs to the specified CPU. 3737 * on the specified blade to allow the sending of MSIs to the specified CPU.
@@ -3815,6 +3821,22 @@ void __init probe_nr_irqs_gsi(void)
3815 nr_irqs_gsi = nr; 3821 nr_irqs_gsi = nr;
3816} 3822}
3817 3823
3824#ifdef CONFIG_SPARSE_IRQ
3825int __init arch_probe_nr_irqs(void)
3826{
3827 int nr;
3828
3829 nr = ((8 * nr_cpu_ids) > (32 * nr_ioapics) ?
3830 (NR_VECTORS + (8 * nr_cpu_ids)) :
3831 (NR_VECTORS + (32 * nr_ioapics)));
3832
3833 if (nr < nr_irqs && nr > nr_irqs_gsi)
3834 nr_irqs = nr;
3835
3836 return 0;
3837}
3838#endif
3839
3818/* -------------------------------------------------------------------------- 3840/* --------------------------------------------------------------------------
3819 ACPI-based IOAPIC Configuration 3841 ACPI-based IOAPIC Configuration
3820 -------------------------------------------------------------------------- */ 3842 -------------------------------------------------------------------------- */
@@ -4004,7 +4026,7 @@ void __init setup_ioapic_dest(void)
4004 */ 4026 */
4005 if (desc->status & 4027 if (desc->status &
4006 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET)) 4028 (IRQ_NO_BALANCING | IRQ_AFFINITY_SET))
4007 mask = &desc->affinity; 4029 mask = desc->affinity;
4008 else 4030 else
4009 mask = TARGET_CPUS; 4031 mask = TARGET_CPUS;
4010 4032
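
The arch_probe_nr_irqs() heuristic added above sizes the IRQ space from whichever is larger, 8 vectors per CPU or 32 per IO-APIC, on top of the vector count. A worked example with invented machine sizes (the NR_VECTORS value here is a placeholder):

/*
 * Worked example of the nr_irqs sizing heuristic.
 */
#include <stdio.h>

#define FAKE_NR_VECTORS 256

static int probe_nr_irqs(int nr_cpu_ids, int nr_ioapics)
{
	return (8 * nr_cpu_ids) > (32 * nr_ioapics) ?
	       (FAKE_NR_VECTORS + 8 * nr_cpu_ids) :
	       (FAKE_NR_VECTORS + 32 * nr_ioapics);
}

int main(void)
{
	/* 16 CPUs, 2 IO-APICs: 8*16 = 128 > 32*2 = 64, so 256 + 128 */
	printf("nr_irqs candidate: %d\n", probe_nr_irqs(16, 2));
	return 0;
}
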
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index 3973e2df7f87..8b30d0c2512c 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -36,11 +36,7 @@ void ack_bad_irq(unsigned int irq)
36#endif 36#endif
37} 37}
38 38
39#ifdef CONFIG_X86_32 39#define irq_stats(x) (&per_cpu(irq_stat, x))
40# define irq_stats(x) (&per_cpu(irq_stat, x))
41#else
42# define irq_stats(x) cpu_pda(x)
43#endif
44/* 40/*
45 * /proc/interrupts printing: 41 * /proc/interrupts printing:
46 */ 42 */
diff --git a/arch/x86/kernel/irq_32.c b/arch/x86/kernel/irq_32.c
index 74b9ff7341e9..e0f29be8ab0b 100644
--- a/arch/x86/kernel/irq_32.c
+++ b/arch/x86/kernel/irq_32.c
@@ -248,7 +248,7 @@ void fixup_irqs(void)
248 if (irq == 2) 248 if (irq == 2)
249 continue; 249 continue;
250 250
251 affinity = &desc->affinity; 251 affinity = desc->affinity;
252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) { 252 if (cpumask_any_and(affinity, cpu_online_mask) >= nr_cpu_ids) {
253 printk("Breaking affinity for irq %i\n", irq); 253 printk("Breaking affinity for irq %i\n", irq);
254 affinity = cpu_all_mask; 254 affinity = cpu_all_mask;
diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c
index 63c88e6ec025..018963aa6ee3 100644
--- a/arch/x86/kernel/irq_64.c
+++ b/arch/x86/kernel/irq_64.c
@@ -18,6 +18,13 @@
18#include <linux/smp.h> 18#include <linux/smp.h>
19#include <asm/io_apic.h> 19#include <asm/io_apic.h>
20#include <asm/idle.h> 20#include <asm/idle.h>
21#include <asm/apic.h>
22
23DEFINE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
24EXPORT_PER_CPU_SYMBOL(irq_stat);
25
26DEFINE_PER_CPU(struct pt_regs *, irq_regs);
27EXPORT_PER_CPU_SYMBOL(irq_regs);
21 28
22/* 29/*
23 * Probabilistic stack overflow check: 30 * Probabilistic stack overflow check:
@@ -100,7 +107,7 @@ void fixup_irqs(void)
100 /* interrupts are disabled at this point */ 107
101 spin_lock(&desc->lock); 108 spin_lock(&desc->lock);
102 109
103 affinity = &desc->affinity; 110 affinity = desc->affinity;
104 if (!irq_has_action(irq) || 111 if (!irq_has_action(irq) ||
105 cpumask_equal(affinity, cpu_online_mask)) { 112 cpumask_equal(affinity, cpu_online_mask)) {
106 spin_unlock(&desc->lock); 113 spin_unlock(&desc->lock);
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index 1507ad4e674d..bf629cadec1a 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -149,8 +149,15 @@ void __init native_init_IRQ(void)
149 */ 149 */
150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt); 150 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
151 151
152 /* IPI for invalidation */ 152 /* IPIs for invalidation */
153 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt); 153 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+0, invalidate_interrupt0);
154 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+1, invalidate_interrupt1);
155 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+2, invalidate_interrupt2);
156 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+3, invalidate_interrupt3);
157 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+4, invalidate_interrupt4);
158 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+5, invalidate_interrupt5);
159 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+6, invalidate_interrupt6);
160 alloc_intr_gate(INVALIDATE_TLB_VECTOR_START+7, invalidate_interrupt7);
154 161
155 /* IPI for generic function call */ 162 /* IPI for generic function call */
156 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt); 163 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
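The eight invalidate vectors registered above follow the 64-bit design: each sender picks one of NUM_INVALIDATE_TLB_VECTORS slots, so several TLB-shootdown requests can be in flight without sharing a single lock. A hedged sketch of the vector-selection arithmetic (the START value below is only an assumed example):

#include <stdio.h>

#define NUM_INVALIDATE_TLB_VECTORS	8
#define INVALIDATE_TLB_VECTOR_START	0xf0	/* assumption: illustrative value */

static int invalidate_vector_for(int sender_cpu)
{
	/* spread senders over eight vectors so concurrent flushes rarely collide */
	return INVALIDATE_TLB_VECTOR_START +
	       (sender_cpu % NUM_INVALIDATE_TLB_VECTORS);
}

int main(void)
{
	printf("cpu 11 would send on vector %#x\n", invalidate_vector_for(11));
	return 0;
}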
diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c
index 884d985b8b82..e948b28a5a9a 100644
--- a/arch/x86/kernel/kprobes.c
+++ b/arch/x86/kernel/kprobes.c
@@ -446,7 +446,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
446static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, 446static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs,
447 struct kprobe_ctlblk *kcb) 447 struct kprobe_ctlblk *kcb)
448{ 448{
449#if !defined(CONFIG_PREEMPT) || defined(CONFIG_PM) 449#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER)
450 if (p->ainsn.boostable == 1 && !p->post_handler) { 450 if (p->ainsn.boostable == 1 && !p->post_handler) {
451 /* Boost up -- we can execute copied instructions directly */ 451 /* Boost up -- we can execute copied instructions directly */
452 reset_current_kprobe(); 452 reset_current_kprobe();
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index ad36377dc935..fa6bb263892e 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -27,6 +27,7 @@
27#include <asm/e820.h> 27#include <asm/e820.h>
28#include <asm/trampoline.h> 28#include <asm/trampoline.h>
29#include <asm/setup.h> 29#include <asm/setup.h>
30#include <asm/smp.h>
30 31
31#include <mach_apic.h> 32#include <mach_apic.h>
32#ifdef CONFIG_X86_32 33#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c
index 7228979f1e7f..23b6d9e6e4f5 100644
--- a/arch/x86/kernel/nmi.c
+++ b/arch/x86/kernel/nmi.c
@@ -61,11 +61,7 @@ static int endflag __initdata;
61 61
62static inline unsigned int get_nmi_count(int cpu) 62static inline unsigned int get_nmi_count(int cpu)
63{ 63{
64#ifdef CONFIG_X86_64 64 return per_cpu(irq_stat, cpu).__nmi_count;
65 return cpu_pda(cpu)->__nmi_count;
66#else
67 return nmi_count(cpu);
68#endif
69} 65}
70 66
71static inline int mce_in_progress(void) 67static inline int mce_in_progress(void)
@@ -82,12 +78,8 @@ static inline int mce_in_progress(void)
82 */ 78 */
83static inline unsigned int get_timer_irqs(int cpu) 79static inline unsigned int get_timer_irqs(int cpu)
84{ 80{
85#ifdef CONFIG_X86_64
86 return read_pda(apic_timer_irqs) + read_pda(irq0_irqs);
87#else
88 return per_cpu(irq_stat, cpu).apic_timer_irqs + 81 return per_cpu(irq_stat, cpu).apic_timer_irqs +
89 per_cpu(irq_stat, cpu).irq0_irqs; 82 per_cpu(irq_stat, cpu).irq0_irqs;
90#endif
91} 83}
92 84
93#ifdef CONFIG_SMP 85#ifdef CONFIG_SMP
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e4c8fb608873..202514be5923 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -435,7 +435,6 @@ struct pv_mmu_ops pv_mmu_ops = {
435#endif /* PAGETABLE_LEVELS >= 3 */ 435#endif /* PAGETABLE_LEVELS >= 3 */
436 436
437 .pte_val = native_pte_val, 437 .pte_val = native_pte_val,
438 .pte_flags = native_pte_flags,
439 .pgd_val = native_pgd_val, 438 .pgd_val = native_pgd_val,
440 439
441 .make_pte = native_make_pte, 440 .make_pte = native_make_pte,
diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c
index a546f55c77b4..1a1ae8edc40c 100644
--- a/arch/x86/kernel/process_32.c
+++ b/arch/x86/kernel/process_32.c
@@ -66,9 +66,6 @@ asmlinkage void ret_from_fork(void) __asm__("ret_from_fork");
66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; 66DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
67EXPORT_PER_CPU_SYMBOL(current_task); 67EXPORT_PER_CPU_SYMBOL(current_task);
68 68
69DEFINE_PER_CPU(int, cpu_number);
70EXPORT_PER_CPU_SYMBOL(cpu_number);
71
72/* 69/*
73 * Return saved PC of a blocked thread. 70 * Return saved PC of a blocked thread.
74 */ 71 */
@@ -111,7 +108,6 @@ void cpu_idle(void)
111 play_dead(); 108 play_dead();
112 109
113 local_irq_disable(); 110 local_irq_disable();
114 __get_cpu_var(irq_stat).idle_timestamp = jiffies;
115 /* Don't trace irqs off for idle */ 111 /* Don't trace irqs off for idle */
116 stop_critical_timings(); 112 stop_critical_timings();
117 pm_idle(); 113 pm_idle();
@@ -591,7 +587,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
591 if (prev->gs | next->gs) 587 if (prev->gs | next->gs)
592 loadsegment(gs, next->gs); 588 loadsegment(gs, next->gs);
593 589
594 x86_write_percpu(current_task, next_p); 590 percpu_write(current_task, next_p);
595 591
596 return prev_p; 592 return prev_p;
597} 593}
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index 416fb9282f4f..c422eebb0c58 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -16,6 +16,7 @@
16 16
17#include <stdarg.h> 17#include <stdarg.h>
18 18
19#include <linux/stackprotector.h>
19#include <linux/cpu.h> 20#include <linux/cpu.h>
20#include <linux/errno.h> 21#include <linux/errno.h>
21#include <linux/sched.h> 22#include <linux/sched.h>
@@ -46,7 +47,6 @@
46#include <asm/processor.h> 47#include <asm/processor.h>
47#include <asm/i387.h> 48#include <asm/i387.h>
48#include <asm/mmu_context.h> 49#include <asm/mmu_context.h>
49#include <asm/pda.h>
50#include <asm/prctl.h> 50#include <asm/prctl.h>
51#include <asm/desc.h> 51#include <asm/desc.h>
52#include <asm/proto.h> 52#include <asm/proto.h>
@@ -57,6 +57,12 @@
57 57
58asmlinkage extern void ret_from_fork(void); 58asmlinkage extern void ret_from_fork(void);
59 59
60DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task;
61EXPORT_PER_CPU_SYMBOL(current_task);
62
63DEFINE_PER_CPU(unsigned long, old_rsp);
64static DEFINE_PER_CPU(unsigned char, is_idle);
65
60unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED; 66unsigned long kernel_thread_flags = CLONE_VM | CLONE_UNTRACED;
61 67
62static ATOMIC_NOTIFIER_HEAD(idle_notifier); 68static ATOMIC_NOTIFIER_HEAD(idle_notifier);
@@ -75,13 +81,13 @@ EXPORT_SYMBOL_GPL(idle_notifier_unregister);
75 81
76void enter_idle(void) 82void enter_idle(void)
77{ 83{
78 write_pda(isidle, 1); 84 percpu_write(is_idle, 1);
79 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL); 85 atomic_notifier_call_chain(&idle_notifier, IDLE_START, NULL);
80} 86}
81 87
82static void __exit_idle(void) 88static void __exit_idle(void)
83{ 89{
84 if (test_and_clear_bit_pda(0, isidle) == 0) 90 if (x86_test_and_clear_bit_percpu(0, is_idle) == 0)
85 return; 91 return;
86 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL); 92 atomic_notifier_call_chain(&idle_notifier, IDLE_END, NULL);
87} 93}
@@ -111,6 +117,17 @@ static inline void play_dead(void)
111void cpu_idle(void) 117void cpu_idle(void)
112{ 118{
113 current_thread_info()->status |= TS_POLLING; 119 current_thread_info()->status |= TS_POLLING;
120
121 /*
122 * If we're the non-boot CPU, nothing set the PDA stack
123 * canary up for us - and if we are the boot CPU we have
124 * a 0 stack canary. This is a good place for updating
 125 * it, as we won't ever return from this function (so the
 126 * invalid canaries already on the stack won't ever
127 * trigger):
128 */
129 boot_init_stack_canary();
130
114 /* endless idle loop with no priority at all */ 131 /* endless idle loop with no priority at all */
115 while (1) { 132 while (1) {
116 tick_nohz_stop_sched_tick(1); 133 tick_nohz_stop_sched_tick(1);
@@ -392,7 +409,7 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp)
392 load_gs_index(0); 409 load_gs_index(0);
393 regs->ip = new_ip; 410 regs->ip = new_ip;
394 regs->sp = new_sp; 411 regs->sp = new_sp;
395 write_pda(oldrsp, new_sp); 412 percpu_write(old_rsp, new_sp);
396 regs->cs = __USER_CS; 413 regs->cs = __USER_CS;
397 regs->ss = __USER_DS; 414 regs->ss = __USER_DS;
398 regs->flags = 0x200; 415 regs->flags = 0x200;
@@ -613,21 +630,13 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
613 /* 630 /*
614 * Switch the PDA and FPU contexts. 631 * Switch the PDA and FPU contexts.
615 */ 632 */
616 prev->usersp = read_pda(oldrsp); 633 prev->usersp = percpu_read(old_rsp);
617 write_pda(oldrsp, next->usersp); 634 percpu_write(old_rsp, next->usersp);
618 write_pda(pcurrent, next_p); 635 percpu_write(current_task, next_p);
619 636
620 write_pda(kernelstack, 637 percpu_write(kernel_stack,
621 (unsigned long)task_stack_page(next_p) + 638 (unsigned long)task_stack_page(next_p) +
622 THREAD_SIZE - PDA_STACKOFFSET); 639 THREAD_SIZE - KERNEL_STACK_OFFSET);
623#ifdef CONFIG_CC_STACKPROTECTOR
624 write_pda(stack_canary, next_p->stack_canary);
625 /*
626 * Build time only check to make sure the stack_canary is at
627 * offset 40 in the pda; this is a gcc ABI requirement
628 */
629 BUILD_BUG_ON(offsetof(struct x8664_pda, stack_canary) != 40);
630#endif
631 640
632 /* 641 /*
633 * Now maybe reload the debug registers and handle I/O bitmaps 642 * Now maybe reload the debug registers and handle I/O bitmaps
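The removed write_pda(stack_canary, ...) and the offset-40 BUILD_BUG_ON are no longer needed here because, in this series, the canary lives at a fixed offset inside the per-cpu area that %gs now points at. The gcc stack-protector ABI still expects the canary at %gs:40, which a layout like the following sketch would satisfy (the struct and field names are assumptions for illustration, not the kernel's definitions):

#include <assert.h>
#include <stddef.h>

struct canary_slot {
	char		gs_base_pad[40];	/* whatever the per-cpu head keeps before the canary */
	unsigned long	stack_canary;		/* must land at offset 40 for -fstack-protector */
};

static_assert(offsetof(struct canary_slot, stack_canary) == 40,
	      "gcc expects the canary at %gs:40");

int main(void)
{
	return 0;
}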
diff --git a/arch/x86/kernel/setup_percpu.c b/arch/x86/kernel/setup_percpu.c
index bf63de72b643..0d1e7ac439f4 100644
--- a/arch/x86/kernel/setup_percpu.c
+++ b/arch/x86/kernel/setup_percpu.c
@@ -13,146 +13,46 @@
13#include <asm/mpspec.h> 13#include <asm/mpspec.h>
14#include <asm/apicdef.h> 14#include <asm/apicdef.h>
15#include <asm/highmem.h> 15#include <asm/highmem.h>
16#include <asm/proto.h>
16#include <asm/cpumask.h> 17#include <asm/cpumask.h>
18#include <asm/cpu.h>
17 19
18#ifdef CONFIG_X86_LOCAL_APIC 20#ifdef CONFIG_DEBUG_PER_CPU_MAPS
19unsigned int num_processors; 21# define DBG(x...) printk(KERN_DEBUG x)
20unsigned disabled_cpus __cpuinitdata;
21/* Processor that is doing the boot up */
22unsigned int boot_cpu_physical_apicid = -1U;
23EXPORT_SYMBOL(boot_cpu_physical_apicid);
24unsigned int max_physical_apicid;
25
26/* Bitmask of physically existing CPUs */
27physid_mask_t phys_cpu_present_map;
28#endif
29
30/* map cpu index to physical APIC ID */
31DEFINE_EARLY_PER_CPU(u16, x86_cpu_to_apicid, BAD_APICID);
32DEFINE_EARLY_PER_CPU(u16, x86_bios_cpu_apicid, BAD_APICID);
33EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_apicid);
34EXPORT_EARLY_PER_CPU_SYMBOL(x86_bios_cpu_apicid);
35
36#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
37#define X86_64_NUMA 1
38
39/* map cpu index to node index */
40DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
41EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
42
43/* which logical CPUs are on which nodes */
44cpumask_t *node_to_cpumask_map;
45EXPORT_SYMBOL(node_to_cpumask_map);
46
47/* setup node_to_cpumask_map */
48static void __init setup_node_to_cpumask_map(void);
49
50#else 22#else
51static inline void setup_node_to_cpumask_map(void) { } 23# define DBG(x...)
52#endif 24#endif
53 25
54#if defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) && defined(CONFIG_X86_SMP) 26DEFINE_PER_CPU(int, cpu_number);
55/* 27EXPORT_PER_CPU_SYMBOL(cpu_number);
56 * Copy data used in early init routines from the initial arrays to the
57 * per cpu data areas. These arrays then become expendable and the
58 * *_early_ptr's are zeroed indicating that the static arrays are gone.
59 */
60static void __init setup_per_cpu_maps(void)
61{
62 int cpu;
63 28
64 for_each_possible_cpu(cpu) { 29#ifdef CONFIG_X86_64
65 per_cpu(x86_cpu_to_apicid, cpu) = 30#define BOOT_PERCPU_OFFSET ((unsigned long)__per_cpu_load)
66 early_per_cpu_map(x86_cpu_to_apicid, cpu); 31#else
67 per_cpu(x86_bios_cpu_apicid, cpu) = 32#define BOOT_PERCPU_OFFSET 0
68 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
69#ifdef X86_64_NUMA
70 per_cpu(x86_cpu_to_node_map, cpu) =
71 early_per_cpu_map(x86_cpu_to_node_map, cpu);
72#endif 33#endif
73 }
74 34
75 /* indicate the early static arrays will soon be gone */ 35DEFINE_PER_CPU(unsigned long, this_cpu_off) = BOOT_PERCPU_OFFSET;
76 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL; 36EXPORT_PER_CPU_SYMBOL(this_cpu_off);
77 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
78#ifdef X86_64_NUMA
79 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
80#endif
81}
82 37
83#ifdef CONFIG_X86_32 38unsigned long __per_cpu_offset[NR_CPUS] __read_mostly = {
84/* 39 [0 ... NR_CPUS-1] = BOOT_PERCPU_OFFSET,
85 * Great future not-so-futuristic plan: make i386 and x86_64 do it 40};
86 * the same way
87 */
88unsigned long __per_cpu_offset[NR_CPUS] __read_mostly;
89EXPORT_SYMBOL(__per_cpu_offset); 41EXPORT_SYMBOL(__per_cpu_offset);
90static inline void setup_cpu_pda_map(void) { }
91
92#elif !defined(CONFIG_SMP)
93static inline void setup_cpu_pda_map(void) { }
94
95#else /* CONFIG_SMP && CONFIG_X86_64 */
96
97/*
98 * Allocate cpu_pda pointer table and array via alloc_bootmem.
99 */
100static void __init setup_cpu_pda_map(void)
101{
102 char *pda;
103 struct x8664_pda **new_cpu_pda;
104 unsigned long size;
105 int cpu;
106
107 size = roundup(sizeof(struct x8664_pda), cache_line_size());
108
109 /* allocate cpu_pda array and pointer table */
110 {
111 unsigned long tsize = nr_cpu_ids * sizeof(void *);
112 unsigned long asize = size * (nr_cpu_ids - 1);
113 42
114 tsize = roundup(tsize, cache_line_size()); 43static inline void setup_percpu_segment(int cpu)
115 new_cpu_pda = alloc_bootmem(tsize + asize);
116 pda = (char *)new_cpu_pda + tsize;
117 }
118
119 /* initialize pointer table to static pda's */
120 for_each_possible_cpu(cpu) {
121 if (cpu == 0) {
122 /* leave boot cpu pda in place */
123 new_cpu_pda[0] = cpu_pda(0);
124 continue;
125 }
126 new_cpu_pda[cpu] = (struct x8664_pda *)pda;
127 new_cpu_pda[cpu]->in_bootmem = 1;
128 pda += size;
129 }
130
131 /* point to new pointer table */
132 _cpu_pda = new_cpu_pda;
133}
134
135#endif /* CONFIG_SMP && CONFIG_X86_64 */
136
137#ifdef CONFIG_X86_64
138
139/* correctly size the local cpu masks */
140static void setup_cpu_local_masks(void)
141{ 44{
142 alloc_bootmem_cpumask_var(&cpu_initialized_mask); 45#ifdef CONFIG_X86_32
143 alloc_bootmem_cpumask_var(&cpu_callin_mask); 46 struct desc_struct gdt;
144 alloc_bootmem_cpumask_var(&cpu_callout_mask);
145 alloc_bootmem_cpumask_var(&cpu_sibling_setup_mask);
146}
147
148#else /* CONFIG_X86_32 */
149 47
150static inline void setup_cpu_local_masks(void) 48 pack_descriptor(&gdt, per_cpu_offset(cpu), 0xFFFFF,
151{ 49 0x2 | DESCTYPE_S, 0x8);
50 gdt.s = 1;
51 write_gdt_entry(get_cpu_gdt_table(cpu),
52 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
53#endif
152} 54}
153 55
154#endif /* CONFIG_X86_32 */
155
156/* 56/*
157 * Great future plan: 57 * Great future plan:
158 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data. 58 * Declare PDA itself and support (irqstack,tss,pgd) as per cpu data.
@@ -160,18 +60,12 @@ static inline void setup_cpu_local_masks(void)
160 */ 60 */
161void __init setup_per_cpu_areas(void) 61void __init setup_per_cpu_areas(void)
162{ 62{
163 ssize_t size, old_size; 63 ssize_t size;
164 char *ptr; 64 char *ptr;
165 int cpu; 65 int cpu;
166 unsigned long align = 1;
167
168 /* Setup cpu_pda map */
169 setup_cpu_pda_map();
170 66
171 /* Copy section for each CPU (we discard the original) */ 67 /* Copy section for each CPU (we discard the original) */
172 old_size = PERCPU_ENOUGH_ROOM; 68 size = roundup(PERCPU_ENOUGH_ROOM, PAGE_SIZE);
173 align = max_t(unsigned long, PAGE_SIZE, align);
174 size = roundup(old_size, align);
175 69
176 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n", 70 pr_info("NR_CPUS:%d nr_cpumask_bits:%d nr_cpu_ids:%d nr_node_ids:%d\n",
177 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids); 71 NR_CPUS, nr_cpumask_bits, nr_cpu_ids, nr_node_ids);
@@ -180,30 +74,67 @@ void __init setup_per_cpu_areas(void)
180 74
181 for_each_possible_cpu(cpu) { 75 for_each_possible_cpu(cpu) {
182#ifndef CONFIG_NEED_MULTIPLE_NODES 76#ifndef CONFIG_NEED_MULTIPLE_NODES
183 ptr = __alloc_bootmem(size, align, 77 ptr = alloc_bootmem_pages(size);
184 __pa(MAX_DMA_ADDRESS));
185#else 78#else
186 int node = early_cpu_to_node(cpu); 79 int node = early_cpu_to_node(cpu);
187 if (!node_online(node) || !NODE_DATA(node)) { 80 if (!node_online(node) || !NODE_DATA(node)) {
188 ptr = __alloc_bootmem(size, align, 81 ptr = alloc_bootmem_pages(size);
189 __pa(MAX_DMA_ADDRESS));
190 pr_info("cpu %d has no node %d or node-local memory\n", 82 pr_info("cpu %d has no node %d or node-local memory\n",
191 cpu, node); 83 cpu, node);
192 pr_debug("per cpu data for cpu%d at %016lx\n", 84 pr_debug("per cpu data for cpu%d at %016lx\n",
193 cpu, __pa(ptr)); 85 cpu, __pa(ptr));
194 } else { 86 } else {
195 ptr = __alloc_bootmem_node(NODE_DATA(node), size, align, 87 ptr = alloc_bootmem_pages_node(NODE_DATA(node), size);
196 __pa(MAX_DMA_ADDRESS));
197 pr_debug("per cpu data for cpu%d on node%d at %016lx\n", 88 pr_debug("per cpu data for cpu%d on node%d at %016lx\n",
198 cpu, node, __pa(ptr)); 89 cpu, node, __pa(ptr));
199 } 90 }
200#endif 91#endif
92
93 memcpy(ptr, __per_cpu_load, __per_cpu_end - __per_cpu_start);
201 per_cpu_offset(cpu) = ptr - __per_cpu_start; 94 per_cpu_offset(cpu) = ptr - __per_cpu_start;
202 memcpy(ptr, __per_cpu_start, __per_cpu_end - __per_cpu_start); 95 per_cpu(this_cpu_off, cpu) = per_cpu_offset(cpu);
96 per_cpu(cpu_number, cpu) = cpu;
97 setup_percpu_segment(cpu);
98 /*
99 * Copy data used in early init routines from the
100 * initial arrays to the per cpu data areas. These
101 * arrays then become expendable and the *_early_ptr's
102 * are zeroed indicating that the static arrays are
103 * gone.
104 */
105#ifdef CONFIG_X86_LOCAL_APIC
106 per_cpu(x86_cpu_to_apicid, cpu) =
107 early_per_cpu_map(x86_cpu_to_apicid, cpu);
108 per_cpu(x86_bios_cpu_apicid, cpu) =
109 early_per_cpu_map(x86_bios_cpu_apicid, cpu);
110#endif
111#ifdef CONFIG_X86_64
112 per_cpu(irq_stack_ptr, cpu) =
113 per_cpu(irq_stack_union.irq_stack, cpu) +
114 IRQ_STACK_SIZE - 64;
115#ifdef CONFIG_NUMA
116 per_cpu(x86_cpu_to_node_map, cpu) =
117 early_per_cpu_map(x86_cpu_to_node_map, cpu);
118#endif
119#endif
120 /*
121 * Up to this point, the boot CPU has been using .data.init
122 * area. Reload any changed state for the boot CPU.
123 */
124 if (cpu == boot_cpu_id)
125 switch_to_new_gdt();
126
127 DBG("PERCPU: cpu %4d %p\n", cpu, ptr);
203 } 128 }
204 129
205 /* Setup percpu data maps */ 130 /* indicate the early static arrays will soon be gone */
206 setup_per_cpu_maps(); 131#ifdef CONFIG_X86_LOCAL_APIC
132 early_per_cpu_ptr(x86_cpu_to_apicid) = NULL;
133 early_per_cpu_ptr(x86_bios_cpu_apicid) = NULL;
134#endif
135#if defined(CONFIG_X86_64) && defined(CONFIG_NUMA)
136 early_per_cpu_ptr(x86_cpu_to_node_map) = NULL;
137#endif
207 138
208 /* Setup node to cpumask map */ 139 /* Setup node to cpumask map */
209 setup_node_to_cpumask_map(); 140 setup_node_to_cpumask_map();
@@ -211,199 +142,3 @@ void __init setup_per_cpu_areas(void)
211 /* Setup cpu initialized, callin, callout masks */ 142 /* Setup cpu initialized, callin, callout masks */
212 setup_cpu_local_masks(); 143 setup_cpu_local_masks();
213} 144}
214
215#endif
216
217#ifdef X86_64_NUMA
218
219/*
220 * Allocate node_to_cpumask_map based on number of available nodes
221 * Requires node_possible_map to be valid.
222 *
223 * Note: node_to_cpumask() is not valid until after this is done.
224 */
225static void __init setup_node_to_cpumask_map(void)
226{
227 unsigned int node, num = 0;
228 cpumask_t *map;
229
230 /* setup nr_node_ids if not done yet */
231 if (nr_node_ids == MAX_NUMNODES) {
232 for_each_node_mask(node, node_possible_map)
233 num = node;
234 nr_node_ids = num + 1;
235 }
236
237 /* allocate the map */
238 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
239
240 pr_debug("Node to cpumask map at %p for %d nodes\n",
241 map, nr_node_ids);
242
243 /* node_to_cpumask() will now work */
244 node_to_cpumask_map = map;
245}
246
247void __cpuinit numa_set_node(int cpu, int node)
248{
249 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
250
251 if (cpu_pda(cpu) && node != NUMA_NO_NODE)
252 cpu_pda(cpu)->nodenumber = node;
253
254 if (cpu_to_node_map)
255 cpu_to_node_map[cpu] = node;
256
257 else if (per_cpu_offset(cpu))
258 per_cpu(x86_cpu_to_node_map, cpu) = node;
259
260 else
261 pr_debug("Setting node for non-present cpu %d\n", cpu);
262}
263
264void __cpuinit numa_clear_node(int cpu)
265{
266 numa_set_node(cpu, NUMA_NO_NODE);
267}
268
269#ifndef CONFIG_DEBUG_PER_CPU_MAPS
270
271void __cpuinit numa_add_cpu(int cpu)
272{
273 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
274}
275
276void __cpuinit numa_remove_cpu(int cpu)
277{
278 cpu_clear(cpu, node_to_cpumask_map[cpu_to_node(cpu)]);
279}
280
281#else /* CONFIG_DEBUG_PER_CPU_MAPS */
282
283/*
284 * --------- debug versions of the numa functions ---------
285 */
286static void __cpuinit numa_set_cpumask(int cpu, int enable)
287{
288 int node = cpu_to_node(cpu);
289 cpumask_t *mask;
290 char buf[64];
291
292 if (node_to_cpumask_map == NULL) {
293 printk(KERN_ERR "node_to_cpumask_map NULL\n");
294 dump_stack();
295 return;
296 }
297
298 mask = &node_to_cpumask_map[node];
299 if (enable)
300 cpu_set(cpu, *mask);
301 else
302 cpu_clear(cpu, *mask);
303
304 cpulist_scnprintf(buf, sizeof(buf), mask);
305 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
306 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
307}
308
309void __cpuinit numa_add_cpu(int cpu)
310{
311 numa_set_cpumask(cpu, 1);
312}
313
314void __cpuinit numa_remove_cpu(int cpu)
315{
316 numa_set_cpumask(cpu, 0);
317}
318
319int cpu_to_node(int cpu)
320{
321 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
322 printk(KERN_WARNING
323 "cpu_to_node(%d): usage too early!\n", cpu);
324 dump_stack();
325 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
326 }
327 return per_cpu(x86_cpu_to_node_map, cpu);
328}
329EXPORT_SYMBOL(cpu_to_node);
330
331/*
332 * Same function as cpu_to_node() but used if called before the
333 * per_cpu areas are setup.
334 */
335int early_cpu_to_node(int cpu)
336{
337 if (early_per_cpu_ptr(x86_cpu_to_node_map))
338 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
339
340 if (!per_cpu_offset(cpu)) {
341 printk(KERN_WARNING
342 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
343 dump_stack();
344 return NUMA_NO_NODE;
345 }
346 return per_cpu(x86_cpu_to_node_map, cpu);
347}
348
349
350/* empty cpumask */
351static const cpumask_t cpu_mask_none;
352
353/*
354 * Returns a pointer to the bitmask of CPUs on Node 'node'.
355 */
356const cpumask_t *cpumask_of_node(int node)
357{
358 if (node_to_cpumask_map == NULL) {
359 printk(KERN_WARNING
360 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
361 node);
362 dump_stack();
363 return (const cpumask_t *)&cpu_online_map;
364 }
365 if (node >= nr_node_ids) {
366 printk(KERN_WARNING
367 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
368 node, nr_node_ids);
369 dump_stack();
370 return &cpu_mask_none;
371 }
372 return &node_to_cpumask_map[node];
373}
374EXPORT_SYMBOL(cpumask_of_node);
375
376/*
377 * Returns a bitmask of CPUs on Node 'node'.
378 *
379 * Side note: this function creates the returned cpumask on the stack
380 * so with a high NR_CPUS count, excessive stack space is used. The
381 * node_to_cpumask_ptr function should be used whenever possible.
382 */
383cpumask_t node_to_cpumask(int node)
384{
385 if (node_to_cpumask_map == NULL) {
386 printk(KERN_WARNING
387 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
388 dump_stack();
389 return cpu_online_map;
390 }
391 if (node >= nr_node_ids) {
392 printk(KERN_WARNING
393 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
394 node, nr_node_ids);
395 dump_stack();
396 return cpu_mask_none;
397 }
398 return node_to_cpumask_map[node];
399}
400EXPORT_SYMBOL(node_to_cpumask);
401
402/*
403 * --------- end of debug versions of the numa functions ---------
404 */
405
406#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
407
408#endif /* X86_64_NUMA */
409
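The rewritten setup_per_cpu_areas() above boils down to: allocate one copy of the .data.percpu template per possible CPU, remember each copy's offset from the template, and address per-cpu variables as template address plus offset. A userspace analogue of that offset scheme, assuming a single int stands in for the whole per-cpu section:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NR_CPUS 4

static int template_counter = 42;		/* the "template" per-cpu variable */

static long per_cpu_offset[NR_CPUS];		/* ptr - __per_cpu_start, per cpu */

#define per_cpu_ptr(tmpl, cpu) \
	((typeof(tmpl))((char *)(tmpl) + per_cpu_offset[cpu]))

int main(void)
{
	size_t size = sizeof(template_counter);	/* stands in for __per_cpu_end - __per_cpu_start */

	for (int cpu = 0; cpu < NR_CPUS; cpu++) {
		char *copy = malloc(size);

		memcpy(copy, &template_counter, size);	/* copy the template section */
		per_cpu_offset[cpu] = copy - (char *)&template_counter;
	}

	(*per_cpu_ptr(&template_counter, 2))++;
	printf("cpu2 copy = %d, template still = %d\n",
	       *per_cpu_ptr(&template_counter, 2), template_counter);
	return 0;
}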
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index cf34eb37fbee..7fc78b019815 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -649,9 +649,16 @@ badframe:
649} 649}
650 650
651#ifdef CONFIG_X86_32 651#ifdef CONFIG_X86_32
652asmlinkage int sys_rt_sigreturn(struct pt_regs regs) 652/*
653 * Note: do not pass in pt_regs directly as with tail-call optimization
654 * GCC will incorrectly stomp on the caller's frame and corrupt user-space
655 * register state:
656 */
657asmlinkage int sys_rt_sigreturn(unsigned long __unused)
653{ 658{
654 return do_rt_sigreturn(&regs); 659 struct pt_regs *regs = (struct pt_regs *)&__unused;
660
661 return do_rt_sigreturn(regs);
655} 662}
656#else /* !CONFIG_X86_32 */ 663#else /* !CONFIG_X86_32 */
657asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) 664asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 6c2b8444b830..f9dbcff43546 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -62,6 +62,7 @@
62#include <asm/vmi.h> 62#include <asm/vmi.h>
63#include <asm/genapic.h> 63#include <asm/genapic.h>
64#include <asm/setup.h> 64#include <asm/setup.h>
65#include <asm/uv/uv.h>
65#include <linux/mc146818rtc.h> 66#include <linux/mc146818rtc.h>
66 67
67#include <mach_apic.h> 68#include <mach_apic.h>
@@ -744,52 +745,6 @@ static void __cpuinit do_fork_idle(struct work_struct *work)
744 complete(&c_idle->done); 745 complete(&c_idle->done);
745} 746}
746 747
747#ifdef CONFIG_X86_64
748
749/* __ref because it's safe to call free_bootmem when after_bootmem == 0. */
750static void __ref free_bootmem_pda(struct x8664_pda *oldpda)
751{
752 if (!after_bootmem)
753 free_bootmem((unsigned long)oldpda, sizeof(*oldpda));
754}
755
756/*
757 * Allocate node local memory for the AP pda.
758 *
759 * Must be called after the _cpu_pda pointer table is initialized.
760 */
761int __cpuinit get_local_pda(int cpu)
762{
763 struct x8664_pda *oldpda, *newpda;
764 unsigned long size = sizeof(struct x8664_pda);
765 int node = cpu_to_node(cpu);
766
767 if (cpu_pda(cpu) && !cpu_pda(cpu)->in_bootmem)
768 return 0;
769
770 oldpda = cpu_pda(cpu);
771 newpda = kmalloc_node(size, GFP_ATOMIC, node);
772 if (!newpda) {
773 printk(KERN_ERR "Could not allocate node local PDA "
774 "for CPU %d on node %d\n", cpu, node);
775
776 if (oldpda)
777 return 0; /* have a usable pda */
778 else
779 return -1;
780 }
781
782 if (oldpda) {
783 memcpy(newpda, oldpda, size);
784 free_bootmem_pda(oldpda);
785 }
786
787 newpda->in_bootmem = 0;
788 cpu_pda(cpu) = newpda;
789 return 0;
790}
791#endif /* CONFIG_X86_64 */
792
793static int __cpuinit do_boot_cpu(int apicid, int cpu) 748static int __cpuinit do_boot_cpu(int apicid, int cpu)
794/* 749/*
795 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad 750 * NOTE - on most systems this is a PHYSICAL apic ID, but on multiquad
@@ -807,16 +762,6 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
807 }; 762 };
808 INIT_WORK(&c_idle.work, do_fork_idle); 763 INIT_WORK(&c_idle.work, do_fork_idle);
809 764
810#ifdef CONFIG_X86_64
811 /* Allocate node local memory for AP pdas */
812 if (cpu > 0) {
813 boot_error = get_local_pda(cpu);
814 if (boot_error)
815 goto restore_state;
816 /* if can't get pda memory, can't start cpu */
817 }
818#endif
819
820 alternatives_smp_switch(1); 765 alternatives_smp_switch(1);
821 766
822 c_idle.idle = get_idle_for_cpu(cpu); 767 c_idle.idle = get_idle_for_cpu(cpu);
@@ -846,14 +791,16 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu)
846 791
847 set_idle_for_cpu(cpu, c_idle.idle); 792 set_idle_for_cpu(cpu, c_idle.idle);
848do_rest: 793do_rest:
849#ifdef CONFIG_X86_32
850 per_cpu(current_task, cpu) = c_idle.idle; 794 per_cpu(current_task, cpu) = c_idle.idle;
851 init_gdt(cpu); 795#ifdef CONFIG_X86_32
852 /* Stack for startup_32 can be just as for start_secondary onwards */ 796 /* Stack for startup_32 can be just as for start_secondary onwards */
853 irq_ctx_init(cpu); 797 irq_ctx_init(cpu);
854#else 798#else
855 cpu_pda(cpu)->pcurrent = c_idle.idle;
856 clear_tsk_thread_flag(c_idle.idle, TIF_FORK); 799 clear_tsk_thread_flag(c_idle.idle, TIF_FORK);
800 initial_gs = per_cpu_offset(cpu);
801 per_cpu(kernel_stack, cpu) =
802 (unsigned long)task_stack_page(c_idle.idle) -
803 KERNEL_STACK_OFFSET + THREAD_SIZE;
857#endif 804#endif
858 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 805 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
859 initial_code = (unsigned long)start_secondary; 806 initial_code = (unsigned long)start_secondary;
@@ -930,9 +877,7 @@ do_rest:
930 inquire_remote_apic(apicid); 877 inquire_remote_apic(apicid);
931 } 878 }
932 } 879 }
933#ifdef CONFIG_X86_64 880
934restore_state:
935#endif
936 if (boot_error) { 881 if (boot_error) {
937 /* Try to put things back the way they were before ... */ 882 /* Try to put things back the way they were before ... */
938 numa_remove_cpu(cpu); /* was set by numa_add_cpu */ 883 numa_remove_cpu(cpu); /* was set by numa_add_cpu */
@@ -1124,6 +1069,7 @@ static int __init smp_sanity_check(unsigned max_cpus)
1124 printk(KERN_ERR "... forcing use of dummy APIC emulation." 1069 printk(KERN_ERR "... forcing use of dummy APIC emulation."
1125 "(tell your hw vendor)\n"); 1070 "(tell your hw vendor)\n");
1126 smpboot_clear_io_apic(); 1071 smpboot_clear_io_apic();
1072 disable_ioapic_setup();
1127 return -1; 1073 return -1;
1128 } 1074 }
1129 1075
@@ -1239,9 +1185,6 @@ out:
1239void __init native_smp_prepare_boot_cpu(void) 1185void __init native_smp_prepare_boot_cpu(void)
1240{ 1186{
1241 int me = smp_processor_id(); 1187 int me = smp_processor_id();
1242#ifdef CONFIG_X86_32
1243 init_gdt(me);
1244#endif
1245 switch_to_new_gdt(); 1188 switch_to_new_gdt();
1246 /* already set me in cpu_online_mask in boot_cpu_init() */ 1189 /* already set me in cpu_online_mask in boot_cpu_init() */
1247 cpumask_set_cpu(me, cpu_callout_mask); 1190 cpumask_set_cpu(me, cpu_callout_mask);
diff --git a/arch/x86/kernel/smpcommon.c b/arch/x86/kernel/smpcommon.c
deleted file mode 100644
index 397e309839dd..000000000000
--- a/arch/x86/kernel/smpcommon.c
+++ /dev/null
@@ -1,30 +0,0 @@
1/*
2 * SMP stuff which is common to all sub-architectures.
3 */
4#include <linux/module.h>
5#include <asm/smp.h>
6
7#ifdef CONFIG_X86_32
8DEFINE_PER_CPU(unsigned long, this_cpu_off);
9EXPORT_PER_CPU_SYMBOL(this_cpu_off);
10
11/*
12 * Initialize the CPU's GDT. This is either the boot CPU doing itself
13 * (still using the master per-cpu area), or a CPU doing it for a
14 * secondary which will soon come up.
15 */
16__cpuinit void init_gdt(int cpu)
17{
18 struct desc_struct gdt;
19
20 pack_descriptor(&gdt, __per_cpu_offset[cpu], 0xFFFFF,
21 0x2 | DESCTYPE_S, 0x8);
22 gdt.s = 1;
23
24 write_gdt_entry(get_cpu_gdt_table(cpu),
25 GDT_ENTRY_PERCPU, &gdt, DESCTYPE_S);
26
27 per_cpu(this_cpu_off, cpu) = __per_cpu_offset[cpu];
28 per_cpu(cpu_number, cpu) = cpu;
29}
30#endif
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
index d44395ff34c3..e2e86a08f31d 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
@@ -88,7 +88,7 @@ ENTRY(sys_call_table)
88 .long sys_uselib 88 .long sys_uselib
89 .long sys_swapon 89 .long sys_swapon
90 .long sys_reboot 90 .long sys_reboot
91 .long old_readdir 91 .long sys_old_readdir
92 .long old_mmap /* 90 */ 92 .long old_mmap /* 90 */
93 .long sys_munmap 93 .long sys_munmap
94 .long sys_truncate 94 .long sys_truncate
diff --git a/arch/x86/kernel/tlb_32.c b/arch/x86/kernel/tlb_32.c
deleted file mode 100644
index ce5054642247..000000000000
--- a/arch/x86/kernel/tlb_32.c
+++ /dev/null
@@ -1,256 +0,0 @@
1#include <linux/spinlock.h>
2#include <linux/cpu.h>
3#include <linux/interrupt.h>
4
5#include <asm/tlbflush.h>
6
7DEFINE_PER_CPU(struct tlb_state, cpu_tlbstate)
8 ____cacheline_aligned = { &init_mm, 0, };
9
10/* must come after the send_IPI functions above for inlining */
11#include <mach_ipi.h>
12
13/*
14 * Smarter SMP flushing macros.
15 * c/o Linus Torvalds.
16 *
17 * These mean you can really definitely utterly forget about
18 * writing to user space from interrupts. (Its not allowed anyway).
19 *
20 * Optimizations Manfred Spraul <manfred@colorfullife.com>
21 */
22
23static cpumask_t flush_cpumask;
24static struct mm_struct *flush_mm;
25static unsigned long flush_va;
26static DEFINE_SPINLOCK(tlbstate_lock);
27
28/*
29 * We cannot call mmdrop() because we are in interrupt context,
30 * instead update mm->cpu_vm_mask.
31 *
32 * We need to reload %cr3 since the page tables may be going
33 * away from under us..
34 */
35void leave_mm(int cpu)
36{
37 BUG_ON(x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK);
38 cpu_clear(cpu, x86_read_percpu(cpu_tlbstate.active_mm)->cpu_vm_mask);
39 load_cr3(swapper_pg_dir);
40}
41EXPORT_SYMBOL_GPL(leave_mm);
42
43/*
44 *
45 * The flush IPI assumes that a thread switch happens in this order:
46 * [cpu0: the cpu that switches]
47 * 1) switch_mm() either 1a) or 1b)
48 * 1a) thread switch to a different mm
49 * 1a1) cpu_clear(cpu, old_mm->cpu_vm_mask);
50 * Stop ipi delivery for the old mm. This is not synchronized with
51 * the other cpus, but smp_invalidate_interrupt ignore flush ipis
52 * for the wrong mm, and in the worst case we perform a superfluous
53 * tlb flush.
54 * 1a2) set cpu_tlbstate to TLBSTATE_OK
55 * Now the smp_invalidate_interrupt won't call leave_mm if cpu0
56 * was in lazy tlb mode.
57 * 1a3) update cpu_tlbstate[].active_mm
58 * Now cpu0 accepts tlb flushes for the new mm.
59 * 1a4) cpu_set(cpu, new_mm->cpu_vm_mask);
60 * Now the other cpus will send tlb flush ipis.
61 * 1a4) change cr3.
62 * 1b) thread switch without mm change
63 * cpu_tlbstate[].active_mm is correct, cpu0 already handles
64 * flush ipis.
65 * 1b1) set cpu_tlbstate to TLBSTATE_OK
66 * 1b2) test_and_set the cpu bit in cpu_vm_mask.
67 * Atomically set the bit [other cpus will start sending flush ipis],
68 * and test the bit.
69 * 1b3) if the bit was 0: leave_mm was called, flush the tlb.
70 * 2) switch %%esp, ie current
71 *
72 * The interrupt must handle 2 special cases:
73 * - cr3 is changed before %%esp, ie. it cannot use current->{active_,}mm.
74 * - the cpu performs speculative tlb reads, i.e. even if the cpu only
75 * runs in kernel space, the cpu could load tlb entries for user space
76 * pages.
77 *
78 * The good news is that cpu_tlbstate is local to each cpu, no
79 * write/read ordering problems.
80 */
81
82/*
83 * TLB flush IPI:
84 *
85 * 1) Flush the tlb entries if the cpu uses the mm that's being flushed.
86 * 2) Leave the mm if we are in the lazy tlb mode.
87 */
88
89void smp_invalidate_interrupt(struct pt_regs *regs)
90{
91 unsigned long cpu;
92
93 cpu = get_cpu();
94
95 if (!cpu_isset(cpu, flush_cpumask))
96 goto out;
97 /*
98 * This was a BUG() but until someone can quote me the
99 * line from the intel manual that guarantees an IPI to
100 * multiple CPUs is retried _only_ on the erroring CPUs
101 * its staying as a return
102 *
103 * BUG();
104 */
105
106 if (flush_mm == x86_read_percpu(cpu_tlbstate.active_mm)) {
107 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_OK) {
108 if (flush_va == TLB_FLUSH_ALL)
109 local_flush_tlb();
110 else
111 __flush_tlb_one(flush_va);
112 } else
113 leave_mm(cpu);
114 }
115 ack_APIC_irq();
116 smp_mb__before_clear_bit();
117 cpu_clear(cpu, flush_cpumask);
118 smp_mb__after_clear_bit();
119out:
120 put_cpu_no_resched();
121 inc_irq_stat(irq_tlb_count);
122}
123
124void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
125 unsigned long va)
126{
127 cpumask_t cpumask = *cpumaskp;
128
129 /*
130 * A couple of (to be removed) sanity checks:
131 *
132 * - current CPU must not be in mask
133 * - mask must exist :)
134 */
135 BUG_ON(cpus_empty(cpumask));
136 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
137 BUG_ON(!mm);
138
139#ifdef CONFIG_HOTPLUG_CPU
140 /* If a CPU which we ran on has gone down, OK. */
141 cpus_and(cpumask, cpumask, cpu_online_map);
142 if (unlikely(cpus_empty(cpumask)))
143 return;
144#endif
145
146 /*
147 * i'm not happy about this global shared spinlock in the
148 * MM hot path, but we'll see how contended it is.
149 * AK: x86-64 has a faster method that could be ported.
150 */
151 spin_lock(&tlbstate_lock);
152
153 flush_mm = mm;
154 flush_va = va;
155 cpus_or(flush_cpumask, cpumask, flush_cpumask);
156
157 /*
158 * Make the above memory operations globally visible before
159 * sending the IPI.
160 */
161 smp_mb();
162 /*
163 * We have to send the IPI only to
164 * CPUs affected.
165 */
166 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR);
167
168 while (!cpus_empty(flush_cpumask))
169 /* nothing. lockup detection does not belong here */
170 cpu_relax();
171
172 flush_mm = NULL;
173 flush_va = 0;
174 spin_unlock(&tlbstate_lock);
175}
176
177void flush_tlb_current_task(void)
178{
179 struct mm_struct *mm = current->mm;
180 cpumask_t cpu_mask;
181
182 preempt_disable();
183 cpu_mask = mm->cpu_vm_mask;
184 cpu_clear(smp_processor_id(), cpu_mask);
185
186 local_flush_tlb();
187 if (!cpus_empty(cpu_mask))
188 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
189 preempt_enable();
190}
191
192void flush_tlb_mm(struct mm_struct *mm)
193{
194 cpumask_t cpu_mask;
195
196 preempt_disable();
197 cpu_mask = mm->cpu_vm_mask;
198 cpu_clear(smp_processor_id(), cpu_mask);
199
200 if (current->active_mm == mm) {
201 if (current->mm)
202 local_flush_tlb();
203 else
204 leave_mm(smp_processor_id());
205 }
206 if (!cpus_empty(cpu_mask))
207 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL);
208
209 preempt_enable();
210}
211
212void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
213{
214 struct mm_struct *mm = vma->vm_mm;
215 cpumask_t cpu_mask;
216
217 preempt_disable();
218 cpu_mask = mm->cpu_vm_mask;
219 cpu_clear(smp_processor_id(), cpu_mask);
220
221 if (current->active_mm == mm) {
222 if (current->mm)
223 __flush_tlb_one(va);
224 else
225 leave_mm(smp_processor_id());
226 }
227
228 if (!cpus_empty(cpu_mask))
229 flush_tlb_others(cpu_mask, mm, va);
230
231 preempt_enable();
232}
233EXPORT_SYMBOL(flush_tlb_page);
234
235static void do_flush_tlb_all(void *info)
236{
237 unsigned long cpu = smp_processor_id();
238
239 __flush_tlb_all();
240 if (x86_read_percpu(cpu_tlbstate.state) == TLBSTATE_LAZY)
241 leave_mm(cpu);
242}
243
244void flush_tlb_all(void)
245{
246 on_each_cpu(do_flush_tlb_all, NULL, 1);
247}
248
249void reset_lazy_tlbstate(void)
250{
251 int cpu = raw_smp_processor_id();
252
253 per_cpu(cpu_tlbstate, cpu).state = 0;
254 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
255}
256
diff --git a/arch/x86/kernel/tlb_uv.c b/arch/x86/kernel/tlb_uv.c
index f885023167e0..89fce1b6d01f 100644
--- a/arch/x86/kernel/tlb_uv.c
+++ b/arch/x86/kernel/tlb_uv.c
@@ -11,6 +11,7 @@
11#include <linux/kernel.h> 11#include <linux/kernel.h>
12 12
13#include <asm/mmu_context.h> 13#include <asm/mmu_context.h>
14#include <asm/uv/uv.h>
14#include <asm/uv/uv_mmrs.h> 15#include <asm/uv/uv_mmrs.h>
15#include <asm/uv/uv_hub.h> 16#include <asm/uv/uv_hub.h>
16#include <asm/uv/uv_bau.h> 17#include <asm/uv/uv_bau.h>
@@ -200,6 +201,7 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
200 destination_timeouts = 0; 201 destination_timeouts = 0;
201 } 202 }
202 } 203 }
204 cpu_relax();
203 } 205 }
204 return FLUSH_COMPLETE; 206 return FLUSH_COMPLETE;
205} 207}
@@ -209,14 +211,15 @@ static int uv_wait_completion(struct bau_desc *bau_desc,
209 * 211 *
210 * Send a broadcast and wait for a broadcast message to complete. 212 * Send a broadcast and wait for a broadcast message to complete.
211 * 213 *
212 * The cpumaskp mask contains the cpus the broadcast was sent to. 214 * The flush_mask contains the cpus the broadcast was sent to.
213 * 215 *
214 * Returns 1 if all remote flushing was done. The mask is zeroed. 216 * Returns NULL if all remote flushing was done. The mask is zeroed.
215 * Returns 0 if some remote flushing remains to be done. The mask is left 217 * Returns @flush_mask if some remote flushing remains to be done. The
216 * unchanged. 218 * mask will have some bits still set.
217 */ 219 */
218int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc, 220const struct cpumask *uv_flush_send_and_wait(int cpu, int this_blade,
219 cpumask_t *cpumaskp) 221 struct bau_desc *bau_desc,
222 struct cpumask *flush_mask)
220{ 223{
221 int completion_status = 0; 224 int completion_status = 0;
222 int right_shift; 225 int right_shift;
@@ -263,59 +266,69 @@ int uv_flush_send_and_wait(int cpu, int this_blade, struct bau_desc *bau_desc,
263 * Success, so clear the remote cpu's from the mask so we don't 266 * Success, so clear the remote cpu's from the mask so we don't
264 * use the IPI method of shootdown on them. 267 * use the IPI method of shootdown on them.
265 */ 268 */
266 for_each_cpu_mask(bit, *cpumaskp) { 269 for_each_cpu(bit, flush_mask) {
267 blade = uv_cpu_to_blade_id(bit); 270 blade = uv_cpu_to_blade_id(bit);
268 if (blade == this_blade) 271 if (blade == this_blade)
269 continue; 272 continue;
270 cpu_clear(bit, *cpumaskp); 273 cpumask_clear_cpu(bit, flush_mask);
271 } 274 }
272 if (!cpus_empty(*cpumaskp)) 275 if (!cpumask_empty(flush_mask))
273 return 0; 276 return flush_mask;
274 return 1; 277 return NULL;
275} 278}
276 279
277/** 280/**
278 * uv_flush_tlb_others - globally purge translation cache of a virtual 281 * uv_flush_tlb_others - globally purge translation cache of a virtual
279 * address or all TLB's 282 * address or all TLB's
280 * @cpumaskp: mask of all cpu's in which the address is to be removed 283 * @cpumask: mask of all cpu's in which the address is to be removed
281 * @mm: mm_struct containing virtual address range 284 * @mm: mm_struct containing virtual address range
282 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu) 285 * @va: virtual address to be removed (or TLB_FLUSH_ALL for all TLB's on cpu)
286 * @cpu: the current cpu
283 * 287 *
284 * This is the entry point for initiating any UV global TLB shootdown. 288 * This is the entry point for initiating any UV global TLB shootdown.
285 * 289 *
286 * Purges the translation caches of all specified processors of the given 290 * Purges the translation caches of all specified processors of the given
287 * virtual address, or purges all TLB's on specified processors. 291 * virtual address, or purges all TLB's on specified processors.
288 * 292 *
289 * The caller has derived the cpumaskp from the mm_struct and has subtracted 293 * The caller has derived the cpumask from the mm_struct. This function
290 * the local cpu from the mask. This function is called only if there 294 * is called only if there are bits set in the mask. (e.g. flush_tlb_page())
291 * are bits set in the mask. (e.g. flush_tlb_page())
292 * 295 *
293 * The cpumaskp is converted into a nodemask of the nodes containing 296 * The cpumask is converted into a nodemask of the nodes containing
294 * the cpus. 297 * the cpus.
295 * 298 *
296 * Returns 1 if all remote flushing was done. 299 * Note that this function should be called with preemption disabled.
297 * Returns 0 if some remote flushing remains to be done. 300 *
301 * Returns NULL if all remote flushing was done.
302 * Returns pointer to cpumask if some remote flushing remains to be
303 * done. The returned pointer is valid till preemption is re-enabled.
298 */ 304 */
299int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm, 305const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask,
300 unsigned long va) 306 struct mm_struct *mm,
307 unsigned long va, unsigned int cpu)
301{ 308{
309 static DEFINE_PER_CPU(cpumask_t, flush_tlb_mask);
310 struct cpumask *flush_mask = &__get_cpu_var(flush_tlb_mask);
302 int i; 311 int i;
303 int bit; 312 int bit;
304 int blade; 313 int blade;
305 int cpu; 314 int uv_cpu;
306 int this_blade; 315 int this_blade;
307 int locals = 0; 316 int locals = 0;
308 struct bau_desc *bau_desc; 317 struct bau_desc *bau_desc;
309 318
310 cpu = uv_blade_processor_id(); 319 WARN_ON(!in_atomic());
320
321 cpumask_andnot(flush_mask, cpumask, cpumask_of(cpu));
322
323 uv_cpu = uv_blade_processor_id();
311 this_blade = uv_numa_blade_id(); 324 this_blade = uv_numa_blade_id();
312 bau_desc = __get_cpu_var(bau_control).descriptor_base; 325 bau_desc = __get_cpu_var(bau_control).descriptor_base;
313 bau_desc += UV_ITEMS_PER_DESCRIPTOR * cpu; 326 bau_desc += UV_ITEMS_PER_DESCRIPTOR * uv_cpu;
314 327
315 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE); 328 bau_nodes_clear(&bau_desc->distribution, UV_DISTRIBUTION_SIZE);
316 329
317 i = 0; 330 i = 0;
318 for_each_cpu_mask(bit, *cpumaskp) { 331 for_each_cpu(bit, flush_mask) {
319 blade = uv_cpu_to_blade_id(bit); 332 blade = uv_cpu_to_blade_id(bit);
320 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1)); 333 BUG_ON(blade > (UV_DISTRIBUTION_SIZE - 1));
321 if (blade == this_blade) { 334 if (blade == this_blade) {
@@ -330,17 +343,17 @@ int uv_flush_tlb_others(cpumask_t *cpumaskp, struct mm_struct *mm,
330 * no off_node flushing; return status for local node 343 * no off_node flushing; return status for local node
331 */ 344 */
332 if (locals) 345 if (locals)
333 return 0; 346 return flush_mask;
334 else 347 else
335 return 1; 348 return NULL;
336 } 349 }
337 __get_cpu_var(ptcstats).requestor++; 350 __get_cpu_var(ptcstats).requestor++;
338 __get_cpu_var(ptcstats).ntargeted += i; 351 __get_cpu_var(ptcstats).ntargeted += i;
339 352
340 bau_desc->payload.address = va; 353 bau_desc->payload.address = va;
341 bau_desc->payload.sending_cpu = smp_processor_id(); 354 bau_desc->payload.sending_cpu = cpu;
342 355
343 return uv_flush_send_and_wait(cpu, this_blade, bau_desc, cpumaskp); 356 return uv_flush_send_and_wait(uv_cpu, this_blade, bau_desc, flush_mask);
344} 357}
345 358
346/* 359/*
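uv_flush_tlb_others() now reports progress through its return value: NULL means every remote TLB was flushed by the BAU, while a non-NULL cpumask names the CPUs that still need the ordinary IPI shootdown. A standalone sketch of that NULL-or-remainder convention (the helper below is invented for illustration and only pretends to handle even-numbered CPUs):

#include <stdio.h>

struct cpumask { unsigned long bits; };

/* pretend the accelerated path only covers even-numbered cpus */
static const struct cpumask *try_fast_flush(struct cpumask *mask)
{
	mask->bits &= 0xaaaaaaaaul;		/* drop the cpus the fast path handled */
	return mask->bits ? mask : NULL;	/* NULL == nothing left to do */
}

int main(void)
{
	struct cpumask m = { .bits = 0xf };	/* cpus 0-3 */
	const struct cpumask *left = try_fast_flush(&m);

	if (left)
		printf("fall back to IPIs for mask %#lx\n", left->bits);
	else
		printf("fast path flushed everything\n");
	return 0;
}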
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 98c2d055284b..ed5aee5f3fcc 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -59,7 +59,6 @@
59#ifdef CONFIG_X86_64 59#ifdef CONFIG_X86_64
60#include <asm/pgalloc.h> 60#include <asm/pgalloc.h>
61#include <asm/proto.h> 61#include <asm/proto.h>
62#include <asm/pda.h>
63#else 62#else
64#include <asm/processor-flags.h> 63#include <asm/processor-flags.h>
65#include <asm/arch_hooks.h> 64#include <asm/arch_hooks.h>
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 23206ba16874..1d3302cc2ddf 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -858,7 +858,7 @@ void __init vmi_init(void)
858#endif 858#endif
859} 859}
860 860
861void vmi_activate(void) 861void __init vmi_activate(void)
862{ 862{
863 unsigned long flags; 863 unsigned long flags;
864 864
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index 82c67559dde7..3eba7f7bac05 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -178,14 +178,7 @@ SECTIONS
178 __initramfs_end = .; 178 __initramfs_end = .;
179 } 179 }
180#endif 180#endif
181 . = ALIGN(PAGE_SIZE); 181 PERCPU(PAGE_SIZE)
182 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
183 __per_cpu_start = .;
184 *(.data.percpu.page_aligned)
185 *(.data.percpu)
186 *(.data.percpu.shared_aligned)
187 __per_cpu_end = .;
188 }
189 . = ALIGN(PAGE_SIZE); 182 . = ALIGN(PAGE_SIZE);
190 /* freed after init ends here */ 183 /* freed after init ends here */
191 184
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 1a614c0e6bef..c9740996430a 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -5,6 +5,7 @@
5#define LOAD_OFFSET __START_KERNEL_map 5#define LOAD_OFFSET __START_KERNEL_map
6 6
7#include <asm-generic/vmlinux.lds.h> 7#include <asm-generic/vmlinux.lds.h>
8#include <asm/asm-offsets.h>
8#include <asm/page.h> 9#include <asm/page.h>
9 10
10#undef i386 /* in case the preprocessor is a 32bit one */ 11#undef i386 /* in case the preprocessor is a 32bit one */
@@ -13,12 +14,14 @@ OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", "elf64-x86-64")
13OUTPUT_ARCH(i386:x86-64) 14OUTPUT_ARCH(i386:x86-64)
14ENTRY(phys_startup_64) 15ENTRY(phys_startup_64)
15jiffies_64 = jiffies; 16jiffies_64 = jiffies;
16_proxy_pda = 1;
17PHDRS { 17PHDRS {
18 text PT_LOAD FLAGS(5); /* R_E */ 18 text PT_LOAD FLAGS(5); /* R_E */
19 data PT_LOAD FLAGS(7); /* RWE */ 19 data PT_LOAD FLAGS(7); /* RWE */
20 user PT_LOAD FLAGS(7); /* RWE */ 20 user PT_LOAD FLAGS(7); /* RWE */
21 data.init PT_LOAD FLAGS(7); /* RWE */ 21 data.init PT_LOAD FLAGS(7); /* RWE */
22#ifdef CONFIG_SMP
23 percpu PT_LOAD FLAGS(7); /* RWE */
24#endif
22 note PT_NOTE FLAGS(0); /* ___ */ 25 note PT_NOTE FLAGS(0); /* ___ */
23} 26}
24SECTIONS 27SECTIONS
@@ -208,14 +211,28 @@ SECTIONS
208 __initramfs_end = .; 211 __initramfs_end = .;
209#endif 212#endif
210 213
214#ifdef CONFIG_SMP
215 /*
216 * percpu offsets are zero-based on SMP. PERCPU_VADDR() changes the
217 * output PHDR, so the next output section - __data_nosave - should
218 * switch it back to data.init. Also, pda should be at the head of
219 * percpu area. Preallocate it and define the percpu offset symbol
220 * so that it can be accessed as a percpu variable.
221 */
222 . = ALIGN(PAGE_SIZE);
223 PERCPU_VADDR(0, :percpu)
224#else
211 PERCPU(PAGE_SIZE) 225 PERCPU(PAGE_SIZE)
226#endif
212 227
213 . = ALIGN(PAGE_SIZE); 228 . = ALIGN(PAGE_SIZE);
214 __init_end = .; 229 __init_end = .;
215 230
216 . = ALIGN(PAGE_SIZE); 231 . = ALIGN(PAGE_SIZE);
217 __nosave_begin = .; 232 __nosave_begin = .;
218 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) { *(.data.nosave) } 233 .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {
234 *(.data.nosave)
235 } :data.init /* switch back to data.init, see PERCPU_VADDR() above */
219 . = ALIGN(PAGE_SIZE); 236 . = ALIGN(PAGE_SIZE);
220 __nosave_end = .; 237 __nosave_end = .;
221 238
@@ -244,3 +261,8 @@ SECTIONS
244 */ 261 */
245ASSERT((_end - _text <= KERNEL_IMAGE_SIZE), 262ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),
246 "kernel image bigger than KERNEL_IMAGE_SIZE") 263 "kernel image bigger than KERNEL_IMAGE_SIZE")
264
265#ifdef CONFIG_SMP
266ASSERT((per_cpu__irq_stack_union == 0),
267 "irq_stack_union is not at start of per-cpu area");
268#endif
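Placing the per-cpu output at virtual address zero (PERCPU_VADDR(0, :percpu)) makes each per-cpu symbol's link-time value equal to its offset within a CPU's area, so the running address is simply that CPU's %gs base plus the symbol value; the trailing ASSERT then pins irq_stack_union, which carries the stack canary, to offset zero. A tiny sketch of that address arithmetic with assumed example numbers:

#include <stdio.h>

int main(void)
{
	/* purely illustrative values */
	unsigned long gs_base_cpu1 = 0xffff880001a00000ul;	/* this CPU's per-cpu base */
	unsigned long symbol_value = 0x4a40;			/* zero-based link address == offset */

	printf("cpu1's copy of the variable lives at %#lx\n",
	       gs_base_cpu1 + symbol_value);
	return 0;
}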
diff --git a/arch/x86/kernel/x8664_ksyms_64.c b/arch/x86/kernel/x8664_ksyms_64.c
index 695e426aa354..3909e3ba5ce3 100644
--- a/arch/x86/kernel/x8664_ksyms_64.c
+++ b/arch/x86/kernel/x8664_ksyms_64.c
@@ -58,5 +58,3 @@ EXPORT_SYMBOL(__memcpy);
58EXPORT_SYMBOL(empty_zero_page); 58EXPORT_SYMBOL(empty_zero_page);
59EXPORT_SYMBOL(init_level4_pgt); 59EXPORT_SYMBOL(init_level4_pgt);
60EXPORT_SYMBOL(load_gs_index); 60EXPORT_SYMBOL(load_gs_index);
61
62EXPORT_SYMBOL(_proxy_pda);
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 4a20b2f9a381..7c8ca91bb9ec 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -56,7 +56,7 @@ do { \
56 " jmp 2b\n" \ 56 " jmp 2b\n" \
57 ".previous\n" \ 57 ".previous\n" \
58 _ASM_EXTABLE(0b,3b) \ 58 _ASM_EXTABLE(0b,3b) \
59 : "=d"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ 59 : "=&d"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
60 "=&D" (__d2) \ 60 "=&D" (__d2) \
61 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ 61 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
62 : "memory"); \ 62 : "memory"); \
@@ -218,7 +218,7 @@ long strnlen_user(const char __user *s, long n)
218 " .align 4\n" 218 " .align 4\n"
219 " .long 0b,2b\n" 219 " .long 0b,2b\n"
220 ".previous" 220 ".previous"
221 :"=r" (n), "=D" (s), "=a" (res), "=c" (tmp) 221 :"=&r" (n), "=&D" (s), "=&a" (res), "=&c" (tmp)
222 :"0" (n), "1" (s), "2" (0), "3" (mask) 222 :"0" (n), "1" (s), "2" (0), "3" (mask)
223 :"cc"); 223 :"cc");
224 return res & mask; 224 return res & mask;
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index 64d6c84e6353..ec13cb5f17ed 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -32,7 +32,7 @@ do { \
32 " jmp 2b\n" \ 32 " jmp 2b\n" \
33 ".previous\n" \ 33 ".previous\n" \
34 _ASM_EXTABLE(0b,3b) \ 34 _ASM_EXTABLE(0b,3b) \
35 : "=r"(res), "=c"(count), "=&a" (__d0), "=&S" (__d1), \ 35 : "=&r"(res), "=&c"(count), "=&a" (__d0), "=&S" (__d1), \
36 "=&D" (__d2) \ 36 "=&D" (__d2) \
37 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \ 37 : "i"(-EFAULT), "0"(count), "1"(count), "3"(src), "4"(dst) \
38 : "memory"); \ 38 : "memory"); \
@@ -86,7 +86,7 @@ unsigned long __clear_user(void __user *addr, unsigned long size)
86 ".previous\n" 86 ".previous\n"
87 _ASM_EXTABLE(0b,3b) 87 _ASM_EXTABLE(0b,3b)
88 _ASM_EXTABLE(1b,2b) 88 _ASM_EXTABLE(1b,2b)
89 : [size8] "=c"(size), [dst] "=&D" (__d0) 89 : [size8] "=&c"(size), [dst] "=&D" (__d0)
90 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr), 90 : [size1] "r"(size & 7), "[size8]" (size / 8), "[dst]"(addr),
91 [zero] "r" (0UL), [eight] "r" (8UL)); 91 [zero] "r" (0UL), [eight] "r" (8UL));
92 return size; 92 return size;
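The usercopy fixes above add the '&' (earlyclobber) modifier so gcc never allocates an output operand to a register that still holds a live input: these asm bodies write their outputs before the last read of their inputs. A minimal sketch of the same hazard, independent of the kernel macros:

#include <stdio.h>

/*
 * %0 is written by the first instruction while %2 is still needed by the
 * second, so the output must be earlyclobber ("=&r"); with plain "=r",
 * gcc may give 'out' the same register as 'b' and the result is garbage.
 */
static long add_via_asm(long a, long b)
{
	long out;

	asm ("movq %1, %0\n\t"
	     "addq %2, %0"
	     : "=&r" (out)
	     : "r" (a), "r" (b));
	return out;
}

int main(void)
{
	printf("%ld\n", add_via_asm(2, 3));	/* prints 5 */
	return 0;
}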
diff --git a/arch/x86/mach-rdc321x/Makefile b/arch/x86/mach-rdc321x/Makefile
deleted file mode 100644
index 8325b4ca431c..000000000000
--- a/arch/x86/mach-rdc321x/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
1#
2# Makefile for the RDC321x specific parts of the kernel
3#
4obj-$(CONFIG_X86_RDC321X) := gpio.o platform.o
5
diff --git a/arch/x86/mach-rdc321x/gpio.c b/arch/x86/mach-rdc321x/gpio.c
deleted file mode 100644
index 247f33d3a407..000000000000
--- a/arch/x86/mach-rdc321x/gpio.c
+++ /dev/null
@@ -1,194 +0,0 @@
1/*
2 * GPIO support for RDC SoC R3210/R8610
3 *
4 * Copyright (C) 2007, Florian Fainelli <florian@openwrt.org>
5 * Copyright (C) 2008, Volker Weiss <dev@tintuc.de>
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
20 *
21 */
22
23
24#include <linux/spinlock.h>
25#include <linux/io.h>
26#include <linux/types.h>
27#include <linux/module.h>
28
29#include <asm/gpio.h>
30#include <asm/mach-rdc321x/rdc321x_defs.h>
31
32
33/* spin lock to protect our private copy of GPIO data register plus
34 the access to PCI conf registers. */
35static DEFINE_SPINLOCK(gpio_lock);
36
37/* copy of GPIO data registers */
38static u32 gpio_data_reg1;
39static u32 gpio_data_reg2;
40
41static u32 gpio_request_data[2];
42
43
44static inline void rdc321x_conf_write(unsigned addr, u32 value)
45{
46 outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
47 outl(value, RDC3210_CFGREG_DATA);
48}
49
50static inline void rdc321x_conf_or(unsigned addr, u32 value)
51{
52 outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
53 value |= inl(RDC3210_CFGREG_DATA);
54 outl(value, RDC3210_CFGREG_DATA);
55}
56
57static inline u32 rdc321x_conf_read(unsigned addr)
58{
59 outl((1 << 31) | (7 << 11) | addr, RDC3210_CFGREG_ADDR);
60
61 return inl(RDC3210_CFGREG_DATA);
62}
63
64/* configure pin as GPIO */
65static void rdc321x_configure_gpio(unsigned gpio)
66{
67 unsigned long flags;
68
69 spin_lock_irqsave(&gpio_lock, flags);
70 rdc321x_conf_or(gpio < 32
71 ? RDC321X_GPIO_CTRL_REG1 : RDC321X_GPIO_CTRL_REG2,
72 1 << (gpio & 0x1f));
73 spin_unlock_irqrestore(&gpio_lock, flags);
74}
75
76/* initially setup the 2 copies of the gpio data registers.
77 This function must be called by the platform setup code. */
78void __init rdc321x_gpio_setup()
79{
80 /* this might not be, what others (BIOS, bootloader, etc.)
81 wrote to these registers before, but it's a good guess. Still
82 better than just using 0xffffffff. */
83
84 gpio_data_reg1 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG1);
85 gpio_data_reg2 = rdc321x_conf_read(RDC321X_GPIO_DATA_REG2);
86}
87
88/* determine, if gpio number is valid */
89static inline int rdc321x_is_gpio(unsigned gpio)
90{
91 return gpio <= RDC321X_MAX_GPIO;
92}
93
94/* request GPIO */
95int rdc_gpio_request(unsigned gpio, const char *label)
96{
97 unsigned long flags;
98
99 if (!rdc321x_is_gpio(gpio))
100 return -EINVAL;
101
102 spin_lock_irqsave(&gpio_lock, flags);
103 if (gpio_request_data[(gpio & 0x20) ? 1 : 0] & (1 << (gpio & 0x1f)))
104 goto inuse;
105 gpio_request_data[(gpio & 0x20) ? 1 : 0] |= (1 << (gpio & 0x1f));
106 spin_unlock_irqrestore(&gpio_lock, flags);
107
108 return 0;
109inuse:
110 spin_unlock_irqrestore(&gpio_lock, flags);
111 return -EINVAL;
112}
113EXPORT_SYMBOL(rdc_gpio_request);
114
115/* release previously-claimed GPIO */
116void rdc_gpio_free(unsigned gpio)
117{
118 unsigned long flags;
119
120 if (!rdc321x_is_gpio(gpio))
121 return;
122
123 spin_lock_irqsave(&gpio_lock, flags);
124 gpio_request_data[(gpio & 0x20) ? 1 : 0] &= ~(1 << (gpio & 0x1f));
125 spin_unlock_irqrestore(&gpio_lock, flags);
126}
127EXPORT_SYMBOL(rdc_gpio_free);
128
129/* read GPIO pin */
130int rdc_gpio_get_value(unsigned gpio)
131{
132 u32 reg;
133 unsigned long flags;
134
135 spin_lock_irqsave(&gpio_lock, flags);
136 reg = rdc321x_conf_read(gpio < 32
137 ? RDC321X_GPIO_DATA_REG1 : RDC321X_GPIO_DATA_REG2);
138 spin_unlock_irqrestore(&gpio_lock, flags);
139
140 return (1 << (gpio & 0x1f)) & reg ? 1 : 0;
141}
142EXPORT_SYMBOL(rdc_gpio_get_value);
143
144/* set GPIO pin to value */
145void rdc_gpio_set_value(unsigned gpio, int value)
146{
147 unsigned long flags;
148 u32 reg;
149
150 reg = 1 << (gpio & 0x1f);
151 if (gpio < 32) {
152 spin_lock_irqsave(&gpio_lock, flags);
153 if (value)
154 gpio_data_reg1 |= reg;
155 else
156 gpio_data_reg1 &= ~reg;
157 rdc321x_conf_write(RDC321X_GPIO_DATA_REG1, gpio_data_reg1);
158 spin_unlock_irqrestore(&gpio_lock, flags);
159 } else {
160 spin_lock_irqsave(&gpio_lock, flags);
161 if (value)
162 gpio_data_reg2 |= reg;
163 else
164 gpio_data_reg2 &= ~reg;
165 rdc321x_conf_write(RDC321X_GPIO_DATA_REG2, gpio_data_reg2);
166 spin_unlock_irqrestore(&gpio_lock, flags);
167 }
168}
169EXPORT_SYMBOL(rdc_gpio_set_value);
170
171/* configure GPIO pin as input */
172int rdc_gpio_direction_input(unsigned gpio)
173{
174 if (!rdc321x_is_gpio(gpio))
175 return -EINVAL;
176
177 rdc321x_configure_gpio(gpio);
178
179 return 0;
180}
181EXPORT_SYMBOL(rdc_gpio_direction_input);
182
183/* configure GPIO pin as output and set value */
184int rdc_gpio_direction_output(unsigned gpio, int value)
185{
186 if (!rdc321x_is_gpio(gpio))
187 return -EINVAL;
188
189 gpio_set_value(gpio, value);
190 rdc321x_configure_gpio(gpio);
191
192 return 0;
193}
194EXPORT_SYMBOL(rdc_gpio_direction_output);
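
The deleted rdc321x_conf_*() accessors above are the standard PCI configuration mechanism #1: the enable bit (bit 31), the device number in bits 15:11 (here 7 << 11, i.e. bus 0, device 7, function 0) and the register offset are written to the address port, then the data port is read or written. A sketch of the generic pattern; the port macros below are my own names for the standard 0xCF8/0xCFC ports, not the RDC3210_CFGREG_* constants, whose values live in rdc321x_defs.h and are not shown here:

	#include <linux/io.h>

	#define PCI_CONF1_ADDR	0xCF8	/* config address port (mechanism #1) */
	#define PCI_CONF1_DATA	0xCFC	/* config data port */

	/* Sketch only: read one 32-bit config register of bus 0, dev 7, fn 0. */
	static u32 conf1_read(unsigned int reg)
	{
		outl((1u << 31) | (7u << 11) | (reg & 0xFC), PCI_CONF1_ADDR);
		return inl(PCI_CONF1_DATA);
	}
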
diff --git a/arch/x86/mach-rdc321x/platform.c b/arch/x86/mach-rdc321x/platform.c
deleted file mode 100644
index 4f4e50c3ad3b..000000000000
--- a/arch/x86/mach-rdc321x/platform.c
+++ /dev/null
@@ -1,69 +0,0 @@
1/*
2 * Generic RDC321x platform devices
3 *
4 * Copyright (C) 2007 Florian Fainelli <florian@openwrt.org>
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version 2
9 * of the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the
18 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#include <linux/init.h>
24#include <linux/kernel.h>
25#include <linux/list.h>
26#include <linux/device.h>
27#include <linux/platform_device.h>
28#include <linux/leds.h>
29
30#include <asm/gpio.h>
31
32/* LEDS */
33static struct gpio_led default_leds[] = {
34 { .name = "rdc:dmz", .gpio = 1, },
35};
36
37static struct gpio_led_platform_data rdc321x_led_data = {
38 .num_leds = ARRAY_SIZE(default_leds),
39 .leds = default_leds,
40};
41
42static struct platform_device rdc321x_leds = {
43 .name = "leds-gpio",
44 .id = -1,
45 .dev = {
46 .platform_data = &rdc321x_led_data,
47 }
48};
49
50/* Watchdog */
51static struct platform_device rdc321x_wdt = {
52 .name = "rdc321x-wdt",
53 .id = -1,
54 .num_resources = 0,
55};
56
57static struct platform_device *rdc321x_devs[] = {
58 &rdc321x_leds,
59 &rdc321x_wdt
60};
61
62static int __init rdc_board_setup(void)
63{
64 rdc321x_gpio_setup();
65
66 return platform_add_devices(rdc321x_devs, ARRAY_SIZE(rdc321x_devs));
67}
68
69arch_initcall(rdc_board_setup);
diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c
index 9840b7ec749a..331cd6d56483 100644
--- a/arch/x86/mach-voyager/voyager_smp.c
+++ b/arch/x86/mach-voyager/voyager_smp.c
@@ -402,7 +402,7 @@ void __init find_smp_config(void)
402 VOYAGER_SUS_IN_CONTROL_PORT); 402 VOYAGER_SUS_IN_CONTROL_PORT);
403 403
404 current_thread_info()->cpu = boot_cpu_id; 404 current_thread_info()->cpu = boot_cpu_id;
405 x86_write_percpu(cpu_number, boot_cpu_id); 405 percpu_write(cpu_number, boot_cpu_id);
406} 406}
407 407
408/* 408/*
@@ -530,7 +530,6 @@ static void __init do_boot_cpu(__u8 cpu)
530 /* init_tasks (in sched.c) is indexed logically */ 530 /* init_tasks (in sched.c) is indexed logically */
531 stack_start.sp = (void *)idle->thread.sp; 531 stack_start.sp = (void *)idle->thread.sp;
532 532
533 init_gdt(cpu);
534 per_cpu(current_task, cpu) = idle; 533 per_cpu(current_task, cpu) = idle;
535 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu); 534 early_gdt_descr.address = (unsigned long)get_cpu_gdt_table(cpu);
536 irq_ctx_init(cpu); 535 irq_ctx_init(cpu);
@@ -1747,7 +1746,6 @@ static void __init voyager_smp_prepare_cpus(unsigned int max_cpus)
1747 1746
1748static void __cpuinit voyager_smp_prepare_boot_cpu(void) 1747static void __cpuinit voyager_smp_prepare_boot_cpu(void)
1749{ 1748{
1750 init_gdt(smp_processor_id());
1751 switch_to_new_gdt(); 1749 switch_to_new_gdt();
1752 1750
1753 cpu_set(smp_processor_id(), cpu_online_map); 1751 cpu_set(smp_processor_id(), cpu_online_map);
@@ -1780,7 +1778,6 @@ static void __init voyager_smp_cpus_done(unsigned int max_cpus)
1780void __init smp_setup_processor_id(void) 1778void __init smp_setup_processor_id(void)
1781{ 1779{
1782 current_thread_info()->cpu = hard_smp_processor_id(); 1780 current_thread_info()->cpu = hard_smp_processor_id();
1783 x86_write_percpu(cpu_number, hard_smp_processor_id());
1784} 1781}
1785 1782
1786static void voyager_send_call_func(cpumask_t callmask) 1783static void voyager_send_call_func(cpumask_t callmask)
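
The voyager hunks drop the local init_gdt()/x86_write_percpu() calls and switch to the percpu_write() accessor this series introduces. A small sketch of that accessor with a hypothetical variable (assuming the percpu_read()/percpu_write() API already used elsewhere in this diff):

	#include <linux/percpu.h>

	DEFINE_PER_CPU(int, demo_value);	/* hypothetical per-cpu variable */

	static void demo_store(int v)
	{
		/*
		 * percpu_write() stores into this CPU's copy; on x86 it is a
		 * single segment-relative move, roughly equivalent to
		 * per_cpu(demo_value, smp_processor_id()) = v without the
		 * smp_processor_id() lookup.
		 */
		percpu_write(demo_value, v);
	}
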
diff --git a/arch/x86/mm/Makefile b/arch/x86/mm/Makefile
index d8cc96a2738f..9f05157220f5 100644
--- a/arch/x86/mm/Makefile
+++ b/arch/x86/mm/Makefile
@@ -1,6 +1,8 @@
1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \ 1obj-y := init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
2 pat.o pgtable.o gup.o 2 pat.o pgtable.o gup.o
3 3
4obj-$(CONFIG_X86_SMP) += tlb.o
5
4obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o 6obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
5 7
6obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o 8obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 9e268b6b204e..d3eee74f830a 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -26,6 +26,7 @@
26#include <linux/kprobes.h> 26#include <linux/kprobes.h>
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/kdebug.h> 28#include <linux/kdebug.h>
29#include <linux/magic.h>
29 30
30#include <asm/system.h> 31#include <asm/system.h>
31#include <asm/desc.h> 32#include <asm/desc.h>
@@ -91,8 +92,8 @@ static inline int notify_page_fault(struct pt_regs *regs)
91 * 92 *
92 * Opcode checker based on code by Richard Brunner 93 * Opcode checker based on code by Richard Brunner
93 */ 94 */
94static int is_prefetch(struct pt_regs *regs, unsigned long addr, 95static int is_prefetch(struct pt_regs *regs, unsigned long error_code,
95 unsigned long error_code) 96 unsigned long addr)
96{ 97{
97 unsigned char *instr; 98 unsigned char *instr;
98 int scan_more = 1; 99 int scan_more = 1;
@@ -409,17 +410,16 @@ static void show_fault_oops(struct pt_regs *regs, unsigned long error_code,
409} 410}
410 411
411#ifdef CONFIG_X86_64 412#ifdef CONFIG_X86_64
412static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, 413static noinline void pgtable_bad(struct pt_regs *regs,
413 unsigned long error_code) 414 unsigned long error_code, unsigned long address)
414{ 415{
415 unsigned long flags = oops_begin(); 416 unsigned long flags = oops_begin();
416 int sig = SIGKILL; 417 int sig = SIGKILL;
417 struct task_struct *tsk; 418 struct task_struct *tsk = current;
418 419
419 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n", 420 printk(KERN_ALERT "%s: Corrupted page table at address %lx\n",
420 current->comm, address); 421 tsk->comm, address);
421 dump_pagetable(address); 422 dump_pagetable(address);
422 tsk = current;
423 tsk->thread.cr2 = address; 423 tsk->thread.cr2 = address;
424 tsk->thread.trap_no = 14; 424 tsk->thread.trap_no = 14;
425 tsk->thread.error_code = error_code; 425 tsk->thread.error_code = error_code;
@@ -429,6 +429,196 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs,
429} 429}
430#endif 430#endif
431 431
432static noinline void no_context(struct pt_regs *regs,
433 unsigned long error_code, unsigned long address)
434{
435 struct task_struct *tsk = current;
436 unsigned long *stackend;
437
438#ifdef CONFIG_X86_64
439 unsigned long flags;
440 int sig;
441#endif
442
443 /* Are we prepared to handle this kernel fault? */
444 if (fixup_exception(regs))
445 return;
446
447 /*
448 * X86_32
449 * Valid to do another page fault here, because if this fault
450 * had been triggered by is_prefetch fixup_exception would have
451 * handled it.
452 *
453 * X86_64
454 * Hall of shame of CPU/BIOS bugs.
455 */
456 if (is_prefetch(regs, error_code, address))
457 return;
458
459 if (is_errata93(regs, address))
460 return;
461
462 /*
463 * Oops. The kernel tried to access some bad page. We'll have to
464 * terminate things with extreme prejudice.
465 */
466#ifdef CONFIG_X86_32
467 bust_spinlocks(1);
468#else
469 flags = oops_begin();
470#endif
471
472 show_fault_oops(regs, error_code, address);
473
474 stackend = end_of_stack(tsk);
475 if (*stackend != STACK_END_MAGIC)
476 printk(KERN_ALERT "Thread overran stack, or stack corrupted\n");
477
478 tsk->thread.cr2 = address;
479 tsk->thread.trap_no = 14;
480 tsk->thread.error_code = error_code;
481
482#ifdef CONFIG_X86_32
483 die("Oops", regs, error_code);
484 bust_spinlocks(0);
485 do_exit(SIGKILL);
486#else
487 sig = SIGKILL;
488 if (__die("Oops", regs, error_code))
489 sig = 0;
490 /* Executive summary in case the body of the oops scrolled away */
491 printk(KERN_EMERG "CR2: %016lx\n", address);
492 oops_end(flags, regs, sig);
493#endif
494}
495
496static void __bad_area_nosemaphore(struct pt_regs *regs,
497 unsigned long error_code, unsigned long address,
498 int si_code)
499{
500 struct task_struct *tsk = current;
501
502 /* User mode accesses just cause a SIGSEGV */
503 if (error_code & PF_USER) {
504 /*
505 * It's possible to have interrupts off here.
506 */
507 local_irq_enable();
508
509 /*
510 * Valid to do another page fault here because this one came
511 * from user space.
512 */
513 if (is_prefetch(regs, error_code, address))
514 return;
515
516 if (is_errata100(regs, address))
517 return;
518
519 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
520 printk_ratelimit()) {
521 printk(
522 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
523 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
524 tsk->comm, task_pid_nr(tsk), address,
525 (void *) regs->ip, (void *) regs->sp, error_code);
526 print_vma_addr(" in ", regs->ip);
527 printk("\n");
528 }
529
530 tsk->thread.cr2 = address;
531 /* Kernel addresses are always protection faults */
532 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
533 tsk->thread.trap_no = 14;
534 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
535 return;
536 }
537
538 if (is_f00f_bug(regs, address))
539 return;
540
541 no_context(regs, error_code, address);
542}
543
544static noinline void bad_area_nosemaphore(struct pt_regs *regs,
545 unsigned long error_code, unsigned long address)
546{
547 __bad_area_nosemaphore(regs, error_code, address, SEGV_MAPERR);
548}
549
550static void __bad_area(struct pt_regs *regs,
551 unsigned long error_code, unsigned long address,
552 int si_code)
553{
554 struct mm_struct *mm = current->mm;
555
556 /*
557 * Something tried to access memory that isn't in our memory map..
558 * Fix it, but check if it's kernel or user first..
559 */
560 up_read(&mm->mmap_sem);
561
562 __bad_area_nosemaphore(regs, error_code, address, si_code);
563}
564
565static noinline void bad_area(struct pt_regs *regs,
566 unsigned long error_code, unsigned long address)
567{
568 __bad_area(regs, error_code, address, SEGV_MAPERR);
569}
570
571static noinline void bad_area_access_error(struct pt_regs *regs,
572 unsigned long error_code, unsigned long address)
573{
574 __bad_area(regs, error_code, address, SEGV_ACCERR);
575}
576
577/* TODO: fixup for "mm-invoke-oom-killer-from-page-fault.patch" */
578static void out_of_memory(struct pt_regs *regs,
579 unsigned long error_code, unsigned long address)
580{
581 /*
582 * We ran out of memory, call the OOM killer, and return the userspace
583 * (which will retry the fault, or kill us if we got oom-killed).
584 */
585 up_read(&current->mm->mmap_sem);
586 pagefault_out_of_memory();
587}
588
589static void do_sigbus(struct pt_regs *regs,
590 unsigned long error_code, unsigned long address)
591{
592 struct task_struct *tsk = current;
593 struct mm_struct *mm = tsk->mm;
594
595 up_read(&mm->mmap_sem);
596
597 /* Kernel mode? Handle exceptions or die */
598 if (!(error_code & PF_USER))
599 no_context(regs, error_code, address);
600#ifdef CONFIG_X86_32
601 /* User space => ok to do another page fault */
602 if (is_prefetch(regs, error_code, address))
603 return;
604#endif
605 tsk->thread.cr2 = address;
606 tsk->thread.error_code = error_code;
607 tsk->thread.trap_no = 14;
608 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
609}
610
611static noinline void mm_fault_error(struct pt_regs *regs,
612 unsigned long error_code, unsigned long address, unsigned int fault)
613{
614 if (fault & VM_FAULT_OOM)
615 out_of_memory(regs, error_code, address);
616 else if (fault & VM_FAULT_SIGBUS)
617 do_sigbus(regs, error_code, address);
618 else
619 BUG();
620}
621
432static int spurious_fault_check(unsigned long error_code, pte_t *pte) 622static int spurious_fault_check(unsigned long error_code, pte_t *pte)
433{ 623{
434 if ((error_code & PF_WRITE) && !pte_write(*pte)) 624 if ((error_code & PF_WRITE) && !pte_write(*pte))
@@ -448,8 +638,8 @@ static int spurious_fault_check(unsigned long error_code, pte_t *pte)
448 * There are no security implications to leaving a stale TLB when 638 * There are no security implications to leaving a stale TLB when
449 * increasing the permissions on a page. 639 * increasing the permissions on a page.
450 */ 640 */
451static int spurious_fault(unsigned long address, 641static noinline int spurious_fault(unsigned long error_code,
452 unsigned long error_code) 642 unsigned long address)
453{ 643{
454 pgd_t *pgd; 644 pgd_t *pgd;
455 pud_t *pud; 645 pud_t *pud;
@@ -494,7 +684,7 @@ static int spurious_fault(unsigned long address,
494 * 684 *
495 * This assumes no large pages in there. 685 * This assumes no large pages in there.
496 */ 686 */
497static int vmalloc_fault(unsigned long address) 687static noinline int vmalloc_fault(unsigned long address)
498{ 688{
499#ifdef CONFIG_X86_32 689#ifdef CONFIG_X86_32
500 unsigned long pgd_paddr; 690 unsigned long pgd_paddr;
@@ -534,7 +724,7 @@ static int vmalloc_fault(unsigned long address)
534 happen within a race in page table update. In the later 724 happen within a race in page table update. In the later
535 case just flush. */ 725 case just flush. */
536 726
537 pgd = pgd_offset(current->mm ?: &init_mm, address); 727 pgd = pgd_offset(current->active_mm, address);
538 pgd_ref = pgd_offset_k(address); 728 pgd_ref = pgd_offset_k(address);
539 if (pgd_none(*pgd_ref)) 729 if (pgd_none(*pgd_ref))
540 return -1; 730 return -1;
@@ -573,6 +763,25 @@ static int vmalloc_fault(unsigned long address)
573 763
574int show_unhandled_signals = 1; 764int show_unhandled_signals = 1;
575 765
766static inline int access_error(unsigned long error_code, int write,
767 struct vm_area_struct *vma)
768{
769 if (write) {
770 /* write, present and write, not present */
771 if (unlikely(!(vma->vm_flags & VM_WRITE)))
772 return 1;
773 } else if (unlikely(error_code & PF_PROT)) {
774 /* read, present */
775 return 1;
776 } else {
777 /* read, not present */
778 if (unlikely(!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE))))
779 return 1;
780 }
781
782 return 0;
783}
784
576/* 785/*
577 * This routine handles page faults. It determines the address, 786 * This routine handles page faults. It determines the address,
578 * and the problem, and then passes it off to one of the appropriate 787 * and the problem, and then passes it off to one of the appropriate
@@ -583,16 +792,12 @@ asmlinkage
583#endif 792#endif
584void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code) 793void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
585{ 794{
795 unsigned long address;
586 struct task_struct *tsk; 796 struct task_struct *tsk;
587 struct mm_struct *mm; 797 struct mm_struct *mm;
588 struct vm_area_struct *vma; 798 struct vm_area_struct *vma;
589 unsigned long address; 799 int write;
590 int write, si_code;
591 int fault; 800 int fault;
592#ifdef CONFIG_X86_64
593 unsigned long flags;
594 int sig;
595#endif
596 801
597 tsk = current; 802 tsk = current;
598 mm = tsk->mm; 803 mm = tsk->mm;
@@ -601,9 +806,7 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
601 /* get the address */ 806 /* get the address */
602 address = read_cr2(); 807 address = read_cr2();
603 808
604 si_code = SEGV_MAPERR; 809 if (unlikely(notify_page_fault(regs)))
605
606 if (notify_page_fault(regs))
607 return; 810 return;
608 if (unlikely(kmmio_fault(regs, address))) 811 if (unlikely(kmmio_fault(regs, address)))
609 return; 812 return;
@@ -631,17 +834,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
631 return; 834 return;
632 835
633 /* Can handle a stale RO->RW TLB */ 836 /* Can handle a stale RO->RW TLB */
634 if (spurious_fault(address, error_code)) 837 if (spurious_fault(error_code, address))
635 return; 838 return;
636 839
637 /* 840 /*
638 * Don't take the mm semaphore here. If we fixup a prefetch 841 * Don't take the mm semaphore here. If we fixup a prefetch
639 * fault we could otherwise deadlock. 842 * fault we could otherwise deadlock.
640 */ 843 */
641 goto bad_area_nosemaphore; 844 bad_area_nosemaphore(regs, error_code, address);
845 return;
642 } 846 }
643 847
644
645 /* 848 /*
646 * It's safe to allow irq's after cr2 has been saved and the 849 * It's safe to allow irq's after cr2 has been saved and the
647 * vmalloc fault has been handled. 850 * vmalloc fault has been handled.
@@ -657,15 +860,17 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
657 860
658#ifdef CONFIG_X86_64 861#ifdef CONFIG_X86_64
659 if (unlikely(error_code & PF_RSVD)) 862 if (unlikely(error_code & PF_RSVD))
660 pgtable_bad(address, regs, error_code); 863 pgtable_bad(regs, error_code, address);
661#endif 864#endif
662 865
663 /* 866 /*
664 * If we're in an interrupt, have no user context or are running in an 867 * If we're in an interrupt, have no user context or are running in an
665 * atomic region then we must not take the fault. 868 * atomic region then we must not take the fault.
666 */ 869 */
667 if (unlikely(in_atomic() || !mm)) 870 if (unlikely(in_atomic() || !mm)) {
668 goto bad_area_nosemaphore; 871 bad_area_nosemaphore(regs, error_code, address);
872 return;
873 }
669 874
670 /* 875 /*
671 * When running in the kernel we expect faults to occur only to 876 * When running in the kernel we expect faults to occur only to
@@ -683,20 +888,26 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
683 * source. If this is invalid we can skip the address space check, 888 * source. If this is invalid we can skip the address space check,
684 * thus avoiding the deadlock. 889 * thus avoiding the deadlock.
685 */ 890 */
686 if (!down_read_trylock(&mm->mmap_sem)) { 891 if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
687 if ((error_code & PF_USER) == 0 && 892 if ((error_code & PF_USER) == 0 &&
688 !search_exception_tables(regs->ip)) 893 !search_exception_tables(regs->ip)) {
689 goto bad_area_nosemaphore; 894 bad_area_nosemaphore(regs, error_code, address);
895 return;
896 }
690 down_read(&mm->mmap_sem); 897 down_read(&mm->mmap_sem);
691 } 898 }
692 899
693 vma = find_vma(mm, address); 900 vma = find_vma(mm, address);
694 if (!vma) 901 if (unlikely(!vma)) {
695 goto bad_area; 902 bad_area(regs, error_code, address);
696 if (vma->vm_start <= address) 903 return;
904 }
905 if (likely(vma->vm_start <= address))
697 goto good_area; 906 goto good_area;
698 if (!(vma->vm_flags & VM_GROWSDOWN)) 907 if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
699 goto bad_area; 908 bad_area(regs, error_code, address);
909 return;
910 }
700 if (error_code & PF_USER) { 911 if (error_code & PF_USER) {
701 /* 912 /*
702 * Accessing the stack below %sp is always a bug. 913 * Accessing the stack below %sp is always a bug.
@@ -704,31 +915,25 @@ void __kprobes do_page_fault(struct pt_regs *regs, unsigned long error_code)
704 * and pusha to work. ("enter $65535,$31" pushes 915 * and pusha to work. ("enter $65535,$31" pushes
705 * 32 pointers and then decrements %sp by 65535.) 916 * 32 pointers and then decrements %sp by 65535.)
706 */ 917 */
707 if (address + 65536 + 32 * sizeof(unsigned long) < regs->sp) 918 if (unlikely(address + 65536 + 32 * sizeof(unsigned long) < regs->sp)) {
708 goto bad_area; 919 bad_area(regs, error_code, address);
920 return;
921 }
709 } 922 }
710 if (expand_stack(vma, address)) 923 if (unlikely(expand_stack(vma, address))) {
711 goto bad_area; 924 bad_area(regs, error_code, address);
712/* 925 return;
713 * Ok, we have a good vm_area for this memory access, so 926 }
714 * we can handle it.. 927
715 */ 928 /*
929 * Ok, we have a good vm_area for this memory access, so
930 * we can handle it..
931 */
716good_area: 932good_area:
717 si_code = SEGV_ACCERR; 933 write = error_code & PF_WRITE;
718 write = 0; 934 if (unlikely(access_error(error_code, write, vma))) {
719 switch (error_code & (PF_PROT|PF_WRITE)) { 935 bad_area_access_error(regs, error_code, address);
720 default: /* 3: write, present */ 936 return;
721 /* fall through */
722 case PF_WRITE: /* write, not present */
723 if (!(vma->vm_flags & VM_WRITE))
724 goto bad_area;
725 write++;
726 break;
727 case PF_PROT: /* read, present */
728 goto bad_area;
729 case 0: /* read, not present */
730 if (!(vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE)))
731 goto bad_area;
732 } 937 }
733 938
734 /* 939 /*
@@ -738,11 +943,8 @@ good_area:
738 */ 943 */
739 fault = handle_mm_fault(mm, vma, address, write); 944 fault = handle_mm_fault(mm, vma, address, write);
740 if (unlikely(fault & VM_FAULT_ERROR)) { 945 if (unlikely(fault & VM_FAULT_ERROR)) {
741 if (fault & VM_FAULT_OOM) 946 mm_fault_error(regs, error_code, address, fault);
742 goto out_of_memory; 947 return;
743 else if (fault & VM_FAULT_SIGBUS)
744 goto do_sigbus;
745 BUG();
746 } 948 }
747 if (fault & VM_FAULT_MAJOR) 949 if (fault & VM_FAULT_MAJOR)
748 tsk->maj_flt++; 950 tsk->maj_flt++;
@@ -760,128 +962,6 @@ good_area:
760 } 962 }
761#endif 963#endif
762 up_read(&mm->mmap_sem); 964 up_read(&mm->mmap_sem);
763 return;
764
765/*
766 * Something tried to access memory that isn't in our memory map..
767 * Fix it, but check if it's kernel or user first..
768 */
769bad_area:
770 up_read(&mm->mmap_sem);
771
772bad_area_nosemaphore:
773 /* User mode accesses just cause a SIGSEGV */
774 if (error_code & PF_USER) {
775 /*
776 * It's possible to have interrupts off here.
777 */
778 local_irq_enable();
779
780 /*
781 * Valid to do another page fault here because this one came
782 * from user space.
783 */
784 if (is_prefetch(regs, address, error_code))
785 return;
786
787 if (is_errata100(regs, address))
788 return;
789
790 if (show_unhandled_signals && unhandled_signal(tsk, SIGSEGV) &&
791 printk_ratelimit()) {
792 printk(
793 "%s%s[%d]: segfault at %lx ip %p sp %p error %lx",
794 task_pid_nr(tsk) > 1 ? KERN_INFO : KERN_EMERG,
795 tsk->comm, task_pid_nr(tsk), address,
796 (void *) regs->ip, (void *) regs->sp, error_code);
797 print_vma_addr(" in ", regs->ip);
798 printk("\n");
799 }
800
801 tsk->thread.cr2 = address;
802 /* Kernel addresses are always protection faults */
803 tsk->thread.error_code = error_code | (address >= TASK_SIZE);
804 tsk->thread.trap_no = 14;
805 force_sig_info_fault(SIGSEGV, si_code, address, tsk);
806 return;
807 }
808
809 if (is_f00f_bug(regs, address))
810 return;
811
812no_context:
813 /* Are we prepared to handle this kernel fault? */
814 if (fixup_exception(regs))
815 return;
816
817 /*
818 * X86_32
819 * Valid to do another page fault here, because if this fault
820 * had been triggered by is_prefetch fixup_exception would have
821 * handled it.
822 *
823 * X86_64
824 * Hall of shame of CPU/BIOS bugs.
825 */
826 if (is_prefetch(regs, address, error_code))
827 return;
828
829 if (is_errata93(regs, address))
830 return;
831
832/*
833 * Oops. The kernel tried to access some bad page. We'll have to
834 * terminate things with extreme prejudice.
835 */
836#ifdef CONFIG_X86_32
837 bust_spinlocks(1);
838#else
839 flags = oops_begin();
840#endif
841
842 show_fault_oops(regs, error_code, address);
843
844 tsk->thread.cr2 = address;
845 tsk->thread.trap_no = 14;
846 tsk->thread.error_code = error_code;
847
848#ifdef CONFIG_X86_32
849 die("Oops", regs, error_code);
850 bust_spinlocks(0);
851 do_exit(SIGKILL);
852#else
853 sig = SIGKILL;
854 if (__die("Oops", regs, error_code))
855 sig = 0;
856 /* Executive summary in case the body of the oops scrolled away */
857 printk(KERN_EMERG "CR2: %016lx\n", address);
858 oops_end(flags, regs, sig);
859#endif
860
861out_of_memory:
862 /*
863 * We ran out of memory, call the OOM killer, and return the userspace
864 * (which will retry the fault, or kill us if we got oom-killed).
865 */
866 up_read(&mm->mmap_sem);
867 pagefault_out_of_memory();
868 return;
869
870do_sigbus:
871 up_read(&mm->mmap_sem);
872
873 /* Kernel mode? Handle exceptions or die */
874 if (!(error_code & PF_USER))
875 goto no_context;
876#ifdef CONFIG_X86_32
877 /* User space => ok to do another page fault */
878 if (is_prefetch(regs, address, error_code))
879 return;
880#endif
881 tsk->thread.cr2 = address;
882 tsk->thread.error_code = error_code;
883 tsk->thread.trap_no = 14;
884 force_sig_info_fault(SIGBUS, BUS_ADRERR, address, tsk);
885} 965}
886 966
887DEFINE_SPINLOCK(pgd_lock); 967DEFINE_SPINLOCK(pgd_lock);
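
The fault.c rework above replaces the old goto labels (bad_area, bad_area_nosemaphore, no_context, out_of_memory, do_sigbus) with noinline helpers and routes VM_FAULT_ERROR results through mm_fault_error(). A reduced sketch of the resulting control flow, using the helper names from the hunks; the vma-bounds and stack-expansion checks are omitted and the wrapper itself is illustrative only:

	static void fault_sketch(struct pt_regs *regs, unsigned long error_code)
	{
		unsigned long address = read_cr2();
		struct mm_struct *mm = current->mm;
		struct vm_area_struct *vma;
		int write = error_code & PF_WRITE;
		int fault;

		if (unlikely(in_atomic() || !mm)) {
			bad_area_nosemaphore(regs, error_code, address);
			return;					/* no mmap_sem held */
		}

		down_read(&mm->mmap_sem);
		vma = find_vma(mm, address);
		if (unlikely(!vma)) {
			bad_area(regs, error_code, address);	/* drops mmap_sem */
			return;
		}
		if (unlikely(access_error(error_code, write, vma))) {
			bad_area_access_error(regs, error_code, address); /* SEGV_ACCERR */
			return;
		}

		fault = handle_mm_fault(mm, vma, address, write);
		if (unlikely(fault & VM_FAULT_ERROR)) {
			mm_fault_error(regs, error_code, address, fault); /* OOM or SIGBUS */
			return;
		}
		up_read(&mm->mmap_sem);
	}
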
diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c
index 4a6989e47a53..00263bf07a88 100644
--- a/arch/x86/mm/init_32.c
+++ b/arch/x86/mm/init_32.c
@@ -137,6 +137,47 @@ static pte_t * __init one_page_table_init(pmd_t *pmd)
137 return pte_offset_kernel(pmd, 0); 137 return pte_offset_kernel(pmd, 0);
138} 138}
139 139
140static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
141 unsigned long vaddr, pte_t *lastpte)
142{
143#ifdef CONFIG_HIGHMEM
144 /*
145 * Something (early fixmap) may already have put a pte
146 * page here, which causes the page table allocation
147 * to become nonlinear. Attempt to fix it, and if it
148 * is still nonlinear then we have to bug.
149 */
150 int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
151 int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;
152
153 if (pmd_idx_kmap_begin != pmd_idx_kmap_end
154 && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
155 && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
156 && ((__pa(pte) >> PAGE_SHIFT) < table_start
157 || (__pa(pte) >> PAGE_SHIFT) >= table_end)) {
158 pte_t *newpte;
159 int i;
160
161 BUG_ON(after_init_bootmem);
162 newpte = alloc_low_page();
163 for (i = 0; i < PTRS_PER_PTE; i++)
164 set_pte(newpte + i, pte[i]);
165
166 paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
167 set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));
168 BUG_ON(newpte != pte_offset_kernel(pmd, 0));
169 __flush_tlb_all();
170
171 paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
172 pte = newpte;
173 }
174 BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
175 && vaddr > fix_to_virt(FIX_KMAP_END)
176 && lastpte && lastpte + PTRS_PER_PTE != pte);
177#endif
178 return pte;
179}
180
140/* 181/*
141 * This function initializes a certain range of kernel virtual memory 182 * This function initializes a certain range of kernel virtual memory
142 * with new bootmem page tables, everywhere page tables are missing in 183 * with new bootmem page tables, everywhere page tables are missing in
@@ -153,6 +194,7 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
153 unsigned long vaddr; 194 unsigned long vaddr;
154 pgd_t *pgd; 195 pgd_t *pgd;
155 pmd_t *pmd; 196 pmd_t *pmd;
197 pte_t *pte = NULL;
156 198
157 vaddr = start; 199 vaddr = start;
158 pgd_idx = pgd_index(vaddr); 200 pgd_idx = pgd_index(vaddr);
@@ -164,7 +206,8 @@ page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
164 pmd = pmd + pmd_index(vaddr); 206 pmd = pmd + pmd_index(vaddr);
165 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); 207 for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
166 pmd++, pmd_idx++) { 208 pmd++, pmd_idx++) {
167 one_page_table_init(pmd); 209 pte = page_table_kmap_check(one_page_table_init(pmd),
210 pmd, vaddr, pte);
168 211
169 vaddr += PMD_SIZE; 212 vaddr += PMD_SIZE;
170 } 213 }
@@ -507,7 +550,6 @@ static void __init early_ioremap_page_table_range_init(pgd_t *pgd_base)
507 * Fixed mappings, only the page table structure has to be 550 * Fixed mappings, only the page table structure has to be
508 * created - mappings will be set by set_fixmap(): 551 * created - mappings will be set by set_fixmap():
509 */ 552 */
510 early_ioremap_clear();
511 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; 553 vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
512 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK; 554 end = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;
513 page_table_range_init(vaddr, end, pgd_base); 555 page_table_range_init(vaddr, end, pgd_base);
@@ -800,7 +842,7 @@ static void __init find_early_table_space(unsigned long end, int use_pse)
800 tables += PAGE_ALIGN(ptes * sizeof(pte_t)); 842 tables += PAGE_ALIGN(ptes * sizeof(pte_t));
801 843
802 /* for fixmap */ 844 /* for fixmap */
803 tables += PAGE_SIZE * 2; 845 tables += PAGE_ALIGN(__end_of_fixed_addresses * sizeof(pte_t));
804 846
805 /* 847 /*
806 * RED-PEN putting page tables only on node 0 could 848 * RED-PEN putting page tables only on node 0 could
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index 23f68e77ad1f..e6d36b490250 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -596,7 +596,7 @@ static void __init init_gbpages(void)
596 direct_gbpages = 0; 596 direct_gbpages = 0;
597} 597}
598 598
599static unsigned long __init kernel_physical_mapping_init(unsigned long start, 599static unsigned long __meminit kernel_physical_mapping_init(unsigned long start,
600 unsigned long end, 600 unsigned long end,
601 unsigned long page_size_mask) 601 unsigned long page_size_mask)
602{ 602{
diff --git a/arch/x86/mm/iomap_32.c b/arch/x86/mm/iomap_32.c
index d0151d8ce452..ca53224fc56c 100644
--- a/arch/x86/mm/iomap_32.c
+++ b/arch/x86/mm/iomap_32.c
@@ -17,6 +17,7 @@
17 */ 17 */
18 18
19#include <asm/iomap.h> 19#include <asm/iomap.h>
20#include <asm/pat.h>
20#include <linux/module.h> 21#include <linux/module.h>
21 22
22/* Map 'pfn' using fixed map 'type' and protections 'prot' 23/* Map 'pfn' using fixed map 'type' and protections 'prot'
@@ -29,6 +30,15 @@ iomap_atomic_prot_pfn(unsigned long pfn, enum km_type type, pgprot_t prot)
29 30
30 pagefault_disable(); 31 pagefault_disable();
31 32
33 /*
34 * For non-PAT systems, promote PAGE_KERNEL_WC to PAGE_KERNEL_UC_MINUS.
35 * PAGE_KERNEL_WC maps to PWT, which translates to uncached if the
36 * MTRR is UC or WC. UC_MINUS gets the real intention, of the
37 * user, which is "WC if the MTRR is WC, UC if you can't do that."
38 */
39 if (!pat_enabled && pgprot_val(prot) == pgprot_val(PAGE_KERNEL_WC))
40 prot = PAGE_KERNEL_UC_MINUS;
41
32 idx = type + KM_TYPE_NR*smp_processor_id(); 42 idx = type + KM_TYPE_NR*smp_processor_id();
33 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx); 43 vaddr = __fix_to_virt(FIX_KMAP_BEGIN + idx);
34 set_pte(kmap_pte-idx, pfn_pte(pfn, prot)); 44 set_pte(kmap_pte-idx, pfn_pte(pfn, prot));
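
The iomap_32.c hunk downgrades a write-combining request to UC_MINUS when PAT is disabled, since without PAT the PWT bit alone would give uncached rather than WC. The decision, restated as a standalone sketch:

	/*
	 * Sketch of the fallback added above: without PAT, a WC request is
	 * demoted to UC_MINUS so the MTRR setting still decides the final type.
	 */
	static pgprot_t effective_wc_prot(pgprot_t requested)
	{
		if (!pat_enabled &&
		    pgprot_val(requested) == pgprot_val(PAGE_KERNEL_WC))
			return PAGE_KERNEL_UC_MINUS;
		return requested;
	}
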
diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c
index bd85d42819e1..1448bcb7f22f 100644
--- a/arch/x86/mm/ioremap.c
+++ b/arch/x86/mm/ioremap.c
@@ -367,7 +367,7 @@ EXPORT_SYMBOL(ioremap_nocache);
367 * 367 *
368 * Must be freed with iounmap. 368 * Must be freed with iounmap.
369 */ 369 */
370void __iomem *ioremap_wc(unsigned long phys_addr, unsigned long size) 370void __iomem *ioremap_wc(resource_size_t phys_addr, unsigned long size)
371{ 371{
372 if (pat_enabled) 372 if (pat_enabled)
373 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC, 373 return __ioremap_caller(phys_addr, size, _PAGE_CACHE_WC,
@@ -557,34 +557,9 @@ void __init early_ioremap_init(void)
557 } 557 }
558} 558}
559 559
560void __init early_ioremap_clear(void)
561{
562 pmd_t *pmd;
563
564 if (early_ioremap_debug)
565 printk(KERN_INFO "early_ioremap_clear()\n");
566
567 pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN));
568 pmd_clear(pmd);
569 paravirt_release_pte(__pa(bm_pte) >> PAGE_SHIFT);
570 __flush_tlb_all();
571}
572
573void __init early_ioremap_reset(void) 560void __init early_ioremap_reset(void)
574{ 561{
575 enum fixed_addresses idx;
576 unsigned long addr, phys;
577 pte_t *pte;
578
579 after_paging_init = 1; 562 after_paging_init = 1;
580 for (idx = FIX_BTMAP_BEGIN; idx >= FIX_BTMAP_END; idx--) {
581 addr = fix_to_virt(idx);
582 pte = early_ioremap_pte(addr);
583 if (pte_present(*pte)) {
584 phys = pte_val(*pte) & PAGE_MASK;
585 set_fixmap(idx, phys);
586 }
587 }
588} 563}
589 564
590static void __init __early_set_fixmap(enum fixed_addresses idx, 565static void __init __early_set_fixmap(enum fixed_addresses idx,
diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c
index 71a14f89f89e..08d140fbc31b 100644
--- a/arch/x86/mm/numa_64.c
+++ b/arch/x86/mm/numa_64.c
@@ -20,6 +20,12 @@
20#include <asm/acpi.h> 20#include <asm/acpi.h>
21#include <asm/k8.h> 21#include <asm/k8.h>
22 22
23#ifdef CONFIG_DEBUG_PER_CPU_MAPS
24# define DBG(x...) printk(KERN_DEBUG x)
25#else
26# define DBG(x...)
27#endif
28
23struct pglist_data *node_data[MAX_NUMNODES] __read_mostly; 29struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
24EXPORT_SYMBOL(node_data); 30EXPORT_SYMBOL(node_data);
25 31
@@ -33,6 +39,21 @@ int numa_off __initdata;
33static unsigned long __initdata nodemap_addr; 39static unsigned long __initdata nodemap_addr;
34static unsigned long __initdata nodemap_size; 40static unsigned long __initdata nodemap_size;
35 41
42DEFINE_PER_CPU(int, node_number) = 0;
43EXPORT_PER_CPU_SYMBOL(node_number);
44
45/*
46 * Map cpu index to node index
47 */
48DEFINE_EARLY_PER_CPU(int, x86_cpu_to_node_map, NUMA_NO_NODE);
49EXPORT_EARLY_PER_CPU_SYMBOL(x86_cpu_to_node_map);
50
51/*
52 * Which logical CPUs are on which nodes
53 */
54cpumask_t *node_to_cpumask_map;
55EXPORT_SYMBOL(node_to_cpumask_map);
56
36/* 57/*
37 * Given a shift value, try to populate memnodemap[] 58 * Given a shift value, try to populate memnodemap[]
38 * Returns : 59 * Returns :
@@ -640,3 +661,199 @@ void __init init_cpu_to_node(void)
640#endif 661#endif
641 662
642 663
664/*
665 * Allocate node_to_cpumask_map based on number of available nodes
666 * Requires node_possible_map to be valid.
667 *
668 * Note: node_to_cpumask() is not valid until after this is done.
669 * (Use CONFIG_DEBUG_PER_CPU_MAPS to check this.)
670 */
671void __init setup_node_to_cpumask_map(void)
672{
673 unsigned int node, num = 0;
674 cpumask_t *map;
675
676 /* setup nr_node_ids if not done yet */
677 if (nr_node_ids == MAX_NUMNODES) {
678 for_each_node_mask(node, node_possible_map)
679 num = node;
680 nr_node_ids = num + 1;
681 }
682
683 /* allocate the map */
684 map = alloc_bootmem_low(nr_node_ids * sizeof(cpumask_t));
685 DBG("node_to_cpumask_map at %p for %d nodes\n", map, nr_node_ids);
686
687 pr_debug("Node to cpumask map at %p for %d nodes\n",
688 map, nr_node_ids);
689
690 /* node_to_cpumask() will now work */
691 node_to_cpumask_map = map;
692}
693
694void __cpuinit numa_set_node(int cpu, int node)
695{
696 int *cpu_to_node_map = early_per_cpu_ptr(x86_cpu_to_node_map);
697
698 /* early setting, no percpu area yet */
699 if (cpu_to_node_map) {
700 cpu_to_node_map[cpu] = node;
701 return;
702 }
703
704#ifdef CONFIG_DEBUG_PER_CPU_MAPS
705 if (cpu >= nr_cpu_ids || !per_cpu_offset(cpu)) {
706 printk(KERN_ERR "numa_set_node: invalid cpu# (%d)\n", cpu);
707 dump_stack();
708 return;
709 }
710#endif
711 per_cpu(x86_cpu_to_node_map, cpu) = node;
712
713 if (node != NUMA_NO_NODE)
714 per_cpu(node_number, cpu) = node;
715}
716
717void __cpuinit numa_clear_node(int cpu)
718{
719 numa_set_node(cpu, NUMA_NO_NODE);
720}
721
722#ifndef CONFIG_DEBUG_PER_CPU_MAPS
723
724void __cpuinit numa_add_cpu(int cpu)
725{
726 cpu_set(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
727}
728
729void __cpuinit numa_remove_cpu(int cpu)
730{
731 cpu_clear(cpu, node_to_cpumask_map[early_cpu_to_node(cpu)]);
732}
733
734#else /* CONFIG_DEBUG_PER_CPU_MAPS */
735
736/*
737 * --------- debug versions of the numa functions ---------
738 */
739static void __cpuinit numa_set_cpumask(int cpu, int enable)
740{
741 int node = early_cpu_to_node(cpu);
742 cpumask_t *mask;
743 char buf[64];
744
745 if (node_to_cpumask_map == NULL) {
746 printk(KERN_ERR "node_to_cpumask_map NULL\n");
747 dump_stack();
748 return;
749 }
750
751 mask = &node_to_cpumask_map[node];
752 if (enable)
753 cpu_set(cpu, *mask);
754 else
755 cpu_clear(cpu, *mask);
756
757 cpulist_scnprintf(buf, sizeof(buf), mask);
758 printk(KERN_DEBUG "%s cpu %d node %d: mask now %s\n",
759 enable ? "numa_add_cpu" : "numa_remove_cpu", cpu, node, buf);
760}
761
762void __cpuinit numa_add_cpu(int cpu)
763{
764 numa_set_cpumask(cpu, 1);
765}
766
767void __cpuinit numa_remove_cpu(int cpu)
768{
769 numa_set_cpumask(cpu, 0);
770}
771
772int cpu_to_node(int cpu)
773{
774 if (early_per_cpu_ptr(x86_cpu_to_node_map)) {
775 printk(KERN_WARNING
776 "cpu_to_node(%d): usage too early!\n", cpu);
777 dump_stack();
778 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
779 }
780 return per_cpu(x86_cpu_to_node_map, cpu);
781}
782EXPORT_SYMBOL(cpu_to_node);
783
784/*
785 * Same function as cpu_to_node() but used if called before the
786 * per_cpu areas are setup.
787 */
788int early_cpu_to_node(int cpu)
789{
790 if (early_per_cpu_ptr(x86_cpu_to_node_map))
791 return early_per_cpu_ptr(x86_cpu_to_node_map)[cpu];
792
793 if (!per_cpu_offset(cpu)) {
794 printk(KERN_WARNING
795 "early_cpu_to_node(%d): no per_cpu area!\n", cpu);
796 dump_stack();
797 return NUMA_NO_NODE;
798 }
799 return per_cpu(x86_cpu_to_node_map, cpu);
800}
801
802
803/* empty cpumask */
804static const cpumask_t cpu_mask_none;
805
806/*
807 * Returns a pointer to the bitmask of CPUs on Node 'node'.
808 */
809const cpumask_t *cpumask_of_node(int node)
810{
811 if (node_to_cpumask_map == NULL) {
812 printk(KERN_WARNING
813 "cpumask_of_node(%d): no node_to_cpumask_map!\n",
814 node);
815 dump_stack();
816 return (const cpumask_t *)&cpu_online_map;
817 }
818 if (node >= nr_node_ids) {
819 printk(KERN_WARNING
820 "cpumask_of_node(%d): node > nr_node_ids(%d)\n",
821 node, nr_node_ids);
822 dump_stack();
823 return &cpu_mask_none;
824 }
825 return &node_to_cpumask_map[node];
826}
827EXPORT_SYMBOL(cpumask_of_node);
828
829/*
830 * Returns a bitmask of CPUs on Node 'node'.
831 *
832 * Side note: this function creates the returned cpumask on the stack
833 * so with a high NR_CPUS count, excessive stack space is used. The
834 * node_to_cpumask_ptr function should be used whenever possible.
835 */
836cpumask_t node_to_cpumask(int node)
837{
838 if (node_to_cpumask_map == NULL) {
839 printk(KERN_WARNING
840 "node_to_cpumask(%d): no node_to_cpumask_map!\n", node);
841 dump_stack();
842 return cpu_online_map;
843 }
844 if (node >= nr_node_ids) {
845 printk(KERN_WARNING
846 "node_to_cpumask(%d): node > nr_node_ids(%d)\n",
847 node, nr_node_ids);
848 dump_stack();
849 return cpu_mask_none;
850 }
851 return node_to_cpumask_map[node];
852}
853EXPORT_SYMBOL(node_to_cpumask);
854
855/*
856 * --------- end of debug versions of the numa functions ---------
857 */
858
859#endif /* CONFIG_DEBUG_PER_CPU_MAPS */
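
The numa_64.c additions keep two CPU-to-node maps: an early static array (x86_cpu_to_node_map before the per-cpu areas exist) and a per-cpu copy written by numa_set_node() afterwards. A sketch of the two lookup paths; the wrapper is hypothetical, the map names are from the hunk:

	static int node_of_cpu(int cpu, bool percpu_ready)
	{
		if (!percpu_ready)
			return early_cpu_to_node(cpu);	/* early (static) map */

		return per_cpu(x86_cpu_to_node_map, cpu); /* per-cpu copy after setup */
	}
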
diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c
index e89d24815f26..84ba74820ad6 100644
--- a/arch/x86/mm/pageattr.c
+++ b/arch/x86/mm/pageattr.c
@@ -534,6 +534,36 @@ out_unlock:
534 return 0; 534 return 0;
535} 535}
536 536
537static int __cpa_process_fault(struct cpa_data *cpa, unsigned long vaddr,
538 int primary)
539{
540 /*
541 * Ignore all non primary paths.
542 */
543 if (!primary)
544 return 0;
545
546 /*
547 * Ignore the NULL PTE for kernel identity mapping, as it is expected
548 * to have holes.
549 * Also set numpages to '1' indicating that we processed cpa req for
550 * one virtual address page and its pfn. TBD: numpages can be set based
551 * on the initial value and the level returned by lookup_address().
552 */
553 if (within(vaddr, PAGE_OFFSET,
554 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))) {
555 cpa->numpages = 1;
556 cpa->pfn = __pa(vaddr) >> PAGE_SHIFT;
557 return 0;
558 } else {
559 WARN(1, KERN_WARNING "CPA: called for zero pte. "
560 "vaddr = %lx cpa->vaddr = %lx\n", vaddr,
561 *cpa->vaddr);
562
563 return -EFAULT;
564 }
565}
566
537static int __change_page_attr(struct cpa_data *cpa, int primary) 567static int __change_page_attr(struct cpa_data *cpa, int primary)
538{ 568{
539 unsigned long address; 569 unsigned long address;
@@ -549,17 +579,11 @@ static int __change_page_attr(struct cpa_data *cpa, int primary)
549repeat: 579repeat:
550 kpte = lookup_address(address, &level); 580 kpte = lookup_address(address, &level);
551 if (!kpte) 581 if (!kpte)
552 return 0; 582 return __cpa_process_fault(cpa, address, primary);
553 583
554 old_pte = *kpte; 584 old_pte = *kpte;
555 if (!pte_val(old_pte)) { 585 if (!pte_val(old_pte))
556 if (!primary) 586 return __cpa_process_fault(cpa, address, primary);
557 return 0;
558 WARN(1, KERN_WARNING "CPA: called for zero pte. "
559 "vaddr = %lx cpa->vaddr = %lx\n", address,
560 *cpa->vaddr);
561 return -EINVAL;
562 }
563 587
564 if (level == PG_LEVEL_4K) { 588 if (level == PG_LEVEL_4K) {
565 pte_t new_pte; 589 pte_t new_pte;
@@ -657,12 +681,7 @@ static int cpa_process_alias(struct cpa_data *cpa)
657 vaddr = *cpa->vaddr; 681 vaddr = *cpa->vaddr;
658 682
659 if (!(within(vaddr, PAGE_OFFSET, 683 if (!(within(vaddr, PAGE_OFFSET,
660 PAGE_OFFSET + (max_low_pfn_mapped << PAGE_SHIFT)) 684 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT)))) {
661#ifdef CONFIG_X86_64
662 || within(vaddr, PAGE_OFFSET + (1UL<<32),
663 PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT))
664#endif
665 )) {
666 685
667 alias_cpa = *cpa; 686 alias_cpa = *cpa;
668 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT); 687 temp_cpa_vaddr = (unsigned long) __va(cpa->pfn << PAGE_SHIFT);
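
__cpa_process_fault() above tolerates a missing or zero PTE only for primary calls whose address falls inside the kernel identity mapping, consuming one page of the request; anything else warns and returns -EFAULT. The range test it relies on, as a standalone sketch:

	/*
	 * Sketch: an address is in the kernel identity (direct) mapping iff it
	 * lies in [PAGE_OFFSET, PAGE_OFFSET + max_pfn_mapped pages).
	 */
	static inline bool vaddr_in_identity_map(unsigned long vaddr)
	{
		return vaddr >= PAGE_OFFSET &&
		       vaddr < PAGE_OFFSET + (max_pfn_mapped << PAGE_SHIFT);
	}
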
diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c
index 85cbd3cd3723..9127e31c7268 100644
--- a/arch/x86/mm/pat.c
+++ b/arch/x86/mm/pat.c
@@ -30,7 +30,7 @@
30#ifdef CONFIG_X86_PAT 30#ifdef CONFIG_X86_PAT
31int __read_mostly pat_enabled = 1; 31int __read_mostly pat_enabled = 1;
32 32
33void __cpuinit pat_disable(char *reason) 33void __cpuinit pat_disable(const char *reason)
34{ 34{
35 pat_enabled = 0; 35 pat_enabled = 0;
36 printk(KERN_INFO "%s\n", reason); 36 printk(KERN_INFO "%s\n", reason);
@@ -42,6 +42,11 @@ static int __init nopat(char *str)
42 return 0; 42 return 0;
43} 43}
44early_param("nopat", nopat); 44early_param("nopat", nopat);
45#else
46static inline void pat_disable(const char *reason)
47{
48 (void)reason;
49}
45#endif 50#endif
46 51
47 52
@@ -78,16 +83,20 @@ void pat_init(void)
78 if (!pat_enabled) 83 if (!pat_enabled)
79 return; 84 return;
80 85
81 /* Paranoia check. */ 86 if (!cpu_has_pat) {
82 if (!cpu_has_pat && boot_pat_state) { 87 if (!boot_pat_state) {
83 /* 88 pat_disable("PAT not supported by CPU.");
84 * If this happens we are on a secondary CPU, but 89 return;
85 * switched to PAT on the boot CPU. We have no way to 90 } else {
86 * undo PAT. 91 /*
87 */ 92 * If this happens we are on a secondary CPU, but
88 printk(KERN_ERR "PAT enabled, " 93 * switched to PAT on the boot CPU. We have no way to
89 "but not supported by secondary CPU\n"); 94 * undo PAT.
90 BUG(); 95 */
96 printk(KERN_ERR "PAT enabled, "
97 "but not supported by secondary CPU\n");
98 BUG();
99 }
91 } 100 }
92 101
93 /* Set PWT to Write-Combining. All other bits stay the same */ 102 /* Set PWT to Write-Combining. All other bits stay the same */
@@ -333,11 +342,23 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
333 req_type & _PAGE_CACHE_MASK); 342 req_type & _PAGE_CACHE_MASK);
334 } 343 }
335 344
336 is_range_ram = pagerange_is_ram(start, end); 345 if (new_type)
337 if (is_range_ram == 1) 346 *new_type = actual_type;
338 return reserve_ram_pages_type(start, end, req_type, new_type); 347
339 else if (is_range_ram < 0) 348 /*
340 return -EINVAL; 349 * For legacy reasons, some parts of the physical address range in the
350 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
351 * the e820 tables). So we will track the memory attributes of this
352 * legacy 1MB region using the linear memtype_list always.
353 */
354 if (end >= ISA_END_ADDRESS) {
355 is_range_ram = pagerange_is_ram(start, end);
356 if (is_range_ram == 1)
357 return reserve_ram_pages_type(start, end, req_type,
358 new_type);
359 else if (is_range_ram < 0)
360 return -EINVAL;
361 }
341 362
342 new = kmalloc(sizeof(struct memtype), GFP_KERNEL); 363 new = kmalloc(sizeof(struct memtype), GFP_KERNEL);
343 if (!new) 364 if (!new)
@@ -347,9 +368,6 @@ int reserve_memtype(u64 start, u64 end, unsigned long req_type,
347 new->end = end; 368 new->end = end;
348 new->type = actual_type; 369 new->type = actual_type;
349 370
350 if (new_type)
351 *new_type = actual_type;
352
353 spin_lock(&memtype_lock); 371 spin_lock(&memtype_lock);
354 372
355 if (cached_entry && start >= cached_start) 373 if (cached_entry && start >= cached_start)
@@ -437,11 +455,19 @@ int free_memtype(u64 start, u64 end)
437 if (is_ISA_range(start, end - 1)) 455 if (is_ISA_range(start, end - 1))
438 return 0; 456 return 0;
439 457
440 is_range_ram = pagerange_is_ram(start, end); 458 /*
441 if (is_range_ram == 1) 459 * For legacy reasons, some parts of the physical address range in the
442 return free_ram_pages_type(start, end); 460 * legacy 1MB region is treated as non-RAM (even when listed as RAM in
443 else if (is_range_ram < 0) 461 * the e820 tables). So we will track the memory attributes of this
444 return -EINVAL; 462 * legacy 1MB region using the linear memtype_list always.
463 */
464 if (end >= ISA_END_ADDRESS) {
465 is_range_ram = pagerange_is_ram(start, end);
466 if (is_range_ram == 1)
467 return free_ram_pages_type(start, end);
468 else if (is_range_ram < 0)
469 return -EINVAL;
470 }
445 471
446 spin_lock(&memtype_lock); 472 spin_lock(&memtype_lock);
447 list_for_each_entry(entry, &memtype_list, nd) { 473 list_for_each_entry(entry, &memtype_list, nd) {
@@ -601,12 +627,13 @@ void unmap_devmem(unsigned long pfn, unsigned long size, pgprot_t vma_prot)
601 * Reserved non RAM regions only and after successful reserve_memtype, 627 * Reserved non RAM regions only and after successful reserve_memtype,
602 * this func also keeps identity mapping (if any) in sync with this new prot. 628 * this func also keeps identity mapping (if any) in sync with this new prot.
603 */ 629 */
604static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot) 630static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t *vma_prot,
631 int strict_prot)
605{ 632{
606 int is_ram = 0; 633 int is_ram = 0;
607 int id_sz, ret; 634 int id_sz, ret;
608 unsigned long flags; 635 unsigned long flags;
609 unsigned long want_flags = (pgprot_val(vma_prot) & _PAGE_CACHE_MASK); 636 unsigned long want_flags = (pgprot_val(*vma_prot) & _PAGE_CACHE_MASK);
610 637
611 is_ram = pagerange_is_ram(paddr, paddr + size); 638 is_ram = pagerange_is_ram(paddr, paddr + size);
612 639
@@ -625,15 +652,24 @@ static int reserve_pfn_range(u64 paddr, unsigned long size, pgprot_t vma_prot)
625 return ret; 652 return ret;
626 653
627 if (flags != want_flags) { 654 if (flags != want_flags) {
628 free_memtype(paddr, paddr + size); 655 if (strict_prot || !is_new_memtype_allowed(want_flags, flags)) {
629 printk(KERN_ERR 656 free_memtype(paddr, paddr + size);
630 "%s:%d map pfn expected mapping type %s for %Lx-%Lx, got %s\n", 657 printk(KERN_ERR "%s:%d map pfn expected mapping type %s"
631 current->comm, current->pid, 658 " for %Lx-%Lx, got %s\n",
632 cattr_name(want_flags), 659 current->comm, current->pid,
633 (unsigned long long)paddr, 660 cattr_name(want_flags),
634 (unsigned long long)(paddr + size), 661 (unsigned long long)paddr,
635 cattr_name(flags)); 662 (unsigned long long)(paddr + size),
636 return -EINVAL; 663 cattr_name(flags));
664 return -EINVAL;
665 }
666 /*
667 * We allow returning different type than the one requested in
668 * non strict case.
669 */
670 *vma_prot = __pgprot((pgprot_val(*vma_prot) &
671 (~_PAGE_CACHE_MASK)) |
672 flags);
637 } 673 }
638 674
639 /* Need to keep identity mapping in sync */ 675 /* Need to keep identity mapping in sync */
@@ -689,6 +725,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
689 unsigned long vma_start = vma->vm_start; 725 unsigned long vma_start = vma->vm_start;
690 unsigned long vma_end = vma->vm_end; 726 unsigned long vma_end = vma->vm_end;
691 unsigned long vma_size = vma_end - vma_start; 727 unsigned long vma_size = vma_end - vma_start;
728 pgprot_t pgprot;
692 729
693 if (!pat_enabled) 730 if (!pat_enabled)
694 return 0; 731 return 0;
@@ -702,7 +739,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
702 WARN_ON_ONCE(1); 739 WARN_ON_ONCE(1);
703 return -EINVAL; 740 return -EINVAL;
704 } 741 }
705 return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); 742 pgprot = __pgprot(prot);
743 return reserve_pfn_range(paddr, vma_size, &pgprot, 1);
706 } 744 }
707 745
708 /* reserve entire vma page by page, using pfn and prot from pte */ 746 /* reserve entire vma page by page, using pfn and prot from pte */
@@ -710,7 +748,8 @@ int track_pfn_vma_copy(struct vm_area_struct *vma)
710 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) 748 if (follow_phys(vma, vma_start + i, 0, &prot, &paddr))
711 continue; 749 continue;
712 750
713 retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); 751 pgprot = __pgprot(prot);
752 retval = reserve_pfn_range(paddr, PAGE_SIZE, &pgprot, 1);
714 if (retval) 753 if (retval)
715 goto cleanup_ret; 754 goto cleanup_ret;
716 } 755 }
@@ -741,7 +780,7 @@ cleanup_ret:
741 * Note that this function can be called with caller trying to map only a 780 * Note that this function can be called with caller trying to map only a
742 * subrange/page inside the vma. 781 * subrange/page inside the vma.
743 */ 782 */
744int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, 783int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t *prot,
745 unsigned long pfn, unsigned long size) 784 unsigned long pfn, unsigned long size)
746{ 785{
747 int retval = 0; 786 int retval = 0;
@@ -758,14 +797,14 @@ int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot,
758 if (is_linear_pfn_mapping(vma)) { 797 if (is_linear_pfn_mapping(vma)) {
759 /* reserve the whole chunk starting from vm_pgoff */ 798 /* reserve the whole chunk starting from vm_pgoff */
760 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT; 799 paddr = (resource_size_t)vma->vm_pgoff << PAGE_SHIFT;
761 return reserve_pfn_range(paddr, vma_size, prot); 800 return reserve_pfn_range(paddr, vma_size, prot, 0);
762 } 801 }
763 802
764 /* reserve page by page using pfn and size */ 803 /* reserve page by page using pfn and size */
765 base_paddr = (resource_size_t)pfn << PAGE_SHIFT; 804 base_paddr = (resource_size_t)pfn << PAGE_SHIFT;
766 for (i = 0; i < size; i += PAGE_SIZE) { 805 for (i = 0; i < size; i += PAGE_SIZE) {
767 paddr = base_paddr + i; 806 paddr = base_paddr + i;
768 retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); 807 retval = reserve_pfn_range(paddr, PAGE_SIZE, prot, 0);
769 if (retval) 808 if (retval)
770 goto cleanup_ret; 809 goto cleanup_ret;
771 } 810 }
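
reserve_pfn_range() now takes a pgprot_t pointer plus a strict_prot flag: track_pfn_vma_copy() passes strict mode, while track_pfn_vma_new() allows the granted cache type to differ and has the adjusted bits written back through the pointer. A sketch of that non-strict, in-file caller pattern (reserve_pfn_range() is static to pat.c; the wrapper function below is hypothetical):

	static int map_one_pfn(struct vm_area_struct *vma, resource_size_t paddr)
	{
		pgprot_t prot = vma->vm_page_prot;
		int ret;

		ret = reserve_pfn_range(paddr, PAGE_SIZE, &prot, 0 /* non-strict */);
		if (!ret)
			vma->vm_page_prot = prot;	/* adopt the granted type */
		return ret;
	}
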
diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c
index 09737c8af074..15df1baee100 100644
--- a/arch/x86/mm/srat_64.c
+++ b/arch/x86/mm/srat_64.c
@@ -21,6 +21,7 @@
21#include <asm/numa.h> 21#include <asm/numa.h>
22#include <asm/e820.h> 22#include <asm/e820.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
24#include <asm/uv/uv.h>
24 25
25int acpi_numa __initdata; 26int acpi_numa __initdata;
26 27
diff --git a/arch/x86/kernel/tlb_64.c b/arch/x86/mm/tlb.c
index f8be6f1d2e48..72a6d4ebe34d 100644
--- a/arch/x86/kernel/tlb_64.c
+++ b/arch/x86/mm/tlb.c
@@ -1,22 +1,18 @@
1#include <linux/init.h> 1#include <linux/init.h>
2 2
3#include <linux/mm.h> 3#include <linux/mm.h>
4#include <linux/delay.h>
5#include <linux/spinlock.h> 4#include <linux/spinlock.h>
6#include <linux/smp.h> 5#include <linux/smp.h>
7#include <linux/kernel_stat.h>
8#include <linux/mc146818rtc.h>
9#include <linux/interrupt.h> 6#include <linux/interrupt.h>
7#include <linux/module.h>
10 8
11#include <asm/mtrr.h>
12#include <asm/pgalloc.h>
13#include <asm/tlbflush.h> 9#include <asm/tlbflush.h>
14#include <asm/mmu_context.h> 10#include <asm/mmu_context.h>
15#include <asm/proto.h> 11#include <asm/apic.h>
16#include <asm/apicdef.h> 12#include <asm/uv/uv.h>
17#include <asm/idle.h> 13
18#include <asm/uv/uv_hub.h> 14DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
19#include <asm/uv/uv_bau.h> 15 = { &init_mm, 0, };
20 16
21#include <mach_ipi.h> 17#include <mach_ipi.h>
22/* 18/*
@@ -33,7 +29,7 @@
33 * To avoid global state use 8 different call vectors. 29 * To avoid global state use 8 different call vectors.
34 * Each CPU uses a specific vector to trigger flushes on other 30 * Each CPU uses a specific vector to trigger flushes on other
35 * CPUs. Depending on the received vector the target CPUs look into 31 * CPUs. Depending on the received vector the target CPUs look into
36 * the right per cpu variable for the flush data. 32 * the right array slot for the flush data.
37 * 33 *
38 * With more than 8 CPUs they are hashed to the 8 available 34 * With more than 8 CPUs they are hashed to the 8 available
39 * vectors. The limited global vector space forces us to this right now. 35 * vectors. The limited global vector space forces us to this right now.
@@ -43,18 +39,18 @@
43 39
44union smp_flush_state { 40union smp_flush_state {
45 struct { 41 struct {
46 cpumask_t flush_cpumask;
47 struct mm_struct *flush_mm; 42 struct mm_struct *flush_mm;
48 unsigned long flush_va; 43 unsigned long flush_va;
49 spinlock_t tlbstate_lock; 44 spinlock_t tlbstate_lock;
45 DECLARE_BITMAP(flush_cpumask, NR_CPUS);
50 }; 46 };
51 char pad[SMP_CACHE_BYTES]; 47 char pad[CONFIG_X86_INTERNODE_CACHE_BYTES];
52} ____cacheline_aligned; 48} ____cacheline_internodealigned_in_smp;
53 49
54/* State is put into the per CPU data section, but padded 50/* State is put into the per CPU data section, but padded
55 to a full cache line because other CPUs can access it and we don't 51 to a full cache line because other CPUs can access it and we don't
56 want false sharing in the per cpu data segment. */ 52 want false sharing in the per cpu data segment. */
57static DEFINE_PER_CPU(union smp_flush_state, flush_state); 53static union smp_flush_state flush_state[NUM_INVALIDATE_TLB_VECTORS];
58 54
59/* 55/*
60 * We cannot call mmdrop() because we are in interrupt context, 56 * We cannot call mmdrop() because we are in interrupt context,
@@ -62,9 +58,9 @@ static DEFINE_PER_CPU(union smp_flush_state, flush_state);
62 */ 58 */
63void leave_mm(int cpu) 59void leave_mm(int cpu)
64{ 60{
65 if (read_pda(mmu_state) == TLBSTATE_OK) 61 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
66 BUG(); 62 BUG();
67 cpu_clear(cpu, read_pda(active_mm)->cpu_vm_mask); 63 cpu_clear(cpu, percpu_read(cpu_tlbstate.active_mm)->cpu_vm_mask);
68 load_cr3(swapper_pg_dir); 64 load_cr3(swapper_pg_dir);
69} 65}
70EXPORT_SYMBOL_GPL(leave_mm); 66EXPORT_SYMBOL_GPL(leave_mm);
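leave_mm() now goes through the new cpu_tlbstate per-cpu variable instead of the x86_64 PDA. A rough sketch of the state it touches; the field names are inferred from the percpu_read() accesses above, not copied from the header:

	/* Assumed layout, inferred from cpu_tlbstate.state / .active_mm uses. */
	struct tlb_state {
		struct mm_struct *active_mm;
		int state;		/* TLBSTATE_OK or TLBSTATE_LAZY */
	};
	DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate) = { &init_mm, 0, };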
@@ -117,10 +113,20 @@ EXPORT_SYMBOL_GPL(leave_mm);
117 * Interrupts are disabled. 113 * Interrupts are disabled.
118 */ 114 */
119 115
120asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs) 116/*
    117 * FIXME: use of asmlinkage is not consistent.  On x86_64 it is a no-op
    118 * kept only for documentation purposes; on x86_32, asmlinkage is
    119 * regparm(0), but interrupt

120 * entry calls in with the first parameter in %eax. Maybe define
121 * intrlinkage?
122 */
123#ifdef CONFIG_X86_64
124asmlinkage
125#endif
126void smp_invalidate_interrupt(struct pt_regs *regs)
121{ 127{
122 int cpu; 128 unsigned int cpu;
123 int sender; 129 unsigned int sender;
124 union smp_flush_state *f; 130 union smp_flush_state *f;
125 131
126 cpu = smp_processor_id(); 132 cpu = smp_processor_id();
@@ -129,9 +135,9 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
129 * Use that to determine where the sender put the data. 135 * Use that to determine where the sender put the data.
130 */ 136 */
131 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START; 137 sender = ~regs->orig_ax - INVALIDATE_TLB_VECTOR_START;
132 f = &per_cpu(flush_state, sender); 138 f = &flush_state[sender];
133 139
134 if (!cpu_isset(cpu, f->flush_cpumask)) 140 if (!cpumask_test_cpu(cpu, to_cpumask(f->flush_cpumask)))
135 goto out; 141 goto out;
136 /* 142 /*
137 * This was a BUG() but until someone can quote me the 143 * This was a BUG() but until someone can quote me the
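The interrupt entry stub leaves the bitwise-negated vector number in orig_ax, which is why the handler above undoes the negation before subtracting the base vector (a condensed sketch of the expression in the hunk, nothing more):

	/* Sketch: recover the sender's slot from the negated vector in orig_ax. */
	unsigned int vector = ~regs->orig_ax;
	unsigned int sender = vector - INVALIDATE_TLB_VECTOR_START;
	union smp_flush_state *f = &flush_state[sender];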
@@ -142,8 +148,8 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
142 * BUG(); 148 * BUG();
143 */ 149 */
144 150
145 if (f->flush_mm == read_pda(active_mm)) { 151 if (f->flush_mm == percpu_read(cpu_tlbstate.active_mm)) {
146 if (read_pda(mmu_state) == TLBSTATE_OK) { 152 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_OK) {
147 if (f->flush_va == TLB_FLUSH_ALL) 153 if (f->flush_va == TLB_FLUSH_ALL)
148 local_flush_tlb(); 154 local_flush_tlb();
149 else 155 else
@@ -153,23 +159,21 @@ asmlinkage void smp_invalidate_interrupt(struct pt_regs *regs)
153 } 159 }
154out: 160out:
155 ack_APIC_irq(); 161 ack_APIC_irq();
156 cpu_clear(cpu, f->flush_cpumask); 162 smp_mb__before_clear_bit();
163 cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
164 smp_mb__after_clear_bit();
157 inc_irq_stat(irq_tlb_count); 165 inc_irq_stat(irq_tlb_count);
158} 166}
159 167
160void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm, 168static void flush_tlb_others_ipi(const struct cpumask *cpumask,
161 unsigned long va) 169 struct mm_struct *mm, unsigned long va)
162{ 170{
163 int sender; 171 unsigned int sender;
164 union smp_flush_state *f; 172 union smp_flush_state *f;
165 cpumask_t cpumask = *cpumaskp;
166
167 if (is_uv_system() && uv_flush_tlb_others(&cpumask, mm, va))
168 return;
169 173
170 /* Caller has disabled preemption */ 174 /* Caller has disabled preemption */
171 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS; 175 sender = smp_processor_id() % NUM_INVALIDATE_TLB_VECTORS;
172 f = &per_cpu(flush_state, sender); 176 f = &flush_state[sender];
173 177
174 /* 178 /*
175 * Could avoid this lock when 179 * Could avoid this lock when
@@ -180,7 +184,8 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
180 184
181 f->flush_mm = mm; 185 f->flush_mm = mm;
182 f->flush_va = va; 186 f->flush_va = va;
183 cpus_or(f->flush_cpumask, cpumask, f->flush_cpumask); 187 cpumask_andnot(to_cpumask(f->flush_cpumask),
188 cpumask, cpumask_of(smp_processor_id()));
184 189
185 /* 190 /*
186 * Make the above memory operations globally visible before 191 * Make the above memory operations globally visible before
@@ -191,9 +196,10 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
191 * We have to send the IPI only to 196 * We have to send the IPI only to
192 * CPUs affected. 197 * CPUs affected.
193 */ 198 */
194 send_IPI_mask(&cpumask, INVALIDATE_TLB_VECTOR_START + sender); 199 send_IPI_mask(to_cpumask(f->flush_cpumask),
200 INVALIDATE_TLB_VECTOR_START + sender);
195 201
196 while (!cpus_empty(f->flush_cpumask)) 202 while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
197 cpu_relax(); 203 cpu_relax();
198 204
199 f->flush_mm = NULL; 205 f->flush_mm = NULL;
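Putting the two sides together, the handshake reads roughly as below; this is a condensation of the code in the hunks above, not additional behaviour:

	/* Sender (flush_tlb_others_ipi), with f->tlbstate_lock held: */
	f->flush_mm = mm;
	f->flush_va = va;
	cpumask_andnot(to_cpumask(f->flush_cpumask),
		       cpumask, cpumask_of(smp_processor_id()));
	send_IPI_mask(to_cpumask(f->flush_cpumask),
		      INVALIDATE_TLB_VECTOR_START + sender);
	while (!cpumask_empty(to_cpumask(f->flush_cpumask)))
		cpu_relax();	/* each receiver clears its own bit when done */

	/* Receiver (smp_invalidate_interrupt), after flushing locally: */
	smp_mb__before_clear_bit();
	cpumask_clear_cpu(cpu, to_cpumask(f->flush_cpumask));
	smp_mb__after_clear_bit();	/* flush visible before the sender proceeds */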
@@ -201,12 +207,28 @@ void native_flush_tlb_others(const cpumask_t *cpumaskp, struct mm_struct *mm,
201 spin_unlock(&f->tlbstate_lock); 207 spin_unlock(&f->tlbstate_lock);
202} 208}
203 209
210void native_flush_tlb_others(const struct cpumask *cpumask,
211 struct mm_struct *mm, unsigned long va)
212{
213 if (is_uv_system()) {
214 unsigned int cpu;
215
216 cpu = get_cpu();
217 cpumask = uv_flush_tlb_others(cpumask, mm, va, cpu);
218 if (cpumask)
219 flush_tlb_others_ipi(cpumask, mm, va);
220 put_cpu();
221 return;
222 }
223 flush_tlb_others_ipi(cpumask, mm, va);
224}
225
204static int __cpuinit init_smp_flush(void) 226static int __cpuinit init_smp_flush(void)
205{ 227{
206 int i; 228 int i;
207 229
208 for_each_possible_cpu(i) 230 for (i = 0; i < ARRAY_SIZE(flush_state); i++)
209 spin_lock_init(&per_cpu(flush_state, i).tlbstate_lock); 231 spin_lock_init(&flush_state[i].tlbstate_lock);
210 232
211 return 0; 233 return 0;
212} 234}
@@ -215,25 +237,18 @@ core_initcall(init_smp_flush);
215void flush_tlb_current_task(void) 237void flush_tlb_current_task(void)
216{ 238{
217 struct mm_struct *mm = current->mm; 239 struct mm_struct *mm = current->mm;
218 cpumask_t cpu_mask;
219 240
220 preempt_disable(); 241 preempt_disable();
221 cpu_mask = mm->cpu_vm_mask;
222 cpu_clear(smp_processor_id(), cpu_mask);
223 242
224 local_flush_tlb(); 243 local_flush_tlb();
225 if (!cpus_empty(cpu_mask)) 244 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
226 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 245 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
227 preempt_enable(); 246 preempt_enable();
228} 247}
229 248
230void flush_tlb_mm(struct mm_struct *mm) 249void flush_tlb_mm(struct mm_struct *mm)
231{ 250{
232 cpumask_t cpu_mask;
233
234 preempt_disable(); 251 preempt_disable();
235 cpu_mask = mm->cpu_vm_mask;
236 cpu_clear(smp_processor_id(), cpu_mask);
237 252
238 if (current->active_mm == mm) { 253 if (current->active_mm == mm) {
239 if (current->mm) 254 if (current->mm)
@@ -241,8 +256,8 @@ void flush_tlb_mm(struct mm_struct *mm)
241 else 256 else
242 leave_mm(smp_processor_id()); 257 leave_mm(smp_processor_id());
243 } 258 }
244 if (!cpus_empty(cpu_mask)) 259 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
245 flush_tlb_others(cpu_mask, mm, TLB_FLUSH_ALL); 260 flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);
246 261
247 preempt_enable(); 262 preempt_enable();
248} 263}
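The open-coded copy-and-clear of the cpumask is replaced by a direct test. The pattern relies only on the documented semantics of cpumask_any_but(), sketched here for clarity (not the helper's real implementation):

	/* Sketch: "is any CPU other than myself using this mm?"
	 * cpumask_any_but(mask, cpu) returns some CPU in *mask other than
	 * 'cpu', or a value >= nr_cpu_ids if no such CPU exists. */
	if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
		flush_tlb_others(&mm->cpu_vm_mask, mm, TLB_FLUSH_ALL);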
@@ -250,11 +265,8 @@ void flush_tlb_mm(struct mm_struct *mm)
250void flush_tlb_page(struct vm_area_struct *vma, unsigned long va) 265void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
251{ 266{
252 struct mm_struct *mm = vma->vm_mm; 267 struct mm_struct *mm = vma->vm_mm;
253 cpumask_t cpu_mask;
254 268
255 preempt_disable(); 269 preempt_disable();
256 cpu_mask = mm->cpu_vm_mask;
257 cpu_clear(smp_processor_id(), cpu_mask);
258 270
259 if (current->active_mm == mm) { 271 if (current->active_mm == mm) {
260 if (current->mm) 272 if (current->mm)
@@ -263,8 +275,8 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long va)
263 leave_mm(smp_processor_id()); 275 leave_mm(smp_processor_id());
264 } 276 }
265 277
266 if (!cpus_empty(cpu_mask)) 278 if (cpumask_any_but(&mm->cpu_vm_mask, smp_processor_id()) < nr_cpu_ids)
267 flush_tlb_others(cpu_mask, mm, va); 279 flush_tlb_others(&mm->cpu_vm_mask, mm, va);
268 280
269 preempt_enable(); 281 preempt_enable();
270} 282}
@@ -274,7 +286,7 @@ static void do_flush_tlb_all(void *info)
274 unsigned long cpu = smp_processor_id(); 286 unsigned long cpu = smp_processor_id();
275 287
276 __flush_tlb_all(); 288 __flush_tlb_all();
277 if (read_pda(mmu_state) == TLBSTATE_LAZY) 289 if (percpu_read(cpu_tlbstate.state) == TLBSTATE_LAZY)
278 leave_mm(cpu); 290 leave_mm(cpu);
279} 291}
280 292
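do_flush_tlb_all() is the per-CPU callback for a global flush; CPUs that only held an mm lazily drop it entirely. A hedged sketch of how it is typically driven (the on_each_cpu() wrapper is an assumption here, it is not part of the hunk above):

	/* Sketch: run do_flush_tlb_all() on every online CPU and wait. */
	void flush_tlb_all(void)
	{
		on_each_cpu(do_flush_tlb_all, NULL, 1);
	}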
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index f884740da318..5ead808dd70c 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -314,17 +314,7 @@ int pci_mmap_page_range(struct pci_dev *dev, struct vm_area_struct *vma,
314 return retval; 314 return retval;
315 315
316 if (flags != new_flags) { 316 if (flags != new_flags) {
317 /* 317 if (!is_new_memtype_allowed(flags, new_flags)) {
318 * Do not fallback to certain memory types with certain
319 * requested type:
320 * - request is uncached, return cannot be write-back
321 * - request is uncached, return cannot be write-combine
322 * - request is write-combine, return cannot be write-back
323 */
324 if ((flags == _PAGE_CACHE_UC_MINUS &&
325 (new_flags == _PAGE_CACHE_WB)) ||
326 (flags == _PAGE_CACHE_WC &&
327 new_flags == _PAGE_CACHE_WB)) {
328 free_memtype(addr, addr+len); 318 free_memtype(addr, addr+len);
329 return -EINVAL; 319 return -EINVAL;
330 } 320 }
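The deleted comment spelled out the forbidden fallbacks; is_new_memtype_allowed() centralizes that check. A sketch of what the helper has to reject, based only on the checks removed above (the real helper may cover more combinations, and the name with the _sketch suffix is hypothetical):

	/* Sketch: forbid falling back from a stricter request to write-back. */
	static inline int is_new_memtype_allowed_sketch(unsigned long flags,
							unsigned long new_flags)
	{
		if ((flags == _PAGE_CACHE_UC_MINUS || flags == _PAGE_CACHE_WC) &&
		    new_flags == _PAGE_CACHE_WB)
			return 0;
		return 1;
	}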
diff --git a/arch/x86/scripts/strip-symbols b/arch/x86/scripts/strip-symbols
deleted file mode 100644
index a2f1ccb827c7..000000000000
--- a/arch/x86/scripts/strip-symbols
+++ /dev/null
@@ -1 +0,0 @@
1__cpu_vendor_dev_X86_VENDOR_*
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index bea215230b20..6b3f7eef57e3 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -634,35 +634,27 @@ static void xen_flush_tlb_single(unsigned long addr)
634 preempt_enable(); 634 preempt_enable();
635} 635}
636 636
637static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm, 637static void xen_flush_tlb_others(const struct cpumask *cpus,
638 unsigned long va) 638 struct mm_struct *mm, unsigned long va)
639{ 639{
640 struct { 640 struct {
641 struct mmuext_op op; 641 struct mmuext_op op;
642 cpumask_t mask; 642 DECLARE_BITMAP(mask, NR_CPUS);
643 } *args; 643 } *args;
644 cpumask_t cpumask = *cpus;
645 struct multicall_space mcs; 644 struct multicall_space mcs;
646 645
647 /* 646 BUG_ON(cpumask_empty(cpus));
648 * A couple of (to be removed) sanity checks:
649 *
650 * - current CPU must not be in mask
651 * - mask must exist :)
652 */
653 BUG_ON(cpus_empty(cpumask));
654 BUG_ON(cpu_isset(smp_processor_id(), cpumask));
655 BUG_ON(!mm); 647 BUG_ON(!mm);
656 648
657 /* If a CPU which we ran on has gone down, OK. */
658 cpus_and(cpumask, cpumask, cpu_online_map);
659 if (cpus_empty(cpumask))
660 return;
661
662 mcs = xen_mc_entry(sizeof(*args)); 649 mcs = xen_mc_entry(sizeof(*args));
663 args = mcs.args; 650 args = mcs.args;
664 args->mask = cpumask; 651 args->op.arg2.vcpumask = to_cpumask(args->mask);
665 args->op.arg2.vcpumask = &args->mask; 652
653 /* Remove us, and any offline CPUS. */
654 cpumask_and(to_cpumask(args->mask), cpus, cpu_online_mask);
655 cpumask_clear_cpu(smp_processor_id(), to_cpumask(args->mask));
656 if (unlikely(cpumask_empty(to_cpumask(args->mask))))
657 goto issue;
666 658
667 if (va == TLB_FLUSH_ALL) { 659 if (va == TLB_FLUSH_ALL) {
668 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI; 660 args->op.cmd = MMUEXT_TLB_FLUSH_MULTI;
@@ -673,6 +665,7 @@ static void xen_flush_tlb_others(const cpumask_t *cpus, struct mm_struct *mm,
673 665
674 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF); 666 MULTI_mmuext_op(mcs.mc, &args->op, 1, NULL, DOMID_SELF);
675 667
668issue:
676 xen_mc_issue(PARAVIRT_LAZY_MMU); 669 xen_mc_issue(PARAVIRT_LAZY_MMU);
677} 670}
678 671
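The cpumask_t embedded in the multicall arguments becomes a raw NR_CPUS-sized bitmap, converted back with to_cpumask() wherever a struct cpumask * is needed. The conversion is essentially a cast; a sketch assuming the usual definition (the real macro adds type checking):

	/* Sketch: to_cpumask() reinterprets an unsigned long bitmap as a cpumask. */
	#define to_cpumask(bitmap)	((struct cpumask *)(bitmap))

	DECLARE_BITMAP(mask, NR_CPUS);
	cpumask_and(to_cpumask(mask), cpus, cpu_online_mask);
	cpumask_clear_cpu(smp_processor_id(), to_cpumask(mask));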
@@ -702,17 +695,17 @@ static void xen_write_cr0(unsigned long cr0)
702 695
703static void xen_write_cr2(unsigned long cr2) 696static void xen_write_cr2(unsigned long cr2)
704{ 697{
705 x86_read_percpu(xen_vcpu)->arch.cr2 = cr2; 698 percpu_read(xen_vcpu)->arch.cr2 = cr2;
706} 699}
707 700
708static unsigned long xen_read_cr2(void) 701static unsigned long xen_read_cr2(void)
709{ 702{
710 return x86_read_percpu(xen_vcpu)->arch.cr2; 703 return percpu_read(xen_vcpu)->arch.cr2;
711} 704}
712 705
713static unsigned long xen_read_cr2_direct(void) 706static unsigned long xen_read_cr2_direct(void)
714{ 707{
715 return x86_read_percpu(xen_vcpu_info.arch.cr2); 708 return percpu_read(xen_vcpu_info.arch.cr2);
716} 709}
717 710
718static void xen_write_cr4(unsigned long cr4) 711static void xen_write_cr4(unsigned long cr4)
@@ -725,12 +718,12 @@ static void xen_write_cr4(unsigned long cr4)
725 718
726static unsigned long xen_read_cr3(void) 719static unsigned long xen_read_cr3(void)
727{ 720{
728 return x86_read_percpu(xen_cr3); 721 return percpu_read(xen_cr3);
729} 722}
730 723
731static void set_current_cr3(void *v) 724static void set_current_cr3(void *v)
732{ 725{
733 x86_write_percpu(xen_current_cr3, (unsigned long)v); 726 percpu_write(xen_current_cr3, (unsigned long)v);
734} 727}
735 728
736static void __xen_write_cr3(bool kernel, unsigned long cr3) 729static void __xen_write_cr3(bool kernel, unsigned long cr3)
@@ -755,7 +748,7 @@ static void __xen_write_cr3(bool kernel, unsigned long cr3)
755 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF); 748 MULTI_mmuext_op(mcs.mc, op, 1, NULL, DOMID_SELF);
756 749
757 if (kernel) { 750 if (kernel) {
758 x86_write_percpu(xen_cr3, cr3); 751 percpu_write(xen_cr3, cr3);
759 752
760 /* Update xen_current_cr3 once the batch has actually 753 /* Update xen_current_cr3 once the batch has actually
761 been submitted. */ 754 been submitted. */
@@ -771,7 +764,7 @@ static void xen_write_cr3(unsigned long cr3)
771 764
 772	/* Update while interrupts are disabled, so it's atomic with	 765	/* Update while interrupts are disabled, so it's atomic with
 773	   respect to IPIs */	 766	   respect to IPIs */
774 x86_write_percpu(xen_cr3, cr3); 767 percpu_write(xen_cr3, cr3);
775 768
776 __xen_write_cr3(true, cr3); 769 __xen_write_cr3(true, cr3);
777 770
@@ -1314,7 +1307,6 @@ static const struct pv_mmu_ops xen_mmu_ops __initdata = {
1314 .ptep_modify_prot_commit = __ptep_modify_prot_commit, 1307 .ptep_modify_prot_commit = __ptep_modify_prot_commit,
1315 1308
1316 .pte_val = xen_pte_val, 1309 .pte_val = xen_pte_val,
1317 .pte_flags = native_pte_flags,
1318 .pgd_val = xen_pgd_val, 1310 .pgd_val = xen_pgd_val,
1319 1311
1320 .make_pte = xen_make_pte, 1312 .make_pte = xen_make_pte,
@@ -1652,7 +1644,6 @@ asmlinkage void __init xen_start_kernel(void)
1652#ifdef CONFIG_X86_64 1644#ifdef CONFIG_X86_64
1653 /* Disable until direct per-cpu data access. */ 1645 /* Disable until direct per-cpu data access. */
1654 have_vcpu_info_placement = 0; 1646 have_vcpu_info_placement = 0;
1655 x86_64_init_pda();
1656#endif 1647#endif
1657 1648
1658 xen_smp_init(); 1649 xen_smp_init();
diff --git a/arch/x86/xen/irq.c b/arch/x86/xen/irq.c
index bb042608c602..2e8271431e1a 100644
--- a/arch/x86/xen/irq.c
+++ b/arch/x86/xen/irq.c
@@ -39,7 +39,7 @@ static unsigned long xen_save_fl(void)
39 struct vcpu_info *vcpu; 39 struct vcpu_info *vcpu;
40 unsigned long flags; 40 unsigned long flags;
41 41
42 vcpu = x86_read_percpu(xen_vcpu); 42 vcpu = percpu_read(xen_vcpu);
43 43
44 /* flag has opposite sense of mask */ 44 /* flag has opposite sense of mask */
45 flags = !vcpu->evtchn_upcall_mask; 45 flags = !vcpu->evtchn_upcall_mask;
@@ -62,7 +62,7 @@ static void xen_restore_fl(unsigned long flags)
 62	   make sure we don't switch CPUs between getting the vcpu	 62	  make sure we don't switch CPUs between getting the vcpu
63 pointer and updating the mask. */ 63 pointer and updating the mask. */
64 preempt_disable(); 64 preempt_disable();
65 vcpu = x86_read_percpu(xen_vcpu); 65 vcpu = percpu_read(xen_vcpu);
66 vcpu->evtchn_upcall_mask = flags; 66 vcpu->evtchn_upcall_mask = flags;
67 preempt_enable_no_resched(); 67 preempt_enable_no_resched();
68 68
@@ -83,7 +83,7 @@ static void xen_irq_disable(void)
 83	   make sure we don't switch CPUs between getting the vcpu	 83	  make sure we don't switch CPUs between getting the vcpu
84 pointer and updating the mask. */ 84 pointer and updating the mask. */
85 preempt_disable(); 85 preempt_disable();
86 x86_read_percpu(xen_vcpu)->evtchn_upcall_mask = 1; 86 percpu_read(xen_vcpu)->evtchn_upcall_mask = 1;
87 preempt_enable_no_resched(); 87 preempt_enable_no_resched();
88} 88}
89 89
@@ -96,7 +96,7 @@ static void xen_irq_enable(void)
96 the caller is confused and is trying to re-enable interrupts 96 the caller is confused and is trying to re-enable interrupts
97 on an indeterminate processor. */ 97 on an indeterminate processor. */
98 98
99 vcpu = x86_read_percpu(xen_vcpu); 99 vcpu = percpu_read(xen_vcpu);
100 vcpu->evtchn_upcall_mask = 0; 100 vcpu->evtchn_upcall_mask = 0;
101 101
102 /* Doesn't matter if we get preempted here, because any 102 /* Doesn't matter if we get preempted here, because any
diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c
index 503c240e26c7..98cb9869eb24 100644
--- a/arch/x86/xen/mmu.c
+++ b/arch/x86/xen/mmu.c
@@ -1063,18 +1063,14 @@ static void drop_other_mm_ref(void *info)
1063 struct mm_struct *mm = info; 1063 struct mm_struct *mm = info;
1064 struct mm_struct *active_mm; 1064 struct mm_struct *active_mm;
1065 1065
1066#ifdef CONFIG_X86_64 1066 active_mm = percpu_read(cpu_tlbstate.active_mm);
1067 active_mm = read_pda(active_mm);
1068#else
1069 active_mm = __get_cpu_var(cpu_tlbstate).active_mm;
1070#endif
1071 1067
1072 if (active_mm == mm) 1068 if (active_mm == mm)
1073 leave_mm(smp_processor_id()); 1069 leave_mm(smp_processor_id());
1074 1070
1075 /* If this cpu still has a stale cr3 reference, then make sure 1071 /* If this cpu still has a stale cr3 reference, then make sure
1076 it has been flushed. */ 1072 it has been flushed. */
1077 if (x86_read_percpu(xen_current_cr3) == __pa(mm->pgd)) { 1073 if (percpu_read(xen_current_cr3) == __pa(mm->pgd)) {
1078 load_cr3(swapper_pg_dir); 1074 load_cr3(swapper_pg_dir);
1079 arch_flush_lazy_cpu_mode(); 1075 arch_flush_lazy_cpu_mode();
1080 } 1076 }
diff --git a/arch/x86/xen/multicalls.h b/arch/x86/xen/multicalls.h
index 858938241616..e786fa7f2615 100644
--- a/arch/x86/xen/multicalls.h
+++ b/arch/x86/xen/multicalls.h
@@ -39,7 +39,7 @@ static inline void xen_mc_issue(unsigned mode)
39 xen_mc_flush(); 39 xen_mc_flush();
40 40
41 /* restore flags saved in xen_mc_batch */ 41 /* restore flags saved in xen_mc_batch */
42 local_irq_restore(x86_read_percpu(xen_mc_irq_flags)); 42 local_irq_restore(percpu_read(xen_mc_irq_flags));
43} 43}
44 44
45/* Set up a callback to be called when the current batch is flushed */ 45/* Set up a callback to be called when the current batch is flushed */
diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c
index c44e2069c7c7..7735e3dd359c 100644
--- a/arch/x86/xen/smp.c
+++ b/arch/x86/xen/smp.c
@@ -50,11 +50,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id);
50 */ 50 */
51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id) 51static irqreturn_t xen_reschedule_interrupt(int irq, void *dev_id)
52{ 52{
53#ifdef CONFIG_X86_32 53 inc_irq_stat(irq_resched_count);
54 __get_cpu_var(irq_stat).irq_resched_count++;
55#else
56 add_pda(irq_resched_count, 1);
57#endif
58 54
59 return IRQ_HANDLED; 55 return IRQ_HANDLED;
60} 56}
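inc_irq_stat() replaces the open-coded 32/64-bit split (per-cpu irq_stat field vs. PDA counter). A hedged sketch of what the unified helper boils down to; the exact macro body is an assumption about the new hardirq.h, not shown in this patch:

	/* Assumed form: one definition serves both 32- and 64-bit now. */
	#define inc_irq_stat(member)	percpu_add(irq_stat.member, 1)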
@@ -78,7 +74,7 @@ static __cpuinit void cpu_bringup(void)
78 xen_setup_cpu_clockevents(); 74 xen_setup_cpu_clockevents();
79 75
80 cpu_set(cpu, cpu_online_map); 76 cpu_set(cpu, cpu_online_map);
81 x86_write_percpu(cpu_state, CPU_ONLINE); 77 percpu_write(cpu_state, CPU_ONLINE);
82 wmb(); 78 wmb();
83 79
84 /* We can take interrupts now: we're officially "up". */ 80 /* We can take interrupts now: we're officially "up". */
@@ -283,22 +279,10 @@ static int __cpuinit xen_cpu_up(unsigned int cpu)
283 struct task_struct *idle = idle_task(cpu); 279 struct task_struct *idle = idle_task(cpu);
284 int rc; 280 int rc;
285 281
286#ifdef CONFIG_X86_64
287 /* Allocate node local memory for AP pdas */
288 WARN_ON(cpu == 0);
289 if (cpu > 0) {
290 rc = get_local_pda(cpu);
291 if (rc)
292 return rc;
293 }
294#endif
295
296#ifdef CONFIG_X86_32
297 init_gdt(cpu);
298 per_cpu(current_task, cpu) = idle; 282 per_cpu(current_task, cpu) = idle;
283#ifdef CONFIG_X86_32
299 irq_ctx_init(cpu); 284 irq_ctx_init(cpu);
300#else 285#else
301 cpu_pda(cpu)->pcurrent = idle;
302 clear_tsk_thread_flag(idle, TIF_FORK); 286 clear_tsk_thread_flag(idle, TIF_FORK);
303#endif 287#endif
304 xen_setup_timer(cpu); 288 xen_setup_timer(cpu);
@@ -445,11 +429,7 @@ static irqreturn_t xen_call_function_interrupt(int irq, void *dev_id)
445{ 429{
446 irq_enter(); 430 irq_enter();
447 generic_smp_call_function_interrupt(); 431 generic_smp_call_function_interrupt();
448#ifdef CONFIG_X86_32 432 inc_irq_stat(irq_call_count);
449 __get_cpu_var(irq_stat).irq_call_count++;
450#else
451 add_pda(irq_call_count, 1);
452#endif
453 irq_exit(); 433 irq_exit();
454 434
455 return IRQ_HANDLED; 435 return IRQ_HANDLED;
@@ -459,11 +439,7 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id)
459{ 439{
460 irq_enter(); 440 irq_enter();
461 generic_smp_call_function_single_interrupt(); 441 generic_smp_call_function_single_interrupt();
462#ifdef CONFIG_X86_32 442 inc_irq_stat(irq_call_count);
463 __get_cpu_var(irq_stat).irq_call_count++;
464#else
465 add_pda(irq_call_count, 1);
466#endif
467 irq_exit(); 443 irq_exit();
468 444
469 return IRQ_HANDLED; 445 return IRQ_HANDLED;
diff --git a/arch/x86/xen/suspend.c b/arch/x86/xen/suspend.c
index 212ffe012b76..95be7b434724 100644
--- a/arch/x86/xen/suspend.c
+++ b/arch/x86/xen/suspend.c
@@ -6,6 +6,7 @@
6 6
7#include <asm/xen/hypercall.h> 7#include <asm/xen/hypercall.h>
8#include <asm/xen/page.h> 8#include <asm/xen/page.h>
9#include <asm/fixmap.h>
9 10
10#include "xen-ops.h" 11#include "xen-ops.h"
11#include "mmu.h" 12#include "mmu.h"
diff --git a/arch/x86/xen/xen-asm_64.S b/arch/x86/xen/xen-asm_64.S
index 05794c566e87..d6fc51f4ce85 100644
--- a/arch/x86/xen/xen-asm_64.S
+++ b/arch/x86/xen/xen-asm_64.S
@@ -17,6 +17,7 @@
17#include <asm/processor-flags.h> 17#include <asm/processor-flags.h>
18#include <asm/errno.h> 18#include <asm/errno.h>
19#include <asm/segment.h> 19#include <asm/segment.h>
20#include <asm/percpu.h>
20 21
21#include <xen/interface/xen.h> 22#include <xen/interface/xen.h>
22 23
@@ -28,12 +29,10 @@
28 29
29#if 1 30#if 1
30/* 31/*
 31	 x86-64 does not yet support direct access to percpu variables	 32	FIXME: x86_64 can now support direct access to percpu variables
32 via a segment override, so we just need to make sure this code 33 via a segment override. Update xen accordingly.
33 never gets used
34 */ 34 */
35#define BUG ud2a 35#define BUG ud2a
36#define PER_CPU_VAR(var, off) 0xdeadbeef
37#endif 36#endif
38 37
39/* 38/*
@@ -45,14 +44,14 @@ ENTRY(xen_irq_enable_direct)
45 BUG 44 BUG
46 45
47 /* Unmask events */ 46 /* Unmask events */
48 movb $0, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 47 movb $0, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
49 48
50 /* Preempt here doesn't matter because that will deal with 49 /* Preempt here doesn't matter because that will deal with
51 any pending interrupts. The pending check may end up being 50 any pending interrupts. The pending check may end up being
52 run on the wrong CPU, but that doesn't hurt. */ 51 run on the wrong CPU, but that doesn't hurt. */
53 52
54 /* Test for pending */ 53 /* Test for pending */
55 testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) 54 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
56 jz 1f 55 jz 1f
57 56
582: call check_events 572: call check_events
@@ -69,7 +68,7 @@ ENDPATCH(xen_irq_enable_direct)
69ENTRY(xen_irq_disable_direct) 68ENTRY(xen_irq_disable_direct)
70 BUG 69 BUG
71 70
72 movb $1, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 71 movb $1, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
73ENDPATCH(xen_irq_disable_direct) 72ENDPATCH(xen_irq_disable_direct)
74 ret 73 ret
75 ENDPROC(xen_irq_disable_direct) 74 ENDPROC(xen_irq_disable_direct)
@@ -87,7 +86,7 @@ ENDPATCH(xen_irq_disable_direct)
87ENTRY(xen_save_fl_direct) 86ENTRY(xen_save_fl_direct)
88 BUG 87 BUG
89 88
90 testb $0xff, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 89 testb $0xff, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
91 setz %ah 90 setz %ah
92 addb %ah,%ah 91 addb %ah,%ah
93ENDPATCH(xen_save_fl_direct) 92ENDPATCH(xen_save_fl_direct)
@@ -107,13 +106,13 @@ ENTRY(xen_restore_fl_direct)
107 BUG 106 BUG
108 107
109 testb $X86_EFLAGS_IF>>8, %ah 108 testb $X86_EFLAGS_IF>>8, %ah
110 setz PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_mask) 109 setz PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_mask
111 /* Preempt here doesn't matter because that will deal with 110 /* Preempt here doesn't matter because that will deal with
112 any pending interrupts. The pending check may end up being 111 any pending interrupts. The pending check may end up being
113 run on the wrong CPU, but that doesn't hurt. */ 112 run on the wrong CPU, but that doesn't hurt. */
114 113
115 /* check for unmasked and pending */ 114 /* check for unmasked and pending */
116 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info, XEN_vcpu_info_pending) 115 cmpw $0x0001, PER_CPU_VAR(xen_vcpu_info) + XEN_vcpu_info_pending
117 jz 1f 116 jz 1f
1182: call check_events 1172: call check_events
1191: 1181:
@@ -195,11 +194,11 @@ RELOC(xen_sysexit, 1b+1)
195ENTRY(xen_sysret64) 194ENTRY(xen_sysret64)
196 /* We're already on the usermode stack at this point, but still 195 /* We're already on the usermode stack at this point, but still
197 with the kernel gs, so we can easily switch back */ 196 with the kernel gs, so we can easily switch back */
198 movq %rsp, %gs:pda_oldrsp 197 movq %rsp, PER_CPU_VAR(old_rsp)
199 movq %gs:pda_kernelstack,%rsp 198 movq PER_CPU_VAR(kernel_stack),%rsp
200 199
201 pushq $__USER_DS 200 pushq $__USER_DS
202 pushq %gs:pda_oldrsp 201 pushq PER_CPU_VAR(old_rsp)
203 pushq %r11 202 pushq %r11
204 pushq $__USER_CS 203 pushq $__USER_CS
205 pushq %rcx 204 pushq %rcx
@@ -212,11 +211,11 @@ RELOC(xen_sysret64, 1b+1)
212ENTRY(xen_sysret32) 211ENTRY(xen_sysret32)
213 /* We're already on the usermode stack at this point, but still 212 /* We're already on the usermode stack at this point, but still
214 with the kernel gs, so we can easily switch back */ 213 with the kernel gs, so we can easily switch back */
215 movq %rsp, %gs:pda_oldrsp 214 movq %rsp, PER_CPU_VAR(old_rsp)
216 movq %gs:pda_kernelstack, %rsp 215 movq PER_CPU_VAR(kernel_stack), %rsp
217 216
218 pushq $__USER32_DS 217 pushq $__USER32_DS
219 pushq %gs:pda_oldrsp 218 pushq PER_CPU_VAR(old_rsp)
220 pushq %r11 219 pushq %r11
221 pushq $__USER32_CS 220 pushq $__USER32_CS
222 pushq %rcx 221 pushq %rcx