aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--Documentation/kernel-parameters.txt6
-rw-r--r--arch/um/sys-x86_64/syscall_table.c4
-rw-r--r--arch/x86/Kconfig8
-rw-r--r--arch/x86/Kconfig.cpu54
-rw-r--r--arch/x86/boot/cpu.c17
-rw-r--r--arch/x86/boot/mkcpustr.c40
-rw-r--r--arch/x86/ia32/ia32_signal.c51
-rw-r--r--arch/x86/kernel/Makefile5
-rw-r--r--arch/x86/kernel/acpi/boot.c8
-rw-r--r--arch/x86/kernel/apic_32.c437
-rw-r--r--arch/x86/kernel/apic_64.c626
-rw-r--r--arch/x86/kernel/cpu/Makefile34
-rw-r--r--arch/x86/kernel/cpu/addon_cpuid_features.c88
-rw-r--r--arch/x86/kernel/cpu/amd.c548
-rw-r--r--arch/x86/kernel/cpu/amd_64.c224
-rw-r--r--arch/x86/kernel/cpu/centaur.c4
-rw-r--r--arch/x86/kernel/cpu/centaur_64.c6
-rw-r--r--arch/x86/kernel/cpu/cmpxchg.c72
-rw-r--r--arch/x86/kernel/cpu/common.c973
-rw-r--r--arch/x86/kernel/cpu/common_64.c763
-rw-r--r--arch/x86/kernel/cpu/cpu.h19
-rw-r--r--arch/x86/kernel/cpu/cyrix.c23
-rw-r--r--arch/x86/kernel/cpu/feature_names.c84
-rw-r--r--arch/x86/kernel/cpu/intel.c364
-rw-r--r--arch/x86/kernel/cpu/intel_64.c95
-rw-r--r--arch/x86/kernel/cpu/intel_cacheinfo.c169
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_64.c2
-rw-r--r--arch/x86/kernel/cpu/mkcapflags.pl32
-rw-r--r--arch/x86/kernel/cpu/powerflags.c20
-rw-r--r--arch/x86/kernel/cpu/transmeta.c32
-rw-r--r--arch/x86/kernel/cpu/umc.c3
-rw-r--r--arch/x86/kernel/e820.c28
-rw-r--r--arch/x86/kernel/es7000_32.c (renamed from arch/x86/mach-es7000/es7000plat.c)87
-rw-r--r--arch/x86/kernel/genapic_64.c88
-rw-r--r--arch/x86/kernel/genapic_flat_64.c62
-rw-r--r--arch/x86/kernel/genx2apic_cluster.c159
-rw-r--r--arch/x86/kernel/genx2apic_phys.c154
-rw-r--r--arch/x86/kernel/genx2apic_uv_x.c70
-rw-r--r--arch/x86/kernel/i387.c154
-rw-r--r--arch/x86/kernel/i8259.c24
-rw-r--r--arch/x86/kernel/io_apic_32.c47
-rw-r--r--arch/x86/kernel/io_apic_64.c639
-rw-r--r--arch/x86/kernel/irqinit_32.c49
-rw-r--r--arch/x86/kernel/mpparse.c2
-rw-r--r--arch/x86/kernel/numaq_32.c7
-rw-r--r--arch/x86/kernel/paravirt.c2
-rw-r--r--arch/x86/kernel/process.c1
-rw-r--r--arch/x86/kernel/setup.c2
-rw-r--r--arch/x86/kernel/sigframe.h14
-rw-r--r--arch/x86/kernel/signal_32.c45
-rw-r--r--arch/x86/kernel/signal_64.c95
-rw-r--r--arch/x86/kernel/smpboot.c38
-rw-r--r--arch/x86/kernel/summit_32.c2
-rw-r--r--arch/x86/kernel/traps_32.c1
-rw-r--r--arch/x86/kernel/traps_64.c6
-rw-r--r--arch/x86/kernel/vmi_32.c4
-rw-r--r--arch/x86/kernel/vmlinux_32.lds.S9
-rw-r--r--arch/x86/kernel/vmlinux_64.lds.S9
-rw-r--r--arch/x86/kernel/xsave.c316
-rw-r--r--arch/x86/kvm/vmx.h15
-rw-r--r--arch/x86/lguest/boot.c38
-rw-r--r--arch/x86/lib/Makefile3
-rw-r--r--arch/x86/lib/usercopy_32.c7
-rw-r--r--arch/x86/mach-default/setup.c15
-rw-r--r--arch/x86/mach-es7000/Makefile5
-rw-r--r--arch/x86/mach-es7000/es7000.h114
-rw-r--r--arch/x86/mach-generic/Makefile1
-rw-r--r--arch/x86/mach-generic/bigsmp.c9
-rw-r--r--arch/x86/mach-generic/es7000.c13
-rw-r--r--arch/x86/mach-generic/numaq.c12
-rw-r--r--arch/x86/mach-generic/summit.c11
-rw-r--r--arch/x86/mm/init_64.c56
-rw-r--r--arch/x86/pci/acpi.c5
-rw-r--r--arch/x86/pci/i386.c3
-rw-r--r--arch/x86/pci/mmconfig-shared.c12
-rw-r--r--arch/x86/power/cpu_32.c7
-rw-r--r--arch/x86/power/cpu_64.c7
-rw-r--r--arch/x86/xen/enlighten.c45
-rw-r--r--drivers/pci/Makefile2
-rw-r--r--drivers/pci/dma_remapping.h157
-rw-r--r--drivers/pci/dmar.c397
-rw-r--r--drivers/pci/intel-iommu.c185
-rw-r--r--drivers/pci/intel-iommu.h233
-rw-r--r--drivers/pci/intr_remapping.c471
-rw-r--r--drivers/pci/intr_remapping.h8
-rw-r--r--include/asm-generic/vmlinux.lds.h1
-rw-r--r--include/asm-x86/apic.h65
-rw-r--r--include/asm-x86/apicdef.h3
-rw-r--r--include/asm-x86/arch_hooks.h2
-rw-r--r--include/asm-x86/bigsmp/apic.h (renamed from include/asm-x86/mach-bigsmp/mach_apic.h)10
-rw-r--r--include/asm-x86/bigsmp/apicdef.h13
-rw-r--r--include/asm-x86/bigsmp/ipi.h (renamed from include/asm-x86/mach-bigsmp/mach_ipi.h)6
-rw-r--r--include/asm-x86/bugs.h5
-rw-r--r--include/asm-x86/cpufeature.h118
-rw-r--r--include/asm-x86/e820.h2
-rw-r--r--include/asm-x86/es7000/apic.h (renamed from include/asm-x86/mach-es7000/mach_apic.h)32
-rw-r--r--include/asm-x86/es7000/apicdef.h13
-rw-r--r--include/asm-x86/es7000/ipi.h (renamed from include/asm-x86/mach-es7000/mach_ipi.h)6
-rw-r--r--include/asm-x86/es7000/mpparse.h (renamed from include/asm-x86/mach-es7000/mach_mpparse.h)6
-rw-r--r--include/asm-x86/es7000/wakecpu.h (renamed from include/asm-x86/mach-es7000/mach_wakecpu.h)8
-rw-r--r--include/asm-x86/genapic_64.h8
-rw-r--r--include/asm-x86/hw_irq.h3
-rw-r--r--include/asm-x86/i387.h84
-rw-r--r--include/asm-x86/i8259.h3
-rw-r--r--include/asm-x86/io_apic.h20
-rw-r--r--include/asm-x86/ipi.h16
-rw-r--r--include/asm-x86/irq_remapping.h8
-rw-r--r--include/asm-x86/mach-bigsmp/mach_apicdef.h13
-rw-r--r--include/asm-x86/mach-default/mach_apic.h4
-rw-r--r--include/asm-x86/mach-default/mach_apicdef.h6
-rw-r--r--include/asm-x86/mach-es7000/mach_apicdef.h13
-rw-r--r--include/asm-x86/mach-numaq/mach_mpparse.h7
-rw-r--r--include/asm-x86/mach-summit/mach_apicdef.h13
-rw-r--r--include/asm-x86/mpspec.h3
-rw-r--r--include/asm-x86/msidef.h4
-rw-r--r--include/asm-x86/msr-index.h16
-rw-r--r--include/asm-x86/numaq/apic.h (renamed from include/asm-x86/mach-numaq/mach_apic.h)6
-rw-r--r--include/asm-x86/numaq/apicdef.h (renamed from include/asm-x86/mach-numaq/mach_apicdef.h)6
-rw-r--r--include/asm-x86/numaq/ipi.h (renamed from include/asm-x86/mach-numaq/mach_ipi.h)6
-rw-r--r--include/asm-x86/numaq/mpparse.h7
-rw-r--r--include/asm-x86/numaq/wakecpu.h (renamed from include/asm-x86/mach-numaq/mach_wakecpu.h)6
-rw-r--r--include/asm-x86/paravirt.h19
-rw-r--r--include/asm-x86/processor-cyrix.h8
-rw-r--r--include/asm-x86/processor-flags.h1
-rw-r--r--include/asm-x86/processor.h27
-rw-r--r--include/asm-x86/setup.h1
-rw-r--r--include/asm-x86/sigcontext.h87
-rw-r--r--include/asm-x86/sigcontext32.h6
-rw-r--r--include/asm-x86/smp.h17
-rw-r--r--include/asm-x86/summit/apic.h (renamed from include/asm-x86/mach-summit/mach_apic.h)24
-rw-r--r--include/asm-x86/summit/apicdef.h13
-rw-r--r--include/asm-x86/summit/ipi.h (renamed from include/asm-x86/mach-summit/mach_ipi.h)6
-rw-r--r--include/asm-x86/summit/irq_vectors_limits.h (renamed from include/asm-x86/mach-summit/irq_vectors_limits.h)6
-rw-r--r--include/asm-x86/summit/mpparse.h (renamed from include/asm-x86/mach-summit/mach_mpparse.h)13
-rw-r--r--include/asm-x86/thread_info.h1
-rw-r--r--include/asm-x86/ucontext.h6
-rw-r--r--include/asm-x86/xcr.h49
-rw-r--r--include/asm-x86/xsave.h118
-rw-r--r--include/linux/dmar.h127
-rw-r--r--include/linux/ioport.h3
-rw-r--r--include/linux/irq.h1
-rw-r--r--include/linux/percpu.h7
-rw-r--r--kernel/irq/manage.c9
-rw-r--r--kernel/resource.c68
144 files changed, 6628 insertions, 3287 deletions
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 2ca9c8f8c8d8..25efbaf1f59b 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1428,6 +1428,12 @@ and is between 256 and 4096 characters. It is defined in the file
1428 1428
1429 nolapic_timer [X86-32,APIC] Do not use the local APIC timer. 1429 nolapic_timer [X86-32,APIC] Do not use the local APIC timer.
1430 1430
1431 nox2apic [X86-64,APIC] Do not enable x2APIC mode.
1432
1433 x2apic_phys [X86-64,APIC] Use x2apic physical mode instead of
1434 default x2apic cluster mode on platforms
1435 supporting x2apic.
1436
1431 noltlbs [PPC] Do not use large page/tlb entries for kernel 1437 noltlbs [PPC] Do not use large page/tlb entries for kernel
1432 lowmem mapping on PPC40x. 1438 lowmem mapping on PPC40x.
1433 1439
diff --git a/arch/um/sys-x86_64/syscall_table.c b/arch/um/sys-x86_64/syscall_table.c
index c128eb897008..32f5fbe2d0d2 100644
--- a/arch/um/sys-x86_64/syscall_table.c
+++ b/arch/um/sys-x86_64/syscall_table.c
@@ -41,12 +41,12 @@
41#define stub_rt_sigreturn sys_rt_sigreturn 41#define stub_rt_sigreturn sys_rt_sigreturn
42 42
43#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ; 43#define __SYSCALL(nr, sym) extern asmlinkage void sym(void) ;
44#undef _ASM_X86_64_UNISTD_H_ 44#undef ASM_X86__UNISTD_64_H
45#include <asm-x86/unistd_64.h> 45#include <asm-x86/unistd_64.h>
46 46
47#undef __SYSCALL 47#undef __SYSCALL
48#define __SYSCALL(nr, sym) [ nr ] = sym, 48#define __SYSCALL(nr, sym) [ nr ] = sym,
49#undef _ASM_X86_64_UNISTD_H_ 49#undef ASM_X86__UNISTD_64_H
50 50
51typedef void (*sys_call_ptr_t)(void); 51typedef void (*sys_call_ptr_t)(void);
52 52
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 0d7cdbbfc1ee..44d4f2130d01 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -1689,6 +1689,14 @@ config DMAR_FLOPPY_WA
1689 workaround will setup a 1:1 mapping for the first 1689 workaround will setup a 1:1 mapping for the first
1690 16M to make floppy (an ISA device) work. 1690 16M to make floppy (an ISA device) work.
1691 1691
1692config INTR_REMAP
1693 bool "Support for Interrupt Remapping (EXPERIMENTAL)"
1694 depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL
1695 help
1696 Supports Interrupt remapping for IO-APIC and MSI devices.
1697 To use x2apic mode in the CPU's which support x2APIC enhancements or
1698 to support platforms with CPU's having > 8 bit APIC ID, say Y.
1699
1692source "drivers/pci/pcie/Kconfig" 1700source "drivers/pci/pcie/Kconfig"
1693 1701
1694source "drivers/pci/Kconfig" 1702source "drivers/pci/Kconfig"
diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu
index 60a85768cfcb..f8843c3ae77d 100644
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@@ -419,6 +419,60 @@ config X86_DEBUGCTLMSR
419 def_bool y 419 def_bool y
420 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386) 420 depends on !(MK6 || MWINCHIPC6 || MWINCHIP2 || MWINCHIP3D || MCYRIXIII || M586MMX || M586TSC || M586 || M486 || M386)
421 421
422menuconfig PROCESSOR_SELECT
423 default y
424 bool "Supported processor vendors" if EMBEDDED
425 help
426 This lets you choose what x86 vendor support code your kernel
427 will include.
428
429config CPU_SUP_INTEL
430 default y
431 bool "Support Intel processors" if PROCESSOR_SELECT
432 help
433 This enables extended support for Intel processors
434
435config CPU_SUP_CYRIX_32
436 default y
437 bool "Support Cyrix processors" if PROCESSOR_SELECT
438 depends on !64BIT
439 help
440 This enables extended support for Cyrix processors
441
442config CPU_SUP_AMD
443 default y
444 bool "Support AMD processors" if PROCESSOR_SELECT
445 help
446 This enables extended support for AMD processors
447
448config CPU_SUP_CENTAUR_32
449 default y
450 bool "Support Centaur processors" if PROCESSOR_SELECT
451 depends on !64BIT
452 help
453 This enables extended support for Centaur processors
454
455config CPU_SUP_CENTAUR_64
456 default y
457 bool "Support Centaur processors" if PROCESSOR_SELECT
458 depends on 64BIT
459 help
460 This enables extended support for Centaur processors
461
462config CPU_SUP_TRANSMETA_32
463 default y
464 bool "Support Transmeta processors" if PROCESSOR_SELECT
465 depends on !64BIT
466 help
467 This enables extended support for Transmeta processors
468
469config CPU_SUP_UMC_32
470 default y
471 bool "Support UMC processors" if PROCESSOR_SELECT
472 depends on !64BIT
473 help
474 This enables extended support for UMC processors
475
422config X86_DS 476config X86_DS
423 bool "Debug Store support" 477 bool "Debug Store support"
424 default y 478 default y
diff --git a/arch/x86/boot/cpu.c b/arch/x86/boot/cpu.c
index 75298fe2edca..6ec6bb6e9957 100644
--- a/arch/x86/boot/cpu.c
+++ b/arch/x86/boot/cpu.c
@@ -59,17 +59,18 @@ int validate_cpu(void)
59 u32 e = err_flags[i]; 59 u32 e = err_flags[i];
60 60
61 for (j = 0; j < 32; j++) { 61 for (j = 0; j < 32; j++) {
62 int n = (i << 5)+j; 62 if (msg_strs[0] < i ||
63 if (*msg_strs < n) { 63 (msg_strs[0] == i && msg_strs[1] < j)) {
64 /* Skip to the next string */ 64 /* Skip to the next string */
65 do { 65 msg_strs += 2;
66 msg_strs++; 66 while (*msg_strs++)
67 } while (*msg_strs); 67 ;
68 msg_strs++;
69 } 68 }
70 if (e & 1) { 69 if (e & 1) {
71 if (*msg_strs == n && msg_strs[1]) 70 if (msg_strs[0] == i &&
72 printf("%s ", msg_strs+1); 71 msg_strs[1] == j &&
72 msg_strs[2])
73 printf("%s ", msg_strs+2);
73 else 74 else
74 printf("%d:%d ", i, j); 75 printf("%d:%d ", i, j);
75 } 76 }
diff --git a/arch/x86/boot/mkcpustr.c b/arch/x86/boot/mkcpustr.c
index bbe76953bae9..8ef60f20b371 100644
--- a/arch/x86/boot/mkcpustr.c
+++ b/arch/x86/boot/mkcpustr.c
@@ -15,33 +15,33 @@
15 15
16#include <stdio.h> 16#include <stdio.h>
17 17
18#include "../kernel/cpu/feature_names.c" 18#include "../kernel/cpu/capflags.c"
19
20#if NCAPFLAGS > 8
21# error "Need to adjust the boot code handling of CPUID strings"
22#endif
23 19
24int main(void) 20int main(void)
25{ 21{
26 int i; 22 int i, j;
27 const char *str; 23 const char *str;
28 24
29 printf("static const char x86_cap_strs[] = \n"); 25 printf("static const char x86_cap_strs[] = \n");
30 26
31 for (i = 0; i < NCAPINTS*32; i++) { 27 for (i = 0; i < NCAPINTS; i++) {
32 str = x86_cap_flags[i]; 28 for (j = 0; j < 32; j++) {
33 29 str = x86_cap_flags[i*32+j];
34 if (i == NCAPINTS*32-1) { 30
35 /* The last entry must be unconditional; this 31 if (i == NCAPINTS-1 && j == 31) {
36 also consumes the compiler-added null character */ 32 /* The last entry must be unconditional; this
37 if (!str) 33 also consumes the compiler-added null
38 str = ""; 34 character */
39 printf("\t\"\\x%02x\"\"%s\"\n", i, str); 35 if (!str)
40 } else if (str) { 36 str = "";
41 printf("#if REQUIRED_MASK%d & (1 << %d)\n" 37 printf("\t\"\\x%02x\\x%02x\"\"%s\"\n",
42 "\t\"\\x%02x\"\"%s\\0\"\n" 38 i, j, str);
43 "#endif\n", 39 } else if (str) {
44 i >> 5, i & 31, i, str); 40 printf("#if REQUIRED_MASK%d & (1 << %d)\n"
41 "\t\"\\x%02x\\x%02x\"\"%s\\0\"\n"
42 "#endif\n",
43 i, j, i, j, str);
44 }
45 } 45 }
46 } 46 }
47 printf("\t;\n"); 47 printf("\t;\n");
diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c
index f1a2ac777faf..8d64c1bc8474 100644
--- a/arch/x86/ia32/ia32_signal.c
+++ b/arch/x86/ia32/ia32_signal.c
@@ -179,9 +179,10 @@ struct sigframe
179 u32 pretcode; 179 u32 pretcode;
180 int sig; 180 int sig;
181 struct sigcontext_ia32 sc; 181 struct sigcontext_ia32 sc;
182 struct _fpstate_ia32 fpstate; 182 struct _fpstate_ia32 fpstate_unused; /* look at kernel/sigframe.h */
183 unsigned int extramask[_COMPAT_NSIG_WORDS-1]; 183 unsigned int extramask[_COMPAT_NSIG_WORDS-1];
184 char retcode[8]; 184 char retcode[8];
185 /* fp state follows here */
185}; 186};
186 187
187struct rt_sigframe 188struct rt_sigframe
@@ -192,8 +193,8 @@ struct rt_sigframe
192 u32 puc; 193 u32 puc;
193 compat_siginfo_t info; 194 compat_siginfo_t info;
194 struct ucontext_ia32 uc; 195 struct ucontext_ia32 uc;
195 struct _fpstate_ia32 fpstate;
196 char retcode[8]; 196 char retcode[8];
197 /* fp state follows here */
197}; 198};
198 199
199#define COPY(x) { \ 200#define COPY(x) { \
@@ -215,7 +216,7 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
215 unsigned int *peax) 216 unsigned int *peax)
216{ 217{
217 unsigned int tmpflags, gs, oldgs, err = 0; 218 unsigned int tmpflags, gs, oldgs, err = 0;
218 struct _fpstate_ia32 __user *buf; 219 void __user *buf;
219 u32 tmp; 220 u32 tmp;
220 221
221 /* Always make any pending restarted system calls return -EINTR */ 222 /* Always make any pending restarted system calls return -EINTR */
@@ -259,26 +260,12 @@ static int ia32_restore_sigcontext(struct pt_regs *regs,
259 260
260 err |= __get_user(tmp, &sc->fpstate); 261 err |= __get_user(tmp, &sc->fpstate);
261 buf = compat_ptr(tmp); 262 buf = compat_ptr(tmp);
262 if (buf) { 263 err |= restore_i387_xstate_ia32(buf);
263 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
264 goto badframe;
265 err |= restore_i387_ia32(buf);
266 } else {
267 struct task_struct *me = current;
268
269 if (used_math()) {
270 clear_fpu(me);
271 clear_used_math();
272 }
273 }
274 264
275 err |= __get_user(tmp, &sc->ax); 265 err |= __get_user(tmp, &sc->ax);
276 *peax = tmp; 266 *peax = tmp;
277 267
278 return err; 268 return err;
279
280badframe:
281 return 1;
282} 269}
283 270
284asmlinkage long sys32_sigreturn(struct pt_regs *regs) 271asmlinkage long sys32_sigreturn(struct pt_regs *regs)
@@ -350,7 +337,7 @@ badframe:
350 */ 337 */
351 338
352static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc, 339static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
353 struct _fpstate_ia32 __user *fpstate, 340 void __user *fpstate,
354 struct pt_regs *regs, unsigned int mask) 341 struct pt_regs *regs, unsigned int mask)
355{ 342{
356 int tmp, err = 0; 343 int tmp, err = 0;
@@ -380,7 +367,7 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
380 err |= __put_user((u32)regs->flags, &sc->flags); 367 err |= __put_user((u32)regs->flags, &sc->flags);
381 err |= __put_user((u32)regs->sp, &sc->sp_at_signal); 368 err |= __put_user((u32)regs->sp, &sc->sp_at_signal);
382 369
383 tmp = save_i387_ia32(fpstate); 370 tmp = save_i387_xstate_ia32(fpstate);
384 if (tmp < 0) 371 if (tmp < 0)
385 err = -EFAULT; 372 err = -EFAULT;
386 else { 373 else {
@@ -401,7 +388,8 @@ static int ia32_setup_sigcontext(struct sigcontext_ia32 __user *sc,
401 * Determine which stack to use.. 388 * Determine which stack to use..
402 */ 389 */
403static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, 390static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
404 size_t frame_size) 391 size_t frame_size,
392 void **fpstate)
405{ 393{
406 unsigned long sp; 394 unsigned long sp;
407 395
@@ -420,6 +408,11 @@ static void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs,
420 ka->sa.sa_restorer) 408 ka->sa.sa_restorer)
421 sp = (unsigned long) ka->sa.sa_restorer; 409 sp = (unsigned long) ka->sa.sa_restorer;
422 410
411 if (used_math()) {
412 sp = sp - sig_xstate_ia32_size;
413 *fpstate = (struct _fpstate_ia32 *) sp;
414 }
415
423 sp -= frame_size; 416 sp -= frame_size;
424 /* Align the stack pointer according to the i386 ABI, 417 /* Align the stack pointer according to the i386 ABI,
425 * i.e. so that on function entry ((sp + 4) & 15) == 0. */ 418 * i.e. so that on function entry ((sp + 4) & 15) == 0. */
@@ -433,6 +426,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
433 struct sigframe __user *frame; 426 struct sigframe __user *frame;
434 void __user *restorer; 427 void __user *restorer;
435 int err = 0; 428 int err = 0;
429 void __user *fpstate = NULL;
436 430
437 /* copy_to_user optimizes that into a single 8 byte store */ 431 /* copy_to_user optimizes that into a single 8 byte store */
438 static const struct { 432 static const struct {
@@ -447,7 +441,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
447 0, 441 0,
448 }; 442 };
449 443
450 frame = get_sigframe(ka, regs, sizeof(*frame)); 444 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
451 445
452 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 446 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
453 goto give_sigsegv; 447 goto give_sigsegv;
@@ -456,8 +450,7 @@ int ia32_setup_frame(int sig, struct k_sigaction *ka,
456 if (err) 450 if (err)
457 goto give_sigsegv; 451 goto give_sigsegv;
458 452
459 err |= ia32_setup_sigcontext(&frame->sc, &frame->fpstate, regs, 453 err |= ia32_setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
460 set->sig[0]);
461 if (err) 454 if (err)
462 goto give_sigsegv; 455 goto give_sigsegv;
463 456
@@ -521,6 +514,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
521 struct rt_sigframe __user *frame; 514 struct rt_sigframe __user *frame;
522 void __user *restorer; 515 void __user *restorer;
523 int err = 0; 516 int err = 0;
517 void __user *fpstate = NULL;
524 518
525 /* __copy_to_user optimizes that into a single 8 byte store */ 519 /* __copy_to_user optimizes that into a single 8 byte store */
526 static const struct { 520 static const struct {
@@ -536,7 +530,7 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
536 0, 530 0,
537 }; 531 };
538 532
539 frame = get_sigframe(ka, regs, sizeof(*frame)); 533 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
540 534
541 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 535 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
542 goto give_sigsegv; 536 goto give_sigsegv;
@@ -549,13 +543,16 @@ int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
549 goto give_sigsegv; 543 goto give_sigsegv;
550 544
551 /* Create the ucontext. */ 545 /* Create the ucontext. */
552 err |= __put_user(0, &frame->uc.uc_flags); 546 if (cpu_has_xsave)
547 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
548 else
549 err |= __put_user(0, &frame->uc.uc_flags);
553 err |= __put_user(0, &frame->uc.uc_link); 550 err |= __put_user(0, &frame->uc.uc_link);
554 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 551 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
555 err |= __put_user(sas_ss_flags(regs->sp), 552 err |= __put_user(sas_ss_flags(regs->sp),
556 &frame->uc.uc_stack.ss_flags); 553 &frame->uc.uc_stack.ss_flags);
557 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 554 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
558 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, 555 err |= ia32_setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
559 regs, set->sig[0]); 556 regs, set->sig[0]);
560 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 557 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
561 if (err) 558 if (err)
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 3db651fc8ec5..c9be69fedb70 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -38,7 +38,7 @@ obj-y += tsc.o io_delay.o rtc.o
38 38
39obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o 39obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o
40obj-y += process.o 40obj-y += process.o
41obj-y += i387.o 41obj-y += i387.o xsave.o
42obj-y += ptrace.o 42obj-y += ptrace.o
43obj-y += ds.o 43obj-y += ds.o
44obj-$(CONFIG_X86_32) += tls.o 44obj-$(CONFIG_X86_32) += tls.o
@@ -69,6 +69,7 @@ obj-$(CONFIG_KEXEC) += machine_kexec_$(BITS).o
69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o 69obj-$(CONFIG_KEXEC) += relocate_kernel_$(BITS).o crash.o
70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o 70obj-$(CONFIG_CRASH_DUMP) += crash_dump_$(BITS).o
71obj-$(CONFIG_X86_NUMAQ) += numaq_32.o 71obj-$(CONFIG_X86_NUMAQ) += numaq_32.o
72obj-$(CONFIG_X86_ES7000) += es7000_32.o
72obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o 73obj-$(CONFIG_X86_SUMMIT_NUMA) += summit_32.o
73obj-y += vsmp_64.o 74obj-y += vsmp_64.o
74obj-$(CONFIG_KPROBES) += kprobes.o 75obj-$(CONFIG_KPROBES) += kprobes.o
@@ -104,6 +105,8 @@ obj-$(CONFIG_OLPC) += olpc.o
104ifeq ($(CONFIG_X86_64),y) 105ifeq ($(CONFIG_X86_64),y)
105 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o 106 obj-y += genapic_64.o genapic_flat_64.o genx2apic_uv_x.o tlb_uv.o
106 obj-y += bios_uv.o 107 obj-y += bios_uv.o
108 obj-y += genx2apic_cluster.o
109 obj-y += genx2apic_phys.o
107 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o 110 obj-$(CONFIG_X86_PM_TIMER) += pmtimer_64.o
108 obj-$(CONFIG_AUDIT) += audit_64.o 111 obj-$(CONFIG_AUDIT) += audit_64.o
109 112
diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c
index 7d40ef7b36e3..c2ac1b4515a0 100644
--- a/arch/x86/kernel/acpi/boot.c
+++ b/arch/x86/kernel/acpi/boot.c
@@ -252,10 +252,8 @@ static void __cpuinit acpi_register_lapic(int id, u8 enabled)
252 return; 252 return;
253 } 253 }
254 254
255#ifdef CONFIG_X86_32
256 if (boot_cpu_physical_apicid != -1U) 255 if (boot_cpu_physical_apicid != -1U)
257 ver = apic_version[boot_cpu_physical_apicid]; 256 ver = apic_version[boot_cpu_physical_apicid];
258#endif
259 257
260 generic_processor_info(id, ver); 258 generic_processor_info(id, ver);
261} 259}
@@ -774,11 +772,9 @@ static void __init acpi_register_lapic_address(unsigned long address)
774 772
775 set_fixmap_nocache(FIX_APIC_BASE, address); 773 set_fixmap_nocache(FIX_APIC_BASE, address);
776 if (boot_cpu_physical_apicid == -1U) { 774 if (boot_cpu_physical_apicid == -1U) {
777 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 775 boot_cpu_physical_apicid = read_apic_id();
778#ifdef CONFIG_X86_32
779 apic_version[boot_cpu_physical_apicid] = 776 apic_version[boot_cpu_physical_apicid] =
780 GET_APIC_VERSION(apic_read(APIC_LVR)); 777 GET_APIC_VERSION(apic_read(APIC_LVR));
781#endif
782 } 778 }
783} 779}
784 780
@@ -1350,7 +1346,9 @@ static void __init acpi_process_madt(void)
1350 acpi_ioapic = 1; 1346 acpi_ioapic = 1;
1351 1347
1352 smp_found_config = 1; 1348 smp_found_config = 1;
1349#ifdef CONFIG_X86_32
1353 setup_apic_routing(); 1350 setup_apic_routing();
1351#endif
1354 } 1352 }
1355 } 1353 }
1356 if (error == -EINVAL) { 1354 if (error == -EINVAL) {
diff --git a/arch/x86/kernel/apic_32.c b/arch/x86/kernel/apic_32.c
index f88bd0d982b0..a91c57cb666a 100644
--- a/arch/x86/kernel/apic_32.c
+++ b/arch/x86/kernel/apic_32.c
@@ -60,10 +60,8 @@ unsigned long mp_lapic_addr;
60static int force_enable_local_apic; 60static int force_enable_local_apic;
61int disable_apic; 61int disable_apic;
62 62
63/* Local APIC timer verification ok */
64static int local_apic_timer_verify_ok;
65/* Disable local APIC timer from the kernel commandline or via dmi quirk */ 63/* Disable local APIC timer from the kernel commandline or via dmi quirk */
66static int local_apic_timer_disabled; 64static int disable_apic_timer __cpuinitdata;
67/* Local APIC timer works in C2 */ 65/* Local APIC timer works in C2 */
68int local_apic_timer_c2_ok; 66int local_apic_timer_c2_ok;
69EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok); 67EXPORT_SYMBOL_GPL(local_apic_timer_c2_ok);
@@ -130,7 +128,11 @@ static inline int lapic_get_version(void)
130 */ 128 */
131static inline int lapic_is_integrated(void) 129static inline int lapic_is_integrated(void)
132{ 130{
131#ifdef CONFIG_X86_64
132 return 1;
133#else
133 return APIC_INTEGRATED(lapic_get_version()); 134 return APIC_INTEGRATED(lapic_get_version());
135#endif
134} 136}
135 137
136/* 138/*
@@ -145,13 +147,18 @@ static int modern_apic(void)
145 return lapic_get_version() >= 0x14; 147 return lapic_get_version() >= 0x14;
146} 148}
147 149
148void apic_wait_icr_idle(void) 150/*
151 * Paravirt kernels also might be using these below ops. So we still
152 * use generic apic_read()/apic_write(), which might be pointing to different
153 * ops in PARAVIRT case.
154 */
155void xapic_wait_icr_idle(void)
149{ 156{
150 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 157 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
151 cpu_relax(); 158 cpu_relax();
152} 159}
153 160
154u32 safe_apic_wait_icr_idle(void) 161u32 safe_xapic_wait_icr_idle(void)
155{ 162{
156 u32 send_status; 163 u32 send_status;
157 int timeout; 164 int timeout;
@@ -167,16 +174,48 @@ u32 safe_apic_wait_icr_idle(void)
167 return send_status; 174 return send_status;
168} 175}
169 176
177void xapic_icr_write(u32 low, u32 id)
178{
179 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
180 apic_write(APIC_ICR, low);
181}
182
183u64 xapic_icr_read(void)
184{
185 u32 icr1, icr2;
186
187 icr2 = apic_read(APIC_ICR2);
188 icr1 = apic_read(APIC_ICR);
189
190 return icr1 | ((u64)icr2 << 32);
191}
192
193static struct apic_ops xapic_ops = {
194 .read = native_apic_mem_read,
195 .write = native_apic_mem_write,
196 .icr_read = xapic_icr_read,
197 .icr_write = xapic_icr_write,
198 .wait_icr_idle = xapic_wait_icr_idle,
199 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
200};
201
202struct apic_ops __read_mostly *apic_ops = &xapic_ops;
203EXPORT_SYMBOL_GPL(apic_ops);
204
170/** 205/**
171 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 206 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
172 */ 207 */
173void __cpuinit enable_NMI_through_LVT0(void) 208void __cpuinit enable_NMI_through_LVT0(void)
174{ 209{
175 unsigned int v = APIC_DM_NMI; 210 unsigned int v;
176 211
177 /* Level triggered for 82489DX */ 212 /* unmask and set to NMI */
213 v = APIC_DM_NMI;
214
215 /* Level triggered for 82489DX (32bit mode) */
178 if (!lapic_is_integrated()) 216 if (!lapic_is_integrated())
179 v |= APIC_LVT_LEVEL_TRIGGER; 217 v |= APIC_LVT_LEVEL_TRIGGER;
218
180 apic_write(APIC_LVT0, v); 219 apic_write(APIC_LVT0, v);
181} 220}
182 221
@@ -193,9 +232,13 @@ int get_physical_broadcast(void)
193 */ 232 */
194int lapic_get_maxlvt(void) 233int lapic_get_maxlvt(void)
195{ 234{
196 unsigned int v = apic_read(APIC_LVR); 235 unsigned int v;
197 236
198 /* 82489DXs do not report # of LVT entries. */ 237 v = apic_read(APIC_LVR);
238 /*
239 * - we always have APIC integrated on 64bit mode
240 * - 82489DXs do not report # of LVT entries
241 */
199 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2; 242 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
200} 243}
201 244
@@ -203,8 +246,12 @@ int lapic_get_maxlvt(void)
203 * Local APIC timer 246 * Local APIC timer
204 */ 247 */
205 248
206/* Clock divisor is set to 16 */ 249/* Clock divisor */
250#ifdef CONFG_X86_64
251#define APIC_DIVISOR 1
252#else
207#define APIC_DIVISOR 16 253#define APIC_DIVISOR 16
254#endif
208 255
209/* 256/*
210 * This function sets up the local APIC timer, with a timeout of 257 * This function sets up the local APIC timer, with a timeout of
@@ -212,6 +259,9 @@ int lapic_get_maxlvt(void)
212 * this function twice on the boot CPU, once with a bogus timeout 259 * this function twice on the boot CPU, once with a bogus timeout
213 * value, second time for real. The other (noncalibrating) CPUs 260 * value, second time for real. The other (noncalibrating) CPUs
214 * call this function only once, with the real, calibrated value. 261 * call this function only once, with the real, calibrated value.
262 *
263 * We do reads before writes even if unnecessary, to get around the
264 * P5 APIC double write bug.
215 */ 265 */
216static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 266static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
217{ 267{
@@ -233,14 +283,44 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
233 */ 283 */
234 tmp_value = apic_read(APIC_TDCR); 284 tmp_value = apic_read(APIC_TDCR);
235 apic_write(APIC_TDCR, 285 apic_write(APIC_TDCR,
236 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) | 286 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
237 APIC_TDR_DIV_16); 287 APIC_TDR_DIV_16);
238 288
239 if (!oneshot) 289 if (!oneshot)
240 apic_write(APIC_TMICT, clocks / APIC_DIVISOR); 290 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
241} 291}
242 292
243/* 293/*
294 * Setup extended LVT, AMD specific (K8, family 10h)
295 *
296 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
297 * MCE interrupts are supported. Thus MCE offset must be set to 0.
298 */
299
300#define APIC_EILVT_LVTOFF_MCE 0
301#define APIC_EILVT_LVTOFF_IBS 1
302
303static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
304{
305 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
306 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
307
308 apic_write(reg, v);
309}
310
311u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
312{
313 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
314 return APIC_EILVT_LVTOFF_MCE;
315}
316
317u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
318{
319 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
320 return APIC_EILVT_LVTOFF_IBS;
321}
322
323/*
244 * Program the next event, relative to now 324 * Program the next event, relative to now
245 */ 325 */
246static int lapic_next_event(unsigned long delta, 326static int lapic_next_event(unsigned long delta,
@@ -259,8 +339,8 @@ static void lapic_timer_setup(enum clock_event_mode mode,
259 unsigned long flags; 339 unsigned long flags;
260 unsigned int v; 340 unsigned int v;
261 341
262 /* Lapic used for broadcast ? */ 342 /* Lapic used as dummy for broadcast ? */
263 if (!local_apic_timer_verify_ok) 343 if (evt->features & CLOCK_EVT_FEAT_DUMMY)
264 return; 344 return;
265 345
266 local_irq_save(flags); 346 local_irq_save(flags);
@@ -473,7 +553,7 @@ static int __init calibrate_APIC_clock(void)
473 return -1; 553 return -1;
474 } 554 }
475 555
476 local_apic_timer_verify_ok = 1; 556 levt->features &= ~CLOCK_EVT_FEAT_DUMMY;
477 557
478 /* We trust the pm timer based calibration */ 558 /* We trust the pm timer based calibration */
479 if (!pm_referenced) { 559 if (!pm_referenced) {
@@ -507,11 +587,11 @@ static int __init calibrate_APIC_clock(void)
507 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2) 587 if (deltaj >= LAPIC_CAL_LOOPS-2 && deltaj <= LAPIC_CAL_LOOPS+2)
508 apic_printk(APIC_VERBOSE, "... jiffies result ok\n"); 588 apic_printk(APIC_VERBOSE, "... jiffies result ok\n");
509 else 589 else
510 local_apic_timer_verify_ok = 0; 590 levt->features |= CLOCK_EVT_FEAT_DUMMY;
511 } else 591 } else
512 local_irq_enable(); 592 local_irq_enable();
513 593
514 if (!local_apic_timer_verify_ok) { 594 if (levt->features & CLOCK_EVT_FEAT_DUMMY) {
515 printk(KERN_WARNING 595 printk(KERN_WARNING
516 "APIC timer disabled due to verification failure.\n"); 596 "APIC timer disabled due to verification failure.\n");
517 return -1; 597 return -1;
@@ -533,7 +613,8 @@ void __init setup_boot_APIC_clock(void)
533 * timer as a dummy clock event source on SMP systems, so the 613 * timer as a dummy clock event source on SMP systems, so the
534 * broadcast mechanism is used. On UP systems simply ignore it. 614 * broadcast mechanism is used. On UP systems simply ignore it.
535 */ 615 */
536 if (local_apic_timer_disabled) { 616 if (disable_apic_timer) {
617 printk(KERN_INFO "Disabling APIC timer\n");
537 /* No broadcast on UP ! */ 618 /* No broadcast on UP ! */
538 if (num_possible_cpus() > 1) { 619 if (num_possible_cpus() > 1) {
539 lapic_clockevent.mult = 1; 620 lapic_clockevent.mult = 1;
@@ -602,7 +683,11 @@ static void local_apic_timer_interrupt(void)
602 /* 683 /*
603 * the NMI deadlock-detector uses this. 684 * the NMI deadlock-detector uses this.
604 */ 685 */
686#ifdef CONFIG_X86_64
687 add_pda(apic_timer_irqs, 1);
688#else
605 per_cpu(irq_stat, cpu).apic_timer_irqs++; 689 per_cpu(irq_stat, cpu).apic_timer_irqs++;
690#endif
606 691
607 evt->event_handler(evt); 692 evt->event_handler(evt);
608} 693}
@@ -642,35 +727,6 @@ int setup_profiling_timer(unsigned int multiplier)
642} 727}
643 728
644/* 729/*
645 * Setup extended LVT, AMD specific (K8, family 10h)
646 *
647 * Vector mappings are hard coded. On K8 only offset 0 (APIC500) and
648 * MCE interrupts are supported. Thus MCE offset must be set to 0.
649 */
650
651#define APIC_EILVT_LVTOFF_MCE 0
652#define APIC_EILVT_LVTOFF_IBS 1
653
654static void setup_APIC_eilvt(u8 lvt_off, u8 vector, u8 msg_type, u8 mask)
655{
656 unsigned long reg = (lvt_off << 4) + APIC_EILVT0;
657 unsigned int v = (mask << 16) | (msg_type << 8) | vector;
658 apic_write(reg, v);
659}
660
661u8 setup_APIC_eilvt_mce(u8 vector, u8 msg_type, u8 mask)
662{
663 setup_APIC_eilvt(APIC_EILVT_LVTOFF_MCE, vector, msg_type, mask);
664 return APIC_EILVT_LVTOFF_MCE;
665}
666
667u8 setup_APIC_eilvt_ibs(u8 vector, u8 msg_type, u8 mask)
668{
669 setup_APIC_eilvt(APIC_EILVT_LVTOFF_IBS, vector, msg_type, mask);
670 return APIC_EILVT_LVTOFF_IBS;
671}
672
673/*
674 * Local APIC start and shutdown 730 * Local APIC start and shutdown
675 */ 731 */
676 732
@@ -715,7 +771,7 @@ void clear_local_APIC(void)
715 } 771 }
716 772
717 /* lets not touch this if we didn't frob it */ 773 /* lets not touch this if we didn't frob it */
718#ifdef CONFIG_X86_MCE_P4THERMAL 774#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
719 if (maxlvt >= 5) { 775 if (maxlvt >= 5) {
720 v = apic_read(APIC_LVTTHMR); 776 v = apic_read(APIC_LVTTHMR);
721 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED); 777 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
@@ -732,10 +788,6 @@ void clear_local_APIC(void)
732 if (maxlvt >= 4) 788 if (maxlvt >= 4)
733 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 789 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
734 790
735#ifdef CONFIG_X86_MCE_P4THERMAL
736 if (maxlvt >= 5)
737 apic_write(APIC_LVTTHMR, APIC_LVT_MASKED);
738#endif
739 /* Integrated APIC (!82489DX) ? */ 791 /* Integrated APIC (!82489DX) ? */
740 if (lapic_is_integrated()) { 792 if (lapic_is_integrated()) {
741 if (maxlvt > 3) 793 if (maxlvt > 3)
@@ -750,7 +802,7 @@ void clear_local_APIC(void)
750 */ 802 */
751void disable_local_APIC(void) 803void disable_local_APIC(void)
752{ 804{
753 unsigned long value; 805 unsigned int value;
754 806
755 clear_local_APIC(); 807 clear_local_APIC();
756 808
@@ -762,6 +814,7 @@ void disable_local_APIC(void)
762 value &= ~APIC_SPIV_APIC_ENABLED; 814 value &= ~APIC_SPIV_APIC_ENABLED;
763 apic_write(APIC_SPIV, value); 815 apic_write(APIC_SPIV, value);
764 816
817#ifdef CONFIG_X86_32
765 /* 818 /*
766 * When LAPIC was disabled by the BIOS and enabled by the kernel, 819 * When LAPIC was disabled by the BIOS and enabled by the kernel,
767 * restore the disabled state. 820 * restore the disabled state.
@@ -773,6 +826,7 @@ void disable_local_APIC(void)
773 l &= ~MSR_IA32_APICBASE_ENABLE; 826 l &= ~MSR_IA32_APICBASE_ENABLE;
774 wrmsr(MSR_IA32_APICBASE, l, h); 827 wrmsr(MSR_IA32_APICBASE, l, h);
775 } 828 }
829#endif
776} 830}
777 831
778/* 832/*
@@ -789,11 +843,15 @@ void lapic_shutdown(void)
789 return; 843 return;
790 844
791 local_irq_save(flags); 845 local_irq_save(flags);
792 clear_local_APIC();
793 846
794 if (enabled_via_apicbase) 847#ifdef CONFIG_X86_32
848 if (!enabled_via_apicbase)
849 clear_local_APIC();
850 else
851#endif
795 disable_local_APIC(); 852 disable_local_APIC();
796 853
854
797 local_irq_restore(flags); 855 local_irq_restore(flags);
798} 856}
799 857
@@ -838,6 +896,12 @@ int __init verify_local_APIC(void)
838 */ 896 */
839 reg0 = apic_read(APIC_ID); 897 reg0 = apic_read(APIC_ID);
840 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 898 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
899 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
900 reg1 = apic_read(APIC_ID);
901 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
902 apic_write(APIC_ID, reg0);
903 if (reg1 != (reg0 ^ APIC_ID_MASK))
904 return 0;
841 905
842 /* 906 /*
843 * The next two are just to see if we have sane values. 907 * The next two are just to see if we have sane values.
@@ -863,14 +927,15 @@ void __init sync_Arb_IDs(void)
863 */ 927 */
864 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 928 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
865 return; 929 return;
930
866 /* 931 /*
867 * Wait for idle. 932 * Wait for idle.
868 */ 933 */
869 apic_wait_icr_idle(); 934 apic_wait_icr_idle();
870 935
871 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 936 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
872 apic_write(APIC_ICR, 937 apic_write(APIC_ICR, APIC_DEST_ALLINC |
873 APIC_DEST_ALLINC | APIC_INT_LEVELTRIG | APIC_DM_INIT); 938 APIC_INT_LEVELTRIG | APIC_DM_INIT);
874} 939}
875 940
876/* 941/*
@@ -878,7 +943,7 @@ void __init sync_Arb_IDs(void)
878 */ 943 */
879void __init init_bsp_APIC(void) 944void __init init_bsp_APIC(void)
880{ 945{
881 unsigned long value; 946 unsigned int value;
882 947
883 /* 948 /*
884 * Don't do the setup now if we have a SMP BIOS as the 949 * Don't do the setup now if we have a SMP BIOS as the
@@ -899,11 +964,13 @@ void __init init_bsp_APIC(void)
899 value &= ~APIC_VECTOR_MASK; 964 value &= ~APIC_VECTOR_MASK;
900 value |= APIC_SPIV_APIC_ENABLED; 965 value |= APIC_SPIV_APIC_ENABLED;
901 966
967#ifdef CONFIG_X86_32
902 /* This bit is reserved on P4/Xeon and should be cleared */ 968 /* This bit is reserved on P4/Xeon and should be cleared */
903 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) && 969 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
904 (boot_cpu_data.x86 == 15)) 970 (boot_cpu_data.x86 == 15))
905 value &= ~APIC_SPIV_FOCUS_DISABLED; 971 value &= ~APIC_SPIV_FOCUS_DISABLED;
906 else 972 else
973#endif
907 value |= APIC_SPIV_FOCUS_DISABLED; 974 value |= APIC_SPIV_FOCUS_DISABLED;
908 value |= SPURIOUS_APIC_VECTOR; 975 value |= SPURIOUS_APIC_VECTOR;
909 apic_write(APIC_SPIV, value); 976 apic_write(APIC_SPIV, value);
@@ -922,6 +989,16 @@ static void __cpuinit lapic_setup_esr(void)
922{ 989{
923 unsigned long oldvalue, value, maxlvt; 990 unsigned long oldvalue, value, maxlvt;
924 if (lapic_is_integrated() && !esr_disable) { 991 if (lapic_is_integrated() && !esr_disable) {
992 if (esr_disable) {
993 /*
994 * Something untraceable is creating bad interrupts on
995 * secondary quads ... for the moment, just leave the
996 * ESR disabled - we can't do anything useful with the
997 * errors anyway - mbligh
998 */
999 printk(KERN_INFO "Leaving ESR disabled.\n");
1000 return;
1001 }
925 /* !82489DX */ 1002 /* !82489DX */
926 maxlvt = lapic_get_maxlvt(); 1003 maxlvt = lapic_get_maxlvt();
927 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */ 1004 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
@@ -942,16 +1019,7 @@ static void __cpuinit lapic_setup_esr(void)
942 "vector: 0x%08lx after: 0x%08lx\n", 1019 "vector: 0x%08lx after: 0x%08lx\n",
943 oldvalue, value); 1020 oldvalue, value);
944 } else { 1021 } else {
945 if (esr_disable) 1022 printk(KERN_INFO "No ESR for 82489DX.\n");
946 /*
947 * Something untraceable is creating bad interrupts on
948 * secondary quads ... for the moment, just leave the
949 * ESR disabled - we can't do anything useful with the
950 * errors anyway - mbligh
951 */
952 printk(KERN_INFO "Leaving ESR disabled.\n");
953 else
954 printk(KERN_INFO "No ESR for 82489DX.\n");
955 } 1023 }
956} 1024}
957 1025
@@ -1089,13 +1157,17 @@ void __cpuinit setup_local_APIC(void)
1089 1157
1090void __cpuinit end_local_APIC_setup(void) 1158void __cpuinit end_local_APIC_setup(void)
1091{ 1159{
1092 unsigned long value;
1093
1094 lapic_setup_esr(); 1160 lapic_setup_esr();
1095 /* Disable the local apic timer */ 1161
1096 value = apic_read(APIC_LVTT); 1162#ifdef CONFIG_X86_32
1097 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR); 1163 {
1098 apic_write(APIC_LVTT, value); 1164 unsigned int value;
1165 /* Disable the local apic timer */
1166 value = apic_read(APIC_LVTT);
1167 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1168 apic_write(APIC_LVTT, value);
1169 }
1170#endif
1099 1171
1100 setup_apic_nmi_watchdog(NULL); 1172 setup_apic_nmi_watchdog(NULL);
1101 apic_pm_activate(); 1173 apic_pm_activate();
@@ -1205,7 +1277,7 @@ void __init init_apic_mappings(void)
1205 * default configuration (or the MP table is broken). 1277 * default configuration (or the MP table is broken).
1206 */ 1278 */
1207 if (boot_cpu_physical_apicid == -1U) 1279 if (boot_cpu_physical_apicid == -1U)
1208 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1280 boot_cpu_physical_apicid = read_apic_id();
1209 1281
1210} 1282}
1211 1283
@@ -1242,7 +1314,7 @@ int __init APIC_init_uniprocessor(void)
1242 * might be zero if read from MP tables. Get it from LAPIC. 1314 * might be zero if read from MP tables. Get it from LAPIC.
1243 */ 1315 */
1244#ifdef CONFIG_CRASH_DUMP 1316#ifdef CONFIG_CRASH_DUMP
1245 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1317 boot_cpu_physical_apicid = read_apic_id();
1246#endif 1318#endif
1247 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map); 1319 physid_set_mask_of_physid(boot_cpu_physical_apicid, &phys_cpu_present_map);
1248 1320
@@ -1321,59 +1393,12 @@ void smp_error_interrupt(struct pt_regs *regs)
1321 irq_exit(); 1393 irq_exit();
1322} 1394}
1323 1395
1324#ifdef CONFIG_SMP
1325void __init smp_intr_init(void)
1326{
1327 /*
1328 * IRQ0 must be given a fixed assignment and initialized,
1329 * because it's used before the IO-APIC is set up.
1330 */
1331 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
1332
1333 /*
1334 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
1335 * IPI, driven by wakeup.
1336 */
1337 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
1338
1339 /* IPI for invalidation */
1340 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
1341
1342 /* IPI for generic function call */
1343 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
1344
1345 /* IPI for single call function */
1346 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR,
1347 call_function_single_interrupt);
1348}
1349#endif
1350
1351/*
1352 * Initialize APIC interrupts
1353 */
1354void __init apic_intr_init(void)
1355{
1356#ifdef CONFIG_SMP
1357 smp_intr_init();
1358#endif
1359 /* self generated IPI for local APIC timer */
1360 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
1361
1362 /* IPI vectors for APIC spurious and error interrupts */
1363 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
1364 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
1365
1366 /* thermal monitor LVT interrupt */
1367#ifdef CONFIG_X86_MCE_P4THERMAL
1368 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
1369#endif
1370}
1371
1372/** 1396/**
1373 * connect_bsp_APIC - attach the APIC to the interrupt system 1397 * connect_bsp_APIC - attach the APIC to the interrupt system
1374 */ 1398 */
1375void __init connect_bsp_APIC(void) 1399void __init connect_bsp_APIC(void)
1376{ 1400{
1401#ifdef CONFIG_X86_32
1377 if (pic_mode) { 1402 if (pic_mode) {
1378 /* 1403 /*
1379 * Do not trust the local APIC being empty at bootup. 1404 * Do not trust the local APIC being empty at bootup.
@@ -1388,6 +1413,7 @@ void __init connect_bsp_APIC(void)
1388 outb(0x70, 0x22); 1413 outb(0x70, 0x22);
1389 outb(0x01, 0x23); 1414 outb(0x01, 0x23);
1390 } 1415 }
1416#endif
1391 enable_apic_mode(); 1417 enable_apic_mode();
1392} 1418}
1393 1419
@@ -1400,6 +1426,9 @@ void __init connect_bsp_APIC(void)
1400 */ 1426 */
1401void disconnect_bsp_APIC(int virt_wire_setup) 1427void disconnect_bsp_APIC(int virt_wire_setup)
1402{ 1428{
1429 unsigned int value;
1430
1431#ifdef CONFIG_X86_32
1403 if (pic_mode) { 1432 if (pic_mode) {
1404 /* 1433 /*
1405 * Put the board back into PIC mode (has an effect only on 1434 * Put the board back into PIC mode (has an effect only on
@@ -1411,54 +1440,53 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1411 "entering PIC mode.\n"); 1440 "entering PIC mode.\n");
1412 outb(0x70, 0x22); 1441 outb(0x70, 0x22);
1413 outb(0x00, 0x23); 1442 outb(0x00, 0x23);
1414 } else { 1443 return;
1415 /* Go back to Virtual Wire compatibility mode */ 1444 }
1416 unsigned long value; 1445#endif
1417 1446
1418 /* For the spurious interrupt use vector F, and enable it */ 1447 /* Go back to Virtual Wire compatibility mode */
1419 value = apic_read(APIC_SPIV);
1420 value &= ~APIC_VECTOR_MASK;
1421 value |= APIC_SPIV_APIC_ENABLED;
1422 value |= 0xf;
1423 apic_write(APIC_SPIV, value);
1424 1448
1425 if (!virt_wire_setup) { 1449 /* For the spurious interrupt use vector F, and enable it */
1426 /* 1450 value = apic_read(APIC_SPIV);
1427 * For LVT0 make it edge triggered, active high, 1451 value &= ~APIC_VECTOR_MASK;
1428 * external and enabled 1452 value |= APIC_SPIV_APIC_ENABLED;
1429 */ 1453 value |= 0xf;
1430 value = apic_read(APIC_LVT0); 1454 apic_write(APIC_SPIV, value);
1431 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1432 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1433 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1434 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1435 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1436 apic_write(APIC_LVT0, value);
1437 } else {
1438 /* Disable LVT0 */
1439 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1440 }
1441 1455
1456 if (!virt_wire_setup) {
1442 /* 1457 /*
1443 * For LVT1 make it edge triggered, active high, nmi and 1458 * For LVT0 make it edge triggered, active high,
1444 * enabled 1459 * external and enabled
1445 */ 1460 */
1446 value = apic_read(APIC_LVT1); 1461 value = apic_read(APIC_LVT0);
1447 value &= ~( 1462 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1448 APIC_MODE_MASK | APIC_SEND_PENDING |
1449 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 1463 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1450 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED); 1464 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1451 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING; 1465 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1452 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI); 1466 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_EXTINT);
1453 apic_write(APIC_LVT1, value); 1467 apic_write(APIC_LVT0, value);
1468 } else {
1469 /* Disable LVT0 */
1470 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1454 } 1471 }
1472
1473 /*
1474 * For LVT1 make it edge triggered, active high,
1475 * nmi and enabled
1476 */
1477 value = apic_read(APIC_LVT1);
1478 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1479 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
1480 APIC_LVT_LEVEL_TRIGGER | APIC_LVT_MASKED);
1481 value |= APIC_LVT_REMOTE_IRR | APIC_SEND_PENDING;
1482 value = SET_APIC_DELIVERY_MODE(value, APIC_MODE_NMI);
1483 apic_write(APIC_LVT1, value);
1455} 1484}
1456 1485
1457void __cpuinit generic_processor_info(int apicid, int version) 1486void __cpuinit generic_processor_info(int apicid, int version)
1458{ 1487{
1459 int cpu; 1488 int cpu;
1460 cpumask_t tmp_map; 1489 cpumask_t tmp_map;
1461 physid_mask_t phys_cpu;
1462 1490
1463 /* 1491 /*
1464 * Validate version 1492 * Validate version
@@ -1471,9 +1499,6 @@ void __cpuinit generic_processor_info(int apicid, int version)
1471 } 1499 }
1472 apic_version[apicid] = version; 1500 apic_version[apicid] = version;
1473 1501
1474 phys_cpu = apicid_to_cpu_present(apicid);
1475 physids_or(phys_cpu_present_map, phys_cpu_present_map, phys_cpu);
1476
1477 if (num_processors >= NR_CPUS) { 1502 if (num_processors >= NR_CPUS) {
1478 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1503 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1479 " Processor ignored.\n", NR_CPUS); 1504 " Processor ignored.\n", NR_CPUS);
@@ -1484,17 +1509,19 @@ void __cpuinit generic_processor_info(int apicid, int version)
1484 cpus_complement(tmp_map, cpu_present_map); 1509 cpus_complement(tmp_map, cpu_present_map);
1485 cpu = first_cpu(tmp_map); 1510 cpu = first_cpu(tmp_map);
1486 1511
1487 if (apicid == boot_cpu_physical_apicid) 1512 physid_set(apicid, phys_cpu_present_map);
1513 if (apicid == boot_cpu_physical_apicid) {
1488 /* 1514 /*
1489 * x86_bios_cpu_apicid is required to have processors listed 1515 * x86_bios_cpu_apicid is required to have processors listed
1490 * in same order as logical cpu numbers. Hence the first 1516 * in same order as logical cpu numbers. Hence the first
1491 * entry is BSP, and so on. 1517 * entry is BSP, and so on.
1492 */ 1518 */
1493 cpu = 0; 1519 cpu = 0;
1494 1520 }
1495 if (apicid > max_physical_apicid) 1521 if (apicid > max_physical_apicid)
1496 max_physical_apicid = apicid; 1522 max_physical_apicid = apicid;
1497 1523
1524#ifdef CONFIG_X86_32
1498 /* 1525 /*
1499 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y 1526 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1500 * but we need to work other dependencies like SMP_SUSPEND etc 1527 * but we need to work other dependencies like SMP_SUSPEND etc
@@ -1514,7 +1541,9 @@ void __cpuinit generic_processor_info(int apicid, int version)
1514 def_to_bigsmp = 1; 1541 def_to_bigsmp = 1;
1515 } 1542 }
1516 } 1543 }
1517#ifdef CONFIG_SMP 1544#endif
1545
1546#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1518 /* are we being called early in kernel startup? */ 1547 /* are we being called early in kernel startup? */
1519 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1548 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1520 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 1549 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
@@ -1527,6 +1556,7 @@ void __cpuinit generic_processor_info(int apicid, int version)
1527 per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1556 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1528 } 1557 }
1529#endif 1558#endif
1559
1530 cpu_set(cpu, cpu_possible_map); 1560 cpu_set(cpu, cpu_possible_map);
1531 cpu_set(cpu, cpu_present_map); 1561 cpu_set(cpu, cpu_present_map);
1532} 1562}
@@ -1537,6 +1567,11 @@ void __cpuinit generic_processor_info(int apicid, int version)
1537#ifdef CONFIG_PM 1567#ifdef CONFIG_PM
1538 1568
1539static struct { 1569static struct {
1570 /*
1571 * 'active' is true if the local APIC was enabled by us and
1572 * not the BIOS; this signifies that we are also responsible
1573 * for disabling it before entering apm/acpi suspend
1574 */
1540 int active; 1575 int active;
1541 /* r/w apic fields */ 1576 /* r/w apic fields */
1542 unsigned int apic_id; 1577 unsigned int apic_id;
@@ -1577,7 +1612,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1577 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1612 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1578 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 1613 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1579 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 1614 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1580#ifdef CONFIG_X86_MCE_P4THERMAL 1615#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1581 if (maxlvt >= 5) 1616 if (maxlvt >= 5)
1582 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 1617 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1583#endif 1618#endif
@@ -1601,16 +1636,23 @@ static int lapic_resume(struct sys_device *dev)
1601 1636
1602 local_irq_save(flags); 1637 local_irq_save(flags);
1603 1638
1604 /* 1639#ifdef CONFIG_X86_64
1605 * Make sure the APICBASE points to the right address 1640 if (x2apic)
1606 * 1641 enable_x2apic();
1607 * FIXME! This will be wrong if we ever support suspend on 1642 else
1608 * SMP! We'll need to do this as part of the CPU restore! 1643#endif
1609 */ 1644 {
1610 rdmsr(MSR_IA32_APICBASE, l, h); 1645 /*
1611 l &= ~MSR_IA32_APICBASE_BASE; 1646 * Make sure the APICBASE points to the right address
1612 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1647 *
1613 wrmsr(MSR_IA32_APICBASE, l, h); 1648 * FIXME! This will be wrong if we ever support suspend on
1649 * SMP! We'll need to do this as part of the CPU restore!
1650 */
1651 rdmsr(MSR_IA32_APICBASE, l, h);
1652 l &= ~MSR_IA32_APICBASE_BASE;
1653 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1654 wrmsr(MSR_IA32_APICBASE, l, h);
1655 }
1614 1656
1615 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1657 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1616 apic_write(APIC_ID, apic_pm_state.apic_id); 1658 apic_write(APIC_ID, apic_pm_state.apic_id);
@@ -1620,7 +1662,7 @@ static int lapic_resume(struct sys_device *dev)
1620 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 1662 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1621 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1663 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1622 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1664 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1623#ifdef CONFIG_X86_MCE_P4THERMAL 1665#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1624 if (maxlvt >= 5) 1666 if (maxlvt >= 5)
1625 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1667 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1626#endif 1668#endif
@@ -1634,7 +1676,9 @@ static int lapic_resume(struct sys_device *dev)
1634 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 1676 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1635 apic_write(APIC_ESR, 0); 1677 apic_write(APIC_ESR, 0);
1636 apic_read(APIC_ESR); 1678 apic_read(APIC_ESR);
1679
1637 local_irq_restore(flags); 1680 local_irq_restore(flags);
1681
1638 return 0; 1682 return 0;
1639} 1683}
1640 1684
@@ -1690,20 +1734,20 @@ static int __init parse_lapic(char *arg)
1690} 1734}
1691early_param("lapic", parse_lapic); 1735early_param("lapic", parse_lapic);
1692 1736
1693static int __init parse_nolapic(char *arg) 1737static int __init setup_disableapic(char *arg)
1694{ 1738{
1695 disable_apic = 1; 1739 disable_apic = 1;
1696 setup_clear_cpu_cap(X86_FEATURE_APIC); 1740 setup_clear_cpu_cap(X86_FEATURE_APIC);
1697 return 0; 1741 return 0;
1698} 1742}
1699early_param("nolapic", parse_nolapic); 1743early_param("disableapic", setup_disableapic);
1700 1744
1701static int __init parse_disable_lapic_timer(char *arg) 1745/* same as disableapic, for compatibility */
1746static int __init setup_nolapic(char *arg)
1702{ 1747{
1703 local_apic_timer_disabled = 1; 1748 return setup_disableapic(arg);
1704 return 0;
1705} 1749}
1706early_param("nolapic_timer", parse_disable_lapic_timer); 1750early_param("nolapic", setup_nolapic);
1707 1751
1708static int __init parse_lapic_timer_c2_ok(char *arg) 1752static int __init parse_lapic_timer_c2_ok(char *arg)
1709{ 1753{
@@ -1712,15 +1756,40 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1712} 1756}
1713early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1757early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1714 1758
1759static int __init parse_disable_apic_timer(char *arg)
1760{
1761 disable_apic_timer = 1;
1762 return 0;
1763}
1764early_param("noapictimer", parse_disable_apic_timer);
1765
1766static int __init parse_nolapic_timer(char *arg)
1767{
1768 disable_apic_timer = 1;
1769 return 0;
1770}
1771early_param("nolapic_timer", parse_nolapic_timer);
1772
1715static int __init apic_set_verbosity(char *arg) 1773static int __init apic_set_verbosity(char *arg)
1716{ 1774{
1717 if (!arg) 1775 if (!arg) {
1776#ifdef CONFIG_X86_64
1777 skip_ioapic_setup = 0;
1778 ioapic_force = 1;
1779 return 0;
1780#endif
1718 return -EINVAL; 1781 return -EINVAL;
1782 }
1719 1783
1720 if (strcmp(arg, "debug") == 0) 1784 if (strcmp("debug", arg) == 0)
1721 apic_verbosity = APIC_DEBUG; 1785 apic_verbosity = APIC_DEBUG;
1722 else if (strcmp(arg, "verbose") == 0) 1786 else if (strcmp("verbose", arg) == 0)
1723 apic_verbosity = APIC_VERBOSE; 1787 apic_verbosity = APIC_VERBOSE;
1788 else {
1789 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1790 " use apic=verbose or apic=debug\n", arg);
1791 return -EINVAL;
1792 }
1724 1793
1725 return 0; 1794 return 0;
1726} 1795}
diff --git a/arch/x86/kernel/apic_64.c b/arch/x86/kernel/apic_64.c
index 446c062e831c..53898b65a6ae 100644
--- a/arch/x86/kernel/apic_64.c
+++ b/arch/x86/kernel/apic_64.c
@@ -27,6 +27,7 @@
27#include <linux/clockchips.h> 27#include <linux/clockchips.h>
28#include <linux/acpi_pmtmr.h> 28#include <linux/acpi_pmtmr.h>
29#include <linux/module.h> 29#include <linux/module.h>
30#include <linux/dmar.h>
30 31
31#include <asm/atomic.h> 32#include <asm/atomic.h>
32#include <asm/smp.h> 33#include <asm/smp.h>
@@ -39,13 +40,20 @@
39#include <asm/proto.h> 40#include <asm/proto.h>
40#include <asm/timex.h> 41#include <asm/timex.h>
41#include <asm/apic.h> 42#include <asm/apic.h>
43#include <asm/i8259.h>
42 44
43#include <mach_ipi.h> 45#include <mach_ipi.h>
44#include <mach_apic.h> 46#include <mach_apic.h>
45 47
48/* Disable local APIC timer from the kernel commandline or via dmi quirk */
46static int disable_apic_timer __cpuinitdata; 49static int disable_apic_timer __cpuinitdata;
47static int apic_calibrate_pmtmr __initdata; 50static int apic_calibrate_pmtmr __initdata;
48int disable_apic; 51int disable_apic;
52int disable_x2apic;
53int x2apic;
54
55/* x2apic enabled before OS handover */
56int x2apic_preenabled;
49 57
50/* Local APIC timer works in C2 */ 58/* Local APIC timer works in C2 */
51int local_apic_timer_c2_ok; 59int local_apic_timer_c2_ok;
@@ -73,6 +81,9 @@ static void lapic_timer_setup(enum clock_event_mode mode,
73static void lapic_timer_broadcast(cpumask_t mask); 81static void lapic_timer_broadcast(cpumask_t mask);
74static void apic_pm_activate(void); 82static void apic_pm_activate(void);
75 83
84/*
85 * The local apic timer can be used for any function which is CPU local.
86 */
76static struct clock_event_device lapic_clockevent = { 87static struct clock_event_device lapic_clockevent = {
77 .name = "lapic", 88 .name = "lapic",
78 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT 89 .features = CLOCK_EVT_FEAT_PERIODIC | CLOCK_EVT_FEAT_ONESHOT
@@ -99,11 +110,15 @@ static inline int lapic_get_version(void)
99} 110}
100 111
101/* 112/*
102 * Check, if the APIC is integrated or a seperate chip 113 * Check, if the APIC is integrated or a separate chip
103 */ 114 */
104static inline int lapic_is_integrated(void) 115static inline int lapic_is_integrated(void)
105{ 116{
117#ifdef CONFIG_X86_64
106 return 1; 118 return 1;
119#else
120 return APIC_INTEGRATED(lapic_get_version());
121#endif
107} 122}
108 123
109/* 124/*
@@ -118,13 +133,18 @@ static int modern_apic(void)
118 return lapic_get_version() >= 0x14; 133 return lapic_get_version() >= 0x14;
119} 134}
120 135
121void apic_wait_icr_idle(void) 136/*
137 * Paravirt kernels also might be using these below ops. So we still
138 * use generic apic_read()/apic_write(), which might be pointing to different
139 * ops in PARAVIRT case.
140 */
141void xapic_wait_icr_idle(void)
122{ 142{
123 while (apic_read(APIC_ICR) & APIC_ICR_BUSY) 143 while (apic_read(APIC_ICR) & APIC_ICR_BUSY)
124 cpu_relax(); 144 cpu_relax();
125} 145}
126 146
127u32 safe_apic_wait_icr_idle(void) 147u32 safe_xapic_wait_icr_idle(void)
128{ 148{
129 u32 send_status; 149 u32 send_status;
130 int timeout; 150 int timeout;
@@ -140,6 +160,68 @@ u32 safe_apic_wait_icr_idle(void)
140 return send_status; 160 return send_status;
141} 161}
142 162
163void xapic_icr_write(u32 low, u32 id)
164{
165 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(id));
166 apic_write(APIC_ICR, low);
167}
168
169u64 xapic_icr_read(void)
170{
171 u32 icr1, icr2;
172
173 icr2 = apic_read(APIC_ICR2);
174 icr1 = apic_read(APIC_ICR);
175
176 return icr1 | ((u64)icr2 << 32);
177}
178
179static struct apic_ops xapic_ops = {
180 .read = native_apic_mem_read,
181 .write = native_apic_mem_write,
182 .icr_read = xapic_icr_read,
183 .icr_write = xapic_icr_write,
184 .wait_icr_idle = xapic_wait_icr_idle,
185 .safe_wait_icr_idle = safe_xapic_wait_icr_idle,
186};
187
188struct apic_ops __read_mostly *apic_ops = &xapic_ops;
189EXPORT_SYMBOL_GPL(apic_ops);
190
191static void x2apic_wait_icr_idle(void)
192{
193 /* no need to wait for icr idle in x2apic */
194 return;
195}
196
197static u32 safe_x2apic_wait_icr_idle(void)
198{
199 /* no need to wait for icr idle in x2apic */
200 return 0;
201}
202
203void x2apic_icr_write(u32 low, u32 id)
204{
205 wrmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), ((__u64) id) << 32 | low);
206}
207
208u64 x2apic_icr_read(void)
209{
210 unsigned long val;
211
212 rdmsrl(APIC_BASE_MSR + (APIC_ICR >> 4), val);
213 return val;
214}
215
216static struct apic_ops x2apic_ops = {
217 .read = native_apic_msr_read,
218 .write = native_apic_msr_write,
219 .icr_read = x2apic_icr_read,
220 .icr_write = x2apic_icr_write,
221 .wait_icr_idle = x2apic_wait_icr_idle,
222 .safe_wait_icr_idle = safe_x2apic_wait_icr_idle,
223};
224
143/** 225/**
144 * enable_NMI_through_LVT0 - enable NMI through local vector table 0 226 * enable_NMI_through_LVT0 - enable NMI through local vector table 0
145 */ 227 */
@@ -149,6 +231,11 @@ void __cpuinit enable_NMI_through_LVT0(void)
149 231
150 /* unmask and set to NMI */ 232 /* unmask and set to NMI */
151 v = APIC_DM_NMI; 233 v = APIC_DM_NMI;
234
235 /* Level triggered for 82489DX (32bit mode) */
236 if (!lapic_is_integrated())
237 v |= APIC_LVT_LEVEL_TRIGGER;
238
152 apic_write(APIC_LVT0, v); 239 apic_write(APIC_LVT0, v);
153} 240}
154 241
@@ -157,14 +244,28 @@ void __cpuinit enable_NMI_through_LVT0(void)
157 */ 244 */
158int lapic_get_maxlvt(void) 245int lapic_get_maxlvt(void)
159{ 246{
160 unsigned int v, maxlvt; 247 unsigned int v;
161 248
162 v = apic_read(APIC_LVR); 249 v = apic_read(APIC_LVR);
163 maxlvt = GET_APIC_MAXLVT(v); 250 /*
164 return maxlvt; 251 * - we always have APIC integrated on 64bit mode
252 * - 82489DXs do not report # of LVT entries
253 */
254 return APIC_INTEGRATED(GET_APIC_VERSION(v)) ? GET_APIC_MAXLVT(v) : 2;
165} 255}
166 256
167/* 257/*
258 * Local APIC timer
259 */
260
261/* Clock divisor */
262#ifdef CONFG_X86_64
263#define APIC_DIVISOR 1
264#else
265#define APIC_DIVISOR 16
266#endif
267
268/*
168 * This function sets up the local APIC timer, with a timeout of 269 * This function sets up the local APIC timer, with a timeout of
169 * 'clocks' APIC bus clock. During calibration we actually call 270 * 'clocks' APIC bus clock. During calibration we actually call
170 * this function twice on the boot CPU, once with a bogus timeout 271 * this function twice on the boot CPU, once with a bogus timeout
@@ -174,7 +275,6 @@ int lapic_get_maxlvt(void)
174 * We do reads before writes even if unnecessary, to get around the 275 * We do reads before writes even if unnecessary, to get around the
175 * P5 APIC double write bug. 276 * P5 APIC double write bug.
176 */ 277 */
177
178static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen) 278static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
179{ 279{
180 unsigned int lvtt_value, tmp_value; 280 unsigned int lvtt_value, tmp_value;
@@ -182,6 +282,9 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
182 lvtt_value = LOCAL_TIMER_VECTOR; 282 lvtt_value = LOCAL_TIMER_VECTOR;
183 if (!oneshot) 283 if (!oneshot)
184 lvtt_value |= APIC_LVT_TIMER_PERIODIC; 284 lvtt_value |= APIC_LVT_TIMER_PERIODIC;
285 if (!lapic_is_integrated())
286 lvtt_value |= SET_APIC_TIMER_BASE(APIC_TIMER_BASE_DIV);
287
185 if (!irqen) 288 if (!irqen)
186 lvtt_value |= APIC_LVT_MASKED; 289 lvtt_value |= APIC_LVT_MASKED;
187 290
@@ -191,12 +294,12 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
191 * Divide PICLK by 16 294 * Divide PICLK by 16
192 */ 295 */
193 tmp_value = apic_read(APIC_TDCR); 296 tmp_value = apic_read(APIC_TDCR);
194 apic_write(APIC_TDCR, (tmp_value 297 apic_write(APIC_TDCR,
195 & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) 298 (tmp_value & ~(APIC_TDR_DIV_1 | APIC_TDR_DIV_TMBASE)) |
196 | APIC_TDR_DIV_16); 299 APIC_TDR_DIV_16);
197 300
198 if (!oneshot) 301 if (!oneshot)
199 apic_write(APIC_TMICT, clocks); 302 apic_write(APIC_TMICT, clocks / APIC_DIVISOR);
200} 303}
201 304
202/* 305/*
@@ -366,7 +469,7 @@ static int __init calibrate_APIC_clock(void)
366 lapic_clockevent.min_delta_ns = 469 lapic_clockevent.min_delta_ns =
367 clockevent_delta2ns(0xF, &lapic_clockevent); 470 clockevent_delta2ns(0xF, &lapic_clockevent);
368 471
369 calibration_result = result / HZ; 472 calibration_result = (result * APIC_DIVISOR) / HZ;
370 473
371 /* 474 /*
372 * Do a sanity check on the APIC calibration result 475 * Do a sanity check on the APIC calibration result
@@ -388,10 +491,10 @@ static int __init calibrate_APIC_clock(void)
388void __init setup_boot_APIC_clock(void) 491void __init setup_boot_APIC_clock(void)
389{ 492{
390 /* 493 /*
391 * The local apic timer can be disabled via the kernel commandline. 494 * The local apic timer can be disabled via the kernel
392 * Register the lapic timer as a dummy clock event source on SMP 495 * commandline or from the CPU detection code. Register the lapic
393 * systems, so the broadcast mechanism is used. On UP systems simply 496 * timer as a dummy clock event source on SMP systems, so the
394 * ignore it. 497 * broadcast mechanism is used. On UP systems simply ignore it.
395 */ 498 */
396 if (disable_apic_timer) { 499 if (disable_apic_timer) {
397 printk(KERN_INFO "Disabling APIC timer\n"); 500 printk(KERN_INFO "Disabling APIC timer\n");
@@ -403,7 +506,9 @@ void __init setup_boot_APIC_clock(void)
403 return; 506 return;
404 } 507 }
405 508
406 printk(KERN_INFO "Using local APIC timer interrupts.\n"); 509 apic_printk(APIC_VERBOSE, "Using local APIC timer interrupts.\n"
510 "calibrating APIC timer ...\n");
511
407 if (calibrate_APIC_clock()) { 512 if (calibrate_APIC_clock()) {
408 /* No broadcast on UP ! */ 513 /* No broadcast on UP ! */
409 if (num_possible_cpus() > 1) 514 if (num_possible_cpus() > 1)
@@ -422,6 +527,7 @@ void __init setup_boot_APIC_clock(void)
422 printk(KERN_WARNING "APIC timer registered as dummy," 527 printk(KERN_WARNING "APIC timer registered as dummy,"
423 " due to nmi_watchdog=%d!\n", nmi_watchdog); 528 " due to nmi_watchdog=%d!\n", nmi_watchdog);
424 529
530 /* Setup the lapic or request the broadcast */
425 setup_APIC_timer(); 531 setup_APIC_timer();
426} 532}
427 533
@@ -460,7 +566,11 @@ static void local_apic_timer_interrupt(void)
460 /* 566 /*
461 * the NMI deadlock-detector uses this. 567 * the NMI deadlock-detector uses this.
462 */ 568 */
569#ifdef CONFIG_X86_64
463 add_pda(apic_timer_irqs, 1); 570 add_pda(apic_timer_irqs, 1);
571#else
572 per_cpu(irq_stat, cpu).apic_timer_irqs++;
573#endif
464 574
465 evt->event_handler(evt); 575 evt->event_handler(evt);
466} 576}
@@ -491,6 +601,7 @@ void smp_apic_timer_interrupt(struct pt_regs *regs)
491 irq_enter(); 601 irq_enter();
492 local_apic_timer_interrupt(); 602 local_apic_timer_interrupt();
493 irq_exit(); 603 irq_exit();
604
494 set_irq_regs(old_regs); 605 set_irq_regs(old_regs);
495} 606}
496 607
@@ -544,6 +655,13 @@ void clear_local_APIC(void)
544 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED); 655 apic_write(APIC_LVTPC, v | APIC_LVT_MASKED);
545 } 656 }
546 657
658 /* lets not touch this if we didn't frob it */
659#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(X86_MCE_INTEL)
660 if (maxlvt >= 5) {
661 v = apic_read(APIC_LVTTHMR);
662 apic_write(APIC_LVTTHMR, v | APIC_LVT_MASKED);
663 }
664#endif
547 /* 665 /*
548 * Clean APIC state for other OSs: 666 * Clean APIC state for other OSs:
549 */ 667 */
@@ -554,8 +672,14 @@ void clear_local_APIC(void)
554 apic_write(APIC_LVTERR, APIC_LVT_MASKED); 672 apic_write(APIC_LVTERR, APIC_LVT_MASKED);
555 if (maxlvt >= 4) 673 if (maxlvt >= 4)
556 apic_write(APIC_LVTPC, APIC_LVT_MASKED); 674 apic_write(APIC_LVTPC, APIC_LVT_MASKED);
557 apic_write(APIC_ESR, 0); 675
558 apic_read(APIC_ESR); 676 /* Integrated APIC (!82489DX) ? */
677 if (lapic_is_integrated()) {
678 if (maxlvt > 3)
679 /* Clear ESR due to Pentium errata 3AP and 11AP */
680 apic_write(APIC_ESR, 0);
681 apic_read(APIC_ESR);
682 }
559} 683}
560 684
561/** 685/**
@@ -574,8 +698,28 @@ void disable_local_APIC(void)
574 value = apic_read(APIC_SPIV); 698 value = apic_read(APIC_SPIV);
575 value &= ~APIC_SPIV_APIC_ENABLED; 699 value &= ~APIC_SPIV_APIC_ENABLED;
576 apic_write(APIC_SPIV, value); 700 apic_write(APIC_SPIV, value);
701
702#ifdef CONFIG_X86_32
703 /*
704 * When LAPIC was disabled by the BIOS and enabled by the kernel,
705 * restore the disabled state.
706 */
707 if (enabled_via_apicbase) {
708 unsigned int l, h;
709
710 rdmsr(MSR_IA32_APICBASE, l, h);
711 l &= ~MSR_IA32_APICBASE_ENABLE;
712 wrmsr(MSR_IA32_APICBASE, l, h);
713 }
714#endif
577} 715}
578 716
717/*
718 * If Linux enabled the LAPIC against the BIOS default disable it down before
719 * re-entering the BIOS on shutdown. Otherwise the BIOS may get confused and
720 * not power-off. Additionally clear all LVT entries before disable_local_APIC
721 * for the case where Linux didn't enable the LAPIC.
722 */
579void lapic_shutdown(void) 723void lapic_shutdown(void)
580{ 724{
581 unsigned long flags; 725 unsigned long flags;
@@ -585,7 +729,13 @@ void lapic_shutdown(void)
585 729
586 local_irq_save(flags); 730 local_irq_save(flags);
587 731
588 disable_local_APIC(); 732#ifdef CONFIG_X86_32
733 if (!enabled_via_apicbase)
734 clear_local_APIC();
735 else
736#endif
737 disable_local_APIC();
738
589 739
590 local_irq_restore(flags); 740 local_irq_restore(flags);
591} 741}
@@ -629,10 +779,10 @@ int __init verify_local_APIC(void)
629 /* 779 /*
630 * The ID register is read/write in a real APIC. 780 * The ID register is read/write in a real APIC.
631 */ 781 */
632 reg0 = read_apic_id(); 782 reg0 = apic_read(APIC_ID);
633 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0); 783 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg0);
634 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK); 784 apic_write(APIC_ID, reg0 ^ APIC_ID_MASK);
635 reg1 = read_apic_id(); 785 reg1 = apic_read(APIC_ID);
636 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1); 786 apic_printk(APIC_DEBUG, "Getting ID: %x\n", reg1);
637 apic_write(APIC_ID, reg0); 787 apic_write(APIC_ID, reg0);
638 if (reg1 != (reg0 ^ APIC_ID_MASK)) 788 if (reg1 != (reg0 ^ APIC_ID_MASK))
@@ -656,8 +806,11 @@ int __init verify_local_APIC(void)
656 */ 806 */
657void __init sync_Arb_IDs(void) 807void __init sync_Arb_IDs(void)
658{ 808{
659 /* Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 */ 809 /*
660 if (modern_apic()) 810 * Unsupported on P4 - see Intel Dev. Manual Vol. 3, Ch. 8.6.1 And not
811 * needed on AMD.
812 */
813 if (modern_apic() || boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
661 return; 814 return;
662 815
663 /* 816 /*
@@ -666,8 +819,8 @@ void __init sync_Arb_IDs(void)
666 apic_wait_icr_idle(); 819 apic_wait_icr_idle();
667 820
668 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n"); 821 apic_printk(APIC_DEBUG, "Synchronizing Arb IDs.\n");
669 apic_write(APIC_ICR, APIC_DEST_ALLINC | APIC_INT_LEVELTRIG 822 apic_write(APIC_ICR, APIC_DEST_ALLINC |
670 | APIC_DM_INIT); 823 APIC_INT_LEVELTRIG | APIC_DM_INIT);
671} 824}
672 825
673/* 826/*
@@ -684,8 +837,6 @@ void __init init_bsp_APIC(void)
684 if (smp_found_config || !cpu_has_apic) 837 if (smp_found_config || !cpu_has_apic)
685 return; 838 return;
686 839
687 value = apic_read(APIC_LVR);
688
689 /* 840 /*
690 * Do not trust the local APIC being empty at bootup. 841 * Do not trust the local APIC being empty at bootup.
691 */ 842 */
@@ -697,7 +848,15 @@ void __init init_bsp_APIC(void)
697 value = apic_read(APIC_SPIV); 848 value = apic_read(APIC_SPIV);
698 value &= ~APIC_VECTOR_MASK; 849 value &= ~APIC_VECTOR_MASK;
699 value |= APIC_SPIV_APIC_ENABLED; 850 value |= APIC_SPIV_APIC_ENABLED;
700 value |= APIC_SPIV_FOCUS_DISABLED; 851
852#ifdef CONFIG_X86_32
853 /* This bit is reserved on P4/Xeon and should be cleared */
854 if ((boot_cpu_data.x86_vendor == X86_VENDOR_INTEL) &&
855 (boot_cpu_data.x86 == 15))
856 value &= ~APIC_SPIV_FOCUS_DISABLED;
857 else
858#endif
859 value |= APIC_SPIV_FOCUS_DISABLED;
701 value |= SPURIOUS_APIC_VECTOR; 860 value |= SPURIOUS_APIC_VECTOR;
702 apic_write(APIC_SPIV, value); 861 apic_write(APIC_SPIV, value);
703 862
@@ -706,9 +865,50 @@ void __init init_bsp_APIC(void)
706 */ 865 */
707 apic_write(APIC_LVT0, APIC_DM_EXTINT); 866 apic_write(APIC_LVT0, APIC_DM_EXTINT);
708 value = APIC_DM_NMI; 867 value = APIC_DM_NMI;
868 if (!lapic_is_integrated()) /* 82489DX */
869 value |= APIC_LVT_LEVEL_TRIGGER;
709 apic_write(APIC_LVT1, value); 870 apic_write(APIC_LVT1, value);
710} 871}
711 872
873static void __cpuinit lapic_setup_esr(void)
874{
875 unsigned long oldvalue, value, maxlvt;
876 if (lapic_is_integrated() && !esr_disable) {
877 if (esr_disable) {
878 /*
879 * Something untraceable is creating bad interrupts on
880 * secondary quads ... for the moment, just leave the
881 * ESR disabled - we can't do anything useful with the
882 * errors anyway - mbligh
883 */
884 printk(KERN_INFO "Leaving ESR disabled.\n");
885 return;
886 }
887 /* !82489DX */
888 maxlvt = lapic_get_maxlvt();
889 if (maxlvt > 3) /* Due to the Pentium erratum 3AP. */
890 apic_write(APIC_ESR, 0);
891 oldvalue = apic_read(APIC_ESR);
892
893 /* enables sending errors */
894 value = ERROR_APIC_VECTOR;
895 apic_write(APIC_LVTERR, value);
896 /*
897 * spec says clear errors after enabling vector.
898 */
899 if (maxlvt > 3)
900 apic_write(APIC_ESR, 0);
901 value = apic_read(APIC_ESR);
902 if (value != oldvalue)
903 apic_printk(APIC_VERBOSE, "ESR value before enabling "
904 "vector: 0x%08lx after: 0x%08lx\n",
905 oldvalue, value);
906 } else {
907 printk(KERN_INFO "No ESR for 82489DX.\n");
908 }
909}
910
911
712/** 912/**
713 * setup_local_APIC - setup the local APIC 913 * setup_local_APIC - setup the local APIC
714 */ 914 */
@@ -814,25 +1014,143 @@ void __cpuinit setup_local_APIC(void)
814 preempt_enable(); 1014 preempt_enable();
815} 1015}
816 1016
817static void __cpuinit lapic_setup_esr(void)
818{
819 unsigned maxlvt = lapic_get_maxlvt();
820
821 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR);
822 /*
823 * spec says clear errors after enabling vector.
824 */
825 if (maxlvt > 3)
826 apic_write(APIC_ESR, 0);
827}
828
829void __cpuinit end_local_APIC_setup(void) 1017void __cpuinit end_local_APIC_setup(void)
830{ 1018{
831 lapic_setup_esr(); 1019 lapic_setup_esr();
1020
1021#ifdef CONFIG_X86_32
1022 {
1023 unsigned int value;
1024 /* Disable the local apic timer */
1025 value = apic_read(APIC_LVTT);
1026 value |= (APIC_LVT_MASKED | LOCAL_TIMER_VECTOR);
1027 apic_write(APIC_LVTT, value);
1028 }
1029#endif
1030
832 setup_apic_nmi_watchdog(NULL); 1031 setup_apic_nmi_watchdog(NULL);
833 apic_pm_activate(); 1032 apic_pm_activate();
834} 1033}
835 1034
1035void check_x2apic(void)
1036{
1037 int msr, msr2;
1038
1039 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1040
1041 if (msr & X2APIC_ENABLE) {
1042 printk("x2apic enabled by BIOS, switching to x2apic ops\n");
1043 x2apic_preenabled = x2apic = 1;
1044 apic_ops = &x2apic_ops;
1045 }
1046}
1047
1048void enable_x2apic(void)
1049{
1050 int msr, msr2;
1051
1052 rdmsr(MSR_IA32_APICBASE, msr, msr2);
1053 if (!(msr & X2APIC_ENABLE)) {
1054 printk("Enabling x2apic\n");
1055 wrmsr(MSR_IA32_APICBASE, msr | X2APIC_ENABLE, 0);
1056 }
1057}
1058
1059void enable_IR_x2apic(void)
1060{
1061#ifdef CONFIG_INTR_REMAP
1062 int ret;
1063 unsigned long flags;
1064
1065 if (!cpu_has_x2apic)
1066 return;
1067
1068 if (!x2apic_preenabled && disable_x2apic) {
1069 printk(KERN_INFO
1070 "Skipped enabling x2apic and Interrupt-remapping "
1071 "because of nox2apic\n");
1072 return;
1073 }
1074
1075 if (x2apic_preenabled && disable_x2apic)
1076 panic("Bios already enabled x2apic, can't enforce nox2apic");
1077
1078 if (!x2apic_preenabled && skip_ioapic_setup) {
1079 printk(KERN_INFO
1080 "Skipped enabling x2apic and Interrupt-remapping "
1081 "because of skipping io-apic setup\n");
1082 return;
1083 }
1084
1085 ret = dmar_table_init();
1086 if (ret) {
1087 printk(KERN_INFO
1088 "dmar_table_init() failed with %d:\n", ret);
1089
1090 if (x2apic_preenabled)
1091 panic("x2apic enabled by bios. But IR enabling failed");
1092 else
1093 printk(KERN_INFO
1094 "Not enabling x2apic,Intr-remapping\n");
1095 return;
1096 }
1097
1098 local_irq_save(flags);
1099 mask_8259A();
1100 save_mask_IO_APIC_setup();
1101
1102 ret = enable_intr_remapping(1);
1103
1104 if (ret && x2apic_preenabled) {
1105 local_irq_restore(flags);
1106 panic("x2apic enabled by bios. But IR enabling failed");
1107 }
1108
1109 if (ret)
1110 goto end;
1111
1112 if (!x2apic) {
1113 x2apic = 1;
1114 apic_ops = &x2apic_ops;
1115 enable_x2apic();
1116 }
1117end:
1118 if (ret)
1119 /*
1120 * IR enabling failed
1121 */
1122 restore_IO_APIC_setup();
1123 else
1124 reinit_intr_remapped_IO_APIC(x2apic_preenabled);
1125
1126 unmask_8259A();
1127 local_irq_restore(flags);
1128
1129 if (!ret) {
1130 if (!x2apic_preenabled)
1131 printk(KERN_INFO
1132 "Enabled x2apic and interrupt-remapping\n");
1133 else
1134 printk(KERN_INFO
1135 "Enabled Interrupt-remapping\n");
1136 } else
1137 printk(KERN_ERR
1138 "Failed to enable Interrupt-remapping and x2apic\n");
1139#else
1140 if (!cpu_has_x2apic)
1141 return;
1142
1143 if (x2apic_preenabled)
1144 panic("x2apic enabled prior OS handover,"
1145 " enable CONFIG_INTR_REMAP");
1146
1147 printk(KERN_INFO "Enable CONFIG_INTR_REMAP for enabling intr-remapping "
1148 " and x2apic\n");
1149#endif
1150
1151 return;
1152}
1153
836/* 1154/*
837 * Detect and enable local APICs on non-SMP boards. 1155 * Detect and enable local APICs on non-SMP boards.
838 * Original code written by Keir Fraser. 1156 * Original code written by Keir Fraser.
@@ -872,7 +1190,7 @@ void __init early_init_lapic_mapping(void)
872 * Fetch the APIC ID of the BSP in case we have a 1190 * Fetch the APIC ID of the BSP in case we have a
873 * default configuration (or the MP table is broken). 1191 * default configuration (or the MP table is broken).
874 */ 1192 */
875 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1193 boot_cpu_physical_apicid = read_apic_id();
876} 1194}
877 1195
878/** 1196/**
@@ -880,6 +1198,11 @@ void __init early_init_lapic_mapping(void)
880 */ 1198 */
881void __init init_apic_mappings(void) 1199void __init init_apic_mappings(void)
882{ 1200{
1201 if (x2apic) {
1202 boot_cpu_physical_apicid = read_apic_id();
1203 return;
1204 }
1205
883 /* 1206 /*
884 * If no local APIC can be found then set up a fake all 1207 * If no local APIC can be found then set up a fake all
885 * zeroes page to simulate the local APIC and another 1208 * zeroes page to simulate the local APIC and another
@@ -899,13 +1222,15 @@ void __init init_apic_mappings(void)
899 * Fetch the APIC ID of the BSP in case we have a 1222 * Fetch the APIC ID of the BSP in case we have a
900 * default configuration (or the MP table is broken). 1223 * default configuration (or the MP table is broken).
901 */ 1224 */
902 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 1225 boot_cpu_physical_apicid = read_apic_id();
903} 1226}
904 1227
905/* 1228/*
906 * This initializes the IO-APIC and APIC hardware if this is 1229 * This initializes the IO-APIC and APIC hardware if this is
907 * a UP kernel. 1230 * a UP kernel.
908 */ 1231 */
1232int apic_version[MAX_APICS];
1233
909int __init APIC_init_uniprocessor(void) 1234int __init APIC_init_uniprocessor(void)
910{ 1235{
911 if (disable_apic) { 1236 if (disable_apic) {
@@ -918,6 +1243,9 @@ int __init APIC_init_uniprocessor(void)
918 return -1; 1243 return -1;
919 } 1244 }
920 1245
1246 enable_IR_x2apic();
1247 setup_apic_routing();
1248
921 verify_local_APIC(); 1249 verify_local_APIC();
922 1250
923 connect_bsp_APIC(); 1251 connect_bsp_APIC();
@@ -1004,17 +1332,57 @@ asmlinkage void smp_error_interrupt(void)
1004} 1332}
1005 1333
1006/** 1334/**
1007 * * connect_bsp_APIC - attach the APIC to the interrupt system 1335 * connect_bsp_APIC - attach the APIC to the interrupt system
1008 * */ 1336 */
1009void __init connect_bsp_APIC(void) 1337void __init connect_bsp_APIC(void)
1010{ 1338{
1339#ifdef CONFIG_X86_32
1340 if (pic_mode) {
1341 /*
1342 * Do not trust the local APIC being empty at bootup.
1343 */
1344 clear_local_APIC();
1345 /*
1346 * PIC mode, enable APIC mode in the IMCR, i.e. connect BSP's
1347 * local APIC to INT and NMI lines.
1348 */
1349 apic_printk(APIC_VERBOSE, "leaving PIC mode, "
1350 "enabling APIC mode.\n");
1351 outb(0x70, 0x22);
1352 outb(0x01, 0x23);
1353 }
1354#endif
1011 enable_apic_mode(); 1355 enable_apic_mode();
1012} 1356}
1013 1357
1358/**
1359 * disconnect_bsp_APIC - detach the APIC from the interrupt system
1360 * @virt_wire_setup: indicates, whether virtual wire mode is selected
1361 *
1362 * Virtual wire mode is necessary to deliver legacy interrupts even when the
1363 * APIC is disabled.
1364 */
1014void disconnect_bsp_APIC(int virt_wire_setup) 1365void disconnect_bsp_APIC(int virt_wire_setup)
1015{ 1366{
1367 unsigned int value;
1368
1369#ifdef CONFIG_X86_32
1370 if (pic_mode) {
1371 /*
1372 * Put the board back into PIC mode (has an effect only on
1373 * certain older boards). Note that APIC interrupts, including
1374 * IPIs, won't work beyond this point! The only exception are
1375 * INIT IPIs.
1376 */
1377 apic_printk(APIC_VERBOSE, "disabling APIC mode, "
1378 "entering PIC mode.\n");
1379 outb(0x70, 0x22);
1380 outb(0x00, 0x23);
1381 return;
1382 }
1383#endif
1384
1016 /* Go back to Virtual Wire compatibility mode */ 1385 /* Go back to Virtual Wire compatibility mode */
1017 unsigned long value;
1018 1386
1019 /* For the spurious interrupt use vector F, and enable it */ 1387 /* For the spurious interrupt use vector F, and enable it */
1020 value = apic_read(APIC_SPIV); 1388 value = apic_read(APIC_SPIV);
@@ -1040,7 +1408,10 @@ void disconnect_bsp_APIC(int virt_wire_setup)
1040 apic_write(APIC_LVT0, APIC_LVT_MASKED); 1408 apic_write(APIC_LVT0, APIC_LVT_MASKED);
1041 } 1409 }
1042 1410
1043 /* For LVT1 make it edge triggered, active high, nmi and enabled */ 1411 /*
1412 * For LVT1 make it edge triggered, active high,
1413 * nmi and enabled
1414 */
1044 value = apic_read(APIC_LVT1); 1415 value = apic_read(APIC_LVT1);
1045 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING | 1416 value &= ~(APIC_MODE_MASK | APIC_SEND_PENDING |
1046 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR | 1417 APIC_INPUT_POLARITY | APIC_LVT_REMOTE_IRR |
@@ -1055,9 +1426,20 @@ void __cpuinit generic_processor_info(int apicid, int version)
1055 int cpu; 1426 int cpu;
1056 cpumask_t tmp_map; 1427 cpumask_t tmp_map;
1057 1428
1429 /*
1430 * Validate version
1431 */
1432 if (version == 0x0) {
1433 printk(KERN_WARNING "BIOS bug, APIC version is 0 for CPU#%d! "
1434 "fixing up to 0x10. (tell your hw vendor)\n",
1435 version);
1436 version = 0x10;
1437 }
1438 apic_version[apicid] = version;
1439
1058 if (num_processors >= NR_CPUS) { 1440 if (num_processors >= NR_CPUS) {
1059 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached." 1441 printk(KERN_WARNING "WARNING: NR_CPUS limit of %i reached."
1060 " Processor ignored.\n", NR_CPUS); 1442 " Processor ignored.\n", NR_CPUS);
1061 return; 1443 return;
1062 } 1444 }
1063 1445
@@ -1077,6 +1459,29 @@ void __cpuinit generic_processor_info(int apicid, int version)
1077 if (apicid > max_physical_apicid) 1459 if (apicid > max_physical_apicid)
1078 max_physical_apicid = apicid; 1460 max_physical_apicid = apicid;
1079 1461
1462#ifdef CONFIG_X86_32
1463 /*
1464 * Would be preferable to switch to bigsmp when CONFIG_HOTPLUG_CPU=y
1465 * but we need to work other dependencies like SMP_SUSPEND etc
1466 * before this can be done without some confusion.
1467 * if (CPU_HOTPLUG_ENABLED || num_processors > 8)
1468 * - Ashok Raj <ashok.raj@intel.com>
1469 */
1470 if (max_physical_apicid >= 8) {
1471 switch (boot_cpu_data.x86_vendor) {
1472 case X86_VENDOR_INTEL:
1473 if (!APIC_XAPIC(version)) {
1474 def_to_bigsmp = 0;
1475 break;
1476 }
1477 /* If P4 and above fall through */
1478 case X86_VENDOR_AMD:
1479 def_to_bigsmp = 1;
1480 }
1481 }
1482#endif
1483
1484#if defined(CONFIG_X86_SMP) || defined(CONFIG_X86_64)
1080 /* are we being called early in kernel startup? */ 1485 /* are we being called early in kernel startup? */
1081 if (early_per_cpu_ptr(x86_cpu_to_apicid)) { 1486 if (early_per_cpu_ptr(x86_cpu_to_apicid)) {
1082 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid); 1487 u16 *cpu_to_apicid = early_per_cpu_ptr(x86_cpu_to_apicid);
@@ -1088,20 +1493,28 @@ void __cpuinit generic_processor_info(int apicid, int version)
1088 per_cpu(x86_cpu_to_apicid, cpu) = apicid; 1493 per_cpu(x86_cpu_to_apicid, cpu) = apicid;
1089 per_cpu(x86_bios_cpu_apicid, cpu) = apicid; 1494 per_cpu(x86_bios_cpu_apicid, cpu) = apicid;
1090 } 1495 }
1496#endif
1091 1497
1092 cpu_set(cpu, cpu_possible_map); 1498 cpu_set(cpu, cpu_possible_map);
1093 cpu_set(cpu, cpu_present_map); 1499 cpu_set(cpu, cpu_present_map);
1094} 1500}
1095 1501
1502int hard_smp_processor_id(void)
1503{
1504 return read_apic_id();
1505}
1506
1096/* 1507/*
1097 * Power management 1508 * Power management
1098 */ 1509 */
1099#ifdef CONFIG_PM 1510#ifdef CONFIG_PM
1100 1511
1101static struct { 1512static struct {
1102 /* 'active' is true if the local APIC was enabled by us and 1513 /*
1103 not the BIOS; this signifies that we are also responsible 1514 * 'active' is true if the local APIC was enabled by us and
1104 for disabling it before entering apm/acpi suspend */ 1515 * not the BIOS; this signifies that we are also responsible
1516 * for disabling it before entering apm/acpi suspend
1517 */
1105 int active; 1518 int active;
1106 /* r/w apic fields */ 1519 /* r/w apic fields */
1107 unsigned int apic_id; 1520 unsigned int apic_id;
@@ -1129,7 +1542,7 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1129 1542
1130 maxlvt = lapic_get_maxlvt(); 1543 maxlvt = lapic_get_maxlvt();
1131 1544
1132 apic_pm_state.apic_id = read_apic_id(); 1545 apic_pm_state.apic_id = apic_read(APIC_ID);
1133 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI); 1546 apic_pm_state.apic_taskpri = apic_read(APIC_TASKPRI);
1134 apic_pm_state.apic_ldr = apic_read(APIC_LDR); 1547 apic_pm_state.apic_ldr = apic_read(APIC_LDR);
1135 apic_pm_state.apic_dfr = apic_read(APIC_DFR); 1548 apic_pm_state.apic_dfr = apic_read(APIC_DFR);
@@ -1142,10 +1555,11 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state)
1142 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR); 1555 apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
1143 apic_pm_state.apic_tmict = apic_read(APIC_TMICT); 1556 apic_pm_state.apic_tmict = apic_read(APIC_TMICT);
1144 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR); 1557 apic_pm_state.apic_tdcr = apic_read(APIC_TDCR);
1145#ifdef CONFIG_X86_MCE_INTEL 1558#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1146 if (maxlvt >= 5) 1559 if (maxlvt >= 5)
1147 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR); 1560 apic_pm_state.apic_thmr = apic_read(APIC_LVTTHMR);
1148#endif 1561#endif
1562
1149 local_irq_save(flags); 1563 local_irq_save(flags);
1150 disable_local_APIC(); 1564 disable_local_APIC();
1151 local_irq_restore(flags); 1565 local_irq_restore(flags);
@@ -1164,10 +1578,25 @@ static int lapic_resume(struct sys_device *dev)
1164 maxlvt = lapic_get_maxlvt(); 1578 maxlvt = lapic_get_maxlvt();
1165 1579
1166 local_irq_save(flags); 1580 local_irq_save(flags);
1167 rdmsr(MSR_IA32_APICBASE, l, h); 1581
1168 l &= ~MSR_IA32_APICBASE_BASE; 1582#ifdef CONFIG_X86_64
1169 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr; 1583 if (x2apic)
1170 wrmsr(MSR_IA32_APICBASE, l, h); 1584 enable_x2apic();
1585 else
1586#endif
1587 {
1588 /*
1589 * Make sure the APICBASE points to the right address
1590 *
1591 * FIXME! This will be wrong if we ever support suspend on
1592 * SMP! We'll need to do this as part of the CPU restore!
1593 */
1594 rdmsr(MSR_IA32_APICBASE, l, h);
1595 l &= ~MSR_IA32_APICBASE_BASE;
1596 l |= MSR_IA32_APICBASE_ENABLE | mp_lapic_addr;
1597 wrmsr(MSR_IA32_APICBASE, l, h);
1598 }
1599
1171 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); 1600 apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED);
1172 apic_write(APIC_ID, apic_pm_state.apic_id); 1601 apic_write(APIC_ID, apic_pm_state.apic_id);
1173 apic_write(APIC_DFR, apic_pm_state.apic_dfr); 1602 apic_write(APIC_DFR, apic_pm_state.apic_dfr);
@@ -1176,7 +1605,7 @@ static int lapic_resume(struct sys_device *dev)
1176 apic_write(APIC_SPIV, apic_pm_state.apic_spiv); 1605 apic_write(APIC_SPIV, apic_pm_state.apic_spiv);
1177 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0); 1606 apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
1178 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1); 1607 apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
1179#ifdef CONFIG_X86_MCE_INTEL 1608#if defined(CONFIG_X86_MCE_P4THERMAL) || defined(CONFIG_X86_MCE_INTEL)
1180 if (maxlvt >= 5) 1609 if (maxlvt >= 5)
1181 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr); 1610 apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
1182#endif 1611#endif
@@ -1190,10 +1619,17 @@ static int lapic_resume(struct sys_device *dev)
1190 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr); 1619 apic_write(APIC_LVTERR, apic_pm_state.apic_lvterr);
1191 apic_write(APIC_ESR, 0); 1620 apic_write(APIC_ESR, 0);
1192 apic_read(APIC_ESR); 1621 apic_read(APIC_ESR);
1622
1193 local_irq_restore(flags); 1623 local_irq_restore(flags);
1624
1194 return 0; 1625 return 0;
1195} 1626}
1196 1627
1628/*
1629 * This device has no shutdown method - fully functioning local APICs
1630 * are needed on every CPU up until machine_halt/restart/poweroff.
1631 */
1632
1197static struct sysdev_class lapic_sysclass = { 1633static struct sysdev_class lapic_sysclass = {
1198 .name = "lapic", 1634 .name = "lapic",
1199 .resume = lapic_resume, 1635 .resume = lapic_resume,
@@ -1307,31 +1743,19 @@ __cpuinit int apic_is_clustered_box(void)
1307 return (clusters > 2); 1743 return (clusters > 2);
1308} 1744}
1309 1745
1310/* 1746static __init int setup_nox2apic(char *str)
1311 * APIC command line parameters
1312 */
1313static int __init apic_set_verbosity(char *str)
1314{ 1747{
1315 if (str == NULL) { 1748 disable_x2apic = 1;
1316 skip_ioapic_setup = 0; 1749 clear_cpu_cap(&boot_cpu_data, X86_FEATURE_X2APIC);
1317 ioapic_force = 1;
1318 return 0;
1319 }
1320 if (strcmp("debug", str) == 0)
1321 apic_verbosity = APIC_DEBUG;
1322 else if (strcmp("verbose", str) == 0)
1323 apic_verbosity = APIC_VERBOSE;
1324 else {
1325 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1326 " use apic=verbose or apic=debug\n", str);
1327 return -EINVAL;
1328 }
1329
1330 return 0; 1750 return 0;
1331} 1751}
1332early_param("apic", apic_set_verbosity); 1752early_param("nox2apic", setup_nox2apic);
1753
1333 1754
1334static __init int setup_disableapic(char *str) 1755/*
1756 * APIC command line parameters
1757 */
1758static int __init setup_disableapic(char *arg)
1335{ 1759{
1336 disable_apic = 1; 1760 disable_apic = 1;
1337 setup_clear_cpu_cap(X86_FEATURE_APIC); 1761 setup_clear_cpu_cap(X86_FEATURE_APIC);
@@ -1340,9 +1764,9 @@ static __init int setup_disableapic(char *str)
1340early_param("disableapic", setup_disableapic); 1764early_param("disableapic", setup_disableapic);
1341 1765
1342/* same as disableapic, for compatibility */ 1766/* same as disableapic, for compatibility */
1343static __init int setup_nolapic(char *str) 1767static int __init setup_nolapic(char *arg)
1344{ 1768{
1345 return setup_disableapic(str); 1769 return setup_disableapic(arg);
1346} 1770}
1347early_param("nolapic", setup_nolapic); 1771early_param("nolapic", setup_nolapic);
1348 1772
@@ -1353,14 +1777,19 @@ static int __init parse_lapic_timer_c2_ok(char *arg)
1353} 1777}
1354early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok); 1778early_param("lapic_timer_c2_ok", parse_lapic_timer_c2_ok);
1355 1779
1356static __init int setup_noapictimer(char *str) 1780static int __init parse_disable_apic_timer(char *arg)
1357{ 1781{
1358 if (str[0] != ' ' && str[0] != 0)
1359 return 0;
1360 disable_apic_timer = 1; 1782 disable_apic_timer = 1;
1361 return 1; 1783 return 0;
1362} 1784}
1363__setup("noapictimer", setup_noapictimer); 1785early_param("noapictimer", parse_disable_apic_timer);
1786
1787static int __init parse_nolapic_timer(char *arg)
1788{
1789 disable_apic_timer = 1;
1790 return 0;
1791}
1792early_param("nolapic_timer", parse_nolapic_timer);
1364 1793
1365static __init int setup_apicpmtimer(char *s) 1794static __init int setup_apicpmtimer(char *s)
1366{ 1795{
@@ -1370,6 +1799,31 @@ static __init int setup_apicpmtimer(char *s)
1370} 1799}
1371__setup("apicpmtimer", setup_apicpmtimer); 1800__setup("apicpmtimer", setup_apicpmtimer);
1372 1801
1802static int __init apic_set_verbosity(char *arg)
1803{
1804 if (!arg) {
1805#ifdef CONFIG_X86_64
1806 skip_ioapic_setup = 0;
1807 ioapic_force = 1;
1808 return 0;
1809#endif
1810 return -EINVAL;
1811 }
1812
1813 if (strcmp("debug", arg) == 0)
1814 apic_verbosity = APIC_DEBUG;
1815 else if (strcmp("verbose", arg) == 0)
1816 apic_verbosity = APIC_VERBOSE;
1817 else {
1818 printk(KERN_WARNING "APIC Verbosity level %s not recognised"
1819 " use apic=verbose or apic=debug\n", arg);
1820 return -EINVAL;
1821 }
1822
1823 return 0;
1824}
1825early_param("apic", apic_set_verbosity);
1826
1373static int __init lapic_insert_resource(void) 1827static int __init lapic_insert_resource(void)
1374{ 1828{
1375 if (!apic_phys) 1829 if (!apic_phys)
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index ee76eaad3001..7f0b45a5d788 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -3,22 +3,30 @@
3# 3#
4 4
5obj-y := intel_cacheinfo.o addon_cpuid_features.o 5obj-y := intel_cacheinfo.o addon_cpuid_features.o
6obj-y += proc.o feature_names.o 6obj-y += proc.o capflags.o powerflags.o common.o
7 7
8obj-$(CONFIG_X86_32) += common.o bugs.o 8obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o
9obj-$(CONFIG_X86_64) += common_64.o bugs_64.o 9obj-$(CONFIG_X86_64) += bugs_64.o
10obj-$(CONFIG_X86_32) += amd.o 10
11obj-$(CONFIG_X86_64) += amd_64.o 11obj-$(CONFIG_CPU_SUP_INTEL) += intel.o
12obj-$(CONFIG_X86_32) += cyrix.o 12obj-$(CONFIG_CPU_SUP_AMD) += amd.o
13obj-$(CONFIG_X86_32) += centaur.o 13obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o
14obj-$(CONFIG_X86_64) += centaur_64.o 14obj-$(CONFIG_CPU_SUP_CENTAUR_32) += centaur.o
15obj-$(CONFIG_X86_32) += transmeta.o 15obj-$(CONFIG_CPU_SUP_CENTAUR_64) += centaur_64.o
16obj-$(CONFIG_X86_32) += intel.o 16obj-$(CONFIG_CPU_SUP_TRANSMETA_32) += transmeta.o
17obj-$(CONFIG_X86_64) += intel_64.o 17obj-$(CONFIG_CPU_SUP_UMC_32) += umc.o
18obj-$(CONFIG_X86_32) += umc.o
19 18
20obj-$(CONFIG_X86_MCE) += mcheck/ 19obj-$(CONFIG_X86_MCE) += mcheck/
21obj-$(CONFIG_MTRR) += mtrr/ 20obj-$(CONFIG_MTRR) += mtrr/
22obj-$(CONFIG_CPU_FREQ) += cpufreq/ 21obj-$(CONFIG_CPU_FREQ) += cpufreq/
23 22
24obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o 23obj-$(CONFIG_X86_LOCAL_APIC) += perfctr-watchdog.o
24
25quiet_cmd_mkcapflags = MKCAP $@
26 cmd_mkcapflags = $(PERL) $(srctree)/$(src)/mkcapflags.pl $< $@
27
28cpufeature = $(src)/../../../../include/asm-x86/cpufeature.h
29
30targets += capflags.c
31$(obj)/capflags.c: $(cpufeature) $(src)/mkcapflags.pl FORCE
32 $(call if_changed,mkcapflags)
diff --git a/arch/x86/kernel/cpu/addon_cpuid_features.c b/arch/x86/kernel/cpu/addon_cpuid_features.c
index a6ef672adbba..0d9c993aa93e 100644
--- a/arch/x86/kernel/cpu/addon_cpuid_features.c
+++ b/arch/x86/kernel/cpu/addon_cpuid_features.c
@@ -7,6 +7,8 @@
7#include <asm/pat.h> 7#include <asm/pat.h>
8#include <asm/processor.h> 8#include <asm/processor.h>
9 9
10#include <mach_apic.h>
11
10struct cpuid_bit { 12struct cpuid_bit {
11 u16 feature; 13 u16 feature;
12 u8 reg; 14 u8 reg;
@@ -48,6 +50,92 @@ void __cpuinit init_scattered_cpuid_features(struct cpuinfo_x86 *c)
48 } 50 }
49} 51}
50 52
53/* leaf 0xb SMT level */
54#define SMT_LEVEL 0
55
56/* leaf 0xb sub-leaf types */
57#define INVALID_TYPE 0
58#define SMT_TYPE 1
59#define CORE_TYPE 2
60
61#define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff)
62#define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f)
63#define LEVEL_MAX_SIBLINGS(ebx) ((ebx) & 0xffff)
64
65/*
66 * Check for extended topology enumeration cpuid leaf 0xb and if it
67 * exists, use it for populating initial_apicid and cpu topology
68 * detection.
69 */
70void __cpuinit detect_extended_topology(struct cpuinfo_x86 *c)
71{
72#ifdef CONFIG_SMP
73 unsigned int eax, ebx, ecx, edx, sub_index;
74 unsigned int ht_mask_width, core_plus_mask_width;
75 unsigned int core_select_mask, core_level_siblings;
76
77 if (c->cpuid_level < 0xb)
78 return;
79
80 cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
81
82 /*
83 * check if the cpuid leaf 0xb is actually implemented.
84 */
85 if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
86 return;
87
88 set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
89
90 /*
91 * initial apic id, which also represents 32-bit extended x2apic id.
92 */
93 c->initial_apicid = edx;
94
95 /*
96 * Populate HT related information from sub-leaf level 0.
97 */
98 core_level_siblings = smp_num_siblings = LEVEL_MAX_SIBLINGS(ebx);
99 core_plus_mask_width = ht_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
100
101 sub_index = 1;
102 do {
103 cpuid_count(0xb, sub_index, &eax, &ebx, &ecx, &edx);
104
105 /*
106 * Check for the Core type in the implemented sub leaves.
107 */
108 if (LEAFB_SUBTYPE(ecx) == CORE_TYPE) {
109 core_level_siblings = LEVEL_MAX_SIBLINGS(ebx);
110 core_plus_mask_width = BITS_SHIFT_NEXT_LEVEL(eax);
111 break;
112 }
113
114 sub_index++;
115 } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE);
116
117 core_select_mask = (~(-1 << core_plus_mask_width)) >> ht_mask_width;
118
119#ifdef CONFIG_X86_32
120 c->cpu_core_id = phys_pkg_id(c->initial_apicid, ht_mask_width)
121 & core_select_mask;
122 c->phys_proc_id = phys_pkg_id(c->initial_apicid, core_plus_mask_width);
123#else
124 c->cpu_core_id = phys_pkg_id(ht_mask_width) & core_select_mask;
125 c->phys_proc_id = phys_pkg_id(core_plus_mask_width);
126#endif
127 c->x86_max_cores = (core_level_siblings / smp_num_siblings);
128
129
130 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
131 c->phys_proc_id);
132 if (c->x86_max_cores > 1)
133 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
134 c->cpu_core_id);
135 return;
136#endif
137}
138
51#ifdef CONFIG_X86_PAT 139#ifdef CONFIG_X86_PAT
52void __cpuinit validate_pat_support(struct cpuinfo_x86 *c) 140void __cpuinit validate_pat_support(struct cpuinfo_x86 *c)
53{ 141{
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 18514ed26104..32e73520adf7 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -1,13 +1,22 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/bitops.h> 2#include <linux/bitops.h>
3#include <linux/mm.h> 3#include <linux/mm.h>
4
4#include <asm/io.h> 5#include <asm/io.h>
5#include <asm/processor.h> 6#include <asm/processor.h>
6#include <asm/apic.h> 7#include <asm/apic.h>
7 8
9#ifdef CONFIG_X86_64
10# include <asm/numa_64.h>
11# include <asm/mmconfig.h>
12# include <asm/cacheflush.h>
13#endif
14
8#include <mach_apic.h> 15#include <mach_apic.h>
16
9#include "cpu.h" 17#include "cpu.h"
10 18
19#ifdef CONFIG_X86_32
11/* 20/*
12 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause 21 * B step AMD K6 before B 9730xxxx have hardware bugs that can cause
13 * misexecution of code under Linux. Owners of such processors should 22 * misexecution of code under Linux. Owners of such processors should
@@ -24,26 +33,273 @@
24extern void vide(void); 33extern void vide(void);
25__asm__(".align 4\nvide: ret"); 34__asm__(".align 4\nvide: ret");
26 35
27static void __cpuinit early_init_amd(struct cpuinfo_x86 *c) 36static void __cpuinit init_amd_k5(struct cpuinfo_x86 *c)
28{ 37{
29 if (cpuid_eax(0x80000000) >= 0x80000007) { 38/*
30 c->x86_power = cpuid_edx(0x80000007); 39 * General Systems BIOSen alias the cpu frequency registers
31 if (c->x86_power & (1<<8)) 40 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
32 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 41 * drivers subsequently pokes it, and changes the CPU speed.
42 * Workaround : Remove the unneeded alias.
43 */
44#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
45#define CBAR_ENB (0x80000000)
46#define CBAR_KEY (0X000000CB)
47 if (c->x86_model == 9 || c->x86_model == 10) {
48 if (inl (CBAR) & CBAR_ENB)
49 outl (0 | CBAR_KEY, CBAR);
33 } 50 }
34
35 /* Set MTRR capability flag if appropriate */
36 if (c->x86_model == 13 || c->x86_model == 9 ||
37 (c->x86_model == 8 && c->x86_mask >= 8))
38 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
39} 51}
40 52
41static void __cpuinit init_amd(struct cpuinfo_x86 *c) 53
54static void __cpuinit init_amd_k6(struct cpuinfo_x86 *c)
42{ 55{
43 u32 l, h; 56 u32 l, h;
44 int mbytes = num_physpages >> (20-PAGE_SHIFT); 57 int mbytes = num_physpages >> (20-PAGE_SHIFT);
45 int r;
46 58
59 if (c->x86_model < 6) {
60 /* Based on AMD doc 20734R - June 2000 */
61 if (c->x86_model == 0) {
62 clear_cpu_cap(c, X86_FEATURE_APIC);
63 set_cpu_cap(c, X86_FEATURE_PGE);
64 }
65 return;
66 }
67
68 if (c->x86_model == 6 && c->x86_mask == 1) {
69 const int K6_BUG_LOOP = 1000000;
70 int n;
71 void (*f_vide)(void);
72 unsigned long d, d2;
73
74 printk(KERN_INFO "AMD K6 stepping B detected - ");
75
76 /*
77 * It looks like AMD fixed the 2.6.2 bug and improved indirect
78 * calls at the same time.
79 */
80
81 n = K6_BUG_LOOP;
82 f_vide = vide;
83 rdtscl(d);
84 while (n--)
85 f_vide();
86 rdtscl(d2);
87 d = d2-d;
88
89 if (d > 20*K6_BUG_LOOP)
90 printk("system stability may be impaired when more than 32 MB are used.\n");
91 else
92 printk("probably OK (after B9730xxxx).\n");
93 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
94 }
95
96 /* K6 with old style WHCR */
97 if (c->x86_model < 8 ||
98 (c->x86_model == 8 && c->x86_mask < 8)) {
99 /* We can only write allocate on the low 508Mb */
100 if (mbytes > 508)
101 mbytes = 508;
102
103 rdmsr(MSR_K6_WHCR, l, h);
104 if ((l&0x0000FFFF) == 0) {
105 unsigned long flags;
106 l = (1<<0)|((mbytes/4)<<1);
107 local_irq_save(flags);
108 wbinvd();
109 wrmsr(MSR_K6_WHCR, l, h);
110 local_irq_restore(flags);
111 printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
112 mbytes);
113 }
114 return;
115 }
116
117 if ((c->x86_model == 8 && c->x86_mask > 7) ||
118 c->x86_model == 9 || c->x86_model == 13) {
119 /* The more serious chips .. */
120
121 if (mbytes > 4092)
122 mbytes = 4092;
123
124 rdmsr(MSR_K6_WHCR, l, h);
125 if ((l&0xFFFF0000) == 0) {
126 unsigned long flags;
127 l = ((mbytes>>2)<<22)|(1<<16);
128 local_irq_save(flags);
129 wbinvd();
130 wrmsr(MSR_K6_WHCR, l, h);
131 local_irq_restore(flags);
132 printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
133 mbytes);
134 }
135
136 return;
137 }
138
139 if (c->x86_model == 10) {
140 /* AMD Geode LX is model 10 */
141 /* placeholder for any needed mods */
142 return;
143 }
144}
145
146static void __cpuinit init_amd_k7(struct cpuinfo_x86 *c)
147{
148 u32 l, h;
149
150 /*
151 * Bit 15 of Athlon specific MSR 15, needs to be 0
152 * to enable SSE on Palomino/Morgan/Barton CPU's.
153 * If the BIOS didn't enable it already, enable it here.
154 */
155 if (c->x86_model >= 6 && c->x86_model <= 10) {
156 if (!cpu_has(c, X86_FEATURE_XMM)) {
157 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
158 rdmsr(MSR_K7_HWCR, l, h);
159 l &= ~0x00008000;
160 wrmsr(MSR_K7_HWCR, l, h);
161 set_cpu_cap(c, X86_FEATURE_XMM);
162 }
163 }
164
165 /*
166 * It's been determined by AMD that Athlons since model 8 stepping 1
167 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
168 * As per AMD technical note 27212 0.2
169 */
170 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
171 rdmsr(MSR_K7_CLK_CTL, l, h);
172 if ((l & 0xfff00000) != 0x20000000) {
173 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
174 ((l & 0x000fffff)|0x20000000));
175 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
176 }
177 }
178
179 set_cpu_cap(c, X86_FEATURE_K7);
180}
181#endif
182
183#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
184static int __cpuinit nearby_node(int apicid)
185{
186 int i, node;
187
188 for (i = apicid - 1; i >= 0; i--) {
189 node = apicid_to_node[i];
190 if (node != NUMA_NO_NODE && node_online(node))
191 return node;
192 }
193 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
194 node = apicid_to_node[i];
195 if (node != NUMA_NO_NODE && node_online(node))
196 return node;
197 }
198 return first_node(node_online_map); /* Shouldn't happen */
199}
200#endif
201
202/*
203 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
204 * Assumes number of cores is a power of two.
205 */
206static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
207{
208#ifdef CONFIG_X86_HT
209 unsigned bits;
210
211 bits = c->x86_coreid_bits;
212
213 /* Low order bits define the core id (index of core in socket) */
214 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
215 /* Convert the initial APIC ID into the socket ID */
216 c->phys_proc_id = c->initial_apicid >> bits;
217#endif
218}
219
220static void __cpuinit srat_detect_node(struct cpuinfo_x86 *c)
221{
222#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
223 int cpu = smp_processor_id();
224 int node;
225 unsigned apicid = hard_smp_processor_id();
226
227 node = c->phys_proc_id;
228 if (apicid_to_node[apicid] != NUMA_NO_NODE)
229 node = apicid_to_node[apicid];
230 if (!node_online(node)) {
231 /* Two possibilities here:
232 - The CPU is missing memory and no node was created.
233 In that case try picking one from a nearby CPU
234 - The APIC IDs differ from the HyperTransport node IDs
235 which the K8 northbridge parsing fills in.
236 Assume they are all increased by a constant offset,
237 but in the same order as the HT nodeids.
238 If that doesn't result in a usable node fall back to the
239 path for the previous case. */
240
241 int ht_nodeid = c->initial_apicid;
242
243 if (ht_nodeid >= 0 &&
244 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
245 node = apicid_to_node[ht_nodeid];
246 /* Pick a nearby node */
247 if (!node_online(node))
248 node = nearby_node(apicid);
249 }
250 numa_set_node(cpu, node);
251
252 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
253#endif
254}
255
256static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
257{
258#ifdef CONFIG_X86_HT
259 unsigned bits, ecx;
260
261 /* Multi core CPU? */
262 if (c->extended_cpuid_level < 0x80000008)
263 return;
264
265 ecx = cpuid_ecx(0x80000008);
266
267 c->x86_max_cores = (ecx & 0xff) + 1;
268
269 /* CPU telling us the core id bits shift? */
270 bits = (ecx >> 12) & 0xF;
271
272 /* Otherwise recompute */
273 if (bits == 0) {
274 while ((1 << bits) < c->x86_max_cores)
275 bits++;
276 }
277
278 c->x86_coreid_bits = bits;
279#endif
280}
281
282static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
283{
284 early_init_amd_mc(c);
285
286 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
287 if (c->x86_power & (1<<8))
288 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
289
290#ifdef CONFIG_X86_64
291 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
292#else
293 /* Set MTRR capability flag if appropriate */
294 if (c->x86 == 5)
295 if (c->x86_model == 13 || c->x86_model == 9 ||
296 (c->x86_model == 8 && c->x86_mask >= 8))
297 set_cpu_cap(c, X86_FEATURE_K6_MTRR);
298#endif
299}
300
301static void __cpuinit init_amd(struct cpuinfo_x86 *c)
302{
47#ifdef CONFIG_SMP 303#ifdef CONFIG_SMP
48 unsigned long long value; 304 unsigned long long value;
49 305
@@ -54,7 +310,7 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
54 * Errata 63 for SH-B3 steppings 310 * Errata 63 for SH-B3 steppings
55 * Errata 122 for all steppings (F+ have it disabled by default) 311 * Errata 122 for all steppings (F+ have it disabled by default)
56 */ 312 */
57 if (c->x86 == 15) { 313 if (c->x86 == 0xf) {
58 rdmsrl(MSR_K7_HWCR, value); 314 rdmsrl(MSR_K7_HWCR, value);
59 value |= 1 << 6; 315 value |= 1 << 6;
60 wrmsrl(MSR_K7_HWCR, value); 316 wrmsrl(MSR_K7_HWCR, value);
@@ -64,209 +320,119 @@ static void __cpuinit init_amd(struct cpuinfo_x86 *c)
64 early_init_amd(c); 320 early_init_amd(c);
65 321
66 /* 322 /*
67 * FIXME: We should handle the K5 here. Set up the write
68 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
69 * no bus pipeline)
70 */
71
72 /*
73 * Bit 31 in normal CPUID used for nonstandard 3DNow ID; 323 * Bit 31 in normal CPUID used for nonstandard 3DNow ID;
74 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway 324 * 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway
75 */ 325 */
76 clear_cpu_cap(c, 0*32+31); 326 clear_cpu_cap(c, 0*32+31);
77 327
78 r = get_model_name(c); 328#ifdef CONFIG_X86_64
329 /* On C+ stepping K8 rep microcode works well for copy/memset */
330 if (c->x86 == 0xf) {
331 u32 level;
79 332
80 switch (c->x86) { 333 level = cpuid_eax(1);
81 case 4: 334 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
82 /* 335 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
83 * General Systems BIOSen alias the cpu frequency registers
84 * of the Elan at 0x000df000. Unfortuantly, one of the Linux
85 * drivers subsequently pokes it, and changes the CPU speed.
86 * Workaround : Remove the unneeded alias.
87 */
88#define CBAR (0xfffc) /* Configuration Base Address (32-bit) */
89#define CBAR_ENB (0x80000000)
90#define CBAR_KEY (0X000000CB)
91 if (c->x86_model == 9 || c->x86_model == 10) {
92 if (inl (CBAR) & CBAR_ENB)
93 outl (0 | CBAR_KEY, CBAR);
94 }
95 break;
96 case 5:
97 if (c->x86_model < 6) {
98 /* Based on AMD doc 20734R - June 2000 */
99 if (c->x86_model == 0) {
100 clear_cpu_cap(c, X86_FEATURE_APIC);
101 set_cpu_cap(c, X86_FEATURE_PGE);
102 }
103 break;
104 }
105
106 if (c->x86_model == 6 && c->x86_mask == 1) {
107 const int K6_BUG_LOOP = 1000000;
108 int n;
109 void (*f_vide)(void);
110 unsigned long d, d2;
111
112 printk(KERN_INFO "AMD K6 stepping B detected - ");
113
114 /*
115 * It looks like AMD fixed the 2.6.2 bug and improved indirect
116 * calls at the same time.
117 */
118
119 n = K6_BUG_LOOP;
120 f_vide = vide;
121 rdtscl(d);
122 while (n--)
123 f_vide();
124 rdtscl(d2);
125 d = d2-d;
126
127 if (d > 20*K6_BUG_LOOP)
128 printk("system stability may be impaired when more than 32 MB are used.\n");
129 else
130 printk("probably OK (after B9730xxxx).\n");
131 printk(KERN_INFO "Please see http://membres.lycos.fr/poulot/k6bug.html\n");
132 }
133
134 /* K6 with old style WHCR */
135 if (c->x86_model < 8 ||
136 (c->x86_model == 8 && c->x86_mask < 8)) {
137 /* We can only write allocate on the low 508Mb */
138 if (mbytes > 508)
139 mbytes = 508;
140
141 rdmsr(MSR_K6_WHCR, l, h);
142 if ((l&0x0000FFFF) == 0) {
143 unsigned long flags;
144 l = (1<<0)|((mbytes/4)<<1);
145 local_irq_save(flags);
146 wbinvd();
147 wrmsr(MSR_K6_WHCR, l, h);
148 local_irq_restore(flags);
149 printk(KERN_INFO "Enabling old style K6 write allocation for %d Mb\n",
150 mbytes);
151 }
152 break;
153 }
154
155 if ((c->x86_model == 8 && c->x86_mask > 7) ||
156 c->x86_model == 9 || c->x86_model == 13) {
157 /* The more serious chips .. */
158
159 if (mbytes > 4092)
160 mbytes = 4092;
161
162 rdmsr(MSR_K6_WHCR, l, h);
163 if ((l&0xFFFF0000) == 0) {
164 unsigned long flags;
165 l = ((mbytes>>2)<<22)|(1<<16);
166 local_irq_save(flags);
167 wbinvd();
168 wrmsr(MSR_K6_WHCR, l, h);
169 local_irq_restore(flags);
170 printk(KERN_INFO "Enabling new style K6 write allocation for %d Mb\n",
171 mbytes);
172 }
173
174 break;
175 }
176
177 if (c->x86_model == 10) {
178 /* AMD Geode LX is model 10 */
179 /* placeholder for any needed mods */
180 break;
181 }
182 break;
183 case 6: /* An Athlon/Duron */
184
185 /*
186 * Bit 15 of Athlon specific MSR 15, needs to be 0
187 * to enable SSE on Palomino/Morgan/Barton CPU's.
188 * If the BIOS didn't enable it already, enable it here.
189 */
190 if (c->x86_model >= 6 && c->x86_model <= 10) {
191 if (!cpu_has(c, X86_FEATURE_XMM)) {
192 printk(KERN_INFO "Enabling disabled K7/SSE Support.\n");
193 rdmsr(MSR_K7_HWCR, l, h);
194 l &= ~0x00008000;
195 wrmsr(MSR_K7_HWCR, l, h);
196 set_cpu_cap(c, X86_FEATURE_XMM);
197 }
198 }
199
200 /*
201 * It's been determined by AMD that Athlons since model 8 stepping 1
202 * are more robust with CLK_CTL set to 200xxxxx instead of 600xxxxx
203 * As per AMD technical note 27212 0.2
204 */
205 if ((c->x86_model == 8 && c->x86_mask >= 1) || (c->x86_model > 8)) {
206 rdmsr(MSR_K7_CLK_CTL, l, h);
207 if ((l & 0xfff00000) != 0x20000000) {
208 printk ("CPU: CLK_CTL MSR was %x. Reprogramming to %x\n", l,
209 ((l & 0x000fffff)|0x20000000));
210 wrmsr(MSR_K7_CLK_CTL, (l & 0x000fffff)|0x20000000, h);
211 }
212 }
213 break;
214 } 336 }
337 if (c->x86 == 0x10 || c->x86 == 0x11)
338 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
339#else
340
341 /*
342 * FIXME: We should handle the K5 here. Set up the write
343 * range and also turn on MSR 83 bits 4 and 31 (write alloc,
344 * no bus pipeline)
345 */
215 346
216 switch (c->x86) { 347 switch (c->x86) {
217 case 15: 348 case 4:
218 /* Use K8 tuning for Fam10h and Fam11h */ 349 init_amd_k5(c);
219 case 0x10:
220 case 0x11:
221 set_cpu_cap(c, X86_FEATURE_K8);
222 break; 350 break;
223 case 6: 351 case 5:
224 set_cpu_cap(c, X86_FEATURE_K7); 352 init_amd_k6(c);
353 break;
354 case 6: /* An Athlon/Duron */
355 init_amd_k7(c);
225 break; 356 break;
226 } 357 }
358
359 /* K6s reports MCEs but don't actually have all the MSRs */
360 if (c->x86 < 6)
361 clear_cpu_cap(c, X86_FEATURE_MCE);
362#endif
363
364 /* Enable workaround for FXSAVE leak */
227 if (c->x86 >= 6) 365 if (c->x86 >= 6)
228 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK); 366 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
229 367
230 display_cacheinfo(c); 368 if (!c->x86_model_id[0]) {
231 369 switch (c->x86) {
232 if (cpuid_eax(0x80000000) >= 0x80000008) 370 case 0xf:
233 c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; 371 /* Should distinguish Models here, but this is only
372 a fallback anyways. */
373 strcpy(c->x86_model_id, "Hammer");
374 break;
375 }
376 }
234 377
235#ifdef CONFIG_X86_HT 378 display_cacheinfo(c);
236 /*
237 * On a AMD multi core setup the lower bits of the APIC id
238 * distinguish the cores.
239 */
240 if (c->x86_max_cores > 1) {
241 int cpu = smp_processor_id();
242 unsigned bits = (cpuid_ecx(0x80000008) >> 12) & 0xf;
243 379
244 if (bits == 0) { 380 /* Multi core CPU? */
245 while ((1 << bits) < c->x86_max_cores) 381 if (c->extended_cpuid_level >= 0x80000008) {
246 bits++; 382 amd_detect_cmp(c);
247 } 383 srat_detect_node(c);
248 c->cpu_core_id = c->phys_proc_id & ((1<<bits)-1);
249 c->phys_proc_id >>= bits;
250 printk(KERN_INFO "CPU %d(%d) -> Core %d\n",
251 cpu, c->x86_max_cores, c->cpu_core_id);
252 } 384 }
385
386#ifdef CONFIG_X86_32
387 detect_ht(c);
253#endif 388#endif
254 389
255 if (cpuid_eax(0x80000000) >= 0x80000006) { 390 if (c->extended_cpuid_level >= 0x80000006) {
256 if ((c->x86 == 0x10) && (cpuid_edx(0x80000006) & 0xf000)) 391 if ((c->x86 >= 0x0f) && (cpuid_edx(0x80000006) & 0xf000))
257 num_cache_leaves = 4; 392 num_cache_leaves = 4;
258 else 393 else
259 num_cache_leaves = 3; 394 num_cache_leaves = 3;
260 } 395 }
261 396
262 /* K6s reports MCEs but don't actually have all the MSRs */ 397 if (c->x86 >= 0xf && c->x86 <= 0x11)
263 if (c->x86 < 6) 398 set_cpu_cap(c, X86_FEATURE_K8);
264 clear_cpu_cap(c, X86_FEATURE_MCE);
265 399
266 if (cpu_has_xmm2) 400 if (cpu_has_xmm2) {
401 /* MFENCE stops RDTSC speculation */
267 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC); 402 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
403 }
404
405#ifdef CONFIG_X86_64
406 if (c->x86 == 0x10) {
407 /* do this for boot cpu */
408 if (c == &boot_cpu_data)
409 check_enable_amd_mmconf_dmi();
410
411 fam10h_check_enable_mmcfg();
412 }
413
414 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
415 unsigned long long tseg;
416
417 /*
418 * Split up direct mapping around the TSEG SMM area.
419 * Don't do it for gbpages because there seems very little
420 * benefit in doing so.
421 */
422 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
423 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
424 if ((tseg>>PMD_SHIFT) <
425 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
426 ((tseg>>PMD_SHIFT) <
427 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
428 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
429 set_memory_4k((unsigned long)__va(tseg), 1);
430 }
431 }
432#endif
268} 433}
269 434
435#ifdef CONFIG_X86_32
270static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size) 436static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int size)
271{ 437{
272 /* AMD errata T13 (order #21922) */ 438 /* AMD errata T13 (order #21922) */
@@ -279,10 +445,12 @@ static unsigned int __cpuinit amd_size_cache(struct cpuinfo_x86 *c, unsigned int
279 } 445 }
280 return size; 446 return size;
281} 447}
448#endif
282 449
283static struct cpu_dev amd_cpu_dev __cpuinitdata = { 450static struct cpu_dev amd_cpu_dev __cpuinitdata = {
284 .c_vendor = "AMD", 451 .c_vendor = "AMD",
285 .c_ident = { "AuthenticAMD" }, 452 .c_ident = { "AuthenticAMD" },
453#ifdef CONFIG_X86_32
286 .c_models = { 454 .c_models = {
287 { .vendor = X86_VENDOR_AMD, .family = 4, .model_names = 455 { .vendor = X86_VENDOR_AMD, .family = 4, .model_names =
288 { 456 {
@@ -295,9 +463,11 @@ static struct cpu_dev amd_cpu_dev __cpuinitdata = {
295 } 463 }
296 }, 464 },
297 }, 465 },
466 .c_size_cache = amd_size_cache,
467#endif
298 .c_early_init = early_init_amd, 468 .c_early_init = early_init_amd,
299 .c_init = init_amd, 469 .c_init = init_amd,
300 .c_size_cache = amd_size_cache, 470 .c_x86_vendor = X86_VENDOR_AMD,
301}; 471};
302 472
303cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev); 473cpu_dev_register(amd_cpu_dev);
diff --git a/arch/x86/kernel/cpu/amd_64.c b/arch/x86/kernel/cpu/amd_64.c
deleted file mode 100644
index d1692b2a41ff..000000000000
--- a/arch/x86/kernel/cpu/amd_64.c
+++ /dev/null
@@ -1,224 +0,0 @@
1#include <linux/init.h>
2#include <linux/mm.h>
3
4#include <asm/numa_64.h>
5#include <asm/mmconfig.h>
6#include <asm/cacheflush.h>
7
8#include <mach_apic.h>
9
10#include "cpu.h"
11
12int force_mwait __cpuinitdata;
13
14#ifdef CONFIG_NUMA
15static int __cpuinit nearby_node(int apicid)
16{
17 int i, node;
18
19 for (i = apicid - 1; i >= 0; i--) {
20 node = apicid_to_node[i];
21 if (node != NUMA_NO_NODE && node_online(node))
22 return node;
23 }
24 for (i = apicid + 1; i < MAX_LOCAL_APIC; i++) {
25 node = apicid_to_node[i];
26 if (node != NUMA_NO_NODE && node_online(node))
27 return node;
28 }
29 return first_node(node_online_map); /* Shouldn't happen */
30}
31#endif
32
33/*
34 * On a AMD dual core setup the lower bits of the APIC id distingush the cores.
35 * Assumes number of cores is a power of two.
36 */
37static void __cpuinit amd_detect_cmp(struct cpuinfo_x86 *c)
38{
39#ifdef CONFIG_SMP
40 unsigned bits;
41#ifdef CONFIG_NUMA
42 int cpu = smp_processor_id();
43 int node = 0;
44 unsigned apicid = hard_smp_processor_id();
45#endif
46 bits = c->x86_coreid_bits;
47
48 /* Low order bits define the core id (index of core in socket) */
49 c->cpu_core_id = c->initial_apicid & ((1 << bits)-1);
50 /* Convert the initial APIC ID into the socket ID */
51 c->phys_proc_id = c->initial_apicid >> bits;
52
53#ifdef CONFIG_NUMA
54 node = c->phys_proc_id;
55 if (apicid_to_node[apicid] != NUMA_NO_NODE)
56 node = apicid_to_node[apicid];
57 if (!node_online(node)) {
58 /* Two possibilities here:
59 - The CPU is missing memory and no node was created.
60 In that case try picking one from a nearby CPU
61 - The APIC IDs differ from the HyperTransport node IDs
62 which the K8 northbridge parsing fills in.
63 Assume they are all increased by a constant offset,
64 but in the same order as the HT nodeids.
65 If that doesn't result in a usable node fall back to the
66 path for the previous case. */
67
68 int ht_nodeid = c->initial_apicid;
69
70 if (ht_nodeid >= 0 &&
71 apicid_to_node[ht_nodeid] != NUMA_NO_NODE)
72 node = apicid_to_node[ht_nodeid];
73 /* Pick a nearby node */
74 if (!node_online(node))
75 node = nearby_node(apicid);
76 }
77 numa_set_node(cpu, node);
78
79 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
80#endif
81#endif
82}
83
84static void __cpuinit early_init_amd_mc(struct cpuinfo_x86 *c)
85{
86#ifdef CONFIG_SMP
87 unsigned bits, ecx;
88
89 /* Multi core CPU? */
90 if (c->extended_cpuid_level < 0x80000008)
91 return;
92
93 ecx = cpuid_ecx(0x80000008);
94
95 c->x86_max_cores = (ecx & 0xff) + 1;
96
97 /* CPU telling us the core id bits shift? */
98 bits = (ecx >> 12) & 0xF;
99
100 /* Otherwise recompute */
101 if (bits == 0) {
102 while ((1 << bits) < c->x86_max_cores)
103 bits++;
104 }
105
106 c->x86_coreid_bits = bits;
107
108#endif
109}
110
111static void __cpuinit early_init_amd(struct cpuinfo_x86 *c)
112{
113 early_init_amd_mc(c);
114
115 /* c->x86_power is 8000_0007 edx. Bit 8 is constant TSC */
116 if (c->x86_power & (1<<8))
117 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
118
119 set_cpu_cap(c, X86_FEATURE_SYSCALL32);
120}
121
122static void __cpuinit init_amd(struct cpuinfo_x86 *c)
123{
124 unsigned level;
125
126#ifdef CONFIG_SMP
127 unsigned long value;
128
129 /*
130 * Disable TLB flush filter by setting HWCR.FFDIS on K8
131 * bit 6 of msr C001_0015
132 *
133 * Errata 63 for SH-B3 steppings
134 * Errata 122 for all steppings (F+ have it disabled by default)
135 */
136 if (c->x86 == 0xf) {
137 rdmsrl(MSR_K8_HWCR, value);
138 value |= 1 << 6;
139 wrmsrl(MSR_K8_HWCR, value);
140 }
141#endif
142
143 /* Bit 31 in normal CPUID used for nonstandard 3DNow ID;
144 3DNow is IDd by bit 31 in extended CPUID (1*32+31) anyway */
145 clear_cpu_cap(c, 0*32+31);
146
147 /* On C+ stepping K8 rep microcode works well for copy/memset */
148 if (c->x86 == 0xf) {
149 level = cpuid_eax(1);
150 if((level >= 0x0f48 && level < 0x0f50) || level >= 0x0f58)
151 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
152 }
153 if (c->x86 == 0x10 || c->x86 == 0x11)
154 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
155
156 /* Enable workaround for FXSAVE leak */
157 if (c->x86 >= 6)
158 set_cpu_cap(c, X86_FEATURE_FXSAVE_LEAK);
159
160 level = get_model_name(c);
161 if (!level) {
162 switch (c->x86) {
163 case 0xf:
164 /* Should distinguish Models here, but this is only
165 a fallback anyways. */
166 strcpy(c->x86_model_id, "Hammer");
167 break;
168 }
169 }
170 display_cacheinfo(c);
171
172 /* Multi core CPU? */
173 if (c->extended_cpuid_level >= 0x80000008)
174 amd_detect_cmp(c);
175
176 if (c->extended_cpuid_level >= 0x80000006 &&
177 (cpuid_edx(0x80000006) & 0xf000))
178 num_cache_leaves = 4;
179 else
180 num_cache_leaves = 3;
181
182 if (c->x86 >= 0xf && c->x86 <= 0x11)
183 set_cpu_cap(c, X86_FEATURE_K8);
184
185 /* MFENCE stops RDTSC speculation */
186 set_cpu_cap(c, X86_FEATURE_MFENCE_RDTSC);
187
188 if (c->x86 == 0x10) {
189 /* do this for boot cpu */
190 if (c == &boot_cpu_data)
191 check_enable_amd_mmconf_dmi();
192
193 fam10h_check_enable_mmcfg();
194 }
195
196 if (c == &boot_cpu_data && c->x86 >= 0xf && c->x86 <= 0x11) {
197 unsigned long long tseg;
198
199 /*
200 * Split up direct mapping around the TSEG SMM area.
201 * Don't do it for gbpages because there seems very little
202 * benefit in doing so.
203 */
204 if (!rdmsrl_safe(MSR_K8_TSEG_ADDR, &tseg)) {
205 printk(KERN_DEBUG "tseg: %010llx\n", tseg);
206 if ((tseg>>PMD_SHIFT) <
207 (max_low_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) ||
208 ((tseg>>PMD_SHIFT) <
209 (max_pfn_mapped>>(PMD_SHIFT-PAGE_SHIFT)) &&
210 (tseg>>PMD_SHIFT) >= (1ULL<<(32 - PMD_SHIFT))))
211 set_memory_4k((unsigned long)__va(tseg), 1);
212 }
213 }
214}
215
216static struct cpu_dev amd_cpu_dev __cpuinitdata = {
217 .c_vendor = "AMD",
218 .c_ident = { "AuthenticAMD" },
219 .c_early_init = early_init_amd,
220 .c_init = init_amd,
221};
222
223cpu_vendor_dev_register(X86_VENDOR_AMD, &amd_cpu_dev);
224
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index a0534c04d38a..89bfdd9cacc6 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -289,7 +289,6 @@ static void __cpuinit init_c3(struct cpuinfo_x86 *c)
289 if (c->x86_model >= 6 && c->x86_model < 9) 289 if (c->x86_model >= 6 && c->x86_model < 9)
290 set_cpu_cap(c, X86_FEATURE_3DNOW); 290 set_cpu_cap(c, X86_FEATURE_3DNOW);
291 291
292 get_model_name(c);
293 display_cacheinfo(c); 292 display_cacheinfo(c);
294} 293}
295 294
@@ -475,6 +474,7 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
475 .c_early_init = early_init_centaur, 474 .c_early_init = early_init_centaur,
476 .c_init = init_centaur, 475 .c_init = init_centaur,
477 .c_size_cache = centaur_size_cache, 476 .c_size_cache = centaur_size_cache,
477 .c_x86_vendor = X86_VENDOR_CENTAUR,
478}; 478};
479 479
480cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev); 480cpu_dev_register(centaur_cpu_dev);
diff --git a/arch/x86/kernel/cpu/centaur_64.c b/arch/x86/kernel/cpu/centaur_64.c
index 1d181c40e2e1..a1625f5a1e78 100644
--- a/arch/x86/kernel/cpu/centaur_64.c
+++ b/arch/x86/kernel/cpu/centaur_64.c
@@ -16,9 +16,10 @@ static void __cpuinit early_init_centaur(struct cpuinfo_x86 *c)
16 16
17static void __cpuinit init_centaur(struct cpuinfo_x86 *c) 17static void __cpuinit init_centaur(struct cpuinfo_x86 *c)
18{ 18{
19 early_init_centaur(c);
20
19 if (c->x86 == 0x6 && c->x86_model >= 0xf) { 21 if (c->x86 == 0x6 && c->x86_model >= 0xf) {
20 c->x86_cache_alignment = c->x86_clflush_size * 2; 22 c->x86_cache_alignment = c->x86_clflush_size * 2;
21 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
22 set_cpu_cap(c, X86_FEATURE_REP_GOOD); 23 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
23 } 24 }
24 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC); 25 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
@@ -29,7 +30,8 @@ static struct cpu_dev centaur_cpu_dev __cpuinitdata = {
29 .c_ident = { "CentaurHauls" }, 30 .c_ident = { "CentaurHauls" },
30 .c_early_init = early_init_centaur, 31 .c_early_init = early_init_centaur,
31 .c_init = init_centaur, 32 .c_init = init_centaur,
33 .c_x86_vendor = X86_VENDOR_CENTAUR,
32}; 34};
33 35
34cpu_vendor_dev_register(X86_VENDOR_CENTAUR, &centaur_cpu_dev); 36cpu_dev_register(centaur_cpu_dev);
35 37
diff --git a/arch/x86/kernel/cpu/cmpxchg.c b/arch/x86/kernel/cpu/cmpxchg.c
new file mode 100644
index 000000000000..2056ccf572cc
--- /dev/null
+++ b/arch/x86/kernel/cpu/cmpxchg.c
@@ -0,0 +1,72 @@
1/*
2 * cmpxchg*() fallbacks for CPU not supporting these instructions
3 */
4
5#include <linux/kernel.h>
6#include <linux/smp.h>
7#include <linux/module.h>
8
9#ifndef CONFIG_X86_CMPXCHG
10unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
11{
12 u8 prev;
13 unsigned long flags;
14
15 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
16 local_irq_save(flags);
17 prev = *(u8 *)ptr;
18 if (prev == old)
19 *(u8 *)ptr = new;
20 local_irq_restore(flags);
21 return prev;
22}
23EXPORT_SYMBOL(cmpxchg_386_u8);
24
25unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
26{
27 u16 prev;
28 unsigned long flags;
29
30 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
31 local_irq_save(flags);
32 prev = *(u16 *)ptr;
33 if (prev == old)
34 *(u16 *)ptr = new;
35 local_irq_restore(flags);
36 return prev;
37}
38EXPORT_SYMBOL(cmpxchg_386_u16);
39
40unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
41{
42 u32 prev;
43 unsigned long flags;
44
45 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
46 local_irq_save(flags);
47 prev = *(u32 *)ptr;
48 if (prev == old)
49 *(u32 *)ptr = new;
50 local_irq_restore(flags);
51 return prev;
52}
53EXPORT_SYMBOL(cmpxchg_386_u32);
54#endif
55
56#ifndef CONFIG_X86_CMPXCHG64
57unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
58{
59 u64 prev;
60 unsigned long flags;
61
62 /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
63 local_irq_save(flags);
64 prev = *(u64 *)ptr;
65 if (prev == old)
66 *(u64 *)ptr = new;
67 local_irq_restore(flags);
68 return prev;
69}
70EXPORT_SYMBOL(cmpxchg_486_u64);
71#endif
72
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 4e456bd955bb..7581b62df184 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -1,28 +1,62 @@
1#include <linux/init.h> 1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
2#include <linux/string.h> 4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
3#include <linux/delay.h> 10#include <linux/delay.h>
4#include <linux/smp.h> 11#include <linux/smp.h>
5#include <linux/module.h>
6#include <linux/percpu.h> 12#include <linux/percpu.h>
7#include <linux/bootmem.h>
8#include <asm/processor.h>
9#include <asm/i387.h> 13#include <asm/i387.h>
10#include <asm/msr.h> 14#include <asm/msr.h>
11#include <asm/io.h> 15#include <asm/io.h>
16#include <asm/linkage.h>
12#include <asm/mmu_context.h> 17#include <asm/mmu_context.h>
13#include <asm/mtrr.h> 18#include <asm/mtrr.h>
14#include <asm/mce.h> 19#include <asm/mce.h>
15#include <asm/pat.h> 20#include <asm/pat.h>
16#include <asm/asm.h> 21#include <asm/asm.h>
22#include <asm/numa.h>
17#ifdef CONFIG_X86_LOCAL_APIC 23#ifdef CONFIG_X86_LOCAL_APIC
18#include <asm/mpspec.h> 24#include <asm/mpspec.h>
19#include <asm/apic.h> 25#include <asm/apic.h>
20#include <mach_apic.h> 26#include <mach_apic.h>
27#include <asm/genapic.h>
21#endif 28#endif
22 29
30#include <asm/pda.h>
31#include <asm/pgtable.h>
32#include <asm/processor.h>
33#include <asm/desc.h>
34#include <asm/atomic.h>
35#include <asm/proto.h>
36#include <asm/sections.h>
37#include <asm/setup.h>
38
23#include "cpu.h" 39#include "cpu.h"
24 40
41static struct cpu_dev *this_cpu __cpuinitdata;
42
43#ifdef CONFIG_X86_64
44/* We need valid kernel segments for data and code in long mode too
45 * IRET will check the segment types kkeil 2000/10/28
46 * Also sysret mandates a special GDT layout
47 */
48/* The TLS descriptors are currently at a different place compared to i386.
49 Hopefully nobody expects them at a fixed place (Wine?) */
25DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = { 50DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
51 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
52 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
53 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
54 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
55 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
56 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
57} };
58#else
59DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
26 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } }, 60 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00cf9a00 } } },
27 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } }, 61 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9200 } } },
28 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } }, 62 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00cffa00 } } },
@@ -56,17 +90,150 @@ DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
56 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } }, 90 [GDT_ENTRY_ESPFIX_SS] = { { { 0x00000000, 0x00c09200 } } },
57 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } }, 91 [GDT_ENTRY_PERCPU] = { { { 0x00000000, 0x00000000 } } },
58} }; 92} };
93#endif
59EXPORT_PER_CPU_SYMBOL_GPL(gdt_page); 94EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
60 95
61__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata; 96#ifdef CONFIG_X86_32
62
63static int cachesize_override __cpuinitdata = -1; 97static int cachesize_override __cpuinitdata = -1;
64static int disable_x86_serial_nr __cpuinitdata = 1; 98static int disable_x86_serial_nr __cpuinitdata = 1;
65 99
66struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {}; 100static int __init cachesize_setup(char *str)
101{
102 get_option(&str, &cachesize_override);
103 return 1;
104}
105__setup("cachesize=", cachesize_setup);
106
107static int __init x86_fxsr_setup(char *s)
108{
109 setup_clear_cpu_cap(X86_FEATURE_FXSR);
110 setup_clear_cpu_cap(X86_FEATURE_XMM);
111 return 1;
112}
113__setup("nofxsr", x86_fxsr_setup);
114
115static int __init x86_sep_setup(char *s)
116{
117 setup_clear_cpu_cap(X86_FEATURE_SEP);
118 return 1;
119}
120__setup("nosep", x86_sep_setup);
121
122/* Standard macro to see if a specific flag is changeable */
123static inline int flag_is_changeable_p(u32 flag)
124{
125 u32 f1, f2;
126
127 asm("pushfl\n\t"
128 "pushfl\n\t"
129 "popl %0\n\t"
130 "movl %0,%1\n\t"
131 "xorl %2,%0\n\t"
132 "pushl %0\n\t"
133 "popfl\n\t"
134 "pushfl\n\t"
135 "popl %0\n\t"
136 "popfl\n\t"
137 : "=&r" (f1), "=&r" (f2)
138 : "ir" (flag));
139
140 return ((f1^f2) & flag) != 0;
141}
142
143/* Probe for the CPUID instruction */
144static int __cpuinit have_cpuid_p(void)
145{
146 return flag_is_changeable_p(X86_EFLAGS_ID);
147}
148
149static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
150{
151 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
152 /* Disable processor serial number */
153 unsigned long lo, hi;
154 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
155 lo |= 0x200000;
156 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
157 printk(KERN_NOTICE "CPU serial number disabled.\n");
158 clear_cpu_cap(c, X86_FEATURE_PN);
159
160 /* Disabling the serial number may affect the cpuid level */
161 c->cpuid_level = cpuid_eax(0);
162 }
163}
164
165static int __init x86_serial_nr_setup(char *s)
166{
167 disable_x86_serial_nr = 0;
168 return 1;
169}
170__setup("serialnumber", x86_serial_nr_setup);
171#else
172static inline int flag_is_changeable_p(u32 flag)
173{
174 return 1;
175}
176/* Probe for the CPUID instruction */
177static inline int have_cpuid_p(void)
178{
179 return 1;
180}
181static inline void squash_the_stupid_serial_number(struct cpuinfo_x86 *c)
182{
183}
184#endif
185
186/*
187 * Naming convention should be: <Name> [(<Codename>)]
188 * This table only is used unless init_<vendor>() below doesn't set it;
189 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
190 *
191 */
192
193/* Look up CPU names by table lookup. */
194static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
195{
196 struct cpu_model_info *info;
197
198 if (c->x86_model >= 16)
199 return NULL; /* Range check */
200
201 if (!this_cpu)
202 return NULL;
203
204 info = this_cpu->c_models;
205
206 while (info && info->family) {
207 if (info->family == c->x86)
208 return info->model_names[c->x86_model];
209 info++;
210 }
211 return NULL; /* Not found */
212}
213
214__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
215
216/* Current gdt points %fs at the "master" per-cpu area: after this,
217 * it's on the real one. */
218void switch_to_new_gdt(void)
219{
220 struct desc_ptr gdt_descr;
221
222 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
223 gdt_descr.size = GDT_SIZE - 1;
224 load_gdt(&gdt_descr);
225#ifdef CONFIG_X86_32
226 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
227#endif
228}
229
230static struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
67 231
68static void __cpuinit default_init(struct cpuinfo_x86 *c) 232static void __cpuinit default_init(struct cpuinfo_x86 *c)
69{ 233{
234#ifdef CONFIG_X86_64
235 display_cacheinfo(c);
236#else
70 /* Not much we can do here... */ 237 /* Not much we can do here... */
71 /* Check if at least it has cpuid */ 238 /* Check if at least it has cpuid */
72 if (c->cpuid_level == -1) { 239 if (c->cpuid_level == -1) {
@@ -76,28 +243,22 @@ static void __cpuinit default_init(struct cpuinfo_x86 *c)
76 else if (c->x86 == 3) 243 else if (c->x86 == 3)
77 strcpy(c->x86_model_id, "386"); 244 strcpy(c->x86_model_id, "386");
78 } 245 }
246#endif
79} 247}
80 248
81static struct cpu_dev __cpuinitdata default_cpu = { 249static struct cpu_dev __cpuinitdata default_cpu = {
82 .c_init = default_init, 250 .c_init = default_init,
83 .c_vendor = "Unknown", 251 .c_vendor = "Unknown",
252 .c_x86_vendor = X86_VENDOR_UNKNOWN,
84}; 253};
85static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
86 254
87static int __init cachesize_setup(char *str) 255static void __cpuinit get_model_name(struct cpuinfo_x86 *c)
88{
89 get_option(&str, &cachesize_override);
90 return 1;
91}
92__setup("cachesize=", cachesize_setup);
93
94int __cpuinit get_model_name(struct cpuinfo_x86 *c)
95{ 256{
96 unsigned int *v; 257 unsigned int *v;
97 char *p, *q; 258 char *p, *q;
98 259
99 if (cpuid_eax(0x80000000) < 0x80000004) 260 if (c->extended_cpuid_level < 0x80000004)
100 return 0; 261 return;
101 262
102 v = (unsigned int *) c->x86_model_id; 263 v = (unsigned int *) c->x86_model_id;
103 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]); 264 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
@@ -116,30 +277,34 @@ int __cpuinit get_model_name(struct cpuinfo_x86 *c)
116 while (q <= &c->x86_model_id[48]) 277 while (q <= &c->x86_model_id[48])
117 *q++ = '\0'; /* Zero-pad the rest */ 278 *q++ = '\0'; /* Zero-pad the rest */
118 } 279 }
119
120 return 1;
121} 280}
122 281
123
124void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c) 282void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
125{ 283{
126 unsigned int n, dummy, ecx, edx, l2size; 284 unsigned int n, dummy, ebx, ecx, edx, l2size;
127 285
128 n = cpuid_eax(0x80000000); 286 n = c->extended_cpuid_level;
129 287
130 if (n >= 0x80000005) { 288 if (n >= 0x80000005) {
131 cpuid(0x80000005, &dummy, &dummy, &ecx, &edx); 289 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
132 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n", 290 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), D cache %dK (%d bytes/line)\n",
133 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF); 291 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
134 c->x86_cache_size = (ecx>>24)+(edx>>24); 292 c->x86_cache_size = (ecx>>24) + (edx>>24);
293#ifdef CONFIG_X86_64
294 /* On K8 L1 TLB is inclusive, so don't count it */
295 c->x86_tlbsize = 0;
296#endif
135 } 297 }
136 298
137 if (n < 0x80000006) /* Some chips just has a large L1. */ 299 if (n < 0x80000006) /* Some chips just has a large L1. */
138 return; 300 return;
139 301
140 ecx = cpuid_ecx(0x80000006); 302 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
141 l2size = ecx >> 16; 303 l2size = ecx >> 16;
142 304
305#ifdef CONFIG_X86_64
306 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
307#else
143 /* do processor-specific cache resizing */ 308 /* do processor-specific cache resizing */
144 if (this_cpu->c_size_cache) 309 if (this_cpu->c_size_cache)
145 l2size = this_cpu->c_size_cache(c, l2size); 310 l2size = this_cpu->c_size_cache(c, l2size);
@@ -150,116 +315,106 @@ void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
150 315
151 if (l2size == 0) 316 if (l2size == 0)
152 return; /* Again, no L2 cache is possible */ 317 return; /* Again, no L2 cache is possible */
318#endif
153 319
154 c->x86_cache_size = l2size; 320 c->x86_cache_size = l2size;
155 321
156 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n", 322 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
157 l2size, ecx & 0xFF); 323 l2size, ecx & 0xFF);
158} 324}
159 325
160/* 326void __cpuinit detect_ht(struct cpuinfo_x86 *c)
161 * Naming convention should be: <Name> [(<Codename>)]
162 * This table only is used unless init_<vendor>() below doesn't set it;
163 * in particular, if CPUID levels 0x80000002..4 are supported, this isn't used
164 *
165 */
166
167/* Look up CPU names by table lookup. */
168static char __cpuinit *table_lookup_model(struct cpuinfo_x86 *c)
169{ 327{
170 struct cpu_model_info *info; 328#ifdef CONFIG_X86_HT
329 u32 eax, ebx, ecx, edx;
330 int index_msb, core_bits;
171 331
172 if (c->x86_model >= 16) 332 if (!cpu_has(c, X86_FEATURE_HT))
173 return NULL; /* Range check */ 333 return;
174 334
175 if (!this_cpu) 335 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
176 return NULL; 336 goto out;
177 337
178 info = this_cpu->c_models; 338 if (cpu_has(c, X86_FEATURE_XTOPOLOGY))
339 return;
179 340
180 while (info && info->family) { 341 cpuid(1, &eax, &ebx, &ecx, &edx);
181 if (info->family == c->x86) 342
182 return info->model_names[c->x86_model]; 343 smp_num_siblings = (ebx & 0xff0000) >> 16;
183 info++; 344
345 if (smp_num_siblings == 1) {
346 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
347 } else if (smp_num_siblings > 1) {
348
349 if (smp_num_siblings > NR_CPUS) {
350 printk(KERN_WARNING "CPU: Unsupported number of siblings %d",
351 smp_num_siblings);
352 smp_num_siblings = 1;
353 return;
354 }
355
356 index_msb = get_count_order(smp_num_siblings);
357#ifdef CONFIG_X86_64
358 c->phys_proc_id = phys_pkg_id(index_msb);
359#else
360 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb);
361#endif
362
363 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
364
365 index_msb = get_count_order(smp_num_siblings);
366
367 core_bits = get_count_order(c->x86_max_cores);
368
369#ifdef CONFIG_X86_64
370 c->cpu_core_id = phys_pkg_id(index_msb) &
371 ((1 << core_bits) - 1);
372#else
373 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) &
374 ((1 << core_bits) - 1);
375#endif
184 } 376 }
185 return NULL; /* Not found */
186}
187 377
378out:
379 if ((c->x86_max_cores * smp_num_siblings) > 1) {
380 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
381 c->phys_proc_id);
382 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
383 c->cpu_core_id);
384 }
385#endif
386}
188 387
189static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) 388static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
190{ 389{
191 char *v = c->x86_vendor_id; 390 char *v = c->x86_vendor_id;
192 int i; 391 int i;
193 static int printed; 392 static int printed;
194 393
195 for (i = 0; i < X86_VENDOR_NUM; i++) { 394 for (i = 0; i < X86_VENDOR_NUM; i++) {
196 if (cpu_devs[i]) { 395 if (!cpu_devs[i])
197 if (!strcmp(v, cpu_devs[i]->c_ident[0]) || 396 break;
198 (cpu_devs[i]->c_ident[1] && 397
199 !strcmp(v, cpu_devs[i]->c_ident[1]))) { 398 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
200 c->x86_vendor = i; 399 (cpu_devs[i]->c_ident[1] &&
201 if (!early) 400 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
202 this_cpu = cpu_devs[i]; 401 this_cpu = cpu_devs[i];
203 return; 402 c->x86_vendor = this_cpu->c_x86_vendor;
204 } 403 return;
205 } 404 }
206 } 405 }
406
207 if (!printed) { 407 if (!printed) {
208 printed++; 408 printed++;
209 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n"); 409 printk(KERN_ERR "CPU: vendor_id '%s' unknown, using generic init.\n", v);
210 printk(KERN_ERR "CPU: Your system may be unstable.\n"); 410 printk(KERN_ERR "CPU: Your system may be unstable.\n");
211 } 411 }
412
212 c->x86_vendor = X86_VENDOR_UNKNOWN; 413 c->x86_vendor = X86_VENDOR_UNKNOWN;
213 this_cpu = &default_cpu; 414 this_cpu = &default_cpu;
214} 415}
215 416
216 417void __cpuinit cpu_detect(struct cpuinfo_x86 *c)
217static int __init x86_fxsr_setup(char *s)
218{
219 setup_clear_cpu_cap(X86_FEATURE_FXSR);
220 setup_clear_cpu_cap(X86_FEATURE_XMM);
221 return 1;
222}
223__setup("nofxsr", x86_fxsr_setup);
224
225
226static int __init x86_sep_setup(char *s)
227{
228 setup_clear_cpu_cap(X86_FEATURE_SEP);
229 return 1;
230}
231__setup("nosep", x86_sep_setup);
232
233
234/* Standard macro to see if a specific flag is changeable */
235static inline int flag_is_changeable_p(u32 flag)
236{
237 u32 f1, f2;
238
239 asm("pushfl\n\t"
240 "pushfl\n\t"
241 "popl %0\n\t"
242 "movl %0,%1\n\t"
243 "xorl %2,%0\n\t"
244 "pushl %0\n\t"
245 "popfl\n\t"
246 "pushfl\n\t"
247 "popl %0\n\t"
248 "popfl\n\t"
249 : "=&r" (f1), "=&r" (f2)
250 : "ir" (flag));
251
252 return ((f1^f2) & flag) != 0;
253}
254
255
256/* Probe for the CPUID instruction */
257static int __cpuinit have_cpuid_p(void)
258{
259 return flag_is_changeable_p(X86_EFLAGS_ID);
260}
261
262void __init cpu_detect(struct cpuinfo_x86 *c)
263{ 418{
264 /* Get vendor name */ 419 /* Get vendor name */
265 cpuid(0x00000000, (unsigned int *)&c->cpuid_level, 420 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
@@ -268,50 +423,87 @@ void __init cpu_detect(struct cpuinfo_x86 *c)
268 (unsigned int *)&c->x86_vendor_id[4]); 423 (unsigned int *)&c->x86_vendor_id[4]);
269 424
270 c->x86 = 4; 425 c->x86 = 4;
426 /* Intel-defined flags: level 0x00000001 */
271 if (c->cpuid_level >= 0x00000001) { 427 if (c->cpuid_level >= 0x00000001) {
272 u32 junk, tfms, cap0, misc; 428 u32 junk, tfms, cap0, misc;
273 cpuid(0x00000001, &tfms, &misc, &junk, &cap0); 429 cpuid(0x00000001, &tfms, &misc, &junk, &cap0);
274 c->x86 = (tfms >> 8) & 15; 430 c->x86 = (tfms >> 8) & 0xf;
275 c->x86_model = (tfms >> 4) & 15; 431 c->x86_model = (tfms >> 4) & 0xf;
432 c->x86_mask = tfms & 0xf;
276 if (c->x86 == 0xf) 433 if (c->x86 == 0xf)
277 c->x86 += (tfms >> 20) & 0xff; 434 c->x86 += (tfms >> 20) & 0xff;
278 if (c->x86 >= 0x6) 435 if (c->x86 >= 0x6)
279 c->x86_model += ((tfms >> 16) & 0xF) << 4; 436 c->x86_model += ((tfms >> 16) & 0xf) << 4;
280 c->x86_mask = tfms & 15;
281 if (cap0 & (1<<19)) { 437 if (cap0 & (1<<19)) {
282 c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
283 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8; 438 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
439 c->x86_cache_alignment = c->x86_clflush_size;
284 } 440 }
285 } 441 }
286} 442}
287static void __cpuinit early_get_cap(struct cpuinfo_x86 *c) 443
444static void __cpuinit get_cpu_cap(struct cpuinfo_x86 *c)
288{ 445{
289 u32 tfms, xlvl; 446 u32 tfms, xlvl;
290 unsigned int ebx; 447 u32 ebx;
291 448
292 memset(&c->x86_capability, 0, sizeof c->x86_capability); 449 /* Intel-defined flags: level 0x00000001 */
293 if (have_cpuid_p()) { 450 if (c->cpuid_level >= 0x00000001) {
294 /* Intel-defined flags: level 0x00000001 */ 451 u32 capability, excap;
295 if (c->cpuid_level >= 0x00000001) { 452 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
296 u32 capability, excap; 453 c->x86_capability[0] = capability;
297 cpuid(0x00000001, &tfms, &ebx, &excap, &capability); 454 c->x86_capability[4] = excap;
298 c->x86_capability[0] = capability; 455 }
299 c->x86_capability[4] = excap;
300 }
301 456
302 /* AMD-defined flags: level 0x80000001 */ 457 /* AMD-defined flags: level 0x80000001 */
303 xlvl = cpuid_eax(0x80000000); 458 xlvl = cpuid_eax(0x80000000);
304 if ((xlvl & 0xffff0000) == 0x80000000) { 459 c->extended_cpuid_level = xlvl;
305 if (xlvl >= 0x80000001) { 460 if ((xlvl & 0xffff0000) == 0x80000000) {
306 c->x86_capability[1] = cpuid_edx(0x80000001); 461 if (xlvl >= 0x80000001) {
307 c->x86_capability[6] = cpuid_ecx(0x80000001); 462 c->x86_capability[1] = cpuid_edx(0x80000001);
308 } 463 c->x86_capability[6] = cpuid_ecx(0x80000001);
309 } 464 }
465 }
310 466
467#ifdef CONFIG_X86_64
468 if (c->extended_cpuid_level >= 0x80000008) {
469 u32 eax = cpuid_eax(0x80000008);
470
471 c->x86_virt_bits = (eax >> 8) & 0xff;
472 c->x86_phys_bits = eax & 0xff;
311 } 473 }
474#endif
475
476 if (c->extended_cpuid_level >= 0x80000007)
477 c->x86_power = cpuid_edx(0x80000007);
312 478
313} 479}
314 480
481static void __cpuinit identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
482{
483#ifdef CONFIG_X86_32
484 int i;
485
486 /*
487 * First of all, decide if this is a 486 or higher
488 * It's a 486 if we can modify the AC flag
489 */
490 if (flag_is_changeable_p(X86_EFLAGS_AC))
491 c->x86 = 4;
492 else
493 c->x86 = 3;
494
495 for (i = 0; i < X86_VENDOR_NUM; i++)
496 if (cpu_devs[i] && cpu_devs[i]->c_identify) {
497 c->x86_vendor_id[0] = 0;
498 cpu_devs[i]->c_identify(c);
499 if (c->x86_vendor_id[0]) {
500 get_cpu_vendor(c);
501 break;
502 }
503 }
504#endif
505}
506
315/* 507/*
316 * Do minimum CPU detection early. 508 * Do minimum CPU detection early.
317 * Fields really needed: vendor, cpuid_level, family, model, mask, 509 * Fields really needed: vendor, cpuid_level, family, model, mask,
@@ -321,25 +513,61 @@ static void __cpuinit early_get_cap(struct cpuinfo_x86 *c)
321 * WARNING: this function is only called on the BP. Don't add code here 513 * WARNING: this function is only called on the BP. Don't add code here
322 * that is supposed to run on all CPUs. 514 * that is supposed to run on all CPUs.
323 */ 515 */
324static void __init early_cpu_detect(void) 516static void __init early_identify_cpu(struct cpuinfo_x86 *c)
325{ 517{
326 struct cpuinfo_x86 *c = &boot_cpu_data; 518#ifdef CONFIG_X86_64
327 519 c->x86_clflush_size = 64;
328 c->x86_cache_alignment = 32; 520#else
329 c->x86_clflush_size = 32; 521 c->x86_clflush_size = 32;
522#endif
523 c->x86_cache_alignment = c->x86_clflush_size;
524
525 memset(&c->x86_capability, 0, sizeof c->x86_capability);
526 c->extended_cpuid_level = 0;
330 527
331 if (!have_cpuid_p()) 528 if (!have_cpuid_p())
529 identify_cpu_without_cpuid(c);
530
531 /* cyrix could have cpuid enabled via c_identify()*/
532 if (!have_cpuid_p())
332 return; 533 return;
333 534
334 cpu_detect(c); 535 cpu_detect(c);
335 536
336 get_cpu_vendor(c, 1); 537 get_cpu_vendor(c);
337 538
338 early_get_cap(c); 539 get_cpu_cap(c);
339 540
340 if (c->x86_vendor != X86_VENDOR_UNKNOWN && 541 if (this_cpu->c_early_init)
341 cpu_devs[c->x86_vendor]->c_early_init) 542 this_cpu->c_early_init(c);
342 cpu_devs[c->x86_vendor]->c_early_init(c); 543
544 validate_pat_support(c);
545}
546
547void __init early_cpu_init(void)
548{
549 struct cpu_dev **cdev;
550 int count = 0;
551
552 printk("KERNEL supported cpus:\n");
553 for (cdev = __x86_cpu_dev_start; cdev < __x86_cpu_dev_end; cdev++) {
554 struct cpu_dev *cpudev = *cdev;
555 unsigned int j;
556
557 if (count >= X86_VENDOR_NUM)
558 break;
559 cpu_devs[count] = cpudev;
560 count++;
561
562 for (j = 0; j < 2; j++) {
563 if (!cpudev->c_ident[j])
564 continue;
565 printk(" %s %s\n", cpudev->c_vendor,
566 cpudev->c_ident[j]);
567 }
568 }
569
570 early_identify_cpu(&boot_cpu_data);
343} 571}
344 572
345/* 573/*
@@ -357,86 +585,41 @@ static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
357 585
358static void __cpuinit generic_identify(struct cpuinfo_x86 *c) 586static void __cpuinit generic_identify(struct cpuinfo_x86 *c)
359{ 587{
360 u32 tfms, xlvl; 588 c->extended_cpuid_level = 0;
361 unsigned int ebx;
362
363 if (have_cpuid_p()) {
364 /* Get vendor name */
365 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
366 (unsigned int *)&c->x86_vendor_id[0],
367 (unsigned int *)&c->x86_vendor_id[8],
368 (unsigned int *)&c->x86_vendor_id[4]);
369
370 get_cpu_vendor(c, 0);
371 /* Initialize the standard set of capabilities */
372 /* Note that the vendor-specific code below might override */
373 /* Intel-defined flags: level 0x00000001 */
374 if (c->cpuid_level >= 0x00000001) {
375 u32 capability, excap;
376 cpuid(0x00000001, &tfms, &ebx, &excap, &capability);
377 c->x86_capability[0] = capability;
378 c->x86_capability[4] = excap;
379 c->x86 = (tfms >> 8) & 15;
380 c->x86_model = (tfms >> 4) & 15;
381 if (c->x86 == 0xf)
382 c->x86 += (tfms >> 20) & 0xff;
383 if (c->x86 >= 0x6)
384 c->x86_model += ((tfms >> 16) & 0xF) << 4;
385 c->x86_mask = tfms & 15;
386 c->initial_apicid = (ebx >> 24) & 0xFF;
387#ifdef CONFIG_X86_HT
388 c->apicid = phys_pkg_id(c->initial_apicid, 0);
389 c->phys_proc_id = c->initial_apicid;
390#else
391 c->apicid = c->initial_apicid;
392#endif
393 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
394 c->x86_clflush_size = ((ebx >> 8) & 0xff) * 8;
395 } else {
396 /* Have CPUID level 0 only - unheard of */
397 c->x86 = 4;
398 }
399 589
400 /* AMD-defined flags: level 0x80000001 */ 590 if (!have_cpuid_p())
401 xlvl = cpuid_eax(0x80000000); 591 identify_cpu_without_cpuid(c);
402 if ((xlvl & 0xffff0000) == 0x80000000) {
403 if (xlvl >= 0x80000001) {
404 c->x86_capability[1] = cpuid_edx(0x80000001);
405 c->x86_capability[6] = cpuid_ecx(0x80000001);
406 }
407 if (xlvl >= 0x80000004)
408 get_model_name(c); /* Default name */
409 }
410 592
411 init_scattered_cpuid_features(c); 593 /* cyrix could have cpuid enabled via c_identify()*/
412 detect_nopl(c); 594 if (!have_cpuid_p())
413 } 595 return;
414}
415 596
416static void __cpuinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) 597 cpu_detect(c);
417{
418 if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr) {
419 /* Disable processor serial number */
420 unsigned long lo, hi;
421 rdmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
422 lo |= 0x200000;
423 wrmsr(MSR_IA32_BBL_CR_CTL, lo, hi);
424 printk(KERN_NOTICE "CPU serial number disabled.\n");
425 clear_cpu_cap(c, X86_FEATURE_PN);
426 598
427 /* Disabling the serial number may affect the cpuid level */ 599 get_cpu_vendor(c);
428 c->cpuid_level = cpuid_eax(0);
429 }
430}
431 600
432static int __init x86_serial_nr_setup(char *s) 601 get_cpu_cap(c);
433{
434 disable_x86_serial_nr = 0;
435 return 1;
436}
437__setup("serialnumber", x86_serial_nr_setup);
438 602
603 if (c->cpuid_level >= 0x00000001) {
604 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xFF;
605#ifdef CONFIG_X86_32
606# ifdef CONFIG_X86_HT
607 c->apicid = phys_pkg_id(c->initial_apicid, 0);
608# else
609 c->apicid = c->initial_apicid;
610# endif
611#endif
439 612
613#ifdef CONFIG_X86_HT
614 c->phys_proc_id = c->initial_apicid;
615#endif
616 }
617
618 get_model_name(c); /* Default name */
619
620 init_scattered_cpuid_features(c);
621 detect_nopl(c);
622}
440 623
441/* 624/*
442 * This does the hard work of actually picking apart the CPU stuff... 625 * This does the hard work of actually picking apart the CPU stuff...
@@ -448,30 +631,29 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
448 c->loops_per_jiffy = loops_per_jiffy; 631 c->loops_per_jiffy = loops_per_jiffy;
449 c->x86_cache_size = -1; 632 c->x86_cache_size = -1;
450 c->x86_vendor = X86_VENDOR_UNKNOWN; 633 c->x86_vendor = X86_VENDOR_UNKNOWN;
451 c->cpuid_level = -1; /* CPUID not detected */
452 c->x86_model = c->x86_mask = 0; /* So far unknown... */ 634 c->x86_model = c->x86_mask = 0; /* So far unknown... */
453 c->x86_vendor_id[0] = '\0'; /* Unset */ 635 c->x86_vendor_id[0] = '\0'; /* Unset */
454 c->x86_model_id[0] = '\0'; /* Unset */ 636 c->x86_model_id[0] = '\0'; /* Unset */
455 c->x86_max_cores = 1; 637 c->x86_max_cores = 1;
638 c->x86_coreid_bits = 0;
639#ifdef CONFIG_X86_64
640 c->x86_clflush_size = 64;
641#else
642 c->cpuid_level = -1; /* CPUID not detected */
456 c->x86_clflush_size = 32; 643 c->x86_clflush_size = 32;
644#endif
645 c->x86_cache_alignment = c->x86_clflush_size;
457 memset(&c->x86_capability, 0, sizeof c->x86_capability); 646 memset(&c->x86_capability, 0, sizeof c->x86_capability);
458 647
459 if (!have_cpuid_p()) {
460 /*
461 * First of all, decide if this is a 486 or higher
462 * It's a 486 if we can modify the AC flag
463 */
464 if (flag_is_changeable_p(X86_EFLAGS_AC))
465 c->x86 = 4;
466 else
467 c->x86 = 3;
468 }
469
470 generic_identify(c); 648 generic_identify(c);
471 649
472 if (this_cpu->c_identify) 650 if (this_cpu->c_identify)
473 this_cpu->c_identify(c); 651 this_cpu->c_identify(c);
474 652
653#ifdef CONFIG_X86_64
654 c->apicid = phys_pkg_id(0);
655#endif
656
475 /* 657 /*
476 * Vendor-specific initialization. In this section we 658 * Vendor-specific initialization. In this section we
477 * canonicalize the feature flags, meaning if there are 659 * canonicalize the feature flags, meaning if there are
@@ -505,6 +687,10 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
505 c->x86, c->x86_model); 687 c->x86, c->x86_model);
506 } 688 }
507 689
690#ifdef CONFIG_X86_64
691 detect_ht(c);
692#endif
693
508 /* 694 /*
509 * On SMP, boot_cpu_data holds the common feature set between 695 * On SMP, boot_cpu_data holds the common feature set between
510 * all CPUs; so make sure that we indicate which features are 696 * all CPUs; so make sure that we indicate which features are
@@ -513,7 +699,7 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
513 */ 699 */
514 if (c != &boot_cpu_data) { 700 if (c != &boot_cpu_data) {
515 /* AND the already accumulated flags with these */ 701 /* AND the already accumulated flags with these */
516 for (i = 0 ; i < NCAPINTS ; i++) 702 for (i = 0; i < NCAPINTS; i++)
517 boot_cpu_data.x86_capability[i] &= c->x86_capability[i]; 703 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
518 } 704 }
519 705
@@ -521,72 +707,79 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
521 for (i = 0; i < NCAPINTS; i++) 707 for (i = 0; i < NCAPINTS; i++)
522 c->x86_capability[i] &= ~cleared_cpu_caps[i]; 708 c->x86_capability[i] &= ~cleared_cpu_caps[i];
523 709
710#ifdef CONFIG_X86_MCE
524 /* Init Machine Check Exception if available. */ 711 /* Init Machine Check Exception if available. */
525 mcheck_init(c); 712 mcheck_init(c);
713#endif
526 714
527 select_idle_routine(c); 715 select_idle_routine(c);
716
717#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
718 numa_add_cpu(smp_processor_id());
719#endif
528} 720}
529 721
530void __init identify_boot_cpu(void) 722void __init identify_boot_cpu(void)
531{ 723{
532 identify_cpu(&boot_cpu_data); 724 identify_cpu(&boot_cpu_data);
725#ifdef CONFIG_X86_32
533 sysenter_setup(); 726 sysenter_setup();
534 enable_sep_cpu(); 727 enable_sep_cpu();
728#endif
535} 729}
536 730
537void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c) 731void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
538{ 732{
539 BUG_ON(c == &boot_cpu_data); 733 BUG_ON(c == &boot_cpu_data);
540 identify_cpu(c); 734 identify_cpu(c);
735#ifdef CONFIG_X86_32
541 enable_sep_cpu(); 736 enable_sep_cpu();
737#endif
542 mtrr_ap_init(); 738 mtrr_ap_init();
543} 739}
544 740
545#ifdef CONFIG_X86_HT 741struct msr_range {
546void __cpuinit detect_ht(struct cpuinfo_x86 *c) 742 unsigned min;
547{ 743 unsigned max;
548 u32 eax, ebx, ecx, edx; 744};
549 int index_msb, core_bits;
550
551 cpuid(1, &eax, &ebx, &ecx, &edx);
552
553 if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
554 return;
555
556 smp_num_siblings = (ebx & 0xff0000) >> 16;
557 745
558 if (smp_num_siblings == 1) { 746static struct msr_range msr_range_array[] __cpuinitdata = {
559 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); 747 { 0x00000000, 0x00000418},
560 } else if (smp_num_siblings > 1) { 748 { 0xc0000000, 0xc000040b},
749 { 0xc0010000, 0xc0010142},
750 { 0xc0011000, 0xc001103b},
751};
561 752
562 if (smp_num_siblings > NR_CPUS) { 753static void __cpuinit print_cpu_msr(void)
563 printk(KERN_WARNING "CPU: Unsupported number of the " 754{
564 "siblings %d", smp_num_siblings); 755 unsigned index;
565 smp_num_siblings = 1; 756 u64 val;
566 return; 757 int i;
758 unsigned index_min, index_max;
759
760 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
761 index_min = msr_range_array[i].min;
762 index_max = msr_range_array[i].max;
763 for (index = index_min; index < index_max; index++) {
764 if (rdmsrl_amd_safe(index, &val))
765 continue;
766 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
567 } 767 }
768 }
769}
568 770
569 index_msb = get_count_order(smp_num_siblings); 771static int show_msr __cpuinitdata;
570 c->phys_proc_id = phys_pkg_id(c->initial_apicid, index_msb); 772static __init int setup_show_msr(char *arg)
571 773{
572 printk(KERN_INFO "CPU: Physical Processor ID: %d\n", 774 int num;
573 c->phys_proc_id);
574
575 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
576
577 index_msb = get_count_order(smp_num_siblings) ;
578 775
579 core_bits = get_count_order(c->x86_max_cores); 776 get_option(&arg, &num);
580 777
581 c->cpu_core_id = phys_pkg_id(c->initial_apicid, index_msb) & 778 if (num > 0)
582 ((1 << core_bits) - 1); 779 show_msr = num;
583 780 return 1;
584 if (c->x86_max_cores > 1)
585 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
586 c->cpu_core_id);
587 }
588} 781}
589#endif 782__setup("show_msr=", setup_show_msr);
590 783
591static __init int setup_noclflush(char *arg) 784static __init int setup_noclflush(char *arg)
592{ 785{
@@ -605,17 +798,25 @@ void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
605 vendor = c->x86_vendor_id; 798 vendor = c->x86_vendor_id;
606 799
607 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor))) 800 if (vendor && strncmp(c->x86_model_id, vendor, strlen(vendor)))
608 printk("%s ", vendor); 801 printk(KERN_CONT "%s ", vendor);
609 802
610 if (!c->x86_model_id[0]) 803 if (c->x86_model_id[0])
611 printk("%d86", c->x86); 804 printk(KERN_CONT "%s", c->x86_model_id);
612 else 805 else
613 printk("%s", c->x86_model_id); 806 printk(KERN_CONT "%d86", c->x86);
614 807
615 if (c->x86_mask || c->cpuid_level >= 0) 808 if (c->x86_mask || c->cpuid_level >= 0)
616 printk(" stepping %02x\n", c->x86_mask); 809 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
617 else 810 else
618 printk("\n"); 811 printk(KERN_CONT "\n");
812
813#ifdef CONFIG_SMP
814 if (c->cpu_index < show_msr)
815 print_cpu_msr();
816#else
817 if (show_msr)
818 print_cpu_msr();
819#endif
619} 820}
620 821
621static __init int setup_disablecpuid(char *arg) 822static __init int setup_disablecpuid(char *arg)
@@ -631,19 +832,89 @@ __setup("clearcpuid=", setup_disablecpuid);
631 832
632cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE; 833cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
633 834
634void __init early_cpu_init(void) 835#ifdef CONFIG_X86_64
836struct x8664_pda **_cpu_pda __read_mostly;
837EXPORT_SYMBOL(_cpu_pda);
838
839struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
840
841char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
842
843void __cpuinit pda_init(int cpu)
844{
845 struct x8664_pda *pda = cpu_pda(cpu);
846
847 /* Setup up data that may be needed in __get_free_pages early */
848 loadsegment(fs, 0);
849 loadsegment(gs, 0);
850 /* Memory clobbers used to order PDA accessed */
851 mb();
852 wrmsrl(MSR_GS_BASE, pda);
853 mb();
854
855 pda->cpunumber = cpu;
856 pda->irqcount = -1;
857 pda->kernelstack = (unsigned long)stack_thread_info() -
858 PDA_STACKOFFSET + THREAD_SIZE;
859 pda->active_mm = &init_mm;
860 pda->mmu_state = 0;
861
862 if (cpu == 0) {
863 /* others are initialized in smpboot.c */
864 pda->pcurrent = &init_task;
865 pda->irqstackptr = boot_cpu_stack;
866 pda->irqstackptr += IRQSTACKSIZE - 64;
867 } else {
868 if (!pda->irqstackptr) {
869 pda->irqstackptr = (char *)
870 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
871 if (!pda->irqstackptr)
872 panic("cannot allocate irqstack for cpu %d",
873 cpu);
874 pda->irqstackptr += IRQSTACKSIZE - 64;
875 }
876
877 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
878 pda->nodenumber = cpu_to_node(cpu);
879 }
880}
881
882char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
883 DEBUG_STKSZ] __page_aligned_bss;
884
885extern asmlinkage void ignore_sysret(void);
886
887/* May not be marked __init: used by software suspend */
888void syscall_init(void)
635{ 889{
636 struct cpu_vendor_dev *cvdev; 890 /*
891 * LSTAR and STAR live in a bit strange symbiosis.
892 * They both write to the same internal register. STAR allows to
893 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
894 */
895 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
896 wrmsrl(MSR_LSTAR, system_call);
897 wrmsrl(MSR_CSTAR, ignore_sysret);
637 898
638 for (cvdev = __x86cpuvendor_start ; 899#ifdef CONFIG_IA32_EMULATION
639 cvdev < __x86cpuvendor_end ; 900 syscall32_cpu_init();
640 cvdev++) 901#endif
641 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
642 902
643 early_cpu_detect(); 903 /* Flags to clear on syscall */
644 validate_pat_support(&boot_cpu_data); 904 wrmsrl(MSR_SYSCALL_MASK,
905 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
645} 906}
646 907
908unsigned long kernel_eflags;
909
910/*
911 * Copies of the original ist values from the tss are only accessed during
912 * debugging, no special alignment required.
913 */
914DEFINE_PER_CPU(struct orig_ist, orig_ist);
915
916#else
917
647/* Make sure %fs is initialized properly in idle threads */ 918/* Make sure %fs is initialized properly in idle threads */
648struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs) 919struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
649{ 920{
@@ -651,25 +922,136 @@ struct pt_regs * __cpuinit idle_regs(struct pt_regs *regs)
651 regs->fs = __KERNEL_PERCPU; 922 regs->fs = __KERNEL_PERCPU;
652 return regs; 923 return regs;
653} 924}
654 925#endif
655/* Current gdt points %fs at the "master" per-cpu area: after this,
656 * it's on the real one. */
657void switch_to_new_gdt(void)
658{
659 struct desc_ptr gdt_descr;
660
661 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
662 gdt_descr.size = GDT_SIZE - 1;
663 load_gdt(&gdt_descr);
664 asm("mov %0, %%fs" : : "r" (__KERNEL_PERCPU) : "memory");
665}
666 926
667/* 927/*
668 * cpu_init() initializes state that is per-CPU. Some data is already 928 * cpu_init() initializes state that is per-CPU. Some data is already
669 * initialized (naturally) in the bootstrap process, such as the GDT 929 * initialized (naturally) in the bootstrap process, such as the GDT
670 * and IDT. We reload them nevertheless, this function acts as a 930 * and IDT. We reload them nevertheless, this function acts as a
671 * 'CPU state barrier', nothing should get across. 931 * 'CPU state barrier', nothing should get across.
932 * A lot of state is already set up in PDA init for 64 bit
672 */ 933 */
934#ifdef CONFIG_X86_64
935void __cpuinit cpu_init(void)
936{
937 int cpu = stack_smp_processor_id();
938 struct tss_struct *t = &per_cpu(init_tss, cpu);
939 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
940 unsigned long v;
941 char *estacks = NULL;
942 struct task_struct *me;
943 int i;
944
945 /* CPU 0 is initialised in head64.c */
946 if (cpu != 0)
947 pda_init(cpu);
948 else
949 estacks = boot_exception_stacks;
950
951 me = current;
952
953 if (cpu_test_and_set(cpu, cpu_initialized))
954 panic("CPU#%d already initialized!\n", cpu);
955
956 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
957
958 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
959
960 /*
961 * Initialize the per-CPU GDT with the boot GDT,
962 * and set up the GDT descriptor:
963 */
964
965 switch_to_new_gdt();
966 load_idt((const struct desc_ptr *)&idt_descr);
967
968 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
969 syscall_init();
970
971 wrmsrl(MSR_FS_BASE, 0);
972 wrmsrl(MSR_KERNEL_GS_BASE, 0);
973 barrier();
974
975 check_efer();
976 if (cpu != 0 && x2apic)
977 enable_x2apic();
978
979 /*
980 * set up and load the per-CPU TSS
981 */
982 if (!orig_ist->ist[0]) {
983 static const unsigned int order[N_EXCEPTION_STACKS] = {
984 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
985 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
986 };
987 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
988 if (cpu) {
989 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
990 if (!estacks)
991 panic("Cannot allocate exception "
992 "stack %ld %d\n", v, cpu);
993 }
994 estacks += PAGE_SIZE << order[v];
995 orig_ist->ist[v] = t->x86_tss.ist[v] =
996 (unsigned long)estacks;
997 }
998 }
999
1000 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
1001 /*
1002 * <= is required because the CPU will access up to
1003 * 8 bits beyond the end of the IO permission bitmap.
1004 */
1005 for (i = 0; i <= IO_BITMAP_LONGS; i++)
1006 t->io_bitmap[i] = ~0UL;
1007
1008 atomic_inc(&init_mm.mm_count);
1009 me->active_mm = &init_mm;
1010 if (me->mm)
1011 BUG();
1012 enter_lazy_tlb(&init_mm, me);
1013
1014 load_sp0(t, &current->thread);
1015 set_tss_desc(cpu, t);
1016 load_TR_desc();
1017 load_LDT(&init_mm.context);
1018
1019#ifdef CONFIG_KGDB
1020 /*
1021 * If the kgdb is connected no debug regs should be altered. This
1022 * is only applicable when KGDB and a KGDB I/O module are built
1023 * into the kernel and you are using early debugging with
1024 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
1025 */
1026 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
1027 arch_kgdb_ops.correct_hw_break();
1028 else {
1029#endif
1030 /*
1031 * Clear all 6 debug registers:
1032 */
1033
1034 set_debugreg(0UL, 0);
1035 set_debugreg(0UL, 1);
1036 set_debugreg(0UL, 2);
1037 set_debugreg(0UL, 3);
1038 set_debugreg(0UL, 6);
1039 set_debugreg(0UL, 7);
1040#ifdef CONFIG_KGDB
1041 /* If the kgdb is connected no debug regs should be altered. */
1042 }
1043#endif
1044
1045 fpu_init();
1046
1047 raw_local_save_flags(kernel_eflags);
1048
1049 if (is_uv_system())
1050 uv_cpu_init();
1051}
1052
1053#else
1054
673void __cpuinit cpu_init(void) 1055void __cpuinit cpu_init(void)
674{ 1056{
675 int cpu = smp_processor_id(); 1057 int cpu = smp_processor_id();
@@ -723,9 +1105,20 @@ void __cpuinit cpu_init(void)
723 /* 1105 /*
724 * Force FPU initialization: 1106 * Force FPU initialization:
725 */ 1107 */
726 current_thread_info()->status = 0; 1108 if (cpu_has_xsave)
1109 current_thread_info()->status = TS_XSAVE;
1110 else
1111 current_thread_info()->status = 0;
727 clear_used_math(); 1112 clear_used_math();
728 mxcsr_feature_mask_init(); 1113 mxcsr_feature_mask_init();
1114
1115 /*
1116 * Boot processor to setup the FP and extended state context info.
1117 */
1118 if (!smp_processor_id())
1119 init_thread_xstate();
1120
1121 xsave_init();
729} 1122}
730 1123
731#ifdef CONFIG_HOTPLUG_CPU 1124#ifdef CONFIG_HOTPLUG_CPU
@@ -739,3 +1132,5 @@ void __cpuinit cpu_uninit(void)
739 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm; 1132 per_cpu(cpu_tlbstate, cpu).active_mm = &init_mm;
740} 1133}
741#endif 1134#endif
1135
1136#endif
diff --git a/arch/x86/kernel/cpu/common_64.c b/arch/x86/kernel/cpu/common_64.c
deleted file mode 100644
index 305b465889b0..000000000000
--- a/arch/x86/kernel/cpu/common_64.c
+++ /dev/null
@@ -1,763 +0,0 @@
1#include <linux/init.h>
2#include <linux/kernel.h>
3#include <linux/sched.h>
4#include <linux/string.h>
5#include <linux/bootmem.h>
6#include <linux/bitops.h>
7#include <linux/module.h>
8#include <linux/kgdb.h>
9#include <linux/topology.h>
10#include <linux/delay.h>
11#include <linux/smp.h>
12#include <linux/percpu.h>
13#include <asm/i387.h>
14#include <asm/msr.h>
15#include <asm/io.h>
16#include <asm/linkage.h>
17#include <asm/mmu_context.h>
18#include <asm/mtrr.h>
19#include <asm/mce.h>
20#include <asm/pat.h>
21#include <asm/asm.h>
22#include <asm/numa.h>
23#ifdef CONFIG_X86_LOCAL_APIC
24#include <asm/mpspec.h>
25#include <asm/apic.h>
26#include <mach_apic.h>
27#endif
28#include <asm/pda.h>
29#include <asm/pgtable.h>
30#include <asm/processor.h>
31#include <asm/desc.h>
32#include <asm/atomic.h>
33#include <asm/proto.h>
34#include <asm/sections.h>
35#include <asm/setup.h>
36#include <asm/genapic.h>
37
38#include "cpu.h"
39
40/* We need valid kernel segments for data and code in long mode too
41 * IRET will check the segment types kkeil 2000/10/28
42 * Also sysret mandates a special GDT layout
43 */
44/* The TLS descriptors are currently at a different place compared to i386.
45 Hopefully nobody expects them at a fixed place (Wine?) */
46DEFINE_PER_CPU(struct gdt_page, gdt_page) = { .gdt = {
47 [GDT_ENTRY_KERNEL32_CS] = { { { 0x0000ffff, 0x00cf9b00 } } },
48 [GDT_ENTRY_KERNEL_CS] = { { { 0x0000ffff, 0x00af9b00 } } },
49 [GDT_ENTRY_KERNEL_DS] = { { { 0x0000ffff, 0x00cf9300 } } },
50 [GDT_ENTRY_DEFAULT_USER32_CS] = { { { 0x0000ffff, 0x00cffb00 } } },
51 [GDT_ENTRY_DEFAULT_USER_DS] = { { { 0x0000ffff, 0x00cff300 } } },
52 [GDT_ENTRY_DEFAULT_USER_CS] = { { { 0x0000ffff, 0x00affb00 } } },
53} };
54EXPORT_PER_CPU_SYMBOL_GPL(gdt_page);
55
56__u32 cleared_cpu_caps[NCAPINTS] __cpuinitdata;
57
58/* Current gdt points %fs at the "master" per-cpu area: after this,
59 * it's on the real one. */
60void switch_to_new_gdt(void)
61{
62 struct desc_ptr gdt_descr;
63
64 gdt_descr.address = (long)get_cpu_gdt_table(smp_processor_id());
65 gdt_descr.size = GDT_SIZE - 1;
66 load_gdt(&gdt_descr);
67}
68
69struct cpu_dev *cpu_devs[X86_VENDOR_NUM] = {};
70
71static void __cpuinit default_init(struct cpuinfo_x86 *c)
72{
73 display_cacheinfo(c);
74}
75
76static struct cpu_dev __cpuinitdata default_cpu = {
77 .c_init = default_init,
78 .c_vendor = "Unknown",
79};
80static struct cpu_dev *this_cpu __cpuinitdata = &default_cpu;
81
82int __cpuinit get_model_name(struct cpuinfo_x86 *c)
83{
84 unsigned int *v;
85
86 if (c->extended_cpuid_level < 0x80000004)
87 return 0;
88
89 v = (unsigned int *) c->x86_model_id;
90 cpuid(0x80000002, &v[0], &v[1], &v[2], &v[3]);
91 cpuid(0x80000003, &v[4], &v[5], &v[6], &v[7]);
92 cpuid(0x80000004, &v[8], &v[9], &v[10], &v[11]);
93 c->x86_model_id[48] = 0;
94 return 1;
95}
96
97
98void __cpuinit display_cacheinfo(struct cpuinfo_x86 *c)
99{
100 unsigned int n, dummy, ebx, ecx, edx;
101
102 n = c->extended_cpuid_level;
103
104 if (n >= 0x80000005) {
105 cpuid(0x80000005, &dummy, &ebx, &ecx, &edx);
106 printk(KERN_INFO "CPU: L1 I Cache: %dK (%d bytes/line), "
107 "D cache %dK (%d bytes/line)\n",
108 edx>>24, edx&0xFF, ecx>>24, ecx&0xFF);
109 c->x86_cache_size = (ecx>>24) + (edx>>24);
110 /* On K8 L1 TLB is inclusive, so don't count it */
111 c->x86_tlbsize = 0;
112 }
113
114 if (n >= 0x80000006) {
115 cpuid(0x80000006, &dummy, &ebx, &ecx, &edx);
116 ecx = cpuid_ecx(0x80000006);
117 c->x86_cache_size = ecx >> 16;
118 c->x86_tlbsize += ((ebx >> 16) & 0xfff) + (ebx & 0xfff);
119
120 printk(KERN_INFO "CPU: L2 Cache: %dK (%d bytes/line)\n",
121 c->x86_cache_size, ecx & 0xFF);
122 }
123}
124
125void __cpuinit detect_ht(struct cpuinfo_x86 *c)
126{
127#ifdef CONFIG_SMP
128 u32 eax, ebx, ecx, edx;
129 int index_msb, core_bits;
130
131 cpuid(1, &eax, &ebx, &ecx, &edx);
132
133
134 if (!cpu_has(c, X86_FEATURE_HT))
135 return;
136 if (cpu_has(c, X86_FEATURE_CMP_LEGACY))
137 goto out;
138
139 smp_num_siblings = (ebx & 0xff0000) >> 16;
140
141 if (smp_num_siblings == 1) {
142 printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
143 } else if (smp_num_siblings > 1) {
144
145 if (smp_num_siblings > NR_CPUS) {
146 printk(KERN_WARNING "CPU: Unsupported number of "
147 "siblings %d", smp_num_siblings);
148 smp_num_siblings = 1;
149 return;
150 }
151
152 index_msb = get_count_order(smp_num_siblings);
153 c->phys_proc_id = phys_pkg_id(index_msb);
154
155 smp_num_siblings = smp_num_siblings / c->x86_max_cores;
156
157 index_msb = get_count_order(smp_num_siblings);
158
159 core_bits = get_count_order(c->x86_max_cores);
160
161 c->cpu_core_id = phys_pkg_id(index_msb) &
162 ((1 << core_bits) - 1);
163 }
164out:
165 if ((c->x86_max_cores * smp_num_siblings) > 1) {
166 printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
167 c->phys_proc_id);
168 printk(KERN_INFO "CPU: Processor Core ID: %d\n",
169 c->cpu_core_id);
170 }
171
172#endif
173}
174
175static void __cpuinit get_cpu_vendor(struct cpuinfo_x86 *c)
176{
177 char *v = c->x86_vendor_id;
178 int i;
179 static int printed;
180
181 for (i = 0; i < X86_VENDOR_NUM; i++) {
182 if (cpu_devs[i]) {
183 if (!strcmp(v, cpu_devs[i]->c_ident[0]) ||
184 (cpu_devs[i]->c_ident[1] &&
185 !strcmp(v, cpu_devs[i]->c_ident[1]))) {
186 c->x86_vendor = i;
187 this_cpu = cpu_devs[i];
188 return;
189 }
190 }
191 }
192 if (!printed) {
193 printed++;
194 printk(KERN_ERR "CPU: Vendor unknown, using generic init.\n");
195 printk(KERN_ERR "CPU: Your system may be unstable.\n");
196 }
197 c->x86_vendor = X86_VENDOR_UNKNOWN;
198}
199
200static void __init early_cpu_support_print(void)
201{
202 int i,j;
203 struct cpu_dev *cpu_devx;
204
205 printk("KERNEL supported cpus:\n");
206 for (i = 0; i < X86_VENDOR_NUM; i++) {
207 cpu_devx = cpu_devs[i];
208 if (!cpu_devx)
209 continue;
210 for (j = 0; j < 2; j++) {
211 if (!cpu_devx->c_ident[j])
212 continue;
213 printk(" %s %s\n", cpu_devx->c_vendor,
214 cpu_devx->c_ident[j]);
215 }
216 }
217}
218
219/*
220 * The NOPL instruction is supposed to exist on all CPUs with
221 * family >= 6, unfortunately, that's not true in practice because
222 * of early VIA chips and (more importantly) broken virtualizers that
223 * are not easy to detect. Hence, probe for it based on first
224 * principles.
225 *
226 * Note: no 64-bit chip is known to lack these, but put the code here
227 * for consistency with 32 bits, and to make it utterly trivial to
228 * diagnose the problem should it ever surface.
229 */
230static void __cpuinit detect_nopl(struct cpuinfo_x86 *c)
231{
232 const u32 nopl_signature = 0x888c53b1; /* Random number */
233 u32 has_nopl = nopl_signature;
234
235 clear_cpu_cap(c, X86_FEATURE_NOPL);
236 if (c->x86 >= 6) {
237 asm volatile("\n"
238 "1: .byte 0x0f,0x1f,0xc0\n" /* nopl %eax */
239 "2:\n"
240 " .section .fixup,\"ax\"\n"
241 "3: xor %0,%0\n"
242 " jmp 2b\n"
243 " .previous\n"
244 _ASM_EXTABLE(1b,3b)
245 : "+a" (has_nopl));
246
247 if (has_nopl == nopl_signature)
248 set_cpu_cap(c, X86_FEATURE_NOPL);
249 }
250}
251
252static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c);
253
254void __init early_cpu_init(void)
255{
256 struct cpu_vendor_dev *cvdev;
257
258 for (cvdev = __x86cpuvendor_start ;
259 cvdev < __x86cpuvendor_end ;
260 cvdev++)
261 cpu_devs[cvdev->vendor] = cvdev->cpu_dev;
262 early_cpu_support_print();
263 early_identify_cpu(&boot_cpu_data);
264}
265
266/* Do some early cpuid on the boot CPU to get some parameter that are
267 needed before check_bugs. Everything advanced is in identify_cpu
268 below. */
269static void __cpuinit early_identify_cpu(struct cpuinfo_x86 *c)
270{
271 u32 tfms, xlvl;
272
273 c->loops_per_jiffy = loops_per_jiffy;
274 c->x86_cache_size = -1;
275 c->x86_vendor = X86_VENDOR_UNKNOWN;
276 c->x86_model = c->x86_mask = 0; /* So far unknown... */
277 c->x86_vendor_id[0] = '\0'; /* Unset */
278 c->x86_model_id[0] = '\0'; /* Unset */
279 c->x86_clflush_size = 64;
280 c->x86_cache_alignment = c->x86_clflush_size;
281 c->x86_max_cores = 1;
282 c->x86_coreid_bits = 0;
283 c->extended_cpuid_level = 0;
284 memset(&c->x86_capability, 0, sizeof c->x86_capability);
285
286 /* Get vendor name */
287 cpuid(0x00000000, (unsigned int *)&c->cpuid_level,
288 (unsigned int *)&c->x86_vendor_id[0],
289 (unsigned int *)&c->x86_vendor_id[8],
290 (unsigned int *)&c->x86_vendor_id[4]);
291
292 get_cpu_vendor(c);
293
294 /* Initialize the standard set of capabilities */
295 /* Note that the vendor-specific code below might override */
296
297 /* Intel-defined flags: level 0x00000001 */
298 if (c->cpuid_level >= 0x00000001) {
299 __u32 misc;
300 cpuid(0x00000001, &tfms, &misc, &c->x86_capability[4],
301 &c->x86_capability[0]);
302 c->x86 = (tfms >> 8) & 0xf;
303 c->x86_model = (tfms >> 4) & 0xf;
304 c->x86_mask = tfms & 0xf;
305 if (c->x86 == 0xf)
306 c->x86 += (tfms >> 20) & 0xff;
307 if (c->x86 >= 0x6)
308 c->x86_model += ((tfms >> 16) & 0xF) << 4;
309 if (test_cpu_cap(c, X86_FEATURE_CLFLSH))
310 c->x86_clflush_size = ((misc >> 8) & 0xff) * 8;
311 } else {
312 /* Have CPUID level 0 only - unheard of */
313 c->x86 = 4;
314 }
315
316 c->initial_apicid = (cpuid_ebx(1) >> 24) & 0xff;
317#ifdef CONFIG_SMP
318 c->phys_proc_id = c->initial_apicid;
319#endif
320 /* AMD-defined flags: level 0x80000001 */
321 xlvl = cpuid_eax(0x80000000);
322 c->extended_cpuid_level = xlvl;
323 if ((xlvl & 0xffff0000) == 0x80000000) {
324 if (xlvl >= 0x80000001) {
325 c->x86_capability[1] = cpuid_edx(0x80000001);
326 c->x86_capability[6] = cpuid_ecx(0x80000001);
327 }
328 if (xlvl >= 0x80000004)
329 get_model_name(c); /* Default name */
330 }
331
332 /* Transmeta-defined flags: level 0x80860001 */
333 xlvl = cpuid_eax(0x80860000);
334 if ((xlvl & 0xffff0000) == 0x80860000) {
335 /* Don't set x86_cpuid_level here for now to not confuse. */
336 if (xlvl >= 0x80860001)
337 c->x86_capability[2] = cpuid_edx(0x80860001);
338 }
339
340 if (c->extended_cpuid_level >= 0x80000007)
341 c->x86_power = cpuid_edx(0x80000007);
342
343 if (c->extended_cpuid_level >= 0x80000008) {
344 u32 eax = cpuid_eax(0x80000008);
345
346 c->x86_virt_bits = (eax >> 8) & 0xff;
347 c->x86_phys_bits = eax & 0xff;
348 }
349
350 detect_nopl(c);
351
352 if (c->x86_vendor != X86_VENDOR_UNKNOWN &&
353 cpu_devs[c->x86_vendor]->c_early_init)
354 cpu_devs[c->x86_vendor]->c_early_init(c);
355
356 validate_pat_support(c);
357}
358
359/*
360 * This does the hard work of actually picking apart the CPU stuff...
361 */
362static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
363{
364 int i;
365
366 early_identify_cpu(c);
367
368 init_scattered_cpuid_features(c);
369
370 c->apicid = phys_pkg_id(0);
371
372 /*
373 * Vendor-specific initialization. In this section we
374 * canonicalize the feature flags, meaning if there are
375 * features a certain CPU supports which CPUID doesn't
376 * tell us, CPUID claiming incorrect flags, or other bugs,
377 * we handle them here.
378 *
379 * At the end of this section, c->x86_capability better
380 * indicate the features this CPU genuinely supports!
381 */
382 if (this_cpu->c_init)
383 this_cpu->c_init(c);
384
385 detect_ht(c);
386
387 /*
388 * On SMP, boot_cpu_data holds the common feature set between
389 * all CPUs; so make sure that we indicate which features are
390 * common between the CPUs. The first time this routine gets
391 * executed, c == &boot_cpu_data.
392 */
393 if (c != &boot_cpu_data) {
394 /* AND the already accumulated flags with these */
395 for (i = 0; i < NCAPINTS; i++)
396 boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
397 }
398
399 /* Clear all flags overriden by options */
400 for (i = 0; i < NCAPINTS; i++)
401 c->x86_capability[i] &= ~cleared_cpu_caps[i];
402
403#ifdef CONFIG_X86_MCE
404 mcheck_init(c);
405#endif
406 select_idle_routine(c);
407
408#ifdef CONFIG_NUMA
409 numa_add_cpu(smp_processor_id());
410#endif
411
412}
413
414void __cpuinit identify_boot_cpu(void)
415{
416 identify_cpu(&boot_cpu_data);
417}
418
419void __cpuinit identify_secondary_cpu(struct cpuinfo_x86 *c)
420{
421 BUG_ON(c == &boot_cpu_data);
422 identify_cpu(c);
423 mtrr_ap_init();
424}
425
426static __init int setup_noclflush(char *arg)
427{
428 setup_clear_cpu_cap(X86_FEATURE_CLFLSH);
429 return 1;
430}
431__setup("noclflush", setup_noclflush);
432
433struct msr_range {
434 unsigned min;
435 unsigned max;
436};
437
438static struct msr_range msr_range_array[] __cpuinitdata = {
439 { 0x00000000, 0x00000418},
440 { 0xc0000000, 0xc000040b},
441 { 0xc0010000, 0xc0010142},
442 { 0xc0011000, 0xc001103b},
443};
444
445static void __cpuinit print_cpu_msr(void)
446{
447 unsigned index;
448 u64 val;
449 int i;
450 unsigned index_min, index_max;
451
452 for (i = 0; i < ARRAY_SIZE(msr_range_array); i++) {
453 index_min = msr_range_array[i].min;
454 index_max = msr_range_array[i].max;
455 for (index = index_min; index < index_max; index++) {
456 if (rdmsrl_amd_safe(index, &val))
457 continue;
458 printk(KERN_INFO " MSR%08x: %016llx\n", index, val);
459 }
460 }
461}
462
463static int show_msr __cpuinitdata;
464static __init int setup_show_msr(char *arg)
465{
466 int num;
467
468 get_option(&arg, &num);
469
470 if (num > 0)
471 show_msr = num;
472 return 1;
473}
474__setup("show_msr=", setup_show_msr);
475
476void __cpuinit print_cpu_info(struct cpuinfo_x86 *c)
477{
478 if (c->x86_model_id[0])
479 printk(KERN_CONT "%s", c->x86_model_id);
480
481 if (c->x86_mask || c->cpuid_level >= 0)
482 printk(KERN_CONT " stepping %02x\n", c->x86_mask);
483 else
484 printk(KERN_CONT "\n");
485
486#ifdef CONFIG_SMP
487 if (c->cpu_index < show_msr)
488 print_cpu_msr();
489#else
490 if (show_msr)
491 print_cpu_msr();
492#endif
493}
494
495static __init int setup_disablecpuid(char *arg)
496{
497 int bit;
498 if (get_option(&arg, &bit) && bit < NCAPINTS*32)
499 setup_clear_cpu_cap(bit);
500 else
501 return 0;
502 return 1;
503}
504__setup("clearcpuid=", setup_disablecpuid);
505
506cpumask_t cpu_initialized __cpuinitdata = CPU_MASK_NONE;
507
508struct x8664_pda **_cpu_pda __read_mostly;
509EXPORT_SYMBOL(_cpu_pda);
510
511struct desc_ptr idt_descr = { 256 * 16 - 1, (unsigned long) idt_table };
512
513char boot_cpu_stack[IRQSTACKSIZE] __page_aligned_bss;
514
515unsigned long __supported_pte_mask __read_mostly = ~0UL;
516EXPORT_SYMBOL_GPL(__supported_pte_mask);
517
518static int do_not_nx __cpuinitdata;
519
520/* noexec=on|off
521Control non executable mappings for 64bit processes.
522
523on Enable(default)
524off Disable
525*/
526static int __init nonx_setup(char *str)
527{
528 if (!str)
529 return -EINVAL;
530 if (!strncmp(str, "on", 2)) {
531 __supported_pte_mask |= _PAGE_NX;
532 do_not_nx = 0;
533 } else if (!strncmp(str, "off", 3)) {
534 do_not_nx = 1;
535 __supported_pte_mask &= ~_PAGE_NX;
536 }
537 return 0;
538}
539early_param("noexec", nonx_setup);
540
541int force_personality32;
542
543/* noexec32=on|off
544Control non executable heap for 32bit processes.
545To control the stack too use noexec=off
546
547on PROT_READ does not imply PROT_EXEC for 32bit processes (default)
548off PROT_READ implies PROT_EXEC
549*/
550static int __init nonx32_setup(char *str)
551{
552 if (!strcmp(str, "on"))
553 force_personality32 &= ~READ_IMPLIES_EXEC;
554 else if (!strcmp(str, "off"))
555 force_personality32 |= READ_IMPLIES_EXEC;
556 return 1;
557}
558__setup("noexec32=", nonx32_setup);
559
560void pda_init(int cpu)
561{
562 struct x8664_pda *pda = cpu_pda(cpu);
563
564 /* Setup up data that may be needed in __get_free_pages early */
565 loadsegment(fs, 0);
566 loadsegment(gs, 0);
567 /* Memory clobbers used to order PDA accessed */
568 mb();
569 wrmsrl(MSR_GS_BASE, pda);
570 mb();
571
572 pda->cpunumber = cpu;
573 pda->irqcount = -1;
574 pda->kernelstack = (unsigned long)stack_thread_info() -
575 PDA_STACKOFFSET + THREAD_SIZE;
576 pda->active_mm = &init_mm;
577 pda->mmu_state = 0;
578
579 if (cpu == 0) {
580 /* others are initialized in smpboot.c */
581 pda->pcurrent = &init_task;
582 pda->irqstackptr = boot_cpu_stack;
583 pda->irqstackptr += IRQSTACKSIZE - 64;
584 } else {
585 if (!pda->irqstackptr) {
586 pda->irqstackptr = (char *)
587 __get_free_pages(GFP_ATOMIC, IRQSTACK_ORDER);
588 if (!pda->irqstackptr)
589 panic("cannot allocate irqstack for cpu %d",
590 cpu);
591 pda->irqstackptr += IRQSTACKSIZE - 64;
592 }
593
594 if (pda->nodenumber == 0 && cpu_to_node(cpu) != NUMA_NO_NODE)
595 pda->nodenumber = cpu_to_node(cpu);
596 }
597}
598
599char boot_exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ +
600 DEBUG_STKSZ] __page_aligned_bss;
601
602extern asmlinkage void ignore_sysret(void);
603
604/* May not be marked __init: used by software suspend */
605void syscall_init(void)
606{
607 /*
608 * LSTAR and STAR live in a bit strange symbiosis.
609 * They both write to the same internal register. STAR allows to
610 * set CS/DS but only a 32bit target. LSTAR sets the 64bit rip.
611 */
612 wrmsrl(MSR_STAR, ((u64)__USER32_CS)<<48 | ((u64)__KERNEL_CS)<<32);
613 wrmsrl(MSR_LSTAR, system_call);
614 wrmsrl(MSR_CSTAR, ignore_sysret);
615
616#ifdef CONFIG_IA32_EMULATION
617 syscall32_cpu_init();
618#endif
619
620 /* Flags to clear on syscall */
621 wrmsrl(MSR_SYSCALL_MASK,
622 X86_EFLAGS_TF|X86_EFLAGS_DF|X86_EFLAGS_IF|X86_EFLAGS_IOPL);
623}
624
625void __cpuinit check_efer(void)
626{
627 unsigned long efer;
628
629 rdmsrl(MSR_EFER, efer);
630 if (!(efer & EFER_NX) || do_not_nx)
631 __supported_pte_mask &= ~_PAGE_NX;
632}
633
634unsigned long kernel_eflags;
635
636/*
637 * Copies of the original ist values from the tss are only accessed during
638 * debugging, no special alignment required.
639 */
640DEFINE_PER_CPU(struct orig_ist, orig_ist);
641
642/*
643 * cpu_init() initializes state that is per-CPU. Some data is already
644 * initialized (naturally) in the bootstrap process, such as the GDT
645 * and IDT. We reload them nevertheless, this function acts as a
646 * 'CPU state barrier', nothing should get across.
647 * A lot of state is already set up in PDA init.
648 */
649void __cpuinit cpu_init(void)
650{
651 int cpu = stack_smp_processor_id();
652 struct tss_struct *t = &per_cpu(init_tss, cpu);
653 struct orig_ist *orig_ist = &per_cpu(orig_ist, cpu);
654 unsigned long v;
655 char *estacks = NULL;
656 struct task_struct *me;
657 int i;
658
659 /* CPU 0 is initialised in head64.c */
660 if (cpu != 0)
661 pda_init(cpu);
662 else
663 estacks = boot_exception_stacks;
664
665 me = current;
666
667 if (cpu_test_and_set(cpu, cpu_initialized))
668 panic("CPU#%d already initialized!\n", cpu);
669
670 printk(KERN_INFO "Initializing CPU#%d\n", cpu);
671
672 clear_in_cr4(X86_CR4_VME|X86_CR4_PVI|X86_CR4_TSD|X86_CR4_DE);
673
674 /*
675 * Initialize the per-CPU GDT with the boot GDT,
676 * and set up the GDT descriptor:
677 */
678
679 switch_to_new_gdt();
680 load_idt((const struct desc_ptr *)&idt_descr);
681
682 memset(me->thread.tls_array, 0, GDT_ENTRY_TLS_ENTRIES * 8);
683 syscall_init();
684
685 wrmsrl(MSR_FS_BASE, 0);
686 wrmsrl(MSR_KERNEL_GS_BASE, 0);
687 barrier();
688
689 check_efer();
690
691 /*
692 * set up and load the per-CPU TSS
693 */
694 if (!orig_ist->ist[0]) {
695 static const unsigned int order[N_EXCEPTION_STACKS] = {
696 [0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STACK_ORDER,
697 [DEBUG_STACK - 1] = DEBUG_STACK_ORDER
698 };
699 for (v = 0; v < N_EXCEPTION_STACKS; v++) {
700 if (cpu) {
701 estacks = (char *)__get_free_pages(GFP_ATOMIC, order[v]);
702 if (!estacks)
703 panic("Cannot allocate exception "
704 "stack %ld %d\n", v, cpu);
705 }
706 estacks += PAGE_SIZE << order[v];
707 orig_ist->ist[v] = t->x86_tss.ist[v] =
708 (unsigned long)estacks;
709 }
710 }
711
712 t->x86_tss.io_bitmap_base = offsetof(struct tss_struct, io_bitmap);
713 /*
714 * <= is required because the CPU will access up to
715 * 8 bits beyond the end of the IO permission bitmap.
716 */
717 for (i = 0; i <= IO_BITMAP_LONGS; i++)
718 t->io_bitmap[i] = ~0UL;
719
720 atomic_inc(&init_mm.mm_count);
721 me->active_mm = &init_mm;
722 if (me->mm)
723 BUG();
724 enter_lazy_tlb(&init_mm, me);
725
726 load_sp0(t, &current->thread);
727 set_tss_desc(cpu, t);
728 load_TR_desc();
729 load_LDT(&init_mm.context);
730
731#ifdef CONFIG_KGDB
732 /*
733 * If the kgdb is connected no debug regs should be altered. This
734 * is only applicable when KGDB and a KGDB I/O module are built
735 * into the kernel and you are using early debugging with
736 * kgdbwait. KGDB will control the kernel HW breakpoint registers.
737 */
738 if (kgdb_connected && arch_kgdb_ops.correct_hw_break)
739 arch_kgdb_ops.correct_hw_break();
740 else {
741#endif
742 /*
743 * Clear all 6 debug registers:
744 */
745
746 set_debugreg(0UL, 0);
747 set_debugreg(0UL, 1);
748 set_debugreg(0UL, 2);
749 set_debugreg(0UL, 3);
750 set_debugreg(0UL, 6);
751 set_debugreg(0UL, 7);
752#ifdef CONFIG_KGDB
753 /* If the kgdb is connected no debug regs should be altered. */
754 }
755#endif
756
757 fpu_init();
758
759 raw_local_save_flags(kernel_eflags);
760
761 if (is_uv_system())
762 uv_cpu_init();
763}
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 4d894e8565fe..de4094a39210 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -21,23 +21,16 @@ struct cpu_dev {
21 void (*c_init)(struct cpuinfo_x86 * c); 21 void (*c_init)(struct cpuinfo_x86 * c);
22 void (*c_identify)(struct cpuinfo_x86 * c); 22 void (*c_identify)(struct cpuinfo_x86 * c);
23 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size); 23 unsigned int (*c_size_cache)(struct cpuinfo_x86 * c, unsigned int size);
24 int c_x86_vendor;
24}; 25};
25 26
26extern struct cpu_dev * cpu_devs [X86_VENDOR_NUM]; 27#define cpu_dev_register(cpu_devX) \
28 static struct cpu_dev *__cpu_dev_##cpu_devX __used \
29 __attribute__((__section__(".x86_cpu_dev.init"))) = \
30 &cpu_devX;
27 31
28struct cpu_vendor_dev { 32extern struct cpu_dev *__x86_cpu_dev_start[], *__x86_cpu_dev_end[];
29 int vendor;
30 struct cpu_dev *cpu_dev;
31};
32
33#define cpu_vendor_dev_register(cpu_vendor_id, cpu_dev) \
34 static struct cpu_vendor_dev __cpu_vendor_dev_##cpu_vendor_id __used \
35 __attribute__((__section__(".x86cpuvendor.init"))) = \
36 { cpu_vendor_id, cpu_dev }
37
38extern struct cpu_vendor_dev __x86cpuvendor_start[], __x86cpuvendor_end[];
39 33
40extern int get_model_name(struct cpuinfo_x86 *c);
41extern void display_cacheinfo(struct cpuinfo_x86 *c); 34extern void display_cacheinfo(struct cpuinfo_x86 *c);
42 35
43#endif 36#endif
diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c
index 898a5a2002ed..ffd0f5ed071a 100644
--- a/arch/x86/kernel/cpu/cyrix.c
+++ b/arch/x86/kernel/cpu/cyrix.c
@@ -121,7 +121,7 @@ static void __cpuinit set_cx86_reorder(void)
121 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 121 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
122 122
123 /* Load/Store Serialize to mem access disable (=reorder it) */ 123 /* Load/Store Serialize to mem access disable (=reorder it) */
124 setCx86(CX86_PCR0, getCx86(CX86_PCR0) & ~0x80); 124 setCx86_old(CX86_PCR0, getCx86_old(CX86_PCR0) & ~0x80);
125 /* set load/store serialize from 1GB to 4GB */ 125 /* set load/store serialize from 1GB to 4GB */
126 ccr3 |= 0xe0; 126 ccr3 |= 0xe0;
127 setCx86(CX86_CCR3, ccr3); 127 setCx86(CX86_CCR3, ccr3);
@@ -132,11 +132,11 @@ static void __cpuinit set_cx86_memwb(void)
132 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n"); 132 printk(KERN_INFO "Enable Memory-Write-back mode on Cyrix/NSC processor.\n");
133 133
134 /* CCR2 bit 2: unlock NW bit */ 134 /* CCR2 bit 2: unlock NW bit */
135 setCx86(CX86_CCR2, getCx86(CX86_CCR2) & ~0x04); 135 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) & ~0x04);
136 /* set 'Not Write-through' */ 136 /* set 'Not Write-through' */
137 write_cr0(read_cr0() | X86_CR0_NW); 137 write_cr0(read_cr0() | X86_CR0_NW);
138 /* CCR2 bit 2: lock NW bit and set WT1 */ 138 /* CCR2 bit 2: lock NW bit and set WT1 */
139 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x14); 139 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x14);
140} 140}
141 141
142/* 142/*
@@ -150,14 +150,14 @@ static void __cpuinit geode_configure(void)
150 local_irq_save(flags); 150 local_irq_save(flags);
151 151
152 /* Suspend on halt power saving and enable #SUSP pin */ 152 /* Suspend on halt power saving and enable #SUSP pin */
153 setCx86(CX86_CCR2, getCx86(CX86_CCR2) | 0x88); 153 setCx86_old(CX86_CCR2, getCx86_old(CX86_CCR2) | 0x88);
154 154
155 ccr3 = getCx86(CX86_CCR3); 155 ccr3 = getCx86(CX86_CCR3);
156 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 156 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
157 157
158 158
159 /* FPU fast, DTE cache, Mem bypass */ 159 /* FPU fast, DTE cache, Mem bypass */
160 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x38); 160 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x38);
161 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 161 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
162 162
163 set_cx86_memwb(); 163 set_cx86_memwb();
@@ -291,7 +291,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
291 /* GXm supports extended cpuid levels 'ala' AMD */ 291 /* GXm supports extended cpuid levels 'ala' AMD */
292 if (c->cpuid_level == 2) { 292 if (c->cpuid_level == 2) {
293 /* Enable cxMMX extensions (GX1 Datasheet 54) */ 293 /* Enable cxMMX extensions (GX1 Datasheet 54) */
294 setCx86(CX86_CCR7, getCx86(CX86_CCR7) | 1); 294 setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7) | 1);
295 295
296 /* 296 /*
297 * GXm : 0x30 ... 0x5f GXm datasheet 51 297 * GXm : 0x30 ... 0x5f GXm datasheet 51
@@ -301,7 +301,6 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
301 */ 301 */
302 if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f)) 302 if ((0x30 <= dir1 && dir1 <= 0x6f) || (0x80 <= dir1 && dir1 <= 0x8f))
303 geode_configure(); 303 geode_configure();
304 get_model_name(c); /* get CPU marketing name */
305 return; 304 return;
306 } else { /* MediaGX */ 305 } else { /* MediaGX */
307 Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4'; 306 Cx86_cb[2] = (dir0_lsn & 1) ? '3' : '4';
@@ -314,7 +313,7 @@ static void __cpuinit init_cyrix(struct cpuinfo_x86 *c)
314 if (dir1 > 7) { 313 if (dir1 > 7) {
315 dir0_msn++; /* M II */ 314 dir0_msn++; /* M II */
316 /* Enable MMX extensions (App note 108) */ 315 /* Enable MMX extensions (App note 108) */
317 setCx86(CX86_CCR7, getCx86(CX86_CCR7)|1); 316 setCx86_old(CX86_CCR7, getCx86_old(CX86_CCR7)|1);
318 } else { 317 } else {
319 c->coma_bug = 1; /* 6x86MX, it has the bug. */ 318 c->coma_bug = 1; /* 6x86MX, it has the bug. */
320 } 319 }
@@ -429,7 +428,7 @@ static void __cpuinit cyrix_identify(struct cpuinfo_x86 *c)
429 local_irq_save(flags); 428 local_irq_save(flags);
430 ccr3 = getCx86(CX86_CCR3); 429 ccr3 = getCx86(CX86_CCR3);
431 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */ 430 setCx86(CX86_CCR3, (ccr3 & 0x0f) | 0x10); /* enable MAPEN */
432 setCx86(CX86_CCR4, getCx86(CX86_CCR4) | 0x80); /* enable cpuid */ 431 setCx86_old(CX86_CCR4, getCx86_old(CX86_CCR4) | 0x80); /* enable cpuid */
433 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */ 432 setCx86(CX86_CCR3, ccr3); /* disable MAPEN */
434 local_irq_restore(flags); 433 local_irq_restore(flags);
435 } 434 }
@@ -442,14 +441,16 @@ static struct cpu_dev cyrix_cpu_dev __cpuinitdata = {
442 .c_early_init = early_init_cyrix, 441 .c_early_init = early_init_cyrix,
443 .c_init = init_cyrix, 442 .c_init = init_cyrix,
444 .c_identify = cyrix_identify, 443 .c_identify = cyrix_identify,
444 .c_x86_vendor = X86_VENDOR_CYRIX,
445}; 445};
446 446
447cpu_vendor_dev_register(X86_VENDOR_CYRIX, &cyrix_cpu_dev); 447cpu_dev_register(cyrix_cpu_dev);
448 448
449static struct cpu_dev nsc_cpu_dev __cpuinitdata = { 449static struct cpu_dev nsc_cpu_dev __cpuinitdata = {
450 .c_vendor = "NSC", 450 .c_vendor = "NSC",
451 .c_ident = { "Geode by NSC" }, 451 .c_ident = { "Geode by NSC" },
452 .c_init = init_nsc, 452 .c_init = init_nsc,
453 .c_x86_vendor = X86_VENDOR_NSC,
453}; 454};
454 455
455cpu_vendor_dev_register(X86_VENDOR_NSC, &nsc_cpu_dev); 456cpu_dev_register(nsc_cpu_dev);
diff --git a/arch/x86/kernel/cpu/feature_names.c b/arch/x86/kernel/cpu/feature_names.c
deleted file mode 100644
index c9017799497c..000000000000
--- a/arch/x86/kernel/cpu/feature_names.c
+++ /dev/null
@@ -1,84 +0,0 @@
1/*
2 * Strings for the various x86 capability flags.
3 *
4 * This file must not contain any executable code.
5 */
6
7#include <asm/cpufeature.h>
8
9/*
10 * These flag bits must match the definitions in <asm/cpufeature.h>.
11 * NULL means this bit is undefined or reserved; either way it doesn't
12 * have meaning as far as Linux is concerned. Note that it's important
13 * to realize there is a difference between this table and CPUID -- if
14 * applications want to get the raw CPUID data, they should access
15 * /dev/cpu/<cpu_nr>/cpuid instead.
16 */
17const char * const x86_cap_flags[NCAPINTS*32] = {
18 /* Intel-defined */
19 "fpu", "vme", "de", "pse", "tsc", "msr", "pae", "mce",
20 "cx8", "apic", NULL, "sep", "mtrr", "pge", "mca", "cmov",
21 "pat", "pse36", "pn", "clflush", NULL, "dts", "acpi", "mmx",
22 "fxsr", "sse", "sse2", "ss", "ht", "tm", "ia64", "pbe",
23
24 /* AMD-defined */
25 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
26 NULL, NULL, NULL, "syscall", NULL, NULL, NULL, NULL,
27 NULL, NULL, NULL, "mp", "nx", NULL, "mmxext", NULL,
28 NULL, "fxsr_opt", "pdpe1gb", "rdtscp", NULL, "lm",
29 "3dnowext", "3dnow",
30
31 /* Transmeta-defined */
32 "recovery", "longrun", NULL, "lrti", NULL, NULL, NULL, NULL,
33 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
34 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
35 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
36
37 /* Other (Linux-defined) */
38 "cxmmx", "k6_mtrr", "cyrix_arr", "centaur_mcr",
39 NULL, NULL, NULL, NULL,
40 "constant_tsc", "up", NULL, "arch_perfmon",
41 "pebs", "bts", NULL, NULL,
42 "rep_good", NULL, NULL, NULL,
43 "nopl", NULL, NULL, NULL,
44 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
45
46 /* Intel-defined (#2) */
47 "pni", NULL, NULL, "monitor", "ds_cpl", "vmx", "smx", "est",
48 "tm2", "ssse3", "cid", NULL, NULL, "cx16", "xtpr", NULL,
49 NULL, NULL, "dca", "sse4_1", "sse4_2", NULL, NULL, "popcnt",
50 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
51
52 /* VIA/Cyrix/Centaur-defined */
53 NULL, NULL, "rng", "rng_en", NULL, NULL, "ace", "ace_en",
54 "ace2", "ace2_en", "phe", "phe_en", "pmm", "pmm_en", NULL, NULL,
55 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
56 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
57
58 /* AMD-defined (#2) */
59 "lahf_lm", "cmp_legacy", "svm", "extapic",
60 "cr8_legacy", "abm", "sse4a", "misalignsse",
61 "3dnowprefetch", "osvw", "ibs", "sse5",
62 "skinit", "wdt", NULL, NULL,
63 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
64 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
65
66 /* Auxiliary (Linux-defined) */
67 "ida", NULL, NULL, NULL, NULL, NULL, NULL, NULL,
68 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
69 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
70 NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL,
71};
72
73const char *const x86_power_flags[32] = {
74 "ts", /* temperature sensor */
75 "fid", /* frequency id control */
76 "vid", /* voltage id control */
77 "ttp", /* thermal trip */
78 "tm",
79 "stc",
80 "100mhzsteps",
81 "hwpstate",
82 "", /* tsc invariant mapped to constant_tsc */
83 /* nothing */
84};
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index f113ef4595f6..99468dbd08da 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -15,6 +15,11 @@
15#include <asm/ds.h> 15#include <asm/ds.h>
16#include <asm/bugs.h> 16#include <asm/bugs.h>
17 17
18#ifdef CONFIG_X86_64
19#include <asm/topology.h>
20#include <asm/numa_64.h>
21#endif
22
18#include "cpu.h" 23#include "cpu.h"
19 24
20#ifdef CONFIG_X86_LOCAL_APIC 25#ifdef CONFIG_X86_LOCAL_APIC
@@ -23,23 +28,22 @@
23#include <mach_apic.h> 28#include <mach_apic.h>
24#endif 29#endif
25 30
26#ifdef CONFIG_X86_INTEL_USERCOPY
27/*
28 * Alignment at which movsl is preferred for bulk memory copies.
29 */
30struct movsl_mask movsl_mask __read_mostly;
31#endif
32
33static void __cpuinit early_init_intel(struct cpuinfo_x86 *c) 31static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
34{ 32{
35 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
36 if (c->x86 == 15 && c->x86_cache_alignment == 64)
37 c->x86_cache_alignment = 128;
38 if ((c->x86 == 0xf && c->x86_model >= 0x03) || 33 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
39 (c->x86 == 0x6 && c->x86_model >= 0x0e)) 34 (c->x86 == 0x6 && c->x86_model >= 0x0e))
40 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); 35 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
36
37#ifdef CONFIG_X86_64
38 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
39#else
40 /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */
41 if (c->x86 == 15 && c->x86_cache_alignment == 64)
42 c->x86_cache_alignment = 128;
43#endif
41} 44}
42 45
46#ifdef CONFIG_X86_32
43/* 47/*
44 * Early probe support logic for ppro memory erratum #50 48 * Early probe support logic for ppro memory erratum #50
45 * 49 *
@@ -59,15 +63,54 @@ int __cpuinit ppro_with_ram_bug(void)
59 return 0; 63 return 0;
60} 64}
61 65
66#ifdef CONFIG_X86_F00F_BUG
67static void __cpuinit trap_init_f00f_bug(void)
68{
69 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO);
62 70
63/* 71 /*
64 * P4 Xeon errata 037 workaround. 72 * Update the IDT descriptor and reload the IDT so that
65 * Hardware prefetcher may cause stale data to be loaded into the cache. 73 * it uses the read-only mapped virtual address.
66 */ 74 */
67static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c) 75 idt_descr.address = fix_to_virt(FIX_F00F_IDT);
76 load_idt(&idt_descr);
77}
78#endif
79
80static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
68{ 81{
69 unsigned long lo, hi; 82 unsigned long lo, hi;
70 83
84#ifdef CONFIG_X86_F00F_BUG
85 /*
86 * All current models of Pentium and Pentium with MMX technology CPUs
87 * have the F0 0F bug, which lets nonprivileged users lock up the system.
88 * Note that the workaround only should be initialized once...
89 */
90 c->f00f_bug = 0;
91 if (!paravirt_enabled() && c->x86 == 5) {
92 static int f00f_workaround_enabled;
93
94 c->f00f_bug = 1;
95 if (!f00f_workaround_enabled) {
96 trap_init_f00f_bug();
97 printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
98 f00f_workaround_enabled = 1;
99 }
100 }
101#endif
102
103 /*
104 * SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until
105 * model 3 mask 3
106 */
107 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633)
108 clear_cpu_cap(c, X86_FEATURE_SEP);
109
110 /*
111 * P4 Xeon errata 037 workaround.
112 * Hardware prefetcher may cause stale data to be loaded into the cache.
113 */
71 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) { 114 if ((c->x86 == 15) && (c->x86_model == 1) && (c->x86_mask == 1)) {
72 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi); 115 rdmsr(MSR_IA32_MISC_ENABLE, lo, hi);
73 if ((lo & (1<<9)) == 0) { 116 if ((lo & (1<<9)) == 0) {
@@ -77,13 +120,68 @@ static void __cpuinit Intel_errata_workarounds(struct cpuinfo_x86 *c)
77 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi); 120 wrmsr (MSR_IA32_MISC_ENABLE, lo, hi);
78 } 121 }
79 } 122 }
123
124 /*
125 * See if we have a good local APIC by checking for buggy Pentia,
126 * i.e. all B steppings and the C2 stepping of P54C when using their
127 * integrated APIC (see 11AP erratum in "Pentium Processor
128 * Specification Update").
129 */
130 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
131 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
132 set_cpu_cap(c, X86_FEATURE_11AP);
133
134
135#ifdef CONFIG_X86_INTEL_USERCOPY
136 /*
137 * Set up the preferred alignment for movsl bulk memory moves
138 */
139 switch (c->x86) {
140 case 4: /* 486: untested */
141 break;
142 case 5: /* Old Pentia: untested */
143 break;
144 case 6: /* PII/PIII only like movsl with 8-byte alignment */
145 movsl_mask.mask = 7;
146 break;
147 case 15: /* P4 is OK down to 8-byte alignment */
148 movsl_mask.mask = 7;
149 break;
150 }
151#endif
152
153#ifdef CONFIG_X86_NUMAQ
154 numaq_tsc_disable();
155#endif
80} 156}
157#else
158static void __cpuinit intel_workarounds(struct cpuinfo_x86 *c)
159{
160}
161#endif
81 162
163static void __cpuinit srat_detect_node(void)
164{
165#if defined(CONFIG_NUMA) && defined(CONFIG_X86_64)
166 unsigned node;
167 int cpu = smp_processor_id();
168 int apicid = hard_smp_processor_id();
169
170 /* Don't do the funky fallback heuristics the AMD version employs
171 for now. */
172 node = apicid_to_node[apicid];
173 if (node == NUMA_NO_NODE || !node_online(node))
174 node = first_node(node_online_map);
175 numa_set_node(cpu, node);
176
177 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
178#endif
179}
82 180
83/* 181/*
84 * find out the number of processor cores on the die 182 * find out the number of processor cores on the die
85 */ 183 */
86static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c) 184static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
87{ 185{
88 unsigned int eax, ebx, ecx, edx; 186 unsigned int eax, ebx, ecx, edx;
89 187
@@ -98,45 +196,51 @@ static int __cpuinit num_cpu_cores(struct cpuinfo_x86 *c)
98 return 1; 196 return 1;
99} 197}
100 198
101#ifdef CONFIG_X86_F00F_BUG 199static void __cpuinit detect_vmx_virtcap(struct cpuinfo_x86 *c)
102static void __cpuinit trap_init_f00f_bug(void)
103{ 200{
104 __set_fixmap(FIX_F00F_IDT, __pa(&idt_table), PAGE_KERNEL_RO); 201 /* Intel VMX MSR indicated features */
105 202#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
106 /* 203#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
107 * Update the IDT descriptor and reload the IDT so that 204#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
108 * it uses the read-only mapped virtual address. 205#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
109 */ 206#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
110 idt_descr.address = fix_to_virt(FIX_F00F_IDT); 207#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
111 load_idt(&idt_descr); 208
209 u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
210
211 clear_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
212 clear_cpu_cap(c, X86_FEATURE_VNMI);
213 clear_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
214 clear_cpu_cap(c, X86_FEATURE_EPT);
215 clear_cpu_cap(c, X86_FEATURE_VPID);
216
217 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
218 msr_ctl = vmx_msr_high | vmx_msr_low;
219 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
220 set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
221 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
222 set_cpu_cap(c, X86_FEATURE_VNMI);
223 if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
224 rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
225 vmx_msr_low, vmx_msr_high);
226 msr_ctl2 = vmx_msr_high | vmx_msr_low;
227 if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
228 (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
229 set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
230 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
231 set_cpu_cap(c, X86_FEATURE_EPT);
232 if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
233 set_cpu_cap(c, X86_FEATURE_VPID);
234 }
112} 235}
113#endif
114 236
115static void __cpuinit init_intel(struct cpuinfo_x86 *c) 237static void __cpuinit init_intel(struct cpuinfo_x86 *c)
116{ 238{
117 unsigned int l2 = 0; 239 unsigned int l2 = 0;
118 char *p = NULL;
119 240
120 early_init_intel(c); 241 early_init_intel(c);
121 242
122#ifdef CONFIG_X86_F00F_BUG 243 intel_workarounds(c);
123 /*
124 * All current models of Pentium and Pentium with MMX technology CPUs
125 * have the F0 0F bug, which lets nonprivileged users lock up the system.
126 * Note that the workaround only should be initialized once...
127 */
128 c->f00f_bug = 0;
129 if (!paravirt_enabled() && c->x86 == 5) {
130 static int f00f_workaround_enabled;
131
132 c->f00f_bug = 1;
133 if (!f00f_workaround_enabled) {
134 trap_init_f00f_bug();
135 printk(KERN_NOTICE "Intel Pentium with F0 0F bug - workaround enabled.\n");
136 f00f_workaround_enabled = 1;
137 }
138 }
139#endif
140 244
141 l2 = init_intel_cacheinfo(c); 245 l2 = init_intel_cacheinfo(c);
142 if (c->cpuid_level > 9) { 246 if (c->cpuid_level > 9) {
@@ -146,16 +250,32 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
146 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON); 250 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
147 } 251 }
148 252
149 /* SEP CPUID bug: Pentium Pro reports SEP but doesn't have it until model 3 mask 3 */ 253 if (cpu_has_xmm2)
150 if ((c->x86<<8 | c->x86_model<<4 | c->x86_mask) < 0x633) 254 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
151 clear_cpu_cap(c, X86_FEATURE_SEP); 255 if (cpu_has_ds) {
256 unsigned int l1;
257 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
258 if (!(l1 & (1<<11)))
259 set_cpu_cap(c, X86_FEATURE_BTS);
260 if (!(l1 & (1<<12)))
261 set_cpu_cap(c, X86_FEATURE_PEBS);
262 ds_init_intel(c);
263 }
152 264
265#ifdef CONFIG_X86_64
266 if (c->x86 == 15)
267 c->x86_cache_alignment = c->x86_clflush_size * 2;
268 if (c->x86 == 6)
269 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
270#else
153 /* 271 /*
154 * Names for the Pentium II/Celeron processors 272 * Names for the Pentium II/Celeron processors
155 * detectable only by also checking the cache size. 273 * detectable only by also checking the cache size.
156 * Dixon is NOT a Celeron. 274 * Dixon is NOT a Celeron.
157 */ 275 */
158 if (c->x86 == 6) { 276 if (c->x86 == 6) {
277 char *p = NULL;
278
159 switch (c->x86_model) { 279 switch (c->x86_model) {
160 case 5: 280 case 5:
161 if (c->x86_mask == 0) { 281 if (c->x86_mask == 0) {
@@ -178,71 +298,41 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c)
178 p = "Celeron (Coppermine)"; 298 p = "Celeron (Coppermine)";
179 break; 299 break;
180 } 300 }
181 }
182
183 if (p)
184 strcpy(c->x86_model_id, p);
185
186 c->x86_max_cores = num_cpu_cores(c);
187
188 detect_ht(c);
189 301
190 /* Work around errata */ 302 if (p)
191 Intel_errata_workarounds(c); 303 strcpy(c->x86_model_id, p);
192
193#ifdef CONFIG_X86_INTEL_USERCOPY
194 /*
195 * Set up the preferred alignment for movsl bulk memory moves
196 */
197 switch (c->x86) {
198 case 4: /* 486: untested */
199 break;
200 case 5: /* Old Pentia: untested */
201 break;
202 case 6: /* PII/PIII only like movsl with 8-byte alignment */
203 movsl_mask.mask = 7;
204 break;
205 case 15: /* P4 is OK down to 8-byte alignment */
206 movsl_mask.mask = 7;
207 break;
208 } 304 }
209#endif
210 305
211 if (cpu_has_xmm2) 306 if (c->x86 == 15)
212 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
213 if (c->x86 == 15) {
214 set_cpu_cap(c, X86_FEATURE_P4); 307 set_cpu_cap(c, X86_FEATURE_P4);
215 }
216 if (c->x86 == 6) 308 if (c->x86 == 6)
217 set_cpu_cap(c, X86_FEATURE_P3); 309 set_cpu_cap(c, X86_FEATURE_P3);
218 if (cpu_has_ds) {
219 unsigned int l1;
220 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
221 if (!(l1 & (1<<11)))
222 set_cpu_cap(c, X86_FEATURE_BTS);
223 if (!(l1 & (1<<12)))
224 set_cpu_cap(c, X86_FEATURE_PEBS);
225 ds_init_intel(c);
226 }
227 310
228 if (cpu_has_bts) 311 if (cpu_has_bts)
229 ptrace_bts_init_intel(c); 312 ptrace_bts_init_intel(c);
230 313
231 /* 314#endif
232 * See if we have a good local APIC by checking for buggy Pentia,
233 * i.e. all B steppings and the C2 stepping of P54C when using their
234 * integrated APIC (see 11AP erratum in "Pentium Processor
235 * Specification Update").
236 */
237 if (cpu_has_apic && (c->x86<<8 | c->x86_model<<4) == 0x520 &&
238 (c->x86_mask < 0x6 || c->x86_mask == 0xb))
239 set_cpu_cap(c, X86_FEATURE_11AP);
240 315
241#ifdef CONFIG_X86_NUMAQ 316 detect_extended_topology(c);
242 numaq_tsc_disable(); 317 if (!cpu_has(c, X86_FEATURE_XTOPOLOGY)) {
318 /*
319 * let's use the legacy cpuid vector 0x1 and 0x4 for topology
320 * detection.
321 */
322 c->x86_max_cores = intel_num_cpu_cores(c);
323#ifdef CONFIG_X86_32
324 detect_ht(c);
243#endif 325#endif
326 }
327
328 /* Work around errata */
329 srat_detect_node();
330
331 if (cpu_has(c, X86_FEATURE_VMX))
332 detect_vmx_virtcap(c);
244} 333}
245 334
335#ifdef CONFIG_X86_32
246static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size) 336static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned int size)
247{ 337{
248 /* 338 /*
@@ -255,10 +345,12 @@ static unsigned int __cpuinit intel_size_cache(struct cpuinfo_x86 *c, unsigned i
255 size = 256; 345 size = 256;
256 return size; 346 return size;
257} 347}
348#endif
258 349
259static struct cpu_dev intel_cpu_dev __cpuinitdata = { 350static struct cpu_dev intel_cpu_dev __cpuinitdata = {
260 .c_vendor = "Intel", 351 .c_vendor = "Intel",
261 .c_ident = { "GenuineIntel" }, 352 .c_ident = { "GenuineIntel" },
353#ifdef CONFIG_X86_32
262 .c_models = { 354 .c_models = {
263 { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names = 355 { .vendor = X86_VENDOR_INTEL, .family = 4, .model_names =
264 { 356 {
@@ -308,76 +400,12 @@ static struct cpu_dev intel_cpu_dev __cpuinitdata = {
308 } 400 }
309 }, 401 },
310 }, 402 },
403 .c_size_cache = intel_size_cache,
404#endif
311 .c_early_init = early_init_intel, 405 .c_early_init = early_init_intel,
312 .c_init = init_intel, 406 .c_init = init_intel,
313 .c_size_cache = intel_size_cache, 407 .c_x86_vendor = X86_VENDOR_INTEL,
314}; 408};
315 409
316cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev); 410cpu_dev_register(intel_cpu_dev);
317
318#ifndef CONFIG_X86_CMPXCHG
319unsigned long cmpxchg_386_u8(volatile void *ptr, u8 old, u8 new)
320{
321 u8 prev;
322 unsigned long flags;
323
324 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
325 local_irq_save(flags);
326 prev = *(u8 *)ptr;
327 if (prev == old)
328 *(u8 *)ptr = new;
329 local_irq_restore(flags);
330 return prev;
331}
332EXPORT_SYMBOL(cmpxchg_386_u8);
333
334unsigned long cmpxchg_386_u16(volatile void *ptr, u16 old, u16 new)
335{
336 u16 prev;
337 unsigned long flags;
338
339 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
340 local_irq_save(flags);
341 prev = *(u16 *)ptr;
342 if (prev == old)
343 *(u16 *)ptr = new;
344 local_irq_restore(flags);
345 return prev;
346}
347EXPORT_SYMBOL(cmpxchg_386_u16);
348
349unsigned long cmpxchg_386_u32(volatile void *ptr, u32 old, u32 new)
350{
351 u32 prev;
352 unsigned long flags;
353
354 /* Poor man's cmpxchg for 386. Unsuitable for SMP */
355 local_irq_save(flags);
356 prev = *(u32 *)ptr;
357 if (prev == old)
358 *(u32 *)ptr = new;
359 local_irq_restore(flags);
360 return prev;
361}
362EXPORT_SYMBOL(cmpxchg_386_u32);
363#endif
364
365#ifndef CONFIG_X86_CMPXCHG64
366unsigned long long cmpxchg_486_u64(volatile void *ptr, u64 old, u64 new)
367{
368 u64 prev;
369 unsigned long flags;
370
371 /* Poor man's cmpxchg8b for 386 and 486. Unsuitable for SMP */
372 local_irq_save(flags);
373 prev = *(u64 *)ptr;
374 if (prev == old)
375 *(u64 *)ptr = new;
376 local_irq_restore(flags);
377 return prev;
378}
379EXPORT_SYMBOL(cmpxchg_486_u64);
380#endif
381
382/* arch_initcall(intel_cpu_init); */
383 411
diff --git a/arch/x86/kernel/cpu/intel_64.c b/arch/x86/kernel/cpu/intel_64.c
deleted file mode 100644
index 1019c58d39f0..000000000000
--- a/arch/x86/kernel/cpu/intel_64.c
+++ /dev/null
@@ -1,95 +0,0 @@
1#include <linux/init.h>
2#include <linux/smp.h>
3#include <asm/processor.h>
4#include <asm/ptrace.h>
5#include <asm/topology.h>
6#include <asm/numa_64.h>
7
8#include "cpu.h"
9
10static void __cpuinit early_init_intel(struct cpuinfo_x86 *c)
11{
12 if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
13 (c->x86 == 0x6 && c->x86_model >= 0x0e))
14 set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC);
15
16 set_cpu_cap(c, X86_FEATURE_SYSENTER32);
17}
18
19/*
20 * find out the number of processor cores on the die
21 */
22static int __cpuinit intel_num_cpu_cores(struct cpuinfo_x86 *c)
23{
24 unsigned int eax, t;
25
26 if (c->cpuid_level < 4)
27 return 1;
28
29 cpuid_count(4, 0, &eax, &t, &t, &t);
30
31 if (eax & 0x1f)
32 return ((eax >> 26) + 1);
33 else
34 return 1;
35}
36
37static void __cpuinit srat_detect_node(void)
38{
39#ifdef CONFIG_NUMA
40 unsigned node;
41 int cpu = smp_processor_id();
42 int apicid = hard_smp_processor_id();
43
44 /* Don't do the funky fallback heuristics the AMD version employs
45 for now. */
46 node = apicid_to_node[apicid];
47 if (node == NUMA_NO_NODE || !node_online(node))
48 node = first_node(node_online_map);
49 numa_set_node(cpu, node);
50
51 printk(KERN_INFO "CPU %d/%x -> Node %d\n", cpu, apicid, node);
52#endif
53}
54
55static void __cpuinit init_intel(struct cpuinfo_x86 *c)
56{
57 init_intel_cacheinfo(c);
58 if (c->cpuid_level > 9) {
59 unsigned eax = cpuid_eax(10);
60 /* Check for version and the number of counters */
61 if ((eax & 0xff) && (((eax>>8) & 0xff) > 1))
62 set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
63 }
64
65 if (cpu_has_ds) {
66 unsigned int l1, l2;
67 rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
68 if (!(l1 & (1<<11)))
69 set_cpu_cap(c, X86_FEATURE_BTS);
70 if (!(l1 & (1<<12)))
71 set_cpu_cap(c, X86_FEATURE_PEBS);
72 }
73
74
75 if (cpu_has_bts)
76 ds_init_intel(c);
77
78 if (c->x86 == 15)
79 c->x86_cache_alignment = c->x86_clflush_size * 2;
80 if (c->x86 == 6)
81 set_cpu_cap(c, X86_FEATURE_REP_GOOD);
82 set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
83 c->x86_max_cores = intel_num_cpu_cores(c);
84
85 srat_detect_node();
86}
87
88static struct cpu_dev intel_cpu_dev __cpuinitdata = {
89 .c_vendor = "Intel",
90 .c_ident = { "GenuineIntel" },
91 .c_early_init = early_init_intel,
92 .c_init = init_intel,
93};
94cpu_vendor_dev_register(X86_VENDOR_INTEL, &intel_cpu_dev);
95
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c
index 6b0a10b002f1..3f46afbb1cf1 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/intel_cacheinfo.c
@@ -1,8 +1,8 @@
1/* 1/*
2 * Routines to indentify caches on Intel CPU. 2 * Routines to indentify caches on Intel CPU.
3 * 3 *
4 * Changes: 4 * Changes:
5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4) 5 * Venkatesh Pallipadi : Adding cache identification through cpuid(4)
6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure. 6 * Ashok Raj <ashok.raj@intel.com>: Work with CPU hotplug infrastructure.
7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD. 7 * Andi Kleen / Andreas Herrmann : CPUID4 emulation on AMD.
8 */ 8 */
@@ -13,6 +13,7 @@
13#include <linux/compiler.h> 13#include <linux/compiler.h>
14#include <linux/cpu.h> 14#include <linux/cpu.h>
15#include <linux/sched.h> 15#include <linux/sched.h>
16#include <linux/pci.h>
16 17
17#include <asm/processor.h> 18#include <asm/processor.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
@@ -130,9 +131,18 @@ struct _cpuid4_info {
130 union _cpuid4_leaf_ebx ebx; 131 union _cpuid4_leaf_ebx ebx;
131 union _cpuid4_leaf_ecx ecx; 132 union _cpuid4_leaf_ecx ecx;
132 unsigned long size; 133 unsigned long size;
134 unsigned long can_disable;
133 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */ 135 cpumask_t shared_cpu_map; /* future?: only cpus/node is needed */
134}; 136};
135 137
138#ifdef CONFIG_PCI
139static struct pci_device_id k8_nb_id[] = {
140 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) },
141 { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) },
142 {}
143};
144#endif
145
136unsigned short num_cache_leaves; 146unsigned short num_cache_leaves;
137 147
138/* AMD doesn't have CPUID4. Emulate it here to report the same 148/* AMD doesn't have CPUID4. Emulate it here to report the same
@@ -182,9 +192,10 @@ static unsigned short assocs[] __cpuinitdata = {
182static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 }; 192static unsigned char levels[] __cpuinitdata = { 1, 1, 2, 3 };
183static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 }; 193static unsigned char types[] __cpuinitdata = { 1, 2, 3, 3 };
184 194
185static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax, 195static void __cpuinit
186 union _cpuid4_leaf_ebx *ebx, 196amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
187 union _cpuid4_leaf_ecx *ecx) 197 union _cpuid4_leaf_ebx *ebx,
198 union _cpuid4_leaf_ecx *ecx)
188{ 199{
189 unsigned dummy; 200 unsigned dummy;
190 unsigned line_size, lines_per_tag, assoc, size_in_kb; 201 unsigned line_size, lines_per_tag, assoc, size_in_kb;
@@ -251,27 +262,40 @@ static void __cpuinit amd_cpuid4(int leaf, union _cpuid4_leaf_eax *eax,
251 (ebx->split.ways_of_associativity + 1) - 1; 262 (ebx->split.ways_of_associativity + 1) - 1;
252} 263}
253 264
254static int __cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf) 265static void __cpuinit
266amd_check_l3_disable(int index, struct _cpuid4_info *this_leaf)
267{
268 if (index < 3)
269 return;
270 this_leaf->can_disable = 1;
271}
272
273static int
274__cpuinit cpuid4_cache_lookup(int index, struct _cpuid4_info *this_leaf)
255{ 275{
256 union _cpuid4_leaf_eax eax; 276 union _cpuid4_leaf_eax eax;
257 union _cpuid4_leaf_ebx ebx; 277 union _cpuid4_leaf_ebx ebx;
258 union _cpuid4_leaf_ecx ecx; 278 union _cpuid4_leaf_ecx ecx;
259 unsigned edx; 279 unsigned edx;
260 280
261 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) 281 if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) {
262 amd_cpuid4(index, &eax, &ebx, &ecx); 282 amd_cpuid4(index, &eax, &ebx, &ecx);
263 else 283 if (boot_cpu_data.x86 >= 0x10)
264 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx); 284 amd_check_l3_disable(index, this_leaf);
285 } else {
286 cpuid_count(4, index, &eax.full, &ebx.full, &ecx.full, &edx);
287 }
288
265 if (eax.split.type == CACHE_TYPE_NULL) 289 if (eax.split.type == CACHE_TYPE_NULL)
266 return -EIO; /* better error ? */ 290 return -EIO; /* better error ? */
267 291
268 this_leaf->eax = eax; 292 this_leaf->eax = eax;
269 this_leaf->ebx = ebx; 293 this_leaf->ebx = ebx;
270 this_leaf->ecx = ecx; 294 this_leaf->ecx = ecx;
271 this_leaf->size = (ecx.split.number_of_sets + 1) * 295 this_leaf->size = (ecx.split.number_of_sets + 1) *
272 (ebx.split.coherency_line_size + 1) * 296 (ebx.split.coherency_line_size + 1) *
273 (ebx.split.physical_line_partition + 1) * 297 (ebx.split.physical_line_partition + 1) *
274 (ebx.split.ways_of_associativity + 1); 298 (ebx.split.ways_of_associativity + 1);
275 return 0; 299 return 0;
276} 300}
277 301
@@ -453,7 +477,7 @@ unsigned int __cpuinit init_intel_cacheinfo(struct cpuinfo_x86 *c)
453 477
454/* pointer to _cpuid4_info array (for each cache leaf) */ 478/* pointer to _cpuid4_info array (for each cache leaf) */
455static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info); 479static DEFINE_PER_CPU(struct _cpuid4_info *, cpuid4_info);
456#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y])) 480#define CPUID4_INFO_IDX(x, y) (&((per_cpu(cpuid4_info, x))[y]))
457 481
458#ifdef CONFIG_SMP 482#ifdef CONFIG_SMP
459static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) 483static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index)
@@ -490,7 +514,7 @@ static void __cpuinit cache_remove_shared_cpu_map(unsigned int cpu, int index)
490 514
491 this_leaf = CPUID4_INFO_IDX(cpu, index); 515 this_leaf = CPUID4_INFO_IDX(cpu, index);
492 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) { 516 for_each_cpu_mask_nr(sibling, this_leaf->shared_cpu_map) {
493 sibling_leaf = CPUID4_INFO_IDX(sibling, index); 517 sibling_leaf = CPUID4_INFO_IDX(sibling, index);
494 cpu_clear(cpu, sibling_leaf->shared_cpu_map); 518 cpu_clear(cpu, sibling_leaf->shared_cpu_map);
495 } 519 }
496} 520}
@@ -572,7 +596,7 @@ struct _index_kobject {
572 596
573/* pointer to array of kobjects for cpuX/cache/indexY */ 597/* pointer to array of kobjects for cpuX/cache/indexY */
574static DEFINE_PER_CPU(struct _index_kobject *, index_kobject); 598static DEFINE_PER_CPU(struct _index_kobject *, index_kobject);
575#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y])) 599#define INDEX_KOBJECT_PTR(x, y) (&((per_cpu(index_kobject, x))[y]))
576 600
577#define show_one_plus(file_name, object, val) \ 601#define show_one_plus(file_name, object, val) \
578static ssize_t show_##file_name \ 602static ssize_t show_##file_name \
@@ -637,6 +661,99 @@ static ssize_t show_type(struct _cpuid4_info *this_leaf, char *buf) {
637 } 661 }
638} 662}
639 663
664#define to_object(k) container_of(k, struct _index_kobject, kobj)
665#define to_attr(a) container_of(a, struct _cache_attr, attr)
666
667#ifdef CONFIG_PCI
668static struct pci_dev *get_k8_northbridge(int node)
669{
670 struct pci_dev *dev = NULL;
671 int i;
672
673 for (i = 0; i <= node; i++) {
674 do {
675 dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev);
676 if (!dev)
677 break;
678 } while (!pci_match_id(&k8_nb_id[0], dev));
679 if (!dev)
680 break;
681 }
682 return dev;
683}
684#else
685static struct pci_dev *get_k8_northbridge(int node)
686{
687 return NULL;
688}
689#endif
690
691static ssize_t show_cache_disable(struct _cpuid4_info *this_leaf, char *buf)
692{
693 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
694 struct pci_dev *dev = NULL;
695 ssize_t ret = 0;
696 int i;
697
698 if (!this_leaf->can_disable)
699 return sprintf(buf, "Feature not enabled\n");
700
701 dev = get_k8_northbridge(node);
702 if (!dev) {
703 printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
704 return -EINVAL;
705 }
706
707 for (i = 0; i < 2; i++) {
708 unsigned int reg;
709
710 pci_read_config_dword(dev, 0x1BC + i * 4, &reg);
711
712 ret += sprintf(buf, "%sEntry: %d\n", buf, i);
713 ret += sprintf(buf, "%sReads: %s\tNew Entries: %s\n",
714 buf,
715 reg & 0x80000000 ? "Disabled" : "Allowed",
716 reg & 0x40000000 ? "Disabled" : "Allowed");
717 ret += sprintf(buf, "%sSubCache: %x\tIndex: %x\n",
718 buf, (reg & 0x30000) >> 16, reg & 0xfff);
719 }
720 return ret;
721}
722
723static ssize_t
724store_cache_disable(struct _cpuid4_info *this_leaf, const char *buf,
725 size_t count)
726{
727 int node = cpu_to_node(first_cpu(this_leaf->shared_cpu_map));
728 struct pci_dev *dev = NULL;
729 unsigned int ret, index, val;
730
731 if (!this_leaf->can_disable)
732 return 0;
733
734 if (strlen(buf) > 15)
735 return -EINVAL;
736
737 ret = sscanf(buf, "%x %x", &index, &val);
738 if (ret != 2)
739 return -EINVAL;
740 if (index > 1)
741 return -EINVAL;
742
743 val |= 0xc0000000;
744 dev = get_k8_northbridge(node);
745 if (!dev) {
746 printk(KERN_ERR "Attempting AMD northbridge operation on a system with no northbridge\n");
747 return -EINVAL;
748 }
749
750 pci_write_config_dword(dev, 0x1BC + index * 4, val & ~0x40000000);
751 wbinvd();
752 pci_write_config_dword(dev, 0x1BC + index * 4, val);
753
754 return 1;
755}
756
640struct _cache_attr { 757struct _cache_attr {
641 struct attribute attr; 758 struct attribute attr;
642 ssize_t (*show)(struct _cpuid4_info *, char *); 759 ssize_t (*show)(struct _cpuid4_info *, char *);
@@ -657,6 +774,8 @@ define_one_ro(size);
657define_one_ro(shared_cpu_map); 774define_one_ro(shared_cpu_map);
658define_one_ro(shared_cpu_list); 775define_one_ro(shared_cpu_list);
659 776
777static struct _cache_attr cache_disable = __ATTR(cache_disable, 0644, show_cache_disable, store_cache_disable);
778
660static struct attribute * default_attrs[] = { 779static struct attribute * default_attrs[] = {
661 &type.attr, 780 &type.attr,
662 &level.attr, 781 &level.attr,
@@ -667,12 +786,10 @@ static struct attribute * default_attrs[] = {
667 &size.attr, 786 &size.attr,
668 &shared_cpu_map.attr, 787 &shared_cpu_map.attr,
669 &shared_cpu_list.attr, 788 &shared_cpu_list.attr,
789 &cache_disable.attr,
670 NULL 790 NULL
671}; 791};
672 792
673#define to_object(k) container_of(k, struct _index_kobject, kobj)
674#define to_attr(a) container_of(a, struct _cache_attr, attr)
675
676static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf) 793static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
677{ 794{
678 struct _cache_attr *fattr = to_attr(attr); 795 struct _cache_attr *fattr = to_attr(attr);
@@ -682,14 +799,22 @@ static ssize_t show(struct kobject * kobj, struct attribute * attr, char * buf)
682 ret = fattr->show ? 799 ret = fattr->show ?
683 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index), 800 fattr->show(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
684 buf) : 801 buf) :
685 0; 802 0;
686 return ret; 803 return ret;
687} 804}
688 805
689static ssize_t store(struct kobject * kobj, struct attribute * attr, 806static ssize_t store(struct kobject * kobj, struct attribute * attr,
690 const char * buf, size_t count) 807 const char * buf, size_t count)
691{ 808{
692 return 0; 809 struct _cache_attr *fattr = to_attr(attr);
810 struct _index_kobject *this_leaf = to_object(kobj);
811 ssize_t ret;
812
813 ret = fattr->store ?
814 fattr->store(CPUID4_INFO_IDX(this_leaf->cpu, this_leaf->index),
815 buf, count) :
816 0;
817 return ret;
693} 818}
694 819
695static struct sysfs_ops sysfs_ops = { 820static struct sysfs_ops sysfs_ops = {
diff --git a/arch/x86/kernel/cpu/mcheck/mce_64.c b/arch/x86/kernel/cpu/mcheck/mce_64.c
index 726a5fcdf341..4b031a4ac856 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_64.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_64.c
@@ -860,7 +860,7 @@ error:
860 return err; 860 return err;
861} 861}
862 862
863static void mce_remove_device(unsigned int cpu) 863static __cpuinit void mce_remove_device(unsigned int cpu)
864{ 864{
865 int i; 865 int i;
866 866
diff --git a/arch/x86/kernel/cpu/mkcapflags.pl b/arch/x86/kernel/cpu/mkcapflags.pl
new file mode 100644
index 000000000000..dfea390e1608
--- /dev/null
+++ b/arch/x86/kernel/cpu/mkcapflags.pl
@@ -0,0 +1,32 @@
1#!/usr/bin/perl
2#
3# Generate the x86_cap_flags[] array from include/asm-x86/cpufeature.h
4#
5
6($in, $out) = @ARGV;
7
8open(IN, "< $in\0") or die "$0: cannot open: $in: $!\n";
9open(OUT, "> $out\0") or die "$0: cannot create: $out: $!\n";
10
11print OUT "#include <asm/cpufeature.h>\n\n";
12print OUT "const char * const x86_cap_flags[NCAPINTS*32] = {\n";
13
14while (defined($line = <IN>)) {
15 if ($line =~ /^\s*\#\s*define\s+(X86_FEATURE_(\S+))\s+(.*)$/) {
16 $macro = $1;
17 $feature = $2;
18 $tail = $3;
19 if ($tail =~ /\/\*\s*\"([^"]*)\".*\*\//) {
20 $feature = $1;
21 }
22
23 if ($feature ne '') {
24 printf OUT "\t%-32s = \"%s\",\n",
25 "[$macro]", "\L$feature";
26 }
27 }
28}
29print OUT "};\n";
30
31close(IN);
32close(OUT);
diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c
new file mode 100644
index 000000000000..5abbea297e0c
--- /dev/null
+++ b/arch/x86/kernel/cpu/powerflags.c
@@ -0,0 +1,20 @@
1/*
2 * Strings for the various x86 power flags
3 *
4 * This file must not contain any executable code.
5 */
6
7#include <asm/cpufeature.h>
8
9const char *const x86_power_flags[32] = {
10 "ts", /* temperature sensor */
11 "fid", /* frequency id control */
12 "vid", /* voltage id control */
13 "ttp", /* thermal trip */
14 "tm",
15 "stc",
16 "100mhzsteps",
17 "hwpstate",
18 "", /* tsc invariant mapped to constant_tsc */
19 /* nothing */
20};
diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
index b911a2c61b8f..52b3fefbd5af 100644
--- a/arch/x86/kernel/cpu/transmeta.c
+++ b/arch/x86/kernel/cpu/transmeta.c
@@ -5,6 +5,18 @@
5#include <asm/msr.h> 5#include <asm/msr.h>
6#include "cpu.h" 6#include "cpu.h"
7 7
8static void __cpuinit early_init_transmeta(struct cpuinfo_x86 *c)
9{
10 u32 xlvl;
11
12 /* Transmeta-defined flags: level 0x80860001 */
13 xlvl = cpuid_eax(0x80860000);
14 if ((xlvl & 0xffff0000) == 0x80860000) {
15 if (xlvl >= 0x80860001)
16 c->x86_capability[2] = cpuid_edx(0x80860001);
17 }
18}
19
8static void __cpuinit init_transmeta(struct cpuinfo_x86 *c) 20static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
9{ 21{
10 unsigned int cap_mask, uk, max, dummy; 22 unsigned int cap_mask, uk, max, dummy;
@@ -12,7 +24,8 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
12 unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev; 24 unsigned int cpu_rev, cpu_freq = 0, cpu_flags, new_cpu_rev;
13 char cpu_info[65]; 25 char cpu_info[65];
14 26
15 get_model_name(c); /* Same as AMD/Cyrix */ 27 early_init_transmeta(c);
28
16 display_cacheinfo(c); 29 display_cacheinfo(c);
17 30
18 /* Print CMS and CPU revision */ 31 /* Print CMS and CPU revision */
@@ -85,23 +98,12 @@ static void __cpuinit init_transmeta(struct cpuinfo_x86 *c)
85#endif 98#endif
86} 99}
87 100
88static void __cpuinit transmeta_identify(struct cpuinfo_x86 *c)
89{
90 u32 xlvl;
91
92 /* Transmeta-defined flags: level 0x80860001 */
93 xlvl = cpuid_eax(0x80860000);
94 if ((xlvl & 0xffff0000) == 0x80860000) {
95 if (xlvl >= 0x80860001)
96 c->x86_capability[2] = cpuid_edx(0x80860001);
97 }
98}
99
100static struct cpu_dev transmeta_cpu_dev __cpuinitdata = { 101static struct cpu_dev transmeta_cpu_dev __cpuinitdata = {
101 .c_vendor = "Transmeta", 102 .c_vendor = "Transmeta",
102 .c_ident = { "GenuineTMx86", "TransmetaCPU" }, 103 .c_ident = { "GenuineTMx86", "TransmetaCPU" },
104 .c_early_init = early_init_transmeta,
103 .c_init = init_transmeta, 105 .c_init = init_transmeta,
104 .c_identify = transmeta_identify, 106 .c_x86_vendor = X86_VENDOR_TRANSMETA,
105}; 107};
106 108
107cpu_vendor_dev_register(X86_VENDOR_TRANSMETA, &transmeta_cpu_dev); 109cpu_dev_register(transmeta_cpu_dev);
diff --git a/arch/x86/kernel/cpu/umc.c b/arch/x86/kernel/cpu/umc.c
index b1fc90989d75..e777f79e0960 100644
--- a/arch/x86/kernel/cpu/umc.c
+++ b/arch/x86/kernel/cpu/umc.c
@@ -19,7 +19,8 @@ static struct cpu_dev umc_cpu_dev __cpuinitdata = {
19 } 19 }
20 }, 20 },
21 }, 21 },
22 .c_x86_vendor = X86_VENDOR_UMC,
22}; 23};
23 24
24cpu_vendor_dev_register(X86_VENDOR_UMC, &umc_cpu_dev); 25cpu_dev_register(umc_cpu_dev);
25 26
diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c
index 66e48aa2dd1b..78e642feac30 100644
--- a/arch/x86/kernel/e820.c
+++ b/arch/x86/kernel/e820.c
@@ -148,6 +148,9 @@ void __init e820_print_map(char *who)
148 case E820_NVS: 148 case E820_NVS:
149 printk(KERN_CONT "(ACPI NVS)\n"); 149 printk(KERN_CONT "(ACPI NVS)\n");
150 break; 150 break;
151 case E820_UNUSABLE:
152 printk("(unusable)\n");
153 break;
151 default: 154 default:
152 printk(KERN_CONT "type %u\n", e820.map[i].type); 155 printk(KERN_CONT "type %u\n", e820.map[i].type);
153 break; 156 break;
@@ -1260,6 +1263,7 @@ static inline const char *e820_type_to_string(int e820_type)
1260 case E820_RAM: return "System RAM"; 1263 case E820_RAM: return "System RAM";
1261 case E820_ACPI: return "ACPI Tables"; 1264 case E820_ACPI: return "ACPI Tables";
1262 case E820_NVS: return "ACPI Non-volatile Storage"; 1265 case E820_NVS: return "ACPI Non-volatile Storage";
1266 case E820_UNUSABLE: return "Unusable memory";
1263 default: return "reserved"; 1267 default: return "reserved";
1264 } 1268 }
1265} 1269}
@@ -1267,6 +1271,7 @@ static inline const char *e820_type_to_string(int e820_type)
1267/* 1271/*
1268 * Mark e820 reserved areas as busy for the resource manager. 1272 * Mark e820 reserved areas as busy for the resource manager.
1269 */ 1273 */
1274static struct resource __initdata *e820_res;
1270void __init e820_reserve_resources(void) 1275void __init e820_reserve_resources(void)
1271{ 1276{
1272 int i; 1277 int i;
@@ -1274,6 +1279,7 @@ void __init e820_reserve_resources(void)
1274 u64 end; 1279 u64 end;
1275 1280
1276 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map); 1281 res = alloc_bootmem_low(sizeof(struct resource) * e820.nr_map);
1282 e820_res = res;
1277 for (i = 0; i < e820.nr_map; i++) { 1283 for (i = 0; i < e820.nr_map; i++) {
1278 end = e820.map[i].addr + e820.map[i].size - 1; 1284 end = e820.map[i].addr + e820.map[i].size - 1;
1279#ifndef CONFIG_RESOURCES_64BIT 1285#ifndef CONFIG_RESOURCES_64BIT
@@ -1287,7 +1293,14 @@ void __init e820_reserve_resources(void)
1287 res->end = end; 1293 res->end = end;
1288 1294
1289 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY; 1295 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
1290 insert_resource(&iomem_resource, res); 1296
1297 /*
1298 * don't register the region that could be conflicted with
1299 * pci device BAR resource and insert them later in
1300 * pcibios_resource_survey()
1301 */
1302 if (e820.map[i].type != E820_RESERVED || res->start < (1ULL<<20))
1303 insert_resource(&iomem_resource, res);
1291 res++; 1304 res++;
1292 } 1305 }
1293 1306
@@ -1299,6 +1312,19 @@ void __init e820_reserve_resources(void)
1299 } 1312 }
1300} 1313}
1301 1314
1315void __init e820_reserve_resources_late(void)
1316{
1317 int i;
1318 struct resource *res;
1319
1320 res = e820_res;
1321 for (i = 0; i < e820.nr_map; i++) {
1322 if (!res->parent && res->end)
1323 reserve_region_with_split(&iomem_resource, res->start, res->end, res->name);
1324 res++;
1325 }
1326}
1327
1302char *__init default_machine_specific_memory_setup(void) 1328char *__init default_machine_specific_memory_setup(void)
1303{ 1329{
1304 char *who = "BIOS-e820"; 1330 char *who = "BIOS-e820";
diff --git a/arch/x86/mach-es7000/es7000plat.c b/arch/x86/kernel/es7000_32.c
index 50189af14b85..849e5cd485b8 100644
--- a/arch/x86/mach-es7000/es7000plat.c
+++ b/arch/x86/kernel/es7000_32.c
@@ -39,10 +39,93 @@
39#include <asm/nmi.h> 39#include <asm/nmi.h>
40#include <asm/smp.h> 40#include <asm/smp.h>
41#include <asm/apicdef.h> 41#include <asm/apicdef.h>
42#include "es7000.h"
43#include <mach_mpparse.h> 42#include <mach_mpparse.h>
44 43
45/* 44/*
45 * ES7000 chipsets
46 */
47
48#define NON_UNISYS 0
49#define ES7000_CLASSIC 1
50#define ES7000_ZORRO 2
51
52
53#define MIP_REG 1
54#define MIP_PSAI_REG 4
55
56#define MIP_BUSY 1
57#define MIP_SPIN 0xf0000
58#define MIP_VALID 0x0100000000000000ULL
59#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
60
61#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
62
63struct mip_reg_info {
64 unsigned long long mip_info;
65 unsigned long long delivery_info;
66 unsigned long long host_reg;
67 unsigned long long mip_reg;
68};
69
70struct part_info {
71 unsigned char type;
72 unsigned char length;
73 unsigned char part_id;
74 unsigned char apic_mode;
75 unsigned long snum;
76 char ptype[16];
77 char sname[64];
78 char pname[64];
79};
80
81struct psai {
82 unsigned long long entry_type;
83 unsigned long long addr;
84 unsigned long long bep_addr;
85};
86
87struct es7000_mem_info {
88 unsigned char type;
89 unsigned char length;
90 unsigned char resv[6];
91 unsigned long long start;
92 unsigned long long size;
93};
94
95struct es7000_oem_table {
96 unsigned long long hdr;
97 struct mip_reg_info mip;
98 struct part_info pif;
99 struct es7000_mem_info shm;
100 struct psai psai;
101};
102
103#ifdef CONFIG_ACPI
104
105struct oem_table {
106 struct acpi_table_header Header;
107 u32 OEMTableAddr;
108 u32 OEMTableSize;
109};
110
111extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
112#endif
113
114struct mip_reg {
115 unsigned long long off_0;
116 unsigned long long off_8;
117 unsigned long long off_10;
118 unsigned long long off_18;
119 unsigned long long off_20;
120 unsigned long long off_28;
121 unsigned long long off_30;
122 unsigned long long off_38;
123};
124
125#define MIP_SW_APIC 0x1020b
126#define MIP_FUNC(VALUE) (VALUE & 0xff)
127
128/*
46 * ES7000 Globals 129 * ES7000 Globals
47 */ 130 */
48 131
@@ -72,7 +155,7 @@ es7000_rename_gsi(int ioapic, int gsi)
72 base += nr_ioapic_registers[i]; 155 base += nr_ioapic_registers[i];
73 } 156 }
74 157
75 if (!ioapic && (gsi < 16)) 158 if (!ioapic && (gsi < 16))
76 gsi += base; 159 gsi += base;
77 return gsi; 160 return gsi;
78} 161}
diff --git a/arch/x86/kernel/genapic_64.c b/arch/x86/kernel/genapic_64.c
index eaff0bbb1444..6c9bfc9e1e95 100644
--- a/arch/x86/kernel/genapic_64.c
+++ b/arch/x86/kernel/genapic_64.c
@@ -16,87 +16,63 @@
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h> 18#include <linux/hardirq.h>
19#include <linux/dmar.h>
19 20
20#include <asm/smp.h> 21#include <asm/smp.h>
21#include <asm/ipi.h> 22#include <asm/ipi.h>
22#include <asm/genapic.h> 23#include <asm/genapic.h>
23 24
24#ifdef CONFIG_ACPI 25extern struct genapic apic_flat;
25#include <acpi/acpi_bus.h> 26extern struct genapic apic_physflat;
26#endif 27extern struct genapic apic_x2xpic_uv_x;
27 28extern struct genapic apic_x2apic_phys;
28DEFINE_PER_CPU(int, x2apic_extra_bits); 29extern struct genapic apic_x2apic_cluster;
29 30
30struct genapic __read_mostly *genapic = &apic_flat; 31struct genapic __read_mostly *genapic = &apic_flat;
31 32
32static enum uv_system_type uv_system_type; 33static struct genapic *apic_probe[] __initdata = {
34 &apic_x2apic_uv_x,
35 &apic_x2apic_phys,
36 &apic_x2apic_cluster,
37 &apic_physflat,
38 NULL,
39};
33 40
34/* 41/*
35 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode. 42 * Check the APIC IDs in bios_cpu_apicid and choose the APIC mode.
36 */ 43 */
37void __init setup_apic_routing(void) 44void __init setup_apic_routing(void)
38{ 45{
39 if (uv_system_type == UV_NON_UNIQUE_APIC) 46 if (genapic == &apic_x2apic_phys || genapic == &apic_x2apic_cluster) {
40 genapic = &apic_x2apic_uv_x; 47 if (!intr_remapping_enabled)
41 else 48 genapic = &apic_flat;
42#ifdef CONFIG_ACPI 49 }
43 /*
44 * Quirk: some x86_64 machines can only use physical APIC mode
45 * regardless of how many processors are present (x86_64 ES7000
46 * is an example).
47 */
48 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
49 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
50 genapic = &apic_physflat;
51 else
52#endif
53
54 if (max_physical_apicid < 8)
55 genapic = &apic_flat;
56 else
57 genapic = &apic_physflat;
58 50
59 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name); 51 if (genapic == &apic_flat) {
52 if (max_physical_apicid >= 8)
53 genapic = &apic_physflat;
54 printk(KERN_INFO "Setting APIC routing to %s\n", genapic->name);
55 }
60} 56}
61 57
62/* Same for both flat and physical. */ 58/* Same for both flat and physical. */
63 59
64void send_IPI_self(int vector) 60void apic_send_IPI_self(int vector)
65{ 61{
66 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL); 62 __send_IPI_shortcut(APIC_DEST_SELF, vector, APIC_DEST_PHYSICAL);
67} 63}
68 64
69int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id) 65int __init acpi_madt_oem_check(char *oem_id, char *oem_table_id)
70{ 66{
71 if (!strcmp(oem_id, "SGI")) { 67 int i;
72 if (!strcmp(oem_table_id, "UVL")) 68
73 uv_system_type = UV_LEGACY_APIC; 69 for (i = 0; apic_probe[i]; ++i) {
74 else if (!strcmp(oem_table_id, "UVX")) 70 if (apic_probe[i]->acpi_madt_oem_check(oem_id, oem_table_id)) {
75 uv_system_type = UV_X2APIC; 71 genapic = apic_probe[i];
76 else if (!strcmp(oem_table_id, "UVH")) 72 printk(KERN_INFO "Setting APIC routing to %s.\n",
77 uv_system_type = UV_NON_UNIQUE_APIC; 73 genapic->name);
74 return 1;
75 }
78 } 76 }
79 return 0; 77 return 0;
80} 78}
81
82unsigned int read_apic_id(void)
83{
84 unsigned int id;
85
86 WARN_ON(preemptible() && num_online_cpus() > 1);
87 id = apic_read(APIC_ID);
88 if (uv_system_type >= UV_X2APIC)
89 id |= __get_cpu_var(x2apic_extra_bits);
90 return id;
91}
92
93enum uv_system_type get_uv_system_type(void)
94{
95 return uv_system_type;
96}
97
98int is_uv_system(void)
99{
100 return uv_system_type != UV_NONE;
101}
102EXPORT_SYMBOL_GPL(is_uv_system);
diff --git a/arch/x86/kernel/genapic_flat_64.c b/arch/x86/kernel/genapic_flat_64.c
index 786548a62d38..9eca5ba7a6b1 100644
--- a/arch/x86/kernel/genapic_flat_64.c
+++ b/arch/x86/kernel/genapic_flat_64.c
@@ -15,9 +15,20 @@
15#include <linux/kernel.h> 15#include <linux/kernel.h>
16#include <linux/ctype.h> 16#include <linux/ctype.h>
17#include <linux/init.h> 17#include <linux/init.h>
18#include <linux/hardirq.h>
18#include <asm/smp.h> 19#include <asm/smp.h>
19#include <asm/ipi.h> 20#include <asm/ipi.h>
20#include <asm/genapic.h> 21#include <asm/genapic.h>
22#include <mach_apicdef.h>
23
24#ifdef CONFIG_ACPI
25#include <acpi/acpi_bus.h>
26#endif
27
28static int __init flat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
29{
30 return 1;
31}
21 32
22static cpumask_t flat_target_cpus(void) 33static cpumask_t flat_target_cpus(void)
23{ 34{
@@ -95,9 +106,33 @@ static void flat_send_IPI_all(int vector)
95 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL); 106 __send_IPI_shortcut(APIC_DEST_ALLINC, vector, APIC_DEST_LOGICAL);
96} 107}
97 108
109static unsigned int get_apic_id(unsigned long x)
110{
111 unsigned int id;
112
113 id = (((x)>>24) & 0xFFu);
114 return id;
115}
116
117static unsigned long set_apic_id(unsigned int id)
118{
119 unsigned long x;
120
121 x = ((id & 0xFFu)<<24);
122 return x;
123}
124
125static unsigned int read_xapic_id(void)
126{
127 unsigned int id;
128
129 id = get_apic_id(apic_read(APIC_ID));
130 return id;
131}
132
98static int flat_apic_id_registered(void) 133static int flat_apic_id_registered(void)
99{ 134{
100 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 135 return physid_isset(read_xapic_id(), phys_cpu_present_map);
101} 136}
102 137
103static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask) 138static unsigned int flat_cpu_mask_to_apicid(cpumask_t cpumask)
@@ -112,6 +147,7 @@ static unsigned int phys_pkg_id(int index_msb)
112 147
113struct genapic apic_flat = { 148struct genapic apic_flat = {
114 .name = "flat", 149 .name = "flat",
150 .acpi_madt_oem_check = flat_acpi_madt_oem_check,
115 .int_delivery_mode = dest_LowestPrio, 151 .int_delivery_mode = dest_LowestPrio,
116 .int_dest_mode = (APIC_DEST_LOGICAL != 0), 152 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
117 .target_cpus = flat_target_cpus, 153 .target_cpus = flat_target_cpus,
@@ -121,8 +157,12 @@ struct genapic apic_flat = {
121 .send_IPI_all = flat_send_IPI_all, 157 .send_IPI_all = flat_send_IPI_all,
122 .send_IPI_allbutself = flat_send_IPI_allbutself, 158 .send_IPI_allbutself = flat_send_IPI_allbutself,
123 .send_IPI_mask = flat_send_IPI_mask, 159 .send_IPI_mask = flat_send_IPI_mask,
160 .send_IPI_self = apic_send_IPI_self,
124 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid, 161 .cpu_mask_to_apicid = flat_cpu_mask_to_apicid,
125 .phys_pkg_id = phys_pkg_id, 162 .phys_pkg_id = phys_pkg_id,
163 .get_apic_id = get_apic_id,
164 .set_apic_id = set_apic_id,
165 .apic_id_mask = (0xFFu<<24),
126}; 166};
127 167
128/* 168/*
@@ -130,6 +170,21 @@ struct genapic apic_flat = {
130 * We cannot use logical delivery in this case because the mask 170 * We cannot use logical delivery in this case because the mask
131 * overflows, so use physical mode. 171 * overflows, so use physical mode.
132 */ 172 */
173static int __init physflat_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
174{
175#ifdef CONFIG_ACPI
176 /*
177 * Quirk: some x86_64 machines can only use physical APIC mode
178 * regardless of how many processors are present (x86_64 ES7000
179 * is an example).
180 */
181 if (acpi_gbl_FADT.header.revision > FADT2_REVISION_ID &&
182 (acpi_gbl_FADT.flags & ACPI_FADT_APIC_PHYSICAL))
183 return 1;
184#endif
185
186 return 0;
187}
133 188
134static cpumask_t physflat_target_cpus(void) 189static cpumask_t physflat_target_cpus(void)
135{ 190{
@@ -176,6 +231,7 @@ static unsigned int physflat_cpu_mask_to_apicid(cpumask_t cpumask)
176 231
177struct genapic apic_physflat = { 232struct genapic apic_physflat = {
178 .name = "physical flat", 233 .name = "physical flat",
234 .acpi_madt_oem_check = physflat_acpi_madt_oem_check,
179 .int_delivery_mode = dest_Fixed, 235 .int_delivery_mode = dest_Fixed,
180 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 236 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
181 .target_cpus = physflat_target_cpus, 237 .target_cpus = physflat_target_cpus,
@@ -185,6 +241,10 @@ struct genapic apic_physflat = {
185 .send_IPI_all = physflat_send_IPI_all, 241 .send_IPI_all = physflat_send_IPI_all,
186 .send_IPI_allbutself = physflat_send_IPI_allbutself, 242 .send_IPI_allbutself = physflat_send_IPI_allbutself,
187 .send_IPI_mask = physflat_send_IPI_mask, 243 .send_IPI_mask = physflat_send_IPI_mask,
244 .send_IPI_self = apic_send_IPI_self,
188 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid, 245 .cpu_mask_to_apicid = physflat_cpu_mask_to_apicid,
189 .phys_pkg_id = phys_pkg_id, 246 .phys_pkg_id = phys_pkg_id,
247 .get_apic_id = get_apic_id,
248 .set_apic_id = set_apic_id,
249 .apic_id_mask = (0xFFu<<24),
190}; 250};
diff --git a/arch/x86/kernel/genx2apic_cluster.c b/arch/x86/kernel/genx2apic_cluster.c
new file mode 100644
index 000000000000..e4bf2cc0d743
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_cluster.c
@@ -0,0 +1,159 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13DEFINE_PER_CPU(u32, x86_cpu_to_logical_apicid);
14
15static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
16{
17 if (cpu_has_x2apic)
18 return 1;
19
20 return 0;
21}
22
23/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
24
25static cpumask_t x2apic_target_cpus(void)
26{
27 return cpumask_of_cpu(0);
28}
29
30/*
31 * for now each logical cpu is in its own vector allocation domain.
32 */
33static cpumask_t x2apic_vector_allocation_domain(int cpu)
34{
35 cpumask_t domain = CPU_MASK_NONE;
36 cpu_set(cpu, domain);
37 return domain;
38}
39
40static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
41 unsigned int dest)
42{
43 unsigned long cfg;
44
45 cfg = __prepare_ICR(0, vector, dest);
46
47 /*
48 * send the IPI.
49 */
50 x2apic_icr_write(cfg, apicid);
51}
52
53/*
54 * for now, we send the IPI's one by one in the cpumask.
55 * TBD: Based on the cpu mask, we can send the IPI's to the cluster group
56 * at once. We have 16 cpu's in a cluster. This will minimize IPI register
57 * writes.
58 */
59static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
60{
61 unsigned long flags;
62 unsigned long query_cpu;
63
64 local_irq_save(flags);
65 for_each_cpu_mask(query_cpu, mask) {
66 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_logical_apicid, query_cpu),
67 vector, APIC_DEST_LOGICAL);
68 }
69 local_irq_restore(flags);
70}
71
72static void x2apic_send_IPI_allbutself(int vector)
73{
74 cpumask_t mask = cpu_online_map;
75
76 cpu_clear(smp_processor_id(), mask);
77
78 if (!cpus_empty(mask))
79 x2apic_send_IPI_mask(mask, vector);
80}
81
82static void x2apic_send_IPI_all(int vector)
83{
84 x2apic_send_IPI_mask(cpu_online_map, vector);
85}
86
87static int x2apic_apic_id_registered(void)
88{
89 return 1;
90}
91
92static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
93{
94 int cpu;
95
96 /*
97 * We're using fixed IRQ delivery, can only return one phys APIC ID.
98 * May as well be the first.
99 */
100 cpu = first_cpu(cpumask);
101 if ((unsigned)cpu < NR_CPUS)
102 return per_cpu(x86_cpu_to_logical_apicid, cpu);
103 else
104 return BAD_APICID;
105}
106
107static unsigned int get_apic_id(unsigned long x)
108{
109 unsigned int id;
110
111 id = x;
112 return id;
113}
114
115static unsigned long set_apic_id(unsigned int id)
116{
117 unsigned long x;
118
119 x = id;
120 return x;
121}
122
123static unsigned int phys_pkg_id(int index_msb)
124{
125 return current_cpu_data.initial_apicid >> index_msb;
126}
127
128static void x2apic_send_IPI_self(int vector)
129{
130 apic_write(APIC_SELF_IPI, vector);
131}
132
133static void init_x2apic_ldr(void)
134{
135 int cpu = smp_processor_id();
136
137 per_cpu(x86_cpu_to_logical_apicid, cpu) = apic_read(APIC_LDR);
138 return;
139}
140
141struct genapic apic_x2apic_cluster = {
142 .name = "cluster x2apic",
143 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
144 .int_delivery_mode = dest_LowestPrio,
145 .int_dest_mode = (APIC_DEST_LOGICAL != 0),
146 .target_cpus = x2apic_target_cpus,
147 .vector_allocation_domain = x2apic_vector_allocation_domain,
148 .apic_id_registered = x2apic_apic_id_registered,
149 .init_apic_ldr = init_x2apic_ldr,
150 .send_IPI_all = x2apic_send_IPI_all,
151 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
152 .send_IPI_mask = x2apic_send_IPI_mask,
153 .send_IPI_self = x2apic_send_IPI_self,
154 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
155 .phys_pkg_id = phys_pkg_id,
156 .get_apic_id = get_apic_id,
157 .set_apic_id = set_apic_id,
158 .apic_id_mask = (0xFFFFFFFFu),
159};
diff --git a/arch/x86/kernel/genx2apic_phys.c b/arch/x86/kernel/genx2apic_phys.c
new file mode 100644
index 000000000000..8f1343df2627
--- /dev/null
+++ b/arch/x86/kernel/genx2apic_phys.c
@@ -0,0 +1,154 @@
1#include <linux/threads.h>
2#include <linux/cpumask.h>
3#include <linux/string.h>
4#include <linux/kernel.h>
5#include <linux/ctype.h>
6#include <linux/init.h>
7#include <linux/dmar.h>
8
9#include <asm/smp.h>
10#include <asm/ipi.h>
11#include <asm/genapic.h>
12
13static int x2apic_phys;
14
15static int set_x2apic_phys_mode(char *arg)
16{
17 x2apic_phys = 1;
18 return 0;
19}
20early_param("x2apic_phys", set_x2apic_phys_mode);
21
22static int __init x2apic_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
23{
24 if (cpu_has_x2apic && x2apic_phys)
25 return 1;
26
27 return 0;
28}
29
30/* Start with all IRQs pointing to boot CPU. IRQ balancing will shift them. */
31
32static cpumask_t x2apic_target_cpus(void)
33{
34 return cpumask_of_cpu(0);
35}
36
37static cpumask_t x2apic_vector_allocation_domain(int cpu)
38{
39 cpumask_t domain = CPU_MASK_NONE;
40 cpu_set(cpu, domain);
41 return domain;
42}
43
44static void __x2apic_send_IPI_dest(unsigned int apicid, int vector,
45 unsigned int dest)
46{
47 unsigned long cfg;
48
49 cfg = __prepare_ICR(0, vector, dest);
50
51 /*
52 * send the IPI.
53 */
54 x2apic_icr_write(cfg, apicid);
55}
56
57static void x2apic_send_IPI_mask(cpumask_t mask, int vector)
58{
59 unsigned long flags;
60 unsigned long query_cpu;
61
62 local_irq_save(flags);
63 for_each_cpu_mask(query_cpu, mask) {
64 __x2apic_send_IPI_dest(per_cpu(x86_cpu_to_apicid, query_cpu),
65 vector, APIC_DEST_PHYSICAL);
66 }
67 local_irq_restore(flags);
68}
69
70static void x2apic_send_IPI_allbutself(int vector)
71{
72 cpumask_t mask = cpu_online_map;
73
74 cpu_clear(smp_processor_id(), mask);
75
76 if (!cpus_empty(mask))
77 x2apic_send_IPI_mask(mask, vector);
78}
79
80static void x2apic_send_IPI_all(int vector)
81{
82 x2apic_send_IPI_mask(cpu_online_map, vector);
83}
84
85static int x2apic_apic_id_registered(void)
86{
87 return 1;
88}
89
90static unsigned int x2apic_cpu_mask_to_apicid(cpumask_t cpumask)
91{
92 int cpu;
93
94 /*
95 * We're using fixed IRQ delivery, can only return one phys APIC ID.
96 * May as well be the first.
97 */
98 cpu = first_cpu(cpumask);
99 if ((unsigned)cpu < NR_CPUS)
100 return per_cpu(x86_cpu_to_apicid, cpu);
101 else
102 return BAD_APICID;
103}
104
105static unsigned int get_apic_id(unsigned long x)
106{
107 unsigned int id;
108
109 id = x;
110 return id;
111}
112
113static unsigned long set_apic_id(unsigned int id)
114{
115 unsigned long x;
116
117 x = id;
118 return x;
119}
120
121static unsigned int phys_pkg_id(int index_msb)
122{
123 return current_cpu_data.initial_apicid >> index_msb;
124}
125
126void x2apic_send_IPI_self(int vector)
127{
128 apic_write(APIC_SELF_IPI, vector);
129}
130
131void init_x2apic_ldr(void)
132{
133 return;
134}
135
136struct genapic apic_x2apic_phys = {
137 .name = "physical x2apic",
138 .acpi_madt_oem_check = x2apic_acpi_madt_oem_check,
139 .int_delivery_mode = dest_Fixed,
140 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
141 .target_cpus = x2apic_target_cpus,
142 .vector_allocation_domain = x2apic_vector_allocation_domain,
143 .apic_id_registered = x2apic_apic_id_registered,
144 .init_apic_ldr = init_x2apic_ldr,
145 .send_IPI_all = x2apic_send_IPI_all,
146 .send_IPI_allbutself = x2apic_send_IPI_allbutself,
147 .send_IPI_mask = x2apic_send_IPI_mask,
148 .send_IPI_self = x2apic_send_IPI_self,
149 .cpu_mask_to_apicid = x2apic_cpu_mask_to_apicid,
150 .phys_pkg_id = phys_pkg_id,
151 .get_apic_id = get_apic_id,
152 .set_apic_id = set_apic_id,
153 .apic_id_mask = (0xFFFFFFFFu),
154};
diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c
index bfa837cb16be..ae2ffc8a400c 100644
--- a/arch/x86/kernel/genx2apic_uv_x.c
+++ b/arch/x86/kernel/genx2apic_uv_x.c
@@ -12,12 +12,12 @@
12#include <linux/threads.h> 12#include <linux/threads.h>
13#include <linux/cpumask.h> 13#include <linux/cpumask.h>
14#include <linux/string.h> 14#include <linux/string.h>
15#include <linux/kernel.h>
16#include <linux/ctype.h> 15#include <linux/ctype.h>
17#include <linux/init.h> 16#include <linux/init.h>
18#include <linux/sched.h> 17#include <linux/sched.h>
19#include <linux/bootmem.h> 18#include <linux/bootmem.h>
20#include <linux/module.h> 19#include <linux/module.h>
20#include <linux/hardirq.h>
21#include <asm/smp.h> 21#include <asm/smp.h>
22#include <asm/ipi.h> 22#include <asm/ipi.h>
23#include <asm/genapic.h> 23#include <asm/genapic.h>
@@ -26,6 +26,36 @@
26#include <asm/uv/uv_hub.h> 26#include <asm/uv/uv_hub.h>
27#include <asm/uv/bios.h> 27#include <asm/uv/bios.h>
28 28
29DEFINE_PER_CPU(int, x2apic_extra_bits);
30
31static enum uv_system_type uv_system_type;
32
33static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id)
34{
35 if (!strcmp(oem_id, "SGI")) {
36 if (!strcmp(oem_table_id, "UVL"))
37 uv_system_type = UV_LEGACY_APIC;
38 else if (!strcmp(oem_table_id, "UVX"))
39 uv_system_type = UV_X2APIC;
40 else if (!strcmp(oem_table_id, "UVH")) {
41 uv_system_type = UV_NON_UNIQUE_APIC;
42 return 1;
43 }
44 }
45 return 0;
46}
47
48enum uv_system_type get_uv_system_type(void)
49{
50 return uv_system_type;
51}
52
53int is_uv_system(void)
54{
55 return uv_system_type != UV_NONE;
56}
57EXPORT_SYMBOL_GPL(is_uv_system);
58
29DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info); 59DEFINE_PER_CPU(struct uv_hub_info_s, __uv_hub_info);
30EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info); 60EXPORT_PER_CPU_SYMBOL_GPL(__uv_hub_info);
31 61
@@ -123,6 +153,10 @@ static int uv_apic_id_registered(void)
123 return 1; 153 return 1;
124} 154}
125 155
156static void uv_init_apic_ldr(void)
157{
158}
159
126static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask) 160static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
127{ 161{
128 int cpu; 162 int cpu;
@@ -138,9 +172,34 @@ static unsigned int uv_cpu_mask_to_apicid(cpumask_t cpumask)
138 return BAD_APICID; 172 return BAD_APICID;
139} 173}
140 174
175static unsigned int get_apic_id(unsigned long x)
176{
177 unsigned int id;
178
179 WARN_ON(preemptible() && num_online_cpus() > 1);
180 id = x | __get_cpu_var(x2apic_extra_bits);
181
182 return id;
183}
184
185static unsigned long set_apic_id(unsigned int id)
186{
187 unsigned long x;
188
189 /* maskout x2apic_extra_bits ? */
190 x = id;
191 return x;
192}
193
194static unsigned int uv_read_apic_id(void)
195{
196
197 return get_apic_id(apic_read(APIC_ID));
198}
199
141static unsigned int phys_pkg_id(int index_msb) 200static unsigned int phys_pkg_id(int index_msb)
142{ 201{
143 return GET_APIC_ID(read_apic_id()) >> index_msb; 202 return uv_read_apic_id() >> index_msb;
144} 203}
145 204
146#ifdef ZZZ /* Needs x2apic patch */ 205#ifdef ZZZ /* Needs x2apic patch */
@@ -152,17 +211,22 @@ static void uv_send_IPI_self(int vector)
152 211
153struct genapic apic_x2apic_uv_x = { 212struct genapic apic_x2apic_uv_x = {
154 .name = "UV large system", 213 .name = "UV large system",
214 .acpi_madt_oem_check = uv_acpi_madt_oem_check,
155 .int_delivery_mode = dest_Fixed, 215 .int_delivery_mode = dest_Fixed,
156 .int_dest_mode = (APIC_DEST_PHYSICAL != 0), 216 .int_dest_mode = (APIC_DEST_PHYSICAL != 0),
157 .target_cpus = uv_target_cpus, 217 .target_cpus = uv_target_cpus,
158 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */ 218 .vector_allocation_domain = uv_vector_allocation_domain,/* Fixme ZZZ */
159 .apic_id_registered = uv_apic_id_registered, 219 .apic_id_registered = uv_apic_id_registered,
220 .init_apic_ldr = uv_init_apic_ldr,
160 .send_IPI_all = uv_send_IPI_all, 221 .send_IPI_all = uv_send_IPI_all,
161 .send_IPI_allbutself = uv_send_IPI_allbutself, 222 .send_IPI_allbutself = uv_send_IPI_allbutself,
162 .send_IPI_mask = uv_send_IPI_mask, 223 .send_IPI_mask = uv_send_IPI_mask,
163 /* ZZZ.send_IPI_self = uv_send_IPI_self, */ 224 /* ZZZ.send_IPI_self = uv_send_IPI_self, */
164 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid, 225 .cpu_mask_to_apicid = uv_cpu_mask_to_apicid,
165 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */ 226 .phys_pkg_id = phys_pkg_id, /* Fixme ZZZ */
227 .get_apic_id = get_apic_id,
228 .set_apic_id = set_apic_id,
229 .apic_id_mask = (0xFFFFFFFFu),
166}; 230};
167 231
168static __cpuinit void set_x2apic_extra_bits(int pnode) 232static __cpuinit void set_x2apic_extra_bits(int pnode)
@@ -401,3 +465,5 @@ void __cpuinit uv_cpu_init(void)
401 if (get_uv_system_type() == UV_NON_UNIQUE_APIC) 465 if (get_uv_system_type() == UV_NON_UNIQUE_APIC)
402 set_x2apic_extra_bits(uv_hub_info->pnode); 466 set_x2apic_extra_bits(uv_hub_info->pnode);
403} 467}
468
469
diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c
index eb9ddd8efb82..45723f1fe198 100644
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@@ -21,9 +21,12 @@
21# include <asm/sigcontext32.h> 21# include <asm/sigcontext32.h>
22# include <asm/user32.h> 22# include <asm/user32.h>
23#else 23#else
24# define save_i387_ia32 save_i387 24# define save_i387_xstate_ia32 save_i387_xstate
25# define restore_i387_ia32 restore_i387 25# define restore_i387_xstate_ia32 restore_i387_xstate
26# define _fpstate_ia32 _fpstate 26# define _fpstate_ia32 _fpstate
27# define _xstate_ia32 _xstate
28# define sig_xstate_ia32_size sig_xstate_size
29# define fx_sw_reserved_ia32 fx_sw_reserved
27# define user_i387_ia32_struct user_i387_struct 30# define user_i387_ia32_struct user_i387_struct
28# define user32_fxsr_struct user_fxsr_struct 31# define user32_fxsr_struct user_fxsr_struct
29#endif 32#endif
@@ -36,6 +39,7 @@
36 39
37static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; 40static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu;
38unsigned int xstate_size; 41unsigned int xstate_size;
42unsigned int sig_xstate_ia32_size = sizeof(struct _fpstate_ia32);
39static struct i387_fxsave_struct fx_scratch __cpuinitdata; 43static struct i387_fxsave_struct fx_scratch __cpuinitdata;
40 44
41void __cpuinit mxcsr_feature_mask_init(void) 45void __cpuinit mxcsr_feature_mask_init(void)
@@ -61,6 +65,11 @@ void __init init_thread_xstate(void)
61 return; 65 return;
62 } 66 }
63 67
68 if (cpu_has_xsave) {
69 xsave_cntxt_init();
70 return;
71 }
72
64 if (cpu_has_fxsr) 73 if (cpu_has_fxsr)
65 xstate_size = sizeof(struct i387_fxsave_struct); 74 xstate_size = sizeof(struct i387_fxsave_struct);
66#ifdef CONFIG_X86_32 75#ifdef CONFIG_X86_32
@@ -83,9 +92,19 @@ void __cpuinit fpu_init(void)
83 92
84 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */ 93 write_cr0(oldcr0 & ~(X86_CR0_TS|X86_CR0_EM)); /* clear TS and EM */
85 94
95 /*
96 * Boot processor to setup the FP and extended state context info.
97 */
98 if (!smp_processor_id())
99 init_thread_xstate();
100 xsave_init();
101
86 mxcsr_feature_mask_init(); 102 mxcsr_feature_mask_init();
87 /* clean state in init */ 103 /* clean state in init */
88 current_thread_info()->status = 0; 104 if (cpu_has_xsave)
105 current_thread_info()->status = TS_XSAVE;
106 else
107 current_thread_info()->status = 0;
89 clear_used_math(); 108 clear_used_math();
90} 109}
91#endif /* CONFIG_X86_64 */ 110#endif /* CONFIG_X86_64 */
@@ -195,6 +214,13 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset,
195 */ 214 */
196 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; 215 target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
197 216
217 /*
218 * update the header bits in the xsave header, indicating the
219 * presence of FP and SSE state.
220 */
221 if (cpu_has_xsave)
222 target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FPSSE;
223
198 return ret; 224 return ret;
199} 225}
200 226
@@ -395,6 +421,12 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset,
395 if (!ret) 421 if (!ret)
396 convert_to_fxsr(target, &env); 422 convert_to_fxsr(target, &env);
397 423
424 /*
425 * update the header bit in the xsave header, indicating the
426 * presence of FP.
427 */
428 if (cpu_has_xsave)
429 target->thread.xstate->xsave.xsave_hdr.xstate_bv |= XSTATE_FP;
398 return ret; 430 return ret;
399} 431}
400 432
@@ -407,7 +439,6 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf)
407 struct task_struct *tsk = current; 439 struct task_struct *tsk = current;
408 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; 440 struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave;
409 441
410 unlazy_fpu(tsk);
411 fp->status = fp->swd; 442 fp->status = fp->swd;
412 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) 443 if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct)))
413 return -1; 444 return -1;
@@ -421,8 +452,6 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
421 struct user_i387_ia32_struct env; 452 struct user_i387_ia32_struct env;
422 int err = 0; 453 int err = 0;
423 454
424 unlazy_fpu(tsk);
425
426 convert_from_fxsr(&env, tsk); 455 convert_from_fxsr(&env, tsk);
427 if (__copy_to_user(buf, &env, sizeof(env))) 456 if (__copy_to_user(buf, &env, sizeof(env)))
428 return -1; 457 return -1;
@@ -432,16 +461,40 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf)
432 if (err) 461 if (err)
433 return -1; 462 return -1;
434 463
435 if (__copy_to_user(&buf->_fxsr_env[0], fx, 464 if (__copy_to_user(&buf->_fxsr_env[0], fx, xstate_size))
436 sizeof(struct i387_fxsave_struct)))
437 return -1; 465 return -1;
438 return 1; 466 return 1;
439} 467}
440 468
441int save_i387_ia32(struct _fpstate_ia32 __user *buf) 469static int save_i387_xsave(void __user *buf)
470{
471 struct _fpstate_ia32 __user *fx = buf;
472 int err = 0;
473
474 if (save_i387_fxsave(fx) < 0)
475 return -1;
476
477 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved_ia32,
478 sizeof(struct _fpx_sw_bytes));
479 err |= __put_user(FP_XSTATE_MAGIC2,
480 (__u32 __user *) (buf + sig_xstate_ia32_size
481 - FP_XSTATE_MAGIC2_SIZE));
482 if (err)
483 return -1;
484
485 return 1;
486}
487
488int save_i387_xstate_ia32(void __user *buf)
442{ 489{
490 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
491 struct task_struct *tsk = current;
492
443 if (!used_math()) 493 if (!used_math())
444 return 0; 494 return 0;
495
496 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_ia32_size))
497 return -EACCES;
445 /* 498 /*
446 * This will cause a "finit" to be triggered by the next 499 * This will cause a "finit" to be triggered by the next
447 * attempted FPU operation by the 'current' process. 500 * attempted FPU operation by the 'current' process.
@@ -451,13 +504,17 @@ int save_i387_ia32(struct _fpstate_ia32 __user *buf)
451 if (!HAVE_HWFP) { 504 if (!HAVE_HWFP) {
452 return fpregs_soft_get(current, NULL, 505 return fpregs_soft_get(current, NULL,
453 0, sizeof(struct user_i387_ia32_struct), 506 0, sizeof(struct user_i387_ia32_struct),
454 NULL, buf) ? -1 : 1; 507 NULL, fp) ? -1 : 1;
455 } 508 }
456 509
510 unlazy_fpu(tsk);
511
512 if (cpu_has_xsave)
513 return save_i387_xsave(fp);
457 if (cpu_has_fxsr) 514 if (cpu_has_fxsr)
458 return save_i387_fxsave(buf); 515 return save_i387_fxsave(fp);
459 else 516 else
460 return save_i387_fsave(buf); 517 return save_i387_fsave(fp);
461} 518}
462 519
463static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) 520static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
@@ -468,14 +525,15 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf)
468 sizeof(struct i387_fsave_struct)); 525 sizeof(struct i387_fsave_struct));
469} 526}
470 527
471static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) 528static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf,
529 unsigned int size)
472{ 530{
473 struct task_struct *tsk = current; 531 struct task_struct *tsk = current;
474 struct user_i387_ia32_struct env; 532 struct user_i387_ia32_struct env;
475 int err; 533 int err;
476 534
477 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], 535 err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0],
478 sizeof(struct i387_fxsave_struct)); 536 size);
479 /* mxcsr reserved bits must be masked to zero for security reasons */ 537 /* mxcsr reserved bits must be masked to zero for security reasons */
480 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; 538 tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask;
481 if (err || __copy_from_user(&env, buf, sizeof(env))) 539 if (err || __copy_from_user(&env, buf, sizeof(env)))
@@ -485,14 +543,69 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf)
485 return 0; 543 return 0;
486} 544}
487 545
488int restore_i387_ia32(struct _fpstate_ia32 __user *buf) 546static int restore_i387_xsave(void __user *buf)
547{
548 struct _fpx_sw_bytes fx_sw_user;
549 struct _fpstate_ia32 __user *fx_user =
550 ((struct _fpstate_ia32 __user *) buf);
551 struct i387_fxsave_struct __user *fx =
552 (struct i387_fxsave_struct __user *) &fx_user->_fxsr_env[0];
553 struct xsave_hdr_struct *xsave_hdr =
554 &current->thread.xstate->xsave.xsave_hdr;
555 u64 mask;
556 int err;
557
558 if (check_for_xstate(fx, buf, &fx_sw_user))
559 goto fx_only;
560
561 mask = fx_sw_user.xstate_bv;
562
563 err = restore_i387_fxsave(buf, fx_sw_user.xstate_size);
564
565 xsave_hdr->xstate_bv &= pcntxt_mask;
566 /*
567 * These bits must be zero.
568 */
569 xsave_hdr->reserved1[0] = xsave_hdr->reserved1[1] = 0;
570
571 /*
572 * Init the state that is not present in the memory layout
573 * and enabled by the OS.
574 */
575 mask = ~(pcntxt_mask & ~mask);
576 xsave_hdr->xstate_bv &= mask;
577
578 return err;
579fx_only:
580 /*
581 * Couldn't find the extended state information in the memory
582 * layout. Restore the FP/SSE and init the other extended state
583 * enabled by the OS.
584 */
585 xsave_hdr->xstate_bv = XSTATE_FPSSE;
586 return restore_i387_fxsave(buf, sizeof(struct i387_fxsave_struct));
587}
588
589int restore_i387_xstate_ia32(void __user *buf)
489{ 590{
490 int err; 591 int err;
491 struct task_struct *tsk = current; 592 struct task_struct *tsk = current;
593 struct _fpstate_ia32 __user *fp = (struct _fpstate_ia32 __user *) buf;
492 594
493 if (HAVE_HWFP) 595 if (HAVE_HWFP)
494 clear_fpu(tsk); 596 clear_fpu(tsk);
495 597
598 if (!buf) {
599 if (used_math()) {
600 clear_fpu(tsk);
601 clear_used_math();
602 }
603
604 return 0;
605 } else
606 if (!access_ok(VERIFY_READ, buf, sig_xstate_ia32_size))
607 return -EACCES;
608
496 if (!used_math()) { 609 if (!used_math()) {
497 err = init_fpu(tsk); 610 err = init_fpu(tsk);
498 if (err) 611 if (err)
@@ -500,14 +613,17 @@ int restore_i387_ia32(struct _fpstate_ia32 __user *buf)
500 } 613 }
501 614
502 if (HAVE_HWFP) { 615 if (HAVE_HWFP) {
503 if (cpu_has_fxsr) 616 if (cpu_has_xsave)
504 err = restore_i387_fxsave(buf); 617 err = restore_i387_xsave(buf);
618 else if (cpu_has_fxsr)
619 err = restore_i387_fxsave(fp, sizeof(struct
620 i387_fxsave_struct));
505 else 621 else
506 err = restore_i387_fsave(buf); 622 err = restore_i387_fsave(fp);
507 } else { 623 } else {
508 err = fpregs_soft_set(current, NULL, 624 err = fpregs_soft_set(current, NULL,
509 0, sizeof(struct user_i387_ia32_struct), 625 0, sizeof(struct user_i387_ia32_struct),
510 NULL, buf) != 0; 626 NULL, fp) != 0;
511 } 627 }
512 set_used_math(); 628 set_used_math();
513 629
diff --git a/arch/x86/kernel/i8259.c b/arch/x86/kernel/i8259.c
index dc92b49d9204..4b8a53d841f7 100644
--- a/arch/x86/kernel/i8259.c
+++ b/arch/x86/kernel/i8259.c
@@ -282,6 +282,30 @@ static int __init i8259A_init_sysfs(void)
282 282
283device_initcall(i8259A_init_sysfs); 283device_initcall(i8259A_init_sysfs);
284 284
285void mask_8259A(void)
286{
287 unsigned long flags;
288
289 spin_lock_irqsave(&i8259A_lock, flags);
290
291 outb(0xff, PIC_MASTER_IMR); /* mask all of 8259A-1 */
292 outb(0xff, PIC_SLAVE_IMR); /* mask all of 8259A-2 */
293
294 spin_unlock_irqrestore(&i8259A_lock, flags);
295}
296
297void unmask_8259A(void)
298{
299 unsigned long flags;
300
301 spin_lock_irqsave(&i8259A_lock, flags);
302
303 outb(cached_master_mask, PIC_MASTER_IMR); /* restore master IRQ mask */
304 outb(cached_slave_mask, PIC_SLAVE_IMR); /* restore slave IRQ mask */
305
306 spin_unlock_irqrestore(&i8259A_lock, flags);
307}
308
285void init_8259A(int auto_eoi) 309void init_8259A(int auto_eoi)
286{ 310{
287 unsigned long flags; 311 unsigned long flags;
diff --git a/arch/x86/kernel/io_apic_32.c b/arch/x86/kernel/io_apic_32.c
index 09cddb57bec4..e710289f673e 100644
--- a/arch/x86/kernel/io_apic_32.c
+++ b/arch/x86/kernel/io_apic_32.c
@@ -46,10 +46,13 @@
46#include <asm/nmi.h> 46#include <asm/nmi.h>
47#include <asm/msidef.h> 47#include <asm/msidef.h>
48#include <asm/hypertransport.h> 48#include <asm/hypertransport.h>
49#include <asm/setup.h>
49 50
50#include <mach_apic.h> 51#include <mach_apic.h>
51#include <mach_apicdef.h> 52#include <mach_apicdef.h>
52 53
54#define __apicdebuginit(type) static type __init
55
53int (*ioapic_renumber_irq)(int ioapic, int irq); 56int (*ioapic_renumber_irq)(int ioapic, int irq);
54atomic_t irq_mis_count; 57atomic_t irq_mis_count;
55 58
@@ -1341,7 +1344,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
1341 ioapic_write_entry(apic, pin, entry); 1344 ioapic_write_entry(apic, pin, entry);
1342} 1345}
1343 1346
1344void __init print_IO_APIC(void) 1347
1348__apicdebuginit(void) print_IO_APIC(void)
1345{ 1349{
1346 int apic, i; 1350 int apic, i;
1347 union IO_APIC_reg_00 reg_00; 1351 union IO_APIC_reg_00 reg_00;
@@ -1456,9 +1460,7 @@ void __init print_IO_APIC(void)
1456 return; 1460 return;
1457} 1461}
1458 1462
1459#if 0 1463__apicdebuginit(void) print_APIC_bitfield(int base)
1460
1461static void print_APIC_bitfield(int base)
1462{ 1464{
1463 unsigned int v; 1465 unsigned int v;
1464 int i, j; 1466 int i, j;
@@ -1479,9 +1481,10 @@ static void print_APIC_bitfield(int base)
1479 } 1481 }
1480} 1482}
1481 1483
1482void /*__init*/ print_local_APIC(void *dummy) 1484__apicdebuginit(void) print_local_APIC(void *dummy)
1483{ 1485{
1484 unsigned int v, ver, maxlvt; 1486 unsigned int v, ver, maxlvt;
1487 u64 icr;
1485 1488
1486 if (apic_verbosity == APIC_QUIET) 1489 if (apic_verbosity == APIC_QUIET)
1487 return; 1490 return;
@@ -1490,7 +1493,7 @@ void /*__init*/ print_local_APIC(void *dummy)
1490 smp_processor_id(), hard_smp_processor_id()); 1493 smp_processor_id(), hard_smp_processor_id());
1491 v = apic_read(APIC_ID); 1494 v = apic_read(APIC_ID);
1492 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, 1495 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v,
1493 GET_APIC_ID(read_apic_id())); 1496 GET_APIC_ID(v));
1494 v = apic_read(APIC_LVR); 1497 v = apic_read(APIC_LVR);
1495 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1498 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1496 ver = GET_APIC_VERSION(v); 1499 ver = GET_APIC_VERSION(v);
@@ -1532,10 +1535,9 @@ void /*__init*/ print_local_APIC(void *dummy)
1532 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1535 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1533 } 1536 }
1534 1537
1535 v = apic_read(APIC_ICR); 1538 icr = apic_icr_read();
1536 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1539 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1537 v = apic_read(APIC_ICR2); 1540 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1538 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1539 1541
1540 v = apic_read(APIC_LVTT); 1542 v = apic_read(APIC_LVTT);
1541 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1543 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1563,12 +1565,12 @@ void /*__init*/ print_local_APIC(void *dummy)
1563 printk("\n"); 1565 printk("\n");
1564} 1566}
1565 1567
1566void print_all_local_APICs(void) 1568__apicdebuginit(void) print_all_local_APICs(void)
1567{ 1569{
1568 on_each_cpu(print_local_APIC, NULL, 1); 1570 on_each_cpu(print_local_APIC, NULL, 1);
1569} 1571}
1570 1572
1571void /*__init*/ print_PIC(void) 1573__apicdebuginit(void) print_PIC(void)
1572{ 1574{
1573 unsigned int v; 1575 unsigned int v;
1574 unsigned long flags; 1576 unsigned long flags;
@@ -1600,7 +1602,17 @@ void /*__init*/ print_PIC(void)
1600 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1602 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1601} 1603}
1602 1604
1603#endif /* 0 */ 1605__apicdebuginit(int) print_all_ICs(void)
1606{
1607 print_PIC();
1608 print_all_local_APICs();
1609 print_IO_APIC();
1610
1611 return 0;
1612}
1613
1614fs_initcall(print_all_ICs);
1615
1604 1616
1605static void __init enable_IO_APIC(void) 1617static void __init enable_IO_APIC(void)
1606{ 1618{
@@ -1698,8 +1710,7 @@ void disable_IO_APIC(void)
1698 entry.dest_mode = 0; /* Physical */ 1710 entry.dest_mode = 0; /* Physical */
1699 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1711 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1700 entry.vector = 0; 1712 entry.vector = 0;
1701 entry.dest.physical.physical_dest = 1713 entry.dest.physical.physical_dest = read_apic_id();
1702 GET_APIC_ID(read_apic_id());
1703 1714
1704 /* 1715 /*
1705 * Add it to the IO-APIC irq-routing table: 1716 * Add it to the IO-APIC irq-routing table:
@@ -1725,10 +1736,8 @@ static void __init setup_ioapic_ids_from_mpc(void)
1725 unsigned char old_id; 1736 unsigned char old_id;
1726 unsigned long flags; 1737 unsigned long flags;
1727 1738
1728#ifdef CONFIG_X86_NUMAQ 1739 if (x86_quirks->setup_ioapic_ids && x86_quirks->setup_ioapic_ids())
1729 if (found_numaq)
1730 return; 1740 return;
1731#endif
1732 1741
1733 /* 1742 /*
1734 * Don't check I/O APIC IDs for xAPIC systems. They have 1743 * Don't check I/O APIC IDs for xAPIC systems. They have
@@ -2329,8 +2338,6 @@ void __init setup_IO_APIC(void)
2329 setup_IO_APIC_irqs(); 2338 setup_IO_APIC_irqs();
2330 init_IO_APIC_traps(); 2339 init_IO_APIC_traps();
2331 check_timer(); 2340 check_timer();
2332 if (!acpi_ioapic)
2333 print_IO_APIC();
2334} 2341}
2335 2342
2336/* 2343/*
diff --git a/arch/x86/kernel/io_apic_64.c b/arch/x86/kernel/io_apic_64.c
index 61a83b70c18f..a1bec2969c6a 100644
--- a/arch/x86/kernel/io_apic_64.c
+++ b/arch/x86/kernel/io_apic_64.c
@@ -37,6 +37,7 @@
37#include <acpi/acpi_bus.h> 37#include <acpi/acpi_bus.h>
38#endif 38#endif
39#include <linux/bootmem.h> 39#include <linux/bootmem.h>
40#include <linux/dmar.h>
40 41
41#include <asm/idle.h> 42#include <asm/idle.h>
42#include <asm/io.h> 43#include <asm/io.h>
@@ -49,10 +50,13 @@
49#include <asm/nmi.h> 50#include <asm/nmi.h>
50#include <asm/msidef.h> 51#include <asm/msidef.h>
51#include <asm/hypertransport.h> 52#include <asm/hypertransport.h>
53#include <asm/irq_remapping.h>
52 54
53#include <mach_ipi.h> 55#include <mach_ipi.h>
54#include <mach_apic.h> 56#include <mach_apic.h>
55 57
58#define __apicdebuginit(type) static type __init
59
56struct irq_cfg { 60struct irq_cfg {
57 cpumask_t domain; 61 cpumask_t domain;
58 cpumask_t old_domain; 62 cpumask_t old_domain;
@@ -87,8 +91,6 @@ int first_system_vector = 0xfe;
87 91
88char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE}; 92char system_vectors[NR_VECTORS] = { [0 ... NR_VECTORS-1] = SYS_VECTOR_FREE};
89 93
90#define __apicdebuginit __init
91
92int sis_apic_bug; /* not actually supported, dummy for compile */ 94int sis_apic_bug; /* not actually supported, dummy for compile */
93 95
94static int no_timer_check; 96static int no_timer_check;
@@ -108,6 +110,9 @@ static DEFINE_SPINLOCK(vector_lock);
108 */ 110 */
109int nr_ioapic_registers[MAX_IO_APICS]; 111int nr_ioapic_registers[MAX_IO_APICS];
110 112
113/* I/O APIC RTE contents at the OS boot up */
114struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS];
115
111/* I/O APIC entries */ 116/* I/O APIC entries */
112struct mp_config_ioapic mp_ioapics[MAX_IO_APICS]; 117struct mp_config_ioapic mp_ioapics[MAX_IO_APICS];
113int nr_ioapics; 118int nr_ioapics;
@@ -303,7 +308,12 @@ static void __target_IO_APIC_irq(unsigned int irq, unsigned int dest, u8 vector)
303 pin = entry->pin; 308 pin = entry->pin;
304 if (pin == -1) 309 if (pin == -1)
305 break; 310 break;
306 io_apic_write(apic, 0x11 + pin*2, dest); 311 /*
312 * With interrupt-remapping, destination information comes
313 * from interrupt-remapping table entry.
314 */
315 if (!irq_remapped(irq))
316 io_apic_write(apic, 0x11 + pin*2, dest);
307 reg = io_apic_read(apic, 0x10 + pin*2); 317 reg = io_apic_read(apic, 0x10 + pin*2);
308 reg &= ~IO_APIC_REDIR_VECTOR_MASK; 318 reg &= ~IO_APIC_REDIR_VECTOR_MASK;
309 reg |= vector; 319 reg |= vector;
@@ -440,6 +450,69 @@ static void clear_IO_APIC (void)
440 clear_IO_APIC_pin(apic, pin); 450 clear_IO_APIC_pin(apic, pin);
441} 451}
442 452
453/*
454 * Saves and masks all the unmasked IO-APIC RTE's
455 */
456int save_mask_IO_APIC_setup(void)
457{
458 union IO_APIC_reg_01 reg_01;
459 unsigned long flags;
460 int apic, pin;
461
462 /*
463 * The number of IO-APIC IRQ registers (== #pins):
464 */
465 for (apic = 0; apic < nr_ioapics; apic++) {
466 spin_lock_irqsave(&ioapic_lock, flags);
467 reg_01.raw = io_apic_read(apic, 1);
468 spin_unlock_irqrestore(&ioapic_lock, flags);
469 nr_ioapic_registers[apic] = reg_01.bits.entries+1;
470 }
471
472 for (apic = 0; apic < nr_ioapics; apic++) {
473 early_ioapic_entries[apic] =
474 kzalloc(sizeof(struct IO_APIC_route_entry) *
475 nr_ioapic_registers[apic], GFP_KERNEL);
476 if (!early_ioapic_entries[apic])
477 return -ENOMEM;
478 }
479
480 for (apic = 0; apic < nr_ioapics; apic++)
481 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) {
482 struct IO_APIC_route_entry entry;
483
484 entry = early_ioapic_entries[apic][pin] =
485 ioapic_read_entry(apic, pin);
486 if (!entry.mask) {
487 entry.mask = 1;
488 ioapic_write_entry(apic, pin, entry);
489 }
490 }
491 return 0;
492}
493
494void restore_IO_APIC_setup(void)
495{
496 int apic, pin;
497
498 for (apic = 0; apic < nr_ioapics; apic++)
499 for (pin = 0; pin < nr_ioapic_registers[apic]; pin++)
500 ioapic_write_entry(apic, pin,
501 early_ioapic_entries[apic][pin]);
502}
503
504void reinit_intr_remapped_IO_APIC(int intr_remapping)
505{
506 /*
507 * for now plain restore of previous settings.
508 * TBD: In the case of OS enabling interrupt-remapping,
509 * IO-APIC RTE's need to be setup to point to interrupt-remapping
510 * table entries. for now, do a plain restore, and wait for
511 * the setup_IO_APIC_irqs() to do proper initialization.
512 */
513 restore_IO_APIC_setup();
514}
515
443int skip_ioapic_setup; 516int skip_ioapic_setup;
444int ioapic_force; 517int ioapic_force;
445 518
@@ -839,18 +912,98 @@ void __setup_vector_irq(int cpu)
839} 912}
840 913
841static struct irq_chip ioapic_chip; 914static struct irq_chip ioapic_chip;
915#ifdef CONFIG_INTR_REMAP
916static struct irq_chip ir_ioapic_chip;
917#endif
842 918
843static void ioapic_register_intr(int irq, unsigned long trigger) 919static void ioapic_register_intr(int irq, unsigned long trigger)
844{ 920{
845 if (trigger) { 921 if (trigger)
846 irq_desc[irq].status |= IRQ_LEVEL; 922 irq_desc[irq].status |= IRQ_LEVEL;
847 set_irq_chip_and_handler_name(irq, &ioapic_chip, 923 else
848 handle_fasteoi_irq, "fasteoi");
849 } else {
850 irq_desc[irq].status &= ~IRQ_LEVEL; 924 irq_desc[irq].status &= ~IRQ_LEVEL;
925
926#ifdef CONFIG_INTR_REMAP
927 if (irq_remapped(irq)) {
928 irq_desc[irq].status |= IRQ_MOVE_PCNTXT;
929 if (trigger)
930 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
931 handle_fasteoi_irq,
932 "fasteoi");
933 else
934 set_irq_chip_and_handler_name(irq, &ir_ioapic_chip,
935 handle_edge_irq, "edge");
936 return;
937 }
938#endif
939 if (trigger)
940 set_irq_chip_and_handler_name(irq, &ioapic_chip,
941 handle_fasteoi_irq,
942 "fasteoi");
943 else
851 set_irq_chip_and_handler_name(irq, &ioapic_chip, 944 set_irq_chip_and_handler_name(irq, &ioapic_chip,
852 handle_edge_irq, "edge"); 945 handle_edge_irq, "edge");
946}
947
948static int setup_ioapic_entry(int apic, int irq,
949 struct IO_APIC_route_entry *entry,
950 unsigned int destination, int trigger,
951 int polarity, int vector)
952{
953 /*
954 * add it to the IO-APIC irq-routing table:
955 */
956 memset(entry,0,sizeof(*entry));
957
958#ifdef CONFIG_INTR_REMAP
959 if (intr_remapping_enabled) {
960 struct intel_iommu *iommu = map_ioapic_to_ir(apic);
961 struct irte irte;
962 struct IR_IO_APIC_route_entry *ir_entry =
963 (struct IR_IO_APIC_route_entry *) entry;
964 int index;
965
966 if (!iommu)
967 panic("No mapping iommu for ioapic %d\n", apic);
968
969 index = alloc_irte(iommu, irq, 1);
970 if (index < 0)
971 panic("Failed to allocate IRTE for ioapic %d\n", apic);
972
973 memset(&irte, 0, sizeof(irte));
974
975 irte.present = 1;
976 irte.dst_mode = INT_DEST_MODE;
977 irte.trigger_mode = trigger;
978 irte.dlvry_mode = INT_DELIVERY_MODE;
979 irte.vector = vector;
980 irte.dest_id = IRTE_DEST(destination);
981
982 modify_irte(irq, &irte);
983
984 ir_entry->index2 = (index >> 15) & 0x1;
985 ir_entry->zero = 0;
986 ir_entry->format = 1;
987 ir_entry->index = (index & 0x7fff);
988 } else
989#endif
990 {
991 entry->delivery_mode = INT_DELIVERY_MODE;
992 entry->dest_mode = INT_DEST_MODE;
993 entry->dest = destination;
853 } 994 }
995
996 entry->mask = 0; /* enable IRQ */
997 entry->trigger = trigger;
998 entry->polarity = polarity;
999 entry->vector = vector;
1000
1001 /* Mask level triggered irqs.
1002 * Use IRQ_DELAYED_DISABLE for edge triggered irqs.
1003 */
1004 if (trigger)
1005 entry->mask = 1;
1006 return 0;
854} 1007}
855 1008
856static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq, 1009static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
@@ -875,24 +1028,15 @@ static void setup_IO_APIC_irq(int apic, int pin, unsigned int irq,
875 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector, 1028 apic, mp_ioapics[apic].mp_apicid, pin, cfg->vector,
876 irq, trigger, polarity); 1029 irq, trigger, polarity);
877 1030
878 /*
879 * add it to the IO-APIC irq-routing table:
880 */
881 memset(&entry,0,sizeof(entry));
882
883 entry.delivery_mode = INT_DELIVERY_MODE;
884 entry.dest_mode = INT_DEST_MODE;
885 entry.dest = cpu_mask_to_apicid(mask);
886 entry.mask = 0; /* enable IRQ */
887 entry.trigger = trigger;
888 entry.polarity = polarity;
889 entry.vector = cfg->vector;
890 1031
891 /* Mask level triggered irqs. 1032 if (setup_ioapic_entry(mp_ioapics[apic].mp_apicid, irq, &entry,
892 * Use IRQ_DELAYED_DISABLE for edge triggered irqs. 1033 cpu_mask_to_apicid(mask), trigger, polarity,
893 */ 1034 cfg->vector)) {
894 if (trigger) 1035 printk("Failed to setup ioapic entry for ioapic %d, pin %d\n",
895 entry.mask = 1; 1036 mp_ioapics[apic].mp_apicid, pin);
1037 __clear_irq_vector(irq);
1038 return;
1039 }
896 1040
897 ioapic_register_intr(irq, trigger); 1041 ioapic_register_intr(irq, trigger);
898 if (irq < 16) 1042 if (irq < 16)
@@ -944,6 +1088,9 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
944{ 1088{
945 struct IO_APIC_route_entry entry; 1089 struct IO_APIC_route_entry entry;
946 1090
1091 if (intr_remapping_enabled)
1092 return;
1093
947 memset(&entry, 0, sizeof(entry)); 1094 memset(&entry, 0, sizeof(entry));
948 1095
949 /* 1096 /*
@@ -970,7 +1117,8 @@ static void __init setup_timer_IRQ0_pin(unsigned int apic, unsigned int pin,
970 ioapic_write_entry(apic, pin, entry); 1117 ioapic_write_entry(apic, pin, entry);
971} 1118}
972 1119
973void __apicdebuginit print_IO_APIC(void) 1120
1121__apicdebuginit(void) print_IO_APIC(void)
974{ 1122{
975 int apic, i; 1123 int apic, i;
976 union IO_APIC_reg_00 reg_00; 1124 union IO_APIC_reg_00 reg_00;
@@ -1064,9 +1212,7 @@ void __apicdebuginit print_IO_APIC(void)
1064 return; 1212 return;
1065} 1213}
1066 1214
1067#if 0 1215__apicdebuginit(void) print_APIC_bitfield(int base)
1068
1069static __apicdebuginit void print_APIC_bitfield (int base)
1070{ 1216{
1071 unsigned int v; 1217 unsigned int v;
1072 int i, j; 1218 int i, j;
@@ -1087,9 +1233,10 @@ static __apicdebuginit void print_APIC_bitfield (int base)
1087 } 1233 }
1088} 1234}
1089 1235
1090void __apicdebuginit print_local_APIC(void * dummy) 1236__apicdebuginit(void) print_local_APIC(void *dummy)
1091{ 1237{
1092 unsigned int v, ver, maxlvt; 1238 unsigned int v, ver, maxlvt;
1239 unsigned long icr;
1093 1240
1094 if (apic_verbosity == APIC_QUIET) 1241 if (apic_verbosity == APIC_QUIET)
1095 return; 1242 return;
@@ -1097,7 +1244,7 @@ void __apicdebuginit print_local_APIC(void * dummy)
1097 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n", 1244 printk("\n" KERN_DEBUG "printing local APIC contents on CPU#%d/%d:\n",
1098 smp_processor_id(), hard_smp_processor_id()); 1245 smp_processor_id(), hard_smp_processor_id());
1099 v = apic_read(APIC_ID); 1246 v = apic_read(APIC_ID);
1100 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, GET_APIC_ID(read_apic_id())); 1247 printk(KERN_INFO "... APIC ID: %08x (%01x)\n", v, read_apic_id());
1101 v = apic_read(APIC_LVR); 1248 v = apic_read(APIC_LVR);
1102 printk(KERN_INFO "... APIC VERSION: %08x\n", v); 1249 printk(KERN_INFO "... APIC VERSION: %08x\n", v);
1103 ver = GET_APIC_VERSION(v); 1250 ver = GET_APIC_VERSION(v);
@@ -1133,10 +1280,9 @@ void __apicdebuginit print_local_APIC(void * dummy)
1133 v = apic_read(APIC_ESR); 1280 v = apic_read(APIC_ESR);
1134 printk(KERN_DEBUG "... APIC ESR: %08x\n", v); 1281 printk(KERN_DEBUG "... APIC ESR: %08x\n", v);
1135 1282
1136 v = apic_read(APIC_ICR); 1283 icr = apic_icr_read();
1137 printk(KERN_DEBUG "... APIC ICR: %08x\n", v); 1284 printk(KERN_DEBUG "... APIC ICR: %08x\n", icr);
1138 v = apic_read(APIC_ICR2); 1285 printk(KERN_DEBUG "... APIC ICR2: %08x\n", icr >> 32);
1139 printk(KERN_DEBUG "... APIC ICR2: %08x\n", v);
1140 1286
1141 v = apic_read(APIC_LVTT); 1287 v = apic_read(APIC_LVTT);
1142 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v); 1288 printk(KERN_DEBUG "... APIC LVTT: %08x\n", v);
@@ -1164,12 +1310,12 @@ void __apicdebuginit print_local_APIC(void * dummy)
1164 printk("\n"); 1310 printk("\n");
1165} 1311}
1166 1312
1167void print_all_local_APICs (void) 1313__apicdebuginit(void) print_all_local_APICs(void)
1168{ 1314{
1169 on_each_cpu(print_local_APIC, NULL, 1); 1315 on_each_cpu(print_local_APIC, NULL, 1);
1170} 1316}
1171 1317
1172void __apicdebuginit print_PIC(void) 1318__apicdebuginit(void) print_PIC(void)
1173{ 1319{
1174 unsigned int v; 1320 unsigned int v;
1175 unsigned long flags; 1321 unsigned long flags;
@@ -1201,7 +1347,17 @@ void __apicdebuginit print_PIC(void)
1201 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v); 1347 printk(KERN_DEBUG "... PIC ELCR: %04x\n", v);
1202} 1348}
1203 1349
1204#endif /* 0 */ 1350__apicdebuginit(int) print_all_ICs(void)
1351{
1352 print_PIC();
1353 print_all_local_APICs();
1354 print_IO_APIC();
1355
1356 return 0;
1357}
1358
1359fs_initcall(print_all_ICs);
1360
1205 1361
1206void __init enable_IO_APIC(void) 1362void __init enable_IO_APIC(void)
1207{ 1363{
@@ -1291,7 +1447,7 @@ void disable_IO_APIC(void)
1291 entry.dest_mode = 0; /* Physical */ 1447 entry.dest_mode = 0; /* Physical */
1292 entry.delivery_mode = dest_ExtINT; /* ExtInt */ 1448 entry.delivery_mode = dest_ExtINT; /* ExtInt */
1293 entry.vector = 0; 1449 entry.vector = 0;
1294 entry.dest = GET_APIC_ID(read_apic_id()); 1450 entry.dest = read_apic_id();
1295 1451
1296 /* 1452 /*
1297 * Add it to the IO-APIC irq-routing table: 1453 * Add it to the IO-APIC irq-routing table:
@@ -1397,6 +1553,147 @@ static int ioapic_retrigger_irq(unsigned int irq)
1397 */ 1553 */
1398 1554
1399#ifdef CONFIG_SMP 1555#ifdef CONFIG_SMP
1556
1557#ifdef CONFIG_INTR_REMAP
1558static void ir_irq_migration(struct work_struct *work);
1559
1560static DECLARE_DELAYED_WORK(ir_migration_work, ir_irq_migration);
1561
1562/*
1563 * Migrate the IO-APIC irq in the presence of intr-remapping.
1564 *
1565 * For edge triggered, irq migration is a simple atomic update(of vector
1566 * and cpu destination) of IRTE and flush the hardware cache.
1567 *
1568 * For level triggered, we need to modify the io-apic RTE aswell with the update
1569 * vector information, along with modifying IRTE with vector and destination.
1570 * So irq migration for level triggered is little bit more complex compared to
1571 * edge triggered migration. But the good news is, we use the same algorithm
1572 * for level triggered migration as we have today, only difference being,
1573 * we now initiate the irq migration from process context instead of the
1574 * interrupt context.
1575 *
1576 * In future, when we do a directed EOI (combined with cpu EOI broadcast
1577 * suppression) to the IO-APIC, level triggered irq migration will also be
1578 * as simple as edge triggered migration and we can do the irq migration
1579 * with a simple atomic update to IO-APIC RTE.
1580 */
1581static void migrate_ioapic_irq(int irq, cpumask_t mask)
1582{
1583 struct irq_cfg *cfg = irq_cfg + irq;
1584 struct irq_desc *desc = irq_desc + irq;
1585 cpumask_t tmp, cleanup_mask;
1586 struct irte irte;
1587 int modify_ioapic_rte = desc->status & IRQ_LEVEL;
1588 unsigned int dest;
1589 unsigned long flags;
1590
1591 cpus_and(tmp, mask, cpu_online_map);
1592 if (cpus_empty(tmp))
1593 return;
1594
1595 if (get_irte(irq, &irte))
1596 return;
1597
1598 if (assign_irq_vector(irq, mask))
1599 return;
1600
1601 cpus_and(tmp, cfg->domain, mask);
1602 dest = cpu_mask_to_apicid(tmp);
1603
1604 if (modify_ioapic_rte) {
1605 spin_lock_irqsave(&ioapic_lock, flags);
1606 __target_IO_APIC_irq(irq, dest, cfg->vector);
1607 spin_unlock_irqrestore(&ioapic_lock, flags);
1608 }
1609
1610 irte.vector = cfg->vector;
1611 irte.dest_id = IRTE_DEST(dest);
1612
1613 /*
1614 * Modified the IRTE and flushes the Interrupt entry cache.
1615 */
1616 modify_irte(irq, &irte);
1617
1618 if (cfg->move_in_progress) {
1619 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
1620 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
1621 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
1622 cfg->move_in_progress = 0;
1623 }
1624
1625 irq_desc[irq].affinity = mask;
1626}
1627
1628static int migrate_irq_remapped_level(int irq)
1629{
1630 int ret = -1;
1631
1632 mask_IO_APIC_irq(irq);
1633
1634 if (io_apic_level_ack_pending(irq)) {
1635 /*
1636 * Interrupt in progress. Migrating irq now will change the
1637 * vector information in the IO-APIC RTE and that will confuse
1638 * the EOI broadcast performed by cpu.
1639 * So, delay the irq migration to the next instance.
1640 */
1641 schedule_delayed_work(&ir_migration_work, 1);
1642 goto unmask;
1643 }
1644
1645 /* everthing is clear. we have right of way */
1646 migrate_ioapic_irq(irq, irq_desc[irq].pending_mask);
1647
1648 ret = 0;
1649 irq_desc[irq].status &= ~IRQ_MOVE_PENDING;
1650 cpus_clear(irq_desc[irq].pending_mask);
1651
1652unmask:
1653 unmask_IO_APIC_irq(irq);
1654 return ret;
1655}
1656
1657static void ir_irq_migration(struct work_struct *work)
1658{
1659 int irq;
1660
1661 for (irq = 0; irq < NR_IRQS; irq++) {
1662 struct irq_desc *desc = irq_desc + irq;
1663 if (desc->status & IRQ_MOVE_PENDING) {
1664 unsigned long flags;
1665
1666 spin_lock_irqsave(&desc->lock, flags);
1667 if (!desc->chip->set_affinity ||
1668 !(desc->status & IRQ_MOVE_PENDING)) {
1669 desc->status &= ~IRQ_MOVE_PENDING;
1670 spin_unlock_irqrestore(&desc->lock, flags);
1671 continue;
1672 }
1673
1674 desc->chip->set_affinity(irq,
1675 irq_desc[irq].pending_mask);
1676 spin_unlock_irqrestore(&desc->lock, flags);
1677 }
1678 }
1679}
1680
1681/*
1682 * Migrates the IRQ destination in the process context.
1683 */
1684static void set_ir_ioapic_affinity_irq(unsigned int irq, cpumask_t mask)
1685{
1686 if (irq_desc[irq].status & IRQ_LEVEL) {
1687 irq_desc[irq].status |= IRQ_MOVE_PENDING;
1688 irq_desc[irq].pending_mask = mask;
1689 migrate_irq_remapped_level(irq);
1690 return;
1691 }
1692
1693 migrate_ioapic_irq(irq, mask);
1694}
1695#endif
1696
1400asmlinkage void smp_irq_move_cleanup_interrupt(void) 1697asmlinkage void smp_irq_move_cleanup_interrupt(void)
1401{ 1698{
1402 unsigned vector, me; 1699 unsigned vector, me;
@@ -1453,6 +1750,17 @@ static void irq_complete_move(unsigned int irq)
1453#else 1750#else
1454static inline void irq_complete_move(unsigned int irq) {} 1751static inline void irq_complete_move(unsigned int irq) {}
1455#endif 1752#endif
1753#ifdef CONFIG_INTR_REMAP
1754static void ack_x2apic_level(unsigned int irq)
1755{
1756 ack_x2APIC_irq();
1757}
1758
1759static void ack_x2apic_edge(unsigned int irq)
1760{
1761 ack_x2APIC_irq();
1762}
1763#endif
1456 1764
1457static void ack_apic_edge(unsigned int irq) 1765static void ack_apic_edge(unsigned int irq)
1458{ 1766{
@@ -1527,6 +1835,21 @@ static struct irq_chip ioapic_chip __read_mostly = {
1527 .retrigger = ioapic_retrigger_irq, 1835 .retrigger = ioapic_retrigger_irq,
1528}; 1836};
1529 1837
1838#ifdef CONFIG_INTR_REMAP
1839static struct irq_chip ir_ioapic_chip __read_mostly = {
1840 .name = "IR-IO-APIC",
1841 .startup = startup_ioapic_irq,
1842 .mask = mask_IO_APIC_irq,
1843 .unmask = unmask_IO_APIC_irq,
1844 .ack = ack_x2apic_edge,
1845 .eoi = ack_x2apic_level,
1846#ifdef CONFIG_SMP
1847 .set_affinity = set_ir_ioapic_affinity_irq,
1848#endif
1849 .retrigger = ioapic_retrigger_irq,
1850};
1851#endif
1852
1530static inline void init_IO_APIC_traps(void) 1853static inline void init_IO_APIC_traps(void)
1531{ 1854{
1532 int irq; 1855 int irq;
@@ -1712,6 +2035,8 @@ static inline void __init check_timer(void)
1712 * 8259A. 2035 * 8259A.
1713 */ 2036 */
1714 if (pin1 == -1) { 2037 if (pin1 == -1) {
2038 if (intr_remapping_enabled)
2039 panic("BIOS bug: timer not connected to IO-APIC");
1715 pin1 = pin2; 2040 pin1 = pin2;
1716 apic1 = apic2; 2041 apic1 = apic2;
1717 no_pin1 = 1; 2042 no_pin1 = 1;
@@ -1738,6 +2063,8 @@ static inline void __init check_timer(void)
1738 clear_IO_APIC_pin(0, pin1); 2063 clear_IO_APIC_pin(0, pin1);
1739 goto out; 2064 goto out;
1740 } 2065 }
2066 if (intr_remapping_enabled)
2067 panic("timer doesn't work through Interrupt-remapped IO-APIC");
1741 clear_IO_APIC_pin(apic1, pin1); 2068 clear_IO_APIC_pin(apic1, pin1);
1742 if (!no_pin1) 2069 if (!no_pin1)
1743 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: " 2070 apic_printk(APIC_QUIET, KERN_ERR "..MP-BIOS bug: "
@@ -1854,8 +2181,6 @@ void __init setup_IO_APIC(void)
1854 setup_IO_APIC_irqs(); 2181 setup_IO_APIC_irqs();
1855 init_IO_APIC_traps(); 2182 init_IO_APIC_traps();
1856 check_timer(); 2183 check_timer();
1857 if (!acpi_ioapic)
1858 print_IO_APIC();
1859} 2184}
1860 2185
1861struct sysfs_ioapic_data { 2186struct sysfs_ioapic_data {
@@ -1977,6 +2302,9 @@ void destroy_irq(unsigned int irq)
1977 2302
1978 dynamic_irq_cleanup(irq); 2303 dynamic_irq_cleanup(irq);
1979 2304
2305#ifdef CONFIG_INTR_REMAP
2306 free_irte(irq);
2307#endif
1980 spin_lock_irqsave(&vector_lock, flags); 2308 spin_lock_irqsave(&vector_lock, flags);
1981 __clear_irq_vector(irq); 2309 __clear_irq_vector(irq);
1982 spin_unlock_irqrestore(&vector_lock, flags); 2310 spin_unlock_irqrestore(&vector_lock, flags);
@@ -1995,11 +2323,42 @@ static int msi_compose_msg(struct pci_dev *pdev, unsigned int irq, struct msi_ms
1995 2323
1996 tmp = TARGET_CPUS; 2324 tmp = TARGET_CPUS;
1997 err = assign_irq_vector(irq, tmp); 2325 err = assign_irq_vector(irq, tmp);
1998 if (!err) { 2326 if (err)
1999 cpus_and(tmp, cfg->domain, tmp); 2327 return err;
2000 dest = cpu_mask_to_apicid(tmp); 2328
2329 cpus_and(tmp, cfg->domain, tmp);
2330 dest = cpu_mask_to_apicid(tmp);
2331
2332#ifdef CONFIG_INTR_REMAP
2333 if (irq_remapped(irq)) {
2334 struct irte irte;
2335 int ir_index;
2336 u16 sub_handle;
2337
2338 ir_index = map_irq_to_irte_handle(irq, &sub_handle);
2339 BUG_ON(ir_index == -1);
2340
2341 memset (&irte, 0, sizeof(irte));
2342
2343 irte.present = 1;
2344 irte.dst_mode = INT_DEST_MODE;
2345 irte.trigger_mode = 0; /* edge */
2346 irte.dlvry_mode = INT_DELIVERY_MODE;
2347 irte.vector = cfg->vector;
2348 irte.dest_id = IRTE_DEST(dest);
2349
2350 modify_irte(irq, &irte);
2001 2351
2002 msg->address_hi = MSI_ADDR_BASE_HI; 2352 msg->address_hi = MSI_ADDR_BASE_HI;
2353 msg->data = sub_handle;
2354 msg->address_lo = MSI_ADDR_BASE_LO | MSI_ADDR_IR_EXT_INT |
2355 MSI_ADDR_IR_SHV |
2356 MSI_ADDR_IR_INDEX1(ir_index) |
2357 MSI_ADDR_IR_INDEX2(ir_index);
2358 } else
2359#endif
2360 {
2361 msg->address_hi = MSI_ADDR_BASE_HI;
2003 msg->address_lo = 2362 msg->address_lo =
2004 MSI_ADDR_BASE_LO | 2363 MSI_ADDR_BASE_LO |
2005 ((INT_DEST_MODE == 0) ? 2364 ((INT_DEST_MODE == 0) ?
@@ -2049,6 +2408,55 @@ static void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2049 write_msi_msg(irq, &msg); 2408 write_msi_msg(irq, &msg);
2050 irq_desc[irq].affinity = mask; 2409 irq_desc[irq].affinity = mask;
2051} 2410}
2411
2412#ifdef CONFIG_INTR_REMAP
2413/*
2414 * Migrate the MSI irq to another cpumask. This migration is
2415 * done in the process context using interrupt-remapping hardware.
2416 */
2417static void ir_set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
2418{
2419 struct irq_cfg *cfg = irq_cfg + irq;
2420 unsigned int dest;
2421 cpumask_t tmp, cleanup_mask;
2422 struct irte irte;
2423
2424 cpus_and(tmp, mask, cpu_online_map);
2425 if (cpus_empty(tmp))
2426 return;
2427
2428 if (get_irte(irq, &irte))
2429 return;
2430
2431 if (assign_irq_vector(irq, mask))
2432 return;
2433
2434 cpus_and(tmp, cfg->domain, mask);
2435 dest = cpu_mask_to_apicid(tmp);
2436
2437 irte.vector = cfg->vector;
2438 irte.dest_id = IRTE_DEST(dest);
2439
2440 /*
2441 * atomically update the IRTE with the new destination and vector.
2442 */
2443 modify_irte(irq, &irte);
2444
2445 /*
2446 * After this point, all the interrupts will start arriving
2447 * at the new destination. So, time to cleanup the previous
2448 * vector allocation.
2449 */
2450 if (cfg->move_in_progress) {
2451 cpus_and(cleanup_mask, cfg->old_domain, cpu_online_map);
2452 cfg->move_cleanup_count = cpus_weight(cleanup_mask);
2453 send_IPI_mask(cleanup_mask, IRQ_MOVE_CLEANUP_VECTOR);
2454 cfg->move_in_progress = 0;
2455 }
2456
2457 irq_desc[irq].affinity = mask;
2458}
2459#endif
2052#endif /* CONFIG_SMP */ 2460#endif /* CONFIG_SMP */
2053 2461
2054/* 2462/*
@@ -2066,26 +2474,157 @@ static struct irq_chip msi_chip = {
2066 .retrigger = ioapic_retrigger_irq, 2474 .retrigger = ioapic_retrigger_irq,
2067}; 2475};
2068 2476
2069int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 2477#ifdef CONFIG_INTR_REMAP
2478static struct irq_chip msi_ir_chip = {
2479 .name = "IR-PCI-MSI",
2480 .unmask = unmask_msi_irq,
2481 .mask = mask_msi_irq,
2482 .ack = ack_x2apic_edge,
2483#ifdef CONFIG_SMP
2484 .set_affinity = ir_set_msi_irq_affinity,
2485#endif
2486 .retrigger = ioapic_retrigger_irq,
2487};
2488
2489/*
2490 * Map the PCI dev to the corresponding remapping hardware unit
2491 * and allocate 'nvec' consecutive interrupt-remapping table entries
2492 * in it.
2493 */
2494static int msi_alloc_irte(struct pci_dev *dev, int irq, int nvec)
2070{ 2495{
2496 struct intel_iommu *iommu;
2497 int index;
2498
2499 iommu = map_dev_to_ir(dev);
2500 if (!iommu) {
2501 printk(KERN_ERR
2502 "Unable to map PCI %s to iommu\n", pci_name(dev));
2503 return -ENOENT;
2504 }
2505
2506 index = alloc_irte(iommu, irq, nvec);
2507 if (index < 0) {
2508 printk(KERN_ERR
2509 "Unable to allocate %d IRTE for PCI %s\n", nvec,
2510 pci_name(dev));
2511 return -ENOSPC;
2512 }
2513 return index;
2514}
2515#endif
2516
2517static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc, int irq)
2518{
2519 int ret;
2071 struct msi_msg msg; 2520 struct msi_msg msg;
2521
2522 ret = msi_compose_msg(dev, irq, &msg);
2523 if (ret < 0)
2524 return ret;
2525
2526 set_irq_msi(irq, desc);
2527 write_msi_msg(irq, &msg);
2528
2529#ifdef CONFIG_INTR_REMAP
2530 if (irq_remapped(irq)) {
2531 struct irq_desc *desc = irq_desc + irq;
2532 /*
2533 * irq migration in process context
2534 */
2535 desc->status |= IRQ_MOVE_PCNTXT;
2536 set_irq_chip_and_handler_name(irq, &msi_ir_chip, handle_edge_irq, "edge");
2537 } else
2538#endif
2539 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge");
2540
2541 return 0;
2542}
2543
2544int arch_setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
2545{
2072 int irq, ret; 2546 int irq, ret;
2547
2073 irq = create_irq(); 2548 irq = create_irq();
2074 if (irq < 0) 2549 if (irq < 0)
2075 return irq; 2550 return irq;
2076 2551
2077 ret = msi_compose_msg(dev, irq, &msg); 2552#ifdef CONFIG_INTR_REMAP
2553 if (!intr_remapping_enabled)
2554 goto no_ir;
2555
2556 ret = msi_alloc_irte(dev, irq, 1);
2557 if (ret < 0)
2558 goto error;
2559no_ir:
2560#endif
2561 ret = setup_msi_irq(dev, desc, irq);
2078 if (ret < 0) { 2562 if (ret < 0) {
2079 destroy_irq(irq); 2563 destroy_irq(irq);
2080 return ret; 2564 return ret;
2081 } 2565 }
2566 return 0;
2082 2567
2083 set_irq_msi(irq, desc); 2568#ifdef CONFIG_INTR_REMAP
2084 write_msi_msg(irq, &msg); 2569error:
2570 destroy_irq(irq);
2571 return ret;
2572#endif
2573}
2085 2574
2086 set_irq_chip_and_handler_name(irq, &msi_chip, handle_edge_irq, "edge"); 2575int arch_setup_msi_irqs(struct pci_dev *dev, int nvec, int type)
2576{
2577 int irq, ret, sub_handle;
2578 struct msi_desc *desc;
2579#ifdef CONFIG_INTR_REMAP
2580 struct intel_iommu *iommu = 0;
2581 int index = 0;
2582#endif
2583
2584 sub_handle = 0;
2585 list_for_each_entry(desc, &dev->msi_list, list) {
2586 irq = create_irq();
2587 if (irq < 0)
2588 return irq;
2589#ifdef CONFIG_INTR_REMAP
2590 if (!intr_remapping_enabled)
2591 goto no_ir;
2087 2592
2593 if (!sub_handle) {
2594 /*
2595 * allocate the consecutive block of IRTE's
2596 * for 'nvec'
2597 */
2598 index = msi_alloc_irte(dev, irq, nvec);
2599 if (index < 0) {
2600 ret = index;
2601 goto error;
2602 }
2603 } else {
2604 iommu = map_dev_to_ir(dev);
2605 if (!iommu) {
2606 ret = -ENOENT;
2607 goto error;
2608 }
2609 /*
2610 * setup the mapping between the irq and the IRTE
2611 * base index, the sub_handle pointing to the
2612 * appropriate interrupt remap table entry.
2613 */
2614 set_irte_irq(irq, iommu, index, sub_handle);
2615 }
2616no_ir:
2617#endif
2618 ret = setup_msi_irq(dev, desc, irq);
2619 if (ret < 0)
2620 goto error;
2621 sub_handle++;
2622 }
2088 return 0; 2623 return 0;
2624
2625error:
2626 destroy_irq(irq);
2627 return ret;
2089} 2628}
2090 2629
2091void arch_teardown_msi_irq(unsigned int irq) 2630void arch_teardown_msi_irq(unsigned int irq)
@@ -2333,6 +2872,10 @@ void __init setup_ioapic_dest(void)
2333 setup_IO_APIC_irq(ioapic, pin, irq, 2872 setup_IO_APIC_irq(ioapic, pin, irq,
2334 irq_trigger(irq_entry), 2873 irq_trigger(irq_entry),
2335 irq_polarity(irq_entry)); 2874 irq_polarity(irq_entry));
2875#ifdef CONFIG_INTR_REMAP
2876 else if (intr_remapping_enabled)
2877 set_ir_ioapic_affinity_irq(irq, TARGET_CPUS);
2878#endif
2336 else 2879 else
2337 set_ioapic_affinity_irq(irq, TARGET_CPUS); 2880 set_ioapic_affinity_irq(irq, TARGET_CPUS);
2338 } 2881 }
diff --git a/arch/x86/kernel/irqinit_32.c b/arch/x86/kernel/irqinit_32.c
index d66914287ee1..9200a1e2752d 100644
--- a/arch/x86/kernel/irqinit_32.c
+++ b/arch/x86/kernel/irqinit_32.c
@@ -74,6 +74,15 @@ void __init init_ISA_irqs (void)
74 } 74 }
75} 75}
76 76
77/*
78 * IRQ2 is cascade interrupt to second interrupt controller
79 */
80static struct irqaction irq2 = {
81 .handler = no_action,
82 .mask = CPU_MASK_NONE,
83 .name = "cascade",
84};
85
77/* Overridden in paravirt.c */ 86/* Overridden in paravirt.c */
78void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ"))); 87void init_IRQ(void) __attribute__((weak, alias("native_init_IRQ")));
79 88
@@ -98,6 +107,46 @@ void __init native_init_IRQ(void)
98 set_intr_gate(vector, interrupt[i]); 107 set_intr_gate(vector, interrupt[i]);
99 } 108 }
100 109
110#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_SMP)
111 /*
112 * IRQ0 must be given a fixed assignment and initialized,
113 * because it's used before the IO-APIC is set up.
114 */
115 set_intr_gate(FIRST_DEVICE_VECTOR, interrupt[0]);
116
117 /*
118 * The reschedule interrupt is a CPU-to-CPU reschedule-helper
119 * IPI, driven by wakeup.
120 */
121 alloc_intr_gate(RESCHEDULE_VECTOR, reschedule_interrupt);
122
123 /* IPI for invalidation */
124 alloc_intr_gate(INVALIDATE_TLB_VECTOR, invalidate_interrupt);
125
126 /* IPI for generic function call */
127 alloc_intr_gate(CALL_FUNCTION_VECTOR, call_function_interrupt);
128
129 /* IPI for single call function */
130 set_intr_gate(CALL_FUNCTION_SINGLE_VECTOR, call_function_single_interrupt);
131#endif
132
133#ifdef CONFIG_X86_LOCAL_APIC
134 /* self generated IPI for local APIC timer */
135 alloc_intr_gate(LOCAL_TIMER_VECTOR, apic_timer_interrupt);
136
137 /* IPI vectors for APIC spurious and error interrupts */
138 alloc_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt);
139 alloc_intr_gate(ERROR_APIC_VECTOR, error_interrupt);
140#endif
141
142#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86_MCE_P4THERMAL)
143 /* thermal monitor LVT interrupt */
144 alloc_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
145#endif
146
147 if (!acpi_ioapic)
148 setup_irq(2, &irq2);
149
101 /* setup after call gates are initialised (usually add in 150 /* setup after call gates are initialised (usually add in
102 * the architecture specific gates) 151 * the architecture specific gates)
103 */ 152 */
diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
index b3fb430725cb..f98f4e1dba09 100644
--- a/arch/x86/kernel/mpparse.c
+++ b/arch/x86/kernel/mpparse.c
@@ -397,7 +397,9 @@ static int __init smp_read_mpc(struct mp_config_table *mpc, unsigned early)
397 generic_bigsmp_probe(); 397 generic_bigsmp_probe();
398#endif 398#endif
399 399
400#ifdef CONFIG_X86_32
400 setup_apic_routing(); 401 setup_apic_routing();
402#endif
401 if (!num_processors) 403 if (!num_processors)
402 printk(KERN_ERR "MPTABLE: no processors registered!\n"); 404 printk(KERN_ERR "MPTABLE: no processors registered!\n");
403 return num_processors; 405 return num_processors;
diff --git a/arch/x86/kernel/numaq_32.c b/arch/x86/kernel/numaq_32.c
index eecc8c18f010..4caff39078e0 100644
--- a/arch/x86/kernel/numaq_32.c
+++ b/arch/x86/kernel/numaq_32.c
@@ -229,6 +229,12 @@ static void __init smp_read_mpc_oem(struct mp_config_oemtable *oemtable,
229 } 229 }
230} 230}
231 231
232static int __init numaq_setup_ioapic_ids(void)
233{
234 /* so can skip it */
235 return 1;
236}
237
232static struct x86_quirks numaq_x86_quirks __initdata = { 238static struct x86_quirks numaq_x86_quirks __initdata = {
233 .arch_pre_time_init = numaq_pre_time_init, 239 .arch_pre_time_init = numaq_pre_time_init,
234 .arch_time_init = NULL, 240 .arch_time_init = NULL,
@@ -243,6 +249,7 @@ static struct x86_quirks numaq_x86_quirks __initdata = {
243 .mpc_oem_bus_info = mpc_oem_bus_info, 249 .mpc_oem_bus_info = mpc_oem_bus_info,
244 .mpc_oem_pci_bus = mpc_oem_pci_bus, 250 .mpc_oem_pci_bus = mpc_oem_pci_bus,
245 .smp_read_mpc_oem = smp_read_mpc_oem, 251 .smp_read_mpc_oem = smp_read_mpc_oem,
252 .setup_ioapic_ids = numaq_setup_ioapic_ids,
246}; 253};
247 254
248void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem, 255void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c
index e2f43768723a..6b0bb73998dd 100644
--- a/arch/x86/kernel/paravirt.c
+++ b/arch/x86/kernel/paravirt.c
@@ -374,8 +374,6 @@ struct pv_cpu_ops pv_cpu_ops = {
374 374
375struct pv_apic_ops pv_apic_ops = { 375struct pv_apic_ops pv_apic_ops = {
376#ifdef CONFIG_X86_LOCAL_APIC 376#ifdef CONFIG_X86_LOCAL_APIC
377 .apic_write = native_apic_write,
378 .apic_read = native_apic_read,
379 .setup_boot_clock = setup_boot_APIC_clock, 377 .setup_boot_clock = setup_boot_APIC_clock,
380 .setup_secondary_clock = setup_secondary_APIC_clock, 378 .setup_secondary_clock = setup_secondary_APIC_clock,
381 .startup_ipi_hook = paravirt_nop, 379 .startup_ipi_hook = paravirt_nop,
diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c
index ec7a2ba9bce8..c622772744d8 100644
--- a/arch/x86/kernel/process.c
+++ b/arch/x86/kernel/process.c
@@ -15,7 +15,6 @@ unsigned long idle_nomwait;
15EXPORT_SYMBOL(idle_nomwait); 15EXPORT_SYMBOL(idle_nomwait);
16 16
17struct kmem_cache *task_xstate_cachep; 17struct kmem_cache *task_xstate_cachep;
18static int force_mwait __cpuinitdata;
19 18
20int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) 19int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src)
21{ 20{
diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index 141efab52400..46c98efbbf8d 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -758,6 +758,8 @@ void __init setup_arch(char **cmdline_p)
758#else 758#else
759 num_physpages = max_pfn; 759 num_physpages = max_pfn;
760 760
761 if (cpu_has_x2apic)
762 check_x2apic();
761 763
762 /* How many end-of-memory variables you have, grandma! */ 764 /* How many end-of-memory variables you have, grandma! */
763 /* need this before calling reserve_initrd */ 765 /* need this before calling reserve_initrd */
diff --git a/arch/x86/kernel/sigframe.h b/arch/x86/kernel/sigframe.h
index 8b4956e800ac..cc673aa55ce4 100644
--- a/arch/x86/kernel/sigframe.h
+++ b/arch/x86/kernel/sigframe.h
@@ -3,9 +3,18 @@ struct sigframe {
3 char __user *pretcode; 3 char __user *pretcode;
4 int sig; 4 int sig;
5 struct sigcontext sc; 5 struct sigcontext sc;
6 struct _fpstate fpstate; 6 /*
7 * fpstate is unused. fpstate is moved/allocated after
8 * retcode[] below. This movement allows to have the FP state and the
9 * future state extensions (xsave) stay together.
10 * And at the same time retaining the unused fpstate, prevents changing
11 * the offset of extramask[] in the sigframe and thus prevent any
12 * legacy application accessing/modifying it.
13 */
14 struct _fpstate fpstate_unused;
7 unsigned long extramask[_NSIG_WORDS-1]; 15 unsigned long extramask[_NSIG_WORDS-1];
8 char retcode[8]; 16 char retcode[8];
17 /* fp state follows here */
9}; 18};
10 19
11struct rt_sigframe { 20struct rt_sigframe {
@@ -15,14 +24,15 @@ struct rt_sigframe {
15 void __user *puc; 24 void __user *puc;
16 struct siginfo info; 25 struct siginfo info;
17 struct ucontext uc; 26 struct ucontext uc;
18 struct _fpstate fpstate;
19 char retcode[8]; 27 char retcode[8];
28 /* fp state follows here */
20}; 29};
21#else 30#else
22struct rt_sigframe { 31struct rt_sigframe {
23 char __user *pretcode; 32 char __user *pretcode;
24 struct ucontext uc; 33 struct ucontext uc;
25 struct siginfo info; 34 struct siginfo info;
35 /* fp state follows here */
26}; 36};
27 37
28int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 38int ia32_setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
diff --git a/arch/x86/kernel/signal_32.c b/arch/x86/kernel/signal_32.c
index 2a2435d3037d..b21070ea33a4 100644
--- a/arch/x86/kernel/signal_32.c
+++ b/arch/x86/kernel/signal_32.c
@@ -161,28 +161,14 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
161 } 161 }
162 162
163 { 163 {
164 struct _fpstate __user *buf; 164 void __user *buf;
165 165
166 err |= __get_user(buf, &sc->fpstate); 166 err |= __get_user(buf, &sc->fpstate);
167 if (buf) { 167 err |= restore_i387_xstate(buf);
168 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
169 goto badframe;
170 err |= restore_i387(buf);
171 } else {
172 struct task_struct *me = current;
173
174 if (used_math()) {
175 clear_fpu(me);
176 clear_used_math();
177 }
178 }
179 } 168 }
180 169
181 err |= __get_user(*pax, &sc->ax); 170 err |= __get_user(*pax, &sc->ax);
182 return err; 171 return err;
183
184badframe:
185 return 1;
186} 172}
187 173
188asmlinkage unsigned long sys_sigreturn(unsigned long __unused) 174asmlinkage unsigned long sys_sigreturn(unsigned long __unused)
@@ -264,7 +250,7 @@ badframe:
264 * Set up a signal frame. 250 * Set up a signal frame.
265 */ 251 */
266static int 252static int
267setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, 253setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
268 struct pt_regs *regs, unsigned long mask) 254 struct pt_regs *regs, unsigned long mask)
269{ 255{
270 int tmp, err = 0; 256 int tmp, err = 0;
@@ -291,7 +277,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
291 err |= __put_user(regs->sp, &sc->sp_at_signal); 277 err |= __put_user(regs->sp, &sc->sp_at_signal);
292 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss); 278 err |= __put_user(regs->ss, (unsigned int __user *)&sc->ss);
293 279
294 tmp = save_i387(fpstate); 280 tmp = save_i387_xstate(fpstate);
295 if (tmp < 0) 281 if (tmp < 0)
296 err = 1; 282 err = 1;
297 else 283 else
@@ -308,7 +294,8 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate,
308 * Determine which stack to use.. 294 * Determine which stack to use..
309 */ 295 */
310static inline void __user * 296static inline void __user *
311get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size) 297get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size,
298 void **fpstate)
312{ 299{
313 unsigned long sp; 300 unsigned long sp;
314 301
@@ -334,6 +321,11 @@ get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, size_t frame_size)
334 sp = (unsigned long) ka->sa.sa_restorer; 321 sp = (unsigned long) ka->sa.sa_restorer;
335 } 322 }
336 323
324 if (used_math()) {
325 sp = sp - sig_xstate_size;
326 *fpstate = (struct _fpstate *) sp;
327 }
328
337 sp -= frame_size; 329 sp -= frame_size;
338 /* 330 /*
339 * Align the stack pointer according to the i386 ABI, 331 * Align the stack pointer according to the i386 ABI,
@@ -352,8 +344,9 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
352 void __user *restorer; 344 void __user *restorer;
353 int err = 0; 345 int err = 0;
354 int usig; 346 int usig;
347 void __user *fpstate = NULL;
355 348
356 frame = get_sigframe(ka, regs, sizeof(*frame)); 349 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
357 350
358 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 351 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
359 goto give_sigsegv; 352 goto give_sigsegv;
@@ -368,7 +361,7 @@ setup_frame(int sig, struct k_sigaction *ka, sigset_t *set,
368 if (err) 361 if (err)
369 goto give_sigsegv; 362 goto give_sigsegv;
370 363
371 err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); 364 err = setup_sigcontext(&frame->sc, fpstate, regs, set->sig[0]);
372 if (err) 365 if (err)
373 goto give_sigsegv; 366 goto give_sigsegv;
374 367
@@ -429,8 +422,9 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
429 void __user *restorer; 422 void __user *restorer;
430 int err = 0; 423 int err = 0;
431 int usig; 424 int usig;
425 void __user *fpstate = NULL;
432 426
433 frame = get_sigframe(ka, regs, sizeof(*frame)); 427 frame = get_sigframe(ka, regs, sizeof(*frame), &fpstate);
434 428
435 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame))) 429 if (!access_ok(VERIFY_WRITE, frame, sizeof(*frame)))
436 goto give_sigsegv; 430 goto give_sigsegv;
@@ -449,13 +443,16 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
449 goto give_sigsegv; 443 goto give_sigsegv;
450 444
451 /* Create the ucontext. */ 445 /* Create the ucontext. */
452 err |= __put_user(0, &frame->uc.uc_flags); 446 if (cpu_has_xsave)
447 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
448 else
449 err |= __put_user(0, &frame->uc.uc_flags);
453 err |= __put_user(0, &frame->uc.uc_link); 450 err |= __put_user(0, &frame->uc.uc_link);
454 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 451 err |= __put_user(current->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
455 err |= __put_user(sas_ss_flags(regs->sp), 452 err |= __put_user(sas_ss_flags(regs->sp),
456 &frame->uc.uc_stack.ss_flags); 453 &frame->uc.uc_stack.ss_flags);
457 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size); 454 err |= __put_user(current->sas_ss_size, &frame->uc.uc_stack.ss_size);
458 err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate, 455 err |= setup_sigcontext(&frame->uc.uc_mcontext, fpstate,
459 regs, set->sig[0]); 456 regs, set->sig[0]);
460 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set)); 457 err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
461 if (err) 458 if (err)
diff --git a/arch/x86/kernel/signal_64.c b/arch/x86/kernel/signal_64.c
index 694aa888bb19..823a55bf8c39 100644
--- a/arch/x86/kernel/signal_64.c
+++ b/arch/x86/kernel/signal_64.c
@@ -53,69 +53,6 @@ sys_sigaltstack(const stack_t __user *uss, stack_t __user *uoss,
53} 53}
54 54
55/* 55/*
56 * Signal frame handlers.
57 */
58
59static inline int save_i387(struct _fpstate __user *buf)
60{
61 struct task_struct *tsk = current;
62 int err = 0;
63
64 BUILD_BUG_ON(sizeof(struct user_i387_struct) !=
65 sizeof(tsk->thread.xstate->fxsave));
66
67 if ((unsigned long)buf % 16)
68 printk("save_i387: bad fpstate %p\n", buf);
69
70 if (!used_math())
71 return 0;
72 clear_used_math(); /* trigger finit */
73 if (task_thread_info(tsk)->status & TS_USEDFPU) {
74 err = save_i387_checking((struct i387_fxsave_struct __user *)
75 buf);
76 if (err)
77 return err;
78 task_thread_info(tsk)->status &= ~TS_USEDFPU;
79 stts();
80 } else {
81 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
82 sizeof(struct i387_fxsave_struct)))
83 return -1;
84 }
85 return 1;
86}
87
88/*
89 * This restores directly out of user space. Exceptions are handled.
90 */
91static inline int restore_i387(struct _fpstate __user *buf)
92{
93 struct task_struct *tsk = current;
94 int err;
95
96 if (!used_math()) {
97 err = init_fpu(tsk);
98 if (err)
99 return err;
100 }
101
102 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
103 clts();
104 task_thread_info(current)->status |= TS_USEDFPU;
105 }
106 err = restore_fpu_checking((__force struct i387_fxsave_struct *)buf);
107 if (unlikely(err)) {
108 /*
109 * Encountered an error while doing the restore from the
110 * user buffer, clear the fpu state.
111 */
112 clear_fpu(tsk);
113 clear_used_math();
114 }
115 return err;
116}
117
118/*
119 * Do a signal return; undo the signal stack. 56 * Do a signal return; undo the signal stack.
120 */ 57 */
121static int 58static int
@@ -159,25 +96,11 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc,
159 { 96 {
160 struct _fpstate __user *buf; 97 struct _fpstate __user *buf;
161 err |= __get_user(buf, &sc->fpstate); 98 err |= __get_user(buf, &sc->fpstate);
162 99 err |= restore_i387_xstate(buf);
163 if (buf) {
164 if (!access_ok(VERIFY_READ, buf, sizeof(*buf)))
165 goto badframe;
166 err |= restore_i387(buf);
167 } else {
168 struct task_struct *me = current;
169 if (used_math()) {
170 clear_fpu(me);
171 clear_used_math();
172 }
173 }
174 } 100 }
175 101
176 err |= __get_user(*pax, &sc->ax); 102 err |= __get_user(*pax, &sc->ax);
177 return err; 103 return err;
178
179badframe:
180 return 1;
181} 104}
182 105
183asmlinkage long sys_rt_sigreturn(struct pt_regs *regs) 106asmlinkage long sys_rt_sigreturn(struct pt_regs *regs)
@@ -269,26 +192,23 @@ get_stack(struct k_sigaction *ka, struct pt_regs *regs, unsigned long size)
269 sp = current->sas_ss_sp + current->sas_ss_size; 192 sp = current->sas_ss_sp + current->sas_ss_size;
270 } 193 }
271 194
272 return (void __user *)round_down(sp - size, 16); 195 return (void __user *)round_down(sp - size, 64);
273} 196}
274 197
275static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, 198static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
276 sigset_t *set, struct pt_regs *regs) 199 sigset_t *set, struct pt_regs *regs)
277{ 200{
278 struct rt_sigframe __user *frame; 201 struct rt_sigframe __user *frame;
279 struct _fpstate __user *fp = NULL; 202 void __user *fp = NULL;
280 int err = 0; 203 int err = 0;
281 struct task_struct *me = current; 204 struct task_struct *me = current;
282 205
283 if (used_math()) { 206 if (used_math()) {
284 fp = get_stack(ka, regs, sizeof(struct _fpstate)); 207 fp = get_stack(ka, regs, sig_xstate_size);
285 frame = (void __user *)round_down( 208 frame = (void __user *)round_down(
286 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8; 209 (unsigned long)fp - sizeof(struct rt_sigframe), 16) - 8;
287 210
288 if (!access_ok(VERIFY_WRITE, fp, sizeof(struct _fpstate))) 211 if (save_i387_xstate(fp) < 0)
289 goto give_sigsegv;
290
291 if (save_i387(fp) < 0)
292 err |= -1; 212 err |= -1;
293 } else 213 } else
294 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8; 214 frame = get_stack(ka, regs, sizeof(struct rt_sigframe)) - 8;
@@ -303,7 +223,10 @@ static int setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info,
303 } 223 }
304 224
305 /* Create the ucontext. */ 225 /* Create the ucontext. */
306 err |= __put_user(0, &frame->uc.uc_flags); 226 if (cpu_has_xsave)
227 err |= __put_user(UC_FP_XSTATE, &frame->uc.uc_flags);
228 else
229 err |= __put_user(0, &frame->uc.uc_flags);
307 err |= __put_user(0, &frame->uc.uc_link); 230 err |= __put_user(0, &frame->uc.uc_link);
308 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp); 231 err |= __put_user(me->sas_ss_sp, &frame->uc.uc_stack.ss_sp);
309 err |= __put_user(sas_ss_flags(regs->sp), 232 err |= __put_user(sas_ss_flags(regs->sp),
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 4e7ccb0e2a9b..9056f7e272c0 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -123,7 +123,6 @@ EXPORT_PER_CPU_SYMBOL(cpu_info);
123 123
124static atomic_t init_deasserted; 124static atomic_t init_deasserted;
125 125
126static int boot_cpu_logical_apicid;
127 126
128/* representing cpus for which sibling maps can be computed */ 127/* representing cpus for which sibling maps can be computed */
129static cpumask_t cpu_sibling_setup_map; 128static cpumask_t cpu_sibling_setup_map;
@@ -165,6 +164,8 @@ static void unmap_cpu_to_node(int cpu)
165#endif 164#endif
166 165
167#ifdef CONFIG_X86_32 166#ifdef CONFIG_X86_32
167static int boot_cpu_logical_apicid;
168
168u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly = 169u8 cpu_2_logical_apicid[NR_CPUS] __read_mostly =
169 { [0 ... NR_CPUS-1] = BAD_APICID }; 170 { [0 ... NR_CPUS-1] = BAD_APICID };
170 171
@@ -210,7 +211,7 @@ static void __cpuinit smp_callin(void)
210 /* 211 /*
211 * (This works even if the APIC is not enabled.) 212 * (This works even if the APIC is not enabled.)
212 */ 213 */
213 phys_id = GET_APIC_ID(read_apic_id()); 214 phys_id = read_apic_id();
214 cpuid = smp_processor_id(); 215 cpuid = smp_processor_id();
215 if (cpu_isset(cpuid, cpu_callin_map)) { 216 if (cpu_isset(cpuid, cpu_callin_map)) {
216 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__, 217 panic("%s: phys CPU#%d, CPU#%d already present??\n", __func__,
@@ -551,8 +552,7 @@ static inline void __inquire_remote_apic(int apicid)
551 printk(KERN_CONT 552 printk(KERN_CONT
552 "a previous APIC delivery may have failed\n"); 553 "a previous APIC delivery may have failed\n");
553 554
554 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(apicid)); 555 apic_icr_write(APIC_DM_REMRD | regs[i], apicid);
555 apic_write(APIC_ICR, APIC_DM_REMRD | regs[i]);
556 556
557 timeout = 0; 557 timeout = 0;
558 do { 558 do {
@@ -584,11 +584,9 @@ wakeup_secondary_cpu(int logical_apicid, unsigned long start_eip)
584 int maxlvt; 584 int maxlvt;
585 585
586 /* Target chip */ 586 /* Target chip */
587 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(logical_apicid));
588
589 /* Boot on the stack */ 587 /* Boot on the stack */
590 /* Kick the second */ 588 /* Kick the second */
591 apic_write(APIC_ICR, APIC_DM_NMI | APIC_DEST_LOGICAL); 589 apic_icr_write(APIC_DM_NMI | APIC_DEST_LOGICAL, logical_apicid);
592 590
593 pr_debug("Waiting for send to finish...\n"); 591 pr_debug("Waiting for send to finish...\n");
594 send_status = safe_apic_wait_icr_idle(); 592 send_status = safe_apic_wait_icr_idle();
@@ -641,13 +639,11 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
641 /* 639 /*
642 * Turn INIT on target chip 640 * Turn INIT on target chip
643 */ 641 */
644 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
645
646 /* 642 /*
647 * Send IPI 643 * Send IPI
648 */ 644 */
649 apic_write(APIC_ICR, 645 apic_icr_write(APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT,
650 APIC_INT_LEVELTRIG | APIC_INT_ASSERT | APIC_DM_INIT); 646 phys_apicid);
651 647
652 pr_debug("Waiting for send to finish...\n"); 648 pr_debug("Waiting for send to finish...\n");
653 send_status = safe_apic_wait_icr_idle(); 649 send_status = safe_apic_wait_icr_idle();
@@ -657,10 +653,8 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
657 pr_debug("Deasserting INIT.\n"); 653 pr_debug("Deasserting INIT.\n");
658 654
659 /* Target chip */ 655 /* Target chip */
660 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
661
662 /* Send IPI */ 656 /* Send IPI */
663 apic_write(APIC_ICR, APIC_INT_LEVELTRIG | APIC_DM_INIT); 657 apic_icr_write(APIC_INT_LEVELTRIG | APIC_DM_INIT, phys_apicid);
664 658
665 pr_debug("Waiting for send to finish...\n"); 659 pr_debug("Waiting for send to finish...\n");
666 send_status = safe_apic_wait_icr_idle(); 660 send_status = safe_apic_wait_icr_idle();
@@ -703,11 +697,10 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip)
703 */ 697 */
704 698
705 /* Target chip */ 699 /* Target chip */
706 apic_write(APIC_ICR2, SET_APIC_DEST_FIELD(phys_apicid));
707
708 /* Boot on the stack */ 700 /* Boot on the stack */
709 /* Kick the second */ 701 /* Kick the second */
710 apic_write(APIC_ICR, APIC_DM_STARTUP | (start_eip >> 12)); 702 apic_icr_write(APIC_DM_STARTUP | (start_eip >> 12),
703 phys_apicid);
711 704
712 /* 705 /*
713 * Give the other CPU some time to accept the IPI. 706 * Give the other CPU some time to accept the IPI.
@@ -1176,10 +1169,17 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1176 * Setup boot CPU information 1169 * Setup boot CPU information
1177 */ 1170 */
1178 smp_store_cpu_info(0); /* Final full version of the data */ 1171 smp_store_cpu_info(0); /* Final full version of the data */
1172#ifdef CONFIG_X86_32
1179 boot_cpu_logical_apicid = logical_smp_processor_id(); 1173 boot_cpu_logical_apicid = logical_smp_processor_id();
1174#endif
1180 current_thread_info()->cpu = 0; /* needed? */ 1175 current_thread_info()->cpu = 0; /* needed? */
1181 set_cpu_sibling_map(0); 1176 set_cpu_sibling_map(0);
1182 1177
1178#ifdef CONFIG_X86_64
1179 enable_IR_x2apic();
1180 setup_apic_routing();
1181#endif
1182
1183 if (smp_sanity_check(max_cpus) < 0) { 1183 if (smp_sanity_check(max_cpus) < 0) {
1184 printk(KERN_INFO "SMP disabled\n"); 1184 printk(KERN_INFO "SMP disabled\n");
1185 disable_smp(); 1185 disable_smp();
@@ -1187,9 +1187,9 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus)
1187 } 1187 }
1188 1188
1189 preempt_disable(); 1189 preempt_disable();
1190 if (GET_APIC_ID(read_apic_id()) != boot_cpu_physical_apicid) { 1190 if (read_apic_id() != boot_cpu_physical_apicid) {
1191 panic("Boot APIC ID in local APIC unexpected (%d vs %d)", 1191 panic("Boot APIC ID in local APIC unexpected (%d vs %d)",
1192 GET_APIC_ID(read_apic_id()), boot_cpu_physical_apicid); 1192 read_apic_id(), boot_cpu_physical_apicid);
1193 /* Or can we switch back to PIC here? */ 1193 /* Or can we switch back to PIC here? */
1194 } 1194 }
1195 preempt_enable(); 1195 preempt_enable();
diff --git a/arch/x86/kernel/summit_32.c b/arch/x86/kernel/summit_32.c
index d67ce5f044ba..7b987852e876 100644
--- a/arch/x86/kernel/summit_32.c
+++ b/arch/x86/kernel/summit_32.c
@@ -30,7 +30,7 @@
30#include <linux/init.h> 30#include <linux/init.h>
31#include <asm/io.h> 31#include <asm/io.h>
32#include <asm/bios_ebda.h> 32#include <asm/bios_ebda.h>
33#include <asm/mach-summit/mach_mpparse.h> 33#include <asm/summit/mpparse.h>
34 34
35static struct rio_table_hdr *rio_table_hdr __initdata; 35static struct rio_table_hdr *rio_table_hdr __initdata;
36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata; 36static struct scal_detail *scal_devs[MAX_NUMNODES] __initdata;
diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c
index 03df8e45e5a1..da5a5964fccb 100644
--- a/arch/x86/kernel/traps_32.c
+++ b/arch/x86/kernel/traps_32.c
@@ -1228,7 +1228,6 @@ void __init trap_init(void)
1228 1228
1229 set_bit(SYSCALL_VECTOR, used_vectors); 1229 set_bit(SYSCALL_VECTOR, used_vectors);
1230 1230
1231 init_thread_xstate();
1232 /* 1231 /*
1233 * Should be a barrier for any external CPU state: 1232 * Should be a barrier for any external CPU state:
1234 */ 1233 */
diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c
index 7a31f104bef9..2887a789e38f 100644
--- a/arch/x86/kernel/traps_64.c
+++ b/arch/x86/kernel/traps_64.c
@@ -1138,7 +1138,7 @@ asmlinkage void math_state_restore(void)
1138 /* 1138 /*
1139 * Paranoid restore. send a SIGSEGV if we fail to restore the state. 1139 * Paranoid restore. send a SIGSEGV if we fail to restore the state.
1140 */ 1140 */
1141 if (unlikely(restore_fpu_checking(&me->thread.xstate->fxsave))) { 1141 if (unlikely(restore_fpu_checking(me))) {
1142 stts(); 1142 stts();
1143 force_sig(SIGSEGV, me); 1143 force_sig(SIGSEGV, me);
1144 return; 1144 return;
@@ -1179,10 +1179,6 @@ void __init trap_init(void)
1179 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); 1179 set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall);
1180#endif 1180#endif
1181 /* 1181 /*
1182 * initialize the per thread extended state:
1183 */
1184 init_thread_xstate();
1185 /*
1186 * Should be a barrier for any external CPU state: 1182 * Should be a barrier for any external CPU state:
1187 */ 1183 */
1188 cpu_init(); 1184 cpu_init();
diff --git a/arch/x86/kernel/vmi_32.c b/arch/x86/kernel/vmi_32.c
index 8c9ad02af5a2..8b6c393ab9fd 100644
--- a/arch/x86/kernel/vmi_32.c
+++ b/arch/x86/kernel/vmi_32.c
@@ -905,8 +905,8 @@ static inline int __init activate_vmi(void)
905#endif 905#endif
906 906
907#ifdef CONFIG_X86_LOCAL_APIC 907#ifdef CONFIG_X86_LOCAL_APIC
908 para_fill(pv_apic_ops.apic_read, APICRead); 908 para_fill(apic_ops->read, APICRead);
909 para_fill(pv_apic_ops.apic_write, APICWrite); 909 para_fill(apic_ops->write, APICWrite);
910#endif 910#endif
911 911
912 /* 912 /*
diff --git a/arch/x86/kernel/vmlinux_32.lds.S b/arch/x86/kernel/vmlinux_32.lds.S
index af5bdad84604..a9b8560adbc2 100644
--- a/arch/x86/kernel/vmlinux_32.lds.S
+++ b/arch/x86/kernel/vmlinux_32.lds.S
@@ -140,10 +140,10 @@ SECTIONS
140 *(.con_initcall.init) 140 *(.con_initcall.init)
141 __con_initcall_end = .; 141 __con_initcall_end = .;
142 } 142 }
143 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { 143 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
144 __x86cpuvendor_start = .; 144 __x86_cpu_dev_start = .;
145 *(.x86cpuvendor.init) 145 *(.x86_cpu_dev.init)
146 __x86cpuvendor_end = .; 146 __x86_cpu_dev_end = .;
147 } 147 }
148 SECURITY_INIT 148 SECURITY_INIT
149 . = ALIGN(4); 149 . = ALIGN(4);
@@ -180,6 +180,7 @@ SECTIONS
180 . = ALIGN(PAGE_SIZE); 180 . = ALIGN(PAGE_SIZE);
181 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { 181 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) {
182 __per_cpu_start = .; 182 __per_cpu_start = .;
183 *(.data.percpu.page_aligned)
183 *(.data.percpu) 184 *(.data.percpu)
184 *(.data.percpu.shared_aligned) 185 *(.data.percpu.shared_aligned)
185 __per_cpu_end = .; 186 __per_cpu_end = .;
diff --git a/arch/x86/kernel/vmlinux_64.lds.S b/arch/x86/kernel/vmlinux_64.lds.S
index 63e5c1a22e88..201e81a91a95 100644
--- a/arch/x86/kernel/vmlinux_64.lds.S
+++ b/arch/x86/kernel/vmlinux_64.lds.S
@@ -168,13 +168,12 @@ SECTIONS
168 *(.con_initcall.init) 168 *(.con_initcall.init)
169 } 169 }
170 __con_initcall_end = .; 170 __con_initcall_end = .;
171 . = ALIGN(16); 171 __x86_cpu_dev_start = .;
172 __x86cpuvendor_start = .; 172 .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {
173 .x86cpuvendor.init : AT(ADDR(.x86cpuvendor.init) - LOAD_OFFSET) { 173 *(.x86_cpu_dev.init)
174 *(.x86cpuvendor.init)
175 } 174 }
176 __x86cpuvendor_end = .;
177 SECURITY_INIT 175 SECURITY_INIT
176 __x86_cpu_dev_end = .;
178 177
179 . = ALIGN(8); 178 . = ALIGN(8);
180 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) { 179 .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
new file mode 100644
index 000000000000..07713d64debe
--- /dev/null
+++ b/arch/x86/kernel/xsave.c
@@ -0,0 +1,316 @@
1/*
2 * xsave/xrstor support.
3 *
4 * Author: Suresh Siddha <suresh.b.siddha@intel.com>
5 */
6#include <linux/bootmem.h>
7#include <linux/compat.h>
8#include <asm/i387.h>
9#ifdef CONFIG_IA32_EMULATION
10#include <asm/sigcontext32.h>
11#endif
12#include <asm/xcr.h>
13
14/*
15 * Supported feature mask by the CPU and the kernel.
16 */
17u64 pcntxt_mask;
18
19struct _fpx_sw_bytes fx_sw_reserved;
20#ifdef CONFIG_IA32_EMULATION
21struct _fpx_sw_bytes fx_sw_reserved_ia32;
22#endif
23
24/*
25 * Check for the presence of extended state information in the
26 * user fpstate pointer in the sigcontext.
27 */
28int check_for_xstate(struct i387_fxsave_struct __user *buf,
29 void __user *fpstate,
30 struct _fpx_sw_bytes *fx_sw_user)
31{
32 int min_xstate_size = sizeof(struct i387_fxsave_struct) +
33 sizeof(struct xsave_hdr_struct);
34 unsigned int magic2;
35 int err;
36
37 err = __copy_from_user(fx_sw_user, &buf->sw_reserved[0],
38 sizeof(struct _fpx_sw_bytes));
39
40 if (err)
41 return err;
42
43 /*
44 * First Magic check failed.
45 */
46 if (fx_sw_user->magic1 != FP_XSTATE_MAGIC1)
47 return -1;
48
49 /*
50 * Check for error scenarios.
51 */
52 if (fx_sw_user->xstate_size < min_xstate_size ||
53 fx_sw_user->xstate_size > xstate_size ||
54 fx_sw_user->xstate_size > fx_sw_user->extended_size)
55 return -1;
56
57 err = __get_user(magic2, (__u32 *) (((void *)fpstate) +
58 fx_sw_user->extended_size -
59 FP_XSTATE_MAGIC2_SIZE));
60 /*
61 * Check for the presence of second magic word at the end of memory
62 * layout. This detects the case where the user just copied the legacy
63 * fpstate layout with out copying the extended state information
64 * in the memory layout.
65 */
66 if (err || magic2 != FP_XSTATE_MAGIC2)
67 return -1;
68
69 return 0;
70}
71
72#ifdef CONFIG_X86_64
73/*
74 * Signal frame handlers.
75 */
76
77int save_i387_xstate(void __user *buf)
78{
79 struct task_struct *tsk = current;
80 int err = 0;
81
82 if (!access_ok(VERIFY_WRITE, buf, sig_xstate_size))
83 return -EACCES;
84
85 BUG_ON(sig_xstate_size < xstate_size);
86
87 if ((unsigned long)buf % 64)
88 printk("save_i387_xstate: bad fpstate %p\n", buf);
89
90 if (!used_math())
91 return 0;
92 clear_used_math(); /* trigger finit */
93 if (task_thread_info(tsk)->status & TS_USEDFPU) {
94 /*
95 * Start with clearing the user buffer. This will present a
96 * clean context for the bytes not touched by the fxsave/xsave.
97 */
98 __clear_user(buf, sig_xstate_size);
99
100 if (task_thread_info(tsk)->status & TS_XSAVE)
101 err = xsave_user(buf);
102 else
103 err = fxsave_user(buf);
104
105 if (err)
106 return err;
107 task_thread_info(tsk)->status &= ~TS_USEDFPU;
108 stts();
109 } else {
110 if (__copy_to_user(buf, &tsk->thread.xstate->fxsave,
111 xstate_size))
112 return -1;
113 }
114
115 if (task_thread_info(tsk)->status & TS_XSAVE) {
116 struct _fpstate __user *fx = buf;
117
118 err = __copy_to_user(&fx->sw_reserved, &fx_sw_reserved,
119 sizeof(struct _fpx_sw_bytes));
120
121 err |= __put_user(FP_XSTATE_MAGIC2,
122 (__u32 __user *) (buf + sig_xstate_size
123 - FP_XSTATE_MAGIC2_SIZE));
124 }
125
126 return 1;
127}
128
129/*
130 * Restore the extended state if present. Otherwise, restore the FP/SSE
131 * state.
132 */
133int restore_user_xstate(void __user *buf)
134{
135 struct _fpx_sw_bytes fx_sw_user;
136 u64 mask;
137 int err;
138
139 if (((unsigned long)buf % 64) ||
140 check_for_xstate(buf, buf, &fx_sw_user))
141 goto fx_only;
142
143 mask = fx_sw_user.xstate_bv;
144
145 /*
146 * restore the state passed by the user.
147 */
148 err = xrestore_user(buf, mask);
149 if (err)
150 return err;
151
152 /*
153 * init the state skipped by the user.
154 */
155 mask = pcntxt_mask & ~mask;
156
157 xrstor_state(init_xstate_buf, mask);
158
159 return 0;
160
161fx_only:
162 /*
163 * couldn't find the extended state information in the
164 * memory layout. Restore just the FP/SSE and init all
165 * the other extended state.
166 */
167 xrstor_state(init_xstate_buf, pcntxt_mask & ~XSTATE_FPSSE);
168 return fxrstor_checking((__force struct i387_fxsave_struct *)buf);
169}
170
171/*
172 * This restores directly out of user space. Exceptions are handled.
173 */
174int restore_i387_xstate(void __user *buf)
175{
176 struct task_struct *tsk = current;
177 int err = 0;
178
179 if (!buf) {
180 if (used_math())
181 goto clear;
182 return 0;
183 } else
184 if (!access_ok(VERIFY_READ, buf, sig_xstate_size))
185 return -EACCES;
186
187 if (!used_math()) {
188 err = init_fpu(tsk);
189 if (err)
190 return err;
191 }
192
193 if (!(task_thread_info(current)->status & TS_USEDFPU)) {
194 clts();
195 task_thread_info(current)->status |= TS_USEDFPU;
196 }
197 if (task_thread_info(tsk)->status & TS_XSAVE)
198 err = restore_user_xstate(buf);
199 else
200 err = fxrstor_checking((__force struct i387_fxsave_struct *)
201 buf);
202 if (unlikely(err)) {
203 /*
204 * Encountered an error while doing the restore from the
205 * user buffer, clear the fpu state.
206 */
207clear:
208 clear_fpu(tsk);
209 clear_used_math();
210 }
211 return err;
212}
213#endif
214
215/*
216 * Prepare the SW reserved portion of the fxsave memory layout, indicating
217 * the presence of the extended state information in the memory layout
218 * pointed by the fpstate pointer in the sigcontext.
219 * This will be saved when ever the FP and extended state context is
220 * saved on the user stack during the signal handler delivery to the user.
221 */
222void prepare_fx_sw_frame(void)
223{
224 int size_extended = (xstate_size - sizeof(struct i387_fxsave_struct)) +
225 FP_XSTATE_MAGIC2_SIZE;
226
227 sig_xstate_size = sizeof(struct _fpstate) + size_extended;
228
229#ifdef CONFIG_IA32_EMULATION
230 sig_xstate_ia32_size = sizeof(struct _fpstate_ia32) + size_extended;
231#endif
232
233 memset(&fx_sw_reserved, 0, sizeof(fx_sw_reserved));
234
235 fx_sw_reserved.magic1 = FP_XSTATE_MAGIC1;
236 fx_sw_reserved.extended_size = sig_xstate_size;
237 fx_sw_reserved.xstate_bv = pcntxt_mask;
238 fx_sw_reserved.xstate_size = xstate_size;
239#ifdef CONFIG_IA32_EMULATION
240 memcpy(&fx_sw_reserved_ia32, &fx_sw_reserved,
241 sizeof(struct _fpx_sw_bytes));
242 fx_sw_reserved_ia32.extended_size = sig_xstate_ia32_size;
243#endif
244}
245
246/*
247 * Represents init state for the supported extended state.
248 */
249struct xsave_struct *init_xstate_buf;
250
251#ifdef CONFIG_X86_64
252unsigned int sig_xstate_size = sizeof(struct _fpstate);
253#endif
254
255/*
256 * Enable the extended processor state save/restore feature
257 */
258void __cpuinit xsave_init(void)
259{
260 if (!cpu_has_xsave)
261 return;
262
263 set_in_cr4(X86_CR4_OSXSAVE);
264
265 /*
266 * Enable all the features that the HW is capable of
267 * and the Linux kernel is aware of.
268 */
269 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
270}
271
272/*
273 * setup the xstate image representing the init state
274 */
275void setup_xstate_init(void)
276{
277 init_xstate_buf = alloc_bootmem(xstate_size);
278 init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
279}
280
281/*
282 * Enable and initialize the xsave feature.
283 */
284void __init xsave_cntxt_init(void)
285{
286 unsigned int eax, ebx, ecx, edx;
287
288 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
289 pcntxt_mask = eax + ((u64)edx << 32);
290
291 if ((pcntxt_mask & XSTATE_FPSSE) != XSTATE_FPSSE) {
292 printk(KERN_ERR "FP/SSE not shown under xsave features 0x%llx\n",
293 pcntxt_mask);
294 BUG();
295 }
296
297 /*
298 * for now OS knows only about FP/SSE
299 */
300 pcntxt_mask = pcntxt_mask & XCNTXT_MASK;
301 xsave_init();
302
303 /*
304 * Recompute the context size for enabled features
305 */
306 cpuid_count(0xd, 0, &eax, &ebx, &ecx, &edx);
307 xstate_size = ebx;
308
309 prepare_fx_sw_frame();
310
311 setup_xstate_init();
312
313 printk(KERN_INFO "xsave/xrstor: enabled xstate_bv 0x%llx, "
314 "cntxt size 0x%x\n",
315 pcntxt_mask, xstate_size);
316}
diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
index 23e8373507ad..17e25995b65b 100644
--- a/arch/x86/kvm/vmx.h
+++ b/arch/x86/kvm/vmx.h
@@ -331,21 +331,6 @@ enum vmcs_field {
331 331
332#define AR_RESERVD_MASK 0xfffe0f00 332#define AR_RESERVD_MASK 0xfffe0f00
333 333
334#define MSR_IA32_VMX_BASIC 0x480
335#define MSR_IA32_VMX_PINBASED_CTLS 0x481
336#define MSR_IA32_VMX_PROCBASED_CTLS 0x482
337#define MSR_IA32_VMX_EXIT_CTLS 0x483
338#define MSR_IA32_VMX_ENTRY_CTLS 0x484
339#define MSR_IA32_VMX_MISC 0x485
340#define MSR_IA32_VMX_CR0_FIXED0 0x486
341#define MSR_IA32_VMX_CR0_FIXED1 0x487
342#define MSR_IA32_VMX_CR4_FIXED0 0x488
343#define MSR_IA32_VMX_CR4_FIXED1 0x489
344#define MSR_IA32_VMX_VMCS_ENUM 0x48a
345#define MSR_IA32_VMX_PROCBASED_CTLS2 0x48b
346#define MSR_IA32_VMX_EPT_VPID_CAP 0x48c
347
348#define MSR_IA32_FEATURE_CONTROL 0x3a
349#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1 334#define MSR_IA32_FEATURE_CONTROL_LOCKED 0x1
350#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4 335#define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED 0x4
351 336
diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c
index d9249a882aa5..65f0b8a47bed 100644
--- a/arch/x86/lguest/boot.c
+++ b/arch/x86/lguest/boot.c
@@ -55,6 +55,7 @@
55#include <linux/lguest_launcher.h> 55#include <linux/lguest_launcher.h>
56#include <linux/virtio_console.h> 56#include <linux/virtio_console.h>
57#include <linux/pm.h> 57#include <linux/pm.h>
58#include <asm/apic.h>
58#include <asm/lguest.h> 59#include <asm/lguest.h>
59#include <asm/paravirt.h> 60#include <asm/paravirt.h>
60#include <asm/param.h> 61#include <asm/param.h>
@@ -783,14 +784,44 @@ static void lguest_wbinvd(void)
783 * code qualifies for Advanced. It will also never interrupt anything. It 784 * code qualifies for Advanced. It will also never interrupt anything. It
784 * does, however, allow us to get through the Linux boot code. */ 785 * does, however, allow us to get through the Linux boot code. */
785#ifdef CONFIG_X86_LOCAL_APIC 786#ifdef CONFIG_X86_LOCAL_APIC
786static void lguest_apic_write(unsigned long reg, u32 v) 787static void lguest_apic_write(u32 reg, u32 v)
787{ 788{
788} 789}
789 790
790static u32 lguest_apic_read(unsigned long reg) 791static u32 lguest_apic_read(u32 reg)
791{ 792{
792 return 0; 793 return 0;
793} 794}
795
796static u64 lguest_apic_icr_read(void)
797{
798 return 0;
799}
800
801static void lguest_apic_icr_write(u32 low, u32 id)
802{
803 /* Warn to see if there's any stray references */
804 WARN_ON(1);
805}
806
807static void lguest_apic_wait_icr_idle(void)
808{
809 return;
810}
811
812static u32 lguest_apic_safe_wait_icr_idle(void)
813{
814 return 0;
815}
816
817static struct apic_ops lguest_basic_apic_ops = {
818 .read = lguest_apic_read,
819 .write = lguest_apic_write,
820 .icr_read = lguest_apic_icr_read,
821 .icr_write = lguest_apic_icr_write,
822 .wait_icr_idle = lguest_apic_wait_icr_idle,
823 .safe_wait_icr_idle = lguest_apic_safe_wait_icr_idle,
824};
794#endif 825#endif
795 826
796/* STOP! Until an interrupt comes in. */ 827/* STOP! Until an interrupt comes in. */
@@ -990,8 +1021,7 @@ __init void lguest_init(void)
990 1021
991#ifdef CONFIG_X86_LOCAL_APIC 1022#ifdef CONFIG_X86_LOCAL_APIC
992 /* apic read/write intercepts */ 1023 /* apic read/write intercepts */
993 pv_apic_ops.apic_write = lguest_apic_write; 1024 apic_ops = &lguest_basic_apic_ops;
994 pv_apic_ops.apic_read = lguest_apic_read;
995#endif 1025#endif
996 1026
997 /* time operations */ 1027 /* time operations */
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index aa3fa4119424..55e11aa6d66c 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -17,9 +17,6 @@ ifeq ($(CONFIG_X86_32),y)
17 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o 17 lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o
18else 18else
19 obj-y += io_64.o iomap_copy_64.o 19 obj-y += io_64.o iomap_copy_64.o
20
21 CFLAGS_csum-partial_64.o := -funroll-loops
22
23 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o 20 lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
24 lib-y += thunk_64.o clear_page_64.o copy_page_64.o 21 lib-y += thunk_64.o clear_page_64.o copy_page_64.o
25 lib-y += memmove_64.o memset_64.o 22 lib-y += memmove_64.o memset_64.o
diff --git a/arch/x86/lib/usercopy_32.c b/arch/x86/lib/usercopy_32.c
index 24e60944971a..9e68075544f6 100644
--- a/arch/x86/lib/usercopy_32.c
+++ b/arch/x86/lib/usercopy_32.c
@@ -14,6 +14,13 @@
14#include <asm/uaccess.h> 14#include <asm/uaccess.h>
15#include <asm/mmx.h> 15#include <asm/mmx.h>
16 16
17#ifdef CONFIG_X86_INTEL_USERCOPY
18/*
19 * Alignment at which movsl is preferred for bulk memory copies.
20 */
21struct movsl_mask movsl_mask __read_mostly;
22#endif
23
17static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n) 24static inline int __movsl_is_ok(unsigned long a1, unsigned long a2, unsigned long n)
18{ 25{
19#ifdef CONFIG_X86_INTEL_USERCOPY 26#ifdef CONFIG_X86_INTEL_USERCOPY
diff --git a/arch/x86/mach-default/setup.c b/arch/x86/mach-default/setup.c
index 3f2cf11f201a..37b9ae4d44c5 100644
--- a/arch/x86/mach-default/setup.c
+++ b/arch/x86/mach-default/setup.c
@@ -38,15 +38,6 @@ void __init pre_intr_init_hook(void)
38 init_ISA_irqs(); 38 init_ISA_irqs();
39} 39}
40 40
41/*
42 * IRQ2 is cascade interrupt to second interrupt controller
43 */
44static struct irqaction irq2 = {
45 .handler = no_action,
46 .mask = CPU_MASK_NONE,
47 .name = "cascade",
48};
49
50/** 41/**
51 * intr_init_hook - post gate setup interrupt initialisation 42 * intr_init_hook - post gate setup interrupt initialisation
52 * 43 *
@@ -62,12 +53,6 @@ void __init intr_init_hook(void)
62 if (x86_quirks->arch_intr_init()) 53 if (x86_quirks->arch_intr_init())
63 return; 54 return;
64 } 55 }
65#ifdef CONFIG_X86_LOCAL_APIC
66 apic_intr_init();
67#endif
68
69 if (!acpi_ioapic)
70 setup_irq(2, &irq2);
71} 56}
72 57
73/** 58/**
diff --git a/arch/x86/mach-es7000/Makefile b/arch/x86/mach-es7000/Makefile
deleted file mode 100644
index 3ef8b43b62fc..000000000000
--- a/arch/x86/mach-es7000/Makefile
+++ /dev/null
@@ -1,5 +0,0 @@
1#
2# Makefile for the linux kernel.
3#
4
5obj-$(CONFIG_X86_ES7000) := es7000plat.o
diff --git a/arch/x86/mach-es7000/es7000.h b/arch/x86/mach-es7000/es7000.h
deleted file mode 100644
index c8d5aa132fa0..000000000000
--- a/arch/x86/mach-es7000/es7000.h
+++ /dev/null
@@ -1,114 +0,0 @@
1/*
2 * Written by: Garry Forsgren, Unisys Corporation
3 * Natalie Protasevich, Unisys Corporation
4 * This file contains the code to configure and interface
5 * with Unisys ES7000 series hardware system manager.
6 *
7 * Copyright (c) 2003 Unisys Corporation. All Rights Reserved.
8 *
9 * This program is free software; you can redistribute it and/or modify it
10 * under the terms of version 2 of the GNU General Public License as
11 * published by the Free Software Foundation.
12 *
13 * This program is distributed in the hope that it would be useful, but
14 * WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write the Free Software Foundation, Inc., 59
19 * Temple Place - Suite 330, Boston MA 02111-1307, USA.
20 *
21 * Contact information: Unisys Corporation, Township Line & Union Meeting
22 * Roads-A, Unisys Way, Blue Bell, Pennsylvania, 19424, or:
23 *
24 * http://www.unisys.com
25 */
26
27/*
28 * ES7000 chipsets
29 */
30
31#define NON_UNISYS 0
32#define ES7000_CLASSIC 1
33#define ES7000_ZORRO 2
34
35
36#define MIP_REG 1
37#define MIP_PSAI_REG 4
38
39#define MIP_BUSY 1
40#define MIP_SPIN 0xf0000
41#define MIP_VALID 0x0100000000000000ULL
42#define MIP_PORT(VALUE) ((VALUE >> 32) & 0xffff)
43
44#define MIP_RD_LO(VALUE) (VALUE & 0xffffffff)
45
46struct mip_reg_info {
47 unsigned long long mip_info;
48 unsigned long long delivery_info;
49 unsigned long long host_reg;
50 unsigned long long mip_reg;
51};
52
53struct part_info {
54 unsigned char type;
55 unsigned char length;
56 unsigned char part_id;
57 unsigned char apic_mode;
58 unsigned long snum;
59 char ptype[16];
60 char sname[64];
61 char pname[64];
62};
63
64struct psai {
65 unsigned long long entry_type;
66 unsigned long long addr;
67 unsigned long long bep_addr;
68};
69
70struct es7000_mem_info {
71 unsigned char type;
72 unsigned char length;
73 unsigned char resv[6];
74 unsigned long long start;
75 unsigned long long size;
76};
77
78struct es7000_oem_table {
79 unsigned long long hdr;
80 struct mip_reg_info mip;
81 struct part_info pif;
82 struct es7000_mem_info shm;
83 struct psai psai;
84};
85
86#ifdef CONFIG_ACPI
87
88struct oem_table {
89 struct acpi_table_header Header;
90 u32 OEMTableAddr;
91 u32 OEMTableSize;
92};
93
94extern int find_unisys_acpi_oem_table(unsigned long *oem_addr);
95#endif
96
97struct mip_reg {
98 unsigned long long off_0;
99 unsigned long long off_8;
100 unsigned long long off_10;
101 unsigned long long off_18;
102 unsigned long long off_20;
103 unsigned long long off_28;
104 unsigned long long off_30;
105 unsigned long long off_38;
106};
107
108#define MIP_SW_APIC 0x1020b
109#define MIP_FUNC(VALUE) (VALUE & 0xff)
110
111extern int parse_unisys_oem (char *oemptr);
112extern void setup_unisys(void);
113extern int es7000_start_cpu(int cpu, unsigned long eip);
114extern void es7000_sw_apic(void);
diff --git a/arch/x86/mach-generic/Makefile b/arch/x86/mach-generic/Makefile
index 0dbd7803a1d5..6730f4e7c744 100644
--- a/arch/x86/mach-generic/Makefile
+++ b/arch/x86/mach-generic/Makefile
@@ -9,4 +9,3 @@ obj-$(CONFIG_X86_NUMAQ) += numaq.o
9obj-$(CONFIG_X86_SUMMIT) += summit.o 9obj-$(CONFIG_X86_SUMMIT) += summit.o
10obj-$(CONFIG_X86_BIGSMP) += bigsmp.o 10obj-$(CONFIG_X86_BIGSMP) += bigsmp.o
11obj-$(CONFIG_X86_ES7000) += es7000.o 11obj-$(CONFIG_X86_ES7000) += es7000.o
12obj-$(CONFIG_X86_ES7000) += ../../x86/mach-es7000/
diff --git a/arch/x86/mach-generic/bigsmp.c b/arch/x86/mach-generic/bigsmp.c
index 59d771714559..df37fc9d6a26 100644
--- a/arch/x86/mach-generic/bigsmp.c
+++ b/arch/x86/mach-generic/bigsmp.c
@@ -5,18 +5,17 @@
5#define APIC_DEFINITION 1 5#define APIC_DEFINITION 1
6#include <linux/threads.h> 6#include <linux/threads.h>
7#include <linux/cpumask.h> 7#include <linux/cpumask.h>
8#include <asm/smp.h>
9#include <asm/mpspec.h> 8#include <asm/mpspec.h>
10#include <asm/genapic.h> 9#include <asm/genapic.h>
11#include <asm/fixmap.h> 10#include <asm/fixmap.h>
12#include <asm/apicdef.h> 11#include <asm/apicdef.h>
13#include <linux/kernel.h> 12#include <linux/kernel.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <linux/dmi.h> 14#include <linux/dmi.h>
17#include <asm/mach-bigsmp/mach_apic.h> 15#include <asm/bigsmp/apicdef.h>
18#include <asm/mach-bigsmp/mach_apicdef.h> 16#include <linux/smp.h>
19#include <asm/mach-bigsmp/mach_ipi.h> 17#include <asm/bigsmp/apic.h>
18#include <asm/bigsmp/ipi.h>
20#include <asm/mach-default/mach_mpparse.h> 19#include <asm/mach-default/mach_mpparse.h>
21 20
22static int dmi_bigsmp; /* can be set by dmi scanners */ 21static int dmi_bigsmp; /* can be set by dmi scanners */
diff --git a/arch/x86/mach-generic/es7000.c b/arch/x86/mach-generic/es7000.c
index 4742626f08c4..520cca0ee04e 100644
--- a/arch/x86/mach-generic/es7000.c
+++ b/arch/x86/mach-generic/es7000.c
@@ -4,20 +4,19 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-es7000/mach_apicdef.h> 14#include <asm/es7000/apicdef.h>
17#include <asm/mach-es7000/mach_apic.h> 15#include <linux/smp.h>
18#include <asm/mach-es7000/mach_ipi.h> 16#include <asm/es7000/apic.h>
19#include <asm/mach-es7000/mach_mpparse.h> 17#include <asm/es7000/ipi.h>
20#include <asm/mach-es7000/mach_wakecpu.h> 18#include <asm/es7000/mpparse.h>
19#include <asm/es7000/wakecpu.h>
21 20
22static int probe_es7000(void) 21static int probe_es7000(void)
23{ 22{
diff --git a/arch/x86/mach-generic/numaq.c b/arch/x86/mach-generic/numaq.c
index 8091e68764c4..8cf58394975e 100644
--- a/arch/x86/mach-generic/numaq.c
+++ b/arch/x86/mach-generic/numaq.c
@@ -4,7 +4,6 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <linux/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
@@ -12,11 +11,12 @@
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/init.h> 13#include <linux/init.h>
15#include <asm/mach-numaq/mach_apic.h> 14#include <asm/numaq/apicdef.h>
16#include <asm/mach-numaq/mach_apicdef.h> 15#include <linux/smp.h>
17#include <asm/mach-numaq/mach_ipi.h> 16#include <asm/numaq/apic.h>
18#include <asm/mach-numaq/mach_mpparse.h> 17#include <asm/numaq/ipi.h>
19#include <asm/mach-numaq/mach_wakecpu.h> 18#include <asm/numaq/mpparse.h>
19#include <asm/numaq/wakecpu.h>
20#include <asm/numaq.h> 20#include <asm/numaq.h>
21 21
22static int mps_oem_check(struct mp_config_table *mpc, char *oem, 22static int mps_oem_check(struct mp_config_table *mpc, char *oem,
diff --git a/arch/x86/mach-generic/summit.c b/arch/x86/mach-generic/summit.c
index a97ea0f35b1e..6ad6b67a723d 100644
--- a/arch/x86/mach-generic/summit.c
+++ b/arch/x86/mach-generic/summit.c
@@ -4,19 +4,18 @@
4#define APIC_DEFINITION 1 4#define APIC_DEFINITION 1
5#include <linux/threads.h> 5#include <linux/threads.h>
6#include <linux/cpumask.h> 6#include <linux/cpumask.h>
7#include <asm/smp.h>
8#include <asm/mpspec.h> 7#include <asm/mpspec.h>
9#include <asm/genapic.h> 8#include <asm/genapic.h>
10#include <asm/fixmap.h> 9#include <asm/fixmap.h>
11#include <asm/apicdef.h> 10#include <asm/apicdef.h>
12#include <linux/kernel.h> 11#include <linux/kernel.h>
13#include <linux/string.h> 12#include <linux/string.h>
14#include <linux/smp.h>
15#include <linux/init.h> 13#include <linux/init.h>
16#include <asm/mach-summit/mach_apic.h> 14#include <asm/summit/apicdef.h>
17#include <asm/mach-summit/mach_apicdef.h> 15#include <linux/smp.h>
18#include <asm/mach-summit/mach_ipi.h> 16#include <asm/summit/apic.h>
19#include <asm/mach-summit/mach_mpparse.h> 17#include <asm/summit/ipi.h>
18#include <asm/summit/mpparse.h>
20 19
21static int probe_summit(void) 20static int probe_summit(void)
22{ 21{
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index fb30486c82f7..83e13f2d53d2 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -88,6 +88,62 @@ early_param("gbpages", parse_direct_gbpages_on);
88 88
89int after_bootmem; 89int after_bootmem;
90 90
91unsigned long __supported_pte_mask __read_mostly = ~0UL;
92EXPORT_SYMBOL_GPL(__supported_pte_mask);
93
94static int do_not_nx __cpuinitdata;
95
96/*
97 * noexec=on|off
98 * Control non-executable mappings for 64-bit processes.
99 *
100 * on Enable (default)
101 * off Disable
102 */
103static int __init nonx_setup(char *str)
104{
105 if (!str)
106 return -EINVAL;
107 if (!strncmp(str, "on", 2)) {
108 __supported_pte_mask |= _PAGE_NX;
109 do_not_nx = 0;
110 } else if (!strncmp(str, "off", 3)) {
111 do_not_nx = 1;
112 __supported_pte_mask &= ~_PAGE_NX;
113 }
114 return 0;
115}
116early_param("noexec", nonx_setup);
117
118void __cpuinit check_efer(void)
119{
120 unsigned long efer;
121
122 rdmsrl(MSR_EFER, efer);
123 if (!(efer & EFER_NX) || do_not_nx)
124 __supported_pte_mask &= ~_PAGE_NX;
125}
126
127int force_personality32;
128
129/*
130 * noexec32=on|off
131 * Control non executable heap for 32bit processes.
132 * To control the stack too use noexec=off
133 *
134 * on PROT_READ does not imply PROT_EXEC for 32-bit processes (default)
135 * off PROT_READ implies PROT_EXEC
136 */
137static int __init nonx32_setup(char *str)
138{
139 if (!strcmp(str, "on"))
140 force_personality32 &= ~READ_IMPLIES_EXEC;
141 else if (!strcmp(str, "off"))
142 force_personality32 |= READ_IMPLIES_EXEC;
143 return 1;
144}
145__setup("noexec32=", nonx32_setup);
146
91/* 147/*
92 * NOTE: This function is marked __ref because it calls __init function 148 * NOTE: This function is marked __ref because it calls __init function
93 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0. 149 * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
index 19af06927fbc..1d88d2b39771 100644
--- a/arch/x86/pci/acpi.c
+++ b/arch/x86/pci/acpi.c
@@ -250,10 +250,5 @@ int __init pci_acpi_init(void)
250 acpi_pci_irq_enable(dev); 250 acpi_pci_irq_enable(dev);
251 } 251 }
252 252
253#ifdef CONFIG_X86_IO_APIC
254 if (acpi_ioapic)
255 print_IO_APIC();
256#endif
257
258 return 0; 253 return 0;
259} 254}
diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c
index 8791fc55e715..844df0cbbd3e 100644
--- a/arch/x86/pci/i386.c
+++ b/arch/x86/pci/i386.c
@@ -33,6 +33,7 @@
33#include <linux/bootmem.h> 33#include <linux/bootmem.h>
34 34
35#include <asm/pat.h> 35#include <asm/pat.h>
36#include <asm/e820.h>
36 37
37#include "pci.h" 38#include "pci.h"
38 39
@@ -227,6 +228,8 @@ void __init pcibios_resource_survey(void)
227 pcibios_allocate_bus_resources(&pci_root_buses); 228 pcibios_allocate_bus_resources(&pci_root_buses);
228 pcibios_allocate_resources(0); 229 pcibios_allocate_resources(0);
229 pcibios_allocate_resources(1); 230 pcibios_allocate_resources(1);
231
232 e820_reserve_resources_late();
230} 233}
231 234
232/** 235/**
diff --git a/arch/x86/pci/mmconfig-shared.c b/arch/x86/pci/mmconfig-shared.c
index d9635764ce3d..654a2234f8f3 100644
--- a/arch/x86/pci/mmconfig-shared.c
+++ b/arch/x86/pci/mmconfig-shared.c
@@ -209,7 +209,7 @@ static int __init pci_mmcfg_check_hostbridge(void)
209 return name != NULL; 209 return name != NULL;
210} 210}
211 211
212static void __init pci_mmcfg_insert_resources(unsigned long resource_flags) 212static void __init pci_mmcfg_insert_resources(void)
213{ 213{
214#define PCI_MMCFG_RESOURCE_NAME_LEN 19 214#define PCI_MMCFG_RESOURCE_NAME_LEN 19
215 int i; 215 int i;
@@ -233,7 +233,7 @@ static void __init pci_mmcfg_insert_resources(unsigned long resource_flags)
233 cfg->pci_segment); 233 cfg->pci_segment);
234 res->start = cfg->address; 234 res->start = cfg->address;
235 res->end = res->start + (num_buses << 20) - 1; 235 res->end = res->start + (num_buses << 20) - 1;
236 res->flags = IORESOURCE_MEM | resource_flags; 236 res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
237 insert_resource(&iomem_resource, res); 237 insert_resource(&iomem_resource, res);
238 names += PCI_MMCFG_RESOURCE_NAME_LEN; 238 names += PCI_MMCFG_RESOURCE_NAME_LEN;
239 } 239 }
@@ -434,11 +434,9 @@ static void __init __pci_mmcfg_init(int early)
434 (pci_mmcfg_config[0].address == 0)) 434 (pci_mmcfg_config[0].address == 0))
435 return; 435 return;
436 436
437 if (pci_mmcfg_arch_init()) { 437 if (pci_mmcfg_arch_init())
438 if (known_bridge)
439 pci_mmcfg_insert_resources(IORESOURCE_BUSY);
440 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF; 438 pci_probe = (pci_probe & ~PCI_PROBE_MASK) | PCI_PROBE_MMCONF;
441 } else { 439 else {
442 /* 440 /*
443 * Signal not to attempt to insert mmcfg resources because 441 * Signal not to attempt to insert mmcfg resources because
444 * the architecture mmcfg setup could not initialize. 442 * the architecture mmcfg setup could not initialize.
@@ -475,7 +473,7 @@ static int __init pci_mmcfg_late_insert_resources(void)
475 * marked so it won't cause request errors when __request_region is 473 * marked so it won't cause request errors when __request_region is
476 * called. 474 * called.
477 */ 475 */
478 pci_mmcfg_insert_resources(0); 476 pci_mmcfg_insert_resources();
479 477
480 return 0; 478 return 0;
481} 479}
diff --git a/arch/x86/power/cpu_32.c b/arch/x86/power/cpu_32.c
index d3e083dea720..274d06082f48 100644
--- a/arch/x86/power/cpu_32.c
+++ b/arch/x86/power/cpu_32.c
@@ -11,6 +11,7 @@
11#include <linux/suspend.h> 11#include <linux/suspend.h>
12#include <asm/mtrr.h> 12#include <asm/mtrr.h>
13#include <asm/mce.h> 13#include <asm/mce.h>
14#include <asm/xcr.h>
14 15
15static struct saved_context saved_context; 16static struct saved_context saved_context;
16 17
@@ -126,6 +127,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
126 if (boot_cpu_has(X86_FEATURE_SEP)) 127 if (boot_cpu_has(X86_FEATURE_SEP))
127 enable_sep_cpu(); 128 enable_sep_cpu();
128 129
130 /*
131 * restore XCR0 for xsave capable cpu's.
132 */
133 if (cpu_has_xsave)
134 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
135
129 fix_processor_context(); 136 fix_processor_context();
130 do_fpu_end(); 137 do_fpu_end();
131 mtrr_ap_init(); 138 mtrr_ap_init();
diff --git a/arch/x86/power/cpu_64.c b/arch/x86/power/cpu_64.c
index 66bdfb591fd8..e3b6cf70d62c 100644
--- a/arch/x86/power/cpu_64.c
+++ b/arch/x86/power/cpu_64.c
@@ -14,6 +14,7 @@
14#include <asm/page.h> 14#include <asm/page.h>
15#include <asm/pgtable.h> 15#include <asm/pgtable.h>
16#include <asm/mtrr.h> 16#include <asm/mtrr.h>
17#include <asm/xcr.h>
17 18
18static void fix_processor_context(void); 19static void fix_processor_context(void);
19 20
@@ -122,6 +123,12 @@ static void __restore_processor_state(struct saved_context *ctxt)
122 wrmsrl(MSR_GS_BASE, ctxt->gs_base); 123 wrmsrl(MSR_GS_BASE, ctxt->gs_base);
123 wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base); 124 wrmsrl(MSR_KERNEL_GS_BASE, ctxt->gs_kernel_base);
124 125
126 /*
127 * restore XCR0 for xsave capable cpu's.
128 */
129 if (cpu_has_xsave)
130 xsetbv(XCR_XFEATURE_ENABLED_MASK, pcntxt_mask);
131
125 fix_processor_context(); 132 fix_processor_context();
126 133
127 do_fpu_end(); 134 do_fpu_end();
diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c
index 7dcd321a0508..a27d562a9744 100644
--- a/arch/x86/xen/enlighten.c
+++ b/arch/x86/xen/enlighten.c
@@ -36,6 +36,7 @@
36#include <xen/hvc-console.h> 36#include <xen/hvc-console.h>
37 37
38#include <asm/paravirt.h> 38#include <asm/paravirt.h>
39#include <asm/apic.h>
39#include <asm/page.h> 40#include <asm/page.h>
40#include <asm/xen/hypercall.h> 41#include <asm/xen/hypercall.h>
41#include <asm/xen/hypervisor.h> 42#include <asm/xen/hypervisor.h>
@@ -580,16 +581,47 @@ static void xen_io_delay(void)
580} 581}
581 582
582#ifdef CONFIG_X86_LOCAL_APIC 583#ifdef CONFIG_X86_LOCAL_APIC
583static u32 xen_apic_read(unsigned long reg) 584static u32 xen_apic_read(u32 reg)
584{ 585{
585 return 0; 586 return 0;
586} 587}
587 588
588static void xen_apic_write(unsigned long reg, u32 val) 589static void xen_apic_write(u32 reg, u32 val)
589{ 590{
590 /* Warn to see if there's any stray references */ 591 /* Warn to see if there's any stray references */
591 WARN_ON(1); 592 WARN_ON(1);
592} 593}
594
595static u64 xen_apic_icr_read(void)
596{
597 return 0;
598}
599
600static void xen_apic_icr_write(u32 low, u32 id)
601{
602 /* Warn to see if there's any stray references */
603 WARN_ON(1);
604}
605
606static void xen_apic_wait_icr_idle(void)
607{
608 return;
609}
610
611static u32 xen_safe_apic_wait_icr_idle(void)
612{
613 return 0;
614}
615
616static struct apic_ops xen_basic_apic_ops = {
617 .read = xen_apic_read,
618 .write = xen_apic_write,
619 .icr_read = xen_apic_icr_read,
620 .icr_write = xen_apic_icr_write,
621 .wait_icr_idle = xen_apic_wait_icr_idle,
622 .safe_wait_icr_idle = xen_safe_apic_wait_icr_idle,
623};
624
593#endif 625#endif
594 626
595static void xen_flush_tlb(void) 627static void xen_flush_tlb(void)
@@ -1273,8 +1305,6 @@ static const struct pv_irq_ops xen_irq_ops __initdata = {
1273 1305
1274static const struct pv_apic_ops xen_apic_ops __initdata = { 1306static const struct pv_apic_ops xen_apic_ops __initdata = {
1275#ifdef CONFIG_X86_LOCAL_APIC 1307#ifdef CONFIG_X86_LOCAL_APIC
1276 .apic_write = xen_apic_write,
1277 .apic_read = xen_apic_read,
1278 .setup_boot_clock = paravirt_nop, 1308 .setup_boot_clock = paravirt_nop,
1279 .setup_secondary_clock = paravirt_nop, 1309 .setup_secondary_clock = paravirt_nop,
1280 .startup_ipi_hook = paravirt_nop, 1310 .startup_ipi_hook = paravirt_nop,
@@ -1677,6 +1707,13 @@ asmlinkage void __init xen_start_kernel(void)
1677 pv_apic_ops = xen_apic_ops; 1707 pv_apic_ops = xen_apic_ops;
1678 pv_mmu_ops = xen_mmu_ops; 1708 pv_mmu_ops = xen_mmu_ops;
1679 1709
1710#ifdef CONFIG_X86_LOCAL_APIC
1711 /*
1712 * set up the basic apic ops.
1713 */
1714 apic_ops = &xen_basic_apic_ops;
1715#endif
1716
1680 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) { 1717 if (xen_feature(XENFEAT_mmu_pt_update_preserve_ad)) {
1681 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start; 1718 pv_mmu_ops.ptep_modify_prot_start = xen_ptep_modify_prot_start;
1682 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit; 1719 pv_mmu_ops.ptep_modify_prot_commit = xen_ptep_modify_prot_commit;
diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile
index 7d63f8ced24b..4b47f4ece5b7 100644
--- a/drivers/pci/Makefile
+++ b/drivers/pci/Makefile
@@ -26,6 +26,8 @@ obj-$(CONFIG_HT_IRQ) += htirq.o
26# Build Intel IOMMU support 26# Build Intel IOMMU support
27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o 27obj-$(CONFIG_DMAR) += dmar.o iova.o intel-iommu.o
28 28
29obj-$(CONFIG_INTR_REMAP) += dmar.o intr_remapping.o
30
29# 31#
30# Some architectures use the generic PCI setup functions 32# Some architectures use the generic PCI setup functions
31# 33#
diff --git a/drivers/pci/dma_remapping.h b/drivers/pci/dma_remapping.h
new file mode 100644
index 000000000000..bff5c65f81dc
--- /dev/null
+++ b/drivers/pci/dma_remapping.h
@@ -0,0 +1,157 @@
1#ifndef _DMA_REMAPPING_H
2#define _DMA_REMAPPING_H
3
4/*
5 * We need a fixed PAGE_SIZE of 4K irrespective of
6 * arch PAGE_SIZE for IOMMU page tables.
7 */
8#define PAGE_SHIFT_4K (12)
9#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
10#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
11#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
12
13#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
14#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
15#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
16
17
18/*
19 * 0: Present
20 * 1-11: Reserved
21 * 12-63: Context Ptr (12 - (haw-1))
22 * 64-127: Reserved
23 */
24struct root_entry {
25 u64 val;
26 u64 rsvd1;
27};
28#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry))
29static inline bool root_present(struct root_entry *root)
30{
31 return (root->val & 1);
32}
33static inline void set_root_present(struct root_entry *root)
34{
35 root->val |= 1;
36}
37static inline void set_root_value(struct root_entry *root, unsigned long value)
38{
39 root->val |= value & PAGE_MASK_4K;
40}
41
42struct context_entry;
43static inline struct context_entry *
44get_context_addr_from_root(struct root_entry *root)
45{
46 return (struct context_entry *)
47 (root_present(root)?phys_to_virt(
48 root->val & PAGE_MASK_4K):
49 NULL);
50}
51
52/*
53 * low 64 bits:
54 * 0: present
55 * 1: fault processing disable
56 * 2-3: translation type
57 * 12-63: address space root
58 * high 64 bits:
59 * 0-2: address width
60 * 3-6: aval
61 * 8-23: domain id
62 */
63struct context_entry {
64 u64 lo;
65 u64 hi;
66};
67#define context_present(c) ((c).lo & 1)
68#define context_fault_disable(c) (((c).lo >> 1) & 1)
69#define context_translation_type(c) (((c).lo >> 2) & 3)
70#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
71#define context_address_width(c) ((c).hi & 7)
72#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
73
74#define context_set_present(c) do {(c).lo |= 1;} while (0)
75#define context_set_fault_enable(c) \
76 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
77#define context_set_translation_type(c, val) \
78 do { \
79 (c).lo &= (((u64)-1) << 4) | 3; \
80 (c).lo |= ((val) & 3) << 2; \
81 } while (0)
82#define CONTEXT_TT_MULTI_LEVEL 0
83#define context_set_address_root(c, val) \
84 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
85#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
86#define context_set_domain_id(c, val) \
87 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
88#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
89
90/*
91 * 0: readable
92 * 1: writable
93 * 2-6: reserved
94 * 7: super page
95 * 8-11: available
96 * 12-63: Host physcial address
97 */
98struct dma_pte {
99 u64 val;
100};
101#define dma_clear_pte(p) do {(p).val = 0;} while (0)
102
103#define DMA_PTE_READ (1)
104#define DMA_PTE_WRITE (2)
105
106#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0)
107#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0)
108#define dma_set_pte_prot(p, prot) \
109 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
110#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
111#define dma_set_pte_addr(p, addr) do {\
112 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
113#define dma_pte_present(p) (((p).val & 3) != 0)
114
115struct intel_iommu;
116
117struct dmar_domain {
118 int id; /* domain id */
119 struct intel_iommu *iommu; /* back pointer to owning iommu */
120
121 struct list_head devices; /* all devices' list */
122 struct iova_domain iovad; /* iova's that belong to this domain */
123
124 struct dma_pte *pgd; /* virtual address */
125 spinlock_t mapping_lock; /* page table lock */
126 int gaw; /* max guest address width */
127
128 /* adjusted guest address width, 0 is level 2 30-bit */
129 int agaw;
130
131#define DOMAIN_FLAG_MULTIPLE_DEVICES 1
132 int flags;
133};
134
135/* PCI domain-device relationship */
136struct device_domain_info {
137 struct list_head link; /* link to domain siblings */
138 struct list_head global; /* link to global list */
139 u8 bus; /* PCI bus numer */
140 u8 devfn; /* PCI devfn number */
141 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
142 struct dmar_domain *domain; /* pointer to domain */
143};
144
145extern int init_dmars(void);
146extern void free_dmar_iommu(struct intel_iommu *iommu);
147
148extern int dmar_disabled;
149
150#ifndef CONFIG_DMAR_GFX_WA
151static inline void iommu_prepare_gfx_mapping(void)
152{
153 return;
154}
155#endif /* !CONFIG_DMAR_GFX_WA */
156
157#endif
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 8bf86ae2333f..bd2c01674f5e 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -19,13 +19,16 @@
19 * Author: Shaohua Li <shaohua.li@intel.com> 19 * Author: Shaohua Li <shaohua.li@intel.com>
20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com> 20 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
21 * 21 *
22 * This file implements early detection/parsing of DMA Remapping Devices 22 * This file implements early detection/parsing of Remapping Devices
23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI 23 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
24 * tables. 24 * tables.
25 *
26 * These routines are used by both DMA-remapping and Interrupt-remapping
25 */ 27 */
26 28
27#include <linux/pci.h> 29#include <linux/pci.h>
28#include <linux/dmar.h> 30#include <linux/dmar.h>
31#include <linux/timer.h>
29#include "iova.h" 32#include "iova.h"
30#include "intel-iommu.h" 33#include "intel-iommu.h"
31 34
@@ -37,7 +40,6 @@
37 * these units are not supported by the architecture. 40 * these units are not supported by the architecture.
38 */ 41 */
39LIST_HEAD(dmar_drhd_units); 42LIST_HEAD(dmar_drhd_units);
40LIST_HEAD(dmar_rmrr_units);
41 43
42static struct acpi_table_header * __initdata dmar_tbl; 44static struct acpi_table_header * __initdata dmar_tbl;
43 45
@@ -53,11 +55,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
53 list_add(&drhd->list, &dmar_drhd_units); 55 list_add(&drhd->list, &dmar_drhd_units);
54} 56}
55 57
56static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
57{
58 list_add(&rmrr->list, &dmar_rmrr_units);
59}
60
61static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, 58static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope,
62 struct pci_dev **dev, u16 segment) 59 struct pci_dev **dev, u16 segment)
63{ 60{
@@ -172,19 +169,37 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
172 struct acpi_dmar_hardware_unit *drhd; 169 struct acpi_dmar_hardware_unit *drhd;
173 struct dmar_drhd_unit *dmaru; 170 struct dmar_drhd_unit *dmaru;
174 int ret = 0; 171 int ret = 0;
175 static int include_all;
176 172
177 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL); 173 dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
178 if (!dmaru) 174 if (!dmaru)
179 return -ENOMEM; 175 return -ENOMEM;
180 176
177 dmaru->hdr = header;
181 drhd = (struct acpi_dmar_hardware_unit *)header; 178 drhd = (struct acpi_dmar_hardware_unit *)header;
182 dmaru->reg_base_addr = drhd->address; 179 dmaru->reg_base_addr = drhd->address;
183 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ 180 dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
184 181
182 ret = alloc_iommu(dmaru);
183 if (ret) {
184 kfree(dmaru);
185 return ret;
186 }
187 dmar_register_drhd_unit(dmaru);
188 return 0;
189}
190
191static int __init
192dmar_parse_dev(struct dmar_drhd_unit *dmaru)
193{
194 struct acpi_dmar_hardware_unit *drhd;
195 static int include_all;
196 int ret;
197
198 drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr;
199
185 if (!dmaru->include_all) 200 if (!dmaru->include_all)
186 ret = dmar_parse_dev_scope((void *)(drhd + 1), 201 ret = dmar_parse_dev_scope((void *)(drhd + 1),
187 ((void *)drhd) + header->length, 202 ((void *)drhd) + drhd->header.length,
188 &dmaru->devices_cnt, &dmaru->devices, 203 &dmaru->devices_cnt, &dmaru->devices,
189 drhd->segment); 204 drhd->segment);
190 else { 205 else {
@@ -197,37 +212,59 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header)
197 include_all = 1; 212 include_all = 1;
198 } 213 }
199 214
200 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) 215 if (ret || (dmaru->devices_cnt == 0 && !dmaru->include_all)) {
216 list_del(&dmaru->list);
201 kfree(dmaru); 217 kfree(dmaru);
202 else 218 }
203 dmar_register_drhd_unit(dmaru);
204 return ret; 219 return ret;
205} 220}
206 221
222#ifdef CONFIG_DMAR
223LIST_HEAD(dmar_rmrr_units);
224
225static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr)
226{
227 list_add(&rmrr->list, &dmar_rmrr_units);
228}
229
230
207static int __init 231static int __init
208dmar_parse_one_rmrr(struct acpi_dmar_header *header) 232dmar_parse_one_rmrr(struct acpi_dmar_header *header)
209{ 233{
210 struct acpi_dmar_reserved_memory *rmrr; 234 struct acpi_dmar_reserved_memory *rmrr;
211 struct dmar_rmrr_unit *rmrru; 235 struct dmar_rmrr_unit *rmrru;
212 int ret = 0;
213 236
214 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL); 237 rmrru = kzalloc(sizeof(*rmrru), GFP_KERNEL);
215 if (!rmrru) 238 if (!rmrru)
216 return -ENOMEM; 239 return -ENOMEM;
217 240
241 rmrru->hdr = header;
218 rmrr = (struct acpi_dmar_reserved_memory *)header; 242 rmrr = (struct acpi_dmar_reserved_memory *)header;
219 rmrru->base_address = rmrr->base_address; 243 rmrru->base_address = rmrr->base_address;
220 rmrru->end_address = rmrr->end_address; 244 rmrru->end_address = rmrr->end_address;
245
246 dmar_register_rmrr_unit(rmrru);
247 return 0;
248}
249
250static int __init
251rmrr_parse_dev(struct dmar_rmrr_unit *rmrru)
252{
253 struct acpi_dmar_reserved_memory *rmrr;
254 int ret;
255
256 rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr;
221 ret = dmar_parse_dev_scope((void *)(rmrr + 1), 257 ret = dmar_parse_dev_scope((void *)(rmrr + 1),
222 ((void *)rmrr) + header->length, 258 ((void *)rmrr) + rmrr->header.length,
223 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment); 259 &rmrru->devices_cnt, &rmrru->devices, rmrr->segment);
224 260
225 if (ret || (rmrru->devices_cnt == 0)) 261 if (ret || (rmrru->devices_cnt == 0)) {
262 list_del(&rmrru->list);
226 kfree(rmrru); 263 kfree(rmrru);
227 else 264 }
228 dmar_register_rmrr_unit(rmrru);
229 return ret; 265 return ret;
230} 266}
267#endif
231 268
232static void __init 269static void __init
233dmar_table_print_dmar_entry(struct acpi_dmar_header *header) 270dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -252,6 +289,7 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
252 } 289 }
253} 290}
254 291
292
255/** 293/**
256 * parse_dmar_table - parses the DMA reporting table 294 * parse_dmar_table - parses the DMA reporting table
257 */ 295 */
@@ -284,7 +322,9 @@ parse_dmar_table(void)
284 ret = dmar_parse_one_drhd(entry_header); 322 ret = dmar_parse_one_drhd(entry_header);
285 break; 323 break;
286 case ACPI_DMAR_TYPE_RESERVED_MEMORY: 324 case ACPI_DMAR_TYPE_RESERVED_MEMORY:
325#ifdef CONFIG_DMAR
287 ret = dmar_parse_one_rmrr(entry_header); 326 ret = dmar_parse_one_rmrr(entry_header);
327#endif
288 break; 328 break;
289 default: 329 default:
290 printk(KERN_WARNING PREFIX 330 printk(KERN_WARNING PREFIX
@@ -300,15 +340,77 @@ parse_dmar_table(void)
300 return ret; 340 return ret;
301} 341}
302 342
343int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
344 struct pci_dev *dev)
345{
346 int index;
347
348 while (dev) {
349 for (index = 0; index < cnt; index++)
350 if (dev == devices[index])
351 return 1;
303 352
304int __init dmar_table_init(void) 353 /* Check our parent */
354 dev = dev->bus->self;
355 }
356
357 return 0;
358}
359
360struct dmar_drhd_unit *
361dmar_find_matched_drhd_unit(struct pci_dev *dev)
305{ 362{
363 struct dmar_drhd_unit *drhd = NULL;
364
365 list_for_each_entry(drhd, &dmar_drhd_units, list) {
366 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
367 drhd->devices_cnt, dev))
368 return drhd;
369 }
370
371 return NULL;
372}
373
374int __init dmar_dev_scope_init(void)
375{
376 struct dmar_drhd_unit *drhd;
377 int ret = -ENODEV;
378
379 for_each_drhd_unit(drhd) {
380 ret = dmar_parse_dev(drhd);
381 if (ret)
382 return ret;
383 }
384
385#ifdef CONFIG_DMAR
386 {
387 struct dmar_rmrr_unit *rmrr;
388 for_each_rmrr_units(rmrr) {
389 ret = rmrr_parse_dev(rmrr);
390 if (ret)
391 return ret;
392 }
393 }
394#endif
395
396 return ret;
397}
306 398
399
400int __init dmar_table_init(void)
401{
402 static int dmar_table_initialized;
307 int ret; 403 int ret;
308 404
405 if (dmar_table_initialized)
406 return 0;
407
408 dmar_table_initialized = 1;
409
309 ret = parse_dmar_table(); 410 ret = parse_dmar_table();
310 if (ret) { 411 if (ret) {
311 printk(KERN_INFO PREFIX "parse DMAR table failure.\n"); 412 if (ret != -ENODEV)
413 printk(KERN_INFO PREFIX "parse DMAR table failure.\n");
312 return ret; 414 return ret;
313 } 415 }
314 416
@@ -317,9 +419,14 @@ int __init dmar_table_init(void)
317 return -ENODEV; 419 return -ENODEV;
318 } 420 }
319 421
422#ifdef CONFIG_DMAR
320 if (list_empty(&dmar_rmrr_units)) 423 if (list_empty(&dmar_rmrr_units))
321 printk(KERN_INFO PREFIX "No RMRR found\n"); 424 printk(KERN_INFO PREFIX "No RMRR found\n");
425#endif
322 426
427#ifdef CONFIG_INTR_REMAP
428 parse_ioapics_under_ir();
429#endif
323 return 0; 430 return 0;
324} 431}
325 432
@@ -341,3 +448,255 @@ int __init early_dmar_detect(void)
341 448
342 return (ACPI_SUCCESS(status) ? 1 : 0); 449 return (ACPI_SUCCESS(status) ? 1 : 0);
343} 450}
451
452void __init detect_intel_iommu(void)
453{
454 int ret;
455
456 ret = early_dmar_detect();
457
458#ifdef CONFIG_DMAR
459 {
460 struct acpi_table_dmar *dmar;
461 /*
462 * for now we will disable dma-remapping when interrupt
463 * remapping is enabled.
464 * When support for queued invalidation for IOTLB invalidation
465 * is added, we will not need this any more.
466 */
467 dmar = (struct acpi_table_dmar *) dmar_tbl;
468 if (ret && cpu_has_x2apic && dmar->flags & 0x1) {
469 printk(KERN_INFO
470 "Queued invalidation will be enabled to support "
471 "x2apic and Intr-remapping.\n");
472 printk(KERN_INFO
473 "Disabling IOMMU detection, because of missing "
474 "queued invalidation support for IOTLB "
475 "invalidation\n");
476 printk(KERN_INFO
477 "Use \"nox2apic\", if you want to use Intel "
478 " IOMMU for DMA-remapping and don't care about "
479 " x2apic support\n");
480
481 dmar_disabled = 1;
482 return;
483 }
484
485 if (ret && !no_iommu && !iommu_detected && !swiotlb &&
486 !dmar_disabled)
487 iommu_detected = 1;
488 }
489#endif
490}
491
492
493int alloc_iommu(struct dmar_drhd_unit *drhd)
494{
495 struct intel_iommu *iommu;
496 int map_size;
497 u32 ver;
498 static int iommu_allocated = 0;
499
500 iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
501 if (!iommu)
502 return -ENOMEM;
503
504 iommu->seq_id = iommu_allocated++;
505
506 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
507 if (!iommu->reg) {
508 printk(KERN_ERR "IOMMU: can't map the region\n");
509 goto error;
510 }
511 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
512 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
513
514 /* the registers might be more than one page */
515 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
516 cap_max_fault_reg_offset(iommu->cap));
517 map_size = PAGE_ALIGN_4K(map_size);
518 if (map_size > PAGE_SIZE_4K) {
519 iounmap(iommu->reg);
520 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
521 if (!iommu->reg) {
522 printk(KERN_ERR "IOMMU: can't map the region\n");
523 goto error;
524 }
525 }
526
527 ver = readl(iommu->reg + DMAR_VER_REG);
528 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
529 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
530 iommu->cap, iommu->ecap);
531
532 spin_lock_init(&iommu->register_lock);
533
534 drhd->iommu = iommu;
535 return 0;
536error:
537 kfree(iommu);
538 return -1;
539}
540
541void free_iommu(struct intel_iommu *iommu)
542{
543 if (!iommu)
544 return;
545
546#ifdef CONFIG_DMAR
547 free_dmar_iommu(iommu);
548#endif
549
550 if (iommu->reg)
551 iounmap(iommu->reg);
552 kfree(iommu);
553}
554
555/*
556 * Reclaim all the submitted descriptors which have completed its work.
557 */
558static inline void reclaim_free_desc(struct q_inval *qi)
559{
560 while (qi->desc_status[qi->free_tail] == QI_DONE) {
561 qi->desc_status[qi->free_tail] = QI_FREE;
562 qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
563 qi->free_cnt++;
564 }
565}
566
567/*
568 * Submit the queued invalidation descriptor to the remapping
569 * hardware unit and wait for its completion.
570 */
571void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
572{
573 struct q_inval *qi = iommu->qi;
574 struct qi_desc *hw, wait_desc;
575 int wait_index, index;
576 unsigned long flags;
577
578 if (!qi)
579 return;
580
581 hw = qi->desc;
582
583 spin_lock(&qi->q_lock);
584 while (qi->free_cnt < 3) {
585 spin_unlock(&qi->q_lock);
586 cpu_relax();
587 spin_lock(&qi->q_lock);
588 }
589
590 index = qi->free_head;
591 wait_index = (index + 1) % QI_LENGTH;
592
593 qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;
594
595 hw[index] = *desc;
596
597 wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
598 wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);
599
600 hw[wait_index] = wait_desc;
601
602 __iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
603 __iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));
604
605 qi->free_head = (qi->free_head + 2) % QI_LENGTH;
606 qi->free_cnt -= 2;
607
608 spin_lock_irqsave(&iommu->register_lock, flags);
609 /*
610 * update the HW tail register indicating the presence of
611 * new descriptors.
612 */
613 writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG);
614 spin_unlock_irqrestore(&iommu->register_lock, flags);
615
616 while (qi->desc_status[wait_index] != QI_DONE) {
617 spin_unlock(&qi->q_lock);
618 cpu_relax();
619 spin_lock(&qi->q_lock);
620 }
621
622 qi->desc_status[index] = QI_DONE;
623
624 reclaim_free_desc(qi);
625 spin_unlock(&qi->q_lock);
626}
627
628/*
629 * Flush the global interrupt entry cache.
630 */
631void qi_global_iec(struct intel_iommu *iommu)
632{
633 struct qi_desc desc;
634
635 desc.low = QI_IEC_TYPE;
636 desc.high = 0;
637
638 qi_submit_sync(&desc, iommu);
639}
640
641/*
642 * Enable Queued Invalidation interface. This is a must to support
643 * interrupt-remapping. Also used by DMA-remapping, which replaces
644 * register based IOTLB invalidation.
645 */
646int dmar_enable_qi(struct intel_iommu *iommu)
647{
648 u32 cmd, sts;
649 unsigned long flags;
650 struct q_inval *qi;
651
652 if (!ecap_qis(iommu->ecap))
653 return -ENOENT;
654
655 /*
656 * queued invalidation is already setup and enabled.
657 */
658 if (iommu->qi)
659 return 0;
660
661 iommu->qi = kmalloc(sizeof(*qi), GFP_KERNEL);
662 if (!iommu->qi)
663 return -ENOMEM;
664
665 qi = iommu->qi;
666
667 qi->desc = (void *)(get_zeroed_page(GFP_KERNEL));
668 if (!qi->desc) {
669 kfree(qi);
670 iommu->qi = 0;
671 return -ENOMEM;
672 }
673
674 qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_KERNEL);
675 if (!qi->desc_status) {
676 free_page((unsigned long) qi->desc);
677 kfree(qi);
678 iommu->qi = 0;
679 return -ENOMEM;
680 }
681
682 qi->free_head = qi->free_tail = 0;
683 qi->free_cnt = QI_LENGTH;
684
685 spin_lock_init(&qi->q_lock);
686
687 spin_lock_irqsave(&iommu->register_lock, flags);
688 /* write zero to the tail reg */
689 writel(0, iommu->reg + DMAR_IQT_REG);
690
691 dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));
692
693 cmd = iommu->gcmd | DMA_GCMD_QIE;
694 iommu->gcmd |= DMA_GCMD_QIE;
695 writel(cmd, iommu->reg + DMAR_GCMD_REG);
696
697 /* Make sure hardware complete it */
698 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);
699 spin_unlock_irqrestore(&iommu->register_lock, flags);
700
701 return 0;
702}
diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 6c4c1c3c50ee..389fdd6f4a9f 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -49,8 +49,6 @@
49 49
50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48 50#define DEFAULT_DOMAIN_ADDRESS_WIDTH 48
51 51
52#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
53
54#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1) 52#define DOMAIN_MAX_ADDR(gaw) ((((u64)1) << gaw) - 1)
55 53
56 54
@@ -58,8 +56,6 @@ static void flush_unmaps_timeout(unsigned long data);
58 56
59DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); 57DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0);
60 58
61static struct intel_iommu *g_iommus;
62
63#define HIGH_WATER_MARK 250 59#define HIGH_WATER_MARK 250
64struct deferred_flush_tables { 60struct deferred_flush_tables {
65 int next; 61 int next;
@@ -185,13 +181,6 @@ void free_iova_mem(struct iova *iova)
185 kmem_cache_free(iommu_iova_cache, iova); 181 kmem_cache_free(iommu_iova_cache, iova);
186} 182}
187 183
188static inline void __iommu_flush_cache(
189 struct intel_iommu *iommu, void *addr, int size)
190{
191 if (!ecap_coherent(iommu->ecap))
192 clflush_cache_range(addr, size);
193}
194
195/* Gets context entry for a given bus and devfn */ 184/* Gets context entry for a given bus and devfn */
196static struct context_entry * device_to_context_entry(struct intel_iommu *iommu, 185static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
197 u8 bus, u8 devfn) 186 u8 bus, u8 devfn)
@@ -488,19 +477,6 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
488 return 0; 477 return 0;
489} 478}
490 479
491#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
492{\
493 cycles_t start_time = get_cycles();\
494 while (1) {\
495 sts = op (iommu->reg + offset);\
496 if (cond)\
497 break;\
498 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
499 panic("DMAR hardware is malfunctioning\n");\
500 cpu_relax();\
501 }\
502}
503
504static void iommu_set_root_entry(struct intel_iommu *iommu) 480static void iommu_set_root_entry(struct intel_iommu *iommu)
505{ 481{
506 void *addr; 482 void *addr;
@@ -990,6 +966,8 @@ static int iommu_init_domains(struct intel_iommu *iommu)
990 return -ENOMEM; 966 return -ENOMEM;
991 } 967 }
992 968
969 spin_lock_init(&iommu->lock);
970
993 /* 971 /*
994 * if Caching mode is set, then invalid translations are tagged 972 * if Caching mode is set, then invalid translations are tagged
995 * with domainid 0. Hence we need to pre-allocate it. 973 * with domainid 0. Hence we need to pre-allocate it.
@@ -998,62 +976,15 @@ static int iommu_init_domains(struct intel_iommu *iommu)
998 set_bit(0, iommu->domain_ids); 976 set_bit(0, iommu->domain_ids);
999 return 0; 977 return 0;
1000} 978}
1001static struct intel_iommu *alloc_iommu(struct intel_iommu *iommu,
1002 struct dmar_drhd_unit *drhd)
1003{
1004 int ret;
1005 int map_size;
1006 u32 ver;
1007
1008 iommu->reg = ioremap(drhd->reg_base_addr, PAGE_SIZE_4K);
1009 if (!iommu->reg) {
1010 printk(KERN_ERR "IOMMU: can't map the region\n");
1011 goto error;
1012 }
1013 iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
1014 iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
1015
1016 /* the registers might be more than one page */
1017 map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
1018 cap_max_fault_reg_offset(iommu->cap));
1019 map_size = PAGE_ALIGN_4K(map_size);
1020 if (map_size > PAGE_SIZE_4K) {
1021 iounmap(iommu->reg);
1022 iommu->reg = ioremap(drhd->reg_base_addr, map_size);
1023 if (!iommu->reg) {
1024 printk(KERN_ERR "IOMMU: can't map the region\n");
1025 goto error;
1026 }
1027 }
1028
1029 ver = readl(iommu->reg + DMAR_VER_REG);
1030 pr_debug("IOMMU %llx: ver %d:%d cap %llx ecap %llx\n",
1031 drhd->reg_base_addr, DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
1032 iommu->cap, iommu->ecap);
1033 ret = iommu_init_domains(iommu);
1034 if (ret)
1035 goto error_unmap;
1036 spin_lock_init(&iommu->lock);
1037 spin_lock_init(&iommu->register_lock);
1038 979
1039 drhd->iommu = iommu;
1040 return iommu;
1041error_unmap:
1042 iounmap(iommu->reg);
1043error:
1044 kfree(iommu);
1045 return NULL;
1046}
1047 980
1048static void domain_exit(struct dmar_domain *domain); 981static void domain_exit(struct dmar_domain *domain);
1049static void free_iommu(struct intel_iommu *iommu) 982
983void free_dmar_iommu(struct intel_iommu *iommu)
1050{ 984{
1051 struct dmar_domain *domain; 985 struct dmar_domain *domain;
1052 int i; 986 int i;
1053 987
1054 if (!iommu)
1055 return;
1056
1057 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap)); 988 i = find_first_bit(iommu->domain_ids, cap_ndoms(iommu->cap));
1058 for (; i < cap_ndoms(iommu->cap); ) { 989 for (; i < cap_ndoms(iommu->cap); ) {
1059 domain = iommu->domains[i]; 990 domain = iommu->domains[i];
@@ -1078,10 +1009,6 @@ static void free_iommu(struct intel_iommu *iommu)
1078 1009
1079 /* free context mapping */ 1010 /* free context mapping */
1080 free_context_table(iommu); 1011 free_context_table(iommu);
1081
1082 if (iommu->reg)
1083 iounmap(iommu->reg);
1084 kfree(iommu);
1085} 1012}
1086 1013
1087static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu) 1014static struct dmar_domain * iommu_alloc_domain(struct intel_iommu *iommu)
@@ -1426,37 +1353,6 @@ find_domain(struct pci_dev *pdev)
1426 return NULL; 1353 return NULL;
1427} 1354}
1428 1355
1429static int dmar_pci_device_match(struct pci_dev *devices[], int cnt,
1430 struct pci_dev *dev)
1431{
1432 int index;
1433
1434 while (dev) {
1435 for (index = 0; index < cnt; index++)
1436 if (dev == devices[index])
1437 return 1;
1438
1439 /* Check our parent */
1440 dev = dev->bus->self;
1441 }
1442
1443 return 0;
1444}
1445
1446static struct dmar_drhd_unit *
1447dmar_find_matched_drhd_unit(struct pci_dev *dev)
1448{
1449 struct dmar_drhd_unit *drhd = NULL;
1450
1451 list_for_each_entry(drhd, &dmar_drhd_units, list) {
1452 if (drhd->include_all || dmar_pci_device_match(drhd->devices,
1453 drhd->devices_cnt, dev))
1454 return drhd;
1455 }
1456
1457 return NULL;
1458}
1459
1460/* domain is initialized */ 1356/* domain is initialized */
1461static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) 1357static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw)
1462{ 1358{
@@ -1729,8 +1625,6 @@ int __init init_dmars(void)
1729 * endfor 1625 * endfor
1730 */ 1626 */
1731 for_each_drhd_unit(drhd) { 1627 for_each_drhd_unit(drhd) {
1732 if (drhd->ignored)
1733 continue;
1734 g_num_of_iommus++; 1628 g_num_of_iommus++;
1735 /* 1629 /*
1736 * lock not needed as this is only incremented in the single 1630 * lock not needed as this is only incremented in the single
@@ -1739,12 +1633,6 @@ int __init init_dmars(void)
1739 */ 1633 */
1740 } 1634 }
1741 1635
1742 g_iommus = kzalloc(g_num_of_iommus * sizeof(*iommu), GFP_KERNEL);
1743 if (!g_iommus) {
1744 ret = -ENOMEM;
1745 goto error;
1746 }
1747
1748 deferred_flush = kzalloc(g_num_of_iommus * 1636 deferred_flush = kzalloc(g_num_of_iommus *
1749 sizeof(struct deferred_flush_tables), GFP_KERNEL); 1637 sizeof(struct deferred_flush_tables), GFP_KERNEL);
1750 if (!deferred_flush) { 1638 if (!deferred_flush) {
@@ -1752,16 +1640,15 @@ int __init init_dmars(void)
1752 goto error; 1640 goto error;
1753 } 1641 }
1754 1642
1755 i = 0;
1756 for_each_drhd_unit(drhd) { 1643 for_each_drhd_unit(drhd) {
1757 if (drhd->ignored) 1644 if (drhd->ignored)
1758 continue; 1645 continue;
1759 iommu = alloc_iommu(&g_iommus[i], drhd); 1646
1760 i++; 1647 iommu = drhd->iommu;
1761 if (!iommu) { 1648
1762 ret = -ENOMEM; 1649 ret = iommu_init_domains(iommu);
1650 if (ret)
1763 goto error; 1651 goto error;
1764 }
1765 1652
1766 /* 1653 /*
1767 * TBD: 1654 * TBD:
@@ -1845,7 +1732,6 @@ error:
1845 iommu = drhd->iommu; 1732 iommu = drhd->iommu;
1846 free_iommu(iommu); 1733 free_iommu(iommu);
1847 } 1734 }
1848 kfree(g_iommus);
1849 return ret; 1735 return ret;
1850} 1736}
1851 1737
@@ -2002,7 +1888,10 @@ static void flush_unmaps(void)
2002 /* just flush them all */ 1888 /* just flush them all */
2003 for (i = 0; i < g_num_of_iommus; i++) { 1889 for (i = 0; i < g_num_of_iommus; i++) {
2004 if (deferred_flush[i].next) { 1890 if (deferred_flush[i].next) {
2005 iommu_flush_iotlb_global(&g_iommus[i], 0); 1891 struct intel_iommu *iommu =
1892 deferred_flush[i].domain[0]->iommu;
1893
1894 iommu_flush_iotlb_global(iommu, 0);
2006 for (j = 0; j < deferred_flush[i].next; j++) { 1895 for (j = 0; j < deferred_flush[i].next; j++) {
2007 __free_iova(&deferred_flush[i].domain[j]->iovad, 1896 __free_iova(&deferred_flush[i].domain[j]->iovad,
2008 deferred_flush[i].iova[j]); 1897 deferred_flush[i].iova[j]);
@@ -2032,7 +1921,8 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova)
2032 if (list_size == HIGH_WATER_MARK) 1921 if (list_size == HIGH_WATER_MARK)
2033 flush_unmaps(); 1922 flush_unmaps();
2034 1923
2035 iommu_id = dom->iommu - g_iommus; 1924 iommu_id = dom->iommu->seq_id;
1925
2036 next = deferred_flush[iommu_id].next; 1926 next = deferred_flush[iommu_id].next;
2037 deferred_flush[iommu_id].domain[next] = dom; 1927 deferred_flush[iommu_id].domain[next] = dom;
2038 deferred_flush[iommu_id].iova[next] = iova; 1928 deferred_flush[iommu_id].iova[next] = iova;
@@ -2348,38 +2238,6 @@ static void __init iommu_exit_mempool(void)
2348 2238
2349} 2239}
2350 2240
2351static int blacklist_iommu(const struct dmi_system_id *id)
2352{
2353 printk(KERN_INFO "%s detected; disabling IOMMU\n",
2354 id->ident);
2355 dmar_disabled = 1;
2356 return 0;
2357}
2358
2359static struct dmi_system_id __initdata intel_iommu_dmi_table[] = {
2360 { /* Some DG33BU BIOS revisions advertised non-existent VT-d */
2361 .callback = blacklist_iommu,
2362 .ident = "Intel DG33BU",
2363 { DMI_MATCH(DMI_BOARD_VENDOR, "Intel Corporation"),
2364 DMI_MATCH(DMI_BOARD_NAME, "DG33BU"),
2365 }
2366 },
2367 { }
2368};
2369
2370
2371void __init detect_intel_iommu(void)
2372{
2373 if (swiotlb || no_iommu || iommu_detected || dmar_disabled)
2374 return;
2375 if (early_dmar_detect()) {
2376 dmi_check_system(intel_iommu_dmi_table);
2377 if (dmar_disabled)
2378 return;
2379 iommu_detected = 1;
2380 }
2381}
2382
2383static void __init init_no_remapping_devices(void) 2241static void __init init_no_remapping_devices(void)
2384{ 2242{
2385 struct dmar_drhd_unit *drhd; 2243 struct dmar_drhd_unit *drhd;
@@ -2426,12 +2284,19 @@ int __init intel_iommu_init(void)
2426{ 2284{
2427 int ret = 0; 2285 int ret = 0;
2428 2286
2429 if (no_iommu || swiotlb || dmar_disabled)
2430 return -ENODEV;
2431
2432 if (dmar_table_init()) 2287 if (dmar_table_init())
2433 return -ENODEV; 2288 return -ENODEV;
2434 2289
2290 if (dmar_dev_scope_init())
2291 return -ENODEV;
2292
2293 /*
2294 * Check the need for DMA-remapping initialization now.
2295 * Above initialization will also be used by Interrupt-remapping.
2296 */
2297 if (no_iommu || swiotlb || dmar_disabled)
2298 return -ENODEV;
2299
2435 iommu_init_mempool(); 2300 iommu_init_mempool();
2436 dmar_init_reserved_ranges(); 2301 dmar_init_reserved_ranges();
2437 2302
diff --git a/drivers/pci/intel-iommu.h b/drivers/pci/intel-iommu.h
index afc0ad96122e..2142c01e0143 100644
--- a/drivers/pci/intel-iommu.h
+++ b/drivers/pci/intel-iommu.h
@@ -27,19 +27,8 @@
27#include <linux/sysdev.h> 27#include <linux/sysdev.h>
28#include "iova.h" 28#include "iova.h"
29#include <linux/io.h> 29#include <linux/io.h>
30 30#include <asm/cacheflush.h>
31/* 31#include "dma_remapping.h"
32 * We need a fixed PAGE_SIZE of 4K irrespective of
33 * arch PAGE_SIZE for IOMMU page tables.
34 */
35#define PAGE_SHIFT_4K (12)
36#define PAGE_SIZE_4K (1UL << PAGE_SHIFT_4K)
37#define PAGE_MASK_4K (((u64)-1) << PAGE_SHIFT_4K)
38#define PAGE_ALIGN_4K(addr) (((addr) + PAGE_SIZE_4K - 1) & PAGE_MASK_4K)
39
40#define IOVA_PFN(addr) ((addr) >> PAGE_SHIFT_4K)
41#define DMA_32BIT_PFN IOVA_PFN(DMA_32BIT_MASK)
42#define DMA_64BIT_PFN IOVA_PFN(DMA_64BIT_MASK)
43 32
44/* 33/*
45 * Intel IOMMU register specification per version 1.0 public spec. 34 * Intel IOMMU register specification per version 1.0 public spec.
@@ -63,6 +52,11 @@
63#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */ 52#define DMAR_PLMLIMIT_REG 0x6c /* PMRR low limit */
64#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */ 53#define DMAR_PHMBASE_REG 0x70 /* pmrr high base addr */
65#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */ 54#define DMAR_PHMLIMIT_REG 0x78 /* pmrr high limit */
55#define DMAR_IQH_REG 0x80 /* Invalidation queue head register */
56#define DMAR_IQT_REG 0x88 /* Invalidation queue tail register */
57#define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */
58#define DMAR_ICS_REG 0x98 /* Invalidation complete status register */
59#define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */
66 60
67#define OFFSET_STRIDE (9) 61#define OFFSET_STRIDE (9)
68/* 62/*
@@ -126,6 +120,10 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
126#define ecap_max_iotlb_offset(e) \ 120#define ecap_max_iotlb_offset(e) \
127 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16) 121 (ecap_iotlb_offset(e) + ecap_niotlb_iunits(e) * 16)
128#define ecap_coherent(e) ((e) & 0x1) 122#define ecap_coherent(e) ((e) & 0x1)
123#define ecap_qis(e) ((e) & 0x2)
124#define ecap_eim_support(e) ((e >> 4) & 0x1)
125#define ecap_ir_support(e) ((e >> 3) & 0x1)
126#define ecap_max_handle_mask(e) ((e >> 20) & 0xf)
129 127
130 128
131/* IOTLB_REG */ 129/* IOTLB_REG */
@@ -141,6 +139,17 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
141#define DMA_TLB_IH_NONLEAF (((u64)1) << 6) 139#define DMA_TLB_IH_NONLEAF (((u64)1) << 6)
142#define DMA_TLB_MAX_SIZE (0x3f) 140#define DMA_TLB_MAX_SIZE (0x3f)
143 141
142/* INVALID_DESC */
143#define DMA_ID_TLB_GLOBAL_FLUSH (((u64)1) << 3)
144#define DMA_ID_TLB_DSI_FLUSH (((u64)2) << 3)
145#define DMA_ID_TLB_PSI_FLUSH (((u64)3) << 3)
146#define DMA_ID_TLB_READ_DRAIN (((u64)1) << 7)
147#define DMA_ID_TLB_WRITE_DRAIN (((u64)1) << 6)
148#define DMA_ID_TLB_DID(id) (((u64)((id & 0xffff) << 16)))
149#define DMA_ID_TLB_IH_NONLEAF (((u64)1) << 6)
150#define DMA_ID_TLB_ADDR(addr) (addr)
151#define DMA_ID_TLB_ADDR_MASK(mask) (mask)
152
144/* PMEN_REG */ 153/* PMEN_REG */
145#define DMA_PMEN_EPM (((u32)1)<<31) 154#define DMA_PMEN_EPM (((u32)1)<<31)
146#define DMA_PMEN_PRS (((u32)1)<<0) 155#define DMA_PMEN_PRS (((u32)1)<<0)
@@ -151,6 +160,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
151#define DMA_GCMD_SFL (((u32)1) << 29) 160#define DMA_GCMD_SFL (((u32)1) << 29)
152#define DMA_GCMD_EAFL (((u32)1) << 28) 161#define DMA_GCMD_EAFL (((u32)1) << 28)
153#define DMA_GCMD_WBF (((u32)1) << 27) 162#define DMA_GCMD_WBF (((u32)1) << 27)
163#define DMA_GCMD_QIE (((u32)1) << 26)
164#define DMA_GCMD_SIRTP (((u32)1) << 24)
165#define DMA_GCMD_IRE (((u32) 1) << 25)
154 166
155/* GSTS_REG */ 167/* GSTS_REG */
156#define DMA_GSTS_TES (((u32)1) << 31) 168#define DMA_GSTS_TES (((u32)1) << 31)
@@ -158,6 +170,9 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
158#define DMA_GSTS_FLS (((u32)1) << 29) 170#define DMA_GSTS_FLS (((u32)1) << 29)
159#define DMA_GSTS_AFLS (((u32)1) << 28) 171#define DMA_GSTS_AFLS (((u32)1) << 28)
160#define DMA_GSTS_WBFS (((u32)1) << 27) 172#define DMA_GSTS_WBFS (((u32)1) << 27)
173#define DMA_GSTS_QIES (((u32)1) << 26)
174#define DMA_GSTS_IRTPS (((u32)1) << 24)
175#define DMA_GSTS_IRES (((u32)1) << 25)
161 176
162/* CCMD_REG */ 177/* CCMD_REG */
163#define DMA_CCMD_ICC (((u64)1) << 63) 178#define DMA_CCMD_ICC (((u64)1) << 63)
@@ -187,158 +202,106 @@ static inline void dmar_writeq(void __iomem *addr, u64 val)
187#define dma_frcd_source_id(c) (c & 0xffff) 202#define dma_frcd_source_id(c) (c & 0xffff)
188#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */ 203#define dma_frcd_page_addr(d) (d & (((u64)-1) << 12)) /* low 64 bit */
189 204
190/* 205#define DMAR_OPERATION_TIMEOUT ((cycles_t) tsc_khz*10*1000) /* 10sec */
191 * 0: Present 206
192 * 1-11: Reserved 207#define IOMMU_WAIT_OP(iommu, offset, op, cond, sts) \
193 * 12-63: Context Ptr (12 - (haw-1)) 208{\
194 * 64-127: Reserved 209 cycles_t start_time = get_cycles();\
195 */ 210 while (1) {\
196struct root_entry { 211 sts = op (iommu->reg + offset);\
197 u64 val; 212 if (cond)\
198 u64 rsvd1; 213 break;\
199}; 214 if (DMAR_OPERATION_TIMEOUT < (get_cycles() - start_time))\
200#define ROOT_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct root_entry)) 215 panic("DMAR hardware is malfunctioning\n");\
201static inline bool root_present(struct root_entry *root) 216 cpu_relax();\
202{ 217 }\
203 return (root->val & 1);
204}
205static inline void set_root_present(struct root_entry *root)
206{
207 root->val |= 1;
208}
209static inline void set_root_value(struct root_entry *root, unsigned long value)
210{
211 root->val |= value & PAGE_MASK_4K;
212} 218}
213 219
214struct context_entry; 220#define QI_LENGTH 256 /* queue length */
215static inline struct context_entry *
216get_context_addr_from_root(struct root_entry *root)
217{
218 return (struct context_entry *)
219 (root_present(root)?phys_to_virt(
220 root->val & PAGE_MASK_4K):
221 NULL);
222}
223
224/*
225 * low 64 bits:
226 * 0: present
227 * 1: fault processing disable
228 * 2-3: translation type
229 * 12-63: address space root
230 * high 64 bits:
231 * 0-2: address width
232 * 3-6: aval
233 * 8-23: domain id
234 */
235struct context_entry {
236 u64 lo;
237 u64 hi;
238};
239#define context_present(c) ((c).lo & 1)
240#define context_fault_disable(c) (((c).lo >> 1) & 1)
241#define context_translation_type(c) (((c).lo >> 2) & 3)
242#define context_address_root(c) ((c).lo & PAGE_MASK_4K)
243#define context_address_width(c) ((c).hi & 7)
244#define context_domain_id(c) (((c).hi >> 8) & ((1 << 16) - 1))
245
246#define context_set_present(c) do {(c).lo |= 1;} while (0)
247#define context_set_fault_enable(c) \
248 do {(c).lo &= (((u64)-1) << 2) | 1;} while (0)
249#define context_set_translation_type(c, val) \
250 do { \
251 (c).lo &= (((u64)-1) << 4) | 3; \
252 (c).lo |= ((val) & 3) << 2; \
253 } while (0)
254#define CONTEXT_TT_MULTI_LEVEL 0
255#define context_set_address_root(c, val) \
256 do {(c).lo |= (val) & PAGE_MASK_4K;} while (0)
257#define context_set_address_width(c, val) do {(c).hi |= (val) & 7;} while (0)
258#define context_set_domain_id(c, val) \
259 do {(c).hi |= ((val) & ((1 << 16) - 1)) << 8;} while (0)
260#define context_clear_entry(c) do {(c).lo = 0; (c).hi = 0;} while (0)
261 221
262/* 222enum {
263 * 0: readable 223 QI_FREE,
264 * 1: writable 224 QI_IN_USE,
265 * 2-6: reserved 225 QI_DONE
266 * 7: super page
267 * 8-11: available
268 * 12-63: Host physcial address
269 */
270struct dma_pte {
271 u64 val;
272}; 226};
273#define dma_clear_pte(p) do {(p).val = 0;} while (0)
274
275#define DMA_PTE_READ (1)
276#define DMA_PTE_WRITE (2)
277 227
278#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while (0) 228#define QI_CC_TYPE 0x1
279#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while (0) 229#define QI_IOTLB_TYPE 0x2
280#define dma_set_pte_prot(p, prot) \ 230#define QI_DIOTLB_TYPE 0x3
281 do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0) 231#define QI_IEC_TYPE 0x4
282#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K) 232#define QI_IWD_TYPE 0x5
283#define dma_set_pte_addr(p, addr) do {\
284 (p).val |= ((addr) & PAGE_MASK_4K); } while (0)
285#define dma_pte_present(p) (((p).val & 3) != 0)
286 233
287struct intel_iommu; 234#define QI_IEC_SELECTIVE (((u64)1) << 4)
235#define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32))
236#define QI_IEC_IM(m) (((u64)(m & 0x1f) << 27))
288 237
289struct dmar_domain { 238#define QI_IWD_STATUS_DATA(d) (((u64)d) << 32)
290 int id; /* domain id */ 239#define QI_IWD_STATUS_WRITE (((u64)1) << 5)
291 struct intel_iommu *iommu; /* back pointer to owning iommu */
292 240
293 struct list_head devices; /* all devices' list */ 241struct qi_desc {
294 struct iova_domain iovad; /* iova's that belong to this domain */ 242 u64 low, high;
243};
295 244
296 struct dma_pte *pgd; /* virtual address */ 245struct q_inval {
297 spinlock_t mapping_lock; /* page table lock */ 246 spinlock_t q_lock;
298 int gaw; /* max guest address width */ 247 struct qi_desc *desc; /* invalidation queue */
248 int *desc_status; /* desc status */
249 int free_head; /* first free entry */
250 int free_tail; /* last free entry */
251 int free_cnt;
252};
299 253
300 /* adjusted guest address width, 0 is level 2 30-bit */ 254#ifdef CONFIG_INTR_REMAP
301 int agaw; 255/* 1MB - maximum possible interrupt remapping table size */
256#define INTR_REMAP_PAGE_ORDER 8
257#define INTR_REMAP_TABLE_REG_SIZE 0xf
302 258
303#define DOMAIN_FLAG_MULTIPLE_DEVICES 1 259#define INTR_REMAP_TABLE_ENTRIES 65536
304 int flags;
305};
306 260
307/* PCI domain-device relationship */ 261struct ir_table {
308struct device_domain_info { 262 struct irte *base;
309 struct list_head link; /* link to domain siblings */
310 struct list_head global; /* link to global list */
311 u8 bus; /* PCI bus numer */
312 u8 devfn; /* PCI devfn number */
313 struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */
314 struct dmar_domain *domain; /* pointer to domain */
315}; 263};
316 264#endif
317extern int init_dmars(void);
318 265
319struct intel_iommu { 266struct intel_iommu {
320 void __iomem *reg; /* Pointer to hardware regs, virtual addr */ 267 void __iomem *reg; /* Pointer to hardware regs, virtual addr */
321 u64 cap; 268 u64 cap;
322 u64 ecap; 269 u64 ecap;
323 unsigned long *domain_ids; /* bitmap of domains */
324 struct dmar_domain **domains; /* ptr to domains */
325 int seg; 270 int seg;
326 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */ 271 u32 gcmd; /* Holds TE, EAFL. Don't need SRTP, SFL, WBF */
327 spinlock_t lock; /* protect context, domain ids */
328 spinlock_t register_lock; /* protect register handling */ 272 spinlock_t register_lock; /* protect register handling */
273 int seq_id; /* sequence id of the iommu */
274
275#ifdef CONFIG_DMAR
276 unsigned long *domain_ids; /* bitmap of domains */
277 struct dmar_domain **domains; /* ptr to domains */
278 spinlock_t lock; /* protect context, domain ids */
329 struct root_entry *root_entry; /* virtual address */ 279 struct root_entry *root_entry; /* virtual address */
330 280
331 unsigned int irq; 281 unsigned int irq;
332 unsigned char name[7]; /* Device Name */ 282 unsigned char name[7]; /* Device Name */
333 struct msi_msg saved_msg; 283 struct msi_msg saved_msg;
334 struct sys_device sysdev; 284 struct sys_device sysdev;
285#endif
286 struct q_inval *qi; /* Queued invalidation info */
287#ifdef CONFIG_INTR_REMAP
288 struct ir_table *ir_table; /* Interrupt remapping info */
289#endif
335}; 290};
336 291
337#ifndef CONFIG_DMAR_GFX_WA 292static inline void __iommu_flush_cache(
338static inline void iommu_prepare_gfx_mapping(void) 293 struct intel_iommu *iommu, void *addr, int size)
339{ 294{
340 return; 295 if (!ecap_coherent(iommu->ecap))
296 clflush_cache_range(addr, size);
341} 297}
342#endif /* !CONFIG_DMAR_GFX_WA */
343 298
299extern struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev);
300
301extern int alloc_iommu(struct dmar_drhd_unit *drhd);
302extern void free_iommu(struct intel_iommu *iommu);
303extern int dmar_enable_qi(struct intel_iommu *iommu);
304extern void qi_global_iec(struct intel_iommu *iommu);
305
306extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu);
344#endif 307#endif
diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
new file mode 100644
index 000000000000..bb642cc5e18c
--- /dev/null
+++ b/drivers/pci/intr_remapping.c
@@ -0,0 +1,471 @@
1#include <linux/dmar.h>
2#include <linux/spinlock.h>
3#include <linux/jiffies.h>
4#include <linux/pci.h>
5#include <linux/irq.h>
6#include <asm/io_apic.h>
7#include "intel-iommu.h"
8#include "intr_remapping.h"
9
10static struct ioapic_scope ir_ioapic[MAX_IO_APICS];
11static int ir_ioapic_num;
12int intr_remapping_enabled;
13
14static struct {
15 struct intel_iommu *iommu;
16 u16 irte_index;
17 u16 sub_handle;
18 u8 irte_mask;
19} irq_2_iommu[NR_IRQS];
20
21static DEFINE_SPINLOCK(irq_2_ir_lock);
22
23int irq_remapped(int irq)
24{
25 if (irq > NR_IRQS)
26 return 0;
27
28 if (!irq_2_iommu[irq].iommu)
29 return 0;
30
31 return 1;
32}
33
34int get_irte(int irq, struct irte *entry)
35{
36 int index;
37
38 if (!entry || irq > NR_IRQS)
39 return -1;
40
41 spin_lock(&irq_2_ir_lock);
42 if (!irq_2_iommu[irq].iommu) {
43 spin_unlock(&irq_2_ir_lock);
44 return -1;
45 }
46
47 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
48 *entry = *(irq_2_iommu[irq].iommu->ir_table->base + index);
49
50 spin_unlock(&irq_2_ir_lock);
51 return 0;
52}
53
54int alloc_irte(struct intel_iommu *iommu, int irq, u16 count)
55{
56 struct ir_table *table = iommu->ir_table;
57 u16 index, start_index;
58 unsigned int mask = 0;
59 int i;
60
61 if (!count)
62 return -1;
63
64 /*
65 * start the IRTE search from index 0.
66 */
67 index = start_index = 0;
68
69 if (count > 1) {
70 count = __roundup_pow_of_two(count);
71 mask = ilog2(count);
72 }
73
74 if (mask > ecap_max_handle_mask(iommu->ecap)) {
75 printk(KERN_ERR
76 "Requested mask %x exceeds the max invalidation handle"
77 " mask value %Lx\n", mask,
78 ecap_max_handle_mask(iommu->ecap));
79 return -1;
80 }
81
82 spin_lock(&irq_2_ir_lock);
83 do {
84 for (i = index; i < index + count; i++)
85 if (table->base[i].present)
86 break;
87 /* empty index found */
88 if (i == index + count)
89 break;
90
91 index = (index + count) % INTR_REMAP_TABLE_ENTRIES;
92
93 if (index == start_index) {
94 spin_unlock(&irq_2_ir_lock);
95 printk(KERN_ERR "can't allocate an IRTE\n");
96 return -1;
97 }
98 } while (1);
99
100 for (i = index; i < index + count; i++)
101 table->base[i].present = 1;
102
103 irq_2_iommu[irq].iommu = iommu;
104 irq_2_iommu[irq].irte_index = index;
105 irq_2_iommu[irq].sub_handle = 0;
106 irq_2_iommu[irq].irte_mask = mask;
107
108 spin_unlock(&irq_2_ir_lock);
109
110 return index;
111}
112
113static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask)
114{
115 struct qi_desc desc;
116
117 desc.low = QI_IEC_IIDEX(index) | QI_IEC_TYPE | QI_IEC_IM(mask)
118 | QI_IEC_SELECTIVE;
119 desc.high = 0;
120
121 qi_submit_sync(&desc, iommu);
122}
123
124int map_irq_to_irte_handle(int irq, u16 *sub_handle)
125{
126 int index;
127
128 spin_lock(&irq_2_ir_lock);
129 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
130 spin_unlock(&irq_2_ir_lock);
131 return -1;
132 }
133
134 *sub_handle = irq_2_iommu[irq].sub_handle;
135 index = irq_2_iommu[irq].irte_index;
136 spin_unlock(&irq_2_ir_lock);
137 return index;
138}
139
140int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index, u16 subhandle)
141{
142 spin_lock(&irq_2_ir_lock);
143 if (irq >= NR_IRQS || irq_2_iommu[irq].iommu) {
144 spin_unlock(&irq_2_ir_lock);
145 return -1;
146 }
147
148 irq_2_iommu[irq].iommu = iommu;
149 irq_2_iommu[irq].irte_index = index;
150 irq_2_iommu[irq].sub_handle = subhandle;
151 irq_2_iommu[irq].irte_mask = 0;
152
153 spin_unlock(&irq_2_ir_lock);
154
155 return 0;
156}
157
158int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index)
159{
160 spin_lock(&irq_2_ir_lock);
161 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
162 spin_unlock(&irq_2_ir_lock);
163 return -1;
164 }
165
166 irq_2_iommu[irq].iommu = NULL;
167 irq_2_iommu[irq].irte_index = 0;
168 irq_2_iommu[irq].sub_handle = 0;
169 irq_2_iommu[irq].irte_mask = 0;
170
171 spin_unlock(&irq_2_ir_lock);
172
173 return 0;
174}
175
176int modify_irte(int irq, struct irte *irte_modified)
177{
178 int index;
179 struct irte *irte;
180 struct intel_iommu *iommu;
181
182 spin_lock(&irq_2_ir_lock);
183 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
184 spin_unlock(&irq_2_ir_lock);
185 return -1;
186 }
187
188 iommu = irq_2_iommu[irq].iommu;
189
190 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
191 irte = &iommu->ir_table->base[index];
192
193 set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1));
194 __iommu_flush_cache(iommu, irte, sizeof(*irte));
195
196 qi_flush_iec(iommu, index, 0);
197
198 spin_unlock(&irq_2_ir_lock);
199 return 0;
200}
201
202int flush_irte(int irq)
203{
204 int index;
205 struct intel_iommu *iommu;
206
207 spin_lock(&irq_2_ir_lock);
208 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
209 spin_unlock(&irq_2_ir_lock);
210 return -1;
211 }
212
213 iommu = irq_2_iommu[irq].iommu;
214
215 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
216
217 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
218 spin_unlock(&irq_2_ir_lock);
219
220 return 0;
221}
222
223struct intel_iommu *map_ioapic_to_ir(int apic)
224{
225 int i;
226
227 for (i = 0; i < MAX_IO_APICS; i++)
228 if (ir_ioapic[i].id == apic)
229 return ir_ioapic[i].iommu;
230 return NULL;
231}
232
233struct intel_iommu *map_dev_to_ir(struct pci_dev *dev)
234{
235 struct dmar_drhd_unit *drhd;
236
237 drhd = dmar_find_matched_drhd_unit(dev);
238 if (!drhd)
239 return NULL;
240
241 return drhd->iommu;
242}
243
244int free_irte(int irq)
245{
246 int index, i;
247 struct irte *irte;
248 struct intel_iommu *iommu;
249
250 spin_lock(&irq_2_ir_lock);
251 if (irq >= NR_IRQS || !irq_2_iommu[irq].iommu) {
252 spin_unlock(&irq_2_ir_lock);
253 return -1;
254 }
255
256 iommu = irq_2_iommu[irq].iommu;
257
258 index = irq_2_iommu[irq].irte_index + irq_2_iommu[irq].sub_handle;
259 irte = &iommu->ir_table->base[index];
260
261 if (!irq_2_iommu[irq].sub_handle) {
262 for (i = 0; i < (1 << irq_2_iommu[irq].irte_mask); i++)
263 set_64bit((unsigned long *)irte, 0);
264 qi_flush_iec(iommu, index, irq_2_iommu[irq].irte_mask);
265 }
266
267 irq_2_iommu[irq].iommu = NULL;
268 irq_2_iommu[irq].irte_index = 0;
269 irq_2_iommu[irq].sub_handle = 0;
270 irq_2_iommu[irq].irte_mask = 0;
271
272 spin_unlock(&irq_2_ir_lock);
273
274 return 0;
275}
276
277static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode)
278{
279 u64 addr;
280 u32 cmd, sts;
281 unsigned long flags;
282
283 addr = virt_to_phys((void *)iommu->ir_table->base);
284
285 spin_lock_irqsave(&iommu->register_lock, flags);
286
287 dmar_writeq(iommu->reg + DMAR_IRTA_REG,
288 (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE);
289
290 /* Set interrupt-remapping table pointer */
291 cmd = iommu->gcmd | DMA_GCMD_SIRTP;
292 writel(cmd, iommu->reg + DMAR_GCMD_REG);
293
294 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
295 readl, (sts & DMA_GSTS_IRTPS), sts);
296 spin_unlock_irqrestore(&iommu->register_lock, flags);
297
298 /*
299 * global invalidation of interrupt entry cache before enabling
300 * interrupt-remapping.
301 */
302 qi_global_iec(iommu);
303
304 spin_lock_irqsave(&iommu->register_lock, flags);
305
306 /* Enable interrupt-remapping */
307 cmd = iommu->gcmd | DMA_GCMD_IRE;
308 iommu->gcmd |= DMA_GCMD_IRE;
309 writel(cmd, iommu->reg + DMAR_GCMD_REG);
310
311 IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG,
312 readl, (sts & DMA_GSTS_IRES), sts);
313
314 spin_unlock_irqrestore(&iommu->register_lock, flags);
315}
316
317
318static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
319{
320 struct ir_table *ir_table;
321 struct page *pages;
322
323 ir_table = iommu->ir_table = kzalloc(sizeof(struct ir_table),
324 GFP_KERNEL);
325
326 if (!iommu->ir_table)
327 return -ENOMEM;
328
329 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
330
331 if (!pages) {
332 printk(KERN_ERR "failed to allocate pages of order %d\n",
333 INTR_REMAP_PAGE_ORDER);
334 kfree(iommu->ir_table);
335 return -ENOMEM;
336 }
337
338 ir_table->base = page_address(pages);
339
340 iommu_set_intr_remapping(iommu, mode);
341 return 0;
342}
343
344int __init enable_intr_remapping(int eim)
345{
346 struct dmar_drhd_unit *drhd;
347 int setup = 0;
348
349 /*
350 * check for the Interrupt-remapping support
351 */
352 for_each_drhd_unit(drhd) {
353 struct intel_iommu *iommu = drhd->iommu;
354
355 if (!ecap_ir_support(iommu->ecap))
356 continue;
357
358 if (eim && !ecap_eim_support(iommu->ecap)) {
359 printk(KERN_INFO "DRHD %Lx: EIM not supported by DRHD, "
360 " ecap %Lx\n", drhd->reg_base_addr, iommu->ecap);
361 return -1;
362 }
363 }
364
365 /*
366 * Enable queued invalidation for all the DRHD's.
367 */
368 for_each_drhd_unit(drhd) {
369 int ret;
370 struct intel_iommu *iommu = drhd->iommu;
371 ret = dmar_enable_qi(iommu);
372
373 if (ret) {
374 printk(KERN_ERR "DRHD %Lx: failed to enable queued, "
375 " invalidation, ecap %Lx, ret %d\n",
376 drhd->reg_base_addr, iommu->ecap, ret);
377 return -1;
378 }
379 }
380
381 /*
382 * Setup Interrupt-remapping for all the DRHD's now.
383 */
384 for_each_drhd_unit(drhd) {
385 struct intel_iommu *iommu = drhd->iommu;
386
387 if (!ecap_ir_support(iommu->ecap))
388 continue;
389
390 if (setup_intr_remapping(iommu, eim))
391 goto error;
392
393 setup = 1;
394 }
395
396 if (!setup)
397 goto error;
398
399 intr_remapping_enabled = 1;
400
401 return 0;
402
403error:
404 /*
405 * handle error condition gracefully here!
406 */
407 return -1;
408}
409
410static int ir_parse_ioapic_scope(struct acpi_dmar_header *header,
411 struct intel_iommu *iommu)
412{
413 struct acpi_dmar_hardware_unit *drhd;
414 struct acpi_dmar_device_scope *scope;
415 void *start, *end;
416
417 drhd = (struct acpi_dmar_hardware_unit *)header;
418
419 start = (void *)(drhd + 1);
420 end = ((void *)drhd) + header->length;
421
422 while (start < end) {
423 scope = start;
424 if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_IOAPIC) {
425 if (ir_ioapic_num == MAX_IO_APICS) {
426 printk(KERN_WARNING "Exceeded Max IO APICS\n");
427 return -1;
428 }
429
430 printk(KERN_INFO "IOAPIC id %d under DRHD base"
431 " 0x%Lx\n", scope->enumeration_id,
432 drhd->address);
433
434 ir_ioapic[ir_ioapic_num].iommu = iommu;
435 ir_ioapic[ir_ioapic_num].id = scope->enumeration_id;
436 ir_ioapic_num++;
437 }
438 start += scope->length;
439 }
440
441 return 0;
442}
443
444/*
445 * Finds the assocaition between IOAPIC's and its Interrupt-remapping
446 * hardware unit.
447 */
448int __init parse_ioapics_under_ir(void)
449{
450 struct dmar_drhd_unit *drhd;
451 int ir_supported = 0;
452
453 for_each_drhd_unit(drhd) {
454 struct intel_iommu *iommu = drhd->iommu;
455
456 if (ecap_ir_support(iommu->ecap)) {
457 if (ir_parse_ioapic_scope(drhd->hdr, iommu))
458 return -1;
459
460 ir_supported = 1;
461 }
462 }
463
464 if (ir_supported && ir_ioapic_num != nr_ioapics) {
465 printk(KERN_WARNING
466 "Not all IO-APIC's listed under remapping hardware\n");
467 return -1;
468 }
469
470 return ir_supported;
471}
diff --git a/drivers/pci/intr_remapping.h b/drivers/pci/intr_remapping.h
new file mode 100644
index 000000000000..05f2635bbe4e
--- /dev/null
+++ b/drivers/pci/intr_remapping.h
@@ -0,0 +1,8 @@
1#include "intel-iommu.h"
2
3struct ioapic_scope {
4 struct intel_iommu *iommu;
5 unsigned int id;
6};
7
8#define IR_X2APIC_MODE(mode) (mode ? (1 << 11) : 0)
diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h
index cb752ba72466..7440a0dceddb 100644
--- a/include/asm-generic/vmlinux.lds.h
+++ b/include/asm-generic/vmlinux.lds.h
@@ -385,6 +385,7 @@
385 . = ALIGN(align); \ 385 . = ALIGN(align); \
386 VMLINUX_SYMBOL(__per_cpu_start) = .; \ 386 VMLINUX_SYMBOL(__per_cpu_start) = .; \
387 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \ 387 .data.percpu : AT(ADDR(.data.percpu) - LOAD_OFFSET) { \
388 *(.data.percpu.page_aligned) \
388 *(.data.percpu) \ 389 *(.data.percpu) \
389 *(.data.percpu.shared_aligned) \ 390 *(.data.percpu.shared_aligned) \
390 } \ 391 } \
diff --git a/include/asm-x86/apic.h b/include/asm-x86/apic.h
index 65590c9aecd4..d76a0839abe9 100644
--- a/include/asm-x86/apic.h
+++ b/include/asm-x86/apic.h
@@ -9,6 +9,8 @@
9#include <asm/apicdef.h> 9#include <asm/apicdef.h>
10#include <asm/processor.h> 10#include <asm/processor.h>
11#include <asm/system.h> 11#include <asm/system.h>
12#include <asm/cpufeature.h>
13#include <asm/msr.h>
12 14
13#define ARCH_APICTIMER_STOPS_ON_C3 1 15#define ARCH_APICTIMER_STOPS_ON_C3 1
14 16
@@ -47,8 +49,6 @@ extern int disable_apic;
47#ifdef CONFIG_PARAVIRT 49#ifdef CONFIG_PARAVIRT
48#include <asm/paravirt.h> 50#include <asm/paravirt.h>
49#else 51#else
50#define apic_write native_apic_write
51#define apic_read native_apic_read
52#define setup_boot_clock setup_boot_APIC_clock 52#define setup_boot_clock setup_boot_APIC_clock
53#define setup_secondary_clock setup_secondary_APIC_clock 53#define setup_secondary_clock setup_secondary_APIC_clock
54#endif 54#endif
@@ -60,7 +60,7 @@ extern u64 xapic_icr_read(void);
60extern void xapic_icr_write(u32, u32); 60extern void xapic_icr_write(u32, u32);
61extern int setup_profiling_timer(unsigned int); 61extern int setup_profiling_timer(unsigned int);
62 62
63static inline void native_apic_write(unsigned long reg, u32 v) 63static inline void native_apic_mem_write(u32 reg, u32 v)
64{ 64{
65 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg); 65 volatile u32 *addr = (volatile u32 *)(APIC_BASE + reg);
66 66
@@ -69,15 +69,68 @@ static inline void native_apic_write(unsigned long reg, u32 v)
69 ASM_OUTPUT2("0" (v), "m" (*addr))); 69 ASM_OUTPUT2("0" (v), "m" (*addr)));
70} 70}
71 71
72static inline u32 native_apic_read(unsigned long reg) 72static inline u32 native_apic_mem_read(u32 reg)
73{ 73{
74 return *((volatile u32 *)(APIC_BASE + reg)); 74 return *((volatile u32 *)(APIC_BASE + reg));
75} 75}
76 76
77extern void apic_wait_icr_idle(void); 77static inline void native_apic_msr_write(u32 reg, u32 v)
78extern u32 safe_apic_wait_icr_idle(void); 78{
79 if (reg == APIC_DFR || reg == APIC_ID || reg == APIC_LDR ||
80 reg == APIC_LVR)
81 return;
82
83 wrmsr(APIC_BASE_MSR + (reg >> 4), v, 0);
84}
85
86static inline u32 native_apic_msr_read(u32 reg)
87{
88 u32 low, high;
89
90 if (reg == APIC_DFR)
91 return -1;
92
93 rdmsr(APIC_BASE_MSR + (reg >> 4), low, high);
94 return low;
95}
96
97#ifndef CONFIG_X86_32
98extern int x2apic, x2apic_preenabled;
99extern void check_x2apic(void);
100extern void enable_x2apic(void);
101extern void enable_IR_x2apic(void);
102extern void x2apic_icr_write(u32 low, u32 id);
103#endif
104
105struct apic_ops {
106 u32 (*read)(u32 reg);
107 void (*write)(u32 reg, u32 v);
108 u64 (*icr_read)(void);
109 void (*icr_write)(u32 low, u32 high);
110 void (*wait_icr_idle)(void);
111 u32 (*safe_wait_icr_idle)(void);
112};
113
114extern struct apic_ops *apic_ops;
115
116#define apic_read (apic_ops->read)
117#define apic_write (apic_ops->write)
118#define apic_icr_read (apic_ops->icr_read)
119#define apic_icr_write (apic_ops->icr_write)
120#define apic_wait_icr_idle (apic_ops->wait_icr_idle)
121#define safe_apic_wait_icr_idle (apic_ops->safe_wait_icr_idle)
122
79extern int get_physical_broadcast(void); 123extern int get_physical_broadcast(void);
80 124
125#ifdef CONFIG_X86_64
126static inline void ack_x2APIC_irq(void)
127{
128 /* Docs say use 0 for future compatibility */
129 native_apic_msr_write(APIC_EOI, 0);
130}
131#endif
132
133
81static inline void ack_APIC_irq(void) 134static inline void ack_APIC_irq(void)
82{ 135{
83 /* 136 /*
diff --git a/include/asm-x86/apicdef.h b/include/asm-x86/apicdef.h
index c40687da20fc..b922c85ac91d 100644
--- a/include/asm-x86/apicdef.h
+++ b/include/asm-x86/apicdef.h
@@ -105,6 +105,7 @@
105#define APIC_TMICT 0x380 105#define APIC_TMICT 0x380
106#define APIC_TMCCT 0x390 106#define APIC_TMCCT 0x390
107#define APIC_TDCR 0x3E0 107#define APIC_TDCR 0x3E0
108#define APIC_SELF_IPI 0x3F0
108#define APIC_TDR_DIV_TMBASE (1 << 2) 109#define APIC_TDR_DIV_TMBASE (1 << 2)
109#define APIC_TDR_DIV_1 0xB 110#define APIC_TDR_DIV_1 0xB
110#define APIC_TDR_DIV_2 0x0 111#define APIC_TDR_DIV_2 0x0
@@ -128,6 +129,8 @@
128#define APIC_EILVT3 0x530 129#define APIC_EILVT3 0x530
129 130
130#define APIC_BASE (fix_to_virt(FIX_APIC_BASE)) 131#define APIC_BASE (fix_to_virt(FIX_APIC_BASE))
132#define APIC_BASE_MSR 0x800
133#define X2APIC_ENABLE (1UL << 10)
131 134
132#ifdef CONFIG_X86_32 135#ifdef CONFIG_X86_32
133# define MAX_IO_APICS 64 136# define MAX_IO_APICS 64
diff --git a/include/asm-x86/arch_hooks.h b/include/asm-x86/arch_hooks.h
index 72adc3a109cc..de4596b24c23 100644
--- a/include/asm-x86/arch_hooks.h
+++ b/include/asm-x86/arch_hooks.h
@@ -12,8 +12,6 @@
12/* these aren't arch hooks, they are generic routines 12/* these aren't arch hooks, they are generic routines
13 * that can be used by the hooks */ 13 * that can be used by the hooks */
14extern void init_ISA_irqs(void); 14extern void init_ISA_irqs(void);
15extern void apic_intr_init(void);
16extern void smp_intr_init(void);
17extern irqreturn_t timer_interrupt(int irq, void *dev_id); 15extern irqreturn_t timer_interrupt(int irq, void *dev_id);
18 16
19/* these are the defined hooks */ 17/* these are the defined hooks */
diff --git a/include/asm-x86/mach-bigsmp/mach_apic.h b/include/asm-x86/bigsmp/apic.h
index 05362d44a3ee..0a9cd7c5ca0c 100644
--- a/include/asm-x86/mach-bigsmp/mach_apic.h
+++ b/include/asm-x86/bigsmp/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_APIC_H 1#ifndef __ASM_MACH_APIC_H
2#define ASM_X86__MACH_BIGSMP__MACH_APIC_H 2#define __ASM_MACH_APIC_H
3 3
4#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu)) 4#define xapic_phys_to_log_apicid(cpu) (per_cpu(x86_bios_cpu_apicid, cpu))
5#define esr_disable (1) 5#define esr_disable (1)
@@ -11,7 +11,7 @@ static inline int apic_id_registered(void)
11 11
12/* Round robin the irqs amoung the online cpus */ 12/* Round robin the irqs amoung the online cpus */
13static inline cpumask_t target_cpus(void) 13static inline cpumask_t target_cpus(void)
14{ 14{
15 static unsigned long cpu = NR_CPUS; 15 static unsigned long cpu = NR_CPUS;
16 do { 16 do {
17 if (cpu >= NR_CPUS) 17 if (cpu >= NR_CPUS)
@@ -23,7 +23,7 @@ static inline cpumask_t target_cpus(void)
23} 23}
24 24
25#undef APIC_DEST_LOGICAL 25#undef APIC_DEST_LOGICAL
26#define APIC_DEST_LOGICAL 0 26#define APIC_DEST_LOGICAL 0
27#define TARGET_CPUS (target_cpus()) 27#define TARGET_CPUS (target_cpus())
28#define APIC_DFR_VALUE (APIC_DFR_FLAT) 28#define APIC_DFR_VALUE (APIC_DFR_FLAT)
29#define INT_DELIVERY_MODE (dest_Fixed) 29#define INT_DELIVERY_MODE (dest_Fixed)
@@ -141,4 +141,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
141 return cpuid_apic >> index_msb; 141 return cpuid_apic >> index_msb;
142} 142}
143 143
144#endif /* ASM_X86__MACH_BIGSMP__MACH_APIC_H */ 144#endif /* __ASM_MACH_APIC_H */
diff --git a/include/asm-x86/bigsmp/apicdef.h b/include/asm-x86/bigsmp/apicdef.h
new file mode 100644
index 000000000000..392c3f5ef2fe
--- /dev/null
+++ b/include/asm-x86/bigsmp/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_MACH_APICDEF_H
2#define __ASM_MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-bigsmp/mach_ipi.h b/include/asm-x86/bigsmp/ipi.h
index b1b0f966a009..9404c535b7ec 100644
--- a/include/asm-x86/mach-bigsmp/mach_ipi.h
+++ b/include/asm-x86/bigsmp/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_IPI_H 1#ifndef __ASM_MACH_IPI_H
2#define ASM_X86__MACH_BIGSMP__MACH_IPI_H 2#define __ASM_MACH_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_BIGSMP__MACH_IPI_H */ 25#endif /* __ASM_MACH_IPI_H */
diff --git a/include/asm-x86/bugs.h b/include/asm-x86/bugs.h
index 4761c461d23a..dc604985f2ad 100644
--- a/include/asm-x86/bugs.h
+++ b/include/asm-x86/bugs.h
@@ -2,6 +2,11 @@
2#define ASM_X86__BUGS_H 2#define ASM_X86__BUGS_H
3 3
4extern void check_bugs(void); 4extern void check_bugs(void);
5
6#if defined(CONFIG_CPU_SUP_INTEL) && defined(CONFIG_X86_32)
5int ppro_with_ram_bug(void); 7int ppro_with_ram_bug(void);
8#else
9static inline int ppro_with_ram_bug(void) { return 0; }
10#endif
6 11
7#endif /* ASM_X86__BUGS_H */ 12#endif /* ASM_X86__BUGS_H */
diff --git a/include/asm-x86/cpufeature.h b/include/asm-x86/cpufeature.h
index 250fa0cb144b..adfeae6586e1 100644
--- a/include/asm-x86/cpufeature.h
+++ b/include/asm-x86/cpufeature.h
@@ -6,7 +6,13 @@
6 6
7#include <asm/required-features.h> 7#include <asm/required-features.h>
8 8
9#define NCAPINTS 8 /* N 32-bit words worth of info */ 9#define NCAPINTS 9 /* N 32-bit words worth of info */
10
11/*
12 * Note: If the comment begins with a quoted string, that string is used
13 * in /proc/cpuinfo instead of the macro name. If the string is "",
14 * this feature bit is not displayed in /proc/cpuinfo at all.
15 */
10 16
11/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */ 17/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
12#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */ 18#define X86_FEATURE_FPU (0*32+ 0) /* Onboard FPU */
@@ -14,7 +20,7 @@
14#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */ 20#define X86_FEATURE_DE (0*32+ 2) /* Debugging Extensions */
15#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */ 21#define X86_FEATURE_PSE (0*32+ 3) /* Page Size Extensions */
16#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */ 22#define X86_FEATURE_TSC (0*32+ 4) /* Time Stamp Counter */
17#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers, RDMSR, WRMSR */ 23#define X86_FEATURE_MSR (0*32+ 5) /* Model-Specific Registers */
18#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */ 24#define X86_FEATURE_PAE (0*32+ 6) /* Physical Address Extensions */
19#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */ 25#define X86_FEATURE_MCE (0*32+ 7) /* Machine Check Architecture */
20#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */ 26#define X86_FEATURE_CX8 (0*32+ 8) /* CMPXCHG8 instruction */
@@ -23,22 +29,23 @@
23#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */ 29#define X86_FEATURE_MTRR (0*32+12) /* Memory Type Range Registers */
24#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */ 30#define X86_FEATURE_PGE (0*32+13) /* Page Global Enable */
25#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */ 31#define X86_FEATURE_MCA (0*32+14) /* Machine Check Architecture */
26#define X86_FEATURE_CMOV (0*32+15) /* CMOV instruction (FCMOVCC and FCOMI too if FPU present) */ 32#define X86_FEATURE_CMOV (0*32+15) /* CMOV instructions */
33 /* (plus FCMOVcc, FCOMI with FPU) */
27#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */ 34#define X86_FEATURE_PAT (0*32+16) /* Page Attribute Table */
28#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */ 35#define X86_FEATURE_PSE36 (0*32+17) /* 36-bit PSEs */
29#define X86_FEATURE_PN (0*32+18) /* Processor serial number */ 36#define X86_FEATURE_PN (0*32+18) /* Processor serial number */
30#define X86_FEATURE_CLFLSH (0*32+19) /* Supports the CLFLUSH instruction */ 37#define X86_FEATURE_CLFLSH (0*32+19) /* "clflush" CLFLUSH instruction */
31#define X86_FEATURE_DS (0*32+21) /* Debug Store */ 38#define X86_FEATURE_DS (0*32+21) /* "dts" Debug Store */
32#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */ 39#define X86_FEATURE_ACPI (0*32+22) /* ACPI via MSR */
33#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */ 40#define X86_FEATURE_MMX (0*32+23) /* Multimedia Extensions */
34#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE and FXRSTOR instructions (fast save and restore */ 41#define X86_FEATURE_FXSR (0*32+24) /* FXSAVE/FXRSTOR, CR4.OSFXSR */
35 /* of FPU context), and CR4.OSFXSR available */ 42#define X86_FEATURE_XMM (0*32+25) /* "sse" */
36#define X86_FEATURE_XMM (0*32+25) /* Streaming SIMD Extensions */ 43#define X86_FEATURE_XMM2 (0*32+26) /* "sse2" */
37#define X86_FEATURE_XMM2 (0*32+26) /* Streaming SIMD Extensions-2 */ 44#define X86_FEATURE_SELFSNOOP (0*32+27) /* "ss" CPU self snoop */
38#define X86_FEATURE_SELFSNOOP (0*32+27) /* CPU self snoop */
39#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */ 45#define X86_FEATURE_HT (0*32+28) /* Hyper-Threading */
40#define X86_FEATURE_ACC (0*32+29) /* Automatic clock control */ 46#define X86_FEATURE_ACC (0*32+29) /* "tm" Automatic clock control */
41#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */ 47#define X86_FEATURE_IA64 (0*32+30) /* IA-64 processor */
48#define X86_FEATURE_PBE (0*32+31) /* Pending Break Enable */
42 49
43/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */ 50/* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
44/* Don't duplicate feature flags which are redundant with Intel! */ 51/* Don't duplicate feature flags which are redundant with Intel! */
@@ -46,7 +53,8 @@
46#define X86_FEATURE_MP (1*32+19) /* MP Capable. */ 53#define X86_FEATURE_MP (1*32+19) /* MP Capable. */
47#define X86_FEATURE_NX (1*32+20) /* Execute Disable */ 54#define X86_FEATURE_NX (1*32+20) /* Execute Disable */
48#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */ 55#define X86_FEATURE_MMXEXT (1*32+22) /* AMD MMX extensions */
49#define X86_FEATURE_GBPAGES (1*32+26) /* GB pages */ 56#define X86_FEATURE_FXSR_OPT (1*32+25) /* FXSAVE/FXRSTOR optimizations */
57#define X86_FEATURE_GBPAGES (1*32+26) /* "pdpe1gb" GB pages */
50#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */ 58#define X86_FEATURE_RDTSCP (1*32+27) /* RDTSCP */
51#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */ 59#define X86_FEATURE_LM (1*32+29) /* Long Mode (x86-64) */
52#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */ 60#define X86_FEATURE_3DNOWEXT (1*32+30) /* AMD 3DNow! extensions */
@@ -64,53 +72,79 @@
64#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */ 72#define X86_FEATURE_CYRIX_ARR (3*32+ 2) /* Cyrix ARRs (= MTRRs) */
65#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */ 73#define X86_FEATURE_CENTAUR_MCR (3*32+ 3) /* Centaur MCRs (= MTRRs) */
66/* cpu types for specific tunings: */ 74/* cpu types for specific tunings: */
67#define X86_FEATURE_K8 (3*32+ 4) /* Opteron, Athlon64 */ 75#define X86_FEATURE_K8 (3*32+ 4) /* "" Opteron, Athlon64 */
68#define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ 76#define X86_FEATURE_K7 (3*32+ 5) /* "" Athlon */
69#define X86_FEATURE_P3 (3*32+ 6) /* P3 */ 77#define X86_FEATURE_P3 (3*32+ 6) /* "" P3 */
70#define X86_FEATURE_P4 (3*32+ 7) /* P4 */ 78#define X86_FEATURE_P4 (3*32+ 7) /* "" P4 */
71#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ 79#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
72#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */ 80#define X86_FEATURE_UP (3*32+ 9) /* smp kernel running on up */
73#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* FXSAVE leaks FOP/FIP/FOP */ 81#define X86_FEATURE_FXSAVE_LEAK (3*32+10) /* "" FXSAVE leaks FOP/FIP/FOP */
74#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */ 82#define X86_FEATURE_ARCH_PERFMON (3*32+11) /* Intel Architectural PerfMon */
83#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
75#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */ 84#define X86_FEATURE_PEBS (3*32+12) /* Precise-Event Based Sampling */
76#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */ 85#define X86_FEATURE_BTS (3*32+13) /* Branch Trace Store */
77#define X86_FEATURE_SYSCALL32 (3*32+14) /* syscall in ia32 userspace */ 86#define X86_FEATURE_SYSCALL32 (3*32+14) /* "" syscall in ia32 userspace */
78#define X86_FEATURE_SYSENTER32 (3*32+15) /* sysenter in ia32 userspace */ 87#define X86_FEATURE_SYSENTER32 (3*32+15) /* "" sysenter in ia32 userspace */
79#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well on this CPU */ 88#define X86_FEATURE_REP_GOOD (3*32+16) /* rep microcode works well */
80#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* Mfence synchronizes RDTSC */ 89#define X86_FEATURE_MFENCE_RDTSC (3*32+17) /* "" Mfence synchronizes RDTSC */
81#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* Lfence synchronizes RDTSC */ 90#define X86_FEATURE_LFENCE_RDTSC (3*32+18) /* "" Lfence synchronizes RDTSC */
82#define X86_FEATURE_11AP (3*32+19) /* Bad local APIC aka 11AP */ 91#define X86_FEATURE_11AP (3*32+19) /* "" Bad local APIC aka 11AP */
83#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */ 92#define X86_FEATURE_NOPL (3*32+20) /* The NOPL (0F 1F) instructions */
84#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */ 93#define X86_FEATURE_AMDC1E (3*32+21) /* AMD C1E detected */
94#define X86_FEATURE_XTOPOLOGY (3*32+21) /* cpu topology enum extensions */
85 95
86/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ 96/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
87#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ 97#define X86_FEATURE_XMM3 (4*32+ 0) /* "pni" SSE-3 */
88#define X86_FEATURE_MWAIT (4*32+ 3) /* Monitor/Mwait support */ 98#define X86_FEATURE_PCLMULQDQ (4*32+ 1) /* PCLMULQDQ instruction */
89#define X86_FEATURE_DSCPL (4*32+ 4) /* CPL Qualified Debug Store */ 99#define X86_FEATURE_DTES64 (4*32+ 2) /* 64-bit Debug Store */
100#define X86_FEATURE_MWAIT (4*32+ 3) /* "monitor" Monitor/Mwait support */
101#define X86_FEATURE_DSCPL (4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
102#define X86_FEATURE_VMX (4*32+ 5) /* Hardware virtualization */
103#define X86_FEATURE_SMX (4*32+ 6) /* Safer mode */
90#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */ 104#define X86_FEATURE_EST (4*32+ 7) /* Enhanced SpeedStep */
91#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */ 105#define X86_FEATURE_TM2 (4*32+ 8) /* Thermal Monitor 2 */
106#define X86_FEATURE_SSSE3 (4*32+ 9) /* Supplemental SSE-3 */
92#define X86_FEATURE_CID (4*32+10) /* Context ID */ 107#define X86_FEATURE_CID (4*32+10) /* Context ID */
108#define X86_FEATURE_FMA (4*32+12) /* Fused multiply-add */
93#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */ 109#define X86_FEATURE_CX16 (4*32+13) /* CMPXCHG16B */
94#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */ 110#define X86_FEATURE_XTPR (4*32+14) /* Send Task Priority Messages */
111#define X86_FEATURE_PDCM (4*32+15) /* Performance Capabilities */
95#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */ 112#define X86_FEATURE_DCA (4*32+18) /* Direct Cache Access */
96#define X86_FEATURE_XMM4_2 (4*32+20) /* Streaming SIMD Extensions-4.2 */ 113#define X86_FEATURE_XMM4_1 (4*32+19) /* "sse4_1" SSE-4.1 */
114#define X86_FEATURE_XMM4_2 (4*32+20) /* "sse4_2" SSE-4.2 */
115#define X86_FEATURE_X2APIC (4*32+21) /* x2APIC */
116#define X86_FEATURE_AES (4*32+25) /* AES instructions */
117#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
118#define X86_FEATURE_OSXSAVE (4*32+27) /* "" XSAVE enabled in the OS */
119#define X86_FEATURE_AVX (4*32+28) /* Advanced Vector Extensions */
97 120
98/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */ 121/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
99#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */ 122#define X86_FEATURE_XSTORE (5*32+ 2) /* "rng" RNG present (xstore) */
100#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* on-CPU RNG enabled */ 123#define X86_FEATURE_XSTORE_EN (5*32+ 3) /* "rng_en" RNG enabled */
101#define X86_FEATURE_XCRYPT (5*32+ 6) /* on-CPU crypto (xcrypt insn) */ 124#define X86_FEATURE_XCRYPT (5*32+ 6) /* "ace" on-CPU crypto (xcrypt) */
102#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */ 125#define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* "ace_en" on-CPU crypto enabled */
103#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */ 126#define X86_FEATURE_ACE2 (5*32+ 8) /* Advanced Cryptography Engine v2 */
104#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */ 127#define X86_FEATURE_ACE2_EN (5*32+ 9) /* ACE v2 enabled */
105#define X86_FEATURE_PHE (5*32+ 10) /* PadLock Hash Engine */ 128#define X86_FEATURE_PHE (5*32+10) /* PadLock Hash Engine */
106#define X86_FEATURE_PHE_EN (5*32+ 11) /* PHE enabled */ 129#define X86_FEATURE_PHE_EN (5*32+11) /* PHE enabled */
107#define X86_FEATURE_PMM (5*32+ 12) /* PadLock Montgomery Multiplier */ 130#define X86_FEATURE_PMM (5*32+12) /* PadLock Montgomery Multiplier */
108#define X86_FEATURE_PMM_EN (5*32+ 13) /* PMM enabled */ 131#define X86_FEATURE_PMM_EN (5*32+13) /* PMM enabled */
109 132
110/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ 133/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
111#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ 134#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */
112#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ 135#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */
113#define X86_FEATURE_IBS (6*32+ 10) /* Instruction Based Sampling */ 136#define X86_FEATURE_SVM (6*32+ 2) /* Secure virtual machine */
137#define X86_FEATURE_EXTAPIC (6*32+ 3) /* Extended APIC space */
138#define X86_FEATURE_CR8_LEGACY (6*32+ 4) /* CR8 in 32-bit mode */
139#define X86_FEATURE_ABM (6*32+ 5) /* Advanced bit manipulation */
140#define X86_FEATURE_SSE4A (6*32+ 6) /* SSE-4A */
141#define X86_FEATURE_MISALIGNSSE (6*32+ 7) /* Misaligned SSE mode */
142#define X86_FEATURE_3DNOWPREFETCH (6*32+ 8) /* 3DNow prefetch instructions */
143#define X86_FEATURE_OSVW (6*32+ 9) /* OS Visible Workaround */
144#define X86_FEATURE_IBS (6*32+10) /* Instruction Based Sampling */
145#define X86_FEATURE_SSE5 (6*32+11) /* SSE-5 */
146#define X86_FEATURE_SKINIT (6*32+12) /* SKINIT/STGI instructions */
147#define X86_FEATURE_WDT (6*32+13) /* Watchdog timer */
114 148
115/* 149/*
116 * Auxiliary flags: Linux defined - For features scattered in various 150 * Auxiliary flags: Linux defined - For features scattered in various
@@ -118,6 +152,13 @@
118 */ 152 */
119#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */ 153#define X86_FEATURE_IDA (7*32+ 0) /* Intel Dynamic Acceleration */
120 154
155/* Virtualization flags: Linux defined */
156#define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
157#define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */
158#define X86_FEATURE_FLEXPRIORITY (8*32+ 2) /* Intel FlexPriority */
159#define X86_FEATURE_EPT (8*32+ 3) /* Intel Extended Page Table */
160#define X86_FEATURE_VPID (8*32+ 4) /* Intel Virtual Processor ID */
161
121#if defined(__KERNEL__) && !defined(__ASSEMBLY__) 162#if defined(__KERNEL__) && !defined(__ASSEMBLY__)
122 163
123#include <linux/bitops.h> 164#include <linux/bitops.h>
@@ -151,7 +192,7 @@ extern const char * const x86_power_flags[32];
151} while (0) 192} while (0)
152#define setup_force_cpu_cap(bit) do { \ 193#define setup_force_cpu_cap(bit) do { \
153 set_cpu_cap(&boot_cpu_data, bit); \ 194 set_cpu_cap(&boot_cpu_data, bit); \
154 clear_bit(bit, (unsigned long *)cleared_cpu_caps); \ 195 clear_bit(bit, (unsigned long *)cleared_cpu_caps); \
155} while (0) 196} while (0)
156 197
157#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU) 198#define cpu_has_fpu boot_cpu_has(X86_FEATURE_FPU)
@@ -192,7 +233,10 @@ extern const char * const x86_power_flags[32];
192#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES) 233#define cpu_has_gbpages boot_cpu_has(X86_FEATURE_GBPAGES)
193#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) 234#define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON)
194#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT) 235#define cpu_has_pat boot_cpu_has(X86_FEATURE_PAT)
236#define cpu_has_xmm4_1 boot_cpu_has(X86_FEATURE_XMM4_1)
195#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2) 237#define cpu_has_xmm4_2 boot_cpu_has(X86_FEATURE_XMM4_2)
238#define cpu_has_x2apic boot_cpu_has(X86_FEATURE_X2APIC)
239#define cpu_has_xsave boot_cpu_has(X86_FEATURE_XSAVE)
196 240
197#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64) 241#if defined(CONFIG_X86_INVLPG) || defined(CONFIG_X86_64)
198# define cpu_has_invlpg 1 242# define cpu_has_invlpg 1
diff --git a/include/asm-x86/e820.h b/include/asm-x86/e820.h
index f52daf176bcb..5abbdec06bd2 100644
--- a/include/asm-x86/e820.h
+++ b/include/asm-x86/e820.h
@@ -43,6 +43,7 @@
43#define E820_RESERVED 2 43#define E820_RESERVED 2
44#define E820_ACPI 3 44#define E820_ACPI 3
45#define E820_NVS 4 45#define E820_NVS 4
46#define E820_UNUSABLE 5
46 47
47/* reserved RAM used by kernel itself */ 48/* reserved RAM used by kernel itself */
48#define E820_RESERVED_KERN 128 49#define E820_RESERVED_KERN 128
@@ -121,6 +122,7 @@ extern void e820_register_active_regions(int nid, unsigned long start_pfn,
121extern u64 e820_hole_size(u64 start, u64 end); 122extern u64 e820_hole_size(u64 start, u64 end);
122extern void finish_e820_parsing(void); 123extern void finish_e820_parsing(void);
123extern void e820_reserve_resources(void); 124extern void e820_reserve_resources(void);
125extern void e820_reserve_resources_late(void);
124extern void setup_memory_map(void); 126extern void setup_memory_map(void);
125extern char *default_machine_specific_memory_setup(void); 127extern char *default_machine_specific_memory_setup(void);
126extern char *machine_specific_memory_setup(void); 128extern char *machine_specific_memory_setup(void);
diff --git a/include/asm-x86/mach-es7000/mach_apic.h b/include/asm-x86/es7000/apic.h
index c1f6f682d619..bd2c44d1f7ac 100644
--- a/include/asm-x86/mach-es7000/mach_apic.h
+++ b/include/asm-x86/es7000/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_APIC_H 1#ifndef __ASM_ES7000_APIC_H
2#define ASM_X86__MACH_ES7000__MACH_APIC_H 2#define __ASM_ES7000_APIC_H
3 3
4#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu) 4#define xapic_phys_to_log_apicid(cpu) per_cpu(x86_bios_cpu_apicid, cpu)
5#define esr_disable (1) 5#define esr_disable (1)
@@ -10,7 +10,7 @@ static inline int apic_id_registered(void)
10} 10}
11 11
12static inline cpumask_t target_cpus(void) 12static inline cpumask_t target_cpus(void)
13{ 13{
14#if defined CONFIG_ES7000_CLUSTERED_APIC 14#if defined CONFIG_ES7000_CLUSTERED_APIC
15 return CPU_MASK_ALL; 15 return CPU_MASK_ALL;
16#else 16#else
@@ -23,24 +23,24 @@ static inline cpumask_t target_cpus(void)
23#define APIC_DFR_VALUE (APIC_DFR_CLUSTER) 23#define APIC_DFR_VALUE (APIC_DFR_CLUSTER)
24#define INT_DELIVERY_MODE (dest_LowestPrio) 24#define INT_DELIVERY_MODE (dest_LowestPrio)
25#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */ 25#define INT_DEST_MODE (1) /* logical delivery broadcast to all procs */
26#define NO_BALANCE_IRQ (1) 26#define NO_BALANCE_IRQ (1)
27#undef WAKE_SECONDARY_VIA_INIT 27#undef WAKE_SECONDARY_VIA_INIT
28#define WAKE_SECONDARY_VIA_MIP 28#define WAKE_SECONDARY_VIA_MIP
29#else 29#else
30#define APIC_DFR_VALUE (APIC_DFR_FLAT) 30#define APIC_DFR_VALUE (APIC_DFR_FLAT)
31#define INT_DELIVERY_MODE (dest_Fixed) 31#define INT_DELIVERY_MODE (dest_Fixed)
32#define INT_DEST_MODE (0) /* phys delivery to target procs */ 32#define INT_DEST_MODE (0) /* phys delivery to target procs */
33#define NO_BALANCE_IRQ (0) 33#define NO_BALANCE_IRQ (0)
34#undef APIC_DEST_LOGICAL 34#undef APIC_DEST_LOGICAL
35#define APIC_DEST_LOGICAL 0x0 35#define APIC_DEST_LOGICAL 0x0
36#define WAKE_SECONDARY_VIA_INIT 36#define WAKE_SECONDARY_VIA_INIT
37#endif 37#endif
38 38
39static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 39static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
40{ 40{
41 return 0; 41 return 0;
42} 42}
43static inline unsigned long check_apicid_present(int bit) 43static inline unsigned long check_apicid_present(int bit)
44{ 44{
45 return physid_isset(bit, phys_cpu_present_map); 45 return physid_isset(bit, phys_cpu_present_map);
46} 46}
@@ -80,7 +80,7 @@ static inline void setup_apic_routing(void)
80{ 80{
81 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id()); 81 int apic = per_cpu(x86_bios_cpu_apicid, smp_processor_id());
82 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n", 82 printk("Enabling APIC mode: %s. Using %d I/O APICs, target cpus %lx\n",
83 (apic_version[apic] == 0x14) ? 83 (apic_version[apic] == 0x14) ?
84 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]); 84 "Physical Cluster" : "Logical Cluster", nr_ioapics, cpus_addr(TARGET_CPUS)[0]);
85} 85}
86 86
@@ -141,7 +141,7 @@ static inline void setup_portio_remap(void)
141extern unsigned int boot_cpu_physical_apicid; 141extern unsigned int boot_cpu_physical_apicid;
142static inline int check_phys_apicid_present(int cpu_physical_apicid) 142static inline int check_phys_apicid_present(int cpu_physical_apicid)
143{ 143{
144 boot_cpu_physical_apicid = GET_APIC_ID(read_apic_id()); 144 boot_cpu_physical_apicid = read_apic_id();
145 return (1); 145 return (1);
146} 146}
147 147
@@ -150,7 +150,7 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
150 int num_bits_set; 150 int num_bits_set;
151 int cpus_found = 0; 151 int cpus_found = 0;
152 int cpu; 152 int cpu;
153 int apicid; 153 int apicid;
154 154
155 num_bits_set = cpus_weight(cpumask); 155 num_bits_set = cpus_weight(cpumask);
156 /* Return id to all */ 156 /* Return id to all */
@@ -160,16 +160,16 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
160#else 160#else
161 return cpu_to_logical_apicid(0); 161 return cpu_to_logical_apicid(0);
162#endif 162#endif
163 /* 163 /*
164 * The cpus in the mask must all be on the apic cluster. If are not 164 * The cpus in the mask must all be on the apic cluster. If are not
165 * on the same apicid cluster return default value of TARGET_CPUS. 165 * on the same apicid cluster return default value of TARGET_CPUS.
166 */ 166 */
167 cpu = first_cpu(cpumask); 167 cpu = first_cpu(cpumask);
168 apicid = cpu_to_logical_apicid(cpu); 168 apicid = cpu_to_logical_apicid(cpu);
169 while (cpus_found < num_bits_set) { 169 while (cpus_found < num_bits_set) {
170 if (cpu_isset(cpu, cpumask)) { 170 if (cpu_isset(cpu, cpumask)) {
171 int new_apicid = cpu_to_logical_apicid(cpu); 171 int new_apicid = cpu_to_logical_apicid(cpu);
172 if (apicid_cluster(apicid) != 172 if (apicid_cluster(apicid) !=
173 apicid_cluster(new_apicid)){ 173 apicid_cluster(new_apicid)){
174 printk ("%s: Not a valid mask!\n",__FUNCTION__); 174 printk ("%s: Not a valid mask!\n",__FUNCTION__);
175#if defined CONFIG_ES7000_CLUSTERED_APIC 175#if defined CONFIG_ES7000_CLUSTERED_APIC
@@ -191,4 +191,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
191 return cpuid_apic >> index_msb; 191 return cpuid_apic >> index_msb;
192} 192}
193 193
194#endif /* ASM_X86__MACH_ES7000__MACH_APIC_H */ 194#endif /* __ASM_ES7000_APIC_H */
diff --git a/include/asm-x86/es7000/apicdef.h b/include/asm-x86/es7000/apicdef.h
new file mode 100644
index 000000000000..8b234a3cb851
--- /dev/null
+++ b/include/asm-x86/es7000/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_ES7000_APICDEF_H
2#define __ASM_ES7000_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-es7000/mach_ipi.h b/include/asm-x86/es7000/ipi.h
index 3a21240e03dc..632a955fcc0a 100644
--- a/include/asm-x86/mach-es7000/mach_ipi.h
+++ b/include/asm-x86/es7000/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_IPI_H 1#ifndef __ASM_ES7000_IPI_H
2#define ASM_X86__MACH_ES7000__MACH_IPI_H 2#define __ASM_ES7000_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -21,4 +21,4 @@ static inline void send_IPI_all(int vector)
21 send_IPI_mask(cpu_online_map, vector); 21 send_IPI_mask(cpu_online_map, vector);
22} 22}
23 23
24#endif /* ASM_X86__MACH_ES7000__MACH_IPI_H */ 24#endif /* __ASM_ES7000_IPI_H */
diff --git a/include/asm-x86/mach-es7000/mach_mpparse.h b/include/asm-x86/es7000/mpparse.h
index befde24705b7..7b5c889d8e7d 100644
--- a/include/asm-x86/mach-es7000/mach_mpparse.h
+++ b/include/asm-x86/es7000/mpparse.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_MPPARSE_H 1#ifndef __ASM_ES7000_MPPARSE_H
2#define ASM_X86__MACH_ES7000__MACH_MPPARSE_H 2#define __ASM_ES7000_MPPARSE_H
3 3
4#include <linux/acpi.h> 4#include <linux/acpi.h>
5 5
@@ -26,4 +26,4 @@ static inline int es7000_check_dsdt(void)
26} 26}
27#endif 27#endif
28 28
29#endif /* ASM_X86__MACH_ES7000__MACH_MPPARSE_H */ 29#endif /* __ASM_MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-es7000/mach_wakecpu.h b/include/asm-x86/es7000/wakecpu.h
index 97c776ce13f2..3ffc5a7bf667 100644
--- a/include/asm-x86/mach-es7000/mach_wakecpu.h
+++ b/include/asm-x86/es7000/wakecpu.h
@@ -1,7 +1,7 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_WAKECPU_H 1#ifndef __ASM_ES7000_WAKECPU_H
2#define ASM_X86__MACH_ES7000__MACH_WAKECPU_H 2#define __ASM_ES7000_WAKECPU_H
3 3
4/* 4/*
5 * This file copes with machines that wakeup secondary CPUs by the 5 * This file copes with machines that wakeup secondary CPUs by the
6 * INIT, INIT, STARTUP sequence. 6 * INIT, INIT, STARTUP sequence.
7 */ 7 */
@@ -56,4 +56,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
56 #define inquire_remote_apic(apicid) {} 56 #define inquire_remote_apic(apicid) {}
57#endif 57#endif
58 58
59#endif /* ASM_X86__MACH_ES7000__MACH_WAKECPU_H */ 59#endif /* __ASM_MACH_WAKECPU_H */
diff --git a/include/asm-x86/genapic_64.h b/include/asm-x86/genapic_64.h
index 25097a8cc5ef..ed6a4886c082 100644
--- a/include/asm-x86/genapic_64.h
+++ b/include/asm-x86/genapic_64.h
@@ -14,6 +14,7 @@
14 14
15struct genapic { 15struct genapic {
16 char *name; 16 char *name;
17 int (*acpi_madt_oem_check)(char *oem_id, char *oem_table_id);
17 u32 int_delivery_mode; 18 u32 int_delivery_mode;
18 u32 int_dest_mode; 19 u32 int_dest_mode;
19 int (*apic_id_registered)(void); 20 int (*apic_id_registered)(void);
@@ -24,17 +25,24 @@ struct genapic {
24 void (*send_IPI_mask)(cpumask_t mask, int vector); 25 void (*send_IPI_mask)(cpumask_t mask, int vector);
25 void (*send_IPI_allbutself)(int vector); 26 void (*send_IPI_allbutself)(int vector);
26 void (*send_IPI_all)(int vector); 27 void (*send_IPI_all)(int vector);
28 void (*send_IPI_self)(int vector);
27 /* */ 29 /* */
28 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask); 30 unsigned int (*cpu_mask_to_apicid)(cpumask_t cpumask);
29 unsigned int (*phys_pkg_id)(int index_msb); 31 unsigned int (*phys_pkg_id)(int index_msb);
32 unsigned int (*get_apic_id)(unsigned long x);
33 unsigned long (*set_apic_id)(unsigned int id);
34 unsigned long apic_id_mask;
30}; 35};
31 36
32extern struct genapic *genapic; 37extern struct genapic *genapic;
33 38
34extern struct genapic apic_flat; 39extern struct genapic apic_flat;
35extern struct genapic apic_physflat; 40extern struct genapic apic_physflat;
41extern struct genapic apic_x2apic_cluster;
42extern struct genapic apic_x2apic_phys;
36extern int acpi_madt_oem_check(char *, char *); 43extern int acpi_madt_oem_check(char *, char *);
37 44
45extern void apic_send_IPI_self(int vector);
38enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; 46enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC};
39extern enum uv_system_type get_uv_system_type(void); 47extern enum uv_system_type get_uv_system_type(void);
40extern int is_uv_system(void); 48extern int is_uv_system(void);
diff --git a/include/asm-x86/hw_irq.h b/include/asm-x86/hw_irq.h
index 65997b15d56a..50f6e0316b50 100644
--- a/include/asm-x86/hw_irq.h
+++ b/include/asm-x86/hw_irq.h
@@ -64,7 +64,6 @@ extern unsigned long io_apic_irqs;
64extern void init_VISWS_APIC_irqs(void); 64extern void init_VISWS_APIC_irqs(void);
65extern void setup_IO_APIC(void); 65extern void setup_IO_APIC(void);
66extern void disable_IO_APIC(void); 66extern void disable_IO_APIC(void);
67extern void print_IO_APIC(void);
68extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn); 67extern int IO_APIC_get_PCI_irq_vector(int bus, int slot, int fn);
69extern void setup_ioapic_dest(void); 68extern void setup_ioapic_dest(void);
70 69
@@ -73,7 +72,9 @@ extern void enable_IO_APIC(void);
73#endif 72#endif
74 73
75/* IPI functions */ 74/* IPI functions */
75#ifdef CONFIG_X86_32
76extern void send_IPI_self(int vector); 76extern void send_IPI_self(int vector);
77#endif
77extern void send_IPI(int dest, int vector); 78extern void send_IPI(int dest, int vector);
78 79
79/* Statistics */ 80/* Statistics */
diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h
index 1ecdc3ed96e4..9ba862a4eac0 100644
--- a/include/asm-x86/i387.h
+++ b/include/asm-x86/i387.h
@@ -19,7 +19,9 @@
19#include <asm/sigcontext.h> 19#include <asm/sigcontext.h>
20#include <asm/user.h> 20#include <asm/user.h>
21#include <asm/uaccess.h> 21#include <asm/uaccess.h>
22#include <asm/xsave.h>
22 23
24extern unsigned int sig_xstate_size;
23extern void fpu_init(void); 25extern void fpu_init(void);
24extern void mxcsr_feature_mask_init(void); 26extern void mxcsr_feature_mask_init(void);
25extern int init_fpu(struct task_struct *child); 27extern int init_fpu(struct task_struct *child);
@@ -31,12 +33,18 @@ extern user_regset_active_fn fpregs_active, xfpregs_active;
31extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; 33extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get;
32extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set; 34extern user_regset_set_fn fpregs_set, xfpregs_set, fpregs_soft_set;
33 35
36extern struct _fpx_sw_bytes fx_sw_reserved;
34#ifdef CONFIG_IA32_EMULATION 37#ifdef CONFIG_IA32_EMULATION
38extern unsigned int sig_xstate_ia32_size;
39extern struct _fpx_sw_bytes fx_sw_reserved_ia32;
35struct _fpstate_ia32; 40struct _fpstate_ia32;
36extern int save_i387_ia32(struct _fpstate_ia32 __user *buf); 41struct _xstate_ia32;
37extern int restore_i387_ia32(struct _fpstate_ia32 __user *buf); 42extern int save_i387_xstate_ia32(void __user *buf);
43extern int restore_i387_xstate_ia32(void __user *buf);
38#endif 44#endif
39 45
46#define X87_FSW_ES (1 << 7) /* Exception Summary */
47
40#ifdef CONFIG_X86_64 48#ifdef CONFIG_X86_64
41 49
42/* Ignore delayed exceptions from user space */ 50/* Ignore delayed exceptions from user space */
@@ -47,7 +55,7 @@ static inline void tolerant_fwait(void)
47 _ASM_EXTABLE(1b, 2b)); 55 _ASM_EXTABLE(1b, 2b));
48} 56}
49 57
50static inline int restore_fpu_checking(struct i387_fxsave_struct *fx) 58static inline int fxrstor_checking(struct i387_fxsave_struct *fx)
51{ 59{
52 int err; 60 int err;
53 61
@@ -67,15 +75,31 @@ static inline int restore_fpu_checking(struct i387_fxsave_struct *fx)
67 return err; 75 return err;
68} 76}
69 77
70#define X87_FSW_ES (1 << 7) /* Exception Summary */ 78static inline int restore_fpu_checking(struct task_struct *tsk)
79{
80 if (task_thread_info(tsk)->status & TS_XSAVE)
81 return xrstor_checking(&tsk->thread.xstate->xsave);
82 else
83 return fxrstor_checking(&tsk->thread.xstate->fxsave);
84}
71 85
72/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception 86/* AMD CPUs don't save/restore FDP/FIP/FOP unless an exception
73 is pending. Clear the x87 state here by setting it to fixed 87 is pending. Clear the x87 state here by setting it to fixed
74 values. The kernel data segment can be sometimes 0 and sometimes 88 values. The kernel data segment can be sometimes 0 and sometimes
75 new user value. Both should be ok. 89 new user value. Both should be ok.
76 Use the PDA as safe address because it should be already in L1. */ 90 Use the PDA as safe address because it should be already in L1. */
77static inline void clear_fpu_state(struct i387_fxsave_struct *fx) 91static inline void clear_fpu_state(struct task_struct *tsk)
78{ 92{
93 struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
94 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
95
96 /*
97 * xsave header may indicate the init state of the FP.
98 */
99 if ((task_thread_info(tsk)->status & TS_XSAVE) &&
100 !(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
101 return;
102
79 if (unlikely(fx->swd & X87_FSW_ES)) 103 if (unlikely(fx->swd & X87_FSW_ES))
80 asm volatile("fnclex"); 104 asm volatile("fnclex");
81 alternative_input(ASM_NOP8 ASM_NOP2, 105 alternative_input(ASM_NOP8 ASM_NOP2,
@@ -84,7 +108,7 @@ static inline void clear_fpu_state(struct i387_fxsave_struct *fx)
84 X86_FEATURE_FXSAVE_LEAK); 108 X86_FEATURE_FXSAVE_LEAK);
85} 109}
86 110
87static inline int save_i387_checking(struct i387_fxsave_struct __user *fx) 111static inline int fxsave_user(struct i387_fxsave_struct __user *fx)
88{ 112{
89 int err; 113 int err;
90 114
@@ -108,7 +132,7 @@ static inline int save_i387_checking(struct i387_fxsave_struct __user *fx)
108 return err; 132 return err;
109} 133}
110 134
111static inline void __save_init_fpu(struct task_struct *tsk) 135static inline void fxsave(struct task_struct *tsk)
112{ 136{
113 /* Using "rex64; fxsave %0" is broken because, if the memory operand 137 /* Using "rex64; fxsave %0" is broken because, if the memory operand
114 uses any extended registers for addressing, a second REX prefix 138 uses any extended registers for addressing, a second REX prefix
@@ -133,7 +157,16 @@ static inline void __save_init_fpu(struct task_struct *tsk)
133 : "=m" (tsk->thread.xstate->fxsave) 157 : "=m" (tsk->thread.xstate->fxsave)
134 : "cdaSDb" (&tsk->thread.xstate->fxsave)); 158 : "cdaSDb" (&tsk->thread.xstate->fxsave));
135#endif 159#endif
136 clear_fpu_state(&tsk->thread.xstate->fxsave); 160}
161
162static inline void __save_init_fpu(struct task_struct *tsk)
163{
164 if (task_thread_info(tsk)->status & TS_XSAVE)
165 xsave(tsk);
166 else
167 fxsave(tsk);
168
169 clear_fpu_state(tsk);
137 task_thread_info(tsk)->status &= ~TS_USEDFPU; 170 task_thread_info(tsk)->status &= ~TS_USEDFPU;
138} 171}
139 172
@@ -148,6 +181,10 @@ static inline void tolerant_fwait(void)
148 181
149static inline void restore_fpu(struct task_struct *tsk) 182static inline void restore_fpu(struct task_struct *tsk)
150{ 183{
184 if (task_thread_info(tsk)->status & TS_XSAVE) {
185 xrstor_checking(&tsk->thread.xstate->xsave);
186 return;
187 }
151 /* 188 /*
152 * The "nop" is needed to make the instructions the same 189 * The "nop" is needed to make the instructions the same
153 * length. 190 * length.
@@ -173,6 +210,27 @@ static inline void restore_fpu(struct task_struct *tsk)
173 */ 210 */
174static inline void __save_init_fpu(struct task_struct *tsk) 211static inline void __save_init_fpu(struct task_struct *tsk)
175{ 212{
213 if (task_thread_info(tsk)->status & TS_XSAVE) {
214 struct xsave_struct *xstate = &tsk->thread.xstate->xsave;
215 struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave;
216
217 xsave(tsk);
218
219 /*
220 * xsave header may indicate the init state of the FP.
221 */
222 if (!(xstate->xsave_hdr.xstate_bv & XSTATE_FP))
223 goto end;
224
225 if (unlikely(fx->swd & X87_FSW_ES))
226 asm volatile("fnclex");
227
228 /*
229 * we can do a simple return here or be paranoid :)
230 */
231 goto clear_state;
232 }
233
176 /* Use more nops than strictly needed in case the compiler 234 /* Use more nops than strictly needed in case the compiler
177 varies code */ 235 varies code */
178 alternative_input( 236 alternative_input(
@@ -182,6 +240,7 @@ static inline void __save_init_fpu(struct task_struct *tsk)
182 X86_FEATURE_FXSR, 240 X86_FEATURE_FXSR,
183 [fx] "m" (tsk->thread.xstate->fxsave), 241 [fx] "m" (tsk->thread.xstate->fxsave),
184 [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); 242 [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory");
243clear_state:
185 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception 244 /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception
186 is pending. Clear the x87 state here by setting it to fixed 245 is pending. Clear the x87 state here by setting it to fixed
187 values. safe_address is a random variable that should be in L1 */ 246 values. safe_address is a random variable that should be in L1 */
@@ -191,16 +250,17 @@ static inline void __save_init_fpu(struct task_struct *tsk)
191 "fildl %[addr]", /* set F?P to defined value */ 250 "fildl %[addr]", /* set F?P to defined value */
192 X86_FEATURE_FXSAVE_LEAK, 251 X86_FEATURE_FXSAVE_LEAK,
193 [addr] "m" (safe_address)); 252 [addr] "m" (safe_address));
253end:
194 task_thread_info(tsk)->status &= ~TS_USEDFPU; 254 task_thread_info(tsk)->status &= ~TS_USEDFPU;
195} 255}
196 256
257#endif /* CONFIG_X86_64 */
258
197/* 259/*
198 * Signal frame handlers... 260 * Signal frame handlers...
199 */ 261 */
200extern int save_i387(struct _fpstate __user *buf); 262extern int save_i387_xstate(void __user *buf);
201extern int restore_i387(struct _fpstate __user *buf); 263extern int restore_i387_xstate(void __user *buf);
202
203#endif /* CONFIG_X86_64 */
204 264
205static inline void __unlazy_fpu(struct task_struct *tsk) 265static inline void __unlazy_fpu(struct task_struct *tsk)
206{ 266{
diff --git a/include/asm-x86/i8259.h b/include/asm-x86/i8259.h
index c586559a6957..23c1b3baaecd 100644
--- a/include/asm-x86/i8259.h
+++ b/include/asm-x86/i8259.h
@@ -57,4 +57,7 @@ static inline void outb_pic(unsigned char value, unsigned int port)
57 57
58extern struct irq_chip i8259A_chip; 58extern struct irq_chip i8259A_chip;
59 59
60extern void mask_8259A(void);
61extern void unmask_8259A(void);
62
60#endif /* ASM_X86__I8259_H */ 63#endif /* ASM_X86__I8259_H */
diff --git a/include/asm-x86/io_apic.h b/include/asm-x86/io_apic.h
index be62847ab07e..8ec68a50cf10 100644
--- a/include/asm-x86/io_apic.h
+++ b/include/asm-x86/io_apic.h
@@ -107,6 +107,20 @@ struct IO_APIC_route_entry {
107 107
108} __attribute__ ((packed)); 108} __attribute__ ((packed));
109 109
110struct IR_IO_APIC_route_entry {
111 __u64 vector : 8,
112 zero : 3,
113 index2 : 1,
114 delivery_status : 1,
115 polarity : 1,
116 irr : 1,
117 trigger : 1,
118 mask : 1,
119 reserved : 31,
120 format : 1,
121 index : 15;
122} __attribute__ ((packed));
123
110#ifdef CONFIG_X86_IO_APIC 124#ifdef CONFIG_X86_IO_APIC
111 125
112/* 126/*
@@ -183,6 +197,12 @@ extern int io_apic_set_pci_routing(int ioapic, int pin, int irq,
183extern int (*ioapic_renumber_irq)(int ioapic, int irq); 197extern int (*ioapic_renumber_irq)(int ioapic, int irq);
184extern void ioapic_init_mappings(void); 198extern void ioapic_init_mappings(void);
185 199
200#ifdef CONFIG_X86_64
201extern int save_mask_IO_APIC_setup(void);
202extern void restore_IO_APIC_setup(void);
203extern void reinit_intr_remapped_IO_APIC(int);
204#endif
205
186#else /* !CONFIG_X86_IO_APIC */ 206#else /* !CONFIG_X86_IO_APIC */
187#define io_apic_assign_pci_irqs 0 207#define io_apic_assign_pci_irqs 0
188static const int timer_through_8259 = 0; 208static const int timer_through_8259 = 0;
diff --git a/include/asm-x86/ipi.h b/include/asm-x86/ipi.h
index c1b226797518..30a692cfaff8 100644
--- a/include/asm-x86/ipi.h
+++ b/include/asm-x86/ipi.h
@@ -49,6 +49,12 @@ static inline int __prepare_ICR2(unsigned int mask)
49 return SET_APIC_DEST_FIELD(mask); 49 return SET_APIC_DEST_FIELD(mask);
50} 50}
51 51
52static inline void __xapic_wait_icr_idle(void)
53{
54 while (native_apic_mem_read(APIC_ICR) & APIC_ICR_BUSY)
55 cpu_relax();
56}
57
52static inline void __send_IPI_shortcut(unsigned int shortcut, int vector, 58static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
53 unsigned int dest) 59 unsigned int dest)
54{ 60{
@@ -64,7 +70,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
64 /* 70 /*
65 * Wait for idle. 71 * Wait for idle.
66 */ 72 */
67 apic_wait_icr_idle(); 73 __xapic_wait_icr_idle();
68 74
69 /* 75 /*
70 * No need to touch the target chip field 76 * No need to touch the target chip field
@@ -74,7 +80,7 @@ static inline void __send_IPI_shortcut(unsigned int shortcut, int vector,
74 /* 80 /*
75 * Send the IPI. The write to APIC_ICR fires this off. 81 * Send the IPI. The write to APIC_ICR fires this off.
76 */ 82 */
77 apic_write(APIC_ICR, cfg); 83 native_apic_mem_write(APIC_ICR, cfg);
78} 84}
79 85
80/* 86/*
@@ -92,13 +98,13 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
92 if (unlikely(vector == NMI_VECTOR)) 98 if (unlikely(vector == NMI_VECTOR))
93 safe_apic_wait_icr_idle(); 99 safe_apic_wait_icr_idle();
94 else 100 else
95 apic_wait_icr_idle(); 101 __xapic_wait_icr_idle();
96 102
97 /* 103 /*
98 * prepare target chip field 104 * prepare target chip field
99 */ 105 */
100 cfg = __prepare_ICR2(mask); 106 cfg = __prepare_ICR2(mask);
101 apic_write(APIC_ICR2, cfg); 107 native_apic_mem_write(APIC_ICR2, cfg);
102 108
103 /* 109 /*
104 * program the ICR 110 * program the ICR
@@ -108,7 +114,7 @@ static inline void __send_IPI_dest_field(unsigned int mask, int vector,
108 /* 114 /*
109 * Send the IPI. The write to APIC_ICR fires this off. 115 * Send the IPI. The write to APIC_ICR fires this off.
110 */ 116 */
111 apic_write(APIC_ICR, cfg); 117 native_apic_mem_write(APIC_ICR, cfg);
112} 118}
113 119
114static inline void send_IPI_mask_sequence(cpumask_t mask, int vector) 120static inline void send_IPI_mask_sequence(cpumask_t mask, int vector)
diff --git a/include/asm-x86/irq_remapping.h b/include/asm-x86/irq_remapping.h
new file mode 100644
index 000000000000..78242c6ffa58
--- /dev/null
+++ b/include/asm-x86/irq_remapping.h
@@ -0,0 +1,8 @@
1#ifndef _ASM_IRQ_REMAPPING_H
2#define _ASM_IRQ_REMAPPING_H
3
4extern int x2apic;
5
6#define IRTE_DEST(dest) ((x2apic) ? dest : dest << 8)
7
8#endif
diff --git a/include/asm-x86/mach-bigsmp/mach_apicdef.h b/include/asm-x86/mach-bigsmp/mach_apicdef.h
deleted file mode 100644
index 811935d9d49b..000000000000
--- a/include/asm-x86/mach-bigsmp/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
2#define ASM_X86__MACH_BIGSMP__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_BIGSMP__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-default/mach_apic.h b/include/asm-x86/mach-default/mach_apic.h
index b615f40736be..2a330a41b3dd 100644
--- a/include/asm-x86/mach-default/mach_apic.h
+++ b/include/asm-x86/mach-default/mach_apic.h
@@ -30,6 +30,8 @@ static inline cpumask_t target_cpus(void)
30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid) 30#define cpu_mask_to_apicid (genapic->cpu_mask_to_apicid)
31#define phys_pkg_id (genapic->phys_pkg_id) 31#define phys_pkg_id (genapic->phys_pkg_id)
32#define vector_allocation_domain (genapic->vector_allocation_domain) 32#define vector_allocation_domain (genapic->vector_allocation_domain)
33#define read_apic_id() (GET_APIC_ID(apic_read(APIC_ID)))
34#define send_IPI_self (genapic->send_IPI_self)
33extern void setup_apic_routing(void); 35extern void setup_apic_routing(void);
34#else 36#else
35#define INT_DELIVERY_MODE dest_LowestPrio 37#define INT_DELIVERY_MODE dest_LowestPrio
@@ -54,7 +56,7 @@ static inline void init_apic_ldr(void)
54 56
55static inline int apic_id_registered(void) 57static inline int apic_id_registered(void)
56{ 58{
57 return physid_isset(GET_APIC_ID(read_apic_id()), phys_cpu_present_map); 59 return physid_isset(read_apic_id(), phys_cpu_present_map);
58} 60}
59 61
60static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask) 62static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
diff --git a/include/asm-x86/mach-default/mach_apicdef.h b/include/asm-x86/mach-default/mach_apicdef.h
index 936704f816d6..0c2d41c41b20 100644
--- a/include/asm-x86/mach-default/mach_apicdef.h
+++ b/include/asm-x86/mach-default/mach_apicdef.h
@@ -4,9 +4,9 @@
4#include <asm/apic.h> 4#include <asm/apic.h>
5 5
6#ifdef CONFIG_X86_64 6#ifdef CONFIG_X86_64
7#define APIC_ID_MASK (0xFFu<<24) 7#define APIC_ID_MASK (genapic->apic_id_mask)
8#define GET_APIC_ID(x) (((x)>>24)&0xFFu) 8#define GET_APIC_ID(x) (genapic->get_apic_id(x))
9#define SET_APIC_ID(x) (((x)<<24)) 9#define SET_APIC_ID(x) (genapic->set_apic_id(x))
10#else 10#else
11#define APIC_ID_MASK (0xF<<24) 11#define APIC_ID_MASK (0xF<<24)
12static inline unsigned get_apic_id(unsigned long x) 12static inline unsigned get_apic_id(unsigned long x)
diff --git a/include/asm-x86/mach-es7000/mach_apicdef.h b/include/asm-x86/mach-es7000/mach_apicdef.h
deleted file mode 100644
index a07e56744028..000000000000
--- a/include/asm-x86/mach-es7000/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_ES7000__MACH_APICDEF_H
2#define ASM_X86__MACH_ES7000__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_ES7000__MACH_APICDEF_H */
diff --git a/include/asm-x86/mach-numaq/mach_mpparse.h b/include/asm-x86/mach-numaq/mach_mpparse.h
deleted file mode 100644
index 74ade184920b..000000000000
--- a/include/asm-x86/mach-numaq/mach_mpparse.h
+++ /dev/null
@@ -1,7 +0,0 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
2#define ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H
3
4extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
5 char *productid);
6
7#endif /* ASM_X86__MACH_NUMAQ__MACH_MPPARSE_H */
diff --git a/include/asm-x86/mach-summit/mach_apicdef.h b/include/asm-x86/mach-summit/mach_apicdef.h
deleted file mode 100644
index d4bc8590c4f6..000000000000
--- a/include/asm-x86/mach-summit/mach_apicdef.h
+++ /dev/null
@@ -1,13 +0,0 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
2#define ASM_X86__MACH_SUMMIT__MACH_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (((x)>>24)&0xFF);
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif /* ASM_X86__MACH_SUMMIT__MACH_APICDEF_H */
diff --git a/include/asm-x86/mpspec.h b/include/asm-x86/mpspec.h
index 118da365e371..be2241a818f1 100644
--- a/include/asm-x86/mpspec.h
+++ b/include/asm-x86/mpspec.h
@@ -5,11 +5,12 @@
5 5
6#include <asm/mpspec_def.h> 6#include <asm/mpspec_def.h>
7 7
8extern int apic_version[MAX_APICS];
9
8#ifdef CONFIG_X86_32 10#ifdef CONFIG_X86_32
9#include <mach_mpspec.h> 11#include <mach_mpspec.h>
10 12
11extern unsigned int def_to_bigsmp; 13extern unsigned int def_to_bigsmp;
12extern int apic_version[MAX_APICS];
13extern u8 apicid_2_node[]; 14extern u8 apicid_2_node[];
14extern int pic_mode; 15extern int pic_mode;
15 16
diff --git a/include/asm-x86/msidef.h b/include/asm-x86/msidef.h
index 3139666a94fa..ed9190246876 100644
--- a/include/asm-x86/msidef.h
+++ b/include/asm-x86/msidef.h
@@ -48,4 +48,8 @@
48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \ 48#define MSI_ADDR_DEST_ID(dest) (((dest) << MSI_ADDR_DEST_ID_SHIFT) & \
49 MSI_ADDR_DEST_ID_MASK) 49 MSI_ADDR_DEST_ID_MASK)
50 50
51#define MSI_ADDR_IR_EXT_INT (1 << 4)
52#define MSI_ADDR_IR_SHV (1 << 3)
53#define MSI_ADDR_IR_INDEX1(index) ((index & 0x8000) >> 13)
54#define MSI_ADDR_IR_INDEX2(index) ((index & 0x7fff) << 5)
51#endif /* ASM_X86__MSIDEF_H */ 55#endif /* ASM_X86__MSIDEF_H */
diff --git a/include/asm-x86/msr-index.h b/include/asm-x86/msr-index.h
index 3052f058ab06..0bb43301a202 100644
--- a/include/asm-x86/msr-index.h
+++ b/include/asm-x86/msr-index.h
@@ -176,6 +176,7 @@
176#define MSR_IA32_TSC 0x00000010 176#define MSR_IA32_TSC 0x00000010
177#define MSR_IA32_PLATFORM_ID 0x00000017 177#define MSR_IA32_PLATFORM_ID 0x00000017
178#define MSR_IA32_EBL_CR_POWERON 0x0000002a 178#define MSR_IA32_EBL_CR_POWERON 0x0000002a
179#define MSR_IA32_FEATURE_CONTROL 0x0000003a
179 180
180#define MSR_IA32_APICBASE 0x0000001b 181#define MSR_IA32_APICBASE 0x0000001b
181#define MSR_IA32_APICBASE_BSP (1<<8) 182#define MSR_IA32_APICBASE_BSP (1<<8)
@@ -310,4 +311,19 @@
310/* Geode defined MSRs */ 311/* Geode defined MSRs */
311#define MSR_GEODE_BUSCONT_CONF0 0x00001900 312#define MSR_GEODE_BUSCONT_CONF0 0x00001900
312 313
314/* Intel VT MSRs */
315#define MSR_IA32_VMX_BASIC 0x00000480
316#define MSR_IA32_VMX_PINBASED_CTLS 0x00000481
317#define MSR_IA32_VMX_PROCBASED_CTLS 0x00000482
318#define MSR_IA32_VMX_EXIT_CTLS 0x00000483
319#define MSR_IA32_VMX_ENTRY_CTLS 0x00000484
320#define MSR_IA32_VMX_MISC 0x00000485
321#define MSR_IA32_VMX_CR0_FIXED0 0x00000486
322#define MSR_IA32_VMX_CR0_FIXED1 0x00000487
323#define MSR_IA32_VMX_CR4_FIXED0 0x00000488
324#define MSR_IA32_VMX_CR4_FIXED1 0x00000489
325#define MSR_IA32_VMX_VMCS_ENUM 0x0000048a
326#define MSR_IA32_VMX_PROCBASED_CTLS2 0x0000048b
327#define MSR_IA32_VMX_EPT_VPID_CAP 0x0000048c
328
313#endif /* ASM_X86__MSR_INDEX_H */ 329#endif /* ASM_X86__MSR_INDEX_H */
diff --git a/include/asm-x86/mach-numaq/mach_apic.h b/include/asm-x86/numaq/apic.h
index 7a0d39edfcfa..a8344ba6ea15 100644
--- a/include/asm-x86/mach-numaq/mach_apic.h
+++ b/include/asm-x86/numaq/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_APIC_H 1#ifndef __ASM_NUMAQ_APIC_H
2#define ASM_X86__MACH_NUMAQ__MACH_APIC_H 2#define __ASM_NUMAQ_APIC_H
3 3
4#include <asm/io.h> 4#include <asm/io.h>
5#include <linux/mmzone.h> 5#include <linux/mmzone.h>
@@ -135,4 +135,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
135 return cpuid_apic >> index_msb; 135 return cpuid_apic >> index_msb;
136} 136}
137 137
138#endif /* ASM_X86__MACH_NUMAQ__MACH_APIC_H */ 138#endif /* __ASM_NUMAQ_APIC_H */
diff --git a/include/asm-x86/mach-numaq/mach_apicdef.h b/include/asm-x86/numaq/apicdef.h
index f870ec5f7782..e012a46cc22a 100644
--- a/include/asm-x86/mach-numaq/mach_apicdef.h
+++ b/include/asm-x86/numaq/apicdef.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_APICDEF_H 1#ifndef __ASM_NUMAQ_APICDEF_H
2#define ASM_X86__MACH_NUMAQ__MACH_APICDEF_H 2#define __ASM_NUMAQ_APICDEF_H
3 3
4 4
5#define APIC_ID_MASK (0xF<<24) 5#define APIC_ID_MASK (0xF<<24)
@@ -11,4 +11,4 @@ static inline unsigned get_apic_id(unsigned long x)
11 11
12#define GET_APIC_ID(x) get_apic_id(x) 12#define GET_APIC_ID(x) get_apic_id(x)
13 13
14#endif /* ASM_X86__MACH_NUMAQ__MACH_APICDEF_H */ 14#endif
diff --git a/include/asm-x86/mach-numaq/mach_ipi.h b/include/asm-x86/numaq/ipi.h
index 1e835823f4bc..935588d286cf 100644
--- a/include/asm-x86/mach-numaq/mach_ipi.h
+++ b/include/asm-x86/numaq/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_IPI_H 1#ifndef __ASM_NUMAQ_IPI_H
2#define ASM_X86__MACH_NUMAQ__MACH_IPI_H 2#define __ASM_NUMAQ_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t, int vector); 4void send_IPI_mask_sequence(cpumask_t, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_NUMAQ__MACH_IPI_H */ 25#endif /* __ASM_NUMAQ_IPI_H */
diff --git a/include/asm-x86/numaq/mpparse.h b/include/asm-x86/numaq/mpparse.h
new file mode 100644
index 000000000000..252292e077b6
--- /dev/null
+++ b/include/asm-x86/numaq/mpparse.h
@@ -0,0 +1,7 @@
1#ifndef __ASM_NUMAQ_MPPARSE_H
2#define __ASM_NUMAQ_MPPARSE_H
3
4extern void numaq_mps_oem_check(struct mp_config_table *mpc, char *oem,
5 char *productid);
6
7#endif /* __ASM_NUMAQ_MPPARSE_H */
diff --git a/include/asm-x86/mach-numaq/mach_wakecpu.h b/include/asm-x86/numaq/wakecpu.h
index 0db8cea643c0..c577bda5b1c5 100644
--- a/include/asm-x86/mach-numaq/mach_wakecpu.h
+++ b/include/asm-x86/numaq/wakecpu.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H 1#ifndef __ASM_NUMAQ_WAKECPU_H
2#define ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H 2#define __ASM_NUMAQ_WAKECPU_H
3 3
4/* This file copes with machines that wakeup secondary CPUs by NMIs */ 4/* This file copes with machines that wakeup secondary CPUs by NMIs */
5 5
@@ -40,4 +40,4 @@ static inline void restore_NMI_vector(unsigned short *high, unsigned short *low)
40 40
41#define inquire_remote_apic(apicid) {} 41#define inquire_remote_apic(apicid) {}
42 42
43#endif /* ASM_X86__MACH_NUMAQ__MACH_WAKECPU_H */ 43#endif /* __ASM_NUMAQ_WAKECPU_H */
diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h
index 891971f57d35..d7d358a43996 100644
--- a/include/asm-x86/paravirt.h
+++ b/include/asm-x86/paravirt.h
@@ -201,12 +201,6 @@ struct pv_irq_ops {
201 201
202struct pv_apic_ops { 202struct pv_apic_ops {
203#ifdef CONFIG_X86_LOCAL_APIC 203#ifdef CONFIG_X86_LOCAL_APIC
204 /*
205 * Direct APIC operations, principally for VMI. Ideally
206 * these shouldn't be in this interface.
207 */
208 void (*apic_write)(unsigned long reg, u32 v);
209 u32 (*apic_read)(unsigned long reg);
210 void (*setup_boot_clock)(void); 204 void (*setup_boot_clock)(void);
211 void (*setup_secondary_clock)(void); 205 void (*setup_secondary_clock)(void);
212 206
@@ -910,19 +904,6 @@ static inline void slow_down_io(void)
910} 904}
911 905
912#ifdef CONFIG_X86_LOCAL_APIC 906#ifdef CONFIG_X86_LOCAL_APIC
913/*
914 * Basic functions accessing APICs.
915 */
916static inline void apic_write(unsigned long reg, u32 v)
917{
918 PVOP_VCALL2(pv_apic_ops.apic_write, reg, v);
919}
920
921static inline u32 apic_read(unsigned long reg)
922{
923 return PVOP_CALL1(unsigned long, pv_apic_ops.apic_read, reg);
924}
925
926static inline void setup_boot_clock(void) 907static inline void setup_boot_clock(void)
927{ 908{
928 PVOP_VCALL0(pv_apic_ops.setup_boot_clock); 909 PVOP_VCALL0(pv_apic_ops.setup_boot_clock);
diff --git a/include/asm-x86/processor-cyrix.h b/include/asm-x86/processor-cyrix.h
index 97568ada1f97..1198f2a0e42c 100644
--- a/include/asm-x86/processor-cyrix.h
+++ b/include/asm-x86/processor-cyrix.h
@@ -28,3 +28,11 @@ static inline void setCx86(u8 reg, u8 data)
28 outb(reg, 0x22); 28 outb(reg, 0x22);
29 outb(data, 0x23); 29 outb(data, 0x23);
30} 30}
31
32#define getCx86_old(reg) ({ outb((reg), 0x22); inb(0x23); })
33
34#define setCx86_old(reg, data) do { \
35 outb((reg), 0x22); \
36 outb((data), 0x23); \
37} while (0)
38
diff --git a/include/asm-x86/processor-flags.h b/include/asm-x86/processor-flags.h
index 5dd79774f693..dc5f0712f9fa 100644
--- a/include/asm-x86/processor-flags.h
+++ b/include/asm-x86/processor-flags.h
@@ -59,6 +59,7 @@
59#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */ 59#define X86_CR4_OSFXSR 0x00000200 /* enable fast FPU save and restore */
60#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */ 60#define X86_CR4_OSXMMEXCPT 0x00000400 /* enable unmasked SSE exceptions */
61#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */ 61#define X86_CR4_VMXE 0x00002000 /* enable VMX virtualization */
62#define X86_CR4_OSXSAVE 0x00040000 /* enable xsave and xrestore */
62 63
63/* 64/*
64 * x86-64 Task Priority Register, CR8 65 * x86-64 Task Priority Register, CR8
diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h
index 5eaf9bf0a623..c7d35464a4bb 100644
--- a/include/asm-x86/processor.h
+++ b/include/asm-x86/processor.h
@@ -76,11 +76,11 @@ struct cpuinfo_x86 {
76 int x86_tlbsize; 76 int x86_tlbsize;
77 __u8 x86_virt_bits; 77 __u8 x86_virt_bits;
78 __u8 x86_phys_bits; 78 __u8 x86_phys_bits;
79#endif
79 /* CPUID returned core id bits: */ 80 /* CPUID returned core id bits: */
80 __u8 x86_coreid_bits; 81 __u8 x86_coreid_bits;
81 /* Max extended CPUID function supported: */ 82 /* Max extended CPUID function supported: */
82 __u32 extended_cpuid_level; 83 __u32 extended_cpuid_level;
83#endif
84 /* Maximum supported CPUID level, -1=no CPUID: */ 84 /* Maximum supported CPUID level, -1=no CPUID: */
85 int cpuid_level; 85 int cpuid_level;
86 __u32 x86_capability[NCAPINTS]; 86 __u32 x86_capability[NCAPINTS];
@@ -166,11 +166,8 @@ extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
166extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); 166extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
167extern unsigned short num_cache_leaves; 167extern unsigned short num_cache_leaves;
168 168
169#if defined(CONFIG_X86_HT) || defined(CONFIG_X86_64) 169extern void detect_extended_topology(struct cpuinfo_x86 *c);
170extern void detect_ht(struct cpuinfo_x86 *c); 170extern void detect_ht(struct cpuinfo_x86 *c);
171#else
172static inline void detect_ht(struct cpuinfo_x86 *c) {}
173#endif
174 171
175static inline void native_cpuid(unsigned int *eax, unsigned int *ebx, 172static inline void native_cpuid(unsigned int *eax, unsigned int *ebx,
176 unsigned int *ecx, unsigned int *edx) 173 unsigned int *ecx, unsigned int *edx)
@@ -327,7 +324,12 @@ struct i387_fxsave_struct {
327 /* 16*16 bytes for each XMM-reg = 256 bytes: */ 324 /* 16*16 bytes for each XMM-reg = 256 bytes: */
328 u32 xmm_space[64]; 325 u32 xmm_space[64];
329 326
330 u32 padding[24]; 327 u32 padding[12];
328
329 union {
330 u32 padding1[12];
331 u32 sw_reserved[12];
332 };
331 333
332} __attribute__((aligned(16))); 334} __attribute__((aligned(16)));
333 335
@@ -351,10 +353,23 @@ struct i387_soft_struct {
351 u32 entry_eip; 353 u32 entry_eip;
352}; 354};
353 355
356struct xsave_hdr_struct {
357 u64 xstate_bv;
358 u64 reserved1[2];
359 u64 reserved2[5];
360} __attribute__((packed));
361
362struct xsave_struct {
363 struct i387_fxsave_struct i387;
364 struct xsave_hdr_struct xsave_hdr;
365 /* new processor state extensions will go here */
366} __attribute__ ((packed, aligned (64)));
367
354union thread_xstate { 368union thread_xstate {
355 struct i387_fsave_struct fsave; 369 struct i387_fsave_struct fsave;
356 struct i387_fxsave_struct fxsave; 370 struct i387_fxsave_struct fxsave;
357 struct i387_soft_struct soft; 371 struct i387_soft_struct soft;
372 struct xsave_struct xsave;
358}; 373};
359 374
360#ifdef CONFIG_X86_64 375#ifdef CONFIG_X86_64
diff --git a/include/asm-x86/setup.h b/include/asm-x86/setup.h
index 9030cb73c4d7..11b6cc14b289 100644
--- a/include/asm-x86/setup.h
+++ b/include/asm-x86/setup.h
@@ -38,6 +38,7 @@ struct x86_quirks {
38 void (*mpc_oem_pci_bus)(struct mpc_config_bus *m); 38 void (*mpc_oem_pci_bus)(struct mpc_config_bus *m);
39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable, 39 void (*smp_read_mpc_oem)(struct mp_config_oemtable *oemtable,
40 unsigned short oemsize); 40 unsigned short oemsize);
41 int (*setup_ioapic_ids)(void);
41}; 42};
42 43
43extern struct x86_quirks *x86_quirks; 44extern struct x86_quirks *x86_quirks;
diff --git a/include/asm-x86/sigcontext.h b/include/asm-x86/sigcontext.h
index 24879c85b291..ee813f4fe5d5 100644
--- a/include/asm-x86/sigcontext.h
+++ b/include/asm-x86/sigcontext.h
@@ -4,6 +4,40 @@
4#include <linux/compiler.h> 4#include <linux/compiler.h>
5#include <asm/types.h> 5#include <asm/types.h>
6 6
7#define FP_XSTATE_MAGIC1 0x46505853U
8#define FP_XSTATE_MAGIC2 0x46505845U
9#define FP_XSTATE_MAGIC2_SIZE sizeof(FP_XSTATE_MAGIC2)
10
11/*
12 * bytes 464..511 in the current 512byte layout of fxsave/fxrstor frame
13 * are reserved for SW usage. On cpu's supporting xsave/xrstor, these bytes
14 * are used to extended the fpstate pointer in the sigcontext, which now
15 * includes the extended state information along with fpstate information.
16 *
17 * Presence of FP_XSTATE_MAGIC1 at the beginning of this SW reserved
18 * area and FP_XSTATE_MAGIC2 at the end of memory layout
19 * (extended_size - FP_XSTATE_MAGIC2_SIZE) indicates the presence of the
20 * extended state information in the memory layout pointed by the fpstate
21 * pointer in sigcontext.
22 */
23struct _fpx_sw_bytes {
24 __u32 magic1; /* FP_XSTATE_MAGIC1 */
25 __u32 extended_size; /* total size of the layout referred by
26 * fpstate pointer in the sigcontext.
27 */
28 __u64 xstate_bv;
29 /* feature bit mask (including fp/sse/extended
30 * state) that is present in the memory
31 * layout.
32 */
33 __u32 xstate_size; /* actual xsave state size, based on the
34 * features saved in the layout.
35 * 'extended_size' will be greater than
36 * 'xstate_size'.
37 */
38 __u32 padding[7]; /* for future use. */
39};
40
7#ifdef __i386__ 41#ifdef __i386__
8/* 42/*
9 * As documented in the iBCS2 standard.. 43 * As documented in the iBCS2 standard..
@@ -53,7 +87,13 @@ struct _fpstate {
53 unsigned long reserved; 87 unsigned long reserved;
54 struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */ 88 struct _fpxreg _fxsr_st[8]; /* FXSR FPU reg data is ignored */
55 struct _xmmreg _xmm[8]; 89 struct _xmmreg _xmm[8];
56 unsigned long padding[56]; 90 unsigned long padding1[44];
91
92 union {
93 unsigned long padding2[12];
94 struct _fpx_sw_bytes sw_reserved; /* represents the extended
95 * state info */
96 };
57}; 97};
58 98
59#define X86_FXSR_MAGIC 0x0000 99#define X86_FXSR_MAGIC 0x0000
@@ -79,7 +119,15 @@ struct sigcontext {
79 unsigned long flags; 119 unsigned long flags;
80 unsigned long sp_at_signal; 120 unsigned long sp_at_signal;
81 unsigned short ss, __ssh; 121 unsigned short ss, __ssh;
82 struct _fpstate __user *fpstate; 122
123 /*
124 * fpstate is really (struct _fpstate *) or (struct _xstate *)
125 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
126 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
127 * of extended memory layout. See comments at the defintion of
128 * (struct _fpx_sw_bytes)
129 */
130 void __user *fpstate; /* zero when no FPU/extended context */
83 unsigned long oldmask; 131 unsigned long oldmask;
84 unsigned long cr2; 132 unsigned long cr2;
85}; 133};
@@ -130,7 +178,12 @@ struct _fpstate {
130 __u32 mxcsr_mask; 178 __u32 mxcsr_mask;
131 __u32 st_space[32]; /* 8*16 bytes for each FP-reg */ 179 __u32 st_space[32]; /* 8*16 bytes for each FP-reg */
132 __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */ 180 __u32 xmm_space[64]; /* 16*16 bytes for each XMM-reg */
133 __u32 reserved2[24]; 181 __u32 reserved2[12];
182 union {
183 __u32 reserved3[12];
184 struct _fpx_sw_bytes sw_reserved; /* represents the extended
185 * state information */
186 };
134}; 187};
135 188
136#ifdef __KERNEL__ 189#ifdef __KERNEL__
@@ -161,7 +214,15 @@ struct sigcontext {
161 unsigned long trapno; 214 unsigned long trapno;
162 unsigned long oldmask; 215 unsigned long oldmask;
163 unsigned long cr2; 216 unsigned long cr2;
164 struct _fpstate __user *fpstate; /* zero when no FPU context */ 217
218 /*
219 * fpstate is really (struct _fpstate *) or (struct _xstate *)
220 * depending on the FP_XSTATE_MAGIC1 encoded in the SW reserved
221 * bytes of (struct _fpstate) and FP_XSTATE_MAGIC2 present at the end
222 * of extended memory layout. See comments at the defintion of
223 * (struct _fpx_sw_bytes)
224 */
225 void __user *fpstate; /* zero when no FPU/extended context */
165 unsigned long reserved1[8]; 226 unsigned long reserved1[8];
166}; 227};
167#else /* __KERNEL__ */ 228#else /* __KERNEL__ */
@@ -202,4 +263,22 @@ struct sigcontext {
202 263
203#endif /* !__i386__ */ 264#endif /* !__i386__ */
204 265
266struct _xsave_hdr {
267 __u64 xstate_bv;
268 __u64 reserved1[2];
269 __u64 reserved2[5];
270};
271
272/*
273 * Extended state pointed by the fpstate pointer in the sigcontext.
274 * In addition to the fpstate, information encoded in the xstate_hdr
275 * indicates the presence of other extended state information
276 * supported by the processor and OS.
277 */
278struct _xstate {
279 struct _fpstate fpstate;
280 struct _xsave_hdr xstate_hdr;
281 /* new processor state extensions go here */
282};
283
205#endif /* ASM_X86__SIGCONTEXT_H */ 284#endif /* ASM_X86__SIGCONTEXT_H */
diff --git a/include/asm-x86/sigcontext32.h b/include/asm-x86/sigcontext32.h
index 4e2ec732dd01..8c347032c2f2 100644
--- a/include/asm-x86/sigcontext32.h
+++ b/include/asm-x86/sigcontext32.h
@@ -40,7 +40,11 @@ struct _fpstate_ia32 {
40 __u32 reserved; 40 __u32 reserved;
41 struct _fpxreg _fxsr_st[8]; 41 struct _fpxreg _fxsr_st[8];
42 struct _xmmreg _xmm[8]; /* It's actually 16 */ 42 struct _xmmreg _xmm[8]; /* It's actually 16 */
43 __u32 padding[56]; 43 __u32 padding[44];
44 union {
45 __u32 padding2[12];
46 struct _fpx_sw_bytes sw_reserved;
47 };
44}; 48};
45 49
46struct sigcontext_ia32 { 50struct sigcontext_ia32 {
diff --git a/include/asm-x86/smp.h b/include/asm-x86/smp.h
index 04f84f4e2c8b..29324c103341 100644
--- a/include/asm-x86/smp.h
+++ b/include/asm-x86/smp.h
@@ -167,30 +167,33 @@ extern int safe_smp_processor_id(void);
167 167
168#ifdef CONFIG_X86_LOCAL_APIC 168#ifdef CONFIG_X86_LOCAL_APIC
169 169
170#ifndef CONFIG_X86_64
170static inline int logical_smp_processor_id(void) 171static inline int logical_smp_processor_id(void)
171{ 172{
172 /* we don't want to mark this access volatile - bad code generation */ 173 /* we don't want to mark this access volatile - bad code generation */
173 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR)); 174 return GET_APIC_LOGICAL_ID(*(u32 *)(APIC_BASE + APIC_LDR));
174} 175}
175 176
176#ifndef CONFIG_X86_64 177#include <mach_apicdef.h>
177static inline unsigned int read_apic_id(void) 178static inline unsigned int read_apic_id(void)
178{ 179{
179 return *(u32 *)(APIC_BASE + APIC_ID); 180 unsigned int reg;
181
182 reg = *(u32 *)(APIC_BASE + APIC_ID);
183
184 return GET_APIC_ID(reg);
180} 185}
181#else
182extern unsigned int read_apic_id(void);
183#endif 186#endif
184 187
185 188
186# ifdef APIC_DEFINITION 189# if defined(APIC_DEFINITION) || defined(CONFIG_X86_64)
187extern int hard_smp_processor_id(void); 190extern int hard_smp_processor_id(void);
188# else 191# else
189# include <mach_apicdef.h> 192#include <mach_apicdef.h>
190static inline int hard_smp_processor_id(void) 193static inline int hard_smp_processor_id(void)
191{ 194{
192 /* we don't want to mark this access volatile - bad code generation */ 195 /* we don't want to mark this access volatile - bad code generation */
193 return GET_APIC_ID(read_apic_id()); 196 return read_apic_id();
194} 197}
195# endif /* APIC_DEFINITION */ 198# endif /* APIC_DEFINITION */
196 199
diff --git a/include/asm-x86/mach-summit/mach_apic.h b/include/asm-x86/summit/apic.h
index 7a66758d701d..c5b2e4b10358 100644
--- a/include/asm-x86/mach-summit/mach_apic.h
+++ b/include/asm-x86/summit/apic.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_APIC_H 1#ifndef __ASM_SUMMIT_APIC_H
2#define ASM_X86__MACH_SUMMIT__MACH_APIC_H 2#define __ASM_SUMMIT_APIC_H
3 3
4#include <asm/smp.h> 4#include <asm/smp.h>
5 5
@@ -21,7 +21,7 @@ static inline cpumask_t target_cpus(void)
21 * Just start on cpu 0. IRQ balancing will spread load 21 * Just start on cpu 0. IRQ balancing will spread load
22 */ 22 */
23 return cpumask_of_cpu(0); 23 return cpumask_of_cpu(0);
24} 24}
25#define TARGET_CPUS (target_cpus()) 25#define TARGET_CPUS (target_cpus())
26 26
27#define INT_DELIVERY_MODE (dest_LowestPrio) 27#define INT_DELIVERY_MODE (dest_LowestPrio)
@@ -30,10 +30,10 @@ static inline cpumask_t target_cpus(void)
30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid) 30static inline unsigned long check_apicid_used(physid_mask_t bitmap, int apicid)
31{ 31{
32 return 0; 32 return 0;
33} 33}
34 34
35/* we don't use the phys_cpu_present_map to indicate apicid presence */ 35/* we don't use the phys_cpu_present_map to indicate apicid presence */
36static inline unsigned long check_apicid_present(int bit) 36static inline unsigned long check_apicid_present(int bit)
37{ 37{
38 return 1; 38 return 1;
39} 39}
@@ -122,7 +122,7 @@ static inline physid_mask_t ioapic_phys_id_map(physid_mask_t phys_id_map)
122 122
123static inline physid_mask_t apicid_to_cpu_present(int apicid) 123static inline physid_mask_t apicid_to_cpu_present(int apicid)
124{ 124{
125 return physid_mask_of_physid(apicid); 125 return physid_mask_of_physid(0);
126} 126}
127 127
128static inline void setup_portio_remap(void) 128static inline void setup_portio_remap(void)
@@ -143,22 +143,22 @@ static inline unsigned int cpu_mask_to_apicid(cpumask_t cpumask)
143 int num_bits_set; 143 int num_bits_set;
144 int cpus_found = 0; 144 int cpus_found = 0;
145 int cpu; 145 int cpu;
146 int apicid; 146 int apicid;
147 147
148 num_bits_set = cpus_weight(cpumask); 148 num_bits_set = cpus_weight(cpumask);
149 /* Return id to all */ 149 /* Return id to all */
150 if (num_bits_set == NR_CPUS) 150 if (num_bits_set == NR_CPUS)
151 return (int) 0xFF; 151 return (int) 0xFF;
152 /* 152 /*
153 * The cpus in the mask must all be on the apic cluster. If are not 153 * The cpus in the mask must all be on the apic cluster. If are not
154 * on the same apicid cluster return default value of TARGET_CPUS. 154 * on the same apicid cluster return default value of TARGET_CPUS.
155 */ 155 */
156 cpu = first_cpu(cpumask); 156 cpu = first_cpu(cpumask);
157 apicid = cpu_to_logical_apicid(cpu); 157 apicid = cpu_to_logical_apicid(cpu);
158 while (cpus_found < num_bits_set) { 158 while (cpus_found < num_bits_set) {
159 if (cpu_isset(cpu, cpumask)) { 159 if (cpu_isset(cpu, cpumask)) {
160 int new_apicid = cpu_to_logical_apicid(cpu); 160 int new_apicid = cpu_to_logical_apicid(cpu);
161 if (apicid_cluster(apicid) != 161 if (apicid_cluster(apicid) !=
162 apicid_cluster(new_apicid)){ 162 apicid_cluster(new_apicid)){
163 printk ("%s: Not a valid mask!\n",__FUNCTION__); 163 printk ("%s: Not a valid mask!\n",__FUNCTION__);
164 return 0xFF; 164 return 0xFF;
@@ -182,4 +182,4 @@ static inline u32 phys_pkg_id(u32 cpuid_apic, int index_msb)
182 return hard_smp_processor_id() >> index_msb; 182 return hard_smp_processor_id() >> index_msb;
183} 183}
184 184
185#endif /* ASM_X86__MACH_SUMMIT__MACH_APIC_H */ 185#endif /* __ASM_SUMMIT_APIC_H */
diff --git a/include/asm-x86/summit/apicdef.h b/include/asm-x86/summit/apicdef.h
new file mode 100644
index 000000000000..f3fbca1f61c1
--- /dev/null
+++ b/include/asm-x86/summit/apicdef.h
@@ -0,0 +1,13 @@
1#ifndef __ASM_SUMMIT_APICDEF_H
2#define __ASM_SUMMIT_APICDEF_H
3
4#define APIC_ID_MASK (0xFF<<24)
5
6static inline unsigned get_apic_id(unsigned long x)
7{
8 return (x>>24)&0xFF;
9}
10
11#define GET_APIC_ID(x) get_apic_id(x)
12
13#endif
diff --git a/include/asm-x86/mach-summit/mach_ipi.h b/include/asm-x86/summit/ipi.h
index a3b31c528d90..53bd1e7bd7b4 100644
--- a/include/asm-x86/mach-summit/mach_ipi.h
+++ b/include/asm-x86/summit/ipi.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_IPI_H 1#ifndef __ASM_SUMMIT_IPI_H
2#define ASM_X86__MACH_SUMMIT__MACH_IPI_H 2#define __ASM_SUMMIT_IPI_H
3 3
4void send_IPI_mask_sequence(cpumask_t mask, int vector); 4void send_IPI_mask_sequence(cpumask_t mask, int vector);
5 5
@@ -22,4 +22,4 @@ static inline void send_IPI_all(int vector)
22 send_IPI_mask(cpu_online_map, vector); 22 send_IPI_mask(cpu_online_map, vector);
23} 23}
24 24
25#endif /* ASM_X86__MACH_SUMMIT__MACH_IPI_H */ 25#endif /* __ASM_SUMMIT_IPI_H */
diff --git a/include/asm-x86/mach-summit/irq_vectors_limits.h b/include/asm-x86/summit/irq_vectors_limits.h
index 22f376ad68e1..890ce3f5e09a 100644
--- a/include/asm-x86/mach-summit/irq_vectors_limits.h
+++ b/include/asm-x86/summit/irq_vectors_limits.h
@@ -1,5 +1,5 @@
1#ifndef ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H 1#ifndef _ASM_IRQ_VECTORS_LIMITS_H
2#define ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H 2#define _ASM_IRQ_VECTORS_LIMITS_H
3 3
4/* 4/*
5 * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs, 5 * For Summit or generic (i.e. installer) kernels, we have lots of I/O APICs,
@@ -11,4 +11,4 @@
11#define NR_IRQS 224 11#define NR_IRQS 224
12#define NR_IRQ_VECTORS 1024 12#define NR_IRQ_VECTORS 1024
13 13
14#endif /* ASM_X86__MACH_SUMMIT__IRQ_VECTORS_LIMITS_H */ 14#endif /* _ASM_IRQ_VECTORS_LIMITS_H */
diff --git a/include/asm-x86/mach-summit/mach_mpparse.h b/include/asm-x86/summit/mpparse.h
index 92396f28772b..013ce6fab2d5 100644
--- a/include/asm-x86/mach-summit/mach_mpparse.h
+++ b/include/asm-x86/summit/mpparse.h
@@ -1,7 +1,6 @@
1#ifndef ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H 1#ifndef __ASM_SUMMIT_MPPARSE_H
2#define ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H 2#define __ASM_SUMMIT_MPPARSE_H
3 3
4#include <mach_apic.h>
5#include <asm/tsc.h> 4#include <asm/tsc.h>
6 5
7extern int use_cyclone; 6extern int use_cyclone;
@@ -12,11 +11,11 @@ extern void setup_summit(void);
12#define setup_summit() {} 11#define setup_summit() {}
13#endif 12#endif
14 13
15static inline int mps_oem_check(struct mp_config_table *mpc, char *oem, 14static inline int mps_oem_check(struct mp_config_table *mpc, char *oem,
16 char *productid) 15 char *productid)
17{ 16{
18 if (!strncmp(oem, "IBM ENSW", 8) && 17 if (!strncmp(oem, "IBM ENSW", 8) &&
19 (!strncmp(productid, "VIGIL SMP", 9) 18 (!strncmp(productid, "VIGIL SMP", 9)
20 || !strncmp(productid, "EXA", 3) 19 || !strncmp(productid, "EXA", 3)
21 || !strncmp(productid, "RUTHLESS SMP", 12))){ 20 || !strncmp(productid, "RUTHLESS SMP", 12))){
22 mark_tsc_unstable("Summit based system"); 21 mark_tsc_unstable("Summit based system");
@@ -107,4 +106,4 @@ static inline int is_WPEG(struct rio_detail *rio){
107 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG); 106 rio->type == LookOutAWPEG || rio->type == LookOutBWPEG);
108} 107}
109 108
110#endif /* ASM_X86__MACH_SUMMIT__MACH_MPPARSE_H */ 109#endif /* __ASM_SUMMIT_MPPARSE_H */
diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h
index 4db0066a3a35..3f4e52bb77f5 100644
--- a/include/asm-x86/thread_info.h
+++ b/include/asm-x86/thread_info.h
@@ -241,6 +241,7 @@ static inline struct thread_info *stack_thread_info(void)
241#define TS_POLLING 0x0004 /* true if in idle loop 241#define TS_POLLING 0x0004 /* true if in idle loop
242 and not sleeping */ 242 and not sleeping */
243#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */ 243#define TS_RESTORE_SIGMASK 0x0008 /* restore signal mask in do_signal() */
244#define TS_XSAVE 0x0010 /* Use xsave/xrstor */
244 245
245#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING) 246#define tsk_is_polling(t) (task_thread_info(t)->status & TS_POLLING)
246 247
diff --git a/include/asm-x86/ucontext.h b/include/asm-x86/ucontext.h
index 9948dd328084..89eaa5456a7e 100644
--- a/include/asm-x86/ucontext.h
+++ b/include/asm-x86/ucontext.h
@@ -1,6 +1,12 @@
1#ifndef ASM_X86__UCONTEXT_H 1#ifndef ASM_X86__UCONTEXT_H
2#define ASM_X86__UCONTEXT_H 2#define ASM_X86__UCONTEXT_H
3 3
4#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state
5 * information in the memory layout pointed
6 * by the fpstate pointer in the ucontext's
7 * sigcontext struct (uc_mcontext).
8 */
9
4struct ucontext { 10struct ucontext {
5 unsigned long uc_flags; 11 unsigned long uc_flags;
6 struct ucontext *uc_link; 12 struct ucontext *uc_link;
diff --git a/include/asm-x86/xcr.h b/include/asm-x86/xcr.h
new file mode 100644
index 000000000000..f2cba4e79a23
--- /dev/null
+++ b/include/asm-x86/xcr.h
@@ -0,0 +1,49 @@
1/* -*- linux-c -*- ------------------------------------------------------- *
2 *
3 * Copyright 2008 rPath, Inc. - All Rights Reserved
4 *
5 * This file is part of the Linux kernel, and is made available under
6 * the terms of the GNU General Public License version 2 or (at your
7 * option) any later version; incorporated herein by reference.
8 *
9 * ----------------------------------------------------------------------- */
10
11/*
12 * asm-x86/xcr.h
13 *
14 * Definitions for the eXtended Control Register instructions
15 */
16
17#ifndef _ASM_X86_XCR_H
18#define _ASM_X86_XCR_H
19
20#define XCR_XFEATURE_ENABLED_MASK 0x00000000
21
22#ifdef __KERNEL__
23# ifndef __ASSEMBLY__
24
25#include <linux/types.h>
26
27static inline u64 xgetbv(u32 index)
28{
29 u32 eax, edx;
30
31 asm volatile(".byte 0x0f,0x01,0xd0" /* xgetbv */
32 : "=a" (eax), "=d" (edx)
33 : "c" (index));
34 return eax + ((u64)edx << 32);
35}
36
37static inline void xsetbv(u32 index, u64 value)
38{
39 u32 eax = value;
40 u32 edx = value >> 32;
41
42 asm volatile(".byte 0x0f,0x01,0xd1" /* xsetbv */
43 : : "a" (eax), "d" (edx), "c" (index));
44}
45
46# endif /* __ASSEMBLY__ */
47#endif /* __KERNEL__ */
48
49#endif /* _ASM_X86_XCR_H */
diff --git a/include/asm-x86/xsave.h b/include/asm-x86/xsave.h
new file mode 100644
index 000000000000..08e9a1ac07a9
--- /dev/null
+++ b/include/asm-x86/xsave.h
@@ -0,0 +1,118 @@
1#ifndef __ASM_X86_XSAVE_H
2#define __ASM_X86_XSAVE_H
3
4#include <linux/types.h>
5#include <asm/processor.h>
6#include <asm/i387.h>
7
8#define XSTATE_FP 0x1
9#define XSTATE_SSE 0x2
10
11#define XSTATE_FPSSE (XSTATE_FP | XSTATE_SSE)
12
13#define FXSAVE_SIZE 512
14
15/*
16 * These are the features that the OS can handle currently.
17 */
18#define XCNTXT_MASK (XSTATE_FP | XSTATE_SSE)
19
20#ifdef CONFIG_X86_64
21#define REX_PREFIX "0x48, "
22#else
23#define REX_PREFIX
24#endif
25
26extern unsigned int xstate_size;
27extern u64 pcntxt_mask;
28extern struct xsave_struct *init_xstate_buf;
29
30extern void xsave_cntxt_init(void);
31extern void xsave_init(void);
32extern int init_fpu(struct task_struct *child);
33extern int check_for_xstate(struct i387_fxsave_struct __user *buf,
34 void __user *fpstate,
35 struct _fpx_sw_bytes *sw);
36
37static inline int xrstor_checking(struct xsave_struct *fx)
38{
39 int err;
40
41 asm volatile("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
42 "2:\n"
43 ".section .fixup,\"ax\"\n"
44 "3: movl $-1,%[err]\n"
45 " jmp 2b\n"
46 ".previous\n"
47 _ASM_EXTABLE(1b, 3b)
48 : [err] "=r" (err)
49 : "D" (fx), "m" (*fx), "a" (-1), "d" (-1), "0" (0)
50 : "memory");
51
52 return err;
53}
54
55static inline int xsave_user(struct xsave_struct __user *buf)
56{
57 int err;
58 __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x27\n"
59 "2:\n"
60 ".section .fixup,\"ax\"\n"
61 "3: movl $-1,%[err]\n"
62 " jmp 2b\n"
63 ".previous\n"
64 ".section __ex_table,\"a\"\n"
65 _ASM_ALIGN "\n"
66 _ASM_PTR "1b,3b\n"
67 ".previous"
68 : [err] "=r" (err)
69 : "D" (buf), "a" (-1), "d" (-1), "0" (0)
70 : "memory");
71 if (unlikely(err) && __clear_user(buf, xstate_size))
72 err = -EFAULT;
73 /* No need to clear here because the caller clears USED_MATH */
74 return err;
75}
76
77static inline int xrestore_user(struct xsave_struct __user *buf, u64 mask)
78{
79 int err;
80 struct xsave_struct *xstate = ((__force struct xsave_struct *)buf);
81 u32 lmask = mask;
82 u32 hmask = mask >> 32;
83
84 __asm__ __volatile__("1: .byte " REX_PREFIX "0x0f,0xae,0x2f\n"
85 "2:\n"
86 ".section .fixup,\"ax\"\n"
87 "3: movl $-1,%[err]\n"
88 " jmp 2b\n"
89 ".previous\n"
90 ".section __ex_table,\"a\"\n"
91 _ASM_ALIGN "\n"
92 _ASM_PTR "1b,3b\n"
93 ".previous"
94 : [err] "=r" (err)
95 : "D" (xstate), "a" (lmask), "d" (hmask), "0" (0)
96 : "memory"); /* memory required? */
97 return err;
98}
99
100static inline void xrstor_state(struct xsave_struct *fx, u64 mask)
101{
102 u32 lmask = mask;
103 u32 hmask = mask >> 32;
104
105 asm volatile(".byte " REX_PREFIX "0x0f,0xae,0x2f\n\t"
106 : : "D" (fx), "m" (*fx), "a" (lmask), "d" (hmask)
107 : "memory");
108}
109
110static inline void xsave(struct task_struct *tsk)
111{
112 /* This, however, we can work around by forcing the compiler to select
113 an addressing mode that doesn't require extended registers. */
114 __asm__ __volatile__(".byte " REX_PREFIX "0x0f,0xae,0x27"
115 : : "D" (&(tsk->thread.xstate->xsave)),
116 "a" (-1), "d"(-1) : "memory");
117}
118#endif
diff --git a/include/linux/dmar.h b/include/linux/dmar.h
index 56c73b847551..c360c558e59e 100644
--- a/include/linux/dmar.h
+++ b/include/linux/dmar.h
@@ -25,9 +25,99 @@
25#include <linux/types.h> 25#include <linux/types.h>
26#include <linux/msi.h> 26#include <linux/msi.h>
27 27
28#ifdef CONFIG_DMAR 28#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
29struct intel_iommu; 29struct intel_iommu;
30 30
31struct dmar_drhd_unit {
32 struct list_head list; /* list of drhd units */
33 struct acpi_dmar_header *hdr; /* ACPI header */
34 u64 reg_base_addr; /* register base address*/
35 struct pci_dev **devices; /* target device array */
36 int devices_cnt; /* target device count */
37 u8 ignored:1; /* ignore drhd */
38 u8 include_all:1;
39 struct intel_iommu *iommu;
40};
41
42extern struct list_head dmar_drhd_units;
43
44#define for_each_drhd_unit(drhd) \
45 list_for_each_entry(drhd, &dmar_drhd_units, list)
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49extern int dmar_dev_scope_init(void);
50
51/* Intel IOMMU detection */
52extern void detect_intel_iommu(void);
53
54
55extern int parse_ioapics_under_ir(void);
56extern int alloc_iommu(struct dmar_drhd_unit *);
57#else
58static inline void detect_intel_iommu(void)
59{
60 return;
61}
62
63static inline int dmar_table_init(void)
64{
65 return -ENODEV;
66}
67#endif /* !CONFIG_DMAR && !CONFIG_INTR_REMAP */
68
69#ifdef CONFIG_INTR_REMAP
70extern int intr_remapping_enabled;
71extern int enable_intr_remapping(int);
72
73struct irte {
74 union {
75 struct {
76 __u64 present : 1,
77 fpd : 1,
78 dst_mode : 1,
79 redir_hint : 1,
80 trigger_mode : 1,
81 dlvry_mode : 3,
82 avail : 4,
83 __reserved_1 : 4,
84 vector : 8,
85 __reserved_2 : 8,
86 dest_id : 32;
87 };
88 __u64 low;
89 };
90
91 union {
92 struct {
93 __u64 sid : 16,
94 sq : 2,
95 svt : 2,
96 __reserved_3 : 44;
97 };
98 __u64 high;
99 };
100};
101extern int get_irte(int irq, struct irte *entry);
102extern int modify_irte(int irq, struct irte *irte_modified);
103extern int alloc_irte(struct intel_iommu *iommu, int irq, u16 count);
104extern int set_irte_irq(int irq, struct intel_iommu *iommu, u16 index,
105 u16 sub_handle);
106extern int map_irq_to_irte_handle(int irq, u16 *sub_handle);
107extern int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index);
108extern int flush_irte(int irq);
109extern int free_irte(int irq);
110
111extern int irq_remapped(int irq);
112extern struct intel_iommu *map_dev_to_ir(struct pci_dev *dev);
113extern struct intel_iommu *map_ioapic_to_ir(int apic);
114#else
115#define irq_remapped(irq) (0)
116#define enable_intr_remapping(mode) (-1)
117#define intr_remapping_enabled (0)
118#endif
119
120#ifdef CONFIG_DMAR
31extern const char *dmar_get_fault_reason(u8 fault_reason); 121extern const char *dmar_get_fault_reason(u8 fault_reason);
32 122
33/* Can't use the common MSI interrupt functions 123/* Can't use the common MSI interrupt functions
@@ -40,47 +130,30 @@ extern void dmar_msi_write(int irq, struct msi_msg *msg);
40extern int dmar_set_interrupt(struct intel_iommu *iommu); 130extern int dmar_set_interrupt(struct intel_iommu *iommu);
41extern int arch_setup_dmar_msi(unsigned int irq); 131extern int arch_setup_dmar_msi(unsigned int irq);
42 132
43/* Intel IOMMU detection and initialization functions */ 133extern int iommu_detected, no_iommu;
44extern void detect_intel_iommu(void);
45extern int intel_iommu_init(void);
46
47extern int dmar_table_init(void);
48extern int early_dmar_detect(void);
49
50extern struct list_head dmar_drhd_units;
51extern struct list_head dmar_rmrr_units; 134extern struct list_head dmar_rmrr_units;
52
53struct dmar_drhd_unit {
54 struct list_head list; /* list of drhd units */
55 u64 reg_base_addr; /* register base address*/
56 struct pci_dev **devices; /* target device array */
57 int devices_cnt; /* target device count */
58 u8 ignored:1; /* ignore drhd */
59 u8 include_all:1;
60 struct intel_iommu *iommu;
61};
62
63struct dmar_rmrr_unit { 135struct dmar_rmrr_unit {
64 struct list_head list; /* list of rmrr units */ 136 struct list_head list; /* list of rmrr units */
137 struct acpi_dmar_header *hdr; /* ACPI header */
65 u64 base_address; /* reserved base address*/ 138 u64 base_address; /* reserved base address*/
66 u64 end_address; /* reserved end address */ 139 u64 end_address; /* reserved end address */
67 struct pci_dev **devices; /* target devices */ 140 struct pci_dev **devices; /* target devices */
68 int devices_cnt; /* target device count */ 141 int devices_cnt; /* target device count */
69}; 142};
70 143
71#define for_each_drhd_unit(drhd) \
72 list_for_each_entry(drhd, &dmar_drhd_units, list)
73#define for_each_rmrr_units(rmrr) \ 144#define for_each_rmrr_units(rmrr) \
74 list_for_each_entry(rmrr, &dmar_rmrr_units, list) 145 list_for_each_entry(rmrr, &dmar_rmrr_units, list)
146/* Intel DMAR initialization functions */
147extern int intel_iommu_init(void);
148extern int dmar_disabled;
75#else 149#else
76static inline void detect_intel_iommu(void)
77{
78 return;
79}
80static inline int intel_iommu_init(void) 150static inline int intel_iommu_init(void)
81{ 151{
152#ifdef CONFIG_INTR_REMAP
153 return dmar_dev_scope_init();
154#else
82 return -ENODEV; 155 return -ENODEV;
156#endif
83} 157}
84
85#endif /* !CONFIG_DMAR */ 158#endif /* !CONFIG_DMAR */
86#endif /* __DMAR_H__ */ 159#endif /* __DMAR_H__ */
diff --git a/include/linux/ioport.h b/include/linux/ioport.h
index 350033e8f4e1..ee9bcc6f32b6 100644
--- a/include/linux/ioport.h
+++ b/include/linux/ioport.h
@@ -108,6 +108,9 @@ extern struct resource iomem_resource;
108 108
109extern int request_resource(struct resource *root, struct resource *new); 109extern int request_resource(struct resource *root, struct resource *new);
110extern int release_resource(struct resource *new); 110extern int release_resource(struct resource *new);
111extern void reserve_region_with_split(struct resource *root,
112 resource_size_t start, resource_size_t end,
113 const char *name);
111extern int insert_resource(struct resource *parent, struct resource *new); 114extern int insert_resource(struct resource *parent, struct resource *new);
112extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new); 115extern void insert_resource_expand_to_fit(struct resource *root, struct resource *new);
113extern int allocate_resource(struct resource *root, struct resource *new, 116extern int allocate_resource(struct resource *root, struct resource *new,
diff --git a/include/linux/irq.h b/include/linux/irq.h
index 8ccb462ea42c..8d9411bc60f6 100644
--- a/include/linux/irq.h
+++ b/include/linux/irq.h
@@ -62,6 +62,7 @@ typedef void (*irq_flow_handler_t)(unsigned int irq,
62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */ 62#define IRQ_MOVE_PENDING 0x00200000 /* need to re-target IRQ destination */
63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */ 63#define IRQ_NO_BALANCING 0x00400000 /* IRQ is excluded from balancing */
64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */ 64#define IRQ_SPURIOUS_DISABLED 0x00800000 /* IRQ was disabled by the spurious trap */
65#define IRQ_MOVE_PCNTXT 0x01000000 /* IRQ migration from process context */
65 66
66#ifdef CONFIG_IRQ_PER_CPU 67#ifdef CONFIG_IRQ_PER_CPU
67# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU) 68# define CHECK_IRQ_PER_CPU(var) ((var) & IRQ_PER_CPU)
diff --git a/include/linux/percpu.h b/include/linux/percpu.h
index fac3337547eb..9f2a3751873a 100644
--- a/include/linux/percpu.h
+++ b/include/linux/percpu.h
@@ -23,12 +23,19 @@
23 __attribute__((__section__(SHARED_ALIGNED_SECTION))) \ 23 __attribute__((__section__(SHARED_ALIGNED_SECTION))) \
24 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \ 24 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name \
25 ____cacheline_aligned_in_smp 25 ____cacheline_aligned_in_smp
26
27#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
28 __attribute__((__section__(".data.percpu.page_aligned"))) \
29 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
26#else 30#else
27#define DEFINE_PER_CPU(type, name) \ 31#define DEFINE_PER_CPU(type, name) \
28 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name 32 PER_CPU_ATTRIBUTES __typeof__(type) per_cpu__##name
29 33
30#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \ 34#define DEFINE_PER_CPU_SHARED_ALIGNED(type, name) \
31 DEFINE_PER_CPU(type, name) 35 DEFINE_PER_CPU(type, name)
36
37#define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
38 DEFINE_PER_CPU(type, name)
32#endif 39#endif
33 40
34#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var) 41#define EXPORT_PER_CPU_SYMBOL(var) EXPORT_SYMBOL(per_cpu__##var)
diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
index 0314074fa232..60c49e324390 100644
--- a/kernel/irq/manage.c
+++ b/kernel/irq/manage.c
@@ -89,7 +89,14 @@ int irq_set_affinity(unsigned int irq, cpumask_t cpumask)
89 set_balance_irq_affinity(irq, cpumask); 89 set_balance_irq_affinity(irq, cpumask);
90 90
91#ifdef CONFIG_GENERIC_PENDING_IRQ 91#ifdef CONFIG_GENERIC_PENDING_IRQ
92 set_pending_irq(irq, cpumask); 92 if (desc->status & IRQ_MOVE_PCNTXT) {
93 unsigned long flags;
94
95 spin_lock_irqsave(&desc->lock, flags);
96 desc->chip->set_affinity(irq, cpumask);
97 spin_unlock_irqrestore(&desc->lock, flags);
98 } else
99 set_pending_irq(irq, cpumask);
93#else 100#else
94 desc->affinity = cpumask; 101 desc->affinity = cpumask;
95 desc->chip->set_affinity(irq, cpumask); 102 desc->chip->set_affinity(irq, cpumask);
diff --git a/kernel/resource.c b/kernel/resource.c
index 03d796c1b2e9..414d6fc9131e 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -516,6 +516,74 @@ int adjust_resource(struct resource *res, resource_size_t start, resource_size_t
516 return result; 516 return result;
517} 517}
518 518
519static void __init __reserve_region_with_split(struct resource *root,
520 resource_size_t start, resource_size_t end,
521 const char *name)
522{
523 struct resource *parent = root;
524 struct resource *conflict;
525 struct resource *res = kzalloc(sizeof(*res), GFP_KERNEL);
526
527 if (!res)
528 return;
529
530 res->name = name;
531 res->start = start;
532 res->end = end;
533 res->flags = IORESOURCE_BUSY;
534
535 for (;;) {
536 conflict = __request_resource(parent, res);
537 if (!conflict)
538 break;
539 if (conflict != parent) {
540 parent = conflict;
541 if (!(conflict->flags & IORESOURCE_BUSY))
542 continue;
543 }
544
545 /* Uhhuh, that didn't work out.. */
546 kfree(res);
547 res = NULL;
548 break;
549 }
550
551 if (!res) {
552 printk(KERN_DEBUG " __reserve_region_with_split: (%s) [%llx, %llx], res: (%s) [%llx, %llx]\n",
553 conflict->name, conflict->start, conflict->end,
554 name, start, end);
555
556 /* failed, split and try again */
557
558 /* conflict coverred whole area */
559 if (conflict->start <= start && conflict->end >= end)
560 return;
561
562 if (conflict->start > start)
563 __reserve_region_with_split(root, start, conflict->start-1, name);
564 if (!(conflict->flags & IORESOURCE_BUSY)) {
565 resource_size_t common_start, common_end;
566
567 common_start = max(conflict->start, start);
568 common_end = min(conflict->end, end);
569 if (common_start < common_end)
570 __reserve_region_with_split(root, common_start, common_end, name);
571 }
572 if (conflict->end < end)
573 __reserve_region_with_split(root, conflict->end+1, end, name);
574 }
575
576}
577
578void reserve_region_with_split(struct resource *root,
579 resource_size_t start, resource_size_t end,
580 const char *name)
581{
582 write_lock(&resource_lock);
583 __reserve_region_with_split(root, start, end, name);
584 write_unlock(&resource_lock);
585}
586
519EXPORT_SYMBOL(adjust_resource); 587EXPORT_SYMBOL(adjust_resource);
520 588
521/** 589/**